netns: Use net_eq() to compare net-namespaces for optimization.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / ip6_output.c
... / ...
CommitLineData
1/*
2 * IPv6 output functions
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
29#include <linux/errno.h>
30#include <linux/kernel.h>
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
39#include <linux/module.h>
40
41#include <linux/netfilter.h>
42#include <linux/netfilter_ipv6.h>
43
44#include <net/sock.h>
45#include <net/snmp.h>
46
47#include <net/ipv6.h>
48#include <net/ndisc.h>
49#include <net/protocol.h>
50#include <net/ip6_route.h>
51#include <net/addrconf.h>
52#include <net/rawv6.h>
53#include <net/icmp.h>
54#include <net/xfrm.h>
55#include <net/checksum.h>
56#include <linux/mroute6.h>
57
58static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
60static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
61{
62 static u32 ipv6_fragmentation_id = 1;
63 static DEFINE_SPINLOCK(ip6_id_lock);
64
65 spin_lock_bh(&ip6_id_lock);
66 fhdr->identification = htonl(ipv6_fragmentation_id);
67 if (++ipv6_fragmentation_id == 0)
68 ipv6_fragmentation_id = 1;
69 spin_unlock_bh(&ip6_id_lock);
70}
71
72int __ip6_local_out(struct sk_buff *skb)
73{
74 int len;
75
76 len = skb->len - sizeof(struct ipv6hdr);
77 if (len > IPV6_MAXPLEN)
78 len = 0;
79 ipv6_hdr(skb)->payload_len = htons(len);
80
81 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
82 dst_output);
83}
84
/*
 * Send a locally generated packet: run __ip6_local_out() and, only when
 * it returns 1, continue with dst_output().  Any other hook result is
 * returned to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int err = __ip6_local_out(skb);

	if (unlikely(err != 1))
		return err;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
96
97static int ip6_output_finish(struct sk_buff *skb)
98{
99 struct dst_entry *dst = skb->dst;
100
101 if (dst->hh)
102 return neigh_hh_output(dst->hh, skb);
103 else if (dst->neighbour)
104 return dst->neighbour->output(skb);
105
106 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
107 kfree_skb(skb);
108 return -EINVAL;
109
110}
111
112/* dev_loopback_xmit for use with netfilter. */
113static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
114{
115 skb_reset_mac_header(newskb);
116 __skb_pull(newskb, skb_network_offset(newskb));
117 newskb->pkt_type = PACKET_LOOPBACK;
118 newskb->ip_summed = CHECKSUM_UNNECESSARY;
119 BUG_TRAP(newskb->dst);
120
121 netif_rx(newskb);
122 return 0;
123}
124
125
126static int ip6_output2(struct sk_buff *skb)
127{
128 struct dst_entry *dst = skb->dst;
129 struct net_device *dev = dst->dev;
130
131 skb->protocol = htons(ETH_P_IPV6);
132 skb->dev = dev;
133
134 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
135 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
136 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
137
138 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
139 ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
140 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
141 &ipv6_hdr(skb)->saddr))) {
142 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
143
144 /* Do not check for IFF_ALLMULTI; multicast routing
145 is not supported in any case.
146 */
147 if (newskb)
148 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
149 NULL, newskb->dev,
150 ip6_dev_loopback_xmit);
151
152 if (ipv6_hdr(skb)->hop_limit == 0) {
153 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
154 kfree_skb(skb);
155 return 0;
156 }
157 }
158
159 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
160 }
161
162 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
163 ip6_output_finish);
164}
165
166static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
167{
168 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
169
170 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
171 skb->dst->dev->mtu : dst_mtu(skb->dst);
172}
173
174int ip6_output(struct sk_buff *skb)
175{
176 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
177 if (unlikely(idev->cnf.disable_ipv6)) {
178 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
179 kfree_skb(skb);
180 return 0;
181 }
182
183 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
184 dst_allfrag(skb->dst))
185 return ip6_fragment(skb, ip6_output2);
186 else
187 return ip6_output2(skb);
188}
189
190/*
191 * xmit an sk_buff (used by TCP)
192 */
193
194int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
195 struct ipv6_txoptions *opt, int ipfragok)
196{
197 struct ipv6_pinfo *np = inet6_sk(sk);
198 struct in6_addr *first_hop = &fl->fl6_dst;
199 struct dst_entry *dst = skb->dst;
200 struct ipv6hdr *hdr;
201 u8 proto = fl->proto;
202 int seg_len = skb->len;
203 int hlimit, tclass;
204 u32 mtu;
205
206 if (opt) {
207 unsigned int head_room;
208
209 /* First: exthdrs may take lots of space (~8K for now)
210 MAX_HEADER is not enough.
211 */
212 head_room = opt->opt_nflen + opt->opt_flen;
213 seg_len += head_room;
214 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
215
216 if (skb_headroom(skb) < head_room) {
217 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
218 if (skb2 == NULL) {
219 IP6_INC_STATS(ip6_dst_idev(skb->dst),
220 IPSTATS_MIB_OUTDISCARDS);
221 kfree_skb(skb);
222 return -ENOBUFS;
223 }
224 kfree_skb(skb);
225 skb = skb2;
226 if (sk)
227 skb_set_owner_w(skb, sk);
228 }
229 if (opt->opt_flen)
230 ipv6_push_frag_opts(skb, opt, &proto);
231 if (opt->opt_nflen)
232 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
233 }
234
235 skb_push(skb, sizeof(struct ipv6hdr));
236 skb_reset_network_header(skb);
237 hdr = ipv6_hdr(skb);
238
239 /*
240 * Fill in the IPv6 header
241 */
242
243 hlimit = -1;
244 if (np)
245 hlimit = np->hop_limit;
246 if (hlimit < 0)
247 hlimit = ip6_dst_hoplimit(dst);
248
249 tclass = -1;
250 if (np)
251 tclass = np->tclass;
252 if (tclass < 0)
253 tclass = 0;
254
255 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
256
257 hdr->payload_len = htons(seg_len);
258 hdr->nexthdr = proto;
259 hdr->hop_limit = hlimit;
260
261 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
262 ipv6_addr_copy(&hdr->daddr, first_hop);
263
264 skb->priority = sk->sk_priority;
265 skb->mark = sk->sk_mark;
266
267 mtu = dst_mtu(dst);
268 if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
269 IP6_INC_STATS(ip6_dst_idev(skb->dst),
270 IPSTATS_MIB_OUTREQUESTS);
271 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
272 dst_output);
273 }
274
275 if (net_ratelimit())
276 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
277 skb->dev = dst->dev;
278 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
279 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
280 kfree_skb(skb);
281 return -EMSGSIZE;
282}
283
284EXPORT_SYMBOL(ip6_xmit);
285
286/*
287 * To avoid extra problems ND packets are send through this
288 * routine. It's code duplication but I really want to avoid
289 * extra checks since ipv6_build_header is used by TCP (which
290 * is for us performance critical)
291 */
292
293int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
294 const struct in6_addr *saddr, const struct in6_addr *daddr,
295 int proto, int len)
296{
297 struct ipv6_pinfo *np = inet6_sk(sk);
298 struct ipv6hdr *hdr;
299 int totlen;
300
301 skb->protocol = htons(ETH_P_IPV6);
302 skb->dev = dev;
303
304 totlen = len + sizeof(struct ipv6hdr);
305
306 skb_reset_network_header(skb);
307 skb_put(skb, sizeof(struct ipv6hdr));
308 hdr = ipv6_hdr(skb);
309
310 *(__be32*)hdr = htonl(0x60000000);
311
312 hdr->payload_len = htons(len);
313 hdr->nexthdr = proto;
314 hdr->hop_limit = np->hop_limit;
315
316 ipv6_addr_copy(&hdr->saddr, saddr);
317 ipv6_addr_copy(&hdr->daddr, daddr);
318
319 return 0;
320}
321
322static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
323{
324 struct ip6_ra_chain *ra;
325 struct sock *last = NULL;
326
327 read_lock(&ip6_ra_lock);
328 for (ra = ip6_ra_chain; ra; ra = ra->next) {
329 struct sock *sk = ra->sk;
330 if (sk && ra->sel == sel &&
331 (!sk->sk_bound_dev_if ||
332 sk->sk_bound_dev_if == skb->dev->ifindex)) {
333 if (last) {
334 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
335 if (skb2)
336 rawv6_rcv(last, skb2);
337 }
338 last = sk;
339 }
340 }
341
342 if (last) {
343 rawv6_rcv(last, skb);
344 read_unlock(&ip6_ra_lock);
345 return 1;
346 }
347 read_unlock(&ip6_ra_lock);
348 return 0;
349}
350
351static int ip6_forward_proxy_check(struct sk_buff *skb)
352{
353 struct ipv6hdr *hdr = ipv6_hdr(skb);
354 u8 nexthdr = hdr->nexthdr;
355 int offset;
356
357 if (ipv6_ext_hdr(nexthdr)) {
358 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
359 if (offset < 0)
360 return 0;
361 } else
362 offset = sizeof(struct ipv6hdr);
363
364 if (nexthdr == IPPROTO_ICMPV6) {
365 struct icmp6hdr *icmp6;
366
367 if (!pskb_may_pull(skb, (skb_network_header(skb) +
368 offset + 1 - skb->data)))
369 return 0;
370
371 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
372
373 switch (icmp6->icmp6_type) {
374 case NDISC_ROUTER_SOLICITATION:
375 case NDISC_ROUTER_ADVERTISEMENT:
376 case NDISC_NEIGHBOUR_SOLICITATION:
377 case NDISC_NEIGHBOUR_ADVERTISEMENT:
378 case NDISC_REDIRECT:
379 /* For reaction involving unicast neighbor discovery
380 * message destined to the proxied address, pass it to
381 * input function.
382 */
383 return 1;
384 default:
385 break;
386 }
387 }
388
389 /*
390 * The proxying router can't forward traffic sent to a link-local
391 * address, so signal the sender and discard the packet. This
392 * behavior is clarified by the MIPv6 specification.
393 */
394 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
395 dst_link_failure(skb);
396 return -1;
397 }
398
399 return 0;
400}
401
/* Continuation of the FORWARD hook: pass the packet on to dst_output(). */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
406
407int ip6_forward(struct sk_buff *skb)
408{
409 struct dst_entry *dst = skb->dst;
410 struct ipv6hdr *hdr = ipv6_hdr(skb);
411 struct inet6_skb_parm *opt = IP6CB(skb);
412 struct net *net = dev_net(dst->dev);
413
414 if (ipv6_devconf.forwarding == 0)
415 goto error;
416
417 if (skb_warn_if_lro(skb))
418 goto drop;
419
420 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
421 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
422 goto drop;
423 }
424
425 skb_forward_csum(skb);
426
427 /*
428 * We DO NOT make any processing on
429 * RA packets, pushing them to user level AS IS
430 * without ane WARRANTY that application will be able
431 * to interpret them. The reason is that we
432 * cannot make anything clever here.
433 *
434 * We are not end-node, so that if packet contains
435 * AH/ESP, we cannot make anything.
436 * Defragmentation also would be mistake, RA packets
437 * cannot be fragmented, because there is no warranty
438 * that different fragments will go along one path. --ANK
439 */
440 if (opt->ra) {
441 u8 *ptr = skb_network_header(skb) + opt->ra;
442 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
443 return 0;
444 }
445
446 /*
447 * check and decrement ttl
448 */
449 if (hdr->hop_limit <= 1) {
450 /* Force OUTPUT device used as source address */
451 skb->dev = dst->dev;
452 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
453 0, skb->dev);
454 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
455
456 kfree_skb(skb);
457 return -ETIMEDOUT;
458 }
459
460 /* XXX: idev->cnf.proxy_ndp? */
461 if (ipv6_devconf.proxy_ndp &&
462 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
463 int proxied = ip6_forward_proxy_check(skb);
464 if (proxied > 0)
465 return ip6_input(skb);
466 else if (proxied < 0) {
467 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
468 goto drop;
469 }
470 }
471
472 if (!xfrm6_route_forward(skb)) {
473 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
474 goto drop;
475 }
476 dst = skb->dst;
477
478 /* IPv6 specs say nothing about it, but it is clear that we cannot
479 send redirects to source routed frames.
480 We don't send redirects to frames decapsulated from IPsec.
481 */
482 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
483 !skb->sp) {
484 struct in6_addr *target = NULL;
485 struct rt6_info *rt;
486 struct neighbour *n = dst->neighbour;
487
488 /*
489 * incoming and outgoing devices are the same
490 * send a redirect.
491 */
492
493 rt = (struct rt6_info *) dst;
494 if ((rt->rt6i_flags & RTF_GATEWAY))
495 target = (struct in6_addr*)&n->primary_key;
496 else
497 target = &hdr->daddr;
498
499 /* Limit redirects both by destination (here)
500 and by source (inside ndisc_send_redirect)
501 */
502 if (xrlim_allow(dst, 1*HZ))
503 ndisc_send_redirect(skb, n, target);
504 } else {
505 int addrtype = ipv6_addr_type(&hdr->saddr);
506
507 /* This check is security critical. */
508 if (addrtype == IPV6_ADDR_ANY ||
509 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
510 goto error;
511 if (addrtype & IPV6_ADDR_LINKLOCAL) {
512 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
513 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
514 goto error;
515 }
516 }
517
518 if (skb->len > dst_mtu(dst)) {
519 /* Again, force OUTPUT device used as source address */
520 skb->dev = dst->dev;
521 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
522 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
523 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
524 kfree_skb(skb);
525 return -EMSGSIZE;
526 }
527
528 if (skb_cow(skb, dst->dev->hard_header_len)) {
529 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
530 goto drop;
531 }
532
533 hdr = ipv6_hdr(skb);
534
535 /* Mangling hops number delayed to point after skb COW */
536
537 hdr->hop_limit--;
538
539 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
540 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
541 ip6_forward_finish);
542
543error:
544 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
545drop:
546 kfree_skb(skb);
547 return -EINVAL;
548}
549
550static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
551{
552 to->pkt_type = from->pkt_type;
553 to->priority = from->priority;
554 to->protocol = from->protocol;
555 dst_release(to->dst);
556 to->dst = dst_clone(from->dst);
557 to->dev = from->dev;
558 to->mark = from->mark;
559
560#ifdef CONFIG_NET_SCHED
561 to->tc_index = from->tc_index;
562#endif
563 nf_copy(to, from);
564#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
565 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
566 to->nf_trace = from->nf_trace;
567#endif
568 skb_copy_secmark(to, from);
569}
570
571int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
572{
573 u16 offset = sizeof(struct ipv6hdr);
574 struct ipv6_opt_hdr *exthdr =
575 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
576 unsigned int packet_len = skb->tail - skb->network_header;
577 int found_rhdr = 0;
578 *nexthdr = &ipv6_hdr(skb)->nexthdr;
579
580 while (offset + 1 <= packet_len) {
581
582 switch (**nexthdr) {
583
584 case NEXTHDR_HOP:
585 break;
586 case NEXTHDR_ROUTING:
587 found_rhdr = 1;
588 break;
589 case NEXTHDR_DEST:
590#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
591 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
592 break;
593#endif
594 if (found_rhdr)
595 return offset;
596 break;
597 default :
598 return offset;
599 }
600
601 offset += ipv6_optlen(exthdr);
602 *nexthdr = &exthdr->nexthdr;
603 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
604 offset);
605 }
606
607 return offset;
608}
609
610static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
611{
612 struct net_device *dev;
613 struct sk_buff *frag;
614 struct rt6_info *rt = (struct rt6_info*)skb->dst;
615 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
616 struct ipv6hdr *tmp_hdr;
617 struct frag_hdr *fh;
618 unsigned int mtu, hlen, left, len;
619 __be32 frag_id = 0;
620 int ptr, offset = 0, err=0;
621 u8 *prevhdr, nexthdr = 0;
622
623 dev = rt->u.dst.dev;
624 hlen = ip6_find_1stfragopt(skb, &prevhdr);
625 nexthdr = *prevhdr;
626
627 mtu = ip6_skb_dst_mtu(skb);
628
629 /* We must not fragment if the socket is set to force MTU discovery
630 * or if the skb it not generated by a local socket. (This last
631 * check should be redundant, but it's free.)
632 */
633 if (!skb->local_df) {
634 skb->dev = skb->dst->dev;
635 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
636 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
637 kfree_skb(skb);
638 return -EMSGSIZE;
639 }
640
641 if (np && np->frag_size < mtu) {
642 if (np->frag_size)
643 mtu = np->frag_size;
644 }
645 mtu -= hlen + sizeof(struct frag_hdr);
646
647 if (skb_shinfo(skb)->frag_list) {
648 int first_len = skb_pagelen(skb);
649 int truesizes = 0;
650
651 if (first_len - hlen > mtu ||
652 ((first_len - hlen) & 7) ||
653 skb_cloned(skb))
654 goto slow_path;
655
656 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
657 /* Correct geometry. */
658 if (frag->len > mtu ||
659 ((frag->len & 7) && frag->next) ||
660 skb_headroom(frag) < hlen)
661 goto slow_path;
662
663 /* Partially cloned skb? */
664 if (skb_shared(frag))
665 goto slow_path;
666
667 BUG_ON(frag->sk);
668 if (skb->sk) {
669 sock_hold(skb->sk);
670 frag->sk = skb->sk;
671 frag->destructor = sock_wfree;
672 truesizes += frag->truesize;
673 }
674 }
675
676 err = 0;
677 offset = 0;
678 frag = skb_shinfo(skb)->frag_list;
679 skb_shinfo(skb)->frag_list = NULL;
680 /* BUILD HEADER */
681
682 *prevhdr = NEXTHDR_FRAGMENT;
683 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
684 if (!tmp_hdr) {
685 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
686 return -ENOMEM;
687 }
688
689 __skb_pull(skb, hlen);
690 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
691 __skb_push(skb, hlen);
692 skb_reset_network_header(skb);
693 memcpy(skb_network_header(skb), tmp_hdr, hlen);
694
695 ipv6_select_ident(skb, fh);
696 fh->nexthdr = nexthdr;
697 fh->reserved = 0;
698 fh->frag_off = htons(IP6_MF);
699 frag_id = fh->identification;
700
701 first_len = skb_pagelen(skb);
702 skb->data_len = first_len - skb_headlen(skb);
703 skb->truesize -= truesizes;
704 skb->len = first_len;
705 ipv6_hdr(skb)->payload_len = htons(first_len -
706 sizeof(struct ipv6hdr));
707
708 dst_hold(&rt->u.dst);
709
710 for (;;) {
711 /* Prepare header of the next frame,
712 * before previous one went down. */
713 if (frag) {
714 frag->ip_summed = CHECKSUM_NONE;
715 skb_reset_transport_header(frag);
716 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
717 __skb_push(frag, hlen);
718 skb_reset_network_header(frag);
719 memcpy(skb_network_header(frag), tmp_hdr,
720 hlen);
721 offset += skb->len - hlen - sizeof(struct frag_hdr);
722 fh->nexthdr = nexthdr;
723 fh->reserved = 0;
724 fh->frag_off = htons(offset);
725 if (frag->next != NULL)
726 fh->frag_off |= htons(IP6_MF);
727 fh->identification = frag_id;
728 ipv6_hdr(frag)->payload_len =
729 htons(frag->len -
730 sizeof(struct ipv6hdr));
731 ip6_copy_metadata(frag, skb);
732 }
733
734 err = output(skb);
735 if(!err)
736 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
737
738 if (err || !frag)
739 break;
740
741 skb = frag;
742 frag = skb->next;
743 skb->next = NULL;
744 }
745
746 kfree(tmp_hdr);
747
748 if (err == 0) {
749 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
750 dst_release(&rt->u.dst);
751 return 0;
752 }
753
754 while (frag) {
755 skb = frag->next;
756 kfree_skb(frag);
757 frag = skb;
758 }
759
760 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
761 dst_release(&rt->u.dst);
762 return err;
763 }
764
765slow_path:
766 left = skb->len - hlen; /* Space per frame */
767 ptr = hlen; /* Where to start from */
768
769 /*
770 * Fragment the datagram.
771 */
772
773 *prevhdr = NEXTHDR_FRAGMENT;
774
775 /*
776 * Keep copying data until we run out.
777 */
778 while(left > 0) {
779 len = left;
780 /* IF: it doesn't fit, use 'mtu' - the data space left */
781 if (len > mtu)
782 len = mtu;
783 /* IF: we are not sending upto and including the packet end
784 then align the next start on an eight byte boundary */
785 if (len < left) {
786 len &= ~7;
787 }
788 /*
789 * Allocate buffer.
790 */
791
792 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
793 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
794 IP6_INC_STATS(ip6_dst_idev(skb->dst),
795 IPSTATS_MIB_FRAGFAILS);
796 err = -ENOMEM;
797 goto fail;
798 }
799
800 /*
801 * Set up data on packet
802 */
803
804 ip6_copy_metadata(frag, skb);
805 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
806 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
807 skb_reset_network_header(frag);
808 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
809 frag->transport_header = (frag->network_header + hlen +
810 sizeof(struct frag_hdr));
811
812 /*
813 * Charge the memory for the fragment to any owner
814 * it might possess
815 */
816 if (skb->sk)
817 skb_set_owner_w(frag, skb->sk);
818
819 /*
820 * Copy the packet header into the new buffer.
821 */
822 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
823
824 /*
825 * Build fragment header.
826 */
827 fh->nexthdr = nexthdr;
828 fh->reserved = 0;
829 if (!frag_id) {
830 ipv6_select_ident(skb, fh);
831 frag_id = fh->identification;
832 } else
833 fh->identification = frag_id;
834
835 /*
836 * Copy a block of the IP datagram.
837 */
838 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
839 BUG();
840 left -= len;
841
842 fh->frag_off = htons(offset);
843 if (left > 0)
844 fh->frag_off |= htons(IP6_MF);
845 ipv6_hdr(frag)->payload_len = htons(frag->len -
846 sizeof(struct ipv6hdr));
847
848 ptr += len;
849 offset += len;
850
851 /*
852 * Put this fragment into the sending queue.
853 */
854 err = output(frag);
855 if (err)
856 goto fail;
857
858 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
859 }
860 IP6_INC_STATS(ip6_dst_idev(skb->dst),
861 IPSTATS_MIB_FRAGOKS);
862 kfree_skb(skb);
863 return err;
864
865fail:
866 IP6_INC_STATS(ip6_dst_idev(skb->dst),
867 IPSTATS_MIB_FRAGFAILS);
868 kfree_skb(skb);
869 return err;
870}
871
872static inline int ip6_rt_check(struct rt6key *rt_key,
873 struct in6_addr *fl_addr,
874 struct in6_addr *addr_cache)
875{
876 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
877 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
878}
879
880static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
881 struct dst_entry *dst,
882 struct flowi *fl)
883{
884 struct ipv6_pinfo *np = inet6_sk(sk);
885 struct rt6_info *rt = (struct rt6_info *)dst;
886
887 if (!dst)
888 goto out;
889
890 /* Yes, checking route validity in not connected
891 * case is not very simple. Take into account,
892 * that we do not support routing by source, TOS,
893 * and MSG_DONTROUTE --ANK (980726)
894 *
895 * 1. ip6_rt_check(): If route was host route,
896 * check that cached destination is current.
897 * If it is network route, we still may
898 * check its validity using saved pointer
899 * to the last used address: daddr_cache.
900 * We do not want to save whole address now,
901 * (because main consumer of this service
902 * is tcp, which has not this problem),
903 * so that the last trick works only on connected
904 * sockets.
905 * 2. oif also should be the same.
906 */
907 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
908#ifdef CONFIG_IPV6_SUBTREES
909 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
910#endif
911 (fl->oif && fl->oif != dst->dev->ifindex)) {
912 dst_release(dst);
913 dst = NULL;
914 }
915
916out:
917 return dst;
918}
919
920static int ip6_dst_lookup_tail(struct sock *sk,
921 struct dst_entry **dst, struct flowi *fl)
922{
923 int err;
924 struct net *net = sock_net(sk);
925
926 if (*dst == NULL)
927 *dst = ip6_route_output(net, sk, fl);
928
929 if ((err = (*dst)->error))
930 goto out_err_release;
931
932 if (ipv6_addr_any(&fl->fl6_src)) {
933 err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
934 &fl->fl6_dst,
935 sk ? inet6_sk(sk)->srcprefs : 0,
936 &fl->fl6_src);
937 if (err)
938 goto out_err_release;
939 }
940
941#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
942 /*
943 * Here if the dst entry we've looked up
944 * has a neighbour entry that is in the INCOMPLETE
945 * state and the src address from the flow is
946 * marked as OPTIMISTIC, we release the found
947 * dst entry and replace it instead with the
948 * dst entry of the nexthop router
949 */
950 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
951 struct inet6_ifaddr *ifp;
952 struct flowi fl_gw;
953 int redirect;
954
955 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
956 (*dst)->dev, 1);
957
958 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
959 if (ifp)
960 in6_ifa_put(ifp);
961
962 if (redirect) {
963 /*
964 * We need to get the dst entry for the
965 * default router instead
966 */
967 dst_release(*dst);
968 memcpy(&fl_gw, fl, sizeof(struct flowi));
969 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
970 *dst = ip6_route_output(net, sk, &fl_gw);
971 if ((err = (*dst)->error))
972 goto out_err_release;
973 }
974 }
975#endif
976
977 return 0;
978
979out_err_release:
980 if (err == -ENETUNREACH)
981 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
982 dst_release(*dst);
983 *dst = NULL;
984 return err;
985}
986
987/**
988 * ip6_dst_lookup - perform route lookup on flow
989 * @sk: socket which provides route info
990 * @dst: pointer to dst_entry * for result
991 * @fl: flow to lookup
992 *
993 * This function performs a route lookup on the given flow.
994 *
995 * It returns zero on success, or a standard errno code on error.
996 */
997int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
998{
999 *dst = NULL;
1000 return ip6_dst_lookup_tail(sk, dst, fl);
1001}
1002EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1003
1004/**
1005 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1006 * @sk: socket which provides the dst cache and route info
1007 * @dst: pointer to dst_entry * for result
1008 * @fl: flow to lookup
1009 *
1010 * This function performs a route lookup on the given flow with the
1011 * possibility of using the cached route in the socket if it is valid.
1012 * It will take the socket dst lock when operating on the dst cache.
1013 * As a result, this function can only be used in process context.
1014 *
1015 * It returns zero on success, or a standard errno code on error.
1016 */
1017int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1018{
1019 *dst = NULL;
1020 if (sk) {
1021 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1022 *dst = ip6_sk_dst_check(sk, *dst, fl);
1023 }
1024
1025 return ip6_dst_lookup_tail(sk, dst, fl);
1026}
1027EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1028
1029static inline int ip6_ufo_append_data(struct sock *sk,
1030 int getfrag(void *from, char *to, int offset, int len,
1031 int odd, struct sk_buff *skb),
1032 void *from, int length, int hh_len, int fragheaderlen,
1033 int transhdrlen, int mtu,unsigned int flags)
1034
1035{
1036 struct sk_buff *skb;
1037 int err;
1038
1039 /* There is support for UDP large send offload by network
1040 * device, so create one single skb packet containing complete
1041 * udp datagram
1042 */
1043 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1044 skb = sock_alloc_send_skb(sk,
1045 hh_len + fragheaderlen + transhdrlen + 20,
1046 (flags & MSG_DONTWAIT), &err);
1047 if (skb == NULL)
1048 return -ENOMEM;
1049
1050 /* reserve space for Hardware header */
1051 skb_reserve(skb, hh_len);
1052
1053 /* create space for UDP/IP header */
1054 skb_put(skb,fragheaderlen + transhdrlen);
1055
1056 /* initialize network header pointer */
1057 skb_reset_network_header(skb);
1058
1059 /* initialize protocol header pointer */
1060 skb->transport_header = skb->network_header + fragheaderlen;
1061
1062 skb->ip_summed = CHECKSUM_PARTIAL;
1063 skb->csum = 0;
1064 sk->sk_sndmsg_off = 0;
1065 }
1066
1067 err = skb_append_datato_frags(sk,skb, getfrag, from,
1068 (length - transhdrlen));
1069 if (!err) {
1070 struct frag_hdr fhdr;
1071
1072 /* specify the length of each IP datagram fragment*/
1073 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1074 sizeof(struct frag_hdr);
1075 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1076 ipv6_select_ident(skb, &fhdr);
1077 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1078 __skb_queue_tail(&sk->sk_write_queue, skb);
1079
1080 return 0;
1081 }
1082 /* There is not enough support do UPD LSO,
1083 * so follow normal path
1084 */
1085 kfree_skb(skb);
1086
1087 return err;
1088}
1089
1090int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1091 int offset, int len, int odd, struct sk_buff *skb),
1092 void *from, int length, int transhdrlen,
1093 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1094 struct rt6_info *rt, unsigned int flags)
1095{
1096 struct inet_sock *inet = inet_sk(sk);
1097 struct ipv6_pinfo *np = inet6_sk(sk);
1098 struct sk_buff *skb;
1099 unsigned int maxfraglen, fragheaderlen;
1100 int exthdrlen;
1101 int hh_len;
1102 int mtu;
1103 int copy;
1104 int err;
1105 int offset = 0;
1106 int csummode = CHECKSUM_NONE;
1107
1108 if (flags&MSG_PROBE)
1109 return 0;
1110 if (skb_queue_empty(&sk->sk_write_queue)) {
1111 /*
1112 * setup for corking
1113 */
1114 if (opt) {
1115 if (np->cork.opt == NULL) {
1116 np->cork.opt = kmalloc(opt->tot_len,
1117 sk->sk_allocation);
1118 if (unlikely(np->cork.opt == NULL))
1119 return -ENOBUFS;
1120 } else if (np->cork.opt->tot_len < opt->tot_len) {
1121 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1122 return -EINVAL;
1123 }
1124 memcpy(np->cork.opt, opt, opt->tot_len);
1125 inet->cork.flags |= IPCORK_OPT;
1126 /* need source address above miyazawa*/
1127 }
1128 dst_hold(&rt->u.dst);
1129 inet->cork.dst = &rt->u.dst;
1130 inet->cork.fl = *fl;
1131 np->cork.hop_limit = hlimit;
1132 np->cork.tclass = tclass;
1133 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1134 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1135 if (np->frag_size < mtu) {
1136 if (np->frag_size)
1137 mtu = np->frag_size;
1138 }
1139 inet->cork.fragsize = mtu;
1140 if (dst_allfrag(rt->u.dst.path))
1141 inet->cork.flags |= IPCORK_ALLFRAG;
1142 inet->cork.length = 0;
1143 sk->sk_sndmsg_page = NULL;
1144 sk->sk_sndmsg_off = 0;
1145 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1146 rt->rt6i_nfheader_len;
1147 length += exthdrlen;
1148 transhdrlen += exthdrlen;
1149 } else {
1150 rt = (struct rt6_info *)inet->cork.dst;
1151 fl = &inet->cork.fl;
1152 if (inet->cork.flags & IPCORK_OPT)
1153 opt = np->cork.opt;
1154 transhdrlen = 0;
1155 exthdrlen = 0;
1156 mtu = inet->cork.fragsize;
1157 }
1158
1159 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1160
1161 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1162 (opt ? opt->opt_nflen : 0);
1163 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1164
1165 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1166 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1167 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1168 return -EMSGSIZE;
1169 }
1170 }
1171
1172 /*
1173 * Let's try using as much space as possible.
1174 * Use MTU if total length of the message fits into the MTU.
1175 * Otherwise, we need to reserve fragment header and
1176 * fragment alignment (= 8-15 octects, in total).
1177 *
1178 * Note that we may need to "move" the data from the tail of
1179 * of the buffer to the new fragment when we split
1180 * the message.
1181 *
1182 * FIXME: It may be fragmented into multiple chunks
1183 * at once if non-fragmentable extension headers
1184 * are too large.
1185 * --yoshfuji
1186 */
1187
1188 inet->cork.length += length;
1189 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1190 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1191
1192 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1193 fragheaderlen, transhdrlen, mtu,
1194 flags);
1195 if (err)
1196 goto error;
1197 return 0;
1198 }
1199
1200 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1201 goto alloc_new_skb;
1202
1203 while (length > 0) {
1204 /* Check if the remaining data fits into current packet. */
1205 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1206 if (copy < length)
1207 copy = maxfraglen - skb->len;
1208
1209 if (copy <= 0) {
1210 char *data;
1211 unsigned int datalen;
1212 unsigned int fraglen;
1213 unsigned int fraggap;
1214 unsigned int alloclen;
1215 struct sk_buff *skb_prev;
1216alloc_new_skb:
1217 skb_prev = skb;
1218
1219 /* There's no room in the current skb */
1220 if (skb_prev)
1221 fraggap = skb_prev->len - maxfraglen;
1222 else
1223 fraggap = 0;
1224
1225 /*
1226 * If remaining data exceeds the mtu,
1227 * we know we need more fragment(s).
1228 */
1229 datalen = length + fraggap;
1230 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1231 datalen = maxfraglen - fragheaderlen;
1232
1233 fraglen = datalen + fragheaderlen;
1234 if ((flags & MSG_MORE) &&
1235 !(rt->u.dst.dev->features&NETIF_F_SG))
1236 alloclen = mtu;
1237 else
1238 alloclen = datalen + fragheaderlen;
1239
1240 /*
1241 * The last fragment gets additional space at tail.
1242 * Note: we overallocate on fragments with MSG_MODE
1243 * because we have no idea if we're the last one.
1244 */
1245 if (datalen == length + fraggap)
1246 alloclen += rt->u.dst.trailer_len;
1247
1248 /*
1249 * We just reserve space for fragment header.
1250 * Note: this may be overallocation if the message
1251 * (without MSG_MORE) fits into the MTU.
1252 */
1253 alloclen += sizeof(struct frag_hdr);
1254
1255 if (transhdrlen) {
1256 skb = sock_alloc_send_skb(sk,
1257 alloclen + hh_len,
1258 (flags & MSG_DONTWAIT), &err);
1259 } else {
1260 skb = NULL;
1261 if (atomic_read(&sk->sk_wmem_alloc) <=
1262 2 * sk->sk_sndbuf)
1263 skb = sock_wmalloc(sk,
1264 alloclen + hh_len, 1,
1265 sk->sk_allocation);
1266 if (unlikely(skb == NULL))
1267 err = -ENOBUFS;
1268 }
1269 if (skb == NULL)
1270 goto error;
1271 /*
1272 * Fill in the control structures
1273 */
1274 skb->ip_summed = csummode;
1275 skb->csum = 0;
1276 /* reserve for fragmentation */
1277 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1278
1279 /*
1280 * Find where to start putting bytes
1281 */
1282 data = skb_put(skb, fraglen);
1283 skb_set_network_header(skb, exthdrlen);
1284 data += fragheaderlen;
1285 skb->transport_header = (skb->network_header +
1286 fragheaderlen);
1287 if (fraggap) {
1288 skb->csum = skb_copy_and_csum_bits(
1289 skb_prev, maxfraglen,
1290 data + transhdrlen, fraggap, 0);
1291 skb_prev->csum = csum_sub(skb_prev->csum,
1292 skb->csum);
1293 data += fraggap;
1294 pskb_trim_unique(skb_prev, maxfraglen);
1295 }
1296 copy = datalen - transhdrlen - fraggap;
1297 if (copy < 0) {
1298 err = -EINVAL;
1299 kfree_skb(skb);
1300 goto error;
1301 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1302 err = -EFAULT;
1303 kfree_skb(skb);
1304 goto error;
1305 }
1306
1307 offset += copy;
1308 length -= datalen - fraggap;
1309 transhdrlen = 0;
1310 exthdrlen = 0;
1311 csummode = CHECKSUM_NONE;
1312
1313 /*
1314 * Put the packet on the pending queue
1315 */
1316 __skb_queue_tail(&sk->sk_write_queue, skb);
1317 continue;
1318 }
1319
1320 if (copy > length)
1321 copy = length;
1322
1323 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1324 unsigned int off;
1325
1326 off = skb->len;
1327 if (getfrag(from, skb_put(skb, copy),
1328 offset, copy, off, skb) < 0) {
1329 __skb_trim(skb, off);
1330 err = -EFAULT;
1331 goto error;
1332 }
1333 } else {
1334 int i = skb_shinfo(skb)->nr_frags;
1335 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1336 struct page *page = sk->sk_sndmsg_page;
1337 int off = sk->sk_sndmsg_off;
1338 unsigned int left;
1339
1340 if (page && (left = PAGE_SIZE - off) > 0) {
1341 if (copy >= left)
1342 copy = left;
1343 if (page != frag->page) {
1344 if (i == MAX_SKB_FRAGS) {
1345 err = -EMSGSIZE;
1346 goto error;
1347 }
1348 get_page(page);
1349 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1350 frag = &skb_shinfo(skb)->frags[i];
1351 }
1352 } else if(i < MAX_SKB_FRAGS) {
1353 if (copy > PAGE_SIZE)
1354 copy = PAGE_SIZE;
1355 page = alloc_pages(sk->sk_allocation, 0);
1356 if (page == NULL) {
1357 err = -ENOMEM;
1358 goto error;
1359 }
1360 sk->sk_sndmsg_page = page;
1361 sk->sk_sndmsg_off = 0;
1362
1363 skb_fill_page_desc(skb, i, page, 0, 0);
1364 frag = &skb_shinfo(skb)->frags[i];
1365 } else {
1366 err = -EMSGSIZE;
1367 goto error;
1368 }
1369 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1370 err = -EFAULT;
1371 goto error;
1372 }
1373 sk->sk_sndmsg_off += copy;
1374 frag->size += copy;
1375 skb->len += copy;
1376 skb->data_len += copy;
1377 skb->truesize += copy;
1378 atomic_add(copy, &sk->sk_wmem_alloc);
1379 }
1380 offset += copy;
1381 length -= copy;
1382 }
1383 return 0;
1384error:
1385 inet->cork.length -= length;
1386 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1387 return err;
1388}
1389
1390static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1391{
1392 inet->cork.flags &= ~IPCORK_OPT;
1393 kfree(np->cork.opt);
1394 np->cork.opt = NULL;
1395 if (inet->cork.dst) {
1396 dst_release(inet->cork.dst);
1397 inet->cork.dst = NULL;
1398 inet->cork.flags &= ~IPCORK_ALLFRAG;
1399 }
1400 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1401}
1402
1403int ip6_push_pending_frames(struct sock *sk)
1404{
1405 struct sk_buff *skb, *tmp_skb;
1406 struct sk_buff **tail_skb;
1407 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1408 struct inet_sock *inet = inet_sk(sk);
1409 struct ipv6_pinfo *np = inet6_sk(sk);
1410 struct ipv6hdr *hdr;
1411 struct ipv6_txoptions *opt = np->cork.opt;
1412 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1413 struct flowi *fl = &inet->cork.fl;
1414 unsigned char proto = fl->proto;
1415 int err = 0;
1416
1417 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1418 goto out;
1419 tail_skb = &(skb_shinfo(skb)->frag_list);
1420
1421 /* move skb->data to ip header from ext header */
1422 if (skb->data < skb_network_header(skb))
1423 __skb_pull(skb, skb_network_offset(skb));
1424 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1425 __skb_pull(tmp_skb, skb_network_header_len(skb));
1426 *tail_skb = tmp_skb;
1427 tail_skb = &(tmp_skb->next);
1428 skb->len += tmp_skb->len;
1429 skb->data_len += tmp_skb->len;
1430 skb->truesize += tmp_skb->truesize;
1431 __sock_put(tmp_skb->sk);
1432 tmp_skb->destructor = NULL;
1433 tmp_skb->sk = NULL;
1434 }
1435
1436 /* Allow local fragmentation. */
1437 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1438 skb->local_df = 1;
1439
1440 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1441 __skb_pull(skb, skb_network_header_len(skb));
1442 if (opt && opt->opt_flen)
1443 ipv6_push_frag_opts(skb, opt, &proto);
1444 if (opt && opt->opt_nflen)
1445 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1446
1447 skb_push(skb, sizeof(struct ipv6hdr));
1448 skb_reset_network_header(skb);
1449 hdr = ipv6_hdr(skb);
1450
1451 *(__be32*)hdr = fl->fl6_flowlabel |
1452 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1453
1454 hdr->hop_limit = np->cork.hop_limit;
1455 hdr->nexthdr = proto;
1456 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1457 ipv6_addr_copy(&hdr->daddr, final_dst);
1458
1459 skb->priority = sk->sk_priority;
1460 skb->mark = sk->sk_mark;
1461
1462 skb->dst = dst_clone(&rt->u.dst);
1463 IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1464 if (proto == IPPROTO_ICMPV6) {
1465 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1466
1467 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1468 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1469 }
1470
1471 err = ip6_local_out(skb);
1472 if (err) {
1473 if (err > 0)
1474 err = np->recverr ? net_xmit_errno(err) : 0;
1475 if (err)
1476 goto error;
1477 }
1478
1479out:
1480 ip6_cork_release(inet, np);
1481 return err;
1482error:
1483 goto out;
1484}
1485
1486void ip6_flush_pending_frames(struct sock *sk)
1487{
1488 struct sk_buff *skb;
1489
1490 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1491 if (skb->dst)
1492 IP6_INC_STATS(ip6_dst_idev(skb->dst),
1493 IPSTATS_MIB_OUTDISCARDS);
1494 kfree_skb(skb);
1495 }
1496
1497 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1498}