net: Embed hh_cache inside of struct neighbour.
net/ipv6/ip6_output.c
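The change this file version carries (per the title above) moved the
hardware header cache from a separately allocated object to a field
embedded directly in struct neighbour. A minimal sketch of the
difference, assuming the historical layouts (abridged, not copied from
the real headers):

	/* before: the cached link-layer header hung off a pointer,
	 * which could be NULL and had its own lifetime. */
	struct neighbour {
		/* ... */
		struct hh_cache *hh;
	};

	/* after: the cache is embedded, so it always exists; a non-zero
	 * hh.hh_len is what marks it valid. ip6_finish_output2() below
	 * relies on exactly this test. */
	struct neighbour {
		/* ... */
		struct hh_cache hh;
	};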
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx_ni(newskb);
	return 0;
}

static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

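	/* hh_cache is now embedded in struct neighbour (see the commit
	 * title at the top of this file), so instead of testing a hh
	 * pointer for NULL we test hh->hh_len: non-zero means a prebuilt
	 * link-layer header can be copied in front of the packet by
	 * neigh_hh_output(); otherwise fall back to neigh->output(). */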
	neigh = dst->neighbour;
	if (neigh) {
		struct hh_cache *hh = &neigh->hh;
		if (hh->hh_len)
			return neigh_hh_output(hh, skb);
		else
			return dst->neighbour->output(skb);
	}
	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

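/* Fragment before POST_ROUTING delivery when the packet exceeds the
 * path MTU and is not GSO, or when the route requires fragmenting
 * every packet (dst_allfrag()). */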
static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		 * MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

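	/* The first word of the IPv6 header packs version (4 bits),
	 * traffic class (8 bits) and flow label (20 bits): 0x60000000
	 * puts 6 in the version nibble, tclass lands in bits 27-20, and
	 * fl6->flowlabel (already big-endian) supplies the label bits. */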
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It duplicates code, but we really want to avoid
 *	extra checks, since ipv6_build_header is used by TCP (which
 *	is performance-critical for us).
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

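/* Deliver a packet carrying a Router Alert option to every matching
 * socket on ip6_ra_chain: each earlier match receives a clone, and the
 * original skb goes to the last matching socket. Returns 1 if at least
 * one socket consumed the packet. */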
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We do not process RA packets; we push them to user level
	 *	as-is, with no warranty that the application will be able
	 *	to interpret them, because we cannot do anything clever
	 *	here.
	 *
	 *	We are not an end node, so if the packet contains AH/ESP
	 *	we cannot touch it. Defragmenting would also be a mistake:
	 *	RA packets must not be fragmented, because there is no
	 *	warranty that different fragments will follow the same
	 *	path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.
	 */
	if (!skb->local_df && skb->len > mtu) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
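	/* From here on, mtu is the budget for the fragmentable part of
	 * each fragment: every fragment also carries the unfragmentable
	 * part (hlen) plus an 8 byte fragment header. For example, a
	 * 1500 byte link MTU with a plain 40 byte IPv6 header leaves
	 * 1500 - 40 - 8 = 1452 bytes of fragmentable data per fragment. */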
	mtu -= hlen + sizeof(struct frag_hdr);

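	/* Fast path: if the skb already carries a frag list whose
	 * geometry matches the fragment size (each piece fits within
	 * mtu, all but the last are multiples of 8, and enough headroom
	 * exists for the headers), the queued buffers are sent as
	 * fragments in place instead of being copied (the slow path
	 * further below). */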
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

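	/* Slow path: allocate a fresh skb per fragment, copy the
	 * unfragmentable header into it, then pull the next chunk of
	 * payload out of the original skb with skb_copy_bits(). */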
slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

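/* Returns nonzero when the cached route cannot be validated against
 * the flow: the route is not a host route to fl_addr, and the socket's
 * cached address (if any) does not match fl_addr either. */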
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the unconnected case is not
	 * very simple. Take into account that we do not support routing
	 * by source, TOS, and MSG_DONTROUTE --ANK (980726)
	 *
	 * 1. ip6_rt_check(): If the route was a host route, check that
	 *    the cached destination is still current. If it is a network
	 *    route, we can still check its validity using a saved pointer
	 *    to the last used address: daddr_cache. We do not want to
	 *    save the whole address now (because the main consumer of
	 *    this service is TCP, which does not have this problem), so
	 *    the last trick works only on connected sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 * ip6_dst_lookup - perform route lookup on flow
 * @sk: socket which provides route info
 * @dst: pointer to dst_entry * for result
 * @fl6: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 * @sk: socket which provides route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 * @can_sleep: we are in a sleepable context
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl6->daddr, final_dst);
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 * @sk: socket which provides the dst cache and route info
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 * @can_sleep: we are in a sleepable context
 *
 * This function performs a route lookup on the given flow with the
 * possibility of using the cached route in the socket if it is valid.
 * It will take the socket dst lock when operating on the dst cache.
 * As a result, this function can only be used in process context.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl6->daddr, final_dst);
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
				    int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by the network
	 * device, so create one single skb packet containing the complete
	 * udp datagram.
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
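		/* e.g. a 1500 byte MTU with a plain 40 byte IPv6 header
		 * gives (1500 - 40 - 8) & ~7 = 1448 bytes per fragment. */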
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow the normal path.
	 */
	kfree_skb(skb);

	return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;
	__u8 tx_flags = 0;

	if (flags & MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

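	/* fragheaderlen is the per-fragment header overhead before the
	 * fragment header itself; maxfraglen is the largest skb length
	 * that keeps fragments 8-aligned once the 8 byte fragment header
	 * is added. E.g. mtu = 1500, fragheaderlen = 40:
	 * maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1488. */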
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.base.dst) {
		dst_release(inet->cork.base.dst);
		inet->cork.base.dst = NULL;
		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

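	/* Coalesce the whole write queue into a single skb: the first
	 * skb becomes the head, and each following skb is unlinked from
	 * the queue and chained onto the head's frag_list with its
	 * header bytes pulled off, so exactly one IPv6 header is
	 * emitted. */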
	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl6->daddr);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl6->flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}