pcnet32: Remove redundant set of skb->dev
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / ipv6 / ip6_output.c
CommitLineData
1da177e4
LT
1/*
2 * IPv6 output functions
1ab1457c 3 * Linux INET6 implementation
1da177e4
LT
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * Based on linux/net/ipv4/ip_output.c
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 *
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
21 *
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
27 */
28
1da177e4 29#include <linux/errno.h>
ef76bc23 30#include <linux/kernel.h>
1da177e4
LT
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/net.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/in6.h>
37#include <linux/tcp.h>
38#include <linux/route.h>
b59f45d0 39#include <linux/module.h>
1da177e4
LT
40
41#include <linux/netfilter.h>
42#include <linux/netfilter_ipv6.h>
43
44#include <net/sock.h>
45#include <net/snmp.h>
46
47#include <net/ipv6.h>
48#include <net/ndisc.h>
49#include <net/protocol.h>
50#include <net/ip6_route.h>
51#include <net/addrconf.h>
52#include <net/rawv6.h>
53#include <net/icmp.h>
54#include <net/xfrm.h>
55#include <net/checksum.h>
7bc570c8 56#include <linux/mroute6.h>
1da177e4
LT
57
58static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
59
60static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
61{
62 static u32 ipv6_fragmentation_id = 1;
63 static DEFINE_SPINLOCK(ip6_id_lock);
64
65 spin_lock_bh(&ip6_id_lock);
66 fhdr->identification = htonl(ipv6_fragmentation_id);
67 if (++ipv6_fragmentation_id == 0)
68 ipv6_fragmentation_id = 1;
69 spin_unlock_bh(&ip6_id_lock);
70}
71
ef76bc23
HX
72int __ip6_local_out(struct sk_buff *skb)
73{
74 int len;
75
76 len = skb->len - sizeof(struct ipv6hdr);
77 if (len > IPV6_MAXPLEN)
78 len = 0;
79 ipv6_hdr(skb)->payload_len = htons(len);
80
6e23ae2a 81 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
ef76bc23
HX
82 dst_output);
83}
84
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, pass the packet on to dst_output().  Any other value from
 * __ip6_local_out() is returned to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
95EXPORT_SYMBOL_GPL(ip6_local_out);
96
ad643a79 97static int ip6_output_finish(struct sk_buff *skb)
1da177e4 98{
1da177e4 99 struct dst_entry *dst = skb->dst;
1da177e4 100
3644f0ce
SH
101 if (dst->hh)
102 return neigh_hh_output(dst->hh, skb);
103 else if (dst->neighbour)
1da177e4
LT
104 return dst->neighbour->output(skb);
105
483a47d2
DL
106 IP6_INC_STATS_BH(dev_net(dst->dev),
107 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
108 kfree_skb(skb);
109 return -EINVAL;
110
111}
112
113/* dev_loopback_xmit for use with netfilter. */
114static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
115{
459a98ed 116 skb_reset_mac_header(newskb);
bbe735e4 117 __skb_pull(newskb, skb_network_offset(newskb));
1da177e4
LT
118 newskb->pkt_type = PACKET_LOOPBACK;
119 newskb->ip_summed = CHECKSUM_UNNECESSARY;
547b792c 120 WARN_ON(!newskb->dst);
1da177e4
LT
121
122 netif_rx(newskb);
123 return 0;
124}
125
126
127static int ip6_output2(struct sk_buff *skb)
128{
129 struct dst_entry *dst = skb->dst;
130 struct net_device *dev = dst->dev;
131
132 skb->protocol = htons(ETH_P_IPV6);
133 skb->dev = dev;
134
0660e03f 135 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1da177e4 136 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
a11d206d 137 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1da177e4
LT
138
139 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
bd91b8bf
BT
140 ((mroute6_socket(dev_net(dev)) &&
141 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
7bc570c8
YH
142 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
143 &ipv6_hdr(skb)->saddr))) {
1da177e4
LT
144 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
145
146 /* Do not check for IFF_ALLMULTI; multicast routing
147 is not supported in any case.
148 */
149 if (newskb)
6e23ae2a
PM
150 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
151 NULL, newskb->dev,
1da177e4
LT
152 ip6_dev_loopback_xmit);
153
0660e03f 154 if (ipv6_hdr(skb)->hop_limit == 0) {
3bd653c8
DL
155 IP6_INC_STATS(dev_net(dev), idev,
156 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
157 kfree_skb(skb);
158 return 0;
159 }
160 }
161
3bd653c8 162 IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS);
1da177e4
LT
163 }
164
6e23ae2a
PM
165 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
166 ip6_output_finish);
1da177e4
LT
167}
168
628a5c56
JH
169static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
170{
171 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
172
173 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
174 skb->dst->dev->mtu : dst_mtu(skb->dst);
175}
176
1da177e4
LT
177int ip6_output(struct sk_buff *skb)
178{
778d80be
YH
179 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
180 if (unlikely(idev->cnf.disable_ipv6)) {
3bd653c8
DL
181 IP6_INC_STATS(dev_net(skb->dst->dev), idev,
182 IPSTATS_MIB_OUTDISCARDS);
778d80be
YH
183 kfree_skb(skb);
184 return 0;
185 }
186
628a5c56 187 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
e89e9cf5 188 dst_allfrag(skb->dst))
1da177e4
LT
189 return ip6_fragment(skb, ip6_output2);
190 else
191 return ip6_output2(skb);
192}
193
1da177e4
LT
194/*
195 * xmit an sk_buff (used by TCP)
196 */
197
198int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
199 struct ipv6_txoptions *opt, int ipfragok)
200{
3bd653c8 201 struct net *net = sock_net(sk);
b30bd282 202 struct ipv6_pinfo *np = inet6_sk(sk);
1da177e4
LT
203 struct in6_addr *first_hop = &fl->fl6_dst;
204 struct dst_entry *dst = skb->dst;
205 struct ipv6hdr *hdr;
206 u8 proto = fl->proto;
207 int seg_len = skb->len;
41a1f8ea 208 int hlimit, tclass;
1da177e4
LT
209 u32 mtu;
210
211 if (opt) {
c2636b4d 212 unsigned int head_room;
1da177e4
LT
213
214 /* First: exthdrs may take lots of space (~8K for now)
215 MAX_HEADER is not enough.
216 */
217 head_room = opt->opt_nflen + opt->opt_flen;
218 seg_len += head_room;
219 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
220
221 if (skb_headroom(skb) < head_room) {
222 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
a11d206d 223 if (skb2 == NULL) {
3bd653c8 224 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
a11d206d
YH
225 IPSTATS_MIB_OUTDISCARDS);
226 kfree_skb(skb);
1da177e4
LT
227 return -ENOBUFS;
228 }
a11d206d
YH
229 kfree_skb(skb);
230 skb = skb2;
1da177e4
LT
231 if (sk)
232 skb_set_owner_w(skb, sk);
233 }
234 if (opt->opt_flen)
235 ipv6_push_frag_opts(skb, opt, &proto);
236 if (opt->opt_nflen)
237 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
238 }
239
e2d1bca7
ACM
240 skb_push(skb, sizeof(struct ipv6hdr));
241 skb_reset_network_header(skb);
0660e03f 242 hdr = ipv6_hdr(skb);
1da177e4 243
77e2f14f
WY
244 /* Allow local fragmentation. */
245 if (ipfragok)
246 skb->local_df = 1;
247
1da177e4
LT
248 /*
249 * Fill in the IPv6 header
250 */
251
1da177e4
LT
252 hlimit = -1;
253 if (np)
254 hlimit = np->hop_limit;
255 if (hlimit < 0)
6b75d090 256 hlimit = ip6_dst_hoplimit(dst);
1da177e4 257
41a1f8ea
YH
258 tclass = -1;
259 if (np)
260 tclass = np->tclass;
261 if (tclass < 0)
262 tclass = 0;
263
90bcaf7b 264 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
41a1f8ea 265
1da177e4
LT
266 hdr->payload_len = htons(seg_len);
267 hdr->nexthdr = proto;
268 hdr->hop_limit = hlimit;
269
270 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
271 ipv6_addr_copy(&hdr->daddr, first_hop);
272
a2c2064f 273 skb->priority = sk->sk_priority;
4a19ec58 274 skb->mark = sk->sk_mark;
a2c2064f 275
1da177e4 276 mtu = dst_mtu(dst);
283d07ac 277 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
3bd653c8 278 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
a11d206d 279 IPSTATS_MIB_OUTREQUESTS);
6e23ae2a 280 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
6869c4d8 281 dst_output);
1da177e4
LT
282 }
283
284 if (net_ratelimit())
285 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
286 skb->dev = dst->dev;
287 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
3bd653c8 288 IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
289 kfree_skb(skb);
290 return -EMSGSIZE;
291}
292
7159039a
YH
293EXPORT_SYMBOL(ip6_xmit);
294
1da177e4
LT
295/*
296 * To avoid extra problems ND packets are send through this
297 * routine. It's code duplication but I really want to avoid
298 * extra checks since ipv6_build_header is used by TCP (which
299 * is for us performance critical)
300 */
301
302int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
9acd9f3a 303 const struct in6_addr *saddr, const struct in6_addr *daddr,
1da177e4
LT
304 int proto, int len)
305{
306 struct ipv6_pinfo *np = inet6_sk(sk);
307 struct ipv6hdr *hdr;
308 int totlen;
309
310 skb->protocol = htons(ETH_P_IPV6);
311 skb->dev = dev;
312
313 totlen = len + sizeof(struct ipv6hdr);
314
55f79cc0
ACM
315 skb_reset_network_header(skb);
316 skb_put(skb, sizeof(struct ipv6hdr));
0660e03f 317 hdr = ipv6_hdr(skb);
1da177e4 318
ae08e1f0 319 *(__be32*)hdr = htonl(0x60000000);
1da177e4
LT
320
321 hdr->payload_len = htons(len);
322 hdr->nexthdr = proto;
323 hdr->hop_limit = np->hop_limit;
324
325 ipv6_addr_copy(&hdr->saddr, saddr);
326 ipv6_addr_copy(&hdr->daddr, daddr);
327
328 return 0;
329}
330
331static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
332{
333 struct ip6_ra_chain *ra;
334 struct sock *last = NULL;
335
336 read_lock(&ip6_ra_lock);
337 for (ra = ip6_ra_chain; ra; ra = ra->next) {
338 struct sock *sk = ra->sk;
0bd1b59b
AM
339 if (sk && ra->sel == sel &&
340 (!sk->sk_bound_dev_if ||
341 sk->sk_bound_dev_if == skb->dev->ifindex)) {
1da177e4
LT
342 if (last) {
343 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
344 if (skb2)
345 rawv6_rcv(last, skb2);
346 }
347 last = sk;
348 }
349 }
350
351 if (last) {
352 rawv6_rcv(last, skb);
353 read_unlock(&ip6_ra_lock);
354 return 1;
355 }
356 read_unlock(&ip6_ra_lock);
357 return 0;
358}
359
e21e0b5f
VN
360static int ip6_forward_proxy_check(struct sk_buff *skb)
361{
0660e03f 362 struct ipv6hdr *hdr = ipv6_hdr(skb);
e21e0b5f
VN
363 u8 nexthdr = hdr->nexthdr;
364 int offset;
365
366 if (ipv6_ext_hdr(nexthdr)) {
367 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
368 if (offset < 0)
369 return 0;
370 } else
371 offset = sizeof(struct ipv6hdr);
372
373 if (nexthdr == IPPROTO_ICMPV6) {
374 struct icmp6hdr *icmp6;
375
d56f90a7
ACM
376 if (!pskb_may_pull(skb, (skb_network_header(skb) +
377 offset + 1 - skb->data)))
e21e0b5f
VN
378 return 0;
379
d56f90a7 380 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
e21e0b5f
VN
381
382 switch (icmp6->icmp6_type) {
383 case NDISC_ROUTER_SOLICITATION:
384 case NDISC_ROUTER_ADVERTISEMENT:
385 case NDISC_NEIGHBOUR_SOLICITATION:
386 case NDISC_NEIGHBOUR_ADVERTISEMENT:
387 case NDISC_REDIRECT:
388 /* For reaction involving unicast neighbor discovery
389 * message destined to the proxied address, pass it to
390 * input function.
391 */
392 return 1;
393 default:
394 break;
395 }
396 }
397
74553b09
VN
398 /*
399 * The proxying router can't forward traffic sent to a link-local
400 * address, so signal the sender and discard the packet. This
401 * behavior is clarified by the MIPv6 specification.
402 */
403 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
404 dst_link_failure(skb);
405 return -1;
406 }
407
e21e0b5f
VN
408 return 0;
409}
410
1da177e4
LT
/*
 * Continuation of the NF_INET_FORWARD hook: pass the packet on to
 * dst_output() and return its result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
415
416int ip6_forward(struct sk_buff *skb)
417{
418 struct dst_entry *dst = skb->dst;
0660e03f 419 struct ipv6hdr *hdr = ipv6_hdr(skb);
1da177e4 420 struct inet6_skb_parm *opt = IP6CB(skb);
c346dca1 421 struct net *net = dev_net(dst->dev);
1ab1457c 422
53b7997f 423 if (net->ipv6.devconf_all->forwarding == 0)
1da177e4
LT
424 goto error;
425
4497b076
BH
426 if (skb_warn_if_lro(skb))
427 goto drop;
428
1da177e4 429 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
3bd653c8 430 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
431 goto drop;
432 }
433
35fc92a9 434 skb_forward_csum(skb);
1da177e4
LT
435
436 /*
437 * We DO NOT make any processing on
438 * RA packets, pushing them to user level AS IS
439 * without ane WARRANTY that application will be able
440 * to interpret them. The reason is that we
441 * cannot make anything clever here.
442 *
443 * We are not end-node, so that if packet contains
444 * AH/ESP, we cannot make anything.
445 * Defragmentation also would be mistake, RA packets
446 * cannot be fragmented, because there is no warranty
447 * that different fragments will go along one path. --ANK
448 */
449 if (opt->ra) {
d56f90a7 450 u8 *ptr = skb_network_header(skb) + opt->ra;
1da177e4
LT
451 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
452 return 0;
453 }
454
455 /*
456 * check and decrement ttl
457 */
458 if (hdr->hop_limit <= 1) {
459 /* Force OUTPUT device used as source address */
460 skb->dev = dst->dev;
461 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
462 0, skb->dev);
483a47d2
DL
463 IP6_INC_STATS_BH(net,
464 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
1da177e4
LT
465
466 kfree_skb(skb);
467 return -ETIMEDOUT;
468 }
469
fbea49e1 470 /* XXX: idev->cnf.proxy_ndp? */
53b7997f 471 if (net->ipv6.devconf_all->proxy_ndp &&
8a3edd80 472 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
74553b09
VN
473 int proxied = ip6_forward_proxy_check(skb);
474 if (proxied > 0)
e21e0b5f 475 return ip6_input(skb);
74553b09 476 else if (proxied < 0) {
3bd653c8
DL
477 IP6_INC_STATS(net, ip6_dst_idev(dst),
478 IPSTATS_MIB_INDISCARDS);
74553b09
VN
479 goto drop;
480 }
e21e0b5f
VN
481 }
482
1da177e4 483 if (!xfrm6_route_forward(skb)) {
3bd653c8 484 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
1da177e4
LT
485 goto drop;
486 }
487 dst = skb->dst;
488
489 /* IPv6 specs say nothing about it, but it is clear that we cannot
490 send redirects to source routed frames.
1e5dc146 491 We don't send redirects to frames decapsulated from IPsec.
1da177e4 492 */
1e5dc146 493 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
def8b4fa 494 !skb_sec_path(skb)) {
1da177e4
LT
495 struct in6_addr *target = NULL;
496 struct rt6_info *rt;
497 struct neighbour *n = dst->neighbour;
498
499 /*
500 * incoming and outgoing devices are the same
501 * send a redirect.
502 */
503
504 rt = (struct rt6_info *) dst;
505 if ((rt->rt6i_flags & RTF_GATEWAY))
506 target = (struct in6_addr*)&n->primary_key;
507 else
508 target = &hdr->daddr;
509
510 /* Limit redirects both by destination (here)
511 and by source (inside ndisc_send_redirect)
512 */
513 if (xrlim_allow(dst, 1*HZ))
514 ndisc_send_redirect(skb, n, target);
5bb1ab09
DS
515 } else {
516 int addrtype = ipv6_addr_type(&hdr->saddr);
517
1da177e4 518 /* This check is security critical. */
f81b2e7d
YH
519 if (addrtype == IPV6_ADDR_ANY ||
520 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
5bb1ab09
DS
521 goto error;
522 if (addrtype & IPV6_ADDR_LINKLOCAL) {
523 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
524 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
525 goto error;
526 }
1da177e4
LT
527 }
528
529 if (skb->len > dst_mtu(dst)) {
530 /* Again, force OUTPUT device used as source address */
531 skb->dev = dst->dev;
532 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
483a47d2
DL
533 IP6_INC_STATS_BH(net,
534 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
535 IP6_INC_STATS_BH(net,
536 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
537 kfree_skb(skb);
538 return -EMSGSIZE;
539 }
540
541 if (skb_cow(skb, dst->dev->hard_header_len)) {
3bd653c8 542 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
543 goto drop;
544 }
545
0660e03f 546 hdr = ipv6_hdr(skb);
1da177e4
LT
547
548 /* Mangling hops number delayed to point after skb COW */
1ab1457c 549
1da177e4
LT
550 hdr->hop_limit--;
551
483a47d2 552 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
6e23ae2a
PM
553 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
554 ip6_forward_finish);
1da177e4
LT
555
556error:
483a47d2 557 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
1da177e4
LT
558drop:
559 kfree_skb(skb);
560 return -EINVAL;
561}
562
563static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
564{
565 to->pkt_type = from->pkt_type;
566 to->priority = from->priority;
567 to->protocol = from->protocol;
1da177e4
LT
568 dst_release(to->dst);
569 to->dst = dst_clone(from->dst);
570 to->dev = from->dev;
82e91ffe 571 to->mark = from->mark;
1da177e4
LT
572
573#ifdef CONFIG_NET_SCHED
574 to->tc_index = from->tc_index;
575#endif
e7ac05f3 576 nf_copy(to, from);
ba9dda3a
JK
577#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
578 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
579 to->nf_trace = from->nf_trace;
580#endif
984bc16c 581 skb_copy_secmark(to, from);
1da177e4
LT
582}
583
584int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
585{
586 u16 offset = sizeof(struct ipv6hdr);
0660e03f
ACM
587 struct ipv6_opt_hdr *exthdr =
588 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
27a884dc 589 unsigned int packet_len = skb->tail - skb->network_header;
1da177e4 590 int found_rhdr = 0;
0660e03f 591 *nexthdr = &ipv6_hdr(skb)->nexthdr;
1da177e4
LT
592
593 while (offset + 1 <= packet_len) {
594
595 switch (**nexthdr) {
596
597 case NEXTHDR_HOP:
27637df9 598 break;
1da177e4 599 case NEXTHDR_ROUTING:
27637df9
MN
600 found_rhdr = 1;
601 break;
1da177e4 602 case NEXTHDR_DEST:
59fbb3a6 603#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
27637df9
MN
604 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
605 break;
606#endif
607 if (found_rhdr)
608 return offset;
1da177e4
LT
609 break;
610 default :
611 return offset;
612 }
27637df9
MN
613
614 offset += ipv6_optlen(exthdr);
615 *nexthdr = &exthdr->nexthdr;
d56f90a7
ACM
616 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
617 offset);
1da177e4
LT
618 }
619
620 return offset;
621}
622
623static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
624{
1da177e4
LT
625 struct sk_buff *frag;
626 struct rt6_info *rt = (struct rt6_info*)skb->dst;
d91675f9 627 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
1da177e4
LT
628 struct ipv6hdr *tmp_hdr;
629 struct frag_hdr *fh;
630 unsigned int mtu, hlen, left, len;
ae08e1f0 631 __be32 frag_id = 0;
1da177e4
LT
632 int ptr, offset = 0, err=0;
633 u8 *prevhdr, nexthdr = 0;
3bd653c8 634 struct net *net = dev_net(skb->dst->dev);
1da177e4 635
1da177e4
LT
636 hlen = ip6_find_1stfragopt(skb, &prevhdr);
637 nexthdr = *prevhdr;
638
628a5c56 639 mtu = ip6_skb_dst_mtu(skb);
b881ef76
JH
640
641 /* We must not fragment if the socket is set to force MTU discovery
642 * or if the skb it not generated by a local socket. (This last
643 * check should be redundant, but it's free.)
644 */
b5c15fc0 645 if (!skb->local_df) {
b881ef76
JH
646 skb->dev = skb->dst->dev;
647 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
3bd653c8
DL
648 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
649 IPSTATS_MIB_FRAGFAILS);
b881ef76
JH
650 kfree_skb(skb);
651 return -EMSGSIZE;
652 }
653
d91675f9
YH
654 if (np && np->frag_size < mtu) {
655 if (np->frag_size)
656 mtu = np->frag_size;
657 }
658 mtu -= hlen + sizeof(struct frag_hdr);
1da177e4
LT
659
660 if (skb_shinfo(skb)->frag_list) {
661 int first_len = skb_pagelen(skb);
29ffe1a5 662 int truesizes = 0;
1da177e4
LT
663
664 if (first_len - hlen > mtu ||
665 ((first_len - hlen) & 7) ||
666 skb_cloned(skb))
667 goto slow_path;
668
669 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
670 /* Correct geometry. */
671 if (frag->len > mtu ||
672 ((frag->len & 7) && frag->next) ||
673 skb_headroom(frag) < hlen)
674 goto slow_path;
675
1da177e4
LT
676 /* Partially cloned skb? */
677 if (skb_shared(frag))
678 goto slow_path;
2fdba6b0
HX
679
680 BUG_ON(frag->sk);
681 if (skb->sk) {
682 sock_hold(skb->sk);
683 frag->sk = skb->sk;
684 frag->destructor = sock_wfree;
29ffe1a5 685 truesizes += frag->truesize;
2fdba6b0 686 }
1da177e4
LT
687 }
688
689 err = 0;
690 offset = 0;
691 frag = skb_shinfo(skb)->frag_list;
692 skb_shinfo(skb)->frag_list = NULL;
693 /* BUILD HEADER */
694
9a217a1c 695 *prevhdr = NEXTHDR_FRAGMENT;
d56f90a7 696 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
1da177e4 697 if (!tmp_hdr) {
3bd653c8
DL
698 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
699 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
700 return -ENOMEM;
701 }
702
1da177e4
LT
703 __skb_pull(skb, hlen);
704 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
e2d1bca7
ACM
705 __skb_push(skb, hlen);
706 skb_reset_network_header(skb);
d56f90a7 707 memcpy(skb_network_header(skb), tmp_hdr, hlen);
1da177e4
LT
708
709 ipv6_select_ident(skb, fh);
710 fh->nexthdr = nexthdr;
711 fh->reserved = 0;
712 fh->frag_off = htons(IP6_MF);
713 frag_id = fh->identification;
714
715 first_len = skb_pagelen(skb);
716 skb->data_len = first_len - skb_headlen(skb);
29ffe1a5 717 skb->truesize -= truesizes;
1da177e4 718 skb->len = first_len;
0660e03f
ACM
719 ipv6_hdr(skb)->payload_len = htons(first_len -
720 sizeof(struct ipv6hdr));
a11d206d
YH
721
722 dst_hold(&rt->u.dst);
1da177e4
LT
723
724 for (;;) {
725 /* Prepare header of the next frame,
726 * before previous one went down. */
727 if (frag) {
728 frag->ip_summed = CHECKSUM_NONE;
badff6d0 729 skb_reset_transport_header(frag);
1da177e4 730 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
e2d1bca7
ACM
731 __skb_push(frag, hlen);
732 skb_reset_network_header(frag);
d56f90a7
ACM
733 memcpy(skb_network_header(frag), tmp_hdr,
734 hlen);
1da177e4
LT
735 offset += skb->len - hlen - sizeof(struct frag_hdr);
736 fh->nexthdr = nexthdr;
737 fh->reserved = 0;
738 fh->frag_off = htons(offset);
739 if (frag->next != NULL)
740 fh->frag_off |= htons(IP6_MF);
741 fh->identification = frag_id;
0660e03f
ACM
742 ipv6_hdr(frag)->payload_len =
743 htons(frag->len -
744 sizeof(struct ipv6hdr));
1da177e4
LT
745 ip6_copy_metadata(frag, skb);
746 }
1ab1457c 747
1da177e4 748 err = output(skb);
dafee490 749 if(!err)
3bd653c8
DL
750 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
751 IPSTATS_MIB_FRAGCREATES);
dafee490 752
1da177e4
LT
753 if (err || !frag)
754 break;
755
756 skb = frag;
757 frag = skb->next;
758 skb->next = NULL;
759 }
760
a51482bd 761 kfree(tmp_hdr);
1da177e4
LT
762
763 if (err == 0) {
3bd653c8
DL
764 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
765 IPSTATS_MIB_FRAGOKS);
a11d206d 766 dst_release(&rt->u.dst);
1da177e4
LT
767 return 0;
768 }
769
770 while (frag) {
771 skb = frag->next;
772 kfree_skb(frag);
773 frag = skb;
774 }
775
3bd653c8
DL
776 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
777 IPSTATS_MIB_FRAGFAILS);
a11d206d 778 dst_release(&rt->u.dst);
1da177e4
LT
779 return err;
780 }
781
782slow_path:
783 left = skb->len - hlen; /* Space per frame */
784 ptr = hlen; /* Where to start from */
785
786 /*
787 * Fragment the datagram.
788 */
789
790 *prevhdr = NEXTHDR_FRAGMENT;
791
792 /*
793 * Keep copying data until we run out.
794 */
795 while(left > 0) {
796 len = left;
797 /* IF: it doesn't fit, use 'mtu' - the data space left */
798 if (len > mtu)
799 len = mtu;
800 /* IF: we are not sending upto and including the packet end
801 then align the next start on an eight byte boundary */
802 if (len < left) {
803 len &= ~7;
804 }
805 /*
806 * Allocate buffer.
807 */
808
f5184d26 809 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
64ce2073 810 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
3bd653c8 811 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
a11d206d 812 IPSTATS_MIB_FRAGFAILS);
1da177e4
LT
813 err = -ENOMEM;
814 goto fail;
815 }
816
817 /*
818 * Set up data on packet
819 */
820
821 ip6_copy_metadata(frag, skb);
822 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
823 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
c1d2bbe1 824 skb_reset_network_header(frag);
badff6d0 825 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
b0e380b1
ACM
826 frag->transport_header = (frag->network_header + hlen +
827 sizeof(struct frag_hdr));
1da177e4
LT
828
829 /*
830 * Charge the memory for the fragment to any owner
831 * it might possess
832 */
833 if (skb->sk)
834 skb_set_owner_w(frag, skb->sk);
835
836 /*
837 * Copy the packet header into the new buffer.
838 */
d626f62b 839 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
1da177e4
LT
840
841 /*
842 * Build fragment header.
843 */
844 fh->nexthdr = nexthdr;
845 fh->reserved = 0;
f36d6ab1 846 if (!frag_id) {
1da177e4
LT
847 ipv6_select_ident(skb, fh);
848 frag_id = fh->identification;
849 } else
850 fh->identification = frag_id;
851
852 /*
853 * Copy a block of the IP datagram.
854 */
8984e41d 855 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
1da177e4
LT
856 BUG();
857 left -= len;
858
859 fh->frag_off = htons(offset);
860 if (left > 0)
861 fh->frag_off |= htons(IP6_MF);
0660e03f
ACM
862 ipv6_hdr(frag)->payload_len = htons(frag->len -
863 sizeof(struct ipv6hdr));
1da177e4
LT
864
865 ptr += len;
866 offset += len;
867
868 /*
869 * Put this fragment into the sending queue.
870 */
1da177e4
LT
871 err = output(frag);
872 if (err)
873 goto fail;
dafee490 874
3bd653c8
DL
875 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
876 IPSTATS_MIB_FRAGCREATES);
1da177e4 877 }
3bd653c8 878 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
a11d206d 879 IPSTATS_MIB_FRAGOKS);
1da177e4 880 kfree_skb(skb);
1da177e4
LT
881 return err;
882
883fail:
3bd653c8 884 IP6_INC_STATS(net, ip6_dst_idev(skb->dst),
a11d206d 885 IPSTATS_MIB_FRAGFAILS);
1ab1457c 886 kfree_skb(skb);
1da177e4
LT
887 return err;
888}
889
cf6b1982
YH
890static inline int ip6_rt_check(struct rt6key *rt_key,
891 struct in6_addr *fl_addr,
892 struct in6_addr *addr_cache)
893{
894 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
895 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
896}
897
497c615a
HX
898static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
899 struct dst_entry *dst,
900 struct flowi *fl)
1da177e4 901{
497c615a
HX
902 struct ipv6_pinfo *np = inet6_sk(sk);
903 struct rt6_info *rt = (struct rt6_info *)dst;
1da177e4 904
497c615a
HX
905 if (!dst)
906 goto out;
907
908 /* Yes, checking route validity in not connected
909 * case is not very simple. Take into account,
910 * that we do not support routing by source, TOS,
911 * and MSG_DONTROUTE --ANK (980726)
912 *
cf6b1982
YH
913 * 1. ip6_rt_check(): If route was host route,
914 * check that cached destination is current.
497c615a
HX
915 * If it is network route, we still may
916 * check its validity using saved pointer
917 * to the last used address: daddr_cache.
918 * We do not want to save whole address now,
919 * (because main consumer of this service
920 * is tcp, which has not this problem),
921 * so that the last trick works only on connected
922 * sockets.
923 * 2. oif also should be the same.
924 */
cf6b1982 925 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
8e1ef0a9
YH
926#ifdef CONFIG_IPV6_SUBTREES
927 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
928#endif
cf6b1982 929 (fl->oif && fl->oif != dst->dev->ifindex)) {
497c615a
HX
930 dst_release(dst);
931 dst = NULL;
1da177e4
LT
932 }
933
497c615a
HX
934out:
935 return dst;
936}
937
938static int ip6_dst_lookup_tail(struct sock *sk,
939 struct dst_entry **dst, struct flowi *fl)
940{
941 int err;
3b1e0a65 942 struct net *net = sock_net(sk);
497c615a 943
1da177e4 944 if (*dst == NULL)
8a3edd80 945 *dst = ip6_route_output(net, sk, fl);
1da177e4
LT
946
947 if ((err = (*dst)->error))
948 goto out_err_release;
949
950 if (ipv6_addr_any(&fl->fl6_src)) {
191cd582 951 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
7cbca67c
YH
952 &fl->fl6_dst,
953 sk ? inet6_sk(sk)->srcprefs : 0,
954 &fl->fl6_src);
44456d37 955 if (err)
1da177e4 956 goto out_err_release;
1da177e4
LT
957 }
958
95c385b4 959#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
e550dfb0
NH
960 /*
961 * Here if the dst entry we've looked up
962 * has a neighbour entry that is in the INCOMPLETE
963 * state and the src address from the flow is
964 * marked as OPTIMISTIC, we release the found
965 * dst entry and replace it instead with the
966 * dst entry of the nexthop router
967 */
968 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
969 struct inet6_ifaddr *ifp;
970 struct flowi fl_gw;
971 int redirect;
972
973 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
974 (*dst)->dev, 1);
975
976 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
977 if (ifp)
978 in6_ifa_put(ifp);
979
980 if (redirect) {
981 /*
982 * We need to get the dst entry for the
983 * default router instead
984 */
985 dst_release(*dst);
986 memcpy(&fl_gw, fl, sizeof(struct flowi));
987 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
988 *dst = ip6_route_output(net, sk, &fl_gw);
989 if ((err = (*dst)->error))
990 goto out_err_release;
95c385b4 991 }
e550dfb0 992 }
95c385b4
NH
993#endif
994
1da177e4
LT
995 return 0;
996
997out_err_release:
ca46f9c8 998 if (err == -ENETUNREACH)
483a47d2 999 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1000 dst_release(*dst);
1001 *dst = NULL;
1002 return err;
1003}
34a0b3cd 1004
497c615a
HX
1005/**
1006 * ip6_dst_lookup - perform route lookup on flow
1007 * @sk: socket which provides route info
1008 * @dst: pointer to dst_entry * for result
1009 * @fl: flow to lookup
1010 *
1011 * This function performs a route lookup on the given flow.
1012 *
1013 * It returns zero on success, or a standard errno code on error.
1014 */
1015int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1016{
1017 *dst = NULL;
1018 return ip6_dst_lookup_tail(sk, dst, fl);
1019}
3cf3dc6c
ACM
1020EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1021
497c615a
HX
1022/**
1023 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1024 * @sk: socket which provides the dst cache and route info
1025 * @dst: pointer to dst_entry * for result
1026 * @fl: flow to lookup
1027 *
1028 * This function performs a route lookup on the given flow with the
1029 * possibility of using the cached route in the socket if it is valid.
1030 * It will take the socket dst lock when operating on the dst cache.
1031 * As a result, this function can only be used in process context.
1032 *
1033 * It returns zero on success, or a standard errno code on error.
1034 */
1035int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1036{
1037 *dst = NULL;
1038 if (sk) {
1039 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1040 *dst = ip6_sk_dst_check(sk, *dst, fl);
1041 }
1042
1043 return ip6_dst_lookup_tail(sk, dst, fl);
1044}
1045EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1046
34a0b3cd 1047static inline int ip6_ufo_append_data(struct sock *sk,
e89e9cf5
AR
1048 int getfrag(void *from, char *to, int offset, int len,
1049 int odd, struct sk_buff *skb),
1050 void *from, int length, int hh_len, int fragheaderlen,
1051 int transhdrlen, int mtu,unsigned int flags)
1052
1053{
1054 struct sk_buff *skb;
1055 int err;
1056
1057 /* There is support for UDP large send offload by network
1058 * device, so create one single skb packet containing complete
1059 * udp datagram
1060 */
1061 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1062 skb = sock_alloc_send_skb(sk,
1063 hh_len + fragheaderlen + transhdrlen + 20,
1064 (flags & MSG_DONTWAIT), &err);
1065 if (skb == NULL)
1066 return -ENOMEM;
1067
1068 /* reserve space for Hardware header */
1069 skb_reserve(skb, hh_len);
1070
1071 /* create space for UDP/IP header */
1072 skb_put(skb,fragheaderlen + transhdrlen);
1073
1074 /* initialize network header pointer */
c1d2bbe1 1075 skb_reset_network_header(skb);
e89e9cf5
AR
1076
1077 /* initialize protocol header pointer */
b0e380b1 1078 skb->transport_header = skb->network_header + fragheaderlen;
e89e9cf5 1079
84fa7933 1080 skb->ip_summed = CHECKSUM_PARTIAL;
e89e9cf5
AR
1081 skb->csum = 0;
1082 sk->sk_sndmsg_off = 0;
1083 }
1084
1085 err = skb_append_datato_frags(sk,skb, getfrag, from,
1086 (length - transhdrlen));
1087 if (!err) {
1088 struct frag_hdr fhdr;
1089
1090 /* specify the length of each IP datagram fragment*/
1ab1457c 1091 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
7967168c 1092 sizeof(struct frag_hdr);
f83ef8c0 1093 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
e89e9cf5
AR
1094 ipv6_select_ident(skb, &fhdr);
1095 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1096 __skb_queue_tail(&sk->sk_write_queue, skb);
1097
1098 return 0;
1099 }
1100 /* There is not enough support do UPD LSO,
1101 * so follow normal path
1102 */
1103 kfree_skb(skb);
1104
1105 return err;
1106}
1da177e4 1107
0178b695
HX
1108static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1109 gfp_t gfp)
1110{
1111 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1112}
1113
1114static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1115 gfp_t gfp)
1116{
1117 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1118}
1119
41a1f8ea
YH
1120int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1121 int offset, int len, int odd, struct sk_buff *skb),
1122 void *from, int length, int transhdrlen,
1123 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1124 struct rt6_info *rt, unsigned int flags)
1da177e4
LT
1125{
1126 struct inet_sock *inet = inet_sk(sk);
1127 struct ipv6_pinfo *np = inet6_sk(sk);
1128 struct sk_buff *skb;
1129 unsigned int maxfraglen, fragheaderlen;
1130 int exthdrlen;
1131 int hh_len;
1132 int mtu;
1133 int copy;
1134 int err;
1135 int offset = 0;
1136 int csummode = CHECKSUM_NONE;
1137
1138 if (flags&MSG_PROBE)
1139 return 0;
1140 if (skb_queue_empty(&sk->sk_write_queue)) {
1141 /*
1142 * setup for corking
1143 */
1144 if (opt) {
0178b695 1145 if (WARN_ON(np->cork.opt))
1da177e4 1146 return -EINVAL;
0178b695
HX
1147
1148 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1149 if (unlikely(np->cork.opt == NULL))
1150 return -ENOBUFS;
1151
1152 np->cork.opt->tot_len = opt->tot_len;
1153 np->cork.opt->opt_flen = opt->opt_flen;
1154 np->cork.opt->opt_nflen = opt->opt_nflen;
1155
1156 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1157 sk->sk_allocation);
1158 if (opt->dst0opt && !np->cork.opt->dst0opt)
1159 return -ENOBUFS;
1160
1161 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1162 sk->sk_allocation);
1163 if (opt->dst1opt && !np->cork.opt->dst1opt)
1164 return -ENOBUFS;
1165
1166 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1167 sk->sk_allocation);
1168 if (opt->hopopt && !np->cork.opt->hopopt)
1169 return -ENOBUFS;
1170
1171 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1172 sk->sk_allocation);
1173 if (opt->srcrt && !np->cork.opt->srcrt)
1174 return -ENOBUFS;
1175
1da177e4
LT
1176 /* need source address above miyazawa*/
1177 }
1178 dst_hold(&rt->u.dst);
c8cdaf99 1179 inet->cork.dst = &rt->u.dst;
1da177e4
LT
1180 inet->cork.fl = *fl;
1181 np->cork.hop_limit = hlimit;
41a1f8ea 1182 np->cork.tclass = tclass;
628a5c56
JH
1183 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1184 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
c7503609 1185 if (np->frag_size < mtu) {
d91675f9
YH
1186 if (np->frag_size)
1187 mtu = np->frag_size;
1188 }
1189 inet->cork.fragsize = mtu;
1da177e4
LT
1190 if (dst_allfrag(rt->u.dst.path))
1191 inet->cork.flags |= IPCORK_ALLFRAG;
1192 inet->cork.length = 0;
1193 sk->sk_sndmsg_page = NULL;
1194 sk->sk_sndmsg_off = 0;
01488942 1195 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
a1b05140 1196 rt->rt6i_nfheader_len;
1da177e4
LT
1197 length += exthdrlen;
1198 transhdrlen += exthdrlen;
1199 } else {
c8cdaf99 1200 rt = (struct rt6_info *)inet->cork.dst;
1da177e4 1201 fl = &inet->cork.fl;
0178b695 1202 opt = np->cork.opt;
1da177e4
LT
1203 transhdrlen = 0;
1204 exthdrlen = 0;
1205 mtu = inet->cork.fragsize;
1206 }
1207
1208 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1209
a1b05140 1210 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
b4ce9277 1211 (opt ? opt->opt_nflen : 0);
1da177e4
LT
1212 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1213
1214 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1215 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1216 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1217 return -EMSGSIZE;
1218 }
1219 }
1220
1221 /*
1222 * Let's try using as much space as possible.
1223 * Use MTU if total length of the message fits into the MTU.
1224 * Otherwise, we need to reserve fragment header and
1225 * fragment alignment (= 8-15 octects, in total).
1226 *
1227 * Note that we may need to "move" the data from the tail of
1ab1457c 1228 * of the buffer to the new fragment when we split
1da177e4
LT
1229 * the message.
1230 *
1ab1457c 1231 * FIXME: It may be fragmented into multiple chunks
1da177e4
LT
1232 * at once if non-fragmentable extension headers
1233 * are too large.
1ab1457c 1234 * --yoshfuji
1da177e4
LT
1235 */
1236
1237 inet->cork.length += length;
e89e9cf5
AR
1238 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1239 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1240
baa829d8
PM
1241 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1242 fragheaderlen, transhdrlen, mtu,
1243 flags);
1244 if (err)
e89e9cf5 1245 goto error;
e89e9cf5
AR
1246 return 0;
1247 }
1da177e4
LT
1248
1249 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1250 goto alloc_new_skb;
1251
1252 while (length > 0) {
1253 /* Check if the remaining data fits into current packet. */
1254 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1255 if (copy < length)
1256 copy = maxfraglen - skb->len;
1257
1258 if (copy <= 0) {
1259 char *data;
1260 unsigned int datalen;
1261 unsigned int fraglen;
1262 unsigned int fraggap;
1263 unsigned int alloclen;
1264 struct sk_buff *skb_prev;
1265alloc_new_skb:
1266 skb_prev = skb;
1267
1268 /* There's no room in the current skb */
1269 if (skb_prev)
1270 fraggap = skb_prev->len - maxfraglen;
1271 else
1272 fraggap = 0;
1273
1274 /*
1275 * If remaining data exceeds the mtu,
1276 * we know we need more fragment(s).
1277 */
1278 datalen = length + fraggap;
1279 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1280 datalen = maxfraglen - fragheaderlen;
1281
1282 fraglen = datalen + fragheaderlen;
1283 if ((flags & MSG_MORE) &&
1284 !(rt->u.dst.dev->features&NETIF_F_SG))
1285 alloclen = mtu;
1286 else
1287 alloclen = datalen + fragheaderlen;
1288
1289 /*
1290 * The last fragment gets additional space at tail.
1291 * Note: we overallocate on fragments with MSG_MODE
1292 * because we have no idea if we're the last one.
1293 */
1294 if (datalen == length + fraggap)
1295 alloclen += rt->u.dst.trailer_len;
1296
1297 /*
1298 * We just reserve space for fragment header.
1ab1457c 1299 * Note: this may be overallocation if the message
1da177e4
LT
1300 * (without MSG_MORE) fits into the MTU.
1301 */
1302 alloclen += sizeof(struct frag_hdr);
1303
1304 if (transhdrlen) {
1305 skb = sock_alloc_send_skb(sk,
1306 alloclen + hh_len,
1307 (flags & MSG_DONTWAIT), &err);
1308 } else {
1309 skb = NULL;
1310 if (atomic_read(&sk->sk_wmem_alloc) <=
1311 2 * sk->sk_sndbuf)
1312 skb = sock_wmalloc(sk,
1313 alloclen + hh_len, 1,
1314 sk->sk_allocation);
1315 if (unlikely(skb == NULL))
1316 err = -ENOBUFS;
1317 }
1318 if (skb == NULL)
1319 goto error;
1320 /*
1321 * Fill in the control structures
1322 */
1323 skb->ip_summed = csummode;
1324 skb->csum = 0;
1325 /* reserve for fragmentation */
1326 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1327
1328 /*
1329 * Find where to start putting bytes
1330 */
1331 data = skb_put(skb, fraglen);
c14d2450 1332 skb_set_network_header(skb, exthdrlen);
1da177e4 1333 data += fragheaderlen;
b0e380b1
ACM
1334 skb->transport_header = (skb->network_header +
1335 fragheaderlen);
1da177e4
LT
1336 if (fraggap) {
1337 skb->csum = skb_copy_and_csum_bits(
1338 skb_prev, maxfraglen,
1339 data + transhdrlen, fraggap, 0);
1340 skb_prev->csum = csum_sub(skb_prev->csum,
1341 skb->csum);
1342 data += fraggap;
e9fa4f7b 1343 pskb_trim_unique(skb_prev, maxfraglen);
1da177e4
LT
1344 }
1345 copy = datalen - transhdrlen - fraggap;
1346 if (copy < 0) {
1347 err = -EINVAL;
1348 kfree_skb(skb);
1349 goto error;
1350 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1351 err = -EFAULT;
1352 kfree_skb(skb);
1353 goto error;
1354 }
1355
1356 offset += copy;
1357 length -= datalen - fraggap;
1358 transhdrlen = 0;
1359 exthdrlen = 0;
1360 csummode = CHECKSUM_NONE;
1361
1362 /*
1363 * Put the packet on the pending queue
1364 */
1365 __skb_queue_tail(&sk->sk_write_queue, skb);
1366 continue;
1367 }
1368
1369 if (copy > length)
1370 copy = length;
1371
1372 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1373 unsigned int off;
1374
1375 off = skb->len;
1376 if (getfrag(from, skb_put(skb, copy),
1377 offset, copy, off, skb) < 0) {
1378 __skb_trim(skb, off);
1379 err = -EFAULT;
1380 goto error;
1381 }
1382 } else {
1383 int i = skb_shinfo(skb)->nr_frags;
1384 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1385 struct page *page = sk->sk_sndmsg_page;
1386 int off = sk->sk_sndmsg_off;
1387 unsigned int left;
1388
1389 if (page && (left = PAGE_SIZE - off) > 0) {
1390 if (copy >= left)
1391 copy = left;
1392 if (page != frag->page) {
1393 if (i == MAX_SKB_FRAGS) {
1394 err = -EMSGSIZE;
1395 goto error;
1396 }
1397 get_page(page);
1398 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1399 frag = &skb_shinfo(skb)->frags[i];
1400 }
1401 } else if(i < MAX_SKB_FRAGS) {
1402 if (copy > PAGE_SIZE)
1403 copy = PAGE_SIZE;
1404 page = alloc_pages(sk->sk_allocation, 0);
1405 if (page == NULL) {
1406 err = -ENOMEM;
1407 goto error;
1408 }
1409 sk->sk_sndmsg_page = page;
1410 sk->sk_sndmsg_off = 0;
1411
1412 skb_fill_page_desc(skb, i, page, 0, 0);
1413 frag = &skb_shinfo(skb)->frags[i];
1da177e4
LT
1414 } else {
1415 err = -EMSGSIZE;
1416 goto error;
1417 }
1418 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1419 err = -EFAULT;
1420 goto error;
1421 }
1422 sk->sk_sndmsg_off += copy;
1423 frag->size += copy;
1424 skb->len += copy;
1425 skb->data_len += copy;
f945fa7a
HX
1426 skb->truesize += copy;
1427 atomic_add(copy, &sk->sk_wmem_alloc);
1da177e4
LT
1428 }
1429 offset += copy;
1430 length -= copy;
1431 }
1432 return 0;
1433error:
1434 inet->cork.length -= length;
3bd653c8 1435 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1436 return err;
1437}
1438
bf138862
PE
1439static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1440{
0178b695
HX
1441 if (np->cork.opt) {
1442 kfree(np->cork.opt->dst0opt);
1443 kfree(np->cork.opt->dst1opt);
1444 kfree(np->cork.opt->hopopt);
1445 kfree(np->cork.opt->srcrt);
1446 kfree(np->cork.opt);
1447 np->cork.opt = NULL;
1448 }
1449
c8cdaf99
YH
1450 if (inet->cork.dst) {
1451 dst_release(inet->cork.dst);
1452 inet->cork.dst = NULL;
bf138862
PE
1453 inet->cork.flags &= ~IPCORK_ALLFRAG;
1454 }
1455 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1456}
1457
1da177e4
LT
1458int ip6_push_pending_frames(struct sock *sk)
1459{
1460 struct sk_buff *skb, *tmp_skb;
1461 struct sk_buff **tail_skb;
1462 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1463 struct inet_sock *inet = inet_sk(sk);
1464 struct ipv6_pinfo *np = inet6_sk(sk);
3bd653c8 1465 struct net *net = sock_net(sk);
1da177e4
LT
1466 struct ipv6hdr *hdr;
1467 struct ipv6_txoptions *opt = np->cork.opt;
c8cdaf99 1468 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1da177e4
LT
1469 struct flowi *fl = &inet->cork.fl;
1470 unsigned char proto = fl->proto;
1471 int err = 0;
1472
1473 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1474 goto out;
1475 tail_skb = &(skb_shinfo(skb)->frag_list);
1476
1477 /* move skb->data to ip header from ext header */
d56f90a7 1478 if (skb->data < skb_network_header(skb))
bbe735e4 1479 __skb_pull(skb, skb_network_offset(skb));
1da177e4 1480 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
cfe1fc77 1481 __skb_pull(tmp_skb, skb_network_header_len(skb));
1da177e4
LT
1482 *tail_skb = tmp_skb;
1483 tail_skb = &(tmp_skb->next);
1484 skb->len += tmp_skb->len;
1485 skb->data_len += tmp_skb->len;
1da177e4
LT
1486 skb->truesize += tmp_skb->truesize;
1487 __sock_put(tmp_skb->sk);
1488 tmp_skb->destructor = NULL;
1489 tmp_skb->sk = NULL;
1da177e4
LT
1490 }
1491
28a89453 1492 /* Allow local fragmentation. */
b5c15fc0 1493 if (np->pmtudisc < IPV6_PMTUDISC_DO)
28a89453
HX
1494 skb->local_df = 1;
1495
1da177e4 1496 ipv6_addr_copy(final_dst, &fl->fl6_dst);
cfe1fc77 1497 __skb_pull(skb, skb_network_header_len(skb));
1da177e4
LT
1498 if (opt && opt->opt_flen)
1499 ipv6_push_frag_opts(skb, opt, &proto);
1500 if (opt && opt->opt_nflen)
1501 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1502
e2d1bca7
ACM
1503 skb_push(skb, sizeof(struct ipv6hdr));
1504 skb_reset_network_header(skb);
0660e03f 1505 hdr = ipv6_hdr(skb);
1ab1457c 1506
90bcaf7b 1507 *(__be32*)hdr = fl->fl6_flowlabel |
41a1f8ea 1508 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1da177e4 1509
1da177e4
LT
1510 hdr->hop_limit = np->cork.hop_limit;
1511 hdr->nexthdr = proto;
1512 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1513 ipv6_addr_copy(&hdr->daddr, final_dst);
1514
a2c2064f 1515 skb->priority = sk->sk_priority;
4a19ec58 1516 skb->mark = sk->sk_mark;
a2c2064f 1517
1da177e4 1518 skb->dst = dst_clone(&rt->u.dst);
3bd653c8 1519 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
14878f75
DS
1520 if (proto == IPPROTO_ICMPV6) {
1521 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1522
5a57d4c7 1523 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
e41b5368 1524 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
14878f75
DS
1525 }
1526
ef76bc23 1527 err = ip6_local_out(skb);
1da177e4
LT
1528 if (err) {
1529 if (err > 0)
3320da89 1530 err = np->recverr ? net_xmit_errno(err) : 0;
1da177e4
LT
1531 if (err)
1532 goto error;
1533 }
1534
1535out:
bf138862 1536 ip6_cork_release(inet, np);
1da177e4
LT
1537 return err;
1538error:
1539 goto out;
1540}
1541
1542void ip6_flush_pending_frames(struct sock *sk)
1543{
1da177e4
LT
1544 struct sk_buff *skb;
1545
1546 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
e1f52208 1547 if (skb->dst)
3bd653c8 1548 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst),
e1f52208 1549 IPSTATS_MIB_OUTDISCARDS);
1da177e4
LT
1550 kfree_skb(skb);
1551 }
1552
bf138862 1553 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1da177e4 1554}