netns: Use net_eq() to compare net-namespaces for optimization.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net/ipv6/ip6_output.c
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
	static u32 ipv6_fragmentation_id = 1;
	static DEFINE_SPINLOCK(ip6_id_lock);

	spin_lock_bh(&ip6_id_lock);
	fhdr->identification = htonl(ipv6_fragmentation_id);
	if (++ipv6_fragmentation_id == 0)
		ipv6_fragmentation_id = 1;
	spin_unlock_bh(&ip6_id_lock);
}
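
/*
 * A note on the helper above: fragment IDs come from a single global
 * counter serialized by ip6_id_lock, wrapping from 0xffffffff back to 1 so
 * that 0 is never handed out; 0 is the sentinel ip6_fragment()'s slow path
 * uses for "no ID chosen yet". Later kernels replace this scheme with
 * hashed, per-destination IDs to avoid both the global lock and
 * predictability, but this file predates that change.
 */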

int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
		       dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

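/*
 * Two conventions in the pair of functions above are easy to miss: a
 * payload_len of 0 on a packet whose real payload exceeds IPV6_MAXPLEN
 * (65535) is the on-wire encoding used with jumbograms (RFC 2675), and
 * nf_hook() returning 1 means the netfilter hook accepted the packet
 * without invoking the continuation itself, which is why ip6_local_out()
 * only calls dst_output() when it sees 1. Sketch of a typical call site
 * (illustrative only, assuming skb->dst has been set by the caller):
 *
 *	skb->dst = dst_clone(&rt->u.dst);
 *	err = ip6_local_out(skb);
 */
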
static int ip6_output_finish(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	BUG_TRAP(newskb->dst);

	netif_rx(newskb);
	return 0;
}

static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

int ip6_output(struct sk_buff *skb)
{
	struct inet6_dev *idev = ip6_dst_idev(skb->dst);

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
		return ip6_output2(skb);
}
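
/*
 * ip6_output() is what dst->output points at for locally originated
 * traffic: a packet goes through ip6_fragment() when it exceeds the path
 * MTU and is not a GSO skb, or when the route is flagged dst_allfrag
 * (the peer advertised an MTU below the IPv6 minimum of 1280 bytes, so
 * every packet must carry a fragment header); everything else proceeds
 * straight to ip6_output2() and the NF_INET_POST_ROUTING hook.
 */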

/*
 *	xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr;
	u8 proto = fl->proto;
	int seg_len = skb->len;
	int hlimit, tclass;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(ip6_dst_idev(skb->dst),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */

	hlimit = -1;
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	tclass = -1;
	if (np)
		tclass = np->tclass;
	if (tclass < 0)
		tclass = 0;

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
		IP6_INC_STATS(ip6_dst_idev(skb->dst),
			      IPSTATS_MIB_OUTREQUESTS);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
			       dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

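/*
 * The first 32 bits of the IPv6 header built in ip6_xmit() pack three
 * fields: version (4 bits, always 6), traffic class (8 bits) and flow
 * label (20 bits). htonl(0x60000000 | (tclass << 20)) assembles
 * version+tclass in host order, while fl->fl6_flowlabel is already kept
 * in network byte order, so the two halves can simply be OR-ed together.
 * Worked example: tclass 0x28 (DSCP AF11) yields htonl(0x62800000) before
 * the flow label is merged in.
 */
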
/*
 * To avoid extra problems ND packets are sent through this
 * routine. It's code duplication but I really want to avoid
 * extra checks since ipv6_build_header is used by TCP (which
 * is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

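/*
 * Delivery idiom in ip6_call_ra_chain() above: every matching Router
 * Alert socket except the last receives a clone of the skb, and the last
 * one consumes the original, saving one skb_clone() per call. The return
 * value of 1 tells ip6_forward() that a userspace listener took the
 * packet, so it must not also be forwarded.
 */
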
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);

	if (ipv6_devconf.forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 * We DO NOT make any processing on
	 * RA packets, pushing them to user level AS IS
	 * without any WARRANTY that application will be able
	 * to interpret them. The reason is that we
	 * cannot make anything clever here.
	 *
	 * We are not end-node, so that if packet contains
	 * AH/ESP, we cannot make anything.
	 * Defragmentation also would be mistake, RA packets
	 * cannot be fragmented, because there is no warranty
	 * that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 * check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			    0, skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (ipv6_devconf.proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb->dst;

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb->sp) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
			goto error;
		}
	}

	if (skb->len > dst_mtu(dst)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

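/*
 * Two ordering details in ip6_forward() above: the hop limit is only
 * decremented after skb_cow() has guaranteed a private, writable header,
 * so a clone shared with e.g. a packet socket is never mangled; and,
 * unlike an IPv4 router, an IPv6 router never fragments transit traffic.
 * Oversized packets are bounced with ICMPV6_PKT_TOOBIG and the original
 * sender is expected to perform path MTU discovery.
 */
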
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	dst_release(to->dst);
	to->dst = dst_clone(from->dst);
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
			(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

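/*
 * ip6_find_1stfragopt() returns the length of the "unfragmentable part",
 * i.e. the offset from the start of the IPv6 header at which a Fragment
 * header may be inserted. Per RFC 2460 this covers the Hop-by-Hop and
 * Routing headers, plus any Destination Options header that precedes a
 * Routing header or (with Mobile IPv6) carries a Home Address option. On
 * return, *nexthdr points at the Next Header byte that the caller patches
 * to NEXTHDR_FRAGMENT.
 */
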
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct net_device *dev;
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb->dst;
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	dev = rt->u.dst.dev;
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket. (This last
	 * check should be redundant, but it's free.)
	 */
	if (!skb->local_df) {
		skb->dev = skb->dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_shinfo(skb)->frag_list) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				sock_hold(skb->sk);
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_shinfo(skb)->frag_list = NULL;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(skb, fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(skb, fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(ip6_dst_idev(skb->dst),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

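/*
 * ip6_fragment() above has two strategies. The fast path applies when the
 * packet already arrives as a frag_list chain of properly sized, unshared
 * buffers: each buffer becomes one fragment in place and only the
 * per-fragment header is rebuilt, so no payload is copied. The slow path
 * allocates a fresh skb per fragment and copies payload out with
 * skb_copy_bits(). In both cases every fragment except the last carries
 * IP6_MF ("more fragments"), and all intermediate fragments are multiples
 * of 8 bytes, since frag_off is expressed in 8-byte units.
 */
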
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

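/*
 * The lookup pair above layers a per-socket cache over the routing table:
 * sk_dst_check() returns the cached dst only while its cookie is still
 * valid, ip6_sk_dst_check() then confirms the cached route matches this
 * flow (destination, source under CONFIG_IPV6_SUBTREES, and outgoing
 * interface), and only on a miss does ip6_dst_lookup_tail() fall through
 * to a full ip6_route_output() lookup.
 */
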
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
					    sizeof(struct frag_hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(skb, &fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}

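/*
 * UFO path: rather than fragmenting in software, ip6_ufo_append_data()
 * accumulates one oversized UDP datagram and records gso_size (the
 * payload bytes each on-wire fragment may carry) plus a pre-selected
 * fragment ID in the shared info; the actual splitting is deferred to the
 * NETIF_F_UFO capable device, or to the software GSO layer, at transmit
 * time.
 */
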
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (np->cork.opt == NULL) {
				np->cork.opt = kmalloc(opt->tot_len,
						       sk->sk_allocation);
				if (unlikely(np->cork.opt == NULL))
					return -ENOBUFS;
			} else if (np->cork.opt->tot_len < opt->tot_len) {
				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
				return -EINVAL;
			}
			memcpy(np->cork.opt, opt, opt->tot_len);
			inet->cork.flags |= IPCORK_OPT;
			/* need source address above miyazawa */
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		if (inet->cork.flags & IPCORK_OPT)
			opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 *        --yoshfuji
	 */

	inet->cork.length += length;
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

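/*
 * ip6_append_data() implements corking: successive calls append data to
 * sk->sk_write_queue as a chain of MTU-sized skbs, with the options, hop
 * limit, traffic class and route captured in inet->cork / np->cork on the
 * first call, and nothing reaches the wire until ip6_push_pending_frames()
 * below stitches the queue into a single datagram. A UDP sendmsg() with
 * MSG_MORE therefore becomes several ip6_append_data() calls followed by
 * one push when the final write without the flag arrives.
 */
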
static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(np->cork.opt);
	np->cork.opt = NULL;
	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		__sock_put(tmp_skb->sk);
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl->fl6_flowlabel |
			 htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb->dst);

		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb->dst)
			IP6_INC_STATS(ip6_dst_idev(skb->dst),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}