/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
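/* Post-routing completion step: run the cgroup BPF egress program, let
 * netfilter/XFRM re-route the packet if SNAT attached a new policy, and
 * fragment oversized packets before handing them to ip6_finish_output2().
 */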
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	if (ret) {
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}
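/* dst_output() entry point for locally generated packets: drop if IPv6 is
 * administratively disabled on the egress device, otherwise traverse the
 * NF_INET_POST_ROUTING hook (skipped for packets netfilter already
 * re-routed) on the way to ip6_finish_output().
 */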
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		 * MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			/* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
			 * it is safe to call in our context (socket lock not held)
			 */
			skb_set_owner_w(skb, (struct sock *)sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
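/* Deliver a packet carrying a Router Alert option to every raw socket that
 * registered for this alert value; returns 1 if some socket took the skb.
 */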
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
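/* Proxy NDP helper: decide whether a packet for a proxied destination should
 * be handed to the local input path (>0), dropped (<0) or forwarded (0).
 */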
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

	return dst_output(net, sk, skb);
}
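/* MTU used when forwarding: a locked route metric wins, otherwise the
 * egress device's per-interface IPv6 MTU, falling back to IPV6_MIN_MTU.
 */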
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
		return false;

	return true;
}
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be a mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, ip6_dst_idev(dst),
					IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	 * send redirects to source routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
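/* Propagate per-packet metadata (packet type, priority, protocol, dst,
 * mark, tc index, security mark) from the original skb to a fragment.
 */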
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
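/* Fragment a packet that exceeds the path MTU. If the skb already carries a
 * suitable frag_list the existing buffers are reused (fast path); otherwise
 * the slow path allocates a new skb per fragment and copies the payload.
 * Every fragment gets a Fragment header with the same identification.
 */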
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = __skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = __skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}
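/* Validate a socket's cached dst against the flow being sent: it must still
 * be an IPv6 route and match the cached destination/source prefixes and the
 * requested outgoing interface, otherwise it is released so the caller
 * performs a fresh route lookup.
 */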
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (!dst)
		dst = ip6_dst_lookup_flow(sk, fl6, final_dst);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}
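/* Prepare the cork for a series of ip6_append_data() calls: duplicate the
 * transmit options so they outlive the caller, pin the route, and record
 * hop limit, traffic class and the fragment size the queued data is built
 * against.
 */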
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
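/* Workhorse behind ip6_append_data() and ip6_make_skb(): append user data
 * to the queue of pending skbs for this cork, growing the tail skb or
 * allocating new ones so that each queued buffer ends on a fragment
 * boundary, and charging the memory to the socket.
 */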
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			refcount_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
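/* Public append interface used by datagram sockets: set up the cork on the
 * first call for an empty write queue, then hand off to __ip6_append_data()
 * against sk->sk_write_queue.
 */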
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags,
		    const struct sockcm_cookie *sockc)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}
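/* Collapse the queue of pending skbs into a single packet (tail buffers
 * become the frag_list), prepend the queued extension headers and the IPv6
 * header from the cork state, update output statistics and release the
 * cork. The result is ready for ip6_send_skb().
 */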
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     const struct sockcm_cookie *sockc)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	cork.base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(&cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6, sockc);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}