/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>
/*
   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task.  The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires.  It is a good
   solution, but it requires maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops.  This is a percpu
   counter: once we enter the first ndo_start_xmit(), cpu migration is
   forbidden.  We force an exit if this counter reaches RECURSION_LIMIT
   (see the sketch below).

   2. Networking dead loops would not kill routers, but they would really
   kill the network.  The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to the
   upper header.  It is a very good solution, but it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work.  I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative.  This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all other routers (at least those in my
     neighbourhood) return only 8 bytes of payload.  It is the end.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node.  This is difficult or even impossible,
   especially taking fragmentation into account.  In short, ttl is not a
   solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-)  Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value < 68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem!  Nobody could accuse us; we did
   all that we could.  Even if it was your gated that injected the
   fatal route into the network, even if it was you who configured the
   fatal static route: you are innocent. :-)
*/
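/*
 * Illustrative sketch, not part of this driver: the shape of the per-cpu
 * xmit recursion guard described above.  The names used here
 * (gre_xmit_recursion_sketch, GRE_RECURSION_LIMIT_SKETCH,
 * guarded_xmit_sketch) are placeholders, not real kernel symbols, and the
 * block is wrapped in "#if 0" so it is never built.
 */
#if 0
static DEFINE_PER_CPU(unsigned int, gre_xmit_recursion_sketch);
#define GRE_RECURSION_LIMIT_SKETCH	8

static netdev_tx_t guarded_xmit_sketch(struct sk_buff *skb,
				       struct net_device *dev)
{
	/* BHs are disabled on the transmit path, so the cpu cannot change
	 * under us and a plain per-cpu counter is race free.
	 */
	if (__this_cpu_read(gre_xmit_recursion_sketch) >
	    GRE_RECURSION_LIMIT_SKETCH) {
		/* A tunnel stacked (directly or indirectly) on top of
		 * itself: break the dead loop by dropping instead of
		 * recursing until the stack overflows.
		 */
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	__this_cpu_inc(gre_xmit_recursion_sketch);
	dev_queue_xmit(skb);	/* may re-enter a tunnel's ndo_start_xmit() */
	__this_cpu_dec(gre_xmit_recursion_sketch);

	return NETDEV_TX_OK;
}
#endif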
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload.  It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key into the third word
	   of the GRE header.  This makes it impossible to maintain even soft
	   state for keyed GRE tunnels with checksum enabled.  Tell
	   them "thank you".

	   Well, I wonder, rfc1812 was written by a Cisco employee,
	   what the hell these idiots break standards established
	 */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	unsigned int data_len = 0;

	case ICMP_PARAMETERPROB:

	case ICMP_DEST_UNREACH:

	case ICMP_PORT_UNREACH:
		/* Impossible event. */

		/* All others are translated to HOST_UNREACH.
		   rfc2003 contains "deep thoughts" about NET_UNREACH,
		   I believe they are just ether pollution. --ANK
		 */

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)

		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))

	t->err_time = jiffies;
static void gre_err(struct sk_buff *skb, u32 info)

	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload.  It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key into the third word
	 * of the GRE header.  This makes it impossible to maintain even soft
	 * state for keyed GRE tunnels with checksum enabled.  Tell them "thank you".
	 *
	 * Well, I wonder, rfc1812 was written by a Cisco employee,
	 * what the hell these idiots break standards established
	 */

	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;
	bool csum_err = false;

	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),

		if (!csum_err)		/* ignore csum errors. */

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);

	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,

	ipgre_err(skb, info, &tpi);
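/*
 * Illustrative layout, not part of this driver: why the key sits in the
 * "third word" complained about above (per RFC 2784/2890).  With both the
 * checksum (C) and key (K) bits set, the GRE header is laid out as
 *
 *	word 0:	C|K|S flags, version	| protocol type
 *	word 1:	checksum (if C)		| reserved1
 *	word 2:	key (if K)
 *	word 3:	sequence number (if S)
 *
 * so the key occupies bytes 8..11.  An ICMP error that quotes only the
 * first 8 bytes of the offending payload therefore never contains the key,
 * and the erring keyed tunnel cannot be identified from the error alone.
 */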
static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
		      int gre_hdr_len)

	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	struct erspanhdr *ershdr;
	const struct iphdr *iph;

	itn = net_generic(net, erspan_net_id);
	len = gre_hdr_len + sizeof(*ershdr);

	if (unlikely(!pskb_may_pull(skb, len)))

	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);

	/* The original GRE header does not have a key field;
	 * use the ERSPAN 10-bit session ID as the key.
	 */
	tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
	index = ershdr->md.index;
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);

	if (__iptunnel_pull_header(skb,
				   gre_hdr_len + sizeof(*ershdr),

	if (tunnel->collect_md) {
		struct ip_tunnel_info *info;
		struct erspan_metadata *md;

		tpi->flags |= TUNNEL_KEY;
		tun_id = key32_to_tunnel_id(tpi->key);

		tun_dst = ip_tun_rx_dst(skb, flags,
					tun_id, sizeof(*md));
			return PACKET_REJECT;

		md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			dst_release((struct dst_entry *)tun_dst);
			return PACKET_REJECT;

		info = &tun_dst->u.tun_info;
		info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
		info->options_len = sizeof(*md);

		tunnel->index = ntohl(index);

	skb_reset_mac_header(skb);
	ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
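/*
 * Illustrative layout, not part of this driver: the ERSPAN type II
 * encapsulation handled above, as parsed here and built later by
 * erspan_build_header():
 *
 *	outer IPv4 header
 *	GRE header, 8 bytes (base header + sequence number)
 *	ERSPAN header, 8 bytes:
 *		ver_vlan:	version | original vlan tci
 *		session_id:	cos | encap type | truncate | 10-bit session id
 *		md.index:	port/direction index of the mirror session
 *	mirrored Ethernet frame
 *
 * Plain GRE carries no key here, so the 10-bit session id is reused as the
 * tunnel key for ip_tunnel_lookup(), as noted in the comment above.
 */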
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)

	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);
		if (tunnel->collect_md) {

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
				return PACKET_REJECT;

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		     int hdr_len)

	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
static int gre_rcv(struct sk_buff *skb)

	struct tnl_ptk_info tpi;
	bool csum_err = false;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,
		       __be16 proto)

	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb,
					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)

	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)

	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);

		rt = gre_get_rt(skb, dev, fl, key);

			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
		       + tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -

		err = pskb_expand_head(skb, max_t(int, head_delta, 0),

	dev->stats.tx_dropped++;
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)

	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);

	dev->stats.tx_dropped++;
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)

	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))

	key = &tun_info->key;

	/* ERSPAN has a fixed 8-byte GRE header */
	tunnel_hlen = 8 + sizeof(struct erspanhdr);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);

	if (gre_handle_offloads(skb, false))

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);

	md = ip_tunnel_info_opts(tun_info);

	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
			    ntohl(md->index), truncate);

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);

	dev->stats.tx_dropped++;
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)

	struct ip_tunnel_info *info = skb_tunnel_info(skb);

	if (ip_tunnel_info_af(info) != AF_INET)

	rt = gre_get_rt(skb, dev, &fl4, &info->key);

	info->key.u.ipv4.src = fl4.saddr;
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)

	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);
	} else {
		if (skb_cow_head(skb, dev->needed_headroom))

		tnl_params = &tunnel->parms.iph;
	}

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))

	__gre_xmit(skb, dev, tnl_params, skb->protocol);

	dev->stats.tx_dropped++;
static inline u8 tos_to_cos(u8 tos)

static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate)

	struct iphdr *iphdr = ip_hdr(skb);
	struct ethhdr *eth = eth_hdr(skb);
	enum erspan_encap_type enc_type;
	struct erspanhdr *ershdr;

	enc_type = ERSPAN_ENCAP_NOVLAN;

	/* If the mirrored packet has a vlan tag, extract the tci and
	 * preserve the vlan header in the mirrored frame.
	 */
	if (eth->h_proto == htons(ETH_P_8021Q)) {
		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
		vlan_tci = ntohs(qp->tci);
		enc_type = ERSPAN_ENCAP_INFRAME;

	skb_push(skb, sizeof(*ershdr));
	ershdr = (struct erspanhdr *)skb->data;
	memset(ershdr, 0, sizeof(*ershdr));

	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
				 (ERSPAN_VERSION << VER_OFFSET));
	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
				   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
				   (enc_type << EN_OFFSET & EN_MASK) |
				   ((truncate << T_OFFSET) & T_MASK));
	ershdr->md.index = htonl(index & INDEX_MASK);
static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)

	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);

	if (gre_handle_offloads(skb, false))

	if (skb_cow_head(skb, dev->needed_headroom))

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);

	/* Push ERSPAN header */
	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));

	dev->stats.tx_dropped++;
static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)

	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))

	if (skb_cow_head(skb, dev->needed_headroom))

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));

	dev->stats.tx_dropped++;
static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)

	struct ip_tunnel_parm p;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))

	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
/* Nice toy.  Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66.  If you have access to the mbone, play with me :-)

	ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

	ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
	ip addr add 10.66.66.<somewhat>/24 dev Universe
	ifconfig Universe add fe80::<Your_real_addr>/10
	ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
	ftp fec0:6666:6666::193.233.7.65
 */
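/*
 * Illustrative modern equivalent of the recipe above, assuming current
 * iproute2 syntax (same example addresses; nothing here is required by
 * the driver):
 *
 *	ip link add Universe type gre remote 224.66.66.66 \
 *		local <Your_real_addr> ttl 255
 *	ip addr add 10.66.66.<somewhat>/24 dev Universe
 *	ip addr add fec0:6666:6666::<Your_real_addr>/96 dev Universe
 *	ip link set Universe up
 */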
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)

	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph;
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
		memcpy(&iph->saddr, saddr, 4);

		memcpy(&iph->daddr, daddr, 4);

		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)

	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)

	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {

		rt = ip_route_output_gre(t->net, &fl4,

					 RT_TOS(t->parms.iph.tos),

			return -EADDRNOTAVAIL;

		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);

static int ipgre_close(struct net_device *dev)

	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);

			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES	(NETIF_F_SG | \
static void ipgre_tunnel_setup(struct net_device *dev)

	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);

static void __gre_tunnel_init(struct net_device *dev)

	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor
		 * can we support 2 levels of outer headers requiring
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features    |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;

		/* Can use a lockless transmit, unless we generate
		 */
		dev->features |= NETIF_F_LLTX;
static int ipgre_tunnel_init(struct net_device *dev)

	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {

			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;
	}

	return ip_tunnel_init(dev);
static const struct gre_protocol ipgre_protocol = {
	.err_handler	= gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

	ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)

	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)

		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);

	return ipgre_tunnel_validate(tb, data, extack);
*tb
[], struct nlattr
*data
[],
1078 struct netlink_ext_ack
*extack
)
1086 ret
= ipgre_tap_validate(tb
, data
, extack
);
1090 /* ERSPAN should only have GRE sequence and key flag */
1091 if (data
[IFLA_GRE_OFLAGS
])
1092 flags
|= nla_get_be16(data
[IFLA_GRE_OFLAGS
]);
1093 if (data
[IFLA_GRE_IFLAGS
])
1094 flags
|= nla_get_be16(data
[IFLA_GRE_IFLAGS
]);
1095 if (!data
[IFLA_GRE_COLLECT_METADATA
] &&
1096 flags
!= (GRE_SEQ
| GRE_KEY
))
1099 /* ERSPAN Session ID only has 10-bit. Since we reuse
1100 * 32-bit key field as ID, check it's range.
1102 if (data
[IFLA_GRE_IKEY
] &&
1103 (ntohl(nla_get_be32(data
[IFLA_GRE_IKEY
])) & ~ID_MASK
))
1106 if (data
[IFLA_GRE_OKEY
] &&
1107 (ntohl(nla_get_be32(data
[IFLA_GRE_OKEY
])) & ~ID_MASK
))
static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,
			       __u32 *fwmark)

	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {

		parms->iph.frag_off = htons(IP_DF);

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))

		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_INDEX]) {
		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);

		if (t->index & ~INDEX_MASK)
/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)

	memset(ipencap, 0, sizeof(*ipencap));

	if (data[IFLA_GRE_ENCAP_TYPE]) {

		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);

	if (data[IFLA_GRE_ENCAP_FLAGS]) {

		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);

	if (data[IFLA_GRE_ENCAP_SPORT]) {

		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);

	if (data[IFLA_GRE_ENCAP_DPORT]) {

		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};
static int erspan_tunnel_init(struct net_device *dev)

	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       sizeof(struct erspanhdr);
	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};
static void ipgre_tap_setup(struct net_device *dev)

	dev->netdev_ops		= &gre_tap_netdev_ops;
	dev->priv_flags		&= ~IFF_TX_SKB_SHARING;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)

	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);

		err = ip_tunnel_encap_setup(t, &ipencap);

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);

	return ip_tunnel_newlink(dev, tb, &p, fwmark);
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)

	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);

	return ip_tunnel_changelink(dev, tb, &p, fwmark);
static size_t ipgre_get_size(const struct net_device *dev)

		/* IFLA_GRE_IFLAGS */
		/* IFLA_GRE_OFLAGS */
		/* IFLA_GRE_LOCAL */
		/* IFLA_GRE_REMOTE */
		/* IFLA_GRE_PMTUDISC */
		/* IFLA_GRE_ENCAP_TYPE */
		/* IFLA_GRE_ENCAP_FLAGS */
		/* IFLA_GRE_ENCAP_SPORT */
		/* IFLA_GRE_ENCAP_DPORT */
		/* IFLA_GRE_COLLECT_METADATA */
		/* IFLA_GRE_IGNORE_DF */
		/* IFLA_GRE_FWMARK */
		/* IFLA_GRE_ERSPAN_INDEX */
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)

	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,

	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,

	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,

	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,

		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;

	if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
		goto nla_put_failure;
static void erspan_setup(struct net_device *dev)

	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)

	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb);

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);

		return ERR_PTR(err);

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);

	err = rtnl_configure_link(dev, NULL);

	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);

EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
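/*
 * Illustrative sketch, not part of this driver: how an external user such
 * as a virtual switch datapath might use the helper exported above.  The
 * caller must hold the RTNL lock; sketch_gretap_create() is a placeholder
 * name.  Wrapped in "#if 0" so it is never built.
 */
#if 0
static struct net_device *sketch_gretap_create(struct net *net)
{
	struct net_device *dev;

	ASSERT_RTNL();
	dev = gretap_fb_dev_create(net, "gretap%d", NET_NAME_ENUM);
	if (IS_ERR(dev))
		return dev;	/* ERR_PTR(-errno) on failure */

	/* The device comes back registered, in collect_md (flow based)
	 * mode, with an unrestricted MTU; per-packet tunnel parameters
	 * are then supplied via skb_tunnel_info() metadata on transmit.
	 */
	return dev;
}
#endif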
static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

	ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit = ipgre_tap_exit_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);

	ip_tunnel_delete_net(itn, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit = erspan_exit_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
static int __init ipgre_init(void)

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);

	err = register_pernet_device(&ipgre_tap_net_ops);

		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);

		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);

		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;

	err = rtnl_link_register(&ipgre_link_ops);

		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);

		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);

		goto erspan_link_failed;

	rtnl_link_unregister(&ipgre_tap_ops);

	rtnl_link_unregister(&ipgre_link_ops);

	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);

	unregister_pernet_device(&erspan_net_ops);

	unregister_pernet_device(&ipgre_tap_net_ops);

	unregister_pernet_device(&ipgre_net_ops);
static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");