rt = ip_route_output_key(geneve->net, fl4);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
- dev->stats.tx_carrier_errors++;
- return rt;
+ return ERR_PTR(-ENETUNREACH);
}
if (rt->dst.dev == dev) { /* is this necessary? */
netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
- dev->stats.collisions++;
ip_rt_put(rt);
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-ELOOP);
}
return rt;
}
struct ip_tunnel_info *info = NULL;
struct rtable *rt = NULL;
const struct iphdr *iip; /* interior IP header */
+ int err = -EINVAL;
struct flowi4 fl4;
__u8 tos, ttl;
__be16 sport;
bool udp_csum;
__be16 df;
- int err;
if (geneve->collect_md) {
info = skb_tunnel_info(skb);
rt = geneve_get_rt(skb, dev, &fl4, info);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
- dev->stats.tx_carrier_errors++;
+ err = PTR_ERR(rt);
goto tx_error;
}
tx_error:
dev_kfree_skb(skb);
err:
- dev->stats.tx_errors++;
+ if (err == -ELOOP)
+ dev->stats.collisions++;
+ else if (err == -ENETUNREACH)
+ dev->stats.tx_carrier_errors++;
+ else
+ dev->stats.tx_errors++;
return NETDEV_TX_OK;
}
+static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ struct geneve_dev *geneve = netdev_priv(dev);
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ if (ip_tunnel_info_af(info) != AF_INET)
+ return -EINVAL;
+
+ rt = geneve_get_rt(skb, dev, &fl4, info);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+ ip_rt_put(rt);
+ info->key.u.ipv4.src = fl4.saddr;
+ info->key.tp_src = udp_flow_src_port(geneve->net, skb,
+ 1, USHRT_MAX, true);
+ info->key.tp_dst = geneve->dst_port;
+ return 0;
+}
+
static const struct net_device_ops geneve_netdev_ops = {
.ndo_init = geneve_init,
.ndo_uninit = geneve_uninit,
.ndo_change_mtu = eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
+ .ndo_fill_metadata_dst = geneve_fill_metadata_dst,
};
static void geneve_get_drvinfo(struct net_device *dev,
return 0;
}
+static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
+ struct ip_tunnel_info *info,
+ __be16 sport, __be16 dport)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.flowi4_tos = RT_TOS(info->key.tos);
+ fl4.flowi4_mark = skb->mark;
+ fl4.flowi4_proto = IPPROTO_UDP;
+ fl4.daddr = info->key.u.ipv4.dst;
+
+ rt = ip_route_output_key(vxlan->net, &fl4);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+ ip_rt_put(rt);
+
+ info->key.u.ipv4.src = fl4.saddr;
+ info->key.tp_src = sport;
+ info->key.tp_dst = dport;
+ return 0;
+}
+
+static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ __be16 sport, dport;
+
+ sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+ vxlan->cfg.port_max, true);
+ dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
+
+ if (ip_tunnel_info_af(info) == AF_INET)
+ return egress_ipv4_tun_info(dev, skb, info, sport, dport);
+ return -EINVAL;
+}
+
static const struct net_device_ops vxlan_netdev_ops = {
.ndo_init = vxlan_init,
.ndo_uninit = vxlan_uninit,
.ndo_fdb_add = vxlan_fdb_add,
.ndo_fdb_del = vxlan_fdb_delete,
.ndo_fdb_dump = vxlan_fdb_dump,
+ .ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};
/* Info for udev, that this is a virtual tunnel endpoint */
* This function is used to pass protocol port error state information
* to the switch driver. The switch driver can react to the proto_down
* by doing a phys down on the associated switch port.
+ * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb);
+ * This function is used to get egress tunnel information for given skb.
+ * This is useful for retrieving outer tunnel header parameters while
+ * sampling packet.
*
*/
struct net_device_ops {
int (*ndo_get_iflink)(const struct net_device *dev);
int (*ndo_change_proto_down)(struct net_device *dev,
bool proto_down);
+ int (*ndo_fill_metadata_dst)(struct net_device *dev,
+ struct sk_buff *skb);
};
/**
void dev_remove_offload(struct packet_offload *po);
int dev_get_iflink(const struct net_device *dev);
+int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
unsigned short mask);
struct net_device *dev_get_by_name(struct net *net, const char *name);
return tun_dst;
}
+static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ int md_size = md_dst->u.tun_info.options_len;
+ struct metadata_dst *new_md;
+
+ if (!md_dst)
+ return ERR_PTR(-EINVAL);
+
+ new_md = metadata_dst_alloc(md_size, GFP_ATOMIC);
+ if (!new_md)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
+ sizeof(struct ip_tunnel_info) + md_size);
+ skb_dst_drop(skb);
+ dst_hold(&new_md->dst);
+ skb_dst_set(skb, &new_md->dst);
+ return new_md;
+}
+
+static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb)
+{
+ struct metadata_dst *dst;
+
+ dst = tun_dst_unclone(skb);
+ if (IS_ERR(dst))
+ return NULL;
+
+ return &dst->u.tun_info;
+}
+
static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
__be16 flags,
__be64 tunnel_id,
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
}
EXPORT_SYMBOL(dev_get_iflink);
+/**
+ * dev_fill_metadata_dst - Retrieve tunnel egress information.
+ * @dev: targeted interface
+ * @skb: The packet.
+ *
+ * For better visibility of tunnel traffic OVS needs to retrieve
+ * egress tunnel information for a packet. Following API allows
+ * user to get this info.
+ */
+int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ip_tunnel_info *info;
+
+ if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
+ return -EINVAL;
+
+ info = skb_tunnel_info_unclone(skb);
+ if (!info)
+ return -ENOMEM;
+ if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
+ return -EINVAL;
+
+ return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
+}
+EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+
/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
+static struct rtable *gre_get_rt(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi4 *fl,
+ const struct ip_tunnel_key *key)
+{
+ struct net *net = dev_net(dev);
+
+ memset(fl, 0, sizeof(*fl));
+ fl->daddr = key->u.ipv4.dst;
+ fl->saddr = key->u.ipv4.src;
+ fl->flowi4_tos = RT_TOS(key->tos);
+ fl->flowi4_mark = skb->mark;
+ fl->flowi4_proto = IPPROTO_GRE;
+
+ return ip_route_output_key(net, fl);
+}
+
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel_info *tun_info;
- struct net *net = dev_net(dev);
const struct ip_tunnel_key *key;
struct flowi4 fl;
struct rtable *rt;
goto err_free_skb;
key = &tun_info->key;
- memset(&fl, 0, sizeof(fl));
- fl.daddr = key->u.ipv4.dst;
- fl.saddr = key->u.ipv4.src;
- fl.flowi4_tos = RT_TOS(key->tos);
- fl.flowi4_mark = skb->mark;
- fl.flowi4_proto = IPPROTO_GRE;
-
- rt = ip_route_output_key(net, &fl);
+ rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt))
goto err_free_skb;
dev->stats.tx_dropped++;
}
+static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ if (ip_tunnel_info_af(info) != AF_INET)
+ return -EINVAL;
+
+ rt = gre_get_rt(skb, dev, &fl4, &info->key);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+ ip_rt_put(rt);
+ info->key.u.ipv4.src = fl4.saddr;
+ return 0;
+}
+
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct net_device *dev)
{
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_fill_metadata_dst = gre_fill_metadata_dst,
};
static void ipgre_tap_setup(struct net_device *dev)
struct sw_flow_key *key, const struct nlattr *attr,
const struct nlattr *actions, int actions_len)
{
- struct ip_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
if (vport) {
int err;
- upcall.egress_tun_info = &info;
- err = ovs_vport_get_egress_tun_info(vport, skb,
- &upcall);
- if (err)
- upcall.egress_tun_info = NULL;
+ err = dev_fill_metadata_dst(vport->dev, skb);
+ if (!err)
+ upcall.egress_tun_info = skb_tunnel_info(skb);
}
break;
if (upcall_info->egress_tun_info) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
- err = ovs_nla_put_egress_tunnel_key(user_skb,
- upcall_info->egress_tun_info,
- upcall_info->egress_tun_opts);
+ err = ovs_nla_put_tunnel_info(user_skb,
+ upcall_info->egress_tun_info);
BUG_ON(err);
nla_nest_end(user_skb, nla);
}
*/
struct dp_upcall_info {
struct ip_tunnel_info *egress_tun_info;
- const void *egress_tun_opts;
const struct nlattr *userdata;
const struct nlattr *actions;
int actions_len;
if ((output->tun_flags & TUNNEL_OAM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
return -EMSGSIZE;
- if (tun_opts) {
+ if (swkey_tun_opts_len) {
if (output->tun_flags & TUNNEL_GENEVE_OPT &&
nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
swkey_tun_opts_len, tun_opts))
return 0;
}
-int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
- const struct ip_tunnel_info *egress_tun_info,
- const void *egress_tun_opts)
+int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
{
- return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key,
- egress_tun_opts,
- egress_tun_info->options_len);
+ return __ipv4_tun_to_nlattr(skb, &tun_info->key,
+ ip_tunnel_info_opts(tun_info),
+ tun_info->options_len);
}
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
if (!start)
return -EMSGSIZE;
- err = ipv4_tun_to_nlattr(skb, &tun_info->key,
- tun_info->options_len ?
- ip_tunnel_info_opts(tun_info) : NULL,
- tun_info->options_len);
+ err = ovs_nla_put_tunnel_info(skb, tun_info);
if (err)
return err;
nla_nest_end(skb, start);
int ovs_nla_get_match(struct net *, struct sw_flow_match *,
const struct nlattr *key, const struct nlattr *mask,
bool log);
-int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
- const struct ip_tunnel_info *,
- const void *egress_tun_opts);
+
+int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info);
bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
return 0;
}
-static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- struct geneve_port *geneve_port = geneve_vport(vport);
- struct net *net = ovs_dp_get_net(vport->dp);
- __be16 dport = htons(geneve_port->port_no);
- __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
-
- return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
- skb, IPPROTO_UDP, sport, dport);
-}
-
static struct vport *geneve_tnl_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
.get_options = geneve_get_options,
.send = ovs_netdev_send,
.owner = THIS_MODULE,
- .get_egress_tun_info = geneve_get_egress_tun_info,
};
static int __init ovs_geneve_tnl_init(void)
return ovs_netdev_link(vport, parms->name);
}
-static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
- skb, IPPROTO_GRE, 0, 0);
-}
-
static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
.send = ovs_netdev_send,
- .get_egress_tun_info = gre_get_egress_tun_info,
.destroy = ovs_netdev_tunnel_destroy,
.owner = THIS_MODULE,
};
return ovs_netdev_link(vport, parms->name);
}
-static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- struct vxlan_dev *vxlan = netdev_priv(vport->dev);
- struct net *net = ovs_dp_get_net(vport->dp);
- __be16 dst_port = vxlan_dev_dst_port(vxlan);
- __be16 src_port;
- int port_min;
- int port_max;
-
- inet_get_local_port_range(net, &port_min, &port_max);
- src_port = udp_flow_src_port(net, skb, 0, 0, true);
-
- return ovs_tunnel_get_egress_info(upcall, net,
- skb, IPPROTO_UDP,
- src_port, dst_port);
-}
-
static struct vport_ops ovs_vxlan_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_create,
.destroy = ovs_netdev_tunnel_destroy,
.get_options = vxlan_get_options,
.send = ovs_netdev_send,
- .get_egress_tun_info = vxlan_get_egress_tun_info,
};
static int __init ovs_vxlan_tnl_init(void)
call_rcu(&vport->rcu, free_vport_rcu);
}
EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
-
-int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
- struct net *net,
- struct sk_buff *skb,
- u8 ipproto,
- __be16 tp_src,
- __be16 tp_dst)
-{
- struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info;
- const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
- const struct ip_tunnel_key *tun_key;
- u32 skb_mark = skb->mark;
- struct rtable *rt;
- struct flowi4 fl;
-
- if (unlikely(!tun_info))
- return -EINVAL;
- if (ip_tunnel_info_af(tun_info) != AF_INET)
- return -EINVAL;
-
- tun_key = &tun_info->key;
-
- /* Route lookup to get srouce IP address.
- * The process may need to be changed if the corresponding process
- * in vports ops changed.
- */
- rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
-
- ip_rt_put(rt);
-
- /* Generate egress_tun_info based on tun_info,
- * saddr, tp_src and tp_dst
- */
- ip_tunnel_key_init(&egress_tun_info->key,
- fl.saddr, tun_key->u.ipv4.dst,
- tun_key->tos,
- tun_key->ttl,
- tp_src, tp_dst,
- tun_key->tun_id,
- tun_key->tun_flags);
- egress_tun_info->options_len = tun_info->options_len;
- egress_tun_info->mode = tun_info->mode;
- upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info);
- return 0;
-}
-EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
-
-int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall)
-{
- /* get_egress_tun_info() is only implemented on tunnel ports. */
- if (unlikely(!vport->ops->get_egress_tun_info))
- return -EINVAL;
-
- return vport->ops->get_egress_tun_info(vport, skb, upcall);
-}
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/u64_stats_sync.h>
-#include <net/route.h>
#include "datapath.h"
int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
-int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
- struct net *net,
- struct sk_buff *,
- u8 ipproto,
- __be16 tp_src,
- __be16 tp_dst);
-
-int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct dp_upcall_info *upcall);
-
/**
* struct vport_portids - array of netlink portids of a vport.
* must be protected by rcu.
* have any configuration.
* @send: Send a packet on the device.
* zero for dropped packets or negative for error.
- * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
- * a packet.
*/
struct vport_ops {
enum ovs_vport_type type;
int (*get_options)(const struct vport *, struct sk_buff *);
void (*send)(struct vport *, struct sk_buff *);
- int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
- struct dp_upcall_info *upcall);
-
struct module *owner;
struct list_head list;
};
int ovs_vport_ops_register(struct vport_ops *ops);
void ovs_vport_ops_unregister(struct vport_ops *ops);
-static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
- const struct ip_tunnel_key *key,
- u32 mark,
- struct flowi4 *fl,
- u8 protocol)
-{
- struct rtable *rt;
-
- memset(fl, 0, sizeof(*fl));
- fl->daddr = key->u.ipv4.dst;
- fl->saddr = key->u.ipv4.src;
- fl->flowi4_tos = RT_TOS(key->tos);
- fl->flowi4_mark = mark;
- fl->flowi4_proto = protocol;
-
- rt = ip_route_output_key(net, fl);
- return rt;
-}
-
static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
{
vport->ops->send(vport, skb);