IB/mlx5: Use IP version matching to classify IP traffic
author Ariel Levkovich <lariel@mellanox.com>
Mon, 3 Apr 2017 10:11:03 +0000 (13:11 +0300)
committer Doug Ledford <dledford@redhat.com>
Fri, 21 Apr 2017 16:26:05 +0000 (12:26 -0400)
This change adds the ability for flow steering to classify IPv4/6
packets with an MPLS tag (Ethertype 0x8847 and 0x8848) as standard IP
packets and hit IPv4/6 classified steering rules.

When user added a flow rule with IP classification, driver was
implicitly adding ethertype matching to the created rule in order
to distinguish between IPv4 and IPv6 protocols.
Since IP packets with MPLS tag header have MPLS ethertype, they missed
the rule and ended up hitting the default filters.
Such behavior prevented MPLS packets from undergoing inbound traffic
load balancing flows (if such were defined by configuring RSS) to
achieve higher throughput - the way that non-MPLS IP packets did.

Since our device is able to look past the MPLS tag and identify the
next protocol we introduce this solution which replaces Ethertype
matching by the device's capability to perform IP version parsing
and matching in order to distinguish between IPv4 and IPv6.
Therefore, whenever a flow with an IP spec is added and the device supports
IP version matching, the driver will implicitly add IP version matching to
the rule (based on the IP spec type) without Ethertype matching, which will
cause relevant MPLS tagged packets to hit this rule as well.
Otherwise (the device doesn't support IP version matching), we fall back to
setting Ethertype matching.

If the user's filters specify an L2 ethertype and an IP spec
the rule will then match both the ethertype and the IP version.

The device's support for IP version matching is reported by the
device via dedicated capability bits in query_device_cap, named
outer_ip_version and inner_ip_version.

Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/mlx5/main.c
include/linux/mlx5/mlx5_ifc.h

index 1c85c39d1d03704c56ae7c6d6313b16d3398cb21..c28b6952b0abf625d0b73409d590276b7209c99f 100644 (file)
@@ -1736,8 +1736,11 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
                   offsetof(typeof(filter), field) -\
                   sizeof(filter.field))
 
-static int parse_flow_attr(u32 *match_c, u32 *match_v,
-                          const union ib_flow_spec *ib_spec, u32 *tag_id)
+#define IPV4_VERSION 4
+#define IPV6_VERSION 6
+static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
+                          u32 *match_v, const union ib_flow_spec *ib_spec,
+                          u32 *tag_id)
 {
        void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                           misc_parameters);
@@ -1745,17 +1748,22 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
                                           misc_parameters);
        void *headers_c;
        void *headers_v;
+       int match_ipv;
 
        if (ib_spec->type & IB_FLOW_SPEC_INNER) {
                headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                         inner_headers);
                headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
                                         inner_headers);
+               match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                       ft_field_support.inner_ip_version);
        } else {
                headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                         outer_headers);
                headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
                                         outer_headers);
+               match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                       ft_field_support.outer_ip_version);
        }
 
        switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
@@ -1811,10 +1819,17 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
                if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
                        return -EOPNOTSUPP;
 
-               MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-                        ethertype, 0xffff);
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-                        ethertype, ETH_P_IP);
+               if (match_ipv) {
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+                                ip_version, 0xf);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+                                ip_version, IPV4_VERSION);
+               } else {
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+                                ethertype, 0xffff);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+                                ethertype, ETH_P_IP);
+               }
 
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
@@ -1843,10 +1858,17 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
                if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
                        return -EOPNOTSUPP;
 
-               MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-                        ethertype, 0xffff);
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-                        ethertype, ETH_P_IPV6);
+               if (match_ipv) {
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+                                ip_version, 0xf);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+                                ip_version, IPV6_VERSION);
+               } else {
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+                                ethertype, 0xffff);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+                                ethertype, ETH_P_IPV6);
+               }
 
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
@@ -1968,10 +1990,16 @@ static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
               is_multicast_ether_addr(eth_spec->val.dst_mac);
 }
 
-static bool is_valid_ethertype(const struct ib_flow_attr *flow_attr,
+static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
+                              const struct ib_flow_attr *flow_attr,
                               bool check_inner)
 {
        union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+       int match_ipv = check_inner ?
+                       MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                       ft_field_support.inner_ip_version) :
+                       MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                       ft_field_support.outer_ip_version);
        int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
        bool ipv4_spec_valid, ipv6_spec_valid;
        unsigned int ip_spec_type = 0;
@@ -2002,16 +2030,20 @@ static bool is_valid_ethertype(const struct ib_flow_attr *flow_attr,
                        (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
                ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
                        (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
-               type_valid = ipv4_spec_valid || ipv6_spec_valid;
+
+               type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
+                            (((eth_type == ETH_P_MPLS_UC) ||
+                              (eth_type == ETH_P_MPLS_MC)) && match_ipv);
        }
 
        return type_valid;
 }
 
-static bool is_valid_attr(const struct ib_flow_attr *flow_attr)
+static bool is_valid_attr(struct mlx5_core_dev *mdev,
+                         const struct ib_flow_attr *flow_attr)
 {
-       return is_valid_ethertype(flow_attr, false) &&
-              is_valid_ethertype(flow_attr, true);
+       return is_valid_ethertype(mdev, flow_attr, false) &&
+              is_valid_ethertype(mdev, flow_attr, true);
 }
 
 static void put_flow_table(struct mlx5_ib_dev *dev,
@@ -2154,7 +2186,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
        u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
        int err = 0;
 
-       if (!is_valid_attr(flow_attr))
+       if (!is_valid_attr(dev->mdev, flow_attr))
                return ERR_PTR(-EINVAL);
 
        spec = mlx5_vzalloc(sizeof(*spec));
@@ -2167,7 +2199,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
        INIT_LIST_HEAD(&handler->list);
 
        for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
-               err = parse_flow_attr(spec->match_criteria,
+               err = parse_flow_attr(dev->mdev, spec->match_criteria,
                                      spec->match_value, ib_flow, &flow_tag);
                if (err < 0)
                        goto free;
index 7c50bd39b297e5d359043f7545be8ca04d9045c8..4da6e803b627056cf4aa299986cc359d139ce27f 100644 (file)
@@ -236,7 +236,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
        u8         outer_dmac[0x1];
        u8         outer_smac[0x1];
        u8         outer_ether_type[0x1];
-       u8         reserved_at_3[0x1];
+       u8         outer_ip_version[0x1];
        u8         outer_first_prio[0x1];
        u8         outer_first_cfi[0x1];
        u8         outer_first_vid[0x1];
@@ -265,7 +265,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
        u8         inner_dmac[0x1];
        u8         inner_smac[0x1];
        u8         inner_ether_type[0x1];
-       u8         reserved_at_23[0x1];
+       u8         inner_ip_version[0x1];
        u8         inner_first_prio[0x1];
        u8         inner_first_cfi[0x1];
        u8         inner_first_vid[0x1];
@@ -371,7 +371,7 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
        u8         cvlan_tag[0x1];
        u8         svlan_tag[0x1];
        u8         frag[0x1];
-       u8         reserved_at_93[0x4];
+       u8         ip_version[0x4];
        u8         tcp_flags[0x9];
 
        u8         tcp_sport[0x10];