hv_netvsc: Add ethtool handler to set and get UDP hash levels
author Haiyang Zhang <haiyangz@microsoft.com>
Tue, 22 Aug 2017 02:22:39 +0000 (19:22 -0700)
committer David S. Miller <davem@davemloft.net>
Tue, 22 Aug 2017 21:08:12 +0000 (14:08 -0700)
This patch adds functions to switch the UDP hash level between
L3 and L4 via the ethtool command. UDP over IPv4 and IPv6 can be
set independently. The default hash level is L4. We currently only
allow switching the TX hash level from within the guests.

On Azure, fragmented UDP packets have a high loss rate with L4
hashing. Using L3 hashing is recommended in this case.

For example, for UDP over IPv4 on eth0:
To include UDP port numbers in hashing:
ethtool -N eth0 rx-flow-hash udp4 sdfn
To exclude UDP port numbers in hashing:
ethtool -N eth0 rx-flow-hash udp4 sd
To show UDP hash level:
ethtool -n eth0 rx-flow-hash udp4

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc_drv.c

index 9198dd1240ede754f6b1bc45b9e9322e0485349e..ff1c0c8d5e0da059e2faf1e4220415ddd599bcd9 100644 (file)
@@ -720,6 +720,8 @@ struct net_device_context {
        u32 tx_send_table[VRSS_SEND_TAB_SIZE];
 
        /* Ethtool settings */
+       bool udp4_l4_hash;
+       bool udp6_l4_hash;
        u8 duplex;
        u32 speed;
        struct netvsc_ethtool_stats eth_stats;
index d8612b1a8e4eb0291cf563463a0c8917dca0b342..c0c4c9195a3f91041f44abc6a183272965867ff1 100644 (file)
@@ -190,10 +190,12 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
        return ppi;
 }
 
-/* Azure hosts don't support non-TCP port numbers in hashing yet. We compute
- * hash for non-TCP traffic with only IP numbers.
+/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
+ * packets. We can use ethtool to change UDP hash level when necessary.
  */
-static inline u32 netvsc_get_hash(struct sk_buff *skb)
+static inline u32 netvsc_get_hash(
+       struct sk_buff *skb,
+       const struct net_device_context *ndc)
 {
        struct flow_keys flow;
        u32 hash;
@@ -204,7 +206,11 @@ static inline u32 netvsc_get_hash(struct sk_buff *skb)
        if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
                return 0;
 
-       if (flow.basic.ip_proto == IPPROTO_TCP) {
+       if (flow.basic.ip_proto == IPPROTO_TCP ||
+           (flow.basic.ip_proto == IPPROTO_UDP &&
+            ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) ||
+             (flow.basic.n_proto == htons(ETH_P_IPV6) &&
+              ndc->udp6_l4_hash)))) {
                return skb_get_hash(skb);
        } else {
                if (flow.basic.n_proto == htons(ETH_P_IP))
@@ -227,7 +233,7 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
        struct sock *sk = skb->sk;
        int q_idx;
 
-       q_idx = ndc->tx_send_table[netvsc_get_hash(skb) &
+       q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) &
                                   (VRSS_SEND_TAB_SIZE - 1)];
 
        /* If queue index changed record the new value */
@@ -891,6 +897,9 @@ static void netvsc_init_settings(struct net_device *dev)
 {
        struct net_device_context *ndc = netdev_priv(dev);
 
+       ndc->udp4_l4_hash = true;
+       ndc->udp6_l4_hash = true;
+
        ndc->speed = SPEED_UNKNOWN;
        ndc->duplex = DUPLEX_FULL;
 }
@@ -1228,7 +1237,8 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 }
 
 static int
-netvsc_get_rss_hash_opts(struct ethtool_rxnfc *info)
+netvsc_get_rss_hash_opts(struct net_device_context *ndc,
+                        struct ethtool_rxnfc *info)
 {
        info->data = RXH_IP_SRC | RXH_IP_DST;
 
@@ -1236,9 +1246,20 @@ netvsc_get_rss_hash_opts(struct ethtool_rxnfc *info)
        case TCP_V4_FLOW:
        case TCP_V6_FLOW:
                info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
-               /* fallthrough */
+               break;
+
        case UDP_V4_FLOW:
+               if (ndc->udp4_l4_hash)
+                       info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
+               break;
+
        case UDP_V6_FLOW:
+               if (ndc->udp6_l4_hash)
+                       info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
+               break;
+
        case IPV4_FLOW:
        case IPV6_FLOW:
                break;
@@ -1266,11 +1287,51 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
                return 0;
 
        case ETHTOOL_GRXFH:
-               return netvsc_get_rss_hash_opts(info);
+               return netvsc_get_rss_hash_opts(ndc, info);
        }
        return -EOPNOTSUPP;
 }
 
+static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
+                                   struct ethtool_rxnfc *info)
+{
+       if (info->data == (RXH_IP_SRC | RXH_IP_DST |
+                          RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+               if (info->flow_type == UDP_V4_FLOW)
+                       ndc->udp4_l4_hash = true;
+               else if (info->flow_type == UDP_V6_FLOW)
+                       ndc->udp6_l4_hash = true;
+               else
+                       return -EOPNOTSUPP;
+
+               return 0;
+       }
+
+       if (info->data == (RXH_IP_SRC | RXH_IP_DST)) {
+               if (info->flow_type == UDP_V4_FLOW)
+                       ndc->udp4_l4_hash = false;
+               else if (info->flow_type == UDP_V6_FLOW)
+                       ndc->udp6_l4_hash = false;
+               else
+                       return -EOPNOTSUPP;
+
+               return 0;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static int
+netvsc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *info)
+{
+       struct net_device_context *ndc = netdev_priv(ndev);
+
+       if (info->cmd == ETHTOOL_SRXFH)
+               return netvsc_set_rss_hash_opts(ndc, info);
+
+       return -EOPNOTSUPP;
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void netvsc_poll_controller(struct net_device *dev)
 {
@@ -1469,6 +1530,7 @@ static const struct ethtool_ops ethtool_ops = {
        .set_channels   = netvsc_set_channels,
        .get_ts_info    = ethtool_op_get_ts_info,
        .get_rxnfc      = netvsc_get_rxnfc,
+       .set_rxnfc      = netvsc_set_rxnfc,
        .get_rxfh_key_size = netvsc_get_rxfh_key_size,
        .get_rxfh_indir_size = netvsc_rss_indir_size,
        .get_rxfh       = netvsc_get_rxfh,