From ee5c7fb3404724b9e25fe24c81fbcda60f3f2659 Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Mon, 24 Jul 2017 12:34:28 -0400 Subject: [PATCH] bnxt_en: add vf-rep RX/TX and netdev implementation This patch introduces the RX/TX and a simple netdev implementation for VF-reps. The VF-reps use the RX/TX rings of the PF. For each VF-rep the PF driver issues a VFR_ALLOC FW cmd that returns "cfa_code" and "cfa_action" values. The FW sets up the filter tables in such a way that VF traffic by default (in absence of other rules) gets punted to the parent PF. The cfa_code value in the RX-compl informs the driver of the source VF. For traffic being transmitted from the VF-rep, the TX BD is tagged with a cfa_action value that informs the HW to punt it to the corresponding VF. Signed-off-by: Sathya Perla Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 68 +++++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 10 + drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 218 +++++++++++++++++- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h | 4 + 4 files changed, 288 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ebdeeb4a5756..f262fe6092d7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -245,6 +245,16 @@ const u16 bnxt_lhint_arr[] = { TX_BD_FLAGS_LHINT_2048_AND_LARGER, }; +static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + + if (!md_dst || md_dst->type != METADATA_HW_PORT_MUX) + return 0; + + return md_dst->u.port_info.port_id; +} + static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); @@ -289,7 +299,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_buf->nr_frags = last_frag; vlan_tag_flags = 0; - cfa_action = 0; + cfa_action = bnxt_xmit_get_cfa_action(skb); if (skb_vlan_tag_present(skb)) { vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN | skb_vlan_tag_get(skb); @@ -324,7 +334,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_push1->tx_bd_hsize_lflags = 0; tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags); - tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action); + tx_push1->tx_bd_cfa_action = + cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT); end = pdata + length; end = PTR_ALIGN(end, 8) - 1; @@ -429,7 +440,8 @@ normal_tx: txbd->tx_bd_len_flags_type = cpu_to_le32(flags); txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags); - txbd1->tx_bd_cfa_action = cpu_to_le32(cfa_action); + txbd1->tx_bd_cfa_action = + cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT); for (i = 0; i < last_frag; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -1034,7 +1046,10 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, bnxt_sched_reset(bp, rxr); return; } - + /* Store cfa_code in tpa_info to use in tpa_end + * completion processing. + */ + tpa_info->cfa_code = TPA_START_CFA_CODE(tpa_start1); prod_rx_buf->data = tpa_info->data; prod_rx_buf->data_ptr = tpa_info->data_ptr; @@ -1269,6 +1284,17 @@ static inline struct sk_buff *bnxt_gro_skb(struct bnxt *bp, return skb; } +/* Given the cfa_code of a received packet determine which + * netdev (vf-rep or PF) the packet is destined to. + */ +static struct net_device *bnxt_get_pkt_dev(struct bnxt *bp, u16 cfa_code) +{ + struct net_device *dev = bnxt_get_vf_rep(bp, cfa_code); + + /* if vf-rep dev is NULL, the must belongs to the PF */ + return dev ? dev : bp->dev; +} + static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, @@ -1362,7 +1388,9 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, return NULL; } } - skb->protocol = eth_type_trans(skb, bp->dev); + + skb->protocol = + eth_type_trans(skb, bnxt_get_pkt_dev(bp, tpa_info->cfa_code)); if (tpa_info->hash_type != PKT_HASH_TYPE_NONE) skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type); @@ -1389,6 +1417,18 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, return skb; } +static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi, + struct sk_buff *skb) +{ + if (skb->dev != bp->dev) { + /* this packet belongs to a vf-rep */ + bnxt_vf_rep_rx(bp, skb); + return; + } + skb_record_rx_queue(skb, bnapi->index); + napi_gro_receive(&bnapi->napi, skb); +} + /* returns the following: * 1 - 1 packet successfully received * 0 - successful TPA_START, packet not completed yet @@ -1405,7 +1445,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, struct rx_cmp *rxcmp; struct rx_cmp_ext *rxcmp1; u32 tmp_raw_cons = *raw_cons; - u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons); + u16 cfa_code, cons, prod, cp_cons = RING_CMP(tmp_raw_cons); struct bnxt_sw_rx_bd *rx_buf; unsigned int len; u8 *data_ptr, agg_bufs, cmp_type; @@ -1447,8 +1487,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, rc = -ENOMEM; if (likely(skb)) { - skb_record_rx_queue(skb, bnapi->index); - napi_gro_receive(&bnapi->napi, skb); + bnxt_deliver_skb(bp, bnapi, skb); rc = 1; } *event |= BNXT_RX_EVENT; @@ -1537,7 +1576,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, skb_set_hash(skb, le32_to_cpu(rxcmp->rx_cmp_rss_hash), type); } - skb->protocol = eth_type_trans(skb, dev); + cfa_code = RX_CMP_CFA_CODE(rxcmp1); + skb->protocol = eth_type_trans(skb, bnxt_get_pkt_dev(bp, cfa_code)); if ((rxcmp1->rx_cmp_flags2 & cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) && @@ -1562,8 +1602,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, } } - skb_record_rx_queue(skb, bnapi->index); - napi_gro_receive(&bnapi->napi, skb); + bnxt_deliver_skb(bp, bnapi, skb); rc = 1; next_rx: @@ -6246,6 +6285,9 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* Poll link status and check for SFP+ module status */ bnxt_get_port_module_status(bp); + /* VF-reps may need to be re-opened after the PF is re-opened */ + if (BNXT_PF(bp)) + bnxt_vf_reps_open(bp); return 0; open_err: @@ -6334,6 +6376,10 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (rc) netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete!\n"); } + + /* Close the VF-reps before closing PF */ + if (BNXT_PF(bp)) + bnxt_vf_reps_close(bp); #endif /* Change device state to avoid TX queue wake up's */ bnxt_tx_disable(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a7d5f42fb6a3..63756f0389d7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -20,6 +20,7 @@ #include #include +#include struct tx_bd { __le32 tx_bd_len_flags_type; @@ -243,6 +244,10 @@ struct rx_cmp_ext { ((le32_to_cpu((rxcmp1)->rx_cmp_flags2) & \ RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3) +#define RX_CMP_CFA_CODE(rxcmpl1) \ + ((le32_to_cpu((rxcmpl1)->rx_cmp_cfa_code_errors_v2) & \ + RX_CMPL_CFA_CODE_MASK) >> RX_CMPL_CFA_CODE_SFT) + struct rx_agg_cmp { __le32 rx_agg_cmp_len_flags_type; #define RX_AGG_CMP_TYPE (0x3f << 0) @@ -312,6 +317,10 @@ struct rx_tpa_start_cmp_ext { __le32 rx_tpa_start_cmp_hdr_info; }; +#define TPA_START_CFA_CODE(rx_tpa_start) \ + ((le32_to_cpu((rx_tpa_start)->rx_tpa_start_cmp_cfa_code_v2) & \ + RX_TPA_START_CMP_CFA_CODE) >> RX_TPA_START_CMPL_CFA_CODE_SHIFT) + struct rx_tpa_end_cmp { __le32 rx_tpa_end_cmp_len_flags_type; #define RX_TPA_END_CMP_TYPE (0x3f << 0) @@ -940,6 +949,7 @@ struct bnxt_vf_rep_stats { struct bnxt_vf_rep { struct bnxt *bp; struct net_device *dev; + struct metadata_dst *dst; u16 vf_idx; u16 tx_cfa_action; u16 rx_cfa_code; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index eab358c2ac97..60bdb181358e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -17,6 +17,178 @@ #include "bnxt_vfr.h" #define CFA_HANDLE_INVALID 0xffff +#define VF_IDX_INVALID 0xffff + +static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx, + u16 *tx_cfa_action, u16 *rx_cfa_code) +{ + struct hwrm_cfa_vfr_alloc_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_cfa_vfr_alloc_input req = { 0 }; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_ALLOC, -1, -1); + req.vf_id = cpu_to_le16(vf_idx); + sprintf(req.vfr_name, "vfr%d", vf_idx); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) { + *tx_cfa_action = le16_to_cpu(resp->tx_cfa_action); + *rx_cfa_code = le16_to_cpu(resp->rx_cfa_code); + netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x", + *tx_cfa_action, *rx_cfa_code); + } else { + netdev_info(bp->dev, "%s error rc=%d", __func__, rc); + } + + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + +static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx) +{ + struct hwrm_cfa_vfr_free_input req = { 0 }; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_FREE, -1, -1); + sprintf(req.vfr_name, "vfr%d", vf_idx); + + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + netdev_info(bp->dev, "%s error rc=%d", __func__, rc); + return rc; +} + +static int bnxt_vf_rep_open(struct net_device *dev) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + struct bnxt *bp = vf_rep->bp; + + /* Enable link and TX only if the parent PF is open. */ + if (netif_running(bp->dev)) { + netif_carrier_on(dev); + netif_tx_start_all_queues(dev); + } + return 0; +} + +static int bnxt_vf_rep_close(struct net_device *dev) +{ + netif_carrier_off(dev); + netif_tx_disable(dev); + + return 0; +} + +static netdev_tx_t bnxt_vf_rep_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + int rc, len = skb->len; + + skb_dst_drop(skb); + dst_hold((struct dst_entry *)vf_rep->dst); + skb_dst_set(skb, (struct dst_entry *)vf_rep->dst); + skb->dev = vf_rep->dst->u.port_info.lower_dev; + + rc = dev_queue_xmit(skb); + if (!rc) { + vf_rep->tx_stats.packets++; + vf_rep->tx_stats.bytes += len; + } + return rc; +} + +static void +bnxt_vf_rep_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + + stats->rx_packets = vf_rep->rx_stats.packets; + stats->rx_bytes = vf_rep->rx_stats.bytes; + stats->tx_packets = vf_rep->tx_stats.packets; + stats->tx_bytes = vf_rep->tx_stats.bytes; +} + +struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code) +{ + u16 vf_idx; + + if (cfa_code && bp->cfa_code_map && BNXT_PF(bp)) { + vf_idx = bp->cfa_code_map[cfa_code]; + if (vf_idx != VF_IDX_INVALID) + return bp->vf_reps[vf_idx]->dev; + } + return NULL; +} + +void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(skb->dev); + struct bnxt_vf_rep_stats *rx_stats; + + rx_stats = &vf_rep->rx_stats; + vf_rep->rx_stats.bytes += skb->len; + vf_rep->rx_stats.packets++; + + netif_receive_skb(skb); +} + +static void bnxt_vf_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); +} + +static const struct ethtool_ops bnxt_vf_rep_ethtool_ops = { + .get_drvinfo = bnxt_vf_rep_get_drvinfo +}; + +static const struct net_device_ops bnxt_vf_rep_netdev_ops = { + .ndo_open = bnxt_vf_rep_open, + .ndo_stop = bnxt_vf_rep_close, + .ndo_start_xmit = bnxt_vf_rep_xmit, + .ndo_get_stats64 = bnxt_vf_rep_get_stats64 +}; + +/* Called when the parent PF interface is closed: + * As the mode transition from SWITCHDEV to LEGACY + * happens under the rtnl_lock() this routine is safe + * under the rtnl_lock() + */ +void bnxt_vf_reps_close(struct bnxt *bp) +{ + struct bnxt_vf_rep *vf_rep; + u16 num_vfs, i; + + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return; + + num_vfs = pci_num_vf(bp->pdev); + for (i = 0; i < num_vfs; i++) { + vf_rep = bp->vf_reps[i]; + if (netif_running(vf_rep->dev)) + bnxt_vf_rep_close(vf_rep->dev); + } +} + +/* Called when the parent PF interface is opened (re-opened): + * As the mode transition from SWITCHDEV to LEGACY + * happen under the rtnl_lock() this routine is safe + * under the rtnl_lock() + */ +void bnxt_vf_reps_open(struct bnxt *bp) +{ + int i; + + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return; + + for (i = 0; i < pci_num_vf(bp->pdev); i++) + bnxt_vf_rep_open(bp->vf_reps[i]->dev); +} static void __bnxt_vf_reps_destroy(struct bnxt *bp) { @@ -27,6 +199,11 @@ static void __bnxt_vf_reps_destroy(struct bnxt *bp) for (i = 0; i < num_vfs; i++) { vf_rep = bp->vf_reps[i]; if (vf_rep) { + dst_release((struct dst_entry *)vf_rep->dst); + + if (vf_rep->tx_cfa_action != CFA_HANDLE_INVALID) + hwrm_cfa_vfr_free(bp, vf_rep->vf_idx); + if (vf_rep->dev) { /* if register_netdev failed, then netdev_ops * would have been set to NULL @@ -60,6 +237,9 @@ void bnxt_vf_reps_destroy(struct bnxt *bp) bnxt_close_nic(bp, false, false); closed = true; } + /* un-publish cfa_code_map so that RX path can't see it anymore */ + kfree(bp->cfa_code_map); + bp->cfa_code_map = NULL; bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; if (closed) @@ -92,6 +272,8 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, { struct net_device *pf_dev = bp->dev; + dev->netdev_ops = &bnxt_vf_rep_netdev_ops; + dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops; /* Just inherit all the featues of the parent PF as the VF-R * uses the RX/TX rings of the parent PF */ @@ -107,7 +289,7 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, static int bnxt_vf_reps_create(struct bnxt *bp) { - u16 num_vfs = pci_num_vf(bp->pdev); + u16 *cfa_code_map = NULL, num_vfs = pci_num_vf(bp->pdev); struct bnxt_vf_rep *vf_rep; struct net_device *dev; int rc, i; @@ -116,6 +298,16 @@ static int bnxt_vf_reps_create(struct bnxt *bp) if (!bp->vf_reps) return -ENOMEM; + /* storage for cfa_code to vf-idx mapping */ + cfa_code_map = kmalloc(sizeof(*bp->cfa_code_map) * MAX_CFA_CODE, + GFP_KERNEL); + if (!cfa_code_map) { + rc = -ENOMEM; + goto err; + } + for (i = 0; i < MAX_CFA_CODE; i++) + cfa_code_map[i] = VF_IDX_INVALID; + for (i = 0; i < num_vfs; i++) { dev = alloc_etherdev(sizeof(*vf_rep)); if (!dev) { @@ -130,6 +322,26 @@ static int bnxt_vf_reps_create(struct bnxt *bp) vf_rep->vf_idx = i; vf_rep->tx_cfa_action = CFA_HANDLE_INVALID; + /* get cfa handles from FW */ + rc = hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx, + &vf_rep->tx_cfa_action, + &vf_rep->rx_cfa_code); + if (rc) { + rc = -ENOLINK; + goto err; + } + cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx; + + vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, + GFP_KERNEL); + if (!vf_rep->dst) { + rc = -ENOMEM; + goto err; + } + /* only cfa_action is needed to mux a packet while TXing */ + vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action; + vf_rep->dst->u.port_info.lower_dev = bp->dev; + bnxt_vf_rep_netdev_init(bp, vf_rep, dev); rc = register_netdev(dev); if (rc) { @@ -139,11 +351,15 @@ static int bnxt_vf_reps_create(struct bnxt *bp) } } + /* publish cfa_code_map only after all VF-reps have been initialized */ + bp->cfa_code_map = cfa_code_map; bp->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + netif_keep_dst(bp->dev); return 0; err: netdev_info(bp->dev, "%s error=%d", __func__, rc); + kfree(cfa_code_map); __bnxt_vf_reps_destroy(bp); return rc; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h index 310c9c567152..c6cd55afbb89 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h @@ -34,5 +34,9 @@ static inline void bnxt_link_bp_to_dl(struct devlink *dl, struct bnxt *bp) int bnxt_dl_register(struct bnxt *bp); void bnxt_dl_unregister(struct bnxt *bp); void bnxt_vf_reps_destroy(struct bnxt *bp); +void bnxt_vf_reps_close(struct bnxt *bp); +void bnxt_vf_reps_open(struct bnxt *bp); +void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb); +struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code); #endif /* BNXT_VFR_H */ -- 2.20.1