be2net: avoid disabling sriov while VFs are assigned
authorSathya Perla <sathya.perla@emulex.com>
Tue, 8 May 2012 19:41:24 +0000 (19:41 +0000)
committerDavid S. Miller <davem@davemloft.net>
Fri, 11 May 2012 03:33:00 +0000 (23:33 -0400)
Calling pci_disable_sriov() while VFs are assigned to VMs causes
kernel panic. This patch uses PCI_DEV_FLAGS_ASSIGNED bit state of the
VF's pci_dev to avoid this. Also, the unconditional function reset cmd
issued on a PF probe can delete the VF configuration for the
previously enabled VFs. A scratchpad register is now used to issue a
function reset only when needed (i.e., in a crash dump scenario.)

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/emulex/benet/be.h
drivers/net/ethernet/emulex/benet/be_hw.h
drivers/net/ethernet/emulex/benet/be_main.c

index c3ee9103ff4f12c11b436f018f2ac99f304a384c..ecf1a81f26e28a28e49c2b0135ce765d78932561 100644 (file)
@@ -313,6 +313,11 @@ struct be_vf_cfg {
        u32 tx_rate;
 };
 
+enum vf_state {
+       ENABLED = 0,
+       ASSIGNED = 1
+};
+
 #define BE_FLAGS_LINK_STATUS_INIT              1
 #define BE_FLAGS_WORKER_SCHEDULED              (1 << 3)
 #define BE_UC_PMAC_COUNT               30
@@ -403,8 +408,9 @@ struct be_adapter {
        u32 flash_status;
        struct completion flash_compl;
 
-       u32 num_vfs;
-       u8 is_virtfn;
+       u32 num_vfs;            /* Number of VFs provisioned by PF driver */
+       u32 dev_num_vfs;        /* Number of VFs supported by HW */
+       u8 virtfn;
        struct be_vf_cfg *vf_cfg;
        bool be3_native;
        u32 sli_family;
@@ -417,8 +423,10 @@ struct be_adapter {
        u32 uc_macs;            /* Count of secondary UC MAC programmed */
 };
 
-#define be_physfn(adapter) (!adapter->is_virtfn)
+#define be_physfn(adapter)             (!adapter->virtfn)
 #define        sriov_enabled(adapter)          (adapter->num_vfs > 0)
+#define        sriov_want(adapter)             (adapter->dev_num_vfs && num_vfs && \
+                                        be_physfn(adapter))
 #define for_all_vfs(adapter, vf_cfg, i)                                        \
        for (i = 0, vf_cfg = &adapter->vf_cfg[i]; i < adapter->num_vfs; \
                i++, vf_cfg++)
@@ -547,14 +555,6 @@ static inline u8 is_udp_pkt(struct sk_buff *skb)
        return val;
 }
 
-static inline void be_check_sriov_fn_type(struct be_adapter *adapter)
-{
-       u32 sli_intf;
-
-       pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
-       adapter->is_virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
-}
-
 static inline void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
 {
        u32 addr;
index 0949aa609164edc4b01610aabeafbdca064449c9..f38b58c8dbba1adede8db7d359503ad5fea1c2fc 100644 (file)
@@ -58,6 +58,8 @@
 
 #define SLI_PORT_CONTROL_IP_MASK       0x08000000
 
+#define PCICFG_CUST_SCRATCHPAD_CSR     0x1EC
+
 /********* Memory BAR register ************/
 #define PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET     0xfc
 /* Host Interrupt Enable, if set interrupts are enabled although "PCI Interrupt
index 6d5d30be0481297dcb0f6bf614bb81f5cd1c8e96..a01f734675327ab131dbd9a8cc09e63ace9b495f 100644 (file)
@@ -1049,6 +1049,29 @@ static int be_set_vf_tx_rate(struct net_device *netdev,
        return status;
 }
 
+static int be_find_vfs(struct be_adapter *adapter, int vf_state)
+{
+       struct pci_dev *dev, *pdev = adapter->pdev;
+       int vfs = 0, assigned_vfs = 0, pos, vf_fn;
+       u16 offset, stride;
+
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+       pci_read_config_word(pdev, pos + PCI_SRIOV_VF_OFFSET, &offset);
+       pci_read_config_word(pdev, pos + PCI_SRIOV_VF_STRIDE, &stride);
+
+       dev = pci_get_device(pdev->vendor, PCI_ANY_ID, NULL);
+       while (dev) {
+               vf_fn = (pdev->devfn + offset + stride * vfs) & 0xFFFF;
+               if (dev->is_virtfn && dev->devfn == vf_fn) {
+                       vfs++;
+                       if (dev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)
+                               assigned_vfs++;
+               }
+               dev = pci_get_device(pdev->vendor, PCI_ANY_ID, dev);
+       }
+       return (vf_state == ASSIGNED) ? assigned_vfs : vfs;
+}
+
 static void be_eqd_update(struct be_adapter *adapter, struct be_eq_obj *eqo)
 {
        struct be_rx_stats *stats = rx_stats(&adapter->rx_obj[eqo->idx]);
@@ -1789,9 +1812,9 @@ static void be_tx_queues_destroy(struct be_adapter *adapter)
 
 static int be_num_txqs_want(struct be_adapter *adapter)
 {
-       if (sriov_enabled(adapter) || be_is_mc(adapter) ||
-               lancer_chip(adapter) || !be_physfn(adapter) ||
-               adapter->generation == BE_GEN2)
+       if (sriov_want(adapter) || be_is_mc(adapter) ||
+           lancer_chip(adapter) || !be_physfn(adapter) ||
+           adapter->generation == BE_GEN2)
                return 1;
        else
                return MAX_TX_QS;
@@ -2118,7 +2141,7 @@ static void be_msix_disable(struct be_adapter *adapter)
 static uint be_num_rss_want(struct be_adapter *adapter)
 {
        if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
-            adapter->num_vfs == 0 && be_physfn(adapter) &&
+            !sriov_want(adapter) && be_physfn(adapter) &&
             !be_is_mc(adapter))
                return (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
        else
@@ -2152,53 +2175,6 @@ done:
        return;
 }
 
-static int be_sriov_enable(struct be_adapter *adapter)
-{
-       be_check_sriov_fn_type(adapter);
-
-#ifdef CONFIG_PCI_IOV
-       if (be_physfn(adapter) && num_vfs) {
-               int status, pos;
-               u16 dev_vfs;
-
-               pos = pci_find_ext_capability(adapter->pdev,
-                                               PCI_EXT_CAP_ID_SRIOV);
-               pci_read_config_word(adapter->pdev,
-                                    pos + PCI_SRIOV_TOTAL_VF, &dev_vfs);
-
-               adapter->num_vfs = min_t(u16, num_vfs, dev_vfs);
-               if (adapter->num_vfs != num_vfs)
-                       dev_info(&adapter->pdev->dev,
-                                "Device supports %d VFs and not %d\n",
-                                adapter->num_vfs, num_vfs);
-
-               status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
-               if (status)
-                       adapter->num_vfs = 0;
-
-               if (adapter->num_vfs) {
-                       adapter->vf_cfg = kcalloc(num_vfs,
-                                               sizeof(struct be_vf_cfg),
-                                               GFP_KERNEL);
-                       if (!adapter->vf_cfg)
-                               return -ENOMEM;
-               }
-       }
-#endif
-       return 0;
-}
-
-static void be_sriov_disable(struct be_adapter *adapter)
-{
-#ifdef CONFIG_PCI_IOV
-       if (sriov_enabled(adapter)) {
-               pci_disable_sriov(adapter->pdev);
-               kfree(adapter->vf_cfg);
-               adapter->num_vfs = 0;
-       }
-#endif
-}
-
 static inline int be_msix_vec_get(struct be_adapter *adapter,
                                struct be_eq_obj *eqo)
 {
@@ -2500,6 +2476,11 @@ static void be_vf_clear(struct be_adapter *adapter)
        struct be_vf_cfg *vf_cfg;
        u32 vf;
 
+       if (be_find_vfs(adapter, ASSIGNED)) {
+               dev_warn(&adapter->pdev->dev, "VFs are assigned to VMs\n");
+               goto done;
+       }
+
        for_all_vfs(adapter, vf_cfg, vf) {
                if (lancer_chip(adapter))
                        be_cmd_set_mac_list(adapter, NULL, 0, vf + 1);
@@ -2509,6 +2490,10 @@ static void be_vf_clear(struct be_adapter *adapter)
 
                be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
        }
+       pci_disable_sriov(adapter->pdev);
+done:
+       kfree(adapter->vf_cfg);
+       adapter->num_vfs = 0;
 }
 
 static int be_clear(struct be_adapter *adapter)
@@ -2538,29 +2523,60 @@ static int be_clear(struct be_adapter *adapter)
        be_cmd_fw_clean(adapter);
 
        be_msix_disable(adapter);
-       kfree(adapter->pmac_id);
+       pci_write_config_dword(adapter->pdev, PCICFG_CUST_SCRATCHPAD_CSR, 0);
        return 0;
 }
 
-static void be_vf_setup_init(struct be_adapter *adapter)
+static int be_vf_setup_init(struct be_adapter *adapter)
 {
        struct be_vf_cfg *vf_cfg;
        int vf;
 
+       adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
+                                 GFP_KERNEL);
+       if (!adapter->vf_cfg)
+               return -ENOMEM;
+
        for_all_vfs(adapter, vf_cfg, vf) {
                vf_cfg->if_handle = -1;
                vf_cfg->pmac_id = -1;
        }
+       return 0;
 }
 
 static int be_vf_setup(struct be_adapter *adapter)
 {
        struct be_vf_cfg *vf_cfg;
+       struct device *dev = &adapter->pdev->dev;
        u32 cap_flags, en_flags, vf;
        u16 def_vlan, lnk_speed;
-       int status;
+       int status, enabled_vfs;
+
+       enabled_vfs = be_find_vfs(adapter, ENABLED);
+       if (enabled_vfs) {
+               dev_warn(dev, "%d VFs are already enabled\n", enabled_vfs);
+               dev_warn(dev, "Ignoring num_vfs=%d setting\n", num_vfs);
+               return 0;
+       }
 
-       be_vf_setup_init(adapter);
+       if (num_vfs > adapter->dev_num_vfs) {
+               dev_warn(dev, "Device supports %d VFs and not %d\n",
+                        adapter->dev_num_vfs, num_vfs);
+               num_vfs = adapter->dev_num_vfs;
+       }
+
+       status = pci_enable_sriov(adapter->pdev, num_vfs);
+       if (!status) {
+               adapter->num_vfs = num_vfs;
+       } else {
+               /* Platform doesn't support SRIOV though device supports it */
+               dev_warn(dev, "SRIOV enable failed\n");
+               return 0;
+       }
+
+       status = be_vf_setup_init(adapter);
+       if (status)
+               goto err;
 
        cap_flags = en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
                                BE_IF_FLAGS_MULTICAST;
@@ -2571,9 +2587,11 @@ static int be_vf_setup(struct be_adapter *adapter)
                        goto err;
        }
 
-       status = be_vf_eth_addr_config(adapter);
-       if (status)
-               goto err;
+       if (!enabled_vfs) {
+               status = be_vf_eth_addr_config(adapter);
+               if (status)
+                       goto err;
+       }
 
        for_all_vfs(adapter, vf_cfg, vf) {
                status = be_cmd_link_status_query(adapter, NULL, &lnk_speed,
@@ -2630,9 +2648,25 @@ do_none:
        return status;
 }
 
+/* Routine to query per function resource limits */
+static int be_get_config(struct be_adapter *adapter)
+{
+       int pos;
+       u16 dev_num_vfs;
+
+       pos = pci_find_ext_capability(adapter->pdev, PCI_EXT_CAP_ID_SRIOV);
+       if (pos) {
+               pci_read_config_word(adapter->pdev, pos + PCI_SRIOV_TOTAL_VF,
+                                    &dev_num_vfs);
+               adapter->dev_num_vfs = dev_num_vfs;
+       }
+       return 0;
+}
+
 static int be_setup(struct be_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
+       struct device *dev = &adapter->pdev->dev;
        u32 cap_flags, en_flags;
        u32 tx_fc, rx_fc;
        int status;
@@ -2640,6 +2674,8 @@ static int be_setup(struct be_adapter *adapter)
 
        be_setup_init(adapter);
 
+       be_get_config(adapter);
+
        be_cmd_req_native_mode(adapter);
 
        be_msix_enable(adapter);
@@ -2718,10 +2754,11 @@ static int be_setup(struct be_adapter *adapter)
 
        pcie_set_readrq(adapter->pdev, 4096);
 
-       if (sriov_enabled(adapter)) {
-               status = be_vf_setup(adapter);
-               if (status)
-                       goto err;
+       if (be_physfn(adapter) && num_vfs) {
+               if (adapter->dev_num_vfs)
+                       be_vf_setup(adapter);
+               else
+                       dev_warn(dev, "device doesn't support SRIOV\n");
        }
 
        be_cmd_get_phy_info(adapter);
@@ -2731,6 +2768,7 @@ static int be_setup(struct be_adapter *adapter)
        schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
        adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
 
+       pci_write_config_dword(adapter->pdev, PCICFG_CUST_SCRATCHPAD_CSR, 1);
        return 0;
 err:
        be_clear(adapter);
@@ -3352,8 +3390,6 @@ static void __devexit be_remove(struct pci_dev *pdev)
 
        be_ctrl_cleanup(adapter);
 
-       be_sriov_disable(adapter);
-
        pci_set_drvdata(pdev, NULL);
        pci_release_regions(pdev);
        pci_disable_device(pdev);
@@ -3367,7 +3403,7 @@ bool be_is_wol_supported(struct be_adapter *adapter)
                !be_is_wol_excluded(adapter)) ? true : false;
 }
 
-static int be_get_config(struct be_adapter *adapter)
+static int be_get_initial_config(struct be_adapter *adapter)
 {
        int status;
 
@@ -3410,7 +3446,7 @@ static int be_get_config(struct be_adapter *adapter)
        return 0;
 }
 
-static int be_dev_family_check(struct be_adapter *adapter)
+static int be_dev_type_check(struct be_adapter *adapter)
 {
        struct pci_dev *pdev = adapter->pdev;
        u32 sli_intf = 0, if_type;
@@ -3443,6 +3479,9 @@ static int be_dev_family_check(struct be_adapter *adapter)
        default:
                adapter->generation = 0;
        }
+
+       pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
+       adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
        return 0;
 }
 
@@ -3586,6 +3625,14 @@ reschedule:
        schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
 }
 
+static bool be_reset_required(struct be_adapter *adapter)
+{
+       u32 reg;
+
+       pci_read_config_dword(adapter->pdev, PCICFG_CUST_SCRATCHPAD_CSR, &reg);
+       return reg;
+}
+
 static int __devinit be_probe(struct pci_dev *pdev,
                        const struct pci_device_id *pdev_id)
 {
@@ -3611,7 +3658,7 @@ static int __devinit be_probe(struct pci_dev *pdev,
        adapter->pdev = pdev;
        pci_set_drvdata(pdev, adapter);
 
-       status = be_dev_family_check(adapter);
+       status = be_dev_type_check(adapter);
        if (status)
                goto free_netdev;
 
@@ -3629,13 +3676,9 @@ static int __devinit be_probe(struct pci_dev *pdev,
                }
        }
 
-       status = be_sriov_enable(adapter);
-       if (status)
-               goto free_netdev;
-
        status = be_ctrl_init(adapter);
        if (status)
-               goto disable_sriov;
+               goto free_netdev;
 
        if (lancer_chip(adapter)) {
                status = lancer_wait_ready(adapter);
@@ -3662,9 +3705,11 @@ static int __devinit be_probe(struct pci_dev *pdev,
        if (status)
                goto ctrl_clean;
 
-       status = be_cmd_reset_function(adapter);
-       if (status)
-               goto ctrl_clean;
+       if (be_reset_required(adapter)) {
+               status = be_cmd_reset_function(adapter);
+               if (status)
+                       goto ctrl_clean;
+       }
 
        /* The INTR bit may be set in the card when probed by a kdump kernel
         * after a crash.
@@ -3676,7 +3721,7 @@ static int __devinit be_probe(struct pci_dev *pdev,
        if (status)
                goto ctrl_clean;
 
-       status = be_get_config(adapter);
+       status = be_get_initial_config(adapter);
        if (status)
                goto stats_clean;
 
@@ -3705,8 +3750,6 @@ stats_clean:
        be_stats_cleanup(adapter);
 ctrl_clean:
        be_ctrl_cleanup(adapter);
-disable_sriov:
-       be_sriov_disable(adapter);
 free_netdev:
        free_netdev(netdev);
        pci_set_drvdata(pdev, NULL);