IPoIB: Fix pkey change flow for virtualization environments
authorErez Shitrit <erezsh@mellanox.com>
Thu, 18 Jul 2013 11:02:32 +0000 (14:02 +0300)
committerRoland Dreier <roland@purestorage.com>
Wed, 31 Jul 2013 21:23:44 +0000 (14:23 -0700)
IPoIB's required behaviour w.r.t. the pkey used by the device is the following:

- For "parent" interfaces (e.g. ib0, ib1, etc.), which are created
  automatically as a result of hot-plug events from the IB core, the
  driver needs to take whatever pkey value it finds in index 0, and
  stick to that index.

- For child interfaces (e.g. ib0.8001, etc.) created by admin directive,
  the driver needs to use and stick to the value provided during its
  creation.

In an SR-IOV environment it's possible for the VF probe to take place
before the cloud management software provisions the suitable pkey for
the VF in the paravirtualized PKEY table index 0. When this is the case,
the VF IB stack will find in index 0 an invalid pkey, which is all
zeros.

Moreover, the cloud management software can assign the pkey value at
index 0 at any time during the guest life cycle.

The correct behavior for IPoIB to address these requirements for
parent interfaces is to use the PKEY_CHANGE event as a trigger to
optionally re-init the device pkey value and re-create all the relevant
resources accordingly, if the value of the pkey in index 0 has changed
(from invalid to valid or from valid value X to invalid value Y).

This patch enhances the heavy flushing code, which is triggered by a
pkey change event, to behave correctly for parent devices. For child
devices, the code remains the same: they chase the pkey value and not
the index.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/ulp/ipoib/ipoib_ib.c

index 2cfa76f5d99eac87bf788eb41bb2a7c3815ec700..196b1d13cbcbc09548e92a4be6c9a18cdc6aae36 100644 (file)
@@ -932,12 +932,47 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
        return 0;
 }
 
+/*
+ * Takes whatever value which is in pkey index 0 and updates priv->pkey
+ * returns 0 if the pkey value was changed.
+ */
+static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
+{
+       int result;
+       u16 prev_pkey;
+
+       prev_pkey = priv->pkey;
+       result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
+       if (result) {
+               ipoib_warn(priv, "ib_query_pkey port %d failed (ret = %d)\n",
+                          priv->port, result);
+               return result;
+       }
+
+       priv->pkey |= 0x8000;
+
+       if (prev_pkey != priv->pkey) {
+               ipoib_dbg(priv, "pkey changed from 0x%x to 0x%x\n",
+                         prev_pkey, priv->pkey);
+               /*
+                * Update the pkey in the broadcast address, while making sure to set
+                * the full membership bit, so that we join the right broadcast group.
+                */
+               priv->dev->broadcast[8] = priv->pkey >> 8;
+               priv->dev->broadcast[9] = priv->pkey & 0xff;
+               return 0;
+       }
+
+       return 1;
+}
+
 static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
                                enum ipoib_flush_level level)
 {
        struct ipoib_dev_priv *cpriv;
        struct net_device *dev = priv->dev;
        u16 new_index;
+       int result;
 
        mutex_lock(&priv->vlan_mutex);
 
@@ -951,6 +986,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
        mutex_unlock(&priv->vlan_mutex);
 
        if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
+               /* for non-child devices must check/update the pkey value here */
+               if (level == IPOIB_FLUSH_HEAVY &&
+                   !test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
+                       update_parent_pkey(priv);
                ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
                return;
        }
@@ -961,21 +1000,32 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
        }
 
        if (level == IPOIB_FLUSH_HEAVY) {
-               if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
-                       clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
-                       ipoib_ib_dev_down(dev, 0);
-                       ipoib_ib_dev_stop(dev, 0);
-                       if (ipoib_pkey_dev_delay_open(dev))
+               /* child devices chase their origin pkey value, while non-child
+                * (parent) devices should always takes what present in pkey index 0
+                */
+               if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
+                       if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
+                               clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+                               ipoib_ib_dev_down(dev, 0);
+                               ipoib_ib_dev_stop(dev, 0);
+                               if (ipoib_pkey_dev_delay_open(dev))
+                                       return;
+                       }
+                       /* restart QP only if P_Key index is changed */
+                       if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
+                           new_index == priv->pkey_index) {
+                               ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
                                return;
+                       }
+                       priv->pkey_index = new_index;
+               } else {
+                       result = update_parent_pkey(priv);
+                       /* restart QP only if P_Key value changed */
+                       if (result) {
+                               ipoib_dbg(priv, "Not flushing - P_Key value not changed.\n");
+                               return;
+                       }
                }
-
-               /* restart QP only if P_Key index is changed */
-               if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
-                   new_index == priv->pkey_index) {
-                       ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
-                       return;
-               }
-               priv->pkey_index = new_index;
        }
 
        if (level == IPOIB_FLUSH_LIGHT) {