IPoIB: Handle P_Key table reordering
authorYosef Etigin <yosefe@voltaire.com>
Sat, 19 May 2007 15:51:54 +0000 (08:51 -0700)
committerRoland Dreier <rolandd@cisco.com>
Sat, 19 May 2007 15:51:54 +0000 (08:51 -0700)
SM reconfiguration or failover possibly causes a shuffling of the values
in the P_Key table. Right now, IPoIB only queries for the P_Key index
once when it creates the device QP, and hence there are problems if the
index of a P_Key value changes.  Fix this by using the PKEY_CHANGE event
to trigger a recheck of the P_Key index.

Signed-off-by: Yosef Etigin <yosefe@voltaire.com>
Acked-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_verbs.c

index 87310eeb6df07eb90c6e2e61e2f4515a85c4f22b..93d4a9a1e1ddb751ab86beaae2e609c0ac1444d5 100644 (file)
@@ -201,15 +201,17 @@ struct ipoib_dev_priv {
        struct list_head multicast_list;
        struct rb_root multicast_tree;
 
-       struct delayed_work pkey_task;
+       struct delayed_work pkey_poll_task;
        struct delayed_work mcast_task;
        struct work_struct flush_task;
        struct work_struct restart_task;
        struct delayed_work ah_reap_task;
+       struct work_struct pkey_event_task;
 
        struct ib_device *ca;
        u8                port;
        u16               pkey;
+       u16               pkey_index;
        struct ib_pd     *pd;
        struct ib_mr     *mr;
        struct ib_cq     *cq;
@@ -333,12 +335,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
 
 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_ib_dev_flush(struct work_struct *work);
+void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
 int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
 int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev, int flush);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_dev_cleanup(struct net_device *dev);
index 68d72c6f7ffbf40be9156b214ac79df8614bf4fc..e3b0937011d122df60b7a38e3a5410c565733327 100644 (file)
@@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret;
 
+       if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
+               ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
+               clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+               return -1;
+       }
+       set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
        ret = ipoib_init_qp(dev);
        if (ret) {
                ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev)
        ret = ipoib_ib_post_receives(dev);
        if (ret) {
                ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-               ipoib_ib_dev_stop(dev);
+               ipoib_ib_dev_stop(dev, 1);
                return -1;
        }
 
        ret = ipoib_cm_dev_open(dev);
        if (ret) {
                ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
-               ipoib_ib_dev_stop(dev);
+               ipoib_ib_dev_stop(dev, 1);
                return -1;
        }
 
@@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
        if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
                mutex_lock(&pkey_mutex);
                set_bit(IPOIB_PKEY_STOP, &priv->flags);
-               cancel_delayed_work(&priv->pkey_task);
+               cancel_delayed_work(&priv->pkey_poll_task);
                mutex_unlock(&pkey_mutex);
                if (flush)
                        flush_workqueue(ipoib_workqueue);
@@ -543,7 +550,7 @@ static int recvs_pending(struct net_device *dev)
        return pending;
 }
 
-int ipoib_ib_dev_stop(struct net_device *dev)
+int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_attr qp_attr;
@@ -629,7 +636,8 @@ timeout:
        /* Wait for all AHs to be reaped */
        set_bit(IPOIB_STOP_REAPER, &priv->flags);
        cancel_delayed_work(&priv->ah_reap_task);
-       flush_workqueue(ipoib_workqueue);
+       if (flush)
+               flush_workqueue(ipoib_workqueue);
 
        begin = jiffies;
 
@@ -673,13 +681,24 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
        return 0;
 }
 
-void ipoib_ib_dev_flush(struct work_struct *work)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
 {
-       struct ipoib_dev_priv *cpriv, *priv =
-               container_of(work, struct ipoib_dev_priv, flush_task);
+       struct ipoib_dev_priv *cpriv;
        struct net_device *dev = priv->dev;
+       u16 new_index;
+
+       mutex_lock(&priv->vlan_mutex);
 
-       if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
+       /*
+        * Flush any child interfaces too -- they might be up even if
+        * the parent is down.
+        */
+       list_for_each_entry(cpriv, &priv->child_intfs, list)
+               __ipoib_ib_dev_flush(cpriv, pkey_event);
+
+       mutex_unlock(&priv->vlan_mutex);
+
+       if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
                ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
                return;
        }
@@ -689,10 +708,32 @@ void ipoib_ib_dev_flush(struct work_struct *work)
                return;
        }
 
+       if (pkey_event) {
+               if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
+                       clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+                       ipoib_ib_dev_down(dev, 0);
+                       ipoib_pkey_dev_delay_open(dev);
+                       return;
+               }
+               set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
+               /* restart QP only if P_Key index is changed */
+               if (new_index == priv->pkey_index) {
+                       ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
+                       return;
+               }
+               priv->pkey_index = new_index;
+       }
+
        ipoib_dbg(priv, "flushing\n");
 
        ipoib_ib_dev_down(dev, 0);
 
+       if (pkey_event) {
+               ipoib_ib_dev_stop(dev, 0);
+               ipoib_ib_dev_open(dev);
+       }
+
        /*
         * The device could have been brought down between the start and when
         * we get here, don't bring it back up if it's not configured up
@@ -701,14 +742,24 @@ void ipoib_ib_dev_flush(struct work_struct *work)
                ipoib_ib_dev_up(dev);
                ipoib_mcast_restart_task(&priv->restart_task);
        }
+}
 
-       mutex_lock(&priv->vlan_mutex);
+void ipoib_ib_dev_flush(struct work_struct *work)
+{
+       struct ipoib_dev_priv *priv =
+               container_of(work, struct ipoib_dev_priv, flush_task);
 
-       /* Flush any child interfaces too */
-       list_for_each_entry(cpriv, &priv->child_intfs, list)
-               ipoib_ib_dev_flush(&cpriv->flush_task);
+       ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
+       __ipoib_ib_dev_flush(priv, 0);
+}
 
-       mutex_unlock(&priv->vlan_mutex);
+void ipoib_pkey_event(struct work_struct *work)
+{
+       struct ipoib_dev_priv *priv =
+               container_of(work, struct ipoib_dev_priv, pkey_event_task);
+
+       ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
+       __ipoib_ib_dev_flush(priv, 1);
 }
 
 void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -736,7 +787,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
 void ipoib_pkey_poll(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv =
-               container_of(work, struct ipoib_dev_priv, pkey_task.work);
+               container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
        struct net_device *dev = priv->dev;
 
        ipoib_pkey_dev_check_presence(dev);
@@ -747,7 +798,7 @@ void ipoib_pkey_poll(struct work_struct *work)
                mutex_lock(&pkey_mutex);
                if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
                        queue_delayed_work(ipoib_workqueue,
-                                          &priv->pkey_task,
+                                          &priv->pkey_poll_task,
                                           HZ);
                mutex_unlock(&pkey_mutex);
        }
@@ -766,7 +817,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev)
                mutex_lock(&pkey_mutex);
                clear_bit(IPOIB_PKEY_STOP, &priv->flags);
                queue_delayed_work(ipoib_workqueue,
-                                  &priv->pkey_task,
+                                  &priv->pkey_poll_task,
                                   HZ);
                mutex_unlock(&pkey_mutex);
                return 1;
index 0a428f2b05c77373da6eac2d80595b45d8a18716..894b1dcdf3eb31ff0e326aaf9a0b9b793303b8ff 100644 (file)
@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
                return -EINVAL;
 
        if (ipoib_ib_dev_up(dev)) {
-               ipoib_ib_dev_stop(dev);
+               ipoib_ib_dev_stop(dev, 1);
                return -EINVAL;
        }
 
@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev)
        flush_workqueue(ipoib_workqueue);
 
        ipoib_ib_dev_down(dev, 1);
-       ipoib_ib_dev_stop(dev);
+       ipoib_ib_dev_stop(dev, 1);
 
        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                struct ipoib_dev_priv *cpriv;
@@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev)
        INIT_LIST_HEAD(&priv->dead_ahs);
        INIT_LIST_HEAD(&priv->multicast_list);
 
-       INIT_DELAYED_WORK(&priv->pkey_task,    ipoib_pkey_poll);
+       INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
+       INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
        INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
        INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush);
        INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
index 5c3c6a43a52b55733601073f0617491e6c3baa2c..791252621b26a40969034deb4e24495e06a1d352 100644 (file)
@@ -33,8 +33,6 @@
  * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
  */
 
-#include <rdma/ib_cache.h>
-
 #include "ipoib.h"
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
        if (!qp_attr)
                goto out;
 
-       if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
+       if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
                clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
                ret = -ENXIO;
                goto out;
@@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret;
-       u16 pkey_index;
        struct ib_qp_attr qp_attr;
        int attr_mask;
 
-       /*
-        * Search through the port P_Key table for the requested pkey value.
-        * The port has to be assigned to the respective IB partition in
-        * advance.
-        */
-       ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
-       if (ret) {
-               clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
-               return ret;
-       }
-       set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+       if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+               return -1;
 
        qp_attr.qp_state = IB_QPS_INIT;
        qp_attr.qkey = 0;
        qp_attr.port_num = priv->port;
-       qp_attr.pkey_index = pkey_index;
+       qp_attr.pkey_index = priv->pkey_index;
        attr_mask =
            IB_QP_QKEY |
            IB_QP_PORT |
@@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler,
        struct ipoib_dev_priv *priv =
                container_of(handler, struct ipoib_dev_priv, event_handler);
 
-       if ((record->event == IB_EVENT_PORT_ERR    ||
-            record->event == IB_EVENT_PKEY_CHANGE ||
-            record->event == IB_EVENT_PORT_ACTIVE ||
-            record->event == IB_EVENT_LID_CHANGE  ||
-            record->event == IB_EVENT_SM_CHANGE   ||
-            record->event == IB_EVENT_CLIENT_REREGISTER) &&
-           record->element.port_num == priv->port) {
+       if (record->element.port_num != priv->port)
+               return;
+
+       if (record->event == IB_EVENT_PORT_ERR    ||
+           record->event == IB_EVENT_PORT_ACTIVE ||
+           record->event == IB_EVENT_LID_CHANGE  ||
+           record->event == IB_EVENT_SM_CHANGE   ||
+           record->event == IB_EVENT_CLIENT_REREGISTER) {
                ipoib_dbg(priv, "Port state change event\n");
                queue_work(ipoib_workqueue, &priv->flush_task);
+       } else if (record->event == IB_EVENT_PKEY_CHANGE) {
+               ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
+               queue_work(ipoib_workqueue, &priv->pkey_event_task);
        }
 }