i40iw: Add support for port reuse on active side connections
author Shiraz Saleem <shiraz.saleem@intel.com>
Tue, 19 Sep 2017 14:19:13 +0000 (09:19 -0500)
committer Doug Ledford <dledford@redhat.com>
Fri, 22 Sep 2017 17:43:36 +0000 (13:43 -0400)
During OpenMPI scale-up testing, we observe rdma_connect
failures if ports are reused on multiple connections.
This is because the Control Queue-Pair (CQP) command to add
the reused port to the Accelerated Port Bit Vector Table (APBVT)
fails when an entry for that port already exists.

Check for duplicate port before invoking the CQP command
to add APBVT entry and delete the entry only if the port
is not in use.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/i40iw/i40iw_cm.c
drivers/infiniband/hw/i40iw/i40iw_cm.h

index b7215448bb63bb0431fd025ec7c5c0428e781f6c..5230dd3c938c2c506bc590afd8ef054d300ac480 100644 (file)
@@ -1504,23 +1504,40 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
 }
 
 /**
- * listen_port_in_use - determine if port is in use
- * @port: Listen port number
+ * i40iw_port_in_use - determine if port is in use
+ * @port: port number
+ * @active_side: flag for listener side vs active side
  */
-static bool i40iw_listen_port_in_use(struct i40iw_cm_core *cm_core, u16 port)
+static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side)
 {
        struct i40iw_cm_listener *listen_node;
+       struct i40iw_cm_node *cm_node;
        unsigned long flags;
        bool ret = false;
 
-       spin_lock_irqsave(&cm_core->listen_list_lock, flags);
-       list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
-               if (listen_node->loc_port == port) {
-                       ret = true;
-                       break;
+       if (active_side) {
+               /* search connected node list */
+               spin_lock_irqsave(&cm_core->ht_lock, flags);
+               list_for_each_entry(cm_node, &cm_core->connected_nodes, list) {
+                       if (cm_node->loc_port == port) {
+                               ret = true;
+                               break;
+                       }
+               }
+                       if (!ret)
+                               clear_bit(port, cm_core->active_side_ports);
+               spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+       } else {
+               spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+               list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+                       if (listen_node->loc_port == port) {
+                               ret = true;
+                               break;
+                       }
                }
+               spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
        }
-       spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
        return ret;
 }
 
@@ -1868,7 +1885,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
                spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
 
                if (listener->iwdev) {
-                       if (apbvt_del && !i40iw_listen_port_in_use(cm_core, listener->loc_port))
+                       if (apbvt_del && !i40iw_port_in_use(cm_core, listener->loc_port, false))
                                i40iw_manage_apbvt(listener->iwdev,
                                                   listener->loc_port,
                                                   I40IW_MANAGE_APBVT_DEL);
@@ -2247,21 +2264,21 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
        if (cm_node->listener) {
                i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
        } else {
-               if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
-                   cm_node->apbvt_set) {
+               if (!i40iw_port_in_use(cm_core, cm_node->loc_port, true) && cm_node->apbvt_set) {
                        i40iw_manage_apbvt(cm_node->iwdev,
                                           cm_node->loc_port,
                                           I40IW_MANAGE_APBVT_DEL);
-                       i40iw_get_addr_info(cm_node, &nfo);
-                       if (cm_node->qhash_set) {
-                               i40iw_manage_qhash(cm_node->iwdev,
-                                                  &nfo,
-                                                  I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-                                                  I40IW_QHASH_MANAGE_TYPE_DELETE,
-                                                  NULL,
-                                                  false);
-                               cm_node->qhash_set = 0;
-                       }
+                       cm_node->apbvt_set = 0;
+               }
+               i40iw_get_addr_info(cm_node, &nfo);
+               if (cm_node->qhash_set) {
+                       i40iw_manage_qhash(cm_node->iwdev,
+                                          &nfo,
+                                          I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+                                          I40IW_QHASH_MANAGE_TYPE_DELETE,
+                                          NULL,
+                                          false);
+                       cm_node->qhash_set = 0;
                }
        }
 
@@ -3738,10 +3755,8 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct sockaddr_in *raddr;
        struct sockaddr_in6 *laddr6;
        struct sockaddr_in6 *raddr6;
-       bool qhash_set = false;
-       int apbvt_set = 0;
-       int err = 0;
-       enum i40iw_status_code status;
+       int ret = 0;
+       unsigned long flags;
 
        ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
        if (!ibqp)
@@ -3790,32 +3805,6 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        cm_info.user_pri = rt_tos2priority(cm_id->tos);
        i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n",
                    __func__, cm_id->tos, cm_info.user_pri);
-       if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
-           (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
-                                    raddr6->sin6_addr.in6_u.u6_addr32,
-                                    sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
-               status = i40iw_manage_qhash(iwdev,
-                                           &cm_info,
-                                           I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-                                           I40IW_QHASH_MANAGE_TYPE_ADD,
-                                           NULL,
-                                           true);
-               if (status)
-                       return -EINVAL;
-               qhash_set = true;
-       }
-       status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
-       if (status) {
-               i40iw_manage_qhash(iwdev,
-                                  &cm_info,
-                                  I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-                                  I40IW_QHASH_MANAGE_TYPE_DELETE,
-                                  NULL,
-                                  false);
-               return -EINVAL;
-       }
-
-       apbvt_set = 1;
        cm_id->add_ref(cm_id);
        cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
                                       conn_param->private_data_len,
@@ -3823,17 +3812,40 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                                       &cm_info);
 
        if (IS_ERR(cm_node)) {
-               err = PTR_ERR(cm_node);
-               goto err_out;
+               ret = PTR_ERR(cm_node);
+               cm_id->rem_ref(cm_id);
+               return ret;
+       }
+
+       if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
+           (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
+                                    raddr6->sin6_addr.in6_u.u6_addr32,
+                                    sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
+               if (i40iw_manage_qhash(iwdev, &cm_info, I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+                                      I40IW_QHASH_MANAGE_TYPE_ADD, NULL, true)) {
+                       ret = -EINVAL;
+                       goto err;
+               }
+               cm_node->qhash_set = true;
        }
 
+       spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
+       if (!test_and_set_bit(cm_info.loc_port, iwdev->cm_core.active_side_ports)) {
+               spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+               if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD)) {
+                       ret =  -EINVAL;
+                       goto err;
+               }
+       } else {
+               spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+       }
+
+       cm_node->apbvt_set = true;
        i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
        if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
            !cm_node->ord_size)
                cm_node->ord_size = 1;
 
-       cm_node->apbvt_set = apbvt_set;
-       cm_node->qhash_set = qhash_set;
        iwqp->cm_node = cm_node;
        cm_node->iwqp = iwqp;
        iwqp->cm_id = cm_id;
@@ -3841,11 +3853,9 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
                cm_node->state = I40IW_CM_STATE_SYN_SENT;
-               err = i40iw_send_syn(cm_node, 0);
-               if (err) {
-                       i40iw_rem_ref_cm_node(cm_node);
-                       goto err_out;
-               }
+               ret = i40iw_send_syn(cm_node, 0);
+               if (ret)
+                       goto err;
        }
 
        i40iw_debug(cm_node->dev,
@@ -3854,9 +3864,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                    cm_node->rem_port,
                    cm_node,
                    cm_node->cm_id);
+
        return 0;
 
-err_out:
+err:
        if (cm_info.ipv4)
                i40iw_debug(&iwdev->sc_dev,
                            I40IW_DEBUG_CM,
@@ -3868,22 +3879,10 @@ err_out:
                            "Api - connect() FAILED: dest addr=%pI6",
                            cm_info.rem_addr);
 
-       if (qhash_set)
-               i40iw_manage_qhash(iwdev,
-                                  &cm_info,
-                                  I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-                                  I40IW_QHASH_MANAGE_TYPE_DELETE,
-                                  NULL,
-                                  false);
-
-       if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
-                                                  cm_info.loc_port))
-               i40iw_manage_apbvt(iwdev,
-                                  cm_info.loc_port,
-                                  I40IW_MANAGE_APBVT_DEL);
+       i40iw_rem_ref_cm_node(cm_node);
        cm_id->rem_ref(cm_id);
        iwdev->cm_core.stats_connect_errs++;
-       return err;
+       return ret;
 }
 
 /**
index 8626e7f1fdd34af8349dba569e9b0f782ea580d9..45abef76295b24429accdce4e7a49d91dbc33cce 100644 (file)
@@ -71,6 +71,7 @@
 #define        I40IW_HW_IRD_SETTING_32 32
 #define        I40IW_HW_IRD_SETTING_64 64
 
+#define MAX_PORTS              65536
 #define I40IW_VLAN_PRIO_SHIFT   13
 
 enum ietf_mpa_flags {
@@ -413,6 +414,8 @@ struct i40iw_cm_core {
        spinlock_t ht_lock; /* manage hash table */
        spinlock_t listen_list_lock; /* listen list */
 
+       unsigned long active_side_ports[BITS_TO_LONGS(MAX_PORTS)];
+
        u64     stats_nodes_created;
        u64     stats_nodes_destroyed;
        u64     stats_listen_created;