IB/ipoib: Clean up send-only multicast joins
author Doug Ledford <dledford@redhat.com>
Thu, 3 Sep 2015 21:05:58 +0000 (17:05 -0400)
committer Doug Ledford <dledford@redhat.com>
Thu, 3 Sep 2015 21:05:58 +0000 (17:05 -0400)
Even though we don't expect the group to be created by the SM, we
still need to provide all the parameters to force the SM to validate
that they are correct.

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/ulp/ipoib/ipoib_multicast.c

index 0d23e0568deb6fee19247ddbe43fbaabc477edf4..5e2db3b7c8bb1fc57e4402e1d05720f539395a68 100644 (file)
@@ -448,8 +448,7 @@ out_locked:
        return status;
 }
 
-static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
-                            int create)
+static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_sa_multicast *multicast;
@@ -471,7 +470,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                IB_SA_MCMEMBER_REC_PKEY         |
                IB_SA_MCMEMBER_REC_JOIN_STATE;
 
-       if (create) {
+       if (mcast != priv->broadcast) {
+               /*
+                * RFC 4391:
+                *  The MGID MUST use the same P_Key, Q_Key, SL, MTU,
+                *  and HopLimit as those used in the broadcast-GID.  The rest
+                *  of attributes SHOULD follow the values used in the
+                *  broadcast-GID as well.
+                */
                comp_mask |=
                        IB_SA_MCMEMBER_REC_QKEY                 |
                        IB_SA_MCMEMBER_REC_MTU_SELECTOR         |
@@ -492,6 +498,22 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                rec.sl            = priv->broadcast->mcmember.sl;
                rec.flow_label    = priv->broadcast->mcmember.flow_label;
                rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
+
+               /*
+                * Historically Linux IPoIB has never properly supported SEND
+                * ONLY join. It emulated it by not providing all the required
+                * attributes, which is enough to prevent group creation and
+                * detect if there are full members or not. A major problem
+                * with supporting SEND ONLY is detecting when the group is
+                * auto-destroyed as IPoIB will cache the MLID..
+                */
+#if 1
+               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+                       comp_mask &= ~IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+#else
+               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+                       rec.join_state = 4;
+#endif
        }
 
        multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
@@ -517,7 +539,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
        struct ib_port_attr port_attr;
        unsigned long delay_until = 0;
        struct ipoib_mcast *mcast = NULL;
-       int create = 1;
 
        if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
                return;
@@ -566,7 +587,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
                if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
                    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
                        mcast = priv->broadcast;
-                       create = 0;
                        if (mcast->backoff > 1 &&
                            time_before(jiffies, mcast->delay_until)) {
                                delay_until = mcast->delay_until;
@@ -590,12 +610,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
                                /* Found the next unjoined group */
                                init_completion(&mcast->done);
                                set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-                               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-                                       create = 0;
-                               else
-                                       create = 1;
                                spin_unlock_irq(&priv->lock);
-                               ipoib_mcast_join(dev, mcast, create);
+                               ipoib_mcast_join(dev, mcast);
                                spin_lock_irq(&priv->lock);
                        } else if (!delay_until ||
                                 time_before(mcast->delay_until, delay_until))
@@ -618,7 +634,7 @@ out:
        }
        spin_unlock_irq(&priv->lock);
        if (mcast)
-               ipoib_mcast_join(dev, mcast, create);
+               ipoib_mcast_join(dev, mcast);
 }
 
 int ipoib_mcast_start_thread(struct net_device *dev)