IB/cma: Join and leave multicast groups with IGMP
author Moni Shoua <monis@mellanox.com>
Wed, 23 Dec 2015 12:56:57 +0000 (14:56 +0200)
committer Doug Ledford <dledford@redhat.com>
Wed, 23 Dec 2015 15:39:53 +0000 (10:39 -0500)
Since RoCEv2 is a protocol carried over an IP header, IGMP join and
leave requests must be sent to the network when joining and leaving
multicast groups.

Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/core/cma.c
drivers/infiniband/core/multicast.c
include/rdma/ib_sa.h

index 75987b0c570ccb182b21760745ef2acbba264163..559ee3d27b4a683dcbc0510f83212d73fe2a11c4 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/in6.h>
 #include <linux/mutex.h>
 #include <linux/random.h>
+#include <linux/igmp.h>
 #include <linux/idr.h>
 #include <linux/inetdevice.h>
 #include <linux/slab.h>
@@ -304,6 +305,7 @@ struct cma_multicast {
        void                    *context;
        struct sockaddr_storage addr;
        struct kref             mcref;
+       bool                    igmp_joined;
 };
 
 struct cma_work {
@@ -400,6 +402,26 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
        hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 }
 
+/*
+ * cma_igmp_send - join or leave an IP multicast group on a net_device.
+ * @ndev: device whose in_device is updated; may be NULL.
+ * @mgid: multicast GID; bytes 12..15 of @mgid->raw are passed on as the
+ *        __be32 group address.
+ * @join: true calls ip_mc_inc_group(), false calls ip_mc_dec_group().
+ *
+ * The RTNL lock is taken around the in_device lookup and the group update.
+ *
+ * Returns 0 when @ndev has an in_device, or -ENODEV when @ndev is NULL or
+ * __in_dev_get_rtnl() returns no in_device for it.
+ */
+static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
+{
+       struct in_device *idev;
+       __be32 group;
+
+       if (!ndev)
+               return -ENODEV;
+
+       rtnl_lock();
+       idev = __in_dev_get_rtnl(ndev);
+       if (!idev) {
+               rtnl_unlock();
+               return -ENODEV;
+       }
+
+       /* The group address is the final four bytes of the GID. */
+       group = *(__be32 *)(mgid->raw + 12);
+       if (join)
+               ip_mc_inc_group(idev, group);
+       else
+               ip_mc_dec_group(idev, group);
+       rtnl_unlock();
+
+       return 0;
+}
+
 static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
                               struct cma_device *cma_dev)
 {
@@ -1532,8 +1554,24 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
                                      id_priv->id.port_num)) {
                        ib_sa_free_multicast(mc->multicast.ib);
                        kfree(mc);
-               } else
+               } else {
+                       if (mc->igmp_joined) {
+                               struct rdma_dev_addr *dev_addr =
+                                       &id_priv->id.route.addr.dev_addr;
+                               struct net_device *ndev = NULL;
+
+                               if (dev_addr->bound_dev_if)
+                                       ndev = dev_get_by_index(&init_net,
+                                                               dev_addr->bound_dev_if);
+                               if (ndev) {
+                                       cma_igmp_send(ndev,
+                                                     &mc->multicast.ib->rec.mgid,
+                                                     false);
+                                       dev_put(ndev);
+                               }
+                       }
                        kref_put(&mc->mcref, release_mc);
+               }
        }
 }
 
@@ -3645,12 +3683,23 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
        event.status = status;
        event.param.ud.private_data = mc->context;
        if (!status) {
+               struct rdma_dev_addr *dev_addr =
+                       &id_priv->id.route.addr.dev_addr;
+               struct net_device *ndev =
+                       dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+               enum ib_gid_type gid_type =
+                       id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+                       rdma_start_port(id_priv->cma_dev->device)];
+
                event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
                ib_init_ah_from_mcmember(id_priv->id.device,
                                         id_priv->id.port_num, &multicast->rec,
+                                        ndev, gid_type,
                                         &event.param.ud.ah_attr);
                event.param.ud.qp_num = 0xFFFFFF;
                event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+               if (ndev)
+                       dev_put(ndev);
        } else
                event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
 
@@ -3783,9 +3832,10 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 {
        struct iboe_mcast_work *work;
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
-       int err;
+       int err = 0;
        struct sockaddr *addr = (struct sockaddr *)&mc->addr;
        struct net_device *ndev = NULL;
+       enum ib_gid_type gid_type;
 
        if (cma_zero_addr((struct sockaddr *)&mc->addr))
                return -EINVAL;
@@ -3815,9 +3865,25 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
        mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
        mc->multicast.ib->rec.hop_limit = 1;
        mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+
+       gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+                  rdma_start_port(id_priv->cma_dev->device)];
+       if (addr->sa_family == AF_INET) {
+               if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+                                           true);
+               if (!err) {
+                       mc->igmp_joined = true;
+                       mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+               }
+       } else {
+               if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       err = -ENOTSUPP;
+       }
        dev_put(ndev);
-       if (!mc->multicast.ib->rec.mtu) {
-               err = -EINVAL;
+       if (err || !mc->multicast.ib->rec.mtu) {
+               if (!err)
+                       err = -EINVAL;
                goto out2;
        }
        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
@@ -3856,7 +3922,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
        memcpy(&mc->addr, addr, rdma_addr_size(addr));
        mc->context = context;
        mc->id_priv = id_priv;
-
+       mc->igmp_joined = false;
        spin_lock(&id_priv->lock);
        list_add(&mc->list, &id_priv->mc_list);
        spin_unlock(&id_priv->lock);
@@ -3901,9 +3967,25 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
                        if (rdma_cap_ib_mcast(id->device, id->port_num)) {
                                ib_sa_free_multicast(mc->multicast.ib);
                                kfree(mc);
-                       } else if (rdma_protocol_roce(id->device, id->port_num))
+                       } else if (rdma_protocol_roce(id->device, id->port_num)) {
+                               if (mc->igmp_joined) {
+                                       struct rdma_dev_addr *dev_addr =
+                                               &id->route.addr.dev_addr;
+                                       struct net_device *ndev = NULL;
+
+                                       if (dev_addr->bound_dev_if)
+                                               ndev = dev_get_by_index(&init_net,
+                                                                       dev_addr->bound_dev_if);
+                                       if (ndev) {
+                                               cma_igmp_send(ndev,
+                                                             &mc->multicast.ib->rec.mgid,
+                                                             false);
+                                               dev_put(ndev);
+                                       }
+                                       mc->igmp_joined = false;
+                               }
                                kref_put(&mc->mcref, release_mc);
-
+                       }
                        return;
                }
        }
index 6911ae68e8d8703505588cc35cccdd15c0ce60c9..250937cb9a1a5071f054a24e74414a8c90bd88a0 100644 (file)
@@ -723,14 +723,27 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
 
 int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
                             struct ib_sa_mcmember_rec *rec,
+                            struct net_device *ndev,
+                            enum ib_gid_type gid_type,
                             struct ib_ah_attr *ah_attr)
 {
        int ret;
        u16 gid_index;
        u8 p;
 
-       ret = ib_find_cached_gid(device, &rec->port_gid, IB_GID_TYPE_IB,
-                                NULL, &p, &gid_index);
+       if (rdma_protocol_roce(device, port_num)) {
+               ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+                                                gid_type, port_num,
+                                                ndev,
+                                                &gid_index);
+       } else if (rdma_protocol_ib(device, port_num)) {
+               ret = ib_find_cached_gid(device, &rec->port_gid,
+                                        IB_GID_TYPE_IB, NULL, &p,
+                                        &gid_index);
+       } else {
+               ret = -EINVAL;
+       }
+
        if (ret)
                return ret;
 
index 0a40ed241156da581b9d1832e0f3c1305fdc20bb..cdc1c81aa275bda38f97f1e71c3c774dfbc8c983 100644 (file)
@@ -403,6 +403,8 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
  */
 int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
                             struct ib_sa_mcmember_rec *rec,
+                            struct net_device *ndev,
+                            enum ib_gid_type gid_type,
                             struct ib_ah_attr *ah_attr);
 
 /**