smc: netlink interface for SMC sockets
author Ursula Braun <ubraun@linux.vnet.ibm.com>
Mon, 9 Jan 2017 15:55:26 +0000 (16:55 +0100)
committer David S. Miller <davem@davemloft.net>
Mon, 9 Jan 2017 21:07:41 +0000 (16:07 -0500)
Support for SMC socket monitoring via netlink sockets of protocol
NETLINK_SOCK_DIAG.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/smc.h [new file with mode: 0644]
include/net/sock.h
include/uapi/linux/netlink.h
include/uapi/linux/smc_diag.h [new file with mode: 0644]
net/smc/Kconfig
net/smc/Makefile
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_close.c
net/smc/smc_diag.c [new file with mode: 0644]

diff --git a/include/net/smc.h b/include/net/smc.h
new file mode 100644 (file)
index 0000000..12d2635
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Definitions for the SMC module (socket related)
+ *
+ *  Copyright IBM Corp. 2016
+ *
+ *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+#ifndef _SMC_H
+#define _SMC_H
+
+/* Socket bookkeeping hung off struct proto (h.smc_hash): one hlist of
+ * sockets plus the reader/writer lock taken around every access to it.
+ */
+struct smc_hashinfo {
+       rwlock_t lock;          /* held while ht is walked or modified */
+       struct hlist_head ht;   /* head of the socket list */
+};
+
+/* Link sk into the socket list of its protocol; returns 0 */
+int smc_hash_sk(struct sock *sk);
+/* Take sk off the socket list of its protocol again */
+void smc_unhash_sk(struct sock *sk);
+#endif /* _SMC_H */
index 99deda67eba0ef7f0a8699db740c3b9b357ce271..389a0a619b457ca8c94a907f910a89c5d41feb40 100644 (file)
@@ -70,6 +70,7 @@
 #include <net/checksum.h>
 #include <net/tcp_states.h>
 #include <linux/net_tstamp.h>
+#include <net/smc.h>
 
 /*
  * This structure really needs to be cleaned up.
@@ -986,6 +987,7 @@ struct request_sock_ops;
 struct timewait_sock_ops;
 struct inet_hashinfo;
 struct raw_hashinfo;
+struct smc_hashinfo;
 struct module;
 
 /*
@@ -1094,6 +1096,7 @@ struct proto {
                struct inet_hashinfo    *hashinfo;
                struct udp_table        *udp_table;
                struct raw_hashinfo     *raw_hash;
+               struct smc_hashinfo     *smc_hash;
        } h;
 
        struct module           *owner;
index 0dba4e4ed2be21af9f02df5bd844cbb087a51706..f3946a27bd07d5164fac6964785c86044f031790 100644 (file)
@@ -27,6 +27,7 @@
 #define NETLINK_ECRYPTFS       19
 #define NETLINK_RDMA           20
 #define NETLINK_CRYPTO         21      /* Crypto layer */
+#define NETLINK_SMC            22      /* SMC monitoring */
 
 #define NETLINK_INET_DIAG      NETLINK_SOCK_DIAG
 
diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
new file mode 100644 (file)
index 0000000..0063919
--- /dev/null
@@ -0,0 +1,85 @@
+#ifndef _UAPI_SMC_DIAG_H_
+#define _UAPI_SMC_DIAG_H_
+
+#include <linux/types.h>
+#include <linux/inet_diag.h>
+#include <rdma/ib_verbs.h>
+
+/* Request structure.
+ * diag_ext is a bit mask: bit (SMC_DIAG_<x> - 1) asks for extension
+ * SMC_DIAG_<x> to be added to every reported socket.
+ */
+struct smc_diag_req {
+       __u8    diag_family;
+       __u8    pad[2];
+       __u8    diag_ext;               /* Query extended information */
+       struct inet_diag_sockid id;
+};
+
+/* Base info structure. It contains socket identity (addrs/ports/cookie) based
+ * on the internal clcsock, and more SMC-related socket data
+ */
+struct smc_diag_msg {
+       __u8    diag_family;            /* sk_family of the SMC socket */
+       __u8    diag_state;             /* sk_state of the SMC socket */
+       __u8    diag_fallback;          /* use_fallback flag of the socket */
+       __u8    diag_shutdown;          /* NOTE(review): not written by the
+                                        * dump code in smc_diag.c; the
+                                        * shutdown mask is reported through
+                                        * the SMC_DIAG_SHUTDOWN attribute
+                                        */
+       struct inet_diag_sockid id;     /* ports/addrs/ifindex of the clcsock,
+                                        * cookie of the SMC socket
+                                        */
+
+       __u32   diag_uid;               /* socket owner, mapped into the
+                                        * requester's user namespace
+                                        */
+       __u64   diag_inode;             /* inode number of the socket */
+};
+
+/* Extensions.
+ * Each value is used as the netlink attribute type of the extension; the
+ * value minus one is the bit number that requests it in
+ * smc_diag_req.diag_ext.
+ */
+
+enum {
+       SMC_DIAG_NONE,
+       SMC_DIAG_CONNINFO,      /* payload: struct smc_diag_conninfo */
+       SMC_DIAG_LGRINFO,       /* payload: struct smc_diag_lgrinfo */
+       SMC_DIAG_SHUTDOWN,      /* payload: u8 shutdown mask of the socket */
+       __SMC_DIAG_MAX,
+};
+
+/* Highest valid extension / attribute type */
+#define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1)
+
+/* SMC_DIAG_CONNINFO */
+
+/* Snapshot of one connection cursor (wrap and count values as copied from
+ * the kernel's connection state)
+ */
+struct smc_diag_cursor {
+       __u16   reserved;
+       __u16   wrap;
+       __u32   count;
+};
+
+/* Payload of the SMC_DIAG_CONNINFO attribute: buffer sizes, cursors and
+ * flag bytes of one SMC connection
+ */
+struct smc_diag_conninfo {
+       __u32                   token;          /* unique connection id */
+       __u32                   sndbuf_size;    /* size of send buffer */
+       __u32                   rmbe_size;      /* size of RMB element */
+       __u32                   peer_rmbe_size; /* size of peer RMB element */
+       /* local RMB element cursors */
+       struct smc_diag_cursor  rx_prod;        /* received producer cursor */
+       struct smc_diag_cursor  rx_cons;        /* received consumer cursor */
+       /* peer RMB element cursors */
+       struct smc_diag_cursor  tx_prod;        /* sent producer cursor */
+       struct smc_diag_cursor  tx_cons;        /* sent consumer cursor */
+       __u8                    rx_prod_flags;  /* received producer flags */
+       __u8                    rx_conn_state_flags; /* recvd connection flags*/
+       __u8                    tx_prod_flags;  /* sent producer flags */
+       __u8                    tx_conn_state_flags; /* sent connection flags*/
+       /* send buffer cursors */
+       struct smc_diag_cursor  tx_prep;        /* prepared to be sent cursor */
+       struct smc_diag_cursor  tx_sent;        /* sent cursor */
+       struct smc_diag_cursor  tx_fin;         /* confirmed sent cursor */
+};
+
+/* SMC_DIAG_LGRINFO */
+
+/* Description of one link, embedded in struct smc_diag_lgrinfo.
+ * Both GIDs are NUL-terminated text: eight colon-separated groups of four
+ * hex digits (39 characters + NUL).
+ */
+struct smc_diag_linkinfo {
+       __u8 link_id;                   /* link identifier */
+       __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */
+       __u8 ibport;                    /* RDMA device port number */
+       __u8 gid[40];                   /* local GID */
+       __u8 peer_gid[40];              /* peer GID */
+};
+
+/* Payload of the SMC_DIAG_LGRINFO attribute: the link group's role and its
+ * single reported link
+ */
+struct smc_diag_lgrinfo {
+       struct smc_diag_linkinfo        lnk[1];
+       __u8                            role;
+};
+#endif /* _UAPI_SMC_DIAG_H_ */
index bc029803e7283a24ee5ff4c9737a67ac459d38f8..c717ef0896aa2accaee05e3cf4c10d066c61eeb3 100644 (file)
@@ -9,3 +9,12 @@ config SMC
          a separate socket family SMC.
 
          Select this option if you want to run SMC socket applications
+
+config SMC_DIAG
+       tristate "SMC: socket monitoring interface"
+       depends on SMC
+       ---help---
+         Support for SMC socket monitoring interface used by tools such as
+         smcss.
+
+         If unsure, say Y.
index 5cf0cafaa208013e2e7dd2278b34f9047c49c13c..188104654b545b4a2c28495d0fca9cf9b020743b 100644 (file)
@@ -1,3 +1,4 @@
 obj-$(CONFIG_SMC)      += smc.o
+obj-$(CONFIG_SMC_DIAG) += smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
 smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o
index 3f543d58bc5c6f7bdcb56717b13c3e2085470852..5d4208ad029e27a64537d7ad73ba3994206337e8 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/in.h>
 #include <net/sock.h>
 #include <net/tcp.h>
+#include <net/smc.h>
 
 #include "smc.h"
 #include "smc_clc.h"
@@ -59,13 +60,48 @@ static void smc_set_keepalive(struct sock *sk, int val)
        smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
 }
 
-static struct proto smc_proto = {
+/* Socket list behind smc_proto.h.smc_hash; only the lock is set up
+ * statically, the list head is initialized in smc_init()
+ */
+static struct smc_hashinfo smc_v4_hashinfo = {
+       .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
+};
+
+/* Put sk on the socket list of its protocol and count it as in use.
+ * List update and accounting both happen under the write lock.
+ * Always returns 0.
+ */
+int smc_hash_sk(struct sock *sk)
+{
+       struct proto *prot = sk->sk_prot;
+       struct smc_hashinfo *hinfo = prot->h.smc_hash;
+
+       write_lock_bh(&hinfo->lock);
+       sk_add_node(sk, &hinfo->ht);
+       sock_prot_inuse_add(sock_net(sk), prot, 1);
+       write_unlock_bh(&hinfo->lock);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(smc_hash_sk);
+
+/* Remove sk from the socket list of its protocol. The in-use counter is
+ * only decremented when sk was actually on the list, so calling this more
+ * than once for the same socket does not skew the accounting.
+ */
+void smc_unhash_sk(struct sock *sk)
+{
+       struct proto *prot = sk->sk_prot;
+       rwlock_t *list_lock = &prot->h.smc_hash->lock;
+
+       write_lock_bh(list_lock);
+       if (sk_del_node_init(sk))
+               sock_prot_inuse_add(sock_net(sk), prot, -1);
+       write_unlock_bh(list_lock);
+}
+EXPORT_SYMBOL_GPL(smc_unhash_sk);
+
+/* Protocol descriptor of SMC sockets. No longer static, and exported, so
+ * that smc_diag can reach the socket list through h.smc_hash.
+ */
+struct proto smc_proto = {
        .name           = "SMC",
        .owner          = THIS_MODULE,
        .keepalive      = smc_set_keepalive,
+       .hash           = smc_hash_sk,
+       .unhash         = smc_unhash_sk,
        .obj_size       = sizeof(struct smc_sock),
+       .h.smc_hash     = &smc_v4_hashinfo,
        .slab_flags     = SLAB_DESTROY_BY_RCU,
 };
+EXPORT_SYMBOL_GPL(smc_proto);
 
 static int smc_release(struct socket *sock)
 {
@@ -109,6 +145,7 @@ static int smc_release(struct socket *sock)
                schedule_delayed_work(&smc->sock_put_work,
                                      SMC_CLOSE_SOCK_PUT_DELAY);
        }
+       sk->sk_prot->unhash(sk);
        release_sock(sk);
 
        sock_put(sk);
@@ -144,6 +181,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
        INIT_LIST_HEAD(&smc->accept_q);
        spin_lock_init(&smc->accept_q_lock);
        INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
+       sk->sk_prot->hash(sk);
        sk_refcnt_debug_inc(sk);
 
        return sk;
@@ -536,6 +574,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
                lsmc->sk.sk_err = -rc;
                new_sk->sk_state = SMC_CLOSED;
                sock_set_flag(new_sk, SOCK_DEAD);
+               sk->sk_prot->unhash(new_sk);
                sock_put(new_sk);
                *new_smc = NULL;
                goto out;
@@ -545,6 +584,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
                        sock_release(new_clcsock);
                new_sk->sk_state = SMC_CLOSED;
                sock_set_flag(new_sk, SOCK_DEAD);
+               sk->sk_prot->unhash(new_sk);
                sock_put(new_sk);
                *new_smc = NULL;
                goto out;
@@ -1320,6 +1360,7 @@ static int __init smc_init(void)
                pr_err("%s: sock_register fails with %d\n", __func__, rc);
                goto out_proto;
        }
+       INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
 
        rc = smc_ib_register_client();
        if (rc) {
index 959a5d2014abc01f7d77cb88af4f304271c579ab..ee5fbea24549d2df9f55bb0ad177d548c637bb0f 100644 (file)
@@ -21,6 +21,8 @@
 
 #define SMC_MAX_PORTS          2       /* Max # of ports */
 
+extern struct proto smc_proto;
+
 #ifdef ATOMIC64_INIT
 #define KERNEL_HAS_ATOMIC64
 #endif
index d70c05b570214f7049e2930aeddca54d2279e40a..03dfcc6b76614a57dbd69e14a79524464803783a 100644 (file)
@@ -384,6 +384,7 @@ void smc_close_sock_put_work(struct work_struct *work)
                                            struct smc_sock,
                                            sock_put_work);
 
+       smc->sk.sk_prot->unhash(&smc->sk);
        sock_put(&smc->sk);
 }
 
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
new file mode 100644 (file)
index 0000000..d2d01cf
--- /dev/null
@@ -0,0 +1,215 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Monitoring SMC transport protocol sockets
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sock_diag.h>
+#include <linux/inet_diag.h>
+#include <linux/smc_diag.h>
+#include <net/netlink.h>
+#include <net/smc.h>
+
+#include "smc.h"
+#include "smc_core.h"
+
+/* Format the raw GID at gid_raw as text in buf: eight big-endian 16-bit
+ * groups, each printed as four hex digits and separated by colons.
+ * The output is 39 characters plus the terminating NUL, so buf has to
+ * provide at least 40 bytes.
+ */
+static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw)
+{
+       __be16 *grp = (__be16 *)gid_raw;
+
+       sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
+               be16_to_cpu(grp[0]), be16_to_cpu(grp[1]),
+               be16_to_cpu(grp[2]), be16_to_cpu(grp[3]),
+               be16_to_cpu(grp[4]), be16_to_cpu(grp[5]),
+               be16_to_cpu(grp[6]), be16_to_cpu(grp[7]));
+}
+
+/* Fill the identity part of a diag message for sk.
+ *
+ * @r:  message payload to fill; it is cleared completely first
+ * @sk: SMC socket being reported
+ *
+ * Family and cookie are taken from the SMC socket itself. Ports, addresses
+ * and the bound interface come from the internal clcsock and stay zero when
+ * the socket has no clcsock.
+ */
+static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
+{
+       struct smc_sock *smc = smc_sk(sk);
+
+       /* r points into netlink payload that has only been reserved, not
+        * cleared. Zero all of it so that fields nobody sets (for example
+        * diag_shutdown, or the whole id when there is no clcsock) cannot
+        * carry stale kernel memory out to user space.
+        */
+       memset(r, 0, sizeof(*r));
+       r->diag_family = sk->sk_family;
+       /* the cookie identifies sk itself, so it does not need a clcsock */
+       sock_diag_save_cookie(sk, r->id.idiag_cookie);
+       if (!smc->clcsock)
+               return;
+       r->id.idiag_sport = htons(smc->clcsock->sk->sk_num);
+       r->id.idiag_dport = smc->clcsock->sk->sk_dport;
+       r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if;
+       r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
+       r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
+}
+
+/* Append the SMC_DIAG_SHUTDOWN attribute to skb and, if that worked, store
+ * owner uid (mapped into user_ns) and inode number of sk in r.
+ * Returns 0 on success and 1 when the attribute could not be added; r is
+ * left untouched in that case.
+ */
+static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
+                                  struct smc_diag_msg *r,
+                                  struct user_namespace *user_ns)
+{
+       int failed = nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown);
+
+       if (!failed) {
+               r->diag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
+               r->diag_inode = sock_i_ino(sk);
+       }
+       return failed ? 1 : 0;
+}
+
+/* Add one multipart netlink message describing sk to skb.
+ *
+ * @sk:  SMC socket to report
+ * @skb: dump buffer the message is appended to
+ * @cb:  dump state; supplies portid, sequence number and message type
+ * @req: request of the caller; diag_ext selects the optional attributes
+ * @bc:  not used by this function
+ *
+ * The message consists of a struct smc_diag_msg, the SMC_DIAG_SHUTDOWN
+ * attribute and, when requested and the connection has a link group, the
+ * SMC_DIAG_CONNINFO and SMC_DIAG_LGRINFO attributes.
+ *
+ * Returns 0 on success. Returns -EMSGSIZE when the header or one of the
+ * attributes could not be added; a partially built message is removed
+ * from skb again.
+ */
+static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
+                          struct netlink_callback *cb,
+                          const struct smc_diag_req *req,
+                          struct nlattr *bc)
+{
+       struct smc_sock *smc = smc_sk(sk);
+       struct user_namespace *user_ns;
+       struct smc_diag_msg *r;
+       struct nlmsghdr *nlh;
+
+       nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+                       cb->nlh->nlmsg_type, sizeof(*r), NLM_F_MULTI);
+       if (!nlh)
+               return -EMSGSIZE;
+
+       r = nlmsg_data(nlh);
+       smc_diag_msg_common_fill(r, sk);
+       r->diag_state = sk->sk_state;
+       r->diag_fallback = smc->use_fallback;
+       /* uids are reported relative to the requesting socket's namespace */
+       user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk);
+       if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
+               goto errout;
+
+       /* extension <x> is requested through bit (<x> - 1) of diag_ext */
+       if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
+               struct smc_connection *conn = &smc->conn;
+               struct smc_diag_conninfo cinfo = {
+                       .token = conn->alert_token_local,
+                       .sndbuf_size = conn->sndbuf_size,
+                       .rmbe_size = conn->rmbe_size,
+                       .peer_rmbe_size = conn->peer_rmbe_size,
+
+                       .rx_prod.wrap = conn->local_rx_ctrl.prod.wrap,
+                       .rx_prod.count = conn->local_rx_ctrl.prod.count,
+                       .rx_cons.wrap = conn->local_rx_ctrl.cons.wrap,
+                       .rx_cons.count = conn->local_rx_ctrl.cons.count,
+
+                       .tx_prod.wrap = conn->local_tx_ctrl.prod.wrap,
+                       .tx_prod.count = conn->local_tx_ctrl.prod.count,
+                       .tx_cons.wrap = conn->local_tx_ctrl.cons.wrap,
+                       .tx_cons.count = conn->local_tx_ctrl.cons.count,
+
+                       /* each flags member is exported as one raw byte */
+                       .tx_prod_flags =
+                               *(u8 *)&conn->local_tx_ctrl.prod_flags,
+                       .tx_conn_state_flags =
+                               *(u8 *)&conn->local_tx_ctrl.conn_state_flags,
+                       .rx_prod_flags = *(u8 *)&conn->local_rx_ctrl.prod_flags,
+                       .rx_conn_state_flags =
+                               *(u8 *)&conn->local_rx_ctrl.conn_state_flags,
+
+                       .tx_prep.wrap = conn->tx_curs_prep.wrap,
+                       .tx_prep.count = conn->tx_curs_prep.count,
+                       .tx_sent.wrap = conn->tx_curs_sent.wrap,
+                       .tx_sent.count = conn->tx_curs_sent.count,
+                       .tx_fin.wrap = conn->tx_curs_fin.wrap,
+                       .tx_fin.count = conn->tx_curs_fin.count,
+               };
+
+               if (nla_put(skb, SMC_DIAG_CONNINFO, sizeof(cinfo), &cinfo) < 0)
+                       goto errout;
+       }
+
+       if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
+               /* only the first link (lnk[0]) of the link group is reported */
+               struct smc_diag_lgrinfo linfo = {
+                       .role = smc->conn.lgr->role,
+                       .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
+                       .lnk[0].link_id = smc->conn.lgr->lnk[0].link_id,
+               };
+
+               memcpy(linfo.lnk[0].ibname,
+                      smc->conn.lgr->lnk[0].smcibdev->ibdev->name,
+                      sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name));
+               /* GIDs go out as text, not as raw bytes */
+               smc_gid_be16_convert(linfo.lnk[0].gid,
+                                    smc->conn.lgr->lnk[0].gid.raw);
+               smc_gid_be16_convert(linfo.lnk[0].peer_gid,
+                                    smc->conn.lgr->lnk[0].peer_gid);
+
+               if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
+                       goto errout;
+       }
+
+       nlmsg_end(skb, nlh);
+       return 0;
+
+errout:
+       /* drop the incomplete message so that skb only holds whole ones */
+       nlmsg_cancel(skb, nlh);
+       return -EMSGSIZE;
+}
+
+/* Dump callback: walk the SMC socket list under its read lock and add one
+ * message to skb for every socket living in the requester's network
+ * namespace. The walk stops at the first socket that cannot be added.
+ * Returns 0, or the error reported by __smc_diag_dump().
+ */
+static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct smc_hashinfo *hinfo = smc_proto.h.smc_hash;
+       struct net *net = sock_net(skb->sk);
+       struct nlattr *bc = NULL;
+       struct sock *sk;
+       int rc = 0;
+
+       read_lock(&hinfo->lock);
+       if (!hlist_empty(&hinfo->ht)) {
+               sk_for_each(sk, &hinfo->ht) {
+                       /* sockets of other namespaces are not reported */
+                       if (!net_eq(sock_net(sk), net))
+                               continue;
+                       rc = __smc_diag_dump(sk, skb, cb,
+                                            nlmsg_data(cb->nlh), bc);
+                       if (rc)
+                               break;
+               }
+       }
+       read_unlock(&hinfo->lock);
+
+       return rc;
+}
+
+/* sock_diag entry point for AF_SMC requests. A SOCK_DIAG_BY_FAMILY request
+ * carrying NLM_F_DUMP starts a netlink dump served by smc_diag_dump() and
+ * returns the result of netlink_dump_start(); any other request is
+ * accepted without doing anything and returns 0.
+ */
+static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+       struct netlink_dump_control c = {
+               .dump = smc_diag_dump,
+               .min_dump_alloc = SKB_WITH_OVERHEAD(32768),
+       };
+       struct net *net = sock_net(skb->sk);
+
+       if (h->nlmsg_type != SOCK_DIAG_BY_FAMILY ||
+           !(h->nlmsg_flags & NLM_F_DUMP))
+               return 0;
+
+       return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+}
+
+/* sock_diag handler for AF_SMC; only the dump callback is provided */
+static const struct sock_diag_handler smc_diag_handler = {
+       .family = AF_SMC,
+       .dump = smc_diag_handler_dump,
+};
+
+/* Module init: register the AF_SMC handler with sock_diag and pass its
+ * return code on
+ */
+static int __init smc_diag_init(void)
+{
+       return sock_diag_register(&smc_diag_handler);
+}
+
+/* Module exit: remove the AF_SMC handler from sock_diag again */
+static void __exit smc_diag_exit(void)
+{
+       sock_diag_unregister(&smc_diag_handler);
+}
+
+module_init(smc_diag_init);
+module_exit(smc_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */);