struct smc_clc_msg_accept_confirm *clc)
{
smc->conn.peer_conn_idx = clc->conn_idx;
+ smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
+ atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
}
static void smc_link_save_peer_info(struct smc_link *link,
link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
smc_conn_save_peer_info(smc, &aclc);
+
+ rc = smc_sndbuf_create(smc);
+ if (rc) {
+ reason_code = SMC_CLC_DECL_MEM;
+ goto decline_rdma_unlock;
+ }
+ rc = smc_rmb_create(smc);
+ if (rc) {
+ reason_code = SMC_CLC_DECL_MEM;
+ goto decline_rdma_unlock;
+ }
+
if (local_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, &aclc);
/* tbd in follow-on patch: more steps to setup RDMA communcication,
}
link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
- /* tbd in follow-on patch: more steps to setup RDMA communcication,
- * create rmbs, map rmbs
- */
+ rc = smc_sndbuf_create(new_smc);
+ if (rc) {
+ reason_code = SMC_CLC_DECL_MEM;
+ goto decline_rdma;
+ }
+ rc = smc_rmb_create(new_smc);
+ if (rc) {
+ reason_code = SMC_CLC_DECL_MEM;
+ goto decline_rdma;
+ }
rc = smc_clc_send_accept(new_smc, local_contact);
if (rc)
IPPROTO_TCP, &smc->clcsock);
if (rc)
sk_common_release(sk);
+ smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
+ smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
out:
return rc;
struct smc_link_group *lgr; /* link group of connection */
u32 alert_token_local; /* unique conn. id */
u8 peer_conn_idx; /* from tcp handshake */
+ int peer_rmbe_size; /* size of peer rx buffer */
+ atomic_t peer_rmbe_space;/* remaining free bytes in peer
+ * rmbe
+ */
+
+ struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */
+ int sndbuf_size; /* sndbuf size <== sock wmem */
+ struct smc_buf_desc *rmb_desc; /* RMBE descriptor */
+ int rmbe_size; /* RMBE size <== sock rmem */
+ int rmbe_size_short;/* compressed notation */
};
struct smc_sock { /* smc sock container */
return be32_to_cpu(t);
}
+#define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */
+
+#define SMC_RMBE_SIZES 16 /* number of distinct sizes for an RMBE */
+/* theoretically, the RFC states that largest size would be 512K,
+ * i.e. compressed 5 and thus 6 sizes (0..5), despite
+ * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
+ */
+
+/* convert the RMB size into the compressed notation - minimum 16K.
+ * In contrast to plain ilog2, this rounds towards the next power of 2,
+ * so the socket application gets at least its desired sndbuf / rcvbuf size.
+ */
+static inline u8 smc_compress_bufsize(int size)
+{
+ u8 compressed;
+
+ if (size <= SMC_BUF_MIN_SIZE)
+ return 0;
+
+ size = (size - 1) >> 14;
+ compressed = ilog2(size) + 1;
+ if (compressed >= SMC_RMBE_SIZES)
+ compressed = SMC_RMBE_SIZES - 1;
+ return compressed;
+}
+
+/* convert the RMB size from compressed notation into integer */
+static inline int smc_uncompress_bufsize(u8 compressed)
+{
+ u32 size;
+
+ size = 0x00000001 << (((int)compressed) + 14);
+ return (int)size;
+}
+
#ifdef CONFIG_XFRM
static inline bool using_ipsec(struct smc_sock *smc)
{
struct smc_link *lnk;
u8 rndvec[3];
int rc = 0;
+ int i;
lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
if (!lgr) {
lgr->daddr = peer_in_addr;
memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
lgr->vlan_id = vlan_id;
+ rwlock_init(&lgr->sndbufs_lock);
+ rwlock_init(&lgr->rmbs_lock);
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ INIT_LIST_HEAD(&lgr->sndbufs[i]);
+ INIT_LIST_HEAD(&lgr->rmbs[i]);
+ }
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
lgr->conns_all = RB_ROOT;
return rc;
}
+static void smc_sndbuf_unuse(struct smc_connection *conn)
+{
+ if (conn->sndbuf_desc) {
+ conn->sndbuf_desc->used = 0;
+ conn->sndbuf_size = 0;
+ }
+}
+
+static void smc_rmb_unuse(struct smc_connection *conn)
+{
+ if (conn->rmb_desc) {
+ conn->rmb_desc->used = 0;
+ conn->rmbe_size = 0;
+ }
+}
+
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
if (!lgr)
return;
smc_lgr_unregister_conn(conn);
+ smc_rmb_unuse(conn);
+ smc_sndbuf_unuse(conn);
}
static void smc_link_clear(struct smc_link *lnk)
lnk->peer_qpn = 0;
}
+static void smc_lgr_free_sndbufs(struct smc_link_group *lgr)
+{
+ struct smc_buf_desc *sndbuf_desc, *bf_desc;
+ int i;
+
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ list_for_each_entry_safe(sndbuf_desc, bf_desc, &lgr->sndbufs[i],
+ list) {
+ kfree(sndbuf_desc->cpu_addr);
+ kfree(sndbuf_desc);
+ }
+ }
+}
+
+static void smc_lgr_free_rmbs(struct smc_link_group *lgr)
+{
+ struct smc_buf_desc *rmb_desc, *bf_desc;
+ int i;
+
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i],
+ list) {
+ kfree(rmb_desc->cpu_addr);
+ kfree(rmb_desc);
+ }
+ }
+}
+
/* remove a link group */
void smc_lgr_free(struct smc_link_group *lgr)
{
+ smc_lgr_free_rmbs(lgr);
+ smc_lgr_free_sndbufs(lgr);
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
kfree(lgr);
}
sizeof(lcl->mac)) &&
!lgr->sync_err &&
(lgr->role == role) &&
- (lgr->vlan_id == vlan_id)) {
+ (lgr->vlan_id == vlan_id) &&
+ ((role == SMC_CLNT) ||
+ (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
/* link group found */
local_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
out:
return rc ? rc : local_contact;
}
+
+/* try to reuse a sndbuf description slot of the sndbufs list for a certain
+ * buf_size; if not available, return NULL
+ */
+static inline
+struct smc_buf_desc *smc_sndbuf_get_slot(struct smc_link_group *lgr,
+ int compressed_bufsize)
+{
+ struct smc_buf_desc *sndbuf_slot;
+
+ read_lock_bh(&lgr->sndbufs_lock);
+ list_for_each_entry(sndbuf_slot, &lgr->sndbufs[compressed_bufsize],
+ list) {
+ if (cmpxchg(&sndbuf_slot->used, 0, 1) == 0) {
+ read_unlock_bh(&lgr->sndbufs_lock);
+ return sndbuf_slot;
+ }
+ }
+ read_unlock_bh(&lgr->sndbufs_lock);
+ return NULL;
+}
+
+/* try to reuse an rmb description slot of the rmbs list for a certain
+ * rmbe_size; if not available, return NULL
+ */
+static inline
+struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr,
+ int compressed_bufsize)
+{
+ struct smc_buf_desc *rmb_slot;
+
+ read_lock_bh(&lgr->rmbs_lock);
+ list_for_each_entry(rmb_slot, &lgr->rmbs[compressed_bufsize],
+ list) {
+ if (cmpxchg(&rmb_slot->used, 0, 1) == 0) {
+ read_unlock_bh(&lgr->rmbs_lock);
+ return rmb_slot;
+ }
+ }
+ read_unlock_bh(&lgr->rmbs_lock);
+ return NULL;
+}
+
+/* create the tx buffer for an SMC socket */
+int smc_sndbuf_create(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+ struct smc_link_group *lgr = conn->lgr;
+ int tmp_bufsize, tmp_bufsize_short;
+ struct smc_buf_desc *sndbuf_desc;
+ int rc;
+
+ /* use socket send buffer size (w/o overhead) as start value */
+ for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2);
+ tmp_bufsize_short >= 0; tmp_bufsize_short--) {
+ tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
+ /* check for reusable sndbuf_slot in the link group */
+ sndbuf_desc = smc_sndbuf_get_slot(lgr, tmp_bufsize_short);
+ if (sndbuf_desc) {
+ memset(sndbuf_desc->cpu_addr, 0, tmp_bufsize);
+ break; /* found reusable slot */
+ }
+ /* try to alloc a new send buffer */
+ sndbuf_desc = kzalloc(sizeof(*sndbuf_desc), GFP_KERNEL);
+ if (!sndbuf_desc)
+ break; /* give up with -ENOMEM */
+ sndbuf_desc->cpu_addr = kzalloc(tmp_bufsize,
+ GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NOMEMALLOC |
+ __GFP_NORETRY);
+ if (!sndbuf_desc->cpu_addr) {
+ kfree(sndbuf_desc);
+ /* if send buffer allocation has failed,
+ * try a smaller one
+ */
+ continue;
+ }
+ rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+ tmp_bufsize, sndbuf_desc,
+ DMA_TO_DEVICE);
+ if (rc) {
+ kfree(sndbuf_desc->cpu_addr);
+ kfree(sndbuf_desc);
+ continue; /* if mapping failed, try smaller one */
+ }
+ sndbuf_desc->used = 1;
+ write_lock_bh(&lgr->sndbufs_lock);
+ list_add(&sndbuf_desc->list,
+ &lgr->sndbufs[tmp_bufsize_short]);
+ write_unlock_bh(&lgr->sndbufs_lock);
+ break;
+ }
+ if (sndbuf_desc && sndbuf_desc->cpu_addr) {
+ conn->sndbuf_desc = sndbuf_desc;
+ conn->sndbuf_size = tmp_bufsize;
+ smc->sk.sk_sndbuf = tmp_bufsize * 2;
+ return 0;
+ } else {
+ return -ENOMEM;
+ }
+}
+
+/* create the RMB for an SMC socket (even though the SMC protocol
+ * allows more than one RMB-element per RMB, the Linux implementation
+ * uses just one RMB-element per RMB, i.e. uses an extra RMB for every
+ * connection in a link group
+ */
+int smc_rmb_create(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+ struct smc_link_group *lgr = conn->lgr;
+ int tmp_bufsize, tmp_bufsize_short;
+ struct smc_buf_desc *rmb_desc;
+ int rc;
+
+ /* use socket recv buffer size (w/o overhead) as start value */
+ for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_rcvbuf / 2);
+ tmp_bufsize_short >= 0; tmp_bufsize_short--) {
+ tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
+ /* check for reusable rmb_slot in the link group */
+ rmb_desc = smc_rmb_get_slot(lgr, tmp_bufsize_short);
+ if (rmb_desc) {
+ memset(rmb_desc->cpu_addr, 0, tmp_bufsize);
+ break; /* found reusable slot */
+ }
+ /* try to alloc a new RMB */
+ rmb_desc = kzalloc(sizeof(*rmb_desc), GFP_KERNEL);
+ if (!rmb_desc)
+ break; /* give up with -ENOMEM */
+ rmb_desc->cpu_addr = kzalloc(tmp_bufsize,
+ GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NOMEMALLOC |
+ __GFP_NORETRY);
+ if (!rmb_desc->cpu_addr) {
+ kfree(rmb_desc);
+ /* if RMB allocation has failed,
+ * try a smaller one
+ */
+ continue;
+ }
+ rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+ tmp_bufsize, rmb_desc,
+ DMA_FROM_DEVICE);
+ if (rc) {
+ kfree(rmb_desc->cpu_addr);
+ kfree(rmb_desc);
+ continue; /* if mapping failed, try smaller one */
+ }
+ rmb_desc->used = 1;
+ write_lock_bh(&lgr->rmbs_lock);
+ list_add(&rmb_desc->list,
+ &lgr->rmbs[tmp_bufsize_short]);
+ write_unlock_bh(&lgr->rmbs_lock);
+ break;
+ }
+ if (rmb_desc && rmb_desc->cpu_addr) {
+ conn->rmb_desc = rmb_desc;
+ conn->rmbe_size = tmp_bufsize;
+ conn->rmbe_size_short = tmp_bufsize_short;
+ smc->sk.sk_rcvbuf = tmp_bufsize * 2;
+ return 0;
+ } else {
+ return -ENOMEM;
+ }
+}