static int fi_ibv_msg_ep_reject(struct fid_pep *pep, fid_t handle,
				const void *param, size_t paramlen)
{
	struct fi_ibv_connreq *connreq;
	int ret;

	connreq = container_of(handle, struct fi_ibv_connreq, handle);
	ret = rdma_reject(connreq->id, param, (uint8_t) paramlen) ? -errno : 0;

	free(connreq);
	return ret;
}
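/*
 * A minimal sketch (not libfabric code): rdma_reject() takes a uint8_t
 * private-data length, so the (uint8_t) cast above silently truncates any
 * paramlen above 255. A caller-side guard makes that failure explicit.
 * reject_checked() is a hypothetical helper name.
 */
#include <errno.h>
#include <stdint.h>
#include <rdma/rdma_cma.h>

static int reject_checked(struct rdma_cm_id *id, const void *param,
			  size_t paramlen)
{
	if (paramlen > UINT8_MAX)
		return -EINVAL;	/* would be truncated by the uint8_t cast */
	return rdma_reject(id, param, (uint8_t) paramlen) ? -errno : 0;
}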
static int
connect_client (struct rdma_cm_id *client)
{
    if (!client)
        return -1;

    if (-1 == kiro_attach_qp (client)) {
        g_critical ("Could not create a QP for the new connection");
        rdma_destroy_id (client);
        return -1;
    }

    struct kiro_connection_context *ctx =
        (struct kiro_connection_context *)g_try_malloc0 (sizeof (struct kiro_connection_context));
    if (!ctx) {
        g_critical ("Failed to create connection context");
        rdma_destroy_id (client);
        return -1;
    }

    ctx->cf_mr_recv = kiro_create_rdma_memory (client->pd, sizeof (struct kiro_ctrl_msg),
                                               IBV_ACCESS_LOCAL_WRITE);
    ctx->cf_mr_send = kiro_create_rdma_memory (client->pd, sizeof (struct kiro_ctrl_msg),
                                               IBV_ACCESS_LOCAL_WRITE);
    if (!ctx->cf_mr_recv || !ctx->cf_mr_send) {
        g_critical ("Failed to register control message memory");
        goto error;
    }

    ctx->cf_mr_recv->size = ctx->cf_mr_send->size = sizeof (struct kiro_ctrl_msg);
    client->context = ctx;

    // Post a receive before accepting, so the peer's first control
    // message can never arrive while no receive buffer is available.
    if (rdma_post_recv (client, client, ctx->cf_mr_recv->mem,
                        ctx->cf_mr_recv->size, ctx->cf_mr_recv->mr)) {
        g_critical ("Posting preemptive receive for connection failed: %s", strerror (errno));
        goto error;
    }

    if (rdma_accept (client, NULL)) {
        g_warning ("Failed to establish connection to the client: %s", strerror (errno));
        goto error;
    }

    g_debug ("Client connection setup successful");
    return 0;

error:
    rdma_reject (client, NULL, 0);
    kiro_destroy_connection_context (&ctx);
    rdma_destroy_id (client);
    return -1;
}
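/*
 * A minimal sketch of the ordering connect_client() relies on: the receive
 * must be posted before rdma_accept(), otherwise the peer's first message
 * could hit a QP with an empty receive queue. accept_with_recv_posted() is
 * a hypothetical helper; buffer and MR setup are assumed done already.
 */
#include <rdma/rdma_cma.h>
#include <rdma/rdma_verbs.h>

static int accept_with_recv_posted(struct rdma_cm_id *id, void *buf,
				   size_t len, struct ibv_mr *mr)
{
	if (rdma_post_recv(id, buf /* wr context */, buf, len, mr))
		return -1;
	return rdma_accept(id, NULL);
}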
static int connect_handler(struct rdma_cm_id *cma_id)
{
	struct cmatest_node *node;
	struct rdma_conn_param conn_param;
	int ret;

	if (test.conn_index == connections) {
		ret = -ENOMEM;
		goto err1;
	}
	node = &test.nodes[test.conn_index++];

	node->cma_id = cma_id;
	cma_id->context = node;

	ret = verify_test_params(node);
	if (ret)
		goto err2;

	ret = init_node(node);
	if (ret)
		goto err2;

	ret = post_recvs(node);
	if (ret)
		goto err2;

	memset(&conn_param, 0, sizeof conn_param);
	conn_param.qp_num = node->cma_id->qp->qp_num;
	ret = rdma_accept(node->cma_id, &conn_param);
	if (ret) {
		perror("udaddy: failure accepting");
		goto err2;
	}
	node->connected = 1;
	test.connects_left--;
	return 0;

err2:
	node->cma_id = NULL;
	connect_error();
err1:
	printf("udaddy: failing connection request\n");
	rdma_reject(cma_id, NULL, 0);
	return ret;
}
static int connect_handler(struct rdma_cm_id *cma_id)
{
	struct cmatest_node *node;
	int ret;

	if (test.conn_index == connections) {
		ret = -ENOMEM;
		goto err1;
	}
	node = &test.nodes[test.conn_index++];

	node->cma_id = cma_id;
	cma_id->context = node;

	ret = init_node(node);
	if (ret)
		goto err2;

	ret = post_recvs(node);
	if (ret)
		goto err2;

	ret = rdma_accept(node->cma_id, NULL);
	if (ret) {
		perror("cmatose: failure accepting");
		goto err2;
	}
	return 0;

err2:
	node->cma_id = NULL;
	connect_error();
err1:
	printf("cmatose: failing connection request\n");
	rdma_reject(cma_id, NULL, 0);
	return ret;
}
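/*
 * A minimal sketch (modeled on the udaddy/cmatose pattern, not taken from
 * either test) of the event loop that drives a connect_handler() like the
 * two above. run_listener_loop() is a hypothetical name.
 */
#include <rdma/rdma_cma.h>

int connect_handler(struct rdma_cm_id *cma_id);	/* e.g. one of the above */

static int run_listener_loop(struct rdma_event_channel *channel)
{
	struct rdma_cm_event *event;

	while (!rdma_get_cm_event(channel, &event)) {
		switch (event->event) {
		case RDMA_CM_EVENT_CONNECT_REQUEST:
			/* event->id is the child id created for this
			 * request; the handler accepts or rejects it. */
			connect_handler(event->id);
			break;
		case RDMA_CM_EVENT_DISCONNECTED:
			rdma_disconnect(event->id);
			break;
		default:
			break;
		}
		/* Unacked events pin the id: rdma_destroy_id() blocks
		 * until all events for that id are acknowledged. */
		rdma_ack_cm_event(event);
	}
	return -1;
}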
static int ibw_refill_cq_recv(struct ibw_conn *conn)
{
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	int rc;
	struct ibv_sge list = {
		.addr	= (uintptr_t) NULL, /* filled below */
		.length	= pctx->opts.recv_bufsize,
		.lkey	= pconn->mr_recv->lkey /* always the same */
	};
	struct ibv_recv_wr wr = {
		.wr_id	 = 0, /* filled below */
		.sg_list = &list,
		.num_sge = 1,
	};
	struct ibv_recv_wr *bad_wr;

	DEBUG(DEBUG_DEBUG, ("ibw_refill_cq_recv(cmid: %p)\n", pconn->cm_id));

	list.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index;
	wr.wr_id = pconn->recv_index;
	pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;

	rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
	if (rc) {
		sprintf(ibw_lasterr, "refill/ibv_post_recv failed with %d\n", rc);
		DEBUG(DEBUG_ERR, (ibw_lasterr));
		return -2;
	}

	return 0;
}

static int ibw_fill_cq(struct ibw_conn *conn)
{
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	int i, rc;
	struct ibv_sge list = {
		.addr	= (uintptr_t) NULL, /* filled below */
		.length	= pctx->opts.recv_bufsize,
		.lkey	= pconn->mr_recv->lkey /* always the same */
	};
	struct ibv_recv_wr wr = {
		.wr_id	 = 0, /* filled below */
		.sg_list = &list,
		.num_sge = 1,
	};
	struct ibv_recv_wr *bad_wr;

	DEBUG(DEBUG_DEBUG, ("ibw_fill_cq(cmid: %p)\n", pconn->cm_id));

	for (i = pctx->opts.max_recv_wr; i != 0; i--) {
		list.addr = (uintptr_t) pconn->buf_recv +
			pctx->opts.recv_bufsize * pconn->recv_index;
		wr.wr_id = pconn->recv_index;
		pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;

		rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
		if (rc) {
			sprintf(ibw_lasterr, "fill/ibv_post_recv failed with %d\n", rc);
			DEBUG(DEBUG_ERR, (ibw_lasterr));
			return -2;
		}
	}

	return 0;
}

static int ibw_manage_connect(struct ibw_conn *conn)
{
	struct rdma_conn_param conn_param;
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	int rc;

	DEBUG(DEBUG_DEBUG, ("ibw_manage_connect(cmid: %p)\n", pconn->cm_id));

	if (ibw_setup_cq_qp(conn))
		return -1;

	/* cm connect */
	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = 10;

	rc = rdma_connect(pconn->cm_id, &conn_param);
	if (rc)
		sprintf(ibw_lasterr, "rdma_connect error %d\n", rc);

	return rc;
}

static void ibw_event_handler_cm(struct tevent_context *ev,
				 struct tevent_fd *fde,
				 uint16_t flags, void *private_data)
{
	int rc;
	struct ibw_ctx *ctx = talloc_get_type(private_data, struct ibw_ctx);
	struct ibw_ctx_priv *pctx = talloc_get_type(ctx->internal, struct ibw_ctx_priv);
	struct ibw_conn *conn = NULL;
	struct ibw_conn_priv *pconn = NULL;
	struct rdma_cm_id *cma_id = NULL;
	struct rdma_cm_event *event = NULL;

	assert(ctx != NULL);

	rc = rdma_get_cm_event(pctx->cm_channel, &event);
	if (rc) {
		ctx->state = IBWS_ERROR;
		event = NULL;
		sprintf(ibw_lasterr, "rdma_get_cm_event error %d\n", rc);
		goto error;
	}
	cma_id = event->id;

	DEBUG(DEBUG_DEBUG, ("cma_event type %d cma_id %p (%s)\n", event->event,
	      cma_id, (cma_id == pctx->cm_id) ? "parent" : "child"));

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_ADDR_RESOLVED\n"));
		/* continuing from ibw_connect ... */
		rc = rdma_resolve_route(cma_id, 2000);
		if (rc) {
			sprintf(ibw_lasterr, "rdma_resolve_route error %d\n", rc);
			goto error;
		}
		/* continued at RDMA_CM_EVENT_ROUTE_RESOLVED */
		break;

	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_ROUTE_RESOLVED\n"));
		/* after RDMA_CM_EVENT_ADDR_RESOLVED: */
		assert(cma_id->context != NULL);
		conn = talloc_get_type(cma_id->context, struct ibw_conn);

		rc = ibw_manage_connect(conn);
		if (rc)
			goto error;
		break;

	case RDMA_CM_EVENT_CONNECT_REQUEST:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_CONNECT_REQUEST\n"));
		ctx->state = IBWS_CONNECT_REQUEST;

		conn = ibw_conn_new(ctx, ctx);
		pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
		pconn->cm_id = cma_id; /* !!! event will be freed but id not */
		cma_id->context = (void *)conn;
		DEBUG(DEBUG_DEBUG, ("pconn->cm_id %p\n", pconn->cm_id));

		if (ibw_setup_cq_qp(conn))
			goto error;

		conn->state = IBWC_INIT;
		pctx->connstate_func(ctx, conn);

		/* continued at ibw_accept when invoked by the func above */
		if (!pconn->is_accepted) {
			rc = rdma_reject(cma_id, NULL, 0);
			if (rc)
				DEBUG(DEBUG_ERR, ("rdma_reject failed with rc=%d\n", rc));
			talloc_free(conn);
			DEBUG(DEBUG_DEBUG, ("pconn->cm_id %p wasn't accepted\n",
			      pconn->cm_id));
		}

		/* TODO: clarify whether this is needed by the upper layer: */
		ctx->state = IBWS_READY;
		pctx->connstate_func(ctx, NULL);

		/* NOTE: more requests can arrive until RDMA_CM_EVENT_ESTABLISHED! */
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		/* expected after ibw_accept and ibw_connect[not directly] */
		DEBUG(DEBUG_INFO, ("ESTABLISHED (conn: %p)\n", cma_id->context));
		conn = talloc_get_type(cma_id->context, struct ibw_conn);
		assert(conn != NULL); /* important assumption */

		DEBUG(DEBUG_DEBUG, ("ibw_setup_cq_qp succeeded (cmid=%p)\n", cma_id));

		/* client conn is up */
		conn->state = IBWC_CONNECTED;

		/* both ctx and conn have changed */
		pctx->connstate_func(ctx, conn);
		break;

	/* Each error case sets its own message and jumps to the error path;
	 * without the gotos the fall-through would overwrite the message. */
	case RDMA_CM_EVENT_ADDR_ERROR:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_ADDR_ERROR, error %d\n", event->status);
		goto error;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_ROUTE_ERROR, error %d\n", event->status);
		goto error;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_CONNECT_ERROR, error %d\n", event->status);
		goto error;
	case RDMA_CM_EVENT_UNREACHABLE:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_UNREACHABLE, error %d\n", event->status);
		goto error;

	case RDMA_CM_EVENT_REJECTED:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_REJECTED, error %d\n", event->status);
		DEBUG(DEBUG_INFO, ("cm event handler: %s", ibw_lasterr));
		conn = talloc_get_type(cma_id->context, struct ibw_conn);
		if (conn) {
			/* must be done BEFORE connstate */
			if ((rc = rdma_ack_cm_event(event)))
				DEBUG(DEBUG_ERR, ("reject/rdma_ack_cm_event failed with %d\n", rc));
			event = NULL; /* not to touch cma_id or conn */
			conn->state = IBWC_ERROR;
			/* it should free the conn */
			pctx->connstate_func(NULL, conn);
		}
		break; /* this is not strictly an error */

	case RDMA_CM_EVENT_DISCONNECTED:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_DISCONNECTED\n"));
		if ((rc = rdma_ack_cm_event(event)))
			DEBUG(DEBUG_ERR, ("disc/rdma_ack_cm_event failed with %d\n", rc));
		event = NULL; /* don't ack more */

		if (cma_id != pctx->cm_id) {
			DEBUG(DEBUG_ERR, ("client DISCONNECT event cm_id=%p\n", cma_id));
			conn = talloc_get_type(cma_id->context, struct ibw_conn);
			conn->state = IBWC_DISCONNECTED;
			pctx->connstate_func(NULL, conn);
		}
		break;

	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sprintf(ibw_lasterr, "cma detected device removal!\n");
		goto error;

	default:
		sprintf(ibw_lasterr, "unknown event %d\n", event->event);
		goto error;
	}

	if (event != NULL && (rc = rdma_ack_cm_event(event))) {
		sprintf(ibw_lasterr, "rdma_ack_cm_event failed with %d\n", rc);
		goto error;
	}

	return;

error:
	DEBUG(DEBUG_ERR, ("cm event handler: %s", ibw_lasterr));

	if (event != NULL) {
		if (cma_id != NULL && cma_id != pctx->cm_id) {
			conn = talloc_get_type(cma_id->context, struct ibw_conn);
			if (conn) {
				conn->state = IBWC_ERROR;
				pctx->connstate_func(NULL, conn);
			}
		} else {
			ctx->state = IBWS_ERROR;
			pctx->connstate_func(ctx, NULL);
		}

		if ((rc = rdma_ack_cm_event(event)) != 0) {
			DEBUG(DEBUG_ERR, ("rdma_ack_cm_event failed with %d\n", rc));
		}
	}

	return;
}
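/*
 * A sketch of the other end of the rdma_reject() calls above: the active
 * side sees RDMA_CM_EVENT_REJECTED, with the CM status in event->status
 * and any private data passed to rdma_reject() in event->param.conn.
 * parse_reject_payload() is a hypothetical, application-defined hook.
 */
#include <stdint.h>
#include <rdma/rdma_cma.h>

void parse_reject_payload(const void *data, uint8_t len);	/* app-defined */

static void on_rejected(const struct rdma_cm_event *event)
{
	const void *data = event->param.conn.private_data;
	uint8_t len = event->param.conn.private_data_len;

	if (len)
		parse_reject_payload(data, len);
}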
static int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = sdp_rx_size,
		.cap.max_inline_data = sdp_inline_thresh,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type = IB_QPT_RC,
	};
	struct ib_device *device = id->device;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	sdp_sk(sk)->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", sdp_sk(sk)->max_sge);

	qp_init_attr.cap.max_send_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n", qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n", qp_init_attr.cap.max_recv_sge);

	sdp_sk(sk)->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!sdp_sk(sk)->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(sdp_sk(sk), device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(sdp_sk(sk), device);
	if (rc)
		goto err_tx;

	qp_init_attr.recv_cq = sdp_sk(sk)->rx_ring.cq;
	qp_init_attr.send_cq = sdp_sk(sk)->tx_ring.cq;

	rc = rdma_create_qp(id, sdp_sk(sk)->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	sdp_sk(sk)->qp = id->qp;
	sdp_sk(sk)->ib_device = device;
	sdp_sk(sk)->qp_active = 1;
	sdp_sk(sk)->context.device = device;
	sdp_sk(sk)->inline_thresh = qp_init_attr.cap.max_inline_data;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(sdp_sk(sk));
err_tx:
	sdp_rx_ring_destroy(sdp_sk(sk));
err_rx:
	return rc;
}

static int sdp_get_max_send_frags(u32 buf_size)
{
	return MIN(
		/* +1 to compensate for unaligned buffers */
		(PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1,
		SDP_MAX_SEND_SGES - 1);
}

static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id,
			       struct rdma_cm_event *event)
{
	struct sockaddr_in *dst_addr;
	struct sock *child;
	const struct sdp_hh *h;
	int rc = 0;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	if (h->ipv_cap & HH_IPV_MASK & ~(HH_IPV4 | HH_IPV6)) {
		sdp_warn(sk, "Bad IPV field in SDP Hello header: 0x%x\n",
			 h->ipv_cap & HH_IPV_MASK);
		return -EINVAL;
	}

	if (!h->max_adverts)
		return -EINVAL;

#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0))
	child = sk_clone(sk, GFP_KERNEL);
#else
	child = sk_clone_lock(sk, GFP_KERNEL);
#endif
	if (!child)
		return -ENOMEM;

	sdp_init_sock(child);

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	sdp_inet_dport(child) = dst_addr->sin_port;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	if (inet6_sk(sk)) {
		struct ipv6_pinfo *newnp;

		newnp = inet_sk(child)->pinet6 = sdp_inet6_sk_generic(child);
		memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo));

		if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) {
			/* V6 mapped */
			sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
			ipv6_addr_set(&child->sk_v6_daddr, 0, 0, htonl(0x0000FFFF),
				      h->src_addr.ip4.addr);
			ipv6_addr_set(&child->sk_v6_rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      h->dst_addr.ip4.addr);
			ipv6_addr_copy(&child->sk_v6_rcv_saddr, &child->sk_v6_daddr);
		} else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) {
			struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst_addr;
			struct sockaddr_in6 *src_addr6 =
				(struct sockaddr_in6 *)&id->route.addr.src_addr;

			ipv6_addr_copy(&child->sk_v6_daddr, &dst_addr6->sin6_addr);
			ipv6_addr_copy(&child->sk_v6_rcv_saddr, &src_addr6->sin6_addr);
			ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr);
		} else {
			sdp_warn(child, "Bad IPV field: 0x%x\n",
				 h->ipv_cap & HH_IPV_MASK);
		}

		sdp_inet_daddr(child) = sdp_inet_saddr(child) =
			sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6;
	} else
#endif
	{
		sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
	}

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	__sock_put(child, SOCK_REF_CLONE);

	down_read(&device_removal_lock);

	rc = sdp_init_qp(child, id);
	if (rc) {
		bh_unlock_sock(child);
		up_read(&device_removal_lock);
		sdp_sk(child)->destructed_already = 1;
#ifdef SDP_SOCK_HISTORY
		sdp_ssk_hist_close(child);
#endif
		sk_free(child);
		return rc;
	}

	sdp_sk(child)->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&sdp_sk(child)->tx_ring.credits, sdp_sk(child)->max_bufs);
	sdp_sk(child)->min_bufs = tx_credits(sdp_sk(child)) / 4;
	sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
	sdp_sk(child)->send_frags =
		sdp_get_max_send_frags(sdp_sk(child)->xmit_size_goal);

	sdp_init_buffers(sdp_sk(child), rcvbuf_initial_size);

	id->context = child;
	sdp_sk(child)->id = id;

	list_add_tail(&sdp_sk(child)->backlog_queue, &sdp_sk(sk)->backlog_queue);
	sdp_sk(child)->parent = sk;

	bh_unlock_sock(child);
	sdp_add_sock(sdp_sk(child));
	up_read(&device_removal_lock);

	sdp_exch_state(child, TCPF_LISTEN | TCPF_CLOSE, TCP_SYN_RECV);

	/* child->sk_write_space(child); */
	/* child->sk_data_ready(child, 0); */
	sk->sk_data_ready(sk);

	return 0;
}

static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
				struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;

	sdp_dbg(sk, "%s\n", __func__);

	sdp_exch_state(sk, TCPF_SYN_SENT, TCP_ESTABLISHED);
	sdp_set_default_moderation(sdp_sk(sk));

	if (sock_flag(sk, SOCK_KEEPOPEN))
		sdp_start_keepalive_timer(sk);

	if (sock_flag(sk, SOCK_DEAD))
		return 0;

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	sdp_sk(sk)->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&sdp_sk(sk)->tx_ring.credits, sdp_sk(sk)->max_bufs);
	sdp_sk(sk)->min_bufs = tx_credits(sdp_sk(sk)) / 4;
	sdp_sk(sk)->xmit_size_goal = ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	sdp_sk(sk)->send_frags =
		sdp_get_max_send_frags(sdp_sk(sk)->xmit_size_goal);
	sdp_sk(sk)->xmit_size_goal = MIN(sdp_sk(sk)->xmit_size_goal,
					 sdp_sk(sk)->send_frags * PAGE_SIZE);

	sdp_sk(sk)->poll_cq = 1;

	sk->sk_state_change(sk);
	sk_wake_async(sk, 0, POLL_OUT);

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	sdp_inet_dport(sk) = dst_addr->sin_port;
	sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr;

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	return 0;
}

static int sdp_connected_handler(struct sock *sk)
{
	struct sock *parent;

	sdp_dbg(sk, "%s\n", __func__);

	parent = sdp_sk(sk)->parent;
	BUG_ON(!parent);

	sdp_exch_state(sk, TCPF_SYN_RECV, TCP_ESTABLISHED);

#ifdef SDP_SOCK_HISTORY
	sdp_ssk_hist_rename(sk);
#endif
	sdp_set_default_moderation(sdp_sk(sk));

	if (sock_flag(sk, SOCK_KEEPOPEN))
		sdp_start_keepalive_timer(sk);

	if (sock_flag(sk, SOCK_DEAD))
		return 0;

	lock_sock(parent);
	if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
		sdp_dbg(sk, "parent is going away.\n");
		goto done;
	}

	sk_acceptq_added(parent);
	sdp_dbg(parent, "%s child connection established\n", __func__);
	list_del_init(&sdp_sk(sk)->backlog_queue);
	list_add_tail(&sdp_sk(sk)->accept_queue, &sdp_sk(parent)->accept_queue);

	parent->sk_state_change(parent);
	sk_wake_async(parent, 0, POLL_OUT);
done:
	release_sock(parent);

	return 0;
}

static int sdp_disconnected_handler(struct sock *sk)
{
	struct sdp_sock *ssk = sdp_sk(sk);

	sdp_dbg(sk, "%s\n", __func__);

	if (ssk->tx_ring.cq)
		if (sdp_xmit_poll(ssk, 1))
			sdp_post_sends(ssk, 0);

	if (sk->sk_state == TCP_SYN_RECV) {
		sdp_connected_handler(sk);

		if (rcv_nxt(ssk))
			return 0;
	}

	return -ECONNRESET;
}

int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct sock *parent = NULL;
	struct sock *child = NULL;
	struct sock *sk;
	struct sdp_hah hah;
	struct sdp_hh hh;
	int rc = 0, rc2;

	sk = id->context;
	if (!sk) {
		sdp_dbg(NULL, "cm_id is being torn down, event %s\n",
			rdma_cm_event_str(event->event));
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
	}

	sdp_add_to_history(sk, rdma_cm_event_str(event->event));
	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	sdp_dbg(sk, "event: %s\n", rdma_cm_event_str(event->event));
	if (!sdp_sk(sk)->id) {
		sdp_dbg(sk, "socket is being torn down\n");
		rc = event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
			-EINVAL : 0;
		release_sock(sk);
		return rc;
	}

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		if (sdp_link_layer_ib_only &&
		    rdma_node_get_transport(id->device->node_type) ==
			RDMA_TRANSPORT_IB &&
		    rdma_port_get_link_layer(id->device, id->port_num) !=
			IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
				"is allowed\n",
				rdma_port_get_link_layer(id->device, id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;

		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;
		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		atomic_set(&sdp_sk(sk)->remote_credits, rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags *
			PAGE_SIZE + sizeof(struct sdp_bsdh));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		if (inet6_sk(sk)) {
			struct sockaddr *src_addr =
				(struct sockaddr *)&id->route.addr.src_addr;
			struct sockaddr_in *addr4 = (struct sockaddr_in *)src_addr;
			struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)src_addr;

			if (src_addr->sa_family == AF_INET) {
				/* IPv4 over IPv6 */
				ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0,
					      htonl(0xFFFF),
					      addr4->sin_addr.s_addr);
			} else {
				sk->sk_v6_rcv_saddr = addr6->sin6_addr;
			}
			inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr;
		} else
#endif
		{
			sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
				((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		}
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = sdp_retry_count;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hh.bsdh);

		if (sdp_apm_enable) {
			rc = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
			if (rc)
				sdp_warn(sk, "APM couldn't be enabled for active side: %d\n", rc);
		}

		rc = rdma_connect(id, &conn_param);
		break;
	case RDMA_CM_EVENT_ALT_ROUTE_RESOLVED:
		sdp_dbg(sk, "alt route was resolved slid=%d, dlid=%d\n",
			id->route.path_rec[1].slid, id->route.path_rec[1].dlid);
		break;
	case RDMA_CM_EVENT_ALT_PATH_LOADED:
		sdp_dbg(sk, "alt route path loaded\n");
		break;
	case RDMA_CM_EVENT_ALT_ROUTE_ERROR:
		sdp_warn(sk, "alt route resolve error\n");
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		child = id->context;
		atomic_set(&sdp_sk(child)->remote_credits,
			   rx_ring_posted(sdp_sk(child)));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(sdp_sk(child)));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE +
				     sizeof(struct sdp_bsdh));
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = sdp_retry_count;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			sdp_sk(child)->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			parent = sdp_sk(child)->parent; /* TODO: hold ? */
		} else if (sdp_apm_enable) {
			rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
			if (rc2)
				sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2);
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else {
			rc = rdma_accept(id, NULL);
			if (!rc && sdp_apm_enable) {
				rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST);
				if (rc2)
					sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2);
			}
		}
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
			((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		if (sk->sk_state == TCP_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(sdp_sk(sk));
			sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);
			sdp_dbg(sk, "%s: waiting for Infiniband tear down\n",
				__func__);
		}

		sdp_sk(sk)->qp_active = 0;
		rdma_disconnect(id);

		if (sk->sk_state != TCP_TIME_WAIT) {
			if (sk->sk_state == TCP_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
					"TCP_CLOSE_WAIT taking reference to "
					"let close() finish the work\n");
				sock_hold(sk, SOCK_REF_CMA);
				sdp_start_cma_timewait_timeout(sdp_sk(sk),
						SDP_CMA_TIMEWAIT_TIMEOUT);
			}
			sdp_set_error(sk, -EPIPE);
			rc = sdp_disconnected_handler(sk);
		}
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		rc = sdp_disconnected_handler(sk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		rc = -ENETRESET;
		break;
	case RDMA_CM_EVENT_ADDR_CHANGE:
		sdp_dbg(sk, "Got Address change event\n");
		rc = 0;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n", event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event: %s handled\n", rdma_cm_event_str(event->event));

	if (rc && sdp_sk(sk)->id == id) {
		child = sk;
		sdp_sk(sk)->id = NULL;
		id->qp = NULL;
		id->context = NULL;
		parent = sdp_sk(sk)->parent;
		sdp_reset_sk(sk, rc);
	}

	release_sock(sk);

	sdp_dbg(sk, "event: %s done. status %d\n",
		rdma_cm_event_str(event->event), rc);

	if (parent) {
		lock_sock(parent);
		if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */
			sdp_dbg(sk, "parent is going away.\n");
			child = NULL;
			goto done;
		}
		if (!list_empty(&sdp_sk(child)->backlog_queue))
			list_del_init(&sdp_sk(child)->backlog_queue);
		else
			child = NULL;
done:
		release_sock(parent);
		if (child)
			sdp_common_release(child);
	}

	return rc;
}
static int isert_cm_conn_req_handler(struct rdma_cm_id *cm_id,
				     struct rdma_cm_event *event)
{
	/* passed in rdma_create_id */
	struct isert_portal *portal = cm_id->context;
	struct ib_device *ib_dev = cm_id->device;
	struct isert_device *isert_dev;
	struct isert_connection *isert_conn;
	struct rdma_conn_param *ini_conn_param;
	struct rdma_conn_param tgt_conn_param;
	struct isert_cm_hdr cm_hdr = { 0 };
	int err;

	TRACE_ENTRY();

	if (unlikely(!try_module_get(THIS_MODULE))) {
		err = -EINVAL;
		goto fail_get;
	}

	mutex_lock(&dev_list_mutex);
	isert_dev = isert_device_find(ib_dev);
	if (!isert_dev) {
		isert_dev = isert_device_create(ib_dev);
		if (unlikely(IS_ERR(isert_dev))) {
			err = PTR_ERR(isert_dev);
			mutex_unlock(&dev_list_mutex);
			goto fail_dev_create;
		}
	}
	isert_dev->refcnt++;
	mutex_unlock(&dev_list_mutex);

	isert_conn = isert_conn_create(cm_id, isert_dev);
	if (unlikely(IS_ERR(isert_conn))) {
		err = PTR_ERR(isert_conn);
		goto fail_conn_create;
	}

	isert_conn->state = ISER_CONN_HANDSHAKE;
	isert_conn->portal = portal;

	mutex_lock(&dev_list_mutex);
	list_add_tail(&isert_conn->portal_node, &portal->conn_list);
	mutex_unlock(&dev_list_mutex);

	/* initiator is dst, target is src */
	memcpy(&isert_conn->peer_addr, &cm_id->route.addr.dst_addr,
	       sizeof(isert_conn->peer_addr));
	memcpy(&isert_conn->self_addr, &cm_id->route.addr.src_addr,
	       sizeof(isert_conn->self_addr));

	ini_conn_param = &event->param.conn;
	memset(&tgt_conn_param, 0, sizeof(tgt_conn_param));
	tgt_conn_param.flow_control = ini_conn_param->flow_control;
	tgt_conn_param.rnr_retry_count = ini_conn_param->rnr_retry_count;

	/* Advertise no more outstanding RDMA reads than both the local
	 * device supports and the initiator requested. */
	tgt_conn_param.initiator_depth = isert_dev->device_attr.max_qp_init_rd_atom;
	if (tgt_conn_param.initiator_depth > ini_conn_param->initiator_depth)
		tgt_conn_param.initiator_depth = ini_conn_param->initiator_depth;

	tgt_conn_param.private_data_len = sizeof(cm_hdr);
	tgt_conn_param.private_data = &cm_hdr;
	cm_hdr.flags = ISER_ZBVA_NOT_SUPPORTED | ISER_SEND_W_INV_NOT_SUPPORTED;

	kref_get(&isert_conn->kref);

	err = rdma_accept(cm_id, &tgt_conn_param);
	if (unlikely(err)) {
		pr_err("Failed to accept conn request, err:%d\n", err);
		goto fail_accept;
	}

	switch (isert_conn->peer_addr.ss_family) {
	case AF_INET:
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33)
		pr_info("iser accepted connection cm_id:%p "
			NIPQUAD_FMT "->" NIPQUAD_FMT "\n", cm_id,
			NIPQUAD(((struct sockaddr_in *)&isert_conn->peer_addr)->sin_addr.s_addr),
			NIPQUAD(((struct sockaddr_in *)&isert_conn->self_addr)->sin_addr.s_addr));
#else
		pr_info("iser accepted connection cm_id:%p %pI4->%pI4\n", cm_id,
			&((struct sockaddr_in *)&isert_conn->peer_addr)->sin_addr.s_addr,
			&((struct sockaddr_in *)&isert_conn->self_addr)->sin_addr.s_addr);
#endif
		break;
	case AF_INET6:
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29)
		pr_info("iser accepted connection cm_id:%p "
			NIP6_FMT "->" NIP6_FMT "\n", cm_id,
			NIP6(((struct sockaddr_in6 *)&isert_conn->peer_addr)->sin6_addr),
			NIP6(((struct sockaddr_in6 *)&isert_conn->self_addr)->sin6_addr));
#else
		pr_info("iser accepted connection cm_id:%p %pI6->%pI6\n", cm_id,
			&((struct sockaddr_in6 *)&isert_conn->peer_addr)->sin6_addr,
			&((struct sockaddr_in6 *)&isert_conn->self_addr)->sin6_addr);
#endif
		break;
	default:
		pr_info("iser accepted connection cm_id:%p\n", cm_id);
	}

out:
	TRACE_EXIT_RES(err);
	return err;

fail_accept:
	set_bit(ISERT_CONNECTION_ABORTED, &isert_conn->flags);
	isert_cm_timewait_exit_handler(cm_id, NULL);
	err = 0;
	goto out;

fail_conn_create:
	mutex_lock(&dev_list_mutex);
	isert_deref_device(isert_dev);
	mutex_unlock(&dev_list_mutex);

fail_dev_create:
	rdma_reject(cm_id, NULL, 0);
	/* Only drop the module reference if try_module_get() succeeded;
	 * the fail_get path must not call module_put(). */
	module_put(THIS_MODULE);

fail_get:
	goto out;
}
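/*
 * The initiator_depth clamp above, shown in isolation with userspace field
 * names (the kernel struct is equivalent). max_init_rd_atom would come from
 * ibv_query_device(); clamp_conn_param() is a hypothetical helper.
 */
#include <stdint.h>
#include <rdma/rdma_cma.h>

static void clamp_conn_param(struct rdma_conn_param *tgt,
			     const struct rdma_conn_param *ini,
			     uint8_t max_init_rd_atom)
{
	tgt->flow_control = ini->flow_control;
	tgt->rnr_retry_count = ini->rnr_retry_count;
	/* Never promise more outstanding RDMA reads than the local
	 * device supports or the initiator asked for. */
	tgt->initiator_depth = max_init_rd_atom;
	if (tgt->initiator_depth > ini->initiator_depth)
		tgt->initiator_depth = ini->initiator_depth;
}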
/**
 * Process RC connection request event.
 *
 * @param[in] iface
 * @param[in] event
 *
 * @return status
 */
static void process_connect_request(struct iface *iface, struct rdma_cm_event *event)
{
	const struct cm_priv_request *priv;
	struct cm_priv_reject rej;
	conn_t *conn;
	int ret = 0;
	int c;
	ni_t *ni;

	if (!event->param.conn.private_data ||
	    (event->param.conn.private_data_len <
	     sizeof(struct cm_priv_request))) {
		rej.reason = REJECT_REASON_BAD_PARAM;
		goto reject;
	}

	priv = event->param.conn.private_data;
	ni = iface->ni[ni_options_to_type(priv->options)];

	if (!ni) {
		rej.reason = REJECT_REASON_NO_NI;
		goto reject;
	}

	conn = get_conn(ni, priv->src_id);
	if (!conn) {
		WARN();
		rej.reason = REJECT_REASON_ERROR;
		goto reject;
	}

	pthread_mutex_lock(&conn->mutex);

	switch (conn->state) {
	case CONN_STATE_CONNECTED:
		/* We received a connection request but we are already
		 * connected. Reject it. */
		rej.reason = REJECT_REASON_CONNECTED;
		pthread_mutex_unlock(&conn->mutex);
		conn_put(conn);
		goto reject;

	case CONN_STATE_DISCONNECTED:
		/* We received a connection request and we are
		 * disconnected: accept it. */
		ret = accept_connection_request(ni, conn, event);
		break;

	case CONN_STATE_DISCONNECTING:
		/* Not sure how to handle that case. Ignore and
		 * disconnect anyway? */
		rej.reason = REJECT_REASON_DISCONNECTING;
		pthread_mutex_unlock(&conn->mutex);
		conn_put(conn);
		goto reject;

	case CONN_STATE_RESOLVING_ADDR:
	case CONN_STATE_RESOLVING_ROUTE:
	case CONN_STATE_CONNECTING:
		/* We received a connection request but we are already
		 * connecting:
		 * - accept the connection from the higher id
		 * - reject the connection from the lower id
		 * - accept the connection from self, but clean up */
		c = compare_id(&priv->src_id, &ni->id);
		if (c > 0)
			ret = accept_connection_request(ni, conn, event);
		else if (c < 0) {
			rej.reason = REJECT_REASON_CONNECTING;
			pthread_mutex_unlock(&conn->mutex);
			conn_put(conn);
			goto reject;
		} else {
			ret = accept_connection_self(ni, conn, event);
		}
		break;
	}

	pthread_mutex_unlock(&conn->mutex);
	conn_put(conn);
	return;

reject:
	rdma_reject(event->id, &rej, sizeof(rej));
	return;
}
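/*
 * A sketch of decoding the reject reason on the active side; the struct
 * layout here is assumed, mirroring the cm_priv_reject usage above rather
 * than copied from the Portals sources.
 */
#include <stdint.h>
#include <rdma/rdma_cma.h>

struct cm_priv_reject_view { uint32_t reason; };	/* assumed layout */

static int reject_reason(const struct rdma_cm_event *event)
{
	const struct cm_priv_reject_view *rej = event->param.conn.private_data;

	if (event->event != RDMA_CM_EVENT_REJECTED || !rej ||
	    event->param.conn.private_data_len < sizeof(*rej))
		return -1;	/* no usable reason attached */
	return (int) rej->reason;
}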
/**
 * \brief Create an RDMA transport server
 *
 * \param cmid The CM id passed up in the connect event
 * \param q_depth A hint from the client on the depth of its SQ/RQ
 * \param msize The max message size
 * \returns A pointer to the newly allocated transport
 */
Nptrans *
np_rdmatrans_create(struct rdma_cm_id *cmid, int q_depth, int msize)
{
	int i, ret;
	u8 *p;
	struct Nptrans *trans;
	struct Rdmatrans *rdma;
	struct ibv_qp_init_attr qp_attr;
	struct rdma_conn_param cparam;

	rdma = calloc(1, sizeof *rdma);
	if (!rdma)
		goto error;

	ret = pthread_mutex_init(&rdma->lock, NULL);
	if (ret)
		goto error;

	ret = pthread_cond_init(&rdma->cond, NULL);
	if (ret)
		goto error;

	rdma->connected = 0;
	rdma->cm_id = cmid;
	rdma->context = cmid->verbs;
	rdma->q_depth = q_depth;
	rdma->msize = msize + sizeof(Rdmactx);

	rdma->pd = ibv_alloc_pd(rdma->context);
	if (!rdma->pd)
		goto error;

	/* Create receive buffer space and register it */
	rdma->rcv_buf = malloc(rdma->msize * q_depth);
	if (!rdma->rcv_buf)
		goto error;

	rdma->rcv_mr = ibv_reg_mr(rdma->pd, rdma->rcv_buf, rdma->msize * q_depth,
				  IBV_ACCESS_LOCAL_WRITE);
	if (!rdma->rcv_mr)
		goto error;

	/* Create send buffer space and register it */
	rdma->snd_buf = malloc(rdma->msize * q_depth);
	if (!rdma->snd_buf)
		goto error;

	rdma->next_buf = 0;
	rdma->snd_mr = ibv_reg_mr(rdma->pd, rdma->snd_buf, rdma->msize * q_depth, 0);
	if (!rdma->snd_mr)
		goto error;

	/* Create the completion channel and the CQ */
	rdma->ch = ibv_create_comp_channel(rdma->context);
	if (!rdma->ch)
		goto error;

	rdma->fd = rdma->ch->fd;
	rdma->cq = ibv_create_cq(rdma->context, 2 * q_depth, rdma, rdma->ch, 0);
	if (!rdma->cq)
		goto error;

	ibv_req_notify_cq(rdma->cq, 0);

	/* Create the QP */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	qp_attr.cap.max_send_wr = q_depth;
	qp_attr.cap.max_recv_wr = q_depth;
	qp_attr.cap.max_send_sge = 1;
	qp_attr.cap.max_recv_sge = 1;
	qp_attr.cap.max_inline_data = 64;
	qp_attr.qp_type = IBV_QPT_RC;
	ret = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (ret)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Pre-post the receive ring before accepting */
	p = rdma->rcv_buf;
	for (i = 0; i < q_depth; i++)
		rdma_post_recv(rdma, (Rdmactx *)(p + i * rdma->msize));

	trans = np_trans_create(rdma, rdma_trans_recv, rdma_trans_send,
				rdma_trans_destroy);
	if (!trans)
		goto error;
	rdma->trans = trans;

	memset(&cparam, 0, sizeof(cparam));
	cparam.responder_resources = 1;
	cparam.initiator_depth = 1;
	cparam.private_data = NULL;
	cparam.private_data_len = 0;
	ret = rdma_accept(cmid, &cparam);
	if (ret) {
		np_uerror(ret);
		goto error;
	}
	rdma->connected = 1;
	return trans;

error:
	if (rdma)
		rdma_trans_destroy(rdma);
	rdma_reject(cmid, NULL, 0);
	return NULL;
}
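/*
 * A sketch of how the completion channel created above is typically
 * drained: every wakeup on rdma->fd must be matched by get/ack and a
 * re-arm via ibv_req_notify_cq() before polling, or further completion
 * events can be missed. drain_cq() is a hypothetical helper, not npfs code.
 */
#include <infiniband/verbs.h>

static int drain_cq(struct ibv_comp_channel *ch)
{
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	struct ibv_wc wc;
	int n;

	if (ibv_get_cq_event(ch, &ev_cq, &ev_ctx))
		return -1;
	ibv_ack_cq_events(ev_cq, 1);
	if (ibv_req_notify_cq(ev_cq, 0))	/* re-arm before polling */
		return -1;
	while ((n = ibv_poll_cq(ev_cq, 1, &wc)) > 0) {
		/* dispatch wc.wr_id / wc.status to the transport here */
	}
	return n;	/* 0 when drained, negative on poll error */
}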
static gboolean
process_cm_event (GIOChannel *source, GIOCondition condition, gpointer data)
{
    // Right now, we don't need 'source' and 'condition'
    // Tell the compiler to ignore them by (void)-ing them
    (void) source;
    (void) condition;

    g_debug ("CM event handler triggered");
    if (!G_TRYLOCK (connection_handling)) {
        // Unsafe to handle connection management right now.
        // Wait for next dispatch.
        g_debug ("Connection handling is busy. Waiting for next dispatch");
        return TRUE;
    }

    KiroServerPrivate *priv = (KiroServerPrivate *)data;
    struct rdma_cm_event *active_event;

    if (0 <= rdma_get_cm_event (priv->ec, &active_event)) {
        struct rdma_cm_event *ev = g_try_malloc (sizeof (*active_event));
        if (!ev) {
            g_critical ("Unable to allocate memory for Event handling!");
            rdma_ack_cm_event (active_event);
            goto exit;
        }

        // Copy the event and ack it immediately, so the CM channel is
        // not blocked while the event is processed.
        memcpy (ev, active_event, sizeof (*active_event));
        rdma_ack_cm_event (active_event);

        if (ev->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
            if (TRUE == priv->close_signal) {
                //Main thread has signalled shutdown!
                //Don't connect this client any more.
                //Sorry mate!
                rdma_reject (ev->id, NULL, 0);
                goto exit;
            }

            do {
                g_debug ("Got connection request from client");
                struct kiro_client_connection *cc =
                    (struct kiro_client_connection *)g_try_malloc (sizeof (struct kiro_client_connection));
                if (!cc) {
                    errno = ENOMEM;
                    rdma_reject (ev->id, NULL, 0);
                    goto fail;
                }

                if (connect_client (ev->id))
                    goto fail;

                // Post a welcoming "Receive" for handshaking
                if (grant_client_access (ev->id, priv->mem, priv->mem_size, KIRO_ACK_RDMA))
                    goto fail;

                // Make the respective Queue push events onto the channel
                ibv_req_notify_cq (ev->id->recv_cq, 0);

                // Connection set up successfully! (Server)
                // ctx was created by 'connect_client'
                struct kiro_connection_context *ctx =
                    (struct kiro_connection_context *) (ev->id->context);
                ctx->identifier = priv->next_client_id++;
                ctx->container = cc; // Make the connection aware of its container

                // Fill the client connection container. Also create a
                // g_io_channel wrapper for the new client's receive queue
                // event channel and add a main_loop watch to it.
                cc->id = ctx->identifier;
                cc->conn = ev->id;
                cc->rcv_ec = g_io_channel_unix_new (ev->id->recv_cq_channel->fd);
                priv->clients = g_list_append (priv->clients, (gpointer)cc);
                GList *client = g_list_find (priv->clients, (gpointer)cc);
                if (!client->data || client->data != cc) {
                    g_critical ("Could not add client to list");
                    goto fail;
                }
                cc->source_id = g_io_add_watch (cc->rcv_ec, G_IO_IN | G_IO_PRI,
                                                process_rdma_event, (gpointer)client);
                g_io_channel_unref (cc->rcv_ec); // main_loop now holds a reference.
                                                 // We don't need ours any more.

                g_debug ("Client connection assigned with ID %u", ctx->identifier);
                g_debug ("Currently %u clients in total are connected",
                         g_list_length (priv->clients));
                break;

            fail:
                g_warning ("Failed to accept client connection: %s", strerror (errno));
                if (errno == EINVAL)
                    g_message ("This might happen if the client pulls back the connection request before the server can handle it.");
            } while (0);
        }
        else if (ev->event == RDMA_CM_EVENT_DISCONNECTED) {
            struct kiro_connection_context *ctx =
                (struct kiro_connection_context *) (ev->id->context);
            if (!ctx->container) {
                g_debug ("Got disconnect request from unknown client");
                goto exit;
            }

            GList *client = g_list_find (priv->clients, (gconstpointer) ctx->container);
            if (client) {
                g_debug ("Got disconnect request from client ID %u", ctx->identifier);
                struct kiro_client_connection *cc = (struct kiro_client_connection *)ctx->container;
                g_source_remove (cc->source_id); // this also unrefs the GIOChannel of the source. Nice.
                priv->clients = g_list_delete_link (priv->clients, client);
                g_free (cc);
                ctx->container = NULL;
            }
            else
                g_debug ("Got disconnect request from unknown client");

            // Note:
            // The ProtectionDomain needs to be buffered and freed manually.
            // Each connecting client is attached with its own pd, which we
            // create manually. So we also need to clean it up manually.
            // This needs to be done AFTER the connection is brought down,
            // so we buffer the pointer to the pd and clean it up afterwards.
            struct ibv_pd *pd = ev->id->pd;
            kiro_destroy_connection (& (ev->id));
            g_free (pd);

            g_debug ("Connection closed successfully. %u connected clients remaining",
                     g_list_length (priv->clients));
        }

    exit:
        g_free (ev);
    }

    G_UNLOCK (connection_handling);
    g_debug ("CM event handling done");
    return TRUE;
}
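/*
 * The copy-then-ack pattern above, reduced to its core (hypothetical
 * helper, not KIRO code): acking immediately keeps the CM channel from
 * blocking on slow per-event work. Note the shallow-copy caveat.
 */
#include <rdma/rdma_cma.h>

static int next_event_copy(struct rdma_event_channel *ec,
			   struct rdma_cm_event *copy)
{
	struct rdma_cm_event *event;

	if (rdma_get_cm_event(ec, &event))
		return -1;
	/* Shallow copy: event->param private_data still points into the
	 * acked event and must be deep-copied if it is used later. */
	*copy = *event;
	return rdma_ack_cm_event(event);
}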
static ssize_t
fi_ibv_rdm_process_connect_request(struct rdma_cm_event *event,
				   struct fi_ibv_rdm_ep *ep)
{
	struct ibv_qp_init_attr qp_attr;
	struct rdma_conn_param cm_params;
	struct fi_ibv_rdm_tagged_conn *conn = NULL;
	struct rdma_cm_id *id = event->id;
	ssize_t ret = FI_SUCCESS;

	char *p = (char *) event->param.conn.private_data;

	if (ep->is_closing) {
		int rej_message = 0xdeadbeef;

		if (rdma_reject(id, &rej_message, sizeof(int))) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_reject\n", errno);
			ret = -errno;
			if (rdma_destroy_id(id)) {
				VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id\n",
						 errno);
				ret = (ret == FI_SUCCESS) ? -errno : ret;
			}
		}
		assert(ret == FI_SUCCESS);
		return ret;
	}

	HASH_FIND(hh, fi_ibv_rdm_tagged_conn_hash, p,
		  FI_IBV_RDM_DFLT_ADDRLEN, conn);

	if (!conn) {
		conn = memalign(FI_IBV_RDM_MEM_ALIGNMENT, sizeof(*conn));
		if (!conn)
			return -FI_ENOMEM;

		memset(conn, 0, sizeof(struct fi_ibv_rdm_tagged_conn));

		conn->state = FI_VERBS_CONN_ALLOCATED;
		dlist_init(&conn->postponed_requests_head);
		fi_ibv_rdm_unpack_cm_params(&event->param.conn, conn, ep);
		fi_ibv_rdm_conn_init_cm_role(conn, ep);

		FI_INFO(&fi_ibv_prov, FI_LOG_AV,
			"CONN REQUEST, NOT found in hash, new conn %p %d, addr %s:%u, HASH ADD\n",
			conn, conn->cm_role, inet_ntoa(conn->addr.sin_addr),
			ntohs(conn->addr.sin_port));

		HASH_ADD(hh, fi_ibv_rdm_tagged_conn_hash, addr,
			 FI_IBV_RDM_DFLT_ADDRLEN, conn);
	} else {
		if (conn->cm_role != FI_VERBS_CM_ACTIVE) {
			/*
			 * Do it before rdma_create_qp since that call would
			 * modify event->param.conn.private_data buffer
			 */
			fi_ibv_rdm_unpack_cm_params(&event->param.conn, conn, ep);
		}

		FI_INFO(&fi_ibv_prov, FI_LOG_AV,
			"CONN REQUEST, FOUND in hash, conn %p %d, addr %s:%u\n",
			conn, conn->cm_role, inet_ntoa(conn->addr.sin_addr),
			ntohs(conn->addr.sin_port));
	}

	if (conn->cm_role == FI_VERBS_CM_ACTIVE) {
		int rej_message = 0xdeadbeef;

		if (rdma_reject(id, &rej_message, sizeof(rej_message))) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_reject\n", errno);
			ret = -errno;
			if (rdma_destroy_id(id)) {
				VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_destroy_id\n",
						 errno);
				ret = (ret == FI_SUCCESS) ? -errno : ret;
			}
		}

		if (conn->state == FI_VERBS_CONN_ALLOCATED) {
			ret = fi_ibv_rdm_start_connection(ep, conn);
			if (ret != FI_SUCCESS)
				goto err;
		}
	} else {
		assert(conn->state == FI_VERBS_CONN_ALLOCATED ||
		       conn->state == FI_VERBS_CONN_STARTED);

		const size_t idx = (conn->cm_role == FI_VERBS_CM_PASSIVE) ? 0 : 1;

		conn->state = FI_VERBS_CONN_STARTED;

		assert(conn->id[idx] == NULL);
		conn->id[idx] = id;

		ret = fi_ibv_rdm_prepare_conn_memory(ep, conn);
		if (ret != FI_SUCCESS)
			goto err;

		fi_ibv_rdm_tagged_init_qp_attributes(&qp_attr, ep);
		if (rdma_create_qp(id, ep->domain->pd, &qp_attr)) {
			ret = -errno;
			goto err;
		}
		conn->qp[idx] = id->qp;

		ret = fi_ibv_rdm_repost_receives(conn, ep, ep->rq_wr_depth);
		if (ret < 0) {
			VERBS_INFO(FI_LOG_AV, "repost receives failed\n");
			goto err;
		} else {
			ret = FI_SUCCESS;
		}

		id->context = conn;

		fi_ibv_rdm_pack_cm_params(&cm_params, conn, ep);

		if (rdma_accept(id, &cm_params)) {
			VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_accept\n", errno);
			ret = -errno;
			goto err;
		}

		if (cm_params.private_data)
			free((void *) cm_params.private_data);
	}

	return ret;

err:
	/* the error code is already set here; just clean up resources */
	fi_ibv_rdm_conn_cleanup(conn);
	return ret;
}
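/*
 * A hypothetical simplification of the role tiebreak implied by cm_role
 * above: when both peers connect simultaneously, a total order on
 * (address, port) decides which request survives; equal endpoints mean a
 * connection to self. Not libfabric code; the provider's actual
 * fi_ibv_rdm_conn_init_cm_role() may decide differently.
 */
#include <string.h>
#include <netinet/in.h>

enum cm_role_sketch { CM_ACTIVE, CM_PASSIVE, CM_SELF };

static enum cm_role_sketch pick_role(const struct sockaddr_in *local,
				     const struct sockaddr_in *remote)
{
	int c = memcmp(&local->sin_addr, &remote->sin_addr,
		       sizeof(local->sin_addr));

	if (!c)
		c = (int) local->sin_port - (int) remote->sin_port;
	if (c > 0)
		return CM_ACTIVE;
	return c < 0 ? CM_PASSIVE : CM_SELF;
}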