static int route_handler(struct cmatest_node *node) { struct rdma_conn_param conn_param; int ret; ret = init_node(node); if (ret) goto err; ret = post_recvs(node); if (ret) goto err; memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; conn_param.initiator_depth = 1; conn_param.retry_count = 5; conn_param.private_data = test.rai->ai_connect; conn_param.private_data_len = test.rai->ai_connect_len; ret = rdma_connect(node->cma_id, &conn_param); if (ret) { perror("cmatose: failure connecting"); goto err; } return 0; err: connect_error(); return ret; }
/*
 * Actively establish an RDMA connection to param->host on RDMA_PORT.
 * Walks the full rdma_cm client sequence: resolve address, resolve
 * route, then connect; each step aborts via TEST_NZ/TEST_Z on failure.
 * The event channel and cm_id are stored in *comm for later use.
 */
int RDMA_Active_Init(struct RDMA_communicator *comm, struct RDMA_param *param)
{
	struct rdma_conn_param conn_param;
	struct addrinfo *res;
	char service[8];

	/* Resolve the peer address. */
	sprintf(service, "%d", RDMA_PORT);
	TEST_NZ(getaddrinfo(param->host, service, NULL, &res));
	TEST_Z(comm->ec = rdma_create_event_channel());
	TEST_NZ(rdma_create_id(comm->ec, &(comm->cm_id), NULL, RDMA_PS_TCP));
	TEST_NZ(rdma_resolve_addr(comm->cm_id, NULL, res->ai_addr, TIMEOUT_IN_MS));
	TEST_NZ(wait_for_event(comm->ec, RDMA_CM_EVENT_ADDR_RESOLVED));
	freeaddrinfo(res);

	/* Set up QP resources, resolve a route, and connect. */
	build_connection(comm->cm_id);
	TEST_NZ(rdma_resolve_route(comm->cm_id, TIMEOUT_IN_MS));
	TEST_NZ(wait_for_event(comm->ec, RDMA_CM_EVENT_ROUTE_RESOLVED));
	build_params(&conn_param);
	TEST_NZ(rdma_connect(comm->cm_id, &conn_param));
	TEST_NZ(wait_for_event(comm->ec, RDMA_CM_EVENT_ESTABLISHED));
	return 0;
}
static void iser_route_handler(struct rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; int ret; ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); if (ret) goto failure; iser_dbg("path.mtu is %d setting it to %d\n", cma_id->route.path_rec->mtu, IB_MTU_1024); /* we must set the MTU to 1024 as this is what the target is assuming */ if (cma_id->route.path_rec->mtu > IB_MTU_1024) cma_id->route.path_rec->mtu = IB_MTU_1024; memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 4; conn_param.initiator_depth = 1; conn_param.retry_count = 7; conn_param.rnr_retry_count = 6; ret = rdma_connect(cma_id, &conn_param); if (ret) { iser_err("failure connecting: %d\n", ret); goto failure; } return; failure: iser_connect_error(cma_id); }
static int krping_connect_client(struct krping_cb *cb) { struct rdma_conn_param conn_param; int ret; memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; conn_param.initiator_depth = 1; conn_param.retry_count = 10; ret = rdma_connect(cb->cm_id, &conn_param); if (ret) { log(LOG_ERR, "rdma_connect error %d\n", ret); return ret; } krping_wait(cb, CONNECTED); if (cb->state == ERROR) { log(LOG_ERR, "wait for CONNECTED state %d\n", cb->state); return -1; } DEBUG_LOG(PFX "rdma_connect successful\n"); return 0; }
static int route_handler(struct cmatest_node *node) { struct rdma_conn_param conn_param; int ret; ret = verify_test_params(node); if (ret) goto err; ret = init_node(node); if (ret) goto err; ret = post_recvs(node); if (ret) goto err; memset(&conn_param, 0, sizeof conn_param); ret = rdma_connect(node->cma_id, &conn_param); if (ret) { perror("udaddy: failure connecting"); goto err; } return 0; err: connect_error(); return ret; }
/*
 * Connect to host:port over rdma_cm, push `size` bytes starting at
 * `data` (registered via init_tfile / kicked off by send_init), then
 * wait for the peer to disconnect and tear everything down.
 * Aborts via TEST_NZ/TEST_Z on any step's failure; returns 0 otherwise.
 */
int ibrdma_send(char* host, char* port, void* data, uint64_t size)
{
	struct rdma_event_channel *channel = NULL;
	struct rdma_cm_id *id = NULL;
	struct rdma_conn_param params;
	struct addrinfo *res;

	/* Resolve address and route, then connect. */
	TEST_NZ(getaddrinfo(host, port, NULL, &res));
	TEST_Z(channel = rdma_create_event_channel());
	TEST_NZ(rdma_create_id(channel, &id, NULL, RDMA_PS_TCP));
	TEST_NZ(rdma_resolve_addr(id, NULL, res->ai_addr, TIMEOUT_IN_MS));
	TEST_NZ(wait_for_event(channel, RDMA_CM_EVENT_ADDR_RESOLVED));
	freeaddrinfo(res);
	build_connection(id);
	TEST_NZ(rdma_resolve_route(id, TIMEOUT_IN_MS));
	TEST_NZ(wait_for_event(channel, RDMA_CM_EVENT_ROUTE_RESOLVED));
	build_params(&params);
	TEST_NZ(rdma_connect(id, &params));
	TEST_NZ(wait_for_event(channel, RDMA_CM_EVENT_ESTABLISHED));
	on_connect(id->context);

	/* Init MSG send to start RDMA */
	init_tfile(data, size);
	send_init(id->context);

	/* Wait for the peer to drop the connection, then clean up. */
	TEST_NZ(wait_for_event(channel, RDMA_CM_EVENT_DISCONNECTED));
	rdma_destroy_id(id);
	rdma_destroy_event_channel(channel);
	return 0;
}
/*
 * Client side of rping: issue rdma_connect() and wait on the semaphore
 * signalled by the CM event handler until the state reaches CONNECTED.
 *
 * Returns 0 on success, the rdma_connect() status, or -1 if the
 * connection did not reach CONNECTED.
 */
static int rping_connect_client(struct rping_cb *cb)
{
	struct rdma_conn_param conn_param;
	int ret;

	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = 7;

	ret = rdma_connect(cb->cm_id, &conn_param);
	if (ret) {
		perror("rdma_connect");
		return ret;
	}

	/* Posted by the CM event thread once a terminal state is reached. */
	sem_wait(&cb->sem);
	if (cb->state != CONNECTED) {
		fprintf(stderr, "wait for CONNECTED state %d\n", cb->state);
		return -1;
	}
	/* Fixed typo in log message: was "rmda_connect successful". */
	DEBUG_LOG("rdma_connect successful\n");
	return 0;
}
/*
 * Handle RDMA_CM_EVENT_ROUTE_RESOLVED for an RDM endpoint: pack the
 * provider's CM parameters (which allocate private_data) and initiate
 * the connection.
 *
 * Returns FI_SUCCESS, or -errno from a failed rdma_connect().  On
 * failure the packed private_data buffer is released.
 */
static ssize_t fi_ibv_rdm_process_route_resolved(struct rdma_cm_event *event,
						 struct fi_ibv_rdm_ep *ep)
{
	struct fi_ibv_rdm_tagged_conn *conn = event->id->context;
	ssize_t ret = FI_SUCCESS;
	struct rdma_conn_param cm_params;

	fi_ibv_rdm_pack_cm_params(&cm_params, conn, ep);

	VERBS_INFO(FI_LOG_AV, "ROUTE RESOLVED, conn %p, addr %s:%u\n", conn,
		   inet_ntoa(conn->addr.sin_addr), ntohs(conn->addr.sin_port));

	if (rdma_connect(event->id, &cm_params)) {
		/* Save errno before logging: the logging macro may itself
		 * make libc calls that clobber errno, which previously made
		 * `ret = -errno` unreliable. */
		int err = errno;

		VERBS_INFO_ERRNO(FI_LOG_AV, "rdma_connect failed\n", err);
		ret = -err;
		free((void *)cm_params.private_data);
		assert(0);
	}

	return ret;
}
/*
 * Establish an RDMA connection described by *tfi, run the transfer via
 * on_connect(), wait for the peer to disconnect, and clean up.
 *
 * Fixes relative to the previous revision:
 *  - `host` and `port` were referenced but never declared; they are now
 *    derived from tfi->ib_host / tfi->ib_port (NOTE(review): field names
 *    taken from the by-value ibrdma_transfer() variant — confirm).
 *  - rdma_destroy_id()/rdma_destroy_event_channel() were passed &cmid
 *    and &ec (pointer-to-pointer) instead of the handles themselves.
 */
int ibrdma_transfer(struct transfer_info *tfi, int num_tfi)
{
	struct addrinfo *addr;
	struct rdma_cm_id *cmid = NULL;
	struct rdma_event_channel *ec = NULL;
	struct rdma_conn_param cm_params;
	char *host = tfi->ib_host;
	char port[8];

	snprintf(port, sizeof port, "%d", tfi->ib_port);

	TEST_NZ(getaddrinfo(host, port, NULL, &addr));
	TEST_Z(ec = rdma_create_event_channel());
	TEST_NZ(rdma_create_id(ec, &cmid, NULL, RDMA_PS_TCP));
	TEST_NZ(rdma_resolve_addr(cmid, NULL, addr->ai_addr, TIMEOUT_IN_MS));
	TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ADDR_RESOLVED));
	freeaddrinfo(addr);
	build_connection(cmid);
	TEST_NZ(rdma_resolve_route(cmid, TIMEOUT_IN_MS));
	TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ROUTE_RESOLVED));
	build_params(&cm_params);
	TEST_NZ(rdma_connect(cmid, &cm_params));
	TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ESTABLISHED));
	on_connect(cmid->context);

	TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_DISCONNECTED));
	rdma_destroy_id(cmid);
	rdma_destroy_event_channel(ec);
	return 0;
}
static int iser_route_handler(struct rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; int ret; ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); if (ret) goto failure; memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 4; conn_param.initiator_depth = 1; conn_param.retry_count = 7; conn_param.rnr_retry_count = 6; ret = rdma_connect(cma_id, &conn_param); if (ret) { iser_err("failure connecting: %d\n", ret); goto failure; } return 0; failure: return iser_connect_error(cma_id); }
int client_on_route_resolved(struct rdma_cm_id *id) { struct rdma_conn_param cm_params; build_params(&cm_params); TEST_NZ(rdma_connect(id, &cm_params)); return 0; }
int on_route_resolved(struct rdma_cm_id *id) { struct rdma_conn_param cm_params; printf("route resolved.\n"); build_params(&cm_params); TEST_NZ(rdma_connect(id, &cm_params)); return 0; }
int on_route_resolved(struct rdma_cm_id *id) { struct rdma_conn_param cm_params; printf("route resolved.\n"); memset(&cm_params, 0, sizeof(cm_params)); TEST_NZ(rdma_connect(id, &cm_params)); return 0; }
int on_route_resolved(struct rdma_cm_id *id) { struct rdma_conn_param cm_params; printf("route resolved.\n"); build_params(&cm_params); TEST_NZ(rdma_connect(id, &cm_params)); //register_memory((struct connection *)(id->context)); return 0; }
/*
 * Connect the fio RDMA client to the server and send the initial task
 * request (protocol mode + iodepth).  Returns 0 on success, 1 on any
 * failure.  The `f` parameter is unused here (required by the io_ops
 * connect signature).
 */
static int fio_rdmaio_connect(struct thread_data *td, struct fio_file *f)
{
	struct rdmaio_data *rd = td->io_ops->data;
	struct rdma_conn_param conn_param;
	struct ibv_send_wr *bad_wr;

	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = 10;

	if (rdma_connect(rd->cm_id, &conn_param) != 0) {
		log_err("fio: rdma_connect fail\n");
		return 1;
	}

	/* Block until the CM reports the connection is established. */
	if (get_next_channel_event
	    (td, rd->cm_channel, RDMA_CM_EVENT_ESTABLISHED) != 0) {
		log_err("fio: wait for RDMA_CM_EVENT_ESTABLISHED\n");
		return 1;
	}

	/* send task request */
	rd->send_buf.mode = htonl(rd->rdma_protocol);
	rd->send_buf.nr = htonl(td->o.iodepth);

	if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
		log_err("fio: ibv_post_send fail");
		return 1;
	}

	if (rdma_poll_wait(td, IBV_WC_SEND) < 0)
		return 1;

	/* wait for remote MR info from server side */
	if (rdma_poll_wait(td, IBV_WC_RECV) < 0)
		return 1;

	/* In a SEND/RECV test, it's good practice to set up the iodepth of
	 * the RECV side deeper than that of the SEND side to
	 * avoid RNR (receiver not ready) error. The
	 * SEND side may send so many unsolicited messages before the
	 * RECV side commits sufficient recv buffers into the recv queue.
	 * This may lead to RNR error. Here, the SEND side pauses for a while
	 * during which the RECV side commits sufficient recv buffers.
	 */
	usleep(500000);

	return 0;
}
/*
 * libfabric MSG endpoint connect: enable the endpoint's QP if it has
 * not been created yet, build the CM parameters (carrying the caller's
 * private data), log the local/peer addresses, and initiate the
 * rdma_cm connection.
 *
 * Returns 0 on success, a control-op error, or -errno from a failed
 * rdma_connect().  The `addr` parameter is unused here (the peer was
 * fixed when the id was resolved).
 */
static int
fi_ibv_msg_ep_connect(struct fid_ep *ep, const void *addr,
		      const void *param, size_t paramlen)
{
	struct fi_ibv_msg_ep *_ep;
	struct rdma_conn_param conn_param;
	struct sockaddr *src_addr, *dst_addr;
	int ret;

	_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);
	/* Lazily create the QP on first connect. */
	if (!_ep->id->qp) {
		ret = ep->fid.ops->control(&ep->fid, FI_ENABLE, NULL);
		if (ret)
			return ret;
	}

	memset(&conn_param, 0, sizeof conn_param);
	conn_param.private_data = param;
	conn_param.private_data_len = paramlen;
	conn_param.responder_resources = RDMA_MAX_RESP_RES;
	conn_param.initiator_depth = RDMA_MAX_INIT_DEPTH;
	conn_param.flow_control = 1;
	conn_param.retry_count = 15;
	conn_param.rnr_retry_count = 7;

	/* Advertise SRQ usage when the endpoint is bound to a shared RQ. */
	if (_ep->srq_ep)
		conn_param.srq = 1;

	src_addr = rdma_get_local_addr(_ep->id);
	if (src_addr) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "src_addr: %s:%d\n",
			inet_ntoa(((struct sockaddr_in *)src_addr)->sin_addr),
			ntohs(((struct sockaddr_in *)src_addr)->sin_port));
	}

	dst_addr = rdma_get_peer_addr(_ep->id);
	if (dst_addr) {
		FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "dst_addr: %s:%d\n",
			inet_ntoa(((struct sockaddr_in *)dst_addr)->sin_addr),
			ntohs(((struct sockaddr_in *)dst_addr)->sin_port));
	}

	return rdma_connect(_ep->id, &conn_param) ? -errno : 0;
}
// Synchronously connect to the given host/port over rdma_cm and wrap the
// resulting cm_id in a new RDMACMSocket.  Any failure prints the failing
// call via perror() and terminates the process.
RDMACMSocket* RDMACMSocket::connect(const HostAndPort& hp) {
    struct rdma_addrinfo hints;
    struct rdma_addrinfo* res = NULL;
    memset(&hints, 0, sizeof(hints));
    hints.ai_port_space = RDMA_PS_TCP;

    if (rdma_getaddrinfo(const_cast<char*>(hp.hostname),
                         const_cast<char*>(hp.port_str), &hints, &res) < 0) {
        perror("rdma_getaddrinfo");
        exit(1);
    }

    // QP sized to the packet window; every send generates a completion.
    struct ibv_qp_init_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.cap.max_send_wr = PACKET_WINDOW_SIZE;
    attr.cap.max_recv_wr = PACKET_WINDOW_SIZE;
    attr.cap.max_send_sge = 1;
    attr.cap.max_recv_sge = 1;
    attr.cap.max_inline_data = 0;
    attr.sq_sig_all = 1;

    struct rdma_cm_id* client_id = NULL;
    if (rdma_create_ep(&client_id, res, NULL, &attr) < 0) {
        rdma_freeaddrinfo(res);
        perror("rdma_create_ep");
        exit(1);
    }
    rdma_freeaddrinfo(res);

    if (rdma_connect(client_id, NULL) < 0) {
        rdma_destroy_ep(client_id);
        perror("rdma_connect");
        exit(1);
    }

    return new RDMACMSocket(client_id);
}
int on_route_resolved(struct rdma_cm_id *id) { struct rdma_conn_param cm_params; struct timeval start, end, dt; printf("route resolved.\n"); build_params(&cm_params); TEST_NZ(rdma_connect(id, &cm_params)); gettimeofday(&start, NULL); register_memory((struct connection *)(id->context)); gettimeofday(&end, NULL); timersub(&end, &start, &dt); long usec = dt.tv_usec + 1000000 * dt.tv_sec; printf("[Register] takes %ld micro_secs.\n", usec); post_receive_for_msg(id->context); return 0; }
// Route resolved: initiate the rdma_cm connection, attaching this
// connection's private data blob (at most 255 bytes, since
// private_data_len is a uint8_t).  Throws InfinibandException if
// rdma_connect() fails.
void IBConnection::on_route_resolved()
{
    L_(debug) << "route resolved";

    // Initiate rdma connection
    auto private_data = get_private_data();
    assert(private_data->size() <= 255);

    // Value-initialized (zeroed) before the fields below are set.
    struct rdma_conn_param conn_param = rdma_conn_param();
    conn_param.initiator_depth = 1;
    conn_param.retry_count = 7;
    conn_param.private_data = private_data->data();
    conn_param.private_data_len = static_cast<uint8_t>(private_data->size());

    // TODO: Hack to prevent connection issues when using softiwarp.
    std::this_thread::sleep_for(std::chrono::milliseconds(500));

    int err = rdma_connect(cm_id_, &conn_param);
    if (err) {
        L_(fatal) << "rdma_connect failed: " << strerror(errno);
        throw InfinibandException("rdma_connect failed");
    }
}
/*
 * Establish an RDMA connection described by tfi, run the transfer via
 * on_connect(), wait for the peer to disconnect, and clean up.
 *
 * Fixes relative to the previous revision:
 *  - `port` was an uninitialized char* that sprintf() wrote through
 *    (undefined behavior); it is now a local buffer.
 *  - rdma_destroy_id()/rdma_destroy_event_channel() were passed &cmid
 *    and &ec (pointer-to-pointer) instead of the handles themselves.
 *  - Removed unused locals, including `int nf = tfi.tfiles;` which
 *    assigned a pointer to an int.
 */
int ibrdma_transfer(struct transfer_info tfi, int num_tfi)
{
	struct addrinfo *addr;
	struct rdma_cm_id *cmid = NULL;
	struct rdma_event_channel *ec = NULL;
	struct rdma_conn_param cm_params;
	char *host = tfi.ib_host;
	char port[8];
	int i;

	snprintf(port, sizeof port, "%d", tfi.ib_port);

	/* Allocate buffer space for reading from the local fs into memory. */
	for (i = 0; i < NUM_FILE_BUF_C; i++) {
		tfi.fbufs[i].fbuf = (char *)malloc(FILE_BUF_SIZE_C);
		tfi.fbufs[i].size = 0;
	}

	TEST_NZ(getaddrinfo(host, port, NULL, &addr));
	TEST_Z(ec = rdma_create_event_channel());
	TEST_NZ(rdma_create_id(ec, &cmid, NULL, RDMA_PS_TCP));
	TEST_NZ(rdma_resolve_addr(cmid, NULL, addr->ai_addr, TIMEOUT_IN_MS));
	TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ADDR_RESOLVED));
	freeaddrinfo(addr);
	build_connection(cmid);
	TEST_NZ(rdma_resolve_route(cmid, TIMEOUT_IN_MS));
	TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ROUTE_RESOLVED));
	build_params(&cm_params);
	TEST_NZ(rdma_connect(cmid, &cm_params));
	TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ESTABLISHED));
	on_connect(cmid->context);

	TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_DISCONNECTED));
	rdma_destroy_id(cmid);
	rdma_destroy_event_channel(ec);
	return 0;
}
/*
 * Connect unconnected endpoint.
 *
 * If the endpoint was previously connected, it is first disconnected
 * and a fresh cm_id is created (reconnect on a different device is
 * refused).  Connection failures are retried: a non-peer reject
 * (-ECONNREFUSED, i.e. no listener yet) retries up to
 * RDMA_CONNECT_RETRY_MAX times, and one extra retry round adjusts
 * ORD/IRD to matching nonzero values, which some remote CMs require.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		/* Tear down the old connection before reconnecting. */
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc && rc != -ENOTCONN)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" status %i\n", __func__, rc);
		rpcrdma_clean_cq(ep->rep_cq);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = PTR_ERR(id);
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETDOWN;
			goto out;
		}
		/* END TEMP */
		rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = id;
	}

	rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
	if (rc) {
		dprintk("RPC: %s: rdma_create_qp failed %i\n",
			__func__, rc);
		goto out;
	}

	/* XXX Tavor device performs badly with 2K MTU! */
	if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
		struct pci_dev *pcid =
			to_pci_dev(ia->ri_id->device->dma_device);
		if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
		    (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
		     pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
			struct ib_qp_attr attr = {
				.path_mtu = IB_MTU_1024
			};
			rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	/* The CM event upcall sets rep_connected to a nonzero value. */
	wait_event_interruptible(ep->rep_connect_wait,
				 ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}
struct rdma_conn_param cm_param; build_param(&cm_param); while (rdma_get_cm_event(ec_, &event_) == 0) { struct rdma_cm_event event_copy; memcpy(&event_copy, event_, sizeof(*event_) ); rdma_ack_cm_event(event_); if (event_copy.event == RDMA_CM_EVENT_ADDR_RESOLVED) { build_conn(event_copy.id); ib_end_->on_pre_conn(event_copy.id); TEST_NZ(rdma_resolve_route(event_copy.id, TIMEOUT_IN_MS) ) } else if (event_copy.event == RDMA_CM_EVENT_ROUTE_RESOLVED) { TEST_NZ(rdma_connect(event_copy.id, &cm_param) ) } else if (event_copy.event == RDMA_CM_EVENT_CONNECT_REQUEST) { build_conn(event_copy.id); ib_end_->on_pre_conn(event_copy.id); TEST_NZ(rdma_accept(event_copy.id, &cm_param) ) } else if (event_copy.event == RDMA_CM_EVENT_ESTABLISHED) { ib_end_->on_conn(event_copy.id); } else if (event_copy.event == RDMA_CM_EVENT_DISCONNECTED) { rdma_destroy_qp(event_copy.id); ib_end_->on_disconn(event_copy.id);
/*
 * Re-post a single receive work request into the connection's QP,
 * advancing the circular recv_index.  Returns 0 on success, -2 on
 * ibv_post_recv failure (error text in ibw_lasterr).
 */
static int ibw_refill_cq_recv(struct ibw_conn *conn)
{
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal,
		struct ibw_ctx_priv);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal,
		struct ibw_conn_priv);
	int	rc;
	struct ibv_sge list = {
		.addr 	= (uintptr_t) NULL, /* filled below */
		.length = pctx->opts.recv_bufsize,
		.lkey 	= pconn->mr_recv->lkey /* always the same */
	};
	struct ibv_recv_wr wr = {
		.wr_id 	    = 0, /* filled below */
		.sg_list    = &list,
		.num_sge    = 1,
	};
	struct ibv_recv_wr *bad_wr;

	DEBUG(DEBUG_DEBUG, ("ibw_refill_cq_recv(cmid: %p)\n", pconn->cm_id));

	/* Next slot in the circular receive buffer. */
	list.addr = (uintptr_t) pconn->buf_recv +
		pctx->opts.recv_bufsize * pconn->recv_index;
	wr.wr_id = pconn->recv_index;
	pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;

	rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
	if (rc) {
		sprintf(ibw_lasterr, "refill/ibv_post_recv failed with %d\n", rc);
		DEBUG(DEBUG_ERR, (ibw_lasterr));
		return -2;
	}

	return 0;
}

/*
 * Fill the QP's receive queue completely: post max_recv_wr receive work
 * requests, one per slot of the circular receive buffer.  Returns 0 on
 * success, -2 on ibv_post_recv failure.
 */
static int ibw_fill_cq(struct ibw_conn *conn)
{
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal,
		struct ibw_ctx_priv);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal,
		struct ibw_conn_priv);
	int	i, rc;
	struct ibv_sge list = {
		.addr 	= (uintptr_t) NULL, /* filled below */
		.length = pctx->opts.recv_bufsize,
		.lkey 	= pconn->mr_recv->lkey /* always the same */
	};
	struct ibv_recv_wr wr = {
		.wr_id 	    = 0, /* filled below */
		.sg_list    = &list,
		.num_sge    = 1,
	};
	struct ibv_recv_wr *bad_wr;

	DEBUG(DEBUG_DEBUG, ("ibw_fill_cq(cmid: %p)\n", pconn->cm_id));

	for(i = pctx->opts.max_recv_wr; i!=0; i--) {
		list.addr = (uintptr_t) pconn->buf_recv +
			pctx->opts.recv_bufsize * pconn->recv_index;
		wr.wr_id = pconn->recv_index;
		pconn->recv_index = (pconn->recv_index + 1) %
			pctx->opts.max_recv_wr;

		rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
		if (rc) {
			sprintf(ibw_lasterr, "fill/ibv_post_recv failed with %d\n", rc);
			DEBUG(DEBUG_ERR, (ibw_lasterr));
			return -2;
		}
	}

	return 0;
}

/*
 * Active-side connect: set up CQ/QP for the connection and issue
 * rdma_connect().  Returns 0 on success; on failure the error text is
 * left in ibw_lasterr and the rdma_connect() status is returned.
 */
static int ibw_manage_connect(struct ibw_conn *conn)
{
	struct rdma_conn_param conn_param;
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal,
		struct ibw_conn_priv);
	int	rc;

	DEBUG(DEBUG_DEBUG, ("ibw_manage_connect(cmid: %p)\n", pconn->cm_id));

	if (ibw_setup_cq_qp(conn))
		return -1;

	/* cm connect */
	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = 10;

	rc = rdma_connect(pconn->cm_id, &conn_param);
	if (rc)
		sprintf(ibw_lasterr, "rdma_connect error %d\n", rc);

	return rc;
}

/*
 * tevent fd handler for the rdma_cm event channel: drives the whole
 * connection state machine (address/route resolution, connect requests,
 * establishment, rejection, disconnect, errors) and notifies the upper
 * layer through pctx->connstate_func.  Each event is acked exactly once;
 * paths that ack early set `event = NULL` so the common ack is skipped.
 */
static void ibw_event_handler_cm(struct tevent_context *ev,
	struct tevent_fd *fde, uint16_t flags, void *private_data)
{
	int	rc;
	struct ibw_ctx	*ctx = talloc_get_type(private_data, struct ibw_ctx);
	struct ibw_ctx_priv *pctx = talloc_get_type(ctx->internal,
		struct ibw_ctx_priv);
	struct ibw_conn *conn = NULL;
	struct ibw_conn_priv *pconn = NULL;
	struct rdma_cm_id *cma_id = NULL;
	struct rdma_cm_event *event = NULL;

	assert(ctx!=NULL);

	rc = rdma_get_cm_event(pctx->cm_channel, &event);
	if (rc) {
		ctx->state = IBWS_ERROR;
		event = NULL;
		sprintf(ibw_lasterr, "rdma_get_cm_event error %d\n", rc);
		goto error;
	}
	cma_id = event->id;

	DEBUG(DEBUG_DEBUG, ("cma_event type %d cma_id %p (%s)\n", event->event,
		  cma_id, (cma_id == pctx->cm_id) ? "parent" : "child"));

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_ADDR_RESOLVED\n"));
		/* continuing from ibw_connect ... */
		rc = rdma_resolve_route(cma_id, 2000);
		if (rc) {
			sprintf(ibw_lasterr, "rdma_resolve_route error %d\n", rc);
			goto error;
		}
		/* continued at RDMA_CM_EVENT_ROUTE_RESOLVED */
		break;

	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_ROUTE_RESOLVED\n"));
		/* after RDMA_CM_EVENT_ADDR_RESOLVED: */
		assert(cma_id->context!=NULL);
		conn = talloc_get_type(cma_id->context, struct ibw_conn);

		rc = ibw_manage_connect(conn);
		if (rc)
			goto error;
		break;

	case RDMA_CM_EVENT_CONNECT_REQUEST:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_CONNECT_REQUEST\n"));
		ctx->state = IBWS_CONNECT_REQUEST;
		conn = ibw_conn_new(ctx, ctx);
		pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
		pconn->cm_id = cma_id; /* !!! event will be freed but id not */
		cma_id->context = (void *)conn;
		DEBUG(DEBUG_DEBUG, ("pconn->cm_id %p\n", pconn->cm_id));

		if (ibw_setup_cq_qp(conn))
			goto error;

		conn->state = IBWC_INIT;
		pctx->connstate_func(ctx, conn);

		/* continued at ibw_accept when invoked by the func above */
		if (!pconn->is_accepted) {
			rc = rdma_reject(cma_id, NULL, 0);
			if (rc)
				DEBUG(DEBUG_ERR, ("rdma_reject failed with rc=%d\n", rc));
			talloc_free(conn);
			DEBUG(DEBUG_DEBUG, ("pconn->cm_id %p wasn't accepted\n", pconn->cm_id));
		}

		/* TODO: clarify whether if it's needed by upper layer: */
		ctx->state = IBWS_READY;
		pctx->connstate_func(ctx, NULL);

		/* NOTE: more requests can arrive until RDMA_CM_EVENT_ESTABLISHED ! */
		break;

	case RDMA_CM_EVENT_ESTABLISHED:
		/* expected after ibw_accept and ibw_connect[not directly] */
		DEBUG(DEBUG_INFO, ("ESTABLISHED (conn: %p)\n", cma_id->context));
		conn = talloc_get_type(cma_id->context, struct ibw_conn);
		assert(conn!=NULL); /* important assumption */

		DEBUG(DEBUG_DEBUG, ("ibw_setup_cq_qp succeeded (cmid=%p)\n", cma_id));

		/* client conn is up */
		conn->state = IBWC_CONNECTED;

		/* both ctx and conn have changed */
		pctx->connstate_func(ctx, conn);
		break;

	/* NOTE(review): the next three cases fall through, so each earlier
	 * sprintf into ibw_lasterr is overwritten by the later one —
	 * confirm this fall-through is intentional. */
	case RDMA_CM_EVENT_ADDR_ERROR:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_ADDR_ERROR, error %d\n", event->status);
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_ROUTE_ERROR, error %d\n", event->status);
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_CONNECT_ERROR, error %d\n", event->status);
	case RDMA_CM_EVENT_UNREACHABLE:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_UNREACHABLE, error %d\n", event->status);
		goto error;

	case RDMA_CM_EVENT_REJECTED:
		sprintf(ibw_lasterr, "RDMA_CM_EVENT_REJECTED, error %d\n", event->status);
		DEBUG(DEBUG_INFO, ("cm event handler: %s", ibw_lasterr));
		conn = talloc_get_type(cma_id->context, struct ibw_conn);
		if (conn) {
			/* must be done BEFORE connstate */
			if ((rc=rdma_ack_cm_event(event)))
				DEBUG(DEBUG_ERR, ("reject/rdma_ack_cm_event failed with %d\n", rc));
			event = NULL; /* not to touch cma_id or conn */
			conn->state = IBWC_ERROR;
			/* it should free the conn */
			pctx->connstate_func(NULL, conn);
		}
		break; /* this is not strictly an error */

	case RDMA_CM_EVENT_DISCONNECTED:
		DEBUG(DEBUG_DEBUG, ("RDMA_CM_EVENT_DISCONNECTED\n"));
		if ((rc=rdma_ack_cm_event(event)))
			DEBUG(DEBUG_ERR, ("disc/rdma_ack_cm_event failed with %d\n", rc));
		event = NULL; /* don't ack more */

		if (cma_id!=pctx->cm_id) {
			DEBUG(DEBUG_ERR, ("client DISCONNECT event cm_id=%p\n", cma_id));
			conn = talloc_get_type(cma_id->context, struct ibw_conn);
			conn->state = IBWC_DISCONNECTED;
			pctx->connstate_func(NULL, conn);
		}
		break;

	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sprintf(ibw_lasterr, "cma detected device removal!\n");
		goto error;

	default:
		sprintf(ibw_lasterr, "unknown event %d\n", event->event);
		goto error;
	}

	/* Common ack for paths that did not ack (and NULL) the event above. */
	if (event!=NULL && (rc=rdma_ack_cm_event(event))) {
		sprintf(ibw_lasterr, "rdma_ack_cm_event failed with %d\n", rc);
		goto error;
	}

	return;
error:
	DEBUG(DEBUG_ERR, ("cm event handler: %s", ibw_lasterr));

	if (event!=NULL) {
		/* Child connection errors notify the conn; parent-level
		 * errors put the whole context into IBWS_ERROR. */
		if (cma_id!=NULL && cma_id!=pctx->cm_id) {
			conn = talloc_get_type(cma_id->context, struct ibw_conn);
			if (conn) {
				conn->state = IBWC_ERROR;
				pctx->connstate_func(NULL, conn);
			}
		} else {
			ctx->state = IBWS_ERROR;
			pctx->connstate_func(ctx, NULL);
		}

		if ((rc=rdma_ack_cm_event(event))!=0) {
			DEBUG(DEBUG_ERR, ("rdma_ack_cm_event failed with %d\n", rc));
		}
	}

	return;
}
/*
 * Bind the cm_id to a privileged local port, scanning downward from
 * P9_DEF_MAX_RESVPORT until a port is free.  Returns 0 on success or
 * the last rdma_bind_addr() error.
 */
static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
{
	struct sockaddr_in cl = {
		.sin_family		= AF_INET,
		.sin_addr.s_addr	= htonl(INADDR_ANY),
	};
	int port, err = -EINVAL;

	for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
		cl.sin_port = htons((ushort)port);
		err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
		if (err != -EADDRINUSE)
			break;
	}
	return err;
}

/**
 * rdma_create_trans - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 *
 * Performs the full client-side setup: parse options, create the cm_id,
 * resolve address and route, create CQ/PD/QP, then connect.  Any failure
 * after alloc_rdma() tears everything down via rdma_destroy_trans() and
 * returns -ENOTCONN.
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;
	struct ib_device_attr devattr;
	struct ib_cq_init_attr cq_attr = {};

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
				     IB_QPT_RC);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Bind to a privileged port if we need to */
	if (opts.privport) {
		err = p9_rdma_bind_privport(rdma);
		if (err < 0) {
			pr_err("%s (%d): problem binding to privport: %d\n",
			       __func__, task_pid_nr(current), -err);
			goto error;
		}
	}

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	/* cm_done is completed by p9_cm_event_handler. */
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Query the device attributes */
	err = ib_query_device(rdma->cm_id->device, &devattr);
	if (err)
		goto error;

	/* Create the Completion Queue */
	cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1;
	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
				cq_event_handler, client,
				&cq_attr);
	if (IS_ERR(rdma->cq))
		goto error;
	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Cache the DMA lkey in the transport */
	rdma->dma_mr = NULL;
	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		rdma->lkey = rdma->cm_id->device->local_dma_lkey;
	else {
		rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(rdma->dma_mr))
			goto error;
		rdma->lkey = rdma->dma_mr->lkey;
	}

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	rdma_destroy_trans(rdma);
	return -ENOTCONN;
}
/*
 * SDP (Sockets Direct Protocol) rdma_cm glue.
 *
 * sdp_init_qp(): create RX/TX rings and the RC QP for a socket on the
 * device backing the CM id; on failure the rings are torn down in reverse
 * order via the err_* labels.
 * sdp_connect_handler(): passive side — validate the incoming SDP Hello,
 * clone the listening socket into a child, init its QP, and queue it on
 * the parent's backlog.
 * sdp_response_handler()/sdp_connected_handler(): active/passive
 * completion of the handshake.
 * sdp_cma_handler(): the single rdma_cm event dispatcher; called with the
 * cm_id's context being either a socket or NULL when torn down.
 */
static int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id) { struct ib_qp_init_attr qp_init_attr = { .event_handler = sdp_qp_event_handler, .cap.max_send_wr = SDP_TX_SIZE, .cap.max_recv_wr = sdp_rx_size, .cap.max_inline_data = sdp_inline_thresh, .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_RC, }; struct ib_device *device = id->device; int rc; sdp_dbg(sk, "%s\n", __func__); sdp_sk(sk)->max_sge = sdp_get_max_dev_sge(device); sdp_dbg(sk, "Max sges: %d\n", sdp_sk(sk)->max_sge); qp_init_attr.cap.max_send_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_SEND_SGES); sdp_dbg(sk, "Setting max send sge to: %d\n", qp_init_attr.cap.max_send_sge); qp_init_attr.cap.max_recv_sge = MIN(sdp_sk(sk)->max_sge, SDP_MAX_RECV_SGES); sdp_dbg(sk, "Setting max recv sge to: %d\n", qp_init_attr.cap.max_recv_sge); sdp_sk(sk)->sdp_dev = ib_get_client_data(device, &sdp_client); if (!sdp_sk(sk)->sdp_dev) { sdp_warn(sk, "SDP not available on device %s\n", device->name); rc = -ENODEV; goto err_rx; } rc = sdp_rx_ring_create(sdp_sk(sk), device); if (rc) goto err_rx; rc = sdp_tx_ring_create(sdp_sk(sk), device); if (rc) goto err_tx; qp_init_attr.recv_cq = sdp_sk(sk)->rx_ring.cq; qp_init_attr.send_cq = sdp_sk(sk)->tx_ring.cq; rc = rdma_create_qp(id, sdp_sk(sk)->sdp_dev->pd, &qp_init_attr); if (rc) { sdp_warn(sk, "Unable to create QP: %d.\n", rc); goto err_qp; } sdp_sk(sk)->qp = id->qp; sdp_sk(sk)->ib_device = device; sdp_sk(sk)->qp_active = 1; sdp_sk(sk)->context.device = device; sdp_sk(sk)->inline_thresh = qp_init_attr.cap.max_inline_data; sdp_dbg(sk, "%s done\n", __func__); return 0; err_qp: sdp_tx_ring_destroy(sdp_sk(sk)); err_tx: sdp_rx_ring_destroy(sdp_sk(sk)); err_rx: return rc; } static int sdp_get_max_send_frags(u32 buf_size) { return MIN( /* +1 to conpensate on not aligned buffers */ (PAGE_ALIGN(buf_size) >> PAGE_SHIFT) + 1, SDP_MAX_SEND_SGES - 1); } static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id, struct rdma_cm_event *event) { struct sockaddr_in *dst_addr; struct sock 
/* sdp_connect_handler continuation: validate the Hello header's IPV bits
 * and max_adverts, clone the listening socket, then copy addressing into
 * the child (IPv4, v6-mapped, or native IPv6 depending on HH_IPV_MASK). */
*child; const struct sdp_hh *h; int rc = 0; sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id); h = event->param.conn.private_data; SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh); if (h->ipv_cap & HH_IPV_MASK & ~(HH_IPV4 | HH_IPV6)) { sdp_warn(sk, "Bad IPV field in SDP Hello header: 0x%x\n", h->ipv_cap & HH_IPV_MASK); return -EINVAL; } if (!h->max_adverts) return -EINVAL; #if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)) child = sk_clone(sk, GFP_KERNEL); #else child = sk_clone_lock(sk, GFP_KERNEL); #endif if (!child) return -ENOMEM; sdp_init_sock(child); dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr; sdp_inet_dport(child) = dst_addr->sin_port; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (inet6_sk(sk)) { struct ipv6_pinfo *newnp; newnp = inet_sk(child)->pinet6 = sdp_inet6_sk_generic(child); memcpy(newnp, inet6_sk(sk), sizeof(struct ipv6_pinfo)); if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) { /* V6 mapped */ sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr; ipv6_addr_set(&child->sk_v6_daddr, 0, 0, htonl(0x0000FFFF), h->src_addr.ip4.addr); ipv6_addr_set(&child->sk_v6_rcv_saddr, 0, 0, htonl(0x0000FFFF), h->dst_addr.ip4.addr); ipv6_addr_copy(&child->sk_v6_rcv_saddr, &child->sk_v6_daddr); } else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) { struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst_addr; struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)&id->route.addr.src_addr; ipv6_addr_copy(&child->sk_v6_daddr, &dst_addr6->sin6_addr); ipv6_addr_copy(&child->sk_v6_rcv_saddr, &src_addr6->sin6_addr); ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr); } else { sdp_warn(child, "Bad IPV field: 0x%x\n", h->ipv_cap & HH_IPV_MASK); } sdp_inet_daddr(child) = sdp_inet_saddr(child) = sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6; } else #endif { sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr; } #ifdef SDP_SOCK_HISTORY sdp_ssk_hist_rename(sk); #endif __sock_put(child, SOCK_REF_CLONE); down_read(&device_removal_lock); rc = sdp_init_qp(child, 
/* sdp_connect_handler continuation: on QP failure free the half-built
 * child; otherwise seed credits/buffer sizes from the Hello header and
 * link the child onto the parent's backlog queue before SYN_RECV. */
id); if (rc) { bh_unlock_sock(child); up_read(&device_removal_lock); sdp_sk(child)->destructed_already = 1; #ifdef SDP_SOCK_HISTORY sdp_ssk_hist_close(child); #endif sk_free(child); return rc; } sdp_sk(child)->max_bufs = ntohs(h->bsdh.bufs); atomic_set(&sdp_sk(child)->tx_ring.credits, sdp_sk(child)->max_bufs); sdp_sk(child)->min_bufs = tx_credits(sdp_sk(child)) / 4; sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh); sdp_sk(child)->send_frags = sdp_get_max_send_frags(sdp_sk(child)->xmit_size_goal); sdp_init_buffers(sdp_sk(child), rcvbuf_initial_size); id->context = child; sdp_sk(child)->id = id; list_add_tail(&sdp_sk(child)->backlog_queue, &sdp_sk(sk)->backlog_queue); sdp_sk(child)->parent = sk; bh_unlock_sock(child); sdp_add_sock(sdp_sk(child)); up_read(&device_removal_lock); sdp_exch_state(child, TCPF_LISTEN | TCPF_CLOSE, TCP_SYN_RECV); /* child->sk_write_space(child); */ /* child->sk_data_ready(child, 0); */ sk->sk_data_ready(sk); return 0; } static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id, struct rdma_cm_event *event) { const struct sdp_hah *h; struct sockaddr_in *dst_addr; sdp_dbg(sk, "%s\n", __func__); sdp_exch_state(sk, TCPF_SYN_SENT, TCP_ESTABLISHED); sdp_set_default_moderation(sdp_sk(sk)); if (sock_flag(sk, SOCK_KEEPOPEN)) sdp_start_keepalive_timer(sk); if (sock_flag(sk, SOCK_DEAD)) return 0; h = event->param.conn.private_data; SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh); sdp_sk(sk)->max_bufs = ntohs(h->bsdh.bufs); atomic_set(&sdp_sk(sk)->tx_ring.credits, sdp_sk(sk)->max_bufs); sdp_sk(sk)->min_bufs = tx_credits(sdp_sk(sk)) / 4; sdp_sk(sk)->xmit_size_goal = ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh); sdp_sk(sk)->send_frags = sdp_get_max_send_frags(sdp_sk(sk)->xmit_size_goal); sdp_sk(sk)->xmit_size_goal = MIN(sdp_sk(sk)->xmit_size_goal, sdp_sk(sk)->send_frags * PAGE_SIZE); sdp_sk(sk)->poll_cq = 1; sk->sk_state_change(sk); sk_wake_async(sk, 0, POLL_OUT); dst_addr = (struct sockaddr_in 
/* sdp_response_handler tail plus the connected/disconnected helpers:
 * sdp_connected_handler() moves an accepted child from the parent's
 * backlog to its accept queue; sdp_disconnected_handler() drains pending
 * sends and maps an IB teardown to -ECONNRESET. */
*)&id->route.addr.dst_addr; sdp_inet_dport(sk) = dst_addr->sin_port; sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr; #ifdef SDP_SOCK_HISTORY sdp_ssk_hist_rename(sk); #endif return 0; } static int sdp_connected_handler(struct sock *sk) { struct sock *parent; sdp_dbg(sk, "%s\n", __func__); parent = sdp_sk(sk)->parent; BUG_ON(!parent); sdp_exch_state(sk, TCPF_SYN_RECV, TCP_ESTABLISHED); #ifdef SDP_SOCK_HISTORY sdp_ssk_hist_rename(sk); #endif sdp_set_default_moderation(sdp_sk(sk)); if (sock_flag(sk, SOCK_KEEPOPEN)) sdp_start_keepalive_timer(sk); if (sock_flag(sk, SOCK_DEAD)) return 0; lock_sock(parent); if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */ sdp_dbg(sk, "parent is going away.\n"); goto done; } sk_acceptq_added(parent); sdp_dbg(parent, "%s child connection established\n", __func__); list_del_init(&sdp_sk(sk)->backlog_queue); list_add_tail(&sdp_sk(sk)->accept_queue, &sdp_sk(parent)->accept_queue); parent->sk_state_change(parent); sk_wake_async(parent, 0, POLL_OUT); done: release_sock(parent); return 0; } static int sdp_disconnected_handler(struct sock *sk) { struct sdp_sock *ssk = sdp_sk(sk); sdp_dbg(sk, "%s\n", __func__); if (ssk->tx_ring.cq) if (sdp_xmit_poll(ssk, 1)) sdp_post_sends(ssk, 0); if (sk->sk_state == TCP_SYN_RECV) { sdp_connected_handler(sk); if (rcv_nxt(ssk)) return 0; } return -ECONNRESET; } int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct rdma_conn_param conn_param; struct sock *parent = NULL; struct sock *child = NULL; struct sock *sk; struct sdp_hah hah; struct sdp_hh hh; int rc = 0, rc2; sk = id->context; if (!sk) { sdp_dbg(NULL, "cm_id is being torn down, event %s\n", rdma_cm_event_str(event->event)); return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ? 
/* sdp_cma_handler continuation: holds the socket lock for the whole
 * dispatch. ADDR_RESOLVED kicks route resolution; ROUTE_RESOLVED builds
 * the QP, fills the SDP Hello (hh) and calls rdma_connect(). */
-EINVAL : 0; } sdp_add_to_history(sk, rdma_cm_event_str(event->event)); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); sdp_dbg(sk, "event: %s\n", rdma_cm_event_str(event->event)); if (!sdp_sk(sk)->id) { sdp_dbg(sk, "socket is being torn down\n"); rc = event->event == RDMA_CM_EVENT_CONNECT_REQUEST ? -EINVAL : 0; release_sock(sk); return rc; } switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: if (sdp_link_layer_ib_only && rdma_node_get_transport(id->device->node_type) == RDMA_TRANSPORT_IB && rdma_port_get_link_layer(id->device, id->port_num) != IB_LINK_LAYER_INFINIBAND) { sdp_dbg(sk, "Link layer is: %d. Only IB link layer " "is allowed\n", rdma_port_get_link_layer(id->device, id->port_num)); rc = -ENETUNREACH; break; } rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT); break; case RDMA_CM_EVENT_ADDR_ERROR: rc = -ENETUNREACH; break; case RDMA_CM_EVENT_ROUTE_RESOLVED: rc = sdp_init_qp(sk, id); if (rc) break; memset(&hh, 0, sizeof hh); hh.bsdh.mid = SDP_MID_HELLO; hh.bsdh.len = htonl(sizeof(struct sdp_hh)); hh.max_adverts = 1; hh.majv_minv = SDP_MAJV_MINV; sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size); hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk))); atomic_set(&sdp_sk(sk)->remote_credits, rx_ring_posted(sdp_sk(sk))); hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags * PAGE_SIZE + sizeof(struct sdp_bsdh)); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (inet6_sk(sk)) { struct sockaddr *src_addr = (struct sockaddr *)&id->route.addr.src_addr; struct sockaddr_in *addr4 = (struct sockaddr_in *)src_addr; struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)src_addr; if (src_addr->sa_family == AF_INET) { /* IPv4 over IPv6 */ ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0, htonl(0xFFFF), addr4->sin_addr.s_addr); } else { sk->sk_v6_rcv_saddr = addr6->sin6_addr; } inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr; } else #endif { sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) = ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr; } memset(&conn_param, 
/* sdp_cma_handler continuation: CONNECT_REQUEST accepts with a HelloAck
 * (hah) or rejects on error; CONNECT_RESPONSE / ESTABLISHED / DISCONNECTED
 * drive the TCP-like state machine. */
0, sizeof conn_param); conn_param.private_data_len = sizeof hh; conn_param.private_data = &hh; conn_param.responder_resources = 4 /* TODO */; conn_param.initiator_depth = 4 /* TODO */; conn_param.retry_count = sdp_retry_count; SDP_DUMP_PACKET(sk, "TX", NULL, &hh.bsdh); if (sdp_apm_enable) { rc = rdma_enable_apm(id, RDMA_ALT_PATH_BEST); if (rc) sdp_warn(sk, "APM couldn't be enabled for active side: %d\n", rc); } rc = rdma_connect(id, &conn_param); break; case RDMA_CM_EVENT_ALT_ROUTE_RESOLVED: sdp_dbg(sk, "alt route was resolved slid=%d, dlid=%d\n", id->route.path_rec[1].slid, id->route.path_rec[1].dlid); break; case RDMA_CM_EVENT_ALT_PATH_LOADED: sdp_dbg(sk, "alt route path loaded\n"); break; case RDMA_CM_EVENT_ALT_ROUTE_ERROR: sdp_warn(sk, "alt route resolve error\n"); break; case RDMA_CM_EVENT_ROUTE_ERROR: rc = -ETIMEDOUT; break; case RDMA_CM_EVENT_CONNECT_REQUEST: rc = sdp_connect_handler(sk, id, event); if (rc) { sdp_dbg(sk, "Destroying qp\n"); rdma_reject(id, NULL, 0); break; } child = id->context; atomic_set(&sdp_sk(child)->remote_credits, rx_ring_posted(sdp_sk(child))); memset(&hah, 0, sizeof hah); hah.bsdh.mid = SDP_MID_HELLO_ACK; hah.bsdh.bufs = htons(rx_ring_posted(sdp_sk(child))); hah.bsdh.len = htonl(sizeof(struct sdp_hah)); hah.majv_minv = SDP_MAJV_MINV; hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec, but just in case */ hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE + sizeof(struct sdp_bsdh)); memset(&conn_param, 0, sizeof conn_param); conn_param.private_data_len = sizeof hah; conn_param.private_data = &hah; conn_param.responder_resources = 4 /* TODO */; conn_param.initiator_depth = 4 /* TODO */; conn_param.retry_count = sdp_retry_count; SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh); rc = rdma_accept(id, &conn_param); if (rc) { sdp_sk(child)->id = NULL; id->qp = NULL; id->context = NULL; parent = sdp_sk(child)->parent; /* TODO: hold ? 
*/ } else if (sdp_apm_enable) { rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST); if (rc2) sdp_warn(sk, "APM couldn't be enabled for passive side: %d\n", rc2); } break; case RDMA_CM_EVENT_CONNECT_RESPONSE: rc = sdp_response_handler(sk, id, event); if (rc) { sdp_dbg(sk, "Destroying qp\n"); rdma_reject(id, NULL, 0); } else { rc = rdma_accept(id, NULL); if (!rc && sdp_apm_enable) { rc2 = rdma_enable_apm(id, RDMA_ALT_PATH_BEST); if (rc2) sdp_warn(sk, "APM couldn't be enabled for passive side:%d \n", rc2); } } break; case RDMA_CM_EVENT_CONNECT_ERROR: rc = -ETIMEDOUT; break; case RDMA_CM_EVENT_UNREACHABLE: rc = -ENETUNREACH; break; case RDMA_CM_EVENT_REJECTED: rc = -ECONNREFUSED; break; case RDMA_CM_EVENT_ESTABLISHED: sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) = ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr; rc = sdp_connected_handler(sk); break; case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */ if (sk->sk_state == TCP_LAST_ACK) { sdp_cancel_dreq_wait_timeout(sdp_sk(sk)); sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT); sdp_dbg(sk, "%s: waiting for Infiniband tear down\n", __func__); } sdp_sk(sk)->qp_active = 0; rdma_disconnect(id); if (sk->sk_state != TCP_TIME_WAIT) { if (sk->sk_state == TCP_CLOSE_WAIT) { sdp_dbg(sk, "IB teardown while in " "TCP_CLOSE_WAIT taking reference to " "let close() finish the work\n"); sock_hold(sk, SOCK_REF_CMA); sdp_start_cma_timewait_timeout(sdp_sk(sk), SDP_CMA_TIMEWAIT_TIMEOUT); } sdp_set_error(sk, -EPIPE); rc = sdp_disconnected_handler(sk); } break; case RDMA_CM_EVENT_TIMEWAIT_EXIT: rc = sdp_disconnected_handler(sk); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: rc = -ENETRESET; break; case RDMA_CM_EVENT_ADDR_CHANGE: sdp_dbg(sk, "Got Address change event\n"); rc = 0; break; default: printk(KERN_ERR "SDP: Unexpected CMA event: %d\n", event->event); rc = -ECONNABORTED; break; } sdp_dbg(sk, "event: %s handled\n", rdma_cm_event_str(event->event)); if (rc && sdp_sk(sk)->id == id) { child = sk; sdp_sk(sk)->id = 
/* sdp_cma_handler error tail: detach the cm_id from the socket, reset it,
 * then (outside the socket lock) drop a failed child off the parent's
 * backlog and release it. */
NULL; id->qp = NULL; id->context = NULL; parent = sdp_sk(sk)->parent; sdp_reset_sk(sk, rc); } release_sock(sk); sdp_dbg(sk, "event: %s done. status %d\n", rdma_cm_event_str(event->event), rc); if (parent) { lock_sock(parent); if (!sdp_sk(parent)->id) { /* TODO: look at SOCK_DEAD? */ sdp_dbg(sk, "parent is going away.\n"); child = NULL; goto done; } if (!list_empty(&sdp_sk(child)->backlog_queue)) list_del_init(&sdp_sk(child)->backlog_queue); else child = NULL; done: release_sock(parent); if (child) sdp_common_release(child); } return rc; }
/*
 * Older 9P/RDMA transport constructor (pre ib_cq_init_attr ib_create_cq
 * API, no privileged-port binding): resolve address and route, query the
 * device, then build CQ, PD, optional DMA MR and the RC QP before
 * rdma_connect().
 * NOTE(review): IS_ERR() failures jump to 'error' without updating 'err';
 * harmless here because the error path returns -ENOTCONN unconditionally
 * after rdma_destroy_trans().
 */
/** * trans_create_rdma - Transport method for creating atransport instance * @client: client instance * @addr: IP address string * @args: Mount options string */ static int rdma_create_trans(struct p9_client *client, const char *addr, char *args) { int err; struct p9_rdma_opts opts; struct p9_trans_rdma *rdma; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; /* Parse the transport specific mount options */ err = parse_opts(args, &opts); if (err < 0) return err; /* Create and initialize the RDMA transport structure */ rdma = alloc_rdma(&opts); if (!rdma) return -ENOMEM; /* Create the RDMA CM ID */ rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); if (IS_ERR(rdma->cm_id)) goto error; /* Associate the client with the transport */ client->trans = rdma; /* Resolve the server's address */ rdma->addr.sin_family = AF_INET; rdma->addr.sin_addr.s_addr = in_aton(addr); rdma->addr.sin_port = htons(opts.port); err = rdma_resolve_addr(rdma->cm_id, NULL, (struct sockaddr *)&rdma->addr, rdma->timeout); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) goto error; /* Resolve the route to the server */ err = rdma_resolve_route(rdma->cm_id, rdma->timeout); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) goto error; /* Query the device attributes */ err = ib_query_device(rdma->cm_id->device, &devattr); if (err) goto error; /* Create the Completion Queue */ rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, cq_event_handler, client, opts.sq_depth + opts.rq_depth + 1, 0); if (IS_ERR(rdma->cq)) goto error; ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); /* Create the Protection Domain */ rdma->pd = ib_alloc_pd(rdma->cm_id->device); if (IS_ERR(rdma->pd)) goto error; /* Cache the DMA lkey in the transport */ rdma->dma_mr = NULL; if 
/* Continuation: cache the lkey (device-wide local DMA lkey when supported,
 * otherwise via a DMA MR), create the RC QP, connect, and wait for the
 * P9_RDMA_CONNECTED state. */
(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) rdma->lkey = rdma->cm_id->device->local_dma_lkey; else { rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(rdma->dma_mr)) goto error; rdma->lkey = rdma->dma_mr->lkey; } /* Create the Queue Pair */ memset(&qp_attr, 0, sizeof qp_attr); qp_attr.event_handler = qp_event_handler; qp_attr.qp_context = client; qp_attr.cap.max_send_wr = opts.sq_depth; qp_attr.cap.max_recv_wr = opts.rq_depth; qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = rdma->cq; qp_attr.recv_cq = rdma->cq; err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); if (err) goto error; rdma->qp = rdma->cm_id->qp; /* Request a connection */ memset(&conn_param, 0, sizeof(conn_param)); conn_param.private_data = NULL; conn_param.private_data_len = 0; conn_param.responder_resources = P9_RDMA_IRD; conn_param.initiator_depth = P9_RDMA_ORD; err = rdma_connect(rdma->cm_id, &conn_param); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_CONNECTED)) goto error; client->status = Connected; return 0; error: rdma_destroy_trans(rdma); return -ENOTCONN; }
int rdma_client_connect(struct pingpong_context *ctx,struct perftest_parameters *user_param) { char *service; int temp,num_of_retry= NUM_OF_RETRIES; struct sockaddr_in sin; struct addrinfo *res; struct rdma_cm_event *event; struct rdma_conn_param conn_param; struct addrinfo hints; memset(&hints, 0, sizeof hints); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; if (check_add_port(&service,user_param->port,user_param->servername,&hints,&res)) { fprintf(stderr, "Problem in resolving basic adress and port\n"); return FAILURE; } sin.sin_addr.s_addr = ((struct sockaddr_in*)res->ai_addr)->sin_addr.s_addr; sin.sin_family = PF_INET; sin.sin_port = htons((unsigned short)user_param->port); while (1) { if (num_of_retry == 0) { fprintf(stderr, "Received %d times ADDR_ERROR\n",NUM_OF_RETRIES); return FAILURE; } if (rdma_resolve_addr(ctx->cm_id, NULL,(struct sockaddr *)&sin,2000)) { fprintf(stderr, "rdma_resolve_addr failed\n"); return FAILURE; } if (rdma_get_cm_event(ctx->cm_channel,&event)) { fprintf(stderr, "rdma_get_cm_events failed\n"); return FAILURE; } if (event->event == RDMA_CM_EVENT_ADDR_ERROR) { num_of_retry--; rdma_ack_cm_event(event); continue; } if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) { fprintf(stderr, "unexpected CM event %d\n",event->event); rdma_ack_cm_event(event); return FAILURE; } rdma_ack_cm_event(event); break; } if (user_param->tos != DEF_TOS) { if (rdma_set_option(ctx->cm_id,RDMA_OPTION_ID,RDMA_OPTION_ID_TOS,&user_param->tos,sizeof(uint8_t))) { fprintf(stderr, " Set TOS option failed: %d\n",event->event); return FAILURE; } } while (1) { if (num_of_retry <= 0) { fprintf(stderr, "Received %d times ADDR_ERROR - aborting\n",NUM_OF_RETRIES); return FAILURE; } if (rdma_resolve_route(ctx->cm_id,2000)) { fprintf(stderr, "rdma_resolve_route failed\n"); return FAILURE; } if (rdma_get_cm_event(ctx->cm_channel,&event)) { fprintf(stderr, "rdma_get_cm_events failed\n"); return FAILURE; } if (event->event == RDMA_CM_EVENT_ROUTE_ERROR) { 
num_of_retry--; rdma_ack_cm_event(event); continue; } if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) { fprintf(stderr, "unexpected CM event %d\n",event->event); rdma_ack_cm_event(event); return FAILURE; } rdma_ack_cm_event(event); break; } ctx->context = ctx->cm_id->verbs; temp = user_param->work_rdma_cm; user_param->work_rdma_cm = ON; if (ctx_init(ctx,user_param)) { fprintf(stderr," Unable to create the resources needed by comm struct\n"); return FAILURE; } memset(&conn_param, 0, sizeof conn_param); if (user_param->verb == READ || user_param->verb == ATOMIC) { conn_param.responder_resources = user_param->out_reads; conn_param.initiator_depth = user_param->out_reads; } user_param->work_rdma_cm = temp; conn_param.retry_count = user_param->retry_count; conn_param.rnr_retry_count = 7; if (user_param->work_rdma_cm == OFF) { if (post_one_recv_wqe(ctx)) { fprintf(stderr, "Couldn't post send \n"); return 1; } } if (rdma_connect(ctx->cm_id,&conn_param)) { fprintf(stderr, "Function rdma_connect failed\n"); return FAILURE; } if (rdma_get_cm_event(ctx->cm_channel,&event)) { fprintf(stderr, "rdma_get_cm_events failed\n"); return FAILURE; } if (event->event != RDMA_CM_EVENT_ESTABLISHED) { rdma_ack_cm_event(event); fprintf(stderr, "Unexpected CM event bl blka %d\n", event->event); return FAILURE; } if (user_param->connection_type == UD) { user_param->rem_ud_qpn = event->param.ud.qp_num; user_param->rem_ud_qkey = event->param.ud.qkey; ctx->ah[0] = ibv_create_ah(ctx->pd,&event->param.ud.ah_attr); if (!ctx->ah) { printf(" Unable to create address handler for UD QP\n"); return FAILURE; } if (user_param->tst == LAT || (user_param->tst == BW && user_param->duplex)) { if (send_qp_num_for_ah(ctx,user_param)) { printf(" Unable to send my QP number\n"); return FAILURE; } } } rdma_ack_cm_event(event); return SUCCESS; }
/*
 * Active-side demo client: resolve the server, connect, exchange memory
 * regions, then tear everything down.
 * Usage: <prog> (write|read) <server-addr> <port>
 */
static int run(int argc, char **argv)
{
	struct addrinfo *server_ai;
	struct rdma_cm_id *id = NULL;
	struct rdma_event_channel *channel = NULL;
	struct rdma_conn_param params;

	if (argc != 4)
		usage(argv[0]);

	/* First argument selects the RDMA verb used for the data transfer. */
	if (strcmp(argv[1], "write") == 0)
		set_mode(M_WRITE);
	else if (strcmp(argv[1], "read") == 0)
		set_mode(M_READ);
	else
		usage(argv[0]);

	TEST_NZ(getaddrinfo(argv[2], argv[3], NULL, &server_ai));

	/* Event channel + CM id: the "socket" of the rdma_cm world. */
	TEST_Z(channel = rdma_create_event_channel());
	TEST_NZ(rdma_create_id(channel, &id, NULL, RDMA_PS_TCP));

	/* rdma_resolve_addr() maps the destination IP to an RDMA address and
	 * binds the id to a local device. */
	TEST_NZ(rdma_resolve_addr(id, NULL, server_ai->ai_addr, TIMEOUT_IN_MS));
	TEST_NZ(wait_for_event(channel, RDMA_CM_EVENT_ADDR_RESOLVED));
	freeaddrinfo(server_ai);

	build_connection(id);
	sprintf(get_local_message_region(id->context),
		"message from active/client side with pid %d", getpid());

	/* rdma_resolve_route() needs the address already resolved above. */
	TEST_NZ(rdma_resolve_route(id, TIMEOUT_IN_MS));
	TEST_NZ(wait_for_event(channel, RDMA_CM_EVENT_ROUTE_RESOLVED));
	print_path_rec(id);

	/* For RDMA_PS_TCP ids, rdma_connect() sends a connection request to
	 * the remote destination. */
	build_params(&params);
	printf("Connecting ...\n");
	TEST_NZ(rdma_connect(id, &params));
	TEST_NZ(wait_for_event(channel, RDMA_CM_EVENT_ESTABLISHED));
	printf("Connected !\n");

	on_connect(id->context);
	send_mr(id->context);

	rdma_disconnect(id);
	rdma_destroy_id(id);
	rdma_destroy_event_channel(channel);

	return 0;
}
/*
 * Client-side (active) initialization of the RDMA communicator: resolve
 * the server named in param->host on RDMA_PORT, build the connection,
 * connect, and size the send buffers.  Any setup failure prints a
 * diagnostic and terminates the process with exit(1).
 *
 * @comm   communicator whose ec/cm_id are created here
 * @param  connection parameters (param->host is the server name)
 * @return 0 on success (never returns on failure)
 */
int RDMA_Active_Init(struct RDMA_communicator *comm, struct RDMA_param *param)
{
  struct addrinfo *addr;
  struct rdma_conn_param cm_params;
  char port[8];
  char *value;
  int i;

  /* BUGFIX: snprintf bounds the write; sprintf could overflow port[8] if
   * RDMA_PORT were ever widened. */
  snprintf(port, sizeof(port), "%d", RDMA_PORT);

  /* BUGFIX throughout: every message now ends in '\n' (only the last one
   * did before), so diagnostics are not glued to later stderr output. */
  if (getaddrinfo(param->host, port, NULL, &addr)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: getaddrinfo failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }
  if (!(comm->ec = rdma_create_event_channel())) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: rdma event channel create failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }
  if (rdma_create_id(comm->ec, &(comm->cm_id), NULL, RDMA_PS_TCP)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: rdma id create failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }
  if (rdma_resolve_addr(comm->cm_id, NULL, addr->ai_addr, TIMEOUT_IN_MS)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: rdma address resolve failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }
  if (wait_for_event(comm->ec, RDMA_CM_EVENT_ADDR_RESOLVED)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: event wait failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }
  freeaddrinfo(addr);

  build_connection(comm->cm_id);

  if (rdma_resolve_route(comm->cm_id, TIMEOUT_IN_MS)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: rdma route resolve failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }
  if (wait_for_event(comm->ec, RDMA_CM_EVENT_ROUTE_RESOLVED)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: event wait failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  build_params(&cm_params);

  if (rdma_connect(comm->cm_id, &cm_params)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: rdma connection failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }
  if (wait_for_event(comm->ec, RDMA_CM_EVENT_ESTABLISHED)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: event wait failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  /* Memory-region slots are registered lazily; start them all empty. */
  for (i = 0; i < RDMA_BUF_NUM_C; i++)
    rdma_msg_mr[i] = NULL;

  /* RDMA_CLIENT_NUM_S scales the per-client buffer size down from the
   * global maximum. */
  value = getenv("RDMA_CLIENT_NUM_S");
  if (value == NULL) {
    rdma_buf_size = RDMA_BUF_SIZE_C;
  } else {
    int num_clients = atoi(value);
    /* BUGFIX: atoi() yields 0 on non-numeric input and the old code
     * divided by it; fall back to the default instead of crashing. */
    if (num_clients <= 0) {
      fprintf(stderr, "RDMA lib: SEND: WARNING: invalid RDMA_CLIENT_NUM_S '%s', using default buffer size\n", value);
      rdma_buf_size = RDMA_BUF_SIZE_C;
    } else {
      rdma_buf_size = MAX_RDMA_BUF_SIZE_C / num_clients;
    }
  }
  fprintf(stderr, "rdma_buf_size: %d\n", rdma_buf_size);

  return 0;
}
/*
 * NOTE(review): the single rdma_ack_cm_event() at the bottom frees
 * 'event', so every case above must finish reading event fields before
 * falling out of the switch.  Loopback cm_ids are tagged by setting bit 0
 * of the context pointer — presumably done where the loopback id is
 * created; verify against the connect path.  ADDR_RESOLVED ->
 * ROUTE_RESOLVED -> (create QP, rdma_connect) -> ESTABLISHED is the
 * active-side sequence; each state transition is made under conn->mutex
 * and broadcast on conn->move_wait.
 */
/** * Process CM event. * * there is a listening rdmacm id per iface * this is called as a handler from libev * * @param[in] w * @param[in] revents */ void process_cm_event(EV_P_ ev_io *w, int revents) { struct iface *iface = w->data; ni_t *ni; struct rdma_cm_event *event; conn_t *conn; struct rdma_conn_param conn_param; struct cm_priv_request priv; struct ibv_qp_init_attr init; uintptr_t ctx; if (rdma_get_cm_event(iface->cm_channel, &event)) { WARN(); return; } /* In case of connection requests conn will be NULL. */ ctx = (uintptr_t) event->id->context; if (ctx & 1) { /* Loopback. The context is not a conn but the NI. */ ctx &= ~1; conn = NULL; ni = (void *)ctx; } else { conn = (void *)ctx; ni = conn ? conn->obj.obj_ni : NULL; } ptl_info("Rank got CM event %d for id %p\n", event->event, event->id); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: if (!conn) break; pthread_mutex_lock(&conn->mutex); if (conn->state != CONN_STATE_RESOLVING_ADDR) { /* Our connect attempt got overriden by the remote * side. */ conn_put(conn); pthread_mutex_unlock(&conn->mutex); break; } assert(conn->rdma.cm_id == event->id); conn->state = CONN_STATE_RESOLVING_ROUTE; if (rdma_resolve_route(event->id, get_param(PTL_RDMA_TIMEOUT))) { conn->state = CONN_STATE_DISCONNECTED; pthread_cond_broadcast(&conn->move_wait); conn->rdma.cm_id = NULL; conn_put(conn); } pthread_mutex_unlock(&conn->mutex); break; case RDMA_CM_EVENT_ROUTE_RESOLVED: if (!conn) break; memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 1; conn_param.initiator_depth = 1; conn_param.retry_count = 7; conn_param.rnr_retry_count = 7; conn_param.private_data = &priv; conn_param.private_data_len = sizeof(priv); pthread_mutex_lock(&conn->mutex); if (conn->state != CONN_STATE_RESOLVING_ROUTE) { /* Our connect attempt got overriden by the remote * side. */ conn_put(conn); pthread_mutex_unlock(&conn->mutex); break; } assert(conn->rdma.cm_id == event->id); /* Create the QP. 
*/ memset(&init, 0, sizeof(init)); init.qp_context = ni; init.send_cq = ni->rdma.cq; init.recv_cq = ni->rdma.cq; init.cap.max_send_wr = ni->iface->cap.max_send_wr; init.cap.max_send_sge = ni->iface->cap.max_send_sge; init.qp_type = IBV_QPT_RC; init.srq = ni->rdma.srq; priv.src_id = ni->id; priv.options = ni->options; assert(conn->rdma.cm_id == event->id); if (rdma_create_qp(event->id, ni->iface->pd, &init)) { WARN(); conn->state = CONN_STATE_DISCONNECTED; pthread_cond_broadcast(&conn->move_wait); conn->rdma.cm_id = NULL; conn_put(conn); } else if (rdma_connect(event->id, &conn_param)) { WARN(); conn->state = CONN_STATE_DISCONNECTED; pthread_cond_broadcast(&conn->move_wait); rdma_destroy_qp(conn->rdma.cm_id); conn->rdma.cm_id = NULL; conn_put(conn); } else { conn->state = CONN_STATE_CONNECTING; } pthread_mutex_unlock(&conn->mutex); break; case RDMA_CM_EVENT_ESTABLISHED: if (!conn) { /* Self connection. Let the initiator side finish the * connection. */ break; } pthread_mutex_lock(&conn->mutex); atomic_inc(&ni->rdma.num_conn); if (conn->state != CONN_STATE_CONNECTING) { pthread_mutex_unlock(&conn->mutex); break; } assert(conn->rdma.cm_id == event->id); get_qp_param(conn); conn->state = CONN_STATE_CONNECTED; pthread_cond_broadcast(&conn->move_wait); pthread_mutex_unlock(&conn->mutex); break; case RDMA_CM_EVENT_CONNECT_REQUEST: process_connect_request(iface, event); break; case RDMA_CM_EVENT_REJECTED: if (!conn) break; process_connect_reject(event, conn); break; case RDMA_CM_EVENT_DISCONNECTED: if (!conn) { /* That should be the loopback connection only. */ assert(ni->rdma.self_cm_id == event->id); rdma_disconnect(ni->rdma.self_cm_id); rdma_destroy_qp(ni->rdma.self_cm_id); break; } pthread_mutex_lock(&conn->mutex); assert(conn->state != CONN_STATE_DISCONNECTED); if (conn->state != CONN_STATE_DISCONNECTING) { /* Not disconnecting yet, so we have to disconnect too. 
*/ rdma_disconnect(conn->rdma.cm_id); rdma_destroy_qp(conn->rdma.cm_id); } conn->state = CONN_STATE_DISCONNECTED; pthread_cond_broadcast(&conn->move_wait); atomic_dec(&ni->rdma.num_conn); pthread_mutex_unlock(&conn->mutex); break; case RDMA_CM_EVENT_CONNECT_ERROR: if (!conn) break; pthread_mutex_lock(&conn->mutex); if (conn->state != CONN_STATE_DISCONNECTED) { conn->state = CONN_STATE_DISCONNECTED; pthread_cond_broadcast(&conn->move_wait); conn->rdma.cm_id->context = NULL; rdma_destroy_qp(conn->rdma.cm_id); pthread_mutex_unlock(&conn->mutex); conn_put(conn); } else { pthread_mutex_unlock(&conn->mutex); } break; case RDMA_CM_EVENT_TIMEWAIT_EXIT: break; default: ptl_warn("Got unexpected CM event: %d\n", event->event); break; } rdma_ack_cm_event(event); }