void Acceptor::listen(std::string const& hostname, std::string const& port) { auto res = create_addr_info(hostname, port); ibv_qp_init_attr init_attr; memset(&init_attr, 0, sizeof(init_attr)); init_attr.cap.max_send_wr = m_credits; init_attr.cap.max_recv_wr = m_credits; init_attr.cap.max_send_sge = 1; init_attr.cap.max_recv_sge = 1; init_attr.cap.max_inline_data = 0; init_attr.sq_sig_all = 1; init_attr.qp_type = IBV_QPT_RC; int ret = rdma_create_ep(&m_cm_id, res, NULL, &init_attr); destroy_addr_info(res); if (ret) { rdma_destroy_ep(m_cm_id); throw exception::acceptor::generic_error( "Error on rdma_create_ep: " + std::string(strerror(errno))); } if (rdma_listen(m_cm_id, 128)) { rdma_destroy_ep(m_cm_id); throw exception::acceptor::generic_error( "Error on rdma_listen: " + std::string(strerror(errno))); } }
std::unique_ptr<Socket> Acceptor::accept() { rdma_cm_id* new_cm_id; if (rdma_get_request(m_cm_id, &new_cm_id)) { throw exception::acceptor::generic_error( "Error on rdma_get_request: " + std::string(strerror(errno))); } rdma_conn_param conn_param; memset(&conn_param, 0, sizeof(rdma_conn_param)); conn_param.rnr_retry_count = m_rnr_retry_count; if (rdma_accept(new_cm_id, &conn_param)) { rdma_destroy_ep(new_cm_id); throw exception::acceptor::generic_error( "Error on rdma_accept: " + std::string(strerror(errno))); } ibv_qp_attr attr; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.min_rnr_timer = m_min_rtr_timer; int flags = IBV_QP_MIN_RNR_TIMER; if (ibv_modify_qp(new_cm_id->qp, &attr, flags)) { rdma_destroy_ep(new_cm_id); throw exception::acceptor::generic_error( "Error on ibv_modify_qp: " + std::string(strerror(errno))); } std::unique_ptr<Socket> socket_ptr(new Socket(new_cm_id, m_credits)); return socket_ptr; }
int fi_ibv_getinfo(uint32_t version, const char *node, const char *service, uint64_t flags, struct fi_info *hints, struct fi_info **info) { struct rdma_cm_id *id; struct rdma_addrinfo *rai; int ret; ret = fi_ibv_init_info(); if (ret) goto out; ret = fi_ibv_create_ep(node, service, flags, hints, &rai, &id); if (ret) goto out; if (id->verbs) { ret = fi_ibv_get_matching_info(ibv_get_device_name(id->verbs->device), hints, rai, info); } else { ret = fi_ibv_get_matching_info(NULL, hints, rai, info); } rdma_destroy_ep(id); rdma_freeaddrinfo(rai); out: if (!ret || ret == -FI_ENOMEM) return ret; else return -FI_ENODATA; }
/* Release a verbs MSG endpoint: destroy its rdma_cm id (only if one was
 * created), free the cached fi_info clone, then the endpoint object itself. */
static void fi_ibv_free_msg_ep(struct fi_ibv_msg_ep *ep)
{
	if (ep->id)
		rdma_destroy_ep(ep->id);
	fi_freeinfo(ep->info);
	free(ep);
}
// Tear down the connection in dependency order: disconnect the cm id first,
// then deregister the message MR, destroy the endpoint (QP + cm_id), and
// finally release the backing buffer that the MR covered.
RDMACMSocket::~RDMACMSocket() {
    rdma_disconnect(this->client_id);
    rdma_dereg_mr(this->verbs_mr);
    rdma_destroy_ep(this->client_id);
    this->verbs_buf.free();
}
// Post buf as a receive work request on the connection; buf.addr doubles as
// the wr_id/context so completions can be matched back to the buffer.
// On failure the connection is torn down and the process exits.
void RDMACMSocket::post_recv(const Buffer& buf) {
    if (rdma_post_recv(this->client_id, buf.addr, buf.addr, buf.size,
                       this->verbs_mr) < 0) {
        // BUG FIX: report the failure BEFORE tearing anything down --
        // rdma_dereg_mr()/free()/rdma_destroy_ep() may clobber errno, which
        // made the old perror() print an unrelated error.
        perror("rdma_post_recv");
        rdma_dereg_mr(this->verbs_mr);
        this->verbs_buf.free();
        rdma_destroy_ep(this->client_id);
        exit(1);
    }
}
/* fid close handler for a passive endpoint: destroy the listening rdma_cm
 * id (if it was ever created) and free the pep container. Always succeeds. */
static int __fi_pep_close(fid_t fid)
{
	struct __fid_pep *pep;

	pep = container_of(fid, struct __fid_pep, pep_fid.fid);
	if (pep->id)
		rdma_destroy_ep(pep->id);
	free(pep);
	return 0;
}
/* Stop a running KIRO server: signal the event loop to stop, disconnect all
 * clients, join the main thread, and destroy the listening endpoint and its
 * event channel. Safe to call only after kiro_server_start succeeded (the
 * function returns early when priv->base is NULL). */
void kiro_server_stop (KiroServer *self)
{
    g_return_if_fail (self != NULL);
    KiroServerPrivate *priv = KIRO_SERVER_GET_PRIVATE (self);

    if (!priv->base)
        return;

    //Shut down event listening
    priv->close_signal = TRUE;
    g_debug ("Event handling stopped");

    // Disconnect every connected client, then drop the list container.
    g_list_foreach (priv->clients, disconnect_client, NULL);
    g_list_free (priv->clients);

    // Stop the main loop and clear its memory
    g_main_loop_quit (priv->main_loop);
    g_main_loop_unref (priv->main_loop);
    priv->main_loop = NULL;

    // Ask the main thread to join (It probably already has, but we do it
    // anyways. Just in case!)
    g_thread_join (priv->main_thread);
    priv->main_thread = NULL;

    // We don't need the connection management IO channel container any more.
    // Unref and thus free it.
    g_io_channel_unref (priv->conn_ec);
    priv->conn_ec = NULL;
    priv->close_signal = FALSE;

    // kiro_destroy_connection would try to call rdma_disconnect on the given
    // connection. But the server never 'connects' to anywhere, so this would
    // cause a crash. We need to destroy the enpoint manually without disconnect
    struct kiro_connection_context *ctx = (struct kiro_connection_context *) (priv->base->context);
    kiro_destroy_connection_context (&ctx);
    rdma_destroy_ep (priv->base);
    priv->base = NULL;

    // The event channel must outlive the cm id (destroyed just above).
    rdma_destroy_event_channel (priv->ec);
    priv->ec = NULL;
    g_message ("Server stopped successfully");
}
/* Resolve node/service into an rdma_addrinfo and create an rdma_cm endpoint
 * from it. On success *id holds the new endpoint; if rai is non-NULL the
 * caller also receives the addrinfo (with ai_src_addr filled in from the
 * endpoint's local address when the resolver left it empty), otherwise the
 * addrinfo is freed here. Returns 0 or a negative fi_errno value. */
int fi_ibv_create_ep(const char *node, const char *service,
		     uint64_t flags, const struct fi_info *hints,
		     struct rdma_addrinfo **rai, struct rdma_cm_id **id)
{
	struct rdma_addrinfo *_rai;
	struct sockaddr *local_addr;
	int ret;

	ret = fi_ibv_get_rdma_rai(node, service, flags, hints, &_rai);
	if (ret) {
		return ret;
	}

	ret = rdma_create_ep(id, _rai, NULL, NULL);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "rdma_create_ep", errno);
		ret = -errno;
		goto err1;
	}

	/* Caller wants the addrinfo back but the resolver produced no source
	 * address: copy the endpoint's local address into ai_src_addr. */
	if (rai && !_rai->ai_src_addr) {
		local_addr = rdma_get_local_addr(*id);
		_rai->ai_src_len = fi_ibv_sockaddr_len(local_addr);
		if (!(_rai->ai_src_addr = malloc(_rai->ai_src_len))) {
			ret = -FI_ENOMEM;
			goto err2;
		}
		memcpy(_rai->ai_src_addr, local_addr, _rai->ai_src_len);
	}

	if (rai) {
		*rai = _rai;
	} else {
		rdma_freeaddrinfo(_rai);
	}
	return ret;
err2:
	rdma_destroy_ep(*id);
err1:
	rdma_freeaddrinfo(_rai);
	return ret;
}
// Establish an RC connection to hp and return a heap-allocated socket
// wrapping the connected cm id. Any failure is reported via perror and
// terminates the process.
RDMACMSocket* RDMACMSocket::connect(const HostAndPort& hp) {
    struct rdma_addrinfo hints;
    memset(&hints, 0, sizeof(hints));
    hints.ai_port_space = RDMA_PS_TCP;

    // rdma_getaddrinfo historically takes non-const strings.
    struct rdma_addrinfo* res = NULL;
    if (rdma_getaddrinfo(const_cast<char*>(hp.hostname),
                         const_cast<char*>(hp.port_str), &hints, &res) < 0) {
        perror("rdma_getaddrinfo");
        exit(1);
    }

    // Size both queues for one packet window; signal every send.
    struct ibv_qp_init_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.cap.max_send_wr = PACKET_WINDOW_SIZE;
    attr.cap.max_recv_wr = PACKET_WINDOW_SIZE;
    attr.cap.max_send_sge = 1;
    attr.cap.max_recv_sge = 1;
    attr.cap.max_inline_data = 0;
    attr.sq_sig_all = 1;

    struct rdma_cm_id* client_id = NULL;
    if (rdma_create_ep(&client_id, res, NULL, &attr) < 0) {
        rdma_freeaddrinfo(res);
        perror("rdma_create_ep");
        exit(1);
    }
    rdma_freeaddrinfo(res);

    if (rdma_connect(client_id, NULL) < 0) {
        rdma_destroy_ep(client_id);
        perror("rdma_connect");
        exit(1);
    }
    return new RDMACMSocket(client_id);
}
/* Set the source address of a MSG endpoint. The new address must have
 * exactly the endpoint's current src_addrlen. The endpoint is re-created
 * with the new address; on any failure the previous address is restored.
 * Returns 0 or a negative fi_errno value. */
static int fi_ibv_msg_ep_setname(fid_t ep_fid, void *addr, size_t addrlen)
{
	struct fi_ibv_msg_ep *ep;
	void *save_addr;
	struct rdma_cm_id *id;
	int ret;

	ep = container_of(ep_fid, struct fi_ibv_msg_ep, ep_fid);

	if (addrlen != ep->info->src_addrlen) {
		/* BUG FIX: addrlen and src_addrlen are size_t; printing them
		 * with %d is undefined behavior on LP64 targets. Use %zu. */
		FI_INFO(&fi_ibv_prov, FI_LOG_EP_CTRL,
			"addrlen expected: %zu, got: %zu.\n",
			ep->info->src_addrlen, addrlen);
		return -FI_EINVAL;
	}

	/* Keep the old address around so it can be restored on failure. */
	save_addr = ep->info->src_addr;

	ep->info->src_addr = malloc(ep->info->src_addrlen);
	if (!ep->info->src_addr) {
		ret = -FI_ENOMEM;
		goto err1;
	}
	memcpy(ep->info->src_addr, addr, ep->info->src_addrlen);

	ret = fi_ibv_create_ep(NULL, NULL, 0, ep->info, NULL, &id);
	if (ret)
		goto err2;

	/* Swap in the freshly bound cm id; drop the old one if present. */
	if (ep->id)
		rdma_destroy_ep(ep->id);
	ep->id = id;

	free(save_addr);
	return 0;
err2:
	free(ep->info->src_addr);
err1:
	ep->info->src_addr = save_addr;
	return ret;
}
// Allocate one contiguous region for a full window of receive packets plus
// a full window of send packets, register it as a single MR, pre-post every
// receive slot, and remember the send slots in send_bufs. On registration
// failure the endpoint is destroyed and the process exits.
void RDMACMSocket::setup_verbs_buf() {
    // First half of the region holds receive slots, second half send slots.
    this->verbs_buf = Buffer::allocate(PACKET_SIZE * PACKET_WINDOW_SIZE * 2);
    this->verbs_mr = rdma_reg_msgs(this->client_id, this->verbs_buf.addr, this->verbs_buf.size);
    if (this->verbs_mr == NULL) {
        this->verbs_buf.free();
        rdma_destroy_ep(this->client_id);
        perror("rdma_reg_msgs");
        exit(1);
    }
    char* send_buf_begin = this->verbs_buf.addr + PACKET_SIZE * PACKET_WINDOW_SIZE;
    for (int i = 0; i < PACKET_WINDOW_SIZE; ++i) {
        // Keep the receive window full so the peer never sees RNR.
        Buffer recv_buf(this->verbs_buf.addr + i * PACKET_SIZE, PACKET_SIZE);
        post_recv(recv_buf);
        Buffer send_buf(send_buf_begin + i * PACKET_SIZE, PACKET_SIZE);
        send_bufs.push_back(send_buf);
    }
}
/* Bind a temporary endpoint to (name, service) and copy the resolved source
 * address into the fi_info entry whose domain matches the bound device.
 * NOTE(review): failures after getaddrinfo (create_ep error, no matching
 * fi_info) are logged but the function still returns 0 -- presumably a
 * deliberate best-effort policy; confirm against callers. */
static int fi_ibv_copy_ifaddr(const char *name, const char *service, uint64_t flags,
			      struct fi_info *info)
{
	struct rdma_addrinfo *rai;
	struct fi_info *fi;
	struct rdma_cm_id *id;
	int ret;

	ret = fi_ibv_get_rdma_rai(name, service, flags, NULL, &rai);
	if (ret) {
		FI_WARN(&fi_ibv_prov, FI_LOG_FABRIC,
			"rdma_getaddrinfo failed for name:%s\n", name);
		return ret;
	}

	ret = rdma_create_ep(&id, rai, NULL, NULL);
	if (!ret) {
		/* Prefix match: domain names may carry a provider suffix. */
		for (fi = info; fi; fi = fi->next)
			if (!strncmp(id->verbs->device->name, fi->domain_attr->name,
				     strlen(id->verbs->device->name)))
				break;
		if (!fi) {
			FI_WARN(&fi_ibv_prov, FI_LOG_FABRIC,
				"No matching fi_info for device: "
				"%s with address: %s\n",
				id->verbs->device->name, name);
		} else {
			/* Replace any previously stored source address. */
			if (fi->src_addr) {
				free(fi->src_addr);
				fi->src_addr = NULL;
			}
			fi_ibv_rai_to_fi(rai, fi);
		}
		rdma_destroy_ep(id);
	}
	rdma_freeaddrinfo(rai);
	return 0;
}
/* Counterpart of fi_ibv_create_ep: release the addrinfo chain and destroy
 * the endpoint it produced. *id is not cleared for the caller. */
void fi_ibv_destroy_ep(struct rdma_addrinfo *rai, struct rdma_cm_id **id)
{
	rdma_freeaddrinfo(rai);
	rdma_destroy_ep(*id);
}
/* Builds a list of interfaces that correspond to active verbs devices */ static int fi_ibv_getifaddrs(struct dlist_entry *verbs_devs) { struct ifaddrs *ifaddr, *ifa; char name[INET6_ADDRSTRLEN]; struct rdma_addrinfo *rai; struct rdma_cm_id *id; const char *ret_ptr; int ret, num_verbs_ifs = 0; char *iface = NULL; size_t iface_len = 0; int exact_match = 0; ret = getifaddrs(&ifaddr); if (ret) { VERBS_WARN(FI_LOG_FABRIC, "Unable to get interface addresses\n"); return ret; } /* select best iface name based on user's input */ if (fi_param_get_str(&fi_ibv_prov, "iface", &iface) == FI_SUCCESS) { iface_len = strlen(iface); if (iface_len > IFNAMSIZ) { VERBS_INFO(FI_LOG_EP_CTRL, "Too long iface name: %s, max: %d\n", iface, IFNAMSIZ); return -FI_EINVAL; } for (ifa = ifaddr; ifa && !exact_match; ifa = ifa->ifa_next) exact_match = !strcmp(ifa->ifa_name, iface); } for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) { if (!ifa->ifa_addr || !(ifa->ifa_flags & IFF_UP) || !strcmp(ifa->ifa_name, "lo")) continue; if(iface) { if(exact_match) { if(strcmp(ifa->ifa_name, iface)) continue; } else { if(strncmp(ifa->ifa_name, iface, iface_len)) continue; } } switch (ifa->ifa_addr->sa_family) { case AF_INET: ret_ptr = inet_ntop(AF_INET, &ofi_sin_addr(ifa->ifa_addr), name, INET6_ADDRSTRLEN); break; case AF_INET6: ret_ptr = inet_ntop(AF_INET6, &ofi_sin6_addr(ifa->ifa_addr), name, INET6_ADDRSTRLEN); break; default: continue; } if (!ret_ptr) { VERBS_WARN(FI_LOG_FABRIC, "inet_ntop failed: %s(%d)\n", strerror(errno), errno); goto err1; } ret = fi_ibv_create_ep(name, NULL, FI_NUMERICHOST | FI_SOURCE, NULL, &rai, &id); if (ret) continue; ret = fi_ibv_add_rai(verbs_devs, id, rai); if (ret) goto err2; VERBS_DBG(FI_LOG_FABRIC, "Found active interface for verbs device: " "%s with address: %s\n", ibv_get_device_name(id->verbs->device), name); rdma_destroy_ep(id); num_verbs_ifs++; } freeifaddrs(ifaddr); return num_verbs_ifs ? 
0 : -FI_ENODATA; err2: rdma_destroy_ep(id); err1: fi_ibv_verbs_devs_free(verbs_devs); freeifaddrs(ifaddr); return ret; }
/* Client side of an inline-send latency check: connect to server:port,
 * time one 16-byte inline send, wait for the 16-byte reply, print the
 * elapsed time, and tear the connection down. Returns 0 on success. */
static int run(void)
{
	struct rdma_addrinfo hints, *res;
	struct ibv_qp_init_attr attr;
	struct ibv_wc wc;
	int ret;

	memset(&hints, 0, sizeof hints);
	hints.ai_port_space = RDMA_PS_TCP;
	ret = rdma_getaddrinfo(server, port, &hints, &res);
	if (ret) {
		printf("rdma_getaddrinfo %d\n", errno);
		return ret;
	}

	memset(&attr, 0, sizeof attr);
	attr.cap.max_send_wr = attr.cap.max_recv_wr = 1;
	attr.cap.max_send_sge = attr.cap.max_recv_sge = 1;
	attr.cap.max_inline_data = 16;
	attr.qp_context = id;
	attr.sq_sig_all = 1;
	ret = rdma_create_ep(&id, res, NULL, &attr);
	rdma_freeaddrinfo(res);
	if (ret) {
		printf("rdma_create_ep %d\n", errno);
		return ret;
	}

	mr = rdma_reg_msgs(id, recv_msg, 16);
	if (!mr) {
		printf("rdma_reg_msgs %d\n", errno);
		/* BUG FIX: ret is 0 (success) at this point; the old code
		 * returned it and reported success on a registration
		 * failure. */
		return -1;
	}

	/* Post the receive before connecting so the reply can never race
	 * ahead of the receive buffer. */
	ret = rdma_post_recv(id, NULL, recv_msg, 16, mr);
	if (ret) {
		printf("rdma_post_recv %d\n", errno);
		return ret;
	}

	ret = rdma_connect(id, NULL);
	if (ret) {
		printf("rdma_connect %d\n", errno);
		return ret;
	}

	s = get_dtime();
	ret = rdma_post_send(id, NULL, send_msg, 16, NULL, IBV_SEND_INLINE);
	if (ret) {
		printf("rdma_post_send %d\n", errno);
		return ret;
	}
	e = get_dtime();

	ret = rdma_get_recv_comp(id, &wc);
	if (ret <= 0) {
		printf("rdma_get_recv_comp %d\n", ret);
		return ret;
	}

	printf("time %f\n", e - s);
	rdma_disconnect(id);
	rdma_dereg_mr(mr);
	rdma_destroy_ep(id);
	return 0;
}
static int xrc_test(void) { struct rdma_cm_id *conn_id, *lookup_id; struct ibv_qp_init_attr attr; struct rdma_conn_param param; struct rdma_cm_event *event; struct ibv_wc wc; int ret; conn_id = xrc_listen_recv(); if (!conn_id) return -1; ret = xrc_create_srq_listen(rdma_get_local_addr(conn_id), sizeof(struct sockaddr_storage)); if (ret) return -1; memset(&attr, 0, sizeof attr); attr.qp_type = IBV_QPT_XRC_RECV; attr.ext.xrc_recv.xrcd = srq_id->srq->ext.xrc.xrcd; ret = rdma_create_qp(conn_id, NULL, &attr); if (ret) { printf("Unable to create xrc recv qp %d\n", errno); return ret; } ret = rdma_accept(conn_id, NULL); if (ret) { printf("rdma_accept failed for xrc recv qp %d\n", errno); return ret; } ret = rdma_get_request(srq_id, &lookup_id); if (ret) { printf("rdma_get_request %d\n", errno); return ret; } mr = rdma_reg_msgs(srq_id, recv_msg, sizeof recv_msg); if (!mr) { printf("ibv_reg_msgs %d\n", errno); return ret; } ret = rdma_post_recv(srq_id, NULL, recv_msg, sizeof recv_msg, mr); if (ret) { printf("rdma_post_recv %d\n", errno); return ret; } memset(¶m, 0, sizeof param); param.qp_num = srq_id->srq->ext.xrc.srq_num; ret = rdma_accept(lookup_id, ¶m); if (ret) { printf("rdma_accept failed for srqn lookup %d\n", errno); return ret; } rdma_destroy_id(lookup_id); ret = rdma_get_recv_comp(srq_id, &wc); if (ret <= 0) { printf("rdma_get_recv_comp %d\n", ret); return ret; } ret = rdma_get_cm_event(conn_id->channel, &event); if (ret || event->event != RDMA_CM_EVENT_DISCONNECTED) { printf("Failed to get disconnect event\n"); return -1; } rdma_ack_cm_event(event); rdma_disconnect(conn_id); rdma_destroy_ep(conn_id); rdma_dereg_mr(mr); rdma_destroy_ep(srq_id); rdma_destroy_ep(listen_id); return 0; }
/* Server side of the RC ping-pong test: listen, accept one connection,
 * receive one message, send the reply inline, wait for both completions,
 * then disconnect and clean up. Returns 0 on success. */
static int rc_test(void)
{
	struct rdma_addrinfo hints, *res;
	struct ibv_qp_init_attr attr;
	struct ibv_wc wc;
	int ret;

	memset(&hints, 0, sizeof hints);
	hints.ai_flags = RAI_PASSIVE;
	hints.ai_port_space = RDMA_PS_TCP;
	ret = rdma_getaddrinfo(NULL, port, &hints, &res);
	if (ret) {
		printf("rdma_getaddrinfo %d\n", errno);
		return ret;
	}

	memset(&attr, 0, sizeof attr);
	attr.cap.max_send_wr = attr.cap.max_recv_wr = 1;
	attr.cap.max_send_sge = attr.cap.max_recv_sge = 1;
	attr.cap.max_inline_data = sizeof send_msg;
	attr.sq_sig_all = 1;
	ret = rdma_create_ep(&listen_id, res, NULL, &attr);
	rdma_freeaddrinfo(res);
	if (ret) {
		printf("rdma_create_ep %d\n", errno);
		return ret;
	}

	ret = rdma_listen(listen_id, 0);
	if (ret) {
		printf("rdma_listen %d\n", errno);
		return ret;
	}

	ret = rdma_get_request(listen_id, &id);
	if (ret) {
		printf("rdma_get_request %d\n", errno);
		return ret;
	}

	mr = rdma_reg_msgs(id, recv_msg, sizeof recv_msg);
	if (!mr) {
		printf("rdma_reg_msgs %d\n", errno);
		/* BUG FIX: ret is 0 (success) at this point; the old code
		 * returned it and reported success on a registration
		 * failure. */
		return -1;
	}

	/* Post the receive before accepting so the peer's first send can
	 * never arrive without a buffer. */
	ret = rdma_post_recv(id, NULL, recv_msg, sizeof recv_msg, mr);
	if (ret) {
		printf("rdma_post_recv %d\n", errno);
		return ret;
	}

	ret = rdma_accept(id, NULL);
	if (ret) {
		printf("rdma_accept %d\n", errno);
		return ret;
	}

	ret = rdma_get_recv_comp(id, &wc);
	if (ret <= 0) {
		printf("rdma_get_recv_comp %d\n", ret);
		return ret;
	}

	ret = rdma_post_send(id, NULL, send_msg, sizeof send_msg, NULL,
			     IBV_SEND_INLINE);
	if (ret) {
		printf("rdma_post_send %d\n", errno);
		return ret;
	}

	ret = rdma_get_send_comp(id, &wc);
	if (ret <= 0) {
		printf("rdma_get_send_comp %d\n", ret);
		return ret;
	}

	rdma_disconnect(id);
	rdma_dereg_mr(mr);
	rdma_destroy_ep(id);
	rdma_destroy_ep(listen_id);
	return 0;
}
/* GIOChannel watch callback: drain one receive completion from a client
 * connection's CQ, dispatch on the control-message type (PING -> PONG,
 * ACK_RDMA -> drop the client from the pending-realloc list), then re-post
 * the generic receive and re-arm CQ notifications. Always returns TRUE so
 * the watch stays installed. Guarded by the rdma_handling lock; if the lock
 * is busy the event is left for the next dispatch. */
static gboolean process_rdma_event (GIOChannel *source, GIOCondition condition, gpointer data)
{
    // Right now, we don't need 'source'
    // Tell the compiler to ignore it by (void)-ing it
    (void) source;

    if (!G_TRYLOCK (rdma_handling)) {
        g_debug ("RDMA handling will wait for the next dispatch.");
        return TRUE;
    }

    g_debug ("Got message on condition: %i", condition);
    // 'data' is the GList node registered for this client.
    void *payload = ((GList *)data)->data;
    struct kiro_client_connection *cc = (struct kiro_client_connection *)payload;
    struct ibv_wc wc;

    gint num_comp = ibv_poll_cq (cc->conn->recv_cq, 1, &wc);
    if (!num_comp) {
        g_critical ("RDMA event handling was triggered, but there is no completion on the queue");
        goto end_rmda_eh;
    }
    if (num_comp < 0) {
        g_critical ("Failure getting receive completion event from the queue: %s", strerror (errno));
        goto end_rmda_eh;
    }
    g_debug ("Got %i receive events from the queue", num_comp);

    // Acknowledge the CQ event that woke up this watch.
    void *cq_ctx;
    struct ibv_cq *cq;
    int err = ibv_get_cq_event (cc->conn->recv_cq_channel, &cq, &cq_ctx);
    if (!err)
        ibv_ack_cq_events (cq, 1);

    struct kiro_connection_context *ctx = (struct kiro_connection_context *)cc->conn->context;
    guint type = ((struct kiro_ctrl_msg *)ctx->cf_mr_recv->mem)->msg_type;
    g_debug ("Received a message from Client %u of type %u", cc->id, type);

    switch (type) {
        case KIRO_PING:
        {
            // Answer a PING with a PONG through the control-flow send MR.
            struct kiro_ctrl_msg *msg = (struct kiro_ctrl_msg *) (ctx->cf_mr_send->mem);
            msg->msg_type = KIRO_PONG;

            if (!send_msg (cc->conn, ctx->cf_mr_send)) {
                g_warning ("Failure while trying to post PONG send: %s", strerror (errno));
                goto done;
            }
            break;
        }
        case KIRO_ACK_RDMA:
        {
            g_debug ("ACK received");
            // Only touch the realloc bookkeeping if no timeout handler is
            // currently running.
            if (G_TRYLOCK (realloc_timeout)) {
                g_debug ("Client %i has ACKed the reallocation request", cc->id);
                GList *client = g_list_find (realloc_list, (gpointer)cc);
                if (client) {
                    realloc_list = g_list_remove_link (realloc_list, client);
                    // The old (backup) MR is no longer referenced by the
                    // client; deregister and free it.
                    if (cc->backup_mri->mr)
                        ibv_dereg_mr (cc->backup_mri->mr);
                    g_free (cc->backup_mri);
                    cc->backup_mri = NULL;
                    g_debug ("Client %i removed from realloc_list", cc->id);
                }
                G_UNLOCK (realloc_timeout);
            }
            break;
        }
        default:
            g_debug ("Message Type is unknow. Ignoring...");
    }

done:
    //Post a generic receive in order to stay responsive to any messages from
    //the client
    if (rdma_post_recv (cc->conn, cc->conn, ctx->cf_mr_recv->mem, ctx->cf_mr_recv->size, ctx->cf_mr_recv->mr)) {
        //TODO: Connection teardown in an event handler routine? Not a good
        //idea...
        g_critical ("Posting generic receive for event handling failed: %s", strerror (errno));
        kiro_destroy_connection_context (&ctx);
        rdma_destroy_ep (cc->conn);
        goto end_rmda_eh;
    }

    ibv_req_notify_cq (cc->conn->recv_cq, 0); // Make the respective Queue push events onto the channel
    g_debug ("Finished RDMA event handling");

end_rmda_eh:
    G_UNLOCK (rdma_handling);
    return TRUE;
}
/* Start a KIRO server listening on address:port, serving 'mem' (mem_size
 * bytes) to clients. Creates the listening endpoint, migrates it to a fresh
 * event channel, and spins up the GLib main loop thread that handles
 * connection-management events. Returns 0 on success, -1 on any failure. */
int kiro_server_start (KiroServer *self, const char *address, const char *port, void *mem, size_t mem_size)
{
    g_return_val_if_fail (self != NULL, -1);
    KiroServerPrivate *priv = KIRO_SERVER_GET_PRIVATE (self);

    if (priv->base) {
        g_debug ("Server already started.");
        return -1;
    }

    if (!mem || mem_size == 0) {
        g_warning ("Invalid memory given to provide.");
        return -1;
    }

    struct rdma_addrinfo hints, *res_addrinfo;
    memset (&hints, 0, sizeof (hints));
    hints.ai_port_space = RDMA_PS_IB;
    hints.ai_flags = RAI_PASSIVE;

    // rdma_getaddrinfo takes non-const strings; pass disposable copies.
    char *addr_c = g_strdup (address);
    char *port_c = g_strdup (port);

    int rtn = rdma_getaddrinfo (addr_c, port_c, &hints, &res_addrinfo);
    g_free (addr_c);
    g_free (port_c);

    if (rtn) {
        g_critical ("Failed to create address information: %s", strerror (errno));
        return -1;
    }

    struct ibv_qp_init_attr qp_attr;
    memset (&qp_attr, 0, sizeof (qp_attr));
    qp_attr.cap.max_send_wr = 10;
    qp_attr.cap.max_recv_wr = 10;
    qp_attr.cap.max_send_sge = 1;
    qp_attr.cap.max_recv_sge = 1;
    qp_attr.qp_context = priv->base;
    qp_attr.sq_sig_all = 1;

    if (rdma_create_ep (& (priv->base), res_addrinfo, NULL, &qp_attr)) {
        g_critical ("Endpoint creation failed: %s", strerror (errno));
        // BUG FIX: the result of rdma_getaddrinfo() must be released with
        // rdma_freeaddrinfo(); g_free() uses the wrong allocator and leaks
        // the rest of the addrinfo chain.
        rdma_freeaddrinfo (res_addrinfo);
        return -1;
    }
    rdma_freeaddrinfo (res_addrinfo); // No longer needed (BUG FIX: was g_free)

    g_debug ("Endpoint created");
    char *addr_local = NULL;
    struct sockaddr *src_addr = rdma_get_local_addr (priv->base);

    if (!src_addr) {
        addr_local = "NONE";
    }
    else {
        // NOTE(review): inet_ntoa assumes AF_INET; an IPv6 bind would be
        // misprinted here -- confirm IPv6 is out of scope.
        addr_local = inet_ntoa (((struct sockaddr_in *)src_addr)->sin_addr);
        /*
        if(src_addr->sa_family == AF_INET)
            addr_local = &(((struct sockaddr_in*)src_addr)->sin_addr);
        else
            addr_local = &(((struct sockaddr_in6*)src_addr)->sin6_addr);
        */
    }

    g_message ("Server bound to address %s:%s", addr_local, port);

    if (rdma_listen (priv->base, 0)) {
        g_critical ("Failed to put server into listening state: %s", strerror (errno));
        rdma_destroy_ep (priv->base);
        return -1;
    }

    priv->mem = mem;
    priv->mem_size = mem_size;

    priv->ec = rdma_create_event_channel();

    if (rdma_migrate_id (priv->base, priv->ec)) {
        g_critical ("Was unable to migrate connection to new Event Channel: %s", strerror (errno));
        rdma_destroy_ep (priv->base);
        return -1;
    }

    priv->main_loop = g_main_loop_new (NULL, FALSE);
    priv->conn_ec = g_io_channel_unix_new (priv->ec->fd);
    g_io_add_watch (priv->conn_ec, G_IO_IN | G_IO_PRI, process_cm_event, (gpointer)priv);
    priv->main_thread = g_thread_new ("KIRO Server main loop", start_server_main_loop, priv->main_loop);

    // We gave control to the main_loop (with add_watch) and don't need our ref
    // any longer
    g_io_channel_unref (priv->conn_ec);

    g_message ("Enpoint listening");
    return 0;
}
// Destroy the listening endpoint.
// NOTE(review): presumes m_cm_id holds a valid endpoint; rdma_destroy_ep on
// an uninitialized id would be undefined -- confirm the constructor/listen()
// always leaves m_cm_id in a destroyable state.
Acceptor::~Acceptor() { rdma_destroy_ep(m_cm_id); }
/* Register a memory region [buf, buf+len) with the domain's PD, translating
 * libfabric access flags into ibverbs access flags. On success *mr receives
 * the new MR fid (mem_desc = lkey, key = rkey) and, when the domain's EQ was
 * bound with FI_REG_MR, an FI_MR_COMPLETE event is written to it.
 * Returns 0 or a negative fi_errno value. */
static int fi_ibv_mr_reg(struct fid *fid, const void *buf, size_t len,
			 uint64_t access, uint64_t offset,
			 uint64_t requested_key, uint64_t flags,
			 struct fid_mr **mr, void *context)
{
	struct fi_ibv_mem_desc *md;
	int fi_ibv_access = 0;
	struct fid_domain *domain;

	if (flags)
		return -FI_EBADFLAGS;
	if (fid->fclass != FI_CLASS_DOMAIN) {
		return -FI_EINVAL;
	}
	domain = container_of(fid, struct fid_domain, fid);

	md = calloc(1, sizeof *md);
	if (!md)
		return -FI_ENOMEM;
	md->domain = container_of(domain, struct fi_ibv_domain, domain_fid);
	md->mr_fid.fid.fclass = FI_CLASS_MR;
	md->mr_fid.fid.context = context;
	md->mr_fid.fid.ops = &fi_ibv_mr_ops;

	/* Enable local write access by default for FI_EP_RDM which hides local
	 * registration requirements. This allows to avoid buffering or double
	 * registration */
	if (!(md->domain->info->caps & FI_LOCAL_MR))
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	/* Local read access to an MR is enabled by default in verbs */
	if (access & FI_RECV)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	/* iWARP spec requires Remote Write access for an MR that is used
	 * as a data sink for a Remote Read */
	if (access & FI_READ) {
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;
		if (md->domain->verbs->device->transport_type == IBV_TRANSPORT_IWARP)
			fi_ibv_access |= IBV_ACCESS_REMOTE_WRITE;
	}

	if (access & FI_WRITE)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;
	if (access & FI_REMOTE_READ)
		fi_ibv_access |= IBV_ACCESS_REMOTE_READ;

	/* Verbs requires Local Write access too for Remote Write access */
	if (access & FI_REMOTE_WRITE)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE |
				 IBV_ACCESS_REMOTE_WRITE |
				 IBV_ACCESS_REMOTE_ATOMIC;

	md->mr = ibv_reg_mr(md->domain->pd, (void *) buf, len, fi_ibv_access);
	if (!md->mr)
		goto err;
	md->mr_fid.mem_desc = (void *) (uintptr_t) md->mr->lkey;
	md->mr_fid.key = md->mr->rkey;
	*mr = &md->mr_fid;

	/* Optionally notify the bound EQ about the completed registration. */
	if (md->domain->eq && (md->domain->eq_flags & FI_REG_MR)) {
		struct fi_eq_entry entry = {
			.fid = &md->mr_fid.fid,
			.context = context
		};
		fi_ibv_eq_write_event(md->domain->eq, FI_MR_COMPLETE,
				      &entry, sizeof(entry));
	}
	return 0;
err:
	free(md);
	return -errno;
}

/* Vector registration: the verbs provider only supports a single iov entry,
 * so this simply forwards the first (and only) element to fi_ibv_mr_reg. */
static int fi_ibv_mr_regv(struct fid *fid, const struct iovec *iov,
			  size_t count, uint64_t access, uint64_t offset,
			  uint64_t requested_key, uint64_t flags,
			  struct fid_mr **mr, void *context)
{
	if (count > VERBS_MR_IOV_LIMIT) {
		VERBS_WARN(FI_LOG_FABRIC,
			   "iov count > %d not supported\n",
			   VERBS_MR_IOV_LIMIT);
		return -FI_EINVAL;
	}
	return fi_ibv_mr_reg(fid, iov->iov_base, iov->iov_len, access, offset,
			     requested_key, flags, mr, context);
}

/* Attribute-based registration: unpack fi_mr_attr and forward to regv. */
static int fi_ibv_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr,
			     uint64_t flags, struct fid_mr **mr)
{
	return fi_ibv_mr_regv(fid, attr->mr_iov, attr->iov_count, attr->access,
			      0, attr->requested_key, flags, mr, attr->context);
}

/* Bind a fid to the domain; only EQ bindings are supported. The flags are
 * remembered so MR registration can decide whether to emit FI_MR_COMPLETE
 * events (see fi_ibv_mr_reg). */
static int fi_ibv_domain_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
	struct fi_ibv_domain *domain;
	struct fi_ibv_eq *eq;

	domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid);

	switch (bfid->fclass) {
	case FI_CLASS_EQ:
		eq = container_of(bfid, struct fi_ibv_eq, eq_fid);
		domain->eq = eq;
		domain->eq_flags = flags;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

/* Close a domain: tear down the RDM CM listener (if this is an RDM domain),
 * deallocate the PD, and free the cached fi_info and the domain object. */
static int fi_ibv_domain_close(fid_t fid)
{
	struct fi_ibv_domain *domain;
	int ret;

	domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid);

	if (domain->rdm) {
		rdma_destroy_ep(domain->rdm_cm->listener);
		free(domain->rdm_cm);
	}

	if (domain->pd) {
		ret = ibv_dealloc_pd(domain->pd);
		if (ret)
			return -ret;
		domain->pd = NULL;
	}

	fi_freeinfo(domain->info);
	free(domain);
	return 0;
}

/* Locate the verbs device whose name matches 'name' and store its context
 * in domain->verbs. RDM domains match by prefix (the domain name carries a
 * provider suffix), MSG domains match exactly. Returns 0 when found, the
 * last comparison result or -FI_ENODEV otherwise. */
static int fi_ibv_open_device_by_name(struct fi_ibv_domain *domain, const char *name)
{
	struct ibv_context **dev_list;
	int i, ret = -FI_ENODEV;

	if (!name)
		return -FI_EINVAL;

	dev_list = rdma_get_devices(NULL);
	if (!dev_list)
		return -errno;

	for (i = 0; dev_list[i] && ret; i++) {
		if (domain->rdm) {
			ret = strncmp(name, ibv_get_device_name(dev_list[i]->device),
				      strlen(name) - strlen(verbs_rdm_domain.suffix));
		} else {
			ret = strcmp(name, ibv_get_device_name(dev_list[i]->device));
		}
		if (!ret)
			domain->verbs = dev_list[i];
	}
	rdma_free_devices(dev_list);
	return ret;
}

/* fid-level ops shared by both MSG and RDM domains. */
static struct fi_ops fi_ibv_fid_ops = {
	.size = sizeof(struct fi_ops),
	.close = fi_ibv_domain_close,
	.bind = fi_ibv_domain_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};

/* Memory-registration ops installed on every domain. */
static struct fi_ops_mr fi_ibv_domain_mr_ops = {
	.size = sizeof(struct fi_ops_mr),
	.reg = fi_ibv_mr_reg,
	.regv = fi_ibv_mr_regv,
	.regattr = fi_ibv_mr_regattr,
};

/* Domain ops for FI_EP_MSG domains. */
static struct fi_ops_domain fi_ibv_domain_ops = {
	.size = sizeof(struct fi_ops_domain),
	.av_open = fi_no_av_open,
	.cq_open = fi_ibv_cq_open,
	.endpoint = fi_ibv_open_ep,
	.scalable_ep = fi_no_scalable_ep,
	.cntr_open = fi_no_cntr_open,
	.poll_open = fi_no_poll_open,
	.stx_ctx = fi_no_stx_context,
	.srx_ctx = fi_ibv_srq_context,
};

/* Domain ops for FI_EP_RDM domains. */
static struct fi_ops_domain fi_ibv_rdm_domain_ops = {
	.size = sizeof(struct fi_ops_domain),
	.av_open = fi_ibv_rdm_av_open,
	.cq_open = fi_ibv_rdm_cq_open,
	.endpoint = fi_ibv_rdm_open_ep,
	.scalable_ep = fi_no_scalable_ep,
	.cntr_open = fi_rbv_rdm_cntr_open,
	.poll_open = fi_no_poll_open,
	.stx_ctx = fi_no_stx_context,
	.srx_ctx = fi_no_srx_context,
};

/* Open a domain on 'fabric' for the device named in info->domain_attr.
 * Validates the requested attributes against the cached verbs info, opens
 * the device, allocates a PD, and for RDM domains additionally creates the
 * CM event channel and listener id.
 * NOTE(review): when fi_dupinfo() fails, 'goto err1' returns 'ret', which at
 * that point still holds 0 from ofi_check_domain_attr -- looks like it
 * should be -FI_ENOMEM; confirm upstream. */
static int fi_ibv_domain(struct fid_fabric *fabric, struct fi_info *info,
			 struct fid_domain **domain, void *context)
{
	struct fi_ibv_domain *_domain;
	struct fi_ibv_fabric *fab;
	struct fi_info *fi;
	int ret;

	fi = fi_ibv_get_verbs_info(info->domain_attr->name);
	if (!fi)
		return -FI_EINVAL;

	fab = container_of(fabric, struct fi_ibv_fabric, util_fabric.fabric_fid);

	ret = ofi_check_domain_attr(&fi_ibv_prov, fabric->api_version,
				    fi->domain_attr, info->domain_attr);
	if (ret)
		return ret;

	_domain = calloc(1, sizeof *_domain);
	if (!_domain)
		return -FI_ENOMEM;

	_domain->info = fi_dupinfo(info);
	if (!_domain->info)
		goto err1;

	_domain->rdm = FI_IBV_EP_TYPE_IS_RDM(info);
	if (_domain->rdm) {
		_domain->rdm_cm = calloc(1, sizeof(*_domain->rdm_cm));
		if (!_domain->rdm_cm) {
			ret = -FI_ENOMEM;
			goto err2;
		}
	}
	ret = fi_ibv_open_device_by_name(_domain, info->domain_attr->name);
	if (ret)
		goto err2;

	_domain->pd = ibv_alloc_pd(_domain->verbs);
	if (!_domain->pd) {
		ret = -errno;
		goto err2;
	}

	_domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN;
	_domain->domain_fid.fid.context = context;
	_domain->domain_fid.fid.ops = &fi_ibv_fid_ops;
	_domain->domain_fid.mr = &fi_ibv_domain_mr_ops;
	if (_domain->rdm) {
		_domain->domain_fid.ops = &fi_ibv_rdm_domain_ops;

		_domain->rdm_cm->ec = rdma_create_event_channel();
		if (!_domain->rdm_cm->ec) {
			VERBS_INFO(FI_LOG_EP_CTRL,
				   "Failed to create listener event channel: %s\n",
				   strerror(errno));
			ret = -FI_EOTHER;
			goto err2;
		}

		/* The CM channel is polled, so it must not block. */
		if (fi_fd_nonblock(_domain->rdm_cm->ec->fd) != 0) {
			VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno);
			ret = -FI_EOTHER;
			goto err3;
		}

		if (rdma_create_id(_domain->rdm_cm->ec,
				   &_domain->rdm_cm->listener, NULL, RDMA_PS_TCP)) {
			VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n",
				   strerror(errno));
			ret = -FI_EOTHER;
			goto err3;
		}
		_domain->rdm_cm->is_bound = 0;
	} else {
		_domain->domain_fid.ops = &fi_ibv_domain_ops;
	}
	_domain->fab = fab;

	*domain = &_domain->domain_fid;
	return 0;
err3:
	if (_domain->rdm)
		rdma_destroy_event_channel(_domain->rdm_cm->ec);
err2:
	if (_domain->rdm)
		free(_domain->rdm_cm);
	fi_freeinfo(_domain->info);
err1:
	free(_domain);
	return ret;
}

/* fi_trywait implementation: a wait is possible when every CQ agrees and
 * every EQ is fd-backed (always true for rdmacm); counters and wait sets
 * are not supported. */
static int fi_ibv_trywait(struct fid_fabric *fabric, struct fid **fids, int count)
{
	struct fi_ibv_cq *cq;
	int ret, i;

	for (i = 0; i < count; i++) {
		switch (fids[i]->fclass) {
		case FI_CLASS_CQ:
			cq = container_of(fids[i], struct fi_ibv_cq, cq_fid.fid);
			ret = cq->trywait(fids[i]);
			if (ret)
				return ret;
			break;
		case FI_CLASS_EQ:
			/* We are always ready to wait on an EQ since
			 * rdmacm EQ is based on an fd */
			continue;
		case FI_CLASS_CNTR:
		case FI_CLASS_WAIT:
			return -FI_ENOSYS;
		default:
			return -FI_EINVAL;
		}
	}
	return FI_SUCCESS;
}

/* Close the fabric: let the util layer refuse if references remain, then
 * free the provider fabric object. */
static int fi_ibv_fabric_close(fid_t fid)
{
	struct fi_ibv_fabric *fab;
	int ret;

	fab = container_of(fid, struct fi_ibv_fabric, util_fabric.fabric_fid.fid);
	ret = ofi_fabric_close(&fab->util_fabric);
	if (ret)
		return ret;
	free(fab);
	return 0;
}

/* fid-level ops for the fabric object. */
static struct fi_ops fi_ibv_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = fi_ibv_fabric_close,
	.bind = fi_no_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};

/* Fabric-level ops table. */
static struct fi_ops_fabric fi_ibv_ops_fabric = {
	.size = sizeof(struct fi_ops_fabric),
	.domain = fi_ibv_domain,
	.passive_ep = fi_ibv_passive_ep,
	.eq_open = fi_ibv_eq_open,
	.wait_open = fi_no_wait_open,
	.trywait = fi_ibv_trywait
};

/* Open the verbs fabric matching 'attr'. Walks the cached verbs_info list
 * until ofi_fabric_init accepts one entry, then installs the provider's
 * fid and fabric ops. Returns 0 or a negative fi_errno value. */
int fi_ibv_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
		  void *context)
{
	struct fi_ibv_fabric *fab;
	struct fi_info *info;
	int ret;

	ret = fi_ibv_init_info();
	if (ret)
		return ret;

	fab = calloc(1, sizeof(*fab));
	if (!fab)
		return -FI_ENOMEM;

	for (info = verbs_info; info; info = info->next) {
		ret = ofi_fabric_init(&fi_ibv_prov, info->fabric_attr, attr,
				      &fab->util_fabric, context);
		if (ret != -FI_ENODATA)
			break;
	}
	if (ret) {
		free(fab);
		return ret;
	}

	*fabric = &fab->util_fabric.fabric_fid;
	(*fabric)->fid.ops = &fi_ibv_fi_ops;
	(*fabric)->ops = &fi_ibv_ops_fabric;

	return 0;
}