int main(int argc, char **argv) { struct sockaddr_in addr; struct rdma_cm_event *event = NULL; struct rdma_cm_id *listener = NULL; struct rdma_event_channel *ec = NULL; uint16_t port = 0; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; TEST_Z(ec = rdma_create_event_channel()); TEST_NZ(rdma_create_id(ec, &listener, NULL, RDMA_PS_TCP)); TEST_NZ(rdma_bind_addr(listener, (struct sockaddr *)&addr)); TEST_NZ(rdma_listen(listener, 10)); /* backlog=10 is arbitrary */ port = ntohs(rdma_get_src_port(listener)); printf("listening on port %d.\n", port); while (rdma_get_cm_event(ec, &event) == 0) { struct rdma_cm_event event_copy; memcpy(&event_copy, event, sizeof(*event)); rdma_ack_cm_event(event); if (on_event(&event_copy)) break; } rdma_destroy_id(listener); rdma_destroy_event_channel(ec); return 0; }
/// The IBConnectionGroup default constructor. IBConnectionGroup() { ec_ = rdma_create_event_channel(); if (!ec_) throw InfinibandException("rdma_create_event_channel failed"); fcntl(ec_->fd, F_SETFL, O_NONBLOCK); }
/*
 * Runs one active-side RDMA CM session: resolve address and route, connect,
 * call on_connect() with the id's context, then wait for the disconnect
 * event and release the id and the event channel.
 *
 * tfi / num_tfi are not referenced by this function. `host` and `port` are
 * not declared here; presumably they are file-scope variables -- confirm.
 *
 * Returns 0. Failure handling is delegated to the TEST_Z / TEST_NZ macros.
 */
int ibrdma_transfer(struct transfer_info *tfi, int num_tfi)
{
  struct addrinfo *addr;
  struct rdma_cm_id *cmid= NULL;
  struct rdma_event_channel *ec = NULL;
  struct rdma_conn_param cm_params;

  TEST_NZ(getaddrinfo(host, port, NULL, &addr));
  TEST_Z(ec = rdma_create_event_channel());
  TEST_NZ(rdma_create_id(ec, &cmid, NULL, RDMA_PS_TCP));

  TEST_NZ(rdma_resolve_addr(cmid, NULL, addr->ai_addr, TIMEOUT_IN_MS));
  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ADDR_RESOLVED));
  freeaddrinfo(addr);

  build_connection(cmid);

  TEST_NZ(rdma_resolve_route(cmid, TIMEOUT_IN_MS));
  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ROUTE_RESOLVED));

  build_params(&cm_params);
  TEST_NZ(rdma_connect(cmid, &cm_params));
  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ESTABLISHED));

  on_connect(cmid->context);

  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_DISCONNECTED));

  /*
   * Both destroy calls take the object pointer itself; passing the address
   * of the local pointer variable (&cmid / &ec) hands them the wrong object.
   */
  rdma_destroy_id(cmid);
  rdma_destroy_event_channel(ec);

  return 0;
}
/*
 * Connects to host:port over RDMA CM, sends the initial message for a
 * `size`-byte buffer at `data`, and blocks until the disconnect event
 * arrives. The id and the event channel are destroyed before returning.
 *
 * Returns 0. Failure handling is delegated to the TEST_Z / TEST_NZ macros.
 */
int ibrdma_send(char* host, char* port, void* data, uint64_t size)
{
  struct rdma_conn_param cm_params;
  struct rdma_event_channel *ec = NULL;
  struct rdma_cm_id *cmid= NULL;
  struct addrinfo *addr;

  /* Step 1: look up the peer and create the CM objects. */
  TEST_NZ(getaddrinfo(host, port, NULL, &addr));
  TEST_Z(ec = rdma_create_event_channel());
  TEST_NZ(rdma_create_id(ec, &cmid, NULL, RDMA_PS_TCP));

  /* Step 2: address resolution; the lookup result is released afterwards. */
  TEST_NZ(rdma_resolve_addr(cmid, NULL, addr->ai_addr, TIMEOUT_IN_MS));
  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ADDR_RESOLVED));
  freeaddrinfo(addr);

  build_connection(cmid);

  /* Step 3: route resolution. */
  TEST_NZ(rdma_resolve_route(cmid, TIMEOUT_IN_MS));
  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ROUTE_RESOLVED));

  /* Step 4: connect and wait until the connection is established. */
  build_params(&cm_params);
  TEST_NZ(rdma_connect(cmid, &cm_params));
  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ESTABLISHED));

  on_connect(cmid->context);

  /* Step 5: send the init message that starts the RDMA exchange. */
  init_tfile(data, size);
  send_init(cmid->context);

  /* Step 6: wait for the disconnect event, then tear everything down. */
  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_DISCONNECTED));

  rdma_destroy_id(cmid);
  rdma_destroy_event_channel(ec);

  return 0;
}
/*
 * Moves the optional listen id and every test node's cma_id onto a freshly
 * created event channel.
 *
 * On success the previous test.channel is destroyed and replaced by the new
 * one. If any migration fails the loop stops, an error is printed and
 * test.channel is left untouched.
 *
 * Returns 0 on success, -1 if the channel could not be created, otherwise
 * the non-zero result of the failing rdma_migrate_id() call.
 */
static int migrate_channel(struct rdma_cm_id *listen_id)
{
	struct rdma_event_channel *new_channel;
	int idx;
	int rc = 0;

	printf("migrating to new event channel\n");

	new_channel = rdma_create_event_channel();
	if (new_channel == NULL) {
		perror("cmatose: failed to create event channel");
		return -1;
	}

	if (listen_id != NULL)
		rc = rdma_migrate_id(listen_id, new_channel);

	for (idx = 0; idx < connections; idx++) {
		if (rc)
			break;
		rc = rdma_migrate_id(test.nodes[idx].cma_id, new_channel);
	}

	if (rc) {
		perror("cmatose: failure migrating to channel");
		return rc;
	}

	/* Everything moved over: retire the old channel. */
	rdma_destroy_event_channel(test.channel);
	test.channel = new_channel;
	return rc;
}
/*
 * Active-side connection setup.
 *
 * Resolves param->host on port RDMA_PORT, creates the event channel and CM
 * id inside `comm`, resolves address and route, and connects. Returns once
 * the ESTABLISHED event has been received.
 *
 * Returns 0. Failure handling is delegated to the TEST_Z / TEST_NZ macros.
 * NOTE: the on_connect() call that used to follow the connect is disabled.
 */
int RDMA_Active_Init(struct RDMA_communicator *comm, struct RDMA_param *param)
{
  struct addrinfo *addr;
  struct rdma_conn_param cm_params;
  char port[8];

  /* Bounded formatting: a decimal int can need more than 8 bytes. */
  snprintf(port, sizeof(port), "%d", RDMA_PORT);

  TEST_NZ(getaddrinfo(param->host, port, NULL, &addr));
  TEST_Z(comm->ec = rdma_create_event_channel());
  TEST_NZ(rdma_create_id(comm->ec, &(comm->cm_id), NULL, RDMA_PS_TCP));

  TEST_NZ(rdma_resolve_addr(comm->cm_id, NULL, addr->ai_addr, TIMEOUT_IN_MS));
  TEST_NZ(wait_for_event(comm->ec, RDMA_CM_EVENT_ADDR_RESOLVED));
  freeaddrinfo(addr);

  build_connection(comm->cm_id);

  TEST_NZ(rdma_resolve_route(comm->cm_id, TIMEOUT_IN_MS));
  TEST_NZ(wait_for_event(comm->ec, RDMA_CM_EVENT_ROUTE_RESOLVED));

  build_params(&cm_params);
  TEST_NZ(rdma_connect(comm->cm_id, &cm_params));
  TEST_NZ(wait_for_event(comm->ec, RDMA_CM_EVENT_ESTABLISHED));

  return 0;
}
int xfer_rdma_init(struct xfer_data *data) { int duplex = 0; /* Get the PID and prepend it to every output on stdout/stderr * This helps to parse output when multiple client/server are * run from single host */ pid = getpid(); printf("%d: | port=%d | ib_port=%d | tx_depth=%d | sl=%d | duplex=%d | cma=%d |\n", pid, data->port, data->ib_port, data->tx_depth, sl, duplex, data->use_cma); srand48(pid * time(NULL)); page_size = sysconf(_SC_PAGESIZE); if (data->use_cma) { data->cm_channel = rdma_create_event_channel(); if (!data->cm_channel) { fprintf(stderr, "%d:%s: rdma_create_event_channel failed\n", pid, __func__); return -1; } if (rdma_create_id(data->cm_channel, &data->cm_id, NULL, RDMA_PS_TCP)) { fprintf(stderr, "%d:%s: rdma_create_id failed\n", pid, __func__); return -1; } } else { // use an alternative to CMA here } return 0; }
/*
 * Client entry point: expects <server-address> <server-port>.
 *
 * Starts address resolution towards the server and then passes every
 * connection-manager event to on_event() until it returns non-zero or
 * fetching an event fails. The event channel is destroyed before exit.
 */
int main(int argc, char **argv)
{
  struct rdma_event_channel *ec = NULL;
  struct rdma_cm_id *conn= NULL;
  struct rdma_cm_event *incoming = NULL;
  struct addrinfo *addr;

  if (argc != 3)
    die("usage: client <server-address> <server-port>");

  TEST_NZ(getaddrinfo(argv[1], argv[2], NULL, &addr));

  TEST_Z(ec = rdma_create_event_channel());
  TEST_NZ(rdma_create_id(ec, &conn, NULL, RDMA_PS_TCP));
  TEST_NZ(rdma_resolve_addr(conn, NULL, addr->ai_addr, TIMEOUT_IN_MS));

  /* The lookup result is no longer needed once resolution is under way. */
  freeaddrinfo(addr);

  for (;;) {
    struct rdma_cm_event snapshot;

    if (rdma_get_cm_event(ec, &incoming) != 0)
      break;

    /* Copy first so the event can be acked before the handler runs. */
    memcpy(&snapshot, incoming, sizeof(*incoming));
    rdma_ack_cm_event(incoming);

    if (on_event(&snapshot))
      break;
  }

  rdma_destroy_event_channel(ec);

  return 0;
}
static int fi_ibv_rdm_cm_init(struct fi_ibv_rdm_cm* cm, const struct rdma_addrinfo* rai) { struct sockaddr_in* src_addr = (struct sockaddr_in*)rai->ai_src_addr; cm->ec = rdma_create_event_channel(); if (!cm->ec) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create listener event channel: %s\n", strerror(errno)); return -FI_EOTHER; } if (fi_fd_nonblock(cm->ec->fd) != 0) { VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno); return -FI_EOTHER; } if (rdma_create_id(cm->ec, &cm->listener, NULL, RDMA_PS_TCP)) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n", strerror(errno)); return -FI_EOTHER; } if (fi_ibv_rdm_find_ipoib_addr(src_addr, cm)) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to find correct IPoIB address\n"); return -FI_ENODEV; } cm->my_addr.sin_port = src_addr->sin_port; char my_ipoib_addr_str[INET6_ADDRSTRLEN]; inet_ntop(cm->my_addr.sin_family, &cm->my_addr.sin_addr.s_addr, my_ipoib_addr_str, INET_ADDRSTRLEN); VERBS_INFO(FI_LOG_EP_CTRL, "My IPoIB: %s\n", my_ipoib_addr_str); if (rdma_bind_addr(cm->listener, (struct sockaddr *)&cm->my_addr)) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to bind cm listener to my IPoIB addr %s: %s\n", my_ipoib_addr_str, strerror(errno)); return -FI_EOTHER; } if (!cm->my_addr.sin_port) { cm->my_addr.sin_port = rdma_get_src_port(cm->listener); } assert(cm->my_addr.sin_family == AF_INET); VERBS_INFO(FI_LOG_EP_CTRL, "My ep_addr: %s:%u\n", inet_ntoa(cm->my_addr.sin_addr), ntohs(cm->my_addr.sin_port)); return FI_SUCCESS; }
static int __fi_eq_open(struct fid_fabric *fabric, const struct fi_eq_attr *attr, struct fid_eq **eq, void *context) { struct __fid_eq_cm *_eq; long flags = 0; int ret; if (attr->format != FI_EQ_FORMAT_CM) return -FI_ENOSYS; _eq = calloc(1, sizeof *_eq); if (!_eq) return -FI_ENOMEM; _eq->fab = container_of(fabric, struct __fid_fabric, fabric_fid); switch (attr->wait_obj) { case FI_WAIT_FD: _eq->channel = rdma_create_event_channel(); if (!_eq->channel) { ret = -errno; goto err1; } fcntl(_eq->channel->fd, F_GETFL, &flags); ret = fcntl(_eq->channel->fd, F_SETFL, flags | O_NONBLOCK); if (ret) { ret = -errno; goto err2; } break; case FI_WAIT_NONE: break; default: return -FI_ENOSYS; } _eq->flags = attr->flags; _eq->eq_fid.fid.fclass = FID_CLASS_EQ; _eq->eq_fid.fid.context = context; _eq->eq_fid.fid.ops = &__fi_eq_cm_ops; _eq->eq_fid.ops = &__fi_eq_cm_data_ops; *eq = &_eq->eq_fid; return 0; err2: if (_eq->channel) rdma_destroy_event_channel(_eq->channel); err1: free(_eq); return ret; }
/**
 * Constructor: creates the CMA event channel used by this manager, logs the
 * outcome (the new fd on success, errno on failure) and starts the garbage
 * collector with DEFAULT_GARBAGE_COLLECTOR_TIME.
 *
 * A failure to create the channel is only logged; construction continues
 * with m_neigh_cma_event_channel left NULL.
 */
neigh_table_mgr::neigh_table_mgr():m_neigh_cma_event_channel(NULL)
{
	m_neigh_cma_event_channel = rdma_create_event_channel();

	BULLSEYE_EXCLUDE_BLOCK_START
	if (m_neigh_cma_event_channel != NULL) {
		neigh_mgr_logdbg("Creation of neigh_cma_event_channel on fd=%d", m_neigh_cma_event_channel->fd);
	}
	else {
		neigh_mgr_logdbg("Failed to create neigh_cma_event_channel (errno=%d %m)", errno);
	}
	BULLSEYE_EXCLUDE_BLOCK_END

	start_garbage_collector(DEFAULT_GARBAGE_COLLECTOR_TIME);
}
/*
 * Receives the server's IP string and port number over MPI from the server
 * rank mapped to this client, then starts RDMA address resolution towards
 * that endpoint using the file-level `ec` and `cm_id`.
 *
 * The function returns as soon as resolution has been started; it does not
 * wait for any CM event itself.
 */
void connect_to_server()
{
    struct addrinfo *addr;
    MPI_Status recv_status;
    char server_ip[NI_MAXHOST];
    char server_port[11];
    unsigned short port_number;
    int server_rank = cfio_map_get_server_of_client(rank);

    /* Tag `rank` carries the address, tag `rank + 1` carries the port. */
    MPI_Recv(server_ip, NI_MAXHOST, MPI_CHAR, server_rank, rank,
             MPI_COMM_WORLD, &recv_status);
    MPI_Recv(&port_number, 1, MPI_UNSIGNED_SHORT, server_rank, rank + 1,
             MPI_COMM_WORLD, &recv_status);

    /* getaddrinfo() wants the service as text. */
    sprintf(server_port, "%hu", port_number);

    TEST_NZ(getaddrinfo(server_ip, server_port, NULL, &addr));

    TEST_Z(ec = rdma_create_event_channel());
    TEST_NZ(rdma_create_id(ec, &cm_id, NULL, RDMA_PS_TCP));
    TEST_NZ(rdma_resolve_addr(cm_id, NULL, addr->ai_addr, TIMEOUT_IN_MS));

    freeaddrinfo(addr);
}
/*
 * Allocates a diod_rdma_t and creates its RDMA CM event channel and listen
 * id. Each failure is reported through msg_exit()/errn_exit(); otherwise
 * the new handle is returned.
 *
 * NOTE(review): the raw rdma_create_id() return value is what gets passed
 * to errn_exit(); confirm that this is the error number it expects.
 */
diod_rdma_t diod_rdma_create (void)
{
    diod_rdma_t rdma = malloc (sizeof (*rdma));
    int rc;

    if (rdma == NULL)
        msg_exit ("out of memory");

    rdma->event_channel = rdma_create_event_channel();
    if (rdma->event_channel == NULL)
        msg_exit ("rdma_create_event_channel failed");

    rc = rdma_create_id(rdma->event_channel, &rdma->listen_id,
                        NULL, RDMA_PS_TCP);
    if (rc != 0)
        errn_exit (rc, "rdma_create_id");

    return rdma;
}
/*
 * Client entry point: expects <mode> <server-address> <server-port>, where
 * <mode> is "write" or "read".
 *
 * Selects the transfer mode, starts address resolution towards the server
 * and then passes every connection-manager event to on_event() until it
 * returns non-zero or fetching an event fails.
 */
int main(int argc, char **argv)
{
  struct rdma_event_channel *ec = NULL;
  struct rdma_cm_id *conn= NULL;
  struct rdma_cm_event *incoming = NULL;
  struct addrinfo *addr;

  if (argc != 4)
    usage(argv[0]);

  /* First argument picks the RDMA operation. */
  if (!strcmp(argv[1], "write")) {
    set_mode(M_WRITE);
  } else if (!strcmp(argv[1], "read")) {
    set_mode(M_READ);
  } else {
    usage(argv[0]);
  }

  TEST_NZ(getaddrinfo(argv[2], argv[3], NULL, &addr));

  TEST_Z(ec = rdma_create_event_channel());
  TEST_NZ(rdma_create_id(ec, &conn, NULL, RDMA_PS_TCP));
  TEST_NZ(rdma_resolve_addr(conn, NULL, addr->ai_addr, TIMEOUT_IN_MS));

  freeaddrinfo(addr);

  for (;;) {
    struct rdma_cm_event snapshot;

    if (rdma_get_cm_event(ec, &incoming) != 0)
      break;

    /* Copy first so the event can be acked before the handler runs. */
    memcpy(&snapshot, incoming, sizeof(*incoming));
    rdma_ack_cm_event(incoming);

    if (on_event(&snapshot))
      break;
  }

  rdma_destroy_event_channel(ec);

  return 0;
}
/*
 * Allocates the file buffers in `tfi`, connects to tfi.ib_host on
 * tfi.ib_port over RDMA CM, calls on_connect() with the id's context and
 * waits for the disconnect event before releasing the id and the channel.
 *
 * num_tfi is not referenced. `tfi` is passed by value, so the buffer
 * pointers are stored in this function's copy.
 * NOTE(review): the buffers are not freed here; whether anything else still
 * references them after return cannot be told from this function -- confirm.
 *
 * Returns 0. Failure handling is delegated to the TEST_Z / TEST_NZ macros.
 */
int ibrdma_transfer(struct transfer_info tfi, int num_tfi)
{
  struct addrinfo *addr;
  struct rdma_cm_id *cmid= NULL;
  struct rdma_event_channel *ec = NULL;
  struct rdma_conn_param cm_params;
  char *host = tfi.ib_host;
  char port[16]; /* real storage: formatting through an unset pointer is undefined */
  int i;

  snprintf(port, sizeof(port), "%d", tfi.ib_port);

  /*Allocation buffer space for reading from local fs to memory*/
  for (i = 0; i < NUM_FILE_BUF_C; i++) {
    TEST_Z(tfi.fbufs[i].fbuf = (char *)malloc(FILE_BUF_SIZE_C));
    tfi.fbufs[i].size = 0;
  }

  TEST_NZ(getaddrinfo(host, port, NULL, &addr));
  TEST_Z(ec = rdma_create_event_channel());
  TEST_NZ(rdma_create_id(ec, &cmid, NULL, RDMA_PS_TCP));

  TEST_NZ(rdma_resolve_addr(cmid, NULL, addr->ai_addr, TIMEOUT_IN_MS));
  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ADDR_RESOLVED));
  freeaddrinfo(addr);

  build_connection(cmid);

  TEST_NZ(rdma_resolve_route(cmid, TIMEOUT_IN_MS));
  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ROUTE_RESOLVED));

  build_params(&cm_params);
  TEST_NZ(rdma_connect(cmid, &cm_params));
  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ESTABLISHED));

  on_connect(cmid->context);

  TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_DISCONNECTED));

  /* Pass the objects themselves, not the addresses of the local pointers. */
  rdma_destroy_id(cmid);
  rdma_destroy_event_channel(ec);

  return 0;
}
/*
 * Connects to ip:port and performs the initial message exchange.
 *
 * CM events are passed to on_event() until it returns non-zero. At that
 * point the channel and id are published in s_ctx, on_connection() is
 * called for the event's id, and two completions are awaited via poll_cq().
 * The channel and id are left open for later use through s_ctx.
 */
void client_test(char *ip, char *port)
{
  struct rdma_event_channel *ec = NULL;
  struct rdma_cm_id *conn= NULL;
  struct rdma_cm_event *incoming = NULL;
  struct addrinfo *addr;

  TEST_NZ(getaddrinfo(ip, port, NULL, &addr));

  TEST_Z(ec = rdma_create_event_channel());
  TEST_NZ(rdma_create_id(ec, &conn, NULL, RDMA_PS_TCP));
  TEST_NZ(rdma_resolve_addr(conn, NULL, addr->ai_addr, TIMEOUT_IN_MS));

  freeaddrinfo(addr);

  while (rdma_get_cm_event(ec, &incoming) == 0) {
    struct rdma_cm_event snapshot;

    /* Copy first so the event can be acked before the handler runs. */
    memcpy(&snapshot, incoming, sizeof(*incoming));
    rdma_ack_cm_event(incoming);

    if (!on_event(&snapshot))
      continue;

    s_ctx->ec = ec;
    s_ctx->id = conn;

    /* send our memory information to server using post_send */
    on_connection(snapshot.id);
    poll_cq(NULL); /* wait for send_completion */
    poll_cq(NULL); /* wait for recv_completion */
    break;
  }
}
int main(int argc, char **argv) { int op, ret; while ((op = getopt(argc, argv, "m:M:sb:c:C:S:p:")) != -1) { switch (op) { case 'm': dst_addr = optarg; break; case 'M': unmapped_addr = 1; dst_addr = optarg; break; case 's': is_sender = 1; break; case 'b': src_addr = optarg; test.src_addr = (struct sockaddr *) &test.src_in; break; case 'c': connections = atoi(optarg); break; case 'C': message_count = atoi(optarg); break; case 'S': message_size = atoi(optarg); break; case 'p': port_space = strtol(optarg, NULL, 0); break; default: printf("usage: %s\n", argv[0]); printf("\t-m multicast_address\n"); printf("\t[-M unmapped_multicast_address]\n" "\t replaces -m and requires -b\n"); printf("\t[-s(ender)]\n"); printf("\t[-b bind_address]\n"); printf("\t[-c connections]\n"); printf("\t[-C message_count]\n"); printf("\t[-S message_size]\n"); printf("\t[-p port_space - %#x for UDP (default), " "%#x for IPOIB]\n", RDMA_PS_UDP, RDMA_PS_IPOIB); exit(1); } } if (unmapped_addr && !src_addr) { printf("unmapped multicast address requires binding " "to source address\n"); exit(1); } test.dst_addr = (struct sockaddr *) &test.dst_in; test.connects_left = connections; test.channel = rdma_create_event_channel(); if (!test.channel) { perror("failed to create event channel"); exit(1); } if (alloc_nodes()) exit(1); ret = run(); printf("test complete\n"); destroy_nodes(); rdma_destroy_event_channel(test.channel); printf("return status %d\n", ret); return ret; }
/*
 * Starts the server: creates a passive RDMA endpoint for address:port, puts
 * it into listening state, moves it onto a dedicated event channel and
 * starts a thread running a GLib main loop that watches that channel.
 *
 * `mem` / `mem_size` describe the memory the server provides and are stored
 * in the private data.
 *
 * Returns 0 on success. Returns -1 if `self` is NULL, if the server was
 * already started, if the memory arguments are invalid, or if any setup
 * step fails. After a failure priv->base is NULL again, so a later call
 * does not mistake the server for started.
 */
int kiro_server_start (KiroServer *self, const char *address, const char *port, void *mem, size_t mem_size)
{
    g_return_val_if_fail (self != NULL, -1);
    KiroServerPrivate *priv = KIRO_SERVER_GET_PRIVATE (self);

    if (priv->base) {
        g_debug ("Server already started.");
        return -1;
    }

    if (!mem || mem_size == 0) {
        g_warning ("Invalid memory given to provide.");
        return -1;
    }

    struct rdma_addrinfo hints, *res_addrinfo;
    memset (&hints, 0, sizeof (hints));
    hints.ai_port_space = RDMA_PS_IB;
    hints.ai_flags = RAI_PASSIVE;

    /* rdma_getaddrinfo is given writable copies of the const inputs. */
    char *addr_c = g_strdup (address);
    char *port_c = g_strdup (port);
    int rtn = rdma_getaddrinfo (addr_c, port_c, &hints, &res_addrinfo);
    g_free (addr_c);
    g_free (port_c);

    if (rtn) {
        g_critical ("Failed to create address information: %s", strerror (errno));
        return -1;
    }

    struct ibv_qp_init_attr qp_attr;
    memset (&qp_attr, 0, sizeof (qp_attr));
    qp_attr.cap.max_send_wr = 10;
    qp_attr.cap.max_recv_wr = 10;
    qp_attr.cap.max_send_sge = 1;
    qp_attr.cap.max_recv_sge = 1;
    qp_attr.qp_context = priv->base;
    qp_attr.sq_sig_all = 1;

    /*
     * The address list comes from rdma_getaddrinfo, so it is released with
     * rdma_freeaddrinfo on both paths; g_free would only free the head
     * structure through a different allocator.
     */
    if (rdma_create_ep (& (priv->base), res_addrinfo, NULL, &qp_attr)) {
        g_critical ("Endpoint creation failed: %s", strerror (errno));
        rdma_freeaddrinfo (res_addrinfo);
        return -1;
    }

    rdma_freeaddrinfo (res_addrinfo); // No longer needed
    g_debug ("Endpoint created");

    char *addr_local = NULL;
    struct sockaddr *src_addr = rdma_get_local_addr (priv->base);

    if (!src_addr) {
        addr_local = "NONE";
    }
    else {
        addr_local = inet_ntoa (((struct sockaddr_in *)src_addr)->sin_addr);
    }

    g_message ("Server bound to address %s:%s", addr_local, port);

    if (rdma_listen (priv->base, 0)) {
        g_critical ("Failed to put server into listening state: %s", strerror (errno));
        rdma_destroy_ep (priv->base);
        priv->base = NULL;
        return -1;
    }

    priv->mem = mem;
    priv->mem_size = mem_size;

    priv->ec = rdma_create_event_channel();
    if (!priv->ec) {
        /* Without a channel there is no fd to watch further down. */
        g_critical ("Failed to create Event Channel: %s", strerror (errno));
        rdma_destroy_ep (priv->base);
        priv->base = NULL;
        return -1;
    }

    if (rdma_migrate_id (priv->base, priv->ec)) {
        g_critical ("Was unable to migrate connection to new Event Channel: %s", strerror (errno));
        rdma_destroy_ep (priv->base);
        priv->base = NULL;
        rdma_destroy_event_channel (priv->ec);
        priv->ec = NULL;
        return -1;
    }

    priv->main_loop = g_main_loop_new (NULL, FALSE);
    priv->conn_ec = g_io_channel_unix_new (priv->ec->fd);
    g_io_add_watch (priv->conn_ec, G_IO_IN | G_IO_PRI, process_cm_event, (gpointer)priv);
    priv->main_thread = g_thread_new ("KIRO Server main loop", start_server_main_loop, priv->main_loop);
    // We gave control to the main_loop (with add_watch) and don't need our ref
    // any longer
    g_io_channel_unref (priv->conn_ec);

    g_message ("Enpoint listening");
    return 0;
}
/*
 * Creates and initialises an ibw context.
 *
 * Allocates the public context and its private part with talloc, stores the
 * caller's callbacks and event context, processes the init attributes,
 * creates the RDMA CM event channel, registers its fd with `ectx` and
 * creates the CM id.
 *
 * Returns the new context, or NULL on failure. On failure the reason is in
 * ibw_lasterr (also logged) and the partially built context is freed.
 */
struct ibw_ctx *ibw_init(struct ibw_initattr *attr, int nattr, void *ctx_userdata, ibw_connstate_fn_t ibw_connstate, ibw_receive_fn_t ibw_receive, struct tevent_context *ectx)
{
	struct ibw_ctx *ctx = talloc_zero(NULL, struct ibw_ctx);
	struct ibw_ctx_priv *pctx;
	int rc;

	DEBUG(DEBUG_DEBUG, ("ibw_init(ctx_userdata: %p, ectx: %p)\n", ctx_userdata, ectx));

	/* initialize basic data structures */
	memset(ibw_lasterr, 0, IBW_LASTERR_BUFSIZE);

	assert(ctx!=NULL);
	ibw_lasterr[0] = '\0';
	talloc_set_destructor(ctx, ibw_ctx_destruct);
	ctx->ctx_userdata = ctx_userdata;

	pctx = talloc_zero(ctx, struct ibw_ctx_priv);
	/* Check the allocation before pctx is handed to anything. */
	assert(pctx!=NULL);
	talloc_set_destructor(pctx, ibw_ctx_priv_destruct);
	ctx->internal = (void *)pctx;

	pctx->connstate_func = ibw_connstate;
	pctx->receive_func = ibw_receive;

	pctx->ectx = ectx;

	/* process attributes */
	if (ibw_process_init_attrs(attr, nattr, &pctx->opts))
		goto cleanup;

	/* init cm */
	pctx->cm_channel = rdma_create_event_channel();
	if (!pctx->cm_channel) {
		snprintf(ibw_lasterr, IBW_LASTERR_BUFSIZE,
			 "rdma_create_event_channel error %d\n", errno);
		goto cleanup;
	}

	pctx->cm_channel_event = tevent_add_fd(pctx->ectx, pctx,
		pctx->cm_channel->fd, TEVENT_FD_READ, ibw_event_handler_cm, ctx);
	if (!pctx->cm_channel_event) {
		/* Without the fd handler no CM event would ever be delivered. */
		snprintf(ibw_lasterr, IBW_LASTERR_BUFSIZE,
			 "tevent_add_fd failed for cm_channel\n");
		goto cleanup;
	}

#if RDMA_USER_CM_MAX_ABI_VERSION >= 2
	rc = rdma_create_id(pctx->cm_channel, &pctx->cm_id, ctx, RDMA_PS_TCP);
#else
	rc = rdma_create_id(pctx->cm_channel, &pctx->cm_id, ctx);
#endif
	if (rc) {
		rc = errno;
		snprintf(ibw_lasterr, IBW_LASTERR_BUFSIZE,
			 "rdma_create_id error %d\n", rc);
		goto cleanup;
	}
	DEBUG(DEBUG_DEBUG, ("created cm_id %p\n", pctx->cm_id));

	pctx->pagesize = sysconf(_SC_PAGESIZE);

	return ctx;
	/* don't put code here */
cleanup:
	/* Log the message as data, never as the format string itself. */
	DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));

	if (ctx)
		talloc_free(ctx);

	return NULL;
}
int fi_ibv_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, struct fid_eq **eq, void *context) { struct fi_ibv_eq *_eq; struct epoll_event event; int ret; _eq = calloc(1, sizeof *_eq); if (!_eq) return -ENOMEM; _eq->fab = container_of(fabric, struct fi_ibv_fabric, fabric_fid); fastlock_init(&_eq->lock); ret = dlistfd_head_init(&_eq->list_head); if (ret) { FI_INFO(&fi_ibv_prov, FI_LOG_EQ, "Unable to initialize dlistfd\n"); goto err1; } _eq->epfd = epoll_create1(0); if (_eq->epfd < 0) { ret = -errno; goto err2; } memset(&event, 0, sizeof(event)); event.events = EPOLLIN; if (epoll_ctl(_eq->epfd, EPOLL_CTL_ADD, _eq->list_head.signal.fd[FI_READ_FD], &event)) { ret = -errno; goto err3; } switch (attr->wait_obj) { case FI_WAIT_NONE: case FI_WAIT_UNSPEC: case FI_WAIT_FD: _eq->channel = rdma_create_event_channel(); if (!_eq->channel) { ret = -errno; goto err3; } ret = fi_fd_nonblock(_eq->channel->fd); if (ret) goto err4; if (epoll_ctl(_eq->epfd, EPOLL_CTL_ADD, _eq->channel->fd, &event)) { ret = -errno; goto err4; } break; default: ret = -FI_ENOSYS; goto err1; } _eq->flags = attr->flags; _eq->eq_fid.fid.fclass = FI_CLASS_EQ; _eq->eq_fid.fid.context = context; _eq->eq_fid.fid.ops = &fi_ibv_eq_fi_ops; _eq->eq_fid.ops = &fi_ibv_eq_ops; *eq = &_eq->eq_fid; return 0; err4: if (_eq->channel) rdma_destroy_event_channel(_eq->channel); err3: close(_eq->epfd); err2: dlistfd_head_free(&_eq->list_head); err1: fastlock_destroy(&_eq->lock); free(_eq); return ret; }
int main(int argc, char *argv[]) { struct pdata rep_pdata; struct rdma_event_channel *cm_channel; struct rdma_cm_id *listen_id; struct rdma_cm_id *cm_id; struct rdma_cm_event *event; struct rdma_conn_param conn_param = { }; struct ibv_pd *pd; struct ibv_comp_channel *comp_chan; struct ibv_cq *cq; struct ibv_cq *evt_cq; struct ibv_mr *mr; struct ibv_qp_init_attr qp_attr = { }; struct ibv_sge sge; struct ibv_send_wr send_wr = { }; struct ibv_send_wr *bad_send_wr; struct ibv_recv_wr recv_wr = { }; struct ibv_recv_wr *bad_recv_wr; struct ibv_wc wc; void *cq_context; struct sockaddr_in sin; uint32_t *buf; int err; /* Set up RDMA CM structures */ cm_channel = rdma_create_event_channel(); if (!cm_channel) return 1; err = rdma_create_id(cm_channel, &listen_id, NULL, RDMA_PS_TCP); if (err) return err; sin.sin_family = AF_INET; sin.sin_port = htons(20079); sin.sin_addr.s_addr = INADDR_ANY; /* Bind to local port and listen for connection request */ err = rdma_bind_addr(listen_id, (struct sockaddr *) &sin); if (err) return 1; err = rdma_listen(listen_id, 1); if (err) return 1; err = rdma_get_cm_event(cm_channel, &event); if (err) return err; printf("after get_cm_event\n"); if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) return 1; cm_id = event->id; rdma_ack_cm_event(event); /* Create verbs objects now that we know which device to use */ pd = ibv_alloc_pd(cm_id->verbs); if (!pd) return 1; comp_chan = ibv_create_comp_channel(cm_id->verbs); if (!comp_chan) return 1; cq = ibv_create_cq(cm_id->verbs, 2, NULL, comp_chan, 0); if (!cq) return 1; if (ibv_req_notify_cq(cq, 0)) return 1; buf = calloc(2, sizeof(uint32_t)); if (!buf) return 1; mr = ibv_reg_mr(pd, buf, 2 * sizeof(uint32_t), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); if (!mr) return 1; qp_attr.cap.max_send_wr = 1; qp_attr.cap.max_send_sge = 1; qp_attr.cap.max_recv_wr = 1; qp_attr.cap.max_recv_sge = 1; qp_attr.send_cq = cq; qp_attr.recv_cq = cq; qp_attr.qp_type = IBV_QPT_RC; err = 
rdma_create_qp(cm_id, pd, &qp_attr); if (err) return err; /* Post receive before accepting connection */ sge.addr = (uintptr_t) buf + sizeof(uint32_t); sge.length = sizeof(uint32_t); sge.lkey = mr->lkey; recv_wr.sg_list = &sge; recv_wr.num_sge = 1; if (ibv_post_recv(cm_id->qp, &recv_wr, &bad_recv_wr)) return 1; rep_pdata.buf_va = htonll((uintptr_t) buf); rep_pdata.buf_rkey = htonl(mr->rkey); conn_param.responder_resources = 1; conn_param.private_data = &rep_pdata; conn_param.private_data_len = sizeof rep_pdata; /* Accept connection */ printf("before accept\n"); err = rdma_accept(cm_id, &conn_param); if (err) return 1; printf("after accept\n"); err = rdma_get_cm_event(cm_channel, &event); if (err) return err; if (event->event != RDMA_CM_EVENT_ESTABLISHED) return 1; rdma_ack_cm_event(event); /* Wait for receive completion */ if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context)) return 1; if (ibv_req_notify_cq(cq, 0)) return 1; if (ibv_poll_cq(cq, 1, &wc) < 1) return 1; if (wc.status != IBV_WC_SUCCESS) return 1; /* Add two integers and send reply back */ buf[0] = htonl(ntohl(buf[0]) + ntohl(buf[1])); sge.addr = (uintptr_t) buf; sge.length = sizeof(uint32_t); sge.lkey = mr->lkey; send_wr.opcode = IBV_WR_SEND; send_wr.send_flags = IBV_SEND_SIGNALED; send_wr.sg_list = &sge; send_wr.num_sge = 1; if (ibv_post_send(cm_id->qp, &send_wr, &bad_send_wr)) return 1; /* Wait for send completion */ if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context)) return 1; if (ibv_poll_cq(cq, 1, &wc) < 1) return 1; if (wc.status != IBV_WC_SUCCESS) return 1; printf("before ack cq 2\n"); ibv_ack_cq_events(cq, 2); return 0; }
/*
 * Active-side connection setup.
 *
 * Resolves param->host on port RDMA_PORT, creates the event channel and CM
 * id inside `comm`, resolves address and route, connects, and waits for the
 * ESTABLISHED event. Every failing step prints a message to stderr and
 * exits the process with status 1.
 *
 * Afterwards the rdma_msg_mr[] slots are cleared and rdma_buf_size is
 * chosen: RDMA_BUF_SIZE_C by default, or MAX_RDMA_BUF_SIZE_C divided by the
 * client count given in the RDMA_CLIENT_NUM_S environment variable.
 *
 * Returns 0.
 */
int RDMA_Active_Init(struct RDMA_communicator *comm, struct RDMA_param *param)
{
  struct addrinfo *addr;
  struct rdma_conn_param cm_params;
  char port[8];
  char *value;
  int num_clients;
  int i;

  /* Bounded formatting: a decimal int can need more than 8 bytes. */
  snprintf(port, sizeof(port), "%d", RDMA_PORT);

  if (getaddrinfo(param->host, port, NULL, &addr)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: getaddrinfo failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  if (!(comm->ec = rdma_create_event_channel())) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: rdma event channel create failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  if (rdma_create_id(comm->ec, &(comm->cm_id), NULL, RDMA_PS_TCP)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: rdma id create failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  if (rdma_resolve_addr(comm->cm_id, NULL, addr->ai_addr, TIMEOUT_IN_MS)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: rdma address resolve failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  if (wait_for_event(comm->ec, RDMA_CM_EVENT_ADDR_RESOLVED)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: event wait failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  freeaddrinfo(addr);

  build_connection(comm->cm_id);

  if (rdma_resolve_route(comm->cm_id, TIMEOUT_IN_MS)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: rdma route resolve failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  if (wait_for_event(comm->ec, RDMA_CM_EVENT_ROUTE_RESOLVED)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: event wait failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  build_params(&cm_params);

  if (rdma_connect(comm->cm_id, &cm_params)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: rdma connection failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  if (wait_for_event(comm->ec, RDMA_CM_EVENT_ESTABLISHED)) {
    fprintf(stderr, "RDMA lib: SEND: ERROR: event wait failed @ %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  for (i = 0; i < RDMA_BUF_NUM_C; i++) {
    rdma_msg_mr[i] = NULL;
  }

  /*
   * A missing, non-numeric, zero or negative client count falls back to the
   * default size instead of dividing by zero.
   */
  value = getenv("RDMA_CLIENT_NUM_S");
  num_clients = (value != NULL) ? atoi(value) : 0;
  if (num_clients > 0) {
    rdma_buf_size = MAX_RDMA_BUF_SIZE_C / num_clients;
  } else {
    rdma_buf_size = RDMA_BUF_SIZE_C;
  }

  fprintf(stderr, "rdma_buf_size: %d\n", rdma_buf_size);

  return 0;
}
int main(int argc, char *argv[]) { struct rping_cb *cb; int op; int ret = 0; int persistent_server = 0; cb = malloc(sizeof(*cb)); if (!cb) return -ENOMEM; memset(cb, 0, sizeof(*cb)); cb->server = -1; cb->state = IDLE; cb->size = 64; cb->sin.ss_family = PF_INET; cb->port = htons(7174); sem_init(&cb->sem, 0, 0); opterr = 0; while ((op=getopt(argc, argv, "a:Pp:C:S:t:scvVd")) != -1) { switch (op) { case 'a': ret = get_addr(optarg, (struct sockaddr *) &cb->sin); break; case 'P': persistent_server = 1; break; case 'p': cb->port = htons(atoi(optarg)); DEBUG_LOG("port %d\n", (int) atoi(optarg)); break; case 's': cb->server = 1; DEBUG_LOG("server\n"); break; case 'c': cb->server = 0; DEBUG_LOG("client\n"); break; case 'S': cb->size = atoi(optarg); if ((cb->size < RPING_MIN_BUFSIZE) || (cb->size > (RPING_BUFSIZE - 1))) { fprintf(stderr, "Invalid size %d " "(valid range is %Zd to %d)\n", cb->size, RPING_MIN_BUFSIZE, RPING_BUFSIZE); ret = EINVAL; } else DEBUG_LOG("size %d\n", (int) atoi(optarg)); break; case 'C': cb->count = atoi(optarg); if (cb->count < 0) { fprintf(stderr, "Invalid count %d\n", cb->count); ret = EINVAL; } else DEBUG_LOG("count %d\n", (int) cb->count); break; case 'v': cb->verbose++; DEBUG_LOG("verbose\n"); break; case 'V': cb->validate++; DEBUG_LOG("validate data\n"); break; case 'd': debug++; break; default: usage("rping"); ret = EINVAL; goto out; } } if (ret) goto out; if (cb->server == -1) { usage("rping"); ret = EINVAL; goto out; } cb->cm_channel = rdma_create_event_channel(); if (!cb->cm_channel) { perror("rdma_create_event_channel"); ret = errno; goto out; } ret = rdma_create_id(cb->cm_channel, &cb->cm_id, cb, RDMA_PS_TCP); if (ret) { perror("rdma_create_id"); goto out2; } DEBUG_LOG("created cm_id %p\n", cb->cm_id); ret = pthread_create(&cb->cmthread, NULL, cm_thread, cb); if (ret) { perror("pthread_create"); goto out2; } if (cb->server) { if (persistent_server) ret = rping_run_persistent_server(cb); else ret = rping_run_server(cb); } else { ret = 
rping_run_client(cb); } DEBUG_LOG("destroy cm_id %p\n", cb->cm_id); rdma_destroy_id(cb->cm_id); out2: rdma_destroy_event_channel(cb->cm_channel); out: free(cb); return ret; }
/*
 * Timed variant of the client connection test.
 *
 * Connects to ip:port, performs the initial message exchange once
 * on_event() returns non-zero, and prints how long each phase took:
 *   dt   - whole run
 *   dt1  - setup (dt11 lookup + channel creation, dt12 id creation,
 *          dt13 starting address resolution)
 *   dt2  - event loop including the send/recv completions
 * All values are printed in microseconds. The channel and id are left open
 * for later use through s_ctx.
 */
void client_test(char *ip, char *port)
{
  struct addrinfo *addr;
  struct rdma_cm_event *event = NULL;
  struct rdma_cm_id *conn= NULL;
  struct rdma_event_channel *ec = NULL;
  struct timeval t1, t2, t3, t11, t12;
  struct timeval dt, dt1, dt2, dt11, dt12, dt13;

  gettimeofday(&t1, NULL);
  TEST_NZ(getaddrinfo(ip, port, NULL, &addr));
  TEST_Z(ec = rdma_create_event_channel());
  gettimeofday(&t11, NULL);
  TEST_NZ(rdma_create_id(ec, &conn, NULL, RDMA_PS_TCP));
  gettimeofday(&t12, NULL);
  TEST_NZ(rdma_resolve_addr(conn, NULL, addr->ai_addr, TIMEOUT_IN_MS));
  freeaddrinfo(addr);
  gettimeofday(&t2, NULL);

  while (rdma_get_cm_event(ec, &event) == 0) {
    struct rdma_cm_event event_copy;

    /* Copy first so the event can be acked before the handler runs. */
    memcpy(&event_copy, event, sizeof(*event));
    rdma_ack_cm_event(event);

    if (on_event(&event_copy)) {
      s_ctx->ec = ec;
      s_ctx->id = conn;
      on_connection(event_copy.id); /* send our memory information to server using post_send */
      poll_cq(NULL); /* wait for send_completion */
      poll_cq(NULL); /* wait for recv_completion */
      break;
    }
  }
  gettimeofday(&t3, NULL);

  timersub(&t3, &t1, &dt);
  timersub(&t3, &t2, &dt2);
  timersub(&t2, &t1, &dt1);
  timersub(&t2, &t12, &dt13);
  timersub(&t12, &t11, &dt12);
  timersub(&t11, &t1, &dt11);

  /* One second is 1000000 microseconds, the same factor used below. */
  long usec = dt.tv_usec + 1000000 * dt.tv_sec;
  printf("[dt]:\t%ld us.\n", usec);
  printf("[dt1]:\t%ld us.\n", dt1.tv_usec+1000000 *dt1.tv_sec);
  printf("Including the following steps: \n");
  printf("[dt11]:\t%ld us.\n", dt11.tv_usec+1000000 *dt11.tv_sec);
  printf("[dt12]:\t%ld us.\n", dt12.tv_usec+1000000 *dt12.tv_sec);
  printf("[dt13]:\t%ld us.\n", dt13.tv_usec+1000000 *dt13.tv_sec);
  printf("[dt2] takes %ld micro_secs.\n", dt2.tv_usec+1000000*dt2.tv_sec);
  printf("[dt]:total time\t[dt1]:pre_setup\t[dt2]:send/recv\t.\n");
  printf("[dt11]:create_event_channel\t[dt12]:create_id\t[dt13]:resolve_address.\n");

  return;
}
int main(int argc, char **argv) { struct sockaddr_in addr; struct rdma_cm_event *event = NULL; struct rdma_cm_id *listener = NULL; struct rdma_event_channel *ec = NULL; uint16_t port = 0; if (argc != 2) usage(argv[0]); if (strcmp(argv[1], "write") == 0) set_mode(M_WRITE); else if (strcmp(argv[1], "read") == 0) set_mode(M_READ); else usage(argv[0]); memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; TEST_Z(ec = rdma_create_event_channel()); TEST_NZ(rdma_create_id(ec, &listener, NULL, RDMA_PS_TCP)); TEST_NZ(rdma_bind_addr(listener, (struct sockaddr *)&addr)); TEST_NZ(rdma_listen(listener, 10)); /* backlog=10 is arbitrary */ port = ntohs(rdma_get_src_port(listener)); printf("listening on port %d.\n", port); while (1) { // pthread_t thread_id; // pthread_attr_t thread_attr; // simple_context_t *context = NULL; // struct rdma_cm_id *id = NULL; int rc =0; fprintf(stderr, "Waiting for cm_event... "); if ((rc = rdma_get_cm_event(ec, &event))){ fprintf(stderr, "get event failed : %d\n", rc); break; } fprintf(stderr, "\"%s\"\n", event_type_str(event->event)); switch (event->event){ case RDMA_CM_EVENT_CONNECT_REQUEST: accept_connection(event->id); break; case RDMA_CM_EVENT_ESTABLISHED: on_connect(event->id->context); // pthread_attr_init(&thread_attr); // pthread_create(&thread_id, // &thread_attr, // handle_server_cq, // (void *)(event->id->context)); break; case RDMA_CM_EVENT_DISCONNECTED: fprintf(stderr, "Disconnect from id : %p \n", event->id); // fprintf(stderr, "Disconnect from id : %p (total connections %d)\n", // event->id, connections); // context = (simple_context_t *)(event->id->context); // id = event->id; break; default: break; } rdma_ack_cm_event(event); // if (context){ // context->quit_cq_thread = 1; // pthread_join(thread_id, NULL); // rdma_destroy_id(id); // free_connection(context); // context = NULL; // } } rdma_destroy_id(listener); rdma_destroy_event_channel(ec); return 0; /* while (rdma_get_cm_event(ec, &event) == 0) { struct rdma_cm_event 
event_copy; memcpy(&event_copy, event, sizeof(*event)); rdma_ack_cm_event(event); if (on_event(&event_copy)) break; } rdma_destroy_id(listener); rdma_destroy_event_channel(ec); return 0; */ }
static int fio_rdmaio_init(struct thread_data *td) { struct rdmaio_data *rd = td->io_ops->data; struct rdmaio_options *o = td->eo; unsigned int max_bs; int ret, i; if (td_rw(td)) { log_err("fio: rdma connections must be read OR write\n"); return 1; } if (td_random(td)) { log_err("fio: RDMA network IO can't be random\n"); return 1; } if (compat_options(td)) return 1; if (!o->port) { log_err("fio: no port has been specified which is required " "for the rdma engine\n"); return 1; } if (check_set_rlimits(td)) return 1; rd->rdma_protocol = o->verb; rd->cq_event_num = 0; rd->cm_channel = rdma_create_event_channel(); if (!rd->cm_channel) { log_err("fio: rdma_create_event_channel fail\n"); return 1; } ret = rdma_create_id(rd->cm_channel, &rd->cm_id, rd, RDMA_PS_TCP); if (ret) { log_err("fio: rdma_create_id fail\n"); return 1; } if ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE) || (rd->rdma_protocol == FIO_RDMA_MEM_READ)) { rd->rmt_us = malloc(FIO_RDMA_MAX_IO_DEPTH * sizeof(struct remote_u)); memset(rd->rmt_us, 0, FIO_RDMA_MAX_IO_DEPTH * sizeof(struct remote_u)); rd->rmt_nr = 0; } rd->io_us_queued = malloc(td->o.iodepth * sizeof(struct io_u *)); memset(rd->io_us_queued, 0, td->o.iodepth * sizeof(struct io_u *)); rd->io_u_queued_nr = 0; rd->io_us_flight = malloc(td->o.iodepth * sizeof(struct io_u *)); memset(rd->io_us_flight, 0, td->o.iodepth * sizeof(struct io_u *)); rd->io_u_flight_nr = 0; rd->io_us_completed = malloc(td->o.iodepth * sizeof(struct io_u *)); memset(rd->io_us_completed, 0, td->o.iodepth * sizeof(struct io_u *)); rd->io_u_completed_nr = 0; if (td_read(td)) { /* READ as the server */ rd->is_client = 0; td->flags |= TD_F_NO_PROGRESS; /* server rd->rdma_buf_len will be setup after got request */ ret = fio_rdmaio_setup_listen(td, o->port); } else { /* WRITE as the client */ rd->is_client = 1; ret = fio_rdmaio_setup_connect(td, td->o.filename, o->port); } max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]); rd->send_buf.max_bs = htonl(max_bs); /* register 
each io_u in the free list */ for (i = 0; i < td->io_u_freelist.nr; i++) { struct io_u *io_u = td->io_u_freelist.io_us[i]; io_u->engine_data = malloc(sizeof(struct rdma_io_u_data)); memset(io_u->engine_data, 0, sizeof(struct rdma_io_u_data)); ((struct rdma_io_u_data *)io_u->engine_data)->wr_id = i; io_u->mr = ibv_reg_mr(rd->pd, io_u->buf, max_bs, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE); if (io_u->mr == NULL) { log_err("fio: ibv_reg_mr io_u failed\n"); return 1; } rd->send_buf.rmt_us[i].buf = htonll((uint64_t) (unsigned long)io_u->buf); rd->send_buf.rmt_us[i].rkey = htonl(io_u->mr->rkey); rd->send_buf.rmt_us[i].size = htonl(max_bs); #if 0 log_info("fio: Send rkey %x addr %" PRIx64 " len %d to client\n", io_u->mr->rkey, io_u->buf, max_bs); */ #endif } rd->send_buf.nr = htonl(i); return ret; }
/*
 * Active (client) side of the RDMA example.
 *
 * Usage: <prog> write|read <server-address> <server-port>
 *
 * Steps, each of which must succeed (checked by TEST_Z / TEST_NZ):
 *   1. resolve the server address and create the event channel and cm_id;
 *   2. rdma_resolve_addr(): map the destination IP address to an RDMA
 *      address, which binds the cm_id to a local device;
 *   3. build the connection resources and fill the local message region;
 *   4. rdma_resolve_route(): resolve a route to the already resolved
 *      destination address;
 *   5. rdma_connect(): for an RDMA_PS_TCP id this initiates the connection
 *      request to the remote side;
 *   6. once established, run on_connect() and send_mr(), then disconnect
 *      and release the cm_id and the event channel.
 *
 * Returns 0.
 */
static int run(int argc, char **argv)
{
    struct rdma_conn_param cm_params;
    struct rdma_event_channel *ec = NULL;
    struct rdma_cm_id *cmid= NULL;
    struct addrinfo *addr;
    char *local_msg;

    if (argc != 4)
        usage(argv[0]);

    if (!strcmp(argv[1], "write"))
        set_mode(M_WRITE);
    else if (!strcmp(argv[1], "read"))
        set_mode(M_READ);
    else
        usage(argv[0]);

    TEST_NZ(getaddrinfo(argv[2], argv[3], NULL, &addr));
    TEST_Z(ec = rdma_create_event_channel());

    /* Create the RDMA "socket". */
    TEST_NZ(rdma_create_id(ec, &cmid, NULL, RDMA_PS_TCP));

    /* Address resolution; the source address is left unspecified (NULL). */
    TEST_NZ(rdma_resolve_addr(cmid, NULL, addr->ai_addr, TIMEOUT_IN_MS));
    TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ADDR_RESOLVED));
    freeaddrinfo(addr);

    build_connection(cmid);
    local_msg = get_local_message_region(cmid->context);
    sprintf(local_msg, "message from active/client side with pid %d", getpid());

    /* Route resolution; requires the address to be resolved already. */
    TEST_NZ(rdma_resolve_route(cmid, TIMEOUT_IN_MS));
    TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ROUTE_RESOLVED));

    print_path_rec(cmid);

    /* Connection establishment. */
    build_params(&cm_params);
    printf("Connecting ...\n");
    TEST_NZ(rdma_connect(cmid, &cm_params));
    TEST_NZ(wait_for_event(ec, RDMA_CM_EVENT_ESTABLISHED));
    printf("Connected !\n");

    /* TODO: do something */
    on_connect(cmid->context);
    send_mr(cmid->context);

    /* Tear down in reverse order of creation. */
    rdma_disconnect(cmid);
    rdma_destroy_id(cmid);
    rdma_destroy_event_channel(ec);

    return 0;
}
static int fi_ibv_mr_reg(struct fid *fid, const void *buf, size_t len, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { struct fi_ibv_mem_desc *md; int fi_ibv_access = 0; struct fid_domain *domain; if (flags) return -FI_EBADFLAGS; if (fid->fclass != FI_CLASS_DOMAIN) { return -FI_EINVAL; } domain = container_of(fid, struct fid_domain, fid); md = calloc(1, sizeof *md); if (!md) return -FI_ENOMEM; md->domain = container_of(domain, struct fi_ibv_domain, domain_fid); md->mr_fid.fid.fclass = FI_CLASS_MR; md->mr_fid.fid.context = context; md->mr_fid.fid.ops = &fi_ibv_mr_ops; /* Enable local write access by default for FI_EP_RDM which hides local * registration requirements. This allows to avoid buffering or double * registration */ if (!(md->domain->info->caps & FI_LOCAL_MR)) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; /* Local read access to an MR is enabled by default in verbs */ if (access & FI_RECV) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; /* iWARP spec requires Remote Write access for an MR that is used * as a data sink for a Remote Read */ if (access & FI_READ) { fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; if (md->domain->verbs->device->transport_type == IBV_TRANSPORT_IWARP) fi_ibv_access |= IBV_ACCESS_REMOTE_WRITE; } if (access & FI_WRITE) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE; if (access & FI_REMOTE_READ) fi_ibv_access |= IBV_ACCESS_REMOTE_READ; /* Verbs requires Local Write access too for Remote Write access */ if (access & FI_REMOTE_WRITE) fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC; md->mr = ibv_reg_mr(md->domain->pd, (void *) buf, len, fi_ibv_access); if (!md->mr) goto err; md->mr_fid.mem_desc = (void *) (uintptr_t) md->mr->lkey; md->mr_fid.key = md->mr->rkey; *mr = &md->mr_fid; if(md->domain->eq && (md->domain->eq_flags & FI_REG_MR)) { struct fi_eq_entry entry = { .fid = &md->mr_fid.fid, .context = context }; fi_ibv_eq_write_event(md->domain->eq, 
FI_MR_COMPLETE, &entry, sizeof(entry)); } return 0; err: free(md); return -errno; } static int fi_ibv_mr_regv(struct fid *fid, const struct iovec * iov, size_t count, uint64_t access, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context) { if (count > VERBS_MR_IOV_LIMIT) { VERBS_WARN(FI_LOG_FABRIC, "iov count > %d not supported\n", VERBS_MR_IOV_LIMIT); return -FI_EINVAL; } return fi_ibv_mr_reg(fid, iov->iov_base, iov->iov_len, access, offset, requested_key, flags, mr, context); } static int fi_ibv_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr, uint64_t flags, struct fid_mr **mr) { return fi_ibv_mr_regv(fid, attr->mr_iov, attr->iov_count, attr->access, 0, attr->requested_key, flags, mr, attr->context); } static int fi_ibv_domain_bind(struct fid *fid, struct fid *bfid, uint64_t flags) { struct fi_ibv_domain *domain; struct fi_ibv_eq *eq; domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid); switch (bfid->fclass) { case FI_CLASS_EQ: eq = container_of(bfid, struct fi_ibv_eq, eq_fid); domain->eq = eq; domain->eq_flags = flags; break; default: return -EINVAL; } return 0; } static int fi_ibv_domain_close(fid_t fid) { struct fi_ibv_domain *domain; int ret; domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid); if (domain->rdm) { rdma_destroy_ep(domain->rdm_cm->listener); free(domain->rdm_cm); } if (domain->pd) { ret = ibv_dealloc_pd(domain->pd); if (ret) return -ret; domain->pd = NULL; } fi_freeinfo(domain->info); free(domain); return 0; } static int fi_ibv_open_device_by_name(struct fi_ibv_domain *domain, const char *name) { struct ibv_context **dev_list; int i, ret = -FI_ENODEV; if (!name) return -FI_EINVAL; dev_list = rdma_get_devices(NULL); if (!dev_list) return -errno; for (i = 0; dev_list[i] && ret; i++) { if (domain->rdm) { ret = strncmp(name, ibv_get_device_name(dev_list[i]->device), strlen(name) - strlen(verbs_rdm_domain.suffix)); } else { ret = strcmp(name, 
ibv_get_device_name(dev_list[i]->device)); } if (!ret) domain->verbs = dev_list[i]; } rdma_free_devices(dev_list); return ret; } static struct fi_ops fi_ibv_fid_ops = { .size = sizeof(struct fi_ops), .close = fi_ibv_domain_close, .bind = fi_ibv_domain_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; static struct fi_ops_mr fi_ibv_domain_mr_ops = { .size = sizeof(struct fi_ops_mr), .reg = fi_ibv_mr_reg, .regv = fi_ibv_mr_regv, .regattr = fi_ibv_mr_regattr, }; static struct fi_ops_domain fi_ibv_domain_ops = { .size = sizeof(struct fi_ops_domain), .av_open = fi_no_av_open, .cq_open = fi_ibv_cq_open, .endpoint = fi_ibv_open_ep, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_no_cntr_open, .poll_open = fi_no_poll_open, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_ibv_srq_context, }; static struct fi_ops_domain fi_ibv_rdm_domain_ops = { .size = sizeof(struct fi_ops_domain), .av_open = fi_ibv_rdm_av_open, .cq_open = fi_ibv_rdm_cq_open, .endpoint = fi_ibv_rdm_open_ep, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_rbv_rdm_cntr_open, .poll_open = fi_no_poll_open, .stx_ctx = fi_no_stx_context, .srx_ctx = fi_no_srx_context, }; static int fi_ibv_domain(struct fid_fabric *fabric, struct fi_info *info, struct fid_domain **domain, void *context) { struct fi_ibv_domain *_domain; struct fi_ibv_fabric *fab; struct fi_info *fi; int ret; fi = fi_ibv_get_verbs_info(info->domain_attr->name); if (!fi) return -FI_EINVAL; fab = container_of(fabric, struct fi_ibv_fabric, util_fabric.fabric_fid); ret = ofi_check_domain_attr(&fi_ibv_prov, fabric->api_version, fi->domain_attr, info->domain_attr); if (ret) return ret; _domain = calloc(1, sizeof *_domain); if (!_domain) return -FI_ENOMEM; _domain->info = fi_dupinfo(info); if (!_domain->info) goto err1; _domain->rdm = FI_IBV_EP_TYPE_IS_RDM(info); if (_domain->rdm) { _domain->rdm_cm = calloc(1, sizeof(*_domain->rdm_cm)); if (!_domain->rdm_cm) { ret = -FI_ENOMEM; goto err2; } } ret = fi_ibv_open_device_by_name(_domain, 
info->domain_attr->name); if (ret) goto err2; _domain->pd = ibv_alloc_pd(_domain->verbs); if (!_domain->pd) { ret = -errno; goto err2; } _domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN; _domain->domain_fid.fid.context = context; _domain->domain_fid.fid.ops = &fi_ibv_fid_ops; _domain->domain_fid.mr = &fi_ibv_domain_mr_ops; if (_domain->rdm) { _domain->domain_fid.ops = &fi_ibv_rdm_domain_ops; _domain->rdm_cm->ec = rdma_create_event_channel(); if (!_domain->rdm_cm->ec) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create listener event channel: %s\n", strerror(errno)); ret = -FI_EOTHER; goto err2; } if (fi_fd_nonblock(_domain->rdm_cm->ec->fd) != 0) { VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno); ret = -FI_EOTHER; goto err3; } if (rdma_create_id(_domain->rdm_cm->ec, &_domain->rdm_cm->listener, NULL, RDMA_PS_TCP)) { VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n", strerror(errno)); ret = -FI_EOTHER; goto err3; } _domain->rdm_cm->is_bound = 0; } else { _domain->domain_fid.ops = &fi_ibv_domain_ops; } _domain->fab = fab; *domain = &_domain->domain_fid; return 0; err3: if (_domain->rdm) rdma_destroy_event_channel(_domain->rdm_cm->ec); err2: if (_domain->rdm) free(_domain->rdm_cm); fi_freeinfo(_domain->info); err1: free(_domain); return ret; } static int fi_ibv_trywait(struct fid_fabric *fabric, struct fid **fids, int count) { struct fi_ibv_cq *cq; int ret, i; for (i = 0; i < count; i++) { switch (fids[i]->fclass) { case FI_CLASS_CQ: cq = container_of(fids[i], struct fi_ibv_cq, cq_fid.fid); ret = cq->trywait(fids[i]); if (ret) return ret; break; case FI_CLASS_EQ: /* We are always ready to wait on an EQ since * rdmacm EQ is based on an fd */ continue; case FI_CLASS_CNTR: case FI_CLASS_WAIT: return -FI_ENOSYS; default: return -FI_EINVAL; } } return FI_SUCCESS; } static int fi_ibv_fabric_close(fid_t fid) { struct fi_ibv_fabric *fab; int ret; fab = container_of(fid, struct fi_ibv_fabric, util_fabric.fabric_fid.fid); ret = ofi_fabric_close(&fab->util_fabric); if 
(ret) return ret; free(fab); return 0; } static struct fi_ops fi_ibv_fi_ops = { .size = sizeof(struct fi_ops), .close = fi_ibv_fabric_close, .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; static struct fi_ops_fabric fi_ibv_ops_fabric = { .size = sizeof(struct fi_ops_fabric), .domain = fi_ibv_domain, .passive_ep = fi_ibv_passive_ep, .eq_open = fi_ibv_eq_open, .wait_open = fi_no_wait_open, .trywait = fi_ibv_trywait }; int fi_ibv_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context) { struct fi_ibv_fabric *fab; struct fi_info *info; int ret; ret = fi_ibv_init_info(); if (ret) return ret; fab = calloc(1, sizeof(*fab)); if (!fab) return -FI_ENOMEM; for (info = verbs_info; info; info = info->next) { ret = ofi_fabric_init(&fi_ibv_prov, info->fabric_attr, attr, &fab->util_fabric, context); if (ret != -FI_ENODATA) break; } if (ret) { free(fab); return ret; } *fabric = &fab->util_fabric.fabric_fid; (*fabric)->fid.ops = &fi_ibv_fi_ops; (*fabric)->ops = &fi_ibv_ops_fabric; return 0; }
/// Bind the acceptor to the endpoint \a ep and start listening.
///
/// Creates a non-blocking RDMA CM event channel and a listener id, binds
/// the listener to the address and port of \a ep and calls rdma_listen().
/// If an event channel already exists, already_connected is reported.
///
/// Failures are reported through \a ec via HPX_IBVERBS_THROWS_IF; on every
/// failure path after the channel was requested, close(ec) is called first.
/// A failed address lookup is reported as host_not_found instead of
/// dereferencing an unset addrinfo pointer.
void bind(
    boost::asio::ip::tcp::endpoint const & ep
  , boost::system::error_code &ec)
{
    if(event_channel_)
    {
        HPX_IBVERBS_THROWS_IF(ec, boost::asio::error::already_connected);
    }
    else
    {
        event_channel_ = rdma_create_event_channel();
        if(!event_channel_)
        {
            // Save errno before close() can overwrite it.
            int verrno = errno;
            close(ec);
            boost::system::error_code err(verrno, boost::system::system_category());
            HPX_IBVERBS_THROWS_IF(
                ec
              , err
            );
            return;
        }
        set_nonblocking(event_channel_->fd, ec);
        if(ec)
        {
            close(ec);
            return;
        }

        int ret = 0;

        ret = rdma_create_id(event_channel_, &listener_, NULL, RDMA_PS_TCP);
        if(ret)
        {
            int verrno = errno;
            close(ec);
            boost::system::error_code err(verrno, boost::system::system_category());
            HPX_IBVERBS_THROWS_IF(
                ec
              , err
            );
            return;
        }

        std::string host = ep.address().to_string();
        std::string port = boost::lexical_cast<std::string>(ep.port());

        addrinfo *addr = NULL;

        // getaddrinfo() reports failure through its return value and
        // leaves addr untouched in that case.
        int gai = getaddrinfo(host.c_str(), port.c_str(), NULL, &addr);
        if(gai != 0 || addr == NULL)
        {
            close(ec);
            HPX_IBVERBS_THROWS_IF(ec, boost::asio::error::host_not_found);
            return;
        }

        ret = rdma_bind_addr(listener_, addr->ai_addr);

        freeaddrinfo(addr);
        if(ret)
        {
            int verrno = errno;
            close(ec);
            boost::system::error_code err(verrno, boost::system::system_category());
            HPX_IBVERBS_THROWS_IF(
                ec
              , err
            );
            return;
        }
        ret = rdma_listen(listener_, 10); /* backlog = 10 is arbitrary */
        if(ret)
        {
            int verrno = errno;
            close(ec);
            boost::system::error_code err(verrno, boost::system::system_category());
            HPX_IBVERBS_THROWS_IF(
                ec
              , err
            );
            return;
        }
        HPX_IBVERBS_RESET_EC(ec);
    }
}
int main(int argc, char **argv) { int op, ret; while ((op = getopt(argc, argv, "s:b:c:C:S:t:p:mT")) != -1) { switch (op) { case 's': dst_addr = optarg; break; case 'b': src_addr = optarg; break; case 'c': connections = atoi(optarg); break; case 'C': message_count = atoi(optarg); break; case 'S': message_size = atoi(optarg); break; case 't': set_tos = 1; tos = (uint8_t) strtoul(optarg, NULL, 0); break; case 'p': port = optarg; break; case 'm': migrate = 1; break; case 'T': set_ts = 1; break; default: printf("usage: %s\n", argv[0]); printf("\t[-s server_address]\n"); printf("\t[-b bind_address]\n"); printf("\t[-c connections]\n"); printf("\t[-C message_count]\n"); printf("\t[-S message_size]\n"); printf("\t[-t type_of_service]\n"); printf("\t[-p port_number]\n"); printf("\t[-m(igrate)]\n"); printf("\t[-T(imestamping)]\n"); exit(1); } } test.connects_left = connections; test.channel = rdma_create_event_channel(); if (!test.channel) { printf("failed to create event channel\n"); exit(1); } if (alloc_nodes()) exit(1); if (dst_addr) ret = run_client(); else ret = run_server(); printf("test complete\n"); destroy_nodes(); rdma_destroy_event_channel(test.channel); if (test.rai) rdma_freeaddrinfo(test.rai); printf("return status %d\n", ret); return ret; }