/*
 * rpmem_cmd_push -- append a copy of an argument to the command's argv
 *
 * The argv array is grown by one slot and kept NULL-terminated.
 * Returns 0 on success and -1 if either allocation fails.
 */
int
rpmem_cmd_push(struct rpmem_cmd *cmd, const char *arg)
{
	/* current arguments + the new one + the terminating NULL */
	size_t nslots = (size_t)cmd->args.argc + 2;

	char **grown = realloc(cmd->args.argv, nslots * sizeof(char *));
	if (grown == NULL) {
		RPMEM_LOG(ERR, "reallocating command argv");
		return -1;
	}

	/* the resized array is stored even if duplicating the string fails */
	cmd->args.argv = grown;

	char *copy = strdup(arg);
	if (copy == NULL) {
		RPMEM_LOG(ERR, "allocating argument");
		return -1;
	}

	grown[cmd->args.argc] = copy;
	cmd->args.argc++;
	grown[cmd->args.argc] = NULL;

	return 0;
}
/* * rpmem_obj_check_hdr_resp -- (internal) check response message header */ static int rpmem_obc_check_hdr_resp(struct rpmem_msg_hdr_resp *resp, enum rpmem_msg_type type, size_t size) { if (resp->type != type) { RPMEM_LOG(ERR, "invalid message type -- %u", resp->type); errno = EPROTO; return -1; } if (resp->size != size) { RPMEM_LOG(ERR, "invalid message size -- %lu", resp->size); errno = EPROTO; return -1; } if (resp->status >= MAX_RPMEM_ERR) { RPMEM_LOG(ERR, "invalid status -- %u", resp->status); errno = EPROTO; return -1; } if (resp->status) { enum rpmem_err status = (enum rpmem_err)resp->status; RPMEM_LOG(ERR, "request failed: %s", rpmem_util_proto_errstr(status)); errno = rpmem_util_proto_errno(status); return -1; } return 0; }
/* * rpmem_cmd_log -- print executing command */ static void rpmem_cmd_log(struct rpmem_cmd *cmd) { RPMEM_ASSERT(cmd->args.argc > 0); size_t size = 0; for (int i = 0; i < cmd->args.argc; i++) { size += strlen(cmd->args.argv[i]) + 1; } char *buff = malloc(size); if (!buff) { RPMEM_LOG(ERR, "allocating log buffer for command"); return; } size_t pos = 0; for (int i = 0; pos < size && i < cmd->args.argc; i++) { int ret = snprintf(&buff[pos], size - pos, "%s%s", cmd->args.argv[i], i == cmd->args.argc - 1 ? "" : " "); if (ret < 0) { RPMEM_LOG(ERR, "printing command's argument failed"); goto out; } pos += (size_t)ret; } RPMEM_LOG(INFO, "executing command '%s'", buff); out: free(buff); }
/* * rpmem_obc_create -- perform create request operation * * Returns error if connection has not been established yet. */ int rpmem_obc_create(struct rpmem_obc *rpc, const struct rpmem_req_attr *req, struct rpmem_resp_attr *res, const struct rpmem_pool_attr *pool_attr) { if (!rpmem_obc_is_connected(rpc)) { ERR("out-of-band connection not established"); errno = ENOTCONN; goto err_notconnected; } if (rpmem_obc_check_req(req)) goto err_req; size_t msg_size; struct rpmem_msg_create *msg = rpmem_obc_alloc_create_msg(req, pool_attr, &msg_size); if (!msg) goto err_alloc_msg; RPMEM_LOG(INFO, "sending create request message"); rpmem_hton_msg_create(msg); if (rpmem_ssh_send(rpc->ssh, msg, msg_size)) { ERR("!sending create request message failed"); goto err_msg_send; } RPMEM_LOG(NOTICE, "create request message sent"); RPMEM_LOG(INFO, "receiving create request response"); struct rpmem_msg_create_resp resp; if (rpmem_ssh_recv(rpc->ssh, &resp, sizeof(resp))) { ERR("!receiving create request response failed"); goto err_msg_recv; } RPMEM_LOG(NOTICE, "create request response received"); rpmem_ntoh_msg_create_resp(&resp); if (rpmem_obc_check_create_resp(&resp)) goto err_msg_resp; rpmem_obc_get_res(res, &resp.ibc); free(msg); return 0; err_msg_resp: err_msg_recv: err_msg_send: free(msg); err_alloc_msg: err_req: err_notconnected: return -1; }
/* * rpmem_obc_set_attr -- perform set attributes request operation * * Returns error if connection is not already established. */ int rpmem_obc_set_attr(struct rpmem_obc *rpc, const struct rpmem_pool_attr *pool_attr) { if (!rpmem_obc_is_connected(rpc)) { ERR("out-of-band connection not established"); errno = ENOTCONN; goto err_notconnected; } struct rpmem_msg_set_attr msg; rpmem_obc_set_msg_hdr(&msg.hdr, RPMEM_MSG_TYPE_SET_ATTR, sizeof(msg)); if (pool_attr) { memcpy(&msg.pool_attr, pool_attr, sizeof(msg.pool_attr)); } else { RPMEM_LOG(INFO, "using zeroed pool attributes"); memset(&msg.pool_attr, 0, sizeof(msg.pool_attr)); } RPMEM_LOG(INFO, "sending set attributes request message"); rpmem_hton_msg_set_attr(&msg); if (rpmem_ssh_send(rpc->ssh, &msg, sizeof(msg))) { ERR("!sending set attributes request message failed"); goto err_msg_send; } RPMEM_LOG(NOTICE, "set attributes request message sent"); RPMEM_LOG(INFO, "receiving set attributes request response"); struct rpmem_msg_set_attr_resp resp; if (rpmem_ssh_recv(rpc->ssh, &resp, sizeof(resp))) { ERR("!receiving set attributes request response failed"); goto err_msg_recv; } RPMEM_LOG(NOTICE, "set attributes request response received"); rpmem_ntoh_msg_set_attr_resp(&resp); if (rpmem_obc_check_set_attr_resp(&resp)) goto err_msg_resp; return 0; err_msg_resp: err_msg_recv: err_msg_send: err_notconnected: return -1; }
/* * rpmem_fip_persist_gpspm -- (internal) perform persist operation for GPSPM */ static int rpmem_fip_persist_gpspm(struct rpmem_fip *fip, size_t offset, size_t len, unsigned lane) { int ret; struct rpmem_fip_plane_gpspm *lanep = &fip->lanes.gpspm[lane]; ret = rpmem_fip_lane_wait(&lanep->lane, FI_SEND); if (unlikely(ret)) { RPMEM_LOG(ERR, "waiting for SEND buffer"); return ret; } RPMEM_ASSERT(!rpmem_fip_lane_busy(&lanep->lane)); rpmem_fip_lane_begin(&lanep->lane, FI_SEND | FI_RECV); void *laddr = (void *)((uintptr_t)fip->laddr + offset); uint64_t raddr = fip->raddr + offset; struct rpmem_msg_persist *msg; struct rpmem_fip_plane_gpspm *gpspm = (void *)lanep; /* WRITE for requested memory region */ ret = rpmem_fip_writemsg(fip->ep, &gpspm->write, laddr, len, raddr); if (unlikely(ret)) { RPMEM_FI_ERR((int)ret, "RMA write"); return ret; } /* SEND persist message */ msg = rpmem_fip_msg_get_pmsg(&gpspm->send); msg->lane = lane; msg->addr = raddr; msg->size = len; ret = rpmem_fip_sendmsg(fip->ep, &gpspm->send); if (unlikely(ret)) { RPMEM_FI_ERR(ret, "MSG send"); return ret; } /* wait for persist operation completion */ ret = rpmem_fip_lane_wait(&lanep->lane, FI_RECV); if (unlikely(ret)) { RPMEM_LOG(ERR, "persist operation failed"); return ret; } return ret; }
/*
 * rpmem_cmd_wait -- wait for the command's process to change state
 *
 * Returns 0 if waitpid() reported the command's PID, storing the status
 * in *status. Returns -1 if the stored PID is not positive (errno is set
 * to EINVAL) or if waitpid() returned anything else.
 */
int
rpmem_cmd_wait(struct rpmem_cmd *cmd, int *status)
{
	if (cmd->pid <= 0) {
		RPMEM_LOG(ERR, "wrong PID: %i", cmd->pid);
		errno = EINVAL;
		return -1;
	}

	if (waitpid(cmd->pid, status, 0) == cmd->pid)
		return 0;

	RPMEM_LOG(ERR, "!waitpid failed");
	return -1;
}
/* * rpmem_obc_alloc_open_msg -- (internal) allocate and fill open request message */ static struct rpmem_msg_open * rpmem_obc_alloc_open_msg(const struct rpmem_req_attr *req, const struct rpmem_pool_attr *pool_attr, size_t *msg_sizep) { size_t pool_desc_size = strlen(req->pool_desc) + 1; size_t msg_size = sizeof(struct rpmem_msg_open) + pool_desc_size; struct rpmem_msg_open *msg = malloc(msg_size); if (!msg) { RPMEM_LOG(ERR, "!cannot allocate open request message"); return NULL; } rpmem_obc_set_msg_hdr(&msg->hdr, RPMEM_MSG_TYPE_OPEN, msg_size); msg->major = RPMEM_PROTO_MAJOR; msg->minor = RPMEM_PROTO_MINOR; msg->pool_size = req->pool_size; msg->nlanes = req->nlanes; msg->provider = req->provider; rpmem_obc_set_pool_desc(&msg->pool_desc, req->pool_desc, pool_desc_size); *msg_sizep = msg_size; return msg; }
/* * rpmem_obc_alloc_create_msg -- (internal) allocate and fill create request * message */ static struct rpmem_msg_create * rpmem_obc_alloc_create_msg(const struct rpmem_req_attr *req, const struct rpmem_pool_attr *pool_attr, size_t *msg_sizep) { size_t pool_desc_size = strlen(req->pool_desc) + 1; size_t msg_size = sizeof(struct rpmem_msg_create) + pool_desc_size; struct rpmem_msg_create *msg = malloc(msg_size); if (!msg) { ERR("!cannot allocate create request message"); return NULL; } rpmem_obc_set_msg_hdr(&msg->hdr, RPMEM_MSG_TYPE_CREATE, msg_size); msg->major = RPMEM_PROTO_MAJOR; msg->minor = RPMEM_PROTO_MINOR; msg->pool_size = req->pool_size; msg->nlanes = req->nlanes; msg->provider = req->provider; rpmem_obc_set_pool_desc(&msg->pool_desc, req->pool_desc, pool_desc_size); if (pool_attr) { pack_rpmem_pool_attr(pool_attr, &msg->pool_attr); } else { RPMEM_LOG(INFO, "using zeroed pool attributes"); memset(&msg->pool_attr, 0, sizeof(msg->pool_attr)); } *msg_sizep = msg_size; return msg; }
/*
 * rpmem_ssh_monitor -- check connection state of ssh
 *
 * Peeks at the command's output descriptor without consuming data. A zero
 * result from rpmem_xread() means data arrived, which is treated as a
 * protocol error here.
 *
 * Return value:
 * 0  - disconnected
 * 1  - connected
 * <0 - error
 */
int
rpmem_ssh_monitor(struct rpmem_ssh *rps, int nonblock)
{
	uint32_t probe;
	int flags = nonblock ? (MSG_PEEK | MSG_DONTWAIT) : MSG_PEEK;

	int ret = rpmem_xread(rps->cmd->fd_out, &probe, sizeof(probe), flags);

	if (ret == 0) {
		RPMEM_LOG(ERR, "unexpected data received");
		errno = EPROTO;
		return -1;
	}

	/* a positive result is reported as "disconnected" */
	if (ret > 0)
		return 0;

	/* nothing to read right now -- the connection is still up */
	if (errno == EAGAIN || errno == EWOULDBLOCK)
		return 1;

	return ret;
}
/* * rpmem_fip_getinfo -- (internal) get fabric interface information */ static int rpmem_fip_getinfo(struct rpmem_fip *fip, const char *node, const char *service, enum rpmem_provider provider) { int ret = 0; struct fi_info *hints = rpmem_fip_get_hints(provider); if (!hints) { RPMEM_LOG(ERR, "!getting fabric interface information hints"); goto err_hints; } ret = fi_getinfo(RPMEM_FIVERSION, node, service, 0, hints, &fip->fi); if (ret) { RPMEM_FI_ERR(ret, "getting fabric interface information"); goto err_fi_getinfo; } rpmem_fip_print_info(fip->fi); /* fallback to free the hints */ err_fi_getinfo: fi_freeinfo(hints); err_hints: return ret; }
/*
 * rpmem_obc_check_ibc_attr -- (internal) check in-band connection
 * attributes
 *
 * The port must be in the range 1..UINT16_MAX and the persistency method
 * must be either RPMEM_PM_GPSPM or RPMEM_PM_APM.
 *
 * Returns 0 if both hold; otherwise sets errno to EPROTO and returns -1.
 */
static int
rpmem_obc_check_ibc_attr(struct rpmem_msg_ibc_attr *ibc)
{
	int port_valid = ibc->port != 0 && ibc->port <= UINT16_MAX;
	if (!port_valid) {
		RPMEM_LOG(ERR, "invalid port number -- %u", ibc->port);
		errno = EPROTO;
		return -1;
	}

	int method_known = ibc->persist_method == RPMEM_PM_GPSPM ||
			ibc->persist_method == RPMEM_PM_APM;
	if (!method_known) {
		RPMEM_LOG(ERR, "invalid persistency method -- %u",
				ibc->persist_method);
		errno = EPROTO;
		return -1;
	}

	return 0;
}
/*
 * rpmem_obc_init -- allocate a zero-initialized rpmem obc handle
 *
 * Returns the new handle, or NULL if the allocation fails.
 */
struct rpmem_obc *
rpmem_obc_init(void)
{
	struct rpmem_obc *handle = calloc(1, sizeof(struct rpmem_obc));

	if (handle != NULL)
		return handle;

	RPMEM_LOG(ERR, "!allocation of rpmem obc failed");
	return NULL;
}
/* * rpmem_fip_init_memory -- (internal) initialize common memory resources */ static int rpmem_fip_init_memory(struct rpmem_fip *fip) { ASSERTne(Pagesize, 0); int ret; /* * Register local memory space. The local memory will be used * with WRITE operation in rpmem_fip_persist function thus * the FI_WRITE access flag. */ ret = fi_mr_reg(fip->domain, fip->laddr, fip->size, FI_WRITE, 0, 0, 0, &fip->mr, NULL); if (ret) { RPMEM_FI_ERR(ret, "registrating memory"); return ret; } /* get local memory descriptor */ fip->mr_desc = fi_mr_desc(fip->mr); /* allocate buffer for read operation */ ASSERT(IS_PAGE_ALIGNED(RPMEM_RD_BUFF_SIZE)); errno = posix_memalign((void **)&fip->rd_buff, Pagesize, RPMEM_RD_BUFF_SIZE); if (errno) { RPMEM_LOG(ERR, "!allocating read buffer"); ret = -1; goto err_malloc_rd_buff; } /* * Register buffer for read operation. * The read operation utilizes READ operation thus * the FI_REMOTE_WRITE flag. */ ret = fi_mr_reg(fip->domain, fip->rd_buff, RPMEM_RD_BUFF_SIZE, FI_REMOTE_WRITE, 0, 0, 0, &fip->rd_mr, NULL); if (ret) { RPMEM_FI_ERR(ret, "registrating read buffer"); goto err_rd_mr; } /* get read buffer local memory descriptor */ fip->rd_mr_desc = fi_mr_desc(fip->rd_mr); return 0; err_rd_mr: free(fip->rd_buff); err_malloc_rd_buff: RPMEM_FI_CLOSE(fip->mr, "unregistering memory"); return ret; }
/*
 * rpmem_obc_check_req -- (internal) check request attributes
 *
 * Returns 0 if the provider is below MAX_RPMEM_PROV; otherwise sets errno
 * to EINVAL and returns -1.
 */
static int
rpmem_obc_check_req(const struct rpmem_req_attr *req)
{
	if (req->provider < MAX_RPMEM_PROV)
		return 0;

	RPMEM_LOG(ERR, "invalid provider");
	errno = EINVAL;
	return -1;
}
/*
 * rpmem_fip_process_stop -- stop process thread
 *
 * Sets the closing flag and joins the process thread.
 *
 * Returns the pthread_join() error if joining fails, otherwise the
 * thread's return value converted to int (0 means success).
 */
int
rpmem_fip_process_stop(struct rpmem_fip *fip)
{
	fip->closing = 1;

	void *thread_ret;
	int rc = pthread_join(fip->process_thread, &thread_ret);
	if (rc != 0) {
		RPMEM_LOG(ERR, "joining process thread -- %d", rc);
		return rc;
	}

	/* the thread passes its status back through its return pointer */
	rc = (int)(uintptr_t)thread_ret;
	if (rc != 0) {
		RPMEM_LOG(ERR, "process thread failed -- %d", rc);
	}

	return rc;
}
/*
 * rpmem_cmd_term -- terminate process by sending SIGINT signal
 *
 * Closes the command's stdin, stdout and stderr descriptors and then
 * signals the process.
 *
 * Returns 0 on success. Returns -1 with errno set to EINVAL if the stored
 * PID is not positive, or the result of kill() if signalling fails.
 */
int
rpmem_cmd_term(struct rpmem_cmd *cmd)
{
	os_close(cmd->fd_in);
	os_close(cmd->fd_out);
	os_close(cmd->fd_err);

	RPMEM_ASSERT(cmd->pid > 0);

	/*
	 * kill() treats a pid of 0 as "the caller's process group" and -1 as
	 * "every process the caller may signal", so a non-positive PID must
	 * never reach it, even in builds where the assertion above is
	 * compiled out. The same check is made in rpmem_cmd_wait.
	 */
	if (cmd->pid <= 0) {
		RPMEM_LOG(ERR, "wrong PID: %i", cmd->pid);
		errno = EINVAL;
		return -1;
	}

	int rv = kill(cmd->pid, SIGINT);
	if (rv)
		RPMEM_LOG(ERR, "!kill failed");

	return rv;
}
/*
 * rpmem_fip_process_start -- start process thread
 *
 * Creates the thread running rpmem_fip_process_thread with the fabric
 * handle as its argument.
 *
 * Returns 0 on success or the error number returned by pthread_create().
 */
int
rpmem_fip_process_start(struct rpmem_fip *fip)
{
	const int rc = pthread_create(&fip->process_thread, NULL,
			rpmem_fip_process_thread, fip);

	if (rc == 0)
		return 0;

	RPMEM_LOG(ERR, "creating process thread -- %d", rc);
	return rc;
}
/*
 * rpmem_cmd_init -- allocate a zero-initialized command
 *
 * Returns the new command, or NULL if the allocation fails.
 */
struct rpmem_cmd *
rpmem_cmd_init(void)
{
	struct rpmem_cmd *created = calloc(1, sizeof(struct rpmem_cmd));

	if (created == NULL) {
		RPMEM_LOG(ERR, "allocating command buffer");
		return NULL;
	}

	return created;
}
/*
 * rpmem_fip_persist -- perform remote persist operation
 *
 * Dispatches to the persist callback in fip->ops.
 *
 * Returns 0 on success. Returns -1 with errno set to EINVAL if the lane
 * number is out of range, or the non-zero value returned by the callback.
 */
int
rpmem_fip_persist(struct rpmem_fip *fip, size_t offset, size_t len,
	unsigned lane)
{
	RPMEM_ASSERT(lane < fip->nlanes);

	/* runtime check as well, for builds without assertions */
	if (unlikely(lane >= fip->nlanes)) {
		errno = EINVAL;
		return -1;
	}

	const int rc = fip->ops->persist(fip, offset, len, lane);
	if (rc == 0)
		return 0;

	RPMEM_LOG(ERR, "persist operation failed");
	return rc;
}
/* * rpmem_fip_init -- initialize fabric provider */ struct rpmem_fip * rpmem_fip_init(const char *node, const char *service, struct rpmem_fip_attr *attr, unsigned *nlanes) { int ret; struct rpmem_fip *fip = calloc(1, sizeof(*fip)); if (!fip) { RPMEM_LOG(ERR, "!allocating fabric handle"); return NULL; } ret = rpmem_fip_getinfo(fip, node, service, attr->provider); if (ret) goto err_getinfo; rpmem_fip_set_attr(fip, attr); *nlanes = fip->nlanes; ret = rpmem_fip_init_fabric_res(fip); if (ret) goto err_init_fabric_res; ret = rpmem_fip_init_memory(fip); if (ret) goto err_init_memory; ret = rpmem_fip_init_lanes(fip); if (ret) goto err_init_lanes; return fip; err_init_lanes: rpmem_fip_fini_memory(fip); err_init_memory: rpmem_fip_fini_fabric_res(fip); err_init_fabric_res: fi_freeinfo(fip->fi); err_getinfo: free(fip); return NULL; }
/* * rpmem_fip_persist_apm -- (internal) perform persist operation for APM */ static int rpmem_fip_persist_apm(struct rpmem_fip *fip, size_t offset, size_t len, unsigned lane) { struct rpmem_fip_plane_apm *lanep = &fip->lanes.apm[lane]; RPMEM_ASSERT(!rpmem_fip_lane_busy(&lanep->lane)); rpmem_fip_lane_begin(&lanep->lane, FI_READ); int ret; void *laddr = (void *)((uintptr_t)fip->laddr + offset); uint64_t raddr = fip->raddr + offset; /* WRITE for requested memory region */ ret = rpmem_fip_writemsg(fip->ep, &lanep->write, laddr, len, raddr); if (unlikely(ret)) { RPMEM_FI_ERR(ret, "RMA write"); return ret; } /* READ to read-after-write buffer */ ret = rpmem_fip_readmsg(fip->ep, &lanep->read, &fip->raw_buff, sizeof(fip->raw_buff), raddr); if (unlikely(ret)) { RPMEM_FI_ERR(ret, "RMA read"); return ret; } /* wait for READ completion */ ret = rpmem_fip_lane_wait(&lanep->lane, FI_READ); if (unlikely(ret)) { RPMEM_LOG(ERR, "waiting for READ completion failed"); return ret; } return ret; }
/* * rpmem_fip_process_gpspm -- (internal) process completion queue entry for * GPSPM */ static int rpmem_fip_process_gpspm(struct rpmem_fip *fip, void *context, uint64_t flags) { if (flags & FI_RECV) { /* RECV completion */ struct rpmem_fip_msg *resp = context; struct rpmem_msg_persist_resp *msg_resp = rpmem_fip_msg_get_pres(resp); VALGRIND_DO_MAKE_MEM_DEFINED(msg_resp, sizeof(*msg_resp)); if (unlikely(msg_resp->lane >= fip->nlanes)) { RPMEM_LOG(ERR, "lane number received (%lu) is greater " "than maximum lane number (%u)", msg_resp->lane, fip->nlanes - 1); return -1; } struct rpmem_fip_lane *lanep = &fip->lanes.gpspm[msg_resp->lane].lane; /* post RECV buffer immediately */ int ret = rpmem_fip_gpspm_post_resp(fip, resp); if (unlikely(ret)) RPMEM_FI_ERR((int)ret, "MSG send"); rpmem_fip_lane_sigret(lanep, flags, ret); return ret; } struct rpmem_fip_lane *lanep = context; /* SEND completion */ rpmem_fip_lane_signal(lanep, flags); return 0; }
/* * rpmem_ssh_open -- open ssh connection with specified node and wait for status */ struct rpmem_ssh * rpmem_ssh_open(const struct rpmem_target_info *info) { struct rpmem_ssh *ssh = rpmem_ssh_exec(info, NULL); if (!ssh) return NULL; /* * Read initial status from invoked command. * This is for synchronization purposes and to make it possible * to inform client that command's initialization failed. */ int32_t status; int ret = rpmem_ssh_recv(ssh, &status, sizeof(status)); if (ret) { if (ret == 1 || errno == ECONNRESET) ERR("%s", rpmem_ssh_strerror(ssh, errno)); else ERR("!%s", info->node); goto err_recv_status; } if (status) { ERR("%s: unexpected status received -- '%d'", info->node, status); errno = status; goto err_status; } RPMEM_LOG(INFO, "received status: %u", status); return ssh; err_recv_status: err_status: rpmem_ssh_close(ssh); return NULL; }
/* * rpmem_obc_close -- perform close request operation * * Returns error if connection is not already established. * * NOTE: this function does not close the connection, but sends close request * message to remote node and receives a response. The connection must be * closed using rpmem_obc_disconnect function. */ int rpmem_obc_close(struct rpmem_obc *rpc, int flags) { if (!rpmem_obc_is_connected(rpc)) { errno = ENOTCONN; return -1; } struct rpmem_msg_close msg; rpmem_obc_set_msg_hdr(&msg.hdr, RPMEM_MSG_TYPE_CLOSE, sizeof(msg)); msg.flags = (uint32_t)flags; RPMEM_LOG(INFO, "sending close request message"); rpmem_hton_msg_close(&msg); if (rpmem_ssh_send(rpc->ssh, &msg, sizeof(msg))) { RPMEM_LOG(ERR, "!sending close request failed"); return -1; } RPMEM_LOG(NOTICE, "close request message sent"); RPMEM_LOG(INFO, "receiving close request response"); struct rpmem_msg_close_resp resp; if (rpmem_ssh_recv(rpc->ssh, &resp, sizeof(resp))) { RPMEM_LOG(ERR, "!receiving close request response failed"); return -1; } RPMEM_LOG(NOTICE, "close request response received"); rpmem_ntoh_msg_close_resp(&resp); if (rpmem_obc_check_close_resp(&resp)) return -1; return 0; }
/* * rpmem_fip_process -- (internal) process completion events */ static int rpmem_fip_process(struct rpmem_fip *fip) { ssize_t sret; struct fi_cq_err_entry err; const char *str_err; int ret; struct fi_cq_msg_entry *cq_entries; cq_entries = malloc(fip->cq_size * sizeof(*cq_entries)); if (!cq_entries) { RPMEM_LOG(ERR, "!allocating completion queue buffer"); return -1; } while (!fip->closing) { sret = fi_cq_sread(fip->cq, cq_entries, fip->cq_size, NULL, RPMEM_FIP_CQ_WAIT_MS); if (unlikely(fip->closing)) break; if (unlikely(sret == -FI_EAGAIN)) continue; if (unlikely(sret < 0)) { ret = (int)sret; goto err_cq_read; } for (ssize_t i = 0; i < sret; i++) { struct fi_cq_msg_entry *comp = &cq_entries[i]; /* * If the context is NULL it probably means that * we get an unexpected CQ entry. The CQ is configured * with FI_SELECTIVE_COMPLETION so every inbound or * outbound operation must be issued with FI_COMPLETION * flag and non-NULL context. */ RPMEM_ASSERT(comp->op_context); /* read operation */ if (unlikely(comp->op_context == &fip->rd_lane)) { rpmem_fip_lane_signal(&fip->rd_lane.lane, FI_READ); continue; } /* persist operation */ ret = fip->ops->process(fip, comp->op_context, comp->flags); if (unlikely(ret)) { RPMEM_LOG(ERR, "persist operation failed"); goto err; } } } free(cq_entries); return 0; err_cq_read: sret = fi_cq_readerr(fip->cq, &err, 0); if (sret < 0) { RPMEM_FI_ERR((int)sret, "error reading from completion queue: " "cannot read error from event queue"); goto err; } str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0); RPMEM_LOG(ERR, "error reading from completion queue: %s", str_err); err: rpmem_fip_signal_all(fip, ret); free(cq_entries); return ret; }
/* * rpmem_fip_init_lanes_gpspm -- (internal) initialize lanes for GPSPM */ static int rpmem_fip_init_lanes_gpspm(struct rpmem_fip *fip) { int ret = 0; /* allocate GPSPM lanes */ fip->lanes.gpspm = calloc(1, fip->nlanes * sizeof(*fip->lanes.gpspm)); if (!fip->lanes.gpspm) { RPMEM_LOG(ERR, "allocating GPSPM lanes"); goto err_malloc_lanes; } /* allocate persist messages buffer */ size_t msg_size = fip->nlanes * sizeof(struct rpmem_msg_persist); fip->pmsg = malloc(msg_size); if (!fip->pmsg) { RPMEM_LOG(ERR, "!allocating messages buffer"); ret = -1; goto err_malloc_pmsg; } /* * Register persist messages buffer. The persist messages * are sent to daemon thus the FI_SEND access flag. */ ret = fi_mr_reg(fip->domain, fip->pmsg, msg_size, FI_SEND, 0, 0, 0, &fip->pmsg_mr, NULL); if (ret) { RPMEM_FI_ERR(ret, "registering messages buffer"); goto err_fi_mr_reg_pmsg; } /* get persist messages buffer local descriptor */ fip->pmsg_mr_desc = fi_mr_desc(fip->pmsg_mr); /* allocate persist response messages buffer */ size_t msg_resp_size = fip->nlanes * sizeof(struct rpmem_msg_persist_resp); fip->pres = malloc(msg_resp_size); if (!fip->pres) { RPMEM_LOG(ERR, "!allocating messages response buffer"); ret = -1; goto err_malloc_pres; } /* * Register persist messages response buffer. The persist response * messages are received from daemon thus the FI_RECV access flag. */ ret = fi_mr_reg(fip->domain, fip->pres, msg_resp_size, FI_RECV, 0, 0, 0, &fip->pres_mr, NULL); if (ret) { RPMEM_FI_ERR(ret, "registering messages response buffer"); goto err_fi_mr_reg_pres; } /* get persist response messages buffer local descriptor */ fip->pres_mr_desc = fi_mr_desc(fip->pres_mr); /* allocate RECV structures for fi_recvmsg(3) */ fip->recv = malloc(fip->nlanes * sizeof(*fip->recv)); if (!fip->recv) { RPMEM_LOG(ERR, "!allocating response message iov buffer"); goto err_malloc_recv; } /* * Initialize all required structures for: * WRITE, SEND and RECV operations. 
* * If the completion is required the FI_COMPLETION flag and * appropriate context should be used. * * In GPSPM only the RECV and SEND completions are required. * * For RECV the context is RECV operation structure used for * fi_recvmsg(3) function call. * * For SEND the context is lane structure. * * The received buffer contains a lane id which is used * to obtain a lane which must be signaled that operation * has been completed. */ unsigned i; for (i = 0; i < fip->nlanes; i++) { ret = rpmem_fip_lane_init(&fip->lanes.gpspm[i].lane); if (ret) goto err_lane_init; /* WRITE */ rpmem_fip_rma_init(&fip->lanes.gpspm[i].write, fip->mr_desc, 0, fip->rkey, &fip->lanes.gpspm[i], 0); /* SEND */ rpmem_fip_msg_init(&fip->lanes.gpspm[i].send, fip->pmsg_mr_desc, 0, &fip->lanes.gpspm[i], &fip->pmsg[i], sizeof(fip->pmsg[i]), FI_COMPLETION); /* RECV */ rpmem_fip_msg_init(&fip->recv[i], fip->pres_mr_desc, 0, &fip->recv[i], &fip->pres[i], sizeof(fip->pres[i]), FI_COMPLETION); } return 0; err_lane_init: for (unsigned j = 0; j < i; j++) rpmem_fip_lane_fini(&fip->lanes.gpspm[i].lane); err_malloc_recv: RPMEM_FI_CLOSE(fip->pres_mr, "unregistering messages " "response buffer"); err_fi_mr_reg_pres: free(fip->pres); err_malloc_pres: RPMEM_FI_CLOSE(fip->pmsg_mr, "unregistering messages buffer"); err_fi_mr_reg_pmsg: free(fip->pmsg); err_malloc_pmsg: free(fip->lanes.gpspm); err_malloc_lanes: return ret; }
/* * rpmem_fip_init_lanes_apm -- (internal) initialize lanes for APM */ static int rpmem_fip_init_lanes_apm(struct rpmem_fip *fip) { int ret; /* allocate APM lanes */ fip->lanes.apm = calloc(1, fip->nlanes * sizeof(*fip->lanes.apm)); if (!fip->lanes.apm) { RPMEM_LOG(ERR, "!allocating APM lanes"); goto err_malloc_lanes; } /* register read-after-write buffer */ ret = fi_mr_reg(fip->domain, &fip->raw_buff, sizeof(fip->raw_buff), FI_REMOTE_WRITE, 0, 0, 0, &fip->raw_mr, NULL); if (ret) { RPMEM_FI_ERR(ret, "registering APM read buffer"); goto err_fi_raw_mr; } /* get read-after-write buffer local descriptor */ fip->raw_mr_desc = fi_mr_desc(fip->raw_mr); /* * Initialize all required structures for: * WRITE and READ operations. * * If the completion is required the FI_COMPLETION flag and * appropriate context should be used. * * In APM only the READ completion is required. * The context is a lane structure. */ unsigned i; for (i = 0; i < fip->nlanes; i++) { ret = rpmem_fip_lane_init(&fip->lanes.apm[i].lane); if (ret) goto err_lane_init; /* WRITE */ rpmem_fip_rma_init(&fip->lanes.apm[i].write, fip->mr_desc, 0, fip->rkey, &fip->lanes.apm[i], 0); /* READ */ rpmem_fip_rma_init(&fip->lanes.apm[i].read, fip->raw_mr_desc, 0, fip->rkey, &fip->lanes.apm[i], FI_COMPLETION); } return 0; err_lane_init: for (unsigned j = 0; j < i; j++) rpmem_fip_lane_fini(&fip->lanes.apm[i].lane); err_fi_raw_mr: free(fip->lanes.apm); err_malloc_lanes: return -1; }
/* * rpmem_ssh_open -- open ssh connection with specified node */ struct rpmem_ssh * rpmem_ssh_open(const struct rpmem_target_info *info) { struct rpmem_ssh *rps = calloc(1, sizeof(*rps)); if (!rps) goto err_zalloc; char *user_at_node = get_user_at_node(info); if (!user_at_node) goto err_user_node; rps->cmd = rpmem_cmd_init(); if (!rps->cmd) goto err_cmd_init; int ret = rpmem_cmd_push(rps->cmd, get_ssh()); if (ret) goto err_push; if (info->flags & RPMEM_HAS_SERVICE) { /* port number is optional */ ret = rpmem_cmd_push(rps->cmd, "-p"); if (ret) goto err_push; ret = rpmem_cmd_push(rps->cmd, info->service); if (ret) goto err_push; } /* * Disable allocating pseudo-terminal in order to transfer binary * data safely. */ ret = rpmem_cmd_push(rps->cmd, "-T"); if (ret) goto err_push; if (info->flags & RPMEM_FLAGS_USE_IPV4) { ret = rpmem_cmd_push(rps->cmd, "-4"); if (ret) goto err_push; } /* fail if password required for authentication */ ret = rpmem_cmd_push(rps->cmd, "-oBatchMode=yes"); if (ret) goto err_push; ret = rpmem_cmd_push(rps->cmd, user_at_node); if (ret) goto err_push; ret = rpmem_cmd_push(rps->cmd, rpmem_util_cmd_get()); if (ret) goto err_push; ret = rpmem_cmd_run(rps->cmd); if (ret) goto err_run; /* * Read initial status from invoked command. * This is for synchronization purposes and to make it possible * to inform client that command's initialization failed. */ int32_t status; ret = rpmem_ssh_recv(rps, &status, sizeof(status)); if (ret) { if (ret == 1 || errno == ECONNRESET) ERR("%s", rpmem_ssh_strerror(rps)); else ERR("!%s", info->node); goto err_recv_status; } if (status) { ERR("%s: unexpected status received -- '%d'", info->node, status); goto err_status; } RPMEM_LOG(INFO, "received status: %u", status); free(user_at_node); return rps; err_status: err_recv_status: err_run: rpmem_cmd_term(rps->cmd); rpmem_cmd_wait(rps->cmd, NULL); err_push: rpmem_cmd_fini(rps->cmd); err_cmd_init: free(user_at_node); err_user_node: free(rps); err_zalloc: return NULL; }
/* * rpmem_cmd_run -- run command and connect with stdin, stdout and stderr * using unix sockets. * * The communication with child process is done via socketpairs on * stdin, stdout and stderr. The socketpairs are used instead of pipes * because reading from disconnected pipe causes a SIGPIPE signal. * When using socketpair it is possible to read data using recv(3) * function with MSG_NOSIGNAL flag, which doesn't send a signal. */ int rpmem_cmd_run(struct rpmem_cmd *cmd) { int fd_in[2]; int fd_out[2]; int fd_err[2]; rpmem_cmd_log(cmd); /* socketpair for stdin */ int ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd_in); if (ret < 0) { RPMEM_LOG(ERR, "creating pipe for stdin"); goto err_pipe_in; } /* parent process stdin socket */ cmd->fd_in = fd_in[1]; /* socketpair for stdout */ ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd_out); if (ret < 0) { RPMEM_LOG(ERR, "creating pipe for stdout"); goto err_pipe_out; } /* parent process stdout socket */ cmd->fd_out = fd_out[0]; /* socketpair for stderr */ ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd_err); if (ret < 0) { RPMEM_LOG(ERR, "creating pipe for stderr"); goto err_pipe_err; } /* socketpair for stderr */ cmd->fd_err = fd_err[0]; cmd->pid = fork(); if (cmd->pid == -1) { RPMEM_LOG(ERR, "forking command"); goto err_fork; } if (!cmd->pid) { dup2(fd_in[0], 0); dup2(fd_out[1], 1); dup2(fd_err[1], 2); execvp(cmd->args.argv[0], cmd->args.argv); exit(EXIT_FAILURE); } os_close(fd_in[0]); os_close(fd_out[1]); os_close(fd_err[1]); return 0; err_fork: os_close(fd_err[0]); os_close(fd_err[1]); err_pipe_err: os_close(fd_out[0]); os_close(fd_out[1]); err_pipe_out: os_close(fd_in[0]); os_close(fd_in[1]); err_pipe_in: return -1; }