/*
 * rpmem_obc_set_pool_desc -- (internal) fill the pool descriptor field
 *
 * Copies 'size' bytes of the descriptor into the message and forces
 * NUL-termination on the last byte. The descriptor size must fit in the
 * 32-bit wire field and must be non-zero.
 */
static void
rpmem_obc_set_pool_desc(struct rpmem_msg_pool_desc *pool_desc,
	const char *desc, size_t size)
{
	/* wire format stores the size in a uint32_t field */
	RPMEM_ASSERT(size > 0);
	RPMEM_ASSERT(size <= UINT32_MAX);

	pool_desc->size = (uint32_t)size;
	memcpy(pool_desc->desc, desc, size);

	/* guarantee the descriptor is a valid C string */
	pool_desc->desc[size - 1] = '\0';
}
/*
 * rpmem_fip_read -- perform read operation
 *
 * Reads 'len' bytes from remote offset 'off' into 'buff', in chunks of at
 * most RPMEM_RD_BUFF_SIZE bytes staged through the registered read buffer.
 *
 * Returns 0 on success or the non-zero error code from posting or waiting
 * for the read operation.
 */
int
rpmem_fip_read(struct rpmem_fip *fip, void *buff, size_t len, size_t off)
{
	RPMEM_ASSERT(!rpmem_fip_lane_busy(&fip->rd_lane.lane));

	int ret = 0;
	size_t rd = 0;
	uint8_t *cbuff = buff;
	while (rd < len) {
		rpmem_fip_lane_begin(&fip->rd_lane.lane, FI_READ);

		/* chunk size is limited by the staging buffer */
		size_t rd_len = len - rd < RPMEM_RD_BUFF_SIZE ?
				len - rd : RPMEM_RD_BUFF_SIZE;
		size_t rd_off = off + rd;
		uint64_t raddr = fip->raddr + rd_off;

		ret = rpmem_fip_readmsg(fip->ep, &fip->rd_lane.read,
				fip->rd_buff, rd_len, raddr);
		/*
		 * BUGFIX: the post result used to be discarded (immediately
		 * overwritten by the wait's return value), so a failed post
		 * would make us wait for a completion that never arrives.
		 */
		if (ret)
			return ret;

		ret = rpmem_fip_lane_wait(&fip->rd_lane.lane, FI_READ);
		if (ret)
			return ret;

		/* completion arrived -- staging buffer holds valid data */
		memcpy(&cbuff[rd], fip->rd_buff, rd_len);

		rd += rd_len;
	}

	return ret;
}
/* * rpmem_cmd_log -- print executing command */ static void rpmem_cmd_log(struct rpmem_cmd *cmd) { RPMEM_ASSERT(cmd->args.argc > 0); size_t size = 0; for (int i = 0; i < cmd->args.argc; i++) { size += strlen(cmd->args.argv[i]) + 1; } char *buff = malloc(size); if (!buff) { RPMEM_LOG(ERR, "allocating log buffer for command"); return; } size_t pos = 0; for (int i = 0; pos < size && i < cmd->args.argc; i++) { int ret = snprintf(&buff[pos], size - pos, "%s%s", cmd->args.argv[i], i == cmd->args.argc - 1 ? "" : " "); if (ret < 0) { RPMEM_LOG(ERR, "printing command's argument failed"); goto out; } pos += (size_t)ret; } RPMEM_LOG(INFO, "executing command '%s'", buff); out: free(buff); }
/* * rpmem_fip_read -- perform read operation */ int rpmem_fip_read(struct rpmem_fip *fip, void *buff, size_t len, size_t off) { RPMEM_ASSERT(!rpmem_fip_lane_busy(&fip->rd_lane.lane)); int ret; size_t rd = 0; uint8_t *cbuff = buff; while (rd < len) { rpmem_fip_lane_begin(&fip->rd_lane.lane, FI_READ); size_t rd_len = len - rd < RPMEM_RD_BUFF_SIZE ? len - rd : RPMEM_RD_BUFF_SIZE; size_t rd_off = off + rd; uint64_t raddr = fip->raddr + rd_off; ret = rpmem_fip_readmsg(fip->ep, &fip->rd_lane.read, fip->rd_buff, rd_len, raddr); VALGRIND_DO_MAKE_MEM_DEFINED(fip->rd_buff, rd_len); ret = rpmem_fip_lane_wait(&fip->rd_lane.lane, FI_READ); if (ret) { ERR("error when processing read request"); errno = ret; return -1; } memcpy(&cbuff[rd], fip->rd_buff, rd_len); rd += rd_len; } return 0; }
/*
 * rpmem_cmd_term -- terminate process by sending SIGINT signal
 *
 * Closes the command's stdio descriptors first, then delivers SIGINT to
 * the child. Returns the result of kill(2): 0 on success, non-zero on
 * failure (which is also logged).
 */
int
rpmem_cmd_term(struct rpmem_cmd *cmd)
{
	/* release our ends of the child's stdio pipes */
	os_close(cmd->fd_in);
	os_close(cmd->fd_out);
	os_close(cmd->fd_err);

	RPMEM_ASSERT(cmd->pid > 0);

	int ret = kill(cmd->pid, SIGINT);
	if (ret)
		RPMEM_LOG(ERR, "!kill failed");

	return ret;
}
/*
 * rpmem_fip_persist_gpspm -- (internal) perform persist operation for GPSPM
 *
 * Issues an RMA WRITE of the requested region followed by a persist
 * message SEND, then waits for the server's response (RECV completion).
 *
 * Returns 0 on success or a non-zero error code from posting/waiting.
 */
static int
rpmem_fip_persist_gpspm(struct rpmem_fip *fip, size_t offset,
	size_t len, unsigned lane)
{
	int ret;
	struct rpmem_fip_plane_gpspm *lanep = &fip->lanes.gpspm[lane];

	/* make sure the SEND buffer from a previous persist is reusable */
	ret = rpmem_fip_lane_wait(&lanep->lane, FI_SEND);
	if (unlikely(ret)) {
		RPMEM_LOG(ERR, "waiting for SEND buffer");
		return ret;
	}

	RPMEM_ASSERT(!rpmem_fip_lane_busy(&lanep->lane));

	rpmem_fip_lane_begin(&lanep->lane, FI_SEND | FI_RECV);

	void *laddr = (void *)((uintptr_t)fip->laddr + offset);
	uint64_t raddr = fip->raddr + offset;
	struct rpmem_msg_persist *msg;

	/*
	 * Cleanup: the original declared a redundant alias
	 * 'gpspm = (void *)lanep' of the same type and used it
	 * interchangeably with 'lanep' -- use 'lanep' throughout.
	 */

	/* WRITE for requested memory region */
	ret = rpmem_fip_writemsg(fip->ep, &lanep->write, laddr, len, raddr);
	if (unlikely(ret)) {
		RPMEM_FI_ERR(ret, "RMA write");
		return ret;
	}

	/* SEND persist message */
	msg = rpmem_fip_msg_get_pmsg(&lanep->send);
	msg->lane = lane;
	msg->addr = raddr;
	msg->size = len;

	ret = rpmem_fip_sendmsg(fip->ep, &lanep->send);
	if (unlikely(ret)) {
		RPMEM_FI_ERR(ret, "MSG send");
		return ret;
	}

	/* wait for persist operation completion */
	ret = rpmem_fip_lane_wait(&lanep->lane, FI_RECV);
	if (unlikely(ret)) {
		RPMEM_LOG(ERR, "persist operation failed");
		return ret;
	}

	return ret;
}
/*
 * rpmem_fip_persist -- perform remote persist operation
 *
 * Validates the lane index and dispatches to the method-specific persist
 * implementation (APM or GPSPM). Returns 0 on success; on an invalid lane
 * sets errno to EINVAL and returns -1; otherwise forwards the dispatched
 * operation's return value.
 */
int
rpmem_fip_persist(struct rpmem_fip *fip, size_t offset, size_t len,
	unsigned lane)
{
	RPMEM_ASSERT(lane < fip->nlanes);

	/* also guard in release builds where the assert compiles out */
	if (unlikely(lane >= fip->nlanes)) {
		errno = EINVAL;
		return -1;
	}

	int ret = fip->ops->persist(fip, offset, len, lane);
	if (ret)
		ERR("persist operation failed");

	return ret;
}
/* * rpmem_fip_signal_all -- (internal) signal all lanes about completion with * error code */ static void rpmem_fip_signal_all(struct rpmem_fip *fip, int ret) { switch (fip->persist_method) { case RPMEM_PM_APM: for (unsigned i = 0; i < fip->nlanes; i++) rpmem_fip_lane_sigret(&fip->lanes.apm[i].lane, FI_WRITE | FI_READ, ret); break; case RPMEM_PM_GPSPM: for (unsigned i = 0; i < fip->nlanes; i++) rpmem_fip_lane_sigret(&fip->lanes.gpspm[i].lane, FI_WRITE | FI_SEND | FI_RECV, ret); break; default: RPMEM_ASSERT(0); } rpmem_fip_lane_sigret(&fip->rd_lane.lane, FI_READ, ret); }
/*
 * rpmem_fip_persist_apm -- (internal) perform persist operation for APM
 *
 * Issues an RMA WRITE of the requested region followed by a small RMA READ
 * into the read-after-write buffer (the read flushes the preceding write to
 * the target), then waits for the READ completion.
 *
 * Returns 0 on success or a non-zero error code from posting/waiting.
 */
static int
rpmem_fip_persist_apm(struct rpmem_fip *fip, size_t offset,
	size_t len, unsigned lane)
{
	struct rpmem_fip_plane_apm *lanep = &fip->lanes.apm[lane];

	RPMEM_ASSERT(!rpmem_fip_lane_busy(&lanep->lane));

	rpmem_fip_lane_begin(&lanep->lane, FI_READ);

	int ret;
	void *laddr = (void *)((uintptr_t)fip->laddr + offset);
	uint64_t raddr = fip->raddr + offset;

	/* WRITE for requested memory region */
	ret = rpmem_fip_writemsg(fip->ep, &lanep->write, laddr, len, raddr);
	if (unlikely(ret)) {
		RPMEM_FI_ERR(ret, "RMA write");
		return ret;
	}

	/* READ to read-after-write buffer */
	ret = rpmem_fip_readmsg(fip->ep, &lanep->read, &fip->raw_buff,
			sizeof(fip->raw_buff), raddr);
	if (unlikely(ret)) {
		RPMEM_FI_ERR(ret, "RMA read");
		return ret;
	}

	/* wait for READ completion */
	ret = rpmem_fip_lane_wait(&lanep->lane, FI_READ);
	if (unlikely(ret)) {
		RPMEM_LOG(ERR, "waiting for READ completion failed");
		return ret;
	}

	return ret;
}
/*
 * rpmem_fip_process -- (internal) process completion events
 *
 * Main completion-queue loop: repeatedly reads batches of CQ entries until
 * fip->closing is set. Read-lane completions are signaled directly; all
 * other completions are dispatched to the method-specific ops->process
 * handler.
 *
 * Returns 0 on a clean shutdown (fip->closing), or a negative/non-zero
 * error code after signaling all lanes with that code so no waiter is
 * left blocked.
 */
static int
rpmem_fip_process(struct rpmem_fip *fip)
{
	ssize_t sret;
	struct fi_cq_err_entry err;
	const char *str_err;
	int ret;
	struct fi_cq_msg_entry *cq_entries;

	cq_entries = malloc(fip->cq_size * sizeof(*cq_entries));
	if (!cq_entries) {
		RPMEM_LOG(ERR, "!allocating completion queue buffer");
		return -1;
	}

	while (!fip->closing) {
		/* blocking read with a timeout so 'closing' is re-checked */
		sret = fi_cq_sread(fip->cq, cq_entries, fip->cq_size,
				NULL, RPMEM_FIP_CQ_WAIT_MS);

		/* re-check after the (possibly long) blocking call */
		if (unlikely(fip->closing))
			break;

		/* timeout -- no completions this round */
		if (unlikely(sret == -FI_EAGAIN))
			continue;

		if (unlikely(sret < 0)) {
			ret = (int)sret;
			goto err_cq_read;
		}

		/* sret >= 0 is the number of entries read */
		for (ssize_t i = 0; i < sret; i++) {
			struct fi_cq_msg_entry *comp = &cq_entries[i];

			/*
			 * If the context is NULL it probably means that
			 * we get an unexpected CQ entry. The CQ is configured
			 * with FI_SELECTIVE_COMPLETION so every inbound or
			 * outbound operation must be issued with FI_COMPLETION
			 * flag and non-NULL context.
			 */
			RPMEM_ASSERT(comp->op_context);

			/* read operation */
			if (unlikely(comp->op_context == &fip->rd_lane)) {
				rpmem_fip_lane_signal(&fip->rd_lane.lane,
						FI_READ);
				continue;
			}

			/* persist operation */
			ret = fip->ops->process(fip, comp->op_context,
					comp->flags);
			if (unlikely(ret)) {
				RPMEM_LOG(ERR, "persist operation failed");
				goto err;
			}
		}
	}

	free(cq_entries);
	return 0;
err_cq_read:
	/* try to extract the detailed error entry from the CQ */
	sret = fi_cq_readerr(fip->cq, &err, 0);
	if (sret < 0) {
		RPMEM_FI_ERR((int)sret, "error reading from completion queue: "
			"cannot read error from event queue");
		goto err;
	}

	str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0);
	RPMEM_LOG(ERR, "error reading from completion queue: %s", str_err);
err:
	/* wake every waiter with the error so none blocks forever */
	rpmem_fip_signal_all(fip, ret);
	free(cq_entries);
	return ret;
}