/* * rpmemd_fip_cq_thread -- completion queue worker thread */ static void * rpmemd_fip_cq_thread(void *arg) { struct rpmemd_fip *fip = arg; struct fi_cq_err_entry err; const char *str_err; ssize_t sret; int ret = 0; while (!fip->closing) { sret = fi_cq_sread(fip->cq, fip->cq_entries, fip->cq_size, NULL, RPMEM_FIP_CQ_WAIT_MS); if (unlikely(fip->closing)) break; if (unlikely(sret == -FI_EAGAIN)) continue; if (unlikely(sret < 0)) { ret = (int)sret; goto err_cq_read; } for (ssize_t i = 0; i < sret; i++) { struct fi_cq_msg_entry *entry = &fip->cq_entries[i]; RPMEMD_ASSERT(entry->op_context); struct rpmemd_fip_lane *lanep = entry->op_context; /* signal lane about SEND completion */ if (entry->flags & FI_SEND) rpmem_fip_lane_signal(&lanep->lane, FI_SEND); /* add lane to worker's ring buffer */ if (entry->flags & FI_RECV) { ret = rpmemd_fip_worker_push(lanep->worker, lanep); } if (ret) goto err; } } return 0; err_cq_read: sret = fi_cq_readerr(fip->cq, &err, 0); if (sret < 0) { RPMEMD_FI_ERR((int)sret, "error reading from completion queue: " "cannot read error from completion queue"); goto err; } str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0); RPMEMD_LOG(ERR, "error reading from completion queue: %s", str_err); err: return (void *)(uintptr_t)ret; }
/*
 * Translate a provider-specific CQ error into a string by forwarding the
 * request to the msg_cq of the first endpoint found on this CQ's ep_list.
 */
static const char *rxm_cq_strerror(struct fid_cq *cq_fid, int prov_errno,
		const void *err_data, char *buf, size_t len)
{
	struct util_cq *parent_cq = container_of(cq_fid, struct util_cq, cq_fid);
	struct fid_list_entry *first_entry =
		container_of(parent_cq->ep_list.next, struct fid_list_entry, entry);
	struct rxm_ep *first_ep =
		container_of(first_entry->fid, struct rxm_ep, util_ep.ep_fid);

	return fi_cq_strerror(first_ep->msg_cq, prov_errno, err_data, buf, len);
}
void cq_readerr(struct fid_cq *cq, char *cq_str) { struct fi_cq_err_entry cq_err; const char *err_str; int ret; ret = fi_cq_readerr(cq, &cq_err, 0); if (ret < 0) FI_PRINTERR("fi_cq_readerr", ret); err_str = fi_cq_strerror(cq, cq_err.prov_errno, cq_err.err_data, NULL, 0); FI_DEBUG("%s %s (%d)\n", cq_str, err_str, cq_err.prov_errno); }
void cq_readerr(struct fid_cq *cq, char *cq_str) { struct fi_cq_err_entry cq_err; const char *err_str; int ret; ret = fi_cq_readerr(cq, &cq_err, 0); if (ret < 0) { FT_PRINTERR("fi_cq_readerr", ret); } else { err_str = fi_cq_strerror(cq, cq_err.prov_errno, cq_err.err_data, NULL, 0); fprintf(stderr, "%s: %d %s\n", cq_str, cq_err.err, fi_strerror(cq_err.err)); fprintf(stderr, "%s: prov_err: %s (%d)\n", cq_str, err_str, cq_err.prov_errno); } }
/* * All EPs use the same underlying datagram provider, so pick any and use its * associated CQ. */ static const char *rxd_cq_strerror(struct fid_cq *cq_fid, int prov_errno, const void *err_data, char *buf, size_t len) { struct fid_list_entry *fid_entry; struct util_ep *util_ep; struct rxd_cq *cq; struct rxd_ep *ep; const char *str; cq = container_of(cq_fid, struct rxd_cq, util_cq.cq_fid); fastlock_acquire(&cq->util_cq.ep_list_lock); assert(!dlist_empty(&cq->util_cq.ep_list)); fid_entry = container_of(cq->util_cq.ep_list.next, struct fid_list_entry, entry); util_ep = container_of(fid_entry->fid, struct util_ep, ep_fid.fid); ep = container_of(util_ep, struct rxd_ep, util_ep); str = fi_cq_strerror(ep->dg_cq, prov_errno, err_data, buf, len); fastlock_release(&cq->util_cq.ep_list_lock); return str; }
// Build a human-readable description of a completion-queue failure.
//
// `ec` is the (possibly negated) code returned by the CQ read. Unless its
// magnitude equals FI_EAVAIL it is reported first. The pending error entry
// is then read from `cq`: a failure of that read is reported, otherwise the
// provider error string, provider errno and generic error are appended.
std::string get_cq_error_string(fid_cq* cq, ssize_t ec)
{
    std::stringstream message{};

    const ssize_t code = (ec < 0) ? -ec : ec;
    if (code != FI_EAVAIL)
    {
        message << "fi_cq_sread error: " << fi_error_to_string(int(code)) << "(" << code << ") ";
    }

    fi_cq_err_entry err_entry = {};
    const auto read_rc = fi_cq_readerr(cq, &err_entry, 0);
    if (read_rc >= 0)
    {
        message << "fi_cq_readerr provider error: "
                << fi_cq_strerror(cq, err_entry.prov_errno, err_entry.err_data, nullptr, 0)
                << "(" << err_entry.prov_errno << ") error: "
                << fi_error_to_string(err_entry.err);
        return message.str();
    }

    message << "fi_cq_readerr error: " << fi_error_to_string(int(read_rc)) << "(" << read_rc
            << ")";
    return message.str();
}
/* * rpmem_fip_process -- (internal) process completion events */ static int rpmem_fip_process(struct rpmem_fip *fip) { ssize_t sret; struct fi_cq_err_entry err; const char *str_err; int ret; struct fi_cq_msg_entry *cq_entries; cq_entries = malloc(fip->cq_size * sizeof(*cq_entries)); if (!cq_entries) { RPMEM_LOG(ERR, "!allocating completion queue buffer"); return -1; } while (!fip->closing) { sret = fi_cq_sread(fip->cq, cq_entries, fip->cq_size, NULL, RPMEM_FIP_CQ_WAIT_MS); if (unlikely(fip->closing)) break; if (unlikely(sret == -FI_EAGAIN)) continue; if (unlikely(sret < 0)) { ret = (int)sret; goto err_cq_read; } for (ssize_t i = 0; i < sret; i++) { struct fi_cq_msg_entry *comp = &cq_entries[i]; /* * If the context is NULL it probably means that * we get an unexpected CQ entry. The CQ is configured * with FI_SELECTIVE_COMPLETION so every inbound or * outbound operation must be issued with FI_COMPLETION * flag and non-NULL context. */ RPMEM_ASSERT(comp->op_context); /* read operation */ if (unlikely(comp->op_context == &fip->rd_lane)) { rpmem_fip_lane_signal(&fip->rd_lane.lane, FI_READ); continue; } /* persist operation */ ret = fip->ops->process(fip, comp->op_context, comp->flags); if (unlikely(ret)) { RPMEM_LOG(ERR, "persist operation failed"); goto err; } } } free(cq_entries); return 0; err_cq_read: sret = fi_cq_readerr(fip->cq, &err, 0); if (sret < 0) { RPMEM_FI_ERR((int)sret, "error reading from completion queue: " "cannot read error from event queue"); goto err; } str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0); RPMEM_LOG(ERR, "error reading from completion queue: %s", str_err); err: rpmem_fip_signal_all(fip, ret); free(cq_entries); return ret; }
/*
 * do_test -- transmit num_msgs messages of msg_len bytes in batches of at
 * most post_depth sends, reaping one send completion per posted message,
 * then print the measured transmit rate.
 *
 * On first use the transmit buffer ctx.buf (msg_len * post_depth bytes) is
 * allocated and registered. If registration fails, ctx.buf is replaced by
 * ERR_PTR(-EFAULT) so that later calls take the IS_ERR() branch and return 0
 * without doing anything.
 *
 * Returns 0 on success (or when ctx.buf is an error pointer), -ENOMEM on
 * allocation failure, -EINTR when kthread_should_stop() is observed inside
 * the send/reap loops, or the failing fi_* return value.
 */
int do_test(void)
{
	struct fi_cq_msg_entry comp;
	int len = msg_len * post_depth;
	int msg_cnt = num_msgs;
	int tx_bufs_sent = 0;
	int ret;
	char *mp;
	u64 time_elap;
#if SREAD == 0
	/* countdown of empty polls before yielding the CPU with schedule() */
	int eagain_cnt = EAGAIN_TRIES;
#endif

	print_trace("in\n");

	if (!ctx.buf) {
		ctx.buf = kmalloc(len, GFP_KERNEL);
		if (!ctx.buf) {
			print_err("kalloc failed!\n");
			return -ENOMEM;
		}

		ret = fi_mr_reg(ctx.domain, ctx.buf, len, 0, 0, 0, 0, &ctx.mr, NULL);
		if (ret) {
			print_err("fi_mr_reg returned %d\n", ret);
			kfree(ctx.buf);
			/* poison the pointer so subsequent calls bail out early */
			ctx.buf = ERR_PTR(-EFAULT);
			return ret;
		}
	} else if (IS_ERR(ctx.buf))
		return 0;

	print_msg("post_depth %d num_msgs %d msg_len %d SREAD[%d]\n", post_depth, num_msgs, msg_len, SREAD);

	/*
	 * NOTE(review): on the first call ctx.buf was just kmalloc()ed and has
	 * not been written yet, while '%s' and strlen() read it as a string --
	 * confirm this is safe when print_dbg is enabled.
	 */
	print_dbg("ctx.buf %p '%s' len %ld msg_len %d\n", ctx.buf, ctx.buf, strlen(ctx.buf)+1, msg_len);

	time_elap = get_jiffies_64();

	for (mp = ctx.buf; msg_cnt > 0 && !kthread_should_stop(); ) {
		int post_cnt, cnt;

		/* size of this batch: at most post_depth messages */
		post_cnt = (msg_cnt > post_depth ? post_depth : msg_cnt);

		/* each batch reuses the buffer from its start, one slot per send */
		for (cnt = 0, mp = ctx.buf; cnt < post_cnt; cnt++, mp += msg_len) {
			if (verify) {
				sprintf(mp, TEST_MESSAGE, tx_bufs_sent);
				tx_bufs_sent++;
			}

			/* the slot address doubles as the operation context */
			ret = fi_send(ctx.ep, mp, msg_len, fi_mr_desc(ctx.mr), 0, mp);
			if (ret) {
				print_err("fi_send returned %d '%s'\n", ret, fi_strerror(ret));
				return ret;
			}
			if (kthread_should_stop())
				return -EINTR;
		}

		/* reap completions */
		for (cnt = 0; cnt < post_cnt; cnt++) {
#if SREAD
			/*
			 * Blocking read of a single completion. A timeout is
			 * logged here and, being negative, is then handled by
			 * the generic error path below.
			 */
			ret = fi_cq_sread(ctx.scq, &comp, 1, 0, TIMEOUT);
			if (ret == -ETIMEDOUT) {
				print_msg("%s(ETIMEDOUT) cnt %d post_cnt %d "
					"msg_cnt %d\n", "fi_cq_sread", cnt, post_cnt, msg_cnt);
			}
			if (kthread_should_stop())
				return -EINTR;
#else
			/*
			 * Poll until a completion or a real error shows up,
			 * yielding every EAGAIN_TRIES empty polls.
			 */
			do {
				ret = fi_cq_read(ctx.scq, &comp, 1);
				if (ret == 0 || ret == -EAGAIN) {
					if (--eagain_cnt <= 0) {
						dprint(DEBUG_HIGH, "%s(resched %d) cnt "
							"%d post_cnt %d\n", "fi_cq_read", ret, cnt, post_cnt);
						eagain_cnt = EAGAIN_TRIES;
						schedule();
					}
				}
				if (kthread_should_stop())
					return -EINTR;
			} while (ret == 0 || ret == -EAGAIN);
#endif
			if (ret < 0) {
				struct fi_cq_err_entry cqe = { 0 };
				int rc;

				rc = fi_cq_readerr(ctx.scq, &cqe, 0);
				print_err("fi_cq_read returned %d '%s'\n", ret, fi_strerror(ret));
				/*
				 * NOTE(review): this branch is also taken when
				 * rc is negative, i.e. presumably when reading
				 * the error entry failed and cqe is still all
				 * zeroes -- confirm whether 'rc > 0' was meant.
				 */
				if (rc) {
					char buf[64];

					print_err("fi_cq_readerr() err '%s'(%d)"
						"\n", fi_strerror(cqe.err), cqe.err);
					print_err("fi_cq_readerr() prov_err "
						"'%s'(%d)\n",
						fi_cq_strerror(ctx.scq, cqe.prov_errno, cqe.err_data, buf, sizeof(buf)),
						cqe.prov_errno);
				}
				return ret;
			}
			if (!ret)
				print_err("fi_cq_sread no completion? ret %d\n", ret);

#if 0
			if ((char *)comp.op_context < (char *)ctx.buf ||
				(char *)comp.op_context >= (char *) &ctx.buf[msg_len*post_depth]) {
				print_err("cq.op_context(%p) not in range "
					"[ctx.buf(%p) ... &ctx.buf[%d](%p)]\n",
					(void *)comp.op_context, (void *)ctx.buf, msg_len, (void *)&ctx.buf[msg_len]);
			}
#endif
			/* op_context is the slot address that was passed to fi_send() */
			if (verify)
				print_msg("Tx '%s'\n", (char *) comp.op_context);
		}
		msg_cnt -= post_cnt;
	}
	time_elap = get_jiffies_64() - time_elap;

#define AGIG (1024UL*1024UL*1024UL)
#define AMEG (1024UL*1024UL)
#define AKILO (1024UL)
	{
		struct timeval tv;
		ulong rate, rate_mod, bytes, units_of;
		char units;

		jiffies_to_timeval(time_elap, &tv);
		bytes = (ulong) num_msgs * (ulong) msg_len;

		/*
		 * The rate is computed only when at least one full second
		 * elapsed and at least 1 KiB was sent; otherwise zeroes are
		 * reported. The unit is picked from the bytes-per-second rate.
		 */
		if (bytes >= AKILO && tv.tv_sec > 0) {
			rate = bytes / tv.tv_sec;
			rate_mod = bytes % tv.tv_sec;
			if (rate >= AGIG) {
				units = 'G';
				units_of = AGIG;
			} else if (rate >= AMEG) {
				units = 'M';
				units_of = AMEG;
			} else {
				units = 'K';
				units_of = AKILO;
			}
			rate /= units_of;
		} else {
			rate = rate_mod = 0UL;
			units = ' ';
			units_of = 1UL;
		}

		/*
		 * NOTE(review): the values printed after the '.' are raw
		 * remainders (bytes % units_of, bytes % tv.tv_sec), not
		 * decimal fractions -- confirm the intended output format.
		 */
		print_info("Tx %d msgs (%lu.%lu%cB) @ ~%lu.%lu %cB/sec (%ld sec %ld "
			"usec)\n",
			num_msgs, (bytes/units_of), (bytes % units_of), units,
			rate, rate_mod, units, tv.tv_sec, tv.tv_usec);
	}

	return 0;
}
/*
 * Translate a provider-specific error by delegating to the datagram CQ
 * (dg_cq) that backs this rxd CQ.
 */
static const char *rxd_cq_strerror(struct fid_cq *cq_fid, int prov_errno,
		const void *err_data, char *buf, size_t len)
{
	struct rxd_cq *cq;

	cq = container_of(cq_fid, struct rxd_cq, util_cq.cq_fid);

	return fi_cq_strerror(cq->dg_cq, prov_errno, err_data, buf, len);
}
int main(int argc, char *argv[]) { uint64_t flags = 0; char *service = NULL; char *node = NULL; struct pingpong_context *ctx; struct timeval start, end; unsigned long size = 4096; // No provider support yet //enum ibv_mtu mtu = IBV_MTU_1024; //size_t mtu = 1024; int rx_depth_default = 500; int rx_depth = 0; int iters = 1000; int use_event = 0; int rcnt, scnt; int ret, rc = 0; char * ptr; srand48(getpid() * time(NULL)); opts = INIT_OPTS; hints = fi_allocinfo(); if (!hints) return 1; while (1) { int c; c = getopt(argc, argv, "S:m:r:n:eh" ADDR_OPTS INFO_OPTS); if (c == -1) break; switch (c) { case 'S': errno = 0; size = strtol(optarg, &ptr, 10); if (ptr == optarg || *ptr != '\0' || ((size == LONG_MIN || size == LONG_MAX) && errno == ERANGE)) { fprintf(stderr, "Cannot convert from string to long\n"); rc = 1; goto err1; } break; // No provider support yet /*case 'm': mtu = strtol(optarg, NULL, 0); mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (mtu < 0) { usage(argv[0]); return 1; } break; */ case 'r': rx_depth = strtol(optarg, NULL, 0); break; case 'n': iters = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; default: ft_parse_addr_opts(c, optarg, &opts); ft_parseinfo(c, optarg, hints); break; case '?': case 'h': usage(argv[0]); return 1; } } if (optind == argc - 1) opts.dst_addr = argv[optind]; else if (optind < argc) { usage(argv[0]); return 1; } page_size = sysconf(_SC_PAGESIZE); hints->ep_attr->type = FI_EP_MSG; hints->caps = FI_MSG; hints->mode = FI_LOCAL_MR; rc = ft_read_addr_opts(&node, &service, hints, &flags, &opts); if (rc) return -rc; rc = fi_getinfo(FT_FIVERSION, node, service, flags, hints, &fi); if (rc) { FT_PRINTERR("fi_getinfo", rc); return -rc; } fi_freeinfo(hints); if (rx_depth) { if (rx_depth > fi->rx_attr->size) { fprintf(stderr, "rx_depth requested: %d, " "rx_depth supported: %zd\n", rx_depth, fi->rx_attr->size); rc = 1; goto err1; } } else { rx_depth = (rx_depth_default > fi->rx_attr->size) ? 
fi->rx_attr->size : rx_depth_default; } ctx = pp_init_ctx(fi, size, rx_depth, use_event); if (!ctx) { rc = 1; goto err1; } if (opts.dst_addr) { /* client connect */ if (pp_connect_ctx(ctx)) { rc = 1; goto err2; } } else { /* server listen and accept */ pp_listen_ctx(ctx); pp_accept_ctx(ctx); } ctx->pending = PINGPONG_RECV_WCID; if (opts.dst_addr) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); rc = 1; goto err3; } ctx->pending |= PINGPONG_SEND_WCID; } if (gettimeofday(&start, NULL)) { perror("gettimeofday"); rc = 1; goto err3; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { struct fi_cq_entry wc; struct fi_cq_err_entry cq_err; int rd; if (use_event) { /* Blocking read */ rd = fi_cq_sread(ctx->cq, &wc, 1, NULL, -1); } else { do { rd = fi_cq_read(ctx->cq, &wc, 1); } while (rd == -FI_EAGAIN); } if (rd < 0) { fi_cq_readerr(ctx->cq, &cq_err, 0); fprintf(stderr, "cq fi_cq_readerr() %s (%d)\n", fi_cq_strerror(ctx->cq, cq_err.err, cq_err.err_data, NULL, 0), cq_err.err); rc = rd; goto err3; } switch ((int) (uintptr_t) wc.op_context) { case PINGPONG_SEND_WCID: ++scnt; break; case PINGPONG_RECV_WCID: if (--ctx->routs <= 1) { ctx->routs += pp_post_recv(ctx, ctx->rx_depth - ctx->routs); if (ctx->routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", ctx->routs); rc = 1; goto err3; } } ++rcnt; break; default: fprintf(stderr, "Completion for unknown wc_id %d\n", (int) (uintptr_t) wc.op_context); rc = 1; goto err3; } ctx->pending &= ~(int) (uintptr_t) wc.op_context; if (scnt < iters && !ctx->pending) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); rc = 1; goto err3; } ctx->pending = PINGPONG_RECV_WCID | PINGPONG_SEND_WCID; } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); rc = 1; goto err3; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec / 1000000., 
bytes * 8. / usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); } err3: fi_shutdown(ctx->ep, 0); err2: ret = pp_close_ctx(ctx); if (!rc) rc = ret; err1: fi_freeinfo(fi); return rc; }