void api_write_read(int len) { int ret; struct fi_cq_tagged_entry cqe; struct fi_cq_err_entry err_cqe = {0}; rdm_api_init_data(source, len, 0xab); rdm_api_init_data(target, len, 0); fi_write(ep[0], source, len, loc_mr[0], gni_addr[1], (uint64_t)target, mr_key[1], target); while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN) pthread_yield(); if (ret == -FI_EAVAIL) { fi_cq_readerr(msg_cq[0], &err_cqe, 0); dbg_printf("fi_cq_readerr err:%d\n", err_cqe.err); } if (write_allowed(FI_RMA, fi[0]->caps, fi[1]->caps)) { cr_assert(ret == 1, "fi_write failed caps:0x%lx ret:%d", fi[0]->caps, ret); } else { cr_assert(err_cqe.err == FI_EOPNOTSUPP, "fi_write should fail caps:0x%lx err:%d", fi[0]->caps, err_cqe.err); } fi_read(ep[0], source, len, loc_mr[0], gni_addr[1], (uint64_t)target, mr_key[1], (void *)READ_CTX); while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN) pthread_yield(); if (ret == -FI_EAVAIL) { fi_cq_readerr(msg_cq[0], &err_cqe, 0); dbg_printf("fi_cq_readerr err:%d\n", err_cqe.err); } if (read_allowed(FI_RMA, fi[0]->caps, fi[1]->caps)) { cr_assert(ret == 1, "fi_read failed caps:0x%lx rcaps:0x%lx", fi[0]->caps, fi[1]->caps); } else { cr_assert(err_cqe.err == FI_EOPNOTSUPP, "fi_read should fail caps:0x%lx rcaps:0x%lx", fi[0]->caps, fi[1]->caps); } }
int rdm_sr_check_canceled(struct fid_cq *cq) { struct fi_cq_err_entry ee; fi_cq_readerr(cq, &ee, 0); return (ee.err == FI_ECANCELED); }
/* * rpmemd_fip_cq_thread -- completion queue worker thread */ static void * rpmemd_fip_cq_thread(void *arg) { struct rpmemd_fip *fip = arg; struct fi_cq_err_entry err; const char *str_err; ssize_t sret; int ret = 0; while (!fip->closing) { sret = fi_cq_sread(fip->cq, fip->cq_entries, fip->cq_size, NULL, RPMEM_FIP_CQ_WAIT_MS); if (unlikely(fip->closing)) break; if (unlikely(sret == -FI_EAGAIN)) continue; if (unlikely(sret < 0)) { ret = (int)sret; goto err_cq_read; } for (ssize_t i = 0; i < sret; i++) { struct fi_cq_msg_entry *entry = &fip->cq_entries[i]; RPMEMD_ASSERT(entry->op_context); struct rpmemd_fip_lane *lanep = entry->op_context; /* signal lane about SEND completion */ if (entry->flags & FI_SEND) rpmem_fip_lane_signal(&lanep->lane, FI_SEND); /* add lane to worker's ring buffer */ if (entry->flags & FI_RECV) { ret = rpmemd_fip_worker_push(lanep->worker, lanep); } if (ret) goto err; } } return 0; err_cq_read: sret = fi_cq_readerr(fip->cq, &err, 0); if (sret < 0) { RPMEMD_FI_ERR((int)sret, "error reading from completion queue: " "cannot read error from completion queue"); goto err; } str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0); RPMEMD_LOG(ERR, "error reading from completion queue: %s", str_err); err: return (void *)(uintptr_t)ret; }
void api_do_read_buf(void) { int ret; int len = 8*1024; ssize_t sz; struct fi_cq_tagged_entry cqe; struct fi_cq_err_entry err_cqe; rdm_api_init_data(source, BUF_SZ, 0); rdm_api_init_data(target, BUF_SZ, 0xad); /* cause a chained transaction */ sz = fi_read(ep[0], source+6, len, loc_mr[0], gni_addr[1], (uint64_t)target+6, mr_key[1], (void *)READ_CTX); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN) pthread_yield(); if (ret == -FI_EAVAIL) { fi_cq_readerr(msg_cq[0], &err_cqe, 0); dbg_printf("fi_cq_readerr err:%d\n", err_cqe.err); } if (read_allowed(FI_RMA, fi[0]->caps, fi[1]->caps)) { cr_assert(ret == 1, "fi_read failed caps:0x%lx rcaps:0x%lx", fi[0]->caps, fi[1]->caps); } else { cr_assert(err_cqe.err == FI_EOPNOTSUPP, "fi_read should fail caps:0x%lx rcaps:0x%lx", fi[0]->caps, fi[1]->caps); } }
static inline int rdm_str_addr_sr_check_err_cqe(struct fid_cq *cq) { int ret = FI_SUCCESS, cnt; struct fi_cq_err_entry ee; size_t name_size; char *buffer; fi_addr_t fi_addr; /*application provided error_data buffer and length*/ ee.err_data_size = addrlen; ee.err_data = malloc(addrlen); cr_assert((ee.err_data != NULL), "malloc failed"); buffer = malloc(addrlen); cr_assert((buffer != NULL), "malloc failed"); cnt = fi_cq_readerr(cq, &ee, 0); cr_assert((cnt == 1), "fi_cq_readerr didn't return entry"); if ((hints->caps & FI_SOURCE_ERR) && ee.err == FI_EADDRNOTAVAIL) { ret = fi_av_insert(av[1], ee.err_data, 1, &fi_addr, 0, NULL); cr_assert(ret == 1, "fi_av_insert failed"); name_size = addrlen; ret = fi_av_lookup(av[1], fi_addr, buffer, &name_size); cr_assert(ret == FI_SUCCESS, "fi_av_lookup failed"); cr_assert(name_size == addrlen); cr_assert(strncmp((char *)buffer, (char *)ee.err_data, addrlen) == 0); } return ret; }
void cq_readerr(struct fid_cq *cq, char *cq_str) { struct fi_cq_err_entry cq_err; const char *err_str; int ret; ret = fi_cq_readerr(cq, &cq_err, 0); if (ret < 0) FI_PRINTERR("fi_cq_readerr", ret); err_str = fi_cq_strerror(cq, cq_err.prov_errno, cq_err.err_data, NULL, 0); FI_DEBUG("%s %s (%d)\n", cq_str, err_str, cq_err.prov_errno); }
int ft_cq_readerr(struct fid_cq *cq) { struct fi_cq_err_entry cq_err; int ret; ret = fi_cq_readerr(cq, &cq_err, 0); if (ret < 0) { FT_PRINTERR("fi_cq_readerr", ret); } else { FT_CQ_ERR(cq, cq_err, NULL, 0); ret = -cq_err.err; } return ret; }
void cq_readerr(struct fid_cq *cq, char *cq_str) { struct fi_cq_err_entry cq_err; const char *err_str; int ret; ret = fi_cq_readerr(cq, &cq_err, 0); if (ret < 0) { FT_PRINTERR("fi_cq_readerr", ret); } else { err_str = fi_cq_strerror(cq, cq_err.prov_errno, cq_err.err_data, NULL, 0); fprintf(stderr, "%s: %d %s\n", cq_str, cq_err.err, fi_strerror(cq_err.err)); fprintf(stderr, "%s: prov_err: %s (%d)\n", cq_str, err_str, cq_err.prov_errno); } }
Test(gnix_cancel, cancel_ep_send) { int ret; struct gnix_fid_ep *gnix_ep; struct gnix_fab_req *req; struct fi_cq_err_entry buf; struct gnix_vc *vc; void *foobar_ptr = NULL; gnix_ht_key_t *key; /* simulate a posted request */ gnix_ep = container_of(ep[0], struct gnix_fid_ep, ep_fid); req = _gnix_fr_alloc(gnix_ep); req->msg.send_info[0].send_addr = 0xdeadbeef; req->msg.cum_send_len = req->msg.send_info[0].send_len = 128; req->user_context = foobar_ptr; req->type = GNIX_FAB_RQ_SEND; /* allocate, store vc */ ret = _gnix_vc_alloc(gnix_ep, NULL, &vc); cr_assert(ret == FI_SUCCESS, "_gnix_vc_alloc failed"); key = (gnix_ht_key_t *)&gnix_ep->my_name.gnix_addr; ret = _gnix_ht_insert(gnix_ep->vc_ht, *key, vc); cr_assert(!ret); /* make a dummy request */ fastlock_acquire(&vc->tx_queue_lock); dlist_insert_head(&req->dlist, &vc->tx_queue); fastlock_release(&vc->tx_queue_lock); /* cancel simulated request */ ret = fi_cancel(&ep[0]->fid, foobar_ptr); cr_assert(ret == FI_SUCCESS, "fi_cancel failed"); /* check for event */ ret = fi_cq_readerr(msg_cq[0], &buf, FI_SEND); cr_assert(ret == 1, "did not find one error event"); cr_assert(buf.buf == (void *) 0xdeadbeef, "buffer mismatch"); cr_assert(buf.data == 0, "data mismatch"); cr_assert(buf.err == FI_ECANCELED, "error code mismatch"); cr_assert(buf.prov_errno == FI_ECANCELED, "prov error code mismatch"); cr_assert(buf.len == 128, "length mismatch"); }
void do_send_err(int len) { int ret; struct fi_cq_tagged_entry s_cqe; struct fi_cq_err_entry err_cqe; ssize_t sz; uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; uint64_t r_e[NUMEPS] = {0}; rdm_sr_init_data(source, len, 0xab); rdm_sr_init_data(target, len, 0); sz = fi_send(ep[0], source, len, loc_mr[0], gni_addr[1], target); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(msg_cq[0], &s_cqe, 1)) == -FI_EAGAIN) { pthread_yield(); } cr_assert_eq(ret, -FI_EAVAIL); ret = fi_cq_readerr(msg_cq[0], &err_cqe, 0); cr_assert_eq(ret, 1); cr_assert((uint64_t)err_cqe.op_context == (uint64_t)target, "Bad error context"); cr_assert(err_cqe.flags == (FI_MSG | FI_SEND)); cr_assert(err_cqe.len == 0, "Bad error len"); cr_assert(err_cqe.buf == 0, "Bad error buf"); cr_assert(err_cqe.data == 0, "Bad error data"); cr_assert(err_cqe.tag == 0, "Bad error tag"); cr_assert(err_cqe.olen == 0, "Bad error olen"); cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno"); cr_assert(err_cqe.prov_errno == GNI_RC_TRANSACTION_ERROR, "Bad prov errno"); cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); s_e[0] = 1; rdm_sr_check_cntrs(s, r, s_e, r_e); }
void do_read_error(int len) { int ret; ssize_t sz; struct fi_cq_tagged_entry cqe; struct fi_cq_err_entry err_cqe; init_data(source, len, 0); init_data(target, len, 0xad); sz = fi_read(ep[0], source, len, loc_mr, gni_addr[1], (uint64_t)target, mr_key, (void *)READ_CTX); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(send_cq, &cqe, 1)) == -FI_EAGAIN) { pthread_yield(); } cr_assert_eq(ret, -FI_EAVAIL); ret = fi_cq_readerr(send_cq, &err_cqe, 0); cr_assert_eq(ret, 1); cr_assert((uint64_t)err_cqe.op_context == (uint64_t)READ_CTX, "Bad error context"); cr_assert(err_cqe.flags == (FI_RMA | FI_READ)); cr_assert(err_cqe.len == 0, "Bad error len"); cr_assert(err_cqe.buf == 0, "Bad error buf"); cr_assert(err_cqe.data == 0, "Bad error data"); cr_assert(err_cqe.tag == 0, "Bad error tag"); cr_assert(err_cqe.olen == 0, "Bad error olen"); cr_assert(err_cqe.err == FI_ECANCELED, "Bad error errno"); cr_assert(err_cqe.prov_errno == GNI_RC_TRANSACTION_ERROR, "Bad prov errno"); cr_assert(err_cqe.err_data == NULL, "Bad error provider data"); rdm_rma_check_cntrs(0, 0, 0, 1); }
Test(gnix_cancel, cancel_ep_recv) { int ret; struct fi_cq_err_entry buf; /* simulate a posted request */ ret = fi_recv(ep[0], (void *) 0xdeadbeef, 128, 0, FI_ADDR_UNSPEC, (void *) 0xcafebabe); cr_assert(ret == FI_SUCCESS, "fi_recv failed"); /* cancel simulated request */ ret = fi_cancel(&ep[0]->fid, (void *) 0xcafebabe); cr_assert(ret == FI_SUCCESS, "fi_cancel failed"); /* check for event */ ret = fi_cq_readerr(msg_cq[0], &buf, FI_RECV); cr_assert(ret == 1, "did not find one error event"); cr_assert(buf.buf == (void *) 0xdeadbeef, "buffer mismatch"); cr_assert(buf.data == 0, "data mismatch"); cr_assert(buf.err == FI_ECANCELED, "error code mismatch"); cr_assert(buf.prov_errno == FI_ECANCELED, "prov error code mismatch"); cr_assert(buf.len == 128, "length mismatch"); }
// Builds a human-readable description of a completion-queue failure.
//
// `ec` is the status of the failed CQ read; its sign is ignored. Unless it
// is FI_EAVAIL, the text starts with the generic description of `ec`. The
// function then reads the pending error entry from `cq` and appends either
// the provider's description of that entry or, if the entry could not be
// read, the description of that second failure.
std::string get_cq_error_string(fid_cq* cq, ssize_t ec) {
  std::stringstream error{};

  // Work with the magnitude of the status code.
  const ssize_t code = (ec < 0) ? -ec : ec;
  if (code != FI_EAVAIL) {
    error << "fi_cq_sread error: " << fi_error_to_string(int(code)) << "("
          << code << ") ";
  }

  fi_cq_err_entry entry = {};
  const auto rc = fi_cq_readerr(cq, &entry, 0);

  if (rc >= 0) {
    error << "fi_cq_readerr provider error: "
          << fi_cq_strerror(cq, entry.prov_errno, entry.err_data, nullptr, 0)
          << "(" << entry.prov_errno
          << ") error: " << fi_error_to_string(entry.err);
    return error.str();
  }

  error << "fi_cq_readerr error: " << fi_error_to_string(int(rc)) << "("
        << rc << ")";
  return error.str();
}
/* * rpmem_fip_process -- (internal) process completion events */ static int rpmem_fip_process(struct rpmem_fip *fip) { ssize_t sret; struct fi_cq_err_entry err; const char *str_err; int ret; struct fi_cq_msg_entry *cq_entries; cq_entries = malloc(fip->cq_size * sizeof(*cq_entries)); if (!cq_entries) { RPMEM_LOG(ERR, "!allocating completion queue buffer"); return -1; } while (!fip->closing) { sret = fi_cq_sread(fip->cq, cq_entries, fip->cq_size, NULL, RPMEM_FIP_CQ_WAIT_MS); if (unlikely(fip->closing)) break; if (unlikely(sret == -FI_EAGAIN)) continue; if (unlikely(sret < 0)) { ret = (int)sret; goto err_cq_read; } for (ssize_t i = 0; i < sret; i++) { struct fi_cq_msg_entry *comp = &cq_entries[i]; /* * If the context is NULL it probably means that * we get an unexpected CQ entry. The CQ is configured * with FI_SELECTIVE_COMPLETION so every inbound or * outbound operation must be issued with FI_COMPLETION * flag and non-NULL context. */ RPMEM_ASSERT(comp->op_context); /* read operation */ if (unlikely(comp->op_context == &fip->rd_lane)) { rpmem_fip_lane_signal(&fip->rd_lane.lane, FI_READ); continue; } /* persist operation */ ret = fip->ops->process(fip, comp->op_context, comp->flags); if (unlikely(ret)) { RPMEM_LOG(ERR, "persist operation failed"); goto err; } } } free(cq_entries); return 0; err_cq_read: sret = fi_cq_readerr(fip->cq, &err, 0); if (sret < 0) { RPMEM_FI_ERR((int)sret, "error reading from completion queue: " "cannot read error from event queue"); goto err; } str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0); RPMEM_LOG(ERR, "error reading from completion queue: %s", str_err); err: rpmem_fip_signal_all(fip, ret); free(cq_entries); return ret; }
int do_test(void) { struct fi_cq_msg_entry comp; int len = msg_len * post_depth; int msg_cnt = num_msgs; int tx_bufs_sent = 0; int ret; char *mp; u64 time_elap; #if SREAD == 0 int eagain_cnt = EAGAIN_TRIES; #endif print_trace("in\n"); if (!ctx.buf) { ctx.buf = kmalloc(len, GFP_KERNEL); if (!ctx.buf) { print_err("kalloc failed!\n"); return -ENOMEM; } ret = fi_mr_reg(ctx.domain, ctx.buf, len, 0, 0, 0, 0, &ctx.mr, NULL); if (ret) { print_err("fi_mr_reg returned %d\n", ret); kfree(ctx.buf); ctx.buf = ERR_PTR(-EFAULT); return ret; } } else if (IS_ERR(ctx.buf)) return 0; print_msg("post_depth %d num_msgs %d msg_len %d SREAD[%d]\n", post_depth, num_msgs, msg_len, SREAD); print_dbg("ctx.buf %p '%s' len %ld msg_len %d\n", ctx.buf, ctx.buf, strlen(ctx.buf)+1, msg_len); time_elap = get_jiffies_64(); for (mp = ctx.buf; msg_cnt > 0 && !kthread_should_stop(); ) { int post_cnt, cnt; post_cnt = (msg_cnt > post_depth ? post_depth : msg_cnt); for (cnt = 0, mp = ctx.buf; cnt < post_cnt; cnt++, mp += msg_len) { if (verify) { sprintf(mp, TEST_MESSAGE, tx_bufs_sent); tx_bufs_sent++; } ret = fi_send(ctx.ep, mp, msg_len, fi_mr_desc(ctx.mr), 0, mp); if (ret) { print_err("fi_send returned %d '%s'\n", ret, fi_strerror(ret)); return ret; } if (kthread_should_stop()) return -EINTR; } /* reap completions */ for (cnt = 0; cnt < post_cnt; cnt++) { #if SREAD ret = fi_cq_sread(ctx.scq, &comp, 1, 0, TIMEOUT); if (ret == -ETIMEDOUT) { print_msg("%s(ETIMEDOUT) cnt %d post_cnt %d " "msg_cnt %d\n", "fi_cq_sread", cnt, post_cnt, msg_cnt); } if (kthread_should_stop()) return -EINTR; #else do { ret = fi_cq_read(ctx.scq, &comp, 1); if (ret == 0 || ret == -EAGAIN) { if (--eagain_cnt <= 0) { dprint(DEBUG_HIGH, "%s(resched %d) cnt " "%d post_cnt %d\n", "fi_cq_read", ret, cnt, post_cnt); eagain_cnt = EAGAIN_TRIES; schedule(); } } if (kthread_should_stop()) return -EINTR; } while (ret == 0 || ret == -EAGAIN); #endif if (ret < 0) { struct fi_cq_err_entry cqe = { 0 }; int rc; rc = fi_cq_readerr(ctx.scq, &cqe, 0); 
print_err("fi_cq_read returned %d '%s'\n", ret, fi_strerror(ret)); if (rc) { char buf[64]; print_err("fi_cq_readerr() err '%s'(%d)" "\n", fi_strerror(cqe.err), cqe.err); print_err("fi_cq_readerr() prov_err " "'%s'(%d)\n", fi_cq_strerror(ctx.scq, cqe.prov_errno, cqe.err_data, buf, sizeof(buf)), cqe.prov_errno); } return ret; } if (!ret) print_err("fi_cq_sread no completion? ret %d\n", ret); #if 0 if ((char *)comp.op_context < (char *)ctx.buf || (char *)comp.op_context >= (char *) &ctx.buf[msg_len*post_depth]) { print_err("cq.op_context(%p) not in range " "[ctx.buf(%p) ... &ctx.buf[%d](%p)]\n", (void *)comp.op_context, (void *)ctx.buf, msg_len, (void *)&ctx.buf[msg_len]); } #endif if (verify) print_msg("Tx '%s'\n", (char *) comp.op_context); } msg_cnt -= post_cnt; } time_elap = get_jiffies_64() - time_elap; #define AGIG (1024UL*1024UL*1024UL) #define AMEG (1024UL*1024UL) #define AKILO (1024UL) { struct timeval tv; ulong rate, rate_mod, bytes, units_of; char units; jiffies_to_timeval(time_elap, &tv); bytes = (ulong) num_msgs * (ulong) msg_len; if (bytes >= AKILO && tv.tv_sec > 0) { rate = bytes / tv.tv_sec; rate_mod = bytes % tv.tv_sec; if (rate >= AGIG) { units = 'G'; units_of = AGIG; } else if (rate >= AMEG) { units = 'M'; units_of = AMEG; } else { units = 'K'; units_of = AKILO; } rate /= units_of; } else { rate = rate_mod = 0UL; units = ' '; units_of = 1UL; } print_info("Tx %d msgs (%lu.%lu%cB) @ ~%lu.%lu %cB/sec (%ld sec %ld " "usec)\n", num_msgs, (bytes/units_of), (bytes % units_of), units, rate, rate_mod, units, tv.tv_sec, tv.tv_usec); } return 0; }
void do_atomic_write_fetch(void) { int ret; ssize_t sz; uint64_t operand; struct fi_cq_tagged_entry cqe; struct fi_cq_err_entry err_cqe; /* u64 */ *((uint64_t *)source) = SOURCE_DATA; *((uint64_t *)target) = TARGET_DATA; sz = fi_atomic(ep[0], source, 1, loc_mr[0], gni_addr[1], (uint64_t)target, mr_key[1], FI_UINT64, FI_ATOMIC_WRITE, target); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN) pthread_yield(); if (ret == -FI_EAVAIL) { fi_cq_readerr(msg_cq[0], &err_cqe, 0); dbg_printf("fi_cq_readerr err:%d\n", err_cqe.err); } if (write_allowed(FI_ATOMIC, fi[0]->caps, fi[1]->caps)) { cr_assert(ret == 1, "fi_atomic failed caps:0x%lx rcaps:0x%lx", fi[0]->caps, fi[1]->caps); } else { cr_assert(err_cqe.err == FI_EOPNOTSUPP, "fi_atomic should fail caps:0x%lx rcaps:0x%lx", fi[0]->caps, fi[1]->caps); } /* u64 */ operand = SOURCE_DATA; *((uint64_t *)source) = FETCH_SOURCE_DATA; *((uint64_t *)target) = TARGET_DATA; sz = fi_fetch_atomic(ep[0], &operand, 1, NULL, source, loc_mr[0], gni_addr[1], (uint64_t)target, mr_key[1], FI_UINT64, FI_ATOMIC_READ, target); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(msg_cq[0], &cqe, 1)) == -FI_EAGAIN) pthread_yield(); if (ret == -FI_EAVAIL) { fi_cq_readerr(msg_cq[0], &err_cqe, 0); dbg_printf("fi_cq_readerr err:%d\n", err_cqe.err); } if (read_allowed(FI_ATOMIC, fi[0]->caps, fi[1]->caps)) { cr_assert(ret == 1, "fi_fetch_atomic failed caps:0x%lx rcaps:0x%lx", fi[0]->caps, fi[1]->caps); } else { cr_assert(err_cqe.err == FI_EOPNOTSUPP, "fi_fetch_atomic should fail caps:0x%lx rcaps:0x%lx", fi[0]->caps, fi[1]->caps); } }
/*
 * MPID_nem_ofi_poll -- poll the tagged completion queue gl_data.cq and
 * dispatch what it returns.
 *
 * in_blocking_poll: when non-zero the loop repeats for as long as the last
 *                   call that assigned `ret` returned a positive value;
 *                   otherwise the body runs once.
 *
 * A successful completion is dispatched to the request's event_callback
 * if one is set, otherwise to its OnDataAvail handler; a completion with
 * neither, or with a NULL context, trips an assertion.  -FI_EAGAIN is
 * ignored.  -FI_EAVAIL causes the error entry to be read and handled
 * (truncation, cancellation, or MPI_ERR_OTHER).  Any other negative value
 * is reported through MPIR_ERR_CHKANDJUMP4.
 *
 * NOTE(review): no return statement is visible in this function;
 * presumably END_FUNC_RC supplies it together with the labels the error
 * macros jump to -- confirm against the macro definitions.
 */
int MPID_nem_ofi_poll(int in_blocking_poll)
{
    int complete = 0, mpi_errno = MPI_SUCCESS;
    ssize_t ret;
    cq_tagged_entry_t wc;
    cq_err_entry_t error;
    MPIDI_VC_t *vc;
    MPID_Request *req;
    req_fn reqFn;
    BEGIN_FUNC(FCNAME);
    do {
        /* ----------------------------------------------------- */
        /* Poll the completion queue                             */
        /* The strategy here is                                  */
        /* ret>0 successful poll, events returned                */
        /* ret==0 empty poll, no events/no error                 */
        /* ret<0, error, but some error instances should not     */
        /* cause MPI to terminate                                */
        /* ----------------------------------------------------- */
        ret = fi_cq_read(gl_data.cq,    /* Tagged completion queue       */
                         (void *) &wc,  /* OUT:  Tagged completion entry */
                         1);    /* Number of entries to poll     */
        if (ret > 0) {
            if (NULL != wc.op_context) {
                req = context_to_req(wc.op_context);
                /* a per-request event callback takes precedence */
                if (REQ_OFI(req)->event_callback) {
                    MPIDI_CH3I_NM_OFI_RC(REQ_OFI(req)->event_callback(&wc, req));
                    /* in a do/while, continue jumps to the loop condition */
                    continue;
                }
                /* otherwise fall back to the request's OnDataAvail handler */
                reqFn = req->dev.OnDataAvail;
                if (reqFn) {
                    /* release the pack buffer, if any, before the handler runs */
                    if (REQ_OFI(req)->pack_buffer) {
                        MPIU_Free(REQ_OFI(req)->pack_buffer);
                    }
                    vc = REQ_OFI(req)->vc;
                    complete = 0;
                    MPIDI_CH3I_NM_OFI_RC(reqFn(vc, req, &complete));
                    continue;
                }
                else {
                    /* a completion with no handler at all is unexpected */
                    MPIU_Assert(0);
                }
            }
            else {
                /* a completion without a context is unexpected */
                MPIU_Assert(0);
            }
        }
        /* nothing available right now: not an error */
        else if (ret == -FI_EAGAIN)
            ;
        else if (ret < 0) {
            if (ret == -FI_EAVAIL) {
                /*
                 * NOTE(review): `ret` is overwritten with the return value
                 * of fi_cq_readerr, so that value also feeds the loop
                 * condition below, and `error` is inspected without
                 * checking that the read succeeded -- confirm intended.
                 */
                ret = fi_cq_readerr(gl_data.cq, (void *) &error, 0);
                if (error.err == FI_ETRUNC) {
                    /* ----------------------------------------------------- */
                    /* This error message should only be delivered on send   */
                    /* events.  We want to ignore truncation errors          */
                    /* on the sender side, but complete the request anyway   */
                    /* Other kinds of requests, this is fatal.               */
                    /* ----------------------------------------------------- */
                    req = context_to_req(error.op_context);
                    if (req->kind == MPID_REQUEST_SEND) {
                        mpi_errno = REQ_OFI(req)->event_callback(NULL, req);
                    }
                    else if (req->kind == MPID_REQUEST_RECV) {
                        /*
                         * NOTE(review): `wc` was not filled in by the
                         * fi_cq_read call that returned -FI_EAVAIL, so the
                         * callback sees whatever it held before -- confirm.
                         */
                        mpi_errno = REQ_OFI(req)->event_callback(&wc, req);
                        req->status.MPI_ERROR = MPI_ERR_TRUNCATE;
                        req->status.MPI_TAG = error.tag;
                    }
                    else {
                        mpi_errno = MPI_ERR_OTHER;
                    }
                }
                else if (error.err == FI_ECANCELED) {
                    /* record the cancellation on the request's status */
                    req = context_to_req(error.op_context);
                    MPIR_STATUS_SET_CANCEL_BIT(req->status, TRUE);
                }
                else {
                    mpi_errno = MPI_ERR_OTHER;
                }
            }
            else {
                /* any other CQ read failure is reported as an MPI error */
                MPIR_ERR_CHKANDJUMP4(1, mpi_errno, MPI_ERR_OTHER, "**ofi_poll",
                                     "**ofi_poll %s %d %s %s", __SHORT_FILE__,
                                     __LINE__, FCNAME, fi_strerror(-ret));
            }
        }
    } while (in_blocking_poll && (ret > 0));
    END_FUNC_RC(FCNAME);
}
int main(int argc, char *argv[]) { uint64_t flags = 0; char *service = NULL; char *node = NULL; struct pingpong_context *ctx; struct timeval start, end; unsigned long size = 4096; // No provider support yet //enum ibv_mtu mtu = IBV_MTU_1024; //size_t mtu = 1024; int rx_depth_default = 500; int rx_depth = 0; int iters = 1000; int use_event = 0; int rcnt, scnt; int ret, rc = 0; char * ptr; srand48(getpid() * time(NULL)); opts = INIT_OPTS; hints = fi_allocinfo(); if (!hints) return 1; while (1) { int c; c = getopt(argc, argv, "S:m:r:n:eh" ADDR_OPTS INFO_OPTS); if (c == -1) break; switch (c) { case 'S': errno = 0; size = strtol(optarg, &ptr, 10); if (ptr == optarg || *ptr != '\0' || ((size == LONG_MIN || size == LONG_MAX) && errno == ERANGE)) { fprintf(stderr, "Cannot convert from string to long\n"); rc = 1; goto err1; } break; // No provider support yet /*case 'm': mtu = strtol(optarg, NULL, 0); mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (mtu < 0) { usage(argv[0]); return 1; } break; */ case 'r': rx_depth = strtol(optarg, NULL, 0); break; case 'n': iters = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; default: ft_parse_addr_opts(c, optarg, &opts); ft_parseinfo(c, optarg, hints); break; case '?': case 'h': usage(argv[0]); return 1; } } if (optind == argc - 1) opts.dst_addr = argv[optind]; else if (optind < argc) { usage(argv[0]); return 1; } page_size = sysconf(_SC_PAGESIZE); hints->ep_attr->type = FI_EP_MSG; hints->caps = FI_MSG; hints->mode = FI_LOCAL_MR; rc = ft_read_addr_opts(&node, &service, hints, &flags, &opts); if (rc) return -rc; rc = fi_getinfo(FT_FIVERSION, node, service, flags, hints, &fi); if (rc) { FT_PRINTERR("fi_getinfo", rc); return -rc; } fi_freeinfo(hints); if (rx_depth) { if (rx_depth > fi->rx_attr->size) { fprintf(stderr, "rx_depth requested: %d, " "rx_depth supported: %zd\n", rx_depth, fi->rx_attr->size); rc = 1; goto err1; } } else { rx_depth = (rx_depth_default > fi->rx_attr->size) ? 
fi->rx_attr->size : rx_depth_default; } ctx = pp_init_ctx(fi, size, rx_depth, use_event); if (!ctx) { rc = 1; goto err1; } if (opts.dst_addr) { /* client connect */ if (pp_connect_ctx(ctx)) { rc = 1; goto err2; } } else { /* server listen and accept */ pp_listen_ctx(ctx); pp_accept_ctx(ctx); } ctx->pending = PINGPONG_RECV_WCID; if (opts.dst_addr) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); rc = 1; goto err3; } ctx->pending |= PINGPONG_SEND_WCID; } if (gettimeofday(&start, NULL)) { perror("gettimeofday"); rc = 1; goto err3; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { struct fi_cq_entry wc; struct fi_cq_err_entry cq_err; int rd; if (use_event) { /* Blocking read */ rd = fi_cq_sread(ctx->cq, &wc, 1, NULL, -1); } else { do { rd = fi_cq_read(ctx->cq, &wc, 1); } while (rd == -FI_EAGAIN); } if (rd < 0) { fi_cq_readerr(ctx->cq, &cq_err, 0); fprintf(stderr, "cq fi_cq_readerr() %s (%d)\n", fi_cq_strerror(ctx->cq, cq_err.err, cq_err.err_data, NULL, 0), cq_err.err); rc = rd; goto err3; } switch ((int) (uintptr_t) wc.op_context) { case PINGPONG_SEND_WCID: ++scnt; break; case PINGPONG_RECV_WCID: if (--ctx->routs <= 1) { ctx->routs += pp_post_recv(ctx, ctx->rx_depth - ctx->routs); if (ctx->routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", ctx->routs); rc = 1; goto err3; } } ++rcnt; break; default: fprintf(stderr, "Completion for unknown wc_id %d\n", (int) (uintptr_t) wc.op_context); rc = 1; goto err3; } ctx->pending &= ~(int) (uintptr_t) wc.op_context; if (scnt < iters && !ctx->pending) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); rc = 1; goto err3; } ctx->pending = PINGPONG_RECV_WCID | PINGPONG_SEND_WCID; } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); rc = 1; goto err3; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec / 1000000., 
bytes * 8. / usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); } err3: fi_shutdown(ctx->ep, 0); err2: ret = pp_close_ctx(ctx); if (!rc) rc = ret; err1: fi_freeinfo(fi); return rc; }
/*
 * mca_btl_ofi_context_progress -- drain one batch of completions from the
 * context's completion queue and run the matching completion handling.
 *
 * Returns the number of completions in the batch that carried a non-NULL
 * context.  A CQ error entry, or any read result other than a positive
 * count, -FI_EAGAIN or (where defined) -FI_EINTR, is reported through
 * BTL_ERROR followed by MCA_BTL_OFI_ABORT().
 */
int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context)
{
    struct fi_cq_entry entries[MCA_BTL_OFI_DEFAULT_MAX_CQE];
    struct fi_cq_err_entry cq_error = {0};
    int completed = 0;
    int rc;

    rc = fi_cq_read(context->cq, entries, mca_btl_ofi_component.num_cqe_read);

    if (rc <= 0) {
        if (OPAL_UNLIKELY(-FI_EAVAIL == rc)) {
            int err_rc = fi_cq_readerr(context->cq, &cq_error, 0);

            /* cq readerr failed!? */
            if (0 > err_rc) {
                BTL_ERROR(("%s:%d: Error returned from fi_cq_readerr: %s(%d)",
                           __FILE__, __LINE__, fi_strerror(-err_rc), err_rc));
            } else {
                BTL_ERROR(("fi_cq_readerr: (provider err_code = %d)\n",
                           cq_error.prov_errno));
            }
            MCA_BTL_OFI_ABORT();
        }
#ifdef FI_EINTR
        /* sometimes, sockets provider complain about interrupt. We do nothing. */
        else if (OPAL_UNLIKELY(-FI_EINTR == rc)) {

        }
#endif
        /* If the error is not FI_EAGAIN, report the error and abort. */
        else if (OPAL_UNLIKELY(-FI_EAGAIN != rc)) {
            BTL_ERROR(("fi_cq_read returned error %d:%s", rc, fi_strerror(-rc)));
            MCA_BTL_OFI_ABORT();
        }

        return completed;
    }

    for (int idx = 0; idx < rc; idx++) {
        mca_btl_ofi_completion_context_t *comp_ctx;
        mca_btl_ofi_base_completion_t *base;
        mca_btl_ofi_rdma_completion_t *rdma;
        mca_btl_ofi_frag_completion_t *frag;

        /* entries without a context are not counted or processed */
        if (NULL == entries[idx].op_context) {
            continue;
        }

        ++completed;
        comp_ctx = (mca_btl_ofi_completion_context_t*) entries[idx].op_context;

        /* The same completion object viewed as each type, for simplicity. */
        base = (mca_btl_ofi_base_completion_t*) comp_ctx->comp;
        frag = (mca_btl_ofi_frag_completion_t*) comp_ctx->comp;
        rdma = (mca_btl_ofi_rdma_completion_t*) comp_ctx->comp;

        switch (base->type) {
        case MCA_BTL_OFI_TYPE_GET:
        case MCA_BTL_OFI_TYPE_PUT:
        case MCA_BTL_OFI_TYPE_AOP:
        case MCA_BTL_OFI_TYPE_AFOP:
        case MCA_BTL_OFI_TYPE_CSWAP:
            /* call the callback */
            if (rdma->cbfunc) {
                rdma->cbfunc (base->btl, base->endpoint,
                              rdma->local_address,
                              rdma->local_handle,
                              rdma->cbcontext,
                              rdma->cbdata,
                              OPAL_SUCCESS);
            }

            MCA_BTL_OFI_NUM_RDMA_DEC((mca_btl_ofi_module_t*) base->btl);
            break;

        case MCA_BTL_OFI_TYPE_RECV:
            mca_btl_ofi_recv_frag((mca_btl_ofi_module_t*) base->btl,
                                  (mca_btl_ofi_endpoint_t*) base->endpoint,
                                  context, frag->frag);
            break;

        case MCA_BTL_OFI_TYPE_SEND:
            MCA_BTL_OFI_NUM_SEND_DEC((mca_btl_ofi_module_t*) base->btl);
            mca_btl_ofi_frag_complete(frag->frag, OPAL_SUCCESS);
            break;

        default:
            /* catastrophic */
            BTL_ERROR(("unknown completion type"));
            MCA_BTL_OFI_ABORT();
        }

        /* return the completion handler */
        opal_free_list_return(base->my_list, (opal_free_list_item_t*) base);
    }

    return completed;
}