static int tagged_peek(uint64_t tag) { struct fi_cq_tagged_entry comp; struct fi_msg_tagged msg; int ret; memset(&msg, 0, sizeof msg); msg.tag = tag; msg.context = &rx_ctx; ret = fi_trecvmsg(ep, &msg, FI_PEEK); if (ret) { FT_PRINTERR("FI_PEEK", ret); return ret; } ret = fi_cq_sread(rxcq, &comp, 1, NULL, -1); if (ret != 1) { if (ret == -FI_EAVAIL) ret = ft_cq_readerr(rxcq); else FT_PRINTERR("fi_cq_sread", ret); } return ret; }
/* * rpmemd_fip_cq_thread -- completion queue worker thread */ static void * rpmemd_fip_cq_thread(void *arg) { struct rpmemd_fip *fip = arg; struct fi_cq_err_entry err; const char *str_err; ssize_t sret; int ret = 0; while (!fip->closing) { sret = fi_cq_sread(fip->cq, fip->cq_entries, fip->cq_size, NULL, RPMEM_FIP_CQ_WAIT_MS); if (unlikely(fip->closing)) break; if (unlikely(sret == -FI_EAGAIN)) continue; if (unlikely(sret < 0)) { ret = (int)sret; goto err_cq_read; } for (ssize_t i = 0; i < sret; i++) { struct fi_cq_msg_entry *entry = &fip->cq_entries[i]; RPMEMD_ASSERT(entry->op_context); struct rpmemd_fip_lane *lanep = entry->op_context; /* signal lane about SEND completion */ if (entry->flags & FI_SEND) rpmem_fip_lane_signal(&lanep->lane, FI_SEND); /* add lane to worker's ring buffer */ if (entry->flags & FI_RECV) { ret = rpmemd_fip_worker_push(lanep->worker, lanep); } if (ret) goto err; } } return 0; err_cq_read: sret = fi_cq_readerr(fip->cq, &err, 0); if (sret < 0) { RPMEMD_FI_ERR((int)sret, "error reading from completion queue: " "cannot read error from completion queue"); goto err; } str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0); RPMEMD_LOG(ERR, "error reading from completion queue: %s", str_err); err: return (void *)(uintptr_t)ret; }
static int wait_recvs() { struct fi_cq_tagged_entry entry; int ret; if (opts.comp_method == FT_COMP_SREAD) { ret = fi_cq_sread(rxcq, &entry, 1, NULL, -1); } else { do { ret = fi_cq_read(rxcq, &entry, 1); } while (ret == -FI_EAGAIN); } if ((ret == 1) && send_data) { if (entry.data != opts.transfer_size) { printf("ERROR incorrect remote CQ data value. Got %lu, expected %d\n", (unsigned long)entry.data, opts.transfer_size); return -FI_EOTHER; } } if (ret < 1) printf("ERROR fi_cq_(s)read returned %d %s\n", ret, fi_strerror(-ret)); return ret; }
static int run_test() { int ret; size_t size = 1000; uint64_t remote_cq_data; struct fi_cq_data_entry comp; if (fi->domain_attr->cq_data_size >= sizeof(uint64_t)) { remote_cq_data = 0x0123456789abcdefULL; } else { remote_cq_data = 0x0123456789abcdef & ((0x1ULL << (fi->domain_attr->cq_data_size * 8)) - 1); } if (opts.dst_addr) { fprintf(stdout, "Posting send with immediate data: 0x%" PRIx64 "\n", remote_cq_data); ret = fi_senddata(ep, buf, size, fi_mr_desc(mr), remote_cq_data, 0, buf); if (ret) { FT_PRINTERR("fi_send", ret); return ret; } ft_wait_for_comp(txcq, 1); fprintf(stdout, "Done\n"); } else { fprintf(stdout, "Waiting for immediate data from client\n"); ret = fi_cq_sread(rxcq, &comp, 1, NULL, -1); if (ret < 0) { if (ret == -FI_EAVAIL) { cq_readerr(rxcq, "rxcq"); } else { FT_PRINTERR("fi_cq_sread", ret); } return ret; } /* Verify completion data */ if (comp.flags & FI_REMOTE_CQ_DATA) { if (comp.data == remote_cq_data) fprintf(stdout, "remote_cq_data: success\n"); else fprintf(stdout, "remote_cq_data: failure\n"); fprintf(stdout, "Expected data:0x%" PRIx64 ", Received data:0x%" PRIx64 "\n", remote_cq_data, comp.data); } } return 0; }
static int cq_signal() { struct fid_cq *cq; struct fi_cq_tagged_entry entry; int64_t elapsed; int testret; int ret; testret = FAIL; ret = create_cq(&cq, 1, 0, FI_CQ_FORMAT_UNSPEC, FI_WAIT_UNSPEC); if (ret) { sprintf(err_buf, "fi_cq_open(1, 0, FI_CQ_FORMAT_UNSPEC, " "FI_WAIT_UNSPEC) = %d, %s", ret, fi_strerror(-ret)); goto fail1; } ret = fi_cq_signal(cq); if (ret) { sprintf(err_buf, "fi_cq_signal = %d %s", ret, fi_strerror(-ret)); goto fail2; } ft_start(); ret = fi_cq_sread(cq, &entry, 1, NULL, 2000); ft_stop(); elapsed = get_elapsed(&start, &end, MILLI); if (ret != -FI_EAGAIN && ret != -FI_ECANCELED) { sprintf(err_buf, "fi_cq_sread = %d %s", ret, fi_strerror(-ret)); goto fail2; } if (elapsed > 1000) { sprintf(err_buf, "fi_cq_sread - signal ignored"); goto fail2; } ret = fi_close(&cq->fid); if (ret) { sprintf(err_buf, "close(cq) = %d, %s", ret, fi_strerror(-ret)); goto fail1; } cq = NULL; testret = PASS; fail2: FT_CLOSE_FID(cq); fail1: cq = NULL; return TEST_RET_VAL(ret, testret); }
static int run_test() { int ret; size_t size = 1000; uint64_t remote_cq_data; struct fi_cq_data_entry comp; /* Set remote_cq_data based on the cq_data_size we got from fi_getinfo */ remote_cq_data = 0x0123456789abcdef & ((0x1ULL << (cq_data_size * 8)) - 1); if (dst_addr) { fprintf(stdout, "Posting send with immediate data: %lx\n", remote_cq_data); ret = fi_senddata(ep, buf, size, fi_mr_desc(mr), remote_cq_data, 0, buf); if (ret) { FI_PRINTERR("fi_send", ret); return ret; } wait_for_completion(scq, 1); fprintf(stdout, "Done\n"); } else { ret = fi_recv(ep, buf, size, fi_mr_desc(mr), 0, buf); if (ret) { FI_PRINTERR("fi_recv", ret); return ret; } fprintf(stdout, "Waiting for immediate data from client\n"); ret = fi_cq_sread(rcq, &comp, 1, NULL, -1); if (ret < 0) { if (ret == -FI_EAVAIL) { cq_readerr(rcq, "rcq"); } else { FI_PRINTERR("fi_cq_read: rcq", ret); } return ret; } /* Verify completion data */ if (comp.flags & FI_REMOTE_CQ_DATA) { if (comp.data == remote_cq_data) fprintf(stdout, "remote_cq_data: success\n"); else fprintf(stdout, "remote_cq_data: failure\n"); fprintf(stdout, "Expected data:0x%lx, Received data:0x%lx\n", remote_cq_data, comp.data); } } return 0; }
static int run_test() { int ret; size_t size = 1000; struct fi_cq_data_entry comp; if (opts.dst_addr) { fprintf(stdout, "Posting send with CQ data: 0x%" PRIx64 "\n", remote_cq_data); ret = fi_senddata(ep, buf, size, fi_mr_desc(mr), remote_cq_data, 0, buf); if (ret) { FT_PRINTERR("fi_send", ret); return ret; } ret = ft_get_tx_comp(++tx_seq); fprintf(stdout, "Done\n"); } else { fprintf(stdout, "Waiting for CQ data from client\n"); ret = fi_cq_sread(rxcq, &comp, 1, NULL, -1); if (ret < 0) { if (ret == -FI_EAVAIL) { ret = ft_cq_readerr(rxcq); } else { FT_PRINTERR("fi_cq_sread", ret); } return ret; } if (comp.flags & FI_REMOTE_CQ_DATA) { if (comp.data == remote_cq_data) { fprintf(stdout, "remote_cq_data: success\n"); ret = 0; } else { fprintf(stdout, "error, Expected data:0x%" PRIx64 ", Received data:0x%" PRIx64 "\n", remote_cq_data, comp.data); ret = -FI_EIO; } } else { fprintf(stdout, "error, CQ data flag not set\n"); ret = -FI_EBADFLAGS; } } return ret; }
/*
 * ft_wait_for_comp -- read completions from 'cq' one at a time with
 * fi_cq_sread() until the counter '*cur' reaches 'total'.
 *
 * cq:      completion queue to read from
 * cur:     running completion counter; incremented for every entry read
 * total:   counter value to wait for
 * timeout: passed unchanged to fi_cq_sread()
 *
 * -FI_EAGAIN results are retried.  Any other negative result is returned
 * to the caller.  Returns 0 once *cur >= total.
 *
 * The counters are unsigned, so the loop compares them directly: the
 * difference 'total - *cur' is non-zero whenever the two values differ, and
 * testing it against zero would keep waiting (for completions that may never
 * come) if *cur had already moved past total.
 *
 * The entry buffer is a fi_cq_err_entry because it can be cast to any CQ
 * entry format.
 */
static int ft_wait_for_comp(struct fid_cq *cq, uint64_t *cur,
			    uint64_t total, int timeout)
{
	struct fi_cq_err_entry comp;
	int ret;

	while (*cur < total) {
		ret = fi_cq_sread(cq, &comp, 1, NULL, timeout);
		if (ret > 0)
			(*cur)++;
		else if (ret < 0 && ret != -FI_EAGAIN)
			return ret;
	}

	return 0;
}
/* * rpmem_fip_process -- (internal) process completion events */ static int rpmem_fip_process(struct rpmem_fip *fip) { ssize_t sret; struct fi_cq_err_entry err; const char *str_err; int ret; struct fi_cq_msg_entry *cq_entries; cq_entries = malloc(fip->cq_size * sizeof(*cq_entries)); if (!cq_entries) { RPMEM_LOG(ERR, "!allocating completion queue buffer"); return -1; } while (!fip->closing) { sret = fi_cq_sread(fip->cq, cq_entries, fip->cq_size, NULL, RPMEM_FIP_CQ_WAIT_MS); if (unlikely(fip->closing)) break; if (unlikely(sret == -FI_EAGAIN)) continue; if (unlikely(sret < 0)) { ret = (int)sret; goto err_cq_read; } for (ssize_t i = 0; i < sret; i++) { struct fi_cq_msg_entry *comp = &cq_entries[i]; /* * If the context is NULL it probably means that * we get an unexpected CQ entry. The CQ is configured * with FI_SELECTIVE_COMPLETION so every inbound or * outbound operation must be issued with FI_COMPLETION * flag and non-NULL context. */ RPMEM_ASSERT(comp->op_context); /* read operation */ if (unlikely(comp->op_context == &fip->rd_lane)) { rpmem_fip_lane_signal(&fip->rd_lane.lane, FI_READ); continue; } /* persist operation */ ret = fip->ops->process(fip, comp->op_context, comp->flags); if (unlikely(ret)) { RPMEM_LOG(ERR, "persist operation failed"); goto err; } } } free(cq_entries); return 0; err_cq_read: sret = fi_cq_readerr(fip->cq, &err, 0); if (sret < 0) { RPMEM_FI_ERR((int)sret, "error reading from completion queue: " "cannot read error from event queue"); goto err; } str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0); RPMEM_LOG(ERR, "error reading from completion queue: %s", str_err); err: rpmem_fip_signal_all(fip, ret); free(cq_entries); return ret; }
static int send_recv() { void *context[MAX_POLL_CNT]; struct fi_cq_entry comp; int ret, send_pending = 0, recv_pending = 0; int ret_count = 0; int i; fprintf(stdout, "Posting a recv...\n"); ret = fi_recv(ep, buf, rx_size, fi_mr_desc(mr), remote_fi_addr, &fi_ctx_recv); if (ret) { FT_PRINTERR("fi_recv", ret); return ret; } recv_pending++; fprintf(stdout, "Posting a send...\n"); ret = fi_send(ep, buf, tx_size, fi_mr_desc(mr), remote_fi_addr, &fi_ctx_send); if (ret) { FT_PRINTERR("fi_send", ret); return ret; } send_pending++; while (send_pending || recv_pending) { struct fid_cq *cq; /* Poll send and recv CQs */ do { ret_count = fi_poll(pollset, context, MAX_POLL_CNT); if (ret_count < 0) { FT_PRINTERR("fi_poll", ret_count); return ret_count; } } while (!ret_count); fprintf(stdout, "Retreived %d event(s)\n", ret_count); for (i = 0; i < ret_count; i++) { if (context[i] == &txcq) { printf("Send completion received\n"); cq = txcq; send_pending--; } else if (context[i] == &rxcq) { printf("Recv completion received\n"); cq = rxcq; recv_pending--; } else { printf("Unknown completion received\n"); return -1; } /* Read the completion entry */ ret = fi_cq_sread(cq, &comp, 1, NULL, -1); if (ret < 0) { if (ret == -FI_EAVAIL) { cq_readerr(cq, "cq"); } else { FT_PRINTERR("fi_cq_sread", ret); } return ret; } } } return 0; }
int do_test(void) { struct fi_cq_msg_entry comp; int len = msg_len * post_depth; int msg_cnt = num_msgs; int tx_bufs_sent = 0; int ret; char *mp; u64 time_elap; #if SREAD == 0 int eagain_cnt = EAGAIN_TRIES; #endif print_trace("in\n"); if (!ctx.buf) { ctx.buf = kmalloc(len, GFP_KERNEL); if (!ctx.buf) { print_err("kalloc failed!\n"); return -ENOMEM; } ret = fi_mr_reg(ctx.domain, ctx.buf, len, 0, 0, 0, 0, &ctx.mr, NULL); if (ret) { print_err("fi_mr_reg returned %d\n", ret); kfree(ctx.buf); ctx.buf = ERR_PTR(-EFAULT); return ret; } } else if (IS_ERR(ctx.buf)) return 0; print_msg("post_depth %d num_msgs %d msg_len %d SREAD[%d]\n", post_depth, num_msgs, msg_len, SREAD); print_dbg("ctx.buf %p '%s' len %ld msg_len %d\n", ctx.buf, ctx.buf, strlen(ctx.buf)+1, msg_len); time_elap = get_jiffies_64(); for (mp = ctx.buf; msg_cnt > 0 && !kthread_should_stop(); ) { int post_cnt, cnt; post_cnt = (msg_cnt > post_depth ? post_depth : msg_cnt); for (cnt = 0, mp = ctx.buf; cnt < post_cnt; cnt++, mp += msg_len) { if (verify) { sprintf(mp, TEST_MESSAGE, tx_bufs_sent); tx_bufs_sent++; } ret = fi_send(ctx.ep, mp, msg_len, fi_mr_desc(ctx.mr), 0, mp); if (ret) { print_err("fi_send returned %d '%s'\n", ret, fi_strerror(ret)); return ret; } if (kthread_should_stop()) return -EINTR; } /* reap completions */ for (cnt = 0; cnt < post_cnt; cnt++) { #if SREAD ret = fi_cq_sread(ctx.scq, &comp, 1, 0, TIMEOUT); if (ret == -ETIMEDOUT) { print_msg("%s(ETIMEDOUT) cnt %d post_cnt %d " "msg_cnt %d\n", "fi_cq_sread", cnt, post_cnt, msg_cnt); } if (kthread_should_stop()) return -EINTR; #else do { ret = fi_cq_read(ctx.scq, &comp, 1); if (ret == 0 || ret == -EAGAIN) { if (--eagain_cnt <= 0) { dprint(DEBUG_HIGH, "%s(resched %d) cnt " "%d post_cnt %d\n", "fi_cq_read", ret, cnt, post_cnt); eagain_cnt = EAGAIN_TRIES; schedule(); } } if (kthread_should_stop()) return -EINTR; } while (ret == 0 || ret == -EAGAIN); #endif if (ret < 0) { struct fi_cq_err_entry cqe = { 0 }; int rc; rc = fi_cq_readerr(ctx.scq, &cqe, 0); 
print_err("fi_cq_read returned %d '%s'\n", ret, fi_strerror(ret)); if (rc) { char buf[64]; print_err("fi_cq_readerr() err '%s'(%d)" "\n", fi_strerror(cqe.err), cqe.err); print_err("fi_cq_readerr() prov_err " "'%s'(%d)\n", fi_cq_strerror(ctx.scq, cqe.prov_errno, cqe.err_data, buf, sizeof(buf)), cqe.prov_errno); } return ret; } if (!ret) print_err("fi_cq_sread no completion? ret %d\n", ret); #if 0 if ((char *)comp.op_context < (char *)ctx.buf || (char *)comp.op_context >= (char *) &ctx.buf[msg_len*post_depth]) { print_err("cq.op_context(%p) not in range " "[ctx.buf(%p) ... &ctx.buf[%d](%p)]\n", (void *)comp.op_context, (void *)ctx.buf, msg_len, (void *)&ctx.buf[msg_len]); } #endif if (verify) print_msg("Tx '%s'\n", (char *) comp.op_context); } msg_cnt -= post_cnt; } time_elap = get_jiffies_64() - time_elap; #define AGIG (1024UL*1024UL*1024UL) #define AMEG (1024UL*1024UL) #define AKILO (1024UL) { struct timeval tv; ulong rate, rate_mod, bytes, units_of; char units; jiffies_to_timeval(time_elap, &tv); bytes = (ulong) num_msgs * (ulong) msg_len; if (bytes >= AKILO && tv.tv_sec > 0) { rate = bytes / tv.tv_sec; rate_mod = bytes % tv.tv_sec; if (rate >= AGIG) { units = 'G'; units_of = AGIG; } else if (rate >= AMEG) { units = 'M'; units_of = AMEG; } else { units = 'K'; units_of = AKILO; } rate /= units_of; } else { rate = rate_mod = 0UL; units = ' '; units_of = 1UL; } print_info("Tx %d msgs (%lu.%lu%cB) @ ~%lu.%lu %cB/sec (%ld sec %ld " "usec)\n", num_msgs, (bytes/units_of), (bytes % units_of), units, rate, rate_mod, units, tv.tv_sec, tv.tv_usec); } return 0; }
static int send_recv() { struct fi_cq_entry comp; struct epoll_event event; int ret; if (opts.dst_addr) { /* Client */ fprintf(stdout, "Posting a send...\n"); sprintf(buf, "Hello World!"); ret = fi_send(ep, buf, sizeof("Hello World!"), fi_mr_desc(mr), 0, buf); if (ret) { FT_PRINTERR("fi_send", ret); return ret; } memset((void *)&event, 0, sizeof event); ret = TEMP_FAILURE_RETRY(epoll_wait(epfd, &event, 1, -1)); if (ret < 0) { ret = -errno; FT_PRINTERR("epoll_wait", ret); return ret; } if (event.data.ptr != &scq->fid) { fprintf(stdout, "unexpected event!\n"); } /* Read send queue */ ret = fi_cq_sread(scq, &comp, 1, NULL, 0); if (ret < 0) { FT_PROCESS_CQ_ERR(ret, scq, "fi_cq_sread", "scq"); return ret; } fprintf(stdout, "Send completion received\n"); } else { /* Server */ fprintf(stdout, "Posting a recv...\n"); ret = fi_recv(ep, buf, buffer_size, fi_mr_desc(mr), 0, buf); if (ret) { FT_PRINTERR("fi_recv", ret); return ret; } fprintf(stdout, "Waiting for client...\n"); memset((void *)&event, 0, sizeof event); ret = TEMP_FAILURE_RETRY(epoll_wait(epfd, &event, 1, -1)); if (ret < 0) { ret = -errno; FT_PRINTERR("epoll_wait", ret); return ret; } if (event.data.ptr != &rcq->fid) { fprintf(stdout, "unexpected event!\n"); } /* Read recv queue */ ret = fi_cq_sread(rcq, &comp, 1, NULL, 0); if (ret < 0) { FT_PROCESS_CQ_ERR(ret, rcq, "fi_cq_sread", "rcq"); return ret; } fprintf(stdout, "Received data from client: %s\n", (char *)buf); } return 0; }
static void test_connect_with_accept_blocking_on_eq_fq_CLIENT(void) { int ret; printf("CLIENT running\n"); // Get the server's node (IP addr) and service (port) MPI_Recv(ofi_node, sizeof(ofi_node) - 1, MPI_CHAR, 0, 101, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(ofi_service, sizeof(ofi_service) - 1, MPI_CHAR, 0, 102, MPI_COMM_WORLD, MPI_STATUS_IGNORE); printf("CLIENT received via MPI: %s / %s\n", ofi_node, ofi_service); //setup_ofi(ofi_node, ofi_service); setup_ofi(NULL, NULL, 0); memset(&sin, 0, sizeof(sin)); sin.sin_family = AF_INET; inet_aton(ofi_node, &sin.sin_addr); sin.sin_port = htons(atoi(ofi_service)); printf("CLIENT translated: %s\n", addrstr(&sin)); setup_ofi_active(fidev.info, &ficonn.ep); // Print server addr printf("CLIENT connecting to %s\n", addrstr(&sin)); // Connect! printf("Client connecting...\n"); ret = fi_connect(ficonn.ep, //fidev.info->dest_addr, &sin, (void*) client_data, sizeof(client_data)); if (ret < 0) { error("fi_connect failed"); } #if WANT_FDS // Now wait for the listen to complete int nevents; #define NEVENTS 32 struct epoll_event events[NEVENTS]; int timeout = 10000; while (1) { printf("CLIENT blocking on epoll\n"); nevents = epoll_wait(epoll_fd, events, NEVENTS, timeout); if (nevents < 0) { if (errno != EINTR) { error("client epoll wait failed"); } else { continue; } } else { printf("CLIENT successfully woke up from epoll! 
%d events\n", nevents); for (int i = 0; i < nevents; ++i) { if (events[i].data.u32 != 2222) { error("CLIENT unexpected epoll return type"); } } // If we got the expected event, then go read from the EQ break; } } #endif // Wait for FI_CONNECTED event uint32_t event; uint8_t *entry_buffer; size_t expected_len = sizeof(struct fi_eq_cm_entry) + sizeof(client_data); entry_buffer = (uint8_t*) calloc(1, expected_len); if (NULL == entry_buffer) { error("calloc failed"); } struct fi_eq_cm_entry *entry = (struct fi_eq_cm_entry*) entry_buffer; while (1) { printf("CLIENT waiting for FI_CONNECTED\n"); #if WANT_FDS ret = fi_eq_read(fidev.eq, &event, entry, expected_len, 0); #else ret = fi_eq_sread(fidev.eq, &event, entry, expected_len, -1, 0); #endif if (-FI_EAVAIL == ret) { fprintf(stderr, "client fi_eq_sread failed because there's something in the error queue\n"); char buffer[2048]; struct fi_eq_err_entry *err_entry = (struct fi_eq_err_entry*) buffer; ret = fi_eq_readerr(fidev.eq, err_entry, 0); fprintf(stderr, "error code: %d (%s), prov err code: %d (%s)\n", err_entry->err, fi_strerror(err_entry->err), err_entry->prov_errno, fi_strerror(err_entry->prov_errno)); error("sad panda"); } else if (ret == -EAGAIN) { fprintf(stderr, "CLIENT fi_eq_sread fail got -EAGAIN... 
trying again...\n"); sleep(1); continue; } else if (ret < 0) { fprintf(stderr, "SERVER fi_eq_sread fail: %s, ret = %d)\n", fi_strerror(-ret), ret); error("client fi_eq_sread failed for some random reason"); } else if (event != FI_CONNECTED) { error("client got some unexpected event"); } else if (ret != expected_len) { error("client got wrong length back from fi_eq_sread"); } uint32_t *d = (uint32_t*) entry->data; for (int i = 0; i < (sizeof(server_data) / sizeof(uint32_t)); ++i) { if (d[i] != server_data[i]) { printf("CLIENT got wrong CM client data: d[%d]=%d, should be %d\n", i, d[i], server_data[i]); } } printf("client got FI_CONNECTED, correct size, and correct data -- yay!\n"); break; } printf("CLIENT connecting -- waiting for server before sending\n"); MPI_Barrier(MPI_COMM_WORLD); sleep(1); int msg[4] = { 99, 100, 101, 102 }; int len = sizeof(msg); printf("CLIENT sending len of %d\n", len); struct fid_mr no_mr; struct fid_mr *mr; void *send_context = (void*) 0x42; #if 0 fi_mr_reg(fidev.domain, msg, len, FI_SEND | FI_RECV, 0, (uint64_t)(uintptr_t) msg, 0, &mr, NULL); #else // Try using no mr, like fi_msg_pingpong... memset(&no_mr, 0, sizeof(no_mr)); mr = &no_mr; #endif ret = fi_send(ficonn.ep, msg, len, fi_mr_desc(mr), 0, send_context); if (ret < 0) { printf("fi_Send failed! %d, %s\n", ret, fi_strerror(-ret)); MPI_Abort(MPI_COMM_WORLD, 37); } // Wait for send completion struct fi_cq_entry cqe; while (1) { ret = fi_cq_sread(ficonn.cq, &cqe, 1, 0, -1); if (cqe.op_context == send_context) { printf("CLIENT send completed\n"); break; } else { printf("CLIENT got some other completion... continuing\n"); } } printf("CLIENT sent -- waiting for server before teardown\n"); MPI_Barrier(MPI_COMM_WORLD); printf("CLIENT tearing down\n"); fi_close(&(mr->fid)); teardown_ofi(); }
static void test_connect_with_accept_blocking_on_eq_fq_SERVER(void) { int ret; printf("SERVER running\n"); setup_ofi(NULL, NULL, FI_SOURCE); #if WANT_FDS // Add the EQ FD to the epoll fd static struct epoll_event edt; memset(&edt, 0, sizeof(edt)); edt.events = EPOLLIN; edt.data.u32 = 2222; ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fidev.eq_fd, &edt); if (ret < 0) { error("server epoll_ctl failed"); } #endif // Make a PEP ret = fi_passive_ep(fidev.fabric, fidev.info, &fidev.pep, NULL); if (0 != ret) { error("fi_passive_ep failed"); } #if WANT_FIXED_PORT size_t ss = sizeof(sin); ret = fi_getname(&(fidev.pep->fid), &sin, &ss); if (0 != ret) { error("fi_setname failed"); } sin.sin_port = htons(listen_port); // Bind the PEP to listen on a specific port ret = fi_setname(&(fidev.pep->fid), &sin, sizeof(sin)); if (0 != ret) { error("fi_setname failed"); } #endif // Bind the EQ to the PEP ret = fi_pep_bind(fidev.pep, &fidev.eq->fid, 0); if (0 != ret) { error("fi_pep_bind failed"); } // Listen ret = fi_listen(fidev.pep); if (0 != ret) { error("fi_listen failed"); } // Get the actual address of this PEP struct sockaddr_in sinout; size_t s = sizeof(sinout); ret = fi_getname(&(fidev.pep->fid), &sinout, &s); if (0 != ret) { error("fi_setname failed"); } sin.sin_family = sinout.sin_family; sin.sin_addr = sinout.sin_addr; sin.sin_port = sinout.sin_port; // Print server addr printf("SERVER listening on %s\n", addrstr(&sin)); // Send our node (IP addr) and service (port) to the client snprintf(ofi_node, sizeof(ofi_node) - 1, "%s", inet_ntoa(sin.sin_addr)); snprintf(ofi_service, sizeof(ofi_service) - 1, "%d", ntohs(sin.sin_port)); MPI_Send(ofi_node, sizeof(ofi_node) - 1, MPI_CHAR, 1, 101, MPI_COMM_WORLD); MPI_Send(ofi_service, sizeof(ofi_service) - 1, MPI_CHAR, 1, 102, MPI_COMM_WORLD); printf("SERVER sent via MPI to client: %s / %s\n", ofi_node, ofi_service); #if WANT_FDS // Now wait for the listen to complete int nevents; #define NEVENTS 32 struct epoll_event events[NEVENTS]; int 
timeout = 10000; while (1) { printf("SERVER blocking on epoll\n"); nevents = epoll_wait(epoll_fd, events, NEVENTS, timeout); if (nevents < 0) { if (errno != EINTR) { error("server epoll wait failed"); } else { continue; } } else { printf("SERVER successfully woke up from epoll! %d events\n", nevents); for (int i = 0; i < nevents; ++i) { if (events[i].data.u32 != 2222) { error("server unexpected epoll return type"); } } // If we got the expected event, then go read from the EQ break; } } #endif // Wait for the FI_CONNREQ event uint32_t event; uint8_t *entry_buffer; size_t expected_len = sizeof(struct fi_eq_cm_entry) + sizeof(client_data); entry_buffer = (uint8_t*) calloc(1, expected_len); if (NULL == entry_buffer) { error("calloc failed"); } struct fi_eq_cm_entry *entry = (struct fi_eq_cm_entry*) entry_buffer; while (1) { printf("SERVER waiting for FI_CONNREQ\n"); #if WANT_FDS ret = fi_eq_read(fidev.eq, &event, entry, expected_len, 0); #else ret = fi_eq_sread(fidev.eq, &event, entry, expected_len, -1, 0); #endif if (-FI_EAVAIL == ret) { printf("server fi_eq_sread failed because there's something in the error queue\n"); char buffer[2048]; struct fi_eq_err_entry *err_entry = (struct fi_eq_err_entry*) buffer; ret = fi_eq_readerr(fidev.eq, err_entry, 0); printf("error code: %d (%s), prov err code: %d (%s)\n", err_entry->err, fi_strerror(err_entry->err), err_entry->prov_errno, fi_strerror(err_entry->prov_errno)); error("sad panda"); } else if (-EAGAIN == ret) { fprintf(stderr, "SERVER fi_eq_sread fail got -EAGAIN... 
trying again...\n"); sleep(1); continue; } else if (ret < 0) { fprintf(stderr, "SERVER fi_eq_sread fail: %s (FI_EAVAIL = %d, -ret = %d)\n", fi_strerror(-ret), FI_EAVAIL, -ret); error("SERVER fi_eq_sread failed for some random reason"); } else if (event != FI_CONNREQ) { error("SERVER got some unexpected event"); } else if (ret != expected_len) { error("SERVER got wrong length back from fi_eq_sread"); } uint32_t *d = (uint32_t*) entry->data; for (int i = 0; i < (sizeof(client_data) / sizeof(uint32_t)); ++i) { if (d[i] != client_data[i]) { printf("SERVER got wrong CM client data: d[%d]=%d, should be %d\n", i, d[i], client_data[i]); } } printf("SERVER got FI_CONNREQ, correct size, and correct data -- yay!\n"); break; } // Silly logistics: setup_ofi_active adds the fd to the epoll set. // But we already added it. So for simplicity, just remove it // here so that setup_ofi_active() can re-add it. #if WANT_FDS // Remove the EQ FD from the epoll fd ret = epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fidev.eq_fd, &edt); if (ret < 0) { error("server epoll_ctl DEL failed"); } #endif // Make an active endpoint setup_ofi_active(entry->info, &ficonn.ep); // Accept the incoming connection ret = fi_accept(ficonn.ep, (void*) server_data, sizeof(server_data)); if (ret != 0) { printf("fi_accept: ret=%d, %s\n", ret, fi_strerror(-ret)); error("SERVER fi_accept failed\n"); } // Need to read and get a FI_CONNECTED event while (1) { printf("SERVER waiting for FI_CONNECTED\n"); #if WANT_FDS ret = fi_eq_read(fidev.eq, &event, entry, expected_len, 0); #else ret = fi_eq_sread(fidev.eq, &event, entry, expected_len, -1, 0); #endif if (-FI_EAVAIL == ret) { printf("server fi_eq_sread failed because there's something in the error queue\n"); char buffer[2048]; struct fi_eq_err_entry *err_entry = (struct fi_eq_err_entry*) buffer; ret = fi_eq_readerr(fidev.eq, err_entry, 0); printf("error code: %d (%s), prov err code: %d (%s)\n", err_entry->err, fi_strerror(err_entry->err), err_entry->prov_errno, 
fi_strerror(err_entry->prov_errno)); error("sad panda"); } else if (-EAGAIN == ret) { fprintf(stderr, "SERVER fi_eq_sread fail got -EAGAIN... trying again...\n"); sleep(1); continue; } else if (ret < 0) { fprintf(stderr, "SERVER fi_eq_sread fail: %s (FI_EAVAIL = %d, -ret = %d)\n", fi_strerror(-ret), FI_EAVAIL, -ret); error("SERVER fi_eq_sread failed for some random reason"); } else if (event != FI_CONNECTED) { error("SERVER got some unexpected event"); } printf("SERVER got FI_CONNECTED -- yay!\n"); break; } // Post a recv buffer for the client to send int msg[4] = { 0 }; int len = sizeof(msg); printf("SERVER receiving len of %d\n", len); struct fid_mr no_mr; struct fid_mr *mr; void *recv_context = (void*) 0x17; #if 0 fi_mr_reg(fidev.domain, msg, len, FI_SEND | FI_RECV, 0, (uint64_t)(uintptr_t) msg, 0, &mr, NULL); #else // Try using no mr, like fi_msg_pingpong... memset(&no_mr, 0, sizeof(no_mr)); mr = &no_mr; #endif ret = fi_recv(ficonn.ep, msg, len, fi_mr_desc(mr), 0, recv_context); if (ret < 0) { printf("fi_recv failed! %d, %s\n", ret, fi_strerror(-ret)); MPI_Abort(MPI_COMM_WORLD, 37); } sleep(1); printf("SERVER posted receive -- waiting for client to send\n"); MPI_Barrier(MPI_COMM_WORLD); // Wait for receive completion struct fi_cq_entry cqe; while (1) { ret = fi_cq_sread(ficonn.cq, &cqe, 1, 0, -1); if (cqe.op_context == recv_context) { printf("SERVER receive completed\n"); break; } else { printf("SERVER got some other completion... continuing\n"); } } printf("SERVER finished -- waiting for client before teardown\n"); MPI_Barrier(MPI_COMM_WORLD); printf("SERVER tearing down\n"); fi_close(&(mr->fid)); teardown_ofi(); }
int main(int argc, char *argv[]) { uint64_t flags = 0; char *service = NULL; char *node = NULL; struct pingpong_context *ctx; struct timeval start, end; unsigned long size = 4096; // No provider support yet //enum ibv_mtu mtu = IBV_MTU_1024; //size_t mtu = 1024; int rx_depth_default = 500; int rx_depth = 0; int iters = 1000; int use_event = 0; int rcnt, scnt; int ret, rc = 0; char * ptr; srand48(getpid() * time(NULL)); opts = INIT_OPTS; hints = fi_allocinfo(); if (!hints) return 1; while (1) { int c; c = getopt(argc, argv, "S:m:r:n:eh" ADDR_OPTS INFO_OPTS); if (c == -1) break; switch (c) { case 'S': errno = 0; size = strtol(optarg, &ptr, 10); if (ptr == optarg || *ptr != '\0' || ((size == LONG_MIN || size == LONG_MAX) && errno == ERANGE)) { fprintf(stderr, "Cannot convert from string to long\n"); rc = 1; goto err1; } break; // No provider support yet /*case 'm': mtu = strtol(optarg, NULL, 0); mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); if (mtu < 0) { usage(argv[0]); return 1; } break; */ case 'r': rx_depth = strtol(optarg, NULL, 0); break; case 'n': iters = strtol(optarg, NULL, 0); break; case 'e': ++use_event; break; default: ft_parse_addr_opts(c, optarg, &opts); ft_parseinfo(c, optarg, hints); break; case '?': case 'h': usage(argv[0]); return 1; } } if (optind == argc - 1) opts.dst_addr = argv[optind]; else if (optind < argc) { usage(argv[0]); return 1; } page_size = sysconf(_SC_PAGESIZE); hints->ep_attr->type = FI_EP_MSG; hints->caps = FI_MSG; hints->mode = FI_LOCAL_MR; rc = ft_read_addr_opts(&node, &service, hints, &flags, &opts); if (rc) return -rc; rc = fi_getinfo(FT_FIVERSION, node, service, flags, hints, &fi); if (rc) { FT_PRINTERR("fi_getinfo", rc); return -rc; } fi_freeinfo(hints); if (rx_depth) { if (rx_depth > fi->rx_attr->size) { fprintf(stderr, "rx_depth requested: %d, " "rx_depth supported: %zd\n", rx_depth, fi->rx_attr->size); rc = 1; goto err1; } } else { rx_depth = (rx_depth_default > fi->rx_attr->size) ? 
fi->rx_attr->size : rx_depth_default; } ctx = pp_init_ctx(fi, size, rx_depth, use_event); if (!ctx) { rc = 1; goto err1; } if (opts.dst_addr) { /* client connect */ if (pp_connect_ctx(ctx)) { rc = 1; goto err2; } } else { /* server listen and accept */ pp_listen_ctx(ctx); pp_accept_ctx(ctx); } ctx->pending = PINGPONG_RECV_WCID; if (opts.dst_addr) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); rc = 1; goto err3; } ctx->pending |= PINGPONG_SEND_WCID; } if (gettimeofday(&start, NULL)) { perror("gettimeofday"); rc = 1; goto err3; } rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { struct fi_cq_entry wc; struct fi_cq_err_entry cq_err; int rd; if (use_event) { /* Blocking read */ rd = fi_cq_sread(ctx->cq, &wc, 1, NULL, -1); } else { do { rd = fi_cq_read(ctx->cq, &wc, 1); } while (rd == -FI_EAGAIN); } if (rd < 0) { fi_cq_readerr(ctx->cq, &cq_err, 0); fprintf(stderr, "cq fi_cq_readerr() %s (%d)\n", fi_cq_strerror(ctx->cq, cq_err.err, cq_err.err_data, NULL, 0), cq_err.err); rc = rd; goto err3; } switch ((int) (uintptr_t) wc.op_context) { case PINGPONG_SEND_WCID: ++scnt; break; case PINGPONG_RECV_WCID: if (--ctx->routs <= 1) { ctx->routs += pp_post_recv(ctx, ctx->rx_depth - ctx->routs); if (ctx->routs < ctx->rx_depth) { fprintf(stderr, "Couldn't post receive (%d)\n", ctx->routs); rc = 1; goto err3; } } ++rcnt; break; default: fprintf(stderr, "Completion for unknown wc_id %d\n", (int) (uintptr_t) wc.op_context); rc = 1; goto err3; } ctx->pending &= ~(int) (uintptr_t) wc.op_context; if (scnt < iters && !ctx->pending) { if (pp_post_send(ctx)) { fprintf(stderr, "Couldn't post send\n"); rc = 1; goto err3; } ctx->pending = PINGPONG_RECV_WCID | PINGPONG_SEND_WCID; } } if (gettimeofday(&end, NULL)) { perror("gettimeofday"); rc = 1; goto err3; } { float usec = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); long long bytes = (long long) size * iters * 2; printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n", bytes, usec / 1000000., 
bytes * 8. / usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); } err3: fi_shutdown(ctx->ep, 0); err2: ret = pp_close_ctx(ctx); if (!rc) rc = ret; err1: fi_freeinfo(fi); return rc; }