static void do_read_wait(int len) { int i, iters = 100; ssize_t sz; uint64_t old_w_cnt, new_w_cnt; uint64_t old_r_cnt; #define READ_CTX 0x4e3dda1aULL init_data(source, len, 0); init_data(target, len, 0xad); old_w_cnt = fi_cntr_read(write_cntr); old_r_cnt = fi_cntr_read(read_cntr); for (i = 0; i < iters; i++) { sz = fi_read(ep[0], source, len, loc_mr, gni_addr[1], (uint64_t)target, mr_key, (void *)READ_CTX); cr_assert_eq(sz, 0); } fi_cntr_wait(read_cntr, old_r_cnt + iters, -1); cr_assert(check_data(source, target, len), "Data mismatch"); new_w_cnt = fi_cntr_read(write_cntr); /* * no fi_read called so old and new read cnts should be equal */ cr_assert(new_w_cnt == old_w_cnt); }
static void do_write_wait(int len) { uint64_t old_w_cnt, new_w_cnt; uint64_t old_r_cnt, new_r_cnt; ssize_t sz; const int iters = 100; int i; init_data(source, len, 0xab); init_data(target, len, 0); old_w_cnt = fi_cntr_read(write_cntr); old_r_cnt = fi_cntr_read(read_cntr); for (i = 0; i < iters; i++) { sz = fi_write(ep[0], source, len, loc_mr, gni_addr[1], (uint64_t)target, mr_key, target); cr_assert_eq(sz, 0); } fi_cntr_wait(write_cntr, old_w_cnt+iters, -1); new_w_cnt = fi_cntr_read(write_cntr); cr_assert(old_w_cnt + iters == new_w_cnt); cr_assert(check_data(source, target, len), "Data mismatch"); new_r_cnt = fi_cntr_read(read_cntr); /* * no fi_read called so old and new read cnts should be equal */ cr_assert(new_r_cnt == old_r_cnt); }
static void do_read(int len) { ssize_t sz; uint64_t old_w_cnt, new_w_cnt; uint64_t old_r_cnt, new_r_cnt; #define READ_CTX 0x4e3dda1aULL init_data(source, len, 0); init_data(target, len, 0xad); old_w_cnt = fi_cntr_read(write_cntr); old_r_cnt = fi_cntr_read(read_cntr); sz = fi_read(ep[0], source, len, loc_mr, gni_addr[1], (uint64_t)target, mr_key, (void *)READ_CTX); cr_assert_eq(sz, 0); do { new_r_cnt = fi_cntr_read(read_cntr); if (new_r_cnt == (old_r_cnt + 1)) break; pthread_yield(); } while (1); cr_assert(check_data(source, target, len), "Data mismatch"); new_w_cnt = fi_cntr_read(write_cntr); /* * no fi_read called so old and new read cnts should be equal */ cr_assert(new_w_cnt == old_w_cnt); }
/*
 * Add the given deltas to the running expected totals (writes, reads,
 * write_errs, read_errs) and assert that the write/read counters and their
 * error counts match those totals.
 *
 * w, r     - completions expected since the previous check
 * w_e, r_e - error completions expected since the previous check
 */
void rdm_rma_check_cntrs(uint64_t w, uint64_t r, uint64_t w_e, uint64_t r_e)
{
	uint64_t observed;

	/* update every expectation first, then compare one counter at a time */
	writes += w;
	reads += r;
	write_errs += w_e;
	read_errs += r_e;

	observed = fi_cntr_read(write_cntr);
	cr_assert(observed == writes, "Bad write count");

	observed = fi_cntr_read(read_cntr);
	cr_assert(observed == reads, "Bad read count");

	observed = fi_cntr_readerr(write_cntr);
	cr_assert(observed == write_errs, "Bad write err count");

	observed = fi_cntr_readerr(read_cntr);
	cr_assert(observed == read_errs, "Bad read err count");
}
/*
 * Per-endpoint counter check: for each of the NUMEPS endpoints, add the
 * supplied deltas to the running expected totals (sends, recvs, send_errs,
 * recv_errs) and assert that the send/recv counters and their error counts
 * match.
 *
 * s, r     - per-endpoint completions expected since the previous check
 * s_e, r_e - per-endpoint error completions expected since the previous check
 */
void rdm_sr_check_cntrs(uint64_t s[], uint64_t r[], uint64_t s_e[],
			uint64_t r_e[])
{
	uint64_t observed;
	int e;

	for (e = 0; e < NUMEPS; e++) {
		/* fold this endpoint's deltas into its expected totals */
		sends[e] += s[e];
		recvs[e] += r[e];
		send_errs[e] += s_e[e];
		recv_errs[e] += r_e[e];

		observed = fi_cntr_read(send_cntr[e]);
		cr_assert(observed == sends[e], "Bad send count");

		observed = fi_cntr_read(recv_cntr[e]);
		cr_assert(observed == recvs[e], "Bad recv count");

		observed = fi_cntr_readerr(send_cntr[e]);
		cr_assert(observed == send_errs[e], "Bad send err count");

		observed = fi_cntr_readerr(recv_cntr[e]);
		cr_assert(observed == recv_errs[e], "Bad recv err count");
	}
}
/* * this test attempts to demonstrate issue ofi-cray/libfabric-cray#559. * For domains with control_progress AUTO, this test should not hang. */ Test(rdm_sr, inject_progress) { int ret, len = 64; ssize_t sz; struct fi_cq_tagged_entry cqe; uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; uint64_t r_e[NUMEPS] = {0}; rdm_sr_init_data(source, len, 0x23); rdm_sr_init_data(target, len, 0); sz = fi_inject(ep[0], source, len, gni_addr[1]); cr_assert_eq(sz, 0); sz = fi_recv(ep[1], target, len, rem_mr[1], gni_addr[0], source); cr_assert_eq(sz, 0); /* * do progress until send counter is updated. * This works because we have FI_PROGRESS_AUTO for control progress */ while (fi_cntr_read(send_cntr[0]) < 1) { pthread_yield(); } while ((ret = fi_cq_read(msg_cq[1], &cqe, 1)) == -FI_EAGAIN) { pthread_yield(); } cr_assert_eq(ret, 1); rdm_sr_check_cqe(&cqe, source, (FI_MSG|FI_RECV), target, len, (uint64_t)source); dbg_printf("got recv context event!\n"); s[0] = 1; r[1] = 1; rdm_sr_check_cntrs(s, r, s_e, r_e); /* make sure inject does not generate a send competion */ cr_assert_eq(fi_cq_read(msg_cq[0], &cqe, 1), -FI_EAGAIN); cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); }
int ft_get_rx_comp(uint64_t total) { int ret = FI_SUCCESS; if (rxcq) { ret = ft_get_cq_comp(rxcq, &rx_cq_cntr, total, timeout); } else if (rxcntr) { while (fi_cntr_read(rxcntr) < total) { ret = fi_cntr_wait(rxcntr, total, timeout); if (ret) FT_PRINTERR("fi_cntr_wait", ret); else break; } } else { FT_ERR("Trying to get a RX completion when no RX CQ or counter were opened"); ret = -FI_EOTHER; } return ret; }
/* ssize_t fi_injectdata(struct fid_ep *ep, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr); */ void do_injectdata(int len) { int ret; ssize_t sz; struct fi_cq_tagged_entry cqe; uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; uint64_t r_e[NUMEPS] = {0}; rdm_sr_init_data(source, len, 0xab); rdm_sr_init_data(target, len, 0); sz = fi_injectdata(ep[0], source, len, (uint64_t)source, gni_addr[1]); cr_assert_eq(sz, 0); sz = fi_recv(ep[1], target, len, rem_mr[0], gni_addr[0], source); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(msg_cq[1], &cqe, 1)) == -FI_EAGAIN) { pthread_yield(); /* Manually progress connection to domain 1 */ fi_cq_read(msg_cq[0], &cqe, 1); } rdm_sr_check_cqe(&cqe, source, (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA), target, len, (uint64_t)source); dbg_printf("got recv context event!\n"); /* don't progress until send counter is updated */ while (fi_cntr_read(send_cntr[0]) < 1) { pthread_yield(); } s[0] = 1; r[1] = 1; rdm_sr_check_cntrs(s, r, s_e, r_e); /* make sure inject does not generate a send competion */ cr_assert_eq(fi_cq_read(msg_cq[0], &cqe, 1), -FI_EAGAIN); cr_assert(rdm_sr_check_data(source, target, len), "Data mismatch"); }
void sep_injectdata(int index, int len) { int ret; ssize_t sz; struct fi_cq_tagged_entry cqe = { (void *) -1, UINT_MAX, UINT_MAX, (void *) -1, UINT_MAX, UINT_MAX }; uint64_t s[NUMEPS] = {0}, r[NUMEPS] = {0}, s_e[NUMEPS] = {0}; uint64_t r_e[NUMEPS] = {0}; sep_init_data(source, len, 0x9b + index); sep_init_data(target, len, 0); sz = fi_injectdata(tx_ep[0][index], source, len, (uint64_t)source, rx_addr[index]); cr_assert_eq(sz, 0); sz = fi_recv(rx_ep[1][index], target, len, rem_mr[0], FI_ADDR_UNSPEC, source); cr_assert_eq(sz, 0); while ((ret = fi_cq_read(rx_cq[1][index], &cqe, 1)) == -FI_EAGAIN) { pthread_yield(); /* Manually progress connection to domain 1 */ fi_cq_read(tx_cq[0][index], &cqe, 1); } sep_check_cqe(&cqe, source, (FI_MSG|FI_RECV|FI_REMOTE_CQ_DATA), target, len, (uint64_t)source, false); /* don't progress until send counter is updated */ while (fi_cntr_read(send_cntr[0]) < 1) { pthread_yield(); } s[0] = 1; r[1] = 1; sep_check_cntrs(s, r, s_e, r_e); /* make sure inject does not generate a send competion */ cr_assert_eq(fi_cq_read(tx_cq[0][index], &cqe, 1), -FI_EAGAIN); cr_assert(sep_check_data(source, target, len), "Data mismatch"); }
void do_sep_send_recv_iter(int idx, int len) { ssize_t sz; int i = 0; uint64_t cntr; ssize_t ret, src_done, dest_done; struct fi_cq_tagged_entry s_cqe = {(void *) -1, UINT_MAX, UINT_MAX, (void *) -1, UINT_MAX, UINT_MAX}; struct fi_cq_tagged_entry d_cqe = {(void *) -1, UINT_MAX, UINT_MAX, (void *) -1, UINT_MAX, UINT_MAX}; struct fi_cq_tagged_entry s_expected_cqe, d_expected_cqe; init_bufs((void **) source, NUMEPS, len); init_bufs((void **) target, NUMEPS, len); for (i = 0; i < NUMEPS; i++) { dbg_printf(BLUE "From ep(%d) to ep(%d) of xfer size %d\n" COLOR_RESET, i, NUMEPS - 1 - i, len); s_expected_cqe.buf = NULL; s_expected_cqe.data = 0; s_expected_cqe.flags = (FI_MSG | FI_TRANSMIT/*FI_SEND*/); s_expected_cqe.len = 0; s_expected_cqe.op_context = target[NUMEPS - 1 - i]; s_expected_cqe.tag = 0; sz = fi_send(tx_ep[i][idx], source[i], len, NULL, gni_addr[NUMEPS - 1 - i], target[NUMEPS - 1 - i]); cr_assert(sz == FI_SUCCESS, "Invalid return value: %s", fi_strerror((int) -sz)); d_expected_cqe.buf = NULL; d_expected_cqe.data = 0; d_expected_cqe.flags = (FI_MSG | FI_RECV); d_expected_cqe.len = len; d_expected_cqe.op_context = source[i]; d_expected_cqe.tag = 0; sz = fi_recv(rx_ep[NUMEPS - 1 - i][idx], target[NUMEPS - 1 - i], len, NULL, gni_addr[i], source[i]); cr_assert(sz == FI_SUCCESS, "Invalid return value: %s", fi_strerror((int) -sz)); src_done = dest_done = 0; /* Progress sender and receiver */ do { ret = fi_cq_read(tx_cq[i][idx], &s_cqe, 1); if (ret == 1) src_done = 1; ret = fi_cq_read(rx_cq[NUMEPS - 1 - i][idx], &d_cqe, 1); if (ret == 1) dest_done = 1; } while (src_done != 1 || dest_done != 1); cntr = fi_cntr_read(send_cntr[i]); cr_assert(cntr == ++sends[i], "Invalid send counter: actual(%lu), expected(%lu)", cntr, sends[i]); cntr = fi_cntr_read(recv_cntr[NUMEPS - 1 - i]); cr_assert(cntr == ++recvs[NUMEPS - 1 - i], "Invalid recv counter: actual(%lu), expected(%lu)", cntr, recvs[NUMEPS - 1 - i]); check_tagged_cqe(s_expected_cqe, s_cqe); check_tagged_cqe(d_expected_cqe, 
d_cqe); check_buf(source[i], target[NUMEPS - 1 - i], len); } }
Test(cntr, send_recv) { int ret, i, got_r = 0; struct fi_context r_context, s_context; struct fi_cq_entry cqe; uint64_t old_s_cnt, new_s_cnt; uint64_t old_r_cnt, new_r_cnt; char s_buffer[128], r_buffer[128]; old_s_cnt = fi_cntr_read(write_cntr); old_r_cnt = fi_cntr_read(rcv_cntr); for (i = 0; i < 16; i++) { sprintf(s_buffer, "Hello there iter=%d", i); memset(r_buffer, 0, 128); ret = fi_recv(ep[1], r_buffer, sizeof(r_buffer), NULL, gni_addr[0], &r_context); cr_assert_eq(ret, FI_SUCCESS, "fi_recv"); ret = fi_send(ep[0], s_buffer, strlen(s_buffer), NULL, gni_addr[1], &s_context); cr_assert_eq(ret, FI_SUCCESS, "fi_send"); while ((ret = fi_cq_read(send_cq, &cqe, 1)) == -FI_EAGAIN) pthread_yield(); cr_assert((cqe.op_context == &r_context) || (cqe.op_context == &s_context), "fi_cq_read"); got_r = (cqe.op_context == &r_context) ? 1 : 0; if (got_r) { new_r_cnt = fi_cntr_read(rcv_cntr); old_r_cnt++; cr_assert(new_r_cnt == old_r_cnt); } else { new_s_cnt = fi_cntr_read(write_cntr); old_s_cnt++; cr_assert(new_s_cnt == old_s_cnt); } while ((ret = fi_cq_read(recv_cq, &cqe, 1)) == -FI_EAGAIN) pthread_yield(); if (got_r) cr_assert((cqe.op_context == &s_context), "fi_cq_read"); else cr_assert((cqe.op_context == &r_context), "fi_cq_read"); if (got_r) { new_s_cnt = fi_cntr_read(write_cntr); old_s_cnt++; cr_assert(new_s_cnt == old_s_cnt); } else { new_r_cnt = fi_cntr_read(rcv_cntr); old_r_cnt++; cr_assert(new_r_cnt == old_r_cnt); } cr_assert(strcmp(s_buffer, r_buffer) == 0, "check message"); got_r = 0; } }
static int cntr_loop() { size_t i, opened, cntr_cnt; uint64_t value, expected; struct timespec start, stop; int ret, testret = FAIL, timeout = 5000; cntr_cnt = MIN(fi->domain_attr->cntr_cnt, MAX_COUNTER_CHECK); struct fid_cntr **cntrs = calloc(cntr_cnt, sizeof(struct fid_cntr *)); if (!cntrs) { perror("calloc"); return -FI_ENOMEM; } for (opened = 0; opened < cntr_cnt; opened++) { ret = ft_cntr_open(&cntrs[opened]); if (ret) { FT_PRINTERR("fi_cntr_open", ret); goto close; } } for (i = 0; i < opened; i++) { ret = fi_cntr_set(cntrs[i], i); if (ret) { FT_PRINTERR("fi_cntr_set", ret); goto close; } ret = fi_cntr_seterr(cntrs[i], i << 1); if (ret) { FT_PRINTERR("fi_cntr_seterr", ret); goto close; } } for (i = 0; i < opened; i++) { ret = fi_cntr_add(cntrs[i], i); if (ret) { FT_PRINTERR("fi_cntr_add", ret); goto close; } ret = fi_cntr_adderr(cntrs[i], i); if (ret) { FT_PRINTERR("fi_cntr_adderr", ret); goto close; } } for (i = 0; i < opened; i++) { clock_gettime(CLOCK_MONOTONIC, &start); expected = i + i; do { value = fi_cntr_read(cntrs[i]); clock_gettime(CLOCK_MONOTONIC, &stop); sched_yield(); } while ((value != expected) && ((stop.tv_sec - start.tv_sec) > timeout)); if (value != expected) { FT_PRINTERR("fi_cntr_read", value); goto close; } clock_gettime(CLOCK_MONOTONIC, &start); expected = (i << 1) + i; do { value = fi_cntr_readerr(cntrs[i]); clock_gettime(CLOCK_MONOTONIC, &stop); sched_yield(); } while ((value != expected) && ((stop.tv_sec - start.tv_sec) > timeout)); if (value != expected) { FT_PRINTERR("fi_cntr_readerr", value); goto close; } } testret = PASS; close: for (i = 0; i < opened; i++) { ret = fi_close(&(cntrs[i])->fid); if (ret) { FT_PRINTERR("fi_cntr_close", ret); break; } } if (i < cntr_cnt) testret = FAIL; free(cntrs); return TEST_RET_VAL(ret, testret); }