static int run_test() { int ret, i; if (!(tx_ctx_arr = calloc(ep_cnt, sizeof *tx_ctx_arr))) return -FI_ENOMEM; if (!(rx_ctx_arr = calloc(ep_cnt, sizeof *rx_ctx_arr))) return -FI_ENOMEM; /* Post recvs */ for (i = 0; i < ep_cnt; i++) { if (rx_shared_ctx) { fprintf(stdout, "Posting recv #%d for shared rx ctx\n", i); ret = ft_post_rx(srx_ctx, rx_size, &rx_ctx_arr[i]); } else { fprintf(stdout, "Posting recv for endpoint #%d\n", i); ret = ft_post_rx(ep_array[i], rx_size, &rx_ctx_arr[i]); } if (ret) return ret; } if (opts.dst_addr) { /* Post sends addressed to remote EPs */ for (i = 0; i < ep_cnt; i++) { if (tx_shared_ctx) fprintf(stdout, "Posting send #%d to shared tx ctx\n", i); else fprintf(stdout, "Posting send to endpoint #%d\n", i); ret = ft_tx(ep_array[i], addr_array[i], tx_size, &tx_ctx_arr[i]); if (ret) return ret; } } /* Wait for recv completions */ ret = ft_get_rx_comp(rx_seq - 1); if (ret) return ret; if (!opts.dst_addr) { /* Post sends addressed to remote EPs */ for (i = 0; i < ep_cnt; i++) { if (tx_shared_ctx) fprintf(stdout, "Posting send #%d to shared tx ctx\n", i); else fprintf(stdout, "Posting send to endpoint #%d\n", i); ret = ft_tx(ep_array[i], addr_array[i], tx_size, &tx_ctx_arr[i]); if (ret) return ret; } } return 0; }
static int init_fabric(void) { int ret; ret = ft_getinfo(hints, &fi); if (ret) return ret; ret = get_dupinfo(); if (ret) return ret; ret = ft_open_fabric_res(); if (ret) return ret; av_attr.count = ep_cnt; ret = alloc_ep_res(fi); if (ret) return ret; ret = alloc_ep(); if (ret) return ret; ret = bind_ep_array_res(); if (ret) return ret; /* Post recv */ if (rx_shared_ctx) ret = ft_post_rx(srx_ctx, MAX(rx_size, FT_MAX_CTRL_MSG), &rx_ctx); else ret = ft_post_rx(ep_array[0], MAX(rx_size, FT_MAX_CTRL_MSG), &rx_ctx); if (ret) return ret; ret = init_av(); return ret; }
int ft_exchange_keys(struct fi_rma_iov *peer_iov) { struct fi_rma_iov *rma_iov; int ret; if (opts.dst_addr) { rma_iov = tx_buf + ft_tx_prefix_size(); rma_iov->addr = fi->domain_attr->mr_mode == FI_MR_SCALABLE ? 0 : (uintptr_t) rx_buf + ft_rx_prefix_size(); rma_iov->key = fi_mr_key(mr); ret = ft_tx(sizeof *rma_iov); if (ret) return ret; ret = ft_get_rx_comp(rx_seq); if (ret) return ret; rma_iov = rx_buf + ft_rx_prefix_size(); *peer_iov = *rma_iov; ret = ft_post_rx(rx_size); } else { ret = ft_get_rx_comp(rx_seq); if (ret) return ret; rma_iov = rx_buf + ft_rx_prefix_size(); *peer_iov = *rma_iov; ret = ft_post_rx(rx_size); if (ret) return ret; rma_iov = tx_buf + ft_tx_prefix_size(); rma_iov->addr = fi->domain_attr->mr_mode == FI_MR_SCALABLE ? 0 : (uintptr_t) rx_buf + ft_rx_prefix_size(); rma_iov->key = fi_mr_key(mr); ret = ft_tx(sizeof *rma_iov); } return ret; }
int ft_init_ep(void) { int flags, ret; if (fi->ep_attr->type == FI_EP_MSG) FT_EP_BIND(ep, eq, 0); FT_EP_BIND(ep, av, 0); FT_EP_BIND(ep, txcq, FI_TRANSMIT); FT_EP_BIND(ep, rxcq, FI_RECV); ret = ft_get_cq_fd(txcq, &tx_fd); if (ret) return ret; ret = ft_get_cq_fd(rxcq, &rx_fd); if (ret) return ret; /* TODO: use control structure to select counter bindings explicitly */ flags = !txcq ? FI_SEND : 0; if (hints->caps & (FI_WRITE | FI_READ)) flags |= hints->caps & (FI_WRITE | FI_READ); else if (hints->caps & FI_RMA) flags |= FI_WRITE | FI_READ; FT_EP_BIND(ep, txcntr, flags); flags = !rxcq ? FI_RECV : 0; if (hints->caps & (FI_REMOTE_WRITE | FI_REMOTE_READ)) flags |= hints->caps & (FI_REMOTE_WRITE | FI_REMOTE_READ); else if (hints->caps & FI_RMA) flags |= FI_REMOTE_WRITE | FI_REMOTE_READ; FT_EP_BIND(ep, rxcntr, flags); ret = fi_enable(ep); if (ret) { FT_PRINTERR("fi_enable", ret); return ret; } if (fi->rx_attr->op_flags != FI_MULTI_RECV) { /* Initial receive will get remote address for unconnected EPs */ ret = ft_post_rx(ep, MAX(rx_size, FT_MAX_CTRL_MSG), &rx_ctx); if (ret) return ret; } return 0; }
/*
 * Complete the outstanding receive and post the next one.
 *
 * Waits for receive completion rx_seq. When both FT_OPT_VERIFY_DATA and
 * FT_OPT_ACTIVE are reported by ft_check_opts(), checks `size` bytes of
 * payload (the receive buffer past its prefix) with ft_check_buf(). It then
 * re-posts a receive of rx_size bytes.
 *
 * size: number of payload bytes to verify; not used for the re-posted
 *       receive.
 *
 * Returns 0 on success or the first non-zero result from a step.
 */
ssize_t ft_rx(size_t size)
{
	ssize_t rc = ft_get_rx_comp(rx_seq);

	if (!rc && ft_check_opts(FT_OPT_VERIFY_DATA | FT_OPT_ACTIVE))
		rc = ft_check_buf((char *) rx_buf + ft_rx_prefix_size(), size);

	/* TODO: verify CQ data, if available */
	if (!rc)
		rc = ft_post_rx(rx_size);

	return rc;
}
static int do_transfers(void) { int i, ret; for (i = 0; i < num_eps; i++) { rx_buf = recv_bufs[i]; ret = ft_post_rx(eps[i], opts.transfer_size, &recv_ctx[i]); if (ret) return ret; } for (i = 0; i < num_eps; i++) { if (ft_check_opts(FT_OPT_VERIFY_DATA)) ft_fill_buf(send_bufs[i], opts.transfer_size); tx_buf = send_bufs[i]; ret = ft_post_tx(eps[i], remote_addr[i], opts.transfer_size, &send_ctx[i]); if (ret) return ret; } ret = ft_get_tx_comp(num_eps); if (ret < 0) return ret; ret = ft_get_rx_comp(num_eps); if (ret < 0) return ret; if (ft_check_opts(FT_OPT_VERIFY_DATA)) { for (i = 0; i < num_eps; i++) { ret = ft_check_buf(recv_bufs[i], opts.transfer_size); if (ret) return ret; } } for (i = 0; i < num_eps; i++) ft_finalize_ep(eps[i]); printf("PASSED multi ep\n"); return 0; }
ssize_t ft_rx(struct fid_ep *ep, size_t size) { ssize_t ret; ret = ft_get_rx_comp(rx_seq); if (ret) return ret; if (ft_check_opts(FT_OPT_VERIFY_DATA | FT_OPT_ACTIVE)) { ret = ft_check_buf((char *) rx_buf + ft_rx_prefix_size(), size); if (ret) return ret; } /* TODO: verify CQ data, if available */ /* Ignore the size arg. Post a buffer large enough to handle all message * sizes. ft_sync() makes use of ft_rx() and gets called in tests just before * message size is updated. The recvs posted are always for the next incoming * message */ ret = ft_post_rx(ep, rx_size, &rx_ctx); return ret; }
static int server_connect(void) { struct fi_eq_cm_entry entry; uint32_t event; ssize_t rd; int ret, k; int num_conn_reqs = 0, num_connected = 0; struct ep_info *ep_state_array = NULL; ep_array = calloc(ep_cnt, sizeof(*ep_array)); if (!ep_array) return -FI_ENOMEM; ep_state_array = calloc(ep_cnt, sizeof(*ep_state_array)); if (!ep_state_array) return -FI_ENOMEM; while (num_connected != ep_cnt) { rd = fi_eq_sread(eq, &event, &entry, sizeof entry, -1, 0); if (rd != sizeof entry) { FT_PROCESS_EQ_ERR(rd, eq, "fi_eq_sread", "cm-event"); ret = (int) rd; goto err; } switch(event) { case FI_CONNREQ: if (num_conn_reqs == ep_cnt) { fprintf(stderr, "Unexpected CM event %d\n", event); ret = -FI_EOTHER; goto err; } fi = ep_state_array[num_conn_reqs].fi = entry.info; ep_state_array[num_conn_reqs].state = FT_EP_CONNECT_RCVD; if (num_conn_reqs == 0) { ret = fi_domain(fabric, fi, &domain, NULL); if (ret) { FT_PRINTERR("fi_domain", ret); goto err; } ret = alloc_ep_res(fi); if (ret) goto err; } ret = fi_endpoint(domain, fi, &ep_array[num_conn_reqs], NULL); if (ret) { FT_PRINTERR("fi_endpoint", ret); goto err; } ep_state_array[num_conn_reqs].ep = ep_array[num_conn_reqs]; ret = bind_ep_res(ep_array[num_conn_reqs]); if (ret) goto err; ret = fi_accept(ep_array[num_conn_reqs], NULL, 0); if (ret) { FT_PRINTERR("fi_accept", ret); goto err; } ep_state_array[num_conn_reqs].state = FT_EP_CONNECTING; num_conn_reqs++; break; case FI_CONNECTED: if (num_conn_reqs <= num_connected) { ret = -FI_EOTHER; goto err; } for (k = 0; k < num_conn_reqs; k++) { if (ep_state_array[k].state != FT_EP_CONNECTING) continue; if (&ep_state_array[k].ep->fid == entry.fid) { ep_state_array[k].state = FT_EP_CONNECTED; num_connected++; if (num_connected != ep_cnt) fi_freeinfo(ep_state_array[k].fi); break; } } if (k == num_conn_reqs) { fprintf(stderr, "Unexpected CM event %d fid %p (ep %p)\n", event, entry.fid, ep); ret = -FI_EOTHER; goto err; } break; default: ret = -FI_EOTHER; goto err; } } /* Post recv */ if 
(rx_shared_ctx) ret = ft_post_rx(srx_ctx, MAX(rx_size, FT_MAX_CTRL_MSG), &rx_ctx); else ret = ft_post_rx(ep_array[0], MAX(rx_size, FT_MAX_CTRL_MSG), &rx_ctx); if (ret) goto err; free(ep_state_array); return 0; err: for (k = 0; k < ep_cnt; k++) { switch(ep_state_array[k].state) { case FT_EP_CONNECT_RCVD: fi_reject(pep, ep_state_array[k].fi->handle, NULL, 0); break; case FT_EP_CONNECTING: case FT_EP_CONNECTED: fi_shutdown(ep_state_array[k].ep, 0); break; case FT_EP_STATE_INIT: default: break; } } free(ep_state_array); return ret; }
static int client_connect(void) { struct fi_eq_cm_entry entry; uint32_t event; ssize_t rd; int i, ret; ret = ft_getinfo(hints, &fi); if (ret) return ret; ret = get_dupinfo(); if (ret) return ret; ret = ft_open_fabric_res(); if (ret) return ret; ret = alloc_ep_res(fi); if (ret) return ret; ret = alloc_ep(); if (ret) return ret; ret = bind_ep_array_res(); if (ret) return ret; for (i = 0; i < ep_cnt; i++) { ret = fi_connect(ep_array[i], fi->dest_addr, NULL, 0); if (ret) { FT_PRINTERR("fi_connect", ret); return ret; } rd = fi_eq_sread(eq, &event, &entry, sizeof entry, -1, 0); if (rd != sizeof entry) { FT_PROCESS_EQ_ERR(rd, eq, "fi_eq_sread", "connect"); ret = (int) rd; return ret; } if (event != FI_CONNECTED || entry.fid != &ep_array[i]->fid) { fprintf(stderr, "Unexpected CM event %d fid %p (ep %p)\n", event, entry.fid, ep); ret = -FI_EOTHER; return ret; } } /* Post recv */ if (rx_shared_ctx) ret = ft_post_rx(srx_ctx, MAX(rx_size, FT_MAX_CTRL_MSG), &rx_ctx); else ret = ft_post_rx(ep_array[0], MAX(rx_size, FT_MAX_CTRL_MSG), &rx_ctx); if (ret) return ret; return 0; }
int bandwidth(void) { int ret, i, j; ret = ft_sync(); if (ret) return ret; /* The loop structured allows for the possibility that the sender * immediately overruns the receiving side on the first transfer (or * the entire window). This could result in exercising parts of the * provider's implementation of FI_RM_ENABLED. For better or worse, * some MPI-level benchmarks tend to use this type of loop for measuring * bandwidth. */ if (opts.dst_addr) { for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) { if (i == opts.warmup_iterations) ft_start(); for(j = 0; j < opts.window_size; j++) { if (opts.transfer_size < fi->tx_attr->inject_size) ret = ft_inject(opts.transfer_size); else ret = ft_post_tx(opts.transfer_size); if (ret) return ret; } ret = ft_get_tx_comp(tx_seq); if (ret) return ret; ret = ft_rx(4); if (ret) return ret; } } else { for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) { if (i == opts.warmup_iterations) ft_start(); for(j = 0; j < opts.window_size; j++) { ret = ft_post_rx(opts.transfer_size); if (ret) return ret; } ret = ft_get_rx_comp(rx_seq-1); /* rx_seq is always one ahead */ if (ret) return ret; ret = ft_tx(4); if (ret) return ret; } } ft_stop(); if (opts.machr) show_perf_mr(opts.transfer_size, opts.iterations, &start, &end, opts.window_size, opts.argc, opts.argv); else show_perf(NULL, opts.transfer_size, opts.iterations, &start, &end, opts.window_size); return 0; }