static int run_test(void)
{
	int ret, i;

	ret = sync_test();
	if (ret) {
		fprintf(stderr, "sync_test failed!\n");
		goto out;
	}

	ft_start();
	if (opts.dst_addr) {
		for (i = 0; i < opts.iterations; i++) {
			ret = ft_tx(ep, remote_fi_addr, opts.transfer_size, &tx_ctx);
			if (ret)
				goto out;
		}
	} else {
		ret = wait_for_recv_completion(opts.iterations);
		if (ret)
			goto out;
	}
	ft_stop();

	if (opts.machr)
		show_perf_mr(opts.transfer_size, opts.iterations, &start, &end,
			     1, opts.argc, opts.argv);
	else
		show_perf(test_name, opts.transfer_size, opts.iterations,
			  &start, &end, 1);

out:
	return ret;
}
static int run_test(void)
{
	int ret, i;

	ret = ft_sync();
	if (ret)
		return ret;

	ft_start();
	for (i = 0; i < opts.iterations; i++) {
		ret = ft_rma(opts.rma_op, ep, opts.transfer_size, &remote, ep);
		if (ret)
			return ret;
	}
	ft_stop();

	if (opts.machr)
		show_perf_mr(opts.transfer_size, opts.iterations, &start, &end,
			     1, opts.argc, opts.argv);
	else
		show_perf(NULL, opts.transfer_size, opts.iterations,
			  &start, &end, 1);

	return 0;
}
static int run_test(void)
{
	int ret, i;

	ret = sync_test();
	if (ret)
		return ret;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < opts.iterations; i++) {
		ret = opts.dst_addr ? send_xfer(opts.transfer_size) :
				      recv_xfer(opts.transfer_size);
		if (ret)
			return ret;

		ret = opts.dst_addr ? recv_xfer(opts.transfer_size) :
				      send_xfer(opts.transfer_size);
		if (ret)
			return ret;
	}
	clock_gettime(CLOCK_MONOTONIC, &end);

	if (opts.machr)
		show_perf_mr(opts.transfer_size, opts.iterations, &start, &end,
			     2, opts.argc, opts.argv);
	else
		show_perf(test_name, opts.transfer_size, opts.iterations,
			  &start, &end, 2);

	return 0;
}
static int run_test(void)
{
	int ret, i, t;

	ret = sync_test();
	if (ret)
		goto out;

	gettimeofday(&start, NULL);
	for (i = 0; i < iterations; i++) {
		for (t = 0; t < transfer_count; t++) {
			ret = dst_addr ? send_xfer(transfer_size) :
					 recv_xfer(transfer_size);
			if (ret)
				goto out;
		}

		for (t = 0; t < transfer_count; t++) {
			ret = dst_addr ? recv_xfer(transfer_size) :
					 send_xfer(transfer_size);
			if (ret)
				goto out;
		}
	}
	gettimeofday(&end, NULL);

	show_perf();
	ret = 0;

out:
	return ret;
}
int pingpong(void)
{
	int ret, i;

	ret = ft_sync();
	if (ret)
		return ret;

	ft_start();
	for (i = 0; i < opts.iterations; i++) {
		ret = opts.dst_addr ? ft_tx(opts.transfer_size) :
				      ft_rx(opts.transfer_size);
		if (ret)
			return ret;

		ret = opts.dst_addr ? ft_rx(opts.transfer_size) :
				      ft_tx(opts.transfer_size);
		if (ret)
			return ret;
	}
	ft_stop();

	if (opts.machr)
		show_perf_mr(opts.transfer_size, opts.iterations, &start, &end,
			     2, opts.argc, opts.argv);
	else
		show_perf(test_name, opts.transfer_size, opts.iterations,
			  &start, &end, 2);

	return 0;
}
static int run_test(void)
{
	int ret, i;

	ret = sync_test();
	if (ret)
		goto out;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < iterations; i++) {
		ret = dst_addr ? send_xfer(transfer_size) :
				 recv_xfer(transfer_size);
		if (ret)
			goto out;

		ret = dst_addr ? recv_xfer(transfer_size) :
				 send_xfer(transfer_size);
		if (ret)
			goto out;
	}
	clock_gettime(CLOCK_MONOTONIC, &end);

	show_perf(test_name, transfer_size, iterations, &start, &end, 2);
	ret = 0;

out:
	return ret;
}
static int run_test(void)
{
	int ret = 0, i;

	if ((ret = sync_test()))
		goto out;

	if (bidir || client) {
		gettimeofday(&start, NULL);
		for (i = 0; i < iterations; i++) {
			if ((ret = write_xfer(transfer_size)))
				goto out;
		}
		if ((ret = poll_all_sends()))
			goto out;
		gettimeofday(&end, NULL);

		show_perf();
	}

	if ((ret = sync_test()))
		goto out;

out:
	return ret;
}
static int stream(void)
{
	int ret, i;

	ret = ft_sync();
	if (ret)
		return ret;

	ft_start();
	for (i = 0; i < opts.iterations; i++) {
		ret = opts.dst_addr ?
			ft_tx(ep, remote_fi_addr, opts.transfer_size, &tx_ctx) :
			ft_rx(ep, opts.transfer_size);
		if (ret)
			return ret;
	}
	ft_stop();

	if (opts.machr)
		show_perf_mr(opts.transfer_size, opts.iterations, &start, &end,
			     1, opts.argc, opts.argv);
	else
		show_perf(NULL, opts.transfer_size, opts.iterations,
			  &start, &end, 1);

	return 0;
}
static int run_op(void)
{
	int ret, i;

	count = (size_t *) malloc(sizeof(size_t));
	if (!count)
		return -ENOMEM;

	ret = sync_test();
	if (ret)
		goto out;

	clock_gettime(CLOCK_MONOTONIC, &start);
	switch (op_type) {
	case FI_MIN:
	case FI_MAX:
	case FI_ATOMIC_READ:
	case FI_ATOMIC_WRITE:
		/* Run the base form of the op, then the fetching form,
		 * whenever the provider reports that form as valid. */
		ret = is_valid_base_atomic_op(op_type);
		if (ret > 0) {
			for (i = 0; i < iterations; i++) {
				ret = execute_base_atomic_op(op_type);
				if (ret)
					break;
			}
		}

		ret = is_valid_fetch_atomic_op(op_type);
		if (ret > 0) {
			for (i = 0; i < iterations; i++) {
				ret = execute_fetch_atomic_op(op_type);
				if (ret)
					break;
			}
		}
		break;
	case FI_CSWAP:
		ret = is_valid_compare_atomic_op(op_type);
		if (ret > 0)
			ret = execute_compare_atomic_op(op_type);
		else
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}
	clock_gettime(CLOCK_MONOTONIC, &end);

	if (ret)
		goto out;

	show_perf(test_name, transfer_size, iterations, &start, &end, 2);
	ret = 0;

out:
	free(count);
	return ret;
}
static int run_test(void)
{
	int ret, i, t;
	off_t offset;
	uint8_t marker = 0;

	poll_byte = buf + transfer_size - 1;
	*poll_byte = -1;
	offset = riomap(rs, buf, transfer_size, PROT_WRITE, 0, 0);
	if (offset == -1) {
		perror("riomap");
		ret = -1;
		goto out;
	}

	ret = sync_test();
	if (ret)
		goto out;

	gettimeofday(&start, NULL);
	for (i = 0; i < iterations; i++) {
		if (dst_addr) {
			for (t = 0; t < transfer_count - 1; t++) {
				ret = send_xfer(transfer_size);
				if (ret)
					goto out;
			}
			/* Tag the final transfer of the burst by writing the
			 * marker into its last byte. */
			*poll_byte = (uint8_t) marker++;
			ret = send_xfer(transfer_size);
			if (ret)
				goto out;

			ret = recv_xfer(transfer_size, marker++);
		} else {
			ret = recv_xfer(transfer_size, marker++);
			if (ret)
				goto out;

			for (t = 0; t < transfer_count - 1; t++) {
				ret = send_xfer(transfer_size);
				if (ret)
					goto out;
			}
			*poll_byte = (uint8_t) marker++;
			ret = send_xfer(transfer_size);
		}
		if (ret)
			goto out;
	}
	gettimeofday(&end, NULL);

	show_perf();
	ret = riounmap(rs, buf, transfer_size);

out:
	return ret;
}
/*--------------------------------------------------------*/
void AzsSvrg::_train_test()
{
  if (rseed > 0) {
    srand(rseed); /* initialize the random seed */
  }

  /*--- initialization ---*/
  int dim = m_trn_x->rowNum();
  reset_weights(dim);

  /*--- iterate ... ---*/
  AzTimeLog::print("--- Training begins ... ", log_out);
  AzsSvrgData_fast prev_fast;
  AzsSvrgData_compact prev_compact;
  int ite;
  for (ite = 0; ite < ite_num; ++ite) {
    if (do_show_timing) AzTimeLog::print("--- iteration#", ite+1, log_out);
    if (doing_svrg(ite) && (ite-sgd_ite) % svrg_interval == 0) {
      if (do_show_timing) AzTimeLog::print("Computing gradient average ... ", log_out);
      if (do_compact) get_avg_gradient_compact(&prev_compact);
      else            get_avg_gradient_fast(&prev_fast);
    }
    if (do_show_timing) AzTimeLog::print("Updating weights ... ", log_out);
    AzIntArr ia_dxs;
    const int *dxs = gen_seq(dataSize(), ia_dxs);
    int ix;
    for (ix = 0; ix < dataSize(); ++ix) {
      int dx = dxs[ix]; /* data point index */
      AzDvect v_deriv(class_num);
      get_deriv(dx, &v_deriv); /* compute the derivatives */
      if (doing_svrg(ite)) {
        if (do_compact) updateDelta_svrg_compact(dx, &v_deriv, prev_compact);
        else            updateDelta_svrg_fast(dx, &v_deriv, prev_fast);
      }
      else {
        updateDelta_sgd(dx, &v_deriv);
      }
      flushDelta();
    }
    show_perf(ite);
  }
  if (do_show_timing) AzTimeLog::print("--- End of training ... ", log_out);

  /*--- write predictions to a file if requested ---*/
  if (s_pred_fn.length() > 0) {
    AzTimeLog::print("Writing predictions to ", s_pred_fn.c_str(), log_out);
    write_pred(m_tst_x, s_pred_fn.c_str());
  }
}
int pingpong(void)
{
	int ret, i;

	ret = ft_sync();
	if (ret)
		return ret;

	if (opts.dst_addr) {
		for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) {
			if (i == opts.warmup_iterations)
				ft_start();

			if (opts.transfer_size < fi->tx_attr->inject_size)
				ret = ft_inject(opts.transfer_size);
			else
				ret = ft_tx(opts.transfer_size);
			if (ret)
				return ret;

			ret = ft_rx(opts.transfer_size);
			if (ret)
				return ret;
		}
	} else {
		for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) {
			if (i == opts.warmup_iterations)
				ft_start();

			ret = ft_rx(opts.transfer_size);
			if (ret)
				return ret;

			if (opts.transfer_size < fi->tx_attr->inject_size)
				ret = ft_inject(opts.transfer_size);
			else
				ret = ft_tx(opts.transfer_size);
			if (ret)
				return ret;
		}
	}
	ft_stop();

	if (opts.machr)
		show_perf_mr(opts.transfer_size, opts.iterations, &start, &end,
			     2, opts.argc, opts.argv);
	else
		show_perf(NULL, opts.transfer_size, opts.iterations,
			  &start, &end, 2);

	return 0;
}
static int run_test(void)
{
	int ret, i;

	ret = sync_test();
	if (ret)
		return ret;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (i = 0; i < opts.iterations; i++) {
		switch (op_type) {
		case FT_RMA_WRITE:
			ret = write_data(opts.transfer_size);
			break;
		case FT_RMA_WRITEDATA:
			ret = write_data_with_cq_data(opts.transfer_size);
			if (ret)
				return ret;
			ret = wait_remote_writedata_completion();
			break;
		case FT_RMA_READ:
			ret = read_data(opts.transfer_size);
			break;
		}
		if (ret)
			return ret;

		ret = ft_wait_for_comp(txcq, 1);
		if (ret)
			return ret;
	}
	clock_gettime(CLOCK_MONOTONIC, &end);

	if (opts.machr)
		show_perf_mr(opts.transfer_size, opts.iterations, &start, &end,
			     1, opts.argc, opts.argv);
	else
		show_perf(test_name, opts.transfer_size, opts.iterations,
			  &start, &end, 1);

	return 0;
}
static int ft_run_bandwidth(void)
{
	size_t recv_cnt;
	int ret, i;

	for (i = 0; i < ft_ctrl.size_cnt; i += ft_ctrl.inc_step) {
		ft_tx_ctrl.msg_size = ft_ctrl.size_array[i];
		if (ft_tx_ctrl.msg_size > fabric_info->ep_attr->max_msg_size)
			break;

		if (((test_info.class_function == FT_FUNC_INJECT) ||
		     (test_info.class_function == FT_FUNC_INJECTDATA)) &&
		    (ft_tx_ctrl.msg_size > fabric_info->tx_attr->inject_size))
			break;

		ft_ctrl.xfer_iter = test_info.test_flags & FT_FLAG_QUICKTEST ?
				5 : size_to_count(ft_tx_ctrl.msg_size);
		recv_cnt = ft_ctrl.xfer_iter;

		ret = ft_sync_test(0);
		if (ret)
			return ret;

		ret = ft_post_recv_bufs();
		if (ret)
			return ret;

		clock_gettime(CLOCK_MONOTONIC, &start);
		ret = (test_info.ep_type == FI_EP_DGRAM) ?
			ft_bw_dgram(&recv_cnt) : ft_bw();
		clock_gettime(CLOCK_MONOTONIC, &end);
		if (ret) {
			FT_PRINTERR("bw test failed!", ret);
			return ret;
		}

		show_perf("bw", ft_tx_ctrl.msg_size, recv_cnt, &start, &end, 1);
	}

	return 0;
}
int bandwidth(void)
{
	int ret, i, j;

	ret = ft_sync();
	if (ret)
		return ret;

	/* The loop structure allows for the possibility that the sender
	 * immediately overruns the receiving side on the first transfer (or
	 * the entire window). This could result in exercising parts of the
	 * provider's implementation of FI_RM_ENABLED. For better or worse,
	 * some MPI-level benchmarks tend to use this type of loop for
	 * measuring bandwidth. */
	if (opts.dst_addr) {
		for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) {
			if (i == opts.warmup_iterations)
				ft_start();

			for (j = 0; j < opts.window_size; j++) {
				if (opts.transfer_size < fi->tx_attr->inject_size)
					ret = ft_inject(opts.transfer_size);
				else
					ret = ft_post_tx(opts.transfer_size);
				if (ret)
					return ret;
			}
			ret = ft_get_tx_comp(tx_seq);
			if (ret)
				return ret;

			ret = ft_rx(4);
			if (ret)
				return ret;
		}
	} else {
		for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) {
			if (i == opts.warmup_iterations)
				ft_start();

			for (j = 0; j < opts.window_size; j++) {
				ret = ft_post_rx(opts.transfer_size);
				if (ret)
					return ret;
			}
			ret = ft_get_rx_comp(rx_seq - 1); /* rx_seq is always one ahead */
			if (ret)
				return ret;

			ret = ft_tx(4);
			if (ret)
				return ret;
		}
	}
	ft_stop();

	if (opts.machr)
		show_perf_mr(opts.transfer_size, opts.iterations, &start, &end,
			     opts.window_size, opts.argc, opts.argv);
	else
		show_perf(NULL, opts.transfer_size, opts.iterations,
			  &start, &end, opts.window_size);

	return 0;
}