int measure_ireduce_overlap(nbctest_params_t *params, nbctest_result_t *result) { #ifdef HAVE_NBC double starttime, endtime; int rc; static MPI_Request req; starttime = timeslot_startsync(); result->inittime = hpctimer_wtime(); rc = MPI_Ireduce(mempool_alloc(sbufpool, sbufsize), mempool_alloc(rbufpool, sbufsize), params->count, MPI_DOUBLE, MPI_SUM, root, params->comm, &req); result->inittime = hpctimer_wtime() - result->inittime; nbcbench_simulate_computing(params, &req, result); result->waittime = hpctimer_wtime(); rc = MPI_Wait(&req, MPI_STATUS_IGNORE); result->waittime = hpctimer_wtime() - result->waittime; endtime = timeslot_stopsync(); if ((rc == MPI_SUCCESS) && (starttime > 0.0) && (endtime > 0.0)) { result->totaltime = endtime - starttime; return MEASURE_SUCCESS; } #endif return MEASURE_FAILURE; }
int measure_ibarrier_overlap(nbctest_params_t *params, nbctest_result_t *result) { #ifdef HAVE_NBC double starttime, endtime; int rc; static MPI_Request req; starttime = timeslot_startsync(); result->inittime = hpctimer_wtime(); rc = MPI_Ibarrier(params->comm, &req); result->inittime = hpctimer_wtime() - result->inittime; nbcbench_simulate_computing(params, &req, result); result->waittime = hpctimer_wtime(); rc = MPI_Wait(&req, MPI_STATUS_IGNORE); result->waittime = hpctimer_wtime() - result->waittime; endtime = timeslot_stopsync(); if ((rc == MPI_SUCCESS) && (starttime > 0.0) && (endtime > 0.0)) { result->totaltime = endtime - starttime; return MEASURE_SUCCESS; } #endif return MEASURE_FAILURE; }
/* * timeslot_stopsync: Returns timestamp or TIMESLOT_TIME_INVALID * if current timeslot have finished. */ double timeslot_stopsync() { if (mpiperf_synctype == SYNC_TIME) { timeslot_slotstop = hpctimer_wtime(); return (timeslot_slotstop - timeslot_slotstart < timeslot_len) ? timeslot_slotstop : TIMESLOT_TIME_INVALID; } return hpctimer_wtime(); }
int test(int n) { printf("-- Unrolling depth: %d; n: %d] --\n", 8, n); int *v, i, sum = 0; double t; int *partial_sum = (int*) _mm_malloc(sizeof(int) * 8, 64); memset((void*)partial_sum, 0, sizeof(int) * 8); if ( (v = malloc(sizeof(*v) * n)) == NULL ) { fprintf(stderr, "No enough memory\n"); exit(EXIT_FAILURE); } for (i = 0; i < n; i++) { v[i] = 1; } register int p1, p2, p3, p4, p5, p6, p7, p8; p1=p2=p3=p4=p5=p6=p7=p8 =0; t = hpctimer_wtime(); for (i = 0; i < n; i += 8) { p1 += v[i]; p2 += v[i + 1]; p3 += v[i + 2]; p4 += v[i + 3]; p5 += v[i + 4]; p6 += v[i + 5]; p7 += v[i + 6]; p8 += v[i + 7]; } /* for (i = 0; i < 8; i++) { sum += partial_sum[i]; } */ /* sum += partial_sum[0] + partial_sum[1] + partial_sum[2] + partial_sum[3] + partial_sum[4] + partial_sum[5] + partial_sum[6] + partial_sum[7]; */ sum = p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8; t = hpctimer_wtime() - t; printf("Sum: %d\n", sum); printf("Elapsed time (sec.): %.6f\n", t); _mm_free(partial_sum); free(v); return 0; }
/* mpigclock_measure_offset_adaptive: Measures clock's offset of peer. */ static double mpigclock_measure_offset_adaptive(MPI_Comm comm, int root, int peer, double *min_rtt) { int rank, commsize, rttmin_notchanged = 0; double starttime, stoptime, peertime, rtt, rttmin = 1E12, invalidtime = INVALIDTIME, offset; MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &commsize); offset = 0.0; for (;;) { if (rank != root) { /* Peer process */ starttime = hpctimer_wtime(); MPI_Send(&starttime, 1, MPI_DOUBLE, root, MPIGCLOCK_MSGTAG, comm); MPI_Recv(&peertime, 1, MPI_DOUBLE, root, MPIGCLOCK_MSGTAG, comm, MPI_STATUS_IGNORE); stoptime = hpctimer_wtime(); rtt = stoptime - starttime; if (rtt < rttmin) { rttmin = rtt; rttmin_notchanged = 0; offset = peertime - rtt / 2.0 - starttime; } else { if (++rttmin_notchanged == MPIGCLOCK_RTTMIN_NOTCHANGED_MAX) { MPI_Send(&invalidtime, 1, MPI_DOUBLE, root, MPIGCLOCK_MSGTAG, comm); break; } } } else { /* Root process */ MPI_Recv(&starttime, 1, MPI_DOUBLE, peer, MPIGCLOCK_MSGTAG, comm, MPI_STATUS_IGNORE); peertime = hpctimer_wtime(); if (starttime < 0.0) { break; } MPI_Send(&peertime, 1, MPI_DOUBLE, peer, MPIGCLOCK_MSGTAG, comm); } } /* for */ if( rank != root ){ *min_rtt = rttmin; } else { rtt = 0.0; } return offset; }
int main() { double t; int i; t = hpctimer_wtime(); for (i = 0; i < nreps; i++) { blend_map(z, x, y, n, 0); } t = (hpctimer_wtime() - t) / nreps; printf("Elapsed time: %.6f sec.\n", t); return 0; }
/* timeslot_startsync: Wait for the next timeslot and returns its start time. */ double timeslot_startsync() { double starttime = 0.0; if (mpiperf_synctype == SYNC_TIME) { starttime = timeslot_stagestart + timeslot_len * (timeslot++); if ((timeslot_slotstart = hpctimer_wtime()) > starttime) { return TIMESLOT_TIME_INVALID; } else { while ((timeslot_slotstart = hpctimer_wtime()) < starttime) { /* Wait */ ; } } return timeslot_slotstart; } return hpctimer_wtime(); }
/* timeslot_initialize_test: */ int timeslot_initialize_test(MPI_Comm comm) { int commsize; /* Synchronize clocks */ double synctime = hpctimer_wtime(); mpigclock_sync(comm, mpiperf_master_rank, MPIGCLOCK_SYNC_LINEAR); synctime = hpctimer_wtime() - synctime; MPI_Comm_size(comm, &commsize); logger_log("Clock synchronization time (commsize: %d, root: %d): %.6f sec.", commsize, mpiperf_master_rank, synctime); logger_log("Local clock offset (commsize: %d, root: %d): %.6f sec.", commsize, mpiperf_master_rank, mpigclock_offset()); bcasttime = measure_bcast_double(comm) * TIMESLOT_BCAST_OVERHEAD; logger_log("MPI_Bcast time: %.6f sec.", bcasttime); return MPIPERF_SUCCESS; }
/* * measure_bcast_double: Measures MPI_Bcast function time * for sending one element of type double. * This value is used for determining start time * of the first timeslot. */ static double measure_bcast_double(MPI_Comm comm) { double buf, totaltime = 0.0, optime, maxtime = 0.0; int i, nreps = 3; /* Warmup call */ MPI_Bcast(&buf, 1, MPI_DOUBLE, mpiperf_master_rank, comm); /* Measures (upper bound) */ for (i = 0; i < nreps; i++) { MPI_Barrier(comm); optime = hpctimer_wtime(); MPI_Bcast(&buf, 1, MPI_DOUBLE, mpiperf_master_rank, comm); optime = hpctimer_wtime() - optime; MPI_Reduce(&optime, &maxtime, 1, MPI_DOUBLE, MPI_MAX, mpiperf_master_rank, comm); totaltime = stat_fmax2(totaltime, maxtime); } return totaltime; }
int main(int argc, char *argv[]) { double tfirst, t; int i; int repeat_count = 1; int current_repeat = 0; double average = 0.0; if (argc >= 2) { repeat_count = atoi(argv[1]); } /* First run: warmup */ tfirst = hpctimer_wtime(); blend_map(z, x, y, n, 0); tfirst = hpctimer_wtime() - tfirst; srand(time(NULL)); while (current_repeat < repeat_count) { /* Measures */ t = hpctimer_wtime(); for (i = 0; i < nreps; i++) { blend_map(z, x, y, n, rand()); } t = (hpctimer_wtime() - t) / nreps; //printf("First run (sec.): %.6f\n", tfirst); //printf("Mean of %d runs (sec.): %.6f\n", nreps, t); average += t; current_repeat++; } average /= repeat_count; printf("%.6f\n", average); return 0; }
/* measure_waitpatternup_sync: */ double measure_waitpatternup_sync(bench_t *bench) { double starttime, endtime, deadline; starttime = timeslot_startsync(); for (deadline = hpctimer_wtime() + (rank + 1) * 1e-6; hpctimer_wtime() < deadline; ) { /* Wait */ } endtime = timeslot_stopsync(); if (starttime > 0.0 && endtime > 0.0) { return endtime - starttime; } else if (starttime < 0.0) { return MEASURE_STARTED_LATE; } else if (endtime < 0.0) { return MEASURE_TIME_TOOLONG; } return MEASURE_TIME_INVVAL; }
/* timeslot_setlen: Set start time of the first time slot. */ double timeslot_set_starttime(MPI_Comm comm) { MPI_Barrier(comm); timeslot = 0; if (IS_MASTER_RANK) { timeslot_stagestart = hpctimer_wtime() + bcasttime; /* logger_log("timeslot: [%.6f ... %.6f] - %.6f sec.", timeslot_starttime, timeslot_starttime + timeslot_len, timeslot_len); */ } MPI_Bcast(×lot_stagestart, 1, MPI_DOUBLE, mpiperf_master_rank, comm); /* Translate global time to local */ timeslot_stagestart -= mpigclock_offset(); return timeslot_stagestart; }