#include <stdio.h>
#include "mpi.h"
#include "mpitest.h"

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Win win = MPI_WIN_NULL;
    int *baseptr = NULL;
    int errs = 0, mpi_errno = MPI_SUCCESS;
    int val1 = 0, val2 = 0, flag = 0;
    MPI_Request reqs[2];
    MPI_Status stats[2];

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    MPI_Win_allocate(2 * sizeof(int), sizeof(int), MPI_INFO_NULL,
                     MPI_COMM_WORLD, &baseptr, &win);

    /* Initialize window buffer */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win);
    baseptr[0] = 1;
    baseptr[1] = 2;
    MPI_Win_unlock(rank, win);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Issue request-based gets and complete them with testall. */
    MPI_Win_lock_all(0, win);
    MPI_Rget(&val1, 1, MPI_INT, 0, 0, 1, MPI_INT, win, &reqs[0]);
    MPI_Rget(&val2, 1, MPI_INT, 0, 1, 1, MPI_INT, win, &reqs[1]);

    do {
        mpi_errno = MPI_Testall(2, reqs, &flag, stats);
    } while (flag == 0);

    /* Check get values. */
    if (val1 != 1 || val2 != 2) {
        printf("%d - Got val1 = %d, val2 = %d, expected 1, 2\n", rank, val1, val2);
        fflush(stdout);
        errs++;
    }

    /* Check return error code. */
    if (mpi_errno != MPI_SUCCESS) {
        printf("%d - Got return errno %d, expected MPI_SUCCESS(%d)\n",
               rank, mpi_errno, MPI_SUCCESS);
        fflush(stdout);
        errs++;
    }

    MPI_Win_unlock_all(win);
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_free(&win);

    MTest_Finalize(errs);
    MPI_Finalize();

    return errs != 0;
}
int MPIX_Rget_x(void *origin_addr, MPI_Count origin_count,
                MPI_Datatype origin_datatype, int target_rank,
                MPI_Aint target_disp, MPI_Count target_count,
                MPI_Datatype target_datatype, MPI_Win win,
                MPI_Request *request)
{
    int rc = MPI_SUCCESS;

    if (likely (origin_count <= bigmpi_int_max && target_count <= bigmpi_int_max)) {
        rc = MPI_Rget(origin_addr, origin_count, origin_datatype,
                      target_rank, target_disp, target_count, target_datatype,
                      win, request);
    } else {
        MPI_Datatype neworigin_datatype, newtarget_datatype;
        MPIX_Type_contiguous_x(origin_count, origin_datatype, &neworigin_datatype);
        MPIX_Type_contiguous_x(target_count, target_datatype, &newtarget_datatype);
        MPI_Type_commit(&neworigin_datatype);
        MPI_Type_commit(&newtarget_datatype);
        rc = MPI_Rget(origin_addr, 1, neworigin_datatype,
                      target_rank, target_disp, 1, newtarget_datatype,
                      win, request);
        MPI_Type_free(&neworigin_datatype);
        MPI_Type_free(&newtarget_datatype);
    }
    return rc;
}
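A minimal caller sketch for the large-count wrapper above. The helper name, the target displacement of 0, and the assumption that the caller already holds a passive-target epoch on the window are illustrative only; they are not part of BigMPI.

#include <stdlib.h>
#include <mpi.h>

/* Hypothetical helper (not part of BigMPI): fetch `count` doubles from rank
 * `target` of `win` into a freshly allocated local buffer, using the
 * large-count wrapper defined above.  Assumes the caller already holds a
 * passive-target epoch on `win` (e.g. via MPI_Win_lock_all). */
static double *fetch_doubles(MPI_Win win, int target, MPI_Count count)
{
    double *local = malloc((size_t)count * sizeof(double));
    MPI_Request req;

    if (local == NULL)
        return NULL;

    /* Routes through MPI_Rget directly for small counts, or through a
     * contiguous large-count datatype otherwise. */
    MPIX_Rget_x(local, count, MPI_DOUBLE, target, 0 /* target disp */,
                count, MPI_DOUBLE, win, &req);

    /* Request-based RMA: completing the request guarantees the local
     * buffer contains the fetched data. */
    MPI_Wait(&req, MPI_STATUS_IGNORE);
    return local;
}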
JNIEXPORT jlong JNICALL Java_mpi_Win_rGet(
        JNIEnv *env, jobject jthis, jlong win, jobject origin, jint orgCount,
        jlong orgType, jint targetRank, jint targetDisp, jint targetCount,
        jlong targetType, jint base)
{
    void *orgPtr = (*env)->GetDirectBufferAddress(env, origin);
    MPI_Request request;

    int rc = MPI_Rget(orgPtr, orgCount, (MPI_Datatype)orgType,
                      targetRank, (MPI_Aint)targetDisp, targetCount,
                      (MPI_Datatype)targetType, (MPI_Win)win, &request);

    ompi_java_exceptionCheck(env, rc);
    return (jlong)request;
}
#include <stdio.h>
#include <assert.h>
#include "mpi.h"

/* ITER (iterations per test phase) is defined in the full test source. */

int main( int argc, char *argv[] )
{
    int rank, nproc, i;
    int errors = 0, all_errors = 0;
    int *buf;
    MPI_Win window;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (nproc < 2) {
        if (rank == 0)
            printf("Error: must be run with two or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /** Create using MPI_Win_create() **/

    if (rank == 0) {
        MPI_Alloc_mem(4*sizeof(int), MPI_INFO_NULL, &buf);
        *buf = nproc-1;
    } else
        buf = NULL;

    MPI_Win_create(buf, 4*sizeof(int)*(rank == 0), 1, MPI_INFO_NULL,
                   MPI_COMM_WORLD, &window);

    /* PROC_NULL Communication */
    {
        MPI_Request pn_req[4];
        int val[4], res;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT,
                            MPI_PROC_NULL, 0, 1, MPI_INT, MPI_REPLACE,
                            window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, MPI_PROC_NULL, 1, 1, MPI_INT,
                 window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, MPI_PROC_NULL, 2, 1, MPI_INT,
                 window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, MPI_PROC_NULL, 3, 1, MPI_INT,
                        MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, window);

    /* GET-ACC: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request gacc_req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget_accumulate(&rank, 1, MPI_INT, &val, 1, MPI_INT, 0, 0, 1,
                            MPI_INT, MPI_REPLACE, window, &gacc_req);
        assert(gacc_req != MPI_REQUEST_NULL);
        MPI_Wait(&gacc_req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0)
        *buf = nproc-1;
    MPI_Win_sync(window);

    /* GET+PUT: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Rput(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0)
        *buf = nproc-1;
    MPI_Win_sync(window);

    /* GET+ACC: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Raccumulate(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE,
                        window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc-1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc-1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Win_unlock(0, window);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Wait inside of an epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT,
                            target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT,
                        MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);

        MPI_Win_unlock_all(window);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Wait outside of an epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT,
                            target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT,
                        MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
    }

    /* Wait in a different epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT,
                            target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT,
                        MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Win_lock_all(0, window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_unlock_all(window);
    }

    /* Wait in a fence epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT,
                            target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT,
                        MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Win_fence(0, window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_fence(0, window);
    }

    MPI_Win_free(&window);

    if (buf) MPI_Free_mem(buf);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();

    return 0;
}
dart_ret_t dart_get_handle(
  void          * dest,
  dart_gptr_t     gptr,
  size_t          nbytes,
  dart_handle_t * handle)
{
  MPI_Request  mpi_req;
  MPI_Aint     disp_s, disp_rel;
  MPI_Datatype mpi_type;
  MPI_Win      win;
  dart_unit_t  target_unitid_abs = gptr.unitid;
  dart_unit_t  target_unitid_rel = target_unitid_abs;
  int          mpi_ret;
  uint64_t     offset = gptr.addr_or_offs.offset;
  uint16_t     index  = gptr.flags;
  int16_t      seg_id = gptr.segid;
  /*
   * MPI uses offset type int, do not copy more than INT_MAX elements:
   */
  if (nbytes > INT_MAX) {
    DART_LOG_ERROR("dart_get_handle ! failed: nbytes > INT_MAX");
    return DART_ERR_INVAL;
  }
  int n_count = (int)(nbytes);
  mpi_type = MPI_BYTE;

  *handle = (dart_handle_t) malloc(sizeof(struct dart_handle_struct));

  if (seg_id > 0) {
    unit_g2l(index, target_unitid_abs, &target_unitid_rel);
  }
  DART_LOG_DEBUG("dart_get_handle() uid_abs:%d uid_rel:%d "
                 "o:%"PRIu64" s:%d i:%d, nbytes:%zu",
                 target_unitid_abs, target_unitid_rel,
                 offset, seg_id, index, nbytes);
  DART_LOG_TRACE("dart_get_handle: allocated handle:%p", (void *)(*handle));

#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS)
  DART_LOG_DEBUG("dart_get_handle: shared windows enabled");
  if (seg_id >= 0) {
    int    i;
    char * baseptr;
    /*
     * Use memcpy if the target is in the same node as the calling unit:
     */
    i = dart_sharedmem_table[index][gptr.unitid];
    if (i >= 0) {
      DART_LOG_DEBUG("dart_get_handle: shared memory segment, seg_id:%d",
                     seg_id);
      if (seg_id) {
        if (dart_adapt_transtable_get_baseptr(seg_id, i, &baseptr) == -1) {
          DART_LOG_ERROR("dart_get_handle ! "
                         "dart_adapt_transtable_get_baseptr failed");
          /* Release the handle allocated above before returning the error: */
          free(*handle);
          return DART_ERR_INVAL;
        }
      } else {
        baseptr = dart_sharedmem_local_baseptr_set[i];
      }
      baseptr += offset;
      DART_LOG_DEBUG("dart_get_handle: memcpy %zu bytes", nbytes);
      memcpy((char*)dest, baseptr, nbytes);
      /*
       * Mark request as completed:
       */
      (*handle)->request = MPI_REQUEST_NULL;
      if (seg_id != 0) {
        (*handle)->dest = target_unitid_rel;
        (*handle)->win  = dart_win_lists[index];
      } else {
        (*handle)->dest = target_unitid_abs;
        (*handle)->win  = dart_win_local_alloc;
      }
      return DART_OK;
    }
  }
#else
  DART_LOG_DEBUG("dart_get_handle: shared windows disabled");
#endif /* !defined(DART_MPI_DISABLE_SHARED_WINDOWS) */

  /*
   * MPI shared windows disabled or target and calling unit are on different
   * nodes, use MPI_Rget:
   */
  if (seg_id != 0) {
    /*
     * The memory accessed is allocated with collective allocation.
     */
    DART_LOG_TRACE("dart_get_handle: collective, segment:%d", seg_id);
    win = dart_win_lists[index];
    /* Translate local unitID (relative to teamid) into global unitID
     * (relative to DART_TEAM_ALL).
     *
     * Note: target_unitid should not be the global unitID but rather the
     * local unitID relative to the team associated with the specified win
     * object.
     */
    if (dart_adapt_transtable_get_disp(
          seg_id, target_unitid_rel, &disp_s) == -1) {
      DART_LOG_ERROR(
        "dart_get_handle ! dart_adapt_transtable_get_disp failed");
      free(*handle);
      return DART_ERR_INVAL;
    }
    disp_rel = disp_s + offset;
    DART_LOG_TRACE("dart_get_handle: -- disp_s:%"PRId64" disp_rel:%"PRId64"",
                   disp_s, disp_rel);
    /* TODO: Check if
     *   MPI_Rget_accumulate(
     *     NULL, 0, MPI_BYTE, dest, nbytes, MPI_BYTE,
     *     target_unitid, disp_rel, nbytes, MPI_BYTE, MPI_NO_OP, win,
     *     &mpi_req)
     *   ... could be a better alternative?
     */
    DART_LOG_DEBUG("dart_get_handle: -- %d elements (collective allocation) "
                   "from %d at offset %"PRIu64"",
                   n_count, target_unitid_rel, offset);
    DART_LOG_DEBUG("dart_get_handle: -- MPI_Rget");
    mpi_ret = MPI_Rget(
                dest,               // origin address
                n_count,            // origin count
                mpi_type,           // origin data type
                target_unitid_rel,  // target rank
                disp_rel,           // target disp in window
                n_count,            // target count
                mpi_type,           // target data type
                win,                // window
                &mpi_req);
    if (mpi_ret != MPI_SUCCESS) {
      DART_LOG_ERROR("dart_get_handle ! MPI_Rget failed");
      free(*handle);
      return DART_ERR_INVAL;
    }
    (*handle)->dest = target_unitid_rel;
  } else {
    /*
     * The memory accessed is allocated with local allocation.
     */
    DART_LOG_TRACE("dart_get_handle: -- local, segment:%d", seg_id);
    DART_LOG_DEBUG("dart_get_handle: -- %d elements (local allocation) "
                   "from %d at offset %"PRIu64"",
                   n_count, target_unitid_abs, offset);
    win = dart_win_local_alloc;
    DART_LOG_DEBUG("dart_get_handle: -- MPI_Rget");
    mpi_ret = MPI_Rget(
                dest,               // origin address
                n_count,            // origin count
                mpi_type,           // origin data type
                target_unitid_abs,  // target rank
                offset,             // target disp in window
                n_count,            // target count
                mpi_type,           // target data type
                win,                // window
                &mpi_req);
    if (mpi_ret != MPI_SUCCESS) {
      DART_LOG_ERROR("dart_get_handle ! MPI_Rget failed");
      free(*handle);
      return DART_ERR_INVAL;
    }
    (*handle)->dest = target_unitid_abs;
  }
  (*handle)->request = mpi_req;
  (*handle)->win     = win;
  DART_LOG_TRACE("dart_get_handle > handle(%p) dest:%d win:%"PRIu64" req:%d",
                 (void*)(*handle), (*handle)->dest, (uint64_t)win, mpi_req);
  return DART_OK;
}
FORT_DLL_SPEC void FORT_CALL mpi_rget_ ( void *v1, MPI_Fint *v2, MPI_Fint *v3,
                                         MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *v6,
                                         MPI_Fint *v7, MPI_Fint *v8, MPI_Fint *v9,
                                         MPI_Fint *ierr )
{
    *ierr = MPI_Rget( v1, (int)*v2, (MPI_Datatype)(*v3), (int)*v4,
                      (MPI_Aint)*v5, (int)*v6, (MPI_Datatype)(*v7),
                      (MPI_Win)*v8, (MPI_Request *)(v9) );
}
int main(int argc, char **argv)
{
    FILE   *fp, *fp2;
    char   testName[32] = "MPI_Rget", file1[64], file2[64];
    int    dblSize, proc, nprocs, npairs, partner;
    unsigned int i, j, k, size, localSize, NLOOP = NLOOP_MAX;
    unsigned int smin = MIN_P2P_SIZE, smed = MED_P2P_SIZE, smax = MAX_P2P_SIZE;
    double tScale = USEC, bwScale = MB_8;
    double tStart, timeMin, timeMinGlobal, overhead, threshold_lo, threshold_hi;
    double msgBytes, sizeBytes, localMax, UsedMem;
    double tElapsed[NREPS], tElapsedGlobal[NREPS];
    double *A, *B;
    MPI_Win     win;
    MPI_Status  stat;
    MPI_Request req;

    // Initialize parallel environment
    MPI_Init(&argc, &argv);
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &proc );

    // Test input parameters
    if( nprocs%2 != 0 && proc == 0 )
        fatalError( "P2P test requires an even number of processors" );

    // Check for user defined limits
    checkEnvP2P( proc, &NLOOP, &smin, &smed, &smax );

    // Initialize local variables
    localMax = 0.0;
    npairs   = nprocs/2;
    if( proc <  npairs ) partner = proc + npairs;
    if( proc >= npairs ) partner = proc - npairs;
    UsedMem = (double)smax*(double)sizeof(double)*2.0;

    // Allocate and initialize arrays
    srand( SEED );
    A = doubleVector( smax );
    B = doubleVector( smax );

    // Open output file and write header
    if( proc == 0 ){
        // Check timer overhead in seconds
        timerTest( &overhead, &threshold_lo, &threshold_hi );
        // Open output files and write headers
        sprintf( file1, "rget_time-np_%.4d.dat", nprocs );
        sprintf( file2, "rget_bw-np_%.4d.dat",   nprocs );
        fp  = fopen( file1, "a" );
        fp2 = fopen( file2, "a" );
        printHeaders( fp, fp2, testName, UsedMem, overhead, threshold_lo );
    }

    // Get type size
    MPI_Type_size( MPI_DOUBLE, &dblSize );
    // Set up a window for RMA
    MPI_Win_create( A, smax*dblSize, dblSize, MPI_INFO_NULL, MPI_COMM_WORLD, &win );
    MPI_Win_lock_all( 0, win );

    //================================================================
    // Single loop with minimum size to verify that inner loop length
    // is long enough for the timings to be accurate
    //================================================================
    // Warmup with a medium size message
    if( proc < npairs ){
        MPI_Rget( B, smed, MPI_DOUBLE, partner, 0, smed, MPI_DOUBLE, win, &req );
        MPI_Wait( &req, &stat );
        MPI_Win_flush_all( win );
    }
    // Test if current NLOOP is enough to capture fastest test cases
    MPI_Barrier( MPI_COMM_WORLD );
    tStart = benchTimer();
    if( proc < npairs ){
        for(j = 0; j < NLOOP; j++){
            MPI_Rget( B, smin, MPI_DOUBLE, partner, 0, smin, MPI_DOUBLE, win, &req );
            MPI_Wait( &req, &stat );
            MPI_Win_flush_all( win );
        }
    }
    timeMin = benchTimer() - tStart;
    MPI_Reduce( &timeMin, &timeMinGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD );
    if( proc == 0 ) resetInnerLoop( timeMinGlobal, threshold_lo, &NLOOP );
    MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );

    //================================================================
    // Execute test for each requested size
    //================================================================
    for( size = smin; size <= smax; size = size*2 ){

        // Warmup with a medium size message
        if( proc < npairs ){
            MPI_Rget( B, smed, MPI_DOUBLE, partner, 0, smed, MPI_DOUBLE, win, &req );
            MPI_Wait( &req, &stat );
            MPI_Win_flush_all( win );
        }

        // Repeat NREPS to collect statistics
        for(i = 0; i < NREPS; i++){
            MPI_Barrier( MPI_COMM_WORLD );
            tStart = benchTimer();
            if( proc < npairs ){
                for(j = 0; j < NLOOP; j++){
                    MPI_Rget( B, size, MPI_DOUBLE, partner, 0, size, MPI_DOUBLE, win, &req );
                    MPI_Wait( &req, &stat );
                    MPI_Win_flush_all( win );
                }
            }
            tElapsed[i] = benchTimer() - tStart;
        }
        MPI_Reduce( tElapsed, tElapsedGlobal, NREPS, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );

        // Only task 0 needs to do the analysis of the collected data
        if( proc == 0 ){
            // sizeBytes is size to write to file
            // msgBytes is actual data exchanged on the wire
            msgBytes  = (double)size*(double)npairs*(double)dblSize;
            sizeBytes = (double)size*(double)dblSize;
            post_process( fp, fp2, threshold_hi, tElapsedGlobal, tScale, bwScale,
                          size*dblSize, sizeBytes, msgBytes, &NLOOP, &localMax,
                          &localSize );
        }
        MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );
    }

    MPI_Win_unlock_all( win );
    MPI_Win_free( &win );
    MPI_Barrier( MPI_COMM_WORLD );
    free( A );
    free( B );

    //================================================================
    // Print completion message, free memory and exit
    //================================================================
    if( proc == 0 ){
        printSummary( fp2, testName, localMax, localSize );
        fclose( fp2 );
        fclose( fp );
    }

    MPI_Finalize();
    return 0;
}