int main(int argc, char **argv)
{
    FILE   *fp, *fp2, *pipe;
    char   testName[32] = "MPI_Allreduce", file1[64], file2[64], pipeStr[8];
    int    dblSize, proc, nprocs, nodeCPUs, nodes;
    unsigned int i, j, size, localSize, NLOOP = NLOOP_MAX;
    unsigned int smin = MIN_COL_SIZE, smed = MED_COL_SIZE, smax = MAX_COL_SIZE;
    double tScale = USEC, bwScale = MB_8;
    double tStart, timeMin, timeMinGlobal, overhead, threshold_lo, threshold_hi;
    double msgBytes, sizeBytes, UsedMem, localMax;
    double tElapsed[NREPS], tElapsedGlobal[NREPS];
    double *A, *B;

    pipe = popen( "cat /proc/cpuinfo | grep processor | wc -l", "r" );
    fgets( pipeStr, 8, pipe );
    pclose( pipe );
    nodeCPUs = atoi( pipeStr );

    // Initialize parallel environment
    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &proc );

    // Reset maximum message size to fit within node memory
    if( nprocs > nodeCPUs ){
        nodes = nprocs / nodeCPUs;
        if( smax > nodes ) smax = smax / nodes;
        if( smed > nodes ) smed = smed / nodes;
    }

    // Check for user defined limits
    checkEnvCOL( proc, &NLOOP, &smin, &smed, &smax );

    // Initialize local variables
    dblSize = sizeof(double);
    UsedMem = (double)smax*(double)dblSize*(double)( nprocs + 1 );

    // Allocate and initialize arrays
    srand( SEED );
    A = doubleVector( smax );
    B = doubleVector( smax*nprocs );

    // Open output file and write header
    if( proc == 0 ){
        // Check timer overhead in seconds
        timerTest( &overhead, &threshold_lo, &threshold_hi );
        // Open output files and write headers
        sprintf( file1, "allreduce_time-np_%.4d.dat", nprocs );
        sprintf( file2, "allreduce_bw-np_%.4d.dat",   nprocs );
        fp  = fopen( file1, "a" );
        fp2 = fopen( file2, "a" );
        printHeaders( fp, fp2, testName, UsedMem, overhead, threshold_lo );
    }

    //================================================================
    // Single loop with minimum size to verify that inner loop length
    // is long enough for the timings to be accurate
    //================================================================
    // Warmup with a medium size message
    MPI_Allreduce( A, B, smed, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
    // Test if current NLOOP is enough to capture fastest test cases
    MPI_Barrier( MPI_COMM_WORLD );
    tStart = benchTimer();
    for(j = 0; j < NLOOP; j++){
        MPI_Allreduce( A, B, smin, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
    }
    timeMin = benchTimer() - tStart;
    MPI_Reduce( &timeMin, &timeMinGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD );
    if( proc == 0 ) resetInnerLoop( timeMinGlobal, threshold_lo, &NLOOP );
    MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );

    //================================================================
    // Execute test for each requested size
    //================================================================
    localMax = 0.0;
    for( size = smin; size <= smax; size = size*2 ){

        // Warmup with a medium size message
        MPI_Allreduce( A, B, smed, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );

        // Repeat NREPS to collect statistics
        for(i = 0; i < NREPS; i++){
            MPI_Barrier( MPI_COMM_WORLD );
            tStart = benchTimer();
            for(j = 0; j < NLOOP; j++){
                MPI_Allreduce( A, B, size, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
            }
            tElapsed[i] = benchTimer() - tStart;
        }
        MPI_Reduce( tElapsed, tElapsedGlobal, NREPS, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );

        // Only task 0 needs to do the analysis of the collected data
        if( proc == 0 ){
            // sizeBytes is size to write to file
            // msgBytes is actual data exchanged on the wire
            msgBytes  = (double)size*(double)nprocs*(double)dblSize;
            sizeBytes = (double)size*(double)dblSize;
            post_process( fp, fp2, threshold_hi, tElapsedGlobal, tScale,
                          bwScale, size*dblSize, sizeBytes, msgBytes,
                          &NLOOP, &localMax, &localSize );
        }
        MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );
    }

    //================================================================
    // Print completion message, free memory and exit
    //================================================================
    if( proc == 0 ){
        fclose( fp );
        fclose( fp2 );
        fprintf( stdout, "\n %s test completed.\n\n", testName );
    }
    free( A );
    free( B );

    MPI_Finalize();
    return 0;
}
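//================================================================
// The helpers used above (benchTimer, doubleVector, timerTest,
// post_process, ...) are defined elsewhere in this code base and
// are not shown in this listing. Below is a minimal sketch of
// plausible implementations for the two most heavily used ones,
// assuming benchTimer() simply wraps MPI_Wtime() and doubleVector()
// returns a randomly initialized array; the real code may differ.
//================================================================
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

// Wall-clock timer in seconds (assumed to be a thin MPI_Wtime wrapper)
double benchTimer( void )
{
    return MPI_Wtime();
}

// Allocate N doubles and fill them with pseudo-random values so the
// operations act on non-trivial data; abort on allocation failure
double *doubleVector( unsigned int N )
{
    unsigned int i;
    double *v = (double *)malloc( (size_t)N*sizeof(double) );
    if( v == NULL ){
        fprintf( stderr, "doubleVector: failed to allocate %u doubles\n", N );
        MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    for(i = 0; i < N; i++) v[i] = (double)rand() / (double)RAND_MAX;
    return v;
}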
int main( int argc, char **argv )
{
    FILE   *fp;
    char   testName[32] = "MPI_Latency", file1[64], file2[64];
    int    dblSize, proc, nprocs, partner, tag = 0, NodeProcs;
    unsigned int i, j, size, localSize, NLOOP = NLOOP_MAX;
    unsigned int smin = MIN_P2P_SIZE, smed = MED_P2P_SIZE, smax = MAX_P2P_SIZE;
    double tScale = USEC;
    double overhead, threshold_lo, threshold_hi;
    double tStart, timeMin, timeMinGlobal, msgBytes, localMax, UsedMem, ReqMem, NodeMem;
    double tAvg, tMin, tMax, stdDev;
    double tElapsed[NREPS], tElapsedGlobal[NREPS], tMsg[NREPS];
    char   sndBuffer = 'a', rcvBuffer = 'b';
    double *A, *B;
    MPI_Status status;

    // Initialize parallel environment
    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &proc );

    // Test input parameters
    if( nprocs != 2 && proc == 0 )
        fatalError( "P2P latency will only run with 2 tasks" );

    // Check for user defined limits
    checkEnvP2P( proc, &NLOOP, &smin, &smed, &smax );

    // Initialize local variables
    partner = 1 - proc;
    dblSize = sizeof(double);
    UsedMem = (double)smed*(double)dblSize*2.0;

    // Allocate and initialize arrays
    // TODO: Consider Mersenne Twister to improve startup time
    srand( SEED );
    A = doubleVector( smed );
    B = doubleVector( smed );

    // Open output file and write header
    if( proc == 0 ){
        // Check timer overhead in seconds
        timerTest( &overhead, &threshold_lo, &threshold_hi );
        // Open output files and write headers
        sprintf( file1, "latency.dat" );
        fp = fopen( file1, "a" );
        printLatencyHeader( fp, testName, UsedMem, overhead, threshold_lo );
    }

    //================================================================
    // Single loop with minimum size to verify that inner loop length
    // is long enough for the timings to be accurate
    //================================================================
    // Warmup with a medium size exchange
    if( proc == 0 ){
        MPI_Send( A, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD );
        MPI_Recv( B, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD, &status );
    }else{
        MPI_Recv( B, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD, &status );
        MPI_Send( A, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD );
    }
    // Test if current NLOOP is enough to capture fastest test cases
    MPI_Barrier( MPI_COMM_WORLD );
    tStart = benchTimer();
    if( proc == 0 ){
        for(j = 0; j < NLOOP; j++){
            MPI_Send( &sndBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD );
            MPI_Recv( &rcvBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD, &status );
        }
    }else{
        for(j = 0; j < NLOOP; j++){
            MPI_Recv( &rcvBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD, &status );
            MPI_Send( &sndBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD );
        }
    }
    timeMin = benchTimer() - tStart;
    MPI_Reduce( &timeMin, &timeMinGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD );
    if( proc == 0 ) resetInnerLoop( timeMinGlobal, threshold_lo, &NLOOP );
    MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );

    //================================================================
    // Execute test
    //================================================================
    // Warmup with a medium size exchange
    if( proc == 0 ){
        MPI_Send( A, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD );
        MPI_Recv( B, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD, &status );
    }else{
        MPI_Recv( B, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD, &status );
        MPI_Send( A, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD );
    }
    // Repeat NREPS to collect statistics
    for(i = 0; i < NREPS; i++){
        MPI_Barrier( MPI_COMM_WORLD );
        tStart = benchTimer();
        if( proc == 0 ){
            for(j = 0; j < NLOOP; j++){
                MPI_Send( &sndBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD );
                MPI_Recv( &rcvBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD, &status );
            }
        }else{
            for(j = 0; j < NLOOP; j++){
                MPI_Recv( &rcvBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD, &status );
                MPI_Send( &sndBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD );
            }
        }
        tElapsed[i] = benchTimer() - tStart;
    }
    MPI_Reduce( tElapsed, tElapsedGlobal, NREPS, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );

    // Only task 0 needs to do the analysis of the collected data
    if( proc == 0 ){
        // Get the time per iteration
        for(i = 0; i < NREPS; i++){
            tMsg[i] = 0.5*tElapsedGlobal[i] / ( (double)NLOOP );
        }
        // Calculate Average, Minimum and Maximum values
        stats( NREPS, tMsg, &tAvg, &tMax, &tMin, &stdDev, tScale );
        // Save these results to file
        saveData( fp, sizeof(char), NLOOP, tAvg, tMax, tMin, stdDev );
        fprintf( stdout, "MPI latency is %6.1f usec\n\n", tMin );
    }

    //================================================================
    // Print completion message, free memory and exit
    //================================================================
    if( proc == 0 ) fclose( fp );
    free( A );
    free( B );

    MPI_Finalize();
    return 0;
}
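//================================================================
// Each timed iteration above is a full round trip (one MPI_Send
// plus one MPI_Recv), so the one-way latency is half the
// per-iteration time; that is the 0.5 factor in the analysis loop.
// The stats() helper is defined elsewhere in this code base; the
// sketch below shows what it presumably computes, assuming sample
// statistics scaled by tScale (USEC) from seconds to microseconds.
// The real implementation may differ.
//================================================================
#include <math.h>

// Compute average, maximum, minimum and (sample) standard deviation
// of n timings, then scale all results to the reporting unit
void stats( int n, double *t, double *tAvg, double *tMax,
            double *tMin, double *stdDev, double scale )
{
    int i;
    double sum = 0.0, sumSq = 0.0;

    *tMax = t[0];
    *tMin = t[0];
    for(i = 0; i < n; i++){
        if( t[i] > *tMax ) *tMax = t[i];
        if( t[i] < *tMin ) *tMin = t[i];
        sum += t[i];
    }
    *tAvg = sum / (double)n;
    for(i = 0; i < n; i++) sumSq += ( t[i] - *tAvg )*( t[i] - *tAvg );
    *stdDev = ( n > 1 ) ? sqrt( sumSq / (double)( n - 1 ) ) : 0.0;

    // Convert from seconds to the requested unit (e.g. microseconds)
    *tAvg   *= scale;
    *tMax   *= scale;
    *tMin   *= scale;
    *stdDev *= scale;
}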
int main(int argc, char **argv)
{
    FILE   *fp, *fp2;
    char   testName[32] = "MPI_Get_Fence", file1[64], file2[64];
    int    dblSize, proc, nprocs, npairs, partner;
    unsigned int i, j, k, size, localSize, NLOOP = NLOOP_MAX;
    unsigned int smin = MIN_P2P_SIZE, smed = MED_P2P_SIZE, smax = MAX_P2P_SIZE;
    double tScale = USEC, bwScale = MB_8;
    double tStart, timeMin, timeMinGlobal, overhead, threshold_lo, threshold_hi;
    double msgBytes, sizeBytes, localMax, UsedMem;
    double tElapsed[NREPS], tElapsedGlobal[NREPS];
    double *A, *B;
    MPI_Win win;

    // Initialize parallel environment
    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &proc );

    // Test input parameters
    if( nprocs%2 != 0 && proc == 0 )
        fatalError( "P2P test requires an even number of processors" );

    // Check for user defined limits
    checkEnvP2P( proc, &NLOOP, &smin, &smed, &smax );

    // Initialize local variables
    localMax = 0.0;
    npairs   = nprocs/2;
    if( proc <  npairs ) partner = proc + npairs;
    if( proc >= npairs ) partner = proc - npairs;
    UsedMem = (double)smax*(double)sizeof(double)*2.0;

    // Allocate and initialize arrays
    srand( SEED );
    A = doubleVector( smax );
    B = doubleVector( smax );

    // Open output file and write header
    if( proc == 0 ){
        // Check timer overhead in seconds
        timerTest( &overhead, &threshold_lo, &threshold_hi );
        // Open output files and write headers
        sprintf( file1, "getfence_time-np_%.4d.dat", nprocs );
        sprintf( file2, "getfence_bw-np_%.4d.dat",   nprocs );
        fp  = fopen( file1, "a" );
        fp2 = fopen( file2, "a" );
        printHeaders( fp, fp2, testName, UsedMem, overhead, threshold_lo );
    }

    // Get type size
    MPI_Type_size( MPI_DOUBLE, &dblSize );
    // Set up a window for RMA
    MPI_Win_create( A, smax*dblSize, dblSize, MPI_INFO_NULL, MPI_COMM_WORLD, &win );

    //================================================================
    // Single loop with minimum size to verify that inner loop length
    // is long enough for the timings to be accurate
    //================================================================
    // Warmup with a medium size message
    if( proc < npairs ){
        MPI_Win_fence( 0, win );
        MPI_Get( B, smed, MPI_DOUBLE, partner, 0, smed, MPI_DOUBLE, win );
        MPI_Win_fence( 0, win );
    }else{
        MPI_Win_fence( 0, win );
        MPI_Win_fence( 0, win );
    }
    // Test if current NLOOP is enough to capture fastest test cases
    MPI_Barrier( MPI_COMM_WORLD );
    tStart = benchTimer();
    if( proc < npairs ){
        for(j = 0; j < NLOOP; j++){
            MPI_Win_fence( 0, win );
            MPI_Get( B, smin, MPI_DOUBLE, partner, 0, smin, MPI_DOUBLE, win );
            MPI_Win_fence( 0, win );
        }
    }else{
        for(j = 0; j < NLOOP; j++){
            MPI_Win_fence( 0, win );
            MPI_Win_fence( 0, win );
        }
    }
    timeMin = benchTimer() - tStart;
    MPI_Reduce( &timeMin, &timeMinGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD );
    if( proc == 0 ) resetInnerLoop( timeMinGlobal, threshold_lo, &NLOOP );
    MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );

    //================================================================
    // Execute test for each requested size
    //================================================================
    for( size = smin; size <= smax; size = size*2 ){

        // Warmup with a medium size message
        if( proc < npairs ){
            MPI_Win_fence( 0, win );
            MPI_Get( B, smed, MPI_DOUBLE, partner, 0, smed, MPI_DOUBLE, win );
            MPI_Win_fence( 0, win );
        }else{
            MPI_Win_fence( 0, win );
            MPI_Win_fence( 0, win );
        }

        // Repeat NREPS to collect statistics
        for(i = 0; i < NREPS; i++){
            MPI_Barrier( MPI_COMM_WORLD );
            tStart = benchTimer();
            if( proc < npairs ){
                for(j = 0; j < NLOOP; j++){
                    MPI_Win_fence( 0, win );
                    MPI_Get( B, size, MPI_DOUBLE, partner, 0, size, MPI_DOUBLE, win );
                    MPI_Win_fence( 0, win );
                }
            }else{
                for(j = 0; j < NLOOP; j++){
                    MPI_Win_fence( 0, win );
                    MPI_Win_fence( 0, win );
                }
            }
            tElapsed[i] = benchTimer() - tStart;
        }
        MPI_Reduce( tElapsed, tElapsedGlobal, NREPS, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );

        // Only task 0 needs to do the analysis of the collected data
        if( proc == 0 ){
            // sizeBytes is size to write to file
            // msgBytes is actual data exchanged on the wire
            msgBytes  = (double)size*(double)npairs*(double)dblSize;
            sizeBytes = (double)size*(double)dblSize;
            post_process( fp, fp2, threshold_hi, tElapsedGlobal, tScale,
                          bwScale, size*dblSize, sizeBytes, msgBytes,
                          &NLOOP, &localMax, &localSize );
        }
        MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );
    }
    MPI_Win_free( &win );
    MPI_Barrier( MPI_COMM_WORLD );
    free( A );
    free( B );

    //================================================================
    // Print completion message, free memory and exit
    //================================================================
    if( proc == 0 ){
        printSummary( fp2, testName, localMax, localSize );
        fclose( fp2 );
        fclose( fp );
    }

    MPI_Finalize();
    return 0;
}
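//================================================================
// All three benchmarks calibrate NLOOP before the measured runs:
// a single pass with the smallest message is timed, and the inner
// loop count is adjusted against the timer threshold reported by
// timerTest(). resetInnerLoop() is defined elsewhere; the sketch
// below shows one plausible calibration strategy, assuming NLOOP
// starts at NLOOP_MAX and is reduced to the smallest count that
// keeps the timed region above threshold_lo (the 2x safety margin
// is an assumption); the real code may differ.
//================================================================

// Shrink the inner loop count so each timed region stays just above
// the reliable-timing threshold instead of running NLOOP_MAX times
void resetInnerLoop( double timeMin, double threshold_lo,
                     unsigned int *NLOOP )
{
    // Observed time per iteration with the current loop count
    double tIter = timeMin / (double)(*NLOOP);
    // Smallest count that keeps total time above 2x the threshold
    unsigned int newLoop = (unsigned int)( 2.0*threshold_lo / tIter ) + 1;
    if( newLoop < *NLOOP ) *NLOOP = newLoop;
}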
std::pair< CombinerVariables::TypesOfChannelGeometry, std::vector< std::vector<double> > >
InfinitelyDeepRectangularChannel::getInternalParameters() const
{
    std::vector<double> doubleVector (1, channelWidth);
    std::vector< std::vector<double> > resultVector (1, doubleVector);
    return std::pair< CombinerVariables::TypesOfChannelGeometry, std::vector< std::vector<double> > >
               ( this->typeOfChannelGeometry, resultVector );
}