Example #1
int main(int argc, char **argv)
{
    FILE   *fp, *fp2, *pipe;
    char   testName[32] = "MPI_Allreduce", file1[64], file2[64], pipeStr[8];
    int    dblSize, proc, nprocs, nodeCPUs, nodes;
    unsigned int i, j, size, localSize, NLOOP = NLOOP_MAX;
    unsigned int smin = MIN_COL_SIZE, smed = MED_COL_SIZE, smax = MAX_COL_SIZE;
    double tScale = USEC, bwScale = MB_8;
    double tStart, timeMin, timeMinGlobal, overhead, threshold_lo, threshold_hi;
    double msgBytes, sizeBytes, UsedMem, localMax;
    double tElapsed[NREPS], tElapsedGlobal[NREPS];
    double *A, *B;

    // Count the logical CPUs on this node (lines matching "processor" in /proc/cpuinfo)
    pipe = popen( "cat /proc/cpuinfo | grep processor | wc -l", "r" );
    fgets( pipeStr, 8, pipe ); pclose(pipe);
    nodeCPUs = atoi(pipeStr);

    // Initialize parallel environment
    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &proc );

    // Reset maximum message size to fit within node memory
    if( nprocs > nodeCPUs ){
        nodes = nprocs / nodeCPUs;
        if( smax > nodes ) smax = smax / nodes;
        if( smed > nodes ) smed = smed / nodes;
    }

    // Check for user defined limits
    checkEnvCOL( proc, &NLOOP, &smin, &smed, &smax );

    // Initialize local variables
    dblSize = sizeof(double);
    UsedMem = (double)smax*(double)dblSize*(double)( nprocs + 1 );

    // Allocate and initialize arrays
    srand( SEED );
    A = doubleVector( smax );
    B = doubleVector( smax*nprocs );

    // Open output file and write header
    if( proc == 0 ){
        // Check timer overhead in seconds
        timerTest( &overhead, &threshold_lo, &threshold_hi );
        // Open output files and write headers
        sprintf( file1, "allreduce_time-np_%.4d.dat", nprocs );
        sprintf( file2, "allreduce_bw-np_%.4d.dat",   nprocs );
        fp  = fopen( file1, "a" );
        fp2 = fopen( file2, "a" );
        printHeaders( fp, fp2, testName, UsedMem, overhead, threshold_lo );
    }

    //================================================================
    // Single loop with minimum size to verify that inner loop length  
    // is long enough for the timings to be accurate                     
    //================================================================
    // Warmup with a medium size message
    MPI_Allreduce( A, B, smed, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
    // Test if current NLOOP is enough to capture fastest test cases
    MPI_Barrier( MPI_COMM_WORLD );
    tStart = benchTimer();
    for(j = 0; j < NLOOP; j++){
        MPI_Allreduce( A, B, smin, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    }
    timeMin = benchTimer() - tStart;
    MPI_Reduce( &timeMin, &timeMinGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD );
    if( proc == 0 ) resetInnerLoop( timeMinGlobal, threshold_lo, &NLOOP );
    MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );

    //================================================================
    // Execute test for each requested size                  
    //================================================================
    localMax = 0.0;
    for( size = smin; size <= smax; size = size*2 ){

        // Warmup with a medium size message
        MPI_Allreduce( A, B, smed, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );

        // Repeat NREPS to collect statistics
        for(i = 0; i < NREPS; i++){
            MPI_Barrier( MPI_COMM_WORLD );
            tStart = benchTimer();
            for(j = 0; j < NLOOP; j++){
                MPI_Allreduce( A, B, size, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
            }
            tElapsed[i] = benchTimer() - tStart;
        }
        MPI_Reduce( tElapsed, tElapsedGlobal, NREPS, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );   
        // Only task 0 needs to do the analysis of the collected data
        if( proc == 0 ){
            // sizeBytes is size to write to file
            // msgBytes is actual data exchanged on the wire
            msgBytes  = (double)size*(double)nprocs*(double)dblSize;
            sizeBytes = (double)size*(double)dblSize;
            post_process( fp, fp2, threshold_hi, tElapsedGlobal, tScale, 
                          bwScale, size*dblSize, sizeBytes, msgBytes, &NLOOP, 
                          &localMax, &localSize );
        }
        MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD ); 
    }
    //================================================================
    // Print completion message, free memory and exit                  
    //================================================================
    if( proc == 0 ){
        fclose(fp);
        fclose(fp2);
        fprintf( stdout,"\n %s test completed.\n\n", testName );
    }
    free( A );
    free( B );

    MPI_Finalize();
    return 0;
}
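
The three MPI benchmarks in this listing allocate their working buffers with a helper called doubleVector() that is not shown. A minimal sketch of what such a helper might look like, assuming it simply allocates the requested number of doubles and fills them with pseudo-random values (which would explain the srand( SEED ) call that precedes every use), is given below; the error handling is illustrative only.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical helper: allocate a vector of n doubles and initialize it with
 * pseudo-random values in [0,1). The benchmarks call srand( SEED ) beforehand,
 * so rand() is assumed to be the source of the initial data.                  */
double *doubleVector( unsigned int n )
{
    unsigned int i;
    double *v = (double *)malloc( (size_t)n*sizeof(double) );
    if( v == NULL ){
        fprintf( stderr, "doubleVector: failed to allocate %u doubles\n", n );
        exit( EXIT_FAILURE );
    }
    for( i = 0; i < n; i++ ) v[i] = (double)rand() / (double)RAND_MAX;
    return v;
}
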
Example #2
int main( int argc, char **argv )
{
    FILE    *fp;
    char    testName[32] = "MPI_Latency", file1[64], file2[64];
    int     dblSize, proc, nprocs, partner, tag = 0, NodeProcs;
    unsigned int i, j, size, localSize, NLOOP = NLOOP_MAX;
    unsigned int smin = MIN_P2P_SIZE, smed = MED_P2P_SIZE, smax = MAX_P2P_SIZE;
    double  tScale = USEC;
    double  overhead, threshold_lo, threshold_hi;
    double  tStart, timeMin, timeMinGlobal, msgBytes, localMax, UsedMem, ReqMem, NodeMem;
    double  tAvg, tMin, tMax, stdDev;
    double  tElapsed[NREPS], tElapsedGlobal[NREPS], tMsg[NREPS];
    char    sndBuffer = 'a', rcvBuffer = 'b';
    double  *A, *B;
    MPI_Status status;

    // Initialize parallel environment
    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &proc );

    // Test input parameters
    if( nprocs != 2 && proc == 0 )
        fatalError( "P2P latency will only run with 2 tasks" );

    // Check for user defined limits
    checkEnvP2P( proc, &NLOOP, &smin, &smed, &smax );

    // Initialize local variables
    partner = 1 - proc;
    dblSize  = sizeof(double);
    UsedMem = (double)smed*(double)dblSize*2.0;

    // Allocate and initialize arrays
    // TODO: Consider Mersenne Twister to improve startup time
    srand( SEED );
    A  = doubleVector( smed );
    B  = doubleVector( smed );

    // Open output file and write header
    if( proc == 0 ){
        // Check timer overhead in seconds
        timerTest( &overhead, &threshold_lo, &threshold_hi );
        // Open output files and write headers
        sprintf( file1, "latency.dat" );
        fp  = fopen( file1, "a" );
        printLatencyHeader( fp, testName, UsedMem, overhead, threshold_lo );
    }

    //================================================================
    // Single loop with minimum size to verify that inner loop length  
    // is long enough for the timings to be accurate                     
    //================================================================
    // Warmup with a medium size exchange
    if( proc == 0 ){
        MPI_Send( A, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD );
        MPI_Recv( B, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD, &status );
    }else{
        MPI_Recv( B, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD, &status );
        MPI_Send( A, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD );
    }
    // Test if current NLOOP is enough to capture fastest test cases
    MPI_Barrier( MPI_COMM_WORLD );
    tStart = benchTimer();
    if( proc == 0 ){
        for(j = 0; j < NLOOP; j++){
            MPI_Send( &sndBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD );
            MPI_Recv( &rcvBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD, &status );
        }
    }else{
        for(j = 0; j < NLOOP; j++){
            MPI_Recv( &rcvBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD, &status );
            MPI_Send( &sndBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD );
        }
    }
    timeMin = benchTimer() - tStart;
    MPI_Reduce( &timeMin, &timeMinGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD );
    if( proc == 0 ) resetInnerLoop( timeMinGlobal, threshold_lo, &NLOOP );
    MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );

    //================================================================
    // Execute test
    //================================================================
    // Warmup with a medium size exchange
    if( proc == 0 ){
        MPI_Send( A, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD );
        MPI_Recv( B, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD, &status );
    }else{
        MPI_Recv( B, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD, &status );
        MPI_Send( A, smed, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD );
    }

    // Repeat NREPS to collect statistics
    for(i = 0; i < NREPS; i++){
        MPI_Barrier(MPI_COMM_WORLD);
        tStart = benchTimer();
        if( proc == 0 ){
            for(j = 0; j < NLOOP; j++){
               MPI_Send( &sndBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD );
               MPI_Recv( &rcvBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD, &status );
            }
        }else{
            for(j = 0; j < NLOOP; j++){
                MPI_Recv( &rcvBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD, &status );
                MPI_Send( &sndBuffer, 1, MPI_CHAR, partner, tag, MPI_COMM_WORLD );
            }
        }
        tElapsed[i] = benchTimer() - tStart;
    } 
    MPI_Reduce( tElapsed, tElapsedGlobal, NREPS, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );
    
    // Only task 0 needs to do the analysis of the collected data
    if( proc == 0 ){
        // One-way latency per iteration: half the measured round-trip time
        // divided by the number of inner-loop iterations
        for(i = 0; i < NREPS; i++){
            tMsg[i]  = 0.5*tElapsedGlobal[i] / ( (double)NLOOP );
        }
        // Calculate Average, Minimum and Maximum values
        stats( NREPS, tMsg,  &tAvg,  &tMax,  &tMin,  &stdDev, tScale );
        // Save these results to file
        saveData( fp,  sizeof(char), NLOOP, tAvg,  tMax,  tMin,  stdDev );
        fprintf( stdout, "MPI latency is %6.1f usec\n\n", tMin );
        
    }
    //================================================================
    // Print completion message, free memory and exit                  
    //================================================================
    if( proc == 0 ) fclose( fp );
    free( A );
    free( B );

    MPI_Finalize();
    return 0;
}
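
Every timing loop in these examples is bracketed by calls to benchTimer(), which is also not shown. A minimal sketch, assuming it simply returns wall-clock time in seconds, is given below; in an MPI code MPI_Wtime() is the natural choice, and timerTest() would then measure its overhead and the thresholds passed on to resetInnerLoop() and post_process().

#include <mpi.h>

/* Hypothetical timer: return the current wall-clock time in seconds.
 * MPI_Wtime() is assumed here; any monotonic, high-resolution clock with
 * overhead well below threshold_lo would serve equally well.             */
double benchTimer( void )
{
    return MPI_Wtime();
}
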
Example #3
int main(int argc, char **argv)
{
    FILE    *fp, *fp2;
    char    testName[32] = "MPI_Get_Fence", file1[64], file2[64];
    int     dblSize, proc, nprocs, npairs, partner;
    unsigned int i, j, k, size, localSize, NLOOP = NLOOP_MAX;
    unsigned int smin = MIN_P2P_SIZE, smed = MED_P2P_SIZE, smax = MAX_P2P_SIZE;
    double  tScale = USEC, bwScale = MB_8;
    double  tStart, timeMin, timeMinGlobal, overhead, threshold_lo, threshold_hi;
    double  msgBytes, sizeBytes, localMax, UsedMem;
    double  tElapsed[NREPS], tElapsedGlobal[NREPS];
    double  *A, *B;
    MPI_Win   win;

    // Initialize parallel environment
    MPI_Init(&argc, &argv);
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    MPI_Comm_rank( MPI_COMM_WORLD, &proc );

    // Test input parameters
    if( nprocs%2 != 0 && proc == 0 )
        fatalError( "P2P test requires an even number of processors" );

    // Check for user defined limits
    checkEnvP2P( proc, &NLOOP, &smin, &smed, &smax );

    // Initialize local variables
    localMax = 0.0;
    npairs   = nprocs/2;
    if( proc < npairs  ) partner = proc + npairs;
    if( proc >= npairs ) partner = proc - npairs;
    UsedMem = (double)smax*(double)sizeof(double)*2.0;

    // Allocate and initialize arrays
    srand( SEED );
    A = doubleVector( smax );
    B = doubleVector( smax );

    // Open output file and write header
    if( proc == 0 ){
        // Check timer overhead in seconds
        timerTest( &overhead, &threshold_lo, &threshold_hi );
        // Open output files and write headers
        sprintf( file1, "getfence_time-np_%.4d.dat", nprocs );
        sprintf( file2, "getfence_bw-np_%.4d.dat",   nprocs );
        fp  = fopen( file1, "a" );
        fp2 = fopen( file2, "a" );
        printHeaders( fp, fp2, testName, UsedMem, overhead, threshold_lo );
    }

    // Get type size
    MPI_Type_size( MPI_DOUBLE, &dblSize );
    // Set up a window for RMA
    MPI_Win_create( A, smax*dblSize, dblSize, MPI_INFO_NULL, MPI_COMM_WORLD, &win );

    //================================================================
    // Single loop with minimum size to verify that inner loop length  
    // is long enough for the timings to be accurate                     
    //================================================================
    // Warmup with a medium size message
    if( proc < npairs ){
        MPI_Win_fence( 0, win );
        MPI_Get( B, smed, MPI_DOUBLE, partner, 0, smed, MPI_DOUBLE, win );
        MPI_Win_fence( 0, win );
    }else{
        MPI_Win_fence( 0, win );
        MPI_Win_fence( 0, win );
    }
    // Test if current NLOOP is enough to capture fastest test cases
    MPI_Barrier( MPI_COMM_WORLD );
    tStart = benchTimer();
    if( proc < npairs ){
        for(j = 0; j < NLOOP; j++){
            MPI_Win_fence( 0, win );
            MPI_Get( B, smin, MPI_DOUBLE, partner, 0, smin, MPI_DOUBLE, win );
            MPI_Win_fence( 0, win );
        }
    }else{
        for(j = 0; j < NLOOP; j++){
            MPI_Win_fence( 0, win );
            MPI_Win_fence( 0, win );
        }
    }
    timeMin = benchTimer() - tStart;
    MPI_Reduce( &timeMin, &timeMinGlobal, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD );
    if( proc == 0 ) resetInnerLoop( timeMinGlobal, threshold_lo, &NLOOP );
    MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );


    //================================================================
    // Execute test for each requested size                  
    //================================================================
    for( size = smin; size <= smax; size = size*2 ){

        // Warmup with a medium size message
        if( proc < npairs ){
            MPI_Win_fence( 0, win );
            MPI_Get( B, smed, MPI_DOUBLE, partner, 0, smed, MPI_DOUBLE, win );
            MPI_Win_fence( 0, win );
        }else{
            MPI_Win_fence( 0, win );
            MPI_Win_fence( 0, win );
        }

        // Repeat NREPS to collect statistics
        for(i = 0; i < NREPS; i++){
            MPI_Barrier( MPI_COMM_WORLD );
            tStart = benchTimer();
            if( proc < npairs ){
                for(j = 0; j < NLOOP; j++){
                    MPI_Win_fence( 0, win );
                    MPI_Get( B, size, MPI_DOUBLE, partner, 0, size, MPI_DOUBLE, win );
                    MPI_Win_fence( 0, win );
                }
            }else{
                for(j = 0; j < NLOOP; j++){
                    MPI_Win_fence( 0, win );
                    MPI_Win_fence( 0, win );
                }
            }
            tElapsed[i] = benchTimer() - tStart;
        }
        MPI_Reduce( tElapsed, tElapsedGlobal, NREPS, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );
        // Only task 0 needs to do the analysis of the collected data
        if( proc == 0 ){
            // sizeBytes is size to write to file
            // msgBytes is actual data exchanged on the wire
            msgBytes  = (double)size*(double)npairs*(double)dblSize;
            sizeBytes = (double)size*(double)dblSize;
            post_process( fp, fp2, threshold_hi, tElapsedGlobal, tScale, 
                          bwScale, size*dblSize, sizeBytes, msgBytes, &NLOOP, 
                          &localMax, &localSize );
        }
        MPI_Bcast( &NLOOP, 1, MPI_INT, 0, MPI_COMM_WORLD );

    }
    MPI_Win_free( &win );
    MPI_Barrier( MPI_COMM_WORLD );
    free( A );
    free( B );

    //================================================================
    // Print completion message, free memory and exit                  
    //================================================================
    if( proc == 0 ){
        printSummary( fp2, testName, localMax, localSize );
        fclose( fp2 ); 
        fclose( fp );
    }

    MPI_Finalize();
    return 0;
}
Example #4
std::pair< CombinerVariables::TypesOfChannelGeometry, std::vector< std::vector<double> > > InfinitelyDeepRectangularChannel::getInternalParameters() const
{
	std::vector<double> doubleVector (1,channelWidth);
	std::vector< std::vector<double> > resultVector (1,doubleVector);
	return std::pair<CombinerVariables::TypesOfChannelGeometry, std::vector< std::vector<double> > >(this->typeOfChannelGeometry, resultVector);
}