int main(int argc, char *argv[])
{
  double minTime       = 99999.0;
  double testTime      = 0.0;
  double maxBidirBw    = 0.0;
  double maxUnidirBw   = 0.0;
  int    maxBidirSize  = 0;
  int    maxUnidirSize = 0;
  char  *procFile      = NULL;
  int   *procList      = NULL;
  int    procListSize;
  int    rank, wsize, iters, procs;
  int    messStart, messStop, messFactor, messSize;
  int    procsPerNode, totalops, procIdx;
  int    useBarrier, printPairs, useNearestRank, dummy;
  double currBw;
  char   allocPattern;
  MPI_Comm activeComm;

  command = argv[0];

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &wsize);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if ( rank == 0 )
  {
    /* Debug scaffolding: count OpenMP threads and time a simple array copy.
       The commented-out lines are a leftover spin-wait on the r_sync flag. */
    //sleep(5);
    //while ( !__sync_bool_compare_and_swap(&r_sync, 59, 0) )
    //{
    //  sleep(2);
    //  printf("sync value %d \n", r_sync);
    //};

    int k = 0;
    #pragma omp parallel
    #pragma omp atomic
    k++;                      /* each thread increments k once */
    printf("Done Sync. Number of Threads counted = %i\n", k);

    int N = 1048576;
    int i = 0;
    int j = 0;
    double start, diff;

    /* a[] and b[] are globals defined elsewhere in this file. */
    for ( j = 0; j < 50; j++ )
    {
      #pragma omp parallel for
      for ( i = 0; i < N; i++ )
      {
        a[i] = 1.0;
        b[i] = 2.0;
      }

      start = MPI_Wtime();

      #pragma omp parallel for
      for ( i = 0; i < N; i++ )
      {
        a[i] = b[i];
      }

      diff = MPI_Wtime() - start;
      printf("ELAPSED time to copy : %11.6f N: %d \n", diff, N);
    }
  }
  else
  {
    //__sync_fetch_and_add(&r_sync, 1);
    printf("OK other ranks \n");
    //sleep(100);
  }

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
  return 0;

  /* NOTE: the early return above short-circuits the run; the original
     bandwidth benchmark below is currently unreachable. */

  if ( !processArgs(argc, argv, rank, wsize, &iters, &dummy, &messStart,
                    &messStop, &messFactor, &procFile, &procsPerNode,
                    &allocPattern, &printPairs, &useBarrier, &useNearestRank) )
  {
    if ( rank == 0 )
      printUse();

    MPI_Finalize();
    exit(-1);
  }

  if ( !getProcList(procFile, wsize, &procList, &procListSize,
                    procsPerNode, allocPattern) )
  {
    if ( procFile )
      printf("Failed to get process list from file %s.\n", procFile);
    else
      printf("Failed to allocate process list.\n");

    exit(-1);
  }

  if ( rank == 0 )
    printReportHeader();

  /* Set up intra-node shared memory structures. */
  if ( rank == 0 )
  {
    /* One rank per node allocates shared send request lists.  Note that a
       plain malloc() result is only meaningful to other ranks if req_array
       lives in a shared-memory region mapped at the same address in every
       process; the commented-out IS_SM_BUF check used to verify that. */
    req_array = malloc(wsize * sizeof(ReqInfo));

    /*if ( !IS_SM_BUF((void*)req_array) )
    {
      printf("Failed to allocate from SM region :%p \n", req_array);
      exit(-1);
    }*/
  }

  /* Broadcast the pointer value so that each rank has access to it. */
  MPI_Bcast(&req_array, 1, MPI_LONG, 0, MPI_COMM_WORLD);

  //printf("Broadcasting of shared mem region complete! rank : %d region_addr : %lu region_ptr : %lu \n", rank, &req_array, req_array);
  //printf("Access shm region ! rank : %d region for sync : %lu \n", rank, req_array[rank].sync);

  /* Commented-out shared-memory handshake test between rank 0 and rank 30:
  if ( rank == 0 )
  {
    char *comBuf = (char*)malloc(10);
    memset(comBuf, 1, 10);

    req_array[30].buffer = comBuf;
    comBuf[0] = 'n';
    req_array[30].sync = malloc(sizeof(int));
    req_array[30].sync[0] = 12;

    printf("rank : %d done sending buff ptr : %p sync ptr : %p \n",
           rank, comBuf, req_array[30].sync);
    printf("sleeping ! pid() %d \n", getpid());
    sleep(40000);
  }
  else if ( rank == 30 )
  {
    while ( req_array[30].sync == NULL ) { }
    while ( req_array[30].sync[0] != 12 ) { }

    printf("rank : %d buffer value: %c sync value : %d buff ptr : %p sync ptr : %p \n",
           rank, req_array[30].buffer[0], req_array[30].sync[0],
           req_array[30].buffer, req_array[30].sync);
    printf("sleeping ! pid() %d \n", getpid());
    sleep(40000);
  }

  MPI_Barrier(MPI_COMM_WORLD);
  return 0;
  */

  printPairs = 1;

  /* Unidirectional bandwidth tests */
  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( rank == 0 && printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode,
                       allocPattern, useNearestRank);
    }

    /* Create Communicator of all active processes */
    createActiveComm(procs, rank, wsize, procsPerNode, allocPattern,
                     printPairs, useNearestRank, &activeComm);

    // messStart = 8388608;
    // messStop  = 4096;
    // messStop  = 8388608;

    for ( messSize = messStart; messSize <= messStop; messSize *= messFactor )
    {
      testTime = runUnicomTest(procs, messSize, iters, rank, wsize,
                               procsPerNode, allocPattern, useBarrier,
                               useNearestRank, &activeComm);

      if ( rank == 0 && testTime > 0 )
      {
        totalops = iters * (procs/2);
        currBw   = ((double)totalops * (double)messSize / testTime) / 1000000;

        if ( currBw > maxUnidirBw )
        {
          maxUnidirBw   = currBw;
          maxUnidirSize = messSize;
        }

        if ( testTime < minTime )
          minTime = testTime;
      }
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    if ( rank == 0 )
      printf("\n");
  }

  /* Bidirectional bandwidth tests */
  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( rank == 0 && printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode,
                       allocPattern, useNearestRank);
    }

    /* Create Communicator of all active processes */
    createActiveComm(procs, rank, wsize, procsPerNode, allocPattern,
                     printPairs, useNearestRank, &activeComm);

    for ( messSize = messStart; messSize <= messStop; messSize *= messFactor )
    {
      testTime = runBicomTest(procs, messSize, iters, rank, wsize,
                              procsPerNode, allocPattern, useBarrier,
                              useNearestRank, &activeComm);

      if ( rank == 0 && testTime > 0 )
      {
        totalops = iters * procs;
        currBw   = (((double)totalops * (double)messSize) / testTime) / 1000000;

        if ( currBw > maxBidirBw )
        {
          maxBidirBw   = currBw;
          maxBidirSize = messSize;
        }

        if ( testTime < minTime )
          minTime = testTime;
      }
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    if ( rank == 0 )
      printf("\n");
  }

  if ( rank == 0 )
  {
    printf("Max Unidirectional Bandwidth : %13.2f for message size of %7d bytes\n",
           maxUnidirBw, maxUnidirSize);
    printf("Max Bidirectional Bandwidth : %13.2f for message size of %7d bytes\n",
           maxBidirBw, maxBidirSize);

    printParameters(iters, messStart, messStop, messFactor, procFile,
                    procsPerNode, allocPattern, useBarrier);

    free(req_array);
  }

  printReportFooter(minTime, rank, wsize, procsPerNode, useNearestRank);

  free(procList);

  MPI_Finalize();

  exit(0);
}
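/*
 * Illustrative sketch (not part of the original source): the debug loop above
 * prints only the elapsed copy time.  If a bandwidth figure is wanted, the
 * timed kernel reads N doubles from b[] and writes N doubles to a[], so a
 * helper along these lines could convert the timing.  The function name and
 * the 2*N*sizeof(double) traffic assumption are illustrative, not the
 * benchmark's.
 */
static double copyBandwidthMB(int n, double seconds)
{
  /* bytes read from b[] plus bytes written to a[], reported in MB/s */
  return ((2.0 * n * sizeof(double)) / seconds) / 1000000.0;
}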
void runTest ( TESTPARAMS * testParams )
{
  int procIdx, procs, messIdx;
  MPI_Comm activeComm = MPI_COMM_NULL;
  int *messList, messListSize;
  int testCount, iters;
  unsigned int i;
  char buf[256];
  int width;
  double result;
  STATSTRUCT dp, sum;

  prestaRankPrint ( 0,
                    "\n\n%s Test Results \n(tasks, size, ops/sample, samples) : min/mean/max \n",
                    testParams->name );

  if ( argStruct.verbose )
  {
    if ( testParams->id == LATEN )
    {
      if ( argStruct.sumLocalBW == 1 )
      {
        sprintf ( buf, outputCharLBWFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "Latency (us)" );
      }
      else
      {
        sprintf ( buf, outputCharFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "BW (MB)", "Latency (us)" );
      }
    }
    else
    {
      if ( argStruct.sumLocalBW == 1 )
      {
        sprintf ( buf, outputCharLBWFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "BW (MB)" );
      }
      else
      {
        sprintf ( buf, outputCharFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "BW (MB)", "Op time (us)" );
      }
    }

    prestaRankPrint ( 0, "%s", buf );
    width = strlen ( buf );
  }
  else
    width = 80;

  for ( i = 0; i < width; i++ )
    buf[i] = '-';
  buf[i] = '\0';

  prestaRankPrint ( 0, "%s\n", buf );

  messList = testParams->messList;
  messListSize = testParams->messListSize;

  testParams->maxBW = 0.0;
  testParams->maxBWMessSize = 0;

  init_stats ( &sum, 0, 0, 0, 0 );

  for ( procIdx = 0; procIdx < argStruct.procListSize; procIdx++ )
  {
    procs = argStruct.procList[procIdx];
    if ( procs > wsize )
      procs = wsize;

    /* Create Communicator of all active processes */
    procs = createActiveComm ( procs, argStruct.procsPerNode,
                               argStruct.allocPattern,
                               argStruct.useNearestRank, &activeComm );

    prestaDebug ( "rank %d returned from createActiveCom\n", rank );
    prestaDebug ( "messListSize is %d \n", messListSize );

    for ( messIdx = 0; messIdx < messListSize; messIdx++ )
    {
      if ( argStruct.iterList != NULL && argStruct.iterList[messIdx] != 0 )
        iters = argStruct.iterList[messIdx];
      else
        iters = argStruct.iters;

      if ( argStruct.testCountList != NULL )
      {
        fprintf ( stderr,
                  "Before init_stats! messIdx is %d, procIdx is %d, testCountList is %p\n",
                  messIdx, procIdx, argStruct.testCountList );
        init_stats ( &dp, procs, messList[messIdx], iters,
                     argStruct.testCountList[procIdx] );
      }
      else
      {
        init_stats ( &dp, procs, messList[messIdx], iters, argStruct.samples );
      }

      for ( testCount = 0; testCount < dp.samples; testCount++ )
      {
        /* Run test and save current result */
        testParams->rankResult =
          testParams->testFunc ( dp.msize, iters, &activeComm );

        /* TODO : Remove this if unnecessary */
        if ( testParams->rankResult < minTime )
          minTime = testParams->rankResult;

        if ( !generateResults ( testParams, procs, dp.msize, iters, &result ) )
          prestaAbort ( "Failed to generate test results." );

        update_stats ( &dp, result );
        update_stats ( &sum, result );
      }

      if ( testParams->id == LATEN )
      {
        prestaRankPrint ( 0,
                          "(%6d, %9d, %6d, %6d): %6.3f / %6.3f / %6.3f\n",
                          dp.tasks, dp.msize, dp.iters, dp.samples,
                          dp.min, dp.mean, dp.max );
      }
      else
      {
        prestaRankPrint ( 0,
                          "(%6d, %7d, %5d, %5d): %12.3f / %12.3f / %12.3f\n",
                          dp.tasks, dp.msize, dp.iters, dp.samples,
                          dp.min, dp.mean, dp.max );
      }
    }
  }

  if ( testParams->id == LATEN )
  {
    prestaRankPrint ( 0, "\nSummary : min/mean/max = %6.3f / %6.3f / %6.3f\n",
                      sum.min, sum.mean, sum.max );
  }
  else
  {
    prestaRankPrint ( 0, "\nSummary : min/mean/max = %12.3f / %12.3f / %12.3f\n",
                      sum.min, sum.mean, sum.max );
  }

  if ( rank == 0 && argStruct.printPairs )
  {
    printActivePairs ( procs, argStruct.procsPerNode,
                       argStruct.allocPattern, argStruct.useNearestRank );
  }

  if ( activeComm != MPI_COMM_NULL )
    MPI_Comm_free ( &activeComm );
}
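/*
 * Illustrative sketch (an assumption, not the benchmark's actual definition):
 * runTest() above only touches the TESTPARAMS fields shown here, so a minimal
 * declaration consistent with that usage might look like the following.  The
 * real struct is defined elsewhere in the sources and may contain more
 * members; the name is suffixed to avoid clashing with it.
 */
typedef struct
{
  char   *name;           /* label printed in the results header            */
  int     id;             /* test type, e.g. LATEN for the latency test     */
  int    *messList;       /* list of message sizes to run                   */
  int     messListSize;   /* number of entries in messList                  */
  double  maxBW;          /* best bandwidth observed so far                 */
  int     maxBWMessSize;  /* message size that produced maxBW               */
  double  rankResult;     /* per-rank result of the most recent sample      */
  double (*testFunc)(int msize, int iters, MPI_Comm *comm); /* test kernel  */
} TESTPARAMS_SKETCH;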
int main(int argc, char** argv)
{
  int rank, wsize, iters, i, procs, currtarg, dummy;
  double diff = 0.0;
  double start, max, mintime = 9999;
  MPI_Status stat;
  char comBuf;
  MPI_Comm activeComm;
  char* procFile = NULL;
  int* procList = NULL;
  int procListSize;
  int messStart, messStop, messFactor;
  int procsPerNode, procIdx, useBarrier, printPairs, useNearestRank;
  char allocPattern;

  command = argv[0];

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &wsize);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if ( !processArgs(argc, argv, rank, wsize, &iters, &dummy, &messStart,
                    &messStop, &messFactor, &procFile, &procsPerNode,
                    &allocPattern, &printPairs, &useBarrier, &useNearestRank) )
  {
    if ( rank == 0 )
      printUse();

    MPI_Finalize();
    exit(-1);
  }

  if ( !getProcList(procFile, wsize, &procList, &procListSize,
                    procsPerNode, allocPattern) )
  {
    if ( procFile )
      printf("Failed to get process list from file %s.\n", procFile);
    else
      printf("Failed to allocate process list.\n");

    exit(-1);
  }

  if ( rank == 0 )
    printReportHeader();

  currtarg = getTargetRank(rank, wsize, procsPerNode, useNearestRank);

  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode,
                       allocPattern, useNearestRank);
    }

    /* Create Communicator of all active processes */
    createActiveComm(procs, rank, wsize, procsPerNode, allocPattern,
                     printPairs, useNearestRank, &activeComm);

    if ( isActiveProc(rank, wsize, procsPerNode, procs,
                      allocPattern, useNearestRank) )
    {
      if ( rank < currtarg )
      {
        /* Ensure pair communication has been initialized */
        MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
        MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
      }
      else
      {
        /* Ensure pair communication has been initialized */
        MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
        MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
      }

      //generic_barrier(activeComm);
      MPI_Barrier(activeComm);
      //generic_barrier(activeComm);
      MPI_Barrier(activeComm);

      if ( rank < currtarg )
      {
        /* Time operation loop */
        start = MPI_Wtime();

        for ( i = 0; i < iters; i++ )
        {
          MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
          MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
        }
      }
      else
      {
        /* Time operation loop */
        start = MPI_Wtime();

        for ( i = 0; i < iters; i++ )
        {
          MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
          MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
        }
      }

      if ( useBarrier )
        MPI_Barrier(activeComm);
        //generic_barrier(activeComm);

      diff = MPI_Wtime() - start;
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    /* Get maximum sample length */
    MPI_Reduce(&diff, &max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    if ( rank == 0 )
    {
      if ( max < mintime )
        mintime = max;

      /* Report one-way latency: round-trip time per iteration, halved,
         scaled to microseconds. */
      printf(outputFormat, procs, max/iters/2*1000000);
    }
  }

  if ( rank == 0 )
  {
    printParameters(iters, procFile, procsPerNode, allocPattern, useBarrier);
  }

  printReportFooter(mintime, rank, wsize, procsPerNode, useNearestRank);

  MPI_Finalize();

  exit(0);
}
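/*
 * Illustrative sketch (helper name is not from the original source): the loop
 * above reports max/iters/2*1000000, i.e. the slowest pair's total ping-pong
 * time divided by the number of round trips and by two (send leg plus receive
 * leg), scaled to microseconds.  Written out as a helper:
 */
static double oneWayLatencyUsec(double totalSeconds, int iters)
{
  return totalSeconds / iters / 2.0 * 1000000.0;
}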
int main(int argc, char *argv[])
{
  double minTime       = 99999.0;
  double testTime      = 0.0;
  double maxBidirBw    = 0.0;
  double maxUnidirBw   = 0.0;
  int    maxBidirSize  = 0;
  int    maxUnidirSize = 0;
  char  *procFile      = NULL;
  int   *procList      = NULL;
  int    procListSize;
  int    rank, wsize, iters, procs;
  int    messStart, messStop, messFactor, messSize;
  int    procsPerNode, totalops, procIdx;
  int    useBarrier, printPairs, useNearestRank, dummy;
  double currBw;
  char   allocPattern;
  MPI_Comm activeComm;

  command = argv[0];

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &wsize);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if ( !processArgs(argc, argv, rank, wsize, &iters, &dummy, &messStart,
                    &messStop, &messFactor, &procFile, &procsPerNode,
                    &allocPattern, &printPairs, &useBarrier, &useNearestRank) )
  {
    if ( rank == 0 )
      printUse();

    MPI_Finalize();
    exit(-1);
  }

  if ( !getProcList(procFile, wsize, &procList, &procListSize,
                    procsPerNode, allocPattern) )
  {
    if ( procFile )
      printf("Failed to get process list from file %s.\n", procFile);
    else
      printf("Failed to allocate process list.\n");

    exit(-1);
  }

  if ( rank == 0 )
    printReportHeader();

  /* Unidirectional bandwidth tests */
  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( rank == 0 && printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode,
                       allocPattern, useNearestRank);
    }

    /* Create Communicator of all active processes */
    createActiveComm(procs, rank, wsize, procsPerNode, allocPattern,
                     printPairs, useNearestRank, &activeComm);

    for ( messSize = messStart; messSize <= messStop; messSize *= messFactor )
    {
      testTime = runUnicomTest(procs, messSize, iters, rank, wsize,
                               procsPerNode, allocPattern, useBarrier,
                               useNearestRank, &activeComm);

      if ( rank == 0 && testTime > 0 )
      {
        totalops = iters * (procs/2);
        currBw   = ((double)totalops * (double)messSize / testTime) / 1000000;

        if ( currBw > maxUnidirBw )
        {
          maxUnidirBw   = currBw;
          maxUnidirSize = messSize;
        }

        if ( testTime < minTime )
          minTime = testTime;
      }
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    if ( rank == 0 )
      printf("\n");
  }

  /* Bidirectional bandwidth tests */
  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( rank == 0 && printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode,
                       allocPattern, useNearestRank);
    }

    /* Create Communicator of all active processes */
    createActiveComm(procs, rank, wsize, procsPerNode, allocPattern,
                     printPairs, useNearestRank, &activeComm);

    for ( messSize = messStart; messSize <= messStop; messSize *= messFactor )
    {
      testTime = runBicomTest(procs, messSize, iters, rank, wsize,
                              procsPerNode, allocPattern, useBarrier,
                              useNearestRank, &activeComm);

      if ( rank == 0 && testTime > 0 )
      {
        totalops = iters * procs;
        currBw   = (((double)totalops * (double)messSize) / testTime) / 1000000;

        if ( currBw > maxBidirBw )
        {
          maxBidirBw   = currBw;
          maxBidirSize = messSize;
        }

        if ( testTime < minTime )
          minTime = testTime;
      }
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    if ( rank == 0 )
      printf("\n");
  }

  if ( rank == 0 )
  {
    printf("Max Unidirectional Bandwidth : %13.2f for message size of %7d bytes\n",
           maxUnidirBw, maxUnidirSize);
    printf("Max Bidirectional Bandwidth : %13.2f for message size of %7d bytes\n",
           maxBidirBw, maxBidirSize);

    printParameters(iters, messStart, messStop, messFactor, procFile,
                    procsPerNode, allocPattern, useBarrier);
  }

  printReportFooter(minTime, rank, wsize, procsPerNode, useNearestRank);

  free(procList);

  MPI_Finalize();

  exit(0);
}
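/*
 * Illustrative sketch (helper name is not from the original source): both
 * measurement loops above compute bandwidth as
 * (operations * message size / elapsed time) / 1e6 MB/s, where the
 * unidirectional test counts iters*(procs/2) operations (one per pair) and
 * the bidirectional test counts iters*procs (one per rank).
 */
static double bandwidthMB(int totalops, int messSize, double seconds)
{
  return ((double)totalops * (double)messSize / seconds) / 1000000.0;
}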