Example #1
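This first example is a modified com.c main() that short-circuits the presta benchmark in order to time an OpenMP memory-copy loop on rank 0. It refers to several file-scope symbols defined elsewhere in the file (command, req_array and its ReqInfo element type, the r_sync counter, and the a/b arrays used by the copy loop). A minimal sketch of what this snippet assumes those declarations look like (array sizes and field types here are guesses, not the project's actual definitions):

#include <mpi.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Hypothetical declarations; the real ones live elsewhere in com.c. */
typedef struct
{
  int  *sync;     /* per-rank synchronization word */
  char *buffer;   /* shared send buffer            */
} ReqInfo;

char    *command;                 /* program name, set from argv[0]                    */
ReqInfo *req_array;               /* one entry per rank, allocated by rank 0           */
int      r_sync = 0;              /* counter for the commented-out __sync_* handshake  */
double   a[1048576], b[1048576];  /* buffers for the OpenMP copy test (N = 1048576)    */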
int main(int argc, char *argv[])
{
  double minTime        = 99999.0;
  double testTime       = 0.0;
  double maxBidirBw     = 0.0;
  double maxUnidirBw    = 0.0;
  int    maxBidirSize   = 0;
  int    maxUnidirSize  = 0;
  char*  procFile       = NULL;
  int*   procList       = NULL;
  int    procListSize;
  int    rank, wsize, iters, procs;
  int    messStart, messStop, messFactor, messSize;
  int    procsPerNode, totalops, procIdx;
  int    useBarrier, printPairs, useNearestRank, dummy;
  double currBw;
  char   allocPattern;
  MPI_Comm activeComm;

  command = argv[0];

  MPI_Init(&argc, &argv);

  MPI_Comm_size(MPI_COMM_WORLD, &wsize);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);


  if ( rank == 0 )
  {
    //sleep(5);
    //while (!__sync_bool_compare_and_swap(&r_sync, 59, 0)){ sleep(2); printf("sync value %d \n", r_sync); };
    int k = 0;
    #pragma omp parallel
    #pragma omp atomic
    k++;
    printf("Done Sync. Number of Threads counted = %i\n", k);

    int N = 1048576;
    int i = 0;
    int j = 0;
    double start, diff;

    for ( j = 0; j < 50; j++ )
    {
      #pragma omp parallel for
      for ( i = 0; i < N; i++ )
      {
        a[i] = 1.0;
        b[i] = 2.0;
      }

      start = MPI_Wtime();
      #pragma omp parallel for
      for ( i = 0; i < N; i++ )
      {
        a[i] = b[i];
      }
      diff = MPI_Wtime() - start;
      printf("ELAPSED time to copy : %11.6f N: %d \n", diff, N);
    }
  }
  else
  {
    //__sync_fetch_and_add(&r_sync, 1);
    printf("OK other ranks  \n");
    //sleep(100);
  }

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Finalize();

  return 0;  /* NOTE: this early return exits main() here, so the benchmark
                code that follows is never executed. */

  if ( !processArgs(argc, argv, rank, wsize, &iters, &dummy,
                    &messStart, &messStop, &messFactor, 
                    &procFile, &procsPerNode, &allocPattern, 
                    &printPairs, &useBarrier, &useNearestRank) )
  {
    if ( rank == 0 )
      printUse();

    MPI_Finalize();
    exit(-1);
  }

  if ( ! getProcList(procFile, wsize, &procList, &procListSize,
                     procsPerNode, allocPattern) )
  {
    if ( procFile )
      printf("Failed to get process list from file %s.\n", procFile);
    else
      printf("Failed to allocate process list.\n");

    exit(-1);
  }

  if ( rank == 0 )
    printReportHeader();


  //Set up intra-node shared memory structures.
  if(rank == 0) {
        //One rank per node allocates shared send request lists.
        req_array = malloc(wsize*sizeof(ReqInfo));
        /*if(!IS_SM_BUF((void*)req_array)){
            printf("Failed to allocate from SM region :%p \n", req_array);
            exit(-1);
        }*/
  }
  //broadcast address of the pointer variable so that each rank has access to it
  MPI_Bcast(&req_array, 1, MPI_LONG, 0, MPI_COMM_WORLD);
  //printf("Broadcasting of shared mem region complete! rank : %d  region_addr : %lu region_ptr : %lu \n", rank, &req_array, req_array);
  //printf("Access shm region ! rank : %d  region for sync : %lu  \n", rank, req_array[rank].sync);
 /* 
  if(rank == 0){
	char* comBuf  = (char*)malloc(10);
	memset(comBuf, 1, 10);
	req_array[30].buffer = comBuf ;
	comBuf[0] = 'n';
        req_array[30].sync = malloc(sizeof(int));
        req_array[30].sync[0] = 12;
	printf("rank : %d done sending buff ptr : %p  sync ptr : %p \n",rank, comBuf, req_array[30].sync );
        printf("sleeping ! pid() %d \n", getpid());
	sleep(40000);
  }else if(rank == 30){
	while(req_array[30].sync == NULL){
	
	}

	while(req_array[30].sync[0] != 12){
	
	}
	printf("rank : %d  buffer value: %c sync value : %d  buff ptr : %p  sync ptr : %p  \n",
		rank, req_array[30].buffer[0], req_array[30].sync[0], req_array[30].buffer, req_array[30].sync );
        printf("sleeping ! pid() %d \n", getpid());
		sleep(40000);
  } 
  MPI_Barrier(MPI_COMM_WORLD);
  return 0;*/

  printPairs = 1;   /* force pair printing regardless of the command-line setting */
  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( rank == 0 && printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode, 
                       allocPattern, useNearestRank);
    }

    /*  Create Communicator of all active processes  */
    createActiveComm(procs, rank, wsize, procsPerNode, 
                     allocPattern, printPairs, useNearestRank, &activeComm);
//    messStart = 8388608;
//    messStop = 4096 ;
//    messStop = 8388608 ;
    for ( messSize = messStart; messSize <= messStop;
          messSize *= messFactor )
    {
      testTime = runUnicomTest(procs, messSize, iters, rank, 
                               wsize, procsPerNode, allocPattern, 
                               useBarrier, useNearestRank, &activeComm);

      if ( rank == 0 && testTime > 0 )
      {
        totalops   = iters * (procs/2);
        currBw = ((double)totalops*(double)messSize/testTime)/1000000;

        if ( currBw > maxUnidirBw )
        {
          maxUnidirBw   = currBw;
          maxUnidirSize = messSize;
        }

        if ( testTime < minTime )
          minTime = testTime;
      }
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    if ( rank == 0 )
      printf("\n");
  }
  
  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( rank == 0 && printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode, 
                       allocPattern, useNearestRank);
    }

    // Create Communicator of all active processes
    createActiveComm(procs, rank, wsize, procsPerNode, 
                     allocPattern, printPairs, useNearestRank, &activeComm);

    for ( messSize = messStart; messSize <= messStop; 
          messSize *= messFactor )
    {
      testTime = runBicomTest(procs, messSize, iters, rank,
                              wsize, procsPerNode, allocPattern,
                              useBarrier, useNearestRank, &activeComm);

      if ( rank == 0 && testTime > 0 )
      {
        totalops   = iters * procs;
        currBw = (((double)totalops*(double)messSize)/testTime)/1000000;

        if ( currBw > maxBidirBw )
        {
          maxBidirBw   = currBw;
          maxBidirSize = messSize;
        }

        if ( testTime < minTime )
          minTime = testTime;
      }
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    if ( rank == 0 )
      printf("\n");
  }


  if ( rank == 0 )
  {
    printf("Max Unidirectional Bandwith :  %13.2f for message size of %7d bytes\n",
           maxUnidirBw, maxUnidirSize);
    printf("Max  Bidirectional Bandwith :  %13.2f for message size of %7d bytes\n",
           maxBidirBw, maxBidirSize);

    printParameters(iters, messStart, messStop, messFactor, 
                    procFile, procsPerNode, allocPattern, useBarrier);
    free(req_array);
  }

  printReportFooter(minTime, rank, wsize, procsPerNode, useNearestRank);
  
  free(procList);

  MPI_Finalize();

  exit(0);
}
Example #2
File: com.c  Project: 8l/insieme
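runTest() relies on a running-statistics helper (STATSTRUCT, init_stats, update_stats) defined elsewhere in the project. Judging from how the fields are used below, a minimal sketch of what those helpers are assumed to do (field names and the incremental-mean detail are guesses):

#include <float.h>

/* Hypothetical sketch of the statistics helper used by runTest(). */
typedef struct
{
  int    tasks;     /* active processes for this sample set */
  int    msize;     /* message size in bytes                */
  int    iters;     /* operations per sample                */
  int    samples;   /* samples to collect                   */
  long   count;     /* samples accumulated so far           */
  double min, mean, max;
} STATSTRUCT;

void
init_stats ( STATSTRUCT * s, int tasks, int msize, int iters, int samples )
{
  s->tasks   = tasks;
  s->msize   = msize;
  s->iters   = iters;
  s->samples = samples;
  s->count   = 0;
  s->min     = DBL_MAX;
  s->mean    = 0.0;
  s->max     = 0.0;
}

void
update_stats ( STATSTRUCT * s, double value )
{
  if ( value < s->min ) s->min = value;
  if ( value > s->max ) s->max = value;
  s->count++;
  s->mean += ( value - s->mean ) / s->count;   /* incremental mean */
}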
void
runTest ( TESTPARAMS * testParams )
{
  int procIdx, procs, messIdx;
  MPI_Comm activeComm = MPI_COMM_NULL;
  int *messList, messListSize;
  int testCount, iters;
  unsigned int i;
  char buf[256];
  int width;
  double result;
  STATSTRUCT dp, sum;

  prestaRankPrint ( 0,
                    "\n\n%s Test Results \n(tasks, size, ops/sample, samples) : min/mean/max \n",
                    testParams->name );

  if ( argStruct.verbose )
  {
    if ( testParams->id == LATEN )
    {
      if ( argStruct.sumLocalBW == 1 )
      {
        sprintf ( buf, outputCharLBWFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "Latency (us)" );
      }
      else
      {
        sprintf ( buf, outputCharFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "BW (MB)", "Latency (us)" );
      }
    }
    else
    {
      if ( argStruct.sumLocalBW == 1 )
      {
        sprintf ( buf, outputCharLBWFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "BW (MB)" );
      }
      else
      {
        sprintf ( buf, outputCharFormat, "Test", "Processes",
                  "Op Size (bytes)", "Ops", "BW (MB)", "Op time (us)" );
      }
    }
    prestaRankPrint ( 0, "%s", buf );
    width = strlen ( buf );
  }
  else
    width = 80;

  for ( i = 0; i < width; i++ )
    buf[i] = '-';
  buf[i] = '\0';

  prestaRankPrint ( 0, "%s\n", buf );

  messList = testParams->messList;
  messListSize = testParams->messListSize;

  testParams->maxBW = 0.0;
  testParams->maxBWMessSize = 0;

  init_stats ( &sum, 0, 0, 0, 0 );

  for ( procIdx = 0; procIdx < argStruct.procListSize; procIdx++ )
  {
    procs = argStruct.procList[procIdx];
    if ( procs > wsize )
      procs = wsize;

    /*  Create Communicator of all active processes  */
    procs = createActiveComm ( procs, argStruct.procsPerNode,
                               argStruct.allocPattern,
                               argStruct.useNearestRank, &activeComm );

    prestaDebug ( "rank %d returned from createActiveCom\n", rank );
    prestaDebug ( "messListSize is %d \n", messListSize );

    for ( messIdx = 0; messIdx < messListSize; messIdx++ )
    {
      if ( argStruct.iterList != NULL && argStruct.iterList[messIdx] != 0 )
        iters = argStruct.iterList[messIdx];
      else
        iters = argStruct.iters;

      if ( argStruct.testCountList != NULL )
      {
        fprintf ( stderr,
                  "Before init_stats! messIdx is %d, procIdx is %d, testCountList is %p\n",
                  messIdx, procIdx, argStruct.testCountList );
        init_stats ( &dp, procs, messList[messIdx], iters,
                     argStruct.testCountList[procIdx] );
      }
      else
      {
        init_stats ( &dp, procs, messList[messIdx], iters, argStruct.samples );
      }

      for ( testCount = 0; testCount < dp.samples; testCount++ )
      {
        /*  Run test and save current result  */
        testParams->rankResult =
          testParams->testFunc ( dp.msize, iters, &activeComm );

        /*  TODO : Remove this if unnecessary   */
        if ( testParams->rankResult < minTime )
          minTime = testParams->rankResult;

        if ( !generateResults
             ( testParams, procs, dp.msize, iters, &result ) )
          prestaAbort ( "Failed to generate test results." );

        update_stats ( &dp, result );
        update_stats ( &sum, result );
      }

      if ( testParams->id == LATEN )
      {
        prestaRankPrint ( 0,
                          "(%6d, %9d, %6d, %6d):  %6.3f / %6.3f / %6.3f\n",
                          dp.tasks, dp.msize, dp.iters, dp.samples, dp.min,
                          dp.mean, dp.max );
      }
      else
      {
        prestaRankPrint ( 0,
                          "(%6d, %7d, %5d, %5d):  %12.3f / %12.3f / %12.3f\n",
                          dp.tasks, dp.msize, dp.iters, dp.samples, dp.min,
                          dp.mean, dp.max );
      }
    }
  }

  if ( testParams->id == LATEN )
  {
    prestaRankPrint ( 0,
                      "\nSummary  :             min/mean/max = %6.3f / %6.3f / %6.3f\n",
                      sum.min, sum.mean, sum.max );
  }
  else
  {
    prestaRankPrint ( 0,
                      "\nSummary  :         min/mean/max = %12.3f / %12.3f / %12.3f\n",
                      sum.min, sum.mean, sum.max );
  }

  if ( rank == 0 && argStruct.printPairs )
  {
    printActivePairs ( procs, argStruct.procsPerNode,
                       argStruct.allocPattern, argStruct.useNearestRank );
  }

  if ( activeComm != MPI_COMM_NULL )
    MPI_Comm_free ( &activeComm );
}
Example #3
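This example times zero-byte ping-pong exchanges between rank pairs: the slowest pair's elapsed time is collected with MPI_Reduce and reported as max/iters/2 microseconds, i.e. one-way latency. A small standalone illustration of that conversion (the numbers are made up):

#include <stdio.h>

/* Illustration only: converting a timed ping-pong loop into one-way latency,
   mirroring the max/iters/2*1000000 expression in the report below.
   The inputs are hypothetical. */
int main(void)
{
  double elapsed = 0.0042;   /* seconds spent in the whole timed loop */
  int    iters   = 1000;     /* round trips performed in that loop    */

  /* elapsed/iters = seconds per round trip; /2 = one direction; *1e6 = microseconds */
  double one_way_us = elapsed / iters / 2 * 1000000;

  printf("one-way latency: %.3f us\n", one_way_us);  /* 2.100 us for these numbers */
  return 0;
}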
int main(int argc, char** argv)
{
  int rank, wsize, iters, i, procs, currtarg, dummy;
  double diff = 0.0;
  double start, max, mintime = 9999;
  MPI_Status stat;
  char comBuf;
  MPI_Comm activeComm;
  char*  procFile       = NULL;
  int*   procList       = NULL;
  int    procListSize;
  int    messStart, messStop, messFactor;
  int    procsPerNode, procIdx, useBarrier, printPairs, useNearestRank;
  char   allocPattern;

  command = argv[0];

  MPI_Init(&argc, &argv);

  MPI_Comm_size(MPI_COMM_WORLD, &wsize);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if ( !processArgs(argc, argv, rank, wsize, &iters, 
                    &dummy, &messStart, &messStop, &messFactor, 
                    &procFile, &procsPerNode, &allocPattern, 
                    &printPairs, &useBarrier, &useNearestRank) )
  {
    if ( rank == 0 )
      printUse();

    MPI_Finalize();
    exit(-1);
  }

  if ( ! getProcList(procFile, wsize, &procList, &procListSize,
                     procsPerNode, allocPattern) )
  {
    if ( procFile )
      printf("Failed to get process list from file %s.\n", procFile);
    else
      printf("Failed to allocate process list.\n");

    exit(-1);
  }

  if ( rank == 0 )
    printReportHeader();

  currtarg = getTargetRank(rank, wsize, procsPerNode, useNearestRank);

  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode, 
                       allocPattern, useNearestRank);
    }

    /*  Create Communicator of all active processes  */
    createActiveComm(procs, rank, wsize, procsPerNode, 
                     allocPattern, printPairs, useNearestRank, &activeComm);
    

    if ( isActiveProc(rank, wsize, procsPerNode, procs, 
                      allocPattern, useNearestRank) )
    {
      if ( rank < currtarg )
      {
        /*  Ensure pair communication has been initialized  */
        MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
        MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
      }
      else 
      {
        /*  Ensure pair communication has been initialized  */
        MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
        MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
      }
    
      //generic_barrier(activeComm);
      MPI_Barrier(activeComm);
      //generic_barrier(activeComm);
      MPI_Barrier(activeComm);

      if ( rank < currtarg )
      {
        /*  Time operation loop  */
        start = MPI_Wtime();

        for ( i = 0; i < iters; i++ )
        {
          MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
          MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
        }
      }
      else 
      {
        /*  Time operation loop  */
        start = MPI_Wtime();

        for ( i = 0; i < iters; i++ )
        {
          MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
          MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
        }
      }

      if ( useBarrier )
        MPI_Barrier(activeComm);
      //generic_barrier(activeComm);

      diff = MPI_Wtime() - start;
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    /*  Get maximum sample length  */
    MPI_Reduce(&diff, &max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    if ( rank == 0 )
    {
      if ( max < mintime )
        mintime = max;

      printf(outputFormat, procs, max/iters/2*1000000);
    }
  }
  
  if ( rank == 0 )
  {
    printParameters(iters, procFile, procsPerNode, 
                    allocPattern, useBarrier);
  }

  printReportFooter(mintime, rank, wsize, procsPerNode, useNearestRank);

  MPI_Finalize();

  exit(0);
}
Example #4
File: com.c  Project: uswick/presta_HMPI
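This is the unmodified benchmark main(): for each process count and message size it reports bandwidth as totalops*messSize/testTime scaled to MB/s, where totalops is iters*(procs/2) for the unidirectional test and iters*procs for the bidirectional one. A small standalone illustration of the unidirectional figure (inputs are made up):

#include <stdio.h>

/* Illustration only: the MB/s value printed by the unidirectional test below.
   All input values are hypothetical. */
int main(void)
{
  int    iters    = 100;        /* operations per pair                 */
  int    procs    = 8;          /* active ranks; procs/2 sending pairs */
  int    messSize = 1048576;    /* bytes per message                   */
  double testTime = 0.52;       /* seconds for the whole sample        */

  int    totalops = iters * (procs / 2);
  double bw = ((double)totalops * (double)messSize / testTime) / 1000000;

  printf("%.2f MB/s\n", bw);    /* ~806.60 MB/s for these numbers */
  return 0;
}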
int main(int argc, char *argv[])
{
  double minTime        = 99999.0;
  double testTime       = 0.0;
  double maxBidirBw     = 0.0;
  double maxUnidirBw    = 0.0;
  int    maxBidirSize   = 0;
  int    maxUnidirSize  = 0;
  char*  procFile       = NULL;
  int*   procList       = NULL;
  int    procListSize;
  int    rank, wsize, iters, procs;
  int    messStart, messStop, messFactor, messSize;
  int    procsPerNode, totalops, procIdx;
  int    useBarrier, printPairs, useNearestRank, dummy;
  double currBw;
  char   allocPattern;
  MPI_Comm activeComm;

  command = argv[0];

  MPI_Init(&argc, &argv);

  MPI_Comm_size(MPI_COMM_WORLD, &wsize);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if ( !processArgs(argc, argv, rank, wsize, &iters, &dummy,
                    &messStart, &messStop, &messFactor, 
                    &procFile, &procsPerNode, &allocPattern, 
                    &printPairs, &useBarrier, &useNearestRank) )
  {
    if ( rank == 0 )
      printUse();

    MPI_Finalize();
    exit(-1);
  }

  if ( ! getProcList(procFile, wsize, &procList, &procListSize,
                     procsPerNode, allocPattern) )
  {
    if ( procFile )
      printf("Failed to get process list from file %s.\n", procFile);
    else
      printf("Failed to allocate process list.\n");

    exit(-1);
  }

  if ( rank == 0 )
    printReportHeader();

  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( rank == 0 && printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode, 
                       allocPattern, useNearestRank);
    }

    /*  Create Communicator of all active processes  */
    createActiveComm(procs, rank, wsize, procsPerNode, 
                     allocPattern, printPairs, useNearestRank, &activeComm);

    for ( messSize = messStart; messSize <= messStop; 
          messSize *= messFactor )
    {
      testTime = runUnicomTest(procs, messSize, iters, rank, 
                               wsize, procsPerNode, allocPattern, 
                               useBarrier, useNearestRank, &activeComm);

      if ( rank == 0 && testTime > 0 )
      {
        totalops   = iters * (procs/2);
        currBw = ((double)totalops*(double)messSize/testTime)/1000000;

        if ( currBw > maxUnidirBw )
        {
          maxUnidirBw   = currBw;
          maxUnidirSize = messSize;
        }

        if ( testTime < minTime )
          minTime = testTime;
      }
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    if ( rank == 0 )
      printf("\n");
  }
  
  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( rank == 0 && printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode, 
                       allocPattern, useNearestRank);
    }

    /*  Create Communicator of all active processes  */
    createActiveComm(procs, rank, wsize, procsPerNode, 
                     allocPattern, printPairs, useNearestRank, &activeComm);

    for ( messSize = messStart; messSize <= messStop; 
          messSize *= messFactor )
    {
      testTime = runBicomTest(procs, messSize, iters, rank, 
                              wsize, procsPerNode, allocPattern, 
                              useBarrier, useNearestRank, &activeComm);

      if ( rank == 0 && testTime > 0 )
      {
        totalops   = iters * procs;
        currBw = (((double)totalops*(double)messSize)/testTime)/1000000;

        if ( currBw > maxBidirBw )
        {
          maxBidirBw   = currBw;
          maxBidirSize = messSize;
        }

        if ( testTime < minTime )
          minTime = testTime;
      }
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    if ( rank == 0 )
      printf("\n");
  }

  if ( rank == 0 )
  {
    printf("Max Unidirectional Bandwith :  %13.2f for message size of %7d bytes\n",
           maxUnidirBw, maxUnidirSize);
    printf("Max  Bidirectional Bandwith :  %13.2f for message size of %7d bytes\n",
           maxBidirBw, maxBidirSize);

    printParameters(iters, messStart, messStop, messFactor, 
                    procFile, procsPerNode, allocPattern, useBarrier);
  }

  printReportFooter(minTime, rank, wsize, procsPerNode, useNearestRank);
  
  free(procList);

  MPI_Finalize();

  exit(0);
}