Example #1
double runBicomTest(int procs, int bufsize, int iters, int rank, 
                    int wsize, int procsPerNode, char allocPattern, 
                    int useBarrier, int useNearestRank, MPI_Comm *activeComm)
{
  int i, currtarg;
  double start, diff, max;
  double totalops;
  MPI_Status stat;
  char *comBuf;
  char *comBuf2;
  uintptr_t sbuf, rbuf, sbuf2, rbuf2;

  currtarg = getTargetRank(rank, wsize, procsPerNode, useNearestRank);
  diff     = 0;

   if ( isActiveProc(rank, wsize, procsPerNode, procs,
                     allocPattern, useNearestRank) )
   {
     comBuf  = (char*)memalign(LINESIZE, bufsize);
     comBuf2 = (char*)memalign(LINESIZE, bufsize);

     /*  Allocate the sync flag shared with the target rank.  The sender's
         flag signals that copying is done; the receiver's flag signals that
         the transfer is complete and the next iteration may start.  */
     req_array[currtarg].sync    = malloc(sizeof(int));
     req_array[currtarg].sync[0] = 0;

     /*  The offset published for the target drives the synergistic transfer
         (see the chunk-claiming sketch after this example); the sender's own
         offset is unused.  */
     req_array[currtarg].offset    = malloc(sizeof(int));
     req_array[currtarg].offset[0] = 0;

     /*  Both sender (rank < currtarg) and receiver publish their buffers
         for the target rank.  */
     req_array[currtarg].buffer  = comBuf;
     req_array[currtarg].buffer2 = comBuf2;

     MPI_Barrier(*activeComm);
     MPI_Barrier(*activeComm);


     if ( rank < currtarg )
     {
       /*  Sender - time operation loop  */
       start = MPI_Wtime();

       volatile int* offsetptr = req_array[currtarg].offset;
       int offset = 0;
       sbuf  = (uintptr_t) comBuf;
       sbuf2 = (uintptr_t) req_array[rank].buffer2;
       rbuf  = (uintptr_t) req_array[rank].buffer;
       rbuf2 = (uintptr_t) comBuf2;

       for ( i = 0; i < iters; i++ )
       {
         /*  Claim BLOCKSIZE-sized chunks until the whole buffer has been
             copied; each copy length is min(bufsize - offset, BLOCKSIZE).  */
         while ( 1 )
         {
           offset = __sync_fetch_and_add((volatile int *) offsetptr, BLOCKSIZE);
           if ( offset >= bufsize )
             break;

           int left = bufsize - offset;
           memcpy((void*)(rbuf  + offset), (void*)(sbuf  + offset),
                  (left < BLOCKSIZE ? left : BLOCKSIZE));
           memcpy((void*)(rbuf2 + offset), (void*)(sbuf2 + offset),
                  (left < BLOCKSIZE ? left : BLOCKSIZE));
         }

         /*  Signal the receiver that copying is done, then wait until the
             receiver acknowledges before starting the next iteration.  */
         req_array[currtarg].sync[0] = 2;
         while ( !__sync_bool_compare_and_swap(req_array[rank].sync, 1, 0) )
           ;
       }
     }
     else
     {
       /*  Receiver - time operation loop  */
       start = MPI_Wtime();

       volatile int* offsetptr = req_array[rank].offset;
       int offset = 0;

       /*  The receiver is now ready to take part in the synergistic
           transfer.  */
       rbuf  = (uintptr_t) comBuf;
       rbuf2 = (uintptr_t) req_array[rank].buffer2;
       sbuf  = (uintptr_t) req_array[rank].buffer;
       sbuf2 = (uintptr_t) comBuf2;

       for ( i = 0; i < iters; i++ )
       {
         while ( 1 )
         {
           offset = __sync_fetch_and_add((volatile int *) offsetptr, BLOCKSIZE);
           if ( offset >= bufsize )
             break;

           int left = bufsize - offset;
           memcpy((void*)(rbuf  + offset), (void*)(sbuf  + offset),
                  (left < BLOCKSIZE ? left : BLOCKSIZE));
           memcpy((void*)(rbuf2 + offset), (void*)(sbuf2 + offset),
                  (left < BLOCKSIZE ? left : BLOCKSIZE));
         }

         /*  Wait for the sender's "copy done" flag, reset the shared offset,
             then tell the sender to start the next iteration.  */
         while ( !__sync_bool_compare_and_swap(req_array[rank].sync, 2, 0) )
           ;
         *offsetptr = 0;
         req_array[currtarg].sync[0] = 1;
       }


     }

     //if ( useBarrier )
       //MPI_Barrier(*activeComm);
       //generic_barrier(*activeComm);

     diff = MPI_Wtime() - start;

     MPI_Barrier(*activeComm);
     MPI_Barrier(*activeComm);

     free(req_array[currtarg].sync);
     free(req_array[currtarg].offset);
     free(comBuf);
     free(comBuf2);

   }
  MPI_Barrier(MPI_COMM_WORLD);

  /*  Get maximum sample length  */
  MPI_Reduce(&diff, &max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  if ( rank == 0 )
  {
    totalops   = (double)(iters * procs);

    /*
      Bandwidth is calculated as :
      
        (# of processes * operations per time sample * message size)
        ------------------------------------------------------------
                  maximum sample length of all processes
    */
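    /*
      Illustrative arithmetic with assumed values (not measured output):
      procs = 2, iters = 1000, bufsize = 1048576 bytes and max = 0.5 s give
      (2 * 1000 * 1048576) / 0.5 / 1e6 ~= 4194 MB/s and a per-iteration time
      of 0.5 / 1000 * 1e6 = 500 us.
    */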

    if ( max > 0 )
    {
      printf(outputFormat, "Bidirectional", procs, bufsize, 
             ((double)totalops*(double)bufsize/max)/1000000,
             max/iters*1000000);
    }
    else
    {
      printf("Invalid measurement.  Increase the number of operations per measurement.\n");
    }
  }

  return max;
}
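The copy loops in Example #1 rely on a simple chunk-claiming idea: both sides atomically grab BLOCKSIZE-sized chunks from a shared offset counter with __sync_fetch_and_add and copy whatever chunks they win, so the two ranks cooperate on one transfer. Below is a minimal standalone sketch of that pattern only, using two pthreads in a single process instead of two MPI ranks sharing memory through req_array; the names (worker, shared_off, BLOCK, BUFSZ) are invented for the illustration and are not part of the benchmark.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define BUFSZ  (1 << 20)      /* total bytes to copy               */
#define BLOCK  (64 * 1024)    /* chunk size claimed per atomic add */

static char src[BUFSZ], dst[BUFSZ];
static volatile int shared_off = 0;   /* offset counter shared by both workers */

static void *worker(void *arg)
{
  (void) arg;
  for ( ; ; )
  {
    /*  Atomically claim the next BLOCK-sized chunk.  */
    int off = __sync_fetch_and_add(&shared_off, BLOCK);
    if ( off >= BUFSZ )
      break;

    int left = BUFSZ - off;
    memcpy(dst + off, src + off, left < BLOCK ? left : BLOCK);
  }
  return NULL;
}

int main(void)
{
  pthread_t t1, t2;

  memset(src, 0xab, BUFSZ);

  pthread_create(&t1, NULL, worker, NULL);
  pthread_create(&t2, NULL, worker, NULL);
  pthread_join(t1, NULL);
  pthread_join(t2, NULL);

  printf("copy %s\n", memcmp(src, dst, BUFSZ) == 0 ? "ok" : "FAILED");
  return 0;
}

Compile with gcc -pthread; every offset is handed out exactly once by the atomic fetch-and-add, so the buffer is copied completely no matter how the two workers interleave. The sync-flag handshake in Example #1 (sync[0] = 2, then compare-and-swap on 1) is only needed because its two sides repeat the transfer for many iterations.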
Example #2
int main(int argc, char** argv)
{
  int rank, wsize, iters, i, procs, currtarg, dummy;
  double diff = 0.0;
  double start, max, mintime = 9999;
  MPI_Status stat;
  char comBuf;
  MPI_Comm activeComm;
  char*  procFile       = NULL;
  int*   procList       = NULL;
  int    procListSize;
  int    messStart, messStop, messFactor;
  int    procsPerNode, procIdx, useBarrier, printPairs, useNearestRank;
  char   allocPattern;

  command = argv[0];

  MPI_Init(&argc, &argv);

  MPI_Comm_size(MPI_COMM_WORLD, &wsize);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if ( !processArgs(argc, argv, rank, wsize, &iters, 
                    &dummy, &messStart, &messStop, &messFactor, 
                    &procFile, &procsPerNode, &allocPattern, 
                    &printPairs, &useBarrier, &useNearestRank) )
  {
    if ( rank == 0 )
      printUse();

    MPI_Finalize();
    exit(-1);
  }

  if ( ! getProcList(procFile, wsize, &procList, &procListSize,
                     procsPerNode, allocPattern) )
  {
    if ( procFile )
      printf("Failed to get process list from file %s.\n", procFile);
    else
      printf("Failed to allocate process list.\n");

    exit(-1);
  }

  if ( rank == 0 )
    printReportHeader();

  currtarg = getTargetRank(rank, wsize, procsPerNode, useNearestRank);

  for ( procIdx = 0; procIdx < procListSize; procIdx++ )
  {
    procs = procList[procIdx];

    if ( printPairs )
    {
      printActivePairs(procs, rank, wsize, procsPerNode, 
                       allocPattern, useNearestRank);
    }

    /*  Create Communicator of all active processes  */
    createActiveComm(procs, rank, wsize, procsPerNode, 
                     allocPattern, printPairs, useNearestRank, &activeComm);
    

    if ( isActiveProc(rank, wsize, procsPerNode, procs, 
                      allocPattern, useNearestRank) )
    {
      if ( rank < currtarg )
      {
        /*  Ensure pair communication has been initialized  */
        MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
        MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
      }
      else 
      {
        /*  Ensure pair communication has been initialized  */
        MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
        MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
      }
    
      //generic_barrier(activeComm);
      MPI_Barrier(activeComm);
      //generic_barrier(activeComm);
      MPI_Barrier(activeComm);

      if ( rank < currtarg )
      {
        /*  Time operation loop  */
        start = MPI_Wtime();

        for ( i = 0; i < iters; i++ )
        {
          MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
          MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
        }
      }
      else 
      {
        /*  Time operation loop  */
        start = MPI_Wtime();

        for ( i = 0; i < iters; i++ )
        {
          MPI_Recv(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD, &stat);
          MPI_Send(&comBuf, 0, MPI_INT, currtarg, 0, MPI_COMM_WORLD);
        }
      }

      if ( useBarrier )
        MPI_Barrier(activeComm);
        //generic_barrier(activeComm);

      diff = MPI_Wtime() - start;
    }

    if ( activeComm != MPI_COMM_NULL )
      MPI_Comm_free(&activeComm);

    /*  Get maximum sample length  */
    MPI_Reduce(&diff, &max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    if ( rank == 0 )
    {
      if ( max < mintime )
        mintime = max;

      printf(outputFormat, procs, max/iters/2*1000000);
    }
  }
  
  if ( rank == 0 )
  {
    printParameters(iters, procFile, procsPerNode, 
                    allocPattern, useBarrier);
  }

  printReportFooter(mintime, rank, wsize, procsPerNode, useNearestRank);

  MPI_Finalize();

  exit(0);
}
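Each iteration of the timed loop in Example #2 is a full round trip, so the value printed via printf(outputFormat, procs, max/iters/2*1000000) is the one-way latency in microseconds. A tiny standalone illustration with assumed numbers (not output of the benchmark):

#include <stdio.h>

int main(void)
{
  double max   = 0.02;    /* assumed maximum sample length in seconds */
  int    iters = 10000;   /* assumed iteration count                  */

  /*  Half the average round-trip time, converted to microseconds.  */
  printf("one-way latency: %.2f us\n", max / iters / 2 * 1000000);   /* 1.00 us */
  return 0;
}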
Example #3
double runUnicomTest(int procs, int bufsize, int iters, int rank, 
                     int wsize, int procsPerNode, char allocPattern, 
                     int useBarrier, int useNearestRank, MPI_Comm *activeComm)
{
  int i, currtarg;
  double diff;
  double start, maxtime;
  MPI_Status stat;
  char *comBuf;
  double totalops;
  uintptr_t sbuf, rbuf;
  currtarg = getTargetRank(rank, wsize, procsPerNode, useNearestRank);

  diff     = 0;
  maxtime  = 0;

   int count = 0;

   if ( isActiveProc(rank, wsize, procsPerNode, procs,
                     allocPattern, useNearestRank) )
   {
     comBuf = (char*)memalign(LINESIZE, bufsize);

     /*  Allocate the sync flag shared with the target rank.  The sender's
         flag signals that copying is done; the receiver's flag signals that
         the transfer is complete and the next iteration may start.  */
     req_array[currtarg].sync    = malloc(sizeof(int));
     req_array[currtarg].sync[0] = 0;

     /*  The offset published for the target drives the synergistic transfer;
         the sender's own offset is unused.  */
     req_array[currtarg].offset    = malloc(sizeof(int));
     req_array[currtarg].offset[0] = 0;

     /*  Both sender (rank < currtarg) and receiver publish their buffer
         for the target rank.  */
     req_array[currtarg].buffer = comBuf;

     MPI_Barrier(*activeComm);
     MPI_Barrier(*activeComm);

    #pragma omp parallel
       {
    #pragma omp master
           {
               int k = omp_get_num_threads();
               printf ("Number of Threads requested = %i\n",k);
           }
       }
     if ( rank < currtarg )
     {
       /*  Sender - time operation loop  */
       start = MPI_Wtime();

       volatile int* offsetptr = req_array[currtarg].offset;
       int offset = 0;
       sbuf = (uintptr_t) comBuf;
       rbuf = (uintptr_t) req_array[rank].buffer;

       pthrequest* req = malloc(sizeof(pthrequest));
       req->sbuf      = sbuf;
       req->rbuf      = rbuf;
       req->offsetptr = offsetptr;
       req->bufsize   = bufsize;
       req->s_sync    = req_array[rank].sync;
       req->r_sync    = req_array[currtarg].sync;

       for ( i = 0; i < iters; i++ )
       {
         //do_strasnfer(req);
       }
     }
     else
     {
       /*  Receiver - time operation loop  */
       start = MPI_Wtime();

       volatile int* offsetptr = req_array[rank].offset;
       int offset = 0;

       /*  The receiver is now ready to take part in the synergistic
           transfer.  */
       rbuf = (uintptr_t) comBuf;
       sbuf = (uintptr_t) req_array[rank].buffer;

       pthrequest* req = malloc(sizeof(pthrequest));
       req->sbuf      = sbuf;
       req->rbuf      = rbuf;
       req->offsetptr = offsetptr;
       req->bufsize   = bufsize;
       req->s_sync    = req_array[currtarg].sync;
       req->r_sync    = req_array[rank].sync;

       for ( i = 0; i < iters; i++ )
       {
         //do_rtrasnfer(req);
       }


     }

//     if ( useBarrier )
//       MPI_Barrier(*activeComm);
       //generic_barrier(*activeComm);

     diff = MPI_Wtime() - start;

     MPI_Barrier(*activeComm);
     MPI_Barrier(*activeComm);

     free(req_array[currtarg].sync);
     free(req_array[currtarg].offset);
     free(comBuf);

   }


  MPI_Barrier(MPI_COMM_WORLD);

  /*  Get maximum sample length  */
  MPI_Reduce(&diff, &maxtime, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  int count_sum = 0 ;
  MPI_Reduce(&count, &count_sum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

  if ( rank == 0 )
  {
    totalops = iters * (procs/2);

    /*
      Bandwidth is calculated as :
      
        ((# of processes/2) * operations per time sample * message size)
        ------------------------------------------------------------
                  maximum sample length of all processes
    */
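    /*
      Illustrative arithmetic with assumed values (not measured output):
      procs = 2 (one sender/receiver pair), iters = 1000, bufsize = 1048576
      bytes and maxtime = 0.5 s give totalops = 1000 * (2/2) = 1000 and
      (1000 * 1048576) / 0.5 / 1e6 ~= 2097 MB/s.
    */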

    if ( maxtime > 0 )
    {
      printf(outputFormat, "Unidirectional", procs, count_sum, bufsize, 
             ((double)totalops*(double)bufsize/maxtime)/1000000,
             maxtime/iters/2*1000000);
    }
    else
    {
      printf("Invalid measurement.  Increase the number of operations per measurement.\n");
    }
  }

  return maxtime;
}
Example #4
File: com.c Project: 8l/insieme
double
runLatencyTest ( int bufsize, int iters, MPI_Comm * activeComm )
{
  int i, currtarg;
  double start, diff;
  MPI_Status stat;
  char *comBuf = NULL;

  if ( bufsize > 0 )
  {
    comBuf = ( char * ) malloc ( bufsize );

    if ( comBuf == NULL )
      prestaAbort ( "Failed to allocate latency buffer.\n" );
  }

  currtarg =
    getTargetRank ( rank, argStruct.procsPerNode, argStruct.useNearestRank );
  diff = 0.0;

  if ( isActiveProc ( activeComm ) )
  {
    for ( i = 0; i < 1000; i++ )
    {
      if ( rank < currtarg )
      {
        /*  Ensure pair communication has been initialized  */
        MPI_Send ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD );
        MPI_Recv ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD,
                   &stat );
      }
      else
      {
        /*  Ensure pair communication has been initialized  */
        MPI_Recv ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD,
                   &stat );
        MPI_Send ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD );
      }
    }

    generic_barrier ( *activeComm );
    generic_barrier ( *activeComm );

    if ( rank < currtarg )
    {
      /*  Time operation loop  */
      start = MPI_Wtime (  );

      for ( i = 0; i < iters; i++ )
      {
        MPI_Send ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD );
        MPI_Recv ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD,
                   &stat );
      }
    }
    else
    {
      /*  Time operation loop  */
      start = MPI_Wtime (  );

      for ( i = 0; i < iters; i++ )
      {
        MPI_Recv ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD,
                   &stat );
        MPI_Send ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD );
      }
    }

    if ( argStruct.useBarrier )
      generic_barrier ( *activeComm );

    diff = MPI_Wtime (  ) - start;
  }

  MPI_Barrier ( MPI_COMM_WORLD );

  if ( comBuf != NULL )
    free ( comBuf );

  return diff;
}
Example #5
File: com.c Project: 8l/insieme
double
runNonblockBicomTest ( int bufsize, int iters, MPI_Comm * activeComm )
{
  int i, currtarg;
  double diff = 0.0;
  double start;
  MPI_Status stat;
  char *sendBuf, *recvBuf;
  MPI_Request *sendRequests, *recvRequests;
  MPI_Status *sendStatuses, *recvStatuses;

  currtarg = getTargetRank ( rank, argStruct.procsPerNode,
                             argStruct.useNearestRank );

  sendBuf = ( char * ) malloc ( bufsize );
  recvBuf = ( char * ) malloc ( bufsize );

  sendRequests = malloc ( sizeof ( MPI_Request ) * argStruct.iters );
  recvRequests = malloc ( sizeof ( MPI_Request ) * argStruct.iters );
  sendStatuses = malloc ( sizeof ( MPI_Status ) * argStruct.iters );
  recvStatuses = malloc ( sizeof ( MPI_Status ) * argStruct.iters );

  if ( sendBuf == NULL || recvBuf == NULL ||
       sendRequests == NULL || recvRequests == NULL ||
       sendStatuses == NULL || recvStatuses == NULL )
    return 0;

  memset ( sendBuf, 0, bufsize );
  memset ( recvBuf, 0, bufsize );

  if ( isActiveProc ( activeComm ) )
  {
    /*  Ensure communication paths have been initialized  */
    MPI_Irecv ( recvBuf, bufsize, MPI_BYTE, currtarg, 0,
                MPI_COMM_WORLD, recvRequests );
    MPI_Isend ( sendBuf, bufsize, MPI_BYTE, currtarg, 0,
                MPI_COMM_WORLD, sendRequests );

    MPI_Wait ( recvRequests, recvStatuses );
    MPI_Wait ( sendRequests, sendStatuses );

    generic_barrier ( *activeComm );
    generic_barrier ( *activeComm );

    /*  Time operation loop  */
    start = MPI_Wtime (  );

#ifdef FINAL_WAITALL

    for ( i = 0; i < iters; i++ )
    {
      MPI_Irecv ( recvBuf, bufsize, MPI_BYTE, currtarg, MPI_ANY_TAG,
                  MPI_COMM_WORLD, &recvRequests[i] );
    }

    for ( i = 0; i < iters; i++ )
    {
      MPI_Isend ( sendBuf, bufsize, MPI_BYTE, currtarg, i,
                  MPI_COMM_WORLD, &sendRequests[i] );
    }

    MPI_Waitall ( argStruct.iters, sendRequests, sendStatuses );
    MPI_Waitall ( argStruct.iters, recvRequests, recvStatuses );

#else

    for ( i = 0; i < iters; i++ )
    {
      MPI_Isend ( sendBuf, bufsize, MPI_BYTE, currtarg, i,
                  MPI_COMM_WORLD, &sendRequests[0] );

      MPI_Recv ( recvBuf, bufsize, MPI_BYTE, currtarg, MPI_ANY_TAG,
                 MPI_COMM_WORLD, &stat );

      MPI_Wait ( sendRequests, sendStatuses );
    }

#endif

    if ( argStruct.useBarrier )
      generic_barrier ( *activeComm );

    diff = MPI_Wtime (  ) - start;
  }

  free ( sendBuf );
  free ( recvBuf );
  free ( sendRequests );
  free ( recvRequests );
  free ( sendStatuses );
  free ( recvStatuses );

  MPI_Barrier ( MPI_COMM_WORLD );

  if ( diff > 0 && argStruct.sumLocalBW == 1 )
    return ( ( double ) bufsize * ( double ) iters ) / diff;
  else
    return diff;
}
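When argStruct.sumLocalBW == 1, Example #5 returns the local bandwidth (double)bufsize * (double)iters / diff in bytes per second instead of the elapsed time. As an illustration with assumed numbers (not measured output): bufsize = 1048576 bytes and iters = 1000 completed in diff = 0.5 s would return 1048576 * 1000 / 0.5 ~= 2.1e9 B/s.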
Example #6
File: com.c Project: 8l/insieme
double
runBicomTest ( int bufsize, int iters, MPI_Comm * activeComm )
{
  int i, currtarg;
  double start, diff;
  char *sendbuf, *recvbuf, *validate_buf;
  MPI_Status stat;
  long long err_count = 0;

  currtarg = getTargetRank ( rank, argStruct.procsPerNode,
                             argStruct.useNearestRank );
  diff = 0.0;

  if ( currtarg != -1 && isActiveProc ( activeComm ) )
  {
    sendbuf = ( char * ) malloc ( bufsize );
    recvbuf = ( char * ) malloc ( bufsize );

    memset ( sendbuf, 0, bufsize );
    memset ( recvbuf, 0, bufsize );

    /*  Ensure communication paths have been initialized  */
    MPI_Sendrecv ( sendbuf, bufsize, MPI_BYTE, currtarg, 0,
                   recvbuf, bufsize, MPI_BYTE, currtarg, 0,
                   MPI_COMM_WORLD, &stat );

    generic_barrier ( *activeComm );
    generic_barrier ( *activeComm );

    /*  Time operation loop  */
    start = MPI_Wtime (  );

    if ( presta_check_data == 1 )
      validate_buf = malloc ( bufsize );

    for ( i = 0; i < iters; i++ )
    {
      if ( presta_check_data == 1 )
      {
        set_data_values ( bufsize, sendbuf );
        memcpy ( validate_buf, sendbuf, bufsize );
      }

      MPI_Sendrecv ( sendbuf, bufsize, MPI_BYTE, currtarg, 0,
                     recvbuf, bufsize, MPI_BYTE, currtarg, 0,
                     MPI_COMM_WORLD, &stat );

      if ( presta_check_data == 1 )
      {
        err_count = check_data_values ( bufsize, recvbuf, validate_buf,
                                        MPI_BYTE, PRESTA_OP_P2P );
        if ( err_count > 0 )
        {
          prestaWarn
            ( "Bidirectional receive data check failed with %lld errors\n",
              err_count );
          presta_data_err_total += err_count;
        }
      }
    }

    if ( presta_check_data == 1 )
      free ( validate_buf );

    if ( argStruct.useBarrier )
      generic_barrier ( *activeComm );

    diff = MPI_Wtime (  ) - start;

    free ( sendbuf );
    free ( recvbuf );
  }

  MPI_Barrier ( MPI_COMM_WORLD );

  if ( diff > 0 && argStruct.sumLocalBW == 1 )
    return ( ( double ) bufsize * ( double ) iters ) / diff;
  else
    return diff;
}
Example #7
File: com.c Project: 8l/insieme
double
runUnicomTest ( int bufsize, int iters, MPI_Comm * activeComm )
{
  int i, currtarg;
  double diff = 0.0;
  double start;
  MPI_Status stat;
  char *comBuf;

  currtarg =
    getTargetRank ( rank, argStruct.procsPerNode, argStruct.useNearestRank );

  diff = 0;

  if ( isActiveProc ( activeComm ) )
  {
    comBuf = ( char * ) malloc ( bufsize );
    memset ( comBuf, 0, bufsize );

    /*  Ensure communication paths have been initialized  */
    if ( rank < currtarg )
      MPI_Send ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD );
    else
    {
      MPI_Recv ( comBuf, bufsize, MPI_BYTE, currtarg, 0,
                 MPI_COMM_WORLD, &stat );
    }

    generic_barrier ( *activeComm );
    generic_barrier ( *activeComm );

    if ( rank < currtarg )
    {
      /*  Time operation loop  */
      start = MPI_Wtime (  );
      for ( i = 0; i < iters; i++ )
      {
        if ( presta_check_data == 1 )
          set_data_values ( bufsize, comBuf );
        MPI_Send ( comBuf, bufsize, MPI_BYTE, currtarg, 0, MPI_COMM_WORLD );
      }
    }
    else
    {
      void *validate_buf = NULL;
      int err_count = 0;
      if ( presta_check_data == 1 )
      {
        validate_buf = malloc ( bufsize );
      }

      start = MPI_Wtime (  );
      for ( i = 0; i < iters; i++ )
      {
        if ( presta_check_data == 1 )
          set_data_values ( bufsize, validate_buf );

        MPI_Recv ( comBuf, bufsize, MPI_BYTE, currtarg, 0,
                   MPI_COMM_WORLD, &stat );
        if ( presta_check_data == 1 )
        {
          err_count =
            check_data_values ( bufsize, comBuf, validate_buf,
                                MPI_BYTE, PRESTA_OP_P2P );
          if ( err_count > 0 )
          {
            prestaWarn
              ( "Unidirectional receive data check failed with %d errors\n",
                err_count );
            presta_data_err_total += err_count;

          }
        }
      }

      if ( presta_check_data == 1 )
      {
        free ( validate_buf );
      }
    }

    if ( argStruct.useBarrier )
      generic_barrier ( *activeComm );

    diff = MPI_Wtime (  ) - start;

    free ( comBuf );
  }

  MPI_Barrier ( MPI_COMM_WORLD );

  if ( diff > 0 && argStruct.sumLocalBW == 1 )
    return ( ( double ) bufsize * ( double ) iters ) / ( diff * 2 );
  else
    return diff;
}
Example #8
double runBicomTest(int procs, int bufsize, int iters, int rank, 
                    int wsize, int procsPerNode, char allocPattern, 
                    int useBarrier, int useNearestRank, MPI_Comm *activeComm)
{
  int i, currtarg;
  double start, diff, max;
  char *sendbuf, *recvbuf;
  double totalops;
  MPI_Status stat;

  currtarg = getTargetRank(rank, wsize, procsPerNode, useNearestRank);
  diff     = 0;

  if ( isActiveProc(rank, wsize, procsPerNode, procs, 
                    allocPattern, useNearestRank) ) 
  {
    sendbuf  = (char*)malloc(bufsize);
    recvbuf  = (char*)malloc(bufsize);

    memset(sendbuf, 0, bufsize);
    memset(recvbuf, 0, bufsize);

    /*  Ensure communication paths have been initialized  */
    MPI_Sendrecv(sendbuf, bufsize, MPI_CHAR, currtarg, 0, 
                 recvbuf, bufsize, MPI_CHAR, currtarg, 0, 
                 MPI_COMM_WORLD, &stat);

    //generic_barrier(*activeComm);
    MPI_Barrier(*activeComm);
    //generic_barrier(*activeComm);
    MPI_Barrier(*activeComm);

    /*  Time operation loop  */
    start = MPI_Wtime();

    for ( i = 0; i < iters; i++ )
    {
      MPI_Sendrecv(sendbuf, bufsize, MPI_CHAR, currtarg, 0, 
                   recvbuf, bufsize, MPI_CHAR, currtarg, 0, 
                   MPI_COMM_WORLD, &stat);
    }

    if ( useBarrier )
      MPI_Barrier(*activeComm);
      //generic_barrier(*activeComm);

    diff = MPI_Wtime() - start;

    free(sendbuf);
    free(recvbuf);
  }

  MPI_Barrier(MPI_COMM_WORLD);

  /*  Get maximum sample length  */
  MPI_Reduce(&diff, &max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  if ( rank == 0 )
  {
    totalops   = (double)(iters * procs);

    /*
      Bandwidth is calculated as :
      
        (# of processes * operations per time sample * message size)
        ------------------------------------------------------------
                  maximum sample length of all processes
    */

    if ( max > 0 )
    {
      printf(outputFormat, "Bidirectional", procs, bufsize, 
             ((double)totalops*(double)bufsize/max)/1000000,
             max/iters*1000000);
    }
    else
    {
      printf("Invalid measurement.  Increase the number of operations per measurement.\n");
    }
  }

  return max;
}