Exemplo n.º 1
0
/*
 * Time ITERATIONS A1_Get calls issued by the origin task.
 *
 * Only the task whose id equals origin issues the A1_Get calls, each one
 * followed by A1_Flush on the target.  Every task then calls A1_AllFence
 * and MPI_Barrier before the second timestamp is taken.
 *
 * sndlen  transfer length in doubles; it is multiplied by sizeof(double)
 *         to form the byte count passed to A1_Get
 * mytask  id of the calling task
 * origin  id of the task that issues the transfers
 * target  id of the task the transfers are directed at
 *
 * Returns the elapsed GetTimeBase() ticks divided by ITERATIONS.
 *
 * NOTE(review): despite its name this function calls A1_Get rather than an
 * accumulate operation -- confirm that this is intended.
 */
uint64_t accumulate (size_t            sndlen, 
		     int               mytask, 
		     int               origin,
		     int               target)
{
  TRACE_ERR((stderr, "(%d) Do test ... sndlen = %zu\n", mytask, sndlen));
  unsigned i;
  uint64_t t1 = GetTimeBase();

  if (mytask == origin)
  {
    for (i = 0; i < ITERATIONS; i++)
    {
      A1_Get ( target, 
	       srcbuf,
	       dstbuf,
	       sndlen * sizeof(double) );      
      A1_Flush (target);
    }
  }

  /* All tasks synchronize here so the stop timestamp covers the transfers. */
  A1_AllFence();
  MPI_Barrier(MPI_COMM_WORLD);
  
  uint64_t t2 = GetTimeBase();
  return ((t2 - t1) / ITERATIONS);
}
Exemplo n.º 2
0
Arquivo: rmw.c Projeto: jeffhammond/a1
/*
 * Exercise A1_Rmw with the A1_SWAP operation on one A1_INT32.
 *
 * The origin task issues one swap against every other task in turn,
 * starting at origin+1 and wrapping around to task 0 after the last task,
 * so that ntasks-1 distinct targets are visited and the origin itself is
 * skipped.  All tasks take part in the surrounding A1_AllFence and
 * MPI_Barrier calls and finally print outbuf[0].
 *
 * mytask  id of the calling task
 * origin  id of the task that issues the swaps
 * ntasks  total number of tasks
 *
 * NOTE(review): &outbuf[0] is passed to A1_Rmw twice, once of them
 * presumably as the target-side address.  That would rely on the malloc'd
 * buffer living at the same address on every task -- confirm.
 */
void test_rmw (int               mytask, 
	       int               origin,
	       int               ntasks)
{
  TRACE_ERR((stderr, "(%d) Do test ... \n", mytask));

  int i;
  int *outbuf = malloc (2 * sizeof *outbuf);
  if (outbuf == NULL)
  {
    /* Returning quietly would leave the other tasks stuck in the
       collectives below, so bring the whole job down instead. */
    printf ("%d: failed to allocate swap buffer\n", mytask);
    MPI_Abort (MPI_COMM_WORLD, 1);
    return;
  }
  outbuf[0] = mytask;
  outbuf[1] = origin;

  int target = origin+1;    
  if (target >= ntasks)
    target = 0;

  A1_AllFence();  
  if (mytask == origin)
  {
    for (i = 0; i < ntasks-1; i++)
    {
      A1_Rmw ( target, 
	       &outbuf[0],
	       &outbuf[1],
	       &outbuf[0],
	       sizeof(int),
	       A1_SWAP,
	       A1_INT32 );
      A1_Flush (target);

      /* Advance to the next task, wrapping so the id stays below ntasks. */
      target ++;
      if (target >= ntasks)
        target = 0;

      /* Copy outbuf[1] into outbuf[0] before the next swap (presumably the
         value handed back by the previous target -- confirm). */
      outbuf[0] = outbuf[1];
    }
  }
  A1_AllFence();  
  MPI_Barrier(MPI_COMM_WORLD);

  printf ("%d: My new task id %d\n", mytask, outbuf[0]);

  /* Released only after the fence and barrier above have completed. */
  free (outbuf);
}
Exemplo n.º 3
0
/*
 * A1_NbPut bandwidth benchmark.
 *
 * Every process allocates a segment of MAX_MSGSIZE * ITERATIONS bytes,
 * exchanges segment addresses with the group and fills its own segment
 * with 1.0 + rank.  Rank 0 then, for each message size from sizeof(double)
 * up to MAX_MSGSIZE (doubling each step), issues bufsize/msgsize
 * non-blocking puts towards rank 1, waits on the handle and prints the
 * resulting bandwidth.  Rank 0 always addresses rank 1, so the program
 * needs at least two processes.
 *
 * Returns 0 on success, 1 if the address table cannot be allocated.
 */
int main()
{

    size_t i, rank, nranks, msgsize, dest;
    size_t iterations, max_msgsize;
    int bufsize;
    double **buffer;
    double t_start, t_stop, t_total, d_total;
    double bandwidth;
    A1_handle_t a1_handle;

    max_msgsize = MAX_MSGSIZE;

    A1_Initialize(A1_THREAD_SINGLE);

    rank = A1_Process_id(A1_GROUP_WORLD);
    nranks = A1_Process_total(A1_GROUP_WORLD);

    bufsize = max_msgsize * ITERATIONS;
    buffer = malloc(nranks * sizeof *buffer);
    if (buffer == NULL)
    {
        printf("failed to allocate segment address table\n");
        fflush(stdout);
        return 1;
    }
    A1_Alloc_segment((void **) &(buffer[rank]), bufsize);
    A1_Exchange_segments(A1_GROUP_WORLD, (void **) buffer);

    const size_t nelems = bufsize / sizeof(double);
    for (i = 0; i < nelems; i++)
    {
        buffer[rank][i] = 1.0 + rank;
    }

    A1_Allocate_handle(&a1_handle);

    A1_Barrier_group(A1_GROUP_WORLD);

    if (rank == 0)
    {

        printf("A1_Put Bandwidth in MBPS \n");
        printf("%20s %22s \n", "Message Size", "Bandwidth");
        fflush(stdout);

        dest = 1;

        for (msgsize = sizeof(double); msgsize <= max_msgsize; msgsize *= 2)
        {

            iterations = bufsize/msgsize;

            t_start = A1_Time_seconds();

            for (i = 0; i < iterations; i++)
            {
                const size_t offset = i * msgsize;

                /* NOTE(review): the first pointer is taken from
                   buffer[dest] and the second from buffer[rank]; confirm
                   this matches the source/target order A1_NbPut expects. */
                A1_NbPut(dest,
                         (void *) ((char *) buffer[dest] + offset),
                         (void *) ((char *) buffer[rank] + offset),
                         msgsize, a1_handle);

            }

            A1_Wait_handle(a1_handle);

            t_stop = A1_Time_seconds();

            /* Divide in floating point so the fractional part of the
               megabyte count is not truncated away. */
            d_total = (double) (iterations * msgsize) / (1024.0 * 1024.0);
            t_total = t_stop - t_start;
            bandwidth = d_total / t_total;

            /* msgsize is a size_t, hence %zu. */
            printf("%20zu %20.4lf \n", msgsize, bandwidth);
            fflush(stdout);
           
            A1_Flush(dest);
        }

    }

    A1_Barrier_group(A1_GROUP_WORLD);

    A1_Release_handle(a1_handle);

    A1_Release_segments(A1_GROUP_WORLD, buffer[rank]);

    /* The address table itself came from malloc. */
    free(buffer);

    A1_Finalize();

    return 0;
}
Exemplo n.º 4
0
/* Store value into each of the first nelems doubles of buf. */
static void puts_fill_buffer(double *buf, long nelems, double value)
{
   long k;

   for(k=0; k<nelems; k++)
       buf[k] = value;
}

/*
 * Compare the xdim-by-ydim corner of buf against expected, where
 * consecutive rows are MAX_YDIM doubles apart.  Prints the first mismatch
 * and returns -1; returns 0 when every element matches.
 */
static int puts_validate_block(const double *buf, int xdim, int ydim, double expected)
{
   int x, y;

   for(x=0; x<xdim; x++)
   {
      for(y=0; y<ydim; y++)
      {
          double actual = buf[x*MAX_YDIM + y];
          if(actual != expected)
          {
             printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
                     x, y, expected, actual);
             fflush(stdout);
             return -1;
          }
      }
   }
   return 0;
}

/*
 * A1_PutS latency benchmark.
 *
 * Every process allocates a MAX_XDIM x MAX_YDIM array of doubles, exchanges
 * segment addresses and fills its own array with 1.0 + rank.  For each
 * power-of-two (xdim, ydim) pair rank 0 times two rounds of strided puts
 * to rank 1: the first round flushes once after the timed loop, the second
 * flushes after every put.  After each round rank 1 checks that the
 * xdim x ydim corner of its array holds rank 0's fill value and then
 * restores its own fill value.  Rank 0 always addresses rank 1, so the
 * program needs at least two processes.
 *
 * Returns 0 on success, -1 if rank 1 detects wrong data, 1 if the address
 * table cannot be allocated.
 */
int main() {

   int i, rank, nranks;
   int xdim, ydim;
   long bufsize, nelems;
   double **buffer;
   double t_start = 0.0, t_stop;
   int count[2], src_stride, trg_stride, stride_level;
   const int sender = 0, receiver = 1;
   const double expected = 1.0 + (double) sender;
   
   A1_Initialize(A1_THREAD_SINGLE); 

   rank = A1_Process_id(A1_GROUP_WORLD);
   nranks = A1_Process_total(A1_GROUP_WORLD);

   buffer = malloc (nranks * sizeof *buffer); 
   if(buffer == NULL) {
     printf("failed to allocate segment address table\n");
     fflush(stdout);
     return 1;
   }

   A1_Barrier_group(A1_GROUP_WORLD);

   bufsize = MAX_XDIM * MAX_YDIM * sizeof(double);
   nelems = bufsize / (long) sizeof(double);
   A1_Alloc_segment((void **) &(buffer[rank]), bufsize);
   A1_Exchange_segments(A1_GROUP_WORLD, (void **) buffer);

   puts_fill_buffer(buffer[rank], nelems, 1.0 + rank);

   if(rank == 0) {
     printf("A1_PutS Latency - local and remote completions - in usec \n");
     /* One conversion per column title. */
     printf("%30s %22s %22s \n", "Dimensions(array of doubles)", "Latency-LocalCompeltion", "Latency-RemoteCompletion");
     fflush(stdout);
   }

   src_stride = MAX_YDIM*sizeof(double);
   trg_stride = MAX_YDIM*sizeof(double);
   stride_level = 1;

   /*
    * Each (xdim, ydim) step runs four barriers, and every rank calls all
    * four so the barrier counts stay equal across the group:
    *   1. round-one puts are flushed    -> receiver may validate
    *   2. receiver has restored its data -> sender may start round two
    *   3. round-two puts are flushed    -> receiver may validate
    *   4. receiver has restored its data -> next step may start
    */
   for(xdim=1; xdim<=MAX_XDIM; xdim*=2)
   {
      count[1] = xdim;
      for(ydim=1; ydim<=MAX_YDIM; ydim*=2)
      {
        count[0] = ydim*sizeof(double); 

        /* Round one: time the puts alone, flush once afterwards. */
        if(rank == sender) 
        {
          for(i=0; i<ITERATIONS+SKIP; i++)
          {
             /* The first SKIP iterations are warm-up and are not timed. */
             if(i == SKIP) t_start = A1_Time_seconds();              
             A1_PutS(receiver, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[receiver], &trg_stride); 
          }
          t_stop = A1_Time_seconds();
          A1_Flush(receiver);

          char temp[32]; 
          snprintf(temp, sizeof temp, "%dX%d", xdim, ydim);
          printf("%30s %20.2f", temp, ((t_stop-t_start)*1000000)/ITERATIONS);
          fflush(stdout);
        }

        A1_Barrier_group(A1_GROUP_WORLD);

        if(rank == receiver)
        {
          if(puts_validate_block(buffer[rank], xdim, ydim, expected) != 0)
              return -1;
          puts_fill_buffer(buffer[rank], nelems, 1.0 + rank);
        }

        A1_Barrier_group(A1_GROUP_WORLD);

        /* Round two: flush after every put, inside the timed region. */
        if(rank == sender)
        {
          for(i=0; i<ITERATIONS+SKIP; i++)
          {
             if(i == SKIP) t_start = A1_Time_seconds();
             A1_PutS(receiver, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[receiver], &trg_stride);
             A1_Flush(receiver);
          }
          t_stop = A1_Time_seconds();
          printf("%20.2f \n", ((t_stop-t_start)*1000000)/ITERATIONS);
          fflush(stdout);
        }

        A1_Barrier_group(A1_GROUP_WORLD);

        if(rank == receiver)
        {
          if(puts_validate_block(buffer[rank], xdim, ydim, expected) != 0)
              return -1;
          puts_fill_buffer(buffer[rank], nelems, 1.0 + rank);
        }

        A1_Barrier_group(A1_GROUP_WORLD);
      }
   }
   A1_Barrier_group(A1_GROUP_WORLD);

   A1_Release_segments(A1_GROUP_WORLD, (void *) buffer[rank]);
   A1_Free_segment((void *) buffer[rank]);

   /* The address table itself came from malloc. */
   free(buffer);

   A1_Finalize();

   return 0;
}