Example #1
0
/*
 * Benchmark driver for accumulate().
 *
 * Initializes MPI and A1, allocates the file-level srcbuf/dstbuf transfer
 * buffers, and then calls accumulate() for message lengths that double from
 * one double up to BUFSIZE bytes.  Task 0 is the origin and the last task is
 * the target.  Every task runs the loop; only the origin prints the table.
 *
 * srcbuf, dstbuf, BUFSIZE and accumulate() are defined outside this block.
 *
 * Returns 0.
 */
int main (int argc, char ** argv)
{
  int my_task, origin_task, target_task, num_tasks;

  MPI_Init (&argc, &argv);
  A1_Initialize(0);
  origin_task = 0;

  srcbuf = (double *) memalign (64, BUFSIZE);
  dstbuf = (double *) memalign (64, BUFSIZE);

  MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_task);

  target_task = num_tasks-1;

  /* Display some test header information */
  /* NOTE(review): the header is labelled "get" although the timed routine
   * is accumulate() -- confirm the label is intended. */
  if (my_task == origin_task)
  {
    fprintf (stdout, "#\n");
    fprintf (stdout, "%s\n", "# get      ");
    fprintf (stdout, "%s\n", "#          ");
    fprintf (stdout, "%s%s\n", "#    bytes ", "  usec");
    fflush (stdout);
  }

  /* Result table, built up one row per message size. */
  char str[10240];
  size_t used = 0;
  str[0] = '\0';   /* stays a valid empty string if the loop never runs */

  for (size_t sndlen = 1; sndlen <= BUFSIZE/sizeof(double); sndlen = sndlen * 2)
  {
    int n;
    double usec;

    /* size_t must be printed with %zu; the write is bounded by the space
     * left in str and the offset is clamped so later writes stay in range. */
    n = snprintf (&str[used], sizeof str - used, "%10zu ", sndlen*sizeof(double));
    if (n > 0) used += (size_t) n;
    if (used >= sizeof str) used = sizeof str - 1;

#ifdef WARMUP
    accumulate (sndlen, my_task, origin_task, target_task);
#endif
    /* presumably converts a cycle count from a 1600 MHz clock into
     * microseconds -- TODO confirm against accumulate() */
    usec = ((double)accumulate (sndlen, my_task, origin_task, target_task)) / 1600.0;

    n = snprintf (&str[used], sizeof str - used, "%6.3f \n", usec);
    if (n > 0) used += (size_t) n;
    if (used >= sizeof str) used = sizeof str - 1;
  }

  if (my_task == origin_task)
    fprintf (stdout, "%s\n", str);

  A1_Finalize();
  MPI_Finalize();

  return 0;
}
Example #2
0
File: rmw.c Project: jeffhammond/a1
/*
 * Entry point of the read-modify-write test.
 *
 * Starts MPI and A1, stores the task count and this task's rank in the
 * num_tasks / my_task variables declared outside this block, runs
 * test_rmw() with task 0 as the origin, and shuts both runtimes down.
 *
 * Returns 0.
 */
int main (int argc, char ** argv)
{
  /* Runtime start-up: MPI first, then A1. */
  MPI_Init (&argc, &argv);
  A1_Initialize (0);

  /* Task 0 is handed to the test as the origin. */
  origin_task = 0;

  /* Query the job layout. */
  MPI_Comm_rank (MPI_COMM_WORLD, &my_task);
  MPI_Comm_size (MPI_COMM_WORLD, &num_tasks);

  test_rmw (my_task, origin_task, num_tasks);

  /* Shut down in the reverse order of start-up. */
  A1_Finalize ();
  MPI_Finalize ();

  return 0;
}
Example #3
0
/*
 * Driver for the blocking-communication tests.
 *
 * Runs the contiguous (test_1D / test_1D_remote) and strided
 * (test_2D / test_2D_remote) transfer tests once for timing, with
 * CHECK_RESULT cleared, and then repeats test_1D and test_2D with
 * CHECK_RESULT set to 1.  Process 0 prints the section banners.
 *
 * me, nproc, CHECK_RESULT, warn_accuracy, MAXPROC and the test_* routines
 * are defined outside this block.
 *
 * Returns 0; exits early (also with status 0) when the process count is
 * outside [2, MAXPROC].
 */
int main(int argc, char **argv)
{
    (void) argc;
    (void) argv;

    /* initialize A1 */
    A1_Initialize(A1_THREAD_SINGLE);

    me = A1_Process_id(A1_GROUP_WORLD);
    nproc = A1_Process_total(A1_GROUP_WORLD);

    if(nproc < 2 || nproc> MAXPROC) {
        if(me == 0)
            fprintf(stderr,
                    "USAGE: 2 <= processes <= %d - got %d\n", MAXPROC, nproc);
        A1_Barrier_group(A1_GROUP_WORLD);
        /* Every process takes this branch, so shut A1 down before leaving
         * instead of exiting with the runtime still initialized.  The exit
         * status is left at 0 so existing callers see no change. */
        A1_Finalize();
        exit(0);
    }

    if(!me)printf("\n             Performance of Basic Blocking Communication Operations\n");
    A1_Barrier_group(A1_GROUP_WORLD);

    CHECK_RESULT=1; test_1D(); CHECK_RESULT=0; /* warmup run */

    /* contiguous transfer */
    if(!me)printf("\n\t\t\tContiguous Data Transfer\n");
    test_1D();

    /* contiguous transfer, remote completion */
    if(!me)printf("\n\t\t\tContiguous Data Transfer - Remote completion\n");
    test_1D_remote();

    /* strided transfer */
    if(!me)printf("\n\t\t\tStrided Data Transfer\n");
    test_2D();

    /* strided transfer, remote completion */
    if(!me)printf("\n\t\t\tStrided Data Transfer - Remote completion\n");
    test_2D_remote();

    A1_Barrier_group(A1_GROUP_WORLD);
    if(me == 0){
       if(warn_accuracy)
          printf("\nWARNING: Your timer does not have sufficient accuracy for this test (%d)\n",warn_accuracy);
       printf("\n\n------------ Now we test the same data transfer for correctness ----------\n");
       fflush(stdout);
    }

    /* Second pass: same transfers with result checking switched on. */
    A1_Barrier_group(A1_GROUP_WORLD);
    CHECK_RESULT=1;
    if(!me)printf("\n\t\t\tContiguous Data Transfer\n");
    test_1D();
    if(me == 0) printf("OK\n");
    A1_Barrier_group(A1_GROUP_WORLD);
    if(!me)printf("\n\t\t\tStrided Data Transfer\n");
    test_2D();
    if(me == 0) printf("OK\n\n\nTests Completed.\n");
    A1_Barrier_group(A1_GROUP_WORLD);

    /* done */
    A1_Finalize();
    return(0);
}
Example #4
0
/*
 * A1_NbPut bandwidth test.
 *
 * Every process allocates a segment of MAX_MSGSIZE * ITERATIONS bytes,
 * exchanges segment addresses and fills its own segment with 1.0 + rank.
 * Process 0 then, for message sizes doubling from sizeof(double) up to
 * MAX_MSGSIZE, issues bufsize / msgsize non-blocking puts involving
 * process 1, waits on the handle, and prints the resulting rate in MiB/s.
 *
 * Needs at least two processes; with fewer the measurement is skipped.
 *
 * Returns 0 on normal completion, 1 if the address table cannot be
 * allocated.
 */
int main()
{

    size_t i, rank, nranks, msgsize, dest;
    size_t iterations, max_msgsize;
    size_t bufsize;   /* a size_t product: an int could overflow */
    double **buffer;
    double t_start, t_stop, t_total, d_total;
    double bandwidth;
    A1_handle_t a1_handle;

    max_msgsize = MAX_MSGSIZE;

    A1_Initialize(A1_THREAD_SINGLE);

    rank = A1_Process_id(A1_GROUP_WORLD);
    nranks = A1_Process_total(A1_GROUP_WORLD);

    bufsize = max_msgsize * ITERATIONS;
    buffer = (double **) malloc(sizeof(double *) * nranks);
    if (buffer == NULL)
    {
        fprintf(stderr, "failed to allocate the segment address table\n");
        return 1;
    }
    A1_Alloc_segment((void **) &(buffer[rank]), bufsize);
    A1_Exchange_segments(A1_GROUP_WORLD, (void **) buffer);

    for (i = 0; i < bufsize / sizeof(double); i++)
    {
        *(buffer[rank] + i) = 1.0 + rank;
    }

    A1_Allocate_handle(&a1_handle);

    A1_Barrier_group(A1_GROUP_WORLD);

    if (nranks < 2)
    {
        /* buffer[1] does not exist with a single process. */
        if (rank == 0)
        {
            fprintf(stderr, "this test needs at least 2 processes\n");
        }
    }
    else if (rank == 0)
    {

        printf("A1_Put Bandwidth in MBPS \n");
        printf("%20s %22s \n", "Message Size", "Bandwidth");
        fflush(stdout);

        dest = 1;

        for (msgsize = sizeof(double); msgsize <= max_msgsize; msgsize *= 2)
        {

            iterations = bufsize/msgsize;

            t_start = A1_Time_seconds();

            for (i = 0; i < iterations; i++)
            {
                /* Same byte offset into both segments for this message. */
                size_t offset = i * msgsize;

                A1_NbPut(dest,
                         (void *) ((char *) buffer[dest] + offset),
                         (void *) ((char *) buffer[rank] + offset),
                         msgsize,
                         a1_handle);

            }

            A1_Wait_handle(a1_handle);

            t_stop = A1_Time_seconds();
            /* Divide in floating point: an integer division here would
             * truncate to 0 for anything under 1 MiB. */
            d_total = (double) (iterations * msgsize) / (1024.0 * 1024.0);
            t_total = t_stop - t_start;
            bandwidth = d_total / t_total;
            printf("%20zu %20.4lf \n", msgsize, bandwidth);
            fflush(stdout);

            A1_Flush(dest);
        }

    }

    A1_Barrier_group(A1_GROUP_WORLD);

    A1_Release_handle(a1_handle);

    /* NOTE(review): the segment is released but never freed in this
     * program -- confirm whether the A1 API expects a separate free call. */
    A1_Release_segments(A1_GROUP_WORLD, buffer[rank]);

    free(buffer);

    A1_Finalize();

    return 0;
}
Example #5
0
/*
 * A1_Rmw fetch-and-add test.
 *
 * All processes repeatedly fetch-and-add 1 on a counter that lives on
 * process 0 until the fetched value reaches COUNT.  Each process records
 * which counter values it obtained; a SUM allreduce then combines the
 * records so that every process can check that each value in [0, COUNT)
 * was handed out, and process 0 can print which process received which value.
 *
 * Ownership is recorded as rank + 1 with 0 meaning "nobody".  Exactly one
 * process is expected to receive each value, so after the SUM reduction an
 * entry is either 0 (never handed out) or owner + 1.  A -1 sentinel cannot
 * be used with a SUM reduction: the -1 entries contributed by the other
 * processes would be added onto the owner's rank.
 *
 * Returns 0 on success; calls exit(-1) if a counter value was never handed
 * out or an allocation fails.
 */
int main(int argc, char* argv[])
{
    int provided;
    int i, rank, nranks, target;
    int **counter;
    int *complete;
    int increment;
    int counter_fetch;
    int counters_received;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);

    A1_Initialize(A1_THREAD_SINGLE);

    rank = A1_Process_id(A1_GROUP_WORLD);
    nranks = A1_Process_total(A1_GROUP_WORLD);

    complete = (int *) malloc(sizeof(int) * COUNT);
    counter = (int **) malloc(sizeof(int *) * nranks);
    if (complete == NULL || counter == NULL)
    {
        printf("[%d] memory allocation failed \n", rank);
        fflush(stdout);
        exit(-1);
    }

    A1_Alloc_segment((void **) &(counter[rank]), sizeof(int));
    A1_Exchange_segments(A1_GROUP_WORLD, (void **) counter);

    if (rank == 0)
    {
        printf("A1_RMW Test - in usec \n");
        fflush(stdout);
    }

    target = 0;

    /* 0 == not handed out to anybody (see the header comment). */
    for(i=0; i<COUNT; i++)
    {
       complete[i] = 0;
    }
    if(rank == target)
    {
       *(counter[rank]) = 0;
    }
    increment = 1;
    counter_fetch = 0;
    counters_received = 0;

    A1_Barrier_group(A1_GROUP_WORLD);

    while(counter_fetch < COUNT)
    {
        A1_Rmw(target,
               (void *) &increment,
               (void *) &counter_fetch,
               (void *) counter[target],
               sizeof(int),
               A1_FETCH_AND_ADD,
               A1_INT32);

        /* Only a value below COUNT is a real counter; the last fetch of
         * every process is >= COUNT and must not be counted. */
        if (counter_fetch < COUNT)
        {
            complete[counter_fetch] = rank + 1;
            counters_received++;
        }
    }

    A1_Allreduce_group(A1_GROUP_WORLD,
                       COUNT,
                       A1_SUM,
                       A1_INT32,
                       (void *) complete,
                       (void *) complete);

    for(i=0; i<COUNT; i++)
    {
       if (complete[i] == 0)
       {
           printf("[%d] The RMW update failed at index: %d \n", rank, i);
           fflush(stdout);
           exit(-1);
       }
    }
    printf("[%d] The RMW update completed successfully \n", rank);
    fflush(stdout);
    A1_Barrier_group(A1_GROUP_WORLD);

    if (0==rank)
    {
        printf("Checking for fairness...\n");
        fflush(stdout);
        for(i=0; i<COUNT; i++)
        {
           /* stored value is owner + 1 */
           printf("counter value %d was received by process %d\n", i, complete[i] - 1);
        }
        fflush(stdout);
    }
    A1_Barrier_group(A1_GROUP_WORLD);

    printf("process %d received %d counters\n", rank, counters_received);
    fflush(stdout);

    A1_Release_segments(A1_GROUP_WORLD, counter[rank]);
    A1_Free_segment(counter[rank]);

    free(counter);
    free(complete);

    A1_Finalize();

    /* NOTE(review): MPI is initialized above but never finalized in this
     * program -- confirm whether A1_Finalize() covers that or whether an
     * MPI_Finalize() call is missing here. */

    return 0;
}
Example #6
0
/*
 * Checks that the first `count` entries of `values` all equal `expected`.
 * On the first mismatch it prints a diagnostic tagged with `rank` and
 * terminates the process with exit(-1).
 */
static void verify_reduction(const int *values, int count, int expected, int rank)
{
    int i;

    for (i = 0; i < count; i++)
    {
        if (values[i] != expected)
        {
            printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n",
                   rank, expected, values[i], i);
            fflush(stdout);
            exit(-1);
        }
    }
}

/*
 * A1_Allreduce_group correctness test.
 *
 * For message sizes doubling from sizeof(int) while below MAX_MSG_SIZE,
 * performs an in-place integer SUM reduction of each process's rank and an
 * in-place PROD reduction of all-ones, and checks every reduced element
 * against the expected value (0 + 1 + ... + nranks-1, and 1 respectively).
 *
 * Returns 0 on success; calls exit(-1) on a validation or allocation
 * failure.
 */
int main()
{

    int i, rank, nranks, msgsize;
    long bufsize;
    int *buffer;
    int expected;

    A1_Initialize(A1_THREAD_SINGLE);

    rank = A1_Process_id(A1_GROUP_WORLD);
    nranks = A1_Process_total(A1_GROUP_WORLD);

    bufsize = MAX_MSG_SIZE;
    buffer = (int *) malloc(bufsize);
    if (buffer == NULL)
    {
        printf("[%d] failed to allocate %ld bytes \n", rank, bufsize);
        fflush(stdout);
        exit(-1);
    }

    if (rank == 0)
    {
        printf("A1_Allreduce Test - in usec \n");
        fflush(stdout);
    }

    A1_Barrier_group(A1_GROUP_WORLD);

    /* NOTE(review): the bound is strict, so MAX_MSG_SIZE itself is never
     * exercised -- confirm whether <= was intended. */
    for (msgsize = sizeof(int); msgsize < MAX_MSG_SIZE; msgsize *= 2)
    {

            /* --- SUM: every element holds this process's rank --- */
            for (i = 0; i < bufsize/sizeof(int); i++)
            {
                 buffer[i] = rank;
            }

            A1_Allreduce_group(A1_GROUP_WORLD,
                               msgsize/sizeof(int),
                               A1_SUM,
                               A1_INT32,
                               (void *) buffer,
                               (void *) buffer);

            expected = (nranks-1)*(nranks)/2;
            verify_reduction(buffer, (int) (msgsize/sizeof(int)), expected, rank);

            printf("[%d] %d message sum reduce successful\n", rank, msgsize);
            fflush(stdout);

            /* --- PROD: every element is 1, so the product stays 1 --- */
            for (i = 0; i < bufsize/sizeof(int); i++)
            {
                  buffer[i] = 1;
            }

            A1_Allreduce_group(A1_GROUP_WORLD,
                               msgsize/sizeof(int),
                               A1_PROD,
                               A1_INT32,
                               (void *) buffer,
                               (void *) buffer);

            expected = 1;
            verify_reduction(buffer, (int) (msgsize/sizeof(int)), expected, rank);

            printf("[%d] %d message product reduce successful\n", rank, msgsize);
            fflush(stdout);

    }

    free(buffer);
    A1_Finalize();

    return 0;
}
Example #7
0
/*
 * Checks the top-left xdim-by-ydim region of a row-major array whose rows
 * are MAX_YDIM doubles long.  Prints a diagnostic and returns -1 at the
 * first element that differs from `expected`; returns 0 if all match.
 */
static int validate_region(const double *data, int xdim, int ydim, double expected)
{
   int i, j;

   for(i=0; i<xdim; i++)
   {
      for(j=0; j<ydim; j++)
      {
         double actual = *(data + i*MAX_YDIM + j);
         if(actual != expected)
         {
            printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
                    i, j, expected, actual);
            fflush(stdout);
            return -1;
         }
      }
   }
   return 0;
}

/* Sets every double in a buffer of `bytes` bytes to `value`. */
static void fill_buffer(double *data, long bytes, double value)
{
   long i;
   long n = bytes / (long) sizeof(double);

   for(i=0; i<n; i++) {
       data[i] = value;
   }
}

/*
 * A1_PutS (strided put) latency test.
 *
 * For every xdim-by-ydim block size (both doubling up to MAX_XDIM and
 * MAX_YDIM), process 0 times ITERATIONS strided puts to process 1 twice:
 * once stopping the clock before the flush, and once with an A1_Flush
 * inside the timed loop.  After each phase process 1 checks that the
 * block holds process 0's fill value and restores its own fill value.
 *
 * Synchronization: A1_Barrier_group(A1_GROUP_WORLD) involves every process,
 * so all processes execute the same four barriers per block size:
 *   puts (phase 1) | barrier | validate + reset | barrier |
 *   puts (phase 2) | barrier | validate + reset | barrier
 * The barrier after each reset keeps process 0 from writing into the
 * buffer while process 1 is still checking or restoring it.
 *
 * Returns 0 on success, -1 on a validation failure, 1 when started with
 * fewer than two processes or when an allocation fails.
 */
int main() {

   int i, rank, nranks;
   int xdim, ydim;
   long bufsize;
   double **buffer;
   double t_start = 0.0, t_stop;
   int count[2], src_stride, trg_stride, stride_level, peer;
   double expected;

   A1_Initialize(A1_THREAD_SINGLE);

   rank = A1_Process_id(A1_GROUP_WORLD);
   nranks = A1_Process_total(A1_GROUP_WORLD);

   /* Process 0 writes to process 1; with a single process buffer[1]
    * would not exist.  Every process takes this branch together. */
   if(nranks < 2) {
     if(rank == 0) {
       fprintf(stderr, "this test needs at least 2 processes\n");
     }
     A1_Finalize();
     return 1;
   }

   buffer = (double **) malloc (sizeof(double *) * nranks);
   if(buffer == NULL) {
     fprintf(stderr, "failed to allocate the segment address table\n");
     return 1;
   }

   A1_Barrier_group(A1_GROUP_WORLD);

   bufsize = MAX_XDIM * MAX_YDIM * sizeof(double);
   A1_Alloc_segment((void **) &(buffer[rank]), bufsize);
   A1_Exchange_segments(A1_GROUP_WORLD, (void **) buffer);

   fill_buffer(buffer[rank], bufsize, 1.0 + rank);

   if(rank == 0) {
     printf("A1_PutS Latency - local and remote completions - in usec \n");
     /* one conversion per column title (the third title used to be dropped) */
     printf("%30s %22s %22s \n", "Dimensions(array of doubles)", "Latency-LocalCompeltion", "Latency-RemoteCompletion");
     fflush(stdout);
   }

   src_stride = MAX_YDIM*sizeof(double);
   trg_stride = MAX_YDIM*sizeof(double);
   stride_level = 1;

   /* Value process 1 expects to find after process 0's puts. */
   expected = 1.0 + 0.0;

   for(xdim=1; xdim<=MAX_XDIM; xdim*=2)
   {
      count[1] = xdim;
      for(ydim=1; ydim<=MAX_YDIM; ydim*=2)
      {
        count[0] = ydim*sizeof(double);

        /* ---- phase 1: clock stopped before the flush ---- */
        if(rank == 0)
        {
          char dims[32];

          peer = 1;

          for(i=0; i<ITERATIONS+SKIP; i++)
          {
             /* the first SKIP iterations are not timed */
             if(i == SKIP) t_start = A1_Time_seconds();
             A1_PutS(peer, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride);
          }
          t_stop = A1_Time_seconds();
          A1_Flush(peer);

          snprintf(dims, sizeof dims, "%dX%d", xdim, ydim);
          printf("%30s %20.2f", dims, ((t_stop-t_start)*1000000)/ITERATIONS);
          fflush(stdout);
        }

        A1_Barrier_group(A1_GROUP_WORLD);

        if(rank == 1)
        {
          if(validate_region(buffer[rank], xdim, ydim, expected) != 0)
             return -1;
          fill_buffer(buffer[rank], bufsize, 1.0 + rank);
        }

        A1_Barrier_group(A1_GROUP_WORLD);

        /* ---- phase 2: flush inside the timed loop ---- */
        if(rank == 0)
        {
          peer = 1;

          for(i=0; i<ITERATIONS+SKIP; i++)
          {
             if(i == SKIP) t_start = A1_Time_seconds();
             A1_PutS(peer, stride_level, count, (void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride);
             A1_Flush(peer);
          }
          t_stop = A1_Time_seconds();
          printf("%20.2f \n", ((t_stop-t_start)*1000000)/ITERATIONS);
          fflush(stdout);
        }

        A1_Barrier_group(A1_GROUP_WORLD);

        if(rank == 1)
        {
          if(validate_region(buffer[rank], xdim, ydim, expected) != 0)
             return -1;
          fill_buffer(buffer[rank], bufsize, 1.0 + rank);
        }

        A1_Barrier_group(A1_GROUP_WORLD);
      }
   }
   A1_Barrier_group(A1_GROUP_WORLD);

   A1_Release_segments(A1_GROUP_WORLD, (void *) buffer[rank]);
   A1_Free_segment((void *) buffer[rank]);

   free(buffer);

   A1_Finalize();

   return 0;
}