コード例 #1
0
ファイル: testnotify.c プロジェクト: jeffhammond/ga
/*
 * Driver for the armci_notify test.
 *
 * Initializes the message layer and ARMCI, records the process count and
 * this process's id in nproc / me (not declared in this function), runs
 * test_notify() once for each dimensionality from 1 through MAXDIMS, and
 * then finalizes both layers. Always returns 0.
 */
int main(int argc, char *argv[])
{
  int dims = 1;

  armci_msg_init(&argc, &argv);
  ARMCI_Init_args(&argc, &argv);
  nproc = armci_msg_nproc();
  me = armci_msg_me();

  /* Only process 0 prints the banner; the barriers bracket the announcement. */
  ARMCI_Barrier();
  if (0 == me) {
    printf("\nTesting armci_notify\n");
    fflush(stdout);
    sleep(1);
  }
  ARMCI_Barrier();

  while (dims <= MAXDIMS) {
    test_notify(dims);
    dims++;
  }
  ARMCI_Barrier();

  ARMCI_Finalize();
  armci_msg_finalize();
  return 0;
}
コード例 #2
0
ファイル: test_mutex_rmw.c プロジェクト: abhinavvishnu/matex
/*
 * ARMCI mutex read-modify-write test.
 *
 * Rank 0 contributes one mutex and one shared int (every other rank
 * contributes zero of each). Every rank then performs NITER iterations of
 * lock / get / add ADDIN / put / unlock on that int. Afterwards rank 0 reads
 * the value back and reports PASS when it equals ADDIN*nproc*NITER.
 */
int main(int argc, char ** argv) {
  int    rank, nproc;
  int    counter;
  int    round = 0;
  void **segments;

  MPI_Init(&argc, &argv);
  ARMCI_Init();

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  if (rank == 0) printf("Starting ARMCI mutex read-modify-write test with %d processes\n", nproc);

  segments = malloc(nproc*sizeof(void*));

  ARMCI_Create_mutexes(rank == 0 ? 1 : 0);
  /* Only rank 0 asks for a non-empty allocation: it hosts the shared int. */
  ARMCI_Malloc(segments, (rank == 0) ? sizeof(int) : 0);

  /* Rank 0 zeroes the shared value before anyone touches it. */
  if (rank == 0) {
    counter = 0;
    ARMCI_Put(&counter, segments[0], sizeof(int), 0);
  }

  ARMCI_Barrier();

  while (round < NITER) {
    /* Mutex 0 on process 0 guards the get/modify/put sequence. */
    ARMCI_Lock(0, 0);

    ARMCI_Get(segments[0], &counter, sizeof(int), 0);
    counter += ADDIN;
    ARMCI_Put(&counter, segments[0], sizeof(int), 0);

    ARMCI_Unlock(0, 0);
    round++;
  }

  printf(" + %3d done\n", rank);
  fflush(NULL);

  ARMCI_Barrier();

  if (rank == 0) {
    ARMCI_Get(segments[0], &counter, sizeof(int), 0);

    if (counter != ADDIN*nproc*NITER)
      printf("Test complete: FAIL.  Got %d, expected %d.\n", counter, ADDIN*nproc*NITER);
    else
      printf("Test complete: PASS.\n");
  }

  ARMCI_Free(segments[rank]);
  ARMCI_Destroy_mutexes();
  free(segments);

  ARMCI_Finalize();
  MPI_Finalize();

  return 0;
}
コード例 #3
0
ファイル: gpc.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Release the callback slot associated with a handle.
 *
 * The slot index is GPC_OFFSET - handle. After a barrier, an index outside
 * [0, GPC_SLOTS) is reported through armci_die(); the slot in _table is then
 * cleared.
 */
void ARMCI_Gpc_release(int handle)
{
     const int slot = GPC_OFFSET - handle;

     ARMCI_Barrier();
     if (!(slot >= 0 && slot < GPC_SLOTS)) {
          armci_die("ARMCI_Gpc_release: bad handle", slot);
     }
     _table[slot] = (void*)0;
}
コード例 #4
0
ファイル: ddi_armci.c プロジェクト: ryanolson/ddi
/*
 * Barrier over a DDI communicator.
 *
 * When comm is the communicator that Comm_find(DDI_COMM_WORLD) returns, a
 * plain ARMCI_Barrier() is used. For any other communicator the function
 * calls ARMCI_AllFence() and then MPI_Barrier() on comm->compute_comm.
 */
void DDI_ARMCI_Barrier(const DDI_Comm *comm) {
    const DDI_Comm *world = (const DDI_Comm *)Comm_find(DDI_COMM_WORLD);

    if (comm != world) {
        ARMCI_AllFence();
        MPI_Barrier(comm->compute_comm);
        return;
    }

    ARMCI_Barrier();
}
コード例 #5
0
ファイル: test_mt.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Print the command-line synopsis (on rank 0 only), synchronize with
 * ARMCI_Barrier(), finalize the message layer and exit with status 0.
 * This function does not return.
 */
void usage()
{
  if (rank == 0) {
    fputs("Usage: test_mt, or \n", stdout);
    fputs("       test_mt -tTHREADS_PER_PROC -sARRAY_SIZE -iITERATIONS_COUNT\n", stdout);
  }

  ARMCI_Barrier();
  armci_msg_finalize();
  exit(0);
}
コード例 #6
0
ファイル: put.c プロジェクト: brog2610/quinoa
/*
 * ARMCI_Put timing loop.
 *
 * Every image allocates MAX_BUF_SIZE bytes of ARMCI memory plus a local
 * buffer of the same size. Image 0 then, for each power-of-two size up to
 * MAX_BUF_SIZE, issues `iter` puts of that size to image 1, fences, and
 * prints the elapsed time. All images synchronize before releasing memory.
 */
int main(int argc, char * argv[]) {
  void *baseAddress[MAX_PROCESSORS];
  char *local;
  int thisImage;

  int iter = 100, size;
  double startTime, endTime;
  int i;

  // initialize
  ARMCI_Init();
  ARMCI_Myid(&thisImage);

  // allocate data (collective operation)
  ARMCI_Malloc(baseAddress, MAX_BUF_SIZE*sizeof(char));
  local = (char *)ARMCI_Malloc_local(MAX_BUF_SIZE*sizeof(char));

  ARMCI_Barrier();
  ARMCI_Migrate();

  if (thisImage == 0) {
    for (size = 1; size <= MAX_BUF_SIZE; size = size << 1) {
      startTime = CkWallTimer();
      for (i = 0; i < iter; i++) {
        ARMCI_Put(local, baseAddress[1], size, 1);
      }
      ARMCI_Fence(1);
      endTime = CkWallTimer();
      // NOTE(review): the label says "us" but the value is the elapsed time
      // for all `iter` puts scaled by 1000 -- confirm CkWallTimer()'s unit
      // and whether a per-put figure was intended.
      printf("%d: %f us\n", size, (endTime-startTime)*1000);
    }
  }

  // Every image takes part in this barrier. Previously only images 0 and 1
  // called it, which left the barrier mismatched when more than two images
  // were running.
  ARMCI_Barrier();

  ARMCI_Free(baseAddress[thisImage]);
  ARMCI_Free_local(local);
  // finalize
  ARMCI_Finalize();
  return 0;
}
コード例 #7
0
ファイル: perf_aggr.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Driver for the aggregate put/get test.
 *
 * Initializes ARMCI, stores the process count and id in nproc / me (not
 * declared in this function), has process 0 complain through ARMCI_Error()
 * when more than MAXPROC processes are in use, and then runs
 * test_aggregate() twice: once with argument 1 (cold start) and once with
 * argument 0 (warm start). Always returns 0.
 */
int main(int argc, char *argv[])
{
  ARMCI_Init_args(&argc, &argv);
  nproc = armci_msg_nproc();
  me = armci_msg_me();

  if (me == 0 && nproc > MAXPROC) {
    ARMCI_Error("Test works for up to %d processors\n", MAXPROC);
  }

  /* Banner output comes from process 0 only. */
  if (me == 0) {
    printf("ARMCI test program (%d processes)\n", nproc);
    fflush(stdout);
    sleep(1);

    printf("\nAggregate put/get requests\n\n");
    fflush(stdout);
  }

  test_aggregate(1); /* cold start */
  test_aggregate(0); /* warm start */

  ARMCI_AllFence();
  ARMCI_Barrier();
  if (me == 0) {
    printf("\nSuccess!!\n");
    fflush(stdout);
  }
  sleep(2);

  ARMCI_Barrier();
  ARMCI_Finalize();
  armci_msg_finalize();
  return 0;
}
コード例 #8
0
ファイル: gpc.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Register a callback function and hand back an integer handle for it.
 *
 * After a barrier, _table is scanned from index 0 for the first empty slot;
 * func is stored there and the handle GPC_OFFSET - slot is returned.
 * If no slot below GPC_SLOTS is empty, nothing is stored and the return
 * value is GPC_OFFSET + 1.
 */
int ARMCI_Gpc_register( int (*func) ())
{
  int slot = 0;
  int found = -1;

  ARMCI_Barrier();
  do {
    if (!_table[slot]) {
      _table[slot] = func;
      found = slot;
    }
    ++slot;
  } while (found == -1 && slot < GPC_SLOTS);

  return GPC_OFFSET - found;
}
コード例 #9
0
ファイル: ARMCI_Test_nodeid.c プロジェクト: arnolda/scafacos
/*
 * Print the ARMCI SMP-domain information seen by each process:
 * MPI rank, ARMCI process count and id, and the results of the
 * armci_domain_* queries for ARMCI_DOMAIN_SMP.
 *
 * Fixes relative to the previous version:
 *  - the uninitialized `buffer` pointer was dereferenced and passed to
 *    ARMCI_Free() although nothing was ever allocated; that call is gone,
 *    together with the other unused locals;
 *  - the format string relied on backslash line continuation, two of which
 *    were followed by a trailing blank; it is now built from adjacent string
 *    literals. The literal text (including the run of blanks that each
 *    continuation line used to contribute) is kept unchanged.
 */
int main(int argc, char **argv)
{
    int rank, nranks;
    int provided;
    int me, node;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    ARMCI_Init_args(&argc, &argv);

    ARMCI_Barrier();

    me = armci_msg_me();
    node = armci_domain_my_id(ARMCI_DOMAIN_SMP);

    printf("MPI_Rank: %d, "
           "            armci_msg_nproc: %d "
           "            armci_msg_me: %d, "
           "            armci_domain_id: %d, "
           "            armci_domain_same_id: %d,"
           "            armci_domain_my_id: %d, "
           "            armci_domain_count: %d, "
           "            armci_domain_nprocs: %d, "
           "            armci_domain_glob_proc_id: %d \n",
           rank, armci_msg_nproc(), me, armci_domain_id(ARMCI_DOMAIN_SMP, me),
           armci_domain_same_id(ARMCI_DOMAIN_SMP, me), armci_domain_my_id(ARMCI_DOMAIN_SMP),
           armci_domain_count(ARMCI_DOMAIN_SMP), armci_domain_nprocs(ARMCI_DOMAIN_SMP, node),
           armci_domain_glob_proc_id(ARMCI_DOMAIN_SMP, node, 0));
    fflush(stdout);

    ARMCI_Finalize();

    MPI_Finalize();

    return 0;
}
コード例 #10
0
/*
 * ARMCI memory allocation test.
 *
 * Performs NUM_ITERATIONS calls to ARMCI_Malloc(), each with its own
 * nproc-entry pointer table; every fourth request (iterations 0, 4, 8, ...)
 * asks for 0 bytes, the rest for DATA_SZ. After a barrier the allocations
 * are released in the same order and the tables are freed.
 */
int main(int argc, char ** argv) {
  int     rank, nproc, round;
  void ***tables;

  MPI_Init(&argc, &argv);
  ARMCI_Init();

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  if (rank == 0) printf("Starting ARMCI memory allocation test with %d processes\n", nproc);

  tables = malloc(sizeof(void**)*NUM_ITERATIONS);

  /* Allocation phase */
  round = 0;
  while (round < NUM_ITERATIONS) {
    if (rank == 0) printf(" + allocation %d\n", round);

    tables[round] = malloc(sizeof(void*)*nproc);
    ARMCI_Malloc((void**)tables[round], (round % 4 == 0) ? 0 : DATA_SZ);
    round++;
  }

  ARMCI_Barrier();

  /* Release phase */
  round = 0;
  while (round < NUM_ITERATIONS) {
    void **row = tables[round];

    if (rank == 0) printf(" + free %d\n", round);

    /* Hand this rank's entry to ARMCI_Free, then drop the table itself. */
    ARMCI_Free(row[rank]);
    free(row);
    round++;
  }

  free(tables);

  if (rank == 0) printf("Test complete: PASS.\n");

  ARMCI_Finalize();
  MPI_Finalize();

  return 0;
}
コード例 #11
0
ファイル: testnotify.c プロジェクト: jeffhammond/ga
/*
 * Exercise armci_notify / armci_notify_wait for an ndim-dimensional array.
 *
 * Process 0 puts a slab of its array a into b on its left neighbour and
 * notifies it; every other process waits for a notification from its right
 * neighbour and then puts the same slab of its own b to its left neighbour.
 * When all slabs have been transferred, process 0 compares a[0] with b[0]
 * and prints "OK".
 *
 * ndim: number of dimensions to use (the caller passes 1..MAXDIMS).
 * Reads me, nproc and dimsB, which are not declared in this function.
 */
void test_notify(int ndim)
{
  int lo[MAXDIMS], hi[MAXDIMS], count[MAXDIMS];
  int stride[MAXDIMS];
  int dim, elems;
  int i, Idx = 1, idx = 0;
  void *b[MAXPROC], *a[MAXPROC];
  /* neighbours, with wrap-around at both ends */
  int left = (me + nproc - 1) % nproc;
  int right = (me + 1) % nproc;
  int loopcnt = 1, less = 2, strl; /* less>1 takes a partial plane */


  /* create shared and local arrays */
  create_array(b, sizeof(double), ndim, dimsB);
  create_array(a, sizeof(double), ndim, dimsB);

  /* get_elems() also receives stride; presumably it fills it in -- TODO confirm */
  elems = get_elems(ndim, stride, dimsB, sizeof(double));
  init((double *)a[me], ndim, elems, dimsB);

  /* patch bounds: from 0 up to dimsB[i]-less (or dimsB[i]-1 when less > dimsB[i]) */
  for (i = 0; i < ndim; i++) {
    lo[i] = 0;
    hi[i] = (less > dimsB[i]) ? dimsB[i] - 1 : dimsB[i] - less;
    count[i] = hi[i] - lo[i] + 1;
  }
  /* count[0] is scaled from elements to bytes */
  count[0] *= sizeof(double);

  /* Idx = product of the first ndim-1 extents: the element offset between slabs */
  for (i = 0; i < ndim - 1; i++) {
    Idx *= dimsB[i];
  }

  ARMCI_Barrier();
  if (me == 0) {
    printf("--------array[%d", dimsB[0]);
    for (dim = 1; dim < ndim; dim++) {
      printf(",%d", dimsB[dim]);
    }
    printf("]--------\n");
    fflush(stdout);
  }

  ARMCI_Barrier();
  /* one transfer per index of the last dimension (a single transfer for 1-D) */
  loopcnt = (ndim > 1) ? dimsB[ndim-1] : 1;
  strl    = (ndim > 1) ? ndim - 2 : 0; /* strides of the subpatch to transfer */

  for (i = 0; i < loopcnt; i++) {
    int wc;

    if (me == 0) {

      /* process 0 starts the chain: put from a, notify left, then wait on right */
      ARMCI_PutS((double *)a[me] + idx, stride,
                 (double *)b[left] + idx, stride, count, strl, left);
#if DEBUG_
      printf("%d-%d: ps=%p pd=%p i=%d idx=%d count=%d\n", me, left, (double *)
             a[me] + idx, (double *)b[left] + idx, i, idx, count[0]);
      fflush(stdout);
#endif
      (void)armci_notify(left);
      (void)armci_notify_wait(right, &wc);

    }
    else {


      /* everyone else: wait on right first, then put own b to left and notify */
      (void)armci_notify_wait(right, &wc);
      ARMCI_PutS((double *)b[me] + idx, stride,
                 (double *)b[left] + idx, stride, count, strl, left);
#if DEBUG_
      printf("%d: ps=%p pd=%p i=%d idx=%d count=%d\n", me, (double *)b[me] + idx,
             (double *)b[left] + idx, i, idx, count[0]);
      fflush(stdout);
#endif
      (void)armci_notify(left);
    }

    idx += Idx; /* advance to the next slab */
  }

  ARMCI_Barrier();

  /* process 0 compares the lo..hi patch of a[0] against b[0] */
  if (me == 0) {
    compare_patches(0., ndim, (double *)a[0], lo, hi, dimsB,
                    (double *)b[0], lo, hi, dimsB);
    printf("OK\n");
  }

  ARMCI_Barrier();
  destroy_array(b);
  destroy_array(a);
}
コード例 #12
0
ファイル: bench_groups.c プロジェクト: abhinavvishnu/matex
/*
 * ARMCI group benchmark.
 *
 * Builds two child groups of the world group (odd ranks and even ranks),
 * then has rank 0 time one million calls each of
 * ARMCII_Translate_absolute_to_group() and ARMCI_Absolute_id() on the even
 * group and print the average cost per call in microseconds. Finally each
 * rank frees the group matching its own parity.
 */
int main(int argc, char **argv) {
  int                      me, nproc;
  int                      i, r, nmembers, *procs;
  ARMCI_Group              g_world, g_odd, g_even;

  MPI_Init(&argc, &argv);
  ARMCI_Init();

  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  /* Room for the larger of the two groups: ceil(nproc / 2) ranks. */
  procs = malloc(sizeof(int) * ((nproc + 1) / 2));

  if (me == 0) printf("ARMCI Group test starting on %d procs\n", nproc);

  ARMCI_Group_get_world(&g_world);

  if (me == 0) printf(" + Creating odd group\n");

  nmembers = 0;
  for (r = 1; r < nproc; r += 2) {
    procs[nmembers++] = r;
  }

  ARMCI_Group_create_child(nmembers, procs, &g_odd, &g_world);

  if (me == 0) printf(" + Creating even group\n");

  nmembers = 0;
  for (r = 0; r < nproc; r += 2) {
    procs[nmembers++] = r;
  }

  ARMCI_Group_create_child(nmembers, procs, &g_even, &g_world);

  /* ---- translation timing, measured on rank 0 only ---- */
  if (me == 0) {
    const int iter = 1000000;
    int       grp_me, grp_nproc;
    double    t_abs_to_grp, t_grp_to_abs;

    ARMCI_Group_rank(&g_even, &grp_me);
    ARMCI_Group_size(&g_even, &grp_nproc);

    t_abs_to_grp = MPI_Wtime();
    for (i = 0; i < iter; i++) {
      ARMCII_Translate_absolute_to_group(&g_even, (grp_me+1) % grp_nproc);
    }
    t_abs_to_grp = MPI_Wtime() - t_abs_to_grp;

    t_grp_to_abs = MPI_Wtime();
    for (i = 0; i < iter; i++) {
      ARMCI_Absolute_id(&g_even, (grp_me+1) % grp_nproc);
    }
    t_grp_to_abs = MPI_Wtime() - t_grp_to_abs;

    printf("t_abs_to_grp = %f us, t_grp_to_abs = %f us\n", t_abs_to_grp/iter * 1.0e6, t_grp_to_abs/iter * 1.0e6);
  }

  ARMCI_Barrier();

  if (me == 0) printf(" + Freeing groups\n");

  /* Each rank frees only the group of its own parity. */
  if (me % 2 > 0) {
    ARMCI_Group_free(&g_odd);
  } else {
    ARMCI_Group_free(&g_even);
  }

  free(procs);

  ARMCI_Finalize();
  MPI_Finalize();

  return 0;
}
コード例 #13
0
ファイル: testnotify.c プロジェクト: jeffhammond/ga
/*
 * Release this process's entry of an array created for the test.
 *
 * ptr: pointer table; the entry at index me (not declared in this function)
 *      is passed to ARMCI_Free() after a barrier.
 *
 * The ARMCI_Free() call is kept outside assert() so that it still happens
 * when the file is compiled with NDEBUG, where assert() expands to nothing.
 */
void destroy_array(void *ptr[])
{
  int rc;

  ARMCI_Barrier();

  rc = ARMCI_Free(ptr[me]);
  assert(!rc);
  (void)rc; /* silence "unused variable" when assert() is compiled out */
}
コード例 #14
0
/*
 * ARMCI_PutS latency benchmark with data validation.
 *
 * For every xdim x ydim patch size (powers of two up to MAX_XDIM, MAX_YDIM)
 * rank 0 times ITERATIONS strided puts to rank 1 twice: first stopping the
 * clock before the fence, then fencing after every put. Rank 1 checks after
 * each phase that the patch holds rank 0's fill value (1.0) and restores its
 * own fill value.
 *
 * The size label is now formatted with snprintf() into a buffer large enough
 * for any pair of ints; the previous 10-byte buffer written by sprintf()
 * only fit labels of at most nine characters.
 */
int main(int argc, char *argv[]) {

   int i, j, rank, nranks;
   int xdim, ydim;
   long bufsize;
   double **buffer;
   double t_start=0.0, t_stop=0.0;
   int count[2], src_stride, trg_stride, stride_level, peer;
   double expected, actual;
   int provided;

   MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &nranks);

   if (nranks < 2) {
      printf("%s: Must be run with at least 2 processes\n", argv[0]);
      MPI_Abort(MPI_COMM_WORLD, 1);
   }

   ARMCI_Init_args(&argc, &argv);

   bufsize = MAX_XDIM * MAX_YDIM * sizeof(double);
   buffer = (double **) malloc(sizeof(double *) * nranks);
   ARMCI_Malloc((void **) buffer, bufsize);

   /* Every rank fills its own buffer with 1.0 + rank. */
   for(i=0; i< bufsize/sizeof(double); i++) {
      *(buffer[rank] + i) = 1.0 + rank;
   }

   if(rank == 0) {
      printf("ARMCI_PutS Latency - local and remote completions - in usec \n");
      printf("%30s %22s %22s\n", "Dimensions(array of doubles)", "Latency-LocalCompeltion", "Latency-RemoteCompletion");
      fflush(stdout);
   }

   /* Rows of the source and target are MAX_YDIM doubles apart. */
   src_stride = MAX_YDIM*sizeof(double);
   trg_stride = MAX_YDIM*sizeof(double);
   stride_level = 1;

   ARMCI_Barrier();

   for(xdim=1; xdim<=MAX_XDIM; xdim*=2) {

      count[1] = xdim;

      for(ydim=1; ydim<=MAX_YDIM; ydim*=2) {

         count[0] = ydim*sizeof(double);

         if(rank == 0)
         {
            char label[32];

            peer = 1;

            /* Phase 1: the clock stops before the fence. */
            for(i=0; i<ITERATIONS+SKIP; i++) {

               if(i == SKIP)
                  t_start = MPI_Wtime();

               ARMCI_PutS((void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, peer);

            }
            t_stop = MPI_Wtime();
            ARMCI_Fence(peer);
            snprintf(label, sizeof label, "%dX%d", xdim, ydim);
            printf("%30s %20.2f", label, ((t_stop-t_start)*1000000)/ITERATIONS);
            fflush(stdout);

            ARMCI_Barrier();

            ARMCI_Barrier();

            /* Phase 2: every put is followed by a fence inside the timed loop. */
            for(i=0; i<ITERATIONS+SKIP; i++) {

               if(i == SKIP)
                  t_start = MPI_Wtime();

               ARMCI_PutS((void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, peer);
               ARMCI_Fence(peer);

            }
            t_stop = MPI_Wtime();
            printf("%20.2f \n", ((t_stop-t_start)*1000000)/ITERATIONS);
            fflush(stdout);

            ARMCI_Barrier();

            ARMCI_Barrier();
         }
         else
         {
            peer = 0;

            /* The value rank 0 filled its buffer with. */
            expected = (1.0 + (double) peer);

            ARMCI_Barrier();
            if (rank == 1)
            {
               for(i=0; i<xdim; i++)
               {
                  for(j=0; j<ydim; j++)
                  {
                     actual = *(buffer[rank] + i*MAX_YDIM + j);
                     if(actual != expected)
                     {
                        printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
                               i, j, expected, actual);
                        fflush(stdout);
                        ARMCI_Error("Bailing out", 1);
                     }
                  }
               }
            }
            /* After the first phase every non-zero rank restores its buffer. */
            for(i=0; i< bufsize/sizeof(double); i++) {
               *(buffer[rank] + i) = 1.0 + rank;
            }

            ARMCI_Barrier();

            ARMCI_Barrier();
            if (rank == 1)
            {
               for(i=0; i<xdim; i++)
               {
                  for(j=0; j<ydim; j++)
                  {
                     actual = *(buffer[rank] + i*MAX_YDIM + j);
                     if(actual != expected)
                     {
                        printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
                               i, j, expected, actual);
                        fflush(stdout);
                        ARMCI_Error("Bailing out", 1);
                     }
                  }
               }

               for(i=0; i< bufsize/sizeof(double); i++) {
                  *(buffer[rank] + i) = 1.0 + rank;
               }
            }
            ARMCI_Barrier();

         }

      }

   }

   ARMCI_Barrier();

   ARMCI_Free((void *) buffer[rank]);
   free(buffer);

   ARMCI_Finalize();

   MPI_Finalize();

   return 0;
}
コード例 #15
0
/*
 * ARMCI_AccS latency benchmark with data validation.
 *
 * For every xdim x ydim patch size (powers of two up to MAX_XDIM, MAX_YDIM)
 * rank 0 times ITERATIONS strided double accumulates (scale factor 2.0) into
 * rank 1's buffer twice: first stopping the clock before the fence, then
 * fencing after every accumulate. After each phase rank 1 checks that every
 * element of the patch equals its initial value plus the sum of all
 * ITERATIONS + SKIP accumulates, then restores its buffer.
 *
 * Fixes relative to the previous version:
 *  - validation addressed rows with MAX_XDIM although the transfer stride
 *    (and the failure message) use MAX_YDIM; both now use MAX_YDIM;
 *  - the "Expected" value in the failure message left out the
 *    (ITERATIONS + SKIP) factor that the comparison applies;
 *  - the size label is written with snprintf() into a larger buffer instead
 *    of sprintf() into 10 bytes.
 */
int main(int argc, char **argv)
{

    int i, j, rank, nranks, peer;
    size_t xdim, ydim;
    unsigned long bufsize;
    double **buffer, *src_buf;
    double t_start=0.0, t_stop;
    int count[2], src_stride, trg_stride, stride_level;
    double scaling;
    int provided;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    if (nranks < 2) {
        printf("%s: Must be run with at least 2 processes\n", argv[0]);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    ARMCI_Init_args(&argc, &argv);

    buffer = (double **) malloc(sizeof(double *) * nranks);

    bufsize = MAX_XDIM * MAX_YDIM * sizeof(double);
    ARMCI_Malloc((void **) buffer, bufsize);
    src_buf = ARMCI_Malloc_local(bufsize);

    if (rank == 0)
    {
        printf("ARMCI_AccS Latency - local and remote completions - in usec \n");
        printf("%30s %22s %22s\n",
               "Dimensions(array of double)",
               "Local Completion",
               "Remote completion");
        fflush(stdout);
    }

    /* Both the shared buffer and the local source start out as 1.0 + rank. */
    ARMCI_Access_begin(buffer[rank]);
    for (i = 0; i < bufsize / sizeof(double); i++)
    {
        *(buffer[rank] + i) = 1.0 + rank;
        *(src_buf + i) = 1.0 + rank;
    }
    ARMCI_Access_end(buffer[rank]);

    scaling = 2.0;

    /* Rows of the source and target are MAX_YDIM doubles apart. */
    src_stride = MAX_YDIM * sizeof(double);
    trg_stride = MAX_YDIM * sizeof(double);
    stride_level = 1;

    ARMCI_Barrier();

    for (xdim = 1; xdim <= MAX_XDIM; xdim *= 2)
    {

        count[1] = xdim;

        for (ydim = 1; ydim <= MAX_YDIM; ydim *= 2)
        {

            count[0] = ydim * sizeof(double);

            if (rank == 0)
            {
                char label[32];

                peer = 1;

                /* Phase 1: the clock stops before the fence. */
                for (i = 0; i < ITERATIONS + SKIP; i++)
                {

                    if (i == SKIP) t_start = MPI_Wtime();

                    ARMCI_AccS(ARMCI_ACC_DBL,
                               (void *) &scaling,
                               src_buf,
                               &src_stride,
                               (void *) buffer[peer],
                               &trg_stride,
                               count,
                               stride_level,
                               1);

                }
                t_stop = MPI_Wtime();
                ARMCI_Fence(1);

                snprintf(label, sizeof label, "%dX%d", (int) xdim, (int) ydim);
                printf("%30s %20.2f ", label, ((t_stop - t_start) * 1000000)
                        / ITERATIONS);
                fflush(stdout);

                ARMCI_Barrier();

                ARMCI_Barrier();

                /* Phase 2: every accumulate is fenced inside the timed loop. */
                for (i = 0; i < ITERATIONS + SKIP; i++)
                {

                    if (i == SKIP) t_start = MPI_Wtime();

                    ARMCI_AccS(ARMCI_ACC_DBL,
                               (void *) &scaling,
                               src_buf,
                               &src_stride,
                               (void *) buffer[peer],
                               &trg_stride,
                               count,
                               stride_level,
                               1);
                    ARMCI_Fence(1);

                }
                t_stop = MPI_Wtime();
                printf("%20.2f \n", ((t_stop - t_start) * 1000000) / ITERATIONS);
                fflush(stdout);

                ARMCI_Barrier();

                ARMCI_Barrier();

            }
            else
            {
                double expected;

                peer = 0;

                /* Initial value plus ITERATIONS + SKIP scaled contributions
                 * of rank 0's source value. */
                expected = (1.0 + rank)
                         + scaling * (1.0 + peer) * (ITERATIONS + SKIP);

                ARMCI_Barrier();

                if (rank == 1)
                {
                    ARMCI_Access_begin(buffer[rank]);
                    for (i = 0; i < xdim; i++)
                    {
                        for (j = 0; j < ydim; j++)
                        {
                            double actual = *(buffer[rank] + i * MAX_YDIM + j);

                            if (actual != expected)
                            {
                                printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
                                       i,
                                       j,
                                       expected,
                                       actual);
                                fflush(stdout);
                                ARMCI_Error("Bailing out", 1);
                            }
                        }
                    }

                    for (i = 0; i < bufsize / sizeof(double); i++)
                    {
                        *(buffer[rank] + i) = 1.0 + rank;
                    }
                    ARMCI_Access_end(buffer[rank]);
                }

                ARMCI_Barrier();

                ARMCI_Barrier();

                if (rank == 1)
                {
                    ARMCI_Access_begin(buffer[rank]);

                    for (i = 0; i < xdim; i++)
                    {
                        for (j = 0; j < ydim; j++)
                        {
                            double actual = *(buffer[rank] + i * MAX_YDIM + j);

                            if (actual != expected)
                            {
                                printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
                                       i,
                                       j,
                                       expected,
                                       actual);
                                fflush(stdout);
                                ARMCI_Error("Bailing out", 1);
                            }
                        }
                    }

                    for (i = 0; i < bufsize / sizeof(double); i++)
                    {
                        *(buffer[rank] + i) = 1.0 + rank;
                    }

                    ARMCI_Access_end(buffer[rank]);
                }
                ARMCI_Barrier();

            }

        }

    }

    ARMCI_Barrier();

    ARMCI_Free((void *) buffer[rank]);
    ARMCI_Free_local(src_buf);
    free(buffer);

    ARMCI_Finalize();

    MPI_Finalize();

    return 0;
}
コード例 #16
0
/*
 * ARMCI get / put / accumulate ring test.
 *
 * In each of NUM_ITERATIONS rounds every rank allocates a DATA_SZ-byte
 * shared array and then
 *   1. fills it with rank*test_iter and gets its right neighbour's copy,
 *   2. puts rank*test_iter into its left neighbour's array,
 *   3. resets its array to rank and accumulates (scale test_iter) a buffer
 *      holding its rank into its left neighbour's array,
 * checking the received data after every step.
 *
 * The failure messages now print the value the comparison actually expects;
 * previously they printed only the neighbour's rank.
 */
int main(int argc, char ** argv) {
  int    rank, nproc, i, test_iter;
  int   *my_data, *buf;
  void **base_ptrs;

  MPI_Init(&argc, &argv);
  ARMCI_Init();

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  if (rank == 0) printf("Starting ARMCI test with %d processes\n", nproc);

  buf = malloc(DATA_SZ);
  base_ptrs = malloc(sizeof(void*)*nproc);

  for (test_iter = 0; test_iter < NUM_ITERATIONS; test_iter++) {
    const int right = (rank+1) % nproc;
    const int left  = (rank+nproc-1) % nproc;
    /* Value written by the right neighbour in the get and put phases. */
    const int from_right = right*test_iter;
    int scale;

    if (rank == 0) printf(" + iteration %d\n", test_iter);

    /*** Allocate the shared array ***/
    ARMCI_Malloc(base_ptrs, DATA_SZ);
    my_data = base_ptrs[rank];

    /*** Get from our right neighbor and verify correct data ***/
    ARMCI_Access_begin(my_data);
    for (i = 0; i < DATA_NELTS; i++) my_data[i] = rank*test_iter;
    ARMCI_Access_end(my_data);

    ARMCI_Barrier(); // Wait for all updates to data to complete

    ARMCI_Get(base_ptrs[right], buf, DATA_SZ, right);

    for (i = 0; i < DATA_NELTS; i++) {
      if (buf[i] != from_right) {
        printf("%d: GET expected %d, got %d\n", rank, from_right, buf[i]);
        MPI_Abort(MPI_COMM_WORLD, 1);
      }
    }

    ARMCI_Barrier(); // Wait for all gets to complete

    /*** Put to our left neighbor and verify correct data ***/
    for (i = 0; i < DATA_NELTS; i++) buf[i] = rank*test_iter;
    ARMCI_Put(buf, base_ptrs[left], DATA_SZ, left);

    ARMCI_Barrier(); // Wait for all updates to data to complete

    ARMCI_Access_begin(my_data);
    for (i = 0; i < DATA_NELTS; i++) {
      if (my_data[i] != from_right) {
        printf("%d: PUT expected %d, got %d\n", rank, from_right, my_data[i]);
        MPI_Abort(MPI_COMM_WORLD, 1);
      }
    }
    ARMCI_Access_end(my_data);

    ARMCI_Barrier(); // Wait for all gets to complete

    /*** Accumulate to our left neighbor and verify correct data ***/
    for (i = 0; i < DATA_NELTS; i++) buf[i] = rank;

    ARMCI_Access_begin(my_data);
    for (i = 0; i < DATA_NELTS; i++) my_data[i] = rank;
    ARMCI_Access_end(my_data);
    ARMCI_Barrier();

    scale = test_iter;
    ARMCI_Acc(ARMCI_ACC_INT, &scale, buf, base_ptrs[left], DATA_SZ, left);

    ARMCI_Barrier(); // Wait for all updates to data to complete

    ARMCI_Access_begin(my_data);
    for (i = 0; i < DATA_NELTS; i++) {
      /* Own value plus the right neighbour's rank scaled by test_iter. */
      if (my_data[i] != rank + from_right) {
        // NOTE(review): an ACC mismatch is only reported, the run is not
        // aborted, so "Test complete: PASS." is still printed afterwards --
        // confirm whether that is intended.
        printf("%d: ACC expected %d, got %d\n", rank, rank + from_right, my_data[i]);
      }
    }
    ARMCI_Access_end(my_data);

    ARMCI_Free(my_data);
  }

  free(buf);
  free(base_ptrs);

  if (rank == 0) printf("Test complete: PASS.\n");

  ARMCI_Finalize();
  MPI_Finalize();

  return 0;
}
コード例 #17
0
ファイル: ARMCI_Put_bw.c プロジェクト: jeffhammond/a1
/*
 * ARMCI_NbPut bandwidth benchmark.
 *
 * Rank 0 issues, for every message size from sizeof(double) up to
 * MAX_MSGSIZE (doubling each step), a batch of non-blocking puts to rank 1
 * on one aggregate handle, waits for the handle, and prints the achieved
 * bandwidth in MB per second.
 *
 * Fixes relative to the previous version:
 *  - rank / nranks are int: MPI_Comm_rank() and MPI_Comm_size() store an int
 *    through the pointer they are given, so size_t objects were only
 *    partially written;
 *  - msgsize (a size_t) is printed with %zu rather than %d;
 *  - the transferred volume is computed in floating point; the integer
 *    division truncated every batch smaller than 1 MB to 0;
 *  - the put now goes from the local buffer to the buffer on `dest`, the
 *    (src, dst) order used by the other put calls; the arguments had been
 *    passed the other way round;
 *  - the run is refused with fewer than two ranks, because buffer[1] is
 *    addressed unconditionally;
 *  - the pointer table is freed and the unused `expected` is removed.
 */
int main(int argc, char *argv[])
{

    int rank, nranks, dest;
    size_t i, msgsize;
    size_t iterations, max_msgsize;
    size_t bufsize;
    double **buffer;
    double t_start, t_stop, t_total, d_total;
    double bandwidth;
    int provided;
    armci_hdl_t handle;

    max_msgsize = MAX_MSGSIZE;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    if (nranks < 2)
    {
        if (rank == 0)
        {
            printf("%s: Must be run with at least 2 processes\n", argv[0]);
            fflush(stdout);
        }
        MPI_Finalize();
        return 1;
    }

    ARMCI_Init_args(&argc, &argv);

    /* Large enough for ITERATIONS_LARGE back-to-back messages of the
     * maximum size. */
    bufsize = max_msgsize * ITERATIONS_LARGE;
    buffer = (double **) malloc(sizeof(double *) * nranks);
    ARMCI_Malloc((void **) buffer, bufsize);

    for (i = 0; i < bufsize / sizeof(double); i++)
    {
        *(buffer[rank] + i) = 1.0 + rank;
    }

    ARMCI_INIT_HANDLE(&handle);
    ARMCI_SET_AGGREGATE_HANDLE(&handle);

    ARMCI_Barrier();

    if (rank == 0)
    {

        printf("ARMCI_Put Bandwidth in MBPS \n");
        printf("%20s %22s \n", "Message Size", "Bandwidth");
        fflush(stdout);

        dest = 1;

        for (msgsize = sizeof(double); msgsize <= max_msgsize; msgsize *= 2)
        {

            if (msgsize <= 16 * 1024) iterations = ITERATIONS_VERYSMALL;
            else if (msgsize <= 64 * 1024) iterations = ITERATIONS_SMALL;
            else if (msgsize <= 512 * 1024) iterations = ITERATIONS_MEDIUM;
            else iterations = ITERATIONS_LARGE;

            t_start = MPI_Wtime();

            for (i = 0; i < iterations; i++)
            {
                /* Message i uses the i-th msgsize-byte slice of both buffers. */
                char *src = (char *) buffer[rank] + i * msgsize;
                char *dst = (char *) buffer[dest] + i * msgsize;

                ARMCI_NbPut((void *) src, (void *) dst, (int) msgsize, dest,
                            &handle);

            }

            ARMCI_Wait(&handle);

            t_stop = MPI_Wtime();
            d_total = (double) (iterations * msgsize) / (1024.0 * 1024.0);
            t_total = t_stop - t_start;
            bandwidth = d_total / t_total;
            printf("%20zu %20.4lf \n", msgsize, bandwidth);
            fflush(stdout);

            ARMCI_Fence(dest);
        }

    }

    ARMCI_Barrier();

    ARMCI_UNSET_AGGREGATE_HANDLE(&handle);

    ARMCI_Free((void *) buffer[rank]);
    free(buffer);

    ARMCI_Finalize();

    MPI_Finalize();

    return 0;
}
コード例 #18
0
ファイル: test_mt.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Driver for the multi-threaded ARMCI test.
 *
 * Parses -t (threads per process), -s (array size), -i (iterations),
 * -d (start-up delay in seconds) and -h, prepares the shared random data
 * (thread pairs, random targets, one random thread) from a seed broadcast
 * by process 0, and then runs thread_main() in tpp threads (or once,
 * directly, when NOTHREADS is defined).
 *
 * Fixes relative to the previous version:
 *  - the -s branch validated iters instead of the asize value it had just
 *    parsed;
 *  - the -i error message referred to -t;
 *  - the ptrs1 / ptrs2 allocations were written inside assert() and
 *    disappeared entirely under NDEBUG;
 *  - sizeof(atype_t) was passed for a %d conversion without a cast;
 *  - the unused local j is removed.
 */
int main(int argc, char *argv[])
{
  int ch;
  extern char *optarg;
  int i, r;
  thread_t threads[MAX_TPP];

  /* init ARMCI */
  ARMCI_Init_args(&argc, &argv);
  size = armci_msg_nproc();
  rank = armci_msg_me();

  while ((ch = getopt(argc, argv, "t:s:i:d:h")) != -1) {
    switch (ch) {
      case 't': /* # of threads */
        tpp = atoi(optarg);
        if (tpp < 1 || tpp > MAX_TPP) {
          PRINTF0("\"%s\" is improper value for -t, should be a "
                  "number between 1 and %d(MAX_TPP)\n",
                  optarg, MAX_TPP);
          usage();
        }
        break;
      case 'i': /* # of iterations */
        iters = atoi(optarg);
        if (iters < 1) {
          PRINTF0("\"%s\" is improper value for -i, should be a "
                  "number equal or larger than 1\n", optarg);
          usage();
        }
        break;
      case 's': /* # of elements in the array */
        asize = atoi(optarg);
        if (asize < 1) {
          PRINTF0("\"%s\" is improper value for -s, should be a "
                  "number equal or larger than 1\n", optarg);
          usage();
        }
        break;
      case 'd':
        delay = atoi(optarg);
        break; /* delay before start */
      case 'h':
        usage();
        break; /* print usage info */
      default:
        /* any other getopt() result is ignored, as before */
        break;
    }
  }
#ifdef NOTHREADS
  tpp = 1;
  PRINTF0("Warning: NOTHREADS debug symbol is set -- running w/o threads\n");
#endif
  th_size = size * tpp;
  PRINTF0("\nTest of multi-threaded capabilities:\n"
          "%d threads per process (%d threads total),\n"
          "%d array elements of size %d,\n"
          "%d iteration(s)\n\n", tpp, th_size, asize, (int)sizeof(atype_t), iters);
  if (delay) {
    /* print the pid and pause before starting */
    printf("%d: %d\n", rank, getpid());
    fflush(stdout);
    sleep(delay);
    ARMCI_Barrier();
  }
  TH_INIT(size, tpp);
  /* global thread ids of this process's threads */
  for (i = 0; i < tpp; i++) {
    th_rank[i] = rank * tpp + i;
  }

#if defined(DEBUG) && defined(LOG2FILE)
  /* one log file per thread; characters 10..12 of fname get the 3-digit id */
  for (i = 0; i < tpp; i++) {
    fname[10] = '0' + th_rank[i] / 100;
    fname[11] = '0' + th_rank[i] % 100 / 10;
    fname[12] = '0' + th_rank[i] % 10;
    dbg[i] = fopen(fname, "w");
  }
#endif
  for (i = 0; i < tpp; i++) {
    prndbg(i, "proc %d, thread %d(%d):\n", rank, i, th_rank[i]);
  }

  /* set global seed (to ensure same random sequence across procs) */
  time_seed = (unsigned)time(NULL);
  armci_msg_brdcst(&time_seed, sizeof(time_seed), 0);
  srand(time_seed);
  rand();
  prndbg(0, "seed = %u\n", time_seed);
  /* random pairs */
  pairs = calloc(th_size, sizeof(int));
  for (i = 0; i < th_size; i++) {
    pairs[i] = -1;
  }
  /* NOTE(review): if RND(0, th_size) yields values in [0, th_size), this
   * pairing loop and the random-target loop below look like they cannot
   * terminate for some thread counts (odd th_size here, th_size == 1
   * below) -- confirm against RND's definition. */
  for (i = 0; i < th_size; i++) {
    if (pairs[i] != -1) {
      continue;
    }
    r = RND(0, th_size);
    while (i == r || pairs[r] != -1) {
      r = RND(0, th_size);
    }
    pairs[i] = r;
    pairs[r] = i;
  }
  for (i = 0, cbufl = 0; i < th_size; i++)
    cbufl += sprintf(cbuf + cbufl, " %d->%d|%d->%d",
                     i, pairs[i], pairs[i], pairs[pairs[i]]);
  prndbg(0, "random pairs:%s\n", cbuf);
  /* random targets */
  rnd_tgts = calloc(th_size, sizeof(int));
  for (i = 0, cbufl = 0; i < th_size; i++) {
    rnd_tgts[i] = RND(0, th_size);
    if (rnd_tgts[i] == i) {
      /* a thread may not target itself: draw again for the same index */
      i--;
      continue;
    }
    cbufl += sprintf(cbuf + cbufl, " %d", rnd_tgts[i]);
  }
  prndbg(0, "random targets:%s\n", cbuf);
  /* random one */
  rnd_one = RND(0, th_size);
  prndbg(0, "random one = %d\n", rnd_one);

  /* Allocate outside assert() so the calls survive an NDEBUG build. */
  ptrs1 = calloc(th_size, sizeof(void *));
  assert(ptrs1 != NULL);
  ptrs2 = calloc(th_size, sizeof(void *));
  assert(ptrs2 != NULL);
#ifdef NOTHREADS
  thread_main((void *)(long)0);
#else
  for (i = 0; i < tpp; i++) {
    THREAD_CREATE(threads + i, thread_main, (void *)(long)i);
  }
  for (i = 0; i < tpp; i++) {
    THREAD_JOIN(threads[i], NULL);
  }
#endif

  ARMCI_Barrier();
  PRINTF0("Tests Completed\n");

  /* clean up */
#if defined(DEBUG) && defined(LOG2FILE)
  for (i = 0; i < tpp; i++) {
    fclose(dbg[i]);
  }
#endif
  ARMCI_Finalize();
  TH_FINALIZE();
  armci_msg_finalize();

  return 0;
}
コード例 #19
0
ファイル: ARMCI_Bcast.c プロジェクト: arnolda/scafacos
/*
 * Functional test for armci_msg_bcast.
 *
 * Rank 0 fills its buffer with a known pattern, every other rank zeroes its
 * buffer, and the first msgsize bytes are broadcast from rank 0 for msgsize
 * doubling from sizeof(int) up to MAX_MSG_SIZE.  After each broadcast every
 * rank checks that the broadcast prefix holds the pattern, then the buffers
 * are reset for the next round.
 *
 * Returns 0 on success; exits with -1 on allocation or validation failure.
 */
int main(int argc, char **argv)
{
    /* The original spelled this "2<<20 - 1", which C parses as 2 << (20 - 1).
     * The parentheses make the intended mask explicit. */
    const int pattern = (2 << 20) - 1;
    const int nelems = (int) (MAX_MSG_SIZE / sizeof(int));
    int i, rank, nranks, msgsize;
    int *buffer;
    int provided;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    ARMCI_Init_args(&argc, &argv);

    ARMCI_Barrier();

    buffer = malloc(MAX_MSG_SIZE);
    if (buffer == NULL)
    {
        printf("[%d] Unable to allocate %d bytes for the broadcast buffer \n",
                rank, (int) MAX_MSG_SIZE);
        fflush(stdout);
        exit(-1);
    }

    /* Only the root holds the pattern before the first broadcast. */
    for(i=0; i<nelems; i++)
    {
       buffer[i] = (rank == 0) ? pattern : 0;
    }

    if(rank == 0)
    {
      printf("Testing functionality of ARMCI_Bcast \n");
      fflush(stdout);
    }

    for(msgsize=sizeof(int); msgsize<=MAX_MSG_SIZE; msgsize*=2)
    {
       const int nbcast = (int) (msgsize / sizeof(int));

       armci_msg_bcast(buffer, msgsize, 0);

       for(i=0; i<nbcast; i++)
       {
          if(buffer[i] != pattern)
          {
             printf("[%d] Validation failed for msg size: %d at index: %d expected: %d actual: %d \n",
                     rank, msgsize, i, pattern, buffer[i]);
             fflush(stdout);
             exit(-1);
          }
       }

       /* Restore the initial state so the next, larger broadcast is checked
        * against freshly zeroed receivers. */
       for(i=0; i<nelems; i++)
       {
          buffer[i] = (rank == 0) ? pattern : 0;
       }

       ARMCI_Barrier();

       if(rank == 0)
       {
         printf("Validation successful for msg size: %d\n", msgsize);
         fflush(stdout);
       }
    }

    free(buffer);

    ARMCI_Finalize();

    MPI_Finalize();

    return 0;
}
コード例 #20
0
ファイル: ARMCI_PutS_bw.c プロジェクト: arnolda/scafacos
/*
 * Bandwidth benchmark for strided non-blocking puts (ARMCI_NbPutS).
 *
 * Every rank allocates a MAX_DIM x MAX_DIM array of doubles with
 * ARMCI_Malloc.  Rank 0 then puts dim x dim sub-blocks into rank 1's array
 * for dim doubling from 1 to MAX_DIM, and prints the achieved bandwidth in
 * MB/s for each block size.  Requires at least two processes; with fewer the
 * measurement is skipped.
 */
int main(int argc, char *argv[])
{

    int rank, nranks, dest;
    int dim, iterations;
    long i, nelems;
    long bufsize;
    double **buffer;
    double t_start, t_stop, t_total, d_total, bw;
    int count[2], src_stride, trg_stride, stride_level;
    int provided;
    char temp[32];
    armci_hdl_t handle;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    ARMCI_Init_args(&argc, &argv);

    bufsize = MAX_DIM * MAX_DIM * sizeof(double);
    buffer = (double **) malloc(sizeof(double *) * nranks);
    ARMCI_Malloc((void **) buffer, bufsize);

    nelems = bufsize / (long) sizeof(double);
    for (i = 0; i < nelems; i++)
    {
        *(buffer[rank] + i) = 1.0 + rank;
    }

    ARMCI_INIT_HANDLE(&handle);
    ARMCI_SET_AGGREGATE_HANDLE(&handle);

    ARMCI_Barrier();

    if (rank == 0 && nranks < 2)
    {
        /* buffer[1] does not exist with a single process. */
        printf("ARMCI_PutS bandwidth test needs at least 2 processes, skipping \n");
        fflush(stdout);
    }
    else if (rank == 0)
    {
        printf("ARMCI_PutS Bandwidth in MBPS \n");
        printf("%30s %22s \n", "Dimensions(array of doubles)", "Bandwidth");
        fflush(stdout);

        dest = 1;

        src_stride = MAX_DIM * sizeof(double);
        trg_stride = MAX_DIM * sizeof(double);
        stride_level = 1;

        for (dim = 1; dim <= MAX_DIM; dim *= 2)
        {

            count[0] = dim*sizeof(double);
            count[1] = dim;

            /* Scale the repeat count so every block size moves roughly the
             * same total volume. */
            iterations = 10*(MAX_DIM * MAX_DIM)/(dim * dim);

            t_start = MPI_Wtime();

            for (i = 0; i < iterations; i++)
            {

                ARMCI_NbPutS((void *) buffer[rank],
                              &src_stride,
                              (void *) buffer[dest],
                              &trg_stride,
                              count,
                              stride_level,
                              dest,
                              &handle);

            }
            ARMCI_Wait(&handle);
            t_stop = MPI_Wtime();
            ARMCI_Fence(dest);

            /* snprintf bounds the label; the original 10-byte sprintf buffer
             * had no headroom beyond "1024X1024". */
            snprintf(temp, sizeof temp, "%dX%d", dim, dim);
            t_total = t_stop - t_start;
            /* Compute in floating point: the integer division used before
             * truncated the megabyte count. */
            d_total = ((double) dim * dim * sizeof(double) * iterations)
                      / (1024.0 * 1024.0);
            bw = d_total/t_total;
            printf("%30s %20.2f \n", temp, bw);
            fflush(stdout);

        }

    }

    ARMCI_Barrier();

    ARMCI_UNSET_AGGREGATE_HANDLE(&handle);

    ARMCI_Free((void *) buffer[rank]);
    free(buffer);

    ARMCI_Finalize();

    MPI_Finalize();

    return 0;

}
コード例 #21
0
/*
 * Strided accumulate test (ARMCI_AccS with ARMCI_ACC_DBL).
 *
 * Every rank initialises an XDIM x YDIM destination and source array to
 * (1.0 + rank), then accumulates scaling * source into the destination of
 * rank (rank + 1) % nranks ITERATIONS times.  Afterwards each rank checks
 * its own destination against the value implied by its left neighbour's
 * contributions.
 *
 * Returns 0 when no rank saw a mismatch, 1 otherwise.
 */
int main(int argc, char **argv) {
    int i, j, rank, nranks, peer, bufsize, errors, total_errors;
    double **buf_bvec, **src_bvec, *src_buf;
    int count[2], src_stride, trg_stride, stride_level;
    double scaling, elapsed;

    MPI_Init(&argc, &argv);
    ARMCI_Init();

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    buf_bvec = (double **) malloc(sizeof(double *) * nranks);
    src_bvec = (double **) malloc(sizeof(double *) * nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    ARMCI_Malloc((void **) buf_bvec, bufsize);
    ARMCI_Malloc((void **) src_bvec, bufsize);
    src_buf = src_bvec[rank];

    if (rank == 0)
        printf("ARMCI Strided DLA Accumulate Test:\n");

    ARMCI_Access_begin(buf_bvec[rank]);
    ARMCI_Access_begin(src_buf);

    for (i = 0; i < XDIM*YDIM; i++) {
        *(buf_bvec[rank] + i) = 1.0 + rank;
        *(src_buf + i) = 1.0 + rank;
    }

    ARMCI_Access_end(src_buf);
    ARMCI_Access_end(buf_bvec[rank]);

    scaling = 2.0;

    src_stride = XDIM * sizeof(double);
    trg_stride = XDIM * sizeof(double);
    stride_level = 1;

    count[1] = YDIM;
    count[0] = XDIM * sizeof(double);

    ARMCI_Barrier();
    elapsed = MPI_Wtime();

    peer = (rank+1) % nranks;

    for (i = 0; i < ITERATIONS; i++) {

      ARMCI_AccS(ARMCI_ACC_DBL,
          (void *) &scaling,
          src_buf,
          &src_stride,
          (void *) buf_bvec[peer],
          &trg_stride,
          count,
          stride_level,
          peer);
    }

    ARMCI_Barrier();
    elapsed = MPI_Wtime() - elapsed;

    if (rank == 0) printf("Time: %f sec\n", elapsed);

    ARMCI_Access_begin(buf_bvec[rank]);
    for (i = errors = 0; i < XDIM; i++) {
      for (j = 0; j < YDIM; j++) {
        const double actual   = *(buf_bvec[rank] + i + j*XDIM);
        const double expected = (1.0 + rank) + scaling * (1.0 + ((rank+nranks-1)%nranks)) * (ITERATIONS);
        const double diff     = actual - expected;
        /* Compare the magnitude of the error: the original one-sided test
         * let results that were too small pass unnoticed. */
        if (diff > 1e-10 || diff < -1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    ARMCI_Access_end(buf_bvec[rank]);

    MPI_Allreduce(&errors, &total_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    ARMCI_Free((void *) buf_bvec[rank]);
    ARMCI_Free((void *) src_bvec[rank]);

    free(buf_bvec);
    free(src_bvec);

    ARMCI_Finalize();
    MPI_Finalize();

    if (total_errors == 0) {
      if (rank == 0) printf("Success.\n");
      return 0;
    } else {
      if (rank == 0) printf("Fail.\n");
      return 1;
    }
}
コード例 #22
0
ファイル: ARMCI_Get_bw.c プロジェクト: arnolda/scafacos
/*
 * Bandwidth benchmark for contiguous non-blocking gets (ARMCI_NbGet).
 *
 * Every rank allocates MAX_MSGSIZE * ITERATIONS bytes with ARMCI_Malloc and
 * fills them with (1.0 + rank).  Rank 0 then fetches the buffer of rank 1 in
 * chunks of msgsize bytes, for msgsize doubling from sizeof(double) up to
 * MAX_MSGSIZE, and prints the bandwidth in MB/s per message size.  When
 * DATA_VALIDATION is defined the fetched data is checked after each round.
 * Requires at least two processes; with fewer the measurement is skipped.
 */
int main(int argc, char *argv[])
{

    int rank, nranks;
    size_t i, msgsize, dest;
    size_t iterations, max_msgsize;
    int bufsize;
    double **buffer;
    double t_start, t_stop, t_total, d_total;
    double expected, bandwidth;
    int provided;
    armci_hdl_t handle;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    max_msgsize = MAX_MSGSIZE;
    ARMCI_Init_args(&argc, &argv);

    bufsize = max_msgsize * ITERATIONS;
    buffer = (double **) malloc(sizeof(double *) * nranks);
    ARMCI_Malloc((void **) buffer, bufsize);

    for (i = 0; i < bufsize / sizeof(double); i++)
    {
        *(buffer[rank] + i) = 1.0 + rank;
    }

    ARMCI_INIT_HANDLE(&handle);
    ARMCI_SET_AGGREGATE_HANDLE(&handle);

    ARMCI_Barrier();

    if (rank == 0 && nranks < 2)
    {
        /* buffer[1] does not exist with a single process. */
        printf("ARMCI_Get bandwidth test needs at least 2 processes, skipping \n");
        fflush(stdout);
    }
    else if (rank == 0)
    {

        printf("ARMCI_Get Bandwidth in MBPS \n");
        printf("%20s %22s \n", "Message Size", "Bandwidth");
        fflush(stdout);

        dest = 1;
        expected = 1 + dest;

        for (msgsize = sizeof(double); msgsize <= max_msgsize; msgsize *= 2)
        {

            iterations = bufsize/msgsize;

            t_start = MPI_Wtime();

            for (i = 0; i < iterations; i++)
            {
                /* Chunk i of the remote buffer lands in chunk i locally. */
                ARMCI_NbGet((void *) ((char *) buffer[dest] + i * msgsize),
                            (void *) ((char *) buffer[rank] + i * msgsize),
                            (int) msgsize, (int) dest, &handle);
            }

            ARMCI_Wait(&handle);

            t_stop = MPI_Wtime();
            /* Floating-point division: integer division truncated the
             * megabyte count. */
            d_total = (double) (iterations * msgsize) / (1024.0 * 1024.0);
            t_total = t_stop - t_start;
            bandwidth = d_total / t_total;
            /* msgsize is a size_t, so it must be printed with %zu. */
            printf("%20zu %20.4lf \n", msgsize, bandwidth);
            fflush(stdout);

#ifdef DATA_VALIDATION
            {
                size_t j;

                for(j=0; j<((iterations*msgsize)/sizeof(double)); j++)
                {
                    if(*(buffer[rank] + j) != expected)
                    {
                        printf("Data validation failed At displacement : %zu Expected : %lf Actual : %lf \n",
                                j, expected, *(buffer[rank] + j));
                        fflush(stdout);
                        return -1;
                    }
                }

                /* Restore the local contents so the next round is validated
                 * against freshly fetched data. */
                for(j=0; j<bufsize/sizeof(double); j++)
                {
                    *(buffer[rank] + j) = 1.0 + rank;
                }
            }
#endif

        }

    }

    ARMCI_Barrier();

    ARMCI_UNSET_AGGREGATE_HANDLE(&handle);

    ARMCI_Free((void *) buffer[rank]);
    free(buffer);

    ARMCI_Finalize();

    MPI_Finalize();

    return 0;
}
コード例 #23
0
ファイル: perf_aggr.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Times several ways of moving MAXELEMS individual doubles between process 0
 * and every other process, then verifies the transferred data.
 *
 * Process 0 performs, in order: value puts, one vector put per target,
 * per-element non-blocking puts, aggregated non-blocking puts, one vector
 * get per target, per-element non-blocking gets and aggregated non-blocking
 * gets.  All other processes only take part in the barriers and in the
 * verification of the put destination.
 *
 * dryrun: when non-zero every printf is suppressed; the transfers and the
 *         verification still run.
 *
 * NOTE(review): create_array/destroy_array are defined elsewhere; presumably
 * they collectively allocate/free one array per process -- confirm.
 */
void test_aggregate(int dryrun)
{

  int i, j, rc, bytes, elems[2] = {MAXPROC, MAXELEMS};
  double *ddst_put[MAXPROC];
  double *ddst_get[MAXPROC];
  double *dsrc[MAXPROC];
  armci_hdl_t aggr_hdl_put[MAXPROC];
  armci_hdl_t aggr_hdl_get[MAXPROC];
  armci_hdl_t hdl_put[MAXELEMS];
  armci_hdl_t hdl_get[MAXELEMS];
  armci_giov_t darr;
  /* Indexed by j < elems[1] below, so MAX_REQUESTS must be >= MAXELEMS. */
  void *src_ptr[MAX_REQUESTS], *dst_ptr[MAX_REQUESTS];
  int start = 0, end = 0;
  double start_time;

  create_array(ddst_put, 2, elems);
  create_array(ddst_get, 2, elems);
  create_array(dsrc, 1, &elems[1]);

  /* Source data depends on the owning process so the gets can tell the
   * origins apart during verification. */
  for (i = 0; i < elems[1]; i++) {
    dsrc[me][i] = i * 1.001 * (me + 1);
  }
  for (i = 0; i < elems[0]*elems[1]; i++) {
    ddst_put[me][i] = 0.0;
    ddst_get[me][i] = 0.0;
  }

  ARMCI_Barrier();

  /* only proc 0 does the work */
  if (me == 0) {
    if (!dryrun) {
      printf("Transferring %d doubles (Not an array of %d doubles)\n", MAXELEMS, MAXELEMS);
    }

    /* initializing non-blocking handles */
    for (i = 0; i < elems[1]; i++) {
      ARMCI_INIT_HANDLE(&hdl_put[i]);
    }
    for (i = 0; i < elems[1]; i++) {
      ARMCI_INIT_HANDLE(&hdl_get[i]);
    }

    /* aggregate handles: one per target process */
    for (i = 0; i < nproc; i++) {
      ARMCI_INIT_HANDLE(&aggr_hdl_put[i]);
    }
    for (i = 0; i < nproc; i++) {
      ARMCI_INIT_HANDLE(&aggr_hdl_get[i]);
    }
    for (i = 0; i < nproc; i++) {
      ARMCI_SET_AGGREGATE_HANDLE(&aggr_hdl_put[i]);
    }
    for (i = 0; i < nproc; i++) {
      ARMCI_SET_AGGREGATE_HANDLE(&aggr_hdl_get[i]);
    }

    bytes = sizeof(double);

    /* **************** PUT **************** */
    /* value put: one ARMCI_NbPutValueDouble per element, waited per target */
    start_time = armci_timer();
    start = 0;
    end = elems[1];
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        ARMCI_NbPutValueDouble(dsrc[me][j], &ddst_put[i][me*elems[1] + j], i,
                               &hdl_put[j]);
      }
      for (j = start; j < end; j++) {
        ARMCI_Wait(&hdl_put[j]);
      }
    }
    if (!dryrun) {
      printf("%d: Value Put time      = %.2es\n", me, armci_timer() - start_time);
    }

    /* vector put: a single descriptor with elems[1] pointer pairs per target */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        src_ptr[j] = (void *)&dsrc[me][j];
        dst_ptr[j] = (void *)&ddst_put[i][me*elems[1] + j];
      }
      darr.src_ptr_array = src_ptr;
      darr.dst_ptr_array = dst_ptr;
      darr.bytes = sizeof(double);
      darr.ptr_array_len = elems[1];
      /* NOTE(review): hdl_put is indexed by the target rank here, so this
       * relies on nproc <= MAXELEMS -- confirm. */
      if ((rc = ARMCI_NbPutV(&darr, 1, i, &hdl_put[i]))) {
        ARMCI_Error("armci_nbputv failed\n", rc);
      }
    }
    for (i = 1; i < nproc; i++) {
      ARMCI_Wait(&hdl_put[i]);
    }
    if (!dryrun) {
      printf("%d: Vector Put time     = %.2es\n", me, armci_timer() - start_time);
    }

    /* regular put: one non-blocking put and one handle per element */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        if ((rc = ARMCI_NbPut(&dsrc[me][j], &ddst_put[i][me*elems[1] + j], bytes,
                              i, &hdl_put[j]))) {
          ARMCI_Error("armci_nbput failed\n", rc);
        }
      }
      for (j = start; j < end; j++) {
        ARMCI_Wait(&hdl_put[j]);
      }
    }
    if (!dryrun) {
      printf("%d: Regular Put time    = %.2es\n", me, armci_timer() - start_time);
    }

    /* aggregate put: all element puts for a target share that target's
     * aggregate handle and are waited on once per target */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        if ((rc = ARMCI_NbPut(&dsrc[me][j], &ddst_put[i][me*elems[1] + j], bytes,
                              i,  &aggr_hdl_put[i]))) {
          ARMCI_Error("armci_nbput failed\n", rc);
        }
      }
    }
    for (i = 1; i < nproc; i++) {
      ARMCI_Wait(&aggr_hdl_put[i]);
    }
    if (!dryrun) {
      printf("%d: Aggregate Put time  = %.2es\n\n", me, armci_timer() - start_time);
    }


    /* **************** GET **************** */

    /* vector get: one descriptor per source process, waited immediately */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        src_ptr[j] = (void *)&dsrc[i][j];
        dst_ptr[j] = (void *)&ddst_get[me][i*elems[1] + j];
      }
      darr.src_ptr_array = src_ptr;
      darr.dst_ptr_array = dst_ptr;
      darr.bytes = sizeof(double);
      darr.ptr_array_len = elems[1];
      if ((rc = ARMCI_NbGetV(&darr, 1, i, &hdl_get[i]))) {
        ARMCI_Error("armci_nbgetv failed\n", rc);
      }
      ARMCI_Wait(&hdl_get[i]);
    }
    if (!dryrun) {
      printf("%d: Vector Get time     = %.2es\n", me, armci_timer() - start_time);
    }

    /* regular get: one non-blocking get and one handle per element */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        if ((rc = ARMCI_NbGet(&dsrc[i][j], &ddst_get[me][i*elems[1] + j], bytes,
                              i, &hdl_get[j]))) {
          ARMCI_Error("armci_nbget failed\n", rc);
        }
      }
      for (j = start; j < end; j++) {
        ARMCI_Wait(&hdl_get[j]);
      }
    }
    if (!dryrun) {
      printf("%d: Regular Get time    = %.2es\n", me, armci_timer() - start_time);
    }

    /* aggregate get: the return code of ARMCI_NbGet is not checked here,
     * unlike in the regular get above */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        ARMCI_NbGet(&dsrc[i][j], &ddst_get[me][i*elems[1] + j], bytes,
                    i, &aggr_hdl_get[i]);
      }
    }
    for (i = 1; i < nproc; i++) {
      ARMCI_Wait(&aggr_hdl_get[i]);
    }
    if (!dryrun) {
      printf("%d: Aggregate Get time  = %.2es\n", me, armci_timer() - start_time);
    }
  }

  ARMCI_Barrier();
  ARMCI_AllFence();
  ARMCI_Barrier();

  /* Verify the puts: process 0 wrote its dsrc (j * 1.001, since me + 1 == 1
   * there) into the first elems[1] entries of every other process. */
  if (!(me == 0))
    for (j = 0; j < elems[1]; j++) {
      if (ARMCI_ABS(ddst_put[me][j] - j * 1.001) > 0.1) {
        ARMCI_Error("aggregate put failed...1", 0);
      }
    }
  ARMCI_Barrier();
  if (!dryrun)if (me == 0) {
      printf("\n  aggregate put ..O.K.\n");
    }
  fflush(stdout);

  /* Verify the gets: slot i of ddst_get on process 0 must hold process i's
   * dsrc, i.e. j * 1.001 * (i + 1). */
  if (me == 0) {
    for (i = 1; i < nproc; i++) {
      for (j = 0; j < elems[1]; j++) {
        if (ARMCI_ABS(ddst_get[me][i*elems[1] + j] - j * 1.001 *(i + 1)) > 0.1) {
          ARMCI_Error("aggregate get failed...1", 0);
        }
      }
    }
  }
  ARMCI_Barrier();
  if (!dryrun)if (me == 0) {
      printf("  aggregate get ..O.K.\n");
    }
  fflush(stdout);


  ARMCI_AllFence();
  ARMCI_Barrier();

  if (!dryrun)if (me == 0) {
      printf("O.K.\n");
      fflush(stdout);
    }
  destroy_array(ddst_put);
  destroy_array(ddst_get);
  destroy_array(dsrc);
}
コード例 #24
0
ファイル: perf2.c プロジェクト: jeffhammond/ga
/*
 * Times contiguous one-sided transfers from process 0 to process 1.
 *
 * buffer_size: size in bytes of each ARMCI-allocated buffer; also the
 *              largest message size measured.
 * op:          PUT, GET or ACC; any other value is reported through
 *              ARMCI_Error.
 *
 * For message sizes doubling from 16 bytes up to buffer_size, process 0 runs
 * WARMUP untimed plus iter timed operations and prints the message size, the
 * average time per operation and the bytes moved per unit of time.
 *
 * NOTE(review): the target is hard-coded to process 1, so this presumably
 * expects nproc >= 2 -- confirm against the caller.
 */
static void contig_test(size_t buffer_size, int op)
{
    void **dst_ptr;
    void **put_buf;
    void **get_buf;
    size_t msg_size;
    int dst = 1;
    double scale = 1.0;

    dst_ptr = malloc(nproc * sizeof(void*));
    put_buf = malloc(nproc * sizeof(void*));
    get_buf = malloc(nproc * sizeof(void*));
    if (NULL == dst_ptr || NULL == put_buf || NULL == get_buf) {
        ARMCI_Error("contig_test: malloc failed", 1);
    }
    ARMCI_Malloc(dst_ptr, buffer_size);
    ARMCI_Malloc(put_buf, buffer_size);
    ARMCI_Malloc(get_buf, buffer_size);

    /* initialize what we're putting */
    fill_array((double*)put_buf[me], buffer_size/sizeof(double), me);

    for (msg_size = 16; msg_size <= buffer_size; msg_size *= 2) {

        int j;
        int iter = msg_size > MEDIUM_MESSAGE_SIZE ? ITER_LARGE : ITER_SMALL;

        /* Only process 0 sets t_start; initialise it so no path reads an
         * indeterminate value. */
        double t_start = 0.0, t_end;
        if (0 == me) {
            for (j= 0; j < iter + WARMUP; ++j) {

                /* start the clock once the warm-up iterations are done */
                if (WARMUP == j) {
                    t_start = dclock();
                }

                switch (op) {
                    case PUT:
                        ARMCI_Put(put_buf[me], dst_ptr[dst], msg_size,
                                dst);
                        break;
                    case GET:
                        ARMCI_Get(dst_ptr[dst], get_buf[me], msg_size,
                                dst);
                        break;
                    case ACC:
                        ARMCI_Acc(ARMCI_ACC_DBL, &scale,
                                put_buf[me], dst_ptr[dst], msg_size,
                                dst);
                        break;
                    default:
                        ARMCI_Error("oops", 1);
                        break;
                }

            }
        }
        /* calculate total time and average time */
        t_end = dclock();
        ARMCI_Barrier();


        if (0 == me) {
            printf("%8zu\t\t%6.2f\t\t%10.2f\n",
                    msg_size,
                    ((t_end  - t_start))/iter,
                    msg_size*iter/((t_end - t_start)));
        }
    }
    ARMCI_Free(dst_ptr[me]);
    ARMCI_Free(put_buf[me]);
    ARMCI_Free(get_buf[me]);
    free(dst_ptr);
    free(put_buf);
    free(get_buf);
}
コード例 #25
0
ファイル: test_puts.c プロジェクト: abhinavvishnu/matex
/*
 * Strided put test (ARMCI_PutS).
 *
 * Each rank repeatedly fills a local XDIM x YDIM source buffer with
 * (rank + i) and puts the whole array into the buffer of rank
 * (rank + 1) % nranks, for i = 0 .. ITERATIONS-1.  After a barrier each rank
 * checks that its own buffer holds the values written by its left neighbour
 * in the final iteration.
 *
 * Returns 0 if this rank saw no mismatch, 1 otherwise.
 */
int main(int argc, char **argv) {
    int i, j, rank, nranks, peer, bufsize, errors;
    double **buffer, *src_buf;
    int count[2], src_stride, trg_stride, stride_level;

    MPI_Init(&argc, &argv);
    ARMCI_Init();

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    buffer = (double **) malloc(sizeof(double *) * nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    ARMCI_Malloc((void **) buffer, bufsize);
    src_buf = ARMCI_Malloc_local(bufsize);

    if (rank == 0)
        printf("ARMCI Strided Put Test:\n");

    src_stride = XDIM * sizeof(double);
    trg_stride = XDIM * sizeof(double);
    stride_level = 1;

    count[1] = YDIM;
    count[0] = XDIM * sizeof(double);

    ARMCI_Barrier();

    peer = (rank+1) % nranks;

    for (i = 0; i < ITERATIONS; i++) {

      for (j = 0; j < XDIM*YDIM; j++) {
        *(src_buf + j) = rank + i;
      }

      ARMCI_PutS(
          src_buf,
          &src_stride,
          (void *) buffer[peer],
          &trg_stride,
          count,
          stride_level,
          peer);
    }

    ARMCI_Barrier();

    ARMCI_Access_begin(buffer[rank]);
    for (i = errors = 0; i < XDIM; i++) {
      for (j = 0; j < YDIM; j++) {
        const double actual   = *(buffer[rank] + i + j*XDIM);
        /* A put overwrites rather than accumulates: the buffer must hold the
         * left neighbour's source value from the last iteration, which is
         * (neighbour rank) + (ITERATIONS - 1).  The previous formula was the
         * accumulate test's and could never trigger a failure. */
        const double expected = ((rank+nranks-1)%nranks) + ((ITERATIONS) - 1);
        const double diff     = actual - expected;
        /* Check the error magnitude in both directions. */
        if (diff > 1e-10 || diff < -1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    ARMCI_Access_end(buffer[rank]);

    ARMCI_Free((void *) buffer[rank]);
    ARMCI_Free_local(src_buf);
    free(buffer);

    ARMCI_Finalize();
    MPI_Finalize();

    if (errors == 0) {
      printf("%d: Success\n", rank);
      return 0;
    } else {
      printf("%d: Fail\n", rank);
      return 1;
    }
}
コード例 #26
0
ファイル: fttest.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Checkpoint/restart test driver.
 *
 * Phase 1 times do_work() for sizes doubling from 1 to SIZE_ without
 * checkpointing and stores the timings in time_array.  Phase 2 registers the
 * ARMCI-allocated region with the checkpoint layer, then for sizes doubling
 * from 128 to SIZE_ takes a checkpoint (or resumes from one), runs
 * do_work(), stores the timing in time_array1 and, at FAILURE_SIZE_,
 * triggers a simulated failure unless this pass is already a restart.
 *
 * Always returns 0.
 */
int main(int argc, char *argv[])
{
  int rc, i, j = 0, rid, ret;
  armci_ckpt_ds_t ckptds;
  ARMCI_Group grp;

  ARMCI_Init_args(&argc, &argv);
  nproc = armci_msg_nproc();
  me = armci_msg_me();

  if (me == 0) {
    if (nproc > MAXPROCS) {
      ARMCI_Error("nproc > MAXPROCS", nproc);
    }
    else {
      printf("ARMCI test program (%d processes)\n", nproc);
      fflush(stdout);
      sleep(1);
    }

  }
  armci_init_checkpoint2();
  ARMCI_Group_get_world(&grp);
  size = SIZE_;
  rc = ARMCI_Malloc((void **)ptr_arr, size * 8);
  if (rc) {
    /* ptr_arr[me] is checkpointed below; do not continue without it. */
    ARMCI_Error("ARMCI_Malloc failed", rc);
  }
  printf("ARMCI test program (%d processes)\n", nproc);
  fflush(stdout);

  /* Phase 1: baseline timings without checkpointing. */
  for (size = 1; size <= SIZE_; size *= 2) {
    t1 = MPI_Wtime();
    for (i = 0; i < 5; i++) {
      for (rc = 0; rc < 15; rc++) {
        do_work(size);
      }
    }
    time_array[j++] = MPI_Wtime() - t1;
    ARMCI_Barrier();
    printf("%d:done for size %ld\n", me, size);
    fflush(stdout);
  }

  (void)ARMCI_Ckpt_create_ds(&ckptds, 1);
  ckptds.ptr_arr[0] = ptr_arr[me];
  ckptds.sz[0] = SIZE_ * 8;
  rid = ARMCI_Ckpt_init(NULL, &grp, 1, 0, &ckptds);
  printf("%d: After ARMCI_Ckpt_init(): \n", me);

  /* Phase 2: same work with a checkpoint taken before every size. */
  j = 0;
  for (size = 128; size <= SIZE_; size *= 2) {

    int simulate_restart = 1;
    t1 = MPI_Wtime();

    ret = ARMCI_Ckpt(rid);
    if (ret == ARMCI_CKPT) {
      printf("%d: Performed CHECKPOINT @ size=%ld\n", me, size);
    }
    else if (ret == ARMCI_RESTART) {
      /* Already resumed from a checkpoint: do not fail a second time. */
      simulate_restart = 0;
      printf("%d: Performed RESTART @ size=%ld\n", me, size);
    }

    /* The original wrapped this in an inner loop whose body was an empty
     * "if", so do_work() ran once per outer iteration; that no-op loop and
     * its shadowing "rc" are removed.
     * NOTE(review): phase 1 calls do_work() 75 times per size but this loop
     * only 5 times, and the two timing arrays are indexed from different
     * starting sizes -- confirm the comparison printed below is intended. */
    for (i = 0; i < 5; i++) {
      do_work(size);
    }

    time_array1[j++] = MPI_Wtime() - t1;
    sleep(1);

    if (simulate_restart && size == FAILURE_SIZE_) {
      /* size is printed with %ld everywhere else; %d mismatched its type. */
      printf("%d: Simulating FAILURE @ size = %ld\n", me, size);
      ARMCI_Restart_simulate(rid, 1);
    }

    printf("%d: DONE for size=%ld regular=%f withckpt=%f\n\n",
           me, size, time_array[j-1], time_array1[j-1]);
    fflush(stdout);

  }

  ARMCI_Ckpt_finalize(rid);

  printf("Before Finalize()\n");
  ARMCI_Barrier();
  ARMCI_Finalize();
  armci_msg_finalize();
  return(0);
}