int main(void)
{
   long *dest;
   int me, npes;
   long swapped_val, new_val;

   shmem_init();
   me = shmem_my_pe();
   npes = shmem_n_pes();
   dest = (long *) shmem_malloc(sizeof (*dest));
   *dest = me;
   shmem_barrier_all();
   new_val = me;
   if (me & 1){
      swapped_val = shmem_swap(dest, new_val, (me + 1) % npes);
      printf("%d: dest = %ld, swapped = %ld\n", me, *dest, swapped_val);
   }
   shmem_free(dest);
   return 0;
}
Пример #2
0
int main(int argc, char **argv)
{
  int i,j;
  short     oldjs, oldxs, my_pes;
  int       oldji, oldxi, my_pei;
  long      oldjl, oldxl, my_pel;
  long long oldjll,oldxll,my_pell;
  float     oldjf, oldxf, my_pef;
  double    oldjd, oldxd, my_ped;
  int my_pe,n_pes;
  size_t max_elements,max_elements_bytes;
  static short *xs;
  static int   *xi;
  static long  *xl;
  static long long *xll;
  static float  *xf;
  static double *xd;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
  my_pes = (short) my_pe;
  my_pei = (int)  my_pe;
  my_pel = (long) my_pe;
  my_pell = (long long) my_pe;
  my_pef = (float) my_pe;
  my_ped = (double) my_pe;
#ifdef HAVE_SET_CACHE_INV
  shmem_set_cache_inv();
#endif

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_swap(%s) n_pes=%d\n", argv[0],n_pes);

/*  test shmem_short_swap  */

  /*  shmalloc xs on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(short) * n_pes);
  xs = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xs[i] = 0;
  shmem_barrier_all();

  oldjs = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pes = my_pes + (short) 1;
      /* record PE value in xs[my_pe] -- save PE number */
      oldxs = shmem_short_swap(&xs[my_pe], my_pes, 0);
      /* printf("PE=%d,i=%d,my_pes=%d,oldxs=%d\n",my_pe,i,my_pes,oldxs); */
      if (oldxs != oldjs)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxs = %d expected = %d\n",
                         my_pe, n_pes, i, oldxs, oldjs);
      oldjs = my_pes;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xs[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xs[%d]=%d,i=%d\n",j,j,xs[j],i); */
      if (xs[j] != (short) i)
        fprintf(stderr, "FAIL PE %d of %d: xs[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xs[j],i);
      i++;
    }
  }
  shmem_free(xs);

/*  test shmem_int_swap  */

  /*  shmalloc xi on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  shmem_barrier_all();

  oldji = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pei = my_pei + (int) 1;
      /* record PE value in xi[my_pe] -- save PE number */
      oldxi = shmem_int_swap(&xi[my_pe], my_pei, 0);
      /* printf("PE=%d,i=%d,my_pei=%d,oldxi=%d\n",my_pe,i,my_pei,oldxi); */
      if (oldxi != oldji)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxi = %d expected = %d\n",
                         my_pe, n_pes, i, oldxi, oldji);
      oldji = my_pei;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xi[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xi[%d]=%d,i=%d\n",j,j,xi[j],i); */
      if (xi[j] != i)
        fprintf(stderr, "FAIL PE %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xi[j],i);
      i++;
    }
  }
  shmem_free(xi);

/*  test shmem_long_swap  */

  /*  shmalloc xl on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  shmem_barrier_all();

  oldjl = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pel = my_pel + (long) 1;
      /* record PE value in xl[my_pe] -- save PE number */
      oldxl = shmem_long_swap(&xl[my_pe], my_pel, 0);
      /* printf("PE=%d,i=%d,my_pel=%d,oldxl=%d\n",my_pe,i,my_pel,oldxl); */
      if (oldxl != oldjl)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxl = %d expected = %d\n",
                         my_pe, n_pes, i, oldxl, oldjl);
      oldjl = my_pel;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xl[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xl[%d]=%d,i=%d\n",j,j,xl[j],i); */
      if (xl[j] != (long)i)
        fprintf(stderr, "FAIL PE %d of %d: xl[%d] = %ld expected = %d\n",
                         my_pe, n_pes, j, xl[j],i);
      i++;
    }
  }
  shmem_free(xl);

/*  test shmem_longlong_swap  */

#ifdef HAVE_LONG_LONG

  /*  shmalloc xll on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long long) * n_pes);
  xll = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xll[i] = 0;
  shmem_barrier_all();

  oldjll = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pell = my_pell + (long long) 1;
      /* record PE value in xll[my_pe] -- save PE number */
      oldxll = shmem_longlong_swap(&xll[my_pe], my_pell, 0);
      /* printf("PE=%d,i=%d,my_pell=%ld,oldxll=%d\n",my_pe,i,my_pell,oldxll); */
      if (oldxll != (long long) oldjll)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxll = %ld expected = %ld\n",
                         my_pe, n_pes, i, oldxll, oldjll);
      oldjll = my_pell;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xll[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xll[%d]=%ld,i=%d\n",j,j,xll[j],i); */
      if (xll[j] != (long long) i)
        fprintf(stderr, "FAIL PE %d of %d: xll[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xll[j],i);
      i++;
    }
  }
  shmem_free(xll);

#endif

/*  test shmem_float_swap  */

  /*  shmalloc xf on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(float) * n_pes);
  xf = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xf[i] = (float) 0;
  shmem_barrier_all();

  oldjf = (float) 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pef = my_pef + (float) 1;
      /* record PE value in xf[my_pe] -- save PE number */
      oldxf = shmem_float_swap(&xf[my_pe], my_pef, 0);
      /* printf("PE=%d,i=%d,my_pef=%10.2f,oldxf=%10.2f\n",my_pe,i,my_pef,oldxf); */
      if (oldxf != oldjf)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxf = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, i, oldxf, oldjf);
      oldjf = my_pef;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xs[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xf[%d]=%10.2f,i=%d\n",j,j,xf[j],i); */
      if (xf[j] != (float) i)
        fprintf(stderr, "FAIL PE %d of %d: xf[%d] = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, j-1, xf[j], (float)i);
      i++;
    }
  }
  shmem_free(xf);

/*  test shmem_double_swap  */

  /*  shmalloc xd on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(double) * n_pes);
  xd = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xd[i] = (double) 0;
  shmem_barrier_all();

  oldjd = (double) 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_ped = my_ped + (double) 1;
      /* record PE value in xd[my_pe] -- save PE number */
      oldxd = shmem_double_swap(&xd[my_pe], my_ped, 0);
      /* printf("PE=%d,i=%d,my_ped=%10.2f,oldxd=%10.2f\n",my_pe,i,my_ped,oldxd);
 */
      if (oldxd != oldjd)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxd = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, i, oldxd, oldjd);
      oldjd = my_ped;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xd[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xd[%d]=%10.2f,i=%d\n",j,j,xd[j],i); */
      if (xd[j] != (double) i)
        fprintf(stderr, "FAIL PE %d of %d: xd[%d] = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, j, xd[j], (double)i);
      i++;
    }
  }
  shmem_free(xd);

#ifdef SHMEM_C_GENERIC_32

/*  test shmem_swap (GENERIC 32)  */

  my_pei = (int)  my_pe;
  /*  shmalloc xi on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  shmem_barrier_all();

  oldji = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pei = my_pei + (int) 1;
      /* record PE value in xi[my_pe] -- save PE number */
      oldxi = shmem_swap(&xi[my_pe], my_pei, 0);
      /* printf("PE=%d,i=%d,my_pei=%d,oldxi=%d\n",my_pe,i,my_pei,oldxi); */
      if (oldxi != oldji)
        fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxi = %d expected = %d\n",
                         my_pe, n_pes, i, oldxi, oldji);
      oldji = my_pei;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xi[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xi[%d]=%d,i=%d\n",j,j,xi[j],i); */
      if (xi[j] != i)
        fprintf(stderr, "FAIL pe %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xi[j],i);
      i++;
    }
  }
  shmem_free(xi);

#else

/*  test shmem_swap (GENERIC 64)  */

  my_pel = (long) my_pe;
  /*  shmalloc xl on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  shmem_barrier_all();

  oldjl = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pel = my_pel + (long) 1;
      /* record PE value in xl[my_pe] -- save PE number */
      oldxl = shmem_swap(&xl[my_pe], my_pel, 0);
      /* printf("PE=%d,i=%d,my_pel=%d,oldxl=%d\n",my_pe,i,my_pel,oldxl); */
      if (oldxl != oldjl)
        fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxl = %d expected = %d\n",
                         my_pe, n_pes, i, oldxl, oldjl);
      oldjl = my_pel;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xl[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xl[%d]=%d,i=%d\n",j,j,xl[j],i); */
      if (xl[j] != (long)i)
        fprintf(stderr, "FAIL pe %d of %d: xl[%d] = %ld expected = %d\n",
                         my_pe, n_pes, j, xl[j],i);
      i++;
    }
  }
  shmem_free(xl);

#endif

  shmem_barrier_all();
#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}
Пример #3
0
int main(int argc, char **argv)
{
  int i,j;
  short     modjs, oldjs, oldxmodjs, oldxas, my_pes, vals;
  int       modji, oldji, oldxmodji, oldxai, my_pei, vali;
  long      modjl, oldjl, oldxmodjl, oldxal, my_pel, vall;
  long long modjll,oldjll,oldxmodjll,oldxall,my_pell,valll;
  int my_pe,n_pes;
  size_t max_elements,max_elements_bytes;
  static short *xs,*xas;
  static int   *xi,*xai;
  static long  *xl,*xal;
  static long long *xll,*xall;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
  my_pes = (short) my_pe;
  my_pei = (int)  my_pe;
  my_pel = (long) my_pe;
  my_pell = (long long) my_pe;
  vals = 1;  vali = 1;  vall = 1;  valll = 1;
#ifdef HAVE_SET_CACHE_INV
  shmem_set_cache_inv();
#endif

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_cswap(%s) n_pes=%d\n", argv[0],n_pes);

/*  test shmem_short_finc & shmem_short_swap & shmem_short_cswap */

  /*  shmalloc xs & xas on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(short) * n_pes);
  xs = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xs[i] = 0;
  max_elements_bytes = (size_t) (sizeof(short) * n_pes * ITER);
  xas = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xas[i] = 1;
    } else {
      xas[i] = 0;
    }
  count_short = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldjs = shmem_short_finc(&count_short, 0);  /* get index oldjs from PE 0 */
      modjs = (oldjs % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xas[oldjs] --
             tells PE involved for each count */
      oldxas = shmem_short_cswap(&xas[oldjs], vals, my_pes, 0);
      /* printf("PE=%d,i=%d,oldjs=%d,oldxas=%d\n",my_pe,i,oldjs,oldxas); */
      if (oldxas == 1) {
          /* record PE value in xs[modjs] */
        oldxmodjs = shmem_short_swap(&xs[modjs], my_pes, 0); 
        /* printf("PE=%d,oldjs=%d,modjs=%d,oldxmodjs=%d\n",
                   my_pe,oldjs,modjs,oldxmodjs); */
      }
      if (oldxas != 0 && oldxas != 1)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxas = %d expected = 0\n",
                         my_pe, n_pes, i, oldxas);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xs[j] array PEs vs saved ans in xas[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xs[%d]=%d,xas[%d]=%d\n",j,j-1,xs[j-1],i,xas[i]); */
      if (xs[j-1] != xas[i])
        fprintf(stderr, "FAIL PE %d of %d: xs[%d] = %d expected = %d\n", 
                         my_pe, n_pes, j-1, xs[j-1], xas[i]);
      i++;
    }
  }
  shmem_free(xs);  shmem_free(xas);

/*  test shmem_int_finc & shmem_int_swap & shmem_int_cswap */

  /*  shmalloc xi & xai on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  max_elements_bytes = (size_t) (sizeof(int) * n_pes * ITER);
  xai = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xai[i] = 1;
    } else {
      xai[i] = 0;
    }
  count_int = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldji = shmem_int_finc(&count_int, 0);  /* get index oldji from PE 0 */
      modji = (oldji % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xai[oldji] --
             tells PE involved for each count */
      oldxai = shmem_int_cswap(&xai[oldji], vali, my_pei, 0);
      /* printf("PE=%d,i=%d,oldji=%d,oldxai=%d\n",my_pe,i,oldji,oldxai); */
      if (oldxai == 1) {
          /* record PE value in xi[modji] */
        oldxmodji = shmem_int_swap(&xi[modji], my_pei, 0);
        /* printf("PE=%d,oldji=%d,modji=%d,oldxmodji=%d\n",
                   my_pe,oldji,modji,oldxmodji); */
      }
      if (oldxai != 0 && oldxai != 1)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxai = %d expected = 0\n",
                         my_pe, n_pes, i, oldxai);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xi[j] array PEs vs saved ans in xai[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xi[%d]=%d,xai[%d]=%d\n",j,j-1,xi[j-1],i,xai[i]); */
      if (xi[j-1] != xai[i])
        fprintf(stderr, "FAIL PE %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j-1, xi[j-1], xai[i]);
      i++;
    }
  }
  shmem_free(xi);  shmem_free(xai);

/*  test shmem_long_finc & shmem_long_swap & shmem_long_cswap */

  /*  shmalloc xl & xal on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  max_elements_bytes = (size_t) (sizeof(long) * n_pes * ITER);
  xal = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xal[i] = 1;
    } else {
      xal[i] = 0;
    }
  count_long = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldjl = shmem_long_finc(&count_long, 0);  /* get index oldjl from PE 0 */
      modjl = (oldjl % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xal[oldjl] --
             tells PE involved for each count */
      oldxal = shmem_long_cswap(&xal[oldjl], vall, my_pel, 0);
      /* printf("PE=%d,i=%d,oldjl=%d,oldxal=%d\n",my_pe,i,oldjl,oldxal); */
      if (oldxal == 1) {
          /* record PE value in xl[modjl] */
      oldxmodjl = shmem_long_swap(&xl[modjl], my_pel, 0);
      /* printf("PE=%d,oldjl=%ld,modjl=%ld,oldxmodjl=%ld\n",
                 my_pe,oldjl,modjl,oldxmodjl); */
      }
      if (oldxal != 0 && oldxal != 1)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxal = %ld expected = 0\n",
                         my_pe, n_pes, i, oldxal);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xl[j] array PEs vs saved ans in xal[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xl[%d]=%ld,xal[%d]=%ld\n",j,j-1,xl[j-1],i,xal[i]); */
      if (xl[j-1] != xal[i])
        fprintf(stderr, "FAIL PE %d of %d: xl[%d] = %ld expected = %ld\n",
                         my_pe, n_pes, j-1, xl[j-1], xal[i]);
      i++;
    }
  }
  shmem_free(xl);  shmem_free(xal);

/*  test shmem_longlong_finc & shmem_longlong_swap & shmem_longlong_cswap */

#ifdef HAVE_LONG_LONG

  /*  shmalloc xll & xall on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long long) * n_pes);
  xll = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xll[i] = 0;
  max_elements_bytes = (size_t) (sizeof(long long) * n_pes * ITER);
  xall = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xall[i] = 1;
    } else {
      xall[i] = 0;
    }
  count_longlong = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldjll = shmem_longlong_finc(&count_longlong, 0);  /* get index oldjll from PE 0 */
      modjll = (oldjll % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xall[oldjll] --
             tells PE involved for each count */
      oldxall = shmem_longlong_cswap(&xall[oldjll], valll, my_pell, 0);
      /* printf("PE=%d,i=%d,oldjll=%d,oldxall=%d\n",my_pe,i,oldjll,oldxall); */
      if (oldxall == 1) {
          /* record PE value in xll[modjll] */
        oldxmodjll = shmem_longlong_swap(&xll[modjll], my_pell, 0);
        /* printf("PE=%d,oldjll=%ld,modjll=%ld,oldxmodjll=%ld\n",
                   my_pe,oldjll,modjll,oldxmodjll); */
      }
      if (oldxall != 0 && oldxall != 1)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxall = %ld expected = 0\n",
                         my_pe, n_pes, i, oldxall);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xll[j] array PEs vs saved ans in xall[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xll[%d]=%ld,xall[%d]=%ld\n",j,j-1,xll[j-1],i,xall[i]); */
      if (xll[j-1] != xall[i])
        fprintf(stderr, "FAIL PE %d of %d: xll[%d] = %ld expected = %ld\n",
                         my_pe, n_pes, j-1, xll[j-1], xall[i]);
      i++;
    }
  }
  shmem_free(xll);  shmem_free(xall);

#endif

#ifdef SHMEM_C_GENERIC_32

/*  test shmem_finc & shmem_swap & shmem_cswap (GENERIC 32)  */

  /*  shmalloc xi & xai on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  max_elements_bytes = (size_t) (sizeof(int) * n_pes * ITER);
  xai = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xai[i] = 1;
    } else {
      xai[i] = 0;
    }
  count_int = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldji = shmem_finc(&count_int, 0);  /* get index oldji from PE 0 */
      modji = (oldji % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xai[oldji] --
             tells PE involved for each count */
      oldxai = shmem_cswap(&xai[oldji], vali, my_pei, 0);
      /* printf("PE=%d,i=%d,oldji=%d,oldxai=%d\n",my_pe,i,oldji,oldxai); */
      if (oldxai == 1) {
            /* record PE value in xi[modji] */
        oldxmodji = shmem_swap(&xi[modji], my_pei, 0);
        /* printf("PE=%d,oldji=%d,modji=%d,oldxmodji=%d\n",
                   my_pe,oldji,modji,oldxmodji); */
      }
      if (oldxai != 0 && oldxai != 1)
        fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxai = %d expected = 0\n",
                         my_pe, n_pes, i, oldxai);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xi[j] array PEs vs saved ans in xai[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xi[%d]=%d,xai[%d]=%d\n",j,j-1,xi[j-1],i,xai[i]); */
      if (xi[j-1] != xai[i])
        fprintf(stderr, "FAIL pe %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j-1, xi[j-1], xai[i]);
      i++;
    }
  }
  shmem_free(xi);  shmem_free(xai);

#else

/*  test shmem_finc & shmem_swap & shmem_cswap (GENERIC 64)  */

  /*  shmalloc xl & xal on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  max_elements_bytes = (size_t) (sizeof(long) * n_pes * ITER);
  xal = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xal[i] = 1;
    } else {
      xal[i] = 0;
    }
  count_long = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldjl = shmem_finc(&count_long, 0);  /* get index oldjl from PE 0 */
      modjl = (oldjl % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xal[oldjl] --
             tells PE involved for each count */
      oldxal = shmem_cswap(&xal[oldjl], vall, my_pell, 0);
      /* printf("PE=%d,i=%d,oldjl=%d,oldxal=%d\n",my_pe,i,oldjl,oldxal); */
      if (oldxal == 1) {
            /* record PE value in xl[modjl] */
        oldxmodjl = shmem_swap(&xl[modjl], my_pell, 0);
        /* printf("PE=%d,oldjl=%ld,modjl=%ld,oldxmodjl=%ld\n",
                   my_pe,oldjl,modjl,oldxmodjl); */
      }
      if (oldxal != 0 && oldxal != 1)
        fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxal = %ld expected = 0\n",
                         my_pe, n_pes, i, oldxal);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xl[j] array PEs vs saved ans in xal[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xl[%d]=%ld,xal[%d]=%ld\n",j,j-1,xl[j-1],i,xal[i]); */
      if (xl[j-1] != xal[i])
        fprintf(stderr, "FAIL pe %d of %d: xl[%d] = %ld expected = %10.2f\n",
                         my_pe, n_pes, j-1, xl[j-1], xal[i]);
      i++;
    }
  }
  shmem_free(xl);  shmem_free(xal);

#endif

  shmem_barrier_all();
#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}