Example #1
0
/*
 * Pairwise test of the sized non-blocking put routines.
 *
 * For each of shmem_putmem_nb, shmem_put16_nb, shmem_put32_nb,
 * shmem_put64_nb, shmem_put128_nb and the generic shmem_put_nb:
 *   - every even PE fills a source buffer with (my_pe + j);
 *   - every odd PE pre-fills its target buffer with its own (my_pe + j),
 *     which differs by one from what its even neighbour will send;
 *   - after a barrier the even PE puts the buffer to PE my_pe+1, calls
 *     shmem_quiet(), and then puts 1 into the neighbour's flag;
 *   - the odd PE calls shmem_int_wait(flag,0) and then checks every element
 *     against (my_pe + j - 1), printing one "FAIL" line per mismatch.
 *
 * All output goes to stderr.  Exits with status 1 when the number of PEs is
 * odd; otherwise returns 0 (mismatches are reported but do not change the
 * return value).
 */
int main(int argc, char **argv)
{
  int j;
  int my_pe,n_pes;
  int *flag,*one;
  size_t max_elements,max_elements_bytes;

  char *srce_char,*targ_char;
  short *srce_short,*targ_short;
  int *srce_int,*targ_int;
  long *srce_long,*targ_long;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
  flag = shmem_malloc((size_t) sizeof(int));
  one  = shmem_malloc((size_t) sizeof(int));
  /* same allocation check as for the data buffers, done before first use */
  if((flag == NULL) || (one == NULL))
    shmalloc_error();
  *one  = 1;

/*  fail if trying to use odd number of processors  */
  if ( (n_pes % 2) != 0 ){
        fprintf(stderr, "FAIL - test requires even number of PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_num_put_nb(%s)\n", argv[0]);

/*  shmem_putmem_nb test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(char));
  max_elements_bytes = (size_t) (sizeof(char)*max_elements);
  /* max_elements is a size_t, hence %zu (here and in every test below) */
  if(my_pe == 0)
    fprintf(stderr,"shmem_putmem_nb         max_elements = %zu\n",max_elements);
  srce_char = shmem_malloc(max_elements_bytes);
  targ_char = shmem_malloc(max_elements_bytes);
  if((srce_char == NULL) || (targ_char == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++)
      srce_char[j] = (char)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++)
      targ_char[j] = (char)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_putmem_nb(targ_char,srce_char,max_elements,my_pe+1,NULL);
    /* quiet before raising the flag so the flag is issued after the data */
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_char[j] != (char)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_char[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_char[j],my_pe+j-1);
  }
  shmem_free(srce_char);  shmem_free(targ_char);

/*  shmem_put16_nb test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(short));
  /* this test caps the element count at 20000 */
  if(max_elements > 20000) max_elements=20000;
  max_elements_bytes = (size_t) (sizeof(short)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_put16_nb          max_elements = %zu\n",max_elements);
  srce_short = shmem_malloc(max_elements_bytes);
  targ_short = shmem_malloc(max_elements_bytes);
  if((srce_short == NULL) || (targ_short == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++)
      srce_short[j] = (short)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++)
      targ_short[j] = (short)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put16_nb(targ_short,srce_short,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_short[j] != (short)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_short[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_short[j],my_pe+j-1);
  }
  shmem_free(srce_short);  shmem_free(targ_short);

/*  shmem_put32_nb test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_put32_nb          max_elements = %zu\n",max_elements);
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++)
      srce_int[j] = (int)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++)
      targ_int[j] = (int)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put32_nb(targ_int,srce_int,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_int[j] != (int)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_int[j],my_pe+j-1);
  }
  shmem_free(srce_int);  shmem_free(targ_int);

/*  shmem_put64_nb test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_put64_nb          max_elements = %zu\n",max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++)
      srce_long[j] = (long)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++)
      targ_long[j] = (long)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put64_nb(targ_long,srce_long,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_long[j] != (long)(my_pe+j-1) )
        /* targ_long[j] is a long, hence %ld */
        fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%ld my_pe+j-1=%d\n",
                               my_pe,j,targ_long[j],my_pe+j-1);
  }
  shmem_free(srce_long);  shmem_free(targ_long);

/*  shmem_put128_nb test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  /* use an even number of longs; the count handed to the put is half of it */
  if ( (max_elements % 2) != 0)
    max_elements = max_elements-1;
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  max_elements = max_elements/2;
  if(my_pe == 0)
    fprintf(stderr,"shmem_put128_nb         max_elements = %zu\n",max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();
  /* the buffers hold 2*max_elements longs, so fill and check that many */
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < 2*max_elements; j++)
      srce_long[j] = (long)(my_pe+j);
  else
    for(j = 0; j < 2*max_elements; j++)
      targ_long[j] = (long)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put128_nb(targ_long,srce_long,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < 2*max_elements; j++)
      if ( targ_long[j] != (long)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%ld my_pe+j-1=%d\n",
                               my_pe,j,targ_long[j],my_pe+j-1);
  }
  shmem_free(srce_long);  shmem_free(targ_long);

#ifdef SHMEM_C_GENERIC_32

/*  shmem_put_nb (GENERIC 32) test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_put_nb (GENERIC 32)  max_elements = %zu\n",max_elements);
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++)
      srce_int[j] = (int)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++)
      targ_int[j] = (int)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put_nb(targ_int,srce_int,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_int[j] != (int)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_int[j],my_pe+j-1);
  }
  shmem_free(srce_int);  shmem_free(targ_int);

#else

/*  shmem_put_nb (GENERIC 64) test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_put_nb (GENERIC 64)  max_elements = %zu\n",max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++)
      srce_long[j] = (long)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++)
      targ_long[j] = (long)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put_nb(targ_long,srce_long,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_long[j] != (long)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%ld my_pe+j-1=%d\n",
                               my_pe,j,targ_long[j],my_pe+j-1);
  }
  shmem_free(srce_long);  shmem_free(targ_long);

#endif

#ifdef NEEDS_FINALIZE
  shmem_finalize();
#endif
  return 0;
}
/*
 * Test of shmem_broadcast32 / shmem_broadcast64 (and, when OPENSHMEM is not
 * defined, the generic shmemx_broadcast).
 *
 * Every broadcast is issued with PE_root = 1 over the set described by the
 * arguments (0, 0, n_pes).  For the 32- and 64-bit tests the loop runs
 * i = 0, 2, 4, ... < IMAX; each pass does an integer broadcast using pSync1
 * followed by a floating-point broadcast using pSync2.  After each broadcast
 * the check expects:
 *   - on the root PE: the target still holds its initial (100*my_pe + j);
 *   - on every other PE: the target holds the value the root wrote into its
 *     source for this pass.
 * A "FAIL" line is printed to stderr for each mismatching element.
 *
 * Exits with status 1 when run on fewer than two PEs; otherwise returns 0.
 */
int main(int argc, char **argv)
{
  int i,j;
  int my_pe,n_pes,PE_root;
  size_t max_elements,max_elements_bytes;

  int *srce_int,*targ_int,ans_int;
  long *srce_long,*targ_long,ans_long;
  float *srce_float,*targ_float,ans_float;
  double *srce_double,*targ_double,ans_double;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_broadcast(%s) n_pes=%d\n", argv[0],n_pes);
  /* initialize the pSync arrays */
  for (i=0; i < _SHMEM_BCAST_SYNC_SIZE; i++) {
    pSync1[i] = _SHMEM_SYNC_VALUE;
    pSync2[i] = _SHMEM_SYNC_VALUE;
  }
  shmem_barrier_all();  /* Wait for all PEs to initialize pSync1 & pSync2 */
  PE_root=1;  /* we'll broadcast from this PE */

/*  shmem_broadcast32 test   */
  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  /* max_elements is a size_t, hence %zu (here and in every test below) */
  if(my_pe == 0)
    fprintf(stderr,"shmem_broadcast32             max_elements = %zu\n",
                                                  max_elements);
  /* the float buffers reuse the byte count computed from sizeof(int) */
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  srce_float = shmem_malloc(max_elements_bytes);
  targ_float = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL) ||
     (srce_float == NULL) || (targ_float == NULL))
     shmalloc_error();
  for(j = 0; j < max_elements; j++) {
    srce_int[j] = (int)(my_pe+j);
    srce_float[j] = (float)(my_pe+j);
    targ_int[j] = (int)(100*my_pe+j);
    targ_float[j] = (float)(100*my_pe+j);
  }
  shmem_barrier_all();
  for(i = 0; i < IMAX; i+=2) {
    /* i is even -- using int */
    if (my_pe == PE_root)
      for(j = 0; j < max_elements; j++) {
        srce_int[j] = (int)(my_pe+i+j);
      }
    /* broadcast from PE_root to all PEs using pSync1 */
    shmem_broadcast32(targ_int,srce_int,max_elements,PE_root,0,0,n_pes,pSync1);
    for(j = 0; j < max_elements; j++) {
      if (my_pe == PE_root) {
        /* the root's own target is expected to keep its initial contents */
        ans_int= (int)(100*my_pe+j);
      } else {
        ans_int= (int)(PE_root+i+j);
      }
      if ( targ_int[j] != ans_int )
        fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d ans_int=%d\n",
                               my_pe,j,targ_int[j],ans_int);
    }
    /* i+1 is odd -- using float */
    if (my_pe == PE_root)
      for(j = 0; j < max_elements; j++) {
        srce_float[j] = (float)(PE_root+i+1+j);
      }
    /* broadcast from PE_root to all PEs using pSync2 */
    shmem_broadcast32(targ_float,srce_float,max_elements,PE_root,0,0,n_pes,pSync2);
    for(j = 0; j < max_elements; j++) {
      if (my_pe == PE_root) {
        ans_float= (float)(100*my_pe+j);
      } else {
        ans_float= (float)(PE_root+i+1+j);
      }
      if ( targ_float[j] != ans_float )
        fprintf(stderr, "FAIL: PE [%d] targ_float[%d]=%10.0f ans_float=%10.0f\n",
                               my_pe,j,targ_float[j],ans_float);
    }
  }
  shmem_free(srce_int);    shmem_free(targ_int);
  shmem_free(srce_float);  shmem_free(targ_float);

/*  shmem_broadcast64 test   */
  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_broadcast64             max_elements = %zu\n",
                                                  max_elements);
  /* the double buffers reuse the byte count computed from sizeof(long) */
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  srce_double = shmem_malloc(max_elements_bytes);
  targ_double = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL) ||
     (srce_double == NULL) || (targ_double == NULL))
     shmalloc_error();
  for(j = 0; j < max_elements; j++) {
    srce_long[j] = (long)(my_pe+j);
    srce_double[j] = (double)(my_pe+j);
    targ_long[j] = (long)(100*my_pe+j);
    targ_double[j] = (double)(100*my_pe+j);
  }
  shmem_barrier_all();
  for(i = 0; i < IMAX; i+=2) {
    /* i is even -- using long */
    if (my_pe == PE_root)
      for(j = 0; j < max_elements; j++) {
        srce_long[j] = (long)(my_pe+i+j);
      }
    /* broadcast from PE_root to all PEs using pSync1 */
    shmem_broadcast64(targ_long,srce_long,max_elements,PE_root,0,0,n_pes,pSync1);
    for(j = 0; j < max_elements; j++) {
      if (my_pe == PE_root) {
        ans_long= (long)(100*my_pe+j);
      } else {
        ans_long= (long)(PE_root+i+j);
      }
      if ( targ_long[j] != ans_long )
        /* both values are long, hence %ld */
        fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%ld ans_long=%ld\n",
                               my_pe,j,targ_long[j],ans_long);
    }
    /* i+1 is odd -- using double */
    if (my_pe == PE_root)
      for(j = 0; j < max_elements; j++) {
        srce_double[j] = (double)(PE_root+i+1+j);
      }
    /* broadcast from PE_root to all PEs using pSync2 */
    shmem_broadcast64(targ_double,srce_double,max_elements,PE_root,0,0,n_pes,pSync2);
    for(j = 0; j < max_elements; j++) {
      if (my_pe == PE_root) {
        ans_double= (double)(100*my_pe+j);
      } else {
        ans_double= (double)(PE_root+i+1+j);
      }
      if ( targ_double[j] != ans_double )
        fprintf(stderr, "FAIL: PE [%d] targ_double[%d]=%10.0f ans_double=%10.0f\n",
                               my_pe,j,targ_double[j],ans_double);
    }
  }
  shmem_free(srce_long);  shmem_free(targ_long);
  shmem_free(srce_double);  shmem_free(targ_double);

#ifndef OPENSHMEM
#ifdef SHMEM_C_GENERIC_32

/*  shmemx_broadcast (GENERIC 32) test   */
  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmemx_broadcast (GENERIC 32)  max_elements = %zu\n",
                                                  max_elements);
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL))
    shmalloc_error();
  /* this loop runs unconditionally; it is not part of the if above */
  for(j = 0; j < max_elements; j++) {
    srce_int[j] = (int)(my_pe+j);
    targ_int[j] = (int)(2*my_pe+j);
  }
  shmem_barrier_all();
  /* broadcast from PE 1 to all PEs */
  shmemx_broadcast(targ_int,srce_int,max_elements,1,0,0,n_pes,pSync1);
  for(j = 0; j < max_elements; j++) {
    if (my_pe == 1) {
      /* PE 1's initial target value: 2*1 + j */
      ans_int= (int)(j+2);
    } else {
      /* PE 1's source value: 1 + j */
      ans_int= (int)(j+1);
    }
    if ( targ_int[j] != ans_int )
      fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d ans_int=%d\n",
                             my_pe,j,targ_int[j],ans_int);
  }
  shmem_free(srce_int);  shmem_free(targ_int);

#else

/*  shmemx_broadcast (GENERIC 64) test   */
  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmemx_broadcast (GENERIC 64)  max_elements = %zu\n",
                                                  max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();
  for(j = 0; j < max_elements; j++) {
    srce_long[j] = (long)(my_pe+j);
    targ_long[j] = (long)(2*my_pe+j);
  }
  shmem_barrier_all();
  /* broadcast from PE 1 to all PEs */
  shmemx_broadcast(targ_long,srce_long,max_elements,1,0,0,n_pes,pSync1);
  for(j = 0; j < max_elements; j++) {
    if (my_pe == 1) {
      /* PE 1's initial target value: 2*1 + j */
      ans_long = (long)(j+2);
    } else {
      /* PE 1's source value: 1 + j */
      ans_long = (long)(j+1);
    }
    if ( targ_long[j] != ans_long )
      fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%ld ans_long=%ld\n",
                             my_pe,j,targ_long[j],ans_long);
  }
  shmem_free(srce_long);  shmem_free(targ_long);

#endif
#endif

#ifdef NEEDS_FINALIZE
  shmem_finalize();
#endif
  return 0;
}
/*
 * Pairwise test of the typed non-blocking puts at a range of transfer sizes.
 *
 * Buffers of short, int, long, float and double are allocated once.  Then,
 * MAX_ITER times, for each entry of elements[] that does not exceed
 * max_elements (as last computed, from sizeof(double)):
 *   - every even PE fills its source buffers, every odd PE pre-fills its
 *     target buffers with its own (different) values;
 *   - after a barrier the even PE issues non-blocking puts of int, long,
 *     float and double plus a blocking short put to PE my_pe+1, calls
 *     shmem_quiet(), and then puts 1 into the neighbour's flag;
 *   - the odd PE calls shmem_int_wait(flag,0) and compares every element
 *     with what its even neighbour wrote, printing a "FAIL" line to stderr
 *     for each mismatch.
 *
 * Exits with status 1 when the number of PEs is odd; otherwise returns 0.
 */
int main(int argc, char **argv)
{
  int i,j,iter;
  int my_pe,n_pes;
  int *flag,*one;
  size_t max_elements,max_elements_bytes;
  size_t elements[16] = {1,2,4,8,12,16,24,32,64,128,256,512,1024,2048,4096,8192};
  int num_elements = 16;

  short *srce_short,*targ_short;
  int *srce_int,*targ_int;
  long *srce_long,*targ_long;
  float *srce_float,*targ_float;
  double *srce_double,*targ_double;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
  flag = shmem_malloc((size_t) sizeof(int));
  one  = shmem_malloc((size_t) sizeof(int));
  /* same allocation check as for the data buffers, done before first use */
  if((flag == NULL) || (one == NULL))
    shmalloc_error();
  *one  = 1;

/*  fail if trying to use odd number of processors  */
  if ( (n_pes % 2) != 0 ){
        fprintf(stderr, "FAIL - test requires even number of PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_both_put_nb_size(%s)\n", argv[0]);

/*  alloc arrays   */

  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  /* max_elements is a size_t, hence %zu (here and below) */
  if(my_pe == 0)
    fprintf(stderr,"shmem_int_put_nb        max_elements = %zu\n",max_elements);
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(short));
  max_elements_bytes = (size_t) (sizeof(short)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_short_put         max_elements = %zu\n",max_elements);
  srce_short = shmem_malloc(max_elements_bytes);
  targ_short = shmem_malloc(max_elements_bytes);
  if((srce_short == NULL) || (targ_short == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_long_put_nb       max_elements = %zu\n",max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(float));
  max_elements_bytes = (size_t) (sizeof(float)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_float_put_nb      max_elements = %zu\n",max_elements);
  srce_float = shmem_malloc(max_elements_bytes);
  targ_float = shmem_malloc(max_elements_bytes);
  if((srce_float == NULL) || (targ_float == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(double));
  max_elements_bytes = (size_t) (sizeof(double)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_double_put_nb     max_elements = %zu\n",max_elements);
  srce_double = shmem_malloc(max_elements_bytes);
  targ_double = shmem_malloc(max_elements_bytes);
  if((srce_double == NULL) || (targ_double == NULL))
    shmalloc_error();

  /* the double-based count computed last is the limit applied to all types */
  if(my_pe == 0)
    fprintf(stderr,"Actual value used for   max_elements = %zu\n",max_elements);
  /* try the different sizes MAX_ITER times */
  for (iter = 0; iter < MAX_ITER; iter++) {
   for (i = 0; i < num_elements; i++) {
    *flag = 0;
    if (elements[i] <= max_elements) {
     if ( (my_pe % 2) == 0 )
       for(j = 0; j < elements[i]; j++) {
         /* value unique to this iteration, size, PE and index */
         const size_t val = iter*10000+elements[i]*100+my_pe+j;
         srce_short[j] = (short)(my_pe+j);
         srce_int[j] = (int)val;
         srce_long[j] = (long)val;
         srce_float[j] = (float)val;
         srce_double[j] = (double)val;
       }
     else
       for(j = 0; j < elements[i]; j++) {
         const size_t val = iter*10000+elements[i]*100+my_pe+j;
         targ_short[j] = (short)(my_pe+j);
         targ_int[j] = (int)val;
         targ_long[j] = (long)val;
         targ_float[j] = (float)val;
         targ_double[j] = (double)val;
       }
     shmem_barrier_all();
     if ( (my_pe % 2) == 0 ) {
#ifndef OPENSHMEM
       shmemx_int_put_nb(targ_int,srce_int,elements[i],my_pe+1,NULL);
       shmemx_long_put_nb(targ_long,srce_long,elements[i],my_pe+1,NULL);
       shmemx_float_put_nb(targ_float,srce_float,elements[i],my_pe+1,NULL);
       shmemx_double_put_nb(targ_double,srce_double,elements[i],my_pe+1,NULL);
#else
       shmem_int_put_nbi(targ_int,srce_int,elements[i],my_pe+1);
       shmem_long_put_nbi(targ_long,srce_long,elements[i],my_pe+1);
       shmem_float_put_nbi(targ_float,srce_float,elements[i],my_pe+1);
       shmem_double_put_nbi(targ_double,srce_double,elements[i],my_pe+1);
#endif
       /* this one is blocking */
       shmem_short_put(targ_short,srce_short,elements[i],my_pe+1);
       /* quiet before raising the flag so the flag is issued after the data */
       shmem_quiet();
       shmem_int_put(flag,one,(size_t)1,my_pe+1);
     } else {
       shmem_int_wait(flag,0);
       for(j = 0; j < elements[i]; j++) {
         /* what the even neighbour (my_pe-1) stored at index j; this is a
            size_t because elements[i] is, so it is printed with %zu */
         const size_t expect = iter*10000+elements[i]*100+my_pe+j-1;
         if ( targ_short[j] != (short)(my_pe+j-1) )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_short[%d]=%d not equal %d\n",
              my_pe,iter,i,j,targ_short[j],my_pe+j-1);
         if ( targ_int[j] != (int)expect )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_int[%d]=%d not equal %zu\n",
              my_pe,iter,i,j,targ_int[j],expect);
         if ( targ_long[j] != (long)expect )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_long[%d]=%ld not equal %zu\n",
              my_pe,iter,i,j,targ_long[j],expect);
         if ( targ_float[j] != (float)expect )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_float[%d]=%f not equal %zu\n",
              my_pe,iter,i,j,targ_float[j],expect);
         if ( targ_double[j] != (double)expect )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_double[%d]=%f not equal %zu\n",
              my_pe,iter,i,j,targ_double[j],expect);
         }
     }
    }
   }
  }
  shmem_free(srce_short);  shmem_free(targ_short);
  shmem_free(srce_int);  shmem_free(targ_int);
  shmem_free(srce_long);  shmem_free(targ_long);
  shmem_free(srce_float);  shmem_free(targ_float);
  shmem_free(srce_double);  shmem_free(targ_double);
#ifdef NEEDS_FINALIZE
  shmem_finalize();
#endif
  return 0;
}