Пример #1
0
/* 
 * Aggregates the per PE timing information
 */ 
static double * gather_rank_times(_timer_t * const timer)
{
  if(timer->seconds_iter > 0) {

    assert(timer->seconds_iter == timer->num_iters);

    const unsigned int num_records = NUM_PES * timer->seconds_iter;
    double * my_times = shmem_malloc(timer->seconds_iter * sizeof(double));
    assert(my_times);
    memcpy(my_times, timer->seconds, timer->seconds_iter * sizeof(double));

    double * all_times = shmem_malloc( num_records * sizeof(double));
    assert(all_times);

    shmem_barrier_all();

    shmem_fcollect64(all_times, my_times, timer->seconds_iter, 0, 0, NUM_PES, pSync);
    shmem_barrier_all();

    shmem_free(my_times);

    return all_times;
  }
  else{
    return NULL;
  }
}
Пример #2
0
/*
 * Aggregates the per PE timing 'count' information 
 */
static unsigned int * gather_rank_counts(_timer_t * const timer)
{
  if(timer->count_iter > 0){
    const unsigned int num_records = NUM_PES * timer->num_iters;

    unsigned int * my_counts = shmem_malloc(timer->num_iters * sizeof(unsigned int));
    assert(my_counts);
    memcpy(my_counts, timer->count, timer->num_iters*sizeof(unsigned int));

    unsigned int * all_counts = shmem_malloc( num_records * sizeof(unsigned int) );
    assert(all_counts);

    shmem_barrier_all();

    shmem_collect32(all_counts, my_counts, timer->num_iters, 0, 0, NUM_PES, pSync);

    shmem_barrier_all();

    shmem_free(my_counts);

    return all_counts;
  }
  else{
    return NULL;
  }

}
Пример #3
0
static void shared_free(void *ptr, int size) {
#if FORKED_CACHE
    shmem_free(ptr, size);
#else
    free(ptr);
#endif
}
Пример #4
0
int
main(void)
{
    double *f;
    int me;

    shmem_init();
    me = shmem_my_pe();

    f = (double *) shmem_malloc(sizeof(*f));

    *f = PI;
    shmem_barrier_all();

    if (me == 0) {
        shmem_double_p(f, E, 1);
    }

    shmem_barrier_all();


    if (me == 1) {
        printf("PE %d: %f, %s\n",
               me, *f, (fabs(*f - E) < epsilon) ? "OK" : "FAIL");
    }

    shmem_free(f);

    shmem_finalize();

    return 0;
}
Пример #5
0
cache_vars_t* cache_init(int size,int sector){
  int num;
#ifndef WIN32
  cache_vars_t* s=shmem_alloc(sizeof(cache_vars_t));
#else
  cache_vars_t* s=malloc(sizeof(cache_vars_t));
#endif
  if(s==NULL) return NULL;
  
  memset(s,0,sizeof(cache_vars_t));
  num=size/sector;
  if(num < 16){
     num = 16;
  }//32kb min_size
  s->buffer_size=num*sector;
  s->sector_size=sector;
#ifndef WIN32
  s->buffer=shmem_alloc(s->buffer_size);
#else
  s->buffer=malloc(s->buffer_size);
#endif

  if(s->buffer == NULL){
#ifndef WIN32
    shmem_free(s,sizeof(cache_vars_t));
#else
    free(s);
#endif
    return NULL;
  }

  s->fill_limit=8*sector;
  s->back_size=s->buffer_size/2;
  return s;
}
Пример #6
0
cache_vars_t* cache_init(int size,int sector){
  int num;
#if !defined(__MINGW32__) && !defined(PTHREAD_CACHE) && !defined(__OS2__)
  cache_vars_t* s=shmem_alloc(sizeof(cache_vars_t));
#else
  cache_vars_t* s=malloc(sizeof(cache_vars_t));
#endif
  if(s==NULL) return NULL;

  memset(s,0,sizeof(cache_vars_t));
  num=size/sector;
  if(num < 16){
     num = 16;
  }//32kb min_size
  s->buffer_size=num*sector;
  s->sector_size=sector;
#if !defined(__MINGW32__) && !defined(PTHREAD_CACHE) && !defined(__OS2__)
  s->buffer=shmem_alloc(s->buffer_size);
#else
  s->buffer=malloc(s->buffer_size);
#endif

  if(s->buffer == NULL){
#if !defined(__MINGW32__) && !defined(PTHREAD_CACHE) && !defined(__OS2__)
    shmem_free(s,sizeof(cache_vars_t));
#else
    free(s);
#endif
    return NULL;
  }

  s->fill_limit=8*sector;
  s->back_size=s->buffer_size/2;
  return s;
}
Пример #7
0
int
main (int argc, char **argv)
{
    int i;
    int nextpe;
    int me, npes;
    long src[N];
    long *dest;
    shmemx_request_handle_t handle;

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    for (i = 0; i < N; i += 1) {
        src[i] = (long) me;
    }

    dest = (long *) shmem_malloc (N * sizeof (*dest));

    nextpe = (me + 1) % npes;

    shmemx_long_put_nb (dest, src, N, nextpe, &handle);

    shmemx_wait_req (handle);

    shmem_barrier_all ();

    shmem_free (dest);

    shmem_finalize ();

    return 0;
}
Пример #8
0
int
main(int argc, char* argv[])
{
    long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    long *target;
    int *flag;
    int i, num_pes;
    int failed = 0;

    shmem_init();

    target = (long*) shmem_malloc(sizeof(long) * 10);
    flag = (int*) shmem_malloc(sizeof(int));
    *flag = 0;

    num_pes=shmem_n_pes();

    memset(target, 0, sizeof(long)*10);

    shmem_barrier_all();

    if (shmem_my_pe() == 0) {
        for(i = 0; i < num_pes; i++) {
            shmem_long_put_nbi(target, source, 10, i);
            shmem_fence();
            shmem_int_inc(flag, i);
        }
    }

    shmem_int_wait_until(flag, SHMEM_CMP_EQ, 1);

    for (i = 0; i < 10; i++) {
        if (target[i] != source[i]) {
            fprintf(stderr,"[%d] target[%d] = %ld, expected %ld\n",
                    shmem_my_pe(), i, target[i], source[i]);
            failed = 1;
        }
    }

    shmem_free(target);
    shmem_free(flag);

    shmem_finalize();

    return failed;
}
Пример #9
0
void fifo_ring_destroy(FifoRing* thiz)
{
	if(thiz != NULL)
	{
		shmem_free(thiz, sizeof(FifoRing) + thiz->length * sizeof(void*));
	}

	return;
}
Пример #10
0
void cache_uninit(stream_t *s) {
  cache_vars_t* c = s->cache_data;
  if(!s->cache_pid) return;
#ifndef WIN32
  kill(s->cache_pid,SIGKILL);
  waitpid(s->cache_pid,NULL,0);
#else
  TerminateThread((HANDLE)s->cache_pid,0);
  free(c->stream);
#endif
  if(!c) return;
#ifndef WIN32
  shmem_free(c->buffer,c->buffer_size);
  shmem_free(s->cache_data,sizeof(cache_vars_t));
#else
  free(c->buffer);
  free(s->cache_data);
#endif
}
Пример #11
0
void cache_uninit(stream_t *s) {
  cache_vars_t* c = s->cache_data;
  if(!s->cache_pid) return;
#if defined(__MINGW32__) || defined(PTHREAD_CACHE) || defined(__OS2__)
  cache_do_control(s, -2, NULL);
#else
  kill(s->cache_pid,SIGKILL);
  waitpid(s->cache_pid,NULL,0);
#endif
  if(!c) return;
#if defined(__MINGW32__) || defined(PTHREAD_CACHE) || defined(__OS2__)
  free(c->stream);
  free(c->buffer);
  free(s->cache_data);
#else
  shmem_free(c->buffer,c->buffer_size);
  shmem_free(s->cache_data,sizeof(cache_vars_t));
#endif
}
Пример #12
0
void
fcollect(int *target, int *src, int elements, int me, int npes, int loops)
{
    int i;
	double start_time, elapsed_time;
    long total_bytes = loops * elements * sizeof(*src);
    long *ps, *pSync, *pSync1;

	pSync = (long*) shmem_malloc( 2 * sizeof(long) * _SHMEM_COLLECT_SYNC_SIZE );
	pSync1 = &pSync[_SHMEM_COLLECT_SYNC_SIZE];
    for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) {
        pSync[i] = pSync1[i] = _SHMEM_SYNC_VALUE;
    }
	target = (int *) shmem_malloc( elements * sizeof(*target) * npes );

    if (me==0 && Verbose) {
        fprintf(stdout,"%s: %d loops of fcollect32(%ld bytes) over %d PEs: ",
                __FUNCTION__,loops,(elements*sizeof(*src)),npes);
        fflush(stdout);
    }

	shmem_barrier_all();

    start_time = shmemx_wtime();
    for(i = 0; i < loops; i++) {
        ps = &pSync[(i&1)];
        shmem_fcollect32( target, src, elements, 0, 0, npes, ps );
    }
    elapsed_time = shmemx_wtime() - start_time;

    if (me==0 && Verbose) {
        printf("%7.3f secs\n", elapsed_time);
        printf("  %7.5f usecs / fcollect32(), %ld Kbytes @ %7.4f MB/sec\n\n",
                (elapsed_time/((double)loops*npes))*1000000.0,
                (total_bytes/1024),
                ((double)total_bytes/(1024.0*1024.0)) / elapsed_time );
    }
	shmem_barrier_all();
	shmem_free(target);
    shmem_free( pSync );
	shmem_barrier_all();
}
int
main ()
{
    long *x;

    shmem_init ();

    x = (long *) shmem_malloc (sizeof (*x));

    shmem_free (x);

    shmem_finalize ();

    return 0;
}
int main(void)
{
   long *dest;
   int me, npes;
   long swapped_val, new_val;

   shmem_init();
   me = shmem_my_pe();
   npes = shmem_n_pes();
   dest = (long *) shmem_malloc(sizeof (*dest));
   *dest = me;
   shmem_barrier_all();
   new_val = me;
   if (me & 1){
      swapped_val = shmem_swap(dest, new_val, (me + 1) % npes);
      printf("%d: dest = %ld, swapped = %ld\n", me, *dest, swapped_val);
   }
   shmem_free(dest);
   return 0;
}
Пример #15
0
int
main(int argc, char **argv)
{
    int loops=DFLT_LOOPS;
    char *pgm;
    int *Target;
    int *Source;
    int i, me, npes;
    int target_PE;
    long bytes;
    double start_time, *total_time;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    if ((pgm=strrchr(argv[0],'/')))
        pgm++;
    else
        pgm = argv[0];

    while ((i = getopt (argc, argv, "hve:l:st")) != EOF) {
        switch (i)
        {
            case 'v':
                Verbose++;
                break;
            case 'e':
                if ((elements = atoi_scaled(optarg)) <= 0) {
                    fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                    shmem_finalize();
                    return 1;
                }
                break;
            case 'l':
                if ((loops = atoi_scaled(optarg)) <= 0) {
                    fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                    shmem_finalize();
                    return 1;
                }
                break;
            case 's':
                Sync++;
                break;
            case 't':
                Track++;
                break;
            case 'h':
                if (me == 0)
                    usage(pgm);
                return 0;
            default:
                if (me == 0) {
                    fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                    usage(pgm);
                }
                shmem_finalize();
                return 1;
        }
    }

    for(i=0; i < SHMEM_REDUCE_SYNC_SIZE; i++)
        pSync[i] = SHMEM_SYNC_VALUE;

    target_PE = (me+1) % npes;

    total_time = (double *) shmem_malloc( npes * sizeof(double) );
    if (!total_time) {
        fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n",
                (elements * sizeof(double)));
        shmem_global_exit(1);
    }
    for(i=0; i < npes; i++)
        total_time[i] = -1.0;

    Source = (int *) shmem_malloc( elements * sizeof(*Source) );
    if (!Source) {
        fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n",
                (elements * sizeof(*Target)));
        shmem_free(total_time);
        shmem_global_exit(1);
    }

    Target = (int *) shmem_malloc( elements * sizeof(*Target) );
    if (!Target) {
        fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n",
                (elements * sizeof(*Target)));
        shmem_free(Source);
        shmem_free(total_time);
        shmem_global_exit(1);
    }

    for (i = 0; i < elements; i++) {
        Target[i] = -90;
        Source[i] = i + 1;
    }

    bytes = loops * sizeof(int) * elements;

    if (Verbose && me==0) {
        fprintf(stderr,
                "%s: INFO - %d loops, put %d (int) elements to PE+1 Max put ??\n",
                pgm, loops, elements);
    }
    shmem_barrier_all();

    for(i=0; i < loops; i++) {

        start_time = shmemx_wtime();

        shmem_int_put(Target, Source, elements, target_PE);

        time_taken += (shmemx_wtime() - start_time);

        if (me==0) {
            if ( Track && i > 0 && ((i % 200) == 0))
                fprintf(stderr,".%d",i);
        }
        if (Sync)
            shmem_barrier_all();
    }

    // collect time per node.
    shmem_double_put( &total_time[me], &time_taken, 1, 0 );
    shmem_double_sum_to_all(&sum_time, &time_taken, 1, 0, 0, npes, pWrk, pSync);

    shmem_barrier_all();

    for (i = 0; i < elements; i++) {
        if (Target[i] != i + 1) {
            printf("%d: Error Target[%d] = %d, expected %d\n",
                   me, i, Target[i], i + 1);
            shmem_global_exit(1);
        }
    }

    if ( Track && me == 0 ) fprintf(stderr,"\n");

    if(Verbose && me == 0) {
        double rate, comp_time;

        if (Verbose > 1)
            fprintf(stdout,"Individule PE times: (seconds)\n");
        for(i=0,comp_time=0.0; i < npes; i++) {
            comp_time += total_time[i];
            if (Verbose > 1)
                fprintf(stdout,"  PE[%d] %8.6f\n",i,total_time[i]);
        }

        sum_time /= (double)npes;
        comp_time /= (double)npes;
        if (sum_time != comp_time)
            printf("%s: computed_time %7.5f != sum_to_all_time %7.5f)\n",
                   pgm, comp_time, sum_time );

        rate = ((double)bytes/(1024.0*1024.0)) / comp_time;
        printf("%s: shmem_int_put() %7.4f MB/sec (bytes %ld secs %7.4f)\n",
               pgm, rate, bytes, sum_time);
    }

    shmem_free(total_time);
    shmem_free(Target);
    shmem_free(Source);

    shmem_finalize();

    return 0;
}
Пример #16
0
int main(int argc, char **argv)
{
  int i,j;
  int my_pe,n_pes,PE_root;
  size_t max_elements,max_elements_bytes;

  int *srce_int,*targ_int,ans_int;
  long *srce_long,*targ_long,ans_long;
  float *srce_float,*targ_float,ans_float;
  double *srce_double,*targ_double,ans_double;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_broadcast(%s) n_pes=%d\n", argv[0],n_pes);
  /* initialize the pSync arrays */
  for (i=0; i < _SHMEM_BCAST_SYNC_SIZE; i++) {
    pSync1[i] = _SHMEM_SYNC_VALUE;
    pSync2[i] = _SHMEM_SYNC_VALUE;
  }
  shmem_barrier_all();  /* Wait for all PEs to initialize pSync1 & pSync2 */
  PE_root=1;  /* we'll broadcast from this PE */

/*  shmem_broadcast32 test   */
  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_broadcast32             max_elements = %d\n",
                                                  max_elements);
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  srce_float = shmem_malloc(max_elements_bytes);
  targ_float = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL) ||
     (srce_float == NULL) || (targ_float == NULL))
     shmalloc_error();
  for(j = 0; j < max_elements; j++) {
    srce_int[j] = (int)(my_pe+j);
    srce_float[j] = (float)(my_pe+j);
    targ_int[j] = (int)(100*my_pe+j);
    targ_float[j] = (float)(100*my_pe+j);
  }
  shmem_barrier_all();
  for(i = 0; i < IMAX; i+=2) {
    /* i is even -- using int */
    if (my_pe == PE_root)
      for(j = 0; j < max_elements; j++) {
        srce_int[j] = (int)(my_pe+i+j);
      }
    /* broadcast from PE_root to all PEs using pSync1 */
    shmem_broadcast32(targ_int,srce_int,max_elements,PE_root,0,0,n_pes,pSync1);
    for(j = 0; j < max_elements; j++) {
      if (my_pe == PE_root) {
        ans_int= (int)(100*my_pe+j);
      } else {
        ans_int= (int)(PE_root+i+j);
      }
      if ( targ_int[j] != ans_int )
	fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d ans_int=%d\n",
                               my_pe,j,targ_int[j],ans_int);
    }
    /* i+1 is odd -- using float */
    if (my_pe == PE_root)
      for(j = 0; j < max_elements; j++) {
        srce_float[j] = (float)(PE_root+i+1+j);
      }
    /* broadcast from PE_root to all PEs using pSync2 */
    shmem_broadcast32(targ_float,srce_float,max_elements,PE_root,0,0,n_pes,pSync2);
    for(j = 0; j < max_elements; j++) {
      if (my_pe == PE_root) {
        ans_float= (float)(100*my_pe+j);
      } else {
        ans_float= (float)(PE_root+i+1+j);
      }
      if ( targ_float[j] != ans_float )
        fprintf(stderr, "FAIL: PE [%d] targ_float[%d]=%10.0f ans_float=%10.0f\n",
                               my_pe,j,targ_float[j],ans_float);
    }
  }
  shmem_free(srce_int);    shmem_free(targ_int);
  shmem_free(srce_float);  shmem_free(targ_float);
  
/*  shmem_broadcast64 test   */
  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_broadcast64             max_elements = %d\n",
                                                  max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  srce_double = shmem_malloc(max_elements_bytes);
  targ_double = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL) ||
     (srce_double == NULL) || (targ_double == NULL))
     shmalloc_error();
  for(j = 0; j < max_elements; j++) {
    srce_long[j] = (long)(my_pe+j);
    srce_double[j] = (double)(my_pe+j);
    targ_long[j] = (long)(100*my_pe+j);
    targ_double[j] = (double)(100*my_pe+j);
  }
  shmem_barrier_all();
  for(i = 0; i < IMAX; i+=2) {
    /* i is even -- using long */
    if (my_pe == PE_root)
      for(j = 0; j < max_elements; j++) {
        srce_long[j] = (long)(my_pe+i+j);
      }
    /* broadcast from PE_root to all PEs using pSync1 */
    shmem_broadcast64(targ_long,srce_long,max_elements,PE_root,0,0,n_pes,pSync1);
    for(j = 0; j < max_elements; j++) {
      if (my_pe == PE_root) {
        ans_long= (long)(100*my_pe+j);
      } else {
        ans_long= (long)(PE_root+i+j);
      }
      if ( targ_long[j] != ans_long )
        fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d ans_long=%d\n",
                               my_pe,j,targ_long[j],ans_long);
    }
    /* i+1 is odd -- using double */
    if (my_pe == PE_root)
      for(j = 0; j < max_elements; j++) {
        srce_double[j] = (double)(PE_root+i+1+j);
      }
    /* broadcast from PE_root to all PEs using pSync2 */
    shmem_broadcast64(targ_double,srce_double,max_elements,PE_root,0,0,n_pes,pSync2);
    for(j = 0; j < max_elements; j++) {
      if (my_pe == PE_root) {
        ans_double= (double)(100*my_pe+j);
      } else {
        ans_double= (double)(PE_root+i+1+j);
      }
      if ( targ_double[j] != ans_double )
        fprintf(stderr, "FAIL: PE [%d] targ_double[%d]=%10.0f ans_double=%10.0f\n",
                               my_pe,j,targ_double[j],ans_double);
    }
  }
  shmem_free(srce_long);  shmem_free(targ_long);
  shmem_free(srce_double);  shmem_free(targ_double);

#ifndef OPENSHMEM
#ifdef SHMEM_C_GENERIC_32

/*  shmemx_broadcast (GENERIC 32) test   */
  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmemx_broadcast (GENERIC 32)  max_elements = %d\n",
                                                  max_elements);
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL))
    shmalloc_error();
    for(j = 0; j < max_elements; j++) {
      srce_int[j] = (int)(my_pe+j);
      targ_int[j] = (int)(2*my_pe+j);
    }
  shmem_barrier_all();
    /* broadcast from PE 1 to all PEs */
    shmemx_broadcast(targ_int,srce_int,max_elements,1,0,0,n_pes,pSync1);
    for(j = 0; j < max_elements; j++) {
      if (my_pe == 1) {
        ans_int= (int)(j+2);
      } else {
        ans_int= (int)(j+1);
      }
      if ( targ_int[j] != ans_int )
        fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d ans_int=%d\n",
                               my_pe,j,targ_int[j],ans_int);
    }
  shmem_free(srce_int);  shmem_free(targ_int);

#else

/*  shmemx_broadcast (GENERIC 64) test   */
  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmemx_broadcast (GENERIC 64)  max_elements = %d\n",
                                                  max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();
  for(j = 0; j < max_elements; j++) {
    srce_long[j] = (long)(my_pe+j);
    targ_long[j] = (long)(2*my_pe+j);
  }
  shmem_barrier_all();
    /* broadcast from PE 1 to all PEs */
    shmemx_broadcast(targ_long,srce_long,max_elements,1,0,0,n_pes,pSync1);
    for(j = 0; j < max_elements; j++) {
      if (my_pe == 1) {
        ans_long = (long)(j+2);
      } else {
        ans_long = (long)(j+1);
      }
      if ( targ_long[j] != ans_long )
        fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d ans_long=%d\n",
                               my_pe,j,targ_long[j],ans_long);
    }
  shmem_free(srce_long);  shmem_free(targ_long);

#endif
#endif

#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}
Пример #17
0
int main(int argc, char **argv)
{
  int j;
  int my_pe,n_pes;
  int *flag,*one;
  size_t max_elements,max_elements_bytes;

  char *srce_char,*targ_char;
  short *srce_short,*targ_short;
  int *srce_int,*targ_int;
  long *srce_long,*targ_long;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
  flag = shmem_malloc((size_t) sizeof(int));
  one  = shmem_malloc((size_t) sizeof(int));
  *one  = 1;

/*  fail if trying to use odd number of processors  */
  if ( (n_pes % 2) != 0 ){
        fprintf(stderr, "FAIL - test requires even number of PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_num_put_nb(%s)\n", argv[0]);

/*  shmem_putmem_nb test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(char));
  max_elements_bytes = (size_t) (sizeof(char)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_putmem_nb         max_elements = %d\n",max_elements);
  srce_char = shmem_malloc(max_elements_bytes);
  targ_char = shmem_malloc(max_elements_bytes);
  if((srce_char == NULL) || (targ_char == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++) 
      srce_char[j] = (char)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++) 
      targ_char[j] = (char)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_putmem_nb(targ_char,srce_char,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_char[j] != (char)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_char[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_char[j],my_pe+j-1);
  }
  shmem_free(srce_char);  shmem_free(targ_char);

/*  shmem_put16_nb test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(short));
  if(max_elements > 20000) max_elements=20000;
  max_elements_bytes = (size_t) (sizeof(short)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_put16_nb          max_elements = %d\n",max_elements);
  srce_short = shmem_malloc(max_elements_bytes);
  targ_short = shmem_malloc(max_elements_bytes);
  if((srce_short == NULL) || (targ_short == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++) 
      srce_short[j] = (short)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++) 
      targ_short[j] = (short)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put16_nb(targ_short,srce_short,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_short[j] != (short)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_short[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_short[j],my_pe+j-1);
  }
  shmem_free(srce_short);  shmem_free(targ_short);

/*  shmem_put32_nb test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_put32_nb          max_elements = %d\n",max_elements);
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++)
      srce_int[j] = (int)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++)
      targ_int[j] = (int)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put32_nb(targ_int,srce_int,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_int[j] != (int)(my_pe+j-1) )
	fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_int[j],my_pe+j-1);
  }
  shmem_free(srce_int);  shmem_free(targ_int);
  
/*  shmem_put64_nb test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_put64_nb          max_elements = %d\n",max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++)
      srce_long[j] = (long)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++)
      targ_long[j] = (long)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put64_nb(targ_long,srce_long,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_long[j] != (long)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_long[j],my_pe+j-1);
  }
  shmem_free(srce_long);  shmem_free(targ_long);

/*  shmem_put128_nb test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  if ( (max_elements % 2) != 0)
    max_elements = max_elements-1;
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  max_elements = max_elements/2;
  if(my_pe == 0)
    fprintf(stderr,"shmem_put128_nb         max_elements = %d\n",max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < 2*max_elements; j++)
      srce_long[j] = (long)(my_pe+j);
  else
    for(j = 0; j < 2*max_elements; j++)
      targ_long[j] = (long)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put128_nb(targ_long,srce_long,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < 2*max_elements; j++)
      if ( targ_long[j] != (long)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_long[j],my_pe+j-1);
  }
  shmem_free(srce_long);  shmem_free(targ_long);

#ifdef SHMEM_C_GENERIC_32

/*  shmem_put_nb (GENERIC 32) test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_put_nb (GENERIC 32)  max_elements = %d\n",max_elements);
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++)
      srce_int[j] = (int)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++)
      targ_int[j] = (int)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put_nb(targ_int,srce_int,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_int[j] != (int)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_int[j],my_pe+j-1);
  }
  shmem_free(srce_int);  shmem_free(targ_int);

#else

/*  shmem_put_nb (GENERIC 64) test   */
  *flag = 0;
  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_put_nb (GENERIC 64)  max_elements = %d\n",max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();
  if ( (my_pe % 2) == 0 )
    for(j = 0; j < max_elements; j++)
      srce_long[j] = (long)(my_pe+j);
  else
    for(j = 0; j < max_elements; j++)
      targ_long[j] = (long)(my_pe+j);
  shmem_barrier_all();
  if ( (my_pe % 2) == 0 ) {
    shmem_put_nb(targ_long,srce_long,max_elements,my_pe+1,NULL);
    shmem_quiet();
    shmem_int_put(flag,one,(size_t)1,my_pe+1);
  } else {
    shmem_int_wait(flag,0);
    for(j = 0; j < max_elements; j++)
      if ( targ_long[j] != (long)(my_pe+j-1) )
        fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n",
                               my_pe,j,targ_long[j],my_pe+j-1);
  }
  shmem_free(srce_long);  shmem_free(targ_long);

#endif

#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}
int
main (int argc, char **argv)
{
    int i;
    int nextpe;
    int me, npes;
    int success1, success2, success3, success4, success5, success6, success7,
        success8;

    short src1[N];
    int src2[N];
    long src3[N];
    long double src4[N];
    long long src5[N];
    double src6[N];
    float src7[N];
    char *src8;
    short src9;
    int src10;
    long src11;
    double src12;
    float src13;

    short *dest1;
    int *dest2;
    long *dest3;
    long double *dest4;
    long long *dest5;
    double *dest6;
    float *dest7;
    char *dest8;
    short *dest9;
    int *dest10;
    long *dest11;
    double *dest12;
    float *dest13;


    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    if (npes > 1) {

        success1 = 0;
        success2 = 0;
        success3 = 0;
        success4 = 0;
        success5 = 0;
        success6 = 0;
        success7 = 0;
        success8 = 0;
        src8 = (char *) malloc (N * sizeof (char));

        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        src9 = (short) me;
        src10 = me;
        src11 = (long) me;
        src12 = (double) me;
        src13 = (float) me;


        dest1 = (short *) shmem_malloc (N * sizeof (*dest1));
        dest2 = (int *) shmem_malloc (N * sizeof (*dest2));
        dest3 = (long *) shmem_malloc (N * sizeof (*dest3));
        dest4 = (long double *) shmem_malloc (N * sizeof (*dest4));
        dest5 = (long long *) shmem_malloc (N * sizeof (*dest5));
        dest6 = (double *) shmem_malloc (N * sizeof (*dest6));
        dest7 = (float *) shmem_malloc (N * sizeof (*dest7));
        dest8 = (char *) shmem_malloc (4 * sizeof (*dest8));
        dest9 = (short *) shmem_malloc (sizeof (*dest9));
        dest10 = (int *) shmem_malloc (sizeof (*dest10));
        dest11 = (long *) shmem_malloc (sizeof (*dest11));
        dest12 = (double *) shmem_malloc (sizeof (*dest12));
        dest13 = (float *) shmem_malloc (sizeof (*dest13));

        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        *dest9 = -9;
        *dest10 = -9;
        *dest11 = -9;
        *dest12 = -9;
        *dest13 = -9.0;

        nextpe = (me + 1) % npes;

        /* Testing shmem_short_put, shmem_int_put, shmem_long_put,
           shmem_longdouble_put, shmem_longlong_put, shmem_double_put,
           shmem_float_put, shmem_putmem */
        shmem_barrier_all ();

        shmem_short_put (dest1, src1, N, nextpe);
        shmem_int_put (dest2, src2, N, nextpe);
        shmem_long_put (dest3, src3, N, nextpe);
        shmem_longdouble_put (dest4, src4, N, nextpe);
        shmem_longlong_put (dest5, src5, N, nextpe);
        shmem_double_put (dest6, src6, N, nextpe);
        shmem_float_put (dest7, src7, N, nextpe);
        shmem_putmem (dest8, src8, N * sizeof (char), nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    success1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    success2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    success3 = 1;
                }
                if (dest4[i] != (npes - 1)) {
                    success4 = 1;
                }
                if (dest5[i] != (npes - 1)) {
                    success5 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    success6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    success7 = 1;
                }
                if (dest8[i] != (npes - 1)) {
                    success8 = 1;
                }
            }

            if (success1 == 0)
                printf ("Test shmem_short_put: Passed\n");
            else
                printf ("Test shmem_short_put: Failed\n");
            if (success2 == 0)
                printf ("Test shmem_int_put: Passed\n");
            else
                printf ("Test shmem_int_put: Failed\n");
            if (success3 == 0)
                printf ("Test shmem_long_put: Passed\n");
            else
                printf ("Test shmem_long_put: Failed\n");
            if (success4 == 0)
                printf ("Test shmem_longdouble_put: Passed\n");
            else
                printf ("Test shmem_longdouble_put: Failed\n");
            if (success5 == 0)
                printf ("Test shmem_longlong_put: Passed\n");
            else
                printf ("Test shmem_longlong_put: Failed\n");
            if (success6 == 0)
                printf ("Test shmem_double_put: Passed\n");
            else
                printf ("Test shmem_double_put: Failed\n");
            if (success7 == 0)
                printf ("Test shmem_float_put: Passed\n");
            else
                printf ("Test shmem_float_put: Failed\n");
            if (success8 == 0)
                printf ("Test shmem_putmem: Passed\n");
            else
                printf ("Test shmem_putmem: Failed\n");

        }
        shmem_barrier_all ();

        /* Testing shmem_put32, shmem_put64, shmem_put128 */
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            shmem_put32 (dest2, src2, N, nextpe);
            shmem_put64 (dest3, src3, N, nextpe);
            shmem_put128 (dest4, src4, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        success4 = 1;
                    }
                }
                if (success2 == 0)
                    printf ("Test shmem_put32: Passed\n");
                else
                    printf ("Test shmem_put32: Failed\n");

                if (success3 == 0)
                    printf ("Test shmem_put64: Passed\n");
                else
                    printf ("Test shmem_put64: Failed\n");

                if (success4 == 0)
                    printf ("Test shmem_put128: Passed\n");
                else
                    printf ("Test shmem_put128: Failed\n");
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_put32 (dest1, src1, N, nextpe);
            shmem_put64 (dest2, src2, N, nextpe);
            shmem_put128 (dest3, src3, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }

                }
                if (success1 == 0)
                    printf ("Test shmem_put32: Passed\n");
                else
                    printf ("Test shmem_put32: Failed\n");
                if (success2 == 0)
                    printf ("Test shmem_put64: Passed\n");
                else
                    printf ("Test shmem_put64: Failed\n");

                if (success3 == 0)
                    printf ("Test shmem_put128: Passed\n");
                else
                    printf ("Test shmem_put128: Failed\n");
            }
        }

        /* Testing shmem_iput32, shmem_iput64, shmem_iput128 */
        shmem_barrier_all ();
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            shmem_iput32 (dest2, src2, 1, 2, N, nextpe);
            shmem_iput64 (dest3, src3, 1, 2, N, nextpe);
            shmem_iput128 (dest4, src4, 1, 2, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        success4 = 1;
                    }
                }
                if (success2 == 0)
                    printf ("Test shmem_iput32: Passed\n");
                else
                    printf ("Test shmem_iput32: Failed\n");

                if (success3 == 0)
                    printf ("Test shmem_iput64: Passed\n");
                else
                    printf ("Test shmem_iput64: Failed\n");

                if (success4 == 0)
                    printf ("Test shmem_iput128: Passed\n");
                else
                    printf ("Test shmem_iput128: Failed\n");
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_iput32 (dest1, src1, 1, 2, N, nextpe);
            shmem_iput64 (dest2, src2, 1, 2, N, nextpe);
            shmem_iput128 (dest3, src3, 1, 2, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }

                }
                if (success1 == 0)
                    printf ("Test shmem_iput32: Passed\n");
                else
                    printf ("Test shmem_iput32: Failed\n");
                if (success2 == 0)
                    printf ("Test shmem_iput64: Passed\n");
                else
                    printf ("Test shmem_iput64: Failed\n");

                if (success3 == 0)
                    printf ("Test shmem_iput128: Passed\n");
                else
                    printf ("Test shmem_iput128: Failed\n");
            }
        }

        /* Testing shmem_short_iput, shmem_int_iput, shmem_long_iput,
           shmem_double_iput, shmem_float_iput */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9;
        }
        success1 = 0;
        success2 = 0;
        success3 = 0;
        success6 = 0;
        success7 = 0;

        shmem_barrier_all ();

        shmem_short_iput (dest1, src1, 1, 2, N, nextpe);
        shmem_int_iput (dest2, src2, 1, 2, N, nextpe);
        shmem_long_iput (dest3, src3, 1, 2, N, nextpe);
        shmem_double_iput (dest6, src6, 1, 2, N, nextpe);
        shmem_float_iput (dest7, src7, 1, 2, N, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < N / 2; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    success1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    success2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    success3 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    success6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    success7 = 1;
                }
            }

            if (success1 == 0)
                printf ("Test shmem_short_iput: Passed\n");
            else
                printf ("Test shmem_short_iput: Failed\n");
            if (success2 == 0)
                printf ("Test shmem_int_iput: Passed\n");
            else
                printf ("Test shmem_int_iput: Failed\n");
            if (success3 == 0)
                printf ("Test shmem_long_iput: Passed\n");
            else
                printf ("Test shmem_long_iput: Failed\n");
            if (success6 == 0)
                printf ("Test shmem_double_iput: Passed\n");
            else
                printf ("Test shmem_double_iput: Failed\n");
            if (success7 == 0)
                printf ("Test shmem_float_iput: Passed\n");
            else
                printf ("Test shmem_float_iput: Failed\n");

        }


        /* Testing shmem_double_p, shmem_float_p, shmem_int_p, shmem_long_p,
           shmem_short_p */
        shmem_barrier_all ();

        shmem_short_p (dest9, src9, nextpe);
        shmem_int_p (dest10, src10, nextpe);
        shmem_long_p (dest11, src11, nextpe);
        shmem_double_p (dest12, src12, nextpe);
        shmem_float_p (dest13, src13, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            if (*dest9 == (npes - 1))
                printf ("Test shmem_short_p: Passed\n");
            else
                printf ("Test shmem_short_p: Failed\n");
            if (*dest10 == (npes - 1))
                printf ("Test shmem_int_p: Passed\n");
            else
                printf ("Test shmem_int_p: Failed\n");
            if (*dest11 == (npes - 1))
                printf ("Test shmem_long_p: Passed\n");
            else
                printf ("Test shmem_long_p: Failed\n");
            if (*dest12 == (npes - 1))
                printf ("Test shmem_double_p: Passed\n");
            else
                printf ("Test shmem_double_p: Failed\n");
            if (*dest13 == (npes - 1))
                printf ("Test shmem_float_p: Passed\n");
            else
                printf ("Test shmem_float_p: Failed\n");


        }

        shmem_barrier_all ();

        shmem_free (dest1);
        shmem_free (dest2);
        shmem_free (dest3);
        shmem_free (dest4);
        shmem_free (dest5);
        shmem_free (dest6);
        shmem_free (dest7);
        shmem_free (dest8);
        shmem_free (dest9);
        shmem_free (dest10);
        shmem_free (dest11);
        shmem_free (dest12);
        shmem_free (dest13);

    }
    else {
        printf ("Number of PEs must be > 1 to test shmem put, test skipped\n");
    }

    shmem_finalize ();

    return 0;
}
Пример #19
0
int main(int argc, char **argv)
{
  int i,j;
  short     oldjs, oldxs, my_pes;
  int       oldji, oldxi, my_pei;
  long      oldjl, oldxl, my_pel;
  long long oldjll,oldxll,my_pell;
  float     oldjf, oldxf, my_pef;
  double    oldjd, oldxd, my_ped;
  int my_pe,n_pes;
  size_t max_elements,max_elements_bytes;
  static short *xs;
  static int   *xi;
  static long  *xl;
  static long long *xll;
  static float  *xf;
  static double *xd;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
  my_pes = (short) my_pe;
  my_pei = (int)  my_pe;
  my_pel = (long) my_pe;
  my_pell = (long long) my_pe;
  my_pef = (float) my_pe;
  my_ped = (double) my_pe;
#ifdef HAVE_SET_CACHE_INV
  shmem_set_cache_inv();
#endif

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_swap(%s) n_pes=%d\n", argv[0],n_pes);

/*  test shmem_short_swap  */

  /*  shmalloc xs on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(short) * n_pes);
  xs = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xs[i] = 0;
  shmem_barrier_all();

  oldjs = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pes = my_pes + (short) 1;
      /* record PE value in xs[my_pe] -- save PE number */
      oldxs = shmem_short_swap(&xs[my_pe], my_pes, 0);
      /* printf("PE=%d,i=%d,my_pes=%d,oldxs=%d\n",my_pe,i,my_pes,oldxs); */
      if (oldxs != oldjs)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxs = %d expected = %d\n",
                         my_pe, n_pes, i, oldxs, oldjs);
      oldjs = my_pes;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xs[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xs[%d]=%d,i=%d\n",j,j,xs[j],i); */
      if (xs[j] != (short) i)
        fprintf(stderr, "FAIL PE %d of %d: xs[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xs[j],i);
      i++;
    }
  }
  shmem_free(xs);

/*  test shmem_int_swap  */

  /*  shmalloc xi on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  shmem_barrier_all();

  oldji = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pei = my_pei + (int) 1;
      /* record PE value in xi[my_pe] -- save PE number */
      oldxi = shmem_int_swap(&xi[my_pe], my_pei, 0);
      /* printf("PE=%d,i=%d,my_pei=%d,oldxi=%d\n",my_pe,i,my_pei,oldxi); */
      if (oldxi != oldji)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxi = %d expected = %d\n",
                         my_pe, n_pes, i, oldxi, oldji);
      oldji = my_pei;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xi[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xi[%d]=%d,i=%d\n",j,j,xi[j],i); */
      if (xi[j] != i)
        fprintf(stderr, "FAIL PE %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xi[j],i);
      i++;
    }
  }
  shmem_free(xi);

/*  test shmem_long_swap  */

  /*  shmalloc xl on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  shmem_barrier_all();

  oldjl = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pel = my_pel + (long) 1;
      /* record PE value in xl[my_pe] -- save PE number */
      oldxl = shmem_long_swap(&xl[my_pe], my_pel, 0);
      /* printf("PE=%d,i=%d,my_pel=%d,oldxl=%d\n",my_pe,i,my_pel,oldxl); */
      if (oldxl != oldjl)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxl = %d expected = %d\n",
                         my_pe, n_pes, i, oldxl, oldjl);
      oldjl = my_pel;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xl[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xl[%d]=%d,i=%d\n",j,j,xl[j],i); */
      if (xl[j] != (long)i)
        fprintf(stderr, "FAIL PE %d of %d: xl[%d] = %ld expected = %d\n",
                         my_pe, n_pes, j, xl[j],i);
      i++;
    }
  }
  shmem_free(xl);

/*  test shmem_longlong_swap  */

#ifdef HAVE_LONG_LONG

  /*  shmalloc xll on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long long) * n_pes);
  xll = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xll[i] = 0;
  shmem_barrier_all();

  oldjll = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pell = my_pell + (long long) 1;
      /* record PE value in xll[my_pe] -- save PE number */
      oldxll = shmem_longlong_swap(&xll[my_pe], my_pell, 0);
      /* printf("PE=%d,i=%d,my_pell=%ld,oldxll=%d\n",my_pe,i,my_pell,oldxll); */
      if (oldxll != (long long) oldjll)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxll = %ld expected = %ld\n",
                         my_pe, n_pes, i, oldxll, oldjll);
      oldjll = my_pell;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xll[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xll[%d]=%ld,i=%d\n",j,j,xll[j],i); */
      if (xll[j] != (long long) i)
        fprintf(stderr, "FAIL PE %d of %d: xll[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xll[j],i);
      i++;
    }
  }
  shmem_free(xll);

#endif

/*  test shmem_float_swap  */

  /*  shmalloc xf on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(float) * n_pes);
  xf = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xf[i] = (float) 0;
  shmem_barrier_all();

  oldjf = (float) 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pef = my_pef + (float) 1;
      /* record PE value in xf[my_pe] -- save PE number */
      oldxf = shmem_float_swap(&xf[my_pe], my_pef, 0);
      /* printf("PE=%d,i=%d,my_pef=%10.2f,oldxf=%10.2f\n",my_pe,i,my_pef,oldxf); */
      if (oldxf != oldjf)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxf = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, i, oldxf, oldjf);
      oldjf = my_pef;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xs[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xf[%d]=%10.2f,i=%d\n",j,j,xf[j],i); */
      if (xf[j] != (float) i)
        fprintf(stderr, "FAIL PE %d of %d: xf[%d] = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, j-1, xf[j], (float)i);
      i++;
    }
  }
  shmem_free(xf);

/*  test shmem_double_swap  */

  /*  shmalloc xd on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(double) * n_pes);
  xd = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xd[i] = (double) 0;
  shmem_barrier_all();

  oldjd = (double) 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_ped = my_ped + (double) 1;
      /* record PE value in xd[my_pe] -- save PE number */
      oldxd = shmem_double_swap(&xd[my_pe], my_ped, 0);
      /* printf("PE=%d,i=%d,my_ped=%10.2f,oldxd=%10.2f\n",my_pe,i,my_ped,oldxd);
 */
      if (oldxd != oldjd)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxd = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, i, oldxd, oldjd);
      oldjd = my_ped;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xd[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xd[%d]=%10.2f,i=%d\n",j,j,xd[j],i); */
      if (xd[j] != (double) i)
        fprintf(stderr, "FAIL PE %d of %d: xd[%d] = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, j, xd[j], (double)i);
      i++;
    }
  }
  shmem_free(xd);

#ifdef SHMEM_C_GENERIC_32

/*  test shmem_swap (GENERIC 32)  */

  my_pei = (int)  my_pe;
  /*  shmalloc xi on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  shmem_barrier_all();

  oldji = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pei = my_pei + (int) 1;
      /* record PE value in xi[my_pe] -- save PE number */
      oldxi = shmem_swap(&xi[my_pe], my_pei, 0);
      /* printf("PE=%d,i=%d,my_pei=%d,oldxi=%d\n",my_pe,i,my_pei,oldxi); */
      if (oldxi != oldji)
        fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxi = %d expected = %d\n",
                         my_pe, n_pes, i, oldxi, oldji);
      oldji = my_pei;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xi[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xi[%d]=%d,i=%d\n",j,j,xi[j],i); */
      if (xi[j] != i)
        fprintf(stderr, "FAIL pe %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xi[j],i);
      i++;
    }
  }
  shmem_free(xi);

#else

/*  test shmem_swap (GENERIC 64)  */

  my_pel = (long) my_pe;
  /*  shmalloc xl on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  shmem_barrier_all();

  oldjl = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pel = my_pel + (long) 1;
      /* record PE value in xl[my_pe] -- save PE number */
      oldxl = shmem_swap(&xl[my_pe], my_pel, 0);
      /* printf("PE=%d,i=%d,my_pel=%d,oldxl=%d\n",my_pe,i,my_pel,oldxl); */
      if (oldxl != oldjl)
        fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxl = %d expected = %d\n",
                         my_pe, n_pes, i, oldxl, oldjl);
      oldjl = my_pel;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xl[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xl[%d]=%d,i=%d\n",j,j,xl[j],i); */
      if (xl[j] != (long)i)
        fprintf(stderr, "FAIL pe %d of %d: xl[%d] = %ld expected = %d\n",
                         my_pe, n_pes, j, xl[j],i);
      i++;
    }
  }
  shmem_free(xl);

#endif

  shmem_barrier_all();
#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}
Пример #20
0
int
main(int argc, char* argv[])
{
	int c, j, loops, k, l;
	int my_pe, nProcs, nWorkers;
	int  nWords=1;
	int  failures=0;
	char *prog_name;
	long *wp,work_sz;

    for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) {
        pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] =
            pSync4[j] = SHMEM_SYNC_VALUE;
    }

	shmem_init();
	my_pe = shmem_my_pe();
	nProcs = shmem_n_pes();
	nWorkers = nProcs - 1;

	if (nProcs == 1) {
   		Rfprintf(stderr,
			"ERR - Requires > 1 PEs\n");
		shmem_finalize();
		return 0;
	}

	for(j=0; j < nProcs; j++)
		if ( shmem_pe_accessible(j) != 1 ) {
			fprintf(stderr,
				"ERR - pe %d not accessible from pe %d\n",
				j, my_pe);
		}

	prog_name = strrchr(argv[0],'/');
	if ( prog_name )
		prog_name++;
	else
		prog_name = argv[0];

	while((c=getopt(argc,argv,"hvM:s")) != -1) {
		switch(c) {
		  case 's':
			Slow++;
			break;
		  case 'v':
			Verbose++;
			break;
		  case 'M':
			output_mod = atoi(optarg);
			if (output_mod <= 0) {
    				Rfprintf(stderr, "ERR - output modulo arg out of "
						"bounds '%d'?\n", output_mod);
				shmem_finalize();
				return 1;
			}
   			Rfprintf(stderr,"%s: output modulo %d\n",
					prog_name,output_mod);
			break;
		  case 'h':
			Rfprintf(stderr,
				"usage: %s {nWords-2-put(%d)K/M} {Loop-count(%d)K/M}\n",
				prog_name, DFLT_NWORDS, DFLT_LOOPS);
			shmem_finalize();
			return 1;
		  default:
			shmem_finalize();
			return 1;
		}
	}

	if (optind == argc)
		nWords = DFLT_NWORDS;
	else {
		nWords = atoi_scaled(argv[optind++]);
		if (nWords <= 0) {
    			Rfprintf(stderr, "ERR - Bad nWords arg '%d'?\n", nWords);
			shmem_finalize();
			return 1;
		}
	}

	if (optind == argc)
		loops = DFLT_LOOPS;
	else {
		loops = atoi_scaled(argv[optind++]);
		if (loops <= 0 || loops > 1000000) {
    			Rfprintf(stderr,
				"ERR - loops arg out of bounds '%d'?\n", loops);
			shmem_finalize();
			return 1;
		}
	}

    work_sz = (nProcs*nWords) * sizeof(long);
	work = shmem_malloc( work_sz );
	if ( !work ) {
   		fprintf(stderr,"[%d] ERR - work = shmem_malloc(%ld) ?\n",my_pe,work_sz);
		shmem_global_exit(1);
	}

	Target = shmem_malloc( 2 * nWords * sizeof(long) );
	if ( !Target ) {
   		fprintf(stderr,"[%d] ERR - Target = shmem_malloc(%ld) ?\n",
                my_pe, (nWords * sizeof(long)));
		shmem_global_exit(1);
	}
    src = &Target[nWords];

#if _DEBUG
	Rprintf("%s: %d loops of %d longs per put\n",prog_name,loops,nWords);
#endif

	for(j=0; j < nWords; j++)
		src[j] = VAL;

	for(j=0; j < loops; j++) {

#if _DEBUG
		if ( Verbose && (j==0 || (j % output_mod) == 0) )
    			fprintf(stderr,"[%d] +(%d)\n", my_pe,j);
#endif
        shmem_barrier(0, 0, nProcs, pSync0);
		if ( my_pe == 0 ) {
			int p;
			for(p=1; p < nProcs; p++)
				shmem_long_put(Target, src, nWords, p);
		}
		else {
			if (Slow) {
				/* wait for each put to complete */
				for(k=0; k < nWords; k++)
					shmem_wait(&Target[k],my_pe);
			} else {
				/* wait for last word to be written */
				shmem_wait(&Target[nWords-1],my_pe);
			}
		}
#if _DEBUG
		if ( Verbose && (j==0 || (j % output_mod) == 0) )
    			fprintf(stderr,"[%d] -(%d)\n", shmem_my_pe(),j);
#endif
        shmem_barrier(0, 0, nProcs, pSync1);

		RDprintf("Workers[1 ... %d] verify Target data put by proc0\n",
			nWorkers);

		/* workers verify put data is expected */
		if ( my_pe != 0 ) {
			for(k=0; k < nWords; k++) {
				if (Target[k] != VAL) {
					fprintf(stderr, "[%d] Target[%d] %#lx "
							"!= %#x?\n",
							my_pe,k,Target[k],VAL);
					failures++;
				}
				assert(Target[k] == VAL);
				Target[k] = my_pe;
			}
		}
		else	/* clear results buffer, workers will put here */
			memset(work, 0, work_sz);

        shmem_barrier(0, 0, nProcs, pSync2);

		RDprintf("Workers[1 ... %d] put Target data to PE0 work "
			"vector\n",nWorkers);

		if ( my_pe != 0 ) {
			/* push nWords of val my_pe back to PE zero */
			shmem_long_put(&work[my_pe * nWords], Target, nWords, 0);
		}
		else {
			/* wait for procs 1 ... nProcs to complete put()s */
			for(l=1; l < nProcs; l++) {
				wp = &work[ l*nWords ]; // procs nWords chunk
#if 1
				/* wait for last long to be written from each PE */
				shmem_wait(&wp[nWords-1],0);
#else
				for(k=0; k < nWords; k++)
					shmem_wait(&wp[k],0);
#endif
			}
		}

        shmem_barrier(0, 0, nProcs, pSync3);

		if ( my_pe == 0 ) {
			RDprintf("Loop(%d) PE0 verifing work data.\n",j);
			for(l=1; l < nProcs; l++) {
				wp = &work[ l*nWords ]; // procs nWords chunk
				for(k=0; k < nWords; k++) {
					if (wp[k] != l) {
						fprintf(stderr,
						"[0] PE(%d)_work[%d] %ld "
							"!= %d?\n",
							l,k,work[k],l);
						failures++;
					}
					assert(wp[k] == l);
					break;
				}
				if (failures)
					break;
			}
		}
        shmem_barrier(0, 0, nProcs, pSync4);
#if _DEBUG
		if (loops > 1) {
			Rfprintf(stderr,".");
			RDprintf("Loop(%d) Pass.\n",j);
		}
#endif
	}

    shmem_free( work );
    shmem_free( Target );

#if _DEBUG
	Rfprintf(stderr,"\n");fflush(stderr);
	shmem_barrier_all();
	RDprintf("%d(%d) Exit(%d)\n", my_pe, nProcs, failures);
#endif

	shmem_finalize();

	return failures;
}
Пример #21
0
int
main(int argc, char **argv)
{
    int me, c, l, j;
    int nWords, loops, incWords;

    pgm = strrchr(argv[0],'/');
    if ( pgm )
        pgm++;
    else
        pgm = argv[0];

    shmem_init();
    me = shmem_my_pe();

    while ((c = getopt (argc, argv, "hpv")) != -1)
        switch (c)
        {
        case 'v':
            Verbose++;
            break;
        case 'h':
        default:
            usage();
            break;
        }

    if (optind == argc)
        nWords = DFLT_NWORDS;
    else if ((nWords = getSize (argv[optind++])) <= 0)
        usage ();

    if (optind == argc)
            loops = DFLT_LOOPS;
    else if ((loops = getSize (argv[optind++])) < 0)
        usage ();

    if (optind == argc)
        incWords = DFLT_INCR;
    else if ((incWords = getSize (argv[optind++])) < 0)
        usage ();

    if (Verbose && me == 0)
        fprintf (stderr, "nWords(%d) loops(%d) incWords(%d)]\n",
                 nWords, loops, incWords);

    for(l=0; l < loops; l++)
    {
        /* align 2**2 ... 2**23; 24 exceeds symetric heap max */
        for(j=0,c=2; j < 23; j++,c<<=1)
        {
            target_sz = nWords * sizeof(DataType);
            if (!(target = (DataType *)shmem_align(c,target_sz))) {
                perror ("Failed target memory allocation");
                exit (1);
            }

            if ( (unsigned long)target & (c-1) ) {
                    fprintf(stdout,"PE%d Unaligned? ",me);
                    fflush(stdout);
                    fprintf(stdout,"align[%#09x]target %p\n",
                                        c, (void*)target);
                    shmem_global_exit(1);
            }
            else if (Verbose > 1 && me == 0)
                    fprintf(stdout,"align[%#09x]target %p\n",
                                        c, (void*)target);
            shmem_barrier_all();
            shmem_free(target);
        }
        nWords += incWords;
        if (Verbose && me == 0)
            fprintf(stdout,"Fini loop %d\n",(l+1));
    }

    shmem_finalize();

    return 0;
}
Пример #22
0
void shfree(void *ptr)
{
    shmem_free(ptr);
}
int
main(void)
{
    int i;
    int nextpe;
    int me, npes;
    int success1, success2, success3, success4,
        success5, success6, success7, success8;
    short dest1[N];
    int dest2[N];
    long dest3[N];
    long double dest4[N];
    long long dest5[N];
    double dest6[N];
    float dest7[N];
    char *dest8;
    short dest9;
    int dest10;
    long dest11;
    double dest12;
    float dest13;
    short *src1;
    int *src2;
    long *src3;
    long double *src4;
    long long *src5;
    double *src6;
    float *src7;
    char *src8;
    short *src9;
    int *src10;
    long *src11;
    double *src12;
    float *src13;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    if (npes > 1) {
        success1 = 0;
        success2 = 0;
        success3 = 0;
        success4 = 0;
        success5 = 0;
        success6 = 0;
        success7 = 0;
        success8 = 0;
        dest8 = (char *) malloc(N * sizeof(char));

        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        dest9 = -9;
        dest10 = -9;
        dest11 = -9;
        dest12 = -9;
        dest13 = -9;
        src1 = (short *) shmem_malloc(N * sizeof(*src1));
        src2 = (int *) shmem_malloc(N * sizeof(*src2));
        src3 = (long *) shmem_malloc(N * sizeof(*src3));
        src4 = (long double *) shmem_malloc(N * sizeof(*src4));
        src5 = (long long *) shmem_malloc(N * sizeof(*src5));
        src6 = (double *) shmem_malloc(N * sizeof(*src6));
        src7 = (float *) shmem_malloc(N * sizeof(*src7));
        src8 = (char *) shmem_malloc(4 * sizeof(*src8));
        src9 = (short *) shmem_malloc(sizeof(*src9));
        src10 = (int *) shmem_malloc(sizeof(*src10));
        src11 = (long *) shmem_malloc(sizeof(*src11));
        src12 = (double *) shmem_malloc(sizeof(*src12));
        src13 = (float *) shmem_malloc(sizeof(*src13));
        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        } *src9 = (short) me;
        *src10 = me;
        *src11 = (long) me;
        *src12 = (double) me;
        *src13 = (float) me;
        nextpe = (me + 1) % npes;

        /* Testing shmem_short_get, shmem_short_get, shmem_int_get,
           shmem_long_get, shmem_longdouble_get, shmem_longlong_get,
           shmem_double_get, shmem_float_get, shmem_getmem */
        shmem_barrier_all();
        shmem_short_get(dest1, src1, N, nextpe);
        shmem_int_get(dest2, src2, N, nextpe);
        shmem_long_get(dest3, src3, N, nextpe);
        shmem_longdouble_get(dest4, src4, N, nextpe);
        shmem_longlong_get(dest5, src5, N, nextpe);
        shmem_double_get(dest6, src6, N, nextpe);
        shmem_float_get(dest7, src7, N, nextpe);
        shmem_getmem(dest8, src8, N * sizeof(char), nextpe);
        shmem_barrier_all();
        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (1)) {
                    success1 = 1;
                }
                if (dest2[i] != (1)) {
                    success2 = 1;
                }
                if (dest3[i] != (1)) {
                    success3 = 1;
                }
                if (dest4[i] != (1)) {
                    success4 = 1;
                }
                if (dest5[i] != (1)) {
                    success5 = 1;
                }
                if (dest6[i] != (1)) {
                    success6 = 1;
                }
                if (dest7[i] != (1)) {
                    success7 = 1;
                }
                if (dest8[i] != (1)) {
                    success8 = 1;
                }
            }
            if (success1 == 0)
                printf("Test shmem_short_get: Passed\n");

            else
                printf("Test shmem_short_get: Failed\n");
            if (success2 == 0)
                printf("Test shmem_int_get: Passed\n");

            else
                printf("Test shmem_int_get: Failed\n");
            if (success3 == 0)
                printf("Test shmem_long_get: Passed\n");

            else
                printf("Test shmem_long_get: Failed\n");
            if (success4 == 0)
                printf("Test shmem_longdouble_get: Passed\n");

            else
                printf("Test shmem_longdouble_get: Failed\n");
            if (success5 == 0)
                printf("Test shmem_longlong_get: Passed\n");

            else
                printf("Test shmem_longlong_get: Failed\n");
            if (success6 == 0)
                printf("Test shmem_double_get: Passed\n");

            else
                printf("Test shmem_double_get: Failed\n");
            if (success7 == 0)
                printf("Test shmem_float_get: Passed\n");

            else
                printf("Test shmem_float_get: Failed\n");
            if (success8 == 0)
                printf("Test shmem_getmem: Passed\n");

            else
                printf("Test shmem_getmem: Failed\n");
        }
        shmem_barrier_all();

        /* Testing shmem_get32, shmem_get64, shmem_get128 */
        if (sizeof(int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;
            shmem_barrier_all();
            shmem_get32(dest2, src2, N, nextpe);
            shmem_get64(dest3, src3, N, nextpe);
            shmem_get128(dest4, src4, N, nextpe);
            shmem_barrier_all();
            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (1)) {
                        success4 = 1;
                    }
                }
                if (success2 == 0)
                    printf("Test shmem_get32: Passed\n");

                else
                    printf("Test shmem_get32: Failed\n");
                if (success3 == 0)
                    printf("Test shmem_get64: Passed\n");

                else
                    printf("Test shmem_get64: Failed\n");
                if (success4 == 0)
                    printf("Test shmem_get128: Passed\n");

                else
                    printf("Test shmem_get128: Failed\n");
            }
        }

        else if (sizeof(int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;
            shmem_barrier_all();
            shmem_get32(dest1, src1, N, nextpe);
            shmem_get64(dest2, src2, N, nextpe);
            shmem_get128(dest3, src3, N, nextpe);
            shmem_barrier_all();
            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (1)) {
                        success3 = 1;
                    }
                }
                if (success1 == 0)
                    printf("Test shmem_get32: Passed\n");

                else
                    printf("Test shmem_get32: Failed\n");
                if (success2 == 0)
                    printf("Test shmem_get64: Passed\n");

                else
                    printf("Test shmem_get64: Failed\n");
                if (success3 == 0)
                    printf("Test shmem_get128: Passed\n");

                else
                    printf("Test shmem_get128: Failed\n");
            }
        }

        /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g,
           shmem_short_g */
        shmem_barrier_all();
        dest9 = shmem_short_g(src9, nextpe);
        dest10 = shmem_int_g(src10, nextpe);
        dest11 = shmem_long_g(src11, nextpe);
        dest12 = shmem_double_g(src12, nextpe);
        dest13 = shmem_float_g(src13, nextpe);
        shmem_barrier_all();
        if (me == 0) {
            if (dest9 == 1)
                printf("Test shmem_short_g: Passed\n");

            else
                printf("Test shmem_short_g: Failed\n");
            if (dest10 == 1)
                printf("Test shmem_int_g: Passed\n");

            else
                printf("Test shmem_int_g: Failed\n");
            if (dest11 == 1)
                printf("Test shmem_long_g: Passed\n");

            else
                printf("Test shmem_long_g: Failed\n");
            if (dest12 == 1)
                printf("Test shmem_double_g: Passed\n");

            else
                printf("Test shmem_double_g: Failed\n");
            if (dest13 == 1)
                printf("Test shmem_float_g: Passed\n");

            else
                printf("Test shmem_float_g: Failed\n");
        }
        shmem_barrier_all();
        shmem_free(src1);
        shmem_free(src2);
        shmem_free(src3);
        shmem_free(src4);
        shmem_free(src5);
        shmem_free(src6);
        shmem_free(src7);
        shmem_free(src8);
    }

    else {
        printf("Number of PEs must be > 1 to test shmem get, test skipped\n");
    }

    shmem_finalize();

    return 0;
}
Пример #24
0
int main(int argc, char **argv)
{
  int i,j;
  short     modjs, oldjs, oldxmodjs, oldxas, my_pes, vals;
  int       modji, oldji, oldxmodji, oldxai, my_pei, vali;
  long      modjl, oldjl, oldxmodjl, oldxal, my_pel, vall;
  long long modjll,oldjll,oldxmodjll,oldxall,my_pell,valll;
  int my_pe,n_pes;
  size_t max_elements,max_elements_bytes;
  static short *xs,*xas;
  static int   *xi,*xai;
  static long  *xl,*xal;
  static long long *xll,*xall;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
  my_pes = (short) my_pe;
  my_pei = (int)  my_pe;
  my_pel = (long) my_pe;
  my_pell = (long long) my_pe;
  vals = 1;  vali = 1;  vall = 1;  valll = 1;
#ifdef HAVE_SET_CACHE_INV
  shmem_set_cache_inv();
#endif

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_cswap(%s) n_pes=%d\n", argv[0],n_pes);

/*  test shmem_short_finc & shmem_short_swap & shmem_short_cswap */

  /*  shmalloc xs & xas on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(short) * n_pes);
  xs = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xs[i] = 0;
  max_elements_bytes = (size_t) (sizeof(short) * n_pes * ITER);
  xas = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xas[i] = 1;
    } else {
      xas[i] = 0;
    }
  count_short = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldjs = shmem_short_finc(&count_short, 0);  /* get index oldjs from PE 0 */
      modjs = (oldjs % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xas[oldjs] --
             tells PE involved for each count */
      oldxas = shmem_short_cswap(&xas[oldjs], vals, my_pes, 0);
      /* printf("PE=%d,i=%d,oldjs=%d,oldxas=%d\n",my_pe,i,oldjs,oldxas); */
      if (oldxas == 1) {
          /* record PE value in xs[modjs] */
        oldxmodjs = shmem_short_swap(&xs[modjs], my_pes, 0); 
        /* printf("PE=%d,oldjs=%d,modjs=%d,oldxmodjs=%d\n",
                   my_pe,oldjs,modjs,oldxmodjs); */
      }
      if (oldxas != 0 && oldxas != 1)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxas = %d expected = 0\n",
                         my_pe, n_pes, i, oldxas);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xs[j] array PEs vs saved ans in xas[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xs[%d]=%d,xas[%d]=%d\n",j,j-1,xs[j-1],i,xas[i]); */
      if (xs[j-1] != xas[i])
        fprintf(stderr, "FAIL PE %d of %d: xs[%d] = %d expected = %d\n", 
                         my_pe, n_pes, j-1, xs[j-1], xas[i]);
      i++;
    }
  }
  shmem_free(xs);  shmem_free(xas);

/*  test shmem_int_finc & shmem_int_swap & shmem_int_cswap */

  /*  shmalloc xi & xai on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  max_elements_bytes = (size_t) (sizeof(int) * n_pes * ITER);
  xai = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xai[i] = 1;
    } else {
      xai[i] = 0;
    }
  count_int = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldji = shmem_int_finc(&count_int, 0);  /* get index oldji from PE 0 */
      modji = (oldji % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xai[oldji] --
             tells PE involved for each count */
      oldxai = shmem_int_cswap(&xai[oldji], vali, my_pei, 0);
      /* printf("PE=%d,i=%d,oldji=%d,oldxai=%d\n",my_pe,i,oldji,oldxai); */
      if (oldxai == 1) {
          /* record PE value in xi[modji] */
        oldxmodji = shmem_int_swap(&xi[modji], my_pei, 0);
        /* printf("PE=%d,oldji=%d,modji=%d,oldxmodji=%d\n",
                   my_pe,oldji,modji,oldxmodji); */
      }
      if (oldxai != 0 && oldxai != 1)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxai = %d expected = 0\n",
                         my_pe, n_pes, i, oldxai);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xi[j] array PEs vs saved ans in xai[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xi[%d]=%d,xai[%d]=%d\n",j,j-1,xi[j-1],i,xai[i]); */
      if (xi[j-1] != xai[i])
        fprintf(stderr, "FAIL PE %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j-1, xi[j-1], xai[i]);
      i++;
    }
  }
  shmem_free(xi);  shmem_free(xai);

/*  test shmem_long_finc & shmem_long_swap & shmem_long_cswap */

  /*  shmalloc xl & xal on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  max_elements_bytes = (size_t) (sizeof(long) * n_pes * ITER);
  xal = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xal[i] = 1;
    } else {
      xal[i] = 0;
    }
  count_long = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldjl = shmem_long_finc(&count_long, 0);  /* get index oldjl from PE 0 */
      modjl = (oldjl % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xal[oldjl] --
             tells PE involved for each count */
      oldxal = shmem_long_cswap(&xal[oldjl], vall, my_pel, 0);
      /* printf("PE=%d,i=%d,oldjl=%d,oldxal=%d\n",my_pe,i,oldjl,oldxal); */
      if (oldxal == 1) {
          /* record PE value in xl[modjl] */
      oldxmodjl = shmem_long_swap(&xl[modjl], my_pel, 0);
      /* printf("PE=%d,oldjl=%ld,modjl=%ld,oldxmodjl=%ld\n",
                 my_pe,oldjl,modjl,oldxmodjl); */
      }
      if (oldxal != 0 && oldxal != 1)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxal = %ld expected = 0\n",
                         my_pe, n_pes, i, oldxal);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xl[j] array PEs vs saved ans in xal[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xl[%d]=%ld,xal[%d]=%ld\n",j,j-1,xl[j-1],i,xal[i]); */
      if (xl[j-1] != xal[i])
        fprintf(stderr, "FAIL PE %d of %d: xl[%d] = %ld expected = %ld\n",
                         my_pe, n_pes, j-1, xl[j-1], xal[i]);
      i++;
    }
  }
  shmem_free(xl);  shmem_free(xal);

/*  test shmem_longlong_finc & shmem_longlong_swap & shmem_longlong_cswap */

#ifdef HAVE_LONG_LONG

  /*  shmalloc xll & xall on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long long) * n_pes);
  xll = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xll[i] = 0;
  max_elements_bytes = (size_t) (sizeof(long long) * n_pes * ITER);
  xall = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xall[i] = 1;
    } else {
      xall[i] = 0;
    }
  count_longlong = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldjll = shmem_longlong_finc(&count_longlong, 0);  /* get index oldjll from PE 0 */
      modjll = (oldjll % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xall[oldjll] --
             tells PE involved for each count */
      oldxall = shmem_longlong_cswap(&xall[oldjll], valll, my_pell, 0);
      /* printf("PE=%d,i=%d,oldjll=%d,oldxall=%d\n",my_pe,i,oldjll,oldxall); */
      if (oldxall == 1) {
          /* record PE value in xll[modjll] */
        oldxmodjll = shmem_longlong_swap(&xll[modjll], my_pell, 0);
        /* printf("PE=%d,oldjll=%ld,modjll=%ld,oldxmodjll=%ld\n",
                   my_pe,oldjll,modjll,oldxmodjll); */
      }
      if (oldxall != 0 && oldxall != 1)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxall = %ld expected = 0\n",
                         my_pe, n_pes, i, oldxall);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xll[j] array PEs vs saved ans in xall[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xll[%d]=%ld,xall[%d]=%ld\n",j,j-1,xll[j-1],i,xall[i]); */
      if (xll[j-1] != xall[i])
        fprintf(stderr, "FAIL PE %d of %d: xll[%d] = %ld expected = %ld\n",
                         my_pe, n_pes, j-1, xll[j-1], xall[i]);
      i++;
    }
  }
  shmem_free(xll);  shmem_free(xall);

#endif

#ifdef SHMEM_C_GENERIC_32

/*  test shmem_finc & shmem_swap & shmem_cswap (GENERIC 32)  */

  /*  shmalloc xi & xai on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  max_elements_bytes = (size_t) (sizeof(int) * n_pes * ITER);
  xai = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xai[i] = 1;
    } else {
      xai[i] = 0;
    }
  count_int = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldji = shmem_finc(&count_int, 0);  /* get index oldji from PE 0 */
      modji = (oldji % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xai[oldji] --
             tells PE involved for each count */
      oldxai = shmem_cswap(&xai[oldji], vali, my_pei, 0);
      /* printf("PE=%d,i=%d,oldji=%d,oldxai=%d\n",my_pe,i,oldji,oldxai); */
      if (oldxai == 1) {
            /* record PE value in xi[modji] */
        oldxmodji = shmem_swap(&xi[modji], my_pei, 0);
        /* printf("PE=%d,oldji=%d,modji=%d,oldxmodji=%d\n",
                   my_pe,oldji,modji,oldxmodji); */
      }
      if (oldxai != 0 && oldxai != 1)
        fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxai = %d expected = 0\n",
                         my_pe, n_pes, i, oldxai);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xi[j] array PEs vs saved ans in xai[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xi[%d]=%d,xai[%d]=%d\n",j,j-1,xi[j-1],i,xai[i]); */
      if (xi[j-1] != xai[i])
        fprintf(stderr, "FAIL pe %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j-1, xi[j-1], xai[i]);
      i++;
    }
  }
  shmem_free(xi);  shmem_free(xai);

#else

/*  test shmem_finc & shmem_swap & shmem_cswap (GENERIC 64)  */

  /*  shmalloc xl & xal on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  max_elements_bytes = (size_t) (sizeof(long) * n_pes * ITER);
  xal = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes*ITER; i++)
    if (((i/(n_pes-1)) % 2) == 0) {
      xal[i] = 1;
    } else {
      xal[i] = 0;
    }
  count_long = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (i == ITER-1) shmem_barrier_all();  /* all PEs participate last time */
    if (my_pe != 0) {
      oldjl = shmem_finc(&count_long, 0);  /* get index oldjl from PE 0 */
      modjl = (oldjl % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* conditionally record PE value in xal[oldjl] --
             tells PE involved for each count */
      oldxal = shmem_cswap(&xal[oldjl], vall, my_pell, 0);
      /* printf("PE=%d,i=%d,oldjl=%d,oldxal=%d\n",my_pe,i,oldjl,oldxal); */
      if (oldxal == 1) {
            /* record PE value in xl[modjl] */
        oldxmodjl = shmem_swap(&xl[modjl], my_pell, 0);
        /* printf("PE=%d,oldjl=%ld,modjl=%ld,oldxmodjl=%ld\n",
                   my_pe,oldjl,modjl,oldxmodjl); */
      }
      if (oldxal != 0 && oldxal != 1)
        fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxal = %ld expected = 0\n",
                         my_pe, n_pes, i, oldxal);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last xl[j] array PEs vs saved ans in xal[i] */
    i = (ITER-2)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      /* printf("j=%d,xl[%d]=%ld,xal[%d]=%ld\n",j,j-1,xl[j-1],i,xal[i]); */
      if (xl[j-1] != xal[i])
        fprintf(stderr, "FAIL pe %d of %d: xl[%d] = %ld expected = %10.2f\n",
                         my_pe, n_pes, j-1, xl[j-1], xal[i]);
      i++;
    }
  }
  shmem_free(xl);  shmem_free(xal);

#endif

  shmem_barrier_all();
#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}
Пример #25
0
int
main(int argc, char* argv[])
{
	int c, j, cloop, loops = DFLT_LOOPS;
	int mpe, num_pes;
	int nWords=1;
	int nIncr=1;
	int failures=0;
	char *pgm;

	shmem_init();
	mpe = shmem_my_pe();
	num_pes = shmem_n_pes();

	if (num_pes == 1) {
   		Rfprintf(stderr,
			"ERR - Requires > 1 PEs\n");
		shmem_finalize();
		return 0;
	}
	pgm = strrchr(argv[0],'/');
	if ( pgm )
		pgm++;
	else
		pgm = argv[0];

	while((c=getopt(argc,argv,"hqVvl:")) != -1) {
		switch(c) {
		  case 'V':
		  case 'v':
			Verbose++;
			break;
		  case 'l':
            loops = atoi(optarg);
            break;
		  case 'h':
			Rfprintf(stderr,
                "usage: %s {-l loopcnt(%d)} {numLongs(%d)} {loopIncr(%d)}\n",
                    pgm,DFLT_LOOPS,DFLT_NWORDS,DFLT_INCR);
			shmem_finalize();
			return 1;
		  default:
			shmem_finalize();
			return 1;
		}
	}

	if (optind == argc)
		nWords = DFLT_NWORDS;
	else {
		nWords = atoi_scaled(argv[optind++]);
		if (nWords <= 0) {
    			Rfprintf(stderr, "ERR - Bad nBytes arg?\n");
			shmem_finalize();
			return 1;
		}
	}

	if (optind == argc)
		nIncr = DFLT_INCR;
	else {
		loops = atoi(argv[optind++]);
		if (nIncr <= 0 ) {
   		    Rfprintf(stderr, "ERR - incLongs arg out of bounds '%d'?\n", nIncr);
			shmem_finalize();
			return 1;
		}
	}

    if ( nWords % 8 ) { // integral multiple of longs
	    Rprintf("%s: nWords(%d) not a multiple of %ld?\n",
            pgm,nWords,sizeof(long));
		shmem_finalize();
		return 1;
    }

    for (c = 0; c < _SHMEM_COLLECT_SYNC_SIZE;c++)
        pSync[c] = _SHMEM_SYNC_VALUE;

    if (Verbose && mpe == 0)
	    fprintf(stderr,"loops(%d) nWords(%d) incr-per-loop(%d)\n",
            loops,nWords,nIncr);

	for(cloop=1; cloop <= loops; cloop++) {

        c = (sizeof(long)*nWords) * (num_pes + 1); // src + dst allocation.
        //nWords /= sizeof(long); // convert input of bytes --> longs.

        src = (long*)shmem_malloc(c);
        if ( !src ) {
	        Rprintf("[%d] %s: shmem_malloc(%d) failed?\n", mpe, pgm,c);
            shmem_global_exit(1);
        }
        dst = &src[nWords];

	    for(j=0; j < nWords; j++)
		    src[j] = (long) (j + mpe*nWords);

		shmem_barrier_all();

        shmem_fcollect64(dst,src,nWords,0,0,num_pes,pSync);

        // Expect dst to be consecuative integers 0 ... (nLongs*num_pes)-1
        for(j=0; j < (nWords*num_pes); j++) {
            if ( dst[j] != (long) j ) {
                fprintf(stderr,
                    "[%d] dst[%d] %ld != expected %d\n",mpe,j,dst[j],j);
                shmem_global_exit(1);
            }
        }
		shmem_barrier_all();

		if (Verbose && mpe == 0 && loops > 1) {
			fprintf(stderr,".");
		}
        nWords += nIncr;
	}

    if (Verbose && mpe == 0) {
	    fprintf(stderr,"\n");fflush(stderr);
    }
    shmem_free( (void*)src );
	shmem_barrier_all();
	if (Verbose)
        printf("%d(%d) Exit(%d)\n", mpe, num_pes, failures);

	shmem_finalize();

	return failures;
}
Пример #26
0
int main (int argc, char *argv[]) {
  /**** Initialising ****/
const unsigned long long full_program_start = current_time_ns();
{
  shmem_init (); 
  /* Variable Declarations */

  int 	     Numprocs,MyRank, Root = 0;
  int 	     i,j,k, NoofElements, NoofElements_Bloc,
				  NoElementsToSort;
  int 	     count, temp;
  TYPE 	     *Input, *InputData;
  TYPE 	     *Splitter, *AllSplitter;
  TYPE 	     *Buckets, *BucketBuffer, *LocalBucket;
  TYPE 	     *OutputBuffer, *Output;
  
  MyRank = shmem_my_pe ();
  Numprocs = shmem_n_pes ();
  NoofElements = SIZE;

  if(( NoofElements % Numprocs) != 0){
    if(MyRank == Root)
      printf("Number of Elements are not divisible by Numprocs \n");
    shmem_finalize ();
    exit(0);
  }
  /**** Reading Input ****/
  
  Input = (TYPE *) shmem_malloc (NoofElements*sizeof(*Input));
  if(Input == NULL) {
    printf("Error : Can not allocate memory \n");
  }

  if (MyRank == Root){
    /* Initialise random number generator  */ 
    printf ("Generating input Array for Sorting %d uint64_t numbers\n",SIZE);
    srand48((TYPE)NoofElements);
    for(i=0; i< NoofElements; i++) {
      Input[i] = rand();
    }
  }

  /**** Sending Data ****/

  NoofElements_Bloc = NoofElements / Numprocs;
  InputData = (TYPE *) shmem_malloc (NoofElements_Bloc * sizeof (*InputData));
  if(InputData == NULL) {
    printf("Error : Can not allocate memory \n");
  }
  //MPI_Scatter(Input, NoofElements_Bloc, TYPE_MPI, InputData, 
  //				  NoofElements_Bloc, TYPE_MPI, Root, MPI_COMM_WORLD);

  shmem_barrier_all();
  if(MyRank == Root) {
    for(i=0; i<Numprocs; i++) {
      TYPE* start = &Input[i * NoofElements_Bloc];
      shmem_put64(InputData, start, NoofElements_Bloc, i);
    }
  }
  shmem_barrier_all();

  /**** Sorting Locally ****/
  sorting(InputData, NoofElements_Bloc);

  /**** Choosing Local Splitters ****/
  Splitter = (TYPE *) shmem_malloc (sizeof (TYPE) * (Numprocs-1));
  if(Splitter == NULL) {
    printf("Error : Can not allocate memory \n");
  }
  for (i=0; i< (Numprocs-1); i++){
        Splitter[i] = InputData[NoofElements/(Numprocs*Numprocs) * (i+1)];
  } 

  /**** Gathering Local Splitters at Root ****/
  AllSplitter = (TYPE *) shmem_malloc (sizeof (TYPE) * Numprocs * (Numprocs-1));
  if(AllSplitter == NULL) {
    printf("Error : Can not allocate memory \n");
  }
  //MPI_Gather (Splitter, Numprocs-1, TYPE_MPI, AllSplitter, Numprocs-1, 
  //				  TYPE_MPI, Root, MPI_COMM_WORLD);
  shmem_barrier_all();
  TYPE* target_index = &AllSplitter[MyRank * (Numprocs-1)];
  shmem_put64(target_index, Splitter, Numprocs-1, Root);
  shmem_barrier_all();

  /**** Choosing Global Splitters ****/
  if (MyRank == Root){
    sorting (AllSplitter, Numprocs*(Numprocs-1));

    for (i=0; i<Numprocs-1; i++)
      Splitter[i] = AllSplitter[(Numprocs-1)*(i+1)];
  }
  
  /**** Broadcasting Global Splitters ****/
  //MPI_Bcast (Splitter, Numprocs-1, TYPE_MPI, 0, MPI_COMM_WORLD);
  { int _i; for(_i=0; _i<_SHMEM_BCAST_SYNC_SIZE; _i++) { pSync[_i] = _SHMEM_SYNC_VALUE; } shmem_barrier_all(); }
  shmem_broadcast64(Splitter, Splitter, Numprocs-1, 0, 0, 0, Numprocs, pSync);
  shmem_barrier_all();

  /**** Creating Numprocs Buckets locally ****/
  Buckets = (TYPE *) shmem_malloc (sizeof (TYPE) * (NoofElements + Numprocs));  
  if(Buckets == NULL) {
    printf("Error : Can not allocate memory \n");
  }
  
  j = 0;
  k = 1;

  for (i=0; i<NoofElements_Bloc; i++){
    if(j < (Numprocs-1)){
       if (InputData[i] < Splitter[j]) 
			 Buckets[((NoofElements_Bloc + 1) * j) + k++] = InputData[i]; 
       else{
	       Buckets[(NoofElements_Bloc + 1) * j] = k-1;
		    k=1;
			 j++;
		    i--;
       }
    }
    else 
       Buckets[((NoofElements_Bloc + 1) * j) + k++] = InputData[i];
  }
  Buckets[(NoofElements_Bloc + 1) * j] = k - 1;
  shmem_free(Splitter);
  shmem_free(AllSplitter);
      
  /**** Sending buckets to respective processors ****/

  BucketBuffer = (TYPE *) shmem_malloc (sizeof (TYPE) * (NoofElements + Numprocs));
  if(BucketBuffer == NULL) {
    printf("Error : Can not allocate memory \n");
  }

  //MPI_Alltoall (Buckets, NoofElements_Bloc + 1, TYPE_MPI, BucketBuffer, 
  //					 NoofElements_Bloc + 1, TYPE_MPI, MPI_COMM_WORLD);
  shmem_barrier_all();
  for(i=0; i<Numprocs; i++) {
    shmem_put64(&BucketBuffer[MyRank*(NoofElements_Bloc + 1)], &Buckets[i*(NoofElements_Bloc + 1)],  NoofElements_Bloc + 1, i);   
  }
  shmem_barrier_all();

  /**** Rearranging BucketBuffer ****/
  LocalBucket = (TYPE *) shmem_malloc (sizeof (TYPE) * 2 * NoofElements / Numprocs);
  if(LocalBucket == NULL) {
    printf("Error : Can not allocate memory \n");
  }

  count = 1;

  for (j=0; j<Numprocs; j++) {
  k = 1;
    for (i=0; i<BucketBuffer[(NoofElements/Numprocs + 1) * j]; i++) 
      LocalBucket[count++] = BucketBuffer[(NoofElements/Numprocs + 1) * j + k++];
  }
  LocalBucket[0] = count-1;
    
  /**** Sorting Local Buckets using Bubble Sort ****/
  /*sorting (InputData, NoofElements_Bloc, sizeof(int), intcompare); */

  NoElementsToSort = LocalBucket[0];
  sorting (&LocalBucket[1], NoElementsToSort); 

  /**** Gathering sorted sub blocks at root ****/
  OutputBuffer = (TYPE *) shmem_malloc (sizeof(TYPE) * 2 * NoofElements);
  if(OutputBuffer == NULL) {
    printf("Error : Can not allocate memory \n");
  }

  //MPI_Gather (LocalBucket, 2*NoofElements_Bloc, TYPE_MPI, OutputBuffer, 
  //				  2*NoofElements_Bloc, TYPE_MPI, Root, MPI_COMM_WORLD);
  shmem_barrier_all();
  target_index = &OutputBuffer[MyRank * (2*NoofElements_Bloc)];
  shmem_put64(target_index, LocalBucket, 2*NoofElements_Bloc, Root);
  shmem_barrier_all();

  /**** Rearranging output buffer ****/
  if (MyRank == Root){
    Output = (TYPE *) malloc (sizeof (TYPE) * NoofElements);
    count = 0;
    for(j=0; j<Numprocs; j++){
      k = 1;
      for(i=0; i<OutputBuffer[(2 * NoofElements/Numprocs) * j]; i++) 
        Output[count++] = OutputBuffer[(2*NoofElements/Numprocs) * j + k++];
      }
       printf ( "Number of Elements to be sorted : %d \n", NoofElements);
       TYPE prev = 0;
       int fail = 0;
       for (i=0; i<NoofElements; i++){
         if(Output[i] < prev) { printf("Failed at index %d\n",i); fail = 1; }
         prev = Output[i];
       }
       if(fail) printf("Sorting FAILED\n");  
       else  printf("Sorting PASSED\n");
  	free(Output);
  }/* MyRank==0*/

  shmem_free(Input);
  shmem_free(OutputBuffer);
  shmem_free(InputData);
  shmem_free(Buckets);
  shmem_free(BucketBuffer);
  shmem_free(LocalBucket);

   /**** Finalize ****/
  shmem_finalize();
  } ; 
const unsigned long long full_program_end = current_time_ns();
printf("full_program %llu ns\n", full_program_end - full_program_start);

}
Пример #27
0
int
main ()
{
    int me, npes;

    int *dest1;
    float *dest2;
    long *dest3;
    double *dest4;
    long long *dest5;

    int swapped_val1, new_val1;
    float swapped_val2, new_val2;
    long swapped_val3, new_val3;
    double swapped_val4, new_val4;
    long long swapped_val5, new_val5;

    int success = 1;
    int success1_p1;
    int success2_p1;
    int success3_p1;
    int success4_p1;
    int success5_p1;

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    shmem_barrier_all ();

    /* Checks if there are atleast 2 executing PEs */

    if (npes > 1) {

        dest1 = (int *) shmem_malloc (sizeof (*dest1));
        dest2 = (float *) shmem_malloc (sizeof (*dest2));
        dest3 = (long *) shmem_malloc (sizeof (*dest3));
        dest4 = (double *) shmem_malloc (sizeof (*dest4));
        dest5 = (long long *) shmem_malloc (sizeof (*dest5));

        *dest1 = *dest2 = *dest3 = *dest4 = *dest5 = me;
        new_val1 = new_val2 = new_val3 = new_val4 = new_val5 = me;
        success1_p1 = success1_p2 = success2_p1 = success2_p2 = success3_p1 =
            success3_p2 = success4_p1 = success4_p2 = success5_p1 =
            success5_p2 = -1;

        shmem_barrier_all ();

        swapped_val1 = shmem_int_swap (dest1, new_val1, (me + 1) % npes);
        swapped_val2 = shmem_float_swap (dest2, new_val2, (me + 1) % npes);
        swapped_val3 = shmem_long_swap (dest3, new_val3, (me + 1) % npes);
        swapped_val4 = shmem_double_swap (dest4, new_val4, (me + 1) % npes);
        swapped_val5 = shmem_longlong_swap (dest5, new_val5, (me + 1) % npes);


        /* To validate the working of swap we need to check the value received
           at the PE that initiated the swap as well as the dest PE */

        if (me == 0) {
            if (swapped_val1 == 1) {
                success1_p1 = 1;
            }
            if (swapped_val2 == 1) {
                success2_p1 = 1;
            }
            if (swapped_val3 == 1) {
                success3_p1 = 1;
            }
            if (swapped_val4 == 1) {
                success4_p1 = 1;
            }
            if (swapped_val5 == 1) {
                success5_p1 = 1;
            }
        }

        if (me == 1) {
            if (*dest1 == 0) {
                shmem_int_put (&success1_p2, &success, 1, 0);
            }
            if (*dest2 == 0) {
                shmem_int_put (&success2_p2, &success, 1, 0);
            }
            if (*dest3 == 0) {
                shmem_int_put (&success3_p2, &success, 1, 0);
            }
            if (*dest4 == 0) {
                shmem_int_put (&success4_p2, &success, 1, 0);
            }
            if (*dest5 == 0) {
                shmem_int_put (&success5_p2, &success, 1, 0);
            }
        }

        shmem_barrier_all ();

        if (me == 0) {
            if (success1_p1 && success1_p2) {
                printf ("Test shmem_int_swap: Passed\n");
            }
            else {
                printf ("Test shmem_int_swap: Failed\n");
            }

            if (success2_p1 && success2_p2) {
                printf ("Test shmem_float_swap: Passed\n");
            }
            else {
                printf ("Test shmem_float_swap: Failed\n");
            }

            if (success3_p1 && success3_p2) {
                printf ("Test shmem_long_swap: Passed\n");
            }
            else {
                printf ("Test shmem_long_swap: Failed\n");
            }

            if (success4_p1 && success4_p2) {
                printf ("Test shmem_double_swap: Passed\n");
            }
            else {
                printf ("Test shmem_double_swap: Failed\n");
            }

            if (success5_p1 && success5_p2) {
                printf ("Test shmem_longlong_swap: Passed\n");
            }
            else {
                printf ("Test shmem_longlong_swap: Failed\n");
            }

        }
        shmem_barrier_all ();



        /* Test conditional swaps shmem_longlong_cswap, shmem_long_cswap,
           shmem_int_cswap, */

        *dest1 = *dest3 = *dest5 = me;
        new_val1 = new_val3 = new_val5 = me;
        success1_p1 = success1_p2 = success3_p1 = success3_p2 = success5_p1 =
            success5_p2 = -1;

        shmem_barrier_all ();

        swapped_val1 = shmem_int_cswap (dest1, me + 1, (long) me, 1);
        swapped_val3 = shmem_long_cswap (dest3, me + 1, (long) me, 1);
        swapped_val5 = shmem_longlong_cswap (dest5, me + 1, (long) me, 1);


        /* To validate the working of conditionalswap we need to check the
           value received at the PE that initiated the conditional swap as
           well as the dest PE */

        if (me == 0) {
            if (swapped_val1 == 1) {
                success1_p1 = 1;
            }

            if (swapped_val3 == 1) {
                success3_p1 = 1;
            }

            if (swapped_val5 == 1) {
                success5_p1 = 1;
            }
        }

        if (me == 1) {
            if (*dest1 == 0) {
                shmem_int_put (&success1_p2, &success, 1, 0);
            }

            if (*dest3 == 0) {
                shmem_int_put (&success3_p2, &success, 1, 0);
            }

            if (*dest5 == 0) {
                shmem_int_put (&success5_p2, &success, 1, 0);
            }
        }

        shmem_barrier_all ();

        if (me == 0) {
            if (success1_p1 && success1_p2) {
                printf ("Test shmem_int_cswap: Passed\n");
            }
            else {
                printf ("Test shmem_int_cswap: Failed\n");
            }

            if (success3_p1 && success3_p2) {
                printf ("Test shmem_long_cswap: Passed\n");
            }
            else {
                printf ("Test shmem_long_cswap: Failed\n");
            }

            if (success5_p1 && success5_p2) {
                printf ("Test shmem_longlong_cswap: Passed\n");
            }
            else {
                printf ("Test shmem_longlong_cswap: Failed\n");
            }

        }
        shmem_barrier_all ();

        /* Test shmem_long_fadd, shmem_int_fadd, shmem_longlong_fadd */

        *dest1 = *dest3 = *dest5 = me;
        new_val1 = new_val3 = new_val5 = me;
        success1_p1 = success1_p2 = success3_p1 = success3_p2 = success5_p1 =
            success5_p2 = -1;

        shmem_barrier_all ();

        swapped_val1 = shmem_int_fadd (dest1, 1, 0);
        swapped_val3 = shmem_long_fadd (dest3, 1, 0);
        swapped_val5 = shmem_longlong_fadd (dest5, 1, 0);


        /* To validate the working of fetch and add we need to check the old
           value received at the PE that initiated the fetch and increment as
           well as the new value on the dest PE */

        if (me != 0) {
            if (swapped_val1 == 0) {
                success1_p1 = 1;
            }

            if (swapped_val3 == 0) {
                success3_p1 = 1;
            }

            if (swapped_val5 == 0) {
                success5_p1 = 1;
            }
        }

        if (me == 0) {
            if (*dest1 == npes - 1) {
                shmem_int_put (&success1_p2, &success, 1, npes - 1);
            }

            if (*dest3 == npes - 1) {
                shmem_int_put (&success3_p2, &success, 1, npes - 1);
            }

            if (*dest5 == npes - 1) {
                shmem_int_put (&success5_p2, &success, 1, npes - 1);
            }
        }

        shmem_barrier_all ();

        if (me == npes - 1) {
            if (success1_p1 && success1_p2) {
                printf ("Test shmem_int_fadd: Passed\n");
            }
            else {
                printf ("Test shmem_int_fadd: Failed\n");
            }

            if (success3_p1 && success3_p2) {
                printf ("Test shmem_long_fadd: Passed\n");
            }
            else {
                printf ("Test shmem_long_fadd: Failed\n");
            }

            if (success5_p1 && success5_p2) {
                printf ("Test shmem_longlong_fadd: Passed\n");
            }
            else {
                printf ("Test shmem_longlong_fadd: Failed\n");
            }

        }
        shmem_barrier_all ();

        /* Test shmem_long_finc, shmem_int_finc, shmem_longlong_finc */

        *dest1 = *dest3 = *dest5 = me;
        new_val1 = new_val3 = new_val5 = me;
        success1_p1 = success1_p2 = success3_p1 = success3_p2 = success5_p1 =
            success5_p2 = -1;

        shmem_barrier_all ();

        swapped_val1 = shmem_int_finc (dest1, 0);
        swapped_val3 = shmem_long_finc (dest3, 0);
        swapped_val5 = shmem_longlong_finc (dest5, 0);


        /* To validate the working of fetch and increment we need to check the
           old value received at the PE that initiated the fetch and increment
           as well as the new value on the dest PE */

        if (me != 0) {
            if (swapped_val1 == 0) {
                success1_p1 = 1;
            }

            if (swapped_val3 == 0) {
                success3_p1 = 1;
            }

            if (swapped_val5 == 0) {
                success5_p1 = 1;
            }
        }

        if (me == 0) {
            if (*dest1 == npes - 1) {
                shmem_int_put (&success1_p2, &success, 1, npes - 1);
            }

            if (*dest3 == npes - 1) {
                shmem_int_put (&success3_p2, &success, 1, npes - 1);
            }

            if (*dest5 == npes - 1) {
                shmem_int_put (&success5_p2, &success, 1, npes - 1);
            }
        }

        shmem_barrier_all ();

        if (me == npes - 1) {
            if (success1_p1 && success1_p2) {
                printf ("Test shmem_int_finc: Passed\n");
            }
            else {
                printf ("Test shmem_int_finc: Failed\n");
            }

            if (success3_p1 && success3_p2) {
                printf ("Test shmem_long_finc: Passed\n");
            }
            else {
                printf ("Test shmem_long_finc: Failed\n");
            }

            if (success5_p1 && success5_p2) {
                printf ("Test shmem_longlong_finc: Passed\n");
            }
            else {
                printf ("Test shmem_longlong_finc: Failed\n");
            }

        }
        shmem_barrier_all ();

        shmem_free (dest1);
        shmem_free (dest2);
        shmem_free (dest3);
        shmem_free (dest4);
        shmem_free (dest5);

    }
    else {
        printf
            ("Number of PEs must be > 1 to test shmem atomics, test skipped\n");
    }

    shmem_finalize ();

    return 0;
}
Пример #28
0
int
main (int argc, char *argv[])
{
    long local_dest;
    int *shm_dest;
    int me, npes, i;
    int pe_acc_success = 0;
    int fail_count = 0;

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    shm_dest = (int *) shmem_malloc (sizeof (int));

    shmem_barrier_all ();

    if (me == 0) {

        if (!check_it (&global_dest)) {   /* long global: yes */
            printf ("Test Global Address Accessible: Failed\n");
            fail_count++;
        }
        else {
            printf ("Test Global Address Accessible: Passed\n");
        }
        if (!check_it (&static_dest)) {   /* static int global: yes */
            printf ("Test Static Global Address Accessible: Failed\n");
            fail_count++;
        }
        else {
            printf ("Test Static Global Address Accessible: Passed\n");
        }
        if (check_it (&local_dest)) { /* main() stack: no */
            printf ("Test Stack Address Accessible: Failed\n");
            fail_count++;
        }
        else {
            printf ("Test Stack Address Accessible: Passed\n");
        }
        if (!check_it (shm_dest)) {   /* shmem_malloc: yes */
            printf ("Test Shmalloc-ed Address Accessible: Failed\n");
            fail_count++;
        }
        else {
            printf ("Test Shmalloc-ed Address Accessible: Passed\n");
        }


        for (i = 1; i < npes; i++) {

            if (shmem_pe_accessible (i) != 1) {
                pe_acc_success = 1;
            }

        }
        if (pe_acc_success == 1) {
            printf ("Test shmem_pe_accessible: Failed\n");
            fail_count++;
        }
        else {
            printf ("Test shmem_pe_accessible: Passed\n");
        }

        if (fail_count == 0)
	    printf("All Tests Passed\n");
        else
	    printf("%d Tests Failed\n", fail_count);
    }

    shmem_free (shm_dest);

    shmem_finalize ();

    return 0;
}
Пример #29
0
int
main (void)
{
    int i;
    int *target;
    int *source;
    int me, npes;
    struct timeval start, end;
    long time_taken, start_time, end_time;

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    source = (int *) shmem_malloc (N_ELEMENTS * sizeof (*source));

    time_taken = 0;

    for (i = 0; i < N_ELEMENTS; i += 1) {
        source[i] = (i + 1) * 10 + me;
    }
    target = (int *) shmem_malloc (N_ELEMENTS * sizeof (*target) * npes);
    for (i = 0; i < N_ELEMENTS * npes; i += 1) {
        target[i] = -90;
    }
    for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i += 1) {
        pSyncA[i] = _SHMEM_SYNC_VALUE;
        pSyncB[i] = _SHMEM_SYNC_VALUE;
    }
    shmem_barrier_all ();

    for (i = 0; i < 10000; i++) {
        gettimeofday (&start, NULL);

        start_time = (start.tv_sec * 1000000.0) + start.tv_usec;

        /* alternate between 2 pSync arrays to synchronize consequent
           collectives of even and odd iterations */
        if (i % 2) {
            shmem_fcollect32 (target, source, N_ELEMENTS, 0, 0, npes, pSyncA);
        }
        else {
            shmem_fcollect32 (target, source, N_ELEMENTS, 0, 0, npes, pSyncB);
        }

        gettimeofday (&end, NULL);

        end_time = (end.tv_sec * 1000000.0) + end.tv_usec;
        if (me == 0) {
            time_taken = time_taken + (end_time - start_time);
        }

    }
    if (me == 0) {
        printf
            ("Time required to collect %d bytes of data, with %d PEs is %ld microseconds\n",
             (4 * N_ELEMENTS * npes), npes, time_taken / 10000);
    }

    shmem_barrier_all ();
    shmem_free (target);
    shmem_free (source);

    shmem_finalize ();

    return 0;
}
Пример #30
0
int
main(int argc, char* argv[])
{
    DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    static DataType target[10];
    static DataType pong=666;
    DataType *t2=NULL;
    int me, num_pes, pe, Verbose=0;

    if (argc > 1 && (strcmp(argv[1],"-v") == 0)) {
        Verbose++;
    }

    shmem_init();
    me = shmem_my_pe();
    num_pes = shmem_n_pes();

    if (num_pes == 1) {
        printf("%s: Requires number of PEs > 1\n", argv[0]);
        shmem_finalize();
        return 0;
    }

    t2 = shmem_malloc(10*sizeof(DataType));
    if (!t2) {
        if (me==0) printf("shmem_malloc() failed?\n");
        shmem_global_exit(1);
    }
    t2[9] = target[9] = 0xFF;

    shmem_barrier_all();

    if (me == 0) {
        memset(target, 0, sizeof(target));
        for(pe=1; pe < num_pes; pe++)
            SHM_PUT(target, target, 10, pe);

        for(pe=1; pe < num_pes; pe++) /* put 10 elements into target on PE 1 */
            SHM_PUT(target, source, 10, pe);

        SHM_WAITU( &pong, SHMEM_CMP_GT, 666 );
        Vprintf("PE[%d] pong now "PF"\n",me,pong);

        for(pe=1; pe < num_pes; pe++) /* put 1 element into t2 on PE 1 */
            SHM_PUTP(&t2[9], 0xDD, pe);
    }
    else {
        /* wait for 10th element write of 'target' */
        SHM_WAITU( &target[9], SHMEM_CMP_NE, 0xFF );
        Vprintf("PE[%d] target[9] was 255 now "PF", success.\n",me,target[9]);

        SHM_WAITU( &target[9], SHMEM_CMP_EQ, 10 );
        Vprintf("PE[%d] expected target[9] == 10 now "PF"\n",me,target[9]);

        if (me == 1) {
            if (Verbose) {
                DataType tmp = SHM_GETP( &pong, 0);
                printf("PE[%d] @ PE[0] pong == "PF", setting to 999\n",me,tmp);
            }
            SHM_PUTP( &pong, 999, 0);
        }

        SHM_WAITU( &t2[9], SHMEM_CMP_NE, 0xFF );
    }

    //shmem_barrier_all();  /* sync sender and receiver */

    if (me != 0) {
        if (memcmp(source, target, sizeof(DataType) * 10) != 0) {
            int i;
            fprintf(stderr,"[%d] Src & Target mismatch?\n",me);
            for (i = 0 ; i < 10 ; ++i) {
                printf(PF","PF" ", source[i], target[i]);
            }
            printf("\n");
            shmem_global_exit(1);
        }
    }
    shmem_free(t2);

    if (Verbose)
        fprintf(stderr,"[%d] exit\n",shmem_my_pe());

    shmem_finalize();
    return 0;
}