/*
 * Gathers every PE's per-iteration timing samples into one symmetric array.
 *
 * Returns a shmem_malloc'd buffer sized for NUM_PES * timer->seconds_iter
 * doubles, filled by shmem_fcollect64, or NULL when the timer holds no
 * timing samples. The returned buffer is not freed here.
 */
static double * gather_rank_times(_timer_t * const timer)
{
  /* Nothing recorded: there is nothing to gather. */
  if(!(timer->seconds_iter > 0)) {
    return NULL;
  }

  assert(timer->seconds_iter == timer->num_iters);

  const size_t local_bytes = timer->seconds_iter * sizeof(double);
  const unsigned int total_records = NUM_PES * timer->seconds_iter;

  /* Stage this PE's samples in symmetric memory before the collective. */
  double * const local_copy = shmem_malloc(local_bytes);
  assert(local_copy);
  memcpy(local_copy, timer->seconds, local_bytes);

  double * const gathered = shmem_malloc(total_records * sizeof(double));
  assert(gathered);

  shmem_barrier_all();
  shmem_fcollect64(gathered, local_copy, timer->seconds_iter, 0, 0, NUM_PES, pSync);
  shmem_barrier_all();

  /* The staging copy is no longer needed once the collective has completed. */
  shmem_free(local_copy);
  return gathered;
}
/*
 * Gathers every PE's per-iteration 'count' values into one symmetric array.
 *
 * Returns a shmem_malloc'd buffer sized for NUM_PES * timer->num_iters
 * unsigned ints, filled by shmem_collect32, or NULL when the timer holds no
 * count samples. The returned buffer is not freed here.
 */
static unsigned int * gather_rank_counts(_timer_t * const timer)
{
  /* Nothing recorded: there is nothing to gather. */
  if(!(timer->count_iter > 0)) {
    return NULL;
  }

  const size_t local_bytes = timer->num_iters * sizeof(unsigned int);
  const unsigned int total_records = NUM_PES * timer->num_iters;

  /* Stage this PE's counts in symmetric memory before the collective. */
  unsigned int * const local_copy = shmem_malloc(local_bytes);
  assert(local_copy);
  memcpy(local_copy, timer->count, local_bytes);

  unsigned int * const gathered = shmem_malloc(total_records * sizeof(unsigned int));
  assert(gathered);

  shmem_barrier_all();
  shmem_collect32(gathered, local_copy, timer->num_iters, 0, 0, NUM_PES, pSync);
  shmem_barrier_all();

  /* The staging copy is no longer needed once the collective has completed. */
  shmem_free(local_copy);
  return gathered;
}
/*
 * Releases a block obtained from the cache allocator.
 *
 * With FORKED_CACHE enabled the block is handed to shmem_free together with
 * its size; otherwise it is released with free() and size is not used.
 */
static void shared_free(void *ptr, int size)
{
#if !FORKED_CACHE
    (void) size;            /* only the FORKED_CACHE path needs the size */
    free(ptr);
#else
    shmem_free(ptr, size);
#endif
}
/*
 * Every PE initialises a symmetric double to PI. PE 0 then writes E into
 * PE 1's copy with shmem_double_p, and PE 1 prints the value it holds along
 * with OK/FAIL depending on whether it is within epsilon of E.
 */
int main(void)
{
    shmem_init();
    const int my_pe = shmem_my_pe();

    double *cell = (double *) shmem_malloc(sizeof(*cell));
    *cell = PI;

    /* Make sure every PE has stored PI before PE 0 overwrites PE 1's copy. */
    shmem_barrier_all();

    if (my_pe == 0) {
        shmem_double_p(cell, E, 1);
    }

    shmem_barrier_all();

    if (my_pe == 1) {
        const char *verdict = "FAIL";

        if (fabs(*cell - E) < epsilon) {
            verdict = "OK";
        }
        printf("PE %d: %f, %s\n", my_pe, *cell, verdict);
    }

    shmem_free(cell);
    shmem_finalize();

    return 0;
}
cache_vars_t* cache_init(int size,int sector){ int num; #ifndef WIN32 cache_vars_t* s=shmem_alloc(sizeof(cache_vars_t)); #else cache_vars_t* s=malloc(sizeof(cache_vars_t)); #endif if(s==NULL) return NULL; memset(s,0,sizeof(cache_vars_t)); num=size/sector; if(num < 16){ num = 16; }//32kb min_size s->buffer_size=num*sector; s->sector_size=sector; #ifndef WIN32 s->buffer=shmem_alloc(s->buffer_size); #else s->buffer=malloc(s->buffer_size); #endif if(s->buffer == NULL){ #ifndef WIN32 shmem_free(s,sizeof(cache_vars_t)); #else free(s); #endif return NULL; } s->fill_limit=8*sector; s->back_size=s->buffer_size/2; return s; }
cache_vars_t* cache_init(int size,int sector){ int num; #if !defined(__MINGW32__) && !defined(PTHREAD_CACHE) && !defined(__OS2__) cache_vars_t* s=shmem_alloc(sizeof(cache_vars_t)); #else cache_vars_t* s=malloc(sizeof(cache_vars_t)); #endif if(s==NULL) return NULL; memset(s,0,sizeof(cache_vars_t)); num=size/sector; if(num < 16){ num = 16; }//32kb min_size s->buffer_size=num*sector; s->sector_size=sector; #if !defined(__MINGW32__) && !defined(PTHREAD_CACHE) && !defined(__OS2__) s->buffer=shmem_alloc(s->buffer_size); #else s->buffer=malloc(s->buffer_size); #endif if(s->buffer == NULL){ #if !defined(__MINGW32__) && !defined(PTHREAD_CACHE) && !defined(__OS2__) shmem_free(s,sizeof(cache_vars_t)); #else free(s); #endif return NULL; } s->fill_limit=8*sector; s->back_size=s->buffer_size/2; return s; }
/*
 * Each PE fills a local array with its own rank, starts a non-blocking put
 * of that array into the symmetric buffer on the next PE (wrapping around),
 * waits on the request handle, then synchronises and cleans up.
 */
int
main (int argc, char **argv)
{
  long src[N];
  shmemx_request_handle_t handle;

  shmem_init ();
  const int me = shmem_my_pe ();
  const int npes = shmem_n_pes ();

  for (int k = 0; k < N; k++)
    {
      src[k] = (long) me;
    }

  long *dest = (long *) shmem_malloc (N * sizeof (*dest));

  /* Neighbour to the right, wrapping from the last PE back to PE 0. */
  const int nextpe = (me + 1) % npes;

  shmemx_long_put_nb (dest, src, N, nextpe, &handle);
  shmemx_wait_req (handle);

  shmem_barrier_all ();

  shmem_free (dest);
  shmem_finalize ();

  return 0;
}
int main(int argc, char* argv[]) { long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; long *target; int *flag; int i, num_pes; int failed = 0; shmem_init(); target = (long*) shmem_malloc(sizeof(long) * 10); flag = (int*) shmem_malloc(sizeof(int)); *flag = 0; num_pes=shmem_n_pes(); memset(target, 0, sizeof(long)*10); shmem_barrier_all(); if (shmem_my_pe() == 0) { for(i = 0; i < num_pes; i++) { shmem_long_put_nbi(target, source, 10, i); shmem_fence(); shmem_int_inc(flag, i); } } shmem_int_wait_until(flag, SHMEM_CMP_EQ, 1); for (i = 0; i < 10; i++) { if (target[i] != source[i]) { fprintf(stderr,"[%d] target[%d] = %ld, expected %ld\n", shmem_my_pe(), i, target[i], source[i]); failed = 1; } } shmem_free(target); shmem_free(flag); shmem_finalize(); return failed; }
/*
 * Releases a FifoRing via shmem_free, passing the size of the header plus
 * thiz->length pointer-sized slots. A NULL ring is ignored.
 */
void fifo_ring_destroy(FifoRing* thiz)
{
	if(thiz == NULL)
	{
		return;
	}

	shmem_free(thiz, sizeof(FifoRing) + thiz->length * sizeof(void*));
}
/*
 * Stops the cache worker attached to stream s and releases the cache.
 *
 * Does nothing when s->cache_pid is zero. Outside WIN32 the worker process
 * is killed with SIGKILL and reaped; on WIN32 the worker thread is
 * terminated. If a cache descriptor is attached, its buffer and the
 * descriptor itself are released (and, on WIN32, c->stream as well).
 */
void cache_uninit(stream_t *s) {
  cache_vars_t* c = s->cache_data;
  if(!s->cache_pid) return;
#ifndef WIN32
  kill(s->cache_pid,SIGKILL);
  waitpid(s->cache_pid,NULL,0);
#else
  TerminateThread((HANDLE)s->cache_pid,0);
#endif
  if(!c) return;
#ifndef WIN32
  shmem_free(c->buffer,c->buffer_size);
  shmem_free(s->cache_data,sizeof(cache_vars_t));
#else
  /* c->stream is released only after the NULL check above; freeing it
     earlier dereferenced c even when no descriptor was attached. */
  free(c->stream);
  free(c->buffer);
  free(s->cache_data);
#endif
}
/*
 * Stops the cache worker attached to stream s and releases the cache.
 *
 * Does nothing when s->cache_pid is zero. In the forked configuration (none
 * of __MINGW32__, PTHREAD_CACHE, __OS2__ defined) the worker process is
 * killed with SIGKILL and reaped; otherwise cache_do_control(s, -2, NULL) is
 * issued. If a cache descriptor is attached, its storage is released with
 * the allocator matching the configuration.
 */
void cache_uninit(stream_t *s) {
  cache_vars_t* const cache = s->cache_data;

  if(s->cache_pid == 0) return;

#if !defined(__MINGW32__) && !defined(PTHREAD_CACHE) && !defined(__OS2__)
  kill(s->cache_pid,SIGKILL);
  waitpid(s->cache_pid,NULL,0);
#else
  cache_do_control(s, -2, NULL);
#endif

  if(cache == NULL) return;

#if !defined(__MINGW32__) && !defined(PTHREAD_CACHE) && !defined(__OS2__)
  shmem_free(cache->buffer,cache->buffer_size);
  shmem_free(s->cache_data,sizeof(cache_vars_t));
#else
  free(cache->stream);
  free(cache->buffer);
  free(s->cache_data);
#endif
}
/*
 * Times `loops` back-to-back shmem_fcollect32 calls over all PEs and, on
 * PE 0 with Verbose set, prints the elapsed time and derived throughput.
 *
 * target   - ignored on entry: the function allocates its own symmetric
 *            destination of elements * npes ints and frees it before
 *            returning (kept in the signature for existing callers).
 * src      - source buffer handed to shmem_fcollect32.
 * elements - number of 32-bit elements contributed by each PE.
 * me, npes - this PE's rank and the number of PEs.
 * loops    - number of collective calls to time.
 */
void
fcollect(int *target, int *src, int elements, int me, int npes, int loops)
{
    int i;
    double start_time, elapsed_time;
    /* Widen before multiplying so loops * elements cannot overflow int. */
    long total_bytes = (long)loops * (long)elements * (long)sizeof(*src);
    long *ps, *pSync, *pSync1;

    /* One allocation holding two consecutive pSync work arrays. */
    pSync = (long*)
        shmem_malloc( 2 * sizeof(long) * _SHMEM_COLLECT_SYNC_SIZE );
    pSync1 = &pSync[_SHMEM_COLLECT_SYNC_SIZE];
    for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) {
        pSync[i] = pSync1[i] = _SHMEM_SYNC_VALUE;
    }

    target = (int *) shmem_malloc( elements * sizeof(*target) * npes );

    if (me==0 && Verbose) {
        fprintf(stdout,"%s: %d loops of fcollect32(%ld bytes) over %d PEs: ",
                __FUNCTION__,loops,(long)(elements*sizeof(*src)),npes);
        fflush(stdout);
    }

    shmem_barrier_all();

    start_time = shmemx_wtime();
    for(i = 0; i < loops; i++) {
        /* Alternate between the two disjoint work arrays. Indexing
           &pSync[i & 1] made consecutive calls use overlapping arrays
           and left pSync1 unused. */
        ps = (i & 1) ? pSync1 : pSync;
        shmem_fcollect32( target, src, elements, 0, 0, npes, ps );
    }
    elapsed_time = shmemx_wtime() - start_time;

    if (me==0 && Verbose) {
        printf("%7.3f secs\n", elapsed_time);
        printf(" %7.5f usecs / fcollect32(), %ld Kbytes @ %7.4f MB/sec\n\n",
               (elapsed_time/((double)loops*npes))*1000000.0,
               (total_bytes/1024),
               ((double)total_bytes/(1024.0*1024.0)) / elapsed_time );
    }
    shmem_barrier_all();
    shmem_free(target);
    shmem_free( pSync );
    shmem_barrier_all();
}
/*
 * Smoke test: initialise the library, allocate one symmetric long, free it
 * again and shut down.
 */
int
main ()
{
  shmem_init ();

  long *scratch = (long *) shmem_malloc (sizeof (*scratch));
  shmem_free (scratch);

  shmem_finalize ();

  return 0;
}
/*
 * Every PE stores its rank in a symmetric long. Each odd-numbered PE then
 * swaps its own rank into the copy held by PE (me + 1) % npes and prints
 * its local value together with the value the swap returned.
 */
int main(void)
{
    shmem_init();
    const int me = shmem_my_pe();
    const int npes = shmem_n_pes();

    long *dest = (long *) shmem_malloc(sizeof (*dest));
    *dest = me;

    /* All PEs must have stored their rank before any swap is issued. */
    shmem_barrier_all();

    const long new_val = me;

    if ((me & 1) != 0) {
        const int partner = (me + 1) % npes;
        const long swapped_val = shmem_swap(dest, new_val, partner);

        printf("%d: dest = %ld, swapped = %ld\n", me, *dest, swapped_val);
    }

    shmem_free(dest);

    return 0;
}
int main(int argc, char **argv) { int loops=DFLT_LOOPS; char *pgm; int *Target; int *Source; int i, me, npes; int target_PE; long bytes; double start_time, *total_time; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 's': Sync++; break; case 't': Track++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } for(i=0; i < SHMEM_REDUCE_SYNC_SIZE; i++) pSync[i] = SHMEM_SYNC_VALUE; target_PE = (me+1) % npes; total_time = (double *) shmem_malloc( npes * sizeof(double) ); if (!total_time) { fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", (elements * sizeof(double))); shmem_global_exit(1); } for(i=0; i < npes; i++) total_time[i] = -1.0; Source = (int *) shmem_malloc( elements * sizeof(*Source) ); if (!Source) { fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(total_time); shmem_global_exit(1); } Target = (int *) shmem_malloc( elements * sizeof(*Target) ); if (!Target) { fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(Source); shmem_free(total_time); shmem_global_exit(1); } for (i = 0; i < elements; i++) { Target[i] = -90; Source[i] = i + 1; } bytes = loops * sizeof(int) * elements; if (Verbose && me==0) { fprintf(stderr, "%s: INFO - %d loops, put %d (int) elements to PE+1 Max put ??\n", pgm, loops, elements); } shmem_barrier_all(); for(i=0; i < loops; i++) { start_time = shmemx_wtime(); 
shmem_int_put(Target, Source, elements, target_PE); time_taken += (shmemx_wtime() - start_time); if (me==0) { if ( Track && i > 0 && ((i % 200) == 0)) fprintf(stderr,".%d",i); } if (Sync) shmem_barrier_all(); } // collect time per node. shmem_double_put( &total_time[me], &time_taken, 1, 0 ); shmem_double_sum_to_all(&sum_time, &time_taken, 1, 0, 0, npes, pWrk, pSync); shmem_barrier_all(); for (i = 0; i < elements; i++) { if (Target[i] != i + 1) { printf("%d: Error Target[%d] = %d, expected %d\n", me, i, Target[i], i + 1); shmem_global_exit(1); } } if ( Track && me == 0 ) fprintf(stderr,"\n"); if(Verbose && me == 0) { double rate, comp_time; if (Verbose > 1) fprintf(stdout,"Individule PE times: (seconds)\n"); for(i=0,comp_time=0.0; i < npes; i++) { comp_time += total_time[i]; if (Verbose > 1) fprintf(stdout," PE[%d] %8.6f\n",i,total_time[i]); } sum_time /= (double)npes; comp_time /= (double)npes; if (sum_time != comp_time) printf("%s: computed_time %7.5f != sum_to_all_time %7.5f)\n", pgm, comp_time, sum_time ); rate = ((double)bytes/(1024.0*1024.0)) / comp_time; printf("%s: shmem_int_put() %7.4f MB/sec (bytes %ld secs %7.4f)\n", pgm, rate, bytes, sum_time); } shmem_free(total_time); shmem_free(Target); shmem_free(Source); shmem_finalize(); return 0; }
int main(int argc, char **argv) { int i,j; int my_pe,n_pes,PE_root; size_t max_elements,max_elements_bytes; int *srce_int,*targ_int,ans_int; long *srce_long,*targ_long,ans_long; float *srce_float,*targ_float,ans_float; double *srce_double,*targ_double,ans_double; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); /* fail if trying to use only one processor */ if ( n_pes <= 1 ){ fprintf(stderr, "FAIL - test requires at least two PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_broadcast(%s) n_pes=%d\n", argv[0],n_pes); /* initialize the pSync arrays */ for (i=0; i < _SHMEM_BCAST_SYNC_SIZE; i++) { pSync1[i] = _SHMEM_SYNC_VALUE; pSync2[i] = _SHMEM_SYNC_VALUE; } shmem_barrier_all(); /* Wait for all PEs to initialize pSync1 & pSync2 */ PE_root=1; /* we'll broadcast from this PE */ /* shmem_broadcast32 test */ max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_broadcast32 max_elements = %d\n", max_elements); srce_int = shmem_malloc(max_elements_bytes); targ_int = shmem_malloc(max_elements_bytes); srce_float = shmem_malloc(max_elements_bytes); targ_float = shmem_malloc(max_elements_bytes); if((srce_int == NULL) || (targ_int == NULL) || (srce_float == NULL) || (targ_float == NULL)) shmalloc_error(); for(j = 0; j < max_elements; j++) { srce_int[j] = (int)(my_pe+j); srce_float[j] = (float)(my_pe+j); targ_int[j] = (int)(100*my_pe+j); targ_float[j] = (float)(100*my_pe+j); } shmem_barrier_all(); for(i = 0; i < IMAX; i+=2) { /* i is even -- using int */ if (my_pe == PE_root) for(j = 0; j < max_elements; j++) { srce_int[j] = (int)(my_pe+i+j); } /* broadcast from PE_root to all PEs using pSync1 */ shmem_broadcast32(targ_int,srce_int,max_elements,PE_root,0,0,n_pes,pSync1); for(j = 0; j < max_elements; j++) { if (my_pe == PE_root) { ans_int= (int)(100*my_pe+j); } else { ans_int= (int)(PE_root+i+j); } if ( targ_int[j] != ans_int ) fprintf(stderr, "FAIL: PE [%d] 
targ_int[%d]=%d ans_int=%d\n", my_pe,j,targ_int[j],ans_int); } /* i+1 is odd -- using float */ if (my_pe == PE_root) for(j = 0; j < max_elements; j++) { srce_float[j] = (float)(PE_root+i+1+j); } /* broadcast from PE_root to all PEs using pSync2 */ shmem_broadcast32(targ_float,srce_float,max_elements,PE_root,0,0,n_pes,pSync2); for(j = 0; j < max_elements; j++) { if (my_pe == PE_root) { ans_float= (float)(100*my_pe+j); } else { ans_float= (float)(PE_root+i+1+j); } if ( targ_float[j] != ans_float ) fprintf(stderr, "FAIL: PE [%d] targ_float[%d]=%10.0f ans_float=%10.0f\n", my_pe,j,targ_float[j],ans_float); } } shmem_free(srce_int); shmem_free(targ_int); shmem_free(srce_float); shmem_free(targ_float); /* shmem_broadcast64 test */ max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_broadcast64 max_elements = %d\n", max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); srce_double = shmem_malloc(max_elements_bytes); targ_double = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL) || (srce_double == NULL) || (targ_double == NULL)) shmalloc_error(); for(j = 0; j < max_elements; j++) { srce_long[j] = (long)(my_pe+j); srce_double[j] = (double)(my_pe+j); targ_long[j] = (long)(100*my_pe+j); targ_double[j] = (double)(100*my_pe+j); } shmem_barrier_all(); for(i = 0; i < IMAX; i+=2) { /* i is even -- using long */ if (my_pe == PE_root) for(j = 0; j < max_elements; j++) { srce_long[j] = (long)(my_pe+i+j); } /* broadcast from PE_root to all PEs using pSync1 */ shmem_broadcast64(targ_long,srce_long,max_elements,PE_root,0,0,n_pes,pSync1); for(j = 0; j < max_elements; j++) { if (my_pe == PE_root) { ans_long= (long)(100*my_pe+j); } else { ans_long= (long)(PE_root+i+j); } if ( targ_long[j] != ans_long ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d ans_long=%d\n", my_pe,j,targ_long[j],ans_long); } /* i+1 is odd -- 
using double */ if (my_pe == PE_root) for(j = 0; j < max_elements; j++) { srce_double[j] = (double)(PE_root+i+1+j); } /* broadcast from PE_root to all PEs using pSync2 */ shmem_broadcast64(targ_double,srce_double,max_elements,PE_root,0,0,n_pes,pSync2); for(j = 0; j < max_elements; j++) { if (my_pe == PE_root) { ans_double= (double)(100*my_pe+j); } else { ans_double= (double)(PE_root+i+1+j); } if ( targ_double[j] != ans_double ) fprintf(stderr, "FAIL: PE [%d] targ_double[%d]=%10.0f ans_double=%10.0f\n", my_pe,j,targ_double[j],ans_double); } } shmem_free(srce_long); shmem_free(targ_long); shmem_free(srce_double); shmem_free(targ_double); #ifndef OPENSHMEM #ifdef SHMEM_C_GENERIC_32 /* shmemx_broadcast (GENERIC 32) test */ max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmemx_broadcast (GENERIC 32) max_elements = %d\n", max_elements); srce_int = shmem_malloc(max_elements_bytes); targ_int = shmem_malloc(max_elements_bytes); if((srce_int == NULL) || (targ_int == NULL)) shmalloc_error(); for(j = 0; j < max_elements; j++) { srce_int[j] = (int)(my_pe+j); targ_int[j] = (int)(2*my_pe+j); } shmem_barrier_all(); /* broadcast from PE 1 to all PEs */ shmemx_broadcast(targ_int,srce_int,max_elements,1,0,0,n_pes,pSync1); for(j = 0; j < max_elements; j++) { if (my_pe == 1) { ans_int= (int)(j+2); } else { ans_int= (int)(j+1); } if ( targ_int[j] != ans_int ) fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d ans_int=%d\n", my_pe,j,targ_int[j],ans_int); } shmem_free(srce_int); shmem_free(targ_int); #else /* shmemx_broadcast (GENERIC 64) test */ max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmemx_broadcast (GENERIC 64) max_elements = %d\n", max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) 
shmalloc_error(); for(j = 0; j < max_elements; j++) { srce_long[j] = (long)(my_pe+j); targ_long[j] = (long)(2*my_pe+j); } shmem_barrier_all(); /* broadcast from PE 1 to all PEs */ shmemx_broadcast(targ_long,srce_long,max_elements,1,0,0,n_pes,pSync1); for(j = 0; j < max_elements; j++) { if (my_pe == 1) { ans_long = (long)(j+2); } else { ans_long = (long)(j+1); } if ( targ_long[j] != ans_long ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d ans_long=%d\n", my_pe,j,targ_long[j],ans_long); } shmem_free(srce_long); shmem_free(targ_long); #endif #endif #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
int main(int argc, char **argv) { int j; int my_pe,n_pes; int *flag,*one; size_t max_elements,max_elements_bytes; char *srce_char,*targ_char; short *srce_short,*targ_short; int *srce_int,*targ_int; long *srce_long,*targ_long; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); flag = shmem_malloc((size_t) sizeof(int)); one = shmem_malloc((size_t) sizeof(int)); *one = 1; /* fail if trying to use odd number of processors */ if ( (n_pes % 2) != 0 ){ fprintf(stderr, "FAIL - test requires even number of PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_num_put_nb(%s)\n", argv[0]); /* shmem_putmem_nb test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(char)); max_elements_bytes = (size_t) (sizeof(char)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_putmem_nb max_elements = %d\n",max_elements); srce_char = shmem_malloc(max_elements_bytes); targ_char = shmem_malloc(max_elements_bytes); if((srce_char == NULL) || (targ_char == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_char[j] = (char)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_char[j] = (char)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_putmem_nb(targ_char,srce_char,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_char[j] != (char)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_char[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_char[j],my_pe+j-1); } shmem_free(srce_char); shmem_free(targ_char); /* shmem_put16_nb test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(short)); if(max_elements > 20000) max_elements=20000; max_elements_bytes = (size_t) (sizeof(short)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put16_nb max_elements = %d\n",max_elements); srce_short = shmem_malloc(max_elements_bytes); targ_short = shmem_malloc(max_elements_bytes); if((srce_short == NULL) || (targ_short == NULL)) 
shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_short[j] = (short)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_short[j] = (short)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put16_nb(targ_short,srce_short,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_short[j] != (short)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_short[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_short[j],my_pe+j-1); } shmem_free(srce_short); shmem_free(targ_short); /* shmem_put32_nb test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put32_nb max_elements = %d\n",max_elements); srce_int = shmem_malloc(max_elements_bytes); targ_int = shmem_malloc(max_elements_bytes); if((srce_int == NULL) || (targ_int == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_int[j] = (int)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_int[j] = (int)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put32_nb(targ_int,srce_int,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_int[j] != (int)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_int[j],my_pe+j-1); } shmem_free(srce_int); shmem_free(targ_int); /* shmem_put64_nb test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put64_nb max_elements = %d\n",max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < 
max_elements; j++) srce_long[j] = (long)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_long[j] = (long)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put64_nb(targ_long,srce_long,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_long[j] != (long)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_long[j],my_pe+j-1); } shmem_free(srce_long); shmem_free(targ_long); /* shmem_put128_nb test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(long)); if ( (max_elements % 2) != 0) max_elements = max_elements-1; max_elements_bytes = (size_t) (sizeof(long)*max_elements); max_elements = max_elements/2; if(my_pe == 0) fprintf(stderr,"shmem_put128_nb max_elements = %d\n",max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < 2*max_elements; j++) srce_long[j] = (long)(my_pe+j); else for(j = 0; j < 2*max_elements; j++) targ_long[j] = (long)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put128_nb(targ_long,srce_long,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < 2*max_elements; j++) if ( targ_long[j] != (long)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_long[j],my_pe+j-1); } shmem_free(srce_long); shmem_free(targ_long); #ifdef SHMEM_C_GENERIC_32 /* shmem_put_nb (GENERIC 32) test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put_nb (GENERIC 32) max_elements = %d\n",max_elements); srce_int = shmem_malloc(max_elements_bytes); targ_int = shmem_malloc(max_elements_bytes); if((srce_int == 
NULL) || (targ_int == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_int[j] = (int)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_int[j] = (int)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put_nb(targ_int,srce_int,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_int[j] != (int)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_int[j],my_pe+j-1); } shmem_free(srce_int); shmem_free(targ_int); #else /* shmem_put_nb (GENERIC 64) test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put_nb (GENERIC 64) max_elements = %d\n",max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_long[j] = (long)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_long[j] = (long)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put_nb(targ_long,srce_long,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_long[j] != (long)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_long[j],my_pe+j-1); } shmem_free(srce_long); shmem_free(targ_long); #endif #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
/*
 * Feature test for the put family. Every PE fills source arrays with its own
 * rank and puts them to PE (me + 1) % npes; PE 0 then checks that what it
 * received equals npes - 1 (the rank of its left neighbour) and prints
 * Passed/Failed for each routine:
 *   - typed puts and shmem_putmem,
 *   - shmem_put32/64/128,
 *   - shmem_iput32/64/128 and the typed iput routines (source stride 2),
 *   - the single-element shmem_<type>_p routines.
 *
 * The successN variables are failure flags: 0 means passed, 1 means a
 * mismatch was seen. With a single PE the test is skipped.
 */
int
main (int argc, char **argv)
{
  int i;
  int nextpe;
  int me, npes;
  int success1, success2, success3, success4, success5, success6, success7,
    success8;

  short src1[N];
  int src2[N];
  long src3[N];
  long double src4[N];
  long long src5[N];
  double src6[N];
  float src7[N];
  char *src8;
  short src9;
  int src10;
  long src11;
  double src12;
  float src13;

  short *dest1;
  int *dest2;
  long *dest3;
  long double *dest4;
  long long *dest5;
  double *dest6;
  float *dest7;
  char *dest8;
  short *dest9;
  int *dest10;
  long *dest11;
  double *dest12;
  float *dest13;

  shmem_init ();
  me = shmem_my_pe ();
  npes = shmem_n_pes ();

  if (npes > 1)
    {

      success1 = 0;
      success2 = 0;
      success3 = 0;
      success4 = 0;
      success5 = 0;
      success6 = 0;
      success7 = 0;
      success8 = 0;
      src8 = (char *) malloc (N * sizeof (char));

      for (i = 0; i < N; i += 1)
        {
          src1[i] = (short) me;
          src2[i] = me;
          src3[i] = (long) me;
          src4[i] = (long double) me;
          src5[i] = (long long) me;
          src6[i] = (double) me;
          src7[i] = (float) me;
          src8[i] = (char) me;
        }
      src9 = (short) me;
      src10 = me;
      src11 = (long) me;
      src12 = (double) me;
      src13 = (float) me;

      dest1 = (short *) shmem_malloc (N * sizeof (*dest1));
      dest2 = (int *) shmem_malloc (N * sizeof (*dest2));
      dest3 = (long *) shmem_malloc (N * sizeof (*dest3));
      dest4 = (long double *) shmem_malloc (N * sizeof (*dest4));
      dest5 = (long long *) shmem_malloc (N * sizeof (*dest5));
      dest6 = (double *) shmem_malloc (N * sizeof (*dest6));
      dest7 = (float *) shmem_malloc (N * sizeof (*dest7));
      /* dest8 receives N bytes (local init below and shmem_putmem), so it
         must hold N elements; a fixed size of 4 was overrun. */
      dest8 = (char *) shmem_malloc (N * sizeof (*dest8));
      dest9 = (short *) shmem_malloc (sizeof (*dest9));
      dest10 = (int *) shmem_malloc (sizeof (*dest10));
      dest11 = (long *) shmem_malloc (sizeof (*dest11));
      dest12 = (double *) shmem_malloc (sizeof (*dest12));
      dest13 = (float *) shmem_malloc (sizeof (*dest13));

      for (i = 0; i < N; i += 1)
        {
          dest1[i] = -9;
          dest2[i] = -9;
          dest3[i] = -9;
          dest4[i] = -9;
          dest5[i] = -9;
          dest6[i] = -9;
          dest7[i] = -9.0;
          dest8[i] = -9;
        }
      *dest9 = -9;
      *dest10 = -9;
      *dest11 = -9;
      *dest12 = -9;
      *dest13 = -9.0;

      nextpe = (me + 1) % npes;

      /* Testing shmem_short_put, shmem_int_put, shmem_long_put,
         shmem_longdouble_put, shmem_longlong_put, shmem_double_put,
         shmem_float_put, shmem_putmem */
      shmem_barrier_all ();

      shmem_short_put (dest1, src1, N, nextpe);
      shmem_int_put (dest2, src2, N, nextpe);
      shmem_long_put (dest3, src3, N, nextpe);
      shmem_longdouble_put (dest4, src4, N, nextpe);
      shmem_longlong_put (dest5, src5, N, nextpe);
      shmem_double_put (dest6, src6, N, nextpe);
      shmem_float_put (dest7, src7, N, nextpe);
      shmem_putmem (dest8, src8, N * sizeof (char), nextpe);

      shmem_barrier_all ();

      if (me == 0)
        {
          for (i = 0; i < N; i += 1)
            {
              if (dest1[i] != (npes - 1))
                {
                  success1 = 1;
                }
              if (dest2[i] != (npes - 1))
                {
                  success2 = 1;
                }
              if (dest3[i] != (npes - 1))
                {
                  success3 = 1;
                }
              if (dest4[i] != (npes - 1))
                {
                  success4 = 1;
                }
              if (dest5[i] != (npes - 1))
                {
                  success5 = 1;
                }
              if (dest6[i] != (npes - 1))
                {
                  success6 = 1;
                }
              if (dest7[i] != (npes - 1))
                {
                  success7 = 1;
                }
              if (dest8[i] != (npes - 1))
                {
                  success8 = 1;
                }
            }

          if (success1 == 0)
            printf ("Test shmem_short_put: Passed\n");
          else
            printf ("Test shmem_short_put: Failed\n");
          if (success2 == 0)
            printf ("Test shmem_int_put: Passed\n");
          else
            printf ("Test shmem_int_put: Failed\n");
          if (success3 == 0)
            printf ("Test shmem_long_put: Passed\n");
          else
            printf ("Test shmem_long_put: Failed\n");
          if (success4 == 0)
            printf ("Test shmem_longdouble_put: Passed\n");
          else
            printf ("Test shmem_longdouble_put: Failed\n");
          if (success5 == 0)
            printf ("Test shmem_longlong_put: Passed\n");
          else
            printf ("Test shmem_longlong_put: Failed\n");
          if (success6 == 0)
            printf ("Test shmem_double_put: Passed\n");
          else
            printf ("Test shmem_double_put: Failed\n");
          if (success7 == 0)
            printf ("Test shmem_float_put: Passed\n");
          else
            printf ("Test shmem_float_put: Failed\n");
          if (success8 == 0)
            printf ("Test shmem_putmem: Passed\n");
          else
            printf ("Test shmem_putmem: Failed\n");

        }
      shmem_barrier_all ();

      /* Testing shmem_put32, shmem_put64, shmem_put128 */
      if (sizeof (int) == 4)
        {
          for (i = 0; i < N; i += 1)
            {
              dest2[i] = -9;
              dest3[i] = -9;
              dest4[i] = -9;
            }
          success2 = 0;
          success3 = 0;
          success4 = 0;

          shmem_barrier_all ();

          shmem_put32 (dest2, src2, N, nextpe);
          shmem_put64 (dest3, src3, N, nextpe);
          shmem_put128 (dest4, src4, N, nextpe);

          shmem_barrier_all ();

          if (me == 0)
            {
              for (i = 0; i < N; i += 1)
                {
                  if (dest2[i] != (npes - 1))
                    {
                      success2 = 1;
                    }
                  if (dest3[i] != (npes - 1))
                    {
                      success3 = 1;
                    }
                  if (dest4[i] != (npes - 1))
                    {
                      success4 = 1;
                    }
                }
              if (success2 == 0)
                printf ("Test shmem_put32: Passed\n");
              else
                printf ("Test shmem_put32: Failed\n");

              if (success3 == 0)
                printf ("Test shmem_put64: Passed\n");
              else
                printf ("Test shmem_put64: Failed\n");

              if (success4 == 0)
                printf ("Test shmem_put128: Passed\n");
              else
                printf ("Test shmem_put128: Failed\n");
            }
        }
      else if (sizeof (int) == 8)
        {
          /* NOTE(review): this branch pairs put32/64/128 with the
             short/int/long arrays; it is only taken when int is 8 bytes --
             confirm the buffer widths before relying on it. */
          for (i = 0; i < N; i += 1)
            {
              dest1[i] = -9;
              dest2[i] = -9;
              dest3[i] = -9;
            }
          success1 = 0;
          success2 = 0;
          success3 = 0;

          shmem_barrier_all ();

          shmem_put32 (dest1, src1, N, nextpe);
          shmem_put64 (dest2, src2, N, nextpe);
          shmem_put128 (dest3, src3, N, nextpe);

          shmem_barrier_all ();

          if (me == 0)
            {
              for (i = 0; i < N; i += 1)
                {
                  if (dest1[i] != (npes - 1))
                    {
                      success1 = 1;
                    }
                  if (dest2[i] != (npes - 1))
                    {
                      success2 = 1;
                    }
                  if (dest3[i] != (npes - 1))
                    {
                      success3 = 1;
                    }
                }
              if (success1 == 0)
                printf ("Test shmem_put32: Passed\n");
              else
                printf ("Test shmem_put32: Failed\n");
              if (success2 == 0)
                printf ("Test shmem_put64: Passed\n");
              else
                printf ("Test shmem_put64: Failed\n");

              if (success3 == 0)
                printf ("Test shmem_put128: Passed\n");
              else
                printf ("Test shmem_put128: Failed\n");
            }
        }

      /* Testing shmem_iput32, shmem_iput64, shmem_iput128 */
      shmem_barrier_all ();
      if (sizeof (int) == 4)
        {
          for (i = 0; i < N; i += 1)
            {
              dest2[i] = -9;
              dest3[i] = -9;
              dest4[i] = -9;
            }
          success2 = 0;
          success3 = 0;
          success4 = 0;

          shmem_barrier_all ();

          /* Source stride 2: only N / 2 elements can be read without
             running past the N-element source arrays, and N / 2 is also
             what the check below inspects. */
          shmem_iput32 (dest2, src2, 1, 2, N / 2, nextpe);
          shmem_iput64 (dest3, src3, 1, 2, N / 2, nextpe);
          shmem_iput128 (dest4, src4, 1, 2, N / 2, nextpe);

          shmem_barrier_all ();

          if (me == 0)
            {
              for (i = 0; i < N / 2; i += 1)
                {
                  if (dest2[i] != (npes - 1))
                    {
                      success2 = 1;
                    }
                  if (dest3[i] != (npes - 1))
                    {
                      success3 = 1;
                    }
                  if (dest4[i] != (npes - 1))
                    {
                      success4 = 1;
                    }
                }
              if (success2 == 0)
                printf ("Test shmem_iput32: Passed\n");
              else
                printf ("Test shmem_iput32: Failed\n");

              if (success3 == 0)
                printf ("Test shmem_iput64: Passed\n");
              else
                printf ("Test shmem_iput64: Failed\n");

              if (success4 == 0)
                printf ("Test shmem_iput128: Passed\n");
              else
                printf ("Test shmem_iput128: Failed\n");
            }
        }
      else if (sizeof (int) == 8)
        {
          for (i = 0; i < N; i += 1)
            {
              dest1[i] = -9;
              dest2[i] = -9;
              dest3[i] = -9;
            }
          success1 = 0;
          success2 = 0;
          success3 = 0;

          shmem_barrier_all ();

          shmem_iput32 (dest1, src1, 1, 2, N / 2, nextpe);
          shmem_iput64 (dest2, src2, 1, 2, N / 2, nextpe);
          shmem_iput128 (dest3, src3, 1, 2, N / 2, nextpe);

          shmem_barrier_all ();

          if (me == 0)
            {
              for (i = 0; i < N / 2; i += 1)
                {
                  if (dest1[i] != (npes - 1))
                    {
                      success1 = 1;
                    }
                  if (dest2[i] != (npes - 1))
                    {
                      success2 = 1;
                    }
                  if (dest3[i] != (npes - 1))
                    {
                      success3 = 1;
                    }
                }
              if (success1 == 0)
                printf ("Test shmem_iput32: Passed\n");
              else
                printf ("Test shmem_iput32: Failed\n");
              if (success2 == 0)
                printf ("Test shmem_iput64: Passed\n");
              else
                printf ("Test shmem_iput64: Failed\n");

              if (success3 == 0)
                printf ("Test shmem_iput128: Passed\n");
              else
                printf ("Test shmem_iput128: Failed\n");
            }
        }

      /* Testing shmem_short_iput, shmem_int_iput, shmem_long_iput,
         shmem_double_iput, shmem_float_iput */
      for (i = 0; i < N; i += 1)
        {
          dest1[i] = -9;
          dest2[i] = -9;
          dest3[i] = -9;
          dest6[i] = -9;
          dest7[i] = -9;
        }
      success1 = 0;
      success2 = 0;
      success3 = 0;
      success6 = 0;
      success7 = 0;

      shmem_barrier_all ();

      /* Same stride-2 bound as above: transfer N / 2 elements. */
      shmem_short_iput (dest1, src1, 1, 2, N / 2, nextpe);
      shmem_int_iput (dest2, src2, 1, 2, N / 2, nextpe);
      shmem_long_iput (dest3, src3, 1, 2, N / 2, nextpe);
      shmem_double_iput (dest6, src6, 1, 2, N / 2, nextpe);
      shmem_float_iput (dest7, src7, 1, 2, N / 2, nextpe);

      shmem_barrier_all ();

      if (me == 0)
        {
          for (i = 0; i < N / 2; i += 1)
            {
              if (dest1[i] != (npes - 1))
                {
                  success1 = 1;
                }
              if (dest2[i] != (npes - 1))
                {
                  success2 = 1;
                }
              if (dest3[i] != (npes - 1))
                {
                  success3 = 1;
                }
              if (dest6[i] != (npes - 1))
                {
                  success6 = 1;
                }
              if (dest7[i] != (npes - 1))
                {
                  success7 = 1;
                }
            }

          if (success1 == 0)
            printf ("Test shmem_short_iput: Passed\n");
          else
            printf ("Test shmem_short_iput: Failed\n");
          if (success2 == 0)
            printf ("Test shmem_int_iput: Passed\n");
          else
            printf ("Test shmem_int_iput: Failed\n");
          if (success3 == 0)
            printf ("Test shmem_long_iput: Passed\n");
          else
            printf ("Test shmem_long_iput: Failed\n");
          if (success6 == 0)
            printf ("Test shmem_double_iput: Passed\n");
          else
            printf ("Test shmem_double_iput: Failed\n");
          if (success7 == 0)
            printf ("Test shmem_float_iput: Passed\n");
          else
            printf ("Test shmem_float_iput: Failed\n");

        }

      /* Testing shmem_double_p, shmem_float_p, shmem_int_p, shmem_long_p,
         shmem_short_p */
      shmem_barrier_all ();

      shmem_short_p (dest9, src9, nextpe);
      shmem_int_p (dest10, src10, nextpe);
      shmem_long_p (dest11, src11, nextpe);
      shmem_double_p (dest12, src12, nextpe);
      shmem_float_p (dest13, src13, nextpe);

      shmem_barrier_all ();

      if (me == 0)
        {
          if (*dest9 == (npes - 1))
            printf ("Test shmem_short_p: Passed\n");
          else
            printf ("Test shmem_short_p: Failed\n");
          if (*dest10 == (npes - 1))
            printf ("Test shmem_int_p: Passed\n");
          else
            printf ("Test shmem_int_p: Failed\n");
          if (*dest11 == (npes - 1))
            printf ("Test shmem_long_p: Passed\n");
          else
            printf ("Test shmem_long_p: Failed\n");
          if (*dest12 == (npes - 1))
            printf ("Test shmem_double_p: Passed\n");
          else
            printf ("Test shmem_double_p: Failed\n");
          if (*dest13 == (npes - 1))
            printf ("Test shmem_float_p: Passed\n");
          else
            printf ("Test shmem_float_p: Failed\n");

        }

      shmem_barrier_all ();

      shmem_free (dest1);
      shmem_free (dest2);
      shmem_free (dest3);
      shmem_free (dest4);
      shmem_free (dest5);
      shmem_free (dest6);
      shmem_free (dest7);
      shmem_free (dest8);
      shmem_free (dest9);
      shmem_free (dest10);
      shmem_free (dest11);
      shmem_free (dest12);
      shmem_free (dest13);

      /* src8 is the only source buffer taken from the heap. */
      free (src8);

    }
  else
    {
      printf ("Number of PEs must be > 1 to test shmem put, test skipped\n");
    }

  shmem_finalize ();

  return 0;
}
int main(int argc, char **argv) { int i,j; short oldjs, oldxs, my_pes; int oldji, oldxi, my_pei; long oldjl, oldxl, my_pel; long long oldjll,oldxll,my_pell; float oldjf, oldxf, my_pef; double oldjd, oldxd, my_ped; int my_pe,n_pes; size_t max_elements,max_elements_bytes; static short *xs; static int *xi; static long *xl; static long long *xll; static float *xf; static double *xd; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); my_pes = (short) my_pe; my_pei = (int) my_pe; my_pel = (long) my_pe; my_pell = (long long) my_pe; my_pef = (float) my_pe; my_ped = (double) my_pe; #ifdef HAVE_SET_CACHE_INV shmem_set_cache_inv(); #endif /* fail if trying to use only one processor */ if ( n_pes <= 1 ){ fprintf(stderr, "FAIL - test requires at least two PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_swap(%s) n_pes=%d\n", argv[0],n_pes); /* test shmem_short_swap */ /* shmalloc xs on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(short) * n_pes); xs = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xs[i] = 0; shmem_barrier_all(); oldjs = 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pes = my_pes + (short) 1; /* record PE value in xs[my_pe] -- save PE number */ oldxs = shmem_short_swap(&xs[my_pe], my_pes, 0); /* printf("PE=%d,i=%d,my_pes=%d,oldxs=%d\n",my_pe,i,my_pes,oldxs); */ if (oldxs != oldjs) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxs = %d expected = %d\n", my_pe, n_pes, i, oldxs, oldjs); oldjs = my_pes; } } shmem_barrier_all(); if (my_pe == 0) { /* check xs[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xs[%d]=%d,i=%d\n",j,j,xs[j],i); */ if (xs[j] != (short) i) fprintf(stderr, "FAIL PE %d of %d: xs[%d] = %d expected = %d\n", my_pe, n_pes, j, xs[j],i); i++; } } shmem_free(xs); /* test shmem_int_swap */ /* shmalloc xi on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(int) * n_pes); xi = shmem_malloc( max_elements_bytes ); for(i=0; 
i<n_pes; i++) xi[i] = 0; shmem_barrier_all(); oldji = 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pei = my_pei + (int) 1; /* record PE value in xi[my_pe] -- save PE number */ oldxi = shmem_int_swap(&xi[my_pe], my_pei, 0); /* printf("PE=%d,i=%d,my_pei=%d,oldxi=%d\n",my_pe,i,my_pei,oldxi); */ if (oldxi != oldji) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxi = %d expected = %d\n", my_pe, n_pes, i, oldxi, oldji); oldji = my_pei; } } shmem_barrier_all(); if (my_pe == 0) { /* check xi[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xi[%d]=%d,i=%d\n",j,j,xi[j],i); */ if (xi[j] != i) fprintf(stderr, "FAIL PE %d of %d: xi[%d] = %d expected = %d\n", my_pe, n_pes, j, xi[j],i); i++; } } shmem_free(xi); /* test shmem_long_swap */ /* shmalloc xl on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); xl = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xl[i] = 0; shmem_barrier_all(); oldjl = 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pel = my_pel + (long) 1; /* record PE value in xl[my_pe] -- save PE number */ oldxl = shmem_long_swap(&xl[my_pe], my_pel, 0); /* printf("PE=%d,i=%d,my_pel=%d,oldxl=%d\n",my_pe,i,my_pel,oldxl); */ if (oldxl != oldjl) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxl = %d expected = %d\n", my_pe, n_pes, i, oldxl, oldjl); oldjl = my_pel; } } shmem_barrier_all(); if (my_pe == 0) { /* check xl[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xl[%d]=%d,i=%d\n",j,j,xl[j],i); */ if (xl[j] != (long)i) fprintf(stderr, "FAIL PE %d of %d: xl[%d] = %ld expected = %d\n", my_pe, n_pes, j, xl[j],i); i++; } } shmem_free(xl); /* test shmem_longlong_swap */ #ifdef HAVE_LONG_LONG /* shmalloc xll on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(long long) * n_pes); xll = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xll[i] = 0; shmem_barrier_all(); oldjll = 0; for(i=0; i<ITER; i++) { if 
(my_pe != 0) { my_pell = my_pell + (long long) 1; /* record PE value in xll[my_pe] -- save PE number */ oldxll = shmem_longlong_swap(&xll[my_pe], my_pell, 0); /* printf("PE=%d,i=%d,my_pell=%ld,oldxll=%d\n",my_pe,i,my_pell,oldxll); */ if (oldxll != (long long) oldjll) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxll = %ld expected = %ld\n", my_pe, n_pes, i, oldxll, oldjll); oldjll = my_pell; } } shmem_barrier_all(); if (my_pe == 0) { /* check xll[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xll[%d]=%ld,i=%d\n",j,j,xll[j],i); */ if (xll[j] != (long long) i) fprintf(stderr, "FAIL PE %d of %d: xll[%d] = %d expected = %d\n", my_pe, n_pes, j, xll[j],i); i++; } } shmem_free(xll); #endif /* test shmem_float_swap */ /* shmalloc xf on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(float) * n_pes); xf = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xf[i] = (float) 0; shmem_barrier_all(); oldjf = (float) 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pef = my_pef + (float) 1; /* record PE value in xf[my_pe] -- save PE number */ oldxf = shmem_float_swap(&xf[my_pe], my_pef, 0); /* printf("PE=%d,i=%d,my_pef=%10.2f,oldxf=%10.2f\n",my_pe,i,my_pef,oldxf); */ if (oldxf != oldjf) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxf = %10.2f expected = %10.2f\n", my_pe, n_pes, i, oldxf, oldjf); oldjf = my_pef; } } shmem_barrier_all(); if (my_pe == 0) { /* check xs[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xf[%d]=%10.2f,i=%d\n",j,j,xf[j],i); */ if (xf[j] != (float) i) fprintf(stderr, "FAIL PE %d of %d: xf[%d] = %10.2f expected = %10.2f\n", my_pe, n_pes, j-1, xf[j], (float)i); i++; } } shmem_free(xf); /* test shmem_double_swap */ /* shmalloc xd on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(double) * n_pes); xd = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xd[i] = (double) 0; shmem_barrier_all(); oldjd = (double) 0; 
for(i=0; i<ITER; i++) { if (my_pe != 0) { my_ped = my_ped + (double) 1; /* record PE value in xd[my_pe] -- save PE number */ oldxd = shmem_double_swap(&xd[my_pe], my_ped, 0); /* printf("PE=%d,i=%d,my_ped=%10.2f,oldxd=%10.2f\n",my_pe,i,my_ped,oldxd); */ if (oldxd != oldjd) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxd = %10.2f expected = %10.2f\n", my_pe, n_pes, i, oldxd, oldjd); oldjd = my_ped; } } shmem_barrier_all(); if (my_pe == 0) { /* check xd[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xd[%d]=%10.2f,i=%d\n",j,j,xd[j],i); */ if (xd[j] != (double) i) fprintf(stderr, "FAIL PE %d of %d: xd[%d] = %10.2f expected = %10.2f\n", my_pe, n_pes, j, xd[j], (double)i); i++; } } shmem_free(xd); #ifdef SHMEM_C_GENERIC_32 /* test shmem_swap (GENERIC 32) */ my_pei = (int) my_pe; /* shmalloc xi on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(int) * n_pes); xi = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xi[i] = 0; shmem_barrier_all(); oldji = 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pei = my_pei + (int) 1; /* record PE value in xi[my_pe] -- save PE number */ oldxi = shmem_swap(&xi[my_pe], my_pei, 0); /* printf("PE=%d,i=%d,my_pei=%d,oldxi=%d\n",my_pe,i,my_pei,oldxi); */ if (oldxi != oldji) fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxi = %d expected = %d\n", my_pe, n_pes, i, oldxi, oldji); oldji = my_pei; } } shmem_barrier_all(); if (my_pe == 0) { /* check xi[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xi[%d]=%d,i=%d\n",j,j,xi[j],i); */ if (xi[j] != i) fprintf(stderr, "FAIL pe %d of %d: xi[%d] = %d expected = %d\n", my_pe, n_pes, j, xi[j],i); i++; } } shmem_free(xi); #else /* test shmem_swap (GENERIC 64) */ my_pel = (long) my_pe; /* shmalloc xl on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); xl = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xl[i] = 0; shmem_barrier_all(); 
oldjl = 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pel = my_pel + (long) 1; /* record PE value in xl[my_pe] -- save PE number */ oldxl = shmem_swap(&xl[my_pe], my_pel, 0); /* printf("PE=%d,i=%d,my_pel=%d,oldxl=%d\n",my_pe,i,my_pel,oldxl); */ if (oldxl != oldjl) fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxl = %d expected = %d\n", my_pe, n_pes, i, oldxl, oldjl); oldjl = my_pel; } } shmem_barrier_all(); if (my_pe == 0) { /* check xl[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xl[%d]=%d,i=%d\n",j,j,xl[j],i); */ if (xl[j] != (long)i) fprintf(stderr, "FAIL pe %d of %d: xl[%d] = %ld expected = %d\n", my_pe, n_pes, j, xl[j],i); i++; } } shmem_free(xl); #endif shmem_barrier_all(); #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
int main(int argc, char* argv[]) { int c, j, loops, k, l; int my_pe, nProcs, nWorkers; int nWords=1; int failures=0; char *prog_name; long *wp,work_sz; for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) { pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] = pSync4[j] = SHMEM_SYNC_VALUE; } shmem_init(); my_pe = shmem_my_pe(); nProcs = shmem_n_pes(); nWorkers = nProcs - 1; if (nProcs == 1) { Rfprintf(stderr, "ERR - Requires > 1 PEs\n"); shmem_finalize(); return 0; } for(j=0; j < nProcs; j++) if ( shmem_pe_accessible(j) != 1 ) { fprintf(stderr, "ERR - pe %d not accessible from pe %d\n", j, my_pe); } prog_name = strrchr(argv[0],'/'); if ( prog_name ) prog_name++; else prog_name = argv[0]; while((c=getopt(argc,argv,"hvM:s")) != -1) { switch(c) { case 's': Slow++; break; case 'v': Verbose++; break; case 'M': output_mod = atoi(optarg); if (output_mod <= 0) { Rfprintf(stderr, "ERR - output modulo arg out of " "bounds '%d'?\n", output_mod); shmem_finalize(); return 1; } Rfprintf(stderr,"%s: output modulo %d\n", prog_name,output_mod); break; case 'h': Rfprintf(stderr, "usage: %s {nWords-2-put(%d)K/M} {Loop-count(%d)K/M}\n", prog_name, DFLT_NWORDS, DFLT_LOOPS); shmem_finalize(); return 1; default: shmem_finalize(); return 1; } } if (optind == argc) nWords = DFLT_NWORDS; else { nWords = atoi_scaled(argv[optind++]); if (nWords <= 0) { Rfprintf(stderr, "ERR - Bad nWords arg '%d'?\n", nWords); shmem_finalize(); return 1; } } if (optind == argc) loops = DFLT_LOOPS; else { loops = atoi_scaled(argv[optind++]); if (loops <= 0 || loops > 1000000) { Rfprintf(stderr, "ERR - loops arg out of bounds '%d'?\n", loops); shmem_finalize(); return 1; } } work_sz = (nProcs*nWords) * sizeof(long); work = shmem_malloc( work_sz ); if ( !work ) { fprintf(stderr,"[%d] ERR - work = shmem_malloc(%ld) ?\n",my_pe,work_sz); shmem_global_exit(1); } Target = shmem_malloc( 2 * nWords * sizeof(long) ); if ( !Target ) { fprintf(stderr,"[%d] ERR - Target = shmem_malloc(%ld) ?\n", my_pe, (nWords * sizeof(long))); 
shmem_global_exit(1); } src = &Target[nWords]; #if _DEBUG Rprintf("%s: %d loops of %d longs per put\n",prog_name,loops,nWords); #endif for(j=0; j < nWords; j++) src[j] = VAL; for(j=0; j < loops; j++) { #if _DEBUG if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] +(%d)\n", my_pe,j); #endif shmem_barrier(0, 0, nProcs, pSync0); if ( my_pe == 0 ) { int p; for(p=1; p < nProcs; p++) shmem_long_put(Target, src, nWords, p); } else { if (Slow) { /* wait for each put to complete */ for(k=0; k < nWords; k++) shmem_wait(&Target[k],my_pe); } else { /* wait for last word to be written */ shmem_wait(&Target[nWords-1],my_pe); } } #if _DEBUG if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] -(%d)\n", shmem_my_pe(),j); #endif shmem_barrier(0, 0, nProcs, pSync1); RDprintf("Workers[1 ... %d] verify Target data put by proc0\n", nWorkers); /* workers verify put data is expected */ if ( my_pe != 0 ) { for(k=0; k < nWords; k++) { if (Target[k] != VAL) { fprintf(stderr, "[%d] Target[%d] %#lx " "!= %#x?\n", my_pe,k,Target[k],VAL); failures++; } assert(Target[k] == VAL); Target[k] = my_pe; } } else /* clear results buffer, workers will put here */ memset(work, 0, work_sz); shmem_barrier(0, 0, nProcs, pSync2); RDprintf("Workers[1 ... %d] put Target data to PE0 work " "vector\n",nWorkers); if ( my_pe != 0 ) { /* push nWords of val my_pe back to PE zero */ shmem_long_put(&work[my_pe * nWords], Target, nWords, 0); } else { /* wait for procs 1 ... 
nProcs to complete put()s */ for(l=1; l < nProcs; l++) { wp = &work[ l*nWords ]; // procs nWords chunk #if 1 /* wait for last long to be written from each PE */ shmem_wait(&wp[nWords-1],0); #else for(k=0; k < nWords; k++) shmem_wait(&wp[k],0); #endif } } shmem_barrier(0, 0, nProcs, pSync3); if ( my_pe == 0 ) { RDprintf("Loop(%d) PE0 verifing work data.\n",j); for(l=1; l < nProcs; l++) { wp = &work[ l*nWords ]; // procs nWords chunk for(k=0; k < nWords; k++) { if (wp[k] != l) { fprintf(stderr, "[0] PE(%d)_work[%d] %ld " "!= %d?\n", l,k,work[k],l); failures++; } assert(wp[k] == l); break; } if (failures) break; } } shmem_barrier(0, 0, nProcs, pSync4); #if _DEBUG if (loops > 1) { Rfprintf(stderr,"."); RDprintf("Loop(%d) Pass.\n",j); } #endif } shmem_free( work ); shmem_free( Target ); #if _DEBUG Rfprintf(stderr,"\n");fflush(stderr); shmem_barrier_all(); RDprintf("%d(%d) Exit(%d)\n", my_pe, nProcs, failures); #endif shmem_finalize(); return failures; }
int main(int argc, char **argv) { int me, c, l, j; int nWords, loops, incWords; pgm = strrchr(argv[0],'/'); if ( pgm ) pgm++; else pgm = argv[0]; shmem_init(); me = shmem_my_pe(); while ((c = getopt (argc, argv, "hpv")) != -1) switch (c) { case 'v': Verbose++; break; case 'h': default: usage(); break; } if (optind == argc) nWords = DFLT_NWORDS; else if ((nWords = getSize (argv[optind++])) <= 0) usage (); if (optind == argc) loops = DFLT_LOOPS; else if ((loops = getSize (argv[optind++])) < 0) usage (); if (optind == argc) incWords = DFLT_INCR; else if ((incWords = getSize (argv[optind++])) < 0) usage (); if (Verbose && me == 0) fprintf (stderr, "nWords(%d) loops(%d) incWords(%d)]\n", nWords, loops, incWords); for(l=0; l < loops; l++) { /* align 2**2 ... 2**23; 24 exceeds symetric heap max */ for(j=0,c=2; j < 23; j++,c<<=1) { target_sz = nWords * sizeof(DataType); if (!(target = (DataType *)shmem_align(c,target_sz))) { perror ("Failed target memory allocation"); exit (1); } if ( (unsigned long)target & (c-1) ) { fprintf(stdout,"PE%d Unaligned? ",me); fflush(stdout); fprintf(stdout,"align[%#09x]target %p\n", c, (void*)target); shmem_global_exit(1); } else if (Verbose > 1 && me == 0) fprintf(stdout,"align[%#09x]target %p\n", c, (void*)target); shmem_barrier_all(); shmem_free(target); } nWords += incWords; if (Verbose && me == 0) fprintf(stdout,"Fini loop %d\n",(l+1)); } shmem_finalize(); return 0; }
/*
 * Forwarding wrapper: passes ptr unchanged to shmem_free().
 */
void
shfree(void *ptr)
{
    shmem_free(ptr);
}
/*
 * Test driver for the get family of routines.
 *
 * Every PE fills its source buffers with its own PE number and fetches
 * from the next PE, (me + 1) % npes.  PE 0 therefore expects every
 * fetched value to be 1 and prints "Passed"/"Failed" per routine.
 *
 * Covered: shmem_{short,int,long,longdouble,longlong,double,float}_get,
 * shmem_getmem, shmem_get32/64/128 and shmem_{short,int,long,double,
 * float}_g.
 *
 * Note the successN flags are inverted: 0 means no mismatch was seen.
 *
 * The test is skipped (with a message) when run on a single PE.
 * Always returns 0.
 */
int main(void)
{
    int i;
    int nextpe;
    int me, npes;
    int success1, success2, success3, success4, success5, success6, success7,
        success8;

    short dest1[N];
    int dest2[N];
    long dest3[N];
    long double dest4[N];
    long long dest5[N];
    double dest6[N];
    float dest7[N];
    char *dest8;
    short dest9;
    int dest10;
    long dest11;
    double dest12;
    float dest13;

    short *src1;
    int *src2;
    long *src3;
    long double *src4;
    long long *src5;
    double *src6;
    float *src7;
    char *src8;
    short *src9;
    int *src10;
    long *src11;
    double *src12;
    float *src13;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    if (npes > 1) {

        success1 = 0;
        success2 = 0;
        success3 = 0;
        success4 = 0;
        success5 = 0;
        success6 = 0;
        success7 = 0;
        success8 = 0;
        dest8 = (char *) malloc(N * sizeof(char));

        /* poison the destinations so a get that did nothing is detected */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        dest9 = -9;
        dest10 = -9;
        dest11 = -9;
        dest12 = -9;
        dest13 = -9;

        src1 = (short *) shmem_malloc(N * sizeof(*src1));
        src2 = (int *) shmem_malloc(N * sizeof(*src2));
        src3 = (long *) shmem_malloc(N * sizeof(*src3));
        src4 = (long double *) shmem_malloc(N * sizeof(*src4));
        src5 = (long long *) shmem_malloc(N * sizeof(*src5));
        src6 = (double *) shmem_malloc(N * sizeof(*src6));
        src7 = (float *) shmem_malloc(N * sizeof(*src7));
        /* N elements: src8 is filled and fetched with N entries below */
        src8 = (char *) shmem_malloc(N * sizeof(*src8));
        src9 = (short *) shmem_malloc(sizeof(*src9));
        src10 = (int *) shmem_malloc(sizeof(*src10));
        src11 = (long *) shmem_malloc(sizeof(*src11));
        src12 = (double *) shmem_malloc(sizeof(*src12));
        src13 = (float *) shmem_malloc(sizeof(*src13));

        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        *src9 = (short) me;
        *src10 = me;
        *src11 = (long) me;
        *src12 = (double) me;
        *src13 = (float) me;

        nextpe = (me + 1) % npes;

        /* Testing shmem_short_get, shmem_int_get, shmem_long_get,
           shmem_longdouble_get, shmem_longlong_get, shmem_double_get,
           shmem_float_get, shmem_getmem */
        shmem_barrier_all();

        shmem_short_get(dest1, src1, N, nextpe);
        shmem_int_get(dest2, src2, N, nextpe);
        shmem_long_get(dest3, src3, N, nextpe);
        shmem_longdouble_get(dest4, src4, N, nextpe);
        shmem_longlong_get(dest5, src5, N, nextpe);
        shmem_double_get(dest6, src6, N, nextpe);
        shmem_float_get(dest7, src7, N, nextpe);
        shmem_getmem(dest8, src8, N * sizeof(char), nextpe);

        shmem_barrier_all();

        if (me == 0) {
            /* PE 0 fetched from PE 1, so every element should equal 1 */
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (1)) {
                    success1 = 1;
                }
                if (dest2[i] != (1)) {
                    success2 = 1;
                }
                if (dest3[i] != (1)) {
                    success3 = 1;
                }
                if (dest4[i] != (1)) {
                    success4 = 1;
                }
                if (dest5[i] != (1)) {
                    success5 = 1;
                }
                if (dest6[i] != (1)) {
                    success6 = 1;
                }
                if (dest7[i] != (1)) {
                    success7 = 1;
                }
                if (dest8[i] != (1)) {
                    success8 = 1;
                }
            }

            if (success1 == 0)
                printf("Test shmem_short_get: Passed\n");
            else
                printf("Test shmem_short_get: Failed\n");
            if (success2 == 0)
                printf("Test shmem_int_get: Passed\n");
            else
                printf("Test shmem_int_get: Failed\n");
            if (success3 == 0)
                printf("Test shmem_long_get: Passed\n");
            else
                printf("Test shmem_long_get: Failed\n");
            if (success4 == 0)
                printf("Test shmem_longdouble_get: Passed\n");
            else
                printf("Test shmem_longdouble_get: Failed\n");
            if (success5 == 0)
                printf("Test shmem_longlong_get: Passed\n");
            else
                printf("Test shmem_longlong_get: Failed\n");
            if (success6 == 0)
                printf("Test shmem_double_get: Passed\n");
            else
                printf("Test shmem_double_get: Failed\n");
            if (success7 == 0)
                printf("Test shmem_float_get: Passed\n");
            else
                printf("Test shmem_float_get: Failed\n");
            if (success8 == 0)
                printf("Test shmem_getmem: Passed\n");
            else
                printf("Test shmem_getmem: Failed\n");
        }
        shmem_barrier_all();

        /* Testing shmem_get32, shmem_get64, shmem_get128.  Which buffers
           are used depends on sizeof(int). */
        if (sizeof(int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all();

            shmem_get32(dest2, src2, N, nextpe);
            shmem_get64(dest3, src3, N, nextpe);
            shmem_get128(dest4, src4, N, nextpe);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (1)) {
                        success4 = 1;
                    }
                }
                if (success2 == 0)
                    printf("Test shmem_get32: Passed\n");
                else
                    printf("Test shmem_get32: Failed\n");

                if (success3 == 0)
                    printf("Test shmem_get64: Passed\n");
                else
                    printf("Test shmem_get64: Failed\n");

                if (success4 == 0)
                    printf("Test shmem_get128: Passed\n");
                else
                    printf("Test shmem_get128: Failed\n");
            }
        }
        else if (sizeof(int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all();

            shmem_get32(dest1, src1, N, nextpe);
            shmem_get64(dest2, src2, N, nextpe);
            shmem_get128(dest3, src3, N, nextpe);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (1)) {
                        success3 = 1;
                    }
                }
                if (success1 == 0)
                    printf("Test shmem_get32: Passed\n");
                else
                    printf("Test shmem_get32: Failed\n");
                if (success2 == 0)
                    printf("Test shmem_get64: Passed\n");
                else
                    printf("Test shmem_get64: Failed\n");

                if (success3 == 0)
                    printf("Test shmem_get128: Passed\n");
                else
                    printf("Test shmem_get128: Failed\n");
            }
        }

        /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g,
           shmem_short_g */
        shmem_barrier_all();

        dest9 = shmem_short_g(src9, nextpe);
        dest10 = shmem_int_g(src10, nextpe);
        dest11 = shmem_long_g(src11, nextpe);
        dest12 = shmem_double_g(src12, nextpe);
        dest13 = shmem_float_g(src13, nextpe);

        shmem_barrier_all();

        if (me == 0) {
            if (dest9 == 1)
                printf("Test shmem_short_g: Passed\n");
            else
                printf("Test shmem_short_g: Failed\n");
            if (dest10 == 1)
                printf("Test shmem_int_g: Passed\n");
            else
                printf("Test shmem_int_g: Failed\n");
            if (dest11 == 1)
                printf("Test shmem_long_g: Passed\n");
            else
                printf("Test shmem_long_g: Failed\n");
            if (dest12 == 1)
                printf("Test shmem_double_g: Passed\n");
            else
                printf("Test shmem_double_g: Failed\n");
            if (dest13 == 1)
                printf("Test shmem_float_g: Passed\n");
            else
                printf("Test shmem_float_g: Failed\n");
        }

        shmem_barrier_all();

        /* release every buffer allocated above, including the scalar
           sources and the malloc'ed dest8 */
        shmem_free(src1);
        shmem_free(src2);
        shmem_free(src3);
        shmem_free(src4);
        shmem_free(src5);
        shmem_free(src6);
        shmem_free(src7);
        shmem_free(src8);
        shmem_free(src9);
        shmem_free(src10);
        shmem_free(src11);
        shmem_free(src12);
        shmem_free(src13);
        free(dest8);
    }
    else {
        printf("Number of PEs must be > 1 to test shmem get, test skipped\n");
    }

    shmem_finalize();

    return 0;
}
int main(int argc, char **argv) { int i,j; short modjs, oldjs, oldxmodjs, oldxas, my_pes, vals; int modji, oldji, oldxmodji, oldxai, my_pei, vali; long modjl, oldjl, oldxmodjl, oldxal, my_pel, vall; long long modjll,oldjll,oldxmodjll,oldxall,my_pell,valll; int my_pe,n_pes; size_t max_elements,max_elements_bytes; static short *xs,*xas; static int *xi,*xai; static long *xl,*xal; static long long *xll,*xall; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); my_pes = (short) my_pe; my_pei = (int) my_pe; my_pel = (long) my_pe; my_pell = (long long) my_pe; vals = 1; vali = 1; vall = 1; valll = 1; #ifdef HAVE_SET_CACHE_INV shmem_set_cache_inv(); #endif /* fail if trying to use only one processor */ if ( n_pes <= 1 ){ fprintf(stderr, "FAIL - test requires at least two PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_cswap(%s) n_pes=%d\n", argv[0],n_pes); /* test shmem_short_finc & shmem_short_swap & shmem_short_cswap */ /* shmalloc xs & xas on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(short) * n_pes); xs = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xs[i] = 0; max_elements_bytes = (size_t) (sizeof(short) * n_pes * ITER); xas = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes*ITER; i++) if (((i/(n_pes-1)) % 2) == 0) { xas[i] = 1; } else { xas[i] = 0; } count_short = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (i == ITER-1) shmem_barrier_all(); /* all PEs participate last time */ if (my_pe != 0) { oldjs = shmem_short_finc(&count_short, 0); /* get index oldjs from PE 0 */ modjs = (oldjs % (n_pes-1)); /* PE 0 is just the counter/checker */ /* conditionally record PE value in xas[oldjs] -- tells PE involved for each count */ oldxas = shmem_short_cswap(&xas[oldjs], vals, my_pes, 0); /* printf("PE=%d,i=%d,oldjs=%d,oldxas=%d\n",my_pe,i,oldjs,oldxas); */ if (oldxas == 1) { /* record PE value in xs[modjs] */ oldxmodjs = shmem_short_swap(&xs[modjs], my_pes, 0); /* 
printf("PE=%d,oldjs=%d,modjs=%d,oldxmodjs=%d\n", my_pe,oldjs,modjs,oldxmodjs); */ } if (oldxas != 0 && oldxas != 1) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxas = %d expected = 0\n", my_pe, n_pes, i, oldxas); } } shmem_barrier_all(); if (my_pe == 0) { /* check last xs[j] array PEs vs saved ans in xas[i] */ i = (ITER-2)*(n_pes-1); for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xs[%d]=%d,xas[%d]=%d\n",j,j-1,xs[j-1],i,xas[i]); */ if (xs[j-1] != xas[i]) fprintf(stderr, "FAIL PE %d of %d: xs[%d] = %d expected = %d\n", my_pe, n_pes, j-1, xs[j-1], xas[i]); i++; } } shmem_free(xs); shmem_free(xas); /* test shmem_int_finc & shmem_int_swap & shmem_int_cswap */ /* shmalloc xi & xai on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(int) * n_pes); xi = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xi[i] = 0; max_elements_bytes = (size_t) (sizeof(int) * n_pes * ITER); xai = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes*ITER; i++) if (((i/(n_pes-1)) % 2) == 0) { xai[i] = 1; } else { xai[i] = 0; } count_int = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (i == ITER-1) shmem_barrier_all(); /* all PEs participate last time */ if (my_pe != 0) { oldji = shmem_int_finc(&count_int, 0); /* get index oldji from PE 0 */ modji = (oldji % (n_pes-1)); /* PE 0 is just the counter/checker */ /* conditionally record PE value in xai[oldji] -- tells PE involved for each count */ oldxai = shmem_int_cswap(&xai[oldji], vali, my_pei, 0); /* printf("PE=%d,i=%d,oldji=%d,oldxai=%d\n",my_pe,i,oldji,oldxai); */ if (oldxai == 1) { /* record PE value in xi[modji] */ oldxmodji = shmem_int_swap(&xi[modji], my_pei, 0); /* printf("PE=%d,oldji=%d,modji=%d,oldxmodji=%d\n", my_pe,oldji,modji,oldxmodji); */ } if (oldxai != 0 && oldxai != 1) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxai = %d expected = 0\n", my_pe, n_pes, i, oldxai); } } shmem_barrier_all(); if (my_pe == 0) { /* check last xi[j] array PEs vs saved ans in xai[i] */ i = (ITER-2)*(n_pes-1); for(j=1 
; j<n_pes; j++) { /* printf("j=%d,xi[%d]=%d,xai[%d]=%d\n",j,j-1,xi[j-1],i,xai[i]); */ if (xi[j-1] != xai[i]) fprintf(stderr, "FAIL PE %d of %d: xi[%d] = %d expected = %d\n", my_pe, n_pes, j-1, xi[j-1], xai[i]); i++; } } shmem_free(xi); shmem_free(xai); /* test shmem_long_finc & shmem_long_swap & shmem_long_cswap */ /* shmalloc xl & xal on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); xl = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xl[i] = 0; max_elements_bytes = (size_t) (sizeof(long) * n_pes * ITER); xal = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes*ITER; i++) if (((i/(n_pes-1)) % 2) == 0) { xal[i] = 1; } else { xal[i] = 0; } count_long = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (i == ITER-1) shmem_barrier_all(); /* all PEs participate last time */ if (my_pe != 0) { oldjl = shmem_long_finc(&count_long, 0); /* get index oldjl from PE 0 */ modjl = (oldjl % (n_pes-1)); /* PE 0 is just the counter/checker */ /* conditionally record PE value in xal[oldjl] -- tells PE involved for each count */ oldxal = shmem_long_cswap(&xal[oldjl], vall, my_pel, 0); /* printf("PE=%d,i=%d,oldjl=%d,oldxal=%d\n",my_pe,i,oldjl,oldxal); */ if (oldxal == 1) { /* record PE value in xl[modjl] */ oldxmodjl = shmem_long_swap(&xl[modjl], my_pel, 0); /* printf("PE=%d,oldjl=%ld,modjl=%ld,oldxmodjl=%ld\n", my_pe,oldjl,modjl,oldxmodjl); */ } if (oldxal != 0 && oldxal != 1) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxal = %ld expected = 0\n", my_pe, n_pes, i, oldxal); } } shmem_barrier_all(); if (my_pe == 0) { /* check last xl[j] array PEs vs saved ans in xal[i] */ i = (ITER-2)*(n_pes-1); for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xl[%d]=%ld,xal[%d]=%ld\n",j,j-1,xl[j-1],i,xal[i]); */ if (xl[j-1] != xal[i]) fprintf(stderr, "FAIL PE %d of %d: xl[%d] = %ld expected = %ld\n", my_pe, n_pes, j-1, xl[j-1], xal[i]); i++; } } shmem_free(xl); shmem_free(xal); /* test shmem_longlong_finc & shmem_longlong_swap & shmem_longlong_cswap 
*/ #ifdef HAVE_LONG_LONG /* shmalloc xll & xall on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(long long) * n_pes); xll = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xll[i] = 0; max_elements_bytes = (size_t) (sizeof(long long) * n_pes * ITER); xall = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes*ITER; i++) if (((i/(n_pes-1)) % 2) == 0) { xall[i] = 1; } else { xall[i] = 0; } count_longlong = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (i == ITER-1) shmem_barrier_all(); /* all PEs participate last time */ if (my_pe != 0) { oldjll = shmem_longlong_finc(&count_longlong, 0); /* get index oldjll from PE 0 */ modjll = (oldjll % (n_pes-1)); /* PE 0 is just the counter/checker */ /* conditionally record PE value in xall[oldjll] -- tells PE involved for each count */ oldxall = shmem_longlong_cswap(&xall[oldjll], valll, my_pell, 0); /* printf("PE=%d,i=%d,oldjll=%d,oldxall=%d\n",my_pe,i,oldjll,oldxall); */ if (oldxall == 1) { /* record PE value in xll[modjll] */ oldxmodjll = shmem_longlong_swap(&xll[modjll], my_pell, 0); /* printf("PE=%d,oldjll=%ld,modjll=%ld,oldxmodjll=%ld\n", my_pe,oldjll,modjll,oldxmodjll); */ } if (oldxall != 0 && oldxall != 1) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxall = %ld expected = 0\n", my_pe, n_pes, i, oldxall); } } shmem_barrier_all(); if (my_pe == 0) { /* check last xll[j] array PEs vs saved ans in xall[i] */ i = (ITER-2)*(n_pes-1); for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xll[%d]=%ld,xall[%d]=%ld\n",j,j-1,xll[j-1],i,xall[i]); */ if (xll[j-1] != xall[i]) fprintf(stderr, "FAIL PE %d of %d: xll[%d] = %ld expected = %ld\n", my_pe, n_pes, j-1, xll[j-1], xall[i]); i++; } } shmem_free(xll); shmem_free(xall); #endif #ifdef SHMEM_C_GENERIC_32 /* test shmem_finc & shmem_swap & shmem_cswap (GENERIC 32) */ /* shmalloc xi & xai on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(int) * n_pes); xi = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xi[i] = 
0; max_elements_bytes = (size_t) (sizeof(int) * n_pes * ITER); xai = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes*ITER; i++) if (((i/(n_pes-1)) % 2) == 0) { xai[i] = 1; } else { xai[i] = 0; } count_int = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (i == ITER-1) shmem_barrier_all(); /* all PEs participate last time */ if (my_pe != 0) { oldji = shmem_finc(&count_int, 0); /* get index oldji from PE 0 */ modji = (oldji % (n_pes-1)); /* PE 0 is just the counter/checker */ /* conditionally record PE value in xai[oldji] -- tells PE involved for each count */ oldxai = shmem_cswap(&xai[oldji], vali, my_pei, 0); /* printf("PE=%d,i=%d,oldji=%d,oldxai=%d\n",my_pe,i,oldji,oldxai); */ if (oldxai == 1) { /* record PE value in xi[modji] */ oldxmodji = shmem_swap(&xi[modji], my_pei, 0); /* printf("PE=%d,oldji=%d,modji=%d,oldxmodji=%d\n", my_pe,oldji,modji,oldxmodji); */ } if (oldxai != 0 && oldxai != 1) fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxai = %d expected = 0\n", my_pe, n_pes, i, oldxai); } } shmem_barrier_all(); if (my_pe == 0) { /* check last xi[j] array PEs vs saved ans in xai[i] */ i = (ITER-2)*(n_pes-1); for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xi[%d]=%d,xai[%d]=%d\n",j,j-1,xi[j-1],i,xai[i]); */ if (xi[j-1] != xai[i]) fprintf(stderr, "FAIL pe %d of %d: xi[%d] = %d expected = %d\n", my_pe, n_pes, j-1, xi[j-1], xai[i]); i++; } } shmem_free(xi); shmem_free(xai); #else /* test shmem_finc & shmem_swap & shmem_cswap (GENERIC 64) */ /* shmalloc xl & xal on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); xl = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xl[i] = 0; max_elements_bytes = (size_t) (sizeof(long) * n_pes * ITER); xal = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes*ITER; i++) if (((i/(n_pes-1)) % 2) == 0) { xal[i] = 1; } else { xal[i] = 0; } count_long = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (i == ITER-1) shmem_barrier_all(); /* all PEs participate last time */ if (my_pe 
!= 0) { oldjl = shmem_finc(&count_long, 0); /* get index oldjl from PE 0 */ modjl = (oldjl % (n_pes-1)); /* PE 0 is just the counter/checker */ /* conditionally record PE value in xal[oldjl] -- tells PE involved for each count */ oldxal = shmem_cswap(&xal[oldjl], vall, my_pell, 0); /* printf("PE=%d,i=%d,oldjl=%d,oldxal=%d\n",my_pe,i,oldjl,oldxal); */ if (oldxal == 1) { /* record PE value in xl[modjl] */ oldxmodjl = shmem_swap(&xl[modjl], my_pell, 0); /* printf("PE=%d,oldjl=%ld,modjl=%ld,oldxmodjl=%ld\n", my_pe,oldjl,modjl,oldxmodjl); */ } if (oldxal != 0 && oldxal != 1) fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxal = %ld expected = 0\n", my_pe, n_pes, i, oldxal); } } shmem_barrier_all(); if (my_pe == 0) { /* check last xl[j] array PEs vs saved ans in xal[i] */ i = (ITER-2)*(n_pes-1); for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xl[%d]=%ld,xal[%d]=%ld\n",j,j-1,xl[j-1],i,xal[i]); */ if (xl[j-1] != xal[i]) fprintf(stderr, "FAIL pe %d of %d: xl[%d] = %ld expected = %10.2f\n", my_pe, n_pes, j-1, xl[j-1], xal[i]); i++; } } shmem_free(xl); shmem_free(xal); #endif shmem_barrier_all(); #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
int main(int argc, char* argv[]) { int c, j, cloop, loops = DFLT_LOOPS; int mpe, num_pes; int nWords=1; int nIncr=1; int failures=0; char *pgm; shmem_init(); mpe = shmem_my_pe(); num_pes = shmem_n_pes(); if (num_pes == 1) { Rfprintf(stderr, "ERR - Requires > 1 PEs\n"); shmem_finalize(); return 0; } pgm = strrchr(argv[0],'/'); if ( pgm ) pgm++; else pgm = argv[0]; while((c=getopt(argc,argv,"hqVvl:")) != -1) { switch(c) { case 'V': case 'v': Verbose++; break; case 'l': loops = atoi(optarg); break; case 'h': Rfprintf(stderr, "usage: %s {-l loopcnt(%d)} {numLongs(%d)} {loopIncr(%d)}\n", pgm,DFLT_LOOPS,DFLT_NWORDS,DFLT_INCR); shmem_finalize(); return 1; default: shmem_finalize(); return 1; } } if (optind == argc) nWords = DFLT_NWORDS; else { nWords = atoi_scaled(argv[optind++]); if (nWords <= 0) { Rfprintf(stderr, "ERR - Bad nBytes arg?\n"); shmem_finalize(); return 1; } } if (optind == argc) nIncr = DFLT_INCR; else { loops = atoi(argv[optind++]); if (nIncr <= 0 ) { Rfprintf(stderr, "ERR - incLongs arg out of bounds '%d'?\n", nIncr); shmem_finalize(); return 1; } } if ( nWords % 8 ) { // integral multiple of longs Rprintf("%s: nWords(%d) not a multiple of %ld?\n", pgm,nWords,sizeof(long)); shmem_finalize(); return 1; } for (c = 0; c < _SHMEM_COLLECT_SYNC_SIZE;c++) pSync[c] = _SHMEM_SYNC_VALUE; if (Verbose && mpe == 0) fprintf(stderr,"loops(%d) nWords(%d) incr-per-loop(%d)\n", loops,nWords,nIncr); for(cloop=1; cloop <= loops; cloop++) { c = (sizeof(long)*nWords) * (num_pes + 1); // src + dst allocation. //nWords /= sizeof(long); // convert input of bytes --> longs. src = (long*)shmem_malloc(c); if ( !src ) { Rprintf("[%d] %s: shmem_malloc(%d) failed?\n", mpe, pgm,c); shmem_global_exit(1); } dst = &src[nWords]; for(j=0; j < nWords; j++) src[j] = (long) (j + mpe*nWords); shmem_barrier_all(); shmem_fcollect64(dst,src,nWords,0,0,num_pes,pSync); // Expect dst to be consecuative integers 0 ... 
(nLongs*num_pes)-1 for(j=0; j < (nWords*num_pes); j++) { if ( dst[j] != (long) j ) { fprintf(stderr, "[%d] dst[%d] %ld != expected %d\n",mpe,j,dst[j],j); shmem_global_exit(1); } } shmem_barrier_all(); if (Verbose && mpe == 0 && loops > 1) { fprintf(stderr,"."); } nWords += nIncr; } if (Verbose && mpe == 0) { fprintf(stderr,"\n");fflush(stderr); } shmem_free( (void*)src ); shmem_barrier_all(); if (Verbose) printf("%d(%d) Exit(%d)\n", mpe, num_pes, failures); shmem_finalize(); return failures; }
int main (int argc, char *argv[]) { /**** Initialising ****/ const unsigned long long full_program_start = current_time_ns(); { shmem_init (); /* Variable Declarations */ int Numprocs,MyRank, Root = 0; int i,j,k, NoofElements, NoofElements_Bloc, NoElementsToSort; int count, temp; TYPE *Input, *InputData; TYPE *Splitter, *AllSplitter; TYPE *Buckets, *BucketBuffer, *LocalBucket; TYPE *OutputBuffer, *Output; MyRank = shmem_my_pe (); Numprocs = shmem_n_pes (); NoofElements = SIZE; if(( NoofElements % Numprocs) != 0){ if(MyRank == Root) printf("Number of Elements are not divisible by Numprocs \n"); shmem_finalize (); exit(0); } /**** Reading Input ****/ Input = (TYPE *) shmem_malloc (NoofElements*sizeof(*Input)); if(Input == NULL) { printf("Error : Can not allocate memory \n"); } if (MyRank == Root){ /* Initialise random number generator */ printf ("Generating input Array for Sorting %d uint64_t numbers\n",SIZE); srand48((TYPE)NoofElements); for(i=0; i< NoofElements; i++) { Input[i] = rand(); } } /**** Sending Data ****/ NoofElements_Bloc = NoofElements / Numprocs; InputData = (TYPE *) shmem_malloc (NoofElements_Bloc * sizeof (*InputData)); if(InputData == NULL) { printf("Error : Can not allocate memory \n"); } //MPI_Scatter(Input, NoofElements_Bloc, TYPE_MPI, InputData, // NoofElements_Bloc, TYPE_MPI, Root, MPI_COMM_WORLD); shmem_barrier_all(); if(MyRank == Root) { for(i=0; i<Numprocs; i++) { TYPE* start = &Input[i * NoofElements_Bloc]; shmem_put64(InputData, start, NoofElements_Bloc, i); } } shmem_barrier_all(); /**** Sorting Locally ****/ sorting(InputData, NoofElements_Bloc); /**** Choosing Local Splitters ****/ Splitter = (TYPE *) shmem_malloc (sizeof (TYPE) * (Numprocs-1)); if(Splitter == NULL) { printf("Error : Can not allocate memory \n"); } for (i=0; i< (Numprocs-1); i++){ Splitter[i] = InputData[NoofElements/(Numprocs*Numprocs) * (i+1)]; } /**** Gathering Local Splitters at Root ****/ AllSplitter = (TYPE *) shmem_malloc (sizeof (TYPE) * Numprocs * 
(Numprocs-1)); if(AllSplitter == NULL) { printf("Error : Can not allocate memory \n"); } //MPI_Gather (Splitter, Numprocs-1, TYPE_MPI, AllSplitter, Numprocs-1, // TYPE_MPI, Root, MPI_COMM_WORLD); shmem_barrier_all(); TYPE* target_index = &AllSplitter[MyRank * (Numprocs-1)]; shmem_put64(target_index, Splitter, Numprocs-1, Root); shmem_barrier_all(); /**** Choosing Global Splitters ****/ if (MyRank == Root){ sorting (AllSplitter, Numprocs*(Numprocs-1)); for (i=0; i<Numprocs-1; i++) Splitter[i] = AllSplitter[(Numprocs-1)*(i+1)]; } /**** Broadcasting Global Splitters ****/ //MPI_Bcast (Splitter, Numprocs-1, TYPE_MPI, 0, MPI_COMM_WORLD); { int _i; for(_i=0; _i<_SHMEM_BCAST_SYNC_SIZE; _i++) { pSync[_i] = _SHMEM_SYNC_VALUE; } shmem_barrier_all(); } shmem_broadcast64(Splitter, Splitter, Numprocs-1, 0, 0, 0, Numprocs, pSync); shmem_barrier_all(); /**** Creating Numprocs Buckets locally ****/ Buckets = (TYPE *) shmem_malloc (sizeof (TYPE) * (NoofElements + Numprocs)); if(Buckets == NULL) { printf("Error : Can not allocate memory \n"); } j = 0; k = 1; for (i=0; i<NoofElements_Bloc; i++){ if(j < (Numprocs-1)){ if (InputData[i] < Splitter[j]) Buckets[((NoofElements_Bloc + 1) * j) + k++] = InputData[i]; else{ Buckets[(NoofElements_Bloc + 1) * j] = k-1; k=1; j++; i--; } } else Buckets[((NoofElements_Bloc + 1) * j) + k++] = InputData[i]; } Buckets[(NoofElements_Bloc + 1) * j] = k - 1; shmem_free(Splitter); shmem_free(AllSplitter); /**** Sending buckets to respective processors ****/ BucketBuffer = (TYPE *) shmem_malloc (sizeof (TYPE) * (NoofElements + Numprocs)); if(BucketBuffer == NULL) { printf("Error : Can not allocate memory \n"); } //MPI_Alltoall (Buckets, NoofElements_Bloc + 1, TYPE_MPI, BucketBuffer, // NoofElements_Bloc + 1, TYPE_MPI, MPI_COMM_WORLD); shmem_barrier_all(); for(i=0; i<Numprocs; i++) { shmem_put64(&BucketBuffer[MyRank*(NoofElements_Bloc + 1)], &Buckets[i*(NoofElements_Bloc + 1)], NoofElements_Bloc + 1, i); } shmem_barrier_all(); /**** Rearranging BucketBuffer 
****/ LocalBucket = (TYPE *) shmem_malloc (sizeof (TYPE) * 2 * NoofElements / Numprocs); if(LocalBucket == NULL) { printf("Error : Can not allocate memory \n"); } count = 1; for (j=0; j<Numprocs; j++) { k = 1; for (i=0; i<BucketBuffer[(NoofElements/Numprocs + 1) * j]; i++) LocalBucket[count++] = BucketBuffer[(NoofElements/Numprocs + 1) * j + k++]; } LocalBucket[0] = count-1; /**** Sorting Local Buckets using Bubble Sort ****/ /*sorting (InputData, NoofElements_Bloc, sizeof(int), intcompare); */ NoElementsToSort = LocalBucket[0]; sorting (&LocalBucket[1], NoElementsToSort); /**** Gathering sorted sub blocks at root ****/ OutputBuffer = (TYPE *) shmem_malloc (sizeof(TYPE) * 2 * NoofElements); if(OutputBuffer == NULL) { printf("Error : Can not allocate memory \n"); } //MPI_Gather (LocalBucket, 2*NoofElements_Bloc, TYPE_MPI, OutputBuffer, // 2*NoofElements_Bloc, TYPE_MPI, Root, MPI_COMM_WORLD); shmem_barrier_all(); target_index = &OutputBuffer[MyRank * (2*NoofElements_Bloc)]; shmem_put64(target_index, LocalBucket, 2*NoofElements_Bloc, Root); shmem_barrier_all(); /**** Rearranging output buffer ****/ if (MyRank == Root){ Output = (TYPE *) malloc (sizeof (TYPE) * NoofElements); count = 0; for(j=0; j<Numprocs; j++){ k = 1; for(i=0; i<OutputBuffer[(2 * NoofElements/Numprocs) * j]; i++) Output[count++] = OutputBuffer[(2*NoofElements/Numprocs) * j + k++]; } printf ( "Number of Elements to be sorted : %d \n", NoofElements); TYPE prev = 0; int fail = 0; for (i=0; i<NoofElements; i++){ if(Output[i] < prev) { printf("Failed at index %d\n",i); fail = 1; } prev = Output[i]; } if(fail) printf("Sorting FAILED\n"); else printf("Sorting PASSED\n"); free(Output); }/* MyRank==0*/ shmem_free(Input); shmem_free(OutputBuffer); shmem_free(InputData); shmem_free(Buckets); shmem_free(BucketBuffer); shmem_free(LocalBucket); /**** Finalize ****/ shmem_finalize(); } ; const unsigned long long full_program_end = current_time_ns(); printf("full_program %llu ns\n", full_program_end - 
full_program_start); }
int main () { int me, npes; int *dest1; float *dest2; long *dest3; double *dest4; long long *dest5; int swapped_val1, new_val1; float swapped_val2, new_val2; long swapped_val3, new_val3; double swapped_val4, new_val4; long long swapped_val5, new_val5; int success = 1; int success1_p1; int success2_p1; int success3_p1; int success4_p1; int success5_p1; shmem_init (); me = shmem_my_pe (); npes = shmem_n_pes (); shmem_barrier_all (); /* Checks if there are atleast 2 executing PEs */ if (npes > 1) { dest1 = (int *) shmem_malloc (sizeof (*dest1)); dest2 = (float *) shmem_malloc (sizeof (*dest2)); dest3 = (long *) shmem_malloc (sizeof (*dest3)); dest4 = (double *) shmem_malloc (sizeof (*dest4)); dest5 = (long long *) shmem_malloc (sizeof (*dest5)); *dest1 = *dest2 = *dest3 = *dest4 = *dest5 = me; new_val1 = new_val2 = new_val3 = new_val4 = new_val5 = me; success1_p1 = success1_p2 = success2_p1 = success2_p2 = success3_p1 = success3_p2 = success4_p1 = success4_p2 = success5_p1 = success5_p2 = -1; shmem_barrier_all (); swapped_val1 = shmem_int_swap (dest1, new_val1, (me + 1) % npes); swapped_val2 = shmem_float_swap (dest2, new_val2, (me + 1) % npes); swapped_val3 = shmem_long_swap (dest3, new_val3, (me + 1) % npes); swapped_val4 = shmem_double_swap (dest4, new_val4, (me + 1) % npes); swapped_val5 = shmem_longlong_swap (dest5, new_val5, (me + 1) % npes); /* To validate the working of swap we need to check the value received at the PE that initiated the swap as well as the dest PE */ if (me == 0) { if (swapped_val1 == 1) { success1_p1 = 1; } if (swapped_val2 == 1) { success2_p1 = 1; } if (swapped_val3 == 1) { success3_p1 = 1; } if (swapped_val4 == 1) { success4_p1 = 1; } if (swapped_val5 == 1) { success5_p1 = 1; } } if (me == 1) { if (*dest1 == 0) { shmem_int_put (&success1_p2, &success, 1, 0); } if (*dest2 == 0) { shmem_int_put (&success2_p2, &success, 1, 0); } if (*dest3 == 0) { shmem_int_put (&success3_p2, &success, 1, 0); } if (*dest4 == 0) { shmem_int_put (&success4_p2, 
&success, 1, 0); } if (*dest5 == 0) { shmem_int_put (&success5_p2, &success, 1, 0); } } shmem_barrier_all (); if (me == 0) { if (success1_p1 && success1_p2) { printf ("Test shmem_int_swap: Passed\n"); } else { printf ("Test shmem_int_swap: Failed\n"); } if (success2_p1 && success2_p2) { printf ("Test shmem_float_swap: Passed\n"); } else { printf ("Test shmem_float_swap: Failed\n"); } if (success3_p1 && success3_p2) { printf ("Test shmem_long_swap: Passed\n"); } else { printf ("Test shmem_long_swap: Failed\n"); } if (success4_p1 && success4_p2) { printf ("Test shmem_double_swap: Passed\n"); } else { printf ("Test shmem_double_swap: Failed\n"); } if (success5_p1 && success5_p2) { printf ("Test shmem_longlong_swap: Passed\n"); } else { printf ("Test shmem_longlong_swap: Failed\n"); } } shmem_barrier_all (); /* Test conditional swaps shmem_longlong_cswap, shmem_long_cswap, shmem_int_cswap, */ *dest1 = *dest3 = *dest5 = me; new_val1 = new_val3 = new_val5 = me; success1_p1 = success1_p2 = success3_p1 = success3_p2 = success5_p1 = success5_p2 = -1; shmem_barrier_all (); swapped_val1 = shmem_int_cswap (dest1, me + 1, (long) me, 1); swapped_val3 = shmem_long_cswap (dest3, me + 1, (long) me, 1); swapped_val5 = shmem_longlong_cswap (dest5, me + 1, (long) me, 1); /* To validate the working of conditionalswap we need to check the value received at the PE that initiated the conditional swap as well as the dest PE */ if (me == 0) { if (swapped_val1 == 1) { success1_p1 = 1; } if (swapped_val3 == 1) { success3_p1 = 1; } if (swapped_val5 == 1) { success5_p1 = 1; } } if (me == 1) { if (*dest1 == 0) { shmem_int_put (&success1_p2, &success, 1, 0); } if (*dest3 == 0) { shmem_int_put (&success3_p2, &success, 1, 0); } if (*dest5 == 0) { shmem_int_put (&success5_p2, &success, 1, 0); } } shmem_barrier_all (); if (me == 0) { if (success1_p1 && success1_p2) { printf ("Test shmem_int_cswap: Passed\n"); } else { printf ("Test shmem_int_cswap: Failed\n"); } if (success3_p1 && success3_p2) { 
printf ("Test shmem_long_cswap: Passed\n"); } else { printf ("Test shmem_long_cswap: Failed\n"); } if (success5_p1 && success5_p2) { printf ("Test shmem_longlong_cswap: Passed\n"); } else { printf ("Test shmem_longlong_cswap: Failed\n"); } } shmem_barrier_all (); /* Test shmem_long_fadd, shmem_int_fadd, shmem_longlong_fadd */ *dest1 = *dest3 = *dest5 = me; new_val1 = new_val3 = new_val5 = me; success1_p1 = success1_p2 = success3_p1 = success3_p2 = success5_p1 = success5_p2 = -1; shmem_barrier_all (); swapped_val1 = shmem_int_fadd (dest1, 1, 0); swapped_val3 = shmem_long_fadd (dest3, 1, 0); swapped_val5 = shmem_longlong_fadd (dest5, 1, 0); /* To validate the working of fetch and add we need to check the old value received at the PE that initiated the fetch and increment as well as the new value on the dest PE */ if (me != 0) { if (swapped_val1 == 0) { success1_p1 = 1; } if (swapped_val3 == 0) { success3_p1 = 1; } if (swapped_val5 == 0) { success5_p1 = 1; } } if (me == 0) { if (*dest1 == npes - 1) { shmem_int_put (&success1_p2, &success, 1, npes - 1); } if (*dest3 == npes - 1) { shmem_int_put (&success3_p2, &success, 1, npes - 1); } if (*dest5 == npes - 1) { shmem_int_put (&success5_p2, &success, 1, npes - 1); } } shmem_barrier_all (); if (me == npes - 1) { if (success1_p1 && success1_p2) { printf ("Test shmem_int_fadd: Passed\n"); } else { printf ("Test shmem_int_fadd: Failed\n"); } if (success3_p1 && success3_p2) { printf ("Test shmem_long_fadd: Passed\n"); } else { printf ("Test shmem_long_fadd: Failed\n"); } if (success5_p1 && success5_p2) { printf ("Test shmem_longlong_fadd: Passed\n"); } else { printf ("Test shmem_longlong_fadd: Failed\n"); } } shmem_barrier_all (); /* Test shmem_long_finc, shmem_int_finc, shmem_longlong_finc */ *dest1 = *dest3 = *dest5 = me; new_val1 = new_val3 = new_val5 = me; success1_p1 = success1_p2 = success3_p1 = success3_p2 = success5_p1 = success5_p2 = -1; shmem_barrier_all (); swapped_val1 = shmem_int_finc (dest1, 0); swapped_val3 = 
shmem_long_finc (dest3, 0); swapped_val5 = shmem_longlong_finc (dest5, 0); /* To validate the working of fetch and increment we need to check the old value received at the PE that initiated the fetch and increment as well as the new value on the dest PE */ if (me != 0) { if (swapped_val1 == 0) { success1_p1 = 1; } if (swapped_val3 == 0) { success3_p1 = 1; } if (swapped_val5 == 0) { success5_p1 = 1; } } if (me == 0) { if (*dest1 == npes - 1) { shmem_int_put (&success1_p2, &success, 1, npes - 1); } if (*dest3 == npes - 1) { shmem_int_put (&success3_p2, &success, 1, npes - 1); } if (*dest5 == npes - 1) { shmem_int_put (&success5_p2, &success, 1, npes - 1); } } shmem_barrier_all (); if (me == npes - 1) { if (success1_p1 && success1_p2) { printf ("Test shmem_int_finc: Passed\n"); } else { printf ("Test shmem_int_finc: Failed\n"); } if (success3_p1 && success3_p2) { printf ("Test shmem_long_finc: Passed\n"); } else { printf ("Test shmem_long_finc: Failed\n"); } if (success5_p1 && success5_p2) { printf ("Test shmem_longlong_finc: Passed\n"); } else { printf ("Test shmem_longlong_finc: Failed\n"); } } shmem_barrier_all (); shmem_free (dest1); shmem_free (dest2); shmem_free (dest3); shmem_free (dest4); shmem_free (dest5); } else { printf ("Number of PEs must be > 1 to test shmem atomics, test skipped\n"); } shmem_finalize (); return 0; }
int main (int argc, char *argv[]) { long local_dest; int *shm_dest; int me, npes, i; int pe_acc_success = 0; int fail_count = 0; shmem_init (); me = shmem_my_pe (); npes = shmem_n_pes (); shm_dest = (int *) shmem_malloc (sizeof (int)); shmem_barrier_all (); if (me == 0) { if (!check_it (&global_dest)) { /* long global: yes */ printf ("Test Global Address Accessible: Failed\n"); fail_count++; } else { printf ("Test Global Address Accessible: Passed\n"); } if (!check_it (&static_dest)) { /* static int global: yes */ printf ("Test Static Global Address Accessible: Failed\n"); fail_count++; } else { printf ("Test Static Global Address Accessible: Passed\n"); } if (check_it (&local_dest)) { /* main() stack: no */ printf ("Test Stack Address Accessible: Failed\n"); fail_count++; } else { printf ("Test Stack Address Accessible: Passed\n"); } if (!check_it (shm_dest)) { /* shmem_malloc: yes */ printf ("Test Shmalloc-ed Address Accessible: Failed\n"); fail_count++; } else { printf ("Test Shmalloc-ed Address Accessible: Passed\n"); } for (i = 1; i < npes; i++) { if (shmem_pe_accessible (i) != 1) { pe_acc_success = 1; } } if (pe_acc_success == 1) { printf ("Test shmem_pe_accessible: Failed\n"); fail_count++; } else { printf ("Test shmem_pe_accessible: Passed\n"); } if (fail_count == 0) printf("All Tests Passed\n"); else printf("%d Tests Failed\n", fail_count); } shmem_free (shm_dest); shmem_finalize (); return 0; }
int main (void) { int i; int *target; int *source; int me, npes; struct timeval start, end; long time_taken, start_time, end_time; shmem_init (); me = shmem_my_pe (); npes = shmem_n_pes (); source = (int *) shmem_malloc (N_ELEMENTS * sizeof (*source)); time_taken = 0; for (i = 0; i < N_ELEMENTS; i += 1) { source[i] = (i + 1) * 10 + me; } target = (int *) shmem_malloc (N_ELEMENTS * sizeof (*target) * npes); for (i = 0; i < N_ELEMENTS * npes; i += 1) { target[i] = -90; } for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i += 1) { pSyncA[i] = _SHMEM_SYNC_VALUE; pSyncB[i] = _SHMEM_SYNC_VALUE; } shmem_barrier_all (); for (i = 0; i < 10000; i++) { gettimeofday (&start, NULL); start_time = (start.tv_sec * 1000000.0) + start.tv_usec; /* alternate between 2 pSync arrays to synchronize consequent collectives of even and odd iterations */ if (i % 2) { shmem_fcollect32 (target, source, N_ELEMENTS, 0, 0, npes, pSyncA); } else { shmem_fcollect32 (target, source, N_ELEMENTS, 0, 0, npes, pSyncB); } gettimeofday (&end, NULL); end_time = (end.tv_sec * 1000000.0) + end.tv_usec; if (me == 0) { time_taken = time_taken + (end_time - start_time); } } if (me == 0) { printf ("Time required to collect %d bytes of data, with %d PEs is %ld microseconds\n", (4 * N_ELEMENTS * npes), npes, time_taken / 10000); } shmem_barrier_all (); shmem_free (target); shmem_free (source); shmem_finalize (); return 0; }
int main(int argc, char* argv[]) { DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; static DataType target[10]; static DataType pong=666; DataType *t2=NULL; int me, num_pes, pe, Verbose=0; if (argc > 1 && (strcmp(argv[1],"-v") == 0)) { Verbose++; } shmem_init(); me = shmem_my_pe(); num_pes = shmem_n_pes(); if (num_pes == 1) { printf("%s: Requires number of PEs > 1\n", argv[0]); shmem_finalize(); return 0; } t2 = shmem_malloc(10*sizeof(DataType)); if (!t2) { if (me==0) printf("shmem_malloc() failed?\n"); shmem_global_exit(1); } t2[9] = target[9] = 0xFF; shmem_barrier_all(); if (me == 0) { memset(target, 0, sizeof(target)); for(pe=1; pe < num_pes; pe++) SHM_PUT(target, target, 10, pe); for(pe=1; pe < num_pes; pe++) /* put 10 elements into target on PE 1 */ SHM_PUT(target, source, 10, pe); SHM_WAITU( &pong, SHMEM_CMP_GT, 666 ); Vprintf("PE[%d] pong now "PF"\n",me,pong); for(pe=1; pe < num_pes; pe++) /* put 1 element into t2 on PE 1 */ SHM_PUTP(&t2[9], 0xDD, pe); } else { /* wait for 10th element write of 'target' */ SHM_WAITU( &target[9], SHMEM_CMP_NE, 0xFF ); Vprintf("PE[%d] target[9] was 255 now "PF", success.\n",me,target[9]); SHM_WAITU( &target[9], SHMEM_CMP_EQ, 10 ); Vprintf("PE[%d] expected target[9] == 10 now "PF"\n",me,target[9]); if (me == 1) { if (Verbose) { DataType tmp = SHM_GETP( &pong, 0); printf("PE[%d] @ PE[0] pong == "PF", setting to 999\n",me,tmp); } SHM_PUTP( &pong, 999, 0); } SHM_WAITU( &t2[9], SHMEM_CMP_NE, 0xFF ); } //shmem_barrier_all(); /* sync sender and receiver */ if (me != 0) { if (memcmp(source, target, sizeof(DataType) * 10) != 0) { int i; fprintf(stderr,"[%d] Src & Target mismatch?\n",me); for (i = 0 ; i < 10 ; ++i) { printf(PF","PF" ", source[i], target[i]); } printf("\n"); shmem_global_exit(1); } } shmem_free(t2); if (Verbose) fprintf(stderr,"[%d] exit\n",shmem_my_pe()); shmem_finalize(); return 0; }