/*
 * neighbor_get - time a run of shmem_int_get() calls against the next PE.
 *
 * Every PE issues `loops` gets of `elements` ints from `src` on PE
 * (me + 1) % npes into its local `target`.  The run is bracketed by
 * shmem_barrier_all() calls; PE 0 prints the timing summary when the
 * file-scope Verbose flag is non-zero.
 *
 *   target   - local destination buffer for the get
 *   src      - source address passed to shmem_int_get()
 *   elements - number of ints transferred per get
 *   me       - calling PE number
 *   npes     - total number of PEs
 *   loops    - number of gets to time
 */
void
neighbor_get(int *target, int *src, int elements, int me, int npes, int loops)
{
    int i, neighbor_pe;
    double start_time, elapsed_time;
    /* Widen to long before multiplying: loops * elements can overflow int. */
    long get_bytes = (long)elements * (long)sizeof(*src);
    long total_bytes = (long)loops * get_bytes;

    if (me == 0 && Verbose) {
        fprintf(stdout, "%s: %d loops of get(%ld bytes) from neighbor, %d PEs: ",
                __FUNCTION__, loops, get_bytes, npes);
        fflush(stdout);
    }

    shmem_barrier_all();

    neighbor_pe = (me + 1) % npes;

    start_time = shmemx_wtime();
    for (i = 0; i < loops; i++) {
        shmem_int_get(target, src, elements, neighbor_pe);
    }
    elapsed_time = shmemx_wtime() - start_time;

    if (me == 0 && Verbose) {
        printf("%7.3f secs\n", elapsed_time);
        /*
         * NOTE(review): the per-get figure divides by loops * npes although
         * the elapsed time covers only this PE's `loops` gets, and
         * total_bytes is not scaled by npes -- confirm which is intended.
         */
        printf(" %7.5f usecs / get(), %ld Kbytes @ %7.4f MB/sec\n\n",
               (elapsed_time / ((double)loops * npes)) * 1000000.0,
               (total_bytes / 1024),
               ((double)total_bytes / (1024.0 * 1024.0)) / elapsed_time);
    }

    shmem_barrier_all();
}
/*
 * many2one_get - time PE 0 pulling data from every other PE.
 *
 * After a barrier, PE 0 runs `loops` passes; in each pass it issues one
 * shmem_int_get() of `elements` ints from `src` on each of PEs 1..npes-1
 * into `target`.  The other PEs only take part in the two barriers.
 * PE 0 prints the summary when the file-scope Verbose flag is non-zero.
 *
 *   target   - local destination buffer on PE 0
 *   src      - source address passed to shmem_int_get()
 *   elements - number of ints transferred per get
 *   me       - calling PE number
 *   npes     - total number of PEs
 *   loops    - number of passes over the remote PEs
 */
void
many2one_get(int *target, int *src, int elements, int me, int npes, int loops)
{
    int i, pe;
    double start_time, elapsed_time;
    /* Widen to long before multiplying: loops * elements can overflow int. */
    long get_bytes = (long)elements * (long)sizeof(*src);
    long total_bytes = (long)loops * get_bytes * (npes - 1);

    /*
     * The header is gated on Verbose like the result lines below, so a
     * non-verbose run no longer leaves an unterminated partial line.
     */
    if (me == 0 && Verbose) {
        fprintf(stdout,"%s: %d loops of get(%ld bytes) from %d PEs: ",
                __FUNCTION__, loops, get_bytes, npes-1);
        fflush(stdout);
    }

    shmem_barrier_all();

    if (me == 0) {
        start_time = shmemx_wtime();
        for (i = 0; i < loops; i++) {
            for (pe = 1; pe < npes; pe++) {
                shmem_int_get(target, src, elements, pe);
            }
        }
        elapsed_time = shmemx_wtime() - start_time;

        if (Verbose) {
            printf("%7.3f secs\n", elapsed_time);
            printf(" %7.5f usecs / get(), %ld Kbytes @ %7.4f MB/sec\n\n",
                   (elapsed_time / ((double)loops * (npes - 1))) * 1000000.0,
                   (total_bytes / 1024),
                   ((double)total_bytes / (1024.0 * 1024.0)) / elapsed_time);
        }
    }

    shmem_barrier_all();
}
/*
 * fcollect - time a run of shmem_fcollect32() calls over all PEs.
 *
 * Allocates two pSync work arrays (one shmem_malloc, split in half) and a
 * private symmetric destination of elements * npes ints, then runs `loops`
 * fcollects, alternating between the two pSync arrays.  PE 0 prints the
 * summary when the file-scope Verbose flag is non-zero.
 *
 *   target   - ignored on entry: replaced by a local allocation of
 *              elements * npes ints, which is freed before returning
 *   src      - source buffer contributed by this PE
 *   elements - number of 32-bit elements contributed per PE
 *   me       - calling PE number
 *   npes     - total number of PEs (active set is PEs 0..npes-1)
 *   loops    - number of fcollect calls to time
 *
 * A failed symmetric allocation aborts the job via shmem_global_exit(1).
 */
void
fcollect(int *target, int *src, int elements, int me, int npes, int loops)
{
    int i;
    double start_time, elapsed_time;
    /* Widen to long before multiplying: loops * elements can overflow int. */
    long call_bytes = (long)elements * (long)sizeof(*src);
    long total_bytes = (long)loops * call_bytes;
    size_t sync_bytes = 2 * sizeof(long) * _SHMEM_COLLECT_SYNC_SIZE;
    size_t target_bytes = elements * sizeof(*target) * npes;
    long *ps, *pSync, *pSync1;

    pSync = (long *) shmem_malloc(sync_bytes);
    if (!pSync) {
        fprintf(stderr, "ERR: bad pSync shmem_malloc(%ld)\n", (long)sync_bytes);
        shmem_global_exit(1);
    }
    pSync1 = &pSync[_SHMEM_COLLECT_SYNC_SIZE];
    for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) {
        pSync[i] = pSync1[i] = _SHMEM_SYNC_VALUE;
    }

    target = (int *) shmem_malloc(target_bytes);
    if (!target) {
        fprintf(stderr, "ERR: bad target shmem_malloc(%ld)\n", (long)target_bytes);
        shmem_free(pSync);
        shmem_global_exit(1);
    }

    if (me == 0 && Verbose) {
        fprintf(stdout,"%s: %d loops of fcollect32(%ld bytes) over %d PEs: ",
                __FUNCTION__, loops, call_bytes, npes);
        fflush(stdout);
    }

    /* Also ensures every PE has initialized its pSync arrays before use. */
    shmem_barrier_all();

    start_time = shmemx_wtime();
    for (i = 0; i < loops; i++) {
        /*
         * Alternate between the two disjoint work arrays.  Indexing
         * &pSync[i & 1] would instead pick two arrays offset by a single
         * long, which overlap and leave pSync1 unused.
         */
        ps = (i & 1) ? pSync1 : pSync;
        shmem_fcollect32(target, src, elements, 0, 0, npes, ps);
    }
    elapsed_time = shmemx_wtime() - start_time;

    if (me == 0 && Verbose) {
        printf("%7.3f secs\n", elapsed_time);
        printf(" %7.5f usecs / fcollect32(), %ld Kbytes @ %7.4f MB/sec\n\n",
               (elapsed_time / ((double)loops * npes)) * 1000000.0,
               (total_bytes / 1024),
               ((double)total_bytes / (1024.0 * 1024.0)) / elapsed_time);
    }

    shmem_barrier_all();
    shmem_free(target);
    shmem_free(pSync);
    shmem_barrier_all();
}
/*
 * Wrapper whose symbol name is produced by the FORTRANIFY() macro
 * (presumably the Fortran-callable spelling -- confirm against the macro
 * definition).  It takes no arguments and returns whatever the C
 * shmemx_wtime() returns.
 */
double
FORTRANIFY (shmemx_wtime) (void)
{
    const double now = shmemx_wtime ();

    return now;
}
/*
 * timer - file-local shorthand for the wall-clock source used by the tests.
 *
 * Returns the value of shmemx_wtime() unchanged.
 */
static inline double
timer(void)
{
    double now;

    now = shmemx_wtime();
    return now;
}
int main(int argc, char **argv) { int loops=DFLT_LOOPS; char *pgm; int *Target; int *Source; int i, me, npes; int target_PE; long bytes; double start_time, *total_time; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 's': Sync++; break; case 't': Track++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } for(i=0; i < SHMEM_REDUCE_SYNC_SIZE; i++) pSync[i] = SHMEM_SYNC_VALUE; target_PE = (me+1) % npes; total_time = (double *) shmem_malloc( npes * sizeof(double) ); if (!total_time) { fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", (elements * sizeof(double))); shmem_global_exit(1); } for(i=0; i < npes; i++) total_time[i] = -1.0; Source = (int *) shmem_malloc( elements * sizeof(*Source) ); if (!Source) { fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(total_time); shmem_global_exit(1); } Target = (int *) shmem_malloc( elements * sizeof(*Target) ); if (!Target) { fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(Source); shmem_free(total_time); shmem_global_exit(1); } for (i = 0; i < elements; i++) { Target[i] = -90; Source[i] = i + 1; } bytes = loops * sizeof(int) * elements; if (Verbose && me==0) { fprintf(stderr, "%s: INFO - %d loops, put %d (int) elements to PE+1 Max put ??\n", pgm, loops, elements); } shmem_barrier_all(); for(i=0; i < loops; i++) { start_time = shmemx_wtime(); 
shmem_int_put(Target, Source, elements, target_PE); time_taken += (shmemx_wtime() - start_time); if (me==0) { if ( Track && i > 0 && ((i % 200) == 0)) fprintf(stderr,".%d",i); } if (Sync) shmem_barrier_all(); } // collect time per node. shmem_double_put( &total_time[me], &time_taken, 1, 0 ); shmem_double_sum_to_all(&sum_time, &time_taken, 1, 0, 0, npes, pWrk, pSync); shmem_barrier_all(); for (i = 0; i < elements; i++) { if (Target[i] != i + 1) { printf("%d: Error Target[%d] = %d, expected %d\n", me, i, Target[i], i + 1); shmem_global_exit(1); } } if ( Track && me == 0 ) fprintf(stderr,"\n"); if(Verbose && me == 0) { double rate, comp_time; if (Verbose > 1) fprintf(stdout,"Individule PE times: (seconds)\n"); for(i=0,comp_time=0.0; i < npes; i++) { comp_time += total_time[i]; if (Verbose > 1) fprintf(stdout," PE[%d] %8.6f\n",i,total_time[i]); } sum_time /= (double)npes; comp_time /= (double)npes; if (sum_time != comp_time) printf("%s: computed_time %7.5f != sum_to_all_time %7.5f)\n", pgm, comp_time, sum_time ); rate = ((double)bytes/(1024.0*1024.0)) / comp_time; printf("%s: shmem_int_put() %7.4f MB/sec (bytes %ld secs %7.4f)\n", pgm, rate, bytes, sum_time); } shmem_free(total_time); shmem_free(Target); shmem_free(Source); shmem_finalize(); return 0; }
int main(int argc, char **argv) { int loops=DFLT_LOOPS; char *pgm; int *Target; int *Source; int i, me, npes; int target_pe; long bytes; double time_taken=0.0, start_time; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 's': Sync++; break; case 't': Track++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } target_pe = (me+1) % npes; total_time = (double *) shmem_malloc( npes * sizeof(double) ); if (!total_time) { fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", (elements * sizeof(double))); shmem_global_exit(1); } Source = (int *) shmem_malloc( elements * sizeof(*Source) ); if (!Source) { fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(total_time); shmem_global_exit(1); } Target = (int *) shmem_malloc( elements * sizeof(*Target) ); if (!Target) { fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(Source); shmem_free(total_time); shmem_global_exit(1); } for (i = 0; i < elements; i++) { Target[i] = -90; Source[i] = i + 1; } bytes = loops * sizeof(int) * elements; if (Verbose && me==0) fprintf(stderr, "%s: INFO - %d loops, get %d (int) elements from PE+1\n", pgm, loops, elements); shmem_barrier_all(); for(i=0; i < loops; i++) { start_time = shmemx_wtime(); shmem_int_get( Target, Source, elements, target_pe ); time_taken += shmemx_wtime() - start_time; if (me==0) { if ( Track && i > 0 && ((i % 200) 
== 0)) fprintf(stderr,".%d",i); } if (Sync) shmem_barrier_all(); } // collect time per node elapsed time. shmem_double_put( &total_time[me], &time_taken, 1, 0 ); shmem_barrier_all(); for (i = 0; i < elements; i++) { if (Target[i] != i + 1) { printf("%d: Error Target[%d] = %d, expected %d\n", me, i, Target[i], i + 1); shmem_global_exit(1); } } if ( Track && me == 0 ) fprintf(stderr,"\n"); if (Verbose && me == 0) { double rate,secs; // average time for(i=0,secs=0.0; i < npes; i++) secs += total_time[i]; secs /= (double)npes; rate = ((double)bytes/(1024.0*1024.0)) / secs; printf("%s: ave %5.3f MB/sec (bytes %ld secs %5.3f)\n", pgm, rate, bytes, secs); } shmem_free(total_time); shmem_free(Target); shmem_free(Source); shmem_finalize(); return 0; }
int main(int argc, char **argv) { int i,ps,ps_cnt=2; int *target; int *source; int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS; char *pgm; double start_time, time_taken; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:p:s")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 'p': if ((ps_cnt = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad pSync[] elements %d\n",loops); shmem_finalize(); return 1; } break; case 's': Serialize++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } ps_cnt *= _SHMEM_BCAST_SYNC_SIZE; pSync = shmem_malloc( ps_cnt * sizeof(long) ); for (i = 0; i < ps_cnt; i++) pSync[i] = _SHMEM_SYNC_VALUE; source = (int *) shmem_malloc( elements * sizeof(*source) ); target = (int *) shmem_malloc( elements * sizeof(*target) ); for (i = 0; i < elements; i += 1) { source[i] = i + 1; target[i] = -90; } if (me==0 && Verbose) fprintf(stderr,"ps_cnt %d loops %d nElems %d\n", ps_cnt,loops,elements); shmem_barrier_all(); for(time_taken = 0.0, ps = i = 0; i < loops; i++) { start_time = shmemx_wtime(); shmem_broadcast32(target, source, elements, 0, 0, 0, npes, &pSync[ps]); if (Serialize) shmem_barrier_all(); time_taken += (shmemx_wtime() - start_time); if (ps_cnt > 1 ) { ps += _SHMEM_BCAST_SYNC_SIZE; if ( ps >= ps_cnt ) ps = 0; } } if(me == 0 && Verbose) { printf("%d loops of Broadcast32(%ld bytes) over %d PEs: %7.3f secs\n", loops, (elements*sizeof(*source)), npes, time_taken); elements = (elements * loops * sizeof(*source)) / (1024*1024); 
printf(" %7.5f secs per broadcast() @ %7.4f MB/sec\n", (time_taken/(double)loops), ((double)elements / time_taken) ); } if (Verbose > 1) fprintf(stderr,"[%d] pre B1\n",me); shmem_barrier_all(); if (Verbose > 1) fprintf(stderr,"[%d] post B1\n",me); shmem_free(pSync); shmem_free(target); shmem_free(source); shmem_finalize(); return 0; }
int main( int argc, char *argv[]) { int rc=0, my_pe, npes, neighbor; int loops=LOOPS; int j; size_t data_sz=sizeof(long) * 3; double start_time; long *data, lval=0; if (argc > 1) loops = atoi(argv[1]); shmem_init(); my_pe = shmem_my_pe(); npes = shmem_n_pes(); data = shmem_malloc(data_sz); if (!data) { fprintf(stderr,"[%d] shmem_malloc(%ld) failure? %d\n", my_pe,data_sz,errno); shmem_global_exit(1); } memset((void*)data,0,data_sz); shmem_barrier_all(); neighbor = (my_pe + 1) % npes; start_time = shmemx_wtime(); for(j=0,elapsed=0.0; j < loops; j++) { start_time = shmemx_wtime(); lval = shmem_long_finc( (void*)&data[1], neighbor ); elapsed += shmemx_wtime() - start_time; if (lval != (long) j) { fprintf(stderr,"[%d] Test: FAIL previous val %ld != %d Exit.\n", my_pe, lval, j); shmem_global_exit(1); } } shmem_barrier_all(); rc = 0; if (data[1] != (long)loops) { fprintf(stderr,"[%d] finc neighbot: FAIL data[1](%p) %ld != %d Exit.\n", my_pe, (void*)&data[1], data[1], loops); rc--; } /* check if adjancent memory locations distrubed */ assert(data[0] == 0); assert(data[2] == 0); if (my_pe == 0 ) { if (rc == 0 && Verbose) fprintf(stderr,"[%d] finc neighbor: PASSED.\n",my_pe); fprintf(stderr,"[%d] %d loops of shmem_long_finc() in %6.4f secs\n" " %2.6f usecs per shmem_long_finc()\n", my_pe,loops,elapsed,((elapsed*100000.0)/(double)loops)); } shmem_free(data); shmem_finalize(); return rc; }