static inline void swaptest(int me, int iterations, int T, int S, int P) { int i; const int tswap = 5, sswap = 2; target[T] = tswap; source[S] = sswap; shmem_barrier_all(); /* Ensure target/source initialization completed */ if (me == 0) pre_op_check(__func__, source[S], iterations, 0); if (me == 0) { for (i = 0; i < iterations; i++) source[S] = shmem_int_swap(&target[T], source[S], 1); shmem_int_p(&sync_pes[P], i, 1); if (debug) printf("AFTER flag PE 0 value of source is %d" " = 5?\n", source[S]); if (((iterations % 2 == 1) && (source[S] != tswap)) || ((iterations % 2 == 0) && (source[S] != sswap))) { fprintf(stderr, "swap ERR: PE 0 source = %d\n", source[S]); shmem_global_exit(EXIT_FAILURE); } } else { wait_until(&sync_pes[P], iterations, 1); if (((iterations % 2 == 1) && (target[T] != sswap)) || ((iterations % 2 == 0) && (target[T] != tswap))) { fprintf(stderr, "swap ERR: PE 0 target = %d \n", target[T]); shmem_global_exit(EXIT_FAILURE); } } if (verbose) { if (me == 0) printf("SHMEM %s finished\n", __func__); } }
/*
 * Non-blocking get check, driven by PE 0 only.
 *
 * For each PE in the job, PE 0 zeroes its local target, fetches 10 longs of
 * source from that PE with shmem_long_get_nbi(), completes the transfer with
 * shmem_quiet(), and compares the result against its own source.  Every
 * mismatching element is reported; after the first PE that produced a
 * mismatch the job is ended with shmem_global_exit(1).
 */
int main(int argc, char* argv[])
{
    int failed = 0;

    shmem_init();

    if (shmem_my_pe() == 0) {
        const int num_pes = shmem_n_pes();
        int pe;

        for (pe = 0; pe < num_pes; pe++) {
            int idx;

            memset(target, 0, 10 * sizeof(long));

            shmem_long_get_nbi(target, source, 10, pe);
            /* the nbi get is only guaranteed complete after quiet */
            shmem_quiet();

            for (idx = 0; idx < 10; idx++) {
                if (source[idx] == target[idx])
                    continue;

                fprintf(stderr,"[%d] get_nbi from PE %d: target[%d] = %ld, expected %ld\n",
                        shmem_my_pe(), pe, idx, target[idx], source[idx]);
                failed = 1;
            }

            if (failed)
                shmem_global_exit(1);
        }
    }

    shmem_finalize();

    return 0;
}
static inline void cswaptest(int me, int iterations, int T, int S, int P) { int i; source[S] = -100; target[T] = 0; shmem_barrier_all(); if (me == 1) { pre_op_check(__func__, source[S], iterations, 1); for (i = 0; i < iterations; i++) source[S] = shmem_int_cswap(&(target[T]), i, (i+1), 0); shmem_int_p(&sync_pes[P], i, 0); post_op_check("cswap", source[S], (iterations-1), 1); } else { wait_until(&sync_pes[P], iterations, 0); if (target[T] != iterations) { fprintf(stderr, "cswap ERR: PE 1 target = %d != %d\n", target[T], iterations); shmem_global_exit(EXIT_FAILURE); } } if (verbose) { if (me == 1) printf("SHMEM %s finished\n", __func__); } }
/*
 * Verify the value observed after an atomic-operation loop.
 *
 * op          operation name used as the prefix of the error message
 * check_var   value that was observed
 * iterations  value that was expected
 * pe          PE number reported in the error message
 *
 * Returns silently when the values agree; otherwise prints a diagnostic to
 * stderr and calls shmem_global_exit(EXIT_FAILURE).
 */
static inline void post_op_check(const char *op, int check_var, int iterations, int pe)
{
    if (check_var == iterations)
        return;

    fprintf(stderr, "%s ERR: PE %d source = %d != %d\n",
            op, pe, check_var, iterations);
    shmem_global_exit(EXIT_FAILURE);
}
int main(int argc, char* argv[]) { int i; for(i = 0; i < N_THREADS*N_ELEMS; ++i) { source[i] = i+1; } int tl_expected = SHMEMX_THREAD_MULTIPLE; int tl; shmemx_init_thread(tl_expected,&tl); if (tl_expected != tl) { printf("Could not initialize with desired thread level (%d " "requested, got %d)\n", tl_expected, tl); return 0; } if (shmem_n_pes() == 1) { printf("%s: Requires number of PEs > 1\n", argv[0]); shmem_finalize(); return 0; } pthread_t threads[N_THREADS]; pthread_barrier_init(&fencebar,NULL,N_THREADS); fprintf(stderr,"Starting threads\n"); for(i = 0; i < N_THREADS; ++i) { /* fprintf(stderr,"Starting thread %d\n",i); */ ptrdiff_t tid = i; pthread_create(&threads[i],NULL,&roundrobin,(void*)tid); } for(i = 0; i < N_THREADS; ++i) { pthread_join(threads[i],NULL); } pthread_barrier_destroy(&fencebar); if (0 != memcmp(source, target, sizeof(long) * N_THREADS*N_ELEMS)) { fprintf(stderr,"[%d] Src & Target mismatch?\n",shmem_my_pe()); for (i = 0 ; i < 10 ; ++i) { printf("%ld,%ld ", source[i], target[i]); } printf("\n"); shmem_global_exit(1); } shmem_finalize(); return 0; }
/*
 * Blocking get check.
 *
 * Needs at least two PEs.  PE 0 clears its local target, fetches 10 longs
 * of source from PE 1 with shmem_long_get(), and after a global barrier
 * compares the fetched data with its own source.  A mismatch dumps the 10
 * value pairs and ends the job through shmem_global_exit(1).
 */
int main(int argc, char* argv[])
{
    int me;

    shmem_init();

    if (shmem_n_pes() == 1) {
        printf("%s: Requires number of PEs > 1\n", argv[0]);
        shmem_finalize();
        return 0;
    }

    me = shmem_my_pe();

    if (me == 0) {
        memset(target, 0, sizeof(target));
        /* get 10 elements of source from PE 1 into the local target */
        shmem_long_get(target, source, 10, 1);
    }

    shmem_barrier_all();  /* sync sender and receiver */

    if (me == 0 && memcmp(source, target, sizeof(long) * 10) != 0) {
        int idx;

        fprintf(stderr,"[%d] Src & Target mismatch?\n", me);
        for (idx = 0; idx < 10; ++idx)
            printf("%ld,%ld ", source[idx], target[idx]);
        printf("\n");
        shmem_global_exit(1);
    }

    shmem_finalize();

    return 0;
}
/*
 * Binding produced by the FORTRANIFY() macro for shmem_global_exit.
 * The status arrives by reference and is forwarded by value to the C
 * routine.
 */
void FORTRANIFY (shmem_global_exit) (int *status)
{
    const int exit_status = *status;

    shmem_global_exit (exit_status);
}
int main(int argc, char **argv) { int me, c, l, j; int nWords, loops, incWords; pgm = strrchr(argv[0],'/'); if ( pgm ) pgm++; else pgm = argv[0]; shmem_init(); me = shmem_my_pe(); while ((c = getopt (argc, argv, "hpv")) != -1) switch (c) { case 'v': Verbose++; break; case 'h': default: usage(); break; } if (optind == argc) nWords = DFLT_NWORDS; else if ((nWords = getSize (argv[optind++])) <= 0) usage (); if (optind == argc) loops = DFLT_LOOPS; else if ((loops = getSize (argv[optind++])) < 0) usage (); if (optind == argc) incWords = DFLT_INCR; else if ((incWords = getSize (argv[optind++])) < 0) usage (); if (Verbose && me == 0) fprintf (stderr, "nWords(%d) loops(%d) incWords(%d)]\n", nWords, loops, incWords); for(l=0; l < loops; l++) { /* align 2**2 ... 2**23; 24 exceeds symetric heap max */ for(j=0,c=2; j < 23; j++,c<<=1) { target_sz = nWords * sizeof(DataType); if (!(target = (DataType *)shmem_align(c,target_sz))) { perror ("Failed target memory allocation"); exit (1); } if ( (unsigned long)target & (c-1) ) { fprintf(stdout,"PE%d Unaligned? ",me); fflush(stdout); fprintf(stdout,"align[%#09x]target %p\n", c, (void*)target); shmem_global_exit(1); } else if (Verbose > 1 && me == 0) fprintf(stdout,"align[%#09x]target %p\n", c, (void*)target); shmem_barrier_all(); shmem_free(target); } nWords += incWords; if (Verbose && me == 0) fprintf(stdout,"Fini loop %d\n",(l+1)); } shmem_finalize(); return 0; }
int main(int argc, char* argv[]) { int c, j, loops, k, l; int my_pe, nProcs, nWorkers; int nWords=1; int failures=0; char *prog_name; long *wp,work_sz; for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) { pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] = pSync4[j] = SHMEM_SYNC_VALUE; } shmem_init(); my_pe = shmem_my_pe(); nProcs = shmem_n_pes(); nWorkers = nProcs - 1; if (nProcs == 1) { Rfprintf(stderr, "ERR - Requires > 1 PEs\n"); shmem_finalize(); return 0; } for(j=0; j < nProcs; j++) if ( shmem_pe_accessible(j) != 1 ) { fprintf(stderr, "ERR - pe %d not accessible from pe %d\n", j, my_pe); } prog_name = strrchr(argv[0],'/'); if ( prog_name ) prog_name++; else prog_name = argv[0]; while((c=getopt(argc,argv,"hvM:s")) != -1) { switch(c) { case 's': Slow++; break; case 'v': Verbose++; break; case 'M': output_mod = atoi(optarg); if (output_mod <= 0) { Rfprintf(stderr, "ERR - output modulo arg out of " "bounds '%d'?\n", output_mod); shmem_finalize(); return 1; } Rfprintf(stderr,"%s: output modulo %d\n", prog_name,output_mod); break; case 'h': Rfprintf(stderr, "usage: %s {nWords-2-put(%d)K/M} {Loop-count(%d)K/M}\n", prog_name, DFLT_NWORDS, DFLT_LOOPS); shmem_finalize(); return 1; default: shmem_finalize(); return 1; } } if (optind == argc) nWords = DFLT_NWORDS; else { nWords = atoi_scaled(argv[optind++]); if (nWords <= 0) { Rfprintf(stderr, "ERR - Bad nWords arg '%d'?\n", nWords); shmem_finalize(); return 1; } } if (optind == argc) loops = DFLT_LOOPS; else { loops = atoi_scaled(argv[optind++]); if (loops <= 0 || loops > 1000000) { Rfprintf(stderr, "ERR - loops arg out of bounds '%d'?\n", loops); shmem_finalize(); return 1; } } work_sz = (nProcs*nWords) * sizeof(long); work = shmem_malloc( work_sz ); if ( !work ) { fprintf(stderr,"[%d] ERR - work = shmem_malloc(%ld) ?\n",my_pe,work_sz); shmem_global_exit(1); } Target = shmem_malloc( 2 * nWords * sizeof(long) ); if ( !Target ) { fprintf(stderr,"[%d] ERR - Target = shmem_malloc(%ld) ?\n", my_pe, (nWords * sizeof(long))); 
shmem_global_exit(1); } src = &Target[nWords]; #if _DEBUG Rprintf("%s: %d loops of %d longs per put\n",prog_name,loops,nWords); #endif for(j=0; j < nWords; j++) src[j] = VAL; for(j=0; j < loops; j++) { #if _DEBUG if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] +(%d)\n", my_pe,j); #endif shmem_barrier(0, 0, nProcs, pSync0); if ( my_pe == 0 ) { int p; for(p=1; p < nProcs; p++) shmem_long_put(Target, src, nWords, p); } else { if (Slow) { /* wait for each put to complete */ for(k=0; k < nWords; k++) shmem_wait(&Target[k],my_pe); } else { /* wait for last word to be written */ shmem_wait(&Target[nWords-1],my_pe); } } #if _DEBUG if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] -(%d)\n", shmem_my_pe(),j); #endif shmem_barrier(0, 0, nProcs, pSync1); RDprintf("Workers[1 ... %d] verify Target data put by proc0\n", nWorkers); /* workers verify put data is expected */ if ( my_pe != 0 ) { for(k=0; k < nWords; k++) { if (Target[k] != VAL) { fprintf(stderr, "[%d] Target[%d] %#lx " "!= %#x?\n", my_pe,k,Target[k],VAL); failures++; } assert(Target[k] == VAL); Target[k] = my_pe; } } else /* clear results buffer, workers will put here */ memset(work, 0, work_sz); shmem_barrier(0, 0, nProcs, pSync2); RDprintf("Workers[1 ... %d] put Target data to PE0 work " "vector\n",nWorkers); if ( my_pe != 0 ) { /* push nWords of val my_pe back to PE zero */ shmem_long_put(&work[my_pe * nWords], Target, nWords, 0); } else { /* wait for procs 1 ... 
nProcs to complete put()s */ for(l=1; l < nProcs; l++) { wp = &work[ l*nWords ]; // procs nWords chunk #if 1 /* wait for last long to be written from each PE */ shmem_wait(&wp[nWords-1],0); #else for(k=0; k < nWords; k++) shmem_wait(&wp[k],0); #endif } } shmem_barrier(0, 0, nProcs, pSync3); if ( my_pe == 0 ) { RDprintf("Loop(%d) PE0 verifing work data.\n",j); for(l=1; l < nProcs; l++) { wp = &work[ l*nWords ]; // procs nWords chunk for(k=0; k < nWords; k++) { if (wp[k] != l) { fprintf(stderr, "[0] PE(%d)_work[%d] %ld " "!= %d?\n", l,k,work[k],l); failures++; } assert(wp[k] == l); break; } if (failures) break; } } shmem_barrier(0, 0, nProcs, pSync4); #if _DEBUG if (loops > 1) { Rfprintf(stderr,"."); RDprintf("Loop(%d) Pass.\n",j); } #endif } shmem_free( work ); shmem_free( Target ); #if _DEBUG Rfprintf(stderr,"\n");fflush(stderr); shmem_barrier_all(); RDprintf("%d(%d) Exit(%d)\n", my_pe, nProcs, failures); #endif shmem_finalize(); return failures; }
int main(int argc, char* argv[]) { DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; static DataType target[10]; static DataType pong=666; DataType *t2=NULL; int me, num_pes, pe, Verbose=0; if (argc > 1 && (strcmp(argv[1],"-v") == 0)) { Verbose++; } shmem_init(); me = shmem_my_pe(); num_pes = shmem_n_pes(); if (num_pes == 1) { printf("%s: Requires number of PEs > 1\n", argv[0]); shmem_finalize(); return 0; } t2 = shmem_malloc(10*sizeof(DataType)); if (!t2) { if (me==0) printf("shmem_malloc() failed?\n"); shmem_global_exit(1); } t2[9] = target[9] = 0xFF; shmem_barrier_all(); if (me == 0) { memset(target, 0, sizeof(target)); for(pe=1; pe < num_pes; pe++) SHM_PUT(target, target, 10, pe); for(pe=1; pe < num_pes; pe++) /* put 10 elements into target on PE 1 */ SHM_PUT(target, source, 10, pe); SHM_WAITU( &pong, SHMEM_CMP_GT, 666 ); Vprintf("PE[%d] pong now "PF"\n",me,pong); for(pe=1; pe < num_pes; pe++) /* put 1 element into t2 on PE 1 */ SHM_PUTP(&t2[9], 0xDD, pe); } else { /* wait for 10th element write of 'target' */ SHM_WAITU( &target[9], SHMEM_CMP_NE, 0xFF ); Vprintf("PE[%d] target[9] was 255 now "PF", success.\n",me,target[9]); SHM_WAITU( &target[9], SHMEM_CMP_EQ, 10 ); Vprintf("PE[%d] expected target[9] == 10 now "PF"\n",me,target[9]); if (me == 1) { if (Verbose) { DataType tmp = SHM_GETP( &pong, 0); printf("PE[%d] @ PE[0] pong == "PF", setting to 999\n",me,tmp); } SHM_PUTP( &pong, 999, 0); } SHM_WAITU( &t2[9], SHMEM_CMP_NE, 0xFF ); } //shmem_barrier_all(); /* sync sender and receiver */ if (me != 0) { if (memcmp(source, target, sizeof(DataType) * 10) != 0) { int i; fprintf(stderr,"[%d] Src & Target mismatch?\n",me); for (i = 0 ; i < 10 ; ++i) { printf(PF","PF" ", source[i], target[i]); } printf("\n"); shmem_global_exit(1); } } shmem_free(t2); if (Verbose) fprintf(stderr,"[%d] exit\n",shmem_my_pe()); shmem_finalize(); return 0; }
int main(int argc, char **argv) { int loops=DFLT_LOOPS; char *pgm; int *Target; int *Source; int i, me, npes; int target_PE; long bytes; double start_time, *total_time; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 's': Sync++; break; case 't': Track++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } for(i=0; i < SHMEM_REDUCE_SYNC_SIZE; i++) pSync[i] = SHMEM_SYNC_VALUE; target_PE = (me+1) % npes; total_time = (double *) shmem_malloc( npes * sizeof(double) ); if (!total_time) { fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", (elements * sizeof(double))); shmem_global_exit(1); } for(i=0; i < npes; i++) total_time[i] = -1.0; Source = (int *) shmem_malloc( elements * sizeof(*Source) ); if (!Source) { fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(total_time); shmem_global_exit(1); } Target = (int *) shmem_malloc( elements * sizeof(*Target) ); if (!Target) { fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(Source); shmem_free(total_time); shmem_global_exit(1); } for (i = 0; i < elements; i++) { Target[i] = -90; Source[i] = i + 1; } bytes = loops * sizeof(int) * elements; if (Verbose && me==0) { fprintf(stderr, "%s: INFO - %d loops, put %d (int) elements to PE+1 Max put ??\n", pgm, loops, elements); } shmem_barrier_all(); for(i=0; i < loops; i++) { start_time = shmemx_wtime(); 
shmem_int_put(Target, Source, elements, target_PE); time_taken += (shmemx_wtime() - start_time); if (me==0) { if ( Track && i > 0 && ((i % 200) == 0)) fprintf(stderr,".%d",i); } if (Sync) shmem_barrier_all(); } // collect time per node. shmem_double_put( &total_time[me], &time_taken, 1, 0 ); shmem_double_sum_to_all(&sum_time, &time_taken, 1, 0, 0, npes, pWrk, pSync); shmem_barrier_all(); for (i = 0; i < elements; i++) { if (Target[i] != i + 1) { printf("%d: Error Target[%d] = %d, expected %d\n", me, i, Target[i], i + 1); shmem_global_exit(1); } } if ( Track && me == 0 ) fprintf(stderr,"\n"); if(Verbose && me == 0) { double rate, comp_time; if (Verbose > 1) fprintf(stdout,"Individule PE times: (seconds)\n"); for(i=0,comp_time=0.0; i < npes; i++) { comp_time += total_time[i]; if (Verbose > 1) fprintf(stdout," PE[%d] %8.6f\n",i,total_time[i]); } sum_time /= (double)npes; comp_time /= (double)npes; if (sum_time != comp_time) printf("%s: computed_time %7.5f != sum_to_all_time %7.5f)\n", pgm, comp_time, sum_time ); rate = ((double)bytes/(1024.0*1024.0)) / comp_time; printf("%s: shmem_int_put() %7.4f MB/sec (bytes %ld secs %7.4f)\n", pgm, rate, bytes, sum_time); } shmem_free(total_time); shmem_free(Target); shmem_free(Source); shmem_finalize(); return 0; }
int main(int argc, char* argv[]) { int c, j, cloop, loops = DFLT_LOOPS; int mpe, num_pes; int nWords=1; int nIncr=1; int failures=0; char *pgm; shmem_init(); mpe = shmem_my_pe(); num_pes = shmem_n_pes(); if (num_pes == 1) { Rfprintf(stderr, "ERR - Requires > 1 PEs\n"); shmem_finalize(); return 0; } pgm = strrchr(argv[0],'/'); if ( pgm ) pgm++; else pgm = argv[0]; while((c=getopt(argc,argv,"hqVvl:")) != -1) { switch(c) { case 'V': case 'v': Verbose++; break; case 'l': loops = atoi(optarg); break; case 'h': Rfprintf(stderr, "usage: %s {-l loopcnt(%d)} {numLongs(%d)} {loopIncr(%d)}\n", pgm,DFLT_LOOPS,DFLT_NWORDS,DFLT_INCR); shmem_finalize(); return 1; default: shmem_finalize(); return 1; } } if (optind == argc) nWords = DFLT_NWORDS; else { nWords = atoi_scaled(argv[optind++]); if (nWords <= 0) { Rfprintf(stderr, "ERR - Bad nBytes arg?\n"); shmem_finalize(); return 1; } } if (optind == argc) nIncr = DFLT_INCR; else { loops = atoi(argv[optind++]); if (nIncr <= 0 ) { Rfprintf(stderr, "ERR - incLongs arg out of bounds '%d'?\n", nIncr); shmem_finalize(); return 1; } } if ( nWords % 8 ) { // integral multiple of longs Rprintf("%s: nWords(%d) not a multiple of %ld?\n", pgm,nWords,sizeof(long)); shmem_finalize(); return 1; } for (c = 0; c < _SHMEM_COLLECT_SYNC_SIZE;c++) pSync[c] = _SHMEM_SYNC_VALUE; if (Verbose && mpe == 0) fprintf(stderr,"loops(%d) nWords(%d) incr-per-loop(%d)\n", loops,nWords,nIncr); for(cloop=1; cloop <= loops; cloop++) { c = (sizeof(long)*nWords) * (num_pes + 1); // src + dst allocation. //nWords /= sizeof(long); // convert input of bytes --> longs. src = (long*)shmem_malloc(c); if ( !src ) { Rprintf("[%d] %s: shmem_malloc(%d) failed?\n", mpe, pgm,c); shmem_global_exit(1); } dst = &src[nWords]; for(j=0; j < nWords; j++) src[j] = (long) (j + mpe*nWords); shmem_barrier_all(); shmem_fcollect64(dst,src,nWords,0,0,num_pes,pSync); // Expect dst to be consecuative integers 0 ... 
(nLongs*num_pes)-1 for(j=0; j < (nWords*num_pes); j++) { if ( dst[j] != (long) j ) { fprintf(stderr, "[%d] dst[%d] %ld != expected %d\n",mpe,j,dst[j],j); shmem_global_exit(1); } } shmem_barrier_all(); if (Verbose && mpe == 0 && loops > 1) { fprintf(stderr,"."); } nWords += nIncr; } if (Verbose && mpe == 0) { fprintf(stderr,"\n");fflush(stderr); } shmem_free( (void*)src ); shmem_barrier_all(); if (Verbose) printf("%d(%d) Exit(%d)\n", mpe, num_pes, failures); shmem_finalize(); return failures; }
int main(int argc, char **argv) { int loops=DFLT_LOOPS; char *pgm; int *Target; int *Source; int i, me, npes; int target_pe; long bytes; double time_taken=0.0, start_time; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 's': Sync++; break; case 't': Track++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } target_pe = (me+1) % npes; total_time = (double *) shmem_malloc( npes * sizeof(double) ); if (!total_time) { fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", (elements * sizeof(double))); shmem_global_exit(1); } Source = (int *) shmem_malloc( elements * sizeof(*Source) ); if (!Source) { fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(total_time); shmem_global_exit(1); } Target = (int *) shmem_malloc( elements * sizeof(*Target) ); if (!Target) { fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(Source); shmem_free(total_time); shmem_global_exit(1); } for (i = 0; i < elements; i++) { Target[i] = -90; Source[i] = i + 1; } bytes = loops * sizeof(int) * elements; if (Verbose && me==0) fprintf(stderr, "%s: INFO - %d loops, get %d (int) elements from PE+1\n", pgm, loops, elements); shmem_barrier_all(); for(i=0; i < loops; i++) { start_time = shmemx_wtime(); shmem_int_get( Target, Source, elements, target_pe ); time_taken += shmemx_wtime() - start_time; if (me==0) { if ( Track && i > 0 && ((i % 200) 
== 0)) fprintf(stderr,".%d",i); } if (Sync) shmem_barrier_all(); } // collect time per node elapsed time. shmem_double_put( &total_time[me], &time_taken, 1, 0 ); shmem_barrier_all(); for (i = 0; i < elements; i++) { if (Target[i] != i + 1) { printf("%d: Error Target[%d] = %d, expected %d\n", me, i, Target[i], i + 1); shmem_global_exit(1); } } if ( Track && me == 0 ) fprintf(stderr,"\n"); if (Verbose && me == 0) { double rate,secs; // average time for(i=0,secs=0.0; i < npes; i++) secs += total_time[i]; secs /= (double)npes; rate = ((double)bytes/(1024.0*1024.0)) / secs; printf("%s: ave %5.3f MB/sec (bytes %ld secs %5.3f)\n", pgm, rate, bytes, secs); } shmem_free(total_time); shmem_free(Target); shmem_free(Source); shmem_finalize(); return 0; }
int main( int argc, char *argv[]) { int rc=0, my_pe, npes, neighbor; int loops=LOOPS; int j; size_t data_sz=sizeof(long) * 3; double start_time; long *data, lval=0; if (argc > 1) loops = atoi(argv[1]); shmem_init(); my_pe = shmem_my_pe(); npes = shmem_n_pes(); data = shmem_malloc(data_sz); if (!data) { fprintf(stderr,"[%d] shmem_malloc(%ld) failure? %d\n", my_pe,data_sz,errno); shmem_global_exit(1); } memset((void*)data,0,data_sz); shmem_barrier_all(); neighbor = (my_pe + 1) % npes; start_time = shmemx_wtime(); for(j=0,elapsed=0.0; j < loops; j++) { start_time = shmemx_wtime(); lval = shmem_long_finc( (void*)&data[1], neighbor ); elapsed += shmemx_wtime() - start_time; if (lval != (long) j) { fprintf(stderr,"[%d] Test: FAIL previous val %ld != %d Exit.\n", my_pe, lval, j); shmem_global_exit(1); } } shmem_barrier_all(); rc = 0; if (data[1] != (long)loops) { fprintf(stderr,"[%d] finc neighbot: FAIL data[1](%p) %ld != %d Exit.\n", my_pe, (void*)&data[1], data[1], loops); rc--; } /* check if adjancent memory locations distrubed */ assert(data[0] == 0); assert(data[2] == 0); if (my_pe == 0 ) { if (rc == 0 && Verbose) fprintf(stderr,"[%d] finc neighbor: PASSED.\n",my_pe); fprintf(stderr,"[%d] %d loops of shmem_long_finc() in %6.4f secs\n" " %2.6f usecs per shmem_long_finc()\n", my_pe,loops,elapsed,((elapsed*100000.0)/(double)loops)); } shmem_free(data); shmem_finalize(); return rc; }
int main(int argc, char* argv[]) { int i, Verbose=0; int mpe, num_pes, loops=10, cloop; char *pgm; long *dst, *src; int nBytes = START_BCAST_SIZE; int nLongs=0; shmem_init(); mpe = shmem_my_pe(); num_pes = shmem_n_pes(); if (num_pes == 1) { printf("%s: Requires number of PEs > 1\n", argv[0]); shmem_finalize(); return 0; } if (sizeof(long) != 8) { printf("Test assumes 64-bit long (%zd)\n", sizeof(long)); shmem_global_exit(1); return 0; } if ((pgm=strrchr(argv[0],'/'))) { pgm++; } else { pgm = argv[0]; } if (argc > 1) { if (strncmp(argv[1],"-v",3) == 0) { Verbose=1; } else if (strncmp(argv[1],"-h",3) == 0) { fprintf(stderr,"usage: %s {-v(verbose)|h(help)}\n",pgm); shmem_finalize(); exit(1); } } for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1) { pSync[i] = SHMEM_SYNC_VALUE; } if ( mpe == 0 && Verbose ) { fprintf(stderr,"%d loops\n",loops); } for(cloop=1; cloop <= loops; cloop++) { nLongs = nBytes / sizeof(long); dst = (long *)shmem_malloc(nBytes*2); if ( !dst ) { fprintf(stderr,"[%d] shmem_malloc(%d) failed %s\n", mpe,nBytes,strerror(errno)); return 0; } memset( (void*)dst, 0, nBytes ); src = &dst[nLongs]; for (i = 1; i < nLongs; i++) { src[i] = i+1; } shmem_barrier_all(); shmem_broadcast64(dst, src, nLongs, 1, 0, 0, num_pes, pSync); for(i=0; i < nLongs; i++) { /* the root node shouldn't have the result into dst (cf specification).*/ if (1 != mpe && dst[i] != src[i]) { fprintf(stderr,"[%d] dst[%d] %ld != expected %ld\n", mpe, i, dst[i],src[i]); shmem_global_exit(1); } else if (1 == mpe && dst[i] != 0) { fprintf(stderr,"[%d] dst[%d] %ld != expected 0\n", mpe, i, dst[i]); shmem_global_exit(1); } } shmem_barrier_all(); shmem_free (dst); if (Verbose && mpe ==0) fprintf(stderr,"loop %2d Bcast %d, Done.\n",cloop,nBytes); nBytes += BCAST_INCR; } shmem_finalize(); return 0; }