int main(int argc, char* argv[]) { int c, j, loops, k, l; int my_pe, nProcs, nWorkers; int nWords=1; int failures=0; char *prog_name; long *wp,work_sz; for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) { pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] = pSync4[j] = SHMEM_SYNC_VALUE; } shmem_init(); my_pe = shmem_my_pe(); nProcs = shmem_n_pes(); nWorkers = nProcs - 1; if (nProcs == 1) { Rfprintf(stderr, "ERR - Requires > 1 PEs\n"); shmem_finalize(); return 0; } for(j=0; j < nProcs; j++) if ( shmem_pe_accessible(j) != 1 ) { fprintf(stderr, "ERR - pe %d not accessible from pe %d\n", j, my_pe); } prog_name = strrchr(argv[0],'/'); if ( prog_name ) prog_name++; else prog_name = argv[0]; while((c=getopt(argc,argv,"hvM:s")) != -1) { switch(c) { case 's': Slow++; break; case 'v': Verbose++; break; case 'M': output_mod = atoi(optarg); if (output_mod <= 0) { Rfprintf(stderr, "ERR - output modulo arg out of " "bounds '%d'?\n", output_mod); shmem_finalize(); return 1; } Rfprintf(stderr,"%s: output modulo %d\n", prog_name,output_mod); break; case 'h': Rfprintf(stderr, "usage: %s {nWords-2-put(%d)K/M} {Loop-count(%d)K/M}\n", prog_name, DFLT_NWORDS, DFLT_LOOPS); shmem_finalize(); return 1; default: shmem_finalize(); return 1; } } if (optind == argc) nWords = DFLT_NWORDS; else { nWords = atoi_scaled(argv[optind++]); if (nWords <= 0) { Rfprintf(stderr, "ERR - Bad nWords arg '%d'?\n", nWords); shmem_finalize(); return 1; } } if (optind == argc) loops = DFLT_LOOPS; else { loops = atoi_scaled(argv[optind++]); if (loops <= 0 || loops > 1000000) { Rfprintf(stderr, "ERR - loops arg out of bounds '%d'?\n", loops); shmem_finalize(); return 1; } } work_sz = (nProcs*nWords) * sizeof(long); work = shmem_malloc( work_sz ); if ( !work ) { fprintf(stderr,"[%d] ERR - work = shmem_malloc(%ld) ?\n",my_pe,work_sz); shmem_global_exit(1); } Target = shmem_malloc( 2 * nWords * sizeof(long) ); if ( !Target ) { fprintf(stderr,"[%d] ERR - Target = shmem_malloc(%ld) ?\n", my_pe, (nWords * sizeof(long))); shmem_global_exit(1); } src = &Target[nWords]; #if _DEBUG Rprintf("%s: %d loops of %d longs per put\n",prog_name,loops,nWords); #endif for(j=0; j < nWords; j++) src[j] = VAL; for(j=0; j < loops; j++) { #if _DEBUG if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] +(%d)\n", my_pe,j); #endif shmem_barrier(0, 0, nProcs, pSync0); if ( my_pe == 0 ) { int p; for(p=1; p < nProcs; p++) shmem_long_put(Target, src, nWords, p); } else { if (Slow) { /* wait for each put to complete */ for(k=0; k < nWords; k++) shmem_wait(&Target[k],my_pe); } else { /* wait for last word to be written */ shmem_wait(&Target[nWords-1],my_pe); } } #if _DEBUG if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] -(%d)\n", shmem_my_pe(),j); #endif shmem_barrier(0, 0, nProcs, pSync1); RDprintf("Workers[1 ... %d] verify Target data put by proc0\n", nWorkers); /* workers verify put data is expected */ if ( my_pe != 0 ) { for(k=0; k < nWords; k++) { if (Target[k] != VAL) { fprintf(stderr, "[%d] Target[%d] %#lx " "!= %#x?\n", my_pe,k,Target[k],VAL); failures++; } assert(Target[k] == VAL); Target[k] = my_pe; } } else /* clear results buffer, workers will put here */ memset(work, 0, work_sz); shmem_barrier(0, 0, nProcs, pSync2); RDprintf("Workers[1 ... %d] put Target data to PE0 work " "vector\n",nWorkers); if ( my_pe != 0 ) { /* push nWords of val my_pe back to PE zero */ shmem_long_put(&work[my_pe * nWords], Target, nWords, 0); } else { /* wait for procs 1 ... nProcs to complete put()s */ for(l=1; l < nProcs; l++) { wp = &work[ l*nWords ]; // procs nWords chunk #if 1 /* wait for last long to be written from each PE */ shmem_wait(&wp[nWords-1],0); #else for(k=0; k < nWords; k++) shmem_wait(&wp[k],0); #endif } } shmem_barrier(0, 0, nProcs, pSync3); if ( my_pe == 0 ) { RDprintf("Loop(%d) PE0 verifing work data.\n",j); for(l=1; l < nProcs; l++) { wp = &work[ l*nWords ]; // procs nWords chunk for(k=0; k < nWords; k++) { if (wp[k] != l) { fprintf(stderr, "[0] PE(%d)_work[%d] %ld " "!= %d?\n", l,k,work[k],l); failures++; } assert(wp[k] == l); break; } if (failures) break; } } shmem_barrier(0, 0, nProcs, pSync4); #if _DEBUG if (loops > 1) { Rfprintf(stderr,"."); RDprintf("Loop(%d) Pass.\n",j); } #endif } shmem_free( work ); shmem_free( Target ); #if _DEBUG Rfprintf(stderr,"\n");fflush(stderr); shmem_barrier_all(); RDprintf("%d(%d) Exit(%d)\n", my_pe, nProcs, failures); #endif shmem_finalize(); return failures; }
int main(int argc, char **argv) { int loops=DFLT_LOOPS; char *pgm; int *Target; int *Source; int i, me, npes; int target_PE; long bytes; double start_time, *total_time; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 's': Sync++; break; case 't': Track++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } for(i=0; i < SHMEM_REDUCE_SYNC_SIZE; i++) pSync[i] = SHMEM_SYNC_VALUE; target_PE = (me+1) % npes; total_time = (double *) shmem_malloc( npes * sizeof(double) ); if (!total_time) { fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", (elements * sizeof(double))); shmem_global_exit(1); } for(i=0; i < npes; i++) total_time[i] = -1.0; Source = (int *) shmem_malloc( elements * sizeof(*Source) ); if (!Source) { fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(total_time); shmem_global_exit(1); } Target = (int *) shmem_malloc( elements * sizeof(*Target) ); if (!Target) { fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(Source); shmem_free(total_time); shmem_global_exit(1); } for (i = 0; i < elements; i++) { Target[i] = -90; Source[i] = i + 1; } bytes = loops * sizeof(int) * elements; if (Verbose && me==0) { fprintf(stderr, "%s: INFO - %d loops, put %d (int) elements to PE+1 Max put ??\n", pgm, loops, elements); } shmem_barrier_all(); for(i=0; i < loops; i++) { start_time = shmemx_wtime(); shmem_int_put(Target, Source, elements, target_PE); time_taken += (shmemx_wtime() - start_time); if (me==0) { if ( Track && i > 0 && ((i % 200) == 0)) fprintf(stderr,".%d",i); } if (Sync) shmem_barrier_all(); } // collect time per node. shmem_double_put( &total_time[me], &time_taken, 1, 0 ); shmem_double_sum_to_all(&sum_time, &time_taken, 1, 0, 0, npes, pWrk, pSync); shmem_barrier_all(); for (i = 0; i < elements; i++) { if (Target[i] != i + 1) { printf("%d: Error Target[%d] = %d, expected %d\n", me, i, Target[i], i + 1); shmem_global_exit(1); } } if ( Track && me == 0 ) fprintf(stderr,"\n"); if(Verbose && me == 0) { double rate, comp_time; if (Verbose > 1) fprintf(stdout,"Individule PE times: (seconds)\n"); for(i=0,comp_time=0.0; i < npes; i++) { comp_time += total_time[i]; if (Verbose > 1) fprintf(stdout," PE[%d] %8.6f\n",i,total_time[i]); } sum_time /= (double)npes; comp_time /= (double)npes; if (sum_time != comp_time) printf("%s: computed_time %7.5f != sum_to_all_time %7.5f)\n", pgm, comp_time, sum_time ); rate = ((double)bytes/(1024.0*1024.0)) / comp_time; printf("%s: shmem_int_put() %7.4f MB/sec (bytes %ld secs %7.4f)\n", pgm, rate, bytes, sum_time); } shmem_free(total_time); shmem_free(Target); shmem_free(Source); shmem_finalize(); return 0; }
int main(int argc, char* argv[]) { int c, j, cloop, loops = DFLT_LOOPS; int mpe, num_pes; int nWords=1; int nIncr=1; int failures=0; char *pgm; start_pes(0); mpe = _my_pe(); num_pes = _num_pes(); if (num_pes == 1) { Rfprintf(stderr, "ERR - Requires > 1 PEs\n"); return 1; } pgm = strrchr(argv[0],'/'); if ( pgm ) pgm++; else pgm = argv[0]; while((c=getopt(argc,argv,"hqVvl:")) != -1) { switch(c) { case 'V': case 'v': Verbose++; break; case 'l': loops = atoi(optarg); break; case 'h': Rfprintf(stderr, "usage: %s {-l loopcnt(%d)} {numLongs(%d)} {loopIncr(%d)}\n", pgm,DFLT_LOOPS,DFLT_NWORDS,DFLT_INCR); return 1; default: return 1; } } if (optind == argc) nWords = DFLT_NWORDS; else { nWords = atoi_scaled(argv[optind++]); if (nWords <= 0) { Rfprintf(stderr, "ERR - Bad nBytes arg?\n"); return 1; } } if (optind == argc) nIncr = DFLT_INCR; else { loops = atoi(argv[optind++]); if (nIncr <= 0 ) { Rfprintf(stderr, "ERR - incLongs arg out of bounds '%d'?\n", nIncr); return 1; } } if ( nWords % 8 ) { // integral multiple of longs Rprintf("%s: nWords(%d) not a multiple of %ld?\n", pgm,nWords,sizeof(long)); return 1; } for (c = 0; c < _SHMEM_COLLECT_SYNC_SIZE;c++) pSync[c] = _SHMEM_SYNC_VALUE; if (Verbose && mpe == 0) fprintf(stderr,"loops(%d) nWords(%d) incr-per-loop(%d)\n", loops,nWords,nIncr); for(cloop=1; cloop <= loops; cloop++) { c = (sizeof(long)*nWords) * (num_pes + 1); // src + dst allocation. //nWords /= sizeof(long); // convert input of bytes --> longs. src = (long*)shmalloc(c); if ( !src ) { Rprintf("[%d] %s: shmalloc(%d) failed?\n", mpe, pgm,c); return 0; } dst = &src[nWords]; for(j=0; j < nWords; j++) src[j] = (long) (j + mpe*nWords); shmem_barrier_all(); shmem_fcollect64(dst,src,nWords,0,0,num_pes,pSync); // Expect dst to be consecuative integers 0 ... (nLongs*num_pes)-1 for(j=0; j < (nWords*num_pes); j++) { if ( dst[j] != (long) j ) { fprintf(stderr, "[%d] dst[%d] %ld != expected %d\n",mpe,j,dst[j],j); return 1; } } shmem_barrier_all(); if (Verbose && mpe == 0 && loops > 1) { fprintf(stderr,"."); } nWords += nIncr; } if (Verbose && mpe == 0) { fprintf(stderr,"\n");fflush(stderr); } shfree( (void*)src ); shmem_barrier_all(); if (Verbose) printf("%d(%d) Exit(%d)\n", mpe, num_pes, failures); return failures; }
int main(int argc, char **argv) { int i,ps,ps_cnt=2; int *target; int *source; int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS; char *pgm; double start_time, time_taken; start_pes(0); me = _my_pe(); npes = _num_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:p:s")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); return 1; } break; case 'p': if ((ps_cnt = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad pSync[] elements %d\n",loops); return 1; } break; case 's': Serialize++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } return 1; } } ps_cnt *= _SHMEM_BCAST_SYNC_SIZE; pSync = shmalloc( ps_cnt * sizeof(long) ); for (i = 0; i < ps_cnt; i++) pSync[i] = _SHMEM_SYNC_VALUE; source = (int *) shmalloc( elements * sizeof(*source) ); target = (int *) shmalloc( elements * sizeof(*target) ); for (i = 0; i < elements; i += 1) { source[i] = i + 1; target[i] = -90; } if (me==0 && Verbose) fprintf(stderr,"ps_cnt %d loops %d nElems %d\n", ps_cnt,loops,elements); shmem_barrier_all(); for(time_taken = 0.0, ps = i = 0; i < loops; i++) { start_time = shmem_wtime(); shmem_broadcast32(target, source, elements, 0, 0, 0, npes, &pSync[ps]); if (Serialize) shmem_barrier_all(); time_taken += (shmem_wtime() - start_time); if (ps_cnt > 1 ) { ps += _SHMEM_BCAST_SYNC_SIZE; if ( ps >= ps_cnt ) ps = 0; } } if(me == 0 && Verbose) { printf("%d loops of Broadcast32(%ld bytes) over %d PEs: %7.3f secs\n", loops, (elements*sizeof(*source)), npes, time_taken); elements = (elements * loops * sizeof(*source)) / (1024*1024); printf(" %7.5f secs per broadcast() @ %7.4f MB/sec\n", (time_taken/(double)loops), ((double)elements / time_taken) ); } if (Verbose > 1) fprintf(stderr,"[%d] pre B1\n",me); shmem_barrier_all(); if (Verbose > 1) fprintf(stderr,"[%d] post B1\n",me); shfree(pSync); shfree(target); shfree(source); return 0; }
int main(int argc, char **argv) { int loops=DFLT_LOOPS; char *pgm; int *Target; int *Source; int i, me, npes; int target_pe; long bytes; double time_taken=0.0, start_time; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 's': Sync++; break; case 't': Track++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } target_pe = (me+1) % npes; total_time = (double *) shmem_malloc( npes * sizeof(double) ); if (!total_time) { fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", (elements * sizeof(double))); shmem_global_exit(1); } Source = (int *) shmem_malloc( elements * sizeof(*Source) ); if (!Source) { fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(total_time); shmem_global_exit(1); } Target = (int *) shmem_malloc( elements * sizeof(*Target) ); if (!Target) { fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(Source); shmem_free(total_time); shmem_global_exit(1); } for (i = 0; i < elements; i++) { Target[i] = -90; Source[i] = i + 1; } bytes = loops * sizeof(int) * elements; if (Verbose && me==0) fprintf(stderr, "%s: INFO - %d loops, get %d (int) elements from PE+1\n", pgm, loops, elements); shmem_barrier_all(); for(i=0; i < loops; i++) { start_time = shmemx_wtime(); shmem_int_get( Target, Source, elements, target_pe ); time_taken += shmemx_wtime() - start_time; if (me==0) { if ( Track && i > 0 && ((i % 200) == 0)) fprintf(stderr,".%d",i); } if (Sync) shmem_barrier_all(); } // collect time per node elapsed time. shmem_double_put( &total_time[me], &time_taken, 1, 0 ); shmem_barrier_all(); for (i = 0; i < elements; i++) { if (Target[i] != i + 1) { printf("%d: Error Target[%d] = %d, expected %d\n", me, i, Target[i], i + 1); shmem_global_exit(1); } } if ( Track && me == 0 ) fprintf(stderr,"\n"); if (Verbose && me == 0) { double rate,secs; // average time for(i=0,secs=0.0; i < npes; i++) secs += total_time[i]; secs /= (double)npes; rate = ((double)bytes/(1024.0*1024.0)) / secs; printf("%s: ave %5.3f MB/sec (bytes %ld secs %5.3f)\n", pgm, rate, bytes, secs); } shmem_free(total_time); shmem_free(Target); shmem_free(Source); shmem_finalize(); return 0; }
int main(int argc, char **argv) { int i; int *target; int *source; int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS; char *pgm; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; /* lower-case switch enable only a specific test; otherwise run all tests */ while ((i = getopt (argc, argv, "hvqe:l:abcmn")) != EOF) { switch (i) { case 'a': All2++; break; case 'b': Bcast++; break; case 'c': Collect++; break; case 'm': Many++; break; case 'n': Neighbor++; break; case 'q': Verbose=0; break; case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 'h': if (me == 0) usage(pgm); shmem_finalize(); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } if (All2==0 && Bcast==0 && Collect==0 && Many==0 && Neighbor==0) All2 = Bcast = Collect = Many = Neighbor = 1; source = (int *) shmem_malloc( elements * sizeof(*source) ); target = (int *) shmem_malloc( elements * sizeof(*target) ); for (i = 0; i < elements; i += 1) { source[i] = i + 1; target[i] = -90; } shmem_barrier_all(); if (Neighbor) { neighbor_put( target, source, elements, me, npes, loops ); neighbor_get( target, source, elements, me, npes, loops ); } if (All2) { all2all_put( target, source, elements, me, npes, loops ); all2all_get( target, source, elements, me, npes, loops ); } if (Many) { one2many_put( target, source, elements, me, npes, loops ); many2one_get( target, source, elements, me, npes, loops ); } if (Bcast) bcast( target, source, elements, me, npes, loops ); if (Collect) { collect( NULL, source, elements, me, npes, loops ); fcollect( NULL, source, elements, me, npes, loops ); } shmem_barrier_all(); shmem_free(target); shmem_free(source); shmem_finalize(); return 0; }