/*
 * Exercise shmem_long_wait: PE 0 first puts the value 9 to PE 1 four
 * times (the value PE 1 will later wait to see change), then keeps putting
 * random values in 0..9 until one differs from 9.  After a barrier, PE 1
 * waits for *dest to differ from 9 and reports what it sees.
 *
 * Returns 0 on completion, 1 when fewer than two PEs are available.
 */
int main(void)
{
    int me, npes;
    long *dest;

    /* Seed the generator from wall-clock time plus the process id. */
    {
        time_t now;
        time(&now);
        srand(now + getpid());
    }

    start_pes(0);

    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* Every put below targets PE 1, so a single-PE run cannot work. */
    if (npes < 2) {
        fprintf(stderr, "PE %d: this test requires at least 2 PEs\n", me);
        return 1;
    }

    dest = (long *) shmalloc(sizeof(*dest));
    *dest = 9L;
    shmem_barrier_all();

    if (me == 0) {
        int i;

        /* Puts that leave the watched value unchanged. */
        for (i = 0; i < 4; i += 1) {
            long src = 9L;

            shmem_long_put(dest, &src, 1, 1);
            fprintf(stderr, "PE %d put %ld\n", me, src);
        }
        fprintf(stderr, "----------------------------\n");

        /* Keep putting until a value other than 9 has been sent. */
        for (i = 0; i < 1000; i += 1) {
            long src = rand() % 10;

            shmem_long_put(dest, &src, 1, 1);
            fprintf(stderr, "PE %d put %ld\n", me, src);
            if (src != 9L) {
                break;
            }
        }
    }

    shmem_barrier_all();

    if (me == 1) {
        shmem_long_wait(dest, 9L);
        fprintf(stderr, "PE %d finished wait, got %ld\n", me, *dest);
    }

    shmem_barrier_all();

    return 0;
}
void* roundrobin(void* tparam) { ptrdiff_t tid = (ptrdiff_t)tparam; int offset = tid*N_ELEMS; /* fprintf(stderr,"Starting thread %lu with offset %d\n",tid,offset); */ int nextpe = (shmem_my_pe()+1)%shmem_n_pes(); int prevpe = (shmem_my_pe()-1 + shmem_n_pes())%shmem_n_pes(); shmem_long_put(target+offset, source+offset, N_ELEMS, nextpe); /* fprintf(stderr,"Thread %lu done first put\n",tid); */ pthread_barrier_wait(&fencebar); if(tid == 0) shmem_barrier_all(); pthread_barrier_wait(&fencebar); shmem_long_get(source+offset, target+offset, N_ELEMS, prevpe); /* fprintf(stderr,"Thread %lu done first get\n",tid); */ pthread_barrier_wait(&fencebar); if(tid == 0) shmem_barrier_all(); pthread_barrier_wait(&fencebar); shmem_long_get(target+offset, source+offset, N_ELEMS, nextpe); /* fprintf(stderr,"Thread %lu done second get\n",tid); */ pthread_barrier_wait(&fencebar); if(tid == 0) shmem_barrier_all(); pthread_barrier_wait(&fencebar); /* fprintf(stderr,"Done thread %lu\n",tid); */ return 0; }
void benchmark (struct pe_vars v, long * msg_buffer) { static double pwrk[_SHMEM_REDUCE_SYNC_SIZE]; static long psync[_SHMEM_BCAST_SYNC_SIZE]; static double mr, mr_sum; unsigned long size, i; memset(psync, _SHMEM_SYNC_VALUE, sizeof(long[_SHMEM_BCAST_SYNC_SIZE])); /* * Warmup */ if (v.me < v.pairs) { //for (i = 0; i < (ITERS_LARGE * MAX_MSG_SZ); i += MAX_MSG_SZ) { for (i = 0; i < ITERS_LARGE; i += 1) { //shmem_putmem(&msg_buffer[i], &msg_buffer[i], MAX_MSG_SZ, v.nxtpe); shmem_long_put(&msg_buffer[i], &msg_buffer[i], MAX_MSG_SZ, v.nxtpe); } } shmem_barrier_all(); /* * Benchmark */ for (size = 1; size <= MAX_MSG_SZ; size <<= 1) { i = size < LARGE_THRESHOLD ? ITERS_SMALL : ITERS_LARGE; mr = message_rate(v, msg_buffer, size, i); shmem_double_sum_to_all(&mr_sum, &mr, 1, 0, 0, v.npes, pwrk, psync); print_message_rate(v.me, size, mr_sum); } }
/*
 * Time `iterations` back-to-back puts of `size` longs to v.nxtpe and
 * return the achieved rate.
 *
 * v          - per-PE parameters; only PEs with v.me < v.pairs send
 * buffer     - source and destination of the puts; the first
 *              MAX_MSG_SZ * ITERS_LARGE longs are touched before timing
 * size       - number of longs per put
 * iterations - number of puts to issue
 *
 * Returns iterations * 1e6 / (end - begin) on sending PEs (messages per
 * second if TIME() counts microseconds - TODO confirm), and 0 on PEs that
 * do not send.
 */
double message_rate(struct pe_vars v, long *buffer, int size, int iterations)
{
    int64_t begin, end;
    int i, offset;

    /*
     * Touch memory
     */
    memset(buffer, size, sizeof(long) * MAX_MSG_SZ * ITERS_LARGE);

    shmem_barrier_all();

    if (v.me < v.pairs) {
        begin = TIME();

        for (i = 0, offset = 0; i < iterations; i++, offset += size) {
            shmem_long_put(&buffer[offset], &buffer[offset], size, v.nxtpe);
        }
        /* shmem_fence() takes no arguments; it applies to the puts
           issued by this PE. */
        shmem_fence();

        end = TIME();

        return ((double) iterations * 1e6) / ((double) end - (double) begin);
    }

    return 0;
}
/*
 * shmem_fence example: PE 0 puts a 10-element array to PEs 1 and 2, calls
 * shmem_fence(), then puts a single int to the same two PEs.  Every PE
 * then prints dest[0] after a barrier.
 *
 * NOTE(review): the puts address PEs 1 and 2, so this presumably needs to
 * be launched on at least three PEs.
 *
 * Returns 0.
 */
int main(void)
{
    long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    int src = 99;

    shmem_init();

    if (shmem_my_pe() == 0) {
        shmem_long_put(dest, source, 10, 1);    /* put1 */
        shmem_long_put(dest, source, 10, 2);    /* put2 */
        shmem_fence();
        shmem_int_put(&targ, &src, 1, 1);       /* put3 */
        shmem_int_put(&targ, &src, 1, 2);       /* put4 */
    }

    shmem_barrier_all();        /* sync sender and receiver */

    printf("dest[0] on PE %d is %ld\n", shmem_my_pe(), dest[0]);

    /* Nothing failed on this path, so report success; a non-zero status
       is read as failure by shells and job launchers. */
    return 0;
}
/*
 * PE 0 picks a random PE and puts a random value (0..999) into that PE's
 * symmetric `dest`, which every PE has preset to -1.  After a barrier each
 * PE prints its rank, node name and the content of its own `dest`.
 *
 * Returns 0.
 */
int main(int argc, char **argv)
{
    struct utsname host;
    long payload;
    long *dest;
    int me, npes;
    int rc;

    /* Node name is only needed for the final report line. */
    rc = uname(&host);
    assert(rc == 0);

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* Seed from the clock plus the process id. */
    srand(time(NULL) + getpid());
    payload = rand() % 1000;

    dest = (long *) shmem_malloc(sizeof(*dest));
    *dest = -1;
    shmem_barrier_all();

    if (me == 0) {
        const int other_pe = rand() % npes;

        printf("%d: -> %d, sending value %ld\n", me, other_pe, payload);
        shmem_long_put(dest, &payload, 1, other_pe);
    }

    shmem_barrier_all();

    printf("Result: %d @ %s: %ld\n", me, host.nodename, *dest);

    shmem_finalize();

    return 0;
}
/*
 * Wrapper whose element count and PE arrive by pointer; it dereferences
 * both and forwards the call to shmem_long_put().
 *
 * target - destination address handed through unchanged
 * source - source address handed through unchanged
 * size   - points at the number of long elements to transfer
 * pe     - points at the destination PE number
 */
void FORTRANIFY (shmem_put) (long *target, const long *source, int *size, int *pe)
{
    const int nelems = *size;
    const int dest_pe = *pe;

    shmem_long_put(target, source, nelems, dest_pe);
}
int main(int argc, char **argv) { int i,j; long modj,oldj,oldxmodj,newcount; int my_pe,n_pes; size_t max_elements_bytes; static long *x; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); #ifdef HAVE_SET_CACHE_INV shmem_set_cache_inv(); #endif /* fail if trying to use only one processor */ if ( n_pes <= 1 ){ fprintf(stderr, "FAIL - test requires at least two PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_lock_set_clear(%s) n_pes=%d\n", argv[0],n_pes); /* shmalloc x on all pes (only use the one on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); x = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) x[i] = 0; count = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (my_pe != 0) { /* emulate oldj = shmem_long_finc(&count, 0); */ shmem_set_lock(&lock); shmem_long_get(&oldj,&count,1,0); /* get oldj from PE 0's count */ newcount = oldj+1; shmem_long_put(&count,&newcount,1,0); /* update count on PE 0 */ shmem_quiet; /* insure that write completes */ shmem_clear_lock(&lock); /* end of emulation */ modj = (oldj % (n_pes-1)); /* PE 0 is just the counter/checker */ /* increment value in x[modj] */ oldxmodj = shmem_long_finc(&x[modj], 0); /* printf("PE=%d,oldj=%ld,modj=%ld,oldxmodj=%ld\n",my_pe,oldj,modj,oldxmodj); */ } } shmem_barrier_all(); if (my_pe == 0) { /* check x[j] array on PE 0 */ for(j=1 ; j<n_pes; j++) { if (x[j-1] != (long) ITER) fprintf(stderr, "FAIL PE %d of %d: x[%d] = %ld expected = %ld\n", my_pe, n_pes, j-1, x[j-1], (long) ITER); } } shmem_barrier_all(); #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
/* Print one verdict line: "Test <name>: Passed" or "Test <name>: Failed". */
static void report_result(const char *name, int failed)
{
    printf("Test %s: %s\n", name, failed ? "Failed" : "Passed");
}

/*
 * Exercise the put family.  Every PE fills its source buffers with its own
 * rank and puts them to the next PE (ring order); PE 0 then checks that it
 * received the rank of the last PE (npes - 1) and prints one verdict line
 * per routine.  Covered: typed put, putmem, put32/64/128, iput32/64/128,
 * typed iput and the single-element p routines.
 *
 * The test is skipped with a message when run on a single PE.
 * Returns 0.
 */
int main(int argc, char **argv)
{
    int i;
    int nextpe;
    int me, npes;
    /* failN becomes 1 as soon as a mismatch is seen in destN */
    int fail1, fail2, fail3, fail4, fail5, fail6, fail7, fail8;

    short src1[N];
    int src2[N];
    long src3[N];
    long double src4[N];
    long long src5[N];
    double src6[N];
    float src7[N];
    char *src8;
    short src9;
    int src10;
    long src11;
    double src12;
    float src13;

    short *dest1;
    int *dest2;
    long *dest3;
    long double *dest4;
    long long *dest5;
    double *dest6;
    float *dest7;
    char *dest8;
    short *dest9;
    int *dest10;
    long *dest11;
    double *dest12;
    float *dest13;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    if (npes > 1) {
        fail1 = 0;
        fail2 = 0;
        fail3 = 0;
        fail4 = 0;
        fail5 = 0;
        fail6 = 0;
        fail7 = 0;
        fail8 = 0;

        src8 = (char *) malloc(N * sizeof(char));

        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        src9 = (short) me;
        src10 = me;
        src11 = (long) me;
        src12 = (double) me;
        src13 = (float) me;

        dest1 = (short *) shmem_malloc(N * sizeof(*dest1));
        dest2 = (int *) shmem_malloc(N * sizeof(*dest2));
        dest3 = (long *) shmem_malloc(N * sizeof(*dest3));
        dest4 = (long double *) shmem_malloc(N * sizeof(*dest4));
        dest5 = (long long *) shmem_malloc(N * sizeof(*dest5));
        dest6 = (double *) shmem_malloc(N * sizeof(*dest6));
        dest7 = (float *) shmem_malloc(N * sizeof(*dest7));
        /* N elements are written and received below, so allocate N (the
           previous fixed size of 4 overflowed whenever N > 4). */
        dest8 = (char *) shmem_malloc(N * sizeof(*dest8));
        dest9 = (short *) shmem_malloc(sizeof(*dest9));
        dest10 = (int *) shmem_malloc(sizeof(*dest10));
        dest11 = (long *) shmem_malloc(sizeof(*dest11));
        dest12 = (double *) shmem_malloc(sizeof(*dest12));
        dest13 = (float *) shmem_malloc(sizeof(*dest13));

        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        *dest9 = -9;
        *dest10 = -9;
        *dest11 = -9;
        *dest12 = -9;
        *dest13 = -9.0;

        nextpe = (me + 1) % npes;

        /* Testing shmem_short_put, shmem_int_put, shmem_long_put,
           shmem_longdouble_put, shmem_longlong_put, shmem_double_put,
           shmem_float_put, shmem_putmem */
        shmem_barrier_all();

        shmem_short_put(dest1, src1, N, nextpe);
        shmem_int_put(dest2, src2, N, nextpe);
        shmem_long_put(dest3, src3, N, nextpe);
        shmem_longdouble_put(dest4, src4, N, nextpe);
        shmem_longlong_put(dest5, src5, N, nextpe);
        shmem_double_put(dest6, src6, N, nextpe);
        shmem_float_put(dest7, src7, N, nextpe);
        shmem_putmem(dest8, src8, N * sizeof(char), nextpe);

        shmem_barrier_all();

        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    fail1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    fail2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    fail3 = 1;
                }
                if (dest4[i] != (npes - 1)) {
                    fail4 = 1;
                }
                if (dest5[i] != (npes - 1)) {
                    fail5 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    fail6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    fail7 = 1;
                }
                if (dest8[i] != (npes - 1)) {
                    fail8 = 1;
                }
            }

            report_result("shmem_short_put", fail1);
            report_result("shmem_int_put", fail2);
            report_result("shmem_long_put", fail3);
            report_result("shmem_longdouble_put", fail4);
            report_result("shmem_longlong_put", fail5);
            report_result("shmem_double_put", fail6);
            report_result("shmem_float_put", fail7);
            report_result("shmem_putmem", fail8);
        }
        shmem_barrier_all();

        /* Testing shmem_put32, shmem_put64, shmem_put128.  The buffers
           are chosen by element width, hence the split on sizeof(int).
           NOTE(review): the sizeof(int) == 8 branch pairs put128 with
           long buffers - confirm the element widths on such a platform. */
        if (sizeof(int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            fail2 = 0;
            fail3 = 0;
            fail4 = 0;

            shmem_barrier_all();

            shmem_put32(dest2, src2, N, nextpe);
            shmem_put64(dest3, src3, N, nextpe);
            shmem_put128(dest4, src4, N, nextpe);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        fail2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        fail3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        fail4 = 1;
                    }
                }
                report_result("shmem_put32", fail2);
                report_result("shmem_put64", fail3);
                report_result("shmem_put128", fail4);
            }
        } else if (sizeof(int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            fail1 = 0;
            fail2 = 0;
            fail3 = 0;

            shmem_barrier_all();

            shmem_put32(dest1, src1, N, nextpe);
            shmem_put64(dest2, src2, N, nextpe);
            shmem_put128(dest3, src3, N, nextpe);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        fail1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        fail2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        fail3 = 1;
                    }
                }
                report_result("shmem_put32", fail1);
                report_result("shmem_put64", fail2);
                report_result("shmem_put128", fail3);
            }
        }

        /* Testing shmem_iput32, shmem_iput64, shmem_iput128.
           The source is read with stride 2, so only N / 2 elements fit
           inside an N-element source array; asking for N elements read
           past its end.  The checks below look at exactly those N / 2
           destination elements. */
        shmem_barrier_all();
        if (sizeof(int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            fail2 = 0;
            fail3 = 0;
            fail4 = 0;

            shmem_barrier_all();

            shmem_iput32(dest2, src2, 1, 2, N / 2, nextpe);
            shmem_iput64(dest3, src3, 1, 2, N / 2, nextpe);
            shmem_iput128(dest4, src4, 1, 2, N / 2, nextpe);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        fail2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        fail3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        fail4 = 1;
                    }
                }
                report_result("shmem_iput32", fail2);
                report_result("shmem_iput64", fail3);
                report_result("shmem_iput128", fail4);
            }
        } else if (sizeof(int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            fail1 = 0;
            fail2 = 0;
            fail3 = 0;

            shmem_barrier_all();

            shmem_iput32(dest1, src1, 1, 2, N / 2, nextpe);
            shmem_iput64(dest2, src2, 1, 2, N / 2, nextpe);
            shmem_iput128(dest3, src3, 1, 2, N / 2, nextpe);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        fail1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        fail2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        fail3 = 1;
                    }
                }
                report_result("shmem_iput32", fail1);
                report_result("shmem_iput64", fail2);
                report_result("shmem_iput128", fail3);
            }
        }

        /* Testing shmem_short_iput, shmem_int_iput, shmem_long_iput,
           shmem_double_iput, shmem_float_iput (same stride-2 source
           layout as above, hence N / 2 elements). */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9;
        }
        fail1 = 0;
        fail2 = 0;
        fail3 = 0;
        fail6 = 0;
        fail7 = 0;

        shmem_barrier_all();

        shmem_short_iput(dest1, src1, 1, 2, N / 2, nextpe);
        shmem_int_iput(dest2, src2, 1, 2, N / 2, nextpe);
        shmem_long_iput(dest3, src3, 1, 2, N / 2, nextpe);
        shmem_double_iput(dest6, src6, 1, 2, N / 2, nextpe);
        shmem_float_iput(dest7, src7, 1, 2, N / 2, nextpe);

        shmem_barrier_all();

        if (me == 0) {
            for (i = 0; i < N / 2; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    fail1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    fail2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    fail3 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    fail6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    fail7 = 1;
                }
            }
            report_result("shmem_short_iput", fail1);
            report_result("shmem_int_iput", fail2);
            report_result("shmem_long_iput", fail3);
            report_result("shmem_double_iput", fail6);
            report_result("shmem_float_iput", fail7);
        }

        /* Testing shmem_double_p, shmem_float_p, shmem_int_p,
           shmem_long_p, shmem_short_p */
        shmem_barrier_all();

        shmem_short_p(dest9, src9, nextpe);
        shmem_int_p(dest10, src10, nextpe);
        shmem_long_p(dest11, src11, nextpe);
        shmem_double_p(dest12, src12, nextpe);
        shmem_float_p(dest13, src13, nextpe);

        shmem_barrier_all();

        if (me == 0) {
            report_result("shmem_short_p", *dest9 != (npes - 1));
            report_result("shmem_int_p", *dest10 != (npes - 1));
            report_result("shmem_long_p", *dest11 != (npes - 1));
            report_result("shmem_double_p", *dest12 != (npes - 1));
            report_result("shmem_float_p", *dest13 != (npes - 1));
        }

        shmem_barrier_all();

        shmem_free(dest1);
        shmem_free(dest2);
        shmem_free(dest3);
        shmem_free(dest4);
        shmem_free(dest5);
        shmem_free(dest6);
        shmem_free(dest7);
        shmem_free(dest8);
        shmem_free(dest9);
        shmem_free(dest10);
        shmem_free(dest11);
        shmem_free(dest12);
        shmem_free(dest13);

        /* src8 is the only private heap buffer; release it as well. */
        free(src8);
    } else {
        printf("Number of PEs must be > 1 to test shmem put, test skipped\n");
    }

    shmem_finalize();

    return 0;
}
int main(int argc, char* argv[]) { int c, j, loops, k, l; int my_pe, nProcs, nWorkers; int nWords=1; int failures=0; char *prog_name; long *wp,work_sz; for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) { pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] = pSync4[j] = SHMEM_SYNC_VALUE; } shmem_init(); my_pe = shmem_my_pe(); nProcs = shmem_n_pes(); nWorkers = nProcs - 1; if (nProcs == 1) { Rfprintf(stderr, "ERR - Requires > 1 PEs\n"); shmem_finalize(); return 0; } for(j=0; j < nProcs; j++) if ( shmem_pe_accessible(j) != 1 ) { fprintf(stderr, "ERR - pe %d not accessible from pe %d\n", j, my_pe); } prog_name = strrchr(argv[0],'/'); if ( prog_name ) prog_name++; else prog_name = argv[0]; while((c=getopt(argc,argv,"hvM:s")) != -1) { switch(c) { case 's': Slow++; break; case 'v': Verbose++; break; case 'M': output_mod = atoi(optarg); if (output_mod <= 0) { Rfprintf(stderr, "ERR - output modulo arg out of " "bounds '%d'?\n", output_mod); shmem_finalize(); return 1; } Rfprintf(stderr,"%s: output modulo %d\n", prog_name,output_mod); break; case 'h': Rfprintf(stderr, "usage: %s {nWords-2-put(%d)K/M} {Loop-count(%d)K/M}\n", prog_name, DFLT_NWORDS, DFLT_LOOPS); shmem_finalize(); return 1; default: shmem_finalize(); return 1; } } if (optind == argc) nWords = DFLT_NWORDS; else { nWords = atoi_scaled(argv[optind++]); if (nWords <= 0) { Rfprintf(stderr, "ERR - Bad nWords arg '%d'?\n", nWords); shmem_finalize(); return 1; } } if (optind == argc) loops = DFLT_LOOPS; else { loops = atoi_scaled(argv[optind++]); if (loops <= 0 || loops > 1000000) { Rfprintf(stderr, "ERR - loops arg out of bounds '%d'?\n", loops); shmem_finalize(); return 1; } } work_sz = (nProcs*nWords) * sizeof(long); work = shmem_malloc( work_sz ); if ( !work ) { fprintf(stderr,"[%d] ERR - work = shmem_malloc(%ld) ?\n",my_pe,work_sz); shmem_global_exit(1); } Target = shmem_malloc( 2 * nWords * sizeof(long) ); if ( !Target ) { fprintf(stderr,"[%d] ERR - Target = shmem_malloc(%ld) ?\n", my_pe, (nWords * sizeof(long))); 
shmem_global_exit(1); } src = &Target[nWords]; #if _DEBUG Rprintf("%s: %d loops of %d longs per put\n",prog_name,loops,nWords); #endif for(j=0; j < nWords; j++) src[j] = VAL; for(j=0; j < loops; j++) { #if _DEBUG if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] +(%d)\n", my_pe,j); #endif shmem_barrier(0, 0, nProcs, pSync0); if ( my_pe == 0 ) { int p; for(p=1; p < nProcs; p++) shmem_long_put(Target, src, nWords, p); } else { if (Slow) { /* wait for each put to complete */ for(k=0; k < nWords; k++) shmem_wait(&Target[k],my_pe); } else { /* wait for last word to be written */ shmem_wait(&Target[nWords-1],my_pe); } } #if _DEBUG if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] -(%d)\n", shmem_my_pe(),j); #endif shmem_barrier(0, 0, nProcs, pSync1); RDprintf("Workers[1 ... %d] verify Target data put by proc0\n", nWorkers); /* workers verify put data is expected */ if ( my_pe != 0 ) { for(k=0; k < nWords; k++) { if (Target[k] != VAL) { fprintf(stderr, "[%d] Target[%d] %#lx " "!= %#x?\n", my_pe,k,Target[k],VAL); failures++; } assert(Target[k] == VAL); Target[k] = my_pe; } } else /* clear results buffer, workers will put here */ memset(work, 0, work_sz); shmem_barrier(0, 0, nProcs, pSync2); RDprintf("Workers[1 ... %d] put Target data to PE0 work " "vector\n",nWorkers); if ( my_pe != 0 ) { /* push nWords of val my_pe back to PE zero */ shmem_long_put(&work[my_pe * nWords], Target, nWords, 0); } else { /* wait for procs 1 ... 
nProcs to complete put()s */ for(l=1; l < nProcs; l++) { wp = &work[ l*nWords ]; // procs nWords chunk #if 1 /* wait for last long to be written from each PE */ shmem_wait(&wp[nWords-1],0); #else for(k=0; k < nWords; k++) shmem_wait(&wp[k],0); #endif } } shmem_barrier(0, 0, nProcs, pSync3); if ( my_pe == 0 ) { RDprintf("Loop(%d) PE0 verifing work data.\n",j); for(l=1; l < nProcs; l++) { wp = &work[ l*nWords ]; // procs nWords chunk for(k=0; k < nWords; k++) { if (wp[k] != l) { fprintf(stderr, "[0] PE(%d)_work[%d] %ld " "!= %d?\n", l,k,work[k],l); failures++; } assert(wp[k] == l); break; } if (failures) break; } } shmem_barrier(0, 0, nProcs, pSync4); #if _DEBUG if (loops > 1) { Rfprintf(stderr,"."); RDprintf("Loop(%d) Pass.\n",j); } #endif } shmem_free( work ); shmem_free( Target ); #if _DEBUG Rfprintf(stderr,"\n");fflush(stderr); shmem_barrier_all(); RDprintf("%d(%d) Exit(%d)\n", my_pe, nProcs, failures); #endif shmem_finalize(); return failures; }
int main (int argc, char *argv[]) { double t,tv[2]; int reps = DFLT_REPS; int doprint = 1/*0*/; char *progName; int minWords; int maxWords; int incWords, nwords, nproc, proc, peer, c, r, i; long *rbuf; /* remote buffer - sink */ long *tbuf; /* transmit buffer - src */ start_pes(0); proc = _my_pe(); nproc = _num_pes(); if (nproc == 1) { fprintf(stderr, "ERR - Requires > 1 Processing Elements\n"); return 1; } for (progName = argv[0] + strlen(argv[0]); progName > argv[0] && *(progName - 1) != '/'; progName--) ; while ((c = getopt (argc, argv, "n:evh")) != -1) switch (c) { case 'n': if ((reps = getSize (optarg)) <= 0) usage (progName); break; case 'e': doprint++; break; case 'v': Verbose++; break; case 'h': help (progName); default: usage (progName); } if (optind == argc) minWords = DFLT_MIN_WORDS; else if ((minWords = getSize (argv[optind++])) <= 0) usage (progName); if (optind == argc) maxWords = minWords; else if ((maxWords = getSize (argv[optind++])) < minWords) usage (progName); if (optind == argc) incWords = 0; else if ((incWords = getSize (argv[optind++])) < 0) usage (progName); if (!(rbuf = (long *)shmalloc(maxWords * sizeof(long)))) { perror ("Failed memory allocation"); exit (1); } memset (rbuf, 0, maxWords * sizeof (long)); if (!(tbuf = (long *)shmalloc(maxWords * sizeof(long)))) { perror ("Failed memory allocation"); exit (1); } for (i = 0; i < maxWords; i++) tbuf[i] = 1000 + (i & 255); if (doprint) printf ("%d(%d): Shmem PING reps %d minWords %d maxWords %d " "incWords %d\n", proc, nproc, reps, minWords, maxWords, incWords); dprint("[%d] rbuf: %ld\n", proc, (unsigned long) rbuf); shmem_barrier_all(); peer = proc ^ 1; if (peer >= nproc) doprint = 0; for (nwords = minWords; nwords <= maxWords; nwords = incWords ? nwords + incWords : nwords ? 
2 * nwords : 1) { r = reps; shmem_barrier_all(); tv[0] = gettime(); if (peer < nproc) { if (proc & 1) { r--; shmem_wait(&rbuf[nwords-1], 0); rbuf[nwords-1] = 0; } while (r-- > 0) { shmem_long_put(rbuf, tbuf, nwords, peer); shmem_wait(&rbuf[nwords-1], 0); rbuf[nwords-1] = 0; } if (proc & 1) { shmem_long_put(rbuf, tbuf, nwords, peer); } } tv[1] = gettime(); t = dt (&tv[1], &tv[0]) / (2 * reps); shmem_barrier_all(); printStats (proc, peer, doprint, nwords, t); } shfree(rbuf); shfree(tbuf); shmem_barrier_all(); return 0; }
int main (int argc, char *argv[]) { double t, tv[2]; int reps = 10000; int doprint = 0; char *progName; int minWords = 1; int maxWords = 1; int incWords; int nwords; int nproc; int proc; int peer; int c; int r; int i; long *rbuf; long *tbuf; start_pes (0); proc = _my_pe (); nproc = _num_pes (); for (progName = argv[0] + strlen (argv[0]); progName > argv[0] && *(progName - 1) != '/'; progName--) ; while ((c = getopt (argc, argv, "n:eh")) != -1) switch (c) { case 'n': if ((reps = getSize (optarg)) <= 0) usage (progName); break; case 'e': doprint++; break; case 'h': help (progName); default: usage (progName); } if (optind == argc) minWords = 1; else if ((minWords = getSize (argv[optind++])) <= 0) usage (progName); if (optind == argc) maxWords = minWords; else if ((maxWords = getSize (argv[optind++])) < minWords) usage (progName); if (optind == argc) incWords = 0; else if ((incWords = getSize (argv[optind++])) < 0) usage (progName); if (!(rbuf = (long *) shmalloc (maxWords * sizeof (long)))) { perror ("Failed memory allocation"); exit (1); } memset (rbuf, 0, maxWords * sizeof (long)); shmem_barrier_all (); if (!(tbuf = (long *) malloc (maxWords * sizeof (long)))) { perror ("Failed memory allocation"); exit (1); } if (nproc == 1) return 0; for (i = 0; i < maxWords; i++) tbuf[i] = 1000 + (i & 255); if (doprint) printf ("%d(%d): Shmem PING reps %d minWords %d maxWords %d incWords %d\n", proc, nproc, reps, minWords, maxWords, incWords); shmem_barrier_all (); peer = proc ^ 1; if (peer >= nproc) doprint = 0; for (nwords = minWords; nwords <= maxWords; nwords = incWords ? nwords + incWords : nwords ? 
2 * nwords : 1) { r = reps; shmem_barrier_all (); tv[0] = gettime (); if (peer < nproc) { if (proc & 1) { r--; shmem_wait (&rbuf[nwords - 1], 0); rbuf[nwords - 1] = 0; } while (r-- > 0) { shmem_long_put (rbuf, tbuf, nwords, peer); shmem_wait (&rbuf[nwords - 1], 0); rbuf[nwords - 1] = 0; } if (proc & 1) shmem_long_put (rbuf, tbuf, nwords, peer); } tv[1] = gettime (); t = dt (&tv[1], &tv[0]) / (2 * reps); shmem_barrier_all (); printStats (proc, peer, doprint, nwords, t); } shmem_barrier_all (); free (tbuf); shfree (rbuf); return 0; }
void shmemi_broadcast32_tree (void *target, const void *source, size_t nlong, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync) { int child_l, child_r, parent; const int step = 1 << logPE_stride; int my_pe = GET_STATE (mype); int *target_ptr, *source_ptr; int no_children; long is_ready, lchild_ready, rchild_ready; is_ready = 1; lchild_ready = -1; rchild_ready = -1; shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE); shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE); pSync[0] = 0; pSync[1] = 0; target_ptr = (int *) target; source_ptr = (int *) source; set_2tree (PE_start, step, PE_size, &parent, &child_l, &child_r, my_pe); no_children = 0; build_tree (PE_start, step, PE_root, PE_size, &parent, &child_l, &child_r, my_pe); shmemi_trace (SHMEM_LOG_BROADCAST, "before broadcast, R_child = %d L_child = %d", child_r, child_l); /* The actual broadcast */ if (PE_size > 1) { if (my_pe == (PE_start + step * PE_root)) { pSync[0] = SHMEM_SYNC_VALUE; if (child_l != -1) { shmem_long_get (&lchild_ready, (const long *) &pSync[0], 1, child_l); while (lchild_ready != 0) shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); shmem_int_put (target_ptr, source_ptr, nlong, child_l); shmem_fence (); shmem_long_put (&pSync[0], &is_ready, 1, child_l); no_children = 1; } if (child_r != -1) { shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); while (rchild_ready != 0) shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); shmem_int_put (target_ptr, source_ptr, nlong, child_r); shmem_fence (); shmem_long_put (&pSync[0], &is_ready, 1, child_r); no_children = 2; } shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, (long) no_children); pSync[1] = SHMEM_SYNC_VALUE; } else { shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, is_ready); pSync[0] = SHMEM_SYNC_VALUE; shmemi_trace (SHMEM_LOG_BROADCAST, "inside else"); memcpy (source_ptr, target_ptr, nlong * sizeof (int)); if (child_l != -1) { shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); while 
(lchild_ready != 0) shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); shmem_int_put (target_ptr, source_ptr, nlong, child_l); shmem_fence (); shmem_long_put (&pSync[0], &is_ready, 1, child_l); no_children = 1; } if (child_r != -1) { shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); while (rchild_ready != 0) shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); shmem_int_put (target_ptr, source_ptr, nlong, child_r); shmem_fence (); shmem_long_put (&pSync[0], &is_ready, 1, child_r); no_children = 2; } pSync[0] = SHMEM_SYNC_VALUE; if (no_children == 0) { pSync[1] = SHMEM_SYNC_VALUE; /* TO DO: Is check for parents pSync required? */ shmem_long_inc (&pSync[1], parent); } else { shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, (long) no_children); pSync[1] = SHMEM_SYNC_VALUE; /* printf("PE %d incrementing child count on PE %d\n",my_pe,parent); */ shmem_long_inc (&pSync[1], parent); } } shmemi_trace (SHMEM_LOG_BROADCAST, "at the end of bcast32"); /* shmem_barrier(PE_start, logPE_stride, PE_size, pSync); */ } }
void shmemi_barrier_tree (int PE_start, int logPE_stride, int PE_size, long *pSync) { int child_l, child_r, parent; const int step = 1 << logPE_stride; int my_pe = GET_STATE (mype); int no_children; long is_ready, lchild_ready, rchild_ready; is_ready = 1; lchild_ready = -1; rchild_ready = -1; shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE); shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, SHMEM_SYNC_VALUE); /* printf("Tree barrier\n"); */ set_2tree (PE_start, step, PE_size, &parent, &child_l, &child_r, my_pe); no_children = 0; shmemi_trace (SHMEM_LOG_BARRIER, "before barrier, R_child = %d L_child = %d", child_r, child_l); /* The actual barrier */ if (PE_size > 1) { pSync[0] = 0; pSync[1] = 0; if (my_pe == PE_start) { pSync[0] = SHMEM_SYNC_VALUE; if (child_l != -1) { shmem_long_get (&lchild_ready, (const long *) &pSync[0], 1, child_l); while (lchild_ready != 0) shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); shmem_long_put (&pSync[0], &is_ready, 1, child_l); no_children = 1; } if (child_r != -1) { shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); while (rchild_ready != 0) shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); shmem_long_put (&pSync[0], &is_ready, 1, child_r); no_children = 2; } shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, (long) no_children); pSync[1] = SHMEM_SYNC_VALUE; } else { shmem_long_wait_until (&pSync[0], SHMEM_CMP_EQ, is_ready); shmemi_trace (SHMEM_LOG_BARRIER, "inside else"); if (child_l != -1) { shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); while (lchild_ready != 0) shmem_long_get (&lchild_ready, &pSync[0], 1, child_l); shmem_long_put (&pSync[0], &is_ready, 1, child_l); no_children = 1; } if (child_r != -1) { shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); while (rchild_ready != 0) shmem_long_get (&rchild_ready, &pSync[0], 1, child_r); shmem_long_put (&pSync[0], &is_ready, 1, child_r); no_children = 2; } pSync[0] = SHMEM_SYNC_VALUE; if (no_children == 0) { pSync[1] = SHMEM_SYNC_VALUE; 
shmem_long_inc (&pSync[1], parent); } else { shmem_long_wait_until (&pSync[1], SHMEM_CMP_EQ, (long) no_children); pSync[1] = SHMEM_SYNC_VALUE; shmem_long_inc (&pSync[1], parent); } } shmemi_trace (SHMEM_LOG_BARRIER, "at the end of barrier"); } }