/*
 * Gathers the per-iteration timings of every PE into one array.
 *
 * Returns a buffer obtained from shmem_malloc() holding
 * NUM_PES * timer->seconds_iter doubles; this function does not free it,
 * so the caller is left holding it. Returns NULL when the timer has no
 * recorded seconds.
 */
static double * gather_rank_times(_timer_t * const timer)
{
  if (!(timer->seconds_iter > 0)) {
    return NULL;
  }

  /* Every recorded iteration must carry a timing sample. */
  assert(timer->seconds_iter == timer->num_iters);

  const unsigned int num_records = NUM_PES * timer->seconds_iter;
  const size_t local_bytes = timer->seconds_iter * sizeof(double);

  /* The collective reads its source from the symmetric heap, so stage a copy there. */
  double * local_copy = shmem_malloc(local_bytes);
  assert(local_copy);
  memcpy(local_copy, timer->seconds, local_bytes);

  double * gathered = shmem_malloc(num_records * sizeof(double));
  assert(gathered);

  shmem_barrier_all();
  shmem_fcollect64(gathered, local_copy, timer->seconds_iter, 0, 0, NUM_PES, pSync);
  shmem_barrier_all();

  shmem_free(local_copy);
  return gathered;
}
/*
 * Gathers the per-iteration 'count' values of every PE into one array.
 *
 * Returns a buffer obtained from shmem_malloc() holding
 * NUM_PES * timer->num_iters unsigned ints; this function does not free it.
 * Returns NULL when timer->count_iter is not positive.
 */
static unsigned int * gather_rank_counts(_timer_t * const timer)
{
  if (!(timer->count_iter > 0)) {
    return NULL;
  }

  const unsigned int num_records = NUM_PES * timer->num_iters;
  const size_t local_bytes = timer->num_iters * sizeof(unsigned int);

  /* Stage the local counts on the symmetric heap for the collective. */
  unsigned int * local_copy = shmem_malloc(local_bytes);
  assert(local_copy);
  memcpy(local_copy, timer->count, local_bytes);

  unsigned int * gathered = shmem_malloc(num_records * sizeof(unsigned int));
  assert(gathered);

  shmem_barrier_all();
  shmem_collect32(gathered, local_copy, timer->num_iters, 0, 0, NUM_PES, pSync);
  shmem_barrier_all();

  shmem_free(local_copy);
  return gathered;
}
/*
 * Single-element put check for doubles: every PE stores PI in a symmetric
 * double, PE 0 overwrites PE 1's copy with E, and PE 1 reports whether the
 * value it now holds is within epsilon of E.
 */
int main(void)
{
    int my_pe;
    double *sym_val;

    shmem_init();
    my_pe = shmem_my_pe();

    sym_val = (double *) shmem_malloc(sizeof(*sym_val));
    *sym_val = PI;

    /* Everyone must have written PI before PE 0 overwrites PE 1's copy. */
    shmem_barrier_all();

    if (my_pe == 0) {
        shmem_double_p(sym_val, E, 1);
    }

    /* The barrier completes the put before PE 1 inspects the value. */
    shmem_barrier_all();

    if (my_pe == 1) {
        const double received = *sym_val;
        const char *verdict = (fabs(received - E) < epsilon) ? "OK" : "FAIL";

        printf("PE %d: %f, %s\n", my_pe, received, verdict);
    }

    shmem_free(sym_val);
    shmem_finalize();

    return 0;
}
/*
 * hclib + SHMEM fetch-and-increment smoke test.
 *
 * Inside the hclib runtime every PE allocates one symmetric long, zeroes it,
 * and then spawns 10 asynchronous tasks that each perform a
 * fetch-and-increment on PE 0's copy. The enclosing finish scope waits for
 * all tasks before the PE reports that it left the scope.
 */
int main(int argc, char **argv) {
    const char *deps[] = { "system", "sos" };

    hclib::launch(deps, 2, [] {
        pe = hclib::shmem_my_pe();

        long *target = (long *)shmem_malloc(sizeof(long));
        assert(target);
        *target = 0;

        /* All PEs must have zeroed their counter before any increment lands. */
        hclib::shmem_barrier_all();

        hclib::finish([&] {
            for (int i = 0; i < 10; i++) {
                hclib::async([=] {
                    /* The fetched value is not needed; only the increment matters. */
                    (void)hclib::shmem_long_finc(target, 0);
                });
            }
        });

        fprintf(stderr, "%d out of finish\n", ::shmem_my_pe());

        hclib::shmem_barrier_all();
        hclib::shmem_free(target);
    });

    return 0;
}
/*
 * Single-element put check for longs: every PE stores 3 in a symmetric long
 * and prints it, PE 0 then writes 42 into PE 1's copy, and PE 1 reports
 * whether it now holds 42.
 */
int main(void)
{
    long *f;
    int me;

    shmem_init();
    me = shmem_my_pe();

    f = (long *) shmem_malloc(sizeof(*f));
    *f = 3;

    /* Every PE must have initialised its copy before the remote write. */
    shmem_barrier_all();

    printf("PE %d: before put, f = %ld\n", me, *f);

    if (me == 0) {
        shmem_long_p(f, 42, 1);
    }

    /* The barrier completes the put before PE 1 reads the value. */
    shmem_barrier_all();

    if (me == 1) {
        printf("PE %d: after put, f = %ld, %s\n", me, *f,
               (*f == 42) ? "OK" : "FAIL");
    }

    /* Release the symmetric allocation before shutting the library down. */
    shmem_free(f);
    shmem_finalize();

    return 0;
}
/*
 * Non-blocking put example: each PE fills a local array with its own PE
 * number and sends it to the next PE (wrapping around) with
 * shmemx_long_put_nb(), then waits on the returned request handle.
 */
int main (int argc, char **argv)
{
  long src[N];
  long *dest;
  int me, npes;
  int idx;
  int right_neighbour;
  shmemx_request_handle_t handle;

  shmem_init ();
  me = shmem_my_pe ();
  npes = shmem_n_pes ();

  /* Payload: N copies of this PE's number. */
  for (idx = 0; idx < N; ++idx)
    {
      src[idx] = (long) me;
    }

  dest = (long *) shmem_malloc (N * sizeof (*dest));

  right_neighbour = (me + 1) % npes;

  shmemx_long_put_nb (dest, src, N, right_neighbour, &handle);
  shmemx_wait_req (handle);

  shmem_barrier_all ();

  shmem_free (dest);
  shmem_finalize ();

  return 0;
}
/*
 * shmem_malloc() wrapper that never returns NULL: when the allocation
 * fails the process is terminated via exit(-0xdead).
 */
static void *safe_shmem_malloc(size_t size)
{
    void *block = shmem_malloc(size);

    if (block == NULL) {
        exit(-0xdead);
    }

    return block;
}
int main(int argc, char* argv[]) { long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; long *target; int *flag; int i, num_pes; int failed = 0; shmem_init(); target = (long*) shmem_malloc(sizeof(long) * 10); flag = (int*) shmem_malloc(sizeof(int)); *flag = 0; num_pes=shmem_n_pes(); memset(target, 0, sizeof(long)*10); shmem_barrier_all(); if (shmem_my_pe() == 0) { for(i = 0; i < num_pes; i++) { shmem_long_put_nbi(target, source, 10, i); shmem_fence(); shmem_int_inc(flag, i); } } shmem_int_wait_until(flag, SHMEM_CMP_EQ, 1); for (i = 0; i < 10; i++) { if (target[i] != source[i]) { fprintf(stderr,"[%d] target[%d] = %ld, expected %ld\n", shmem_my_pe(), i, target[i], source[i]); failed = 1; } } shmem_free(target); shmem_free(flag); shmem_finalize(); return failed; }
int main(int argc, char **argv) { int i,j; long modj,oldj,oldxmodj; int my_pe,n_pes; size_t max_elements,max_elements_bytes; static long *x; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); #ifdef HAVE_SET_CACHE_INV shmem_set_cache_inv(); #endif /* fail if trying to use only one processor */ if ( n_pes <= 1 ){ fprintf(stderr, "FAIL - test requires at least two PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_long_finc(%s) n_pes=%d\n", argv[0],n_pes); /* shmalloc x on all pes (only use the one on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); x = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) x[i] = 0; count = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (my_pe != 0) { oldj = shmem_long_finc(&count, 0); /* get index oldj from PE 0 */ modj = (oldj % (n_pes-1)); /* PE 0 is just the counter/checker */ /* increment value in x[modj] */ oldxmodj = shmem_long_finc(&x[modj], 0); /* printf("PE=%d,oldj=%ld,modj=%ld,oldxmodj=%ld\n",my_pe,oldj,modj,oldxmodj); */ } } shmem_barrier_all(); if (my_pe == 0) { /* check x[j] array on PE 0 */ for(j=1 ; j<n_pes; j++) { if (x[j-1] != ITER) fprintf(stderr, "FAIL PE %d of %d: x[%d] = %ld expected = %ld\n", my_pe, n_pes, j-1, x[j-1], ITER); } } shmem_barrier_all(); #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
/*
 * SHMEM emulation of an MPI_Alltoall of one int per PE pair.
 *
 * Each PE draws a random count (1 + (int)(10*z)) for every PE, prints them,
 * then writes count j into slot `myid` of PE j's rcounts array, and finally
 * prints what it received.
 *
 * NOTE(review): init_it(), seed_random(), random_number(), myid and numnodes
 * are defined elsewhere; init_it() presumably initialises SHMEM and sets
 * myid/numnodes -- confirm.
 */
int main (int argc, char *argv[])
{
  int *scounts, *rcounts;
  int i;
  float z;

  init_it (&argc, &argv);
  scounts = (int *) shmem_malloc (sizeof (int) * numnodes);
  rcounts = (int *) shmem_malloc (sizeof (int) * numnodes);

  /* seed the random number generator with a different number on each PE */
  seed_random (myid);

  /* find data to send */
  for (i = 0; i < numnodes; i++)
    {
      random_number (&z);
      scounts[i] = (int) (10.0 * z) + 1;
    }

  printf ("-------myid= %d scounts=", myid);
  for (i = 0; i < numnodes; i++)
    {
      printf ("%d ", scounts[i]);
    }
  printf ("\n");

  /* send the data: stands in for
     MPI_Alltoall(scounts,1,MPI_INT, rcounts,1,MPI_INT, MPI_COMM_WORLD) */
  shmem_barrier_all ();
  for (i = 0; i < numnodes; i++)
    {
      shmem_int_put (&rcounts[myid], &scounts[i], 1, i);
    }
  /* the barrier completes all puts before rcounts is read */
  shmem_barrier_all ();

  printf ("myid= %d rcounts=", myid);
  for (i = 0; i < numnodes; i++)
    {
      printf ("%d ", rcounts[i]);
    }
  printf ("\n");

  /* release the symmetric arrays before shutting down */
  shmem_free (rcounts);
  shmem_free (scounts);

  shmem_finalize ();
  return 0;
}
/*
 * Times `loops` back-to-back shmem_fcollect32() calls over all PEs.
 *
 * target   - ignored on entry; the function allocates its own symmetric
 *            destination of elements * npes ints and frees it before returning
 * src      - source buffer of `elements` ints passed to the collective
 * elements - number of ints each PE contributes
 * me       - calling PE's number (PE 0 prints the report when Verbose is set)
 * npes     - number of PEs taking part
 * loops    - number of fcollect calls to time
 */
void fcollect(int *target, int *src, int elements, int me, int npes, int loops)
{
    int i;
    double start_time, elapsed_time;
    /* widen before multiplying so the product is not computed in int */
    long total_bytes = (long) loops * elements * (long) sizeof(*src);
    long *ps, *pSync, *pSync1;

    /* two sync arrays so that consecutive collectives can alternate */
    pSync = (long*) shmem_malloc( 2 * sizeof(long) * _SHMEM_COLLECT_SYNC_SIZE );
    pSync1 = &pSync[_SHMEM_COLLECT_SYNC_SIZE];
    for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) {
        pSync[i] = pSync1[i] = _SHMEM_SYNC_VALUE;
    }

    target = (int *) shmem_malloc( elements * sizeof(*target) * npes );

    if (me==0 && Verbose) {
        fprintf(stdout,"%s: %d loops of fcollect32(%ld bytes) over %d PEs: ",
                __FUNCTION__,loops,(long)(elements*sizeof(*src)),npes);
        fflush(stdout);
    }

    shmem_barrier_all();

    start_time = shmemx_wtime();
    for (i = 0; i < loops; i++) {
        /* Alternate between the two disjoint sync arrays. Indexing
         * &pSync[i & 1] would yield two arrays that overlap in all but
         * one element. */
        ps = (i & 1) ? pSync1 : pSync;
        shmem_fcollect32( target, src, elements, 0, 0, npes, ps );
    }
    elapsed_time = shmemx_wtime() - start_time;

    if (me==0 && Verbose) {
        printf("%7.3f secs\n", elapsed_time);
        printf(" %7.5f usecs / fcollect32(), %ld Kbytes @ %7.4f MB/sec\n\n",
               (elapsed_time/((double)loops*npes))*1000000.0,
               (total_bytes/1024),
               ((double)total_bytes/(1024.0*1024.0)) / elapsed_time );
    }

    shmem_barrier_all();
    shmem_free(target);
    shmem_free( pSync );
    shmem_barrier_all();
}
/*
 * Minimal symmetric-heap round trip: initialise, allocate one long,
 * free it again, and shut down.
 */
int main ()
{
  long *scratch;

  shmem_init ();

  scratch = (long *) shmem_malloc (sizeof (long));
  shmem_free (scratch);

  shmem_finalize ();

  return 0;
}
/*
 * Prints this PE's rank and the PE count, then allocates a 10-int symmetric
 * array and fills every element with 3.
 */
int main(int argc, char **argv)
{
    shmem_init();

    int MyRank = shmem_my_pe ();
    int Numprocs = shmem_n_pes ();
    printf("rank = %d size = %d\n", MyRank, Numprocs);

    int *shared_arr = (int *)shmem_malloc(10 * sizeof(int));
    assert(shared_arr);

    int i;
    for (i = 0; i < 10; i++) {
        shared_arr[i] = 3;
    }

    /* Release the symmetric allocation before shutting the library down. */
    shmem_free(shared_arr);
    shmem_finalize();

    return 0;
}
/*
 * Random-target put example: every PE sets its symmetric `dest` to -1; PE 0
 * picks a random PE and writes a random value (0..999) into that PE's
 * `dest`; every PE then prints its node name and the value it holds.
 */
int main (int argc, char **argv)
{
  long src;
  long *dest;
  int me, npes;
  struct utsname u;
  int su;

  su = uname (&u);
  assert (su == 0);

  shmem_init ();
  me = shmem_my_pe ();
  npes = shmem_n_pes ();

  {
    /* Seed from wall-clock time plus the process id. */
    time_t now;
    time (&now);
    srand ((unsigned int) (now + getpid ()));
  }

  src = rand () % 1000;

  dest = (long *) shmem_malloc (sizeof (*dest));
  *dest = -1;

  /* Every PE must have stored -1 before PE 0 overwrites one of them. */
  shmem_barrier_all ();

  if (me == 0)
    {
      int other_pe = rand () % npes;

      printf ("%d: -> %d, sending value %ld\n", me, other_pe, src);
      shmem_long_put (dest, &src, 1, other_pe);
    }

  /* The barrier completes the put before anyone prints. */
  shmem_barrier_all ();

  printf ("Result: %d @ %s: %ld\n", me, u.nodename, *dest);

  /* Release the symmetric allocation before shutting the library down. */
  shmem_free (dest);
  shmem_finalize ();

  return 0;
}
/*
 * Entry point of a small command shell.
 *
 * With one argument, reopen(0, argv[1], O_RDONLY) is called and the shell
 * runs non-interactively (no prompt); with more than one argument it prints
 * usage and returns -1. Each line from readline() is run by a forked child
 * via runcmd(); the parent waits for it. If the child's status is 0 and
 * shcwd is non-empty afterwards, the parent calls chdir(shcwd). In
 * interactive mode "!halt" returns 0 and "!debug" executes a raw
 * instruction word.
 *
 * NOTE(review): shcwd comes from shmem_malloc(BUFSIZE); presumably this is
 * memory shared between parent and child so the child can hand back a new
 * working directory -- confirm against runcmd().
 * NOTE(review): reopen(), the two-argument waitpid() and the "%e" conversion
 * are not the hosted C/POSIX forms; presumably they are supplied by this
 * project's own libc -- confirm.
 * NOTE(review): the closing brace of main is not visible in this excerpt.
 */
int main(int argc, char **argv) { int ret, interactive = 1; printf("Welcome to sh!\n"); if (argc == 2) { if ((ret = reopen(0, argv[1], O_RDONLY)) != 0) { return ret; } interactive = 0; } else if (argc > 2) { usage(); return -1; } shcwd = shmem_malloc(BUFSIZE); assert(shcwd != NULL); char *buffer; while ((buffer = readline((interactive) ? "$ " : NULL)) != NULL) { printf("\n"); shcwd[0] = '\0'; int pid; if (interactive && !strncmp(buffer, "!halt", 10)) return 0; if (interactive && !strncmp(buffer, "!debug", 10)) { __asm__ volatile(".word 0xe7f001f0"); continue; } if ((pid = fork()) == 0) { ret = runcmd(buffer); exit(ret); } assert(pid >= 0); if (waitpid(pid, &ret) == 0) { if (ret == 0 && shcwd[0] != '\0') { ret = chdir(shcwd); } if (ret != 0) { printf("error: %d - %e\n", ret, ret); } } }
/*
 * Atomic swap example: every PE stores its own number in a symmetric long.
 * Each odd-numbered PE then swaps its number into the next PE's copy
 * (wrapping around) and prints its own copy together with the value it
 * got back.
 */
int main(void)
{
    long *dest;
    int me, npes;
    long swapped_val, new_val;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    dest = (long *) shmem_malloc(sizeof (*dest));
    *dest = me;

    /* All copies must be initialised before any swap targets them. */
    shmem_barrier_all();

    new_val = me;

    if (me & 1) {
        swapped_val = shmem_swap(dest, new_val, (me + 1) % npes);
        printf("%d: dest = %ld, swapped = %ld\n", me, *dest, swapped_val);
    }

    shmem_free(dest);

    /* Shut the library down explicitly, matching the shmem_init() above. */
    shmem_finalize();

    return 0;
}
/*
 * Single-element put check for doubles: every PE stores 3.1415927 in a
 * symmetric double, PE 0 overwrites PE 1's copy with `e`, and PE 1 prints
 * "OK" when its copy is within `epsilon` of `e`, "FAIL" otherwise.
 */
int main(void)
{
    double *f;
    int me;

    shmem_init();
    me = shmem_my_pe();

    f = (double *) shmem_malloc(sizeof (*f));
    *f = 3.1415927;

    /* Every PE must have initialised its copy before the remote write. */
    shmem_barrier_all();

    if (me == 0)
        shmem_double_p(f, e, 1);

    /* The barrier completes the put before PE 1 reads the value. */
    shmem_barrier_all();

    if (me == 1)
        printf("%s\n", (fabs (*f - e) < epsilon) ? "OK" : "FAIL");

    /* Release the allocation and shut the library down explicitly. */
    shmem_free(f);
    shmem_finalize();

    return 0;
}
/*
 * Request-handle example: PE 0 starts a non-blocking put of `src` into
 * PE 1's symmetric int, then polls the request with shmemx_test_req()
 * before and after shmemx_wait_req() and prints the reported status.
 * Finally every PE prints the value it holds.
 */
int main ()
{
  int *dst;
  int me;

  shmem_init ();
  me = shmem_my_pe ();

  dst = shmem_malloc (sizeof (*dst));
  *dst = -999;

  /* All copies carry the sentinel before PE 0 issues the put. */
  shmem_barrier_all ();

  if (me == 0)
    {
      shmemx_request_handle_t req;
      int status;

      shmemx_int_put_nb (dst, &src, 1, 1, &req);

      shmemx_test_req (req, &status);
      fprintf (stderr, "%d: before wait, s = %d\n", me, status);

      shmemx_wait_req (req);

      shmemx_test_req (req, &status);
      fprintf (stderr, "%d: after wait, s = %d\n", me, status);
    }

  shmem_barrier_all ();

  fprintf (stderr, "%d: dst = %d\n", me, *dst);

  shmem_finalize ();

  return 0;
}
int main(int argc, char **argv) { int i,j; short oldjs, oldxs, my_pes; int oldji, oldxi, my_pei; long oldjl, oldxl, my_pel; long long oldjll,oldxll,my_pell; float oldjf, oldxf, my_pef; double oldjd, oldxd, my_ped; int my_pe,n_pes; size_t max_elements,max_elements_bytes; static short *xs; static int *xi; static long *xl; static long long *xll; static float *xf; static double *xd; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); my_pes = (short) my_pe; my_pei = (int) my_pe; my_pel = (long) my_pe; my_pell = (long long) my_pe; my_pef = (float) my_pe; my_ped = (double) my_pe; #ifdef HAVE_SET_CACHE_INV shmem_set_cache_inv(); #endif /* fail if trying to use only one processor */ if ( n_pes <= 1 ){ fprintf(stderr, "FAIL - test requires at least two PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_swap(%s) n_pes=%d\n", argv[0],n_pes); /* test shmem_short_swap */ /* shmalloc xs on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(short) * n_pes); xs = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xs[i] = 0; shmem_barrier_all(); oldjs = 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pes = my_pes + (short) 1; /* record PE value in xs[my_pe] -- save PE number */ oldxs = shmem_short_swap(&xs[my_pe], my_pes, 0); /* printf("PE=%d,i=%d,my_pes=%d,oldxs=%d\n",my_pe,i,my_pes,oldxs); */ if (oldxs != oldjs) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxs = %d expected = %d\n", my_pe, n_pes, i, oldxs, oldjs); oldjs = my_pes; } } shmem_barrier_all(); if (my_pe == 0) { /* check xs[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xs[%d]=%d,i=%d\n",j,j,xs[j],i); */ if (xs[j] != (short) i) fprintf(stderr, "FAIL PE %d of %d: xs[%d] = %d expected = %d\n", my_pe, n_pes, j, xs[j],i); i++; } } shmem_free(xs); /* test shmem_int_swap */ /* shmalloc xi on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(int) * n_pes); xi = shmem_malloc( max_elements_bytes ); for(i=0; 
i<n_pes; i++) xi[i] = 0; shmem_barrier_all(); oldji = 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pei = my_pei + (int) 1; /* record PE value in xi[my_pe] -- save PE number */ oldxi = shmem_int_swap(&xi[my_pe], my_pei, 0); /* printf("PE=%d,i=%d,my_pei=%d,oldxi=%d\n",my_pe,i,my_pei,oldxi); */ if (oldxi != oldji) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxi = %d expected = %d\n", my_pe, n_pes, i, oldxi, oldji); oldji = my_pei; } } shmem_barrier_all(); if (my_pe == 0) { /* check xi[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xi[%d]=%d,i=%d\n",j,j,xi[j],i); */ if (xi[j] != i) fprintf(stderr, "FAIL PE %d of %d: xi[%d] = %d expected = %d\n", my_pe, n_pes, j, xi[j],i); i++; } } shmem_free(xi); /* test shmem_long_swap */ /* shmalloc xl on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); xl = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xl[i] = 0; shmem_barrier_all(); oldjl = 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pel = my_pel + (long) 1; /* record PE value in xl[my_pe] -- save PE number */ oldxl = shmem_long_swap(&xl[my_pe], my_pel, 0); /* printf("PE=%d,i=%d,my_pel=%d,oldxl=%d\n",my_pe,i,my_pel,oldxl); */ if (oldxl != oldjl) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxl = %d expected = %d\n", my_pe, n_pes, i, oldxl, oldjl); oldjl = my_pel; } } shmem_barrier_all(); if (my_pe == 0) { /* check xl[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xl[%d]=%d,i=%d\n",j,j,xl[j],i); */ if (xl[j] != (long)i) fprintf(stderr, "FAIL PE %d of %d: xl[%d] = %ld expected = %d\n", my_pe, n_pes, j, xl[j],i); i++; } } shmem_free(xl); /* test shmem_longlong_swap */ #ifdef HAVE_LONG_LONG /* shmalloc xll on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(long long) * n_pes); xll = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xll[i] = 0; shmem_barrier_all(); oldjll = 0; for(i=0; i<ITER; i++) { if 
(my_pe != 0) { my_pell = my_pell + (long long) 1; /* record PE value in xll[my_pe] -- save PE number */ oldxll = shmem_longlong_swap(&xll[my_pe], my_pell, 0); /* printf("PE=%d,i=%d,my_pell=%ld,oldxll=%d\n",my_pe,i,my_pell,oldxll); */ if (oldxll != (long long) oldjll) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxll = %ld expected = %ld\n", my_pe, n_pes, i, oldxll, oldjll); oldjll = my_pell; } } shmem_barrier_all(); if (my_pe == 0) { /* check xll[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xll[%d]=%ld,i=%d\n",j,j,xll[j],i); */ if (xll[j] != (long long) i) fprintf(stderr, "FAIL PE %d of %d: xll[%d] = %d expected = %d\n", my_pe, n_pes, j, xll[j],i); i++; } } shmem_free(xll); #endif /* test shmem_float_swap */ /* shmalloc xf on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(float) * n_pes); xf = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xf[i] = (float) 0; shmem_barrier_all(); oldjf = (float) 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pef = my_pef + (float) 1; /* record PE value in xf[my_pe] -- save PE number */ oldxf = shmem_float_swap(&xf[my_pe], my_pef, 0); /* printf("PE=%d,i=%d,my_pef=%10.2f,oldxf=%10.2f\n",my_pe,i,my_pef,oldxf); */ if (oldxf != oldjf) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxf = %10.2f expected = %10.2f\n", my_pe, n_pes, i, oldxf, oldjf); oldjf = my_pef; } } shmem_barrier_all(); if (my_pe == 0) { /* check xs[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xf[%d]=%10.2f,i=%d\n",j,j,xf[j],i); */ if (xf[j] != (float) i) fprintf(stderr, "FAIL PE %d of %d: xf[%d] = %10.2f expected = %10.2f\n", my_pe, n_pes, j-1, xf[j], (float)i); i++; } } shmem_free(xf); /* test shmem_double_swap */ /* shmalloc xd on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(double) * n_pes); xd = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xd[i] = (double) 0; shmem_barrier_all(); oldjd = (double) 0; 
for(i=0; i<ITER; i++) { if (my_pe != 0) { my_ped = my_ped + (double) 1; /* record PE value in xd[my_pe] -- save PE number */ oldxd = shmem_double_swap(&xd[my_pe], my_ped, 0); /* printf("PE=%d,i=%d,my_ped=%10.2f,oldxd=%10.2f\n",my_pe,i,my_ped,oldxd); */ if (oldxd != oldjd) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxd = %10.2f expected = %10.2f\n", my_pe, n_pes, i, oldxd, oldjd); oldjd = my_ped; } } shmem_barrier_all(); if (my_pe == 0) { /* check xd[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xd[%d]=%10.2f,i=%d\n",j,j,xd[j],i); */ if (xd[j] != (double) i) fprintf(stderr, "FAIL PE %d of %d: xd[%d] = %10.2f expected = %10.2f\n", my_pe, n_pes, j, xd[j], (double)i); i++; } } shmem_free(xd); #ifdef SHMEM_C_GENERIC_32 /* test shmem_swap (GENERIC 32) */ my_pei = (int) my_pe; /* shmalloc xi on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(int) * n_pes); xi = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xi[i] = 0; shmem_barrier_all(); oldji = 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pei = my_pei + (int) 1; /* record PE value in xi[my_pe] -- save PE number */ oldxi = shmem_swap(&xi[my_pe], my_pei, 0); /* printf("PE=%d,i=%d,my_pei=%d,oldxi=%d\n",my_pe,i,my_pei,oldxi); */ if (oldxi != oldji) fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxi = %d expected = %d\n", my_pe, n_pes, i, oldxi, oldji); oldji = my_pei; } } shmem_barrier_all(); if (my_pe == 0) { /* check xi[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xi[%d]=%d,i=%d\n",j,j,xi[j],i); */ if (xi[j] != i) fprintf(stderr, "FAIL pe %d of %d: xi[%d] = %d expected = %d\n", my_pe, n_pes, j, xi[j],i); i++; } } shmem_free(xi); #else /* test shmem_swap (GENERIC 64) */ my_pel = (long) my_pe; /* shmalloc xl on all pes (only check the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); xl = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xl[i] = 0; shmem_barrier_all(); 
oldjl = 0; for(i=0; i<ITER; i++) { if (my_pe != 0) { my_pel = my_pel + (long) 1; /* record PE value in xl[my_pe] -- save PE number */ oldxl = shmem_swap(&xl[my_pe], my_pel, 0); /* printf("PE=%d,i=%d,my_pel=%d,oldxl=%d\n",my_pe,i,my_pel,oldxl); */ if (oldxl != oldjl) fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxl = %d expected = %d\n", my_pe, n_pes, i, oldxl, oldjl); oldjl = my_pel; } } shmem_barrier_all(); if (my_pe == 0) { /* check xl[j] array vs PE# + ITER */ i = (int) ITER + 1; for(j=1 ; j<n_pes; j++) { /* printf("j=%d,xl[%d]=%d,i=%d\n",j,j,xl[j],i); */ if (xl[j] != (long)i) fprintf(stderr, "FAIL pe %d of %d: xl[%d] = %ld expected = %d\n", my_pe, n_pes, j, xl[j],i); i++; } } shmem_free(xl); #endif shmem_barrier_all(); #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
int main (int argc, char *argv[]) { int *sray, *rray; int *sdisp, *scounts, *rdisp, *rcounts; int ssize, rsize, i, k, j; float z; init_it (&argc, &argv); scounts = (int *) shmem_malloc (sizeof (int) * numnodes); rcounts = (int *) shmem_malloc (sizeof (int) * numnodes); sdisp = (int *) shmem_malloc (sizeof (int) * numnodes); rdisp = (int *) shmem_malloc (sizeof (int) * numnodes); /* ! seed the random number generator with a ! different number on each processor */ seed_random (myid); /* find out how much data to send */ for (i = 0; i < numnodes; i++) { random_number (&z); scounts[i] = (int) (5.0 * z) + 1; } printf ("myid= %d scounts=%d %d %d %d\n", myid, scounts[0], scounts[1], scounts[2], scounts[3]); /* tell the other processors how much data is coming */ // mpi_err = MPI_Alltoall(scounts,1,MPI_INT, rcounts,1,MPI_INT, // MPI_COMM_WORLD); shmem_barrier_all (); int other, j1; for (j1 = 0; j1 < numnodes; j1++) { shmem_int_put (&rcounts[myid], &scounts[j1], 1, j1); } shmem_barrier_all (); printf ("-----myid= %d rcounts=", myid); for (i = 0; i < numnodes; i++) { printf ("%d ", rcounts[i]); } printf ("\n"); /* write(*,*)"myid= ",myid," rcounts= ",rcounts */ /* calculate displacements and the size of the arrays */ sdisp[0] = 0; for (i = 1; i < numnodes; i++) { sdisp[i] = scounts[i - 1] + sdisp[i - 1]; } rdisp[0] = 0; for (i = 1; i < numnodes; i++) { rdisp[i] = rcounts[i - 1] + rdisp[i - 1]; } ssize = 0; rsize = 0; for (i = 0; i < numnodes; i++) { ssize = ssize + scounts[i]; rsize = rsize + rcounts[i]; } /* allocate send and rec arrays */ sray = (int *) shmem_malloc (sizeof (int) * 20); rray = (int *) shmem_malloc (sizeof (int) * 20); for (i = 0; i < ssize; i++) { sray[i] = myid; } /* send/rec different amounts of data to/from each processor */ // mpi_err = MPI_Alltoallv(sray,scounts,sdisp,MPI_INT, // rray,rcounts,rdisp,MPI_INT, MPI_COMM_WORLD); shmem_barrier_all (); for (j1 = 0; j1 < numnodes; j1++) { int k1 = sdisp[j1]; static int k2; shmem_int_get (&k2, &rdisp[myid], 1, 
j1); shmem_int_put (rray + k2, sray + k1, scounts[j1], j1); } shmem_barrier_all (); printf ("myid= %d rray=", myid); for (i = 0; i < rsize; i++) { printf ("%d ", rray[i]); } printf ("\n"); // mpi_err = MPI_Finalize(); shmem_finalize (); return 0; }
int main(int argc, char **argv) { int i,j; int my_pe,n_pes,PE_root; size_t max_elements,max_elements_bytes; int *srce_int,*targ_int,ans_int; long *srce_long,*targ_long,ans_long; float *srce_float,*targ_float,ans_float; double *srce_double,*targ_double,ans_double; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); /* fail if trying to use only one processor */ if ( n_pes <= 1 ){ fprintf(stderr, "FAIL - test requires at least two PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_broadcast(%s) n_pes=%d\n", argv[0],n_pes); /* initialize the pSync arrays */ for (i=0; i < _SHMEM_BCAST_SYNC_SIZE; i++) { pSync1[i] = _SHMEM_SYNC_VALUE; pSync2[i] = _SHMEM_SYNC_VALUE; } shmem_barrier_all(); /* Wait for all PEs to initialize pSync1 & pSync2 */ PE_root=1; /* we'll broadcast from this PE */ /* shmem_broadcast32 test */ max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_broadcast32 max_elements = %d\n", max_elements); srce_int = shmem_malloc(max_elements_bytes); targ_int = shmem_malloc(max_elements_bytes); srce_float = shmem_malloc(max_elements_bytes); targ_float = shmem_malloc(max_elements_bytes); if((srce_int == NULL) || (targ_int == NULL) || (srce_float == NULL) || (targ_float == NULL)) shmalloc_error(); for(j = 0; j < max_elements; j++) { srce_int[j] = (int)(my_pe+j); srce_float[j] = (float)(my_pe+j); targ_int[j] = (int)(100*my_pe+j); targ_float[j] = (float)(100*my_pe+j); } shmem_barrier_all(); for(i = 0; i < IMAX; i+=2) { /* i is even -- using int */ if (my_pe == PE_root) for(j = 0; j < max_elements; j++) { srce_int[j] = (int)(my_pe+i+j); } /* broadcast from PE_root to all PEs using pSync1 */ shmem_broadcast32(targ_int,srce_int,max_elements,PE_root,0,0,n_pes,pSync1); for(j = 0; j < max_elements; j++) { if (my_pe == PE_root) { ans_int= (int)(100*my_pe+j); } else { ans_int= (int)(PE_root+i+j); } if ( targ_int[j] != ans_int ) fprintf(stderr, "FAIL: PE [%d] 
targ_int[%d]=%d ans_int=%d\n", my_pe,j,targ_int[j],ans_int); } /* i+1 is odd -- using float */ if (my_pe == PE_root) for(j = 0; j < max_elements; j++) { srce_float[j] = (float)(PE_root+i+1+j); } /* broadcast from PE_root to all PEs using pSync2 */ shmem_broadcast32(targ_float,srce_float,max_elements,PE_root,0,0,n_pes,pSync2); for(j = 0; j < max_elements; j++) { if (my_pe == PE_root) { ans_float= (float)(100*my_pe+j); } else { ans_float= (float)(PE_root+i+1+j); } if ( targ_float[j] != ans_float ) fprintf(stderr, "FAIL: PE [%d] targ_float[%d]=%10.0f ans_float=%10.0f\n", my_pe,j,targ_float[j],ans_float); } } shmem_free(srce_int); shmem_free(targ_int); shmem_free(srce_float); shmem_free(targ_float); /* shmem_broadcast64 test */ max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_broadcast64 max_elements = %d\n", max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); srce_double = shmem_malloc(max_elements_bytes); targ_double = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL) || (srce_double == NULL) || (targ_double == NULL)) shmalloc_error(); for(j = 0; j < max_elements; j++) { srce_long[j] = (long)(my_pe+j); srce_double[j] = (double)(my_pe+j); targ_long[j] = (long)(100*my_pe+j); targ_double[j] = (double)(100*my_pe+j); } shmem_barrier_all(); for(i = 0; i < IMAX; i+=2) { /* i is even -- using long */ if (my_pe == PE_root) for(j = 0; j < max_elements; j++) { srce_long[j] = (long)(my_pe+i+j); } /* broadcast from PE_root to all PEs using pSync1 */ shmem_broadcast64(targ_long,srce_long,max_elements,PE_root,0,0,n_pes,pSync1); for(j = 0; j < max_elements; j++) { if (my_pe == PE_root) { ans_long= (long)(100*my_pe+j); } else { ans_long= (long)(PE_root+i+j); } if ( targ_long[j] != ans_long ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d ans_long=%d\n", my_pe,j,targ_long[j],ans_long); } /* i+1 is odd -- 
using double */ if (my_pe == PE_root) for(j = 0; j < max_elements; j++) { srce_double[j] = (double)(PE_root+i+1+j); } /* broadcast from PE_root to all PEs using pSync2 */ shmem_broadcast64(targ_double,srce_double,max_elements,PE_root,0,0,n_pes,pSync2); for(j = 0; j < max_elements; j++) { if (my_pe == PE_root) { ans_double= (double)(100*my_pe+j); } else { ans_double= (double)(PE_root+i+1+j); } if ( targ_double[j] != ans_double ) fprintf(stderr, "FAIL: PE [%d] targ_double[%d]=%10.0f ans_double=%10.0f\n", my_pe,j,targ_double[j],ans_double); } } shmem_free(srce_long); shmem_free(targ_long); shmem_free(srce_double); shmem_free(targ_double); #ifndef OPENSHMEM #ifdef SHMEM_C_GENERIC_32 /* shmemx_broadcast (GENERIC 32) test */ max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmemx_broadcast (GENERIC 32) max_elements = %d\n", max_elements); srce_int = shmem_malloc(max_elements_bytes); targ_int = shmem_malloc(max_elements_bytes); if((srce_int == NULL) || (targ_int == NULL)) shmalloc_error(); for(j = 0; j < max_elements; j++) { srce_int[j] = (int)(my_pe+j); targ_int[j] = (int)(2*my_pe+j); } shmem_barrier_all(); /* broadcast from PE 1 to all PEs */ shmemx_broadcast(targ_int,srce_int,max_elements,1,0,0,n_pes,pSync1); for(j = 0; j < max_elements; j++) { if (my_pe == 1) { ans_int= (int)(j+2); } else { ans_int= (int)(j+1); } if ( targ_int[j] != ans_int ) fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d ans_int=%d\n", my_pe,j,targ_int[j],ans_int); } shmem_free(srce_int); shmem_free(targ_int); #else /* shmemx_broadcast (GENERIC 64) test */ max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmemx_broadcast (GENERIC 64) max_elements = %d\n", max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) 
shmalloc_error(); for(j = 0; j < max_elements; j++) { srce_long[j] = (long)(my_pe+j); targ_long[j] = (long)(2*my_pe+j); } shmem_barrier_all(); /* broadcast from PE 1 to all PEs */ shmemx_broadcast(targ_long,srce_long,max_elements,1,0,0,n_pes,pSync1); for(j = 0; j < max_elements; j++) { if (my_pe == 1) { ans_long = (long)(j+2); } else { ans_long = (long)(j+1); } if ( targ_long[j] != ans_long ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d ans_long=%d\n", my_pe,j,targ_long[j],ans_long); } shmem_free(srce_long); shmem_free(targ_long); #endif #endif #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
int main() { int start,stride,rmlast,rstride,np_aset,inset,lpe; int my_pe,n_pes; int i,fail,n_err,asfail,nasfail; char Case[40]; static int sSource_int[NREDUCE]; static int sTarget_int[NREDUCE]; static int spWrk_int[PWRKELEM]; static long spSync[_SHMEM_REDUCE_SYNC_SIZE]; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); lpe=my_pe; dpSync=shmem_malloc(_SHMEM_REDUCE_SYNC_SIZE*sizeof(long)); for(i=0;i<_SHMEM_REDUCE_SYNC_SIZE;i++) { gpSync[i]=_SHMEM_SYNC_VALUE; dpSync[i]=_SHMEM_SYNC_VALUE; spSync[i]=_SHMEM_SYNC_VALUE; } dSource_int=shmem_malloc(NREDUCE*sizeof(int)); dTarget_int=shmem_malloc(NREDUCE*sizeof(int)); dpWrk_int=shmem_malloc((NREDUCE/2+1 > _SHMEM_REDUCE_MIN_WRKDATA_SIZE ? NREDUCE/2+1 : _SHMEM_REDUCE_MIN_WRKDATA_SIZE)*sizeof(int)); for(start=0;start<=MAXSTART;start++) { rstride=1; for(stride=0;stride<=MAXSTRIDE;stride++) { for(rmlast=0;rmlast<=MAXRMLAST;rmlast++) { np_aset=(n_pes+rstride-1-start)/rstride-rmlast; /* number of processes in the active set */ if(np_aset > 0) /* if active set is not empty */ { if(my_pe==0) printf("\nActive set triplet: PE_start=%d,logPE_stride=%d,PE_size=%d \n",start,stride,np_aset); if((my_pe>=start) && ((my_pe-start)%rstride==0) && ((my_pe-start)/rstride<np_aset)) inset=1; else inset=0; /* Initialize Source and Target arrays */ for(i=0;i<NREDUCE;i++) { sSource_int[i]=SINIT; sTarget_int[i]=TINIT; gSource_int[i]=SINIT; gTarget_int[i]=TINIT; dSource_int[i]=SINIT; dTarget_int[i]=TINIT; } shmem_barrier_all(); /* CASE: static arrays, source is different from target */ sprintf(Case,"static, source!=target"); if(inset) asfail=xor_int(sSource_int,sTarget_int,start,stride,np_aset,rstride,0,dpWrk_int,gpSync,Case); else { /* check that values of source and target have not been changed */ nasfail+=check_sval_notchanged(sSource_int,Case); nasfail+=check_tval_notchanged(sTarget_int,Case); } /* CASE: global arrays, source is different from target */ sprintf(Case,"global, source!=target"); if(inset) 
asfail=xor_int(gSource_int,gTarget_int,start,stride,np_aset,rstride,0,spWrk_int,dpSync,Case); else { /* check that values of source and target have not been changed */ nasfail+=check_sval_notchanged(gSource_int,Case); nasfail+=check_tval_notchanged(gTarget_int,Case); } /* CASE: symmetric heap arrays, source is different from target */ sprintf(Case,"sym heap, source!=target"); if(inset) asfail=xor_int(dSource_int,dTarget_int,start,stride,np_aset,rstride,0,gpWrk_int,spSync,Case); else { /* check that values of source and target have not been changed */ nasfail+=check_sval_notchanged(dSource_int,Case); nasfail+=check_tval_notchanged(dTarget_int,Case); } /* Reinitialize Source arrays for new tests */ for(i=0;i<NREDUCE;i++) { sSource_int[i]=SINIT; gSource_int[i]=SINIT; dSource_int[i]=SINIT; } shmem_barrier_all(); /* CASE: static arrays, source and target are the same array */ sprintf(Case,"static, source==target"); if(inset) asfail=xor_int(sSource_int,sSource_int,start,stride,np_aset,rstride,1,gpWrk_int,dpSync,Case); else /* check that values of source have not been changed */ nasfail+=check_sval_notchanged(sSource_int,Case); /* CASE: global arrays, source and target are the same array */ sprintf(Case,"global, source==target"); if(inset) asfail=xor_int(gSource_int,gSource_int,start,stride,np_aset,rstride,1,dpWrk_int,spSync,Case); else /* check that values of source have not been changed */ nasfail+=check_sval_notchanged(gSource_int,Case); /* CASE: symmetric heap arrays, source and target are the same array */ sprintf(Case,"sym heap, source==target"); if(inset) asfail=xor_int(dSource_int,dSource_int,start,stride,np_aset,rstride,1,spWrk_int,gpSync,Case); else /* check that values of source have not been changed */ nasfail+=check_sval_notchanged(dSource_int,Case); } /* end of if active set is not empty */ } /* end of for loop on rmlast */ rstride*=2; } /* end of for loop on stride */ } /* end of for loop on start */ shmem_barrier_all(); #ifdef NEEDS_FINALIZE 
shmem_finalize(); #endif return(0); }
int main(int argc, char* argv[]) { DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; static DataType target[10]; static DataType pong=666; DataType *t2=NULL; int me, num_pes, pe, Verbose=0; if (argc > 1 && (strcmp(argv[1],"-v") == 0)) { Verbose++; } shmem_init(); me = shmem_my_pe(); num_pes = shmem_n_pes(); if (num_pes == 1) { printf("%s: Requires number of PEs > 1\n", argv[0]); shmem_finalize(); return 0; } t2 = shmem_malloc(10*sizeof(DataType)); if (!t2) { if (me==0) printf("shmem_malloc() failed?\n"); shmem_global_exit(1); } t2[9] = target[9] = 0xFF; shmem_barrier_all(); if (me == 0) { memset(target, 0, sizeof(target)); for(pe=1; pe < num_pes; pe++) SHM_PUT(target, target, 10, pe); for(pe=1; pe < num_pes; pe++) /* put 10 elements into target on PE 1 */ SHM_PUT(target, source, 10, pe); SHM_WAITU( &pong, SHMEM_CMP_GT, 666 ); Vprintf("PE[%d] pong now "PF"\n",me,pong); for(pe=1; pe < num_pes; pe++) /* put 1 element into t2 on PE 1 */ SHM_PUTP(&t2[9], 0xDD, pe); } else { /* wait for 10th element write of 'target' */ SHM_WAITU( &target[9], SHMEM_CMP_NE, 0xFF ); Vprintf("PE[%d] target[9] was 255 now "PF", success.\n",me,target[9]); SHM_WAITU( &target[9], SHMEM_CMP_EQ, 10 ); Vprintf("PE[%d] expected target[9] == 10 now "PF"\n",me,target[9]); if (me == 1) { if (Verbose) { DataType tmp = SHM_GETP( &pong, 0); printf("PE[%d] @ PE[0] pong == "PF", setting to 999\n",me,tmp); } SHM_PUTP( &pong, 999, 0); } SHM_WAITU( &t2[9], SHMEM_CMP_NE, 0xFF ); } //shmem_barrier_all(); /* sync sender and receiver */ if (me != 0) { if (memcmp(source, target, sizeof(DataType) * 10) != 0) { int i; fprintf(stderr,"[%d] Src & Target mismatch?\n",me); for (i = 0 ; i < 10 ; ++i) { printf(PF","PF" ", source[i], target[i]); } printf("\n"); shmem_global_exit(1); } } shmem_free(t2); if (Verbose) fprintf(stderr,"[%d] exit\n",shmem_my_pe()); shmem_finalize(); return 0; }
/*
 * Allocation entry point under the name shmalloc(): hands the request to
 * shmem_malloc() and returns whatever that call returns.
 */
void *
shmalloc(size_t size)
{
    void *block = shmem_malloc(size);

    return block;
}
int main(int argc, char **argv) { const int ITER_CNT = 100; const long int MAX_MSG_SIZE = 1048576; int* source_addr; int peer; long int i=0, buff_size; int j=0; long long int start_time, stop_time, res; double time; shmem_init(); int pe_id = shmem_my_pe(); source_addr = (int*) shmem_malloc(MAX_MSG_SIZE); if(pe_id == 1) { if(shmem_n_pes()!=4) fprintf(stderr,"Num PEs should be ==4"); printf("#Message Cnt;Time(s);MR(msgs/sec)\n"); } if (pe_id==1) peer = 3; else if(pe_id==3) peer = 1; get_rtc_res_(&res); for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i += 1){ pSync[i] = SHMEM_SYNC_VALUE; } /* Collective operation: Implicit barrier on return from attach */ shmem_barrier_all(); if(pe_id == 1 || pe_id == 3) { for(buff_size=1; buff_size<=MAX_MSG_SIZE; buff_size*=2) { isdone=0; shmem_barrier(1,1,2,pSync); get_rtc_(&start_time); for(j=1;j<=ITER_CNT;j++) { shmem_putmem(source_addr, source_addr, buff_size, peer); shmem_quiet(); shmem_int_put(&isdone, &j, 1, peer); shmem_quiet(); shmem_int_wait(&isdone,j-1); shmem_putmem(source_addr, source_addr, buff_size, peer); shmem_quiet(); } shmem_barrier(1,1,2,pSync); get_rtc_(&stop_time); time = (stop_time - start_time)*1.0/(double)res/ITER_CNT; if(pe_id == 1) { printf("%20ld;%20.12f;%20.12f\n", buff_size, time, (double)buff_size/time); } fflush(stdout); } } shmem_barrier_all(); shmem_finalize(); }
/*
 * Prints the "Test <routine>: Passed" / "Test <routine>: Failed" line.
 * 'failed' is non-zero when a mismatch was detected.
 */
static void report_put_result (const char *routine, int failed)
{
  printf ("Test %s: %s\n", routine, failed ? "Failed" : "Passed");
}

/*
 * Tests the put family of routines.
 *
 * Every PE fills its source buffers with its own PE number and puts them to
 * the next PE, (me + 1) % npes.  PE 0 therefore receives from PE npes - 1
 * and checks that each destination element equals npes - 1.  Covered:
 * typed block puts, shmem_putmem, sized puts (32/64/128), strided puts
 * (sized and typed) and single-element puts.
 *
 * Requires more than one PE; otherwise the test is skipped with a message.
 */
int main (int argc, char **argv)
{
  int i;
  int nextpe;
  int me, npes;
  /* Despite the name, a successN flag stays 0 while the check passes and is
     set to 1 on the first mismatch. */
  int success1, success2, success3, success4, success5, success6, success7,
    success8;

  short src1[N];
  int src2[N];
  long src3[N];
  long double src4[N];
  long long src5[N];
  double src6[N];
  float src7[N];
  char *src8;
  short src9;
  int src10;
  long src11;
  double src12;
  float src13;

  short *dest1;
  int *dest2;
  long *dest3;
  long double *dest4;
  long long *dest5;
  double *dest6;
  float *dest7;
  char *dest8;
  short *dest9;
  int *dest10;
  long *dest11;
  double *dest12;
  float *dest13;

  (void) argc;
  (void) argv;

  shmem_init ();
  me = shmem_my_pe ();
  npes = shmem_n_pes ();

  if (npes > 1) {
    success1 = 0;
    success2 = 0;
    success3 = 0;
    success4 = 0;
    success5 = 0;
    success6 = 0;
    success7 = 0;
    success8 = 0;

    src8 = (char *) malloc (N * sizeof (char));

    dest1 = (short *) shmem_malloc (N * sizeof (*dest1));
    dest2 = (int *) shmem_malloc (N * sizeof (*dest2));
    dest3 = (long *) shmem_malloc (N * sizeof (*dest3));
    dest4 = (long double *) shmem_malloc (N * sizeof (*dest4));
    dest5 = (long long *) shmem_malloc (N * sizeof (*dest5));
    dest6 = (double *) shmem_malloc (N * sizeof (*dest6));
    dest7 = (float *) shmem_malloc (N * sizeof (*dest7));
    /* N elements: the init loop and shmem_putmem below both write N bytes. */
    dest8 = (char *) shmem_malloc (N * sizeof (*dest8));
    dest9 = (short *) shmem_malloc (sizeof (*dest9));
    dest10 = (int *) shmem_malloc (sizeof (*dest10));
    dest11 = (long *) shmem_malloc (sizeof (*dest11));
    dest12 = (double *) shmem_malloc (sizeof (*dest12));
    dest13 = (float *) shmem_malloc (sizeof (*dest13));

    if (src8 == NULL || dest1 == NULL || dest2 == NULL || dest3 == NULL
        || dest4 == NULL || dest5 == NULL || dest6 == NULL || dest7 == NULL
        || dest8 == NULL || dest9 == NULL || dest10 == NULL
        || dest11 == NULL || dest12 == NULL || dest13 == NULL) {
      fprintf (stderr, "[%d] buffer allocation failed\n", me);
      shmem_global_exit (1);
    }

    for (i = 0; i < N; i += 1) {
      src1[i] = (short) me;
      src2[i] = me;
      src3[i] = (long) me;
      src4[i] = (long double) me;
      src5[i] = (long long) me;
      src6[i] = (double) me;
      src7[i] = (float) me;
      src8[i] = (char) me;
    }
    src9 = (short) me;
    src10 = me;
    src11 = (long) me;
    src12 = (double) me;
    src13 = (float) me;

    for (i = 0; i < N; i += 1) {
      dest1[i] = -9;
      dest2[i] = -9;
      dest3[i] = -9;
      dest4[i] = -9;
      dest5[i] = -9;
      dest6[i] = -9;
      dest7[i] = -9.0;
      dest8[i] = -9;
    }
    *dest9 = -9;
    *dest10 = -9;
    *dest11 = -9;
    *dest12 = -9;
    *dest13 = -9.0;

    nextpe = (me + 1) % npes;

    /* Testing shmem_short_put, shmem_int_put, shmem_long_put,
       shmem_longdouble_put, shmem_longlong_put, shmem_double_put,
       shmem_float_put, shmem_putmem */
    shmem_barrier_all ();

    shmem_short_put (dest1, src1, N, nextpe);
    shmem_int_put (dest2, src2, N, nextpe);
    shmem_long_put (dest3, src3, N, nextpe);
    shmem_longdouble_put (dest4, src4, N, nextpe);
    shmem_longlong_put (dest5, src5, N, nextpe);
    shmem_double_put (dest6, src6, N, nextpe);
    shmem_float_put (dest7, src7, N, nextpe);
    shmem_putmem (dest8, src8, N * sizeof (char), nextpe);

    shmem_barrier_all ();

    if (me == 0) {
      for (i = 0; i < N; i += 1) {
        if (dest1[i] != (npes - 1)) {
          success1 = 1;
        }
        if (dest2[i] != (npes - 1)) {
          success2 = 1;
        }
        if (dest3[i] != (npes - 1)) {
          success3 = 1;
        }
        if (dest4[i] != (npes - 1)) {
          success4 = 1;
        }
        if (dest5[i] != (npes - 1)) {
          success5 = 1;
        }
        if (dest6[i] != (npes - 1)) {
          success6 = 1;
        }
        if (dest7[i] != (npes - 1)) {
          success7 = 1;
        }
        if (dest8[i] != (npes - 1)) {
          success8 = 1;
        }
      }
      report_put_result ("shmem_short_put", success1);
      report_put_result ("shmem_int_put", success2);
      report_put_result ("shmem_long_put", success3);
      report_put_result ("shmem_longdouble_put", success4);
      report_put_result ("shmem_longlong_put", success5);
      report_put_result ("shmem_double_put", success6);
      report_put_result ("shmem_float_put", success7);
      report_put_result ("shmem_putmem", success8);
    }
    shmem_barrier_all ();

    /* Testing shmem_put32, shmem_put64, shmem_put128.
       NOTE(review): buffers are picked by sizeof (int) only; this presumes
       long is 64-bit and long double is 128-bit in the first branch --
       confirm for the target platform. */
    if (sizeof (int) == 4) {
      for (i = 0; i < N; i += 1) {
        dest2[i] = -9;
        dest3[i] = -9;
        dest4[i] = -9;
      }
      success2 = 0;
      success3 = 0;
      success4 = 0;

      shmem_barrier_all ();

      shmem_put32 (dest2, src2, N, nextpe);
      shmem_put64 (dest3, src3, N, nextpe);
      shmem_put128 (dest4, src4, N, nextpe);

      shmem_barrier_all ();

      if (me == 0) {
        for (i = 0; i < N; i += 1) {
          if (dest2[i] != (npes - 1)) {
            success2 = 1;
          }
          if (dest3[i] != (npes - 1)) {
            success3 = 1;
          }
          if (dest4[i] != (npes - 1)) {
            success4 = 1;
          }
        }
        report_put_result ("shmem_put32", success2);
        report_put_result ("shmem_put64", success3);
        report_put_result ("shmem_put128", success4);
      }
    }
    else if (sizeof (int) == 8) {
      for (i = 0; i < N; i += 1) {
        dest1[i] = -9;
        dest2[i] = -9;
        dest3[i] = -9;
      }
      success1 = 0;
      success2 = 0;
      success3 = 0;

      shmem_barrier_all ();

      shmem_put32 (dest1, src1, N, nextpe);
      shmem_put64 (dest2, src2, N, nextpe);
      shmem_put128 (dest3, src3, N, nextpe);

      shmem_barrier_all ();

      if (me == 0) {
        for (i = 0; i < N; i += 1) {
          if (dest1[i] != (npes - 1)) {
            success1 = 1;
          }
          if (dest2[i] != (npes - 1)) {
            success2 = 1;
          }
          if (dest3[i] != (npes - 1)) {
            success3 = 1;
          }
        }
        report_put_result ("shmem_put32", success1);
        report_put_result ("shmem_put64", success2);
        report_put_result ("shmem_put128", success3);
      }
    }

    /* Testing shmem_iput32, shmem_iput64, shmem_iput128.
       Target stride 1, source stride 2: only N / 2 elements are moved so
       the strided read stays inside the N-element source arrays.  The
       checks below look at exactly those N / 2 target elements. */
    shmem_barrier_all ();
    if (sizeof (int) == 4) {
      for (i = 0; i < N; i += 1) {
        dest2[i] = -9;
        dest3[i] = -9;
        dest4[i] = -9;
      }
      success2 = 0;
      success3 = 0;
      success4 = 0;

      shmem_barrier_all ();

      shmem_iput32 (dest2, src2, 1, 2, N / 2, nextpe);
      shmem_iput64 (dest3, src3, 1, 2, N / 2, nextpe);
      shmem_iput128 (dest4, src4, 1, 2, N / 2, nextpe);

      shmem_barrier_all ();

      if (me == 0) {
        for (i = 0; i < N / 2; i += 1) {
          if (dest2[i] != (npes - 1)) {
            success2 = 1;
          }
          if (dest3[i] != (npes - 1)) {
            success3 = 1;
          }
          if (dest4[i] != (npes - 1)) {
            success4 = 1;
          }
        }
        report_put_result ("shmem_iput32", success2);
        report_put_result ("shmem_iput64", success3);
        report_put_result ("shmem_iput128", success4);
      }
    }
    else if (sizeof (int) == 8) {
      for (i = 0; i < N; i += 1) {
        dest1[i] = -9;
        dest2[i] = -9;
        dest3[i] = -9;
      }
      success1 = 0;
      success2 = 0;
      success3 = 0;

      shmem_barrier_all ();

      shmem_iput32 (dest1, src1, 1, 2, N / 2, nextpe);
      shmem_iput64 (dest2, src2, 1, 2, N / 2, nextpe);
      shmem_iput128 (dest3, src3, 1, 2, N / 2, nextpe);

      shmem_barrier_all ();

      if (me == 0) {
        for (i = 0; i < N / 2; i += 1) {
          if (dest1[i] != (npes - 1)) {
            success1 = 1;
          }
          if (dest2[i] != (npes - 1)) {
            success2 = 1;
          }
          if (dest3[i] != (npes - 1)) {
            success3 = 1;
          }
        }
        report_put_result ("shmem_iput32", success1);
        report_put_result ("shmem_iput64", success2);
        report_put_result ("shmem_iput128", success3);
      }
    }

    /* Testing shmem_short_iput, shmem_int_iput, shmem_long_iput,
       shmem_double_iput, shmem_float_iput */
    for (i = 0; i < N; i += 1) {
      dest1[i] = -9;
      dest2[i] = -9;
      dest3[i] = -9;
      dest6[i] = -9;
      dest7[i] = -9;
    }
    success1 = 0;
    success2 = 0;
    success3 = 0;
    success6 = 0;
    success7 = 0;

    shmem_barrier_all ();

    shmem_short_iput (dest1, src1, 1, 2, N / 2, nextpe);
    shmem_int_iput (dest2, src2, 1, 2, N / 2, nextpe);
    shmem_long_iput (dest3, src3, 1, 2, N / 2, nextpe);
    shmem_double_iput (dest6, src6, 1, 2, N / 2, nextpe);
    shmem_float_iput (dest7, src7, 1, 2, N / 2, nextpe);

    shmem_barrier_all ();

    if (me == 0) {
      for (i = 0; i < N / 2; i += 1) {
        if (dest1[i] != (npes - 1)) {
          success1 = 1;
        }
        if (dest2[i] != (npes - 1)) {
          success2 = 1;
        }
        if (dest3[i] != (npes - 1)) {
          success3 = 1;
        }
        if (dest6[i] != (npes - 1)) {
          success6 = 1;
        }
        if (dest7[i] != (npes - 1)) {
          success7 = 1;
        }
      }
      report_put_result ("shmem_short_iput", success1);
      report_put_result ("shmem_int_iput", success2);
      report_put_result ("shmem_long_iput", success3);
      report_put_result ("shmem_double_iput", success6);
      report_put_result ("shmem_float_iput", success7);
    }

    /* Testing shmem_double_p, shmem_float_p, shmem_int_p, shmem_long_p,
       shmem_short_p */
    shmem_barrier_all ();

    shmem_short_p (dest9, src9, nextpe);
    shmem_int_p (dest10, src10, nextpe);
    shmem_long_p (dest11, src11, nextpe);
    shmem_double_p (dest12, src12, nextpe);
    shmem_float_p (dest13, src13, nextpe);

    shmem_barrier_all ();

    if (me == 0) {
      report_put_result ("shmem_short_p", *dest9 != (npes - 1));
      report_put_result ("shmem_int_p", *dest10 != (npes - 1));
      report_put_result ("shmem_long_p", *dest11 != (npes - 1));
      report_put_result ("shmem_double_p", *dest12 != (npes - 1));
      report_put_result ("shmem_float_p", *dest13 != (npes - 1));
    }

    shmem_barrier_all ();

    shmem_free (dest1);
    shmem_free (dest2);
    shmem_free (dest3);
    shmem_free (dest4);
    shmem_free (dest5);
    shmem_free (dest6);
    shmem_free (dest7);
    shmem_free (dest8);
    shmem_free (dest9);
    shmem_free (dest10);
    shmem_free (dest11);
    shmem_free (dest12);
    shmem_free (dest13);
    free (src8);
  }
  else {
    printf ("Number of PEs must be > 1 to test shmem put, test skipped\n");
  }

  shmem_finalize ();

  return 0;
}
int main(int argc, char* argv[]) { int c, j, loops, k, l; int my_pe, nProcs, nWorkers; int nWords=1; int failures=0; char *prog_name; long *wp,work_sz; for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) { pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] = pSync4[j] = SHMEM_SYNC_VALUE; } shmem_init(); my_pe = shmem_my_pe(); nProcs = shmem_n_pes(); nWorkers = nProcs - 1; if (nProcs == 1) { Rfprintf(stderr, "ERR - Requires > 1 PEs\n"); shmem_finalize(); return 0; } for(j=0; j < nProcs; j++) if ( shmem_pe_accessible(j) != 1 ) { fprintf(stderr, "ERR - pe %d not accessible from pe %d\n", j, my_pe); } prog_name = strrchr(argv[0],'/'); if ( prog_name ) prog_name++; else prog_name = argv[0]; while((c=getopt(argc,argv,"hvM:s")) != -1) { switch(c) { case 's': Slow++; break; case 'v': Verbose++; break; case 'M': output_mod = atoi(optarg); if (output_mod <= 0) { Rfprintf(stderr, "ERR - output modulo arg out of " "bounds '%d'?\n", output_mod); shmem_finalize(); return 1; } Rfprintf(stderr,"%s: output modulo %d\n", prog_name,output_mod); break; case 'h': Rfprintf(stderr, "usage: %s {nWords-2-put(%d)K/M} {Loop-count(%d)K/M}\n", prog_name, DFLT_NWORDS, DFLT_LOOPS); shmem_finalize(); return 1; default: shmem_finalize(); return 1; } } if (optind == argc) nWords = DFLT_NWORDS; else { nWords = atoi_scaled(argv[optind++]); if (nWords <= 0) { Rfprintf(stderr, "ERR - Bad nWords arg '%d'?\n", nWords); shmem_finalize(); return 1; } } if (optind == argc) loops = DFLT_LOOPS; else { loops = atoi_scaled(argv[optind++]); if (loops <= 0 || loops > 1000000) { Rfprintf(stderr, "ERR - loops arg out of bounds '%d'?\n", loops); shmem_finalize(); return 1; } } work_sz = (nProcs*nWords) * sizeof(long); work = shmem_malloc( work_sz ); if ( !work ) { fprintf(stderr,"[%d] ERR - work = shmem_malloc(%ld) ?\n",my_pe,work_sz); shmem_global_exit(1); } Target = shmem_malloc( 2 * nWords * sizeof(long) ); if ( !Target ) { fprintf(stderr,"[%d] ERR - Target = shmem_malloc(%ld) ?\n", my_pe, (nWords * sizeof(long))); 
shmem_global_exit(1); } src = &Target[nWords]; #if _DEBUG Rprintf("%s: %d loops of %d longs per put\n",prog_name,loops,nWords); #endif for(j=0; j < nWords; j++) src[j] = VAL; for(j=0; j < loops; j++) { #if _DEBUG if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] +(%d)\n", my_pe,j); #endif shmem_barrier(0, 0, nProcs, pSync0); if ( my_pe == 0 ) { int p; for(p=1; p < nProcs; p++) shmem_long_put(Target, src, nWords, p); } else { if (Slow) { /* wait for each put to complete */ for(k=0; k < nWords; k++) shmem_wait(&Target[k],my_pe); } else { /* wait for last word to be written */ shmem_wait(&Target[nWords-1],my_pe); } } #if _DEBUG if ( Verbose && (j==0 || (j % output_mod) == 0) ) fprintf(stderr,"[%d] -(%d)\n", shmem_my_pe(),j); #endif shmem_barrier(0, 0, nProcs, pSync1); RDprintf("Workers[1 ... %d] verify Target data put by proc0\n", nWorkers); /* workers verify put data is expected */ if ( my_pe != 0 ) { for(k=0; k < nWords; k++) { if (Target[k] != VAL) { fprintf(stderr, "[%d] Target[%d] %#lx " "!= %#x?\n", my_pe,k,Target[k],VAL); failures++; } assert(Target[k] == VAL); Target[k] = my_pe; } } else /* clear results buffer, workers will put here */ memset(work, 0, work_sz); shmem_barrier(0, 0, nProcs, pSync2); RDprintf("Workers[1 ... %d] put Target data to PE0 work " "vector\n",nWorkers); if ( my_pe != 0 ) { /* push nWords of val my_pe back to PE zero */ shmem_long_put(&work[my_pe * nWords], Target, nWords, 0); } else { /* wait for procs 1 ... 
nProcs to complete put()s */ for(l=1; l < nProcs; l++) { wp = &work[ l*nWords ]; // procs nWords chunk #if 1 /* wait for last long to be written from each PE */ shmem_wait(&wp[nWords-1],0); #else for(k=0; k < nWords; k++) shmem_wait(&wp[k],0); #endif } } shmem_barrier(0, 0, nProcs, pSync3); if ( my_pe == 0 ) { RDprintf("Loop(%d) PE0 verifing work data.\n",j); for(l=1; l < nProcs; l++) { wp = &work[ l*nWords ]; // procs nWords chunk for(k=0; k < nWords; k++) { if (wp[k] != l) { fprintf(stderr, "[0] PE(%d)_work[%d] %ld " "!= %d?\n", l,k,work[k],l); failures++; } assert(wp[k] == l); break; } if (failures) break; } } shmem_barrier(0, 0, nProcs, pSync4); #if _DEBUG if (loops > 1) { Rfprintf(stderr,"."); RDprintf("Loop(%d) Pass.\n",j); } #endif } shmem_free( work ); shmem_free( Target ); #if _DEBUG Rfprintf(stderr,"\n");fflush(stderr); shmem_barrier_all(); RDprintf("%d(%d) Exit(%d)\n", my_pe, nProcs, failures); #endif shmem_finalize(); return failures; }
int main(int argc, char *argv[]) { int i; shmem_init(); rank = shmem_my_pe(); world_size = shmem_n_pes(); /* root handles arguments and bcasts answers */ if (0 == rank) { int ch; while (start_err != 1 && (ch = getopt(argc, argv, "p:i:m:s:c:n:oh")) != -1) { switch (ch) { case 'p': npeers = atoi(optarg); break; case 'i': niters = atoi(optarg); break; case 'm': nmsgs = atoi(optarg); break; case 's': nbytes = atoi(optarg); break; case 'c': cache_size = atoi(optarg) / sizeof(int); break; case 'n': ppn = atoi(optarg); break; case 'o': machine_output = 1; break; case 'h': case '?': default: start_err = 1; usage(); } } /* sanity check */ if (start_err != 1) { #if 0 if (world_size < 3) { fprintf(stderr, "Error: At least three processes are required\n"); start_err = 1; } else #endif if (world_size <= npeers) { fprintf(stderr, "Error: job size (%d) <= number of peers (%d)\n", world_size, npeers); start_err = 77; } else if (ppn < 1) { fprintf(stderr, "Error: must specify process per node (-n #)\n"); start_err = 77; } else if (world_size / ppn <= npeers) { fprintf(stderr, "Error: node count <= number of peers\n"); start_err = 77; } } } for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i++) bcast_pSync[i] = SHMEM_SYNC_VALUE; for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) barrier_pSync[i] = SHMEM_SYNC_VALUE; for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) reduce_pSync[i] = SHMEM_SYNC_VALUE; for (i = 0; i < SHMEM_REDUCE_MIN_WRKDATA_SIZE; i++) reduce_pWrk[i] = SHMEM_SYNC_VALUE; shmem_barrier_all(); /* broadcast results */ printf("%d: psync: 0x%lu\n", rank, (unsigned long) bcast_pSync); shmem_broadcast32(&start_err, &start_err, 1, 0, 0, 0, world_size, bcast_pSync); if (0 != start_err) { exit(start_err); } shmem_barrier_all(); shmem_broadcast32(&npeers, &npeers, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); shmem_broadcast32(&niters, &niters, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); shmem_broadcast32(&nmsgs, &nmsgs, 1, 0, 0, 0, world_size, bcast_pSync); 
shmem_barrier_all(); shmem_broadcast32(&nbytes, &nbytes, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); shmem_broadcast32(&cache_size, &cache_size, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); shmem_broadcast32(&ppn, &ppn, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); if (0 == rank) { if (!machine_output) { printf("job size: %d\n", world_size); printf("npeers: %d\n", npeers); printf("niters: %d\n", niters); printf("nmsgs: %d\n", nmsgs); printf("nbytes: %d\n", nbytes); printf("cache size: %d\n", cache_size * (int)sizeof(int)); printf("ppn: %d\n", ppn); } else { printf("%d %d %d %d %d %d %d ", world_size, npeers, niters, nmsgs, nbytes, cache_size * (int)sizeof(int), ppn); } } /* allocate buffers */ send_peers = malloc(sizeof(int) * npeers); if (NULL == send_peers) abort_app("malloc"); recv_peers = malloc(sizeof(int) * npeers); if (NULL == recv_peers) abort_app("malloc"); cache_buf = malloc(sizeof(int) * cache_size); if (NULL == cache_buf) abort_app("malloc"); send_buf = malloc(npeers * nmsgs * nbytes); if (NULL == send_buf) abort_app("malloc"); memset(send_buf, 1, npeers * nmsgs * nbytes); recv_buf = shmem_malloc(npeers * nmsgs * nbytes); if (NULL == recv_buf) abort_app("malloc"); memset(recv_buf, 0, npeers * nmsgs * nbytes); /* calculate peers */ for (i = 0 ; i < npeers ; ++i) { if (i < npeers / 2) { send_peers[i] = (rank + world_size + ((i - npeers / 2) * ppn)) % world_size; } else { send_peers[i] = (rank + world_size + ((i - npeers / 2 + 1) * ppn)) % world_size; } } if (npeers % 2 == 0) { /* even */ for (i = 0 ; i < npeers ; ++i) { if (i < npeers / 2) { recv_peers[i] = (rank + world_size + ((i - npeers / 2) *ppn)) % world_size; } else { recv_peers[i] = (rank + world_size + ((i - npeers / 2 + 1) * ppn)) % world_size; } } } else { /* odd */ for (i = 0 ; i < npeers ; ++i) { if (i < npeers / 2 + 1) { recv_peers[i] = (rank + world_size + ((i - npeers / 2 - 1) * ppn)) % world_size; } else { recv_peers[i] = (rank + world_size + ((i 
- npeers / 2) * ppn)) % world_size; } } } /* BWB: FIX ME: trash the free lists / malloc here */ /* sync, although tests will do this on their own (in theory) */ shmem_barrier_all(); /* run tests */ test_one_way(); test_same_direction(); test_prepost(); test_allstart(); if (rank == 0 && machine_output) printf("\n"); /* done */ shmem_finalize(); return 0; }
int main (void) { int i; int *target; int *source; int me, npes; struct timeval start, end; long time_taken, start_time, end_time; shmem_init (); me = shmem_my_pe (); npes = shmem_n_pes (); source = (int *) shmem_malloc (N_ELEMENTS * sizeof (*source)); time_taken = 0; for (i = 0; i < N_ELEMENTS; i += 1) { source[i] = (i + 1) * 10 + me; } target = (int *) shmem_malloc (N_ELEMENTS * sizeof (*target) * npes); for (i = 0; i < N_ELEMENTS * npes; i += 1) { target[i] = -90; } for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i += 1) { pSyncA[i] = _SHMEM_SYNC_VALUE; pSyncB[i] = _SHMEM_SYNC_VALUE; } shmem_barrier_all (); for (i = 0; i < 10000; i++) { gettimeofday (&start, NULL); start_time = (start.tv_sec * 1000000.0) + start.tv_usec; /* alternate between 2 pSync arrays to synchronize consequent collectives of even and odd iterations */ if (i % 2) { shmem_fcollect32 (target, source, N_ELEMENTS, 0, 0, npes, pSyncA); } else { shmem_fcollect32 (target, source, N_ELEMENTS, 0, 0, npes, pSyncB); } gettimeofday (&end, NULL); end_time = (end.tv_sec * 1000000.0) + end.tv_usec; if (me == 0) { time_taken = time_taken + (end_time - start_time); } } if (me == 0) { printf ("Time required to collect %d bytes of data, with %d PEs is %ld microseconds\n", (4 * N_ELEMENTS * npes), npes, time_taken / 10000); } shmem_barrier_all (); shmem_free (target); shmem_free (source); shmem_finalize (); return 0; }
int main(int argc, char **argv) { int j; int my_pe,n_pes; int *flag,*one; size_t max_elements,max_elements_bytes; char *srce_char,*targ_char; short *srce_short,*targ_short; int *srce_int,*targ_int; long *srce_long,*targ_long; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); flag = shmem_malloc((size_t) sizeof(int)); one = shmem_malloc((size_t) sizeof(int)); *one = 1; /* fail if trying to use odd number of processors */ if ( (n_pes % 2) != 0 ){ fprintf(stderr, "FAIL - test requires even number of PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_num_put_nb(%s)\n", argv[0]); /* shmem_putmem_nb test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(char)); max_elements_bytes = (size_t) (sizeof(char)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_putmem_nb max_elements = %d\n",max_elements); srce_char = shmem_malloc(max_elements_bytes); targ_char = shmem_malloc(max_elements_bytes); if((srce_char == NULL) || (targ_char == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_char[j] = (char)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_char[j] = (char)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_putmem_nb(targ_char,srce_char,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_char[j] != (char)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_char[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_char[j],my_pe+j-1); } shmem_free(srce_char); shmem_free(targ_char); /* shmem_put16_nb test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(short)); if(max_elements > 20000) max_elements=20000; max_elements_bytes = (size_t) (sizeof(short)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put16_nb max_elements = %d\n",max_elements); srce_short = shmem_malloc(max_elements_bytes); targ_short = shmem_malloc(max_elements_bytes); if((srce_short == NULL) || (targ_short == NULL)) 
shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_short[j] = (short)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_short[j] = (short)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put16_nb(targ_short,srce_short,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_short[j] != (short)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_short[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_short[j],my_pe+j-1); } shmem_free(srce_short); shmem_free(targ_short); /* shmem_put32_nb test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put32_nb max_elements = %d\n",max_elements); srce_int = shmem_malloc(max_elements_bytes); targ_int = shmem_malloc(max_elements_bytes); if((srce_int == NULL) || (targ_int == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_int[j] = (int)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_int[j] = (int)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put32_nb(targ_int,srce_int,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_int[j] != (int)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_int[j],my_pe+j-1); } shmem_free(srce_int); shmem_free(targ_int); /* shmem_put64_nb test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put64_nb max_elements = %d\n",max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < 
max_elements; j++) srce_long[j] = (long)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_long[j] = (long)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put64_nb(targ_long,srce_long,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_long[j] != (long)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_long[j],my_pe+j-1); } shmem_free(srce_long); shmem_free(targ_long); /* shmem_put128_nb test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(long)); if ( (max_elements % 2) != 0) max_elements = max_elements-1; max_elements_bytes = (size_t) (sizeof(long)*max_elements); max_elements = max_elements/2; if(my_pe == 0) fprintf(stderr,"shmem_put128_nb max_elements = %d\n",max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < 2*max_elements; j++) srce_long[j] = (long)(my_pe+j); else for(j = 0; j < 2*max_elements; j++) targ_long[j] = (long)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put128_nb(targ_long,srce_long,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < 2*max_elements; j++) if ( targ_long[j] != (long)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_long[j],my_pe+j-1); } shmem_free(srce_long); shmem_free(targ_long); #ifdef SHMEM_C_GENERIC_32 /* shmem_put_nb (GENERIC 32) test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put_nb (GENERIC 32) max_elements = %d\n",max_elements); srce_int = shmem_malloc(max_elements_bytes); targ_int = shmem_malloc(max_elements_bytes); if((srce_int == 
NULL) || (targ_int == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_int[j] = (int)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_int[j] = (int)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put_nb(targ_int,srce_int,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_int[j] != (int)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_int[j],my_pe+j-1); } shmem_free(srce_int); shmem_free(targ_int); #else /* shmem_put_nb (GENERIC 64) test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put_nb (GENERIC 64) max_elements = %d\n",max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_long[j] = (long)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_long[j] = (long)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put_nb(targ_long,srce_long,max_elements,my_pe+1,NULL); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_long[j] != (long)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_long[j],my_pe+j-1); } shmem_free(srce_long); shmem_free(targ_long); #endif #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }