/*
 * neighbor_get - time `loops` blocking gets of `elements` ints from the next PE.
 *
 * Every PE fetches from PE (me + 1) % npes. Only PE 0 prints, and only when
 * the file-level Verbose flag is set; the figures it prints are its own.
 *
 * target   : destination buffer handed to shmem_int_get
 * src      : source address read on the neighbouring PE
 * elements : number of ints moved per get
 * me, npes : this PE's rank and the number of PEs
 * loops    : number of gets issued
 */
void neighbor_get(int *target, int *src, int elements, int me, int npes, int loops)
{
    int i, neighbor_pe;
    double start_time, elapsed_time;
    /* Widen before multiplying so loops * elements is not evaluated in int. */
    long get_bytes = (long)elements * (long)sizeof(*src);
    long total_bytes = (long)loops * get_bytes;

    if (me == 0 && Verbose) {
        /* get_bytes is a long, matching the %ld conversion. */
        fprintf(stdout, "%s: %d loops of get(%ld bytes) from neighbor, %d PEs: ",
                __func__, loops, get_bytes, npes);
        fflush(stdout);
    }

    shmem_barrier_all();

    neighbor_pe = (me + 1) % npes;

    start_time = shmemx_wtime();
    for (i = 0; i < loops; i++) {
        shmem_int_get(target, src, elements, neighbor_pe);
    }
    elapsed_time = shmemx_wtime() - start_time;

    if (me == 0 && Verbose) {
        printf("%7.3f secs\n", elapsed_time);
        /*
         * NOTE(review): the per-get time divides by loops * npes although this
         * PE issued only `loops` gets and total_bytes counts `loops` gets.
         * Left unchanged; confirm whether the npes factor is intended.
         */
        printf(" %7.5f usecs / get(), %ld Kbytes @ %7.4f MB/sec\n\n",
               (elapsed_time / ((double)loops * npes)) * 1000000.0,
               (total_bytes / 1024),
               ((double)total_bytes / (1024.0 * 1024.0)) / elapsed_time);
    }

    shmem_barrier_all();
}
int main(){ int me, npes; start_pes(0); me = shmem_my_pe(); npes = shmem_n_pes(); for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1) { pSync[i] = _SHMEM_SYNC_VALUE; } x = 42; y = 0; if(me==0){ shmem_barrier_all(); temp = x+y; } else { shmem_barrier_all(); if(me==1){ old = shmem_int_finc (&y, 0); shmem_int_sum_to_all(&y,&x,1,1,0,npes-1,pWrk,pSync); x= x+10; shmem_int_get(&y,&y,1,0); } else{ shmem_int_sum_to_all(&y,&x, 1,1,0,npes-1,pWrk,pSync); x=y*0.23; } } shmem_barrier_all(); if (me == 0) { printf("value in temp is %d (should be 42)\n", temp); } return 0; }
/*
 * many2one_get - PE 0 pulls `elements` ints from every other PE, `loops` times.
 *
 * Only PE 0 issues gets and takes timings; all PEs take part in the two
 * barriers. Output is produced by PE 0 when the file-level Verbose flag is set.
 *
 * target   : destination buffer handed to shmem_int_get
 * src      : source address read on each remote PE
 * elements : number of ints moved per get
 * me, npes : this PE's rank and the number of PEs
 * loops    : number of sweeps over PEs 1..npes-1
 */
void many2one_get(int *target, int *src, int elements, int me, int npes, int loops)
{
    int i, pe;
    double start_time, elapsed_time;
    /* Widen before multiplying so loops * elements is not evaluated in int. */
    long get_bytes = (long)elements * (long)sizeof(*src);
    long total_bytes = (long)loops * get_bytes * (npes - 1);

    /*
     * The header is now gated on Verbose, like the result lines below and like
     * neighbor_get; previously a non-verbose run printed a header with no
     * terminating newline and no result.
     */
    if (me == 0 && Verbose) {
        fprintf(stdout, "%s: %d loops of get(%ld bytes) from %d PEs: ",
                __func__, loops, get_bytes, npes - 1);
        fflush(stdout);
    }

    shmem_barrier_all();

    if (me == 0) {
        start_time = shmemx_wtime();
        for (i = 0; i < loops; i++) {
            for (pe = 1; pe < npes; pe++) {
                shmem_int_get(target, src, elements, pe);
            }
        }
        elapsed_time = shmemx_wtime() - start_time;

        if (Verbose) {
            printf("%7.3f secs\n", elapsed_time);
            printf(" %7.5f usecs / get(), %ld Kbytes @ %7.4f MB/sec\n\n",
                   (elapsed_time / ((double)loops * (npes - 1))) * 1000000.0,
                   (total_bytes / 1024),
                   ((double)total_bytes / (1024.0 * 1024.0)) / elapsed_time);
        }
    }

    shmem_barrier_all();
}
/**************************************************************************** * Place for Test Item functions ***************************************************************************/ static int test_item1(void) { int rc = TC_PASS; int myPe = shmem_my_pe(); int myPeer = myPe + ( myPe % 2 ? -1 : 1 ) ; int nPe = shmem_n_pes(); int remainderPe = nPe - (nPe % 2); static int statArray[ARRAY_SIZE]; int* dynamicArray = shmalloc( ARRAY_SIZE * sizeof(int) ); int iterate; for (iterate = 0; iterate < ARRAY_SIZE; iterate++) { if (myPe != remainderPe) { int tryIterate; int putNum, getNum; for (tryIterate = 0; tryIterate < TRY_SIZE; tryIterate++) { putNum = iterate + myPe; shmem_int_put(&statArray[iterate], &putNum, 1, myPeer); shmem_int_put(&dynamicArray[iterate], &putNum, 1, myPeer); } shmem_fence(); shmem_int_get(&getNum, &statArray[iterate], 1, myPeer); if (getNum != putNum) { rc = TC_FAIL; } shmem_int_get(&getNum, &dynamicArray[iterate], 1, myPeer); if (getNum != putNum) { rc = TC_FAIL; } } shmem_barrier_all(); } shfree(dynamicArray); return rc; }
/*
 * update_ghostcells - exchange two boundary rows with the previous rank.
 *
 * After an initial barrier, every rank other than 0 first fetches two rows
 * from rank-1 into its own rows 0 and 1, then writes two of its own rows
 * (starting at heightoffset) into rank-1. The remote row offsets differ
 * when rank-1 is rank 0. With fewer than two PEs the function returns right
 * after the first barrier. `size` and `rank` are declared outside this block;
 * height and fhlimit are not used here.
 */
void update_ghostcells ( int **buffer, int height, int width, int fhlimit,int localheight, int heightoffset )
{
    int row;

    shmem_barrier_all();

    /* Nothing to be done */
    if (size < 2) {
        return;
    }

    if (rank != 0) {
        const int upper = rank - 1;
        const int upper_is_root = (upper == 0);

        /* Pull the two rows sitting at the end of the upper rank's block. */
        if (shmem_addr_accessible(&buffer[0][0], upper)) {
            const int fetch_from = upper_is_root ? (localheight - 2) : localheight;

            for (row = 0; row < 2; row++) {
                shmem_int_get(buffer[row], buffer[row + fetch_from], width, upper);
            }
        } else {
            printf("Not_Accessible_Error_01");
        }

        /* Push our first two owned rows into the upper rank. */
        if (shmem_addr_accessible(&buffer[0][0], upper)) {
            const int store_at = upper_is_root ? localheight : (localheight + 2);

            for (row = 0; row < 2; row++) {
                shmem_int_put(buffer[row + store_at], buffer[row + heightoffset], width, upper);
            }
        } else {
            printf("Not_Accessible_Error_02");
        }
    }

    shmem_barrier_all();
}
/*
 * Ring example: every PE fetches one int from the next PE (wrapping to 0),
 * then all PEs meet at a barrier before exiting.
 */
int main (void)
{
    static int aaa, bbb;   /* get destination and remote source */
    int my_pe;
    int num_pes;
    int peer;

    start_pes(0);
    my_pe = _my_pe();
    num_pes = _num_pes();

    /* Successor in the ring; the last PE wraps around to PE 0. */
    peer = my_pe + 1;
    if (peer == num_pes) {
        peer = 0;
    }

    printf("Process %d gets message from %d (%d processes in ring)\n",
           my_pe, peer, num_pes);

    shmem_int_get(&aaa, &bbb, 1, peer);

    shmem_barrier_all();

    printf("Process %d exiting\n", my_pe);
    return 0;
}
int recurse(int i, int ilog, int pes, int j, int nelems) { int next_i,next_ilog,next_pes,k; if(ilog <= npes) { if(me % ilog ==0) { if(me == 4) printf("\n"); // printf("\nDEBUG%d --> %d", me+i, me); shmem_int_get(&A[nelems], A, nelems, me+i); /* printf("\nDEBUG(%d)imported list: ",me); for( k = 0; k < nelems; k++){ printf("%d ,",Aux[k]); } printf("\n"); for(k=0; k < nelems; k++){ A[nelems+k] = Aux[k]; } */ merge (0,nelems-1, nelems*2-1); nelems = nelems *2; /* printf("\nDEBUG(%d)new A: ",me); for( k = 0; k < nelems; k++){ printf("A[%d]= %d \n",k,A[k]); } printf("\n");*/ next_pes = pes/2; next_i = 2*i; next_ilog = 2*ilog; shmem_barrier(0,j , next_pes, pSync); j = j+1; recurse(next_i, next_ilog, next_pes, j, nelems); } } }
/*
 * Each PE stores its rank in bbb, fetches bbb from the next PE into aaa and
 * checks that the fetched value equals that neighbour's rank. aaa and bbb
 * are declared outside this block.
 *
 * Exit status is 0 on a match and 1 otherwise.
 */
int main(int argc, char* argv[])
{
    int me;
    int neighbor;
    int status;

    shmem_init();

    me = shmem_my_pe();
    bbb = me;
    neighbor = (me + 1) % shmem_n_pes();

    /* Every PE must have written bbb before anyone reads it. */
    shmem_barrier_all();
    shmem_int_get(&aaa, &bbb, 1, neighbor);
    shmem_barrier_all();

    status = (aaa != neighbor) ? 1 : 0;

    shmem_finalize();
    return status;
}
/* Print the verdict line for one get variant; `failed` is non-zero on mismatch. */
static void report_get_result(const char *name, int failed)
{
    printf("Test %s: %s\n", name, failed ? "Failed" : "Passed");
}

/*
 * Exercises the blocking get family: typed gets, getmem, sized gets, strided
 * gets and single-element gets.
 *
 * Every PE fills symmetric source buffers with its own rank. The contiguous
 * and single-element variants fetch from the next PE, the strided variants
 * from PE npes-1. Only PE 0 verifies and prints results, so the expected
 * values are 1 (PE 0's successor) and npes-1 respectively. With a single PE
 * the test is skipped.
 */
int main(int argc, char **argv)
{
    int i;
    int nextpe;
    int me, npes;
    /* Despite the name, a successN of 1 records a mismatch. */
    int success1, success2, success3, success4, success5, success6, success7, success8;

    short dest1[N];
    int dest2[N];
    long dest3[N];
    long double dest4[N];
    long long dest5[N];
    double dest6[N];
    float dest7[N];
    char *dest8;
    short dest9;
    int dest10;
    long dest11;
    double dest12;
    float dest13;

    short *src1;
    int *src2;
    long *src3;
    long double *src4;
    long long *src5;
    double *src6;
    float *src7;
    char *src8;
    short *src9;
    int *src10;
    long *src11;
    double *src12;
    float *src13;

    start_pes(0);
    me = _my_pe();
    npes = _num_pes();

    if (npes > 1) {
        success1 = 0;
        success2 = 0;
        success3 = 0;
        success4 = 0;
        success5 = 0;
        success6 = 0;
        success7 = 0;
        success8 = 0;

        dest8 = (char *)malloc(N * sizeof(char));

        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        dest9 = -9;
        dest10 = -9;
        dest11 = -9;
        dest12 = -9;
        dest13 = -9;

        src1 = (short *)shmalloc(N * sizeof(*src1));
        src2 = (int *)shmalloc(N * sizeof(*src2));
        src3 = (long *)shmalloc(N * sizeof(*src3));
        src4 = (long double *)shmalloc(N * sizeof(*src4));
        src5 = (long long *)shmalloc(N * sizeof(*src5));
        src6 = (double *)shmalloc(N * sizeof(*src6));
        src7 = (float *)shmalloc(N * sizeof(*src7));
        /* N elements, not 4: N bytes are written below and fetched by getmem. */
        src8 = (char *)shmalloc(N * sizeof(*src8));
        src9 = (short *)shmalloc(sizeof(*src9));
        src10 = (int *)shmalloc(sizeof(*src10));
        src11 = (long *)shmalloc(sizeof(*src11));
        src12 = (double *)shmalloc(sizeof(*src12));
        src13 = (float *)shmalloc(sizeof(*src13));

        for (i = 0; i < N; i += 1) {
            src1[i] = (short)me;
            src2[i] = me;
            src3[i] = (long)me;
            src4[i] = (long double)me;
            src5[i] = (long long)me;
            src6[i] = (double)me;
            src7[i] = (float)me;
            src8[i] = (char)me;
        }
        *src9 = (short)me;
        *src10 = me;
        *src11 = (long)me;
        *src12 = (double)me;
        *src13 = (float)me;

        nextpe = (me + 1) % npes;

        /* Testing shmem_short_get, shmem_int_get, shmem_long_get,
           shmem_longdouble_get, shmem_longlong_get, shmem_double_get,
           shmem_float_get, shmem_getmem */
        shmem_barrier_all();

        shmem_short_get(dest1, src1, N, nextpe);
        shmem_int_get(dest2, src2, N, nextpe);
        shmem_long_get(dest3, src3, N, nextpe);
        shmem_longdouble_get(dest4, src4, N, nextpe);
        shmem_longlong_get(dest5, src5, N, nextpe);
        shmem_double_get(dest6, src6, N, nextpe);
        shmem_float_get(dest7, src7, N, nextpe);
        shmem_getmem(dest8, src8, N * sizeof(char), nextpe);

        shmem_barrier_all();

        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != 1) success1 = 1;
                if (dest2[i] != 1) success2 = 1;
                if (dest3[i] != 1) success3 = 1;
                if (dest4[i] != 1) success4 = 1;
                if (dest5[i] != 1) success5 = 1;
                if (dest6[i] != 1) success6 = 1;
                if (dest7[i] != 1) success7 = 1;
                if (dest8[i] != 1) success8 = 1;
            }
            report_get_result("shmem_short_get", success1);
            report_get_result("shmem_int_get", success2);
            report_get_result("shmem_long_get", success3);
            report_get_result("shmem_longdouble_get", success4);
            report_get_result("shmem_longlong_get", success5);
            report_get_result("shmem_double_get", success6);
            report_get_result("shmem_float_get", success7);
            report_get_result("shmem_getmem", success8);
        }
        shmem_barrier_all();

        /* Testing shmem_get32, shmem_get64, shmem_get128.
           The buffers used for each width depend on sizeof(int). */
        if (sizeof(int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all();

            shmem_get32(dest2, src2, N, nextpe);
            shmem_get64(dest3, src3, N, nextpe);
            shmem_get128(dest4, src4, N, nextpe);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != 1) success2 = 1;
                    if (dest3[i] != 1) success3 = 1;
                    if (dest4[i] != 1) success4 = 1;
                }
                report_get_result("shmem_get32", success2);
                report_get_result("shmem_get64", success3);
                report_get_result("shmem_get128", success4);
            }
        } else if (sizeof(int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all();

            shmem_get32(dest1, src1, N, nextpe);
            shmem_get64(dest2, src2, N, nextpe);
            shmem_get128(dest3, src3, N, nextpe);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != 1) success1 = 1;
                    if (dest2[i] != 1) success2 = 1;
                    if (dest3[i] != 1) success3 = 1;
                }
                report_get_result("shmem_get32", success1);
                report_get_result("shmem_get64", success2);
                report_get_result("shmem_get128", success3);
            }
        }

        /* Testing shmem_iget32, shmem_iget64, shmem_iget128:
           N/2 elements, target stride 1, source stride 2, from PE npes-1. */
        shmem_barrier_all();
        if (sizeof(int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all();

            shmem_iget32(dest2, src2, 1, 2, N / 2, npes - 1);
            shmem_iget64(dest3, src3, 1, 2, N / 2, npes - 1);
            shmem_iget128(dest4, src4, 1, 2, N / 2, npes - 1);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest2[i] != (npes - 1)) success2 = 1;
                    if (dest3[i] != (npes - 1)) success3 = 1;
                    if (dest4[i] != (npes - 1)) success4 = 1;
                }
                report_get_result("shmem_iget32", success2);
                report_get_result("shmem_iget64", success3);
                report_get_result("shmem_iget128", success4);
            }
        } else if (sizeof(int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all();

            shmem_iget32(dest1, src1, 1, 2, N / 2, npes - 1);
            shmem_iget64(dest2, src2, 1, 2, N / 2, npes - 1);
            shmem_iget128(dest3, src3, 1, 2, N / 2, npes - 1);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest1[i] != (npes - 1)) success1 = 1;
                    if (dest2[i] != (npes - 1)) success2 = 1;
                    if (dest3[i] != (npes - 1)) success3 = 1;
                }
                report_get_result("shmem_iget32", success1);
                report_get_result("shmem_iget64", success2);
                report_get_result("shmem_iget128", success3);
            }
        }

        /* Testing shmem_short_iget, shmem_int_iget, shmem_long_iget,
           shmem_double_iget, shmem_float_iget */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9;
        }
        success1 = 0;
        success2 = 0;
        success3 = 0;
        success6 = 0;
        success7 = 0;

        shmem_barrier_all();

        shmem_short_iget(dest1, src1, 1, 2, N / 2, npes - 1);
        shmem_int_iget(dest2, src2, 1, 2, N / 2, npes - 1);
        shmem_long_iget(dest3, src3, 1, 2, N / 2, npes - 1);
        shmem_double_iget(dest6, src6, 1, 2, N / 2, npes - 1);
        shmem_float_iget(dest7, src7, 1, 2, N / 2, npes - 1);

        shmem_barrier_all();

        if (me == 0) {
            for (i = 0; i < N / 2; i += 1) {
                if (dest1[i] != (npes - 1)) success1 = 1;
                if (dest2[i] != (npes - 1)) success2 = 1;
                if (dest3[i] != (npes - 1)) success3 = 1;
                if (dest6[i] != (npes - 1)) success6 = 1;
                if (dest7[i] != (npes - 1)) success7 = 1;
            }
            report_get_result("shmem_short_iget", success1);
            report_get_result("shmem_int_iget", success2);
            report_get_result("shmem_long_iget", success3);
            report_get_result("shmem_double_iget", success6);
            report_get_result("shmem_float_iget", success7);
        }

        /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g,
           shmem_short_g */
        shmem_barrier_all();

        dest9 = shmem_short_g(src9, nextpe);
        dest10 = shmem_int_g(src10, nextpe);
        dest11 = shmem_long_g(src11, nextpe);
        dest12 = shmem_double_g(src12, nextpe);
        dest13 = shmem_float_g(src13, nextpe);

        shmem_barrier_all();

        if (me == 0) {
            report_get_result("shmem_short_g", dest9 != 1);
            report_get_result("shmem_int_g", dest10 != 1);
            report_get_result("shmem_long_g", dest11 != 1);
            report_get_result("shmem_double_g", dest12 != 1);
            report_get_result("shmem_float_g", dest13 != 1);
        }

        shmem_barrier_all();

        shfree(src1);
        shfree(src2);
        shfree(src3);
        shfree(src4);
        shfree(src5);
        shfree(src6);
        shfree(src7);
        shfree(src8);
        /* The single-element sources and the heap buffer were never released. */
        shfree(src9);
        shfree(src10);
        shfree(src11);
        shfree(src12);
        shfree(src13);
        free(dest8);
    } else {
        printf("Number of PEs must be > 1 to test shmem get, test skipped\n");
    }

    return 0;
}
int main (int argc, char *argv[]) { int *sray, *rray; int *sdisp, *scounts, *rdisp, *rcounts; int ssize, rsize, i, k, j; float z; init_it (&argc, &argv); scounts = (int *) shmem_malloc (sizeof (int) * numnodes); rcounts = (int *) shmem_malloc (sizeof (int) * numnodes); sdisp = (int *) shmem_malloc (sizeof (int) * numnodes); rdisp = (int *) shmem_malloc (sizeof (int) * numnodes); /* ! seed the random number generator with a ! different number on each processor */ seed_random (myid); /* find out how much data to send */ for (i = 0; i < numnodes; i++) { random_number (&z); scounts[i] = (int) (5.0 * z) + 1; } printf ("myid= %d scounts=%d %d %d %d\n", myid, scounts[0], scounts[1], scounts[2], scounts[3]); /* tell the other processors how much data is coming */ // mpi_err = MPI_Alltoall(scounts,1,MPI_INT, rcounts,1,MPI_INT, // MPI_COMM_WORLD); shmem_barrier_all (); int other, j1; for (j1 = 0; j1 < numnodes; j1++) { shmem_int_put (&rcounts[myid], &scounts[j1], 1, j1); } shmem_barrier_all (); printf ("-----myid= %d rcounts=", myid); for (i = 0; i < numnodes; i++) { printf ("%d ", rcounts[i]); } printf ("\n"); /* write(*,*)"myid= ",myid," rcounts= ",rcounts */ /* calculate displacements and the size of the arrays */ sdisp[0] = 0; for (i = 1; i < numnodes; i++) { sdisp[i] = scounts[i - 1] + sdisp[i - 1]; } rdisp[0] = 0; for (i = 1; i < numnodes; i++) { rdisp[i] = rcounts[i - 1] + rdisp[i - 1]; } ssize = 0; rsize = 0; for (i = 0; i < numnodes; i++) { ssize = ssize + scounts[i]; rsize = rsize + rcounts[i]; } /* allocate send and rec arrays */ sray = (int *) shmem_malloc (sizeof (int) * 20); rray = (int *) shmem_malloc (sizeof (int) * 20); for (i = 0; i < ssize; i++) { sray[i] = myid; } /* send/rec different amounts of data to/from each processor */ // mpi_err = MPI_Alltoallv(sray,scounts,sdisp,MPI_INT, // rray,rcounts,rdisp,MPI_INT, MPI_COMM_WORLD); shmem_barrier_all (); for (j1 = 0; j1 < numnodes; j1++) { int k1 = sdisp[j1]; static int k2; shmem_int_get (&k2, &rdisp[myid], 1, 
j1); shmem_int_put (rray + k2, sray + k1, scounts[j1], j1); } shmem_barrier_all (); printf ("myid= %d rray=", myid); for (i = 0; i < rsize; i++) { printf ("%d ", rray[i]); } printf ("\n"); // mpi_err = MPI_Finalize(); shmem_finalize (); return 0; }
/* main function */ int main(int argc, char **argv) { int localheight=0; int realheight=0; int height, width,i, j, k, heightoffset; int **labels=NULL; FILE *fpa; /* initializing shmem values */ start_pes (0); size = _num_pes (); rank = _my_pe (); /* determining inputs */ if (argc != 5 ) { printf("usage: %s <inputfile> <outputfile> <height> <width>\n", argv[0]); exit(0); } height = atoi ( argv[3]); width = atoi ( argv[4]); /* creating symmetric memory */ labels = (int **) shmalloc (height*sizeof(int *)); for ( i = 0; i < height; i++ ) { labels[i] = (int*) shmalloc (width*sizeof(int*)); } allocate_2D_int_matrix ( &labels, height, width ); /* reading from the input file */ if ( rank == 0 ) { fpa = fopen ( argv[1], "rb"); fread ( &(labels[0][0]), sizeof(int), height*width, fpa); fclose ( fpa ); } /* datapartitioning between the processes */ localheight = height/size; realheight = localheight + 4 ; heightoffset = 2; if ( rank == 0 ) { realheight -= 2 ; heightoffset = 0; } if ( rank == (size -1) ) { realheight -= 2; } shmem_barrier_all (); /* distributing input datum to all the symmetric variables */ for ( i=1; i<size; i++ ) { if ( rank == i ) if ( shmem_addr_accessible(&labels[0][0], 0)) { if ( rank == ( size -1 ) ) { k = 0; } else { k = 2; } for ( j = 0; j < localheight+k; j++ ) { shmem_int_get(&labels[j+heightoffset][0], &labels[j+(localheight*rank)][0], width, 0); } } else { printf("Not_accessible_Error_00"); } } /* gray erode calculation */ shmem_barrier_all(); gettimeofday(&start, NULL); gray_erode(labels, height, width, FILTERHEIGHT,FILTERWIDTH, ITERATIONS, SEGMENTS, localheight, heightoffset ); gettimeofday(&end, NULL); /* calculating and displaying time taken */ if( rank == 0 ) { t = (double) ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)) / 1000000.0; printf("Total Time%f \n",t); } /* transferring datum back to root */ shmem_barrier_all (); for ( k = 0; k < size; k++) { if ( rank == k ) { for ( i = 0; i < localheight; i++ ) { 
shmem_int_put(&labels[i+(localheight*rank)][0], &labels[i+heightoffset][0], width, 0); } } } shmem_barrier_all (); /* creating output file */ if(rank == 0) { fpa = fopen ( argv[2], "wb"); fwrite( &(labels[0][0]), sizeof(int), height*width, fpa ); fclose( fpa ); } free_2D_int_matrix ( &labels); return 0; }
int main (int argc, char *argv[]) { int *sray, *rray; int *sdisp, *scounts, *rdisp, *rcounts, *rcounts_full; int ssize, rsize, i, k, j; float z; init_it (&argc, &argv); scounts = (int *) shmem_malloc (sizeof (int) * numnodes); rcounts = (int *) shmem_malloc (sizeof (int) * numnodes); rcounts_full = (int *) shmem_malloc (sizeof (int) * numnodes * numnodes); sdisp = (int *) shmem_malloc (sizeof (int) * numnodes); rdisp = (int *) shmem_malloc (sizeof (int) * numnodes); /* ! seed the random number generator with a ! different number on each processor */ seed_random (myid); /* find out how much data to send */ for (i = 0; i < numnodes; i++) { random_number (&z); scounts[i] = (int) (5.0 * z) + 1; } printf ("myid= %d scounts=%d %d %d %d\n", myid, scounts[0], scounts[1], scounts[2], scounts[3]); printf ("\n"); /* tell the other processors how much data is coming */ // mpi_err = MPI_Alltoall(scounts,1,MPI_INT, rcounts,1,MPI_INT, // MPI_COMM_WORLD); static long psync[_SHMEM_COLLECT_SYNC_SIZE]; for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) psync[i] = _SHMEM_SYNC_VALUE; shmem_barrier_all (); int other, j1; shmem_fcollect32 (rcounts_full, scounts, 4, 0, 0, numnodes, psync); for (i = 0; i < numnodes; i++) { rcounts[i] = rcounts_full[i * numnodes + myid]; } printf ("-----myid= %d rcounts=", myid); for (i = 0; i < numnodes; i++) printf ("%d ", rcounts[i]); printf ("\n"); /* write(*,*)"myid= ",myid," rcounts= ",rcounts */ /* calculate displacements and the size of the arrays */ sdisp[0] = 0; for (i = 1; i < numnodes; i++) { sdisp[i] = scounts[i - 1] + sdisp[i - 1]; } rdisp[0] = 0; for (i = 1; i < numnodes; i++) { rdisp[i] = rcounts[i - 1] + rdisp[i - 1]; } ssize = 0; rsize = 0; for (i = 0; i < numnodes; i++) { ssize = ssize + scounts[i]; rsize = rsize + rcounts[i]; } /* allocate send and rec arrays */ sray = (int *) shmem_malloc (sizeof (int) * 20); rray = (int *) shmem_malloc (sizeof (int) * 20); for (i = 0; i < ssize; i++) { sray[i] = myid; } /* send/rec different amounts of 
data to/from each processor */ // mpi_err = MPI_Alltoallv(sray,scounts,sdisp,MPI_INT, // rray,rcounts,rdisp,MPI_INT, MPI_COMM_WORLD); shmem_barrier_all (); for (j1 = 0; j1 < numnodes; j1++) { int k1 = sdisp[j1]; static int k2; shmem_int_get (&k2, &rdisp[myid], 1, j1); shmem_int_put (rray + k2, sray + k1, scounts[j1], j1); } shmem_barrier_all (); // not possible, coz even though the rcounts[myid] will be different on // each PE, the elements collected // by PE0 from other PE's will be constant. // shmem_collect32(rray_full,sray,rcounts[myid],0,0,numnodes,psync); printf ("myid= %d rray=", myid); for (i = 0; i < rsize; i++) printf ("%d ", rray[i]); printf ("\n"); // mpi_err = MPI_Finalize(); shmem_finalize (); return 0; }
int main(int argc, char **argv) { int loops=DFLT_LOOPS; char *pgm; int *Target; int *Source; int i, me, npes; int target_pe; long bytes; double time_taken=0.0, start_time; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 's': Sync++; break; case 't': Track++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } target_pe = (me+1) % npes; total_time = (double *) shmem_malloc( npes * sizeof(double) ); if (!total_time) { fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", (elements * sizeof(double))); shmem_global_exit(1); } Source = (int *) shmem_malloc( elements * sizeof(*Source) ); if (!Source) { fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(total_time); shmem_global_exit(1); } Target = (int *) shmem_malloc( elements * sizeof(*Target) ); if (!Target) { fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(Source); shmem_free(total_time); shmem_global_exit(1); } for (i = 0; i < elements; i++) { Target[i] = -90; Source[i] = i + 1; } bytes = loops * sizeof(int) * elements; if (Verbose && me==0) fprintf(stderr, "%s: INFO - %d loops, get %d (int) elements from PE+1\n", pgm, loops, elements); shmem_barrier_all(); for(i=0; i < loops; i++) { start_time = shmemx_wtime(); shmem_int_get( Target, Source, elements, target_pe ); time_taken += shmemx_wtime() - start_time; if (me==0) { if ( Track && i > 0 && ((i % 200) 
== 0)) fprintf(stderr,".%d",i); } if (Sync) shmem_barrier_all(); } // collect time per node elapsed time. shmem_double_put( &total_time[me], &time_taken, 1, 0 ); shmem_barrier_all(); for (i = 0; i < elements; i++) { if (Target[i] != i + 1) { printf("%d: Error Target[%d] = %d, expected %d\n", me, i, Target[i], i + 1); shmem_global_exit(1); } } if ( Track && me == 0 ) fprintf(stderr,"\n"); if (Verbose && me == 0) { double rate,secs; // average time for(i=0,secs=0.0; i < npes; i++) secs += total_time[i]; secs /= (double)npes; rate = ((double)bytes/(1024.0*1024.0)) / secs; printf("%s: ave %5.3f MB/sec (bytes %ld secs %5.3f)\n", pgm, rate, bytes, secs); } shmem_free(total_time); shmem_free(Target); shmem_free(Source); shmem_finalize(); return 0; }
int main (int argc, char **argv) { int i; int nextpe; int me, npes; int success1, success2, success3, success4, success5, success6, success7, success8; short dest1[N]; int dest2[N]; long dest3[N]; long double dest4[N]; long long dest5[N]; double dest6[N]; float dest7[N]; char *dest8; short dest9; int dest10; long dest11; double dest12; float dest13; int fail_count = 0; shmem_init (); me = shmem_my_pe (); npes = shmem_n_pes (); if (npes > 1) { success1 = 0; success2 = 0; success3 = 0; success4 = 0; success5 = 0; success6 = 0; success7 = 0; success8 = 0; dest8 = (char *) malloc (N * sizeof (char)); for (i = 0; i < N; i += 1) { dest1[i] = -9; dest2[i] = -9; dest3[i] = -9; dest4[i] = -9; dest5[i] = -9; dest6[i] = -9; dest7[i] = -9.0; dest8[i] = -9; } dest9 = -9; dest10 = -9; dest11 = -9; dest12 = -9; dest13 = -9; for (i = 0; i < N; i += 1) { src1[i] = (short) me; src2[i] = me; src3[i] = (long) me; src4[i] = (long double) me; src5[i] = (long long) me; src6[i] = (double) me; src7[i] = (float) me; src8[i] = (char) me; } src9 = (short) me; src10 = me; src11 = (long) me; src12 = (double) me; src13 = (float) me; nextpe = (me + 1) % npes; /* Testing shmem_short_get, shmem_short_get, shmem_int_get, shmem_long_get, shmem_longdouble_get, shmem_longlong_get, shmem_double_get, shmem_float_get, shmem_getmem */ shmem_barrier_all (); shmem_short_get (dest1, src1, N, nextpe); shmem_int_get (dest2, src2, N, nextpe); shmem_long_get (dest3, src3, N, nextpe); shmem_longdouble_get (dest4, src4, N, nextpe); shmem_longlong_get (dest5, src5, N, nextpe); shmem_double_get (dest6, src6, N, nextpe); shmem_float_get (dest7, src7, N, nextpe); shmem_getmem (dest8, src8, N * sizeof (char), nextpe); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N; i += 1) { if (dest1[i] != (1)) { success1 = 1; } if (dest2[i] != (1)) { success2 = 1; } if (dest3[i] != (1)) { success3 = 1; } if (dest4[i] != (1)) { success4 = 1; } if (dest5[i] != (1)) { success5 = 1; } if (dest6[i] != (1)) { success6 = 1; } if 
(dest7[i] != (1)) { success7 = 1; } if (dest8[i] != (1)) { success8 = 1; } } if (success1 == 0) printf ("Test shmem_short_get: Passed\n"); else { printf ("Test shmem_short_get: Failed\n"); fail_count++; } if (success2 == 0) printf ("Test shmem_int_get: Passed\n"); else { printf ("Test shmem_int_get: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_long_get: Passed\n"); else { printf ("Test shmem_long_get: Failed\n"); fail_count++; } if (success4 == 0) printf ("Test shmem_longdouble_get: Passed\n"); else { printf ("Test shmem_longdouble_get: Failed\n"); fail_count++; } if (success5 == 0) printf ("Test shmem_longlong_get: Passed\n"); else { printf ("Test shmem_longlong_get: Failed\n"); fail_count++; } if (success6 == 0) printf ("Test shmem_double_get: Passed\n"); else { printf ("Test shmem_double_get: Failed\n"); fail_count++; } if (success7 == 0) printf ("Test shmem_float_get: Passed\n"); else { printf ("Test shmem_float_get: Failed\n"); fail_count++; } if (success8 == 0) printf ("Test shmem_getmem: Passed\n"); else { printf ("Test shmem_getmem: Failed\n"); fail_count++; } } shmem_barrier_all (); /* Testing shmem_get32, shmem_get64, shmem_get128 */ if (sizeof (int) == 4) { for (i = 0; i < N; i += 1) { dest2[i] = -9; dest3[i] = -9; dest4[i] = -9; } success2 = 0; success3 = 0; success4 = 0; shmem_barrier_all (); shmem_get32 (dest2, src2, N, nextpe); shmem_get64 (dest3, src3, N, nextpe); shmem_get128 (dest4, src4, N, nextpe); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N; i += 1) { if (dest2[i] != (1)) { success2 = 1; } if (dest3[i] != (1)) { success3 = 1; } if (dest4[i] != (1)) { success4 = 1; } } if (success2 == 0) printf ("Test shmem_get32: Passed\n"); else { printf ("Test shmem_get32: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_get64: Passed\n"); else { printf ("Test shmem_get64: Failed\n"); fail_count++; } if (success4 == 0) printf ("Test shmem_get128: Passed\n"); else { printf ("Test shmem_get128: Failed\n"); 
fail_count++; } } } else if (sizeof (int) == 8) { for (i = 0; i < N; i += 1) { dest1[i] = -9; dest2[i] = -9; dest3[i] = -9; } success1 = 0; success2 = 0; success3 = 0; shmem_barrier_all (); shmem_get32 (dest1, src1, N, nextpe); shmem_get64 (dest2, src2, N, nextpe); shmem_get128 (dest3, src3, N, nextpe); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N; i += 1) { if (dest1[i] != (1)) { success1 = 1; } if (dest2[i] != (1)) { success2 = 1; } if (dest3[i] != (1)) { success3 = 1; } } if (success1 == 0) printf ("Test shmem_get32: Passed\n"); else { printf ("Test shmem_get32: Failed\n"); fail_count++; } if (success2 == 0) printf ("Test shmem_get64: Passed\n"); else { printf ("Test shmem_get64: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_get128: Passed\n"); else { printf ("Test shmem_get128: Failed\n"); fail_count++; } } } /* Testing shmem_iget32, shmem_iget64, shmem_iget128 */ shmem_barrier_all (); if (sizeof (int) == 4) { for (i = 0; i < N; i += 1) { dest2[i] = -9; dest3[i] = -9; dest4[i] = -9; } success2 = 0; success3 = 0; success4 = 0; shmem_barrier_all (); shmem_iget32 (dest2, src2, 1, 2, N / 2, npes - 1); shmem_iget64 (dest3, src3, 1, 2, N / 2, npes - 1); shmem_iget128 (dest4, src4, 1, 2, N / 2, npes - 1); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N / 2; i += 1) { if (dest2[i] != (npes - 1)) { success2 = 1; } if (dest3[i] != (npes - 1)) { success3 = 1; } if (dest4[i] != (npes - 1)) { success4 = 1; } } if (success2 == 0) printf ("Test shmem_iget32: Passed\n"); else { printf ("Test shmem_iget32: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_iget64: Passed\n"); else { printf ("Test shmem_iget64: Failed\n"); fail_count++; } if (success4 == 0) printf ("Test shmem_iget128: Passed\n"); else { printf ("Test shmem_iget128: Failed\n"); fail_count++; } } } else if (sizeof (int) == 8) { for (i = 0; i < N; i += 1) { dest1[i] = -9; dest2[i] = -9; dest3[i] = -9; } success1 = 0; success2 = 0; success3 = 0; 
shmem_barrier_all (); shmem_iget32 (dest1, src1, 1, 2, N / 2, npes - 1); shmem_iget64 (dest2, src2, 1, 2, N / 2, npes - 1); shmem_iget128 (dest3, src3, 1, 2, N / 2, npes - 1); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N / 2; i += 1) { if (dest1[i] != (npes - 1)) { success1 = 1; } if (dest2[i] != (npes - 1)) { success2 = 1; } if (dest3[i] != (npes - 1)) { success3 = 1; } } if (success1 == 0) printf ("Test shmem_iget32: Passed\n"); else { printf ("Test shmem_iget32: Failed\n"); fail_count++; } if (success2 == 0) printf ("Test shmem_iget64: Passed\n"); else { printf ("Test shmem_iget64: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_iget128: Passed\n"); else { printf ("Test shmem_iget128: Failed\n"); fail_count++; } } } /* Testing shmem_short_iget, shmem_int_iget, shmem_long_iget, shmem_double_iget, shmem_float_iget */ for (i = 0; i < N; i += 1) { dest1[i] = -9; dest2[i] = -9; dest3[i] = -9; dest6[i] = -9; dest7[i] = -9; } success1 = 0; success2 = 0; success3 = 0; success6 = 0; success7 = 0; shmem_barrier_all (); shmem_short_iget (dest1, src1, 1, 2, N / 2, npes - 1); shmem_int_iget (dest2, src2, 1, 2, N / 2, npes - 1); shmem_long_iget (dest3, src3, 1, 2, N / 2, npes - 1); shmem_double_iget (dest6, src6, 1, 2, N / 2, npes - 1); shmem_float_iget (dest7, src7, 1, 2, N / 2, npes - 1); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N / 2; i += 1) { if (dest1[i] != (npes - 1)) { success1 = 1; } if (dest2[i] != (npes - 1)) { success2 = 1; } if (dest3[i] != (npes - 1)) { success3 = 1; } if (dest6[i] != (npes - 1)) { success6 = 1; } if (dest7[i] != (npes - 1)) { success7 = 1; } } if (success1 == 0) printf ("Test shmem_short_iget: Passed\n"); else { printf ("Test shmem_short_iget: Failed\n"); fail_count++; } if (success2 == 0) printf ("Test shmem_int_iget: Passed\n"); else { printf ("Test shmem_int_iget: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_long_iget: Passed\n"); else { printf ("Test shmem_long_iget: Failed\n"); 
fail_count++; } if (success6 == 0) printf ("Test shmem_double_iget: Passed\n"); else { printf ("Test shmem_double_iget: Failed\n"); fail_count++; } if (success7 == 0) printf ("Test shmem_float_iget: Passed\n"); else { printf ("Test shmem_float_iget: Failed\n"); fail_count++; } } /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g, shmem_short_g */ shmem_barrier_all (); dest9 = shmem_short_g (&src9, nextpe); dest10 = shmem_int_g (&src10, nextpe); dest11 = shmem_long_g (&src11, nextpe); dest12 = shmem_double_g (&src12, nextpe); dest13 = shmem_float_g (&src13, nextpe); shmem_barrier_all (); if (me == 0) { if (dest9 == 1) printf ("Test shmem_short_g: Passed\n"); else { printf ("Test shmem_short_g: Failed\n"); fail_count++; } if (dest10 == 1) printf ("Test shmem_int_g: Passed\n"); else { printf ("Test shmem_int_g: Failed\n"); fail_count++; } if (dest11 == 1) printf ("Test shmem_long_g: Passed\n"); else { printf ("Test shmem_long_g: Failed\n"); fail_count++; } if (dest12 == 1) printf ("Test shmem_double_g: Passed\n"); else { printf ("Test shmem_double_g: Failed\n"); fail_count++; } if (dest13 == 1) printf ("Test shmem_float_g: Passed\n"); else { printf ("Test shmem_float_g: Failed\n"); fail_count++; } } shmem_barrier_all (); if (me == 0) { if (fail_count == 0) printf("All Tests Passed\n"); else printf("%d Tests Failed\n", fail_count); } } else { printf ("Number of PEs must be > 1 to test shmem get, test skipped\n"); } shmem_finalize (); return 0; }
/* Performance test for shmem_XX_get (latency and bandwidth) */ #include <stdio.h> #include <stdlib.h> #include <time.h> #include <sys/time.h> #include <shmem.h> long double time_taken; long pSync[_SHMEM_REDUCE_SYNC_SIZE]; long double pWrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; //#define N_ELEMENTS 25600/*Data size chosen to be able to capture time required*/ int main(void) { int i,j,k; int *target; int *source; int me, npes; int nxtpe; struct timeval start, end; long double start_time,end_time; int N_ELEMENTS = (4194304*2)/sizeof(int); start_pes(0); me = _my_pe(); npes = _num_pes(); for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1) { pSync[i] = _SHMEM_SYNC_VALUE; } nxtpe = (me+1)%npes; source = (int *) shmalloc( N_ELEMENTS * sizeof(*source) ); target = (int *) shmalloc( N_ELEMENTS * sizeof(*target) ); if(me == 0) printf("Get Performance test results:\nSize (Bytes)\t\tTime (Microseconds)\t\tBandwidth (Bytes/Second)\n"); for (i = 0; i < N_ELEMENTS; i += 1) { source[i] = i + 1; target[i] = -90; } shmem_barrier_all(); /*For int put we take average of all the times realized by a pair of PEs, thus * reducing effects of physical location of PEs*/ for (i=1;i<=N_ELEMENTS;i=i*2) { time_taken = 0; for(j=0;j<10000;j++){ gettimeofday(&start, NULL); start_time = (start.tv_sec * 1000000.0) + start.tv_usec; shmem_int_get(target, source, i,nxtpe); gettimeofday(&end, NULL); end_time = (end.tv_sec * 1000000.0) + end.tv_usec; time_taken = time_taken + (end_time - start_time); } shmem_longdouble_sum_to_all(&time_taken, &time_taken,1, 0, 0, npes, pWrk, pSync); if(me == 0){ time_taken = time_taken/(npes*10000); /*Average time across all PEs for one put*/ if (i*sizeof(i) < 1048576) printf("%ld \t\t\t\t %ld\t\t\t\t %ld\n",i*sizeof(i),time_taken,(i*sizeof(i))/(time_taken*1000000.0)); else printf("%ld \t\t\t %ld\t\t\t\t %ld\n",i*sizeof(i),time_taken,(i*sizeof(i))/(time_taken*1000000.0)); } } shmem_barrier_all(); shfree(target); shfree(source); return 0; }