/*
 * Fortran entry point for shmem_put.
 *
 * Converts the Fortran-side arguments with the FPTR_2_VOID_PTR and
 * OMPI_FINT_2_INT macros and forwards them, in the same order, to the
 * C shmem_put call.
 *
 *   target - Fortran pointer passed on as the first shmem_put argument
 *   source - Fortran pointer passed on as the second shmem_put argument
 *   length - pointer to the Fortran integer passed on as the third argument
 *   pe     - pointer to the Fortran integer passed on as the fourth argument
 */
void shmem_put_f(FORTRAN_POINTER_T target,
                 FORTRAN_POINTER_T source,
                 MPI_Fint *length,
                 MPI_Fint *pe)
{
    /* Fortran passes integers by reference; read them once up front. */
    const MPI_Fint nelems_f = *length;
    const MPI_Fint target_pe_f = *pe;

    shmem_put(FPTR_2_VOID_PTR(target),
              FPTR_2_VOID_PTR(source),
              OMPI_FINT_2_INT(nelems_f),
              OMPI_FINT_2_INT(target_pe_f));
}
/*
 * Example: PE 0 copies a 10-element array of longs into `dest` on a
 * neighbouring PE with shmem_put; after a barrier every PE prints its own
 * dest[0].
 *
 * Returns 0.
 */
int main(void)
{
    long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    static long dest[10];

    shmem_init();
    int me = shmem_my_pe();

    /*
     * Target PE 1 when it exists.  On a single-PE run PE 1 is out of
     * range, so fall back to PE 0 (a put to self) instead of passing an
     * invalid PE number to shmem_put.
     */
    int receiver = 1 % shmem_n_pes();

    if (me == 0) {
        /* put 10 words into dest on the receiving PE */
        shmem_put(dest, source, 10, receiver);
    }

    shmem_barrier_all(); /* sync sender and receiver */

    printf("dest[0] on PE %d is %ld\n", me, dest[0]);

    shmem_finalize();
    return 0;
}
/*
 * Example: PE 0 writes the value 33 into the static variable `targ` on the
 * receiving PE (PE 1, or PE 0 itself when only one PE is running).  After a
 * barrier the receiving PE prints what it holds.
 *
 * Returns 0.
 */
int main(void)
{
    static int targ = 0;

    shmem_init();

    const int rank = shmem_my_pe();
    const int dest_pe = 1 % shmem_n_pes();
    const int is_sender = (rank == 0);
    const int is_receiver = (rank == dest_pe);

    if (is_sender) {
        int value = 33;

        shmem_put(&targ, &value, 1, dest_pe);
    }

    /* Synchronizes sender and receiver */
    shmem_barrier_all();

    if (is_receiver) {
        printf("PE %d targ=%d (expect 33)\n", rank, targ);
    }

    shmem_finalize();
    return 0;
}
/*
 * _PERM_IR -- exchange the per-processor index lists of a permutation map.
 *
 * Steps, as performed below:
 *   1. lsize[i] is taken from lind[i][0] (0 when lind[i] is NULL) and each
 *      processor puts lsize[i] into rsize[_INDEX] on processor i.
 *   2. _PERM_CleanIndices() is called with the sizes and index tables; it
 *      also yields lindbase / rindbase, which are stored in pm at the end.
 *   3. For every processor i with rsize[i] > 0, the pointer held in rind[i]
 *      is put into rptr[_INDEX] on processor i.
 *   4. For every processor i with lsize[i] > 0, this processor waits for
 *      rptr[i] to become non-zero, then puts lind[i] (lsize[i] ints) to that
 *      address on processor i.
 *   5. After shmem_fence(), a 1 is put into rflag[_INDEX] on each such
 *      processor i.
 *   6. The local portion is copied with memcpy.
 *
 *   pm - permutation map; lind, rind, rptr, rflag (and procmap in debug
 *        builds) are read, lindbase and rindbase are written.
 *
 * The ring traversals start at the neighbour of _INDEX and wrap around until
 * they reach _INDEX again, so _INDEX itself is never visited by the loops.
 */
void _PERM_IR(_permmap* const pm) {
  const int one = 1;
  int * rindbase;
  int * lindbase;
  int * const restrict lsize = (int *)shmalloc(_PROCESSORS * sizeof(int));
  int * const restrict rsize = (int *)shmalloc(_PROCESSORS * sizeof(int));
  int * restrict * const restrict lind = pm->lind;
  int * restrict * const restrict rind = pm->rind;
  char * restrict * const restrict rptr = pm->rptr;
  int * const restrict rflag = pm->rflag;
  int* addr;
  int i, j;  /* j is only used by the debug dump below */

  for (i = 0; i < _PROCESSORS; i++) {
    lsize[i] = lind[i] ? lind[i][0] : 0;
    rsize[i] = 0;
  }

  shmem_barrier_all();

  /*
   * Tell every processor we send to how many ints to expect.
   * The step uses i + 1, not i++: assigning the result of i++ back to i is
   * undefined behaviour and commonly leaves i unchanged (endless loop).
   */
  for (i = (_INDEX == _PROCESSORS - 1) ? 0 : _INDEX+1;
       i != _INDEX;
       i = (i == _PROCESSORS - 1) ? 0 : i + 1) {
    if (lsize[i] > 0) {
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("%d sending count to %d\n", _INDEX, i);
      fflush(stdout);
#endif
      shmem_int_put(&(rsize[_INDEX]), &(lsize[i]), 1, i);
    }
  }
  rsize[_INDEX] = lsize[_INDEX];

  shmem_barrier_all();

#ifdef _SHMEM_PERMUTE_DEBUG
  sleep(_PROCESSORS);
#endif

  _PERM_CleanIndices(lsize, rsize, lind, rind, &lindbase, &rindbase);

#ifdef _SHMEM_PERMUTE_DEBUG
  sleep(_INDEX);
  printf("FROM PROCESSOR %d\n", _INDEX);
  printf(" LSIZE = ");
  for (i = 0; i < _PROCESSORS; i++) {
    printf("%d ", lsize[i]);
  }
  printf("\n");
  printf(" RSIZE = ");
  for (i = 0; i < _PROCESSORS; i++) {
    printf("%d ", rsize[i]);
  }
  printf("\n");
  printf(" PROCMAP: size = %d, # elts = %d, encoded = %d :: ", pm->procmap[0], pm->procmap[1], pm->procmap[2]);
  for (j = 3; j < pm->procmap[0]; j++) {
    printf("%d ", pm->procmap[j]);
  }
  printf("\n");
  for (i = 0; i < _PROCESSORS; i++) {
    if (lind[i] != 0) {
      printf(" TO PROCESSOR %d: ", i);
      printf("size = %d, # elts = %d, encoded = %d :: ", lind[i][0], lind[i][1], lind[i][2]);
      for (j = 3; j < lind[i][0]; j++) {
        printf("%d ", lind[i][j]);
      }
      printf("\n");
    }
  }
  printf("\n");
  fflush(stdout);
  sleep(_PROCESSORS-_INDEX);
#endif

  /* Publish the receive-buffer pointer rind[i] to each processor that sends to us. */
  for (i = (_INDEX == _PROCESSORS - 1) ? 0 : _INDEX+1;
       i != _INDEX;
       i = (i == _PROCESSORS - 1) ? 0 : i + 1) {
    if (rsize[i] > 0) {
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("%d sending rind address to %d\n", _INDEX, i);
      fflush(stdout);
#endif
      /*
       * NOTE(review): _PERM_DR clears rflag[i] at the equivalent point,
       * while this clears rflag[_INDEX]; confirm which slot is intended.
       */
      rflag[_INDEX] = 0;
      shmem_put((void*)&(rptr[_INDEX]), (void*)&(rind[i]), 1, i);
    }
  }

#ifdef _SHMEM_PERMUTE_DEBUG
  sleep(_PROCESSORS);
#endif

  /*
   * Walk the ring in the opposite direction; once processor i has delivered
   * its buffer pointer into rptr[i], send it our index list.
   * The step uses i - 1, not i-- (same undefined-behaviour issue as above).
   */
  for (i = (_INDEX == 0) ? _PROCESSORS-1 : _INDEX-1;
       i != _INDEX;
       i = (i == 0) ? _PROCESSORS-1 : i - 1) {
    if (lsize[i] > 0) {
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("%d waiting for rind address from %d, sending lind\n", _INDEX, i);
      fflush(stdout);
#endif
      shmem_wait((long*)&(rptr[i]), 0);
      addr = (int*)rptr[i];
      rptr[i] = 0;  /* reset the slot so a later wait on it starts from 0 */
      shmem_int_put(addr, lind[i], lsize[i], i);
    }
  }

#ifdef _SHMEM_PERMUTE_DEBUG
  sleep(_PROCESSORS);
#endif

  /* Order the index puts above before the flag puts below. */
  shmem_fence();

  for (i = (_INDEX == 0) ? _PROCESSORS-1 : _INDEX-1;
       i != _INDEX;
       i = (i == 0) ? _PROCESSORS-1 : i - 1) {
    if (lsize[i] > 0) {
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("IR %d sending one to %d\n", _INDEX, i);
      fflush(stdout);
#endif
      shmem_int_put(&(rflag[_INDEX]), &one, 1, i);
    }
  }

  /* The local portion needs no communication. */
  if (lsize[_INDEX] > 0) {
    memcpy(rind[_INDEX], lind[_INDEX], lsize[_INDEX]*sizeof(int));
  }

  pm->lindbase = lindbase;
  pm->rindbase = rindbase;

  shfree(lsize);
  shfree(rsize);
}
/*
 * _PERM_DR -- move the permutation data between processors.
 *
 * Steps, as performed below:
 *   1. Per-processor byte counts are computed from _PERM_LCNT / _PERM_RCNT
 *      times pd->eltsize; `scatter` selects which count is the outgoing one
 *      (ldecnt) and which the incoming one (rdecnt).
 *   2. For every processor i with rdecnt[i] > 0, rflag[i] is cleared and the
 *      pointer held in rdata[i] is put into rptr[_INDEX] on processor i.
 *   3. For every processor i with ldecnt[i] > 0, this processor waits for
 *      rptr[i] to become non-zero and then puts ldecnt[i] bytes of ldata[i]
 *      to that address on processor i.
 *   4. After shmem_fence(), a 1 is put into rflag[_INDEX] on each such
 *      processor i.
 *   5. The local portion is copied with memcpy and pd->count is set to -1.
 *
 *   pm      - permutation map (rptr and rflag are used; counts are read
 *             through _PERM_LCNT / _PERM_RCNT)
 *   pd      - permutation data (eltsize, ldata, rdata are read; count is
 *             written)
 *   scatter - non-zero: send _PERM_LCNT elements and receive _PERM_RCNT;
 *             zero: the reverse
 *   dst,src - not used by this implementation
 */
void _PERM_DR(const _permmap* const pm, _permdata* const pd, const int scatter, _array_fnc dst, _array_fnc src) {
  const int one = 1;
  const int eltsize = pd->eltsize;
  int i;
  int * const restrict ldecnt = (int*)_zmalloc(_PROCESSORS*sizeof(int), "perm dr lcnt");
  int * const restrict rdecnt = (int*)_zmalloc(_PROCESSORS*sizeof(int), "perm dr rcnt");
  char * const restrict * const restrict ldata = pd->ldata;
  char * const restrict * const restrict rdata = pd->rdata;
  char * restrict * const restrict rptr = pm->rptr;
  int * restrict rflag = pm->rflag;
  char* addr;

#ifdef _SHMEM_PERMUTE_DEBUG
  printf("DR start %d\n", _INDEX);
  fflush(stdout);
  sleep(5);
#endif

  for (i=0; i<_PROCESSORS; i++) {
    ldecnt[i] = (scatter ? _PERM_LCNT(pm, i) : _PERM_RCNT(pm, i)) * eltsize;
    rdecnt[i] = (scatter ? _PERM_RCNT(pm, i) : _PERM_LCNT(pm, i)) * eltsize;
  }

  /*
   * Publish the receive-buffer pointer rdata[i] to each processor that
   * sends to us.
   * The step uses i + 1, not i++: assigning the result of i++ back to i is
   * undefined behaviour and commonly leaves i unchanged (endless loop).
   */
  for (i = (_INDEX == _PROCESSORS - 1) ? 0 : _INDEX+1;
       i != _INDEX;
       i = (i == _PROCESSORS - 1) ? 0 : i + 1) {
    if (rdecnt[i] > 0) {
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("DR %d sending addr, %d, to %d\n", _INDEX, (int)&(rdata[i]), i);
      fflush(stdout);
      sleep(5);
#endif
      rflag[i] = 0;
      shmem_put((void*)&(rptr[_INDEX]), (void*)&(rdata[i]), 1, i);
    }
  }

  /*
   * Walk the ring in the opposite direction; once processor i has delivered
   * its buffer pointer into rptr[i], send it our data.
   * The step uses i - 1, not i-- (same undefined-behaviour issue as above).
   */
  for (i = (_INDEX == 0) ? _PROCESSORS-1 : _INDEX-1;
       i != _INDEX;
       i = (i == 0) ? _PROCESSORS-1 : i - 1) {
    if (ldecnt[i] > 0) {
      shmem_wait((long*)&(rptr[i]), 0);
      addr = rptr[i];
      rptr[i] = 0;  /* reset the slot so a later wait on it starts from 0 */
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("DR %d waiting addr, %d, from %d\n", _INDEX, (int)addr, i);
      fflush(stdout);
      sleep(5);
#endif
      shmem_putmem(addr, ldata[i], ldecnt[i], i);
    }
  }

  /* Order the data puts above before the flag puts below. */
  shmem_fence();

  for (i = (_INDEX == 0) ? _PROCESSORS-1 : _INDEX-1;
       i != _INDEX;
       i = (i == 0) ? _PROCESSORS-1 : i - 1) {
    if (ldecnt[i] > 0) {
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("DR %d sending flag to %d\n", _INDEX, i);
      fflush(stdout);
      sleep(5);
#endif
      shmem_int_put(&(rflag[_INDEX]), &one, 1, i);
    }
  }

  /* The local portion needs no communication. */
  if (ldecnt[_INDEX] > 0) {
    memcpy(rdata[_INDEX], ldata[_INDEX], ldecnt[_INDEX]);
  }

  _zfree(ldecnt, "perm dr lcnt");
  _zfree(rdecnt, "perm dr rcnt");

  pd->count = -1;
}
int main(int argc, char **argv) { int nodes; int mypid; int reps, mult, count, start_msglen, stride; int otherpid; int x; double r_avg; double t_start, t_stop; double t_calibrate; double t_elapsed; double t_avg; int c; int msglen; int check; int r; double g_total; int i; nodes = _num_pes(); mypid = _MY_PE(); shmem_set_cache_inv(); reps=atoi(argv[1]); mult=atoi(argv[2]); count=atoi(argv[3]); start_msglen=atoi(argv[4]); stride=atoi(argv[5]); for (i=0; i<_SHMEM_BCAST_SYNC_SIZE; i++) { psync[i] = _SHMEM_SYNC_VALUE; } barrier(); if ((mypid % 2) == 0) { otherpid = mypid+1 % nodes; for(x = 0; x < max_words; x++) msgbuf[x] = x; } else { otherpid = (nodes+mypid-1) % nodes; t_start = IRTC(); t_stop = IRTC(); t_calibrate = t_stop - t_start; } for(c = 1; c <= count+1; c++) { if (c == 1) msglen = min_msglen; else if( c == 2) msglen = start_msglen; else if (mult) msglen = start_msglen * (int)pow((double)stride, (double)(c-2)); else msglen = start_msglen + stride*(c-2); if ( msglen % word_size != 0) msglen = (msglen/word_size) * word_size; if (msglen > max_msglen) { printf("Message too big\n"); exit(1); } check = msglen/word_size + 1; if ( mypid % 2 == 0) { for(r=0; r< reps; r++) { barrier(); msgbuf[check] = 1; shmem_put(msgbuf, msgbuf, check+1, otherpid); } barrier(); if(!mypid) { g_total = 0.0; for(x = 1; x < nodes; x+=2) { shmem_get((long *)&t_total, (long *)&t_total, 1, x); g_total = g_total + t_total; } t_avg = g_total / (nodes/2.0*reps*cycle_rate); if (c == 1) { printf("T3D COMMUNICATION TIMING\n"); printf("------------------------\n"); printf(" Method: %s\n", method); printf(" PE's: %d\n", nodes); printf(" Repetitions: %d\n", reps); printf(" Latency: %lf ", t_avg ); printf("us (transmit time for %d-byte msg)\n", min_msglen); printf("===================== ============== ==============\n"); printf(" MESSAGE LENGTH TRANSMIT TIME COMM RATE\n"); printf(" (bytes) (words) (us) (MB/s)\n"); printf("========== ========== ============== ==============\n"); } else { r_avg = 
(rate_factor * msglen) / t_avg; printf(" %7d %6d %7.2lf %7.2lf\n", msglen, msglen/word_size, t_avg, r_avg); } } } else { t_total = 0; for(r=0; r < reps; r++) { msgbuf[check] = 0; barrier(); t_start = IRTC(); do { shmem_udcflush(); } while(msgbuf[check] == 0); t_stop = IRTC(); t_elapsed = t_stop - t_start - t_calibrate; t_total = t_total + t_elapsed; } barrier(); } barrier(); } barrier(); }
int main(int argc, char **argv) { int j; int my_pe,n_pes; int *flag,*one; size_t max_elements,max_elements_bytes; char *srce_char,*targ_char; short *srce_short,*targ_short; int *srce_int,*targ_int; long *srce_long,*targ_long; start_pes(0); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); flag = shmalloc((size_t) sizeof(int)); one = shmalloc((size_t) sizeof(int)); *one = 1; /* fail if trying to use odd number of processors */ if ( (n_pes % 2) != 0 ){ fprintf(stderr, "FAIL - test requires even number of PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_num_put(%s)\n", argv[0]); /* shmem_putmem test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(char)); max_elements_bytes = (size_t) (sizeof(char)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_putmem max_elements = %d\n",max_elements); srce_char = shmalloc(max_elements_bytes); targ_char = shmalloc(max_elements_bytes); if((srce_char == NULL) || (targ_char == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_char[j] = (char)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_char[j] = (char)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_putmem(targ_char,srce_char,max_elements,my_pe+1); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_char[j] != (char)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_char[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_char[j],my_pe+j-1); } shfree(srce_char); shfree(targ_char); /* shmem_put16 test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(short)); if(max_elements > 20000) max_elements=20000; max_elements_bytes = (size_t) (sizeof(short)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put16 max_elements = %d\n",max_elements); srce_short = shmalloc(max_elements_bytes); targ_short = shmalloc(max_elements_bytes); if((srce_short == NULL) || (targ_short == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < 
max_elements; j++) srce_short[j] = (short)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_short[j] = (short)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put16(targ_short,srce_short,max_elements,my_pe+1); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_short[j] != (short)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_short[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_short[j],my_pe+j-1); } shfree(srce_short); shfree(targ_short); /* shmem_put32 test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put32 max_elements = %d\n",max_elements); srce_int = shmalloc(max_elements_bytes); targ_int = shmalloc(max_elements_bytes); if((srce_int == NULL) || (targ_int == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_int[j] = (int)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_int[j] = (int)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put32(targ_int,srce_int,max_elements,my_pe+1); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_int[j] != (int)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_int[j],my_pe+j-1); } shfree(srce_int); shfree(targ_int); /* shmem_put64 test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put64 max_elements = %d\n",max_elements); srce_long = shmalloc(max_elements_bytes); targ_long = shmalloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_long[j] = (long)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_long[j] = (long)(my_pe+j); 
shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put64(targ_long,srce_long,max_elements,my_pe+1); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_long[j] != (long)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_long[j],my_pe+j-1); } shfree(srce_long); shfree(targ_long); /* shmem_put128 test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(long)); if ( (max_elements % 2) != 0) max_elements = max_elements-1; max_elements_bytes = (size_t) (sizeof(long)*max_elements); max_elements = max_elements/2; if(my_pe == 0) fprintf(stderr,"shmem_put128 max_elements = %d\n",max_elements); srce_long = shmalloc(max_elements_bytes); targ_long = shmalloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < 2*max_elements; j++) srce_long[j] = (long)(my_pe+j); else for(j = 0; j < 2*max_elements; j++) targ_long[j] = (long)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put128(targ_long,srce_long,max_elements,my_pe+1); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < 2*max_elements; j++) if ( targ_long[j] != (long)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_long[j],my_pe+j-1); } shfree(srce_long); shfree(targ_long); #ifdef SHMEM_C_GENERIC_32 /* shmem_put (GENERIC 32) test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put (GENERIC 32) max_elements = %d\n",max_elements); srce_int = shmalloc(max_elements_bytes); targ_int = shmalloc(max_elements_bytes); if((srce_int == NULL) || (targ_int == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_int[j] = (int)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_int[j] 
= (int)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put(targ_int,srce_int,max_elements,my_pe+1); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_int[j] != (int)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_int[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_int[j],my_pe+j-1); } shfree(srce_int); shfree(targ_int); #else /* shmem_put (GENERIC 64) test */ *flag = 0; max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_put (GENERIC 64) max_elements = %d\n",max_elements); srce_long = shmalloc(max_elements_bytes); targ_long = shmalloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) shmalloc_error(); if ( (my_pe % 2) == 0 ) for(j = 0; j < max_elements; j++) srce_long[j] = (long)(my_pe+j); else for(j = 0; j < max_elements; j++) targ_long[j] = (long)(my_pe+j); shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { shmem_put(targ_long,srce_long,max_elements,my_pe+1); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < max_elements; j++) if ( targ_long[j] != (long)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] targ_long[%d]=%d my_pe+j-1=%d\n", my_pe,j,targ_long[j],my_pe+j-1); } shfree(srce_long); shfree(targ_long); #endif #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }