/*
 * ARMCI_Barrier wrapper that bumps the parmci_calls counter and then
 * forwards to PARMCI_Barrier().
 */
void ARMCI_Barrier(void)
{
    ++parmci_calls;

    /* C (ISO 9899:1999) 6.8.6.4/1:
     * "A return statement with an expression shall not appear in a
     *  function whose return type is void."
     * Hence the callee is invoked as a plain statement. */
    PARMCI_Barrier();
}
/*\ Release a handle: clear the callback slot it refers to.
 *  The handle is mapped back to a table index as GPC_OFFSET - handle.
 *  PARMCI_Barrier() is called before the index is range-checked; an index
 *  outside [0, GPC_SLOTS) is reported through armci_die().
\*/
void ARMCI_Gpc_release(int handle)
{
    int slot = GPC_OFFSET - handle;

    PARMCI_Barrier();

    if (!(slot >= 0 && slot < GPC_SLOTS))
        armci_die("ARMCI_Gpc_release: bad handle", slot);

    _table[slot] = (void*)0;
}
/*
 * Profiling wrapper for the barrier: brackets PARMCI_Barrier() with
 * armci_profile_start()/armci_profile_stop() for the ARMCI_PROF_BARRIER event.
 *
 * Declared with an explicit (void) parameter list so the definition also
 * serves as a prototype and calls with arguments are rejected at compile time.
 */
void ARMCI_Barrier(void)
{
    armci_profile_start(ARMCI_PROF_BARRIER);
    PARMCI_Barrier();
    armci_profile_stop(ARMCI_PROF_BARRIER);
}
/*
 * Timing wrapper for the barrier: measures the time spent inside
 * PARMCI_Barrier() using TIME() and adds it to the ARMCI_Barrier_t
 * accumulator.
 *
 * The two timestamps are plain automatic variables: both are assigned before
 * they are read on every call, so they need no static storage, and keeping
 * them off static storage avoids sharing them between concurrent callers.
 */
void ARMCI_Barrier(void)
{
    double stime, etime;

    stime = TIME();
    PARMCI_Barrier();
    etime = TIME();

    ARMCI_Barrier_t += etime - stime;
}
/*\ Register a callback function and hand back an int handle for it.
 *  After PARMCI_Barrier(), _table is scanned from index 0 for the first
 *  empty entry; func is stored there and the handle GPC_OFFSET - index is
 *  returned. If no entry below GPC_SLOTS is empty, nothing is stored and
 *  the return value is GPC_OFFSET + 1.
\*/
int ARMCI_Gpc_register( int (*func) ())
{
    int slot = 0;
    int found = -1;

    PARMCI_Barrier();

    for (;;) {
        if (!_table[slot]) {
            /* first free entry wins */
            _table[slot] = func;
            found = slot;
            break;
        }
        slot++;
        if (slot >= GPC_SLOTS)
            break;
    }

    return GPC_OFFSET - found;
}
/* ARMCI_Barrier simply forwards to PARMCI_Barrier(). */
void ARMCI_Barrier(void)
{
    PARMCI_Barrier();
}
int main(int argc, char *argv[]) { int rank, size; int provided; double one = 1.0; #if defined(__bgp__) MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); assert(provided==MPI_THREAD_MULTIPLE); #else MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided); //assert(provided>MPI_THREAD_SINGLE); #endif MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); assert( size > 1 ); PARMCI_Init_args(&argc, &argv); int w, maxwinsize = ( argc > 1 ? atoi(argv[1]) : 1000000 ); if ( rank == 0 ) printf( "size = %d maxwinsize = %d doubles\n", size, maxwinsize ); for ( w = 1 ; w < maxwinsize ; w *= 2 ) { double ** window; window = (double **) PARMCI_Malloc_local( size * sizeof(double *) ); PARMCI_Malloc( (void **) window, w * sizeof(double) ); for (int i = 0; i < w; i++) window[rank][i] = 0.0; double * buffer; buffer = (double *) PARMCI_Malloc_local( w * sizeof(double) ); PARMCI_Barrier(); if (rank == 0) for (int t=1; t<size; t++) { int bytes = w * sizeof(double); for (int i = 0; i < w; i++) buffer[i] = (double)(t); PARMCI_Acc( ARMCI_ACC_DBL, &one, buffer, window[t], bytes, t ); PARMCI_Fence( t ); for (int i = 0; i < w; i++) buffer[i] = 0.0; PARMCI_Get( window[t], buffer, bytes, t ); int errors = 0; for (int i = 0; i < w; i++) if ( buffer[i] != (double)(t) ) errors++; if ( errors > 0 ) for (int i = 0; i < w; i++) printf("rank %d buffer[%d] = %lf \n", rank, i, buffer[i] ); } PARMCI_Barrier(); if (rank != 0) { int errors = 0; for (int i = 0; i < w; i++) if ( window[rank][i] != (double)(rank) ) errors++; if ( errors > 0 ) for (int i = 0; i < w; i++) printf("rank %d window[%d][%d] = %lf \n", rank, rank, i, window[rank][i] ); } PARMCI_Barrier(); if (rank == 0) for (int t=1; t<size; t++) { int bytes = w * sizeof(double); double t0, t1, t2, dt1, dt2, bw1, bw2; for (int i = 0; i < w; i++) buffer[i] = (double)(-1); t0 = MPI_Wtime(); PARMCI_Acc( ARMCI_ACC_DBL, &one, buffer, window[t], bytes, t ); t1 = MPI_Wtime(); PARMCI_Fence( t ); t2 = MPI_Wtime(); 
dt1 = t1 - t0; dt2 = t2 - t0; bw1 = bytes / dt1; bw2 = bytes / dt2; bw1 /= 1000000.0; bw2 /= 1000000.0; printf("PARMCI_Acc of from rank %4d to rank %4d of %9d bytes - local: %lf s (%lf MB/s) remote: %lf s (%lf MB/s) \n", t, 0, bytes, dt1, bw1, dt2, bw2); fflush(stdout); } PARMCI_Barrier(); PARMCI_Free_local( (void *) buffer ); PARMCI_Free( (void *) window[rank] ); PARMCI_Free_local( (void *) window ); } PARMCI_Finalize(); printf("%d: all done \n", rank ); fflush(stdout); MPI_Finalize(); return 0; }
int PARMCI_Create_mutexes(int num) { int rc,p, totcount; int *mutex_count = (int*)armci_internal_buffer; if((sizeof(int)*armci_nproc) > armci_getbufsize()) { mutex_count = (double *)malloc(sizeof(int)*armci_nproc); } if (num < 0 || num > MAX_LOCKS) return(FAIL); if(num_mutexes) armci_die("mutexes already created",num_mutexes); if(armci_nproc == 1) { num_mutexes=1; return(0); } /* local memory allocation for mutex arrays*/ mutex_mem_ar = (void*) malloc(armci_nproc*sizeof(void*)); if(!mutex_mem_ar) armci_die("ARMCI_Create_mutexes: malloc failed",0); glob_mutex = (void*)malloc(armci_nproc*sizeof(mutex_entry_t)); if(!glob_mutex) { free(mutex_mem_ar); armci_die("ARMCI_Create_mutexes: malloc 2 failed",0); } /* bzero(mutex_count,armci_nproc*sizeof(int));*/ bzero((char*)mutex_count,sizeof(int)*armci_nproc); /* find out how many mutexes everybody allocated */ mutex_count[armci_me]=num; armci_msg_igop(mutex_count, armci_nproc, "+"); for(p=totcount=0; p< armci_nproc; p++)totcount+=mutex_count[p]; tickets = calloc(totcount,sizeof(int)); if(!tickets) { free(glob_mutex); free(mutex_mem_ar); return(FAIL2); } /* we need memory for token and turn - 2 ints */ rc = PARMCI_Malloc(mutex_mem_ar,2*num*sizeof(int)); if(rc) { free(glob_mutex); free(mutex_mem_ar); free(tickets); return(FAIL3); } if(num)bzero((char*)mutex_mem_ar[armci_me],2*num*sizeof(int)); /* setup global mutex array */ for(p=totcount=0; p< armci_nproc; p++) { glob_mutex[p].token = mutex_mem_ar[p]; glob_mutex[p].turn = glob_mutex[p].token + mutex_count[p]; glob_mutex[p].count = mutex_count[p]; glob_mutex[p].tickets = tickets + totcount; totcount += mutex_count[p]; } num_mutexes= totcount; #ifdef LAPI mymutexcount = num; #endif PARMCI_Barrier(); if(DEBUG) fprintf(stderr,"%d created (%d,%d) mutexes\n",armci_me,num,totcount); return(0); }