/*
 * Profiling wrapper around PARMCI_Malloc_local().
 *
 * Forwards the request for `bytes` bytes to PARMCI_Malloc_local() and adds
 * the time spent inside that call, measured with TIME() before and after,
 * to the ARMCI_Malloc_local_t accumulator.
 *
 * Returns the pointer returned by PARMCI_Malloc_local(), unmodified.
 */
void *ARMCI_Malloc_local(armci_size_t bytes)
{
    /*
     * The timestamps are scratch values that never need to outlive the
     * call, so they are ordinary automatics rather than function-level
     * statics; this keeps the wrapper free of hidden shared state.
     * NOTE(review): ARMCI_Malloc_local_t is still updated with a plain
     * `+=` -- confirm whether callers can reach this concurrently.
     */
    const double stime = TIME();
    void *rval = PARMCI_Malloc_local(bytes);
    const double etime = TIME();

    ARMCI_Malloc_local_t += etime - stime;

    return rval;
}
void openib_create_locks() { // Create the locks and initialize them l_state.local_lock_buf = PARMCI_Malloc_local(sizeof(long)); assert(l_state.local_lock_buf); l_state.atomic_lock_buf = (void **)malloc(l_state.size * sizeof(void *)); assert(l_state.atomic_lock_buf); PARMCI_Malloc((l_state.atomic_lock_buf), sizeof(long)); *(long *)(l_state.atomic_lock_buf[l_state.rank]) = 0; *(long *)(l_state.local_lock_buf) = 0; MPI_Barrier(l_state.world_comm); }
/*
 * Forwarding wrapper: passes `bytes` straight to PARMCI_Malloc_local() and
 * hands its return value back to the caller unchanged.
 */
void* ARMCI_Malloc_local(armci_size_t bytes)
{
    return PARMCI_Malloc_local(bytes);
}
/*
 * Forwarding wrapper for PARMCI_Malloc_local().
 *
 * size: value passed through unchanged to PARMCI_Malloc_local().
 * Returns whatever PARMCI_Malloc_local() returned.
 */
void *ARMCI_Malloc_local(armci_size_t size)
{
    void *local_mem = PARMCI_Malloc_local(size);

    return local_mem;
}
int main(int argc, char *argv[]) { int rank, size; int provided; #if defined(__bgp__) MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); assert(provided==MPI_THREAD_MULTIPLE); #else MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided); //assert(provided>MPI_THREAD_SINGLE); #endif MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); assert( size > 1 ); PARMCI_Init_args(&argc, &argv); int w, maxwinsize = ( argc > 1 ? atoi(argv[1]) : 1000000 ); if ( rank == 0 ) printf( "size = %d maxwinsize = %d doubles\n", size, maxwinsize ); for ( w = 1 ; w < maxwinsize ; w *= 2 ) { double ** window; window = (double **) PARMCI_Malloc_local( size * sizeof(double *) ); PARMCIX_Malloc_comm(MPI_COMM_WORLD, (void **) window, w * sizeof(double) ); for (int i = 0; i < w; i++) window[rank][i] = 0.0; double * buffer; buffer = (double *) PARMCI_Malloc_local( w * sizeof(double) ); PARMCIX_Barrier_comm(MPI_COMM_WORLD); if (rank == 0) for (int t=1; t<size; t+=2) { int bytes = w * sizeof(double); for (int i = 0; i < w; i++) buffer[i] = (double)(t); PARMCI_Put( buffer, window[t], bytes, t ); PARMCI_Fence( t ); for (int i = 0; i < w; i++) buffer[i] = 0.0; PARMCI_Get( window[t], buffer, bytes, t ); int errors = 0; for (int i = 0; i < w; i++) if ( buffer[i] != (double)(t) ) errors++; if ( errors > 0 ) for (int i = 0; i < w; i++) printf("rank %d buffer[%d] = %lf \n", rank, i, buffer[i] ); } PARMCIX_Barrier_comm(MPI_COMM_WORLD); if (rank != 0) { int errors = 0; for (int i = 0; i < w; i++) if ( window[rank][i] != (double)(rank) ) errors++; if ( errors > 0 ) for (int i = 0; i < w; i++) printf("rank %d window[%d][%d] = %lf \n", rank, rank, i, window[rank][i] ); } PARMCIX_Barrier_comm(MPI_COMM_WORLD); if (rank == 0) for (int t=1; t<size; t++) { int bytes = w * sizeof(double); double t0, t1, t2, dt1, dt2, bw1, bw2; for (int i = 0; i < w; i++) buffer[i] = (double)(-1); t0 = MPI_Wtime(); PARMCI_Put( buffer, window[t], bytes, t ); t1 = MPI_Wtime(); PARMCI_Fence( t 
); t2 = MPI_Wtime(); dt1 = t1 - t0; dt2 = t2 - t0; bw1 = bytes / dt1; bw2 = bytes / dt2; bw1 /= 1000000.0; bw2 /= 1000000.0; printf("PARMCI_Put of from rank %4d to rank %4d of %9d bytes - local: %lf s (%lf MB/s) remote: %lf s (%lf MB/s) \n", t, 0, bytes, dt1, bw1, dt2, bw2); fflush(stdout); } PARMCIX_Barrier_comm(MPI_COMM_WORLD); PARMCI_Free_local( (void *) buffer ); PARMCIX_Free_comm(MPI_COMM_WORLD, (void *) window[rank] ); PARMCI_Free_local( (void *) window ); } PARMCI_Finalize(); printf("%d: all done \n", rank ); fflush(stdout); MPI_Finalize(); return 0; }