void ompi_win_wait_f(MPI_Fint *win, MPI_Fint *ierr)
{
    int c_ierr;
    MPI_Win c_win = MPI_Win_f2c(*win);

    c_ierr = MPI_Win_wait(c_win);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);
}
int main(int argc, char *argv[])
{
    MPI_Win win;

    MPI_Init(&argc, &argv);
    MPI_Win_create(MPI_BOTTOM, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    MPI_Win_post(MPI_GROUP_EMPTY, 0, win);
    MPI_Win_wait(win);
    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}
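The example above is degenerate: the exposure group is MPI_GROUP_EMPTY, so MPI_Win_wait returns immediately since no origin's MPI_Win_complete is outstanding. For comparison, a minimal hedged sketch (not taken from any of the codes collected here) in which MPI_Win_wait actually blocks until the origin completes its access epoch:

#include <mpi.h>

/* Hedged sketch, assuming exactly two ranks: rank 1 exposes a one-int
 * window to rank 0; its MPI_Win_wait blocks until rank 0's
 * MPI_Win_complete has ended the access epoch. */
int main(int argc, char *argv[])
{
    int rank, size, peer, buf = 0, one = 1;
    MPI_Win win;
    MPI_Group world_group, peer_group;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != 2) MPI_Abort(MPI_COMM_WORLD, 1);   /* sketch assumes two ranks */

    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    peer = 1 - rank;
    MPI_Group_incl(world_group, 1, &peer, &peer_group);

    MPI_Win_create(&buf, sizeof(int), sizeof(int), MPI_INFO_NULL,
                   MPI_COMM_WORLD, &win);
    if (rank == 0) {                       /* origin: access epoch */
        MPI_Win_start(peer_group, 0, win);
        MPI_Put(&one, 1, MPI_INT, 1, 0, 1, MPI_INT, win);
        MPI_Win_complete(win);             /* matches rank 1's wait */
    } else {                               /* target: exposure epoch */
        MPI_Win_post(peer_group, 0, win);
        MPI_Win_wait(win);                 /* blocks until rank 0 completes */
    }

    MPI_Group_free(&peer_group);
    MPI_Group_free(&world_group);
    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}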
inline void SpParHelper::WaitNFree(vector<MPI_Win> & arrwin)
{
    // End the exposure epochs for the arrays of the local matrices A and B.
    // The Wait() call matches calls to Complete() issued by ** EACH OF THE
    // ORIGIN PROCESSES ** that were granted access to the window during this epoch.
    for (unsigned int i = 0; i < arrwin.size(); ++i) {
        MPI_Win_wait(arrwin[i]);
    }
    FreeWindows(arrwin);
}
int main(int argc, char *argv[])
{
    int rank, nproc, i;
    int errors = 0, all_errors = 0;
    int buf, *my_buf;
    MPI_Win win;
    MPI_Group world_group;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create(&buf, sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN);
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);

    MPI_Win_post(world_group, 0, win);
    MPI_Win_start(world_group, 0, win);

    my_buf = malloc(nproc * sizeof(int));
    for (i = 0; i < nproc; i++) {
        MPI_Get(&my_buf[i], 1, MPI_INT, i, 0, 1, MPI_INT, win);
    }

    /* This should fail, because the window is in an active target access epoch. */
    CHECK_ERR(MPI_Win_start(world_group, 0, win));

    MPI_Win_complete(win);

    /* This should fail, because the window is not in an active target access epoch. */
    CHECK_ERR(MPI_Win_complete(win));

    MPI_Win_wait(win);
    MPI_Win_free(&win);
    free(my_buf);
    MPI_Group_free(&world_group);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0 && all_errors == 0) printf(" No Errors\n");
    MPI_Finalize();
    return 0;
}
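CHECK_ERR is used in this test (and the similar one further below) but its definition is not part of the excerpt. A plausible minimal definition, assuming the semantics implied by the comments (the wrapped call is expected to fail under MPI_ERRORS_RETURN, and an unexpected MPI_SUCCESS is counted in the local errors counter), might look like:

/* Hypothetical stand-in; the test suite's real CHECK_ERR is not shown.
 * Counts an error when a call that should fail returns MPI_SUCCESS. */
#define CHECK_ERR(fn)                                  \
    do {                                               \
        int check_err_rc = (fn);                       \
        if (check_err_rc == MPI_SUCCESS) errors++;     \
    } while (0)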
void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
                MPI_Group exposureGroup, MPI_Group accessGroup)
{
    int k, i, j, one = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_post(exposureGroup, 0, win);
        MPI_Win_start(accessGroup, 0, win);
        j = 0;
        for (i = 0; i < cnt; i++) {
            MPI_Accumulate(&one, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
            j += sz;
        }
        MPI_Win_complete(win);
        MPI_Win_wait(win);
    }
}
/*@C
   PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  array - array associated with window
.  epoch - close an epoch, must match argument to PetscSFGetWindow()
.  fenceassert - assertion to pass to MPI_Win_fence() when fence synchronization is used
-  win - window

   Level: developer

.seealso: PetscSFFindWindow()
@*/
static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,const void *array,PetscBool epoch,PetscMPIInt fenceassert,MPI_Win *win)
{
  PetscSF_Window *w = (PetscSF_Window*)sf->data;
  PetscErrorCode ierr;
  PetscSFWinLink *p,link;

  PetscFunctionBegin;
  for (p=&w->wins; *p; p=&(*p)->next) {
    link = *p;
    if (*win == link->win) {
      if (array != link->addr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
      if (epoch != link->epoch) {
        if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
        else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
      }
      *p = link->next;
      goto found;
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");

found:
  if (epoch) {
    switch (w->sync) {
    case PETSCSF_WINDOW_SYNC_FENCE:
      ierr = MPI_Win_fence(fenceassert,*win);CHKERRQ(ierr);
      break;
    case PETSCSF_WINDOW_SYNC_LOCK: /* handled outside */
      break;
    case PETSCSF_WINDOW_SYNC_ACTIVE:
      ierr = MPI_Win_complete(*win);CHKERRQ(ierr);
      ierr = MPI_Win_wait(*win);CHKERRQ(ierr);
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
    }
  }
  ierr = MPI_Win_free(&link->win);CHKERRQ(ierr);
  ierr = PetscFree(link);CHKERRQ(ierr);
  *win = MPI_WIN_NULL;
  PetscFunctionReturn(0);
}
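The PETSCSF_WINDOW_SYNC_ACTIVE branch closes both halves of active-target synchronization: MPI_Win_complete ends the access epoch, then MPI_Win_wait ends the exposure epoch. For orientation only, a hedged sketch of the opening counterpart that a PetscSFGetWindow()-style routine would be expected to perform; the function and group names here are hypothetical, not PETSc internals:

/* Hedged sketch (not PETSc source): every Complete/Wait pair closed in
 * PetscSFRestoreWindow() must match an earlier Post/Start pair on the
 * same window. 'ingroup' (ranks that will write to us) and 'outgroup'
 * (ranks we will write to) are illustrative names. */
static PetscErrorCode OpenActiveEpoch_Sketch(MPI_Group ingroup,MPI_Group outgroup,MPI_Win win)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Win_post(ingroup,0,win);CHKERRQ(ierr);   /* open exposure epoch */
  ierr = MPI_Win_start(outgroup,0,win);CHKERRQ(ierr); /* open access epoch */
  PetscFunctionReturn(0);
}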
int main(int argc, char *argv[])
{
    int rank, nproc, i;
    int errors = 0, errs = 0;
    int buf = 0, *my_buf;
    MPI_Win win;
    MPI_Group world_group;

    MTest_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create(&buf, sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN);
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);

    MPI_Win_post(world_group, 0, win);
    MPI_Win_start(world_group, 0, win);

    my_buf = malloc(nproc * sizeof(int));
    for (i = 0; i < nproc; i++) {
        MPI_Get(&my_buf[i], 1, MPI_INT, i, 0, 1, MPI_INT, win);
    }

    /* This should fail, because the window is in an active target epoch. */
    CHECK_ERR(MPI_Win_free(&win));

    MPI_Win_complete(win);
    MPI_Win_wait(win);

    MPI_Win_free(&win);
    free(my_buf);
    MPI_Group_free(&world_group);

    MTest_Finalize(errors);
    return MTestReturnValue(errs);
}
int main(int argc, char *argv[])
{
    int rank, destrank, nprocs, *A, *B, i;
    MPI_Comm CommDeuce;
    MPI_Group comm_group, group;
    MPI_Win win;
    int errs = 0;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs < 2) {
        printf("Run this program with 2 or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_split(MPI_COMM_WORLD, (rank < 2), rank, &CommDeuce);

    if (rank < 2) {
        i = MPI_Alloc_mem(SIZE * sizeof(int), MPI_INFO_NULL, &A);
        if (i) {
            printf("Can't allocate memory in test program\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        i = MPI_Alloc_mem(SIZE * sizeof(int), MPI_INFO_NULL, &B);
        if (i) {
            printf("Can't allocate memory in test program\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        MPI_Comm_group(CommDeuce, &comm_group);

        if (rank == 0) {
            for (i = 0; i < SIZE; i++) {
                A[i] = i;
                B[i] = SIZE + i;
            }
            MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, CommDeuce, &win);
            destrank = 1;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            MPI_Win_start(group, 0, win);
            MPI_Put(A, SIZE, MPI_INT, 1, 0, SIZE, MPI_INT, win);
            MPI_Win_complete(win);
            MPI_Send(B, SIZE, MPI_INT, 1, 100, MPI_COMM_WORLD);
        }
        else { /* rank=1 */
            for (i = 0; i < SIZE; i++)
                A[i] = B[i] = (-4) * i;
            MPI_Win_create(B, SIZE * sizeof(int), sizeof(int), MPI_INFO_NULL, CommDeuce, &win);
            destrank = 0;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            MPI_Win_post(group, 0, win);
            MPI_Recv(A, SIZE, MPI_INT, 0, 100, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            MPI_Win_wait(win);
            for (i = 0; i < SIZE; i++) {
                if (B[i] != i) {
                    SQUELCH(printf("Rank 1: Put Error: B[i] is %d, should be %d\n", B[i], i););
                    errs++;
                }
                if (A[i] != SIZE + i) {
                    SQUELCH(printf("Rank 1: Send/Recv Error: A[i] is %d, should be %d\n", A[i], SIZE + i););
int main(int argc, char **argv)
{
    int my_ID;              /* rank */
    int root;
    int m, n;               /* grid dimensions */
    double local_pipeline_time, /* timing parameters */
           pipeline_time, avgtime;
    double epsilon = 1.e-8; /* error tolerance */
    double corner_val;      /* verification value at top right corner of grid */
    int i, j, iter, ID;     /* dummies */
    int iterations;         /* number of times to run the pipeline algorithm */
    int *start, *end;       /* starts and ends of grid slices */
    int segment_size;
    int error = 0;          /* error flag */
    int Num_procs;          /* number of ranks */
    double *vector;         /* array holding grid values */
    long total_length;      /* total required length to store grid values */
    MPI_Status status;      /* completion status of message */
    MPI_Win rma_win;        /* RMA window object */
    MPI_Info rma_winfo;     /* info for window */
    MPI_Group world_group, origin_group, target_group;
    int origin_ranks[1], target_ranks[1];
    int nbr_segment_size;

    /*********************************************************************
    ** Initialize the MPI environment
    *********************************************************************/
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_ID);
    MPI_Comm_size(MPI_COMM_WORLD, &Num_procs);

    /* we set root equal to the highest rank, because this is also the rank
       that reports on the verification value */
    root = Num_procs - 1;

    /*********************************************************************
    ** process, test and broadcast input parameters
    *********************************************************************/
    if (my_ID == root) {
        if (argc != 4) {
            printf("Usage: %s <#iterations> <1st array dimension> <2nd array dimension>\n", *argv);
            error = 1;
            goto ENDOFTESTS;
        }
        iterations = atoi(*++argv);
        if (iterations < 1) {
            printf("ERROR: iterations must be >= 1 : %d \n", iterations);
            error = 1;
            goto ENDOFTESTS;
        }
        m = atoi(*++argv);
        n = atoi(*++argv);
        if (m < 1 || n < 1) {
            printf("ERROR: grid dimensions must be positive: %d, %d \n", m, n);
            error = 1;
            goto ENDOFTESTS;
        }
        if (m < Num_procs) {
            printf("ERROR: First grid dimension %d smaller than number of ranks %d\n", m, Num_procs);
            error = 1;
            goto ENDOFTESTS;
        }
        ENDOFTESTS:;
    }
    bail_out(error);

    if (my_ID == root) {
        printf("Parallel Research Kernels version %s\n", PRKVERSION);
        printf("MPIRMA pipeline execution on 2D grid\n");
        printf("Number of ranks            = %i\n", Num_procs);
        printf("Grid sizes                 = %d, %d\n", m, n);
        printf("Number of iterations       = %d\n", iterations);
#ifdef VERBOSE
        printf("Synchronizations/iteration = %d\n", (Num_procs - 1) * (n - 1));
#endif
    }

    /* broadcast benchmark data to all ranks */
    MPI_Bcast(&m, 1, MPI_INT, root, MPI_COMM_WORLD);
    MPI_Bcast(&n, 1, MPI_INT, root, MPI_COMM_WORLD);
    MPI_Bcast(&iterations, 1, MPI_INT, root, MPI_COMM_WORLD);

    start = (int *) malloc(2 * Num_procs * sizeof(int));
    if (!start) {
        printf("ERROR: Could not allocate space for array of slice boundaries\n");
        exit(EXIT_FAILURE);
    }
    end = start + Num_procs;
    start[0] = 0;
    for (ID = 0; ID < Num_procs; ID++) {
        segment_size = m / Num_procs;
        if (ID < (m % Num_procs)) segment_size++;
        if (ID > 0) start[ID] = end[ID - 1] + 1;
        end[ID] = start[ID] + segment_size - 1;
    }

    /* now set segment_size to the value needed by the calling rank */
    segment_size = end[my_ID] - start[my_ID] + 1;

    /* RMA win info */
    MPI_Info_create(&rma_winfo);
    /* This key indicates that passive target RMA will not be used.
     * It is the one info key that MPICH actually uses for optimization. */
    MPI_Info_set(rma_winfo, "no_locks", "true");

    /* total_length takes into account one ghost cell on left side of segment */
    total_length = ((end[my_ID] - start[my_ID] + 1) + 1) * n;
    vector = (double *) malloc(total_length * sizeof(double));
    MPI_Win_create(vector, total_length * sizeof(double), sizeof(double),
                   rma_winfo, MPI_COMM_WORLD, &rma_win);
    /* MPI_Win_allocate(total_length*sizeof(double), sizeof(double), rma_winfo,
                        MPI_COMM_WORLD, (void *) &vector, &rma_win); */
    if (vector == NULL) {
        printf("Could not allocate space for grid slice of %d by %d points", segment_size, n);
        printf(" on rank %d\n", my_ID);
        error = 1;
    }
    bail_out(error);

    /* clear the array */
    for (j = 0; j < n; j++)
        for (i = start[my_ID] - 1; i <= end[my_ID]; i++) {
            ARRAY(i - start[my_ID], j) = 0.0;
        }

    /* set boundary values (bottom and left side of grid) */
    if (my_ID == 0)
        for (j = 0; j < n; j++) ARRAY(0, j) = (double) j;
    for (i = start[my_ID] - 1; i <= end[my_ID]; i++)
        ARRAY(i - start[my_ID], 0) = (double) i;

    /* redefine start and end for calling rank to reflect local indices */
    if (my_ID == 0) start[my_ID] = 1;
    else start[my_ID] = 0;
    end[my_ID] = segment_size - 1;

    /* set up origin and target rank groups for PSCW */
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    /* target group consists of rank my_ID+1, the right neighbor */
    if (my_ID < Num_procs - 1) target_ranks[0] = my_ID + 1;
    else target_ranks[0] = 0;
    MPI_Group_incl(world_group, 1, target_ranks, &target_group);

    /* origin group consists of rank my_ID-1, the left neighbor */
    if (my_ID > 0) origin_ranks[0] = my_ID - 1;
    else origin_ranks[0] = Num_procs - 1;
    MPI_Group_incl(world_group, 1, origin_ranks, &origin_group);

    /* set neighbor segment size */
    if (my_ID != Num_procs - 1) nbr_segment_size = end[my_ID + 1] - start[my_ID + 1] + 1;
    else nbr_segment_size = end[0] - start[0] + 1;

    for (iter = 0; iter <= iterations; iter++) {
        /* start timer after a warmup iteration */
        if (iter == 1) {
            MPI_Barrier(MPI_COMM_WORLD);
            local_pipeline_time = wtime();
        }

        /* execute pipeline algorithm for grid lines 1 through n-1 (skip bottom line) */
        for (j = 1; j < n; j++) {
            /* if I am not at the left boundary, I need to wait for my left neighbor to send data */
            if (my_ID > 0) {
                /* exposure epoch at target */
                MPI_Win_post(origin_group, MPI_MODE_NOSTORE, rma_win);
                MPI_Win_wait(rma_win);
            }

            for (i = start[my_ID]; i <= end[my_ID]; i++) {
                ARRAY(i, j) = ARRAY(i - 1, j) + ARRAY(i, j - 1) - ARRAY(i - 1, j - 1);
            }

            /* if I am not on the right boundary, send data to my right neighbor */
            if (my_ID != Num_procs - 1) {
                /* access epoch at origin */
                MPI_Win_start(target_group, 0, rma_win);
                MPI_Put(&(ARRAY(end[my_ID], j)), 1, MPI_DOUBLE, my_ID + 1,
                        NBR_INDEX(0, j), 1, MPI_DOUBLE, rma_win);
                MPI_Win_complete(rma_win);
            }
        }

        /* copy top right corner value to bottom left corner to create dependency */
        if (Num_procs > 1) {
            if (my_ID == root) {
                corner_val = -ARRAY(end[my_ID], n - 1);
                MPI_Win_start(target_group, 0, rma_win);
                MPI_Put(&corner_val, 1, MPI_DOUBLE, 0, NBR_INDEX(1, 0), 1, MPI_DOUBLE, rma_win);
                MPI_Win_complete(rma_win);
            }
            if (my_ID == 0) {
                MPI_Win_post(origin_group, MPI_MODE_NOSTORE, rma_win);
                MPI_Win_wait(rma_win);
            }
        }
        else ARRAY(0, 0) = -ARRAY(end[my_ID], n - 1);
    }

    local_pipeline_time = wtime() - local_pipeline_time;
    MPI_Reduce(&local_pipeline_time, &pipeline_time, 1, MPI_DOUBLE, MPI_MAX,
               root, MPI_COMM_WORLD);

    /*********************************************************************
    ** Analyze and output results.
    *********************************************************************/

    /* verify correctness, using top right value */
    corner_val = (double) ((iterations + 1) * (m + n - 2));
    if (my_ID == root) {
        /* use fabs: the checksum and verification value are doubles */
        if (fabs(ARRAY(end[my_ID], n - 1) - corner_val) / corner_val >= epsilon) {
            printf("ERROR: checksum %lf does not match verification value %lf\n",
                   ARRAY(end[my_ID], n - 1), corner_val);
            error = 1;
        }
    }
    bail_out(error);

    if (my_ID == root) {
        avgtime = pipeline_time / iterations;
#ifdef VERBOSE
        printf("Solution validates; verification value = %lf\n", corner_val);
        printf("Point-to-point synchronizations/s: %lf\n",
               ((float) ((n - 1) * (Num_procs - 1))) / (avgtime));
#else
        printf("Solution validates\n");
#endif
        printf("Rate (MFlops/s): %lf, Avg time (s): %lf\n",
               1.0E-06 * 2 * ((double) ((m - 1) * (n - 1))) / avgtime, avgtime);
    }

    MPI_Win_free(&rma_win);
    MPI_Info_free(&rma_winfo);
    MPI_Finalize();
    exit(EXIT_SUCCESS);
} /* end of main */
int main(int argc, char *argv[])
{
    int rank, destrank, nprocs, i;
    int size, page_size, no_hints = 0;
    char *A, *B;
    char *s_buf, *r_buf;
    MPI_Group comm_group, group;
    MPI_Win win;
    MPI_Info win_info;
    double t_start = 0.0, t_end = 0.0;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs != 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires exactly two processes\n");
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    while (1) {
        static struct option long_options[] = {
            {"no-hints", no_argument, NULL, 'n'},
            {0, 0, 0, 0}
        };
        int option, index;

        option = getopt_long(argc, argv, "n::", long_options, &index);
        if (option == -1) {
            break;
        }
        switch (option) {
            case 'n':
                no_hints = 1;
                break;
            default:
                if (rank == 0) {
                    fprintf(stderr, "Invalid Option \n");
                }
                MPI_Finalize();
                return EXIT_FAILURE;
        }
    }

    if (no_hints == 0) {
        /* Providing MVAPICH2 specific hint to allocate memory
         * in shared space. MVAPICH2 optimizes communication
         * on windows created in this memory */
        MPI_Info_create(&win_info);
        MPI_Info_set(win_info, "alloc_shm", "true");
        MPI_Alloc_mem(MYBUFSIZE, win_info, &A);
    } else {
        MPI_Alloc_mem(MYBUFSIZE, MPI_INFO_NULL, &A);
    }
    if (NULL == A) {
        fprintf(stderr, "[%d] Buffer Allocation Failed \n", rank);
        exit(-1);
    }

    MPI_Alloc_mem(MYBUFSIZE, MPI_INFO_NULL, &B);
    if (NULL == B) {
        fprintf(stderr, "[%d] Buffer Allocation Failed \n", rank);
        exit(-1);
    }

    page_size = getpagesize();
    assert(page_size <= MAX_ALIGNMENT);

    s_buf = (char *) (((unsigned long) A + (page_size - 1)) / page_size * page_size);
    r_buf = (char *) (((unsigned long) B + (page_size - 1)) / page_size * page_size);

    memset(r_buf, 0, MAX_SIZE);
    memset(s_buf, 1, MAX_SIZE);

    if (rank == 0) {
        fprintf(stdout, HEADER);
        fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)");
        fflush(stdout);
    }

    MPI_Comm_group(MPI_COMM_WORLD, &comm_group);

    for (size = 0; size <= MAX_SIZE; size = (size ? size * 2 : 1)) {
        if (size > large_message_size) {
            loop = loop_large;
            skip = skip_large;
        }

        MPI_Win_create(s_buf, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

        if (rank == 0) {
            destrank = 1;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            MPI_Barrier(MPI_COMM_WORLD);

            for (i = 0; i < skip + loop; i++) {
                MPI_Win_start(group, 0, win);
                if (i == skip) {
                    t_start = MPI_Wtime();
                }
                MPI_Get(r_buf, size, MPI_CHAR, 1, 0, size, MPI_CHAR, win);
                MPI_Win_complete(win);
                MPI_Win_post(group, 0, win);
                MPI_Win_wait(win);
            }
            t_end = MPI_Wtime();
        } else { /* rank=1 */
            destrank = 0;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            MPI_Barrier(MPI_COMM_WORLD);

            for (i = 0; i < skip + loop; i++) {
                MPI_Win_post(group, 0, win);
                MPI_Win_wait(win);
                MPI_Win_start(group, 0, win);
                MPI_Get(r_buf, size, MPI_CHAR, 0, 0, size, MPI_CHAR, win);
                MPI_Win_complete(win);
            }
        }

        MPI_Barrier(MPI_COMM_WORLD);

        if (rank == 0) {
            fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, FLOAT_PRECISION,
                    (t_end - t_start) * 1.0e6 / loop / 2);
            fflush(stdout);
        }

        MPI_Group_free(&group);
        MPI_Win_free(&win);
    }

    if (no_hints == 0) {
        MPI_Info_free(&win_info);
    }

    MPI_Free_mem(A);
    MPI_Free_mem(B);
    MPI_Group_free(&comm_group);
    MPI_Finalize();
    return EXIT_SUCCESS;
}
int main(int argc, char *argv[])
{
    int rank, destrank, nprocs, A[SIZE2], B[SIZE2], i;
    MPI_Comm CommDeuce;
    MPI_Group comm_group, group;
    MPI_Win win;
    int errs = 0;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs < 2) {
        printf("Run this program with 2 or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_split(MPI_COMM_WORLD, (rank < 2), rank, &CommDeuce);

    if (rank < 2) {
        MPI_Comm_group(CommDeuce, &comm_group);

        if (rank == 0) {
            for (i = 0; i < SIZE2; i++) A[i] = B[i] = i;
            MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, CommDeuce, &win);
            destrank = 1;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            MPI_Win_start(group, 0, win);
            for (i = 0; i < SIZE1; i++)
                MPI_Put(A + i, 1, MPI_INT, 1, i, 1, MPI_INT, win);
            for (i = 0; i < SIZE1; i++)
                MPI_Get(B + i, 1, MPI_INT, 1, SIZE1 + i, 1, MPI_INT, win);
            MPI_Win_complete(win);
            for (i = 0; i < SIZE1; i++)
                if (B[i] != (-4) * (i + SIZE1)) {
                    printf("Get Error: B[i] is %d, should be %d\n", B[i], (-4) * (i + SIZE1));
                    errs++;
                }
        }
        else if (rank == 1) {
            for (i = 0; i < SIZE2; i++) B[i] = (-4) * i;
            MPI_Win_create(B, SIZE2 * sizeof(int), sizeof(int), MPI_INFO_NULL, CommDeuce, &win);
            destrank = 0;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            MPI_Win_post(group, 0, win);
            MPI_Win_wait(win);
            for (i = 0; i < SIZE1; i++) {
                if (B[i] != i) {
                    printf("Put Error: B[i] is %d, should be %d\n", B[i], i);
                    errs++;
                }
            }
        }

        MPI_Group_free(&group);
        MPI_Group_free(&comm_group);
        MPI_Win_free(&win);
    }
    MPI_Comm_free(&CommDeuce);
    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
void mpi_win_wait_f(MPI_Fint *win, MPI_Fint *ierr)
{
    MPI_Win c_win = MPI_Win_f2c(*win);

    *ierr = OMPI_INT_2_FINT(MPI_Win_wait(c_win));
}
int main( int argc, char *argv[] )
{
    int errs = 0, err;
    int rank, size, source, dest;
    int minsize = 2, count;
    MPI_Comm comm;
    MPI_Win win;
    MPI_Aint extent;
    MPI_Group wingroup, neighbors;
    MTestDatatype sendtype, recvtype;

    MTest_Init( &argc, &argv );

    /* The following illustrates the use of the routines to run through a
       selection of communicators and datatypes.  Use subsets of these for
       tests that do not involve combinations of communicators, datatypes,
       and counts of datatypes. */
    while (MTestGetIntracommGeneral( &comm, minsize, 1 )) {
        if (comm == MPI_COMM_NULL) continue;

        /* Determine the sender and receiver */
        MPI_Comm_rank( comm, &rank );
        MPI_Comm_size( comm, &size );
        source = 0;
        dest = size - 1;

        for (count = 1; count < 65000; count = count * 2) {
            while (MTestGetDatatypes( &sendtype, &recvtype, count )) {
                /* Make sure that everyone has a recv buffer */
                recvtype.InitBuf( &recvtype );

                MPI_Type_extent( recvtype.datatype, &extent );
                MPI_Win_create( recvtype.buf, recvtype.count * extent,
                                (int)extent, MPI_INFO_NULL, comm, &win );
                MPI_Win_get_group( win, &wingroup );
                if (rank == source) {
                    /* To improve reporting of problems about operations, we
                       change the error handler to errors return */
                    MPI_Win_set_errhandler( win, MPI_ERRORS_RETURN );
                    sendtype.InitBuf( &sendtype );

                    /* Neighbor is dest only */
                    MPI_Group_incl( wingroup, 1, &dest, &neighbors );
                    err = MPI_Win_start( neighbors, 0, win );
                    if (err) {
                        errs++;
                        if (errs < 10) {
                            MTestPrintError( err );
                        }
                    }
                    MPI_Group_free( &neighbors );
                    err = MPI_Put( sendtype.buf, sendtype.count, sendtype.datatype,
                                   dest, 0, recvtype.count, recvtype.datatype, win );
                    if (err) {
                        errs++;
                        MTestPrintError( err );
                    }
                    err = MPI_Win_complete( win );
                    if (err) {
                        errs++;
                        if (errs < 10) {
                            MTestPrintError( err );
                        }
                    }
                }
                else if (rank == dest) {
                    MPI_Group_incl( wingroup, 1, &source, &neighbors );
                    MPI_Win_post( neighbors, 0, win );
                    MPI_Group_free( &neighbors );
                    MPI_Win_wait( win );
                    /* This should have the same effect, in terms of
                       transferring data, as a send/recv pair */
                    err = MTestCheckRecv( 0, &recvtype );
                    if (err) {
                        errs += err;
                    }
                }
                else {
                    /* Nothing; the other processes need not call any
                       MPI routines */
                    ;
                }
                MPI_Win_free( &win );
                MTestFreeDatatype( &sendtype );
                MTestFreeDatatype( &recvtype );
                MPI_Group_free( &wingroup );
            }
        }
        MTestFreeComm( &comm );
    }
    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
int main(int argc, char **argv)
{
    MPI_Aint win_size = WIN_SIZE;
    MPI_Win win;
    MPI_Group group;
    char *base;
    int disp_unit = 1;
    int rank, size, target_rank, target_disp = 1;
    int r, flag;

    /*************************************************************/
    /* Init and set values                                       */
    /*************************************************************/
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    target_rank = (rank + 1) % size;

    MPI_Alloc_mem(WIN_SIZE, MPI_INFO_NULL, &base);
    if (NULL == base) {
        printf("failed to alloc %d\n", WIN_SIZE);
        exit(16);
    }

    /*************************************************************/
    /* Win_create                                                */
    /*************************************************************/
    /* MPI_Win_create(void *base, MPI_Aint size, int disp_unit,
                      MPI_Info info, MPI_Comm comm, MPI_Win *win); */
    r = MPI_Win_create(base, win_size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_create\n", rank);

    /*************************************************************/
    /* First epoch: Tests Put, Get, Get_group, Post, Start,      */
    /* Complete, Wait, Lock, Unlock                              */
    /*************************************************************/
    r = MPI_Win_get_group(win, &group);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_get_group\n", rank);

    r = MPI_Win_post(group, 0, win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_post\n", rank);

    r = MPI_Win_start(group, 0, win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_start\n", rank);

    r = MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_lock\n", rank);

    /* MPI_Put(void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
               int target_rank, MPI_Aint target_disp, int target_count,
               MPI_Datatype target_datatype, MPI_Win win) */
    r = MPI_Put(base, WIN_SIZE, MPI_BYTE, target_rank, target_disp, WIN_SIZE, MPI_BYTE, win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Put\n", rank);

    r = MPI_Win_unlock(target_rank, win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_unlock\n", rank);

    /* MPI_Get(void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
               int target_rank, MPI_Aint target_disp, int target_count,
               MPI_Datatype target_datatype, MPI_Win win); */
    r = MPI_Get(base, WIN_SIZE, MPI_BYTE, target_rank, target_disp, WIN_SIZE, MPI_BYTE, win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Get\n", rank);

    r = MPI_Win_complete(win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_complete\n", rank);

    r = MPI_Win_test(win, &flag);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_test\n", rank);

    r = MPI_Win_wait(win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_wait\n", rank);

    /*************************************************************/
    /* Second epoch: Tests Accumulate and Fence                  */
    /*************************************************************/
    r = MPI_Win_fence(0, win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_fence\n", rank);

    if (rank == 0) {
        /* MPI_Accumulate(void *origin_addr, int origin_count,
                          MPI_Datatype origin_datatype, int target_rank,
                          MPI_Aint target_disp, int target_count,
                          MPI_Datatype target_datatype, MPI_Op op, MPI_Win win) */
        r = MPI_Accumulate(base, WIN_SIZE, MPI_BYTE, 0, target_disp, WIN_SIZE, MPI_BYTE, MPI_SUM, win);
        if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Accumulate\n", rank);
    }

    r = MPI_Win_fence(0, win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_fence\n", rank);

    /*************************************************************/
    /* Win_free and Finalize                                     */
    /*************************************************************/
    r = MPI_Win_free(&win);
    if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_free\n", rank);

    /* memory from MPI_Alloc_mem must be released with MPI_Free_mem, not free() */
    MPI_Free_mem(base);
    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[])
{
    int rank, destrank, nprocs, A[SIZE2], B[SIZE2], i;
    MPI_Group comm_group, group;
    MPI_Win win;

    MPI_Init(&argc, &argv);
    Test_Init_No_File();
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs != 2) {
        printf("Run this program with 2 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_group(MPI_COMM_WORLD, &comm_group);

    if (rank == 0) {
        for (i = 0; i < SIZE2; i++) A[i] = B[i] = i;
        MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
        destrank = 1;
        MPI_Group_incl(comm_group, 1, &destrank, &group);
        MPI_Win_start(group, 0, win);
        for (i = 0; i < SIZE1; i++)
            MPI_Put(A + i, 1, MPI_INT, 1, i, 1, MPI_INT, win);
        for (i = 0; i < SIZE1; i++)
            MPI_Get(B + i, 1, MPI_INT, 1, SIZE1 + i, 1, MPI_INT, win);
        MPI_Win_complete(win);
        for (i = 0; i < SIZE1; i++)
            if (B[i] != (-4) * (i + SIZE1)) {
                printf("Get Error: B[i] is %d, should be %d\n", B[i], (-4) * (i + SIZE1));
                Test_Failed(NULL);
            }
    }
    else { /* rank=1 */
        for (i = 0; i < SIZE2; i++) B[i] = (-4) * i;
        MPI_Win_create(B, SIZE2 * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
        destrank = 0;
        MPI_Group_incl(comm_group, 1, &destrank, &group);
        MPI_Win_post(group, 0, win);
        MPI_Win_wait(win);
        for (i = 0; i < SIZE1; i++) {
            if (B[i] != i) {
                printf("Put Error: B[i] is %d, should be %d\n", B[i], i);
                Test_Failed(NULL);
            }
        }
    }

    MPI_Group_free(&group);
    MPI_Group_free(&comm_group);
    MPI_Win_free(&win);
    Test_Waitforall();
    Test_Global_Summary();
    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[])
{
    int rank, destrank, nprocs, i;
    MPI_Group comm_group, group;
    MPI_Win win;
    int loop;
    int size;
    double t_start, t_end;
    int count, align_size;
    int *s_buf;
    int *r_buf;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_group(MPI_COMM_WORLD, &comm_group);

    loop = LOOP;
    align_size = MESSAGE_ALIGNMENT;

    s_buf = (int *) (((unsigned long) s_buf_original + (align_size - 1)) / align_size * align_size);
    r_buf = (int *) (((unsigned long) r_buf_original + (align_size - 1)) / align_size * align_size);

    for (i = 0; i < MAX_SIZE / sizeof(int); i++) {
        r_buf[i] = i;
        s_buf[i] = 2 * i;
    }

    if (rank == 0) {
        fprintf(stdout, "# OSU MPI2 Accumulate Latency Test (Version 1.0)\n");
        fprintf(stdout, "# Size\t\tLatency (us) \n");
    }

    for (count = 0; count <= MAX_SIZE / sizeof(int); count = (count ? count * 2 : count + 1)) {
        size = count * sizeof(int);
        if (rank == 0) {
            MPI_Win_create(r_buf, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
            destrank = 1;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            MPI_Barrier(MPI_COMM_WORLD);
            for (i = 0; i < SKIP + loop; i++) {
                MPI_Win_start(group, 0, win);
                if (i == SKIP) t_start = MPI_Wtime();
                MPI_Accumulate(s_buf, count, MPI_INT, 1, 0, count, MPI_INT, MPI_SUM, win);
                MPI_Win_complete(win);
                MPI_Win_post(group, 0, win);
                MPI_Win_wait(win);
            }
            t_end = MPI_Wtime();
            MPI_Win_free(&win);
        } else {
            MPI_Win_create(r_buf, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
            destrank = 0;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            MPI_Barrier(MPI_COMM_WORLD);
            for (i = 0; i < SKIP + loop; i++) {
                MPI_Win_post(group, 0, win);
                MPI_Win_wait(win);
                MPI_Win_start(group, 0, win);
                MPI_Accumulate(s_buf, count, MPI_INT, 0, 0, count, MPI_INT, MPI_SUM, win);
                MPI_Win_complete(win);
            }
            MPI_Win_free(&win);
        }

        if (rank == 0) {
            printf("%d\t\t%f\n", size, (t_end - t_start) * 1.0e6 / loop / 2);
            fflush(stdout);
        }
    } /* end of for loop */

    MPI_Finalize();
    return 0;
}
FORT_DLL_SPEC void FORT_CALL mpi_win_wait_ ( MPI_Fint *v1, MPI_Fint *ierr )
{
    *ierr = MPI_Win_wait( (MPI_Win)*v1 );
}
/*
 * Class:     mpi_Win
 * Method:    waitFor
 * Signature: (J)V
 */
JNIEXPORT void JNICALL Java_mpi_Win_waitFor(JNIEnv *env, jobject jthis, jlong win)
{
    int rc = MPI_Win_wait((MPI_Win)win);
    ompi_java_exceptionCheck(env, rc);
}
int main(int argc, char *argv[])
{
    int myid, numprocs, i, j;
    int size, loop, page_size;
    char *s_buf, *r_buf;
    double t_start = 0.0, t_end = 0.0, t = 0.0;
    int destrank;
    MPI_Group comm_group, group;
    MPI_Win win;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_group(MPI_COMM_WORLD, &comm_group);

    if (numprocs != 2) {
        if (myid == 0) {
            fprintf(stderr, "This test requires exactly two processes\n");
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    loop = LOOP;
    page_size = getpagesize();
    assert(page_size <= MAX_ALIGNMENT);

    s_buf = (char *) (((unsigned long) s_buf1 + (page_size - 1)) / page_size * page_size);
    r_buf = (char *) (((unsigned long) r_buf1 + (page_size - 1)) / page_size * page_size);
    assert((s_buf != NULL) && (r_buf != NULL));
    assert(MAX_SIZE * WINDOW_SIZE < MYBUFSIZE);

    if (myid == 0) {
        fprintf(stdout, "# %s %s\n", BENCHMARK, OMB_VERSION);
        fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Bandwidth (MB/s)");
        fflush(stdout);
    }

    /* Bandwidth test */
    for (size = 1; size <= MAX_SIZE; size *= 2) {
        /* Window creation and warming-up */
        if (myid == 0) {
            MPI_Win_create(s_buf, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
            destrank = 1;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            for (i = 0; i < WARMUP; i++) {
                MPI_Win_start(group, 0, win);
                MPI_Get((char *) ((unsigned long) r_buf + i * size), size, MPI_CHAR,
                        1, 0, size, MPI_CHAR, win);
                MPI_Win_complete(win);
            }
        } else {
            MPI_Win_create(s_buf, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
            destrank = 0;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            for (i = 0; i < WARMUP; i++) {
                MPI_Win_post(group, 0, win);
                MPI_Win_wait(win);
            }
        }

        MPI_Barrier(MPI_COMM_WORLD);

        if (myid == 0) {
            t_start = MPI_Wtime();
            for (i = 0; i < loop; i++) {
                MPI_Win_start(group, 0, win);
                for (j = 0; j < WINDOW_SIZE; j++) {
                    MPI_Get((char *) ((unsigned long) r_buf + j * size), size, MPI_CHAR,
                            1, 0, size, MPI_CHAR, win);
                }
                MPI_Win_complete(win);
            }
            t_end = MPI_Wtime();
            t = t_end - t_start;
            MPI_Barrier(MPI_COMM_WORLD);
        } else {
            for (i = 0; i < loop; i++) {
                MPI_Win_post(group, 0, win);
                MPI_Win_wait(win);
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }

        if (myid == 0) {
            double tmp = size / 1e6 * loop * WINDOW_SIZE;
            fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, FLOAT_PRECISION, tmp / t);
            fflush(stdout);
        }

        MPI_Win_free(&win);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
    return EXIT_SUCCESS;
}
/* Run PUT with Post/Start/Complete/Wait */
void run_put_with_pscw(int rank, WINDOW type)
{
    double t;
    int destrank, size, i, j;
    MPI_Aint disp = 0;
    MPI_Win win;
    MPI_Group comm_group, group;

    MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &comm_group));

    int window_size = WINDOW_SIZE_LARGE;
    for (size = 1; size <= MAX_SIZE; size = size * 2) {
        allocate_memory(rank, sbuf_original, rbuf_original, &sbuf, &rbuf, &rbuf,
                        size * window_size, type, &win);

#if MPI_VERSION >= 3
        if (type == WIN_DYNAMIC) {
            disp = disp_remote;
        }
#endif

        if (size > LARGE_MESSAGE_SIZE) {
            loop = LOOP_LARGE;
            skip = SKIP_LARGE;
        }

        MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

        if (rank == 0) {
            destrank = 1;
            MPI_CHECK(MPI_Group_incl(comm_group, 1, &destrank, &group));

            for (i = 0; i < skip + loop; i++) {
                if (i == skip) {
                    t_start = MPI_Wtime();
                }
                MPI_CHECK(MPI_Win_post(group, 0, win));
                MPI_CHECK(MPI_Win_start(group, 0, win));
                for (j = 0; j < window_size; j++) {
                    MPI_CHECK(MPI_Put(sbuf + j * size, size, MPI_CHAR, 1,
                                      disp + (j * size), size, MPI_CHAR, win));
                }
                MPI_CHECK(MPI_Win_complete(win));
                MPI_CHECK(MPI_Win_wait(win));
            }
            t_end = MPI_Wtime();
            t = t_end - t_start;
        } else {
            destrank = 0;
            MPI_CHECK(MPI_Group_incl(comm_group, 1, &destrank, &group));

            for (i = 0; i < skip + loop; i++) {
                MPI_CHECK(MPI_Win_post(group, 0, win));
                MPI_CHECK(MPI_Win_start(group, 0, win));
                for (j = 0; j < window_size; j++) {
                    MPI_CHECK(MPI_Put(sbuf + j * size, size, MPI_CHAR, 0,
                                      disp + (j * size), size, MPI_CHAR, win));
                }
                MPI_CHECK(MPI_Win_complete(win));
                MPI_CHECK(MPI_Win_wait(win));
            }
        }

        MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
        print_bibw(rank, size, t);

        MPI_CHECK(MPI_Group_free(&group));
        free_memory(sbuf, rbuf, win, rank);
    }
    MPI_CHECK(MPI_Group_free(&comm_group));
}
/* Run ACC with Post/Start/Complete/Wait */
void run_acc_with_pscw(int rank, WINDOW type)
{
    int destrank, size, i;
    MPI_Aint disp = 0;
    MPI_Win win;
    MPI_Group comm_group, group;

    MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &comm_group));

    for (size = 0; size <= MAX_SIZE; size = (size ? size * 2 : 1)) {
        allocate_memory(rank, sbuf_original, rbuf_original, &sbuf, &rbuf, &sbuf,
                        size, type, &win);

#if MPI_VERSION >= 3
        if (type == WIN_DYNAMIC) {
            disp = disp_remote;
        }
#endif

        if (size > LARGE_MESSAGE_SIZE) {
            loop = LOOP_LARGE;
            skip = SKIP_LARGE;
        }

        if (rank == 0) {
            destrank = 1;
            MPI_CHECK(MPI_Group_incl(comm_group, 1, &destrank, &group));
            MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

            for (i = 0; i < skip + loop; i++) {
                MPI_CHECK(MPI_Win_start(group, 0, win));
                if (i == skip) {
                    t_start = MPI_Wtime();
                }
                MPI_CHECK(MPI_Accumulate(sbuf, size, MPI_CHAR, 1, disp, size,
                                         MPI_CHAR, MPI_SUM, win));
                MPI_CHECK(MPI_Win_complete(win));
                MPI_CHECK(MPI_Win_post(group, 0, win));
                MPI_CHECK(MPI_Win_wait(win));
            }
            t_end = MPI_Wtime();
        } else { /* rank=1 */
            destrank = 0;
            MPI_CHECK(MPI_Group_incl(comm_group, 1, &destrank, &group));
            MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

            for (i = 0; i < skip + loop; i++) {
                MPI_CHECK(MPI_Win_post(group, 0, win));
                MPI_CHECK(MPI_Win_wait(win));
                MPI_CHECK(MPI_Win_start(group, 0, win));
                MPI_CHECK(MPI_Accumulate(sbuf, size, MPI_CHAR, 0, disp, size,
                                         MPI_CHAR, MPI_SUM, win));
                MPI_CHECK(MPI_Win_complete(win));
            }
        }

        MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

        if (rank == 0) {
            fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, FLOAT_PRECISION,
                    (t_end - t_start) * 1.0e6 / loop / 2);
            fflush(stdout);
        }

        MPI_CHECK(MPI_Group_free(&group));
        free_memory(sbuf, rbuf, win, rank);
    }
    MPI_CHECK(MPI_Group_free(&comm_group));
}
int main(int argc, char *argv[])
{
    int rank, nprocs, A[NROWS][NCOLS], i, j, destrank;
    MPI_Win win;
    MPI_Datatype column, xpose;
    MPI_Group comm_group, group;
    int errs = 0;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs != 2) {
        printf("Run this program with 2 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_group(MPI_COMM_WORLD, &comm_group);

    if (rank == 0) {
        for (i = 0; i < NROWS; i++)
            for (j = 0; j < NCOLS; j++)
                A[i][j] = i * NCOLS + j;

        /* create datatype for one column */
        MPI_Type_vector(NROWS, 1, NCOLS, MPI_INT, &column);
        /* create datatype for matrix in column-major order */
        MPI_Type_hvector(NCOLS, 1, sizeof(int), column, &xpose);
        MPI_Type_commit(&xpose);

        MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
        destrank = 1;
        MPI_Group_incl(comm_group, 1, &destrank, &group);
        MPI_Win_start(group, 0, win);
        MPI_Put(A, NROWS * NCOLS, MPI_INT, 1, 0, 1, xpose, win);
        MPI_Type_free(&column);
        MPI_Type_free(&xpose);
        MPI_Win_complete(win);
    }
    else { /* rank = 1 */
        for (i = 0; i < NROWS; i++)
            for (j = 0; j < NCOLS; j++)
                A[i][j] = -1;

        MPI_Win_create(A, NROWS * NCOLS * sizeof(int), sizeof(int), MPI_INFO_NULL,
                       MPI_COMM_WORLD, &win);
        destrank = 0;
        MPI_Group_incl(comm_group, 1, &destrank, &group);
        MPI_Win_post(group, 0, win);
        MPI_Win_wait(win);

        for (j = 0; j < NCOLS; j++) {
            for (i = 0; i < NROWS; i++) {
                if (A[j][i] != i * NCOLS + j) {
                    if (errs < 50) {
                        printf("Error: A[%d][%d]=%d should be %d\n", j, i, A[j][i], i * NCOLS + j);
                    }
                    errs++;
                }
            }
        }
        if (errs >= 50) {
            printf("Total number of errors: %d\n", errs);
        }
    }

    MPI_Group_free(&group);
    MPI_Group_free(&comm_group);
    MPI_Win_free(&win);
    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[])
{
    int myid, numprocs, i, j;
    int size, page_size;
    char *s_buf, *r_buf;
    char *s_buf1, *r_buf1;
    double t_start = 0.0, t_end = 0.0, t = 0.0;
    int destrank, no_hints = 0;
    MPI_Group comm_group, group;
    MPI_Win win;
    MPI_Info win_info;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_group(MPI_COMM_WORLD, &comm_group);

    if (numprocs != 2) {
        if (myid == 0) {
            fprintf(stderr, "This test requires exactly two processes\n");
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    while (1) {
        static struct option long_options[] = {
            {"no-hints", no_argument, NULL, 'n'},
            {0, 0, 0, 0}
        };
        int option, index;

        option = getopt_long(argc, argv, "n::", long_options, &index);
        if (option == -1) {
            break;
        }
        switch (option) {
            case 'n':
                no_hints = 1;
                break;
            default:
                if (myid == 0) {
                    fprintf(stderr, "Invalid Option \n");
                }
                MPI_Finalize();
                return EXIT_FAILURE;
        }
    }

    page_size = getpagesize();
    assert(page_size <= MAX_ALIGNMENT);

    MPI_Alloc_mem(MAX_MSG_SIZE * window_size + MAX_ALIGNMENT, MPI_INFO_NULL, &s_buf1);
    if (NULL == s_buf1) {
        fprintf(stderr, "[%d] Buffer Allocation Failed \n", myid);
        exit(-1);
    }

    if (no_hints == 0) {
        /* Providing MVAPICH2 specific hint to allocate memory
         * in shared space. MVAPICH2 optimizes communication
         * on windows created in this memory */
        MPI_Info_create(&win_info);
        MPI_Info_set(win_info, "alloc_shm", "true");
        MPI_Alloc_mem(MAX_MSG_SIZE * window_size + MAX_ALIGNMENT, win_info, &r_buf1);
    } else {
        MPI_Alloc_mem(MAX_MSG_SIZE * window_size + MAX_ALIGNMENT, MPI_INFO_NULL, &r_buf1);
    }
    if (NULL == r_buf1) {
        fprintf(stderr, "[%d] Buffer Allocation Failed \n", myid);
        exit(-1);
    }

    s_buf = (char *) (((unsigned long) s_buf1 + (page_size - 1)) / page_size * page_size);
    r_buf = (char *) (((unsigned long) r_buf1 + (page_size - 1)) / page_size * page_size);
    assert((s_buf != NULL) && (r_buf != NULL));

    if (myid == 0) {
        fprintf(stdout, HEADER);
        fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Bi-Bandwidth (MB/s)");
        fflush(stdout);
    }

    /* Bandwidth test */
    for (size = 1; size <= MAX_MSG_SIZE; size *= 2) {
        if (size > large_message_size) {
            loop = loop_large;
            skip = skip_large;
            window_size = window_size_large;
        }

        /* Window creation and warming-up */
        MPI_Win_create(r_buf, size * window_size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

        if (myid == 0) {
            destrank = 1;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            for (i = 0; i < skip; i++) {
                MPI_Win_post(group, 0, win);
                MPI_Win_start(group, 0, win);
                MPI_Put(s_buf + i * size, size, MPI_CHAR, 1, i * size, size, MPI_CHAR, win);
                MPI_Win_complete(win);
                MPI_Win_wait(win);
            }
        } else { /* rank 1 */
            destrank = 0;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            for (i = 0; i < skip; i++) {
                MPI_Win_post(group, 0, win);
                MPI_Win_start(group, 0, win);
                MPI_Put(s_buf + i * size, size, MPI_CHAR, 0, i * size, size, MPI_CHAR, win);
                MPI_Win_complete(win);
                MPI_Win_wait(win);
            }
        }

        MPI_Barrier(MPI_COMM_WORLD);

        if (myid == 0) {
            t_start = MPI_Wtime();
            for (i = 0; i < loop; i++) {
                MPI_Win_post(group, 0, win);
                MPI_Win_start(group, 0, win);
                for (j = 0; j < window_size; j++) {
                    MPI_Put(s_buf + j * size, size, MPI_CHAR, 1, j * size, size, MPI_CHAR, win);
                }
                MPI_Win_complete(win);
                MPI_Win_wait(win);
            }
            t_end = MPI_Wtime();
            t = t_end - t_start;
        } else {
            for (i = 0; i < loop; i++) {
                MPI_Win_post(group, 0, win);
                MPI_Win_start(group, 0, win);
                for (j = 0; j < window_size; j++) {
                    MPI_Put(s_buf + j * size, size, MPI_CHAR, 0, j * size, size, MPI_CHAR, win);
                }
                MPI_Win_complete(win);
                MPI_Win_wait(win);
            }
        }

        MPI_Barrier(MPI_COMM_WORLD);

        if (myid == 0) {
            double tmp = size / 1e6 * loop * window_size;
            fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH, FLOAT_PRECISION, (tmp / t) * 2);
            fflush(stdout);
        }

        MPI_Group_free(&group);
        MPI_Win_free(&win);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (no_hints == 0) {
        MPI_Info_free(&win_info);
    }

    MPI_Free_mem(s_buf1);
    MPI_Free_mem(r_buf1);
    MPI_Group_free(&comm_group);
    MPI_Finalize();
    return EXIT_SUCCESS;
}
/* Run FOP with Post/Start/Complete/Wait */
void run_fop_with_pscw(int rank, WINDOW type)
{
    int destrank, i;
    MPI_Aint disp = 0;
    MPI_Win win;
    MPI_Group comm_group, group;

    MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &comm_group));

    allocate_atomic_memory(rank, sbuf_original, rbuf_original, tbuf_original,
                           NULL, (char **) &sbuf, (char **) &rbuf, (char **) &tbuf,
                           NULL, (char **) &rbuf, MAX_MSG_SIZE, type, &win);

    if (type == WIN_DYNAMIC) {
        disp = disp_remote;
    }

    if (rank == 0) {
        destrank = 1;
        MPI_CHECK(MPI_Group_incl(comm_group, 1, &destrank, &group));
        MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

        for (i = 0; i < skip + loop; i++) {
            MPI_CHECK(MPI_Win_start(group, 0, win));
            if (i == skip) {
                t_start = MPI_Wtime();
            }
            MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 1, disp, MPI_SUM, win));
            MPI_CHECK(MPI_Win_complete(win));
            MPI_CHECK(MPI_Win_post(group, 0, win));
            MPI_CHECK(MPI_Win_wait(win));
        }
        t_end = MPI_Wtime();
    } else { /* rank=1 */
        destrank = 0;
        MPI_CHECK(MPI_Group_incl(comm_group, 1, &destrank, &group));
        MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

        for (i = 0; i < skip + loop; i++) {
            MPI_CHECK(MPI_Win_post(group, 0, win));
            MPI_CHECK(MPI_Win_wait(win));
            MPI_CHECK(MPI_Win_start(group, 0, win));
            MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 0, disp, MPI_SUM, win));
            MPI_CHECK(MPI_Win_complete(win));
        }
    }

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));

    if (rank == 0) {
        fprintf(stdout, "%-*d%*.*f\n", 10, 8, FIELD_WIDTH, FLOAT_PRECISION,
                (t_end - t_start) * 1.0e6 / loop / 2);
        fflush(stdout);
    }

    MPI_CHECK(MPI_Group_free(&group));
    MPI_CHECK(MPI_Group_free(&comm_group));
    free_atomic_memory(sbuf, rbuf, tbuf, NULL, win, rank);
}