int MPIX_Raccumulate_x(const void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, MPI_Count target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win, MPI_Request *request) { int rc = MPI_SUCCESS; if (likely (origin_count <= bigmpi_int_max && target_count <= bigmpi_int_max)) { rc = MPI_Raccumulate(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, op, win, request); } else { MPI_Datatype neworigin_datatype, newtarget_datatype; MPIX_Type_contiguous_x(origin_count, origin_datatype, &neworigin_datatype); MPIX_Type_contiguous_x(target_count, target_datatype, &newtarget_datatype); MPI_Type_commit(&neworigin_datatype); MPI_Type_commit(&newtarget_datatype); rc = MPI_Raccumulate(origin_addr, 1, neworigin_datatype, target_rank, target_disp, 1, newtarget_datatype, op, win, request); MPI_Type_free(&neworigin_datatype); MPI_Type_free(&newtarget_datatype); } return rc; }
int main( int argc, char *argv[] ) { int rank, nproc, i; int errors = 0, all_errors = 0; int *buf; MPI_Win window; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); if (nproc < 2) { if (rank == 0) printf("Error: must be run with two or more processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } /** Create using MPI_Win_create() **/ if (rank == 0) { MPI_Alloc_mem(4*sizeof(int), MPI_INFO_NULL, &buf); *buf = nproc-1; } else buf = NULL; MPI_Win_create(buf, 4*sizeof(int)*(rank == 0), 1, MPI_INFO_NULL, MPI_COMM_WORLD, &window); /* PROC_NULL Communication */ { MPI_Request pn_req[4]; int val[4], res; MPI_Win_lock_all(0, window); MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, MPI_PROC_NULL, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]); MPI_Rget(&val[1], 1, MPI_INT, MPI_PROC_NULL, 1, 1, MPI_INT, window, &pn_req[1]); MPI_Rput(&val[2], 1, MPI_INT, MPI_PROC_NULL, 2, 1, MPI_INT, window, &pn_req[2]); MPI_Raccumulate(&val[3], 1, MPI_INT, MPI_PROC_NULL, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]); assert(pn_req[0] != MPI_REQUEST_NULL); assert(pn_req[1] != MPI_REQUEST_NULL); assert(pn_req[2] != MPI_REQUEST_NULL); assert(pn_req[3] != MPI_REQUEST_NULL); MPI_Win_unlock_all(window); MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE); } MPI_Barrier(MPI_COMM_WORLD); MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, window); /* GET-ACC: Test third-party communication, through rank 0. */ for (i = 0; i < ITER; i++) { MPI_Request gacc_req; int val = -1, exp = -1; /* Processes form a ring. Process 0 starts first, then passes a token * to the right. Each process, in turn, performs third-party * communication via process 0's window. */ if (rank > 0) { MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } MPI_Rget_accumulate(&rank, 1, MPI_INT, &val, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &gacc_req); assert(gacc_req != MPI_REQUEST_NULL); MPI_Wait(&gacc_req, MPI_STATUS_IGNORE); MPI_Win_flush(0, window); exp = (rank + nproc-1) % nproc; if (val != exp) { printf("%d - Got %d, expected %d\n", rank, val, exp); errors++; } if (rank < nproc-1) { MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) *buf = nproc-1; MPI_Win_sync(window); /* GET+PUT: Test third-party communication, through rank 0. */ for (i = 0; i < ITER; i++) { MPI_Request req; int val = -1, exp = -1; /* Processes form a ring. Process 0 starts first, then passes a token * to the right. Each process, in turn, performs third-party * communication via process 0's window. */ if (rank > 0) { MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req); assert(req != MPI_REQUEST_NULL); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Rput(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req); assert(req != MPI_REQUEST_NULL); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Win_flush(0, window); exp = (rank + nproc-1) % nproc; if (val != exp) { printf("%d - Got %d, expected %d\n", rank, val, exp); errors++; } if (rank < nproc-1) { MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) *buf = nproc-1; MPI_Win_sync(window); /* GET+ACC: Test third-party communication, through rank 0. */ for (i = 0; i < ITER; i++) { MPI_Request req; int val = -1, exp = -1; /* Processes form a ring. Process 0 starts first, then passes a token * to the right. Each process, in turn, performs third-party * communication via process 0's window. */ if (rank > 0) { MPI_Recv(NULL, 0, MPI_BYTE, rank-1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req); assert(req != MPI_REQUEST_NULL); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Raccumulate(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &req); assert(req != MPI_REQUEST_NULL); MPI_Wait(&req, MPI_STATUS_IGNORE); MPI_Win_flush(0, window); exp = (rank + nproc-1) % nproc; if (val != exp) { printf("%d - Got %d, expected %d\n", rank, val, exp); errors++; } if (rank < nproc-1) { MPI_Send(NULL, 0, MPI_BYTE, rank+1, 0, MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); } MPI_Win_unlock(0, window); MPI_Barrier(MPI_COMM_WORLD); /* Wait inside of an epoch */ { MPI_Request pn_req[4]; int val[4], res; const int target = 0; MPI_Win_lock_all(0, window); MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]); MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]); MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]); MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]); assert(pn_req[0] != MPI_REQUEST_NULL); assert(pn_req[1] != MPI_REQUEST_NULL); assert(pn_req[2] != MPI_REQUEST_NULL); assert(pn_req[3] != MPI_REQUEST_NULL); MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE); MPI_Win_unlock_all(window); } MPI_Barrier(MPI_COMM_WORLD); /* Wait outside of an epoch */ { MPI_Request pn_req[4]; int val[4], res; const int target = 0; MPI_Win_lock_all(0, window); MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]); MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]); MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]); MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]); assert(pn_req[0] != MPI_REQUEST_NULL); assert(pn_req[1] != MPI_REQUEST_NULL); assert(pn_req[2] != MPI_REQUEST_NULL); assert(pn_req[3] != MPI_REQUEST_NULL); MPI_Win_unlock_all(window); MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE); } /* Wait in a different epoch */ { MPI_Request pn_req[4]; int val[4], res; const int target = 0; MPI_Win_lock_all(0, window); MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]); MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]); MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]); MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]); assert(pn_req[0] != MPI_REQUEST_NULL); assert(pn_req[1] != MPI_REQUEST_NULL); assert(pn_req[2] != MPI_REQUEST_NULL); assert(pn_req[3] != MPI_REQUEST_NULL); MPI_Win_unlock_all(window); MPI_Win_lock_all(0, window); MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE); MPI_Win_unlock_all(window); } /* Wait in a fence epoch */ { MPI_Request pn_req[4]; int val[4], res; const int target = 0; MPI_Win_lock_all(0, window); MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target, 0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]); MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]); MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]); MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT, MPI_REPLACE, window, &pn_req[3]); assert(pn_req[0] != MPI_REQUEST_NULL); assert(pn_req[1] != MPI_REQUEST_NULL); assert(pn_req[2] != MPI_REQUEST_NULL); assert(pn_req[3] != MPI_REQUEST_NULL); MPI_Win_unlock_all(window); MPI_Win_fence(0, window); MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE); MPI_Win_fence(0, window); } MPI_Win_free(&window); if (buf) MPI_Free_mem(buf); MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0 && all_errors == 0) printf(" No Errors\n"); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { int rank, nproc, i; int errors = 0, all_errors = 0; int *buf = NULL, *winbuf = NULL; MPI_Win window; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); if (nproc < 2) { if (rank == 0) printf("Error: must be run with two or more processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Alloc_mem(MAX_SIZE * sizeof(int), MPI_INFO_NULL, &buf); MPI_Alloc_mem(MAX_SIZE * sizeof(int), MPI_INFO_NULL, &winbuf); MPI_Win_create(winbuf, MAX_SIZE * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &window); MPI_Win_lock_all(0, window); /* Test Raccumulate local completion with small data. * Small data is always copied to header packet as immediate data. */ if (rank == 1) { for (i = 0; i < ITER; i++) { MPI_Request acc_req; int val = -1; buf[0] = rank * i; MPI_Raccumulate(&buf[0], 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_MAX, window, &acc_req); MPI_Wait(&acc_req, MPI_STATUS_IGNORE); /* reset local buffer to check local completion */ buf[0] = 0; MPI_Win_flush(0, window); MPI_Get(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window); MPI_Win_flush(0, window); if (val != rank * i) { printf("%d - Got %d in small Raccumulate test, expected %d (%d * %d)\n", rank, val, rank * i, rank, i); errors++; } } } MPI_Barrier(MPI_COMM_WORLD); /* Test Raccumulate local completion with large data . * Large data is not suitable for 1-copy optimization, and always sent out * from user buffer. */ if (rank == 1) { for (i = 0; i < ITER; i++) { MPI_Request acc_req; int val0 = -1, val1 = -1, val2 = -1; int j; /* initialize data */ for (j = 0; j < MAX_SIZE; j++) { buf[j] = rank + j + i; } MPI_Raccumulate(buf, MAX_SIZE, MPI_INT, 0, 0, MAX_SIZE, MPI_INT, MPI_REPLACE, window, &acc_req); MPI_Wait(&acc_req, MPI_STATUS_IGNORE); /* reset local buffer to check local completion */ buf[0] = 0; buf[MAX_SIZE - 1] = 0; buf[MAX_SIZE / 2] = 0; MPI_Win_flush(0, window); /* get remote values which are modified in local buffer after wait */ MPI_Get(&val0, 1, MPI_INT, 0, 0, 1, MPI_INT, window); MPI_Get(&val1, 1, MPI_INT, 0, MAX_SIZE - 1, 1, MPI_INT, window); MPI_Get(&val2, 1, MPI_INT, 0, MAX_SIZE / 2, 1, MPI_INT, window); MPI_Win_flush(0, window); if (val0 != rank + i) { printf("%d - Got %d in large Raccumulate test, expected %d\n", rank, val0, rank + i); errors++; } if (val1 != rank + MAX_SIZE - 1 + i) { printf("%d - Got %d in large Raccumulate test, expected %d\n", rank, val1, rank + MAX_SIZE - 1 + i); errors++; } if (val2 != rank + MAX_SIZE / 2 + i) { printf("%d - Got %d in large Raccumulate test, expected %d\n", rank, val2, rank + MAX_SIZE / 2 + i); errors++; } } } MPI_Win_unlock_all(window); MPI_Barrier(MPI_COMM_WORLD); MPI_Win_free(&window); if (buf) MPI_Free_mem(buf); if (winbuf) MPI_Free_mem(winbuf); MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0 && all_errors == 0) printf(" No Errors\n"); MPI_Finalize(); return 0; }
FORT_DLL_SPEC void FORT_CALL mpi_raccumulate_ ( void*v1, MPI_Fint *v2, MPI_Fint *v3, MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *v6, MPI_Fint *v7, MPI_Fint *v8, MPI_Fint *v9, MPI_Fint *v10, MPI_Fint *ierr ){ *ierr = MPI_Raccumulate( v1, (int)*v2, (MPI_Datatype)(*v3), (int)*v4, (MPI_Aint)*v5, (int)*v6, (MPI_Datatype)(*v7), (MPI_Op)*v8, (MPI_Win)*v9, (MPI_Request *)(v10) ); }