int main(int argc, char *argv[])
{
    int rank, nproc;
    int errors = 0, all_errors = 0;
    int buf, my_buf;
    MPI_Win win;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create(&buf, sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN);

    MPI_Win_fence(0, win);

    MPI_Win_lock(MPI_LOCK_SHARED, 0, MPI_MODE_NOCHECK, win);
    MPI_Get(&my_buf, 1, MPI_INT, 0, 0, 1, MPI_INT, win);
    MPI_Win_unlock(0, win);

    /* This should fail because the window is no longer in a fence epoch */
    CHECK_ERR(MPI_Get(&my_buf, 1, MPI_INT, 0, 0, 1, MPI_INT, win));

    MPI_Win_fence(0, win);
    MPI_Win_free(&win);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();
    return 0;
}
void MPIMutex::lock(int proc)
{
    int rank, nproc, already_locked;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nproc);
    //std::cout << "trying to get lock" << std::endl;

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, win);

    byte *buff = (byte*)malloc(sizeof(byte)*nproc);
    buff[rank] = 1;
    MPI_Put(&(buff[rank]), 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, win);

    /* Get data to the left of rank */
    if (rank > 0) {
        MPI_Get(buff, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, win);
    }

    /* Get data to the right of rank */
    if (rank < nproc - 1) {
        MPI_Get(&(buff[rank+1]), nproc-1-rank, MPI_BYTE, proc, rank+1,
                nproc-1-rank, MPI_BYTE, win);
    }

    MPI_Win_unlock(proc, win);

    /* Check if anyone has the lock */
    for (int i = already_locked = 0; i < nproc; i++)
        if (buff[i] && i != rank)
            already_locked = 1;

    /* Wait for notification */
    if (already_locked) {
        MPI_Status status;
        //std::cout << "waiting for notification [proc = "<<proc<<"]" << std::endl;
        MPI_Recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, MPI_MUTEX_TAG+id, comm, &status);
    }

    //std::cout << "lock acquired [proc = "<<proc<<"]" << std::endl;
    free(buff);
};
bool MPIMutex::try_lock(int proc)
{
    int rank, nproc, already_locked;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nproc);

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, win);

    byte *buff = (byte*)malloc(sizeof(byte)*nproc);
    buff[rank] = 1;
    MPI_Put(&(buff[rank]), 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, win);

    /* Get data to the left of rank */
    if (rank > 0) {
        MPI_Get(buff, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, win);
    }

    /* Get data to the right of rank */
    if (rank < nproc - 1) {
        MPI_Get(&(buff[rank+1]), nproc-1-rank, MPI_BYTE, proc, rank+1,
                nproc-1-rank, MPI_BYTE, win);
    }

    MPI_Win_unlock(proc, win);

    /* Check if anyone has the lock */
    already_locked = 1;  // 1 means we successfully got the lock
    for (int i = 0; i < nproc; i++)
        if (buff[i] && i != rank)
            already_locked = 0;

    free(buff);
    return already_locked;
};
int do_test(int origin_count, MPI_Datatype origin_type, int result_count,
            MPI_Datatype result_type, int target_count, MPI_Datatype target_type)
{
    int errs = 0, ret, origin_type_size, result_type_size;

    ret = MPI_Put(origin_buf, origin_count, origin_type, 1, 0, target_count, target_type, win);
    if (ret)
        errs++;

    ret = MPI_Get(origin_buf, origin_count, origin_type, 1, 0, target_count, target_type, win);
    if (ret)
        errs++;

    ret = MPI_Accumulate(origin_buf, origin_count, origin_type, 1, 0, target_count,
                         target_type, MPI_SUM, win);
    if (ret)
        errs++;

    ret = MPI_Get_accumulate(origin_buf, origin_count, origin_type, result_buf, result_count,
                             result_type, 1, 0, target_count, target_type, MPI_SUM, win);
    if (ret)
        errs++;

    MPI_Type_size(origin_type, &origin_type_size);
    MPI_Type_size(result_type, &result_type_size);

    if (origin_count == 0 || origin_type_size == 0) {
        ret = MPI_Put(NULL, origin_count, origin_type, 1, 0, target_count, target_type, win);
        if (ret)
            errs++;

        ret = MPI_Get(NULL, origin_count, origin_type, 1, 0, target_count, target_type, win);
        if (ret)
            errs++;

        ret = MPI_Accumulate(NULL, origin_count, origin_type, 1, 0, target_count, target_type,
                             MPI_SUM, win);
        if (ret)
            errs++;

        ret = MPI_Get_accumulate(NULL, origin_count, origin_type, result_buf, result_count,
                                 result_type, 1, 0, target_count, target_type, MPI_SUM, win);
        if (ret)
            errs++;

        if (result_count == 0 || result_type_size == 0) {
            ret = MPI_Get_accumulate(NULL, origin_count, origin_type, NULL, result_count,
                                     result_type, 1, 0, target_count, target_type, MPI_SUM, win);
            if (ret)
                errs++;
        }
    }
    return errs;
}
/** Lock a mutex.
 *
 * @param[in] hdl   Mutex group that the mutex belongs to
 * @param[in] mutex Desired mutex number [0..count-1]
 * @param[in] proc  Rank of process where the mutex lives
 * @return          MPI status
 */
int MPIX_Mutex_lock(MPIX_Mutex hdl, int mutex, int proc)
{
    int rank, nproc, already_locked, i;
    uint8_t *buf;

    assert(mutex >= 0 && mutex < hdl->max_count);

    MPI_Comm_rank(hdl->comm, &rank);
    MPI_Comm_size(hdl->comm, &nproc);

    assert(proc >= 0 && proc < nproc);

    buf = malloc(nproc * sizeof(uint8_t));
    assert(buf != NULL);

    buf[rank] = 1;

    /* Get all data from the lock_buf, except the byte belonging to
     * me. Set the byte belonging to me to 1. */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->windows[mutex]);

    MPI_Put(&buf[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, hdl->windows[mutex]);

    /* Get data to the left of rank */
    if (rank > 0) {
        MPI_Get(buf, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, hdl->windows[mutex]);
    }

    /* Get data to the right of rank */
    if (rank < nproc - 1) {
        MPI_Get(&buf[rank + 1], nproc - 1 - rank, MPI_BYTE, proc, rank + 1,
                nproc - 1 - rank, MPI_BYTE, hdl->windows[mutex]);
    }

    MPI_Win_unlock(proc, hdl->windows[mutex]);

    assert(buf[rank] == 1);

    for (i = already_locked = 0; i < nproc; i++)
        if (buf[i] && i != rank)
            already_locked = 1;

    /* Wait for notification */
    if (already_locked) {
        MPI_Status status;
        debug_print("waiting for notification [proc = %d, mutex = %d]\n", proc, mutex);
        MPI_Recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, MPIX_MUTEX_TAG + mutex, hdl->comm, &status);
    }

    debug_print("lock acquired [proc = %d, mutex = %d]\n", proc, mutex);

    free(buf);

    return MPI_SUCCESS;
}
/** Unlock a mutex.
 *
 * @param[in] hdl   Mutex group that the mutex belongs to
 * @param[in] mutex Desired mutex number [0..count-1]
 * @param[in] proc  Rank of process where the mutex lives
 * @return          MPI status
 */
int MPIX_Mutex_unlock(MPIX_Mutex hdl, int mutex, int proc)
{
    int rank, nproc, i;
    uint8_t *buf;

    assert(mutex >= 0 && mutex < hdl->max_count);

    MPI_Comm_rank(hdl->comm, &rank);
    MPI_Comm_size(hdl->comm, &nproc);

    assert(proc >= 0 && proc < nproc);

    buf = malloc(nproc * sizeof(uint8_t));
    assert(buf != NULL);

    buf[rank] = 0;

    /* Get all data from the lock_buf, except the byte belonging to
     * me. Set the byte belonging to me to 0. */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->windows[mutex]);

    MPI_Put(&buf[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, hdl->windows[mutex]);

    /* Get data to the left of rank */
    if (rank > 0) {
        MPI_Get(buf, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, hdl->windows[mutex]);
    }

    /* Get data to the right of rank */
    if (rank < nproc - 1) {
        MPI_Get(&buf[rank + 1], nproc - 1 - rank, MPI_BYTE, proc, rank + 1,
                nproc - 1 - rank, MPI_BYTE, hdl->windows[mutex]);
    }

    MPI_Win_unlock(proc, hdl->windows[mutex]);

    assert(buf[rank] == 0);

    /* Notify the next waiting process, starting to my right for fairness */
    for (i = 1; i < nproc; i++) {
        int p = (rank + i) % nproc;

        if (buf[p] == 1) {
            debug_print("notifying %d [proc = %d, mutex = %d]\n", p, proc, mutex);
            /* Tag must match the MPI_Recv posted in MPIX_Mutex_lock */
            MPI_Send(NULL, 0, MPI_BYTE, p, MPIX_MUTEX_TAG + mutex, hdl->comm);
            break;
        }
    }

    debug_print("lock released [proc = %d, mutex = %d]\n", proc, mutex);

    free(buf);

    return MPI_SUCCESS;
}
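A minimal usage sketch for the two MPIX_Mutex routines above. The creation and free calls used here (MPIX_Mutex_create, MPIX_Mutex_free) are assumptions not shown in this excerpt; only the lock/unlock calls appear above. The mutex number and host rank are chosen arbitrarily for illustration.

/* Hypothetical usage: every rank enters a critical section guarded by
 * mutex 0, which is hosted on rank 0. MPIX_Mutex_create/free are assumed. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPIX_Mutex hdl = MPIX_Mutex_create(1, MPI_COMM_WORLD);  /* assumed constructor */

    MPIX_Mutex_lock(hdl, 0, 0);     /* mutex 0, hosted on rank 0 */
    printf("rank %d is in the critical section\n", rank);
    MPIX_Mutex_unlock(hdl, 0, 0);   /* wakes the next waiter, if any */

    MPIX_Mutex_free(&hdl);          /* assumed destructor */
    MPI_Finalize();
    return 0;
}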
/** Lock a mutex.
 *
 * @param[in] hdl        Mutex group that the mutex belongs to
 * @param[in] mutex      Desired mutex number [0..count-1]
 * @param[in] world_proc Absolute ID of process where the mutex lives
 */
void ARMCIX_Lock_hdl(armcix_mutex_hdl_t hdl, int mutex, int world_proc)
{
    int rank, nproc, already_locked, i, proc;
    uint8_t *buf;

    ARMCII_Assert(mutex >= 0 && mutex < hdl->max_count);

    MPI_Comm_rank(hdl->grp.comm, &rank);
    MPI_Comm_size(hdl->grp.comm, &nproc);

    /* User gives us the absolute ID.  Translate to the rank in the mutex's group. */
    proc = ARMCII_Translate_absolute_to_group(&hdl->grp, world_proc);
    ARMCII_Assert(proc >= 0);

    buf = malloc(nproc*sizeof(uint8_t));
    ARMCII_Assert(buf != NULL);

    buf[rank] = 1;

    /* Get all data from the lock_buf, except the byte belonging to
     * me. Set the byte belonging to me to 1. */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->windows[mutex]);

    MPI_Put(&buf[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, hdl->windows[mutex]);

    /* Get data to the left of rank */
    if (rank > 0) {
        MPI_Get(buf, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, hdl->windows[mutex]);
    }

    /* Get data to the right of rank */
    if (rank < nproc - 1) {
        MPI_Get(&buf[rank+1], nproc-1-rank, MPI_BYTE, proc, rank + 1,
                nproc-1-rank, MPI_BYTE, hdl->windows[mutex]);
    }

    MPI_Win_unlock(proc, hdl->windows[mutex]);

    ARMCII_Assert(buf[rank] == 1);

    for (i = already_locked = 0; i < nproc; i++)
        if (buf[i] && i != rank)
            already_locked = 1;

    /* Wait for notification */
    if (already_locked) {
        MPI_Status status;
        ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "waiting for notification [proc = %d, mutex = %d]\n", proc, mutex);
        MPI_Recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, ARMCI_MUTEX_TAG+mutex, hdl->grp.comm, &status);
    }

    ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "lock acquired [proc = %d, mutex = %d]\n", proc, mutex);

    free(buf);
}
/** Unlock a mutex.
 *
 * @param[in] hdl        Mutex group that the mutex belongs to
 * @param[in] mutex      Desired mutex number [0..count-1]
 * @param[in] world_proc Absolute ID of process where the mutex lives
 */
void ARMCIX_Unlock_hdl(armcix_mutex_hdl_t hdl, int mutex, int world_proc)
{
    int rank, nproc, i, proc;
    uint8_t *buf;

    ARMCII_Assert(mutex >= 0 && mutex < hdl->max_count);

    MPI_Comm_rank(hdl->grp.comm, &rank);
    MPI_Comm_size(hdl->grp.comm, &nproc);

    proc = ARMCII_Translate_absolute_to_group(&hdl->grp, world_proc);
    ARMCII_Assert(proc >= 0);

    buf = malloc(nproc*sizeof(uint8_t));

    buf[rank] = 0;

    /* Get all data from the lock_buf, except the byte belonging to
     * me. Set the byte belonging to me to 0. */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->windows[mutex]);

    MPI_Put(&buf[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, hdl->windows[mutex]);

    /* Get data to the left of rank */
    if (rank > 0) {
        MPI_Get(buf, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, hdl->windows[mutex]);
    }

    /* Get data to the right of rank */
    if (rank < nproc - 1) {
        MPI_Get(&buf[rank+1], nproc-1-rank, MPI_BYTE, proc, rank + 1,
                nproc-1-rank, MPI_BYTE, hdl->windows[mutex]);
    }

    MPI_Win_unlock(proc, hdl->windows[mutex]);

    ARMCII_Assert(buf[rank] == 0);

    /* Notify the next waiting process, starting to my right for fairness */
    for (i = 1; i < nproc; i++) {
        int p = (rank + i) % nproc;

        if (buf[p] == 1) {
            ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "notifying %d [proc = %d, mutex = %d]\n", p, proc, mutex);
            MPI_Send(NULL, 0, MPI_BYTE, p, ARMCI_MUTEX_TAG+mutex, hdl->grp.comm);
            break;
        }
    }

    ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "lock released [proc = %d, mutex = %d]\n", proc, mutex);

    free(buf);
}
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank, nproc;
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Win win;
    MPI_Aint remote;
    MPI_Aint local;
    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if (rank == 0) {
        int a = 4;
        MPI_Win_attach(win, &a, sizeof(int));
        MPI_Get_address(&a, &local);
        /* Send the absolute address; with a dynamic window it is used
         * directly as the target displacement. */
        MPI_Send(&local, 1, MPI_AINT, 1, 1, MPI_COMM_WORLD);
        /* Keep 'a' attached until rank 1 has finished its get */
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_detach(win, &a);
    }
    else {
        int val;
        MPI_Status reqstat;
        MPI_Recv(&remote, 1, MPI_AINT, 0, 1, MPI_COMM_WORLD, &reqstat);
        /* The get must happen inside an access epoch */
        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
        MPI_Get(&val, 1, MPI_INT, 0, remote, 1, MPI_INT, win);
        MPI_Win_unlock(0, win);
        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[])
{
    int errs = 0, err;
    int rank, size;
    int *buf, bufsize;
    int *result;
    int *rmabuf, rsize, rcount;
    MPI_Comm comm;
    MPI_Win win;
    MPI_Request req;
    MPI_Datatype derived_dtp;

    MTest_Init(&argc, &argv);

    bufsize = 256 * sizeof(int);
    buf = (int *) malloc(bufsize);
    if (!buf) {
        fprintf(stderr, "Unable to allocate %d bytes\n", bufsize);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    result = (int *) malloc(bufsize);
    if (!result) {
        fprintf(stderr, "Unable to allocate %d bytes\n", bufsize);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    rcount = 16;
    rsize = rcount * sizeof(int);
    rmabuf = (int *) malloc(rsize);
    if (!rmabuf) {
        fprintf(stderr, "Unable to allocate %d bytes\n", rsize);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Type_contiguous(2, MPI_INT, &derived_dtp);
    MPI_Type_commit(&derived_dtp);

    /* The following loop is used to run through a series of communicators
     * that are subsets of MPI_COMM_WORLD, of size 1 or greater. */
    while (MTestGetIntracommGeneral(&comm, 1, 1)) {
        int count = 0;

        if (comm == MPI_COMM_NULL)
            continue;
        /* Determine the sender and receiver */
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);

        MPI_Win_create(buf, bufsize, 2 * sizeof(int), MPI_INFO_NULL, comm, &win);
        /* To improve reporting of problems about operations, we
         * change the error handler to errors return */
        MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN);

        /** TEST OPERATIONS USING ACTIVE TARGET (FENCE) SYNCHRONIZATION **/
        MPI_Win_fence(0, win);

        TEST_FENCE_OP("Put", MPI_Put(rmabuf, count, MPI_INT, TARGET, 0, count, MPI_INT, win);
            );

        TEST_FENCE_OP("Get", MPI_Get(rmabuf, count, MPI_INT, TARGET, 0, count, MPI_INT, win);
            );
void DO_OP_LOOP(int dst, int iter)
{
    int i, x;

    switch (OP_TYPE) {
    case OP_ACC:
        for (x = 0; x < iter; x++) {
            for (i = 0; i < NOP; i++)
                MPI_Accumulate(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE,
                               MPI_SUM, win);
            MPI_Win_flush(dst, win);
        }
        break;
    case OP_PUT:
        for (x = 0; x < iter; x++) {
            for (i = 0; i < NOP; i++)
                MPI_Put(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE, win);
            MPI_Win_flush(dst, win);
        }
        break;
    case OP_GET:
        for (x = 0; x < iter; x++) {
            for (i = 0; i < NOP; i++)
                MPI_Get(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE, win);
            MPI_Win_flush(dst, win);
        }
        break;
    }
}
int main(int argc, char *argv[])
{
    int n, myid, numprocs, i, ierr;
    double PI25DT = 3.141592653589793238462643;
    double mypi, pi, h, sum, x;
    MPI_Win nwin, piwin;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    if (myid == 0) {
        MPI_Win_create(&n, sizeof(int), 1, MPI_INFO_NULL, MPI_COMM_WORLD, &nwin);
        MPI_Win_create(&pi, sizeof(double), 1, MPI_INFO_NULL, MPI_COMM_WORLD, &piwin);
    }
    else {
        MPI_Win_create(MPI_BOTTOM, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &nwin);
        MPI_Win_create(MPI_BOTTOM, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &piwin);
    }

    while (1) {
        if (myid == 0) {
            fprintf(stdout, "Enter the number of intervals: (0 quits) ");
            fflush(stdout);
            ierr = scanf("%d", &n);
            pi = 0.0;
        }

        MPI_Win_fence(0, nwin);
        if (myid != 0)
            MPI_Get(&n, 1, MPI_INT, 0, 0, 1, MPI_INT, nwin);
        MPI_Win_fence(0, nwin);

        if (n == 0)
            break;
        else {
            h = 1.0 / (double) n;
            sum = 0.0;
            for (i = myid + 1; i <= n; i += numprocs) {
                x = h * ((double) i - 0.5);
                sum += (4.0 / (1.0 + x * x));
            }
            mypi = h * sum;

            MPI_Win_fence(0, piwin);
            MPI_Accumulate(&mypi, 1, MPI_DOUBLE, 0, 0, 1, MPI_DOUBLE, MPI_SUM, piwin);
            MPI_Win_fence(0, piwin);

            if (myid == 0) {
                fprintf(stdout, "pi is approximately %.16f, Error is %.16f\n",
                        pi, fabs(pi - PI25DT));
                fflush(stdout);
            }
        }
    }

    MPI_Win_free(&nwin);
    MPI_Win_free(&piwin);
    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[])
{
    int rank, nprocs, A[SIZE2], B[SIZE2], i;
    MPI_Win win;
    int errs = 0;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs != 2) {
        printf("Run this program with 2 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (rank == 0) {
        for (i = 0; i < SIZE2; i++)
            A[i] = B[i] = i;
        MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

        for (i = 0; i < SIZE1; i++) {
            MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
            MPI_Put(A + i, 1, MPI_INT, 1, i, 1, MPI_INT, win);
            MPI_Win_unlock(1, win);
        }

        for (i = 0; i < SIZE1; i++) {
            MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
            MPI_Get(B + i, 1, MPI_INT, 1, SIZE1 + i, 1, MPI_INT, win);
            MPI_Win_unlock(1, win);
        }

        MPI_Win_free(&win);

        for (i = 0; i < SIZE1; i++)
            if (B[i] != (-4) * (i + SIZE1)) {
                printf("Get Error: B[%d] is %d, should be %d\n", i, B[i], (-4) * (i + SIZE1));
                errs++;
            }
    }
    else {  /* rank=1 */
        for (i = 0; i < SIZE2; i++)
            B[i] = (-4) * i;
        MPI_Win_create(B, SIZE2 * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);

        MPI_Win_free(&win);

        for (i = 0; i < SIZE1; i++) {
            if (B[i] != i) {
                printf("Put Error: B[%d] is %d, should be %d\n", i, B[i], i);
                errs++;
            }
        }
    }

    /* if (rank==0) printf("Done\n"); */

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
/** One-sided get operation with type arguments.  Destination buffer must be private.
 *
 * @param[in] mreg      Memory region
 * @param[in] src       Address of source data
 * @param[in] src_count Number of elements of the given type at the source
 * @param[in] src_type  MPI datatype of the source elements
 * @param[in] dst       Address of destination buffer
 * @param[in] dst_count Number of elements of the given type at the destination
 * @param[in] dst_type  MPI datatype of the destination elements
 * @param[in] proc      Absolute process id of target process
 * @return              0 on success, non-zero on failure
 */
int gmr_get_typed(gmr_t *mreg, void *src, int src_count, MPI_Datatype src_type,
                  void *dst, int dst_count, MPI_Datatype dst_type, int proc)
{
    int grp_proc;
    gmr_size_t disp;
    MPI_Aint lb, extent;

    grp_proc = ARMCII_Translate_absolute_to_group(&mreg->group, proc);
    ARMCII_Assert(grp_proc >= 0);

    // Calculate displacement from beginning of the window
    if (src == MPI_BOTTOM)
        disp = 0;
    else
        disp = (gmr_size_t) ((uint8_t*)src - (uint8_t*)mreg->slices[proc].base);

    // Perform checks
    MPI_Type_get_true_extent(src_type, &lb, &extent);
    ARMCII_Assert(mreg->lock_state != GMR_LOCK_UNLOCKED);
    ARMCII_Assert_msg(disp >= 0 && disp < mreg->slices[proc].size, "Invalid remote address");
    ARMCII_Assert_msg(disp + src_count*extent <= mreg->slices[proc].size, "Transfer is out of range");

    MPI_Get(dst, dst_count, dst_type, grp_proc, (MPI_Aint) disp, src_count, src_type, mreg->window);

    return 0;
}
void add_gather_request(gather * g, size_t local_idx, int remote_rank,
                        size_t remote_idx, size_t req_id)
{
    assert(g->valid);
#pragma omp critical
    MPI_Get(g->output + local_idx * g->elt_size, 1, g->datatype,
            remote_rank, remote_idx, 1, g->datatype, g->win);
}
int main(int argc, char *argv[])
{
    int errs = 0, err;
    int rank, size;
    int *buf, bufsize;
    int *result;
    int *rmabuf, rsize, rcount;
    MPI_Comm comm;
    MPI_Win win;
    MPI_Request req;

    MTest_Init(&argc, &argv);

    bufsize = 256 * sizeof(int);
    buf = (int *) malloc(bufsize);
    if (!buf) {
        fprintf(stderr, "Unable to allocate %d bytes\n", bufsize);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    result = (int *) malloc(bufsize);
    if (!result) {
        fprintf(stderr, "Unable to allocate %d bytes\n", bufsize);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    rcount = 16;
    rsize = rcount * sizeof(int);
    rmabuf = (int *) malloc(rsize);
    if (!rmabuf) {
        fprintf(stderr, "Unable to allocate %d bytes\n", rsize);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* The following illustrates the use of the routines to
     * run through a selection of communicators and datatypes.
     * Use subsets of these for tests that do not involve combinations
     * of communicators, datatypes, and counts of datatypes */
    while (MTestGetIntracommGeneral(&comm, 1, 1)) {
        if (comm == MPI_COMM_NULL)
            continue;
        /* Determine the sender and receiver */
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);

        MPI_Win_create(buf, bufsize, sizeof(int), MPI_INFO_NULL, comm, &win);
        /* To improve reporting of problems about operations, we
         * change the error handler to errors return */
        MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN);

        /** TEST OPERATIONS USING ACTIVE TARGET (FENCE) SYNCHRONIZATION **/
        MPI_Win_fence(0, win);

        TEST_FENCE_OP("Put", MPI_Put(rmabuf, rcount, MPI_INT, MPI_PROC_NULL, 0, rcount, MPI_INT, win);
            );

        TEST_FENCE_OP("Get", MPI_Get(rmabuf, rcount, MPI_INT, MPI_PROC_NULL, 0, rcount, MPI_INT, win);
            );
void SweptDiscretization2D::allGatherAllOutputToFile(string filename)
{
    void *buffer = NULL;
    FILE *output;

    if (pg.rank == 0) {
        MPI_Alloc_mem(foundationSize * pg.mpiSize * sizeof(double), MPI_INFO_NULL, &buffer);
        output = fopen(filename.c_str(), "wb");
    }

    MPI_Win_fence((MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE), foundationWindow);
    if (pg.rank == 0) {
        for (int r = 0; r < pg.mpiSize; r++) {
            MPI_Get((char*)buffer + (r * foundationSize * sizeof(double)),
                    foundationSize * sizeof(double), MPI_BYTE,
                    r, 0, foundationSize * sizeof(double), MPI_BYTE,
                    foundationWindow);
        }
    }
    MPI_Win_fence(MPI_MODE_NOSUCCEED, foundationWindow);

    if (pg.rank == 0) {
        int w = (n * pg.xNodes);
        int h = (n * pg.yNodes);
        int resultArraySize = w * h;
        if (resultArray == NULL)
            resultArray = (double*) malloc(resultArraySize * sizeof(double) * outputLength);

        for (int r = 0; r < pg.mpiSize; r++) {
            double *processing = (double*)buffer + (foundationSize * r);
            int jIndex = (r % (pg.xNodes*pg.yNodes)) / pg.xNodes;
            int iIndex = r % pg.xNodes;

            for (int j = 1; j < n + 1; j++) {
                for (int i = 1; i < n + 1; i++) {
                    int iGlobal = n*iIndex + (i-1);
                    int jGlobal = n*jIndex + (j-1);
                    int index = this->ijToIndex(i, j);

                    for (int point = 0; point < outputLength; point++) {
                        double val = processing[index + constants + point];
                        int resultIndex = (iGlobal + jGlobal * n * pg.xNodes) * outputLength + point;
                        resultArray[resultIndex] = val;
                    }
                }
            }
        }

        fwrite((const void*)resultArray, sizeof(double), resultArraySize, output);
        fclose(output);
        MPI_Free_mem(buffer);
    }
    MPI_Barrier(MPI_COMM_WORLD);
}
int main(int argc, char *argv[])
{
    int rank, nprocs, A[SIZE], B[SIZE], i;
    MPI_Comm CommDeuce;
    MPI_Win win;
    int errs = 0;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs < 2) {
        printf("Run this program with 2 or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_split(MPI_COMM_WORLD, (rank < 2), rank, &CommDeuce);

    if (rank < 2) {
        if (rank == 0) {
            for (i = 0; i < SIZE; i++)
                A[i] = B[i] = i;
        }
        else {
            for (i = 0; i < SIZE; i++) {
                A[i] = (-3) * i;
                B[i] = (-4) * i;
            }
        }

        MPI_Win_create(B, SIZE * sizeof(int), sizeof(int), MPI_INFO_NULL, CommDeuce, &win);

        MPI_Win_fence(0, win);

        if (rank == 0) {
            for (i = 0; i < SIZE - 1; i++)
                MPI_Put(A + i, 1, MPI_INT, 1, i, 1, MPI_INT, win);
        }
        else {
            for (i = 0; i < SIZE - 1; i++)
                MPI_Get(A + i, 1, MPI_INT, 0, i, 1, MPI_INT, win);

            MPI_Accumulate(A + i, 1, MPI_INT, 0, i, 1, MPI_INT, MPI_SUM, win);
        }

        MPI_Win_fence(0, win);

        if (rank == 1) {
            for (i = 0; i < SIZE - 1; i++) {
                if (A[i] != B[i]) {
                    SQUELCH(printf("Put/Get Error: A[i]=%d, B[i]=%d\n", A[i], B[i]););
                    errs++;
                }
            }
        }
int main(int argc, char *argv[])
{
    int rank, destrank, nprocs, *A, *B, i;
    MPI_Comm CommDeuce;
    MPI_Group comm_group, group;
    MPI_Win win;
    int errs = 0;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs < 2) {
        printf("Run this program with 2 or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_split(MPI_COMM_WORLD, (rank < 2), rank, &CommDeuce);

    if (rank < 2) {
        i = MPI_Alloc_mem(SIZE2 * sizeof(int), MPI_INFO_NULL, &A);
        if (i) {
            printf("Can't allocate memory in test program\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        i = MPI_Alloc_mem(SIZE2 * sizeof(int), MPI_INFO_NULL, &B);
        if (i) {
            printf("Can't allocate memory in test program\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        MPI_Comm_group(CommDeuce, &comm_group);

        if (rank == 0) {
            for (i = 0; i < SIZE2; i++)
                A[i] = B[i] = i;
            MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, CommDeuce, &win);
            destrank = 1;
            MPI_Group_incl(comm_group, 1, &destrank, &group);
            MPI_Win_start(group, 0, win);
            for (i = 0; i < SIZE1; i++)
                MPI_Put(A + i, 1, MPI_INT, 1, i, 1, MPI_INT, win);
            for (i = 0; i < SIZE1; i++)
                MPI_Get(B + i, 1, MPI_INT, 1, SIZE1 + i, 1, MPI_INT, win);
            MPI_Win_complete(win);
            for (i = 0; i < SIZE1; i++)
                if (B[i] != (-4) * (i + SIZE1)) {
                    SQUELCH(printf("Get Error: B[i] is %d, should be %d\n",
                                   B[i], (-4) * (i + SIZE1)););
                    errs++;
                }
void test_put(void)
{
    int me, nproc;
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);

    MPI_Win dst_win;
    double *dst_buf;
    double src_buf[MAXELEMS];
    int i, j;

    MPI_Alloc_mem(sizeof(double) * nproc * MAXELEMS, MPI_INFO_NULL, &dst_buf);
    MPI_Win_create(dst_buf, sizeof(double) * nproc * MAXELEMS, 1, MPI_INFO_NULL,
                   MPI_COMM_WORLD, &dst_win);

    for (i = 0; i < MAXELEMS; i++)
        src_buf[i] = me + 1.0;

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, dst_win);
    for (i = 0; i < nproc * MAXELEMS; i++)
        dst_buf[i] = 0.0;
    MPI_Win_unlock(me, dst_win);

    MPI_Barrier(MPI_COMM_WORLD);

    for (i = 0; i < nproc; i++) {
        int target = i;

        for (j = 0; j < COUNT; j++) {
            if (verbose)
                printf("%2d -> %2d [%2d]\n", me, target, j);
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win);
            MPI_Put(&src_buf[j], sizeof(double), MPI_BYTE, target,
                    (me * MAXELEMS + j) * sizeof(double), sizeof(double), MPI_BYTE, dst_win);
            MPI_Win_unlock(target, dst_win);
        }

        for (j = 0; j < COUNT; j++) {
            if (verbose)
                printf("%2d <- %2d [%2d]\n", me, target, j);
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win);
            MPI_Get(&src_buf[j], sizeof(double), MPI_BYTE, target,
                    (me * MAXELEMS + j) * sizeof(double), sizeof(double), MPI_BYTE, dst_win);
            MPI_Win_unlock(target, dst_win);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_free(&dst_win);
    MPI_Free_mem(dst_buf);
}
int MPIX_Get_x(void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype,
               int target_rank, MPI_Aint target_disp,
               MPI_Count target_count, MPI_Datatype target_datatype, MPI_Win win)
{
    int rc = MPI_SUCCESS;

    if (likely (origin_count <= bigmpi_int_max && target_count <= bigmpi_int_max)) {
        rc = MPI_Get(origin_addr, origin_count, origin_datatype,
                     target_rank, target_disp, target_count, target_datatype, win);
    } else {
        MPI_Datatype neworigin_datatype, newtarget_datatype;
        MPIX_Type_contiguous_x(origin_count, origin_datatype, &neworigin_datatype);
        MPIX_Type_contiguous_x(target_count, target_datatype, &newtarget_datatype);
        MPI_Type_commit(&neworigin_datatype);
        MPI_Type_commit(&newtarget_datatype);
        rc = MPI_Get(origin_addr, 1, neworigin_datatype,
                     target_rank, target_disp, 1, newtarget_datatype, win);
        MPI_Type_free(&neworigin_datatype);
        MPI_Type_free(&newtarget_datatype);
    }
    return rc;
}
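Because MPIX_Get_x only widens the count arguments to MPI_Count, it drops into existing passive-target code unchanged. A minimal sketch of a caller, assuming the declaration above is available through a header (the header name and the helper function here are illustrative, not part of the excerpt):

/* Sketch: read a possibly very large (> INT_MAX element) region in one call.
 * <bigmpi.h> is assumed to declare MPIX_Get_x; fetch_all is a made-up helper. */
#include <mpi.h>
#include <bigmpi.h>

void fetch_all(double *local, MPI_Count n, int target, MPI_Win win)
{
    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
    /* n is an MPI_Count, so it may exceed INT_MAX; MPIX_Get_x handles the
     * large-count case by building a contiguous derived datatype internally. */
    MPIX_Get_x(local, n, MPI_DOUBLE, target, 0, n, MPI_DOUBLE, win);
    MPI_Win_unlock(target, win);   /* unlock completes the get */
}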
/*
 * Class:     mpi_Win
 * Method:    get
 * Signature: (JLjava/lang/Object;IJIIIJI)V
 */
JNIEXPORT void JNICALL Java_mpi_Win_get(
        JNIEnv *env, jobject jthis, jlong win,
        jobject origin, jint orgCount, jlong orgType,
        jint targetRank, jint targetDisp, jint targetCount,
        jlong targetType, jint baseType)
{
    void *orgPtr = (*env)->GetDirectBufferAddress(env, origin);

    int rc = MPI_Get(orgPtr, orgCount, (MPI_Datatype)orgType,
                     targetRank, (MPI_Aint)targetDisp, targetCount,
                     (MPI_Datatype)targetType, (MPI_Win)win);

    ompi_java_exceptionCheck(env, rc);
}
void SpParHelper::FetchMatrix(SpMat<IT,NT,DER> & MRecv, const vector<IT> & essentials,
                              vector<MPI_Win> & arrwin, int ownind)
{
    MRecv.Create(essentials);  // allocate memory for arrays

    Arr<IT,NT> arrinfo = MRecv.GetArrays();
    assert( (arrwin.size() == arrinfo.totalsize()));

    // C-binding for MPI::Get
    // int MPI_Get(void *origin_addr, int origin_count, MPI_Datatype origin_datatype,
    //             int target_rank, MPI_Aint target_disp,
    //             int target_count, MPI_Datatype target_datatype, MPI_Win win)

    IT essk = 0;
    for (int i = 0; i < arrinfo.indarrs.size(); ++i)  // get index arrays
    {
        //arrwin[essk].Lock(MPI::LOCK_SHARED, ownind, 0);
        MPI_Get(arrinfo.indarrs[i].addr, arrinfo.indarrs[i].count, MPIType<IT>(),
                ownind, 0, arrinfo.indarrs[i].count, MPIType<IT>(), arrwin[essk++]);
    }
    for (int i = 0; i < arrinfo.numarrs.size(); ++i)  // get numerical arrays
    {
        //arrwin[essk].Lock(MPI::LOCK_SHARED, ownind, 0);
        MPI_Get(arrinfo.numarrs[i].addr, arrinfo.numarrs[i].count, MPIType<NT>(),
                ownind, 0, arrinfo.numarrs[i].count, MPIType<NT>(), arrwin[essk++]);
    }
}
/** Lock a mutex.
 *
 * @param[in] hdl        Mutex group that the mutex belongs to
 * @param[in] mutex      Desired mutex number [0..count-1]
 * @param[in] world_proc Absolute ID of process where the mutex lives
 */
void ARMCIX_Lock_hdl(armcix_mutex_hdl_t hdl, int mutex, int world_proc)
{
    int rank, nproc, proc;
    long lock_val, unlock_val, lock_out;
    int timeout = 1;

    MPI_Comm_rank(hdl->comm, &rank);
    MPI_Comm_size(hdl->comm, &nproc);

    /* User gives us the absolute ID.  Translate to the rank in the mutex's group. */
    proc = ARMCII_Translate_absolute_to_group(hdl->comm, world_proc);
    ARMCII_Assert(proc >= 0);

    lock_val   = rank+1;          // Map into range 1..nproc
    unlock_val = -1 * (rank+1);

    /* mutex <- mutex + rank */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
    MPI_Accumulate(&lock_val, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, MPI_SUM, hdl->window);
    MPI_Win_unlock(proc, hdl->window);

    for (;;) {
        /* read mutex value */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
        MPI_Get(&lock_out, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, hdl->window);
        MPI_Win_unlock(proc, hdl->window);

        ARMCII_Assert(lock_out > 0);
        ARMCII_Assert(lock_out <= nproc*(nproc+1)/2);   // At most the sum of all lock values 1..nproc

        /* We are holding the mutex */
        if (lock_out == rank+1)
            break;

        /* mutex <- mutex - rank */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
        MPI_Accumulate(&unlock_val, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, MPI_SUM, hdl->window);
        MPI_Win_unlock(proc, hdl->window);

        /* Exponential backoff */
        usleep(timeout + rand()%timeout);
        timeout = MIN(timeout*TIMEOUT_MUL, MAX_TIMEOUT);
        if (rand() % nproc == 0)  // Chance to reset timeout
            timeout = 1;

        /* mutex <- mutex + rank */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->window);
        MPI_Accumulate(&lock_val, 1, MPI_LONG, proc, mutex, 1, MPI_LONG, MPI_SUM, hdl->window);
        MPI_Win_unlock(proc, hdl->window);
    }
}
void MPIMutex::unlock(int proc)
{
    int rank, nproc;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nproc);

    byte *buff = (byte*)malloc(nproc*sizeof(byte));
    buff[rank] = 0;

    /* Get all data from the lock_buf, except the byte belonging to
     * me. Set the byte belonging to me to 0. */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, win);

    MPI_Put(&(buff[rank]), 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, win);

    /* Get data to the left of rank */
    if (rank > 0) {
        MPI_Get(buff, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, win);
    }

    /* Get data to the right of rank */
    if (rank < nproc - 1) {
        MPI_Get(&buff[rank+1], nproc-1-rank, MPI_BYTE, proc, rank+1,
                nproc-1-rank, MPI_BYTE, win);
    }

    MPI_Win_unlock(proc, win);

    /* Notify the next waiting process, starting to my right for fairness */
    for (int i = 1; i < nproc; i++) {
        int p = (rank + i) % nproc;
        if (buff[p] == 1) {
            //std::cout << "notifying "<<p<<"[proc = "<<proc<<"]" << std::endl;
            MPI_Send(NULL, 0, MPI_BYTE, p, MPI_MUTEX_TAG+id, comm);
            break;
        }
    }

    //std::cout << "lock released [proc = "<<proc<<"]" << std::endl;
    free(buff);
};
int main(int argc, char *argv[])
{
    int rank, nprocs, A[SIZE2], B[SIZE2], i, j;
    MPI_Comm CommDeuce;
    MPI_Win win;
    int errs = 0;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs < 2) {
        printf("Run this program with 2 or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_split(MPI_COMM_WORLD, (rank < 2), rank, &CommDeuce);

    if (rank < 2) {
        if (rank == 0) {
            for (i = 0; i < SIZE2; i++)
                A[i] = B[i] = i;
            MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, CommDeuce, &win);

            for (j = 0; j < 2; j++) {
                for (i = 0; i < SIZE1; i++) {
                    MPI_Win_lock(MPI_LOCK_SHARED, 1, j == 0 ? 0 : MPI_MODE_NOCHECK, win);
                    MPI_Put(A + i, 1, MPI_INT, 1, i, 1, MPI_INT, win);
                    MPI_Win_unlock(1, win);
                }

                for (i = 0; i < SIZE1; i++) {
                    MPI_Win_lock(MPI_LOCK_SHARED, 1, j == 0 ? 0 : MPI_MODE_NOCHECK, win);
                    MPI_Get(B + i, 1, MPI_INT, 1, SIZE1 + i, 1, MPI_INT, win);
                    MPI_Win_unlock(1, win);
                }
            }

            MPI_Win_free(&win);

            for (i = 0; i < SIZE1; i++)
                if (B[i] != (-4) * (i + SIZE1)) {
                    SQUELCH(printf("Get Error: B[%d] is %d, should be %d\n",
                                   i, B[i], (-4) * (i + SIZE1)););
                    errs++;
                }
        }
static void _mpi_contiguous(const int op, const int target_rank,
                            const _XMP_coarray_t *remote_desc, const void *local,
                            const size_t remote_offset, const size_t local_offset,
                            const size_t transfer_size, const int is_remote_on_acc)
{
    if (transfer_size == 0) return;

    char *laddr = (char*)local + local_offset;
    char *raddr = get_remote_addr(remote_desc, target_rank, is_remote_on_acc) + remote_offset;
    MPI_Win win = get_window(remote_desc, is_remote_on_acc);

    if (op == _XMP_N_COARRAY_PUT) {
        XACC_DEBUG("contiguous_put(local=%p, size=%zd, target=%d, remote=%p, is_acc=%d)",
                   laddr, transfer_size, target_rank, raddr, is_remote_on_acc);
        MPI_Put((void*)laddr, transfer_size, MPI_BYTE, target_rank,
                (MPI_Aint)raddr, transfer_size, MPI_BYTE, win);
        _wait_puts(target_rank, win);
    } else if (op == _XMP_N_COARRAY_GET) {
        XACC_DEBUG("contiguous_get(local=%p, size=%zd, target=%d, remote=%p, is_acc=%d)",
                   laddr, transfer_size, target_rank, raddr, is_remote_on_acc);
        MPI_Get((void*)laddr, transfer_size, MPI_BYTE, target_rank,
                (MPI_Aint)raddr, transfer_size, MPI_BYTE, win);
        _wait_gets(target_rank, win);
    } else {
        _XMP_fatal("invalid coarray operation type");
    }

    /*
    MPI_Request req[2];
    size_t size_multiple128k = (transfer_size / (128*1024)) * (128*1024);
    size_t size_rest = transfer_size - size_multiple128k;
    if(transfer_size >= (128*1024) && size_rest > 0 && size_rest <= (8*1024)){
      XACC_DEBUG("put(src_p=%p, size=%zd, target=%d, dst_p=%p, is_acc=%d) divided! (%d,%d)",
                 laddr, transfer_size, target_rank, raddr, is_dst_on_acc, size128k, size_rest);
      MPI_Rput((void*)laddr, size_multiple128k, MPI_BYTE, target_rank,
               (MPI_Aint)raddr, size_multiple128k, MPI_BYTE, win, req);
      MPI_Rput((void*)(laddr+size_multiple128k), size_rest, MPI_BYTE, target_rank,
               (MPI_Aint)(raddr+size_multiple128k), size_rest, MPI_BYTE, win, req+1);
      MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
    }else{
      MPI_Rput((void*)laddr, transfer_size, MPI_BYTE, target_rank,
               (MPI_Aint)raddr, transfer_size, MPI_BYTE, win, req);
      MPI_Wait(req, MPI_STATUS_IGNORE);
    }
    */
}
int main(int argc, char *argv[])
{
    int rank, nproc, i;
    int errors = 0, all_errors = 0;
    int buf, *my_buf;
    MPI_Win win;
    MPI_Group world_group;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create(&buf, sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN);

    MPI_Comm_group(MPI_COMM_WORLD, &world_group);

    MPI_Win_post(world_group, 0, win);
    MPI_Win_start(world_group, 0, win);

    my_buf = malloc(nproc*sizeof(int));

    for (i = 0; i < nproc; i++) {
        MPI_Get(&my_buf[i], 1, MPI_INT, i, 0, 1, MPI_INT, win);
    }

    /* This should fail, because the window is in an active target access epoch. */
    CHECK_ERR(MPI_Win_start(world_group, 0, win));

    MPI_Win_complete(win);

    /* This should fail, because the window is not in an active target access epoch. */
    CHECK_ERR(MPI_Win_complete(win));

    MPI_Win_wait(win);
    MPI_Win_free(&win);

    free(my_buf);
    MPI_Group_free(&world_group);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();
    return 0;
}
MTEST_THREAD_RETURN_TYPE run_test(void *arg)
{
    int i;
    double *local_b;

    MPI_Alloc_mem(COUNT * sizeof(double), MPI_INFO_NULL, &local_b);

    for (i = 0; i < LOOPS; i++) {
        MPI_Get(local_b, COUNT, MPI_DOUBLE, 0, 0, COUNT, MPI_DOUBLE, win);
        MPI_Win_flush_all(win);
    }

    MPI_Free_mem(local_b);

    return (MTEST_THREAD_RETURN_TYPE) NULL;
}
int main(int argc, char *argv[])
{
    int rank, nprocs, i, *A, *B;
    MPI_Comm CommDeuce;
    MPI_Win win;
    int errs = 0;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs < 2) {
        printf("Run this program with 2 or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_split(MPI_COMM_WORLD, (rank < 2), rank, &CommDeuce);

    if (rank < 2) {
        i = MPI_Alloc_mem(SIZE * sizeof(int), MPI_INFO_NULL, &A);
        if (i) {
            printf("Can't allocate memory in test program\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        i = MPI_Alloc_mem(SIZE * sizeof(int), MPI_INFO_NULL, &B);
        if (i) {
            printf("Can't allocate memory in test program\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        if (rank == 0) {
            for (i = 0; i < SIZE; i++)
                B[i] = 500 + i;
            MPI_Win_create(B, SIZE * sizeof(int), sizeof(int), MPI_INFO_NULL, CommDeuce, &win);
            MPI_Win_fence(0, win);
            for (i = 0; i < SIZE; i++) {
                A[i] = i + 100;
                MPI_Get(&A[i], 1, MPI_INT, 1, i, 1, MPI_INT, win);
            }
            MPI_Win_fence(0, win);
            for (i = 0; i < SIZE; i++)
                if (A[i] != 1000 + i) {
                    SQUELCH(printf("Rank 0: A[%d] is %d, should be %d\n", i, A[i], 1000 + i););
                    errs++;
                }