static int _ZMPI_Alltoall_int_proclists_put(int alloc_mem, int nphases, int *sendbuf, int nsprocs, int *sprocs, int *recvbuf, int nrprocs, int *rprocs, MPI_Comm comm) { int i, p, size, rank, *rcounts_put; MPI_Win win; MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank); if (alloc_mem) MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &rcounts_put); else rcounts_put = recvbuf; if (nrprocs >= 0) for (i = 0; i < nrprocs; ++i) rcounts_put[rprocs[i]] = DEFAULT_INT; else for (i = 0; i < size; ++i) rcounts_put[i] = DEFAULT_INT; MPI_Win_create(rcounts_put, size * sizeof(int), sizeof(int), MPI_INFO_NULL, comm, &win); MPI_Win_fence(MPI_MODE_NOSTORE|MPI_MODE_NOPRECEDE, win); for (p = 0; p < nphases; ++p) { /* printf("%d: phase = %d of %d\n", rank, p, nphases);*/ if (rank % nphases == p) { if (nsprocs >= 0) { for (i = 0; i < nsprocs; ++i) if (sendbuf[sprocs[i]] != DEFAULT_INT) MPI_Put(&sendbuf[sprocs[i]], 1, MPI_INT, sprocs[i], rank, 1, MPI_INT, win); } else { for (i = 0; i < size; ++i) if (sendbuf[i] != DEFAULT_INT) MPI_Put(&sendbuf[i], 1, MPI_INT, i, rank, 1, MPI_INT, win); } } if (p < nphases - 1) MPI_Win_fence(0, win); } MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED, win); MPI_Win_free(&win); if (alloc_mem) { if (nrprocs >= 0) for (i = 0; i < nrprocs; ++i) recvbuf[rprocs[i]] = rcounts_put[rprocs[i]]; else for (i = 0; i < size; ++i) recvbuf[i] = rcounts_put[i]; MPI_Free_mem(rcounts_put); } return MPI_SUCCESS; }
/*Run PUT with Fence */ void run_put_with_fence(int rank, WINDOW type) { double t; int size, i, j; MPI_Aint disp = 0; MPI_Win win; int window_size = WINDOW_SIZE_LARGE; for (size = 1; size <= MAX_SIZE; size = size * 2) { allocate_memory(rank, sbuf_original, rbuf_original, &sbuf, &rbuf, &rbuf, size*window_size, type, &win); #if MPI_VERSION >= 3 if (type == WIN_DYNAMIC) { disp = disp_remote; } #endif if(size > LARGE_MESSAGE_SIZE) { loop = LOOP_LARGE; skip = SKIP_LARGE; } MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); if(rank == 0) { for (i = 0; i < skip + loop; i++) { if (i == skip) { t_start = MPI_Wtime (); } MPI_CHECK(MPI_Win_fence(0, win)); for(j = 0; j < window_size; j++) { MPI_CHECK(MPI_Put(sbuf+(j*size), size, MPI_CHAR, 1, disp + (j * size), size, MPI_CHAR, win)); } MPI_CHECK(MPI_Win_fence(0, win)); } t_end = MPI_Wtime (); t = t_end - t_start; } else { for (i = 0; i < skip + loop; i++) { MPI_CHECK(MPI_Win_fence(0, win)); for(j = 0; j < window_size; j++) { MPI_CHECK(MPI_Put(sbuf+(j*size), size, MPI_CHAR, 0, disp + (j * size), size, MPI_CHAR, win)); } MPI_CHECK(MPI_Win_fence(0, win)); } } MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD)); print_bibw(rank, size, t); free_memory (sbuf, rbuf, win, rank); } }
int do_test(int origin_count, MPI_Datatype origin_type, int result_count, MPI_Datatype result_type, int target_count, MPI_Datatype target_type) { int errs = 0, ret, origin_type_size, result_type_size; ret = MPI_Put(origin_buf, origin_count, origin_type, 1, 0, target_count, target_type, win); if (ret) errs++; ret = MPI_Get(origin_buf, origin_count, origin_type, 1, 0, target_count, target_type, win); if (ret) errs++; ret = MPI_Accumulate(origin_buf, origin_count, origin_type, 1, 0, target_count, target_type, MPI_SUM, win); if (ret) errs++; ret = MPI_Get_accumulate(origin_buf, origin_count, origin_type, result_buf, result_count, result_type, 1, 0, target_count, target_type, MPI_SUM, win); if (ret) errs++; MPI_Type_size(origin_type, &origin_type_size); MPI_Type_size(result_type, &result_type_size); if (origin_count == 0 || origin_type_size == 0) { ret = MPI_Put(NULL, origin_count, origin_type, 1, 0, target_count, target_type, win); if (ret) errs++; ret = MPI_Get(NULL, origin_count, origin_type, 1, 0, target_count, target_type, win); if (ret) errs++; ret = MPI_Accumulate(NULL, origin_count, origin_type, 1, 0, target_count, target_type, MPI_SUM, win); if (ret) errs++; ret = MPI_Get_accumulate(NULL, origin_count, origin_type, result_buf, result_count, result_type, 1, 0, target_count, target_type, MPI_SUM, win); if (ret) errs++; if (result_count == 0 || result_type_size == 0) { ret = MPI_Get_accumulate(NULL, origin_count, origin_type, NULL, result_count, result_type, 1, 0, target_count, target_type, MPI_SUM, win); if (ret) errs++; } } return errs; }
dart_ret_t dart_put( dart_gptr_t gptr, const void * src, size_t nbytes) { MPI_Aint disp_s, disp_rel; MPI_Win win; dart_unit_t target_unitid_abs; uint64_t offset = gptr.addr_or_offs.offset; int16_t seg_id = gptr.segid; target_unitid_abs = gptr.unitid; if (seg_id) { uint16_t index = gptr.flags; dart_unit_t target_unitid_rel; win = dart_win_lists[index]; unit_g2l (index, target_unitid_abs, &target_unitid_rel); if (dart_adapt_transtable_get_disp( seg_id, target_unitid_rel, &disp_s) == -1) { return DART_ERR_INVAL; } disp_rel = disp_s + offset; MPI_Put( src, nbytes, MPI_BYTE, target_unitid_rel, disp_rel, nbytes, MPI_BYTE, win); DART_LOG_DEBUG("dart_put: nbytes:%zu (from collective allocation) " "target unit: %d offset: %"PRIu64"", nbytes, target_unitid_abs, offset); } else { win = dart_win_local_alloc; MPI_Put( src, nbytes, MPI_BYTE, target_unitid_abs, offset, nbytes, MPI_BYTE, win); DART_LOG_DEBUG("dart_put: nbytes:%zu (from local allocation) " "target unit: %d offset: %"PRIu64"", nbytes, target_unitid_abs, offset); } return DART_OK; }
int main(int argc, char **argv){
  int i, me, target;
  unsigned int size;
  double t, t_max;
  MPI_Win win;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Win_create(&send_buf, sizeof(char)*MAX_SIZE, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);
  target = 1 - me;
  MPI_Win_lock_all(0, win);
  init_buf(send_buf, me);
  if(me==0) print_items();

  for(size=1;size<MAX_SIZE+1;size*=2){
    MPI_Barrier(MPI_COMM_WORLD);
    for(i=0;i<LOOP+WARMUP;i++){
      if(WARMUP == i)
        t = wtime();

      if(me == 0){
        MPI_Put(send_buf, size, MPI_CHAR, target, 0, size, MPI_CHAR, win);
        MPI_Win_flush_local(target, win);
        while(send_buf[0] == '0' || send_buf[size-1] == '0'){
          MPI_Win_flush(me, win);
        }
        send_buf[0] = '0';
        send_buf[size-1] = '0';
      } else {
        while(send_buf[0] == '1' || send_buf[size-1] == '1'){
          MPI_Win_flush(me, win);
        }
        send_buf[0] = '1';
        send_buf[size-1] = '1';
        MPI_Put(send_buf, size, MPI_CHAR, target, 0, size, MPI_CHAR, win);
        MPI_Win_flush_local(target, win);
      }
    } // end of LOOP

    t = wtime() - t;
    MPI_Reduce(&t, &t_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if(me == 0) print_results(size, t_max);
  }

  MPI_Win_unlock_all(win);
  MPI_Win_free(&win);
  MPI_Finalize();

  return 0;
}
void exchange(field * temperature, parallel_data * parallel)
{
    MPI_Win_fence(0, temperature->rma_window);

    // Put upwards
    MPI_Put(temperature->data[1], temperature->ny + 2, MPI_DOUBLE,
            parallel->nup, (temperature->ny + 2) * (temperature->nx + 1),
            temperature->ny + 2, MPI_DOUBLE, temperature->rma_window);

    // Put downwards
    MPI_Put(temperature->data[temperature->nx], temperature->ny + 2, MPI_DOUBLE,
            parallel->ndown, 0, temperature->ny + 2, MPI_DOUBLE,
            temperature->rma_window);

    MPI_Win_fence(0, temperature->rma_window);
}
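The exchange() routine above only works if temperature->rma_window was created over the field's halo-padded storage beforehand. Below is a minimal sketch of that setup, assuming a contiguous (nx+2) x (ny+2) row-major layout and sizeof(double) as the displacement unit; the field struct shown here is a stand-in for illustration, not the original definition.

#include <mpi.h>

/* Stand-in for the real field type used by exchange(); only the members
 * that the RMA setup needs are shown. */
typedef struct {
    int nx, ny;           /* interior grid dimensions           */
    double **data;        /* (nx+2) x (ny+2) halo-padded rows   */
    MPI_Win rma_window;   /* window exposing the whole array    */
} field;

/* Expose the contiguous block behind data[0] with a displacement unit of
 * sizeof(double), so the row offsets used in exchange() can be passed
 * directly as target displacements. */
static void create_rma_window(field *temperature, MPI_Comm comm)
{
    MPI_Aint win_size = (MPI_Aint)(temperature->nx + 2) *
                        (temperature->ny + 2) * sizeof(double);

    MPI_Win_create(temperature->data[0], win_size, sizeof(double),
                   MPI_INFO_NULL, comm, &temperature->rma_window);
}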
double message_rate (long * buffer, int size, int iterations, int me, int pairs, int nxtpe, MPI_Win win)
{
    int64_t begin, end;
    int i, offset;

    /*
     * Touch memory
     */
    memset(buffer, size, MAX_MSG_SZ * ITERS_LARGE * sizeof(long));

    MPI_Barrier(MPI_COMM_WORLD);

    if (me < pairs) {
        begin = TIME();

        for (i = 0, offset = 0; i < iterations; i++, offset++) {
            MPI_Put ((buffer + offset*size), size, MPI_LONG, nxtpe,
                     offset*size, size, MPI_LONG, win);
            //MPI_Win_flush_local (nxtpe, win);
        }
        //MPI_Win_flush_all(win);
        MPI_Win_flush(nxtpe, win);
        end = TIME();

        return ((double)iterations * 1e6) / ((double)end - (double)begin);
    }
    return 0;
}
int main(int argc, char *argv[]) { int rank, nprocs, A[SIZE2], B[SIZE2], i; MPI_Win win; int errs = 0; MTest_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&rank); if (nprocs != 2) { printf("Run this program with 2 processes\n"); MPI_Abort(MPI_COMM_WORLD,1); } if (rank == 0) { for (i=0; i<SIZE2; i++) A[i] = B[i] = i; MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); for (i=0; i<SIZE1; i++) { MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win); MPI_Put(A+i, 1, MPI_INT, 1, i, 1, MPI_INT, win); MPI_Win_unlock(1, win); } for (i=0; i<SIZE1; i++) { MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win); MPI_Get(B+i, 1, MPI_INT, 1, SIZE1+i, 1, MPI_INT, win); MPI_Win_unlock(1, win); } MPI_Win_free(&win); for (i=0; i<SIZE1; i++) if (B[i] != (-4)*(i+SIZE1)) { printf("Get Error: B[%d] is %d, should be %d\n", i, B[i], (-4)*(i+SIZE1)); errs++; } } else { /* rank=1 */ for (i=0; i<SIZE2; i++) B[i] = (-4)*i; MPI_Win_create(B, SIZE2*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win); MPI_Win_free(&win); for (i=0; i<SIZE1; i++) { if (B[i] != i) { printf("Put Error: B[%d] is %d, should be %d\n", i, B[i], i); errs++; } } } /* if (rank==0) printf("Done\n");*/ MTest_Finalize(errs); MPI_Finalize(); return 0; }
void MPIMutex::lock(int proc) {
    int rank, nproc, already_locked;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nproc);
    //std::cout << "trying to get lock" << std::endl;

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, win);

    byte *buff = (byte*)malloc(sizeof(byte)*nproc);
    buff[rank] = 1;
    MPI_Put(&(buff[rank]), 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, win);

    /* Get data to the left of rank */
    if (rank > 0) {
        MPI_Get(buff, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, win);
    }
    /* Get data to the right of rank */
    if (rank < nproc - 1) {
        MPI_Get(&(buff[rank+1]), nproc-1-rank, MPI_BYTE, proc, rank+1,
                nproc-1-rank, MPI_BYTE, win);
    }
    MPI_Win_unlock(proc, win);

    /* check if anyone has the lock */
    for (int i = already_locked = 0; i < nproc; i++)
        if (buff[i] && i != rank)
            already_locked = 1;

    /* Wait for notification */
    if (already_locked) {
        MPI_Status status;
        //std::cout << "waiting for notification [proc = "<<proc<<"]" << std::endl;
        MPI_Recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, MPI_MUTEX_TAG+id, comm, &status);
    }
    //std::cout << "lock acquired [proc = "<<proc<<"]" << std::endl;
    free(buff);
};
bool MPIMutex::try_lock(int proc) {
    int rank, nproc, already_locked;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nproc);

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, win);

    byte *buff = (byte*)malloc(sizeof(byte)*nproc);
    buff[rank] = 1;
    MPI_Put(&(buff[rank]), 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, win);

    /* Get data to the left of rank */
    if (rank > 0) {
        MPI_Get(buff, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, win);
    }
    /* Get data to the right of rank */
    if (rank < nproc - 1) {
        MPI_Get(&(buff[rank+1]), nproc-1-rank, MPI_BYTE, proc, rank+1,
                nproc-1-rank, MPI_BYTE, win);
    }
    MPI_Win_unlock(proc, win);

    /* check if anyone has the lock */
    already_locked = 1;   // 1 means we successfully got the lock
    for (int i = 0; i < nproc; i++)
        if (buff[i] && i != rank)
            already_locked = 0;

    free(buff);
    return already_locked;
};
void DO_OP_LOOP(int dst, int iter) { int i, x; switch (OP_TYPE) { case OP_ACC: for (x = 0; x < iter; x++) { for (i = 0; i < NOP; i++) MPI_Accumulate(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE, MPI_SUM, win); MPI_Win_flush(dst, win); } break; case OP_PUT: for (x = 0; x < iter; x++) { for (i = 0; i < NOP; i++) MPI_Put(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE, win); MPI_Win_flush(dst, win); } break; case OP_GET: for (x = 0; x < iter; x++) { for (i = 0; i < NOP; i++) MPI_Get(&locbuf[0], OP_SIZE, MPI_DOUBLE, dst, 0, OP_SIZE, MPI_DOUBLE, win); MPI_Win_flush(dst, win); } break; } }
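DO_OP_LOOP() above calls MPI_Win_flush(), which is only legal inside a passive-target access epoch, so the caller must have opened one on win first. The following is a sketch of that framing, reusing the global names from the snippet; the buffer size and the window allocation are illustrative assumptions, not taken from the original benchmark.

#include <mpi.h>
#include <stdlib.h>

#define BUF_ELEMS 1024            /* illustrative capacity for locbuf/window */

static double *locbuf;            /* origin buffer used by DO_OP_LOOP()      */
static MPI_Win win;               /* window used by DO_OP_LOOP()             */

void DO_OP_LOOP(int dst, int iter);   /* defined above */

static void run_passive_target(int dst, int iter)
{
    double *winbuf = NULL;

    /* Allocate the exposed memory together with the window. */
    MPI_Win_allocate(BUF_ELEMS * sizeof(double), sizeof(double),
                     MPI_INFO_NULL, MPI_COMM_WORLD, &winbuf, &win);
    locbuf = calloc(BUF_ELEMS, sizeof(double));

    MPI_Win_lock_all(0, win);     /* open the passive-target epoch           */
    DO_OP_LOOP(dst, iter);        /* Put/Get/Accumulate + per-target flushes */
    MPI_Win_unlock_all(win);      /* close the epoch                         */

    free(locbuf);
    MPI_Win_free(&win);
}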
/** One-sided put operation with type arguments. Source buffer must be private.
  *
  * @param[in] mreg      Memory region
  * @param[in] src       Address of source data
  * @param[in] src_count Number of elements of the given type at the source
  * @param[in] src_type  MPI datatype of the source elements
  * @param[in] dst       Address of destination buffer
  * @param[in] dst_count Number of elements of the given type at the destination
  * @param[in] dst_type  MPI datatype of the destination elements
  * @param[in] proc      Absolute process id of target process
  * @return              0 on success, non-zero on failure
  */
int gmr_put_typed(gmr_t *mreg, void *src, int src_count, MPI_Datatype src_type,
                  void *dst, int dst_count, MPI_Datatype dst_type, int proc) {
  int        grp_proc;
  gmr_size_t disp;
  MPI_Aint   lb, extent;

  grp_proc = ARMCII_Translate_absolute_to_group(&mreg->group, proc);
  ARMCII_Assert(grp_proc >= 0);

  // Calculate displacement from beginning of the window
  if (dst == MPI_BOTTOM)
    disp = 0;
  else
    disp = (gmr_size_t) ((uint8_t*)dst - (uint8_t*)mreg->slices[proc].base);

  // Perform checks
  MPI_Type_get_true_extent(dst_type, &lb, &extent);
  ARMCII_Assert(mreg->lock_state != GMR_LOCK_UNLOCKED);
  ARMCII_Assert_msg(disp >= 0 && disp < mreg->slices[proc].size, "Invalid remote address");
  ARMCII_Assert_msg(disp + dst_count*extent <= mreg->slices[proc].size, "Transfer is out of range");

  MPI_Put(src, src_count, src_type, grp_proc, (MPI_Aint) disp, dst_count, dst_type, mreg->window);

  return 0;
}
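Stripped of the ARMCI-MPI bookkeeping, the core of gmr_put_typed() is an MPI_Put whose target displacement is the byte offset of dst within the slice the target exposed. Below is a plain-MPI sketch of that displacement calculation; put_at_remote_addr and its parameters are hypothetical names, and the window is assumed to use byte addressing (disp_unit 1) like the one above.

#include <mpi.h>
#include <stdint.h>

/* Put 'count' ints at the remote virtual address 'dst', given the base
 * address 'remote_base' that rank 'proc' attached to 'win'.  This mirrors
 * how gmr_put_typed() derives disp from mreg->slices[proc].base. */
static int put_at_remote_addr(const int *src, int count, void *dst,
                              void *remote_base, int proc, MPI_Win win)
{
    MPI_Aint disp = (MPI_Aint)((uint8_t *)dst - (uint8_t *)remote_base);
    return MPI_Put(src, count, MPI_INT, proc, disp, count, MPI_INT, win);
}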
int main(int argc, char *argv[]) { int errs = 0, err; int rank, size; int *buf, bufsize; int *result; int *rmabuf, rsize, rcount; MPI_Comm comm; MPI_Win win; MPI_Request req; MPI_Datatype derived_dtp; MTest_Init(&argc, &argv); bufsize = 256 * sizeof(int); buf = (int *) malloc(bufsize); if (!buf) { fprintf(stderr, "Unable to allocated %d bytes\n", bufsize); MPI_Abort(MPI_COMM_WORLD, 1); } result = (int *) malloc(bufsize); if (!result) { fprintf(stderr, "Unable to allocated %d bytes\n", bufsize); MPI_Abort(MPI_COMM_WORLD, 1); } rcount = 16; rsize = rcount * sizeof(int); rmabuf = (int *) malloc(rsize); if (!rmabuf) { fprintf(stderr, "Unable to allocated %d bytes\n", rsize); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Type_contiguous(2, MPI_INT, &derived_dtp); MPI_Type_commit(&derived_dtp); /* The following loop is used to run through a series of communicators * that are subsets of MPI_COMM_WORLD, of size 1 or greater. */ while (MTestGetIntracommGeneral(&comm, 1, 1)) { int count = 0; if (comm == MPI_COMM_NULL) continue; /* Determine the sender and receiver */ MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); MPI_Win_create(buf, bufsize, 2 * sizeof(int), MPI_INFO_NULL, comm, &win); /* To improve reporting of problems about operations, we * change the error handler to errors return */ MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN); /** TEST OPERATIONS USING ACTIVE TARGET (FENCE) SYNCHRONIZATION **/ MPI_Win_fence(0, win); TEST_FENCE_OP("Put", MPI_Put(rmabuf, count, MPI_INT, TARGET, 0, count, MPI_INT, win); ); TEST_FENCE_OP("Get", MPI_Get(rmabuf, count, MPI_INT, TARGET, 0, count, MPI_INT, win); );
void benchmark (long * msg_buffer, int me, int pairs, int nxtpe, MPI_Win win) { static double mr, mr_sum; int iters; if (msg_buffer == NULL) { printf("Input buffer is NULL, no reason to proceed\n"); exit(-1); } /* * Warmup */ if (me < pairs) { for (int i = 0; i < ITERS_LARGE; i += 1) { MPI_Put ((msg_buffer + i*MAX_MSG_SZ), MAX_MSG_SZ, MPI_LONG, nxtpe, i*MAX_MSG_SZ, MAX_MSG_SZ, MPI_LONG, win); MPI_Win_flush_local (nxtpe, win); } } MPI_Win_flush_all(win); MPI_Barrier(MPI_COMM_WORLD); /* * Benchmark */ for (long size = 1; size <= MAX_MSG_SZ; size <<= 1) { iters = size < LARGE_THRESHOLD ? ITERS_SMALL : ITERS_LARGE; mr = message_rate(msg_buffer, size, iters, me, pairs, nxtpe, win); MPI_Reduce(&mr, &mr_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); print_message_rate(size, mr_sum, me); } }
static void _mpi_scalar_mput(const int target_rank,
                             const _XMP_coarray_t *dst_desc, const void *src,
                             const size_t dst_offset, const size_t src_offset,
                             const int dst_dims,
                             const _XMP_array_section_t *dst_info,
                             const bool is_dst_on_acc)
{
  int allelmt_dim = _XMP_get_dim_of_allelmts(dst_dims, dst_info);
  size_t element_size = dst_desc->elmt_size;
  size_t allelmt_size = (allelmt_dim == dst_dims)? element_size : dst_info[allelmt_dim].distance * dst_info[allelmt_dim].elmts;
  char *laddr = (allelmt_dim == dst_dims)? ((char*)src + src_offset) : _XMP_alloc(allelmt_size);
  char *raddr = get_remote_addr(dst_desc, target_rank, is_dst_on_acc) + dst_offset;
  MPI_Win win = get_window(dst_desc, is_dst_on_acc);

  XACC_DEBUG("scalar_mput(src_p=%p, size=%zd, target=%d, dst_p=%p, is_acc=%d)", laddr, element_size, target_rank, raddr, is_dst_on_acc);
  XACC_DEBUG("allelmt_dim=%d, dst_dims=%d", allelmt_dim, dst_dims);

  if(allelmt_dim != dst_dims){
    // mcopy
    _XMP_array_section_t info;
    info.start = 0;
    info.length = allelmt_size/element_size;
    info.stride = 1;
    info.elmts = info.length;
    info.distance = element_size;
    _XMP_stride_memcpy_1dim(laddr, (char*)src+src_offset, &info, element_size, _XMP_SCALAR_MCOPY);
    XACC_DEBUG("mcopy(%lld, %lld, %lld), %lld",info.start, info.length, info.stride, info.elmts);
  }

  long long idxs[allelmt_dim+1];
  for(int i = 0; i < allelmt_dim+1; i++) idxs[i]=0;

  while(1){
    size_t offset = 0;
    for(int i = 0; i < allelmt_dim; i++){
      offset += dst_info[i].distance * idxs[i+1] * dst_info[i].stride;
    }

    MPI_Put((void*)laddr, allelmt_size, MPI_BYTE, target_rank,
            (MPI_Aint)(raddr+offset), allelmt_size, MPI_BYTE, win);

    ++idxs[allelmt_dim];
    for(int i = allelmt_dim-1; i >= 0; i--){
      long long length = dst_info[i].length;
      if(idxs[i+1] >= length){
        idxs[i+1] -= length;
        ++idxs[i];
      }else{
        break;
      }
    }
    if(idxs[0] > 0){
      break;
    }
  }
  _wait_puts(target_rank, win);

  if(allelmt_dim != dst_dims){
    _XMP_free(laddr);
  }
}
void SweptDiscretization2D::updateRemoteConstants(unsigned char *buffer) { void *sendingBuffer = NULL; FILE *inFile = NULL; if(pg.rank == 0) { int bufferSize = this->remoteConstantsCount * n * n * pg.mpiSize * sizeof(double); MPI_Alloc_mem(bufferSize, MPI_INFO_NULL, &sendingBuffer); for(int r=0;r<pg.mpiSize;r++) { double *processing = (double*)sendingBuffer + (this->remoteConstantsCount * n * n * r); int jIndex = (r % (pg.xNodes*pg.yNodes)) / pg.xNodes; int iIndex = r % pg.xNodes; for(int j=0;j<n;j++) { for(int i=0;i<n;i++) { int iGlobal = n*iIndex + (i); int jGlobal = n*jIndex + (j); int index = this->ijToConstantIndex(i,j); int globalIndex = this->remoteConstantsCount * (iGlobal + jGlobal * n * pg.xNodes); for(int k=0;k<this->remoteConstantsCount;k++) { processing[index + k] = ((double*)buffer)[k + globalIndex]; } } } } } MPI_Win_fence(MPI_MODE_NOPRECEDE, this->constantsWindow); if(pg.rank == 0) { for(int r=0;r<pg.mpiSize;r++) { MPI_Put((unsigned char*)sendingBuffer + (r * remoteConstantsCount * n * n * sizeof(double)), remoteConstantsCount * n * n * sizeof(double), MPI_BYTE, r, 0, remoteConstantsCount * n * n * sizeof(double), MPI_BYTE, constantsWindow); } } MPI_Win_fence((MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED), this->constantsWindow); if(pg.rank == 0) { MPI_Free_mem(sendingBuffer); } for(int i=1;i<n+1;i++) { for(int j=1;j<n+1;j++) { for(int k=0;k<this->remoteConstantsCount;k++) { int windowIndex = this->ijToConstantIndex(i-1,j-1); int foundationIndex = this->ijToIndex(i,j); this->foundation[foundationIndex + k] = this->remoteConstants[windowIndex + k]; } } } }
void add_scatter_constant_request(scatter_constant * sc, int remote_rank, size_t remote_idx, size_t req_id)
{
    assert(sc->valid);
#pragma omp critical
    MPI_Put(sc->constant, 1, sc->datatype, remote_rank, remote_idx, 1, sc->datatype, sc->win);
}
void add_scatter_request(scatter* sc, const char* local_data, int remote_rank, size_t remote_idx, size_t req_id)
{
    assert (sc->valid);
    assert (sc->request_count < sc->nrequests_max);
    memcpy(sc->send_data + sc->request_count * sc->elt_size, local_data, sc->elt_size);
#pragma omp critical
    MPI_Put(sc->send_data + sc->request_count * sc->elt_size, 1, sc->datatype,
            remote_rank, remote_idx, 1, sc->datatype, sc->win);
    ++sc->request_count;
}
int main(int argc, char *argv[]) { int errs = 0, err; int rank, size; int *buf, bufsize; int *result; int *rmabuf, rsize, rcount; MPI_Comm comm; MPI_Win win; MPI_Request req; MTest_Init(&argc, &argv); bufsize = 256 * sizeof(int); buf = (int *) malloc(bufsize); if (!buf) { fprintf(stderr, "Unable to allocated %d bytes\n", bufsize); MPI_Abort(MPI_COMM_WORLD, 1); } result = (int *) malloc(bufsize); if (!result) { fprintf(stderr, "Unable to allocated %d bytes\n", bufsize); MPI_Abort(MPI_COMM_WORLD, 1); } rcount = 16; rsize = rcount * sizeof(int); rmabuf = (int *) malloc(rsize); if (!rmabuf) { fprintf(stderr, "Unable to allocated %d bytes\n", rsize); MPI_Abort(MPI_COMM_WORLD, 1); } /* The following illustrates the use of the routines to * run through a selection of communicators and datatypes. * Use subsets of these for tests that do not involve combinations * of communicators, datatypes, and counts of datatypes */ while (MTestGetIntracommGeneral(&comm, 1, 1)) { if (comm == MPI_COMM_NULL) continue; /* Determine the sender and receiver */ MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); MPI_Win_create(buf, bufsize, sizeof(int), MPI_INFO_NULL, comm, &win); /* To improve reporting of problems about operations, we * change the error handler to errors return */ MPI_Win_set_errhandler(win, MPI_ERRORS_RETURN); /** TEST OPERATIONS USING ACTIVE TARGET (FENCE) SYNCHRONIZATION **/ MPI_Win_fence(0, win); TEST_FENCE_OP("Put", MPI_Put(rmabuf, rcount, MPI_INT, MPI_PROC_NULL, 0, rcount, MPI_INT, win); ); TEST_FENCE_OP("Get", MPI_Get(rmabuf, rcount, MPI_INT, MPI_PROC_NULL, 0, rcount, MPI_INT, win); );
void Master::UpdateCriticalAttribute(Attribute attr, AgentId agent_id, AgentType agent_type, void* location) { AgentType type = GlobalToLocalType(agent_id); auto p = std::make_pair(type, attr); size_t target_disp = critical_agents_offsets_.at(agent_id) + critical_attributes_offsets_.at(p); MPI_Datatype attribute_type = attributes_MPI_types_.at(p); for (MasterId id=0; id<nb_masters_; id++) { MPI_Put(location, 1, attribute_type, id, target_disp, 1, attribute_type, critical_window_); } }
/** Lock a mutex. * * @param[in] hdl Mutex group that the mutex belongs to * @param[in] mutex Desired mutex number [0..count-1] * @param[in] proc Rank of process where the mutex lives * @return MPI status */ int MPIX_Mutex_lock(MPIX_Mutex hdl, int mutex, int proc) { int rank, nproc, already_locked, i; uint8_t *buf; assert(mutex >= 0 && mutex < hdl->max_count); MPI_Comm_rank(hdl->comm, &rank); MPI_Comm_size(hdl->comm, &nproc); assert(proc >= 0 && proc < nproc); buf = malloc(nproc * sizeof(uint8_t)); assert(buf != NULL); buf[rank] = 1; /* Get all data from the lock_buf, except the byte belonging to * me. Set the byte belonging to me to 1. */ MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->windows[mutex]); MPI_Put(&buf[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, hdl->windows[mutex]); /* Get data to the left of rank */ if (rank > 0) { MPI_Get(buf, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, hdl->windows[mutex]); } /* Get data to the right of rank */ if (rank < nproc - 1) { MPI_Get(&buf[rank + 1], nproc - 1 - rank, MPI_BYTE, proc, rank + 1, nproc - 1 - rank, MPI_BYTE, hdl->windows[mutex]); } MPI_Win_unlock(proc, hdl->windows[mutex]); assert(buf[rank] == 1); for (i = already_locked = 0; i < nproc; i++) if (buf[i] && i != rank) already_locked = 1; /* Wait for notification */ if (already_locked) { MPI_Status status; debug_print("waiting for notification [proc = %d, mutex = %d]\n", proc, mutex); MPI_Recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, MPIX_MUTEX_TAG + mutex, hdl->comm, &status); } debug_print("lock acquired [proc = %d, mutex = %d]\n", proc, mutex); free(buf); return MPI_SUCCESS; }
int main(int argc, char *argv[]) { int rank, nprocs, A[SIZE], B[SIZE], i; MPI_Comm CommDeuce; MPI_Win win; int errs = 0; MTest_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (nprocs < 2) { printf("Run this program with 2 or more processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Comm_split(MPI_COMM_WORLD, (rank < 2), rank, &CommDeuce); if (rank < 2) { if (rank == 0) { for (i = 0; i < SIZE; i++) A[i] = B[i] = i; } else { for (i = 0; i < SIZE; i++) { A[i] = (-3) * i; B[i] = (-4) * i; } } MPI_Win_create(B, SIZE * sizeof(int), sizeof(int), MPI_INFO_NULL, CommDeuce, &win); MPI_Win_fence(0, win); if (rank == 0) { for (i = 0; i < SIZE - 1; i++) MPI_Put(A + i, 1, MPI_INT, 1, i, 1, MPI_INT, win); } else { for (i = 0; i < SIZE - 1; i++) MPI_Get(A + i, 1, MPI_INT, 0, i, 1, MPI_INT, win); MPI_Accumulate(A + i, 1, MPI_INT, 0, i, 1, MPI_INT, MPI_SUM, win); } MPI_Win_fence(0, win); if (rank == 1) { for (i = 0; i < SIZE - 1; i++) { if (A[i] != B[i]) { SQUELCH(printf("Put/Get Error: A[i]=%d, B[i]=%d\n", A[i], B[i]);); errs++; } } }
int MPIX_Put_x(const void *origin_addr, MPI_Count origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, MPI_Count target_count, MPI_Datatype target_datatype, MPI_Win win) { int rc = MPI_SUCCESS; if (likely (origin_count <= bigmpi_int_max && target_count <= bigmpi_int_max)) { rc = MPI_Put(origin_addr, origin_count, origin_datatype, target_rank, target_disp, target_count, target_datatype, win); } else { MPI_Datatype neworigin_datatype, newtarget_datatype; MPIX_Type_contiguous_x(origin_count, origin_datatype, &neworigin_datatype); MPIX_Type_contiguous_x(target_count, target_datatype, &newtarget_datatype); MPI_Type_commit(&neworigin_datatype); MPI_Type_commit(&newtarget_datatype); rc = MPI_Put(origin_addr, 1, neworigin_datatype, target_rank, target_disp, 1, newtarget_datatype, win); MPI_Type_free(&neworigin_datatype); MPI_Type_free(&newtarget_datatype); } return rc; }
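MPIX_Put_x above falls back to a single pair of contiguous derived datatypes only when a count exceeds what a C int can carry, so callers can use it unconditionally for both small and large transfers. A hedged usage sketch follows; the bigmpi.h header name and the window setup are assumptions, not taken from this file.

#include <mpi.h>
#include "bigmpi.h"               /* assumed header declaring MPIX_Put_x */

/* Put 'n' chars (possibly more than INT_MAX of them) into rank 'target'
 * at displacement 0, assuming 'win' exposes at least 'n' bytes with
 * disp_unit 1 and an access epoch (fence or lock) is currently open. */
static int big_put(const char *buf, MPI_Count n, int target, MPI_Win win)
{
    return MPIX_Put_x(buf, n, MPI_CHAR, target, 0, n, MPI_CHAR, win);
}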
int main(int argc, char *argv[]) { int rank, destrank, nprocs, *A, *B, i; MPI_Comm CommDeuce; MPI_Group comm_group, group; MPI_Win win; int errs = 0; MTest_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&rank); if (nprocs < 2) { printf("Run this program with 2 or more processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Comm_split(MPI_COMM_WORLD, (rank < 2), rank, &CommDeuce); if (rank < 2) { i = MPI_Alloc_mem(SIZE2 * sizeof(int), MPI_INFO_NULL, &A); if (i) { printf("Can't allocate memory in test program\n"); MPI_Abort(MPI_COMM_WORLD, 1); } i = MPI_Alloc_mem(SIZE2 * sizeof(int), MPI_INFO_NULL, &B); if (i) { printf("Can't allocate memory in test program\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Comm_group(CommDeuce, &comm_group); if (rank == 0) { for (i=0; i<SIZE2; i++) A[i] = B[i] = i; MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, CommDeuce, &win); destrank = 1; MPI_Group_incl(comm_group, 1, &destrank, &group); MPI_Win_start(group, 0, win); for (i=0; i<SIZE1; i++) MPI_Put(A+i, 1, MPI_INT, 1, i, 1, MPI_INT, win); for (i=0; i<SIZE1; i++) MPI_Get(B+i, 1, MPI_INT, 1, SIZE1+i, 1, MPI_INT, win); MPI_Win_complete(win); for (i=0; i<SIZE1; i++) if (B[i] != (-4)*(i+SIZE1)) { SQUELCH( printf("Get Error: B[i] is %d, should be %d\n", B[i], (-4)*(i+SIZE1)); ); errs++; }
/** Unlock a mutex. * * @param[in] hdl Mutex group that the mutex belongs to. * @param[in] mutex Desired mutex number [0..count-1] * @param[in] proc Rank of process where the mutex lives * @return MPI status */ int MPIX_Mutex_unlock(MPIX_Mutex hdl, int mutex, int proc) { int rank, nproc, i; uint8_t *buf; assert(mutex >= 0 && mutex < hdl->max_count); MPI_Comm_rank(hdl->comm, &rank); MPI_Comm_size(hdl->comm, &nproc); assert(proc >= 0 && proc < nproc); buf = malloc(nproc * sizeof(uint8_t)); assert(buf != NULL); buf[rank] = 0; /* Get all data from the lock_buf, except the byte belonging to * me. Set the byte belonging to me to 0. */ MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->windows[mutex]); MPI_Put(&buf[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, hdl->windows[mutex]); /* Get data to the left of rank */ if (rank > 0) { MPI_Get(buf, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, hdl->windows[mutex]); } /* Get data to the right of rank */ if (rank < nproc - 1) { MPI_Get(&buf[rank + 1], nproc - 1 - rank, MPI_BYTE, proc, rank + 1, nproc - 1 - rank, MPI_BYTE, hdl->windows[mutex]); } MPI_Win_unlock(proc, hdl->windows[mutex]); assert(buf[rank] == 0); /* Notify the next waiting process, starting to my right for fairness */ for (i = 1; i < nproc; i++) { int p = (rank + i) % nproc; if (buf[p] == 1) { debug_print("notifying %d [proc = %d, mutex = %d]\n", p, proc, mutex); MPI_Send(NULL, 0, MPI_BYTE, p, MPI_MUTEX_TAG + mutex, hdl->comm); break; } } debug_print("lock released [proc = %d, mutex = %d]\n", proc, mutex); free(buf); return MPI_SUCCESS; }
void test_put(void) { int me, nproc; MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Win dst_win; double *dst_buf; double src_buf[MAXELEMS]; int i, j; MPI_Alloc_mem(sizeof(double) * nproc * MAXELEMS, MPI_INFO_NULL, &dst_buf); MPI_Win_create(dst_buf, sizeof(double) * nproc * MAXELEMS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &dst_win); for (i = 0; i < MAXELEMS; i++) src_buf[i] = me + 1.0; MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, dst_win); for (i = 0; i < nproc * MAXELEMS; i++) dst_buf[i] = 0.0; MPI_Win_unlock(me, dst_win); MPI_Barrier(MPI_COMM_WORLD); for (i = 0; i < nproc; i++) { int target = i; for (j = 0; j < COUNT; j++) { if (verbose) printf("%2d -> %2d [%2d]\n", me, target, j); MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win); MPI_Put(&src_buf[j], sizeof(double), MPI_BYTE, target, (me * MAXELEMS + j) * sizeof(double), sizeof(double), MPI_BYTE, dst_win); MPI_Win_unlock(target, dst_win); } for (j = 0; j < COUNT; j++) { if (verbose) printf("%2d <- %2d [%2d]\n", me, target, j); MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win); MPI_Get(&src_buf[j], sizeof(double), MPI_BYTE, target, (me * MAXELEMS + j) * sizeof(double), sizeof(double), MPI_BYTE, dst_win); MPI_Win_unlock(target, dst_win); } } MPI_Barrier(MPI_COMM_WORLD); MPI_Win_free(&dst_win); MPI_Free_mem(dst_buf); }
/** Lock a mutex. * * @param[in] hdl Mutex group that the mutex belongs to. * @param[in] mutex Desired mutex number [0..count-1] * @param[in] world_proc Absolute ID of process where the mutex lives */ void ARMCIX_Lock_hdl(armcix_mutex_hdl_t hdl, int mutex, int world_proc) { int rank, nproc, already_locked, i, proc; uint8_t *buf; ARMCII_Assert(mutex >= 0 && mutex < hdl->max_count); MPI_Comm_rank(hdl->grp.comm, &rank); MPI_Comm_size(hdl->grp.comm, &nproc); /* User gives us the absolute ID. Translate to the rank in the mutex's group. */ proc = ARMCII_Translate_absolute_to_group(&hdl->grp, world_proc); ARMCII_Assert(proc >= 0); buf = malloc(nproc*sizeof(uint8_t)); ARMCII_Assert(buf != NULL); buf[rank] = 1; /* Get all data from the lock_buf, except the byte belonging to * me. Set the byte belonging to me to 1. */ MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->windows[mutex]); MPI_Put(&buf[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, hdl->windows[mutex]); /* Get data to the left of rank */ if (rank > 0) { MPI_Get(buf, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, hdl->windows[mutex]); } /* Get data to the right of rank */ if (rank < nproc - 1) { MPI_Get(&buf[rank+1], nproc-1-rank, MPI_BYTE, proc, rank + 1, nproc-1-rank, MPI_BYTE, hdl->windows[mutex]); } MPI_Win_unlock(proc, hdl->windows[mutex]); ARMCII_Assert(buf[rank] == 1); for (i = already_locked = 0; i < nproc; i++) if (buf[i] && i != rank) already_locked = 1; /* Wait for notification */ if (already_locked) { MPI_Status status; ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "waiting for notification [proc = %d, mutex = %d]\n", proc, mutex); MPI_Recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, ARMCI_MUTEX_TAG+mutex, hdl->grp.comm, &status); } ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "lock acquired [proc = %d, mutex = %d]\n", proc, mutex); free(buf); }
/* * Class: mpi_Win * Method: put * Signature: (JLjava/lang/Object;IJIIIJI)V */ JNIEXPORT void JNICALL Java_mpi_Win_put( JNIEnv *env, jobject jthis, jlong win, jobject origin, jint orgCount, jlong orgType, jint targetRank, jint targetDisp, jint targetCount, jlong targetType, jint baseType) { void *orgPtr = (*env)->GetDirectBufferAddress(env, origin); int rc = MPI_Put(orgPtr, orgCount, (MPI_Datatype)orgType, targetRank, (MPI_Aint)targetDisp, targetCount, (MPI_Datatype)targetType, (MPI_Win)win); ompi_java_exceptionCheck(env, rc); }
MTEST_THREAD_RETURN_TYPE run_test(void *arg) { int i; for (i = 0; i < LOOPS; i++) { /* send a global variable, rather than a stack variable, so * other threads can access the address during flush */ MPI_Put(&dummy, 1, MPI_INT, 0, 0, 1, MPI_INT, win); MPI_Win_flush(0, win); } return (MTEST_THREAD_RETURN_TYPE) NULL; }
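run_test() above issues MPI_Put and MPI_Win_flush concurrently from several threads, which presupposes MPI_THREAD_MULTIPLE and a passive-target epoch opened on win before the threads start. The following is a minimal pthreads sketch of that surrounding setup; the thread count, window contents, and the plain void* prototype for run_test are placeholders rather than the actual test harness.

#include <mpi.h>
#include <pthread.h>

#define NTHREADS 4                /* illustrative */

int dummy;                        /* origin of the puts in run_test() */
MPI_Win win;                      /* window targeted by run_test()    */

void *run_test(void *arg);        /* thread body shown above          */

int main(int argc, char **argv)
{
    int provided;
    int baseval = 0;
    pthread_t threads[NTHREADS];

    /* Concurrent MPI calls from multiple threads need MPI_THREAD_MULTIPLE. */
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);

    MPI_Win_create(&baseval, sizeof(int), sizeof(int), MPI_INFO_NULL,
                   MPI_COMM_WORLD, &win);

    /* Shared lock on rank 0 so MPI_Win_flush(0, win) is legal in run_test(). */
    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);

    for (int i = 0; i < NTHREADS; i++)
        pthread_create(&threads[i], NULL, run_test, NULL);
    for (int i = 0; i < NTHREADS; i++)
        pthread_join(threads[i], NULL);

    MPI_Win_unlock(0, win);
    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}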
/** Unlock a mutex. * * @param[in] hdl Mutex group that the mutex belongs to. * @param[in] mutex Desired mutex number [0..count-1] * @param[in] world_proc Absolute ID of process where the mutex lives */ void ARMCIX_Unlock_hdl(armcix_mutex_hdl_t hdl, int mutex, int world_proc) { int rank, nproc, i, proc; uint8_t *buf; ARMCII_Assert(mutex >= 0 && mutex < hdl->max_count); MPI_Comm_rank(hdl->grp.comm, &rank); MPI_Comm_size(hdl->grp.comm, &nproc); proc = ARMCII_Translate_absolute_to_group(&hdl->grp, world_proc); ARMCII_Assert(proc >= 0); buf = malloc(nproc*sizeof(uint8_t)); buf[rank] = 0; /* Get all data from the lock_buf, except the byte belonging to * me. Set the byte belonging to me to 0. */ MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, hdl->windows[mutex]); MPI_Put(&buf[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, hdl->windows[mutex]); /* Get data to the left of rank */ if (rank > 0) { MPI_Get(buf, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, hdl->windows[mutex]); } /* Get data to the right of rank */ if (rank < nproc - 1) { MPI_Get(&buf[rank+1], nproc-1-rank, MPI_BYTE, proc, rank + 1, nproc-1-rank, MPI_BYTE, hdl->windows[mutex]); } MPI_Win_unlock(proc, hdl->windows[mutex]); ARMCII_Assert(buf[rank] == 0); /* Notify the next waiting process, starting to my right for fairness */ for (i = 1; i < nproc; i++) { int p = (rank + i) % nproc; if (buf[p] == 1) { ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "notifying %d [proc = %d, mutex = %d]\n", p, proc, mutex); MPI_Send(NULL, 0, MPI_BYTE, p, ARMCI_MUTEX_TAG+mutex, hdl->grp.comm); break; } } ARMCII_Dbg_print(DEBUG_CAT_MUTEX, "lock released [proc = %d, mutex = %d]\n", proc, mutex); free(buf); }