int main(int argc, char *argv[]) { int errs = 0, err; int j, count; char *ap; MTest_Init(&argc, &argv); MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN); for (count = 1; count < 128000; count *= 2) { err = MPI_Alloc_mem(count, MPI_INFO_NULL, &ap); if (err) { int errclass; /* An error of MPI_ERR_NO_MEM is allowed */ MPI_Error_class(err, &errclass); if (errclass != MPI_ERR_NO_MEM) { errs++; MTestPrintError(err); } } else { /* Access all of this memory */ for (j = 0; j < count; j++) { ap[j] = (char) (j & 0x7f); } MPI_Free_mem(ap); } } MTest_Finalize(errs); return MTestReturnValue(errs); }
/** Create a mutex group.  Collective.
  *
  * The group's communicator is duplicated, `count` longs are allocated with
  * MPI_Alloc_mem and zeroed, and a window is created over them (an empty
  * window when count is not positive).
  *
  * @param[in] count  Number of mutexes to create on the calling process
  * @param[in] pgroup ARMCI group whose communicator is duplicated for the mutexes
  * @return           Handle to the mutex group
  */
armcix_mutex_hdl_t ARMCIX_Create_mutexes_hdl(int count, ARMCI_Group *pgroup) {
  int ierr, i;
  armcix_mutex_hdl_t hdl;

  hdl = malloc(sizeof(struct armcix_mutex_hdl_s));
  ARMCII_Assert(hdl != NULL);

  ierr = MPI_Comm_dup(pgroup->comm, &hdl->comm);
  ARMCII_Assert(ierr == MPI_SUCCESS);

  /* The handle comes from malloc, so base holds garbage until it is set here.
   * Clearing it first makes the NULL check below meaningful even if
   * MPI_Alloc_mem fails without writing to it. */
  hdl->base = NULL;

  if (count > 0) {
    ierr = MPI_Alloc_mem(count*sizeof(long), MPI_INFO_NULL, &hdl->base);
    ARMCII_Assert(ierr == MPI_SUCCESS);
    ARMCII_Assert(hdl->base != NULL);
  }

  hdl->count = count;

  /* Initialize mutexes to 0 (no iterations when count <= 0, so base may be NULL) */
  for (i = 0; i < count; i++)
    hdl->base[i] = 0;

  ierr = MPI_Win_create(hdl->base, count*sizeof(long), sizeof(long) /* displacement size */,
                        MPI_INFO_NULL, hdl->comm, &hdl->window);
  ARMCII_Assert(ierr == MPI_SUCCESS);

  return hdl;
}
/** Prepare a set of buffers for use with a get operation. The returned set of * buffers is guaranteed to be in private space. Copies will be made if needed, * the result should be completed by finish. * * @param[in] orig_bufs Original set of buffers. * @param[out] new_bufs Pointer to the set of private buffers. * @param[in] count Number of entries in the buffer list. * @param[in] size The size of the buffers (all are of the same size). * @return Number of buffers that were moved. */ int ARMCII_Buf_prepare_write_vec(void **orig_bufs, void ***new_bufs_ptr, int count, int size) { int num_moved = 0; if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) { void **new_bufs = malloc(count*sizeof(void*)); int i; for (i = 0; i < count; i++) new_bufs[i] = NULL; for (i = 0; i < count; i++) { // Check if the destination buffer is within a shared region. If not, create // a temporary private buffer to hold the result. gmr_t *mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank); if (mreg != NULL) { MPI_Alloc_mem(size, MPI_INFO_NULL, &new_bufs[i]); ARMCII_Assert(new_bufs[i] != NULL); num_moved++; } else { new_bufs[i] = orig_bufs[i]; } } *new_bufs_ptr = new_bufs; } else { *new_bufs_ptr = orig_bufs; } return num_moved; }
int main (int argc,char *argv[]) { int i; double w[NEL]; MPI_Aint win_size,warr_size; MPI_Win *win; win_size=sizeof(MPI_Win); warr_size=sizeof(MPI_DOUBLE)*NEL; MPI_Init (&argc, &argv); for(i=0;i<NTIMES;i++) { MPI_Alloc_mem(win_size,MPI_INFO_NULL,&win); MPI_Win_create(w,warr_size,sizeof(double),MPI_INFO_NULL,MPI_COMM_WORLD,win); MPI_Win_free(win); MPI_Free_mem(win); } MPI_Finalize(); return 0; }
void *mpp_alloc (size_t len) { MPI_Info info; void *buf = NULL; #if HAVE_MPI_ALLOC_MEM if (use_mpi_alloc) { MPI_Info_create (&info); #if 0 MPI_Info_set (info, "alignment", "4096"); MPI_Info_set (info, "type", "private"); #endif MPI_Alloc_mem (len, info, &buf); MPI_Info_free (&info); } else #endif buf = malloc(len); if (buf == NULL) { fprintf (stderr, "Could not allocate %d byte buffer\n", len); MPI_Abort (MPI_COMM_WORLD, -1); } return buf; }
/*
 * Push constants held by rank 0 into every rank's constants window, then copy
 * them into the local foundation array.
 *
 * On rank 0, `buffer` is read as an array of doubles and repacked into one
 * chunk of remoteConstantsCount * n * n doubles per rank (indexed locally via
 * ijToConstantIndex, globally by the rank's i/j position in the
 * pg.xNodes x pg.yNodes grid).  Between two fences on constantsWindow, rank 0
 * MPI_Puts chunk r, as raw bytes, to displacement 0 on rank r.  The staging
 * buffer is freed on rank 0 after the closing fence.
 *
 * Afterwards every rank copies remoteConstants (at ijToConstantIndex(i-1,j-1))
 * into foundation (at ijToIndex(i,j)) for i, j in 1..n.
 * Presumably remoteConstants is the memory exposed through constantsWindow --
 * TODO confirm against the window creation code.
 *
 * buffer is only dereferenced on rank 0.
 *
 * NOTE(review): inFile is declared but never used.
 * NOTE(review): bufferSize is an int computed from a product that includes
 * sizeof(double); large grids may not fit -- confirm the expected sizes.
 */
void SweptDiscretization2D::updateRemoteConstants(unsigned char *buffer) { void *sendingBuffer = NULL; FILE *inFile = NULL; if(pg.rank == 0) { int bufferSize = this->remoteConstantsCount * n * n * pg.mpiSize * sizeof(double); MPI_Alloc_mem(bufferSize, MPI_INFO_NULL, &sendingBuffer); for(int r=0;r<pg.mpiSize;r++) { double *processing = (double*)sendingBuffer + (this->remoteConstantsCount * n * n * r); int jIndex = (r % (pg.xNodes*pg.yNodes)) / pg.xNodes; int iIndex = r % pg.xNodes; for(int j=0;j<n;j++) { for(int i=0;i<n;i++) { int iGlobal = n*iIndex + (i); int jGlobal = n*jIndex + (j); int index = this->ijToConstantIndex(i,j); int globalIndex = this->remoteConstantsCount * (iGlobal + jGlobal * n * pg.xNodes); for(int k=0;k<this->remoteConstantsCount;k++) { processing[index + k] = ((double*)buffer)[k + globalIndex]; } } } } } MPI_Win_fence(MPI_MODE_NOPRECEDE, this->constantsWindow); if(pg.rank == 0) { for(int r=0;r<pg.mpiSize;r++) { MPI_Put((unsigned char*)sendingBuffer + (r * remoteConstantsCount * n * n * sizeof(double)), remoteConstantsCount * n * n * sizeof(double), MPI_BYTE, r, 0, remoteConstantsCount * n * n * sizeof(double), MPI_BYTE, constantsWindow); } } MPI_Win_fence((MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED), this->constantsWindow); if(pg.rank == 0) { MPI_Free_mem(sendingBuffer); } for(int i=1;i<n+1;i++) { for(int j=1;j<n+1;j++) { for(int k=0;k<this->remoteConstantsCount;k++) { int windowIndex = this->ijToConstantIndex(i-1,j-1); int foundationIndex = this->ijToIndex(i,j); this->foundation[foundationIndex + k] = this->remoteConstants[windowIndex + k]; } } } }
/*
 * Fortran-callable shim for MPI_Alloc_mem.
 *
 * The Fortran info handle is converted to its C counterpart, the C routine is
 * invoked with the dereferenced size, and the resulting error code is handed
 * back through ierr when the caller supplied one.
 */
void ompi_alloc_mem_f(MPI_Aint *size, MPI_Fint *info, char *baseptr, MPI_Fint *ierr)
{
    const int c_status = MPI_Alloc_mem(*size, MPI_Info_f2c(*info), baseptr);

    if (NULL == ierr) {
        return;
    }
    *ierr = OMPI_INT_2_FINT(c_status);
}
/*
 * MPI_Get test between ranks 0 and 1 (visible portion only).
 *
 * The program requires at least two processes.  MPI_COMM_WORLD is split with
 * colour (rank < 2), so ranks 0 and 1 share CommDeuce.  Those two ranks each
 * allocate SIZE ints for A and for B with MPI_Alloc_mem and abort if either
 * allocation reports an error.
 *
 * Rank 0 fills B with 500 + i, creates a window over B on CommDeuce, and,
 * between two fences, fetches element i of rank 1's window into A[i].  It then
 * expects A[i] == 1000 + i and counts mismatches in errs.
 *
 * NOTE(review): the definition is cut off after rank 0's verification loop in
 * this view; the rank-1 branch, the cleanup and the return are not visible, so
 * nothing is documented about them here.
 */
int main(int argc, char *argv[]) { int rank, nprocs, i, *A, *B; MPI_Comm CommDeuce; MPI_Win win; int errs = 0; MTest_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (nprocs < 2) { printf("Run this program with 2 or more processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Comm_split(MPI_COMM_WORLD, (rank < 2), rank, &CommDeuce); if (rank < 2) { i = MPI_Alloc_mem(SIZE * sizeof(int), MPI_INFO_NULL, &A); if (i) { printf("Can't allocate memory in test program\n"); MPI_Abort(MPI_COMM_WORLD, 1); } i = MPI_Alloc_mem(SIZE * sizeof(int), MPI_INFO_NULL, &B); if (i) { printf("Can't allocate memory in test program\n"); MPI_Abort(MPI_COMM_WORLD, 1); } if (rank == 0) { for (i = 0; i < SIZE; i++) B[i] = 500 + i; MPI_Win_create(B, SIZE * sizeof(int), sizeof(int), MPI_INFO_NULL, CommDeuce, &win); MPI_Win_fence(0, win); for (i = 0; i < SIZE; i++) { A[i] = i + 100; MPI_Get(&A[i], 1, MPI_INT, 1, i, 1, MPI_INT, win); } MPI_Win_fence(0, win); for (i = 0; i < SIZE; i++) if (A[i] != 1000 + i) { SQUELCH(printf("Rank 0: A[%d] is %d, should be %d\n", i, A[i], 1000 + i);); errs++; }
int main(int argc, char** argv) { MPI_Init(&argc, &argv); int my_rank; // Number of the node MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); int node_count; // Total number of nodes MPI_Comm_size(MPI_COMM_WORLD, &node_count); // The root must load the input data to distribute to the other nodes if(my_rank == 0) { // In our case it generates a random array as input data srand(time(NULL)); for(int item = 0; item < items; ++item) array[item] = rand(); } int items_per_rank = items / node_count; int remainder_items = items % node_count; int* my_work; MPI_Alloc_mem(items_per_rank * sizeof(int), MPI_INFO_NULL, &my_work); // MPI_Scatter is a collective operation which distributes an equal-sized part of the given array to each node. MPI_Scatter(&array[remainder_items] /* send buffer */, items_per_rank /* send count per node */, MPI_INT /* send type */, my_work /* receive buffer on each node */, items_per_rank /* receive count */ , MPI_INT /* receive type */, 0 /* send buffer is stored on this rank */, MPI_COMM_WORLD /* communication channel */); // This is the actual working-loop long sub_sum = 0; for(int i=0; i < items_per_rank; i++) sub_sum += my_work[i]; if(my_rank == 0) { // Scatter cannot deal with a division remainder so we manually deal with it while(remainder_items > 0) sub_sum += array[--remainder_items]; } MPI_Free_mem(my_work); // MPI_Reduce with op-code MPI_SUM is a collective operation which sums up the input sub_sum of each node // into single a resulting output sum on the master. MPI_Reduce(&sub_sum /* input to sum up */, &sum /* output */, 1 /* input count */, MPI_LONG /* input type */, MPI_SUM /* operation */, 0 /* output is stored on this rank */, MPI_COMM_WORLD /* communication channel */); if(my_rank == 0) { // The result of the computation now is available on rank 0. // We compare it with the sequential reference implementation to test our parallel implementation. 
if(sum == sum__sequential_reference_implementation()) fprintf(stderr, "Test OK.\n"); else fprintf(stderr, "Test FAILED!\n"); } MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return EXIT_SUCCESS; }
/*
 * Gather every rank's foundation array onto rank 0, assemble the output values
 * into resultArray and write them to `filename` as raw doubles.
 *
 * All ranks take part in the two fences on foundationWindow and in the final
 * barrier.  Only rank 0 allocates the gather buffer, issues one MPI_Get per
 * rank (foundationSize doubles, transferred as bytes), reorders the values by
 * global grid position, and writes the file.  resultArray is allocated on
 * first use and kept afterwards.
 *
 * If the file cannot be opened, or resultArray cannot be allocated, a message
 * is printed to stderr and the affected step is skipped; the collective calls
 * are still executed so the other ranks are not left waiting.
 *
 * @param filename  Path of the binary output file (opened with "wb" on rank 0).
 */
void SweptDiscretization2D::allGatherAllOutputToFile(string filename)
{
    void *buffer = NULL;
    FILE *output = NULL;

    if(pg.rank == 0)
    {
        MPI_Alloc_mem(foundationSize * pg.mpiSize * sizeof(double), MPI_INFO_NULL, &buffer);
        output = fopen(filename.c_str(),"wb");
        if(output == NULL)
        {
            // Do not return here: the fences and the barrier below are collective.
            fprintf(stderr, "allGatherAllOutputToFile: cannot open '%s' for writing\n", filename.c_str());
        }
    }

    MPI_Win_fence((MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE), foundationWindow);
    if(pg.rank == 0)
    {
        for(int r=0;r<pg.mpiSize;r++)
        {
            MPI_Get((char*)buffer + (r * foundationSize * sizeof(double)), foundationSize * sizeof(double), MPI_BYTE, r, 0, foundationSize * sizeof(double), MPI_BYTE, foundationWindow);
        }
    }
    MPI_Win_fence(MPI_MODE_NOSUCCEED, foundationWindow);

    if(pg.rank == 0)
    {
        int w = (n * pg.xNodes);
        int h = (n * pg.yNodes);
        int resultArraySize = w * h;   // number of grid points, not of doubles

        if(resultArray == NULL) resultArray = (double*) malloc(resultArraySize * sizeof(double) * outputLength);

        if(resultArray == NULL)
        {
            fprintf(stderr, "allGatherAllOutputToFile: cannot allocate the result array\n");
        }
        else
        {
            for(int r=0;r<pg.mpiSize;r++)
            {
                double *processing = (double*)buffer + (foundationSize * r);
                int jIndex = (r % (pg.xNodes*pg.yNodes)) / pg.xNodes;
                int iIndex = r % pg.xNodes;
                for(int j=1;j<n+1;j++)
                {
                    for(int i=1;i<n+1;i++)
                    {
                        int iGlobal = n*iIndex + (i-1);
                        int jGlobal = n*jIndex + (j-1);
                        int index = this->ijToIndex(i,j);
                        for(int point=0;point<outputLength;point++)
                        {
                            double val = processing[index + constants + point];
                            int resultIndex = (iGlobal + jGlobal * n * pg.xNodes) * outputLength + point;
                            resultArray[resultIndex] = val;
                        }
                    }
                }
            }
        }

        if(output != NULL)
        {
            if(resultArray != NULL)
            {
                // Every grid point carries outputLength values, so the array
                // holds resultArraySize * outputLength doubles; writing only
                // resultArraySize of them would truncate the output.
                fwrite((const void*)resultArray,sizeof(double),(size_t)resultArraySize * outputLength,output);
            }
            fclose(output);
        }
        MPI_Free_mem(buffer);
    }
    MPI_Barrier(MPI_COMM_WORLD);
}
/*
 * Exchange one int with each communication partner using one-sided MPI_Put.
 *
 * alloc_mem  non-zero: the window memory is a temporary array of `size` ints
 *            obtained from MPI_Alloc_mem, copied into recvbuf at the end and
 *            freed; zero: recvbuf itself is exposed as the window.
 * nphases    the puts are spread over this many phases; a rank issues its puts
 *            in phase (rank % nphases), and consecutive phases are separated
 *            by a fence.
 * sendbuf    value for each destination rank, indexed by rank; entries equal
 *            to DEFAULT_INT are not sent.
 * nsprocs,
 * sprocs     destination list; when nsprocs < 0 every rank of comm is a
 *            destination and sprocs is not read.
 * recvbuf    receives the value put by rank r at index r.
 * nrprocs,
 * rprocs     source list; only these entries are preset to DEFAULT_INT and
 *            copied back.  When nrprocs < 0 all `size` entries are handled and
 *            rprocs is not read.
 * comm       communicator over which the window is created.
 *
 * Each origin writes at displacement `rank` (its own rank) in the target's
 * window, so an entry still holding DEFAULT_INT afterwards received nothing.
 *
 * Always returns MPI_SUCCESS; the return codes of the MPI calls are not
 * checked.
 */
static int _ZMPI_Alltoall_int_proclists_put(int alloc_mem, int nphases, int *sendbuf, int nsprocs, int *sprocs, int *recvbuf, int nrprocs, int *rprocs, MPI_Comm comm) { int i, p, size, rank, *rcounts_put; MPI_Win win; MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank); if (alloc_mem) MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &rcounts_put); else rcounts_put = recvbuf; if (nrprocs >= 0) for (i = 0; i < nrprocs; ++i) rcounts_put[rprocs[i]] = DEFAULT_INT; else for (i = 0; i < size; ++i) rcounts_put[i] = DEFAULT_INT; MPI_Win_create(rcounts_put, size * sizeof(int), sizeof(int), MPI_INFO_NULL, comm, &win); MPI_Win_fence(MPI_MODE_NOSTORE|MPI_MODE_NOPRECEDE, win); for (p = 0; p < nphases; ++p) { /* printf("%d: phase = %d of %d\n", rank, p, nphases);*/ if (rank % nphases == p) { if (nsprocs >= 0) { for (i = 0; i < nsprocs; ++i) if (sendbuf[sprocs[i]] != DEFAULT_INT) MPI_Put(&sendbuf[sprocs[i]], 1, MPI_INT, sprocs[i], rank, 1, MPI_INT, win); } else { for (i = 0; i < size; ++i) if (sendbuf[i] != DEFAULT_INT) MPI_Put(&sendbuf[i], 1, MPI_INT, i, rank, 1, MPI_INT, win); } } if (p < nphases - 1) MPI_Win_fence(0, win); } MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED, win); MPI_Win_free(&win); if (alloc_mem) { if (nrprocs >= 0) for (i = 0; i < nrprocs; ++i) recvbuf[rprocs[i]] = rcounts_put[rprocs[i]]; else for (i = 0; i < size; ++i) recvbuf[i] = rcounts_put[i]; MPI_Free_mem(rcounts_put); } return MPI_SUCCESS; }
/*
 * Checked wrapper around MPI_Alloc_mem.
 *
 * Returns the allocated block.  For a non-zero request that fails, either
 * because MPI_Alloc_mem reports an error or because no pointer was produced,
 * a message is printed to stderr and a C string literal is thrown.
 * A zero-byte request never throws; its result may be NULL.
 */
void* xMPI_Alloc_mem(size_t nbytes) {
  // Start from NULL: if MPI_Alloc_mem fails without storing a pointer, the
  // check below must not read an indeterminate value.
  void* p = NULL;

  // MPI_SUCCESS is defined to be 0, so any non-zero code is a failure.
  const int rc = MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &p);

  if (nbytes != 0 && (rc != 0 || !p)) {
    fprintf(stderr, "MPI_Alloc_mem failed for size %zu\n", nbytes);
    throw "OutOfMemoryExpception";
  }
  return p;
}
/*
 * Write everything accumulated in the string stream `ss` to the shared-pointer
 * MPI file `fh`, then empty the stream.
 *
 * The text is staged in a buffer obtained from MPI_Alloc_mem because
 * MPI_File_write_shared is given a writable buffer.
 */
void mpiofstream::flush() {
  // Take one snapshot: ss.str() returns a fresh copy on every call.
  const std::string text = ss.str();
  MPI_Status status;
  char* buf = NULL;

  MPI_Alloc_mem(text.length()+1, MPI_INFO_NULL, &buf);

  // Copy by length (including the terminator).  strcpy would stop at the first
  // embedded '\0' while the write below still sends text.length() bytes, which
  // would put uninitialised memory into the file.
  memcpy(buf, text.c_str(), text.length()+1);

  MPI_File_write_shared(fh, buf, text.length(), MPI_CHAR, &status);
  MPI_Free_mem(buf);
  ss.str("");
}
// Construct a mutex over the processes of `_comm`.
//
// The mutex takes the next id from details::mutex_count, remembers the
// communicator, allocates one zeroed byte per process with MPI_Alloc_mem and
// exposes that vector through an RMA window with a displacement unit of
// sizeof(byte).
MPIMutex::MPIMutex(MPI_Comm _comm) {
  int members;
  int self;

  comm = _comm;
  id   = details::mutex_count++;

  MPI_Comm_rank(comm, &self);      // queried as in the original; not used further here
  MPI_Comm_size(comm, &members);

  MPI_Alloc_mem(members, MPI_INFO_NULL, &lock_vector);
  bzero(lock_vector, members);

  MPI_Win_create(lock_vector, members, sizeof(byte),
                 MPI_INFO_NULL, comm, &win);
}
/*
 * Checked wrapper around MPI_Alloc_mem.
 *
 * Returns the allocated block.  For a non-zero request that fails, either
 * because MPI_Alloc_mem reports an error or because no pointer was produced,
 * a message is printed to stderr and the process is terminated with abort().
 * A zero-byte request never aborts; its result may be NULL.
 */
void* xMPI_Alloc_mem(size_t nbytes) {
  /* Start from NULL: if MPI_Alloc_mem fails without storing a pointer, the
   * check below must not read an indeterminate value. */
  void* p = NULL;

  /* MPI_SUCCESS is defined to be 0, so any non-zero code is a failure. */
  const int rc = MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &p);

  if (nbytes != 0 && (rc != 0 || !p)) {
    fprintf(stderr, "MPI_Alloc_mem failed for size %zu\n", nbytes);
    abort();
  }
  return p;
}
/** Allocate a local buffer suitable for use in one-sided communication
  *
  * The buffer comes from MPI_Alloc_mem.  When the debug_alloc option is set the
  * buffer is zero-filled before it is returned.
  *
  * @param[in] size Number of bytes to allocate
  * @return         Pointer to the local buffer, or NULL if MPI_Alloc_mem did
  *                 not produce one
  */
void *PARMCI_Malloc_local(armci_size_t size) {
  /* Initialised so that a failed allocation yields NULL rather than an
   * indeterminate pointer. */
  void *buf = NULL;

  MPI_Alloc_mem((MPI_Aint) size, MPI_INFO_NULL, &buf);

  /* Never zero-fill through a pointer that was not obtained */
  if (buf != NULL && ARMCII_GLOBAL_STATE.debug_alloc) {
    ARMCII_Bzero(buf, size);
  }

  return buf;
}
/**
 * Allocate the buffer of a socket.
 *
 * Uses MPI_Alloc_mem unless MC_NO_MPI_ALLOC_MEM is defined (for MPI-1 systems
 * that lack it), in which case malloc is used and a notice about the fallback
 * is sent with msg_fsend.  A failed allocation is reported through error().
 *
 * @param s      Socket whose `buffer` member receives the allocation.
 * @param size   Number of bytes to allocate.
 * @param chName Name used in diagnostics.
 */
static void socket_allocateMem(socket_t * s, long long int size, const char *chName)
{
#ifndef MC_NO_MPI_ALLOC_MEM
    int rc;

    /* Clear any previous value: if MPI_Alloc_mem fails without writing the
     * pointer, a stale non-NULL value would otherwise hide the failure. */
    s->buffer = NULL;

    /* MPI_SUCCESS is defined to be 0, so any non-zero code is a failure. */
    rc = MPI_Alloc_mem(size, MPI_INFO_NULL, &(s->buffer));
    if(rc || !s->buffer)
        error("%s(%s): cannot 'MPI_Alloc_mem' %.2f Kb.", __func__, chName, size / 1024.0);
#else
    s->buffer = malloc(size);
    if(!s->buffer)
        error("%s(%s): cannot 'malloc' %.2f Kb.", __func__, chName, size / 1024.0);
    msg_fsend("%s(%s): fallback (malloc) is used to allocate %.2f Kb.", __func__, chName, size / 1024.0);
#endif
}
void test_put(void) { int me, nproc; MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Win dst_win; double *dst_buf; double src_buf[MAXELEMS]; int i, j; MPI_Alloc_mem(sizeof(double) * nproc * MAXELEMS, MPI_INFO_NULL, &dst_buf); MPI_Win_create(dst_buf, sizeof(double) * nproc * MAXELEMS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &dst_win); for (i = 0; i < MAXELEMS; i++) src_buf[i] = me + 1.0; MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, dst_win); for (i = 0; i < nproc * MAXELEMS; i++) dst_buf[i] = 0.0; MPI_Win_unlock(me, dst_win); MPI_Barrier(MPI_COMM_WORLD); for (i = 0; i < nproc; i++) { int target = i; for (j = 0; j < COUNT; j++) { if (verbose) printf("%2d -> %2d [%2d]\n", me, target, j); MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win); MPI_Put(&src_buf[j], sizeof(double), MPI_BYTE, target, (me * MAXELEMS + j) * sizeof(double), sizeof(double), MPI_BYTE, dst_win); MPI_Win_unlock(target, dst_win); } for (j = 0; j < COUNT; j++) { if (verbose) printf("%2d <- %2d [%2d]\n", me, target, j); MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win); MPI_Get(&src_buf[j], sizeof(double), MPI_BYTE, target, (me * MAXELEMS + j) * sizeof(double), sizeof(double), MPI_BYTE, dst_win); MPI_Win_unlock(target, dst_win); } } MPI_Barrier(MPI_COMM_WORLD); MPI_Win_free(&dst_win); MPI_Free_mem(dst_buf); }
/** One-sided put operation. * * @param[in] src Source address (remote) * @param[in] dst Destination address (local) * @param[in] size Number of bytes to transfer * @param[in] target Process id to target * @return 0 on success, non-zero on failure */ int ARMCI_Put(void *src, void *dst, int size, int target) { gmr_t *src_mreg, *dst_mreg; src_mreg = gmr_lookup(src, ARMCI_GROUP_WORLD.rank); dst_mreg = gmr_lookup(dst, target); ARMCII_Assert_msg(dst_mreg != NULL, "Invalid remote pointer"); /* Local operation */ if (target == ARMCI_GROUP_WORLD.rank) { if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) { gmr_dla_lock(dst_mreg); if (src_mreg) gmr_dla_lock(src_mreg); } ARMCI_Copy(src, dst, size); if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) { gmr_dla_unlock(dst_mreg); if (src_mreg) gmr_dla_unlock(src_mreg); } } /* Origin buffer is private */ else if (src_mreg == NULL || ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_NOGUARD) { gmr_lock(dst_mreg, target); gmr_put(dst_mreg, src, dst, size, target); gmr_unlock(dst_mreg, target); } /* COPY: Either origin and target buffers are in the same window and we can't * lock the same window twice (MPI semantics) or the user has requested * always-copy mode. */ else { void *src_buf; MPI_Alloc_mem(size, MPI_INFO_NULL, &src_buf); ARMCII_Assert(src_buf != NULL); gmr_dla_lock(src_mreg); ARMCI_Copy(src, src_buf, size); gmr_dla_unlock(src_mreg); gmr_lock(dst_mreg, target); gmr_put(dst_mreg, src_buf, dst, size, target); gmr_unlock(dst_mreg, target); MPI_Free_mem(src_buf); } return 0; }
int main( int argc, char *argv[] ) { int errs = 0; int rank, size, i; MPI_Comm comm; MPI_Win win; int *winbuf, count; MTest_Init( &argc, &argv ); comm = MPI_COMM_WORLD; MPI_Comm_rank( comm, &rank ); MPI_Comm_size( comm, &size ); /* Allocate and initialize buf */ count = 1000; MPI_Alloc_mem( count*sizeof(int), MPI_INFO_NULL, &winbuf ); MPI_Win_create( winbuf, count * sizeof(int), sizeof(int), MPI_INFO_NULL, comm, &win ); /* Clear winbuf */ memset( winbuf, 0, count*sizeof(int) ); /* Note that for i == rank, this is a useful operation - it allows the programmer to use direct loads and stores, rather than put/get/accumulate, to access the local memory window. */ for (i=0; i<size; i++) { MPI_Win_lock( MPI_LOCK_EXCLUSIVE, i, 0, win ); MPI_Win_unlock( i, win ); } for (i=0; i<size; i++) { MPI_Win_lock( MPI_LOCK_SHARED, i, 0, win ); MPI_Win_unlock( i, win ); } MPI_Win_free( &win ); MPI_Free_mem( winbuf ); /* If this test completes, no error has been found */ /* A more complete test may ensure that local locks in fact block remote, exclusive locks */ MTest_Finalize( errs ); MPI_Finalize(); return 0; }
/*
 * Thread body: fetch COUNT doubles from displacement 0 on rank 0 of the
 * file-level window `win` LOOPS times, completing each get with
 * MPI_Win_flush_all.  The landing buffer is private to the thread and is
 * allocated with MPI_Alloc_mem.  `arg` is not used; the return value is always
 * a null MTEST_THREAD_RETURN_TYPE.
 */
MTEST_THREAD_RETURN_TYPE run_test(void *arg)
{
    double *landing;
    int iter = 0;

    MPI_Alloc_mem(COUNT * sizeof(double), MPI_INFO_NULL, &landing);

    while (iter < LOOPS) {
        MPI_Get(landing, COUNT, MPI_DOUBLE, 0, 0, COUNT, MPI_DOUBLE, win);
        MPI_Win_flush_all(win);
        iter++;
    }

    MPI_Free_mem(landing);

    return (MTEST_THREAD_RETURN_TYPE) NULL;
}
/** Create a group of ARMCI mutexes.  Collective on the ARMCI group.
  *
  * One window is created per mutex index, up to the largest count requested by
  * any process in the group; a process with fewer mutexes contributes an empty
  * window for the remaining indices.  Each local mutex is backed by nproc
  * zeroed bytes obtained from MPI_Alloc_mem.
  *
  * @param[in] my_count Number of mutexes on the local process.
  * @param[in] pgroup   ARMCI group on which to create mutexes
  * @return             Handle to the mutex group.
  */
armcix_mutex_hdl_t ARMCIX_Create_mutexes_hdl(int my_count, ARMCI_Group *pgroup) {
  int rank, nproc, max_count, i;
  armcix_mutex_hdl_t hdl;

  hdl = malloc(sizeof(struct armcix_mutex_hdl_s));
  ARMCII_Assert(hdl != NULL);

  ARMCIX_Group_dup(pgroup, &hdl->grp);

  MPI_Comm_rank(hdl->grp.comm, &rank);
  MPI_Comm_size(hdl->grp.comm, &nproc);

  hdl->my_count = my_count;

  /* Find the max. count to determine how many windows we need. */
  MPI_Allreduce(&my_count, &max_count, 1, MPI_INT, MPI_MAX, hdl->grp.comm);
  ARMCII_Assert_msg(max_count > 0, "Invalid number of mutexes");

  hdl->max_count = max_count;

  /* max_count > 0 here, so a NULL result is a genuine allocation failure */
  hdl->windows = malloc(sizeof(MPI_Win)*max_count);
  ARMCII_Assert(hdl->windows != NULL);

  if (my_count > 0) {
    hdl->bases = malloc(sizeof(uint8_t*)*my_count);
    ARMCII_Assert(hdl->bases != NULL);
  } else {
    hdl->bases = NULL;
  }

  /* We need multiple windows here: one for each mutex.  Otherwise
     performance will suffer due to exclusive access epochs. */
  for (i = 0; i < max_count; i++) {
    int   size = 0;
    void *base = NULL;

    if (i < my_count) {
      /* bases[] comes from malloc and holds garbage; clear the slot so the
       * assert below detects an allocation that did not store a pointer. */
      hdl->bases[i] = NULL;
      MPI_Alloc_mem(nproc, MPI_INFO_NULL, &hdl->bases[i]);
      ARMCII_Assert(hdl->bases[i] != NULL);
      ARMCII_Bzero(hdl->bases[i], nproc);

      base = hdl->bases[i];
      size = nproc;
    }

    MPI_Win_create(base, size, sizeof(uint8_t), MPI_INFO_NULL, hdl->grp.comm, &hdl->windows[i]);
  }

  return hdl;
}
void heartbeat_master() { long *timestamps; MPI_Alloc_mem(sizeof(long) * (comm_size - 1), MPI_INFO_NULL, ×tamps); for (int i = 0; i < comm_size - 1; i++) { timestamps[i] = 0; } MPI_Win win; MPI_Win_create(timestamps, sizeof(long) * (comm_size - 1), sizeof(long), MPI_INFO_NULL, MPI_COMM_WORLD, &win); int workers_alive; while (1) { sleep(1); time_t current_time; time(¤t_time); for (int k = 0; k < 100; k++) { int flag; // ... schaut einfach nach, ob nachricht im eingangspuffer liegt // bringt uns eigentlich nichts, MPI intern MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE); } workers_alive = 0; // koennte klappen, dann ohne Iprobe // MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win); for (int i = 0; i < comm_size - 1; i++) { if (current_time - timestamps[i] <= 3) { workers_alive++; } else { printf("Worker %d dead.\n", i + 1); } } if (workers_alive <= 0) { break; } // koennte klappen, dann ohne Iprobe // MPI_Win_unlock(0, win); } MPI_Win_free(&win); }
/** One-sided get operation. * * @param[in] src Source address (remote) * @param[in] dst Destination address (local) * @param[in] size Number of bytes to transfer * @param[in] target Process id to target * @return 0 on success, non-zero on failure */ int PARMCI_Get(void *src, void *dst, int size, int target) { gmr_t *src_mreg, *dst_mreg; src_mreg = gmr_lookup(src, target); /* If NOGUARD is set, assume the buffer is not shared */ if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) dst_mreg = gmr_lookup(dst, ARMCI_GROUP_WORLD.rank); else dst_mreg = NULL; ARMCII_Assert_msg(src_mreg != NULL, "Invalid remote pointer"); /* Local operation */ if (target == ARMCI_GROUP_WORLD.rank && dst_mreg == NULL) { ARMCI_Copy(src, dst, size); } /* Origin buffer is private */ else if (dst_mreg == NULL) { gmr_get(src_mreg, src, dst, size, target); gmr_flush(src_mreg, target, 0); /* it's a round trip so w.r.t. flush, local=remote */ } /* COPY: Either origin and target buffers are in the same window and we can't * lock the same window twice (MPI semantics) or the user has requested * always-copy mode. */ else { void *dst_buf; MPI_Alloc_mem(size, MPI_INFO_NULL, &dst_buf); ARMCII_Assert(dst_buf != NULL); gmr_get(src_mreg, src, dst_buf, size, target); gmr_flush(src_mreg, target, 0); /* it's a round trip so w.r.t. flush, local=remote */ ARMCI_Copy(dst_buf, dst, size); MPI_Free_mem(dst_buf); } return 0; }
/*
 * Allocate a new shared linked list element.
 *
 * The element is obtained from MPI_Alloc_mem, initialised with `value` and a
 * nil next pointer, attached to the window `win`, and recorded in my_elems so
 * that it can be freed later.  The list of local elements grows in steps of
 * 100 entries.
 *
 * Returns the element's address as obtained from MPI_Get_address.  The job is
 * aborted with MPI_Abort if either allocation fails.
 */
MPI_Aint alloc_elem(int value, MPI_Win win) {
    MPI_Aint disp;
    llist_elem_t *elem_ptr = NULL;

    /* Allocate the new element and register it with the window */
    MPI_Alloc_mem(sizeof(llist_elem_t), MPI_INFO_NULL, &elem_ptr);
    if (elem_ptr == NULL) {
        fprintf(stderr, "alloc_elem: MPI_Alloc_mem failed\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    elem_ptr->value = value;
    elem_ptr->next  = nil;
    MPI_Win_attach(win, elem_ptr, sizeof(llist_elem_t));

    /* Add the element to the list of local elements so we can free it later. */
    if (my_elems_size == my_elems_count) {
        /* Keep the old pointer until realloc is known to have succeeded;
         * assigning the result directly would lose the list on failure. */
        void *grown = realloc(my_elems, (my_elems_size + 100)*sizeof(void*));
        if (grown == NULL) {
            fprintf(stderr, "alloc_elem: cannot grow the list of local elements\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        my_elems = grown;
        my_elems_size += 100;
    }
    my_elems[my_elems_count] = elem_ptr;
    my_elems_count++;

    MPI_Get_address(elem_ptr, &disp);
    return disp;
}
/*
 * Allocate the message buffer and the RMA window used by the benchmark.
 *
 * A private buffer of MAX_MSG_SZ * ITERS_LARGE longs is obtained from
 * MPI_Alloc_mem, and a window of the same size is created with
 * MPI_Win_allocate (info key "same_size" = "true") and stored in *win.  A
 * lock-all epoch with MPI_MODE_NOCHECK is then opened on the window.
 *
 * me   rank of the caller, used only in the failure message
 * win  receives the window handle
 *
 * Returns the private message buffer.  The base address of the window memory
 * is not returned.  The process exits if either allocation yields no memory.
 */
long * allocate_memory (int me, MPI_Win * win)
{
    /* Both start as NULL so the failure test below never reads an
     * indeterminate pointer. */
    long * msg_buffer = NULL;
    long * win_base = NULL; /* base */

    MPI_Info info;
    MPI_Info_create(&info);
    MPI_Info_set(info, "same_size", "true");

    MPI_Alloc_mem((MAX_MSG_SZ * ITERS_LARGE) * sizeof(long), MPI_INFO_NULL, &msg_buffer);
    MPI_Win_allocate((MAX_MSG_SZ * ITERS_LARGE) * sizeof(long), sizeof(long), info, MPI_COMM_WORLD, &win_base, win);

    MPI_Info_free(&info);

    /* Either allocation failing is fatal (the original required both to
     * fail), and the test must precede any use of the window. */
    if (NULL == msg_buffer || NULL == win_base || MPI_BOTTOM == win_base) {
        fprintf(stderr, "Failed to allocate window (pe: %d)\n", me);
        exit(EXIT_FAILURE);
    }

    MPI_Win_lock_all (MPI_MODE_NOCHECK, *win);

    return msg_buffer;
}
/** Prepare a set of buffers for use with a put operation. The returned set of * buffers is guaranteed to be in private space. Copies will be made if needed, * the result should be completed by finish. * * @param[in] orig_bufs Original set of buffers. * @param[out] new_bufs Pointer to the set of private buffers. * @param[in] count Number of entries in the buffer list. * @param[in] size The size of the buffers (all are of the same size). * @return Number of buffers that were moved. */ int ARMCII_Buf_prepare_read_vec(void **orig_bufs, void ***new_bufs_ptr, int count, int size) { int num_moved = 0; if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) { void **new_bufs = malloc(count*sizeof(void*)); int i; for (i = 0; i < count; i++) new_bufs[i] = NULL; for (i = 0; i < count; i++) { // Check if the source buffer is within a shared region. If so, copy it // into a private buffer. gmr_t *mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank); if (mreg != NULL) { MPI_Alloc_mem(size, MPI_INFO_NULL, &new_bufs[i]); ARMCII_Assert(new_bufs[i] != NULL); gmr_dla_lock(mreg); ARMCI_Copy(orig_bufs[i], new_bufs[i], size); // gmr_get(mreg, orig_bufs[i], new_bufs[i], size, ARMCI_GROUP_WORLD.rank); gmr_dla_unlock(mreg); num_moved++; } else { new_bufs[i] = orig_bufs[i]; } } *new_bufs_ptr = new_bufs; } else { *new_bufs_ptr = orig_bufs; } return num_moved; }
int main(int argc, char *argv[]) { int rank, nproc; int i; MPI_Win win; int *tar_buf = NULL; int *orig_buf = NULL; MPI_Datatype derived_dtp; int errors = 0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (nproc < 3) { fprintf(stderr, "Run this program with at least 3 processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Alloc_mem(sizeof(int) * DATA_SIZE, MPI_INFO_NULL, &orig_buf); MPI_Alloc_mem(sizeof(int) * DATA_SIZE, MPI_INFO_NULL, &tar_buf); for (i = 0; i < DATA_SIZE; i++) { orig_buf[i] = 1; tar_buf[i] = 0; } MPI_Type_vector(COUNT, BLOCKLENGTH - 1, STRIDE, MPI_INT, &derived_dtp); MPI_Type_commit(&derived_dtp); MPI_Win_create(tar_buf, sizeof(int) * DATA_SIZE, sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win); /***** test between rank 0 and rank 1 *****/ if (rank == 1) { MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win); for (i = 0; i < OPS_NUM; i++) { MPI_Accumulate(orig_buf, 1, derived_dtp, 0, 0, DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win); MPI_Win_flush_local(0, win); } MPI_Win_unlock(0, win); } MPI_Barrier(MPI_COMM_WORLD); /* check results */ if (rank == 0) { for (i = 0; i < DATA_SIZE - COUNT; i++) { if (tar_buf[i] != OPS_NUM) { printf("tar_buf[%d] = %d, expected %d\n", i, tar_buf[i], OPS_NUM); errors++; } } } for (i = 0; i < DATA_SIZE; i++) { tar_buf[i] = 0; } MPI_Barrier(MPI_COMM_WORLD); /***** test between rank 0 and rank 2 *****/ if (rank == 2) { MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win); for (i = 0; i < OPS_NUM; i++) { MPI_Accumulate(orig_buf, 1, derived_dtp, 0, 0, DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win); MPI_Win_flush_local(0, win); } MPI_Win_unlock(0, win); } MPI_Barrier(MPI_COMM_WORLD); /* check results */ if (rank == 0) { for (i = 0; i < DATA_SIZE - COUNT; i++) { if (tar_buf[i] != OPS_NUM) { printf("tar_buf[%d] = %d, expected %d\n", i, tar_buf[i], OPS_NUM); errors++; } } if (errors == 0) printf(" No Errors\n"); } MPI_Win_free(&win); MPI_Type_free(&derived_dtp); MPI_Free_mem(orig_buf); MPI_Free_mem(tar_buf); 
MPI_Finalize(); return 0; }
main(int argc, char *argv[]) { int i, j; int ch; extern char *optarg; int edge; int size; int nloop=5; double **ptr_loc; MP_INIT(arc,argv); MP_PROCS(&nproc); MP_MYID(&me); while ((ch = getopt(argc, argv, "n:b:p:h")) != -1) { switch(ch) { case 'n': n = atoi(optarg); break; case 'b': block_size = atoi(optarg); break; case 'p': nproc = atoi(optarg); break; case 'h': { printf("Usage: LU, or \n"); printf(" LU -nMATRIXSIZE -bBLOCKSIZE -pNPROC\n"); MP_BARRIER(); MP_FINALIZE(); exit(0); } } } if(me == 0) { printf("\n Blocked Dense LU Factorization\n"); printf(" %d by %d Matrix\n", n, n); printf(" %d Processors\n", nproc); printf(" %d by %d Element Blocks\n", block_size, block_size); printf("\n"); } num_rows = (int) sqrt((double) nproc); for (;;) { num_cols = nproc/num_rows; if (num_rows*num_cols == nproc) break; num_rows--; } nblocks = n/block_size; if (block_size * nblocks != n) { nblocks++; } edge = n%block_size; if (edge == 0) { edge = block_size; } #ifdef DEBUG if(me == 0) for (i=0; i<nblocks; i++) { for (j=0; j<nblocks; j++) printf("%d ", block_owner(i, j)); printf("\n"); } MP_BARRIER(); MP_FINALIZE(); exit(0); #endif for (i=0; i<nblocks; i++) { for (j=0; j<nblocks; j++) { if(block_owner(i,j) == me) { if ((i == nblocks-1) && (j == nblocks-1)) { size = edge*edge; } else if ((i == nblocks-1) || (j == nblocks-1)) { size = edge*block_size; } else { size = block_size*block_size; } proc_bytes += size*sizeof(double); } } } ptr = (void **)malloc(nproc * sizeof(void *)); #ifdef MPI2_ONESIDED MPI_Alloc_mem(proc_bytes, MPI_INFO_NULL, &ptr[me]); MPI_Win_create((void*)ptr[me], proc_bytes, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); for(i=0; i<nproc; i++) ptr[i] = (double *)ptr[me]; MPI_Barrier(MPI_COMM_WORLD); #else /* initialize ARMCI */ ARMCI_Init(); ARMCI_Malloc(ptr, proc_bytes); #endif a = (double **)malloc(nblocks*nblocks*sizeof(double *)); if (a == NULL) { fprintf(stderr, "Could not malloc memory for a\n"); exit(-1); } ptr_loc = (double **)malloc(nproc*sizeof(double *)); 
for(i=0; i<nproc; i++) ptr_loc[i] = (double *)ptr[i]; for(i=0; i<nblocks; i ++) { for(j=0; j<nblocks; j++) { a[i+j*nblocks] = ptr_loc[block_owner(i, j)]; if ((i == nblocks-1) && (j == nblocks-1)) { size = edge*edge; } else if ((i == nblocks-1) || (j == nblocks-1)) { size = edge*block_size; } else { size = block_size*block_size; } ptr_loc[block_owner(i, j)] += size; } } /* initialize the array */ init_array(); /* barrier to ensure all initialization is done */ MP_BARRIER(); /* to remove cold-start misses, all processors touch their own data */ touch_array(block_size, me); MP_BARRIER(); if(doprint) { if(me == 0) { printf("Matrix before LU decomposition\n"); print_array(me); } MP_BARRIER(); } lu(n, block_size, me); /* cold start */ /* Starting the timer */ MP_BARRIER(); if(me == 0) start_timer(); for(i=0; i<nloop; i++) lu(n, block_size, me); MP_BARRIER(); /* Timer Stops here */ if(me == 0) printf("\nRunning time = %lf milliseconds.\n\n", elapsed_time()/nloop); printf("%d: (ngets=%d) Communication (get) time = %e milliseconds\n", me, get_cntr, comm_time*1000/nloop); if(doprint) { if(me == 0) { printf("after LU\n"); print_array(me); } MP_BARRIER(); } /* done */ #ifdef MPI2_ONESIDED MPI_Win_free(&win); MPI_Free_mem(ptr[me]); #else ARMCI_Free(ptr[me]); ARMCI_Finalize(); #endif MP_FINALIZE(); }
/*
 * Accumulate test using a vector datatype on both origin and target.
 *
 * Every rank allocates DATA_SIZE doubles for the origin (all 1.0) and for the
 * target (all 0.5) and exposes the target buffer through a window.
 *
 * Phase 1: rank 0 accumulates (MPI_SUM) one element of a vector type
 * (5 blocks of 3 doubles, stride 5) onto rank 1 inside a shared-lock epoch.
 * After a fence, rank 1 expects 1.5 where i % 5 < 3 and 0.5 elsewhere.
 *
 * Phase 2: rank 0 accumulates all DATA_SIZE doubles contiguously.  After a
 * fence, rank 1 expects 2.5 where i % 5 < 3 and 1.5 elsewhere.
 *
 * Rank 1 prints " No Errors" if no mismatch was counted.
 *
 * NOTE(review): rank 0 always targets rank 1 but the process count is never
 * checked -- presumably the test is meant to run with at least 2 processes.
 * NOTE(review): the vector type covers 25 doubles, so the phase-1 expectation
 * over all DATA_SIZE entries presumably relies on DATA_SIZE being 25 --
 * confirm against its definition.
 * NOTE(review): the mismatch messages print the literal text "tar_buf[i]"
 * rather than the failing index.
 */
int main(int argc, char *argv[]) { MPI_Win win; int errors = 0; int rank, nproc, i; double *orig_buf; double *tar_buf; MPI_Datatype vector_dtp; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Alloc_mem(sizeof(double) * DATA_SIZE, MPI_INFO_NULL, &orig_buf); MPI_Alloc_mem(sizeof(double) * DATA_SIZE, MPI_INFO_NULL, &tar_buf); for (i = 0; i < DATA_SIZE; i++) { orig_buf[i] = 1.0; tar_buf[i] = 0.5; } MPI_Type_vector(5 /* count */ , 3 /* blocklength */ , 5 /* stride */ , MPI_DOUBLE, &vector_dtp); MPI_Type_commit(&vector_dtp); MPI_Win_create(tar_buf, sizeof(double) * DATA_SIZE, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win); if (rank == 0) { MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win); MPI_Accumulate(orig_buf, 1, vector_dtp, 1, 0, 1, vector_dtp, MPI_SUM, win); MPI_Win_unlock(1, win); } MPI_Win_fence(0, win); if (rank == 1) { for (i = 0; i < DATA_SIZE; i++) { if (i % 5 < 3) { if (tar_buf[i] != 1.5) { printf("tar_buf[i] = %f (expected 1.5)\n", tar_buf[i]); errors++; } } else { if (tar_buf[i] != 0.5) { printf("tar_buf[i] = %f (expected 0.5)\n", tar_buf[i]); errors++; } } } } MPI_Type_free(&vector_dtp); MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win); MPI_Accumulate(orig_buf, DATA_SIZE, MPI_DOUBLE, 1, 0, DATA_SIZE, MPI_DOUBLE, MPI_SUM, win); MPI_Win_unlock(1, win); } MPI_Win_fence(0, win); if (rank == 1) { for (i = 0; i < DATA_SIZE; i++) { if (i % 5 < 3) { if (tar_buf[i] != 2.5) { printf("tar_buf[i] = %f (expected 2.5)\n", tar_buf[i]); errors++; } } else { if (tar_buf[i] != 1.5) { printf("tar_buf[i] = %f (expected 1.5)\n", tar_buf[i]); errors++; } } } } MPI_Win_free(&win); MPI_Free_mem(orig_buf); MPI_Free_mem(tar_buf); if (rank == 1) { if (errors == 0) printf(" No Errors\n"); } MPI_Finalize(); return 0; }