int main(int argc, char* argv[]) { MPI_Init(&argc,&argv); MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &MPI_COMM_NODE); int n = (argc>1) ? atoi(argv[1]) : 1000; int wrank, wsize; MPI_Comm_rank(MPI_COMM_WORLD, &wrank); MPI_Comm_size(MPI_COMM_WORLD, &wsize); int nrank, nsize; MPI_Comm_rank(MPI_COMM_NODE, &nrank); MPI_Comm_size(MPI_COMM_NODE, &nsize); char * buf1 = NULL; char * buf2 = NULL; MPI_Alloc_mem(n, MPI_INFO_NULL, &buf1); MPI_Alloc_mem(n, MPI_INFO_NULL, &buf2); memset(buf1, nrank==0 ? 'Z' : 'A', n); memset(buf2, nrank==0 ? 'Z' : 'A', n); double t0, t1, dt; for (int r=0; r<20; r++) { MPI_Barrier(MPI_COMM_WORLD); t0 = MPI_Wtime(); MPI_Bcast(buf1, n, MPI_CHAR, 0, MPI_COMM_NODE); t1 = MPI_Wtime(); dt = t1-t0; printf("%d: MPI_Bcast: %lf seconds, %lf MB/s \n", wrank, dt, n*(1.e-6/dt)); fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); t0 = MPI_Wtime(); SMP_Bcast(buf2, n, MPI_CHAR, 0, MPI_COMM_NODE); t1 = MPI_Wtime(); dt = t1-t0; printf("%d: SMP_Bcast: %lf seconds, %lf MB/s \n", wrank, dt, n*(1.e-6/dt)); fflush(stdout); if (r==0) { char * tmp = malloc(n); memset(tmp, 'Z', n); int err1 = memcmp(tmp, buf1, n); int err2 = memcmp(tmp, buf2, n); if (err1>0 || err2>0) { printf("%d: errors: MPI (%d), SMP (%d) \n", wrank, err1, err2); } free(tmp); } } MPI_Free_mem(buf1); MPI_Free_mem(buf2); MPI_Comm_free(&MPI_COMM_NODE); MPI_Finalize(); return 0; }
int main(int argc, char * argv[]) { const MPI_Count test_int_max = BigMPI_Get_max_int(); MPI_Init(&argc, &argv); int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size<1) { printf("Use 1 or more processes. \n"); MPI_Finalize(); return 1; } int l = (argc > 1) ? atoi(argv[1]) : 2; int m = (argc > 2) ? atoi(argv[2]) : 17777; MPI_Count n = l * test_int_max + m; char * buf_send = NULL; char * buf_recv = NULL; MPI_Alloc_mem((MPI_Aint)n * size, MPI_INFO_NULL, &buf_send); assert(buf_send!=NULL); MPI_Alloc_mem((MPI_Aint)n, MPI_INFO_NULL, &buf_recv); assert(buf_recv!=NULL); if (rank==0) { for (int i = 0; i < size; ++i) { for (MPI_Count j = 0; j < n; ++j) { buf_send[i*n+j] = (unsigned char)i; } } } memset(buf_recv, -1, (size_t)n); /* collective communication */ MPIX_Scatter_x(buf_send, n, MPI_CHAR, buf_recv, n, MPI_CHAR, 0 /* root */, MPI_COMM_WORLD); size_t errors = verify_buffer(buf_recv, n, rank); MPI_Free_mem(buf_send); MPI_Free_mem(buf_recv); if (rank==0 && errors==0) { printf("SUCCESS\n"); } MPI_Finalize(); return 0; }
int main (int argc, char *argv[]) { struct pe_vars v; long * msg_buffer; /* * Initialize */ init_mpi(&v); check_usage(argc, argv, v.npes, v.me); print_header(v.me); if (v.me == 0) printf("Total processes = %d\n",v.npes); /* * Allocate Memory */ msg_buffer = allocate_memory(v.me, &(v.win) ); memset(msg_buffer, 0, MAX_MSG_SZ * ITERS_LARGE * sizeof(long)); /* * Time Put Message Rate */ benchmark(msg_buffer, v.me, v.pairs, v.nxtpe, v.win); /* * Finalize */ MPI_Win_unlock_all(v.win); MPI_Win_free(&v.win); MPI_Free_mem(msg_buffer); MPI_Finalize(); return EXIT_SUCCESS; }
int main (int argc,char *argv[]) { int i; double w[NEL]; MPI_Aint win_size,warr_size; MPI_Win *win; win_size=sizeof(MPI_Win); warr_size=sizeof(double)*NEL; MPI_Init (&argc, &argv); for(i=0;i<NTIMES;i++) { MPI_Alloc_mem(win_size,MPI_INFO_NULL,&win); MPI_Win_create(w,warr_size,sizeof(double),MPI_INFO_NULL,MPI_COMM_WORLD,win); MPI_Win_free(win); MPI_Free_mem(win); } MPI_Finalize(); return 0; }
void IMB_del_r_buf(struct comm_info* c_info ) /* Deletes recv buffer component of c_info In/out variables: -c_info (type struct comm_info*) Collection of all base data for MPI; see [1] for more information */ { /* July 2002 V2.2.1 change: use MPI_Free_mem */ if ( c_info->r_alloc> 0) { #if (defined EXT || defined MPIIO || defined RMA) MPI_Free_mem( c_info->r_buffer ); #else IMB_v_free( (void**)&c_info->r_buffer ); #endif c_info-> r_alloc = 0; c_info->r_buffer = NULL; } }
/* Free the storage associated with a window object */ void MTestFreeWin(MPI_Win * win) { void *addr; int flag, merr; merr = MPI_Win_get_attr(*win, MPI_WIN_BASE, &addr, &flag); if (merr) MTestPrintError(merr); if (!flag) { MTestError("Could not get WIN_BASE from window"); } if (addr) { void *val; merr = MPI_Win_get_attr(*win, mem_keyval, &val, &flag); if (merr) MTestPrintError(merr); if (flag) { if (val == (void *) 1) { free(addr); } else if (val == (void *) 2) { merr = MPI_Free_mem(addr); if (merr) MTestPrintError(merr); } /* if val == (void *)0, then static data that must not be freed */ } } merr = MPI_Win_free(win); if (merr) MTestPrintError(merr); }
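MTestFreeWin above dispatches on a window attribute (mem_keyval) that records how the window's base memory was obtained. A minimal sketch of the creation side that would establish this convention, assuming mem_keyval was created earlier with MPI_Win_create_keyval; the helper name create_tagged_win is hypothetical:
/* Sketch (not from the original source): create a window and tag it so that
 * MTestFreeWin() knows which deallocator to use.
 * Convention assumed above: 1 = malloc, 2 = MPI_Alloc_mem, 0 = static storage. */
static int create_tagged_win(MPI_Aint size, int use_alloc_mem, MPI_Comm comm, MPI_Win *win)
{
    void *base;
    if (use_alloc_mem)
        MPI_Alloc_mem(size, MPI_INFO_NULL, &base);
    else
        base = malloc(size);
    MPI_Win_create(base, size, 1, MPI_INFO_NULL, comm, win);
    /* Record the allocator; MTestFreeWin() reads this back via MPI_Win_get_attr. */
    MPI_Win_set_attr(*win, mem_keyval, use_alloc_mem ? (void *) 2 : (void *) 1);
    return MPI_SUCCESS;
}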
int main(int argc, char *argv[]) { int errs = 0, err; int j, count; char *ap; MTest_Init(&argc, &argv); MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN); for (count = 1; count < 128000; count *= 2) { err = MPI_Alloc_mem(count, MPI_INFO_NULL, &ap); if (err) { int errclass; /* An error of MPI_ERR_NO_MEM is allowed */ MPI_Error_class(err, &errclass); if (errclass != MPI_ERR_NO_MEM) { errs++; MTestPrintError(err); } } else { /* Access all of this memory */ for (j = 0; j < count; j++) { ap[j] = (char) (j & 0x7f); } MPI_Free_mem(ap); } } MTest_Finalize(errs); return MTestReturnValue(errs); }
void SweptDiscretization2D::updateRemoteConstants(unsigned char *buffer) { void *sendingBuffer = NULL; FILE *inFile = NULL; if(pg.rank == 0) { int bufferSize = this->remoteConstantsCount * n * n * pg.mpiSize * sizeof(double); MPI_Alloc_mem(bufferSize, MPI_INFO_NULL, &sendingBuffer); for(int r=0;r<pg.mpiSize;r++) { double *processing = (double*)sendingBuffer + (this->remoteConstantsCount * n * n * r); int jIndex = (r % (pg.xNodes*pg.yNodes)) / pg.xNodes; int iIndex = r % pg.xNodes; for(int j=0;j<n;j++) { for(int i=0;i<n;i++) { int iGlobal = n*iIndex + (i); int jGlobal = n*jIndex + (j); int index = this->ijToConstantIndex(i,j); int globalIndex = this->remoteConstantsCount * (iGlobal + jGlobal * n * pg.xNodes); for(int k=0;k<this->remoteConstantsCount;k++) { processing[index + k] = ((double*)buffer)[k + globalIndex]; } } } } } MPI_Win_fence(MPI_MODE_NOPRECEDE, this->constantsWindow); if(pg.rank == 0) { for(int r=0;r<pg.mpiSize;r++) { MPI_Put((unsigned char*)sendingBuffer + (r * remoteConstantsCount * n * n * sizeof(double)), remoteConstantsCount * n * n * sizeof(double), MPI_BYTE, r, 0, remoteConstantsCount * n * n * sizeof(double), MPI_BYTE, constantsWindow); } } MPI_Win_fence((MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED), this->constantsWindow); if(pg.rank == 0) { MPI_Free_mem(sendingBuffer); } for(int i=1;i<n+1;i++) { for(int j=1;j<n+1;j++) { for(int k=0;k<this->remoteConstantsCount;k++) { int windowIndex = this->ijToConstantIndex(i-1,j-1); int foundationIndex = this->ijToIndex(i,j); this->foundation[foundationIndex + k] = this->remoteConstants[windowIndex + k]; } } } }
int MPI_Free_mem(void* baseptr) { if (max_ep > 0) { EPLIB_free(baseptr); return MPI_SUCCESS; } return PMPI_Free_mem(baseptr); }
int main(int argc, char** argv) { MPI_Init(&argc, &argv); int my_rank; // Number of the node MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); int node_count; // Total number of nodes MPI_Comm_size(MPI_COMM_WORLD, &node_count); // The root must load the input data to distribute to the other nodes if(my_rank == 0) { // In our case it generates a random array as input data srand(time(NULL)); for(int item = 0; item < items; ++item) array[item] = rand(); } int items_per_rank = items / node_count; int remainder_items = items % node_count; int* my_work; MPI_Alloc_mem(items_per_rank * sizeof(int), MPI_INFO_NULL, &my_work); // MPI_Scatter is a collective operation which distributes an equal-sized part of the given array to each node. MPI_Scatter(&array[remainder_items] /* send buffer */, items_per_rank /* send count per node */, MPI_INT /* send type */, my_work /* receive buffer on each node */, items_per_rank /* receive count */ , MPI_INT /* receive type */, 0 /* send buffer is stored on this rank */, MPI_COMM_WORLD /* communication channel */); // This is the actual working-loop long sub_sum = 0; for(int i=0; i < items_per_rank; i++) sub_sum += my_work[i]; if(my_rank == 0) { // Scatter cannot deal with a division remainder so we manually deal with it while(remainder_items > 0) sub_sum += array[--remainder_items]; } MPI_Free_mem(my_work); // MPI_Reduce with op-code MPI_SUM is a collective operation which sums up the input sub_sum of each node // into a single resulting output sum on the master. MPI_Reduce(&sub_sum /* input to sum up */, &sum /* output */, 1 /* input count */, MPI_LONG /* input type */, MPI_SUM /* operation */, 0 /* output is stored on this rank */, MPI_COMM_WORLD /* communication channel */); if(my_rank == 0) { // The result of the computation is now available on rank 0. // We compare it with the sequential reference implementation to test our parallel implementation. if(sum == sum__sequential_reference_implementation()) fprintf(stderr, "Test OK.\n"); else fprintf(stderr, "Test FAILED!\n"); } MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return EXIT_SUCCESS; }
void SweptDiscretization2D::allGatherAllOutputToFile(string filename) { void *buffer = NULL; FILE *output; if(pg.rank == 0) { MPI_Alloc_mem(foundationSize * pg.mpiSize * sizeof(double), MPI_INFO_NULL, &buffer); output = fopen(filename.c_str(),"wb"); } MPI_Win_fence((MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE), foundationWindow); if(pg.rank == 0) { for(int r=0;r<pg.mpiSize;r++) { MPI_Get((char*)buffer + (r * foundationSize * sizeof(double)), foundationSize * sizeof(double), MPI_BYTE, r, 0, foundationSize * sizeof(double), MPI_BYTE, foundationWindow); } } MPI_Win_fence(MPI_MODE_NOSUCCEED, foundationWindow); if(pg.rank == 0) { int w = (n * pg.xNodes); int h = (n * pg.yNodes); int resultArraySize = w * h; if(resultArray == NULL) resultArray = (double*) malloc(resultArraySize * sizeof(double) * outputLength); for(int r=0;r<pg.mpiSize;r++) { double *processing = (double*)buffer + (foundationSize * r); int jIndex = (r % (pg.xNodes*pg.yNodes)) / pg.xNodes; int iIndex = r % pg.xNodes; for(int j=1;j<n+1;j++) { for(int i=1;i<n+1;i++) { int iGlobal = n*iIndex + (i-1); int jGlobal = n*jIndex + (j-1); int index = this->ijToIndex(i,j); for(int point=0;point<outputLength;point++) { double val = processing[index + constants + point]; int resultIndex = (iGlobal + jGlobal * n * pg.xNodes) * outputLength + point; resultArray[resultIndex] = val; } } } } fwrite((const void*)resultArray,sizeof(double),(size_t)resultArraySize * outputLength,output); fclose(output); MPI_Free_mem(buffer); } MPI_Barrier(MPI_COMM_WORLD); }
static int _ZMPI_Alltoall_int_proclists_put(int alloc_mem, int nphases, int *sendbuf, int nsprocs, int *sprocs, int *recvbuf, int nrprocs, int *rprocs, MPI_Comm comm) { int i, p, size, rank, *rcounts_put; MPI_Win win; MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank); if (alloc_mem) MPI_Alloc_mem(size * sizeof(int), MPI_INFO_NULL, &rcounts_put); else rcounts_put = recvbuf; if (nrprocs >= 0) for (i = 0; i < nrprocs; ++i) rcounts_put[rprocs[i]] = DEFAULT_INT; else for (i = 0; i < size; ++i) rcounts_put[i] = DEFAULT_INT; MPI_Win_create(rcounts_put, size * sizeof(int), sizeof(int), MPI_INFO_NULL, comm, &win); MPI_Win_fence(MPI_MODE_NOSTORE|MPI_MODE_NOPRECEDE, win); for (p = 0; p < nphases; ++p) { /* printf("%d: phase = %d of %d\n", rank, p, nphases);*/ if (rank % nphases == p) { if (nsprocs >= 0) { for (i = 0; i < nsprocs; ++i) if (sendbuf[sprocs[i]] != DEFAULT_INT) MPI_Put(&sendbuf[sprocs[i]], 1, MPI_INT, sprocs[i], rank, 1, MPI_INT, win); } else { for (i = 0; i < size; ++i) if (sendbuf[i] != DEFAULT_INT) MPI_Put(&sendbuf[i], 1, MPI_INT, i, rank, 1, MPI_INT, win); } } if (p < nphases - 1) MPI_Win_fence(0, win); } MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED, win); MPI_Win_free(&win); if (alloc_mem) { if (nrprocs >= 0) for (i = 0; i < nrprocs; ++i) recvbuf[rprocs[i]] = rcounts_put[rprocs[i]]; else for (i = 0; i < size; ++i) recvbuf[i] = rcounts_put[i]; MPI_Free_mem(rcounts_put); } return MPI_SUCCESS; }
void mpp_free (void *buf) { #if HAVE_MPI_ALLOC_MEM if (use_mpi_alloc) MPI_Free_mem (buf); else #endif free (buf); return; }
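The branch in mpp_free only makes sense if the matching allocator chose between MPI_Alloc_mem and malloc the same way. A minimal sketch of such a counterpart, assuming the same use_mpi_alloc flag and HAVE_MPI_ALLOC_MEM guard; the name mpp_alloc is an assumption:
/* Hypothetical counterpart to mpp_free(): buffers obtained here must be
 * released by mpp_free() so the allocator/deallocator pair always matches. */
void *mpp_alloc (size_t len)
{
  void *buf = NULL;
#if HAVE_MPI_ALLOC_MEM
  if (use_mpi_alloc) {
    MPI_Alloc_mem ((MPI_Aint) len, MPI_INFO_NULL, &buf);
    return buf;
  }
#endif
  return malloc (len);
}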
/** * Wrappers for MPI_Free_mem for computers which may not have them (MPI-1 computers). */ static void socket_freeMem(socket_t * s) { assert(s->buffer); #ifndef MC_NO_MPI_ALLOC_MEM MPI_Free_mem(s->buffer); #else free(s->buffer); #endif s->buffer = 0; }
void mpiofstream::flush() { MPI_Status status; char* buf; MPI_Alloc_mem(ss.str().length()+1, MPI_INFO_NULL, &buf); strcpy(buf, ss.str().c_str()); MPI_File_write_shared(fh, buf, ss.str().length(), MPI_CHAR, &status); MPI_Free_mem(buf); ss.str(""); }
/** One-sided accumulate operation. * * @param[in] datatype ARMCI data type for the accumulate operation (see armci.h) * @param[in] scale Pointer for a scalar of type datatype that will be used to * scale values in the source buffer * @param[in] src Source address (remote) * @param[in] dst Destination address (local) * @param[in] bytes Number of bytes to transfer * @param[in] proc Process id to target * @return 0 on success, non-zero on failure */ int PARMCI_Acc(int datatype, void *scale, void *src, void *dst, int bytes, int proc) { void *src_buf; int count, type_size, scaled; MPI_Datatype type; gmr_t *src_mreg, *dst_mreg; /* If NOGUARD is set, assume the buffer is not shared */ if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) src_mreg = gmr_lookup(src, ARMCI_GROUP_WORLD.rank); else src_mreg = NULL; dst_mreg = gmr_lookup(dst, proc); ARMCII_Assert_msg(dst_mreg != NULL, "Invalid remote pointer"); /* Prepare the input data: Apply scaling if needed and acquire the DLA lock if * needed. We hold the DLA lock if (src_buf == src && src_mreg != NULL). */ scaled = ARMCII_Buf_acc_is_scaled(datatype, scale); if (scaled) { MPI_Alloc_mem(bytes, MPI_INFO_NULL, &src_buf); ARMCII_Assert(src_buf != NULL); ARMCII_Buf_acc_scale(src, src_buf, bytes, datatype, scale); } else { src_buf = src; } /* Check if we need to copy: user requested it or same mem region */ if ( (src_buf == src) /* buf_prepare didn't make a copy */ && (ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_COPY || src_mreg == dst_mreg) ) { MPI_Alloc_mem(bytes, MPI_INFO_NULL, &src_buf); ARMCII_Assert(src_buf != NULL); ARMCI_Copy(src, src_buf, bytes); } ARMCII_Acc_type_translate(datatype, &type, &type_size); count = bytes/type_size; ARMCII_Assert_msg(bytes % type_size == 0, "Transfer size is not a multiple of the datatype size"); /* TODO: Support a local accumulate operation more efficiently */ gmr_accumulate(dst_mreg, src_buf, dst, count, type, proc); gmr_flush(dst_mreg, proc, 1); /* flush_local */ if (src_buf != src) MPI_Free_mem(src_buf); return 0; }
/** Finish a set of prepared buffers. Will perform communication and copies as * needed to ensure results are in the original buffers. Temporary space will be * freed. * * @param[in] orig_bufs Original set of buffers. * @param[out] new_bufs Set of private buffers. * @param[in] count Number of entries in the buffer list. * @param[in] size The size of the buffers (all are of the same size). */ void ARMCII_Buf_finish_acc_vec(void **orig_bufs, void **new_bufs, int count, int size) { int i; for (i = 0; i < count; i++) { if (orig_bufs[i] != new_bufs[i]) { MPI_Free_mem(new_bufs[i]); } } free(new_bufs); }
/** Destroy/free a mutex group. Collective. * * @param[in] hdl Group to destroy */ int ARMCIX_Destroy_mutexes_hdl(armcix_mutex_hdl_t hdl) { MPI_Win_free(&hdl->window); if (hdl->base) MPI_Free_mem(hdl->base); MPI_Comm_free(&hdl->comm); free(hdl); return 0; }
void test_put(void) { int me, nproc; MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Win dst_win; double *dst_buf; double src_buf[MAXELEMS]; int i, j; MPI_Alloc_mem(sizeof(double) * nproc * MAXELEMS, MPI_INFO_NULL, &dst_buf); MPI_Win_create(dst_buf, sizeof(double) * nproc * MAXELEMS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &dst_win); for (i = 0; i < MAXELEMS; i++) src_buf[i] = me + 1.0; MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, dst_win); for (i = 0; i < nproc * MAXELEMS; i++) dst_buf[i] = 0.0; MPI_Win_unlock(me, dst_win); MPI_Barrier(MPI_COMM_WORLD); for (i = 0; i < nproc; i++) { int target = i; for (j = 0; j < COUNT; j++) { if (verbose) printf("%2d -> %2d [%2d]\n", me, target, j); MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win); MPI_Put(&src_buf[j], sizeof(double), MPI_BYTE, target, (me * MAXELEMS + j) * sizeof(double), sizeof(double), MPI_BYTE, dst_win); MPI_Win_unlock(target, dst_win); } for (j = 0; j < COUNT; j++) { if (verbose) printf("%2d <- %2d [%2d]\n", me, target, j); MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win); MPI_Get(&src_buf[j], sizeof(double), MPI_BYTE, target, (me * MAXELEMS + j) * sizeof(double), sizeof(double), MPI_BYTE, dst_win); MPI_Win_unlock(target, dst_win); } } MPI_Barrier(MPI_COMM_WORLD); MPI_Win_free(&dst_win); MPI_Free_mem(dst_buf); }
/** One-sided put operation. * * @param[in] src Source address (remote) * @param[in] dst Destination address (local) * @param[in] size Number of bytes to transfer * @param[in] target Process id to target * @return 0 on success, non-zero on failure */ int ARMCI_Put(void *src, void *dst, int size, int target) { gmr_t *src_mreg, *dst_mreg; src_mreg = gmr_lookup(src, ARMCI_GROUP_WORLD.rank); dst_mreg = gmr_lookup(dst, target); ARMCII_Assert_msg(dst_mreg != NULL, "Invalid remote pointer"); /* Local operation */ if (target == ARMCI_GROUP_WORLD.rank) { if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) { gmr_dla_lock(dst_mreg); if (src_mreg) gmr_dla_lock(src_mreg); } ARMCI_Copy(src, dst, size); if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) { gmr_dla_unlock(dst_mreg); if (src_mreg) gmr_dla_unlock(src_mreg); } } /* Origin buffer is private */ else if (src_mreg == NULL || ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_NOGUARD) { gmr_lock(dst_mreg, target); gmr_put(dst_mreg, src, dst, size, target); gmr_unlock(dst_mreg, target); } /* COPY: Either origin and target buffers are in the same window and we can't * lock the same window twice (MPI semantics) or the user has requested * always-copy mode. */ else { void *src_buf; MPI_Alloc_mem(size, MPI_INFO_NULL, &src_buf); ARMCII_Assert(src_buf != NULL); gmr_dla_lock(src_mreg); ARMCI_Copy(src, src_buf, size); gmr_dla_unlock(src_mreg); gmr_lock(dst_mreg, target); gmr_put(dst_mreg, src_buf, dst, size, target); gmr_unlock(dst_mreg, target); MPI_Free_mem(src_buf); } return 0; }
/** Finish a set of prepared buffers. Will perform communication and copies as * needed to ensure results are in the original buffers. Temporary space will be * freed. * * @param[in] orig_bufs Original set of buffers. * @param[out] new_bufs Set of private buffers. * @param[in] count Number of entries in the buffer list. * @param[in] size The size of the buffers (all are of the same size). */ void ARMCII_Buf_finish_read_vec(void **orig_bufs, void **new_bufs, int count, int size) { if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) { int i; for (i = 0; i < count; i++) { if (orig_bufs[i] != new_bufs[i]) { MPI_Free_mem(new_bufs[i]); } } free(new_bufs); } }
int main( int argc, char *argv[] ) { int errs = 0; int rank, size, i; MPI_Comm comm; MPI_Win win; int *winbuf, count; MTest_Init( &argc, &argv ); comm = MPI_COMM_WORLD; MPI_Comm_rank( comm, &rank ); MPI_Comm_size( comm, &size ); /* Allocate and initialize buf */ count = 1000; MPI_Alloc_mem( count*sizeof(int), MPI_INFO_NULL, &winbuf ); MPI_Win_create( winbuf, count * sizeof(int), sizeof(int), MPI_INFO_NULL, comm, &win ); /* Clear winbuf */ memset( winbuf, 0, count*sizeof(int) ); /* Note that for i == rank, this is a useful operation - it allows the programmer to use direct loads and stores, rather than put/get/accumulate, to access the local memory window. */ for (i=0; i<size; i++) { MPI_Win_lock( MPI_LOCK_EXCLUSIVE, i, 0, win ); MPI_Win_unlock( i, win ); } for (i=0; i<size; i++) { MPI_Win_lock( MPI_LOCK_SHARED, i, 0, win ); MPI_Win_unlock( i, win ); } MPI_Win_free( &win ); MPI_Free_mem( winbuf ); /* If this test completes, no error has been found */ /* A more complete test may ensure that local locks in fact block remote, exclusive locks */ MTest_Finalize( errs ); MPI_Finalize(); return 0; }
MTEST_THREAD_RETURN_TYPE run_test(void *arg) { int i; double *local_b; MPI_Alloc_mem(COUNT * sizeof(double), MPI_INFO_NULL, &local_b); for (i = 0; i < LOOPS; i++) { MPI_Get(local_b, COUNT, MPI_DOUBLE, 0, 0, COUNT, MPI_DOUBLE, win); MPI_Win_flush_all(win); } MPI_Free_mem(local_b); return (MTEST_THREAD_RETURN_TYPE) NULL; }
/** One-sided get operation. * * @param[in] src Source address (remote) * @param[in] dst Destination address (local) * @param[in] size Number of bytes to transfer * @param[in] target Process id to target * @return 0 on success, non-zero on failure */ int PARMCI_Get(void *src, void *dst, int size, int target) { gmr_t *src_mreg, *dst_mreg; src_mreg = gmr_lookup(src, target); /* If NOGUARD is set, assume the buffer is not shared */ if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) dst_mreg = gmr_lookup(dst, ARMCI_GROUP_WORLD.rank); else dst_mreg = NULL; ARMCII_Assert_msg(src_mreg != NULL, "Invalid remote pointer"); /* Local operation */ if (target == ARMCI_GROUP_WORLD.rank && dst_mreg == NULL) { ARMCI_Copy(src, dst, size); } /* Origin buffer is private */ else if (dst_mreg == NULL) { gmr_get(src_mreg, src, dst, size, target); gmr_flush(src_mreg, target, 0); /* it's a round trip so w.r.t. flush, local=remote */ } /* COPY: Either origin and target buffers are in the same window and we can't * lock the same window twice (MPI semantics) or the user has requested * always-copy mode. */ else { void *dst_buf; MPI_Alloc_mem(size, MPI_INFO_NULL, &dst_buf); ARMCII_Assert(dst_buf != NULL); gmr_get(src_mreg, src, dst_buf, size, target); gmr_flush(src_mreg, target, 0); /* it's a round trip so w.r.t. flush, local=remote */ ARMCI_Copy(dst_buf, dst, size); MPI_Free_mem(dst_buf); } return 0; }
/** Destroy a group of ARMCI mutexes. Collective. * * @param[in] hdl Handle to the group that should be destroyed. * @return Zero on success, non-zero otherwise. */ int ARMCIX_Destroy_mutexes_hdl(armcix_mutex_hdl_t hdl) { int i; for (i = 0; i < hdl->max_count; i++) { MPI_Win_free(&hdl->windows[i]); } if (hdl->bases != NULL) { for (i = 0; i < hdl->my_count; i++) MPI_Free_mem(hdl->bases[i]); free(hdl->bases); } ARMCI_Group_free(&hdl->grp); free(hdl->windows); free(hdl); return 0; }
/** Finish a set of prepared buffers. Will perform communication and copies as * needed to ensure results are in the original buffers. Temporary space will be * freed. * * @param[in] orig_bufs Original set of buffers. * @param[out] new_bufs Set of private buffers. * @param[in] count Number of entries in the buffer list. * @param[in] size The size of the buffers (all are of the same size). */ void ARMCII_Buf_finish_write_vec(void **orig_bufs, void **new_bufs, int count, int size) { if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) { int i; for (i = 0; i < count; i++) { if (orig_bufs[i] != new_bufs[i]) { gmr_t *mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank); ARMCII_Assert(mreg != NULL); gmr_dla_lock(mreg); ARMCI_Copy(new_bufs[i], orig_bufs[i], size); // gmr_put(mreg, new_bufs[i], orig_bufs[i], size, ARMCI_GROUP_WORLD.rank); gmr_dla_unlock(mreg); MPI_Free_mem(new_bufs[i]); } } free(new_bufs); } }
/** Free a group of MPI mutexes. Collective on communicator used at the * time of creation. * * @param[in] hdl Handle to the group that will be freed * @return MPI status */ int MPIX_Mutex_free(MPIX_Mutex * hdl_ptr) { MPIX_Mutex hdl = *hdl_ptr; int i; for (i = 0; i < hdl->max_count; i++) { MPI_Win_free(&hdl->windows[i]); } if (hdl->bases != NULL) { for (i = 0; i < hdl->my_count; i++) MPI_Free_mem(hdl->bases[i]); free(hdl->bases); } MPI_Comm_free(&hdl->comm); free(hdl->windows); free(hdl); *hdl_ptr = NULL; return MPI_SUCCESS; }
int main(int argc, char *argv[]) { int rank, nproc; int i; MPI_Win win; int *tar_buf = NULL; int *orig_buf = NULL; MPI_Datatype derived_dtp; int errors = 0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (nproc < 3) { fprintf(stderr, "Run this program with at least 3 processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Alloc_mem(sizeof(int) * DATA_SIZE, MPI_INFO_NULL, &orig_buf); MPI_Alloc_mem(sizeof(int) * DATA_SIZE, MPI_INFO_NULL, &tar_buf); for (i = 0; i < DATA_SIZE; i++) { orig_buf[i] = 1; tar_buf[i] = 0; } MPI_Type_vector(COUNT, BLOCKLENGTH - 1, STRIDE, MPI_INT, &derived_dtp); MPI_Type_commit(&derived_dtp); MPI_Win_create(tar_buf, sizeof(int) * DATA_SIZE, sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win); /***** test between rank 0 and rank 1 *****/ if (rank == 1) { MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win); for (i = 0; i < OPS_NUM; i++) { MPI_Accumulate(orig_buf, 1, derived_dtp, 0, 0, DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win); MPI_Win_flush_local(0, win); } MPI_Win_unlock(0, win); } MPI_Barrier(MPI_COMM_WORLD); /* check results */ if (rank == 0) { for (i = 0; i < DATA_SIZE - COUNT; i++) { if (tar_buf[i] != OPS_NUM) { printf("tar_buf[%d] = %d, expected %d\n", i, tar_buf[i], OPS_NUM); errors++; } } } for (i = 0; i < DATA_SIZE; i++) { tar_buf[i] = 0; } MPI_Barrier(MPI_COMM_WORLD); /***** test between rank 0 and rank 2 *****/ if (rank == 2) { MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win); for (i = 0; i < OPS_NUM; i++) { MPI_Accumulate(orig_buf, 1, derived_dtp, 0, 0, DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win); MPI_Win_flush_local(0, win); } MPI_Win_unlock(0, win); } MPI_Barrier(MPI_COMM_WORLD); /* check results */ if (rank == 0) { for (i = 0; i < DATA_SIZE - COUNT; i++) { if (tar_buf[i] != OPS_NUM) { printf("tar_buf[%d] = %d, expected %d\n", i, tar_buf[i], OPS_NUM); errors++; } } if (errors == 0) printf(" No Errors\n"); } MPI_Win_free(&win); MPI_Type_free(&derived_dtp); MPI_Free_mem(orig_buf); MPI_Free_mem(tar_buf); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { int i, j; int ch; extern char *optarg; int edge; int size; int nloop=5; double **ptr_loc; MP_INIT(argc,argv); MP_PROCS(&nproc); MP_MYID(&me); while ((ch = getopt(argc, argv, "n:b:p:h")) != -1) { switch(ch) { case 'n': n = atoi(optarg); break; case 'b': block_size = atoi(optarg); break; case 'p': nproc = atoi(optarg); break; case 'h': { printf("Usage: LU, or \n"); printf(" LU -nMATRIXSIZE -bBLOCKSIZE -pNPROC\n"); MP_BARRIER(); MP_FINALIZE(); exit(0); } } } if(me == 0) { printf("\n Blocked Dense LU Factorization\n"); printf(" %d by %d Matrix\n", n, n); printf(" %d Processors\n", nproc); printf(" %d by %d Element Blocks\n", block_size, block_size); printf("\n"); } num_rows = (int) sqrt((double) nproc); for (;;) { num_cols = nproc/num_rows; if (num_rows*num_cols == nproc) break; num_rows--; } nblocks = n/block_size; if (block_size * nblocks != n) { nblocks++; } edge = n%block_size; if (edge == 0) { edge = block_size; } #ifdef DEBUG if(me == 0) for (i=0; i<nblocks; i++) { for (j=0; j<nblocks; j++) printf("%d ", block_owner(i, j)); printf("\n"); } MP_BARRIER(); MP_FINALIZE(); exit(0); #endif for (i=0; i<nblocks; i++) { for (j=0; j<nblocks; j++) { if(block_owner(i,j) == me) { if ((i == nblocks-1) && (j == nblocks-1)) { size = edge*edge; } else if ((i == nblocks-1) || (j == nblocks-1)) { size = edge*block_size; } else { size = block_size*block_size; } proc_bytes += size*sizeof(double); } } } ptr = (void **)malloc(nproc * sizeof(void *)); #ifdef MPI2_ONESIDED MPI_Alloc_mem(proc_bytes, MPI_INFO_NULL, &ptr[me]); MPI_Win_create((void*)ptr[me], proc_bytes, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win); for(i=0; i<nproc; i++) ptr[i] = (double *)ptr[me]; MPI_Barrier(MPI_COMM_WORLD); #else /* initialize ARMCI */ ARMCI_Init(); ARMCI_Malloc(ptr, proc_bytes); #endif a = (double **)malloc(nblocks*nblocks*sizeof(double *)); if (a == NULL) { fprintf(stderr, "Could not malloc memory for a\n"); exit(-1); } ptr_loc = (double **)malloc(nproc*sizeof(double *)); for(i=0; i<nproc; i++) ptr_loc[i] = (double *)ptr[i]; for(i=0; i<nblocks; i ++) { for(j=0; j<nblocks; j++) { a[i+j*nblocks] = ptr_loc[block_owner(i, j)]; if ((i == nblocks-1) && (j == nblocks-1)) { size = edge*edge; } else if ((i == nblocks-1) || (j == nblocks-1)) { size = edge*block_size; } else { size = block_size*block_size; } ptr_loc[block_owner(i, j)] += size; } } /* initialize the array */ init_array(); /* barrier to ensure all initialization is done */ MP_BARRIER(); /* to remove cold-start misses, all processors touch their own data */ touch_array(block_size, me); MP_BARRIER(); if(doprint) { if(me == 0) { printf("Matrix before LU decomposition\n"); print_array(me); } MP_BARRIER(); } lu(n, block_size, me); /* cold start */ /* Starting the timer */ MP_BARRIER(); if(me == 0) start_timer(); for(i=0; i<nloop; i++) lu(n, block_size, me); MP_BARRIER(); /* Timer Stops here */ if(me == 0) printf("\nRunning time = %lf milliseconds.\n\n", elapsed_time()/nloop); printf("%d: (ngets=%d) Communication (get) time = %e milliseconds\n", me, get_cntr, comm_time*1000/nloop); if(doprint) { if(me == 0) { printf("after LU\n"); print_array(me); } MP_BARRIER(); } /* done */ #ifdef MPI2_ONESIDED MPI_Win_free(&win); MPI_Free_mem(ptr[me]); #else ARMCI_Free(ptr[me]); ARMCI_Finalize(); #endif MP_FINALIZE(); return 0; }
int main(int argc, char *argv[]) { MPI_Win win; int errors = 0; int rank, nproc, i; double *orig_buf; double *tar_buf; MPI_Datatype vector_dtp; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Alloc_mem(sizeof(double) * DATA_SIZE, MPI_INFO_NULL, &orig_buf); MPI_Alloc_mem(sizeof(double) * DATA_SIZE, MPI_INFO_NULL, &tar_buf); for (i = 0; i < DATA_SIZE; i++) { orig_buf[i] = 1.0; tar_buf[i] = 0.5; } MPI_Type_vector(5 /* count */ , 3 /* blocklength */ , 5 /* stride */ , MPI_DOUBLE, &vector_dtp); MPI_Type_commit(&vector_dtp); MPI_Win_create(tar_buf, sizeof(double) * DATA_SIZE, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win); if (rank == 0) { MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win); MPI_Accumulate(orig_buf, 1, vector_dtp, 1, 0, 1, vector_dtp, MPI_SUM, win); MPI_Win_unlock(1, win); } MPI_Win_fence(0, win); if (rank == 1) { for (i = 0; i < DATA_SIZE; i++) { if (i % 5 < 3) { if (tar_buf[i] != 1.5) { printf("tar_buf[i] = %f (expected 1.5)\n", tar_buf[i]); errors++; } } else { if (tar_buf[i] != 0.5) { printf("tar_buf[i] = %f (expected 0.5)\n", tar_buf[i]); errors++; } } } } MPI_Type_free(&vector_dtp); MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win); MPI_Accumulate(orig_buf, DATA_SIZE, MPI_DOUBLE, 1, 0, DATA_SIZE, MPI_DOUBLE, MPI_SUM, win); MPI_Win_unlock(1, win); } MPI_Win_fence(0, win); if (rank == 1) { for (i = 0; i < DATA_SIZE; i++) { if (i % 5 < 3) { if (tar_buf[i] != 2.5) { printf("tar_buf[i] = %f (expected 2.5)\n", tar_buf[i]); errors++; } } else { if (tar_buf[i] != 1.5) { printf("tar_buf[i] = %f (expected 1.5)\n", tar_buf[i]); errors++; } } } } MPI_Win_free(&win); MPI_Free_mem(orig_buf); MPI_Free_mem(tar_buf); if (rank == 1) { if (errors == 0) printf(" No Errors\n"); } MPI_Finalize(); return 0; }