/* Run Get_accumulate with flush */
void run_get_acc_with_flush(int rank, WINDOW type)
{
    int size, i;
    MPI_Aint disp = 0;
    MPI_Win win;

    for (size = 0; size <= MAX_SIZE; size = (size ? size * 2 : size + 1)) {
        allocate_memory(rank, rbuf, size, type, &win);

        if (type == WIN_DYNAMIC) {
            disp = sdisp_remote;
        }

        if (size > LARGE_MESSAGE_SIZE) {
            loop = LOOP_LARGE;
            skip = SKIP_LARGE;
        }

        if (rank == 0) {
            MPI_CHECK(MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, win));

            for (i = 0; i < skip + loop; i++) {
                if (i == skip) {
                    t_start = MPI_Wtime();
                }
                MPI_CHECK(MPI_Get_accumulate(sbuf, size, MPI_CHAR, cbuf, size, MPI_CHAR,
                                             1, disp, size, MPI_CHAR, MPI_SUM, win));
                MPI_CHECK(MPI_Win_flush(1, win));
            }

            t_end = MPI_Wtime();
            MPI_CHECK(MPI_Win_unlock(1, win));
        }

        MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
        print_latency(rank, size);
        MPI_Win_free(&win);
    }
}
/* Run ACC with Lock/unlock */
void run_acc_with_lock(int rank, WINDOW type)
{
    int size, i;
    MPI_Aint disp = 0;
    MPI_Win win;

    for (size = 0; size <= MAX_SIZE; size = (size ? size * 2 : 1)) {
        allocate_memory(rank, sbuf_original, rbuf_original, &sbuf, &rbuf, &sbuf,
                        size, type, &win);
#if MPI_VERSION >= 3
        if (type == WIN_DYNAMIC) {
            disp = disp_remote;
        }
#endif
        if (size > LARGE_MESSAGE_SIZE) {
            loop = LOOP_LARGE;
            skip = SKIP_LARGE;
        }

        if (rank == 0) {
            for (i = 0; i < skip + loop; i++) {
                if (i == skip) {
                    t_start = MPI_Wtime();
                }
                MPI_CHECK(MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win));
                MPI_CHECK(MPI_Accumulate(sbuf, size, MPI_CHAR, 1, disp, size,
                                         MPI_CHAR, MPI_SUM, win));
                MPI_CHECK(MPI_Win_unlock(1, win));
            }
            t_end = MPI_Wtime();
        }

        MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
        print_latency(rank, size);
        free_memory(sbuf, rbuf, win, rank);
    }
}
void MPIMutex::unlock(int proc)
{
    int rank, nproc;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nproc);

    byte *buff = (byte *) malloc(nproc * sizeof(byte));
    buff[rank] = 0;

    /* Get all data from the lock_buf, except the byte belonging to
     * me.  Set the byte belonging to me to 0. */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, win);
    MPI_Put(&(buff[rank]), 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, win);
    /* Get data to the left of rank */
    if (rank > 0) {
        MPI_Get(buff, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, win);
    }
    /* Get data to the right of rank */
    if (rank < nproc - 1) {
        MPI_Get(&buff[rank + 1], nproc - 1 - rank, MPI_BYTE, proc, rank + 1,
                nproc - 1 - rank, MPI_BYTE, win);
    }
    MPI_Win_unlock(proc, win);

    /* Notify the next waiting process, starting to my right for fairness */
    for (int i = 1; i < nproc; i++) {
        int p = (rank + i) % nproc;
        if (buff[p] == 1) {
            //std::cout << "notifying "<<p<<"[proc = "<<proc<<"]" << std::endl;
            MPI_Send(NULL, 0, MPI_BYTE, p, MPI_MUTEX_TAG + id, comm);
            break;
        }
    }
    //std::cout << "lock released [proc = "<<proc<<"]" << std::endl;
    free(buff);
}
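/* The matching lock() is not shown in this excerpt.  The sketch below is a
 * hypothetical reconstruction of the acquire side of this byte-array mutex
 * scheme, assuming the same members (comm, win, id) and MPI_MUTEX_TAG used
 * by unlock() above: mark our own byte, read everyone else's, and block on
 * a zero-byte receive if anyone else holds or wants the mutex. */
void MPIMutex::lock(int proc)
{
    int rank, nproc;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nproc);

    byte *buff = (byte *) malloc(nproc * sizeof(byte));

    /* Set my byte to 1 and fetch everyone else's in one exclusive epoch */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc, 0, win);
    buff[rank] = 1;
    MPI_Put(&buff[rank], 1, MPI_BYTE, proc, rank, 1, MPI_BYTE, win);
    if (rank > 0) {
        MPI_Get(buff, rank, MPI_BYTE, proc, 0, rank, MPI_BYTE, win);
    }
    if (rank < nproc - 1) {
        MPI_Get(&buff[rank + 1], nproc - 1 - rank, MPI_BYTE, proc, rank + 1,
                nproc - 1 - rank, MPI_BYTE, win);
    }
    MPI_Win_unlock(proc, win);

    /* If any other byte is set, someone holds the mutex; wait for the
     * zero-byte notification that unlock() sends to the next waiter. */
    for (int i = 0; i < nproc; i++) {
        if (i != rank && buff[i] == 1) {
            MPI_Recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, MPI_MUTEX_TAG + id,
                     comm, MPI_STATUS_IGNORE);
            break;
        }
    }
    free(buff);
}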
int main(int argc, char *argv[])
{
    int rank = 0, nprocs = 0, dst = 0;
    int winbuf[BUFSIZE];
    MPI_Win win = MPI_WIN_NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    memset(winbuf, 0, sizeof(int) * BUFSIZE);
    MPI_Win_create(winbuf, sizeof(int) * BUFSIZE, sizeof(int), MPI_INFO_NULL,
                   MPI_COMM_WORLD, &win);

    if (rank == 0) {
        /* lock each process */
        for (dst = 0; dst < nprocs; dst++) {
            MPI_Win_lock(MPI_LOCK_SHARED, dst, 0, win);
        }
        /* unlock each process */
        for (dst = nprocs - 1; dst >= 0; dst--) {
            MPI_Win_unlock(dst, win);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Win_free(&win);

    if (rank == 0) {
        fprintf(stdout, " No Errors\n");
        fflush(stdout);
    }

    MPI_Finalize();
    return 0;
}
/* Run FOP with flush local */
void run_fop_with_flush_local(int rank, WINDOW type)
{
    int i;
    MPI_Win win;
    MPI_Aint disp = 0;

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
    allocate_atomic_memory(rank, sbuf_original, rbuf_original, tbuf_original,
                           NULL, (char **) &sbuf, (char **) &rbuf,
                           (char **) &tbuf, NULL, (char **) &rbuf,
                           MAX_MSG_SIZE, type, &win);

    if (rank == 0) {
        if (type == WIN_DYNAMIC) {
            disp = disp_remote;
        }
        MPI_CHECK(MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win));
        for (i = 0; i < skip + loop; i++) {
            if (i == skip) {
                t_start = MPI_Wtime();
            }
            MPI_CHECK(MPI_Fetch_and_op(sbuf, tbuf, MPI_LONG_LONG, 1, disp, MPI_SUM, win));
            MPI_CHECK(MPI_Win_flush_local(1, win));
        }
        t_end = MPI_Wtime();
        MPI_CHECK(MPI_Win_unlock(1, win));
    }

    MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
    print_latency(rank, 8);
    free_atomic_memory(sbuf, rbuf, tbuf, NULL, win, rank);
}
int main(int argc, char *argv[])
{
    int rank, nproc, i;
    int errors = 0, all_errors = 0;
    int *buf;
    MPI_Win window;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (nproc < 2) {
        if (rank == 0)
            printf("Error: must be run with two or more processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /** Create using MPI_Win_create() **/
    if (rank == 0) {
        MPI_Alloc_mem(4 * sizeof(int), MPI_INFO_NULL, &buf);
        *buf = nproc - 1;
    } else
        buf = NULL;

    MPI_Win_create(buf, 4 * sizeof(int) * (rank == 0), 1,
                   MPI_INFO_NULL, MPI_COMM_WORLD, &window);

    /* PROC_NULL Communication */
    {
        MPI_Request pn_req[4];
        int val[4], res;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, MPI_PROC_NULL,
                            0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, MPI_PROC_NULL, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, MPI_PROC_NULL, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, MPI_PROC_NULL, 3, 1, MPI_INT,
                        MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, window);

    /* GET-ACC: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request gacc_req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget_accumulate(&rank, 1, MPI_INT, &val, 1, MPI_INT, 0, 0, 1, MPI_INT,
                            MPI_REPLACE, window, &gacc_req);
        assert(gacc_req != MPI_REQUEST_NULL);
        MPI_Wait(&gacc_req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc - 1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc - 1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank + 1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0)
        *buf = nproc - 1;
    MPI_Win_sync(window);

    /* GET+PUT: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Rput(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc - 1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc - 1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank + 1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0)
        *buf = nproc - 1;
    MPI_Win_sync(window);

    /* GET+ACC: Test third-party communication, through rank 0. */
    for (i = 0; i < ITER; i++) {
        MPI_Request req;
        int val = -1, exp = -1;

        /* Processes form a ring.  Process 0 starts first, then passes a token
         * to the right.  Each process, in turn, performs third-party
         * communication via process 0's window. */
        if (rank > 0) {
            MPI_Recv(NULL, 0, MPI_BYTE, rank - 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }

        MPI_Rget(&val, 1, MPI_INT, 0, 0, 1, MPI_INT, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Raccumulate(&rank, 1, MPI_INT, 0, 0, 1, MPI_INT, MPI_REPLACE, window, &req);
        assert(req != MPI_REQUEST_NULL);
        MPI_Wait(&req, MPI_STATUS_IGNORE);

        MPI_Win_flush(0, window);

        exp = (rank + nproc - 1) % nproc;

        if (val != exp) {
            printf("%d - Got %d, expected %d\n", rank, val, exp);
            errors++;
        }

        if (rank < nproc - 1) {
            MPI_Send(NULL, 0, MPI_BYTE, rank + 1, 0, MPI_COMM_WORLD);
        }

        MPI_Barrier(MPI_COMM_WORLD);
    }

    MPI_Win_unlock(0, window);
    MPI_Barrier(MPI_COMM_WORLD);

    /* Wait inside of an epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target,
                            0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT,
                        MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_unlock_all(window);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Wait outside of an epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target,
                            0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT,
                        MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
    }

    /* Wait in a different epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target,
                            0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT,
                        MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Win_lock_all(0, window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_unlock_all(window);
    }

    /* Wait in a fence epoch */
    {
        MPI_Request pn_req[4];
        int val[4], res;
        const int target = 0;

        MPI_Win_lock_all(0, window);

        MPI_Rget_accumulate(&val[0], 1, MPI_INT, &res, 1, MPI_INT, target,
                            0, 1, MPI_INT, MPI_REPLACE, window, &pn_req[0]);
        MPI_Rget(&val[1], 1, MPI_INT, target, 1, 1, MPI_INT, window, &pn_req[1]);
        MPI_Rput(&val[2], 1, MPI_INT, target, 2, 1, MPI_INT, window, &pn_req[2]);
        MPI_Raccumulate(&val[3], 1, MPI_INT, target, 3, 1, MPI_INT,
                        MPI_REPLACE, window, &pn_req[3]);

        assert(pn_req[0] != MPI_REQUEST_NULL);
        assert(pn_req[1] != MPI_REQUEST_NULL);
        assert(pn_req[2] != MPI_REQUEST_NULL);
        assert(pn_req[3] != MPI_REQUEST_NULL);

        MPI_Win_unlock_all(window);

        MPI_Win_fence(0, window);
        MPI_Waitall(4, pn_req, MPI_STATUSES_IGNORE);
        MPI_Win_fence(0, window);
    }

    MPI_Win_free(&window);
    if (buf)
        MPI_Free_mem(buf);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();
    return 0;
}
int main(int argc, char **argv)
{
    int i, j, rank, nranks, peer, bufsize, errs;
    double *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;

    MTest_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    for (i = 0; i < XDIM * YDIM; i++) {
        *(win_buf + i) = 1.0 + rank;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank + 1) % nranks;

    /* Perform ITERATIONS strided put operations */
    for (i = 0; i < ITERATIONS; i++) {
        MPI_Aint idx_loc[SUB_YDIM];
        int idx_rem[SUB_YDIM];
        int blk_len[SUB_YDIM];
        MPI_Datatype src_type, dst_type;

        for (j = 0; j < SUB_YDIM; j++) {
            MPI_Get_address(&src_buf[j * XDIM], &idx_loc[j]);
            idx_rem[j] = j * XDIM * sizeof(double);
            blk_len[j] = SUB_XDIM * sizeof(double);
        }

        MPI_Type_create_hindexed(SUB_YDIM, blk_len, idx_loc, MPI_BYTE, &src_type);
        MPI_Type_create_indexed_block(SUB_YDIM, SUB_XDIM * sizeof(double), idx_rem,
                                      MPI_BYTE, &dst_type);
        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Put(MPI_BOTTOM, 1, src_type, peer, 0, 1, dst_type, buf_win);
        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errs = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual = *(win_buf + i + j * XDIM);
            const double expected = (1.0 + ((rank + nranks - 1) % nranks));
            /* compare against a tolerance in both directions, as the other
             * verification loops in this suite do */
            if (fabs(actual - expected) > 1e-10) {
                SQUELCH(printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                               rank, j, i, expected, actual););
                errs++;
                fflush(stdout);
            }
        }
    }
    MPI_Win_unlock(rank, buf_win);

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(src_buf);
    MPI_Free_mem(dst_buf);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
int main(int argc, char *argv[])
{
    int err, errs = 0;
    int rank, size, orig, target;
    int minsize = 2, count;
    int i, j;
    MPI_Aint origcount, targetcount;
    MPI_Comm comm;
    MPI_Win win;
    MPI_Aint lb, extent;
    MPI_Datatype origtype, targettype;
    DTP_t orig_dtp, target_dtp;
    void *origbuf, *targetbuf;

    MTest_Init(&argc, &argv);

#ifndef USE_DTP_POOL_TYPE__STRUCT
    /* set in 'test/mpi/structtypetest.txt' to split tests */
    MPI_Datatype basic_type;
    int len;
    char type_name[MPI_MAX_OBJECT_NAME] = { 0 };

    err = MTestInitBasicSignature(argc, argv, &count, &basic_type);
    if (err)
        return MTestReturnValue(1);

    err = DTP_pool_create(basic_type, count, &orig_dtp);
    if (err != DTP_SUCCESS) {
        MPI_Type_get_name(basic_type, type_name, &len);
        fprintf(stdout, "Error while creating orig pool (%s,%d)\n", type_name, count);
        fflush(stdout);
    }

    err = DTP_pool_create(basic_type, count, &target_dtp);
    if (err != DTP_SUCCESS) {
        MPI_Type_get_name(basic_type, type_name, &len);
        fprintf(stdout, "Error while creating target pool (%s,%d)\n", type_name, count);
        fflush(stdout);
    }
#else
    MPI_Datatype *basic_types = NULL;
    int *basic_type_counts = NULL;
    int basic_type_num;

    err = MTestInitStructSignature(argc, argv, &basic_type_num, &basic_type_counts,
                                   &basic_types);
    if (err)
        return MTestReturnValue(1);

    err = DTP_pool_create_struct(basic_type_num, basic_types, basic_type_counts,
                                 &orig_dtp);
    if (err != DTP_SUCCESS) {
        fprintf(stdout, "Error while creating struct pool\n");
        fflush(stdout);
    }

    err = DTP_pool_create_struct(basic_type_num, basic_types, basic_type_counts,
                                 &target_dtp);
    if (err != DTP_SUCCESS) {
        fprintf(stdout, "Error while creating struct pool\n");
        fflush(stdout);
    }

    /* this is ignored */
    count = 0;
#endif

    while (MTestGetIntracommGeneral(&comm, minsize, 1)) {
        if (comm == MPI_COMM_NULL)
            continue;

        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);
        orig = 0;
        target = size - 1;

        for (i = 0; i < target_dtp->DTP_num_objs; i++) {
            err = DTP_obj_create(target_dtp, i, 0, 0, 0);
            if (err != DTP_SUCCESS) {
                errs++;
                break;
            }

            targetcount = target_dtp->DTP_obj_array[i].DTP_obj_count;
            targettype = target_dtp->DTP_obj_array[i].DTP_obj_type;
            targetbuf = target_dtp->DTP_obj_array[i].DTP_obj_buf;

            MPI_Type_get_extent(targettype, &lb, &extent);
            MPI_Win_create(targetbuf, lb + targetcount * extent, (int) extent,
                           MPI_INFO_NULL, comm, &win);

            for (j = 0; j < orig_dtp->DTP_num_objs; j++) {
                err = DTP_obj_create(orig_dtp, j, 0, 1, count);
                if (err != DTP_SUCCESS) {
                    errs++;
                    break;
                }

                origcount = orig_dtp->DTP_obj_array[j].DTP_obj_count;
                origtype = orig_dtp->DTP_obj_array[j].DTP_obj_type;
                origbuf = orig_dtp->DTP_obj_array[j].DTP_obj_buf;

                if (rank == orig) {
                    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
                    MPI_Accumulate(origbuf, origcount, origtype, target, 0,
                                   targetcount, targettype, MPI_REPLACE, win);
                    MPI_Win_unlock(target, win);
                    MPI_Barrier(comm);

                    char *resbuf = (char *) calloc(lb + extent * targetcount, sizeof(char));

                    /* wait for the destination to finish checking and
                     * reinitializing the buffer */
                    MPI_Barrier(comm);

                    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
                    MPI_Get_accumulate(origbuf, origcount, origtype, resbuf,
                                       targetcount, targettype, target, 0,
                                       targetcount, targettype, MPI_REPLACE, win);
                    MPI_Win_unlock(target, win);
                    MPI_Barrier(comm);
                    free(resbuf);
                } else if (rank == target) {
                    /* TODO: add a DTP_buf_set() function to replace this */
                    char *tmp = (char *) calloc(lb + extent * targetcount, sizeof(char));
                    memcpy(tmp, targetbuf, lb + extent * targetcount);

                    MPI_Barrier(comm);
                    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
                    err = DTP_obj_buf_check(target_dtp, i, 0, 1, count);
                    if (err != DTP_SUCCESS) {
                        errs++;
                    }

                    /* restore target buffer */
                    memcpy(targetbuf, tmp, lb + extent * targetcount);
                    free(tmp);

                    MPI_Win_unlock(target, win);

                    /* signal the source that checking and reinitialization
                     * is done */
                    MPI_Barrier(comm);
                    MPI_Barrier(comm);

                    MPI_Win_lock(MPI_LOCK_SHARED, target, 0, win);
                    err = DTP_obj_buf_check(target_dtp, i, 0, 1, count);
                    if (err != DTP_SUCCESS) {
                        errs++;
                    }
                    MPI_Win_unlock(target, win);
                }
                DTP_obj_free(orig_dtp, j);
            }
            MPI_Win_free(&win);
            DTP_obj_free(target_dtp, i);
        }
        MTestFreeComm(&comm);
    }

    DTP_pool_free(orig_dtp);
    DTP_pool_free(target_dtp);

#ifdef USE_DTP_POOL_TYPE__STRUCT
    /* cleanup array if any */
    if (basic_types) {
        free(basic_types);
    }
    if (basic_type_counts) {
        free(basic_type_counts);
    }
#endif

    MTest_Finalize(errs);
    return MTestReturnValue(errs);
}
int main(int argc, char **argv)
{
    int rank, nranks, rank_world, nranks_world;
    int i, j, peer, bufsize, errors;
    double *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;
    MPI_Comm shr_comm;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks_world);

    /* use the world rank as the split key; 'rank' is not set yet at this point */
    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank_world,
                        MPI_INFO_NULL, &shr_comm);

    MPI_Comm_rank(shr_comm, &rank);
    MPI_Comm_size(shr_comm, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    MPI_Win_allocate_shared(bufsize, 1, MPI_INFO_NULL, shr_comm, &win_buf, &buf_win);

    MPI_Win_fence(0, buf_win);

    for (i = 0; i < XDIM * YDIM; i++) {
        *(win_buf + i) = -1.0;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_fence(0, buf_win);

    peer = (rank + 1) % nranks;

    /* Perform ITERATIONS strided accumulate operations */
    for (i = 0; i < ITERATIONS; i++) {
        int idx_rem[SUB_YDIM];
        int blk_len[SUB_YDIM];
        MPI_Datatype src_type, dst_type;

        for (j = 0; j < SUB_YDIM; j++) {
            idx_rem[j] = j * XDIM;
            blk_len[j] = SUB_XDIM;
        }

        MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &src_type);
        MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &dst_type);
        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        /* PUT */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Get_accumulate(src_buf, 1, src_type, dst_buf, 1, src_type, peer, 0, 1,
                           dst_type, MPI_REPLACE, buf_win);
        MPI_Win_unlock(peer, buf_win);

        /* GET */
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Get_accumulate(src_buf, 1, src_type, dst_buf, 1, src_type, peer, 0, 1,
                           dst_type, MPI_NO_OP, buf_win);
        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual = *(win_buf + i + j * XDIM);
            const double expected = (1.0 + ((rank + nranks - 1) % nranks));
            if (fabs(actual - expected) > 1.0e-10) {
                SQUELCH(printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                               rank, j, i, expected, actual););
                errors++;
                fflush(stdout);
            }
        }
    }
    MPI_Win_unlock(rank, buf_win);

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_free(&buf_win);
    MPI_Comm_free(&shr_comm);
    MPI_Free_mem(src_buf);
    MPI_Free_mem(dst_buf);

    MTest_Finalize(errors);
    MPI_Finalize();
    return 0;
}
void set_status(int new_status, int rank)
{
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win_status);
    STATUS[0] = new_status;
    MPI_Win_unlock(rank, win_status);
}
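/* A read-side counterpart is not shown in this excerpt.  A sketch of what it
 * might look like, assuming the same win_status window exposes one int per
 * process at displacement 0; this helper is hypothetical, not the original. */
int get_status(int rank)
{
    int s;
    MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win_status);
    MPI_Get(&s, 1, MPI_INT, rank, 0, 1, MPI_INT, win_status);
    MPI_Win_unlock(rank, win_status);   /* the get completes here */
    return s;
}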
int main(int argc, char *argv[])
{
    int rank, nproc, i, x;
    int errors = 0, all_errors = 0;
    MPI_Win win = MPI_WIN_NULL;

    MPI_Comm shm_comm = MPI_COMM_NULL;
    int shm_nproc, shm_rank;
    double **shm_bases = NULL, *my_base;
    MPI_Win shm_win = MPI_WIN_NULL;
    MPI_Group shm_group = MPI_GROUP_NULL, world_group = MPI_GROUP_NULL;
    int *shm_ranks = NULL, *shm_ranks_in_world = NULL;
    MPI_Aint get_target_base_offsets = 0;

    int win_size = sizeof(double) * BUF_CNT;
    int new_win_size = win_size;
    int win_unit = sizeof(double);
    int shm_root_rank_in_world;
    int origin = -1, put_target, get_target;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_group(MPI_COMM_WORLD, &world_group);

    if (nproc != 4) {
        if (rank == 0)
            printf("Error: must be run with four processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL,
                        &shm_comm);
    MPI_Comm_rank(shm_comm, &shm_rank);
    MPI_Comm_size(shm_comm, &shm_nproc);
    MPI_Comm_group(shm_comm, &shm_group);

    /* Platform does not support shared memory or wrong host file, just return. */
    if (shm_nproc != 2) {
        goto exit;
    }

    shm_bases = (double **) calloc(shm_nproc, sizeof(double *));
    shm_ranks = (int *) calloc(shm_nproc, sizeof(int));
    shm_ranks_in_world = (int *) calloc(shm_nproc, sizeof(int));

    if (shm_rank == 0)
        shm_root_rank_in_world = rank;
    MPI_Bcast(&shm_root_rank_in_world, 1, MPI_INT, 0, shm_comm);

    /* Identify the ranks of the target processes, which are located on node 0 */
    if (rank == 0) {
        for (i = 0; i < shm_nproc; i++) {
            shm_ranks[i] = i;
        }
        MPI_Group_translate_ranks(shm_group, shm_nproc, shm_ranks, world_group,
                                  shm_ranks_in_world);
    }

    MPI_Bcast(shm_ranks_in_world, shm_nproc, MPI_INT, 0, MPI_COMM_WORLD);

    put_target = shm_ranks_in_world[shm_nproc - 1];
    get_target = shm_ranks_in_world[0];

    /* Identify the rank of the origin process, which is located on node 1 */
    if (shm_root_rank_in_world == 1 && shm_rank == 0) {
        origin = rank;
        if (verbose) {
            printf("---- I am origin = %d, get_target = %d, put_target = %d\n",
                   origin, get_target, put_target);
        }
    }

    /* Allocate shared memory among local processes */
    MPI_Win_allocate_shared(win_size, win_unit, MPI_INFO_NULL, shm_comm, &my_base,
                            &shm_win);

    if (shm_root_rank_in_world == 0 && verbose) {
        MPI_Aint size;
        int disp_unit;
        for (i = 0; i < shm_nproc; i++) {
            MPI_Win_shared_query(shm_win, i, &size, &disp_unit, &shm_bases[i]);
            printf("%d -- shared query: base[%d]=%p, size %zd, unit %d\n",
                   rank, i, shm_bases[i], size, disp_unit);
        }
    }

    /* Get the offset of put_target (1) on get_target (0) */
    get_target_base_offsets = (shm_nproc - 1) * win_size / win_unit;

    if (origin == rank && verbose)
        printf("%d -- base_offset of put_target %d on get_target %d: %zd\n",
               rank, put_target, get_target, get_target_base_offsets);

    /* Create using MPI_Win_create().  Note that the new window size of
     * get_target (0) is equal to the total size of the shm segments on this
     * node, thus the get_target process can read the bytes located on the
     * put_target process. */
    for (i = 0; i < BUF_CNT; i++) {
        local_buf[i] = (i + 1) * 1.0;
        my_base[i] = 0.0;
    }

    if (get_target == rank)
        new_win_size = win_size * shm_nproc;

    MPI_Win_create(my_base, new_win_size, win_unit, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if (verbose)
        printf("%d -- new window my_base %p, size %d\n", rank, my_base, new_win_size);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Check if flush guarantees the completion of put operations on the target side.
     *
     * P exclusively locks 2 processes whose windows are shared with each other.
     * P first puts and flushes to one process, then gets the updated data from the
     * other process.  If flush returns before the operations are done on the target
     * side, the data may be incorrect. */
    for (x = 0; x < ITER; x++) {
        for (i = 0; i < BUF_CNT; i++) {
            local_buf[i] += x;
            check_buf[i] = 0;
        }

        if (rank == origin) {
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, put_target, 0, win);
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, get_target, 0, win);

            for (i = 0; i < BUF_CNT; i++) {
                MPI_Put(&local_buf[i], 1, MPI_DOUBLE, put_target, i, 1, MPI_DOUBLE, win);
            }
            MPI_Win_flush(put_target, win);

            MPI_Get(check_buf, BUF_CNT, MPI_DOUBLE, get_target,
                    get_target_base_offsets, BUF_CNT, MPI_DOUBLE, win);
            MPI_Win_flush(get_target, win);

            for (i = 0; i < BUF_CNT; i++) {
                if (check_buf[i] != local_buf[i]) {
                    printf("%d(iter %d) - Got check_buf[%d] = %.1lf, expected %.1lf\n",
                           rank, x, i, check_buf[i], local_buf[i]);
                    errors++;
                }
            }

            MPI_Win_unlock(put_target, win);
            MPI_Win_unlock(get_target, win);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

  exit:
    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    if (shm_bases)
        free(shm_bases);
    if (shm_ranks)
        free(shm_ranks);
    if (shm_ranks_in_world)
        free(shm_ranks_in_world);

    if (shm_win != MPI_WIN_NULL)
        MPI_Win_free(&shm_win);
    if (win != MPI_WIN_NULL)
        MPI_Win_free(&win);
    if (shm_comm != MPI_COMM_NULL)
        MPI_Comm_free(&shm_comm);
    if (shm_group != MPI_GROUP_NULL)
        MPI_Group_free(&shm_group);
    if (world_group != MPI_GROUP_NULL)
        MPI_Group_free(&world_group);

    MPI_Finalize();
    return 0;
}
static int run_test(int nop)
{
    int i, x, errs = 0, errs_total = 0;
    MPI_Status stat;
    int dst;
    int winbuf_offset = 0;
    double t0, avg_total_time = 0.0, t_total = 0.0;
    double sum = 0.0;

    if (nprocs <= NPROCS_M) {
        ITER = ITER_S;
    } else {
        ITER = ITER_L;
    }

    target_computation_init();
    MPI_Win_lock_all(0, win);

    t0 = MPI_Wtime();
    for (x = 0; x < ITER; x++) {
        /* accumulate to every other process, walking around the ring */
        for (dst = (rank + 1) % nprocs; dst != rank; dst = (dst + 1) % nprocs) {
            MPI_Accumulate(&locbuf[0], 1, MPI_DOUBLE, dst, rank, 1, MPI_DOUBLE,
                           MPI_SUM, win);
        }
        MPI_Win_flush_all(win);

        target_computation();

        for (dst = (rank + 1) % nprocs; dst != rank; dst = (dst + 1) % nprocs) {
            for (i = 1; i < nop; i++) {
                MPI_Accumulate(&locbuf[i], 1, MPI_DOUBLE, dst, rank, 1, MPI_DOUBLE,
                               MPI_SUM, win);
            }
        }
        MPI_Win_flush_all(win);
        debug_printf("[%d]MPI_Win_flush all done\n", x);
    }
    t_total += MPI_Wtime() - t0;
    t_total /= ITER;

    MPI_Win_unlock_all(win);
    MPI_Barrier(MPI_COMM_WORLD);

    target_computation_exit();

#ifdef CHECK
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win);
    sum = 0.0;
    for (i = 0; i < nop; i++) {
        sum += locbuf[i];
    }
    sum *= ITER;
    for (i = 0; i < nprocs; i++) {
        if (i == rank)
            continue;
        if (winbuf[i] != sum) {
            fprintf(stderr, "[%d]computation error : winbuf[%d] %.2lf != %.2lf, nop %d\n",
                    rank, i, winbuf[i], sum, nop);
            errs += 1;
        }
    }
    MPI_Win_unlock(rank, win);
#endif

    MPI_Reduce(&t_total, &avg_total_time, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Allreduce(&errs, &errs_total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    if (rank == 0) {
        avg_total_time /= nprocs;
#ifdef MTCORE
        fprintf(stdout, "mtcore: comp_size %d num_op %d nprocs %d total_time %lf\n",
                DGEMM_SIZE, nop, nprocs, avg_total_time);
#else
        fprintf(stdout, "orig: comp_size %d num_op %d nprocs %d total_time %lf\n",
                DGEMM_SIZE, nop, nprocs, avg_total_time);
#endif
    }
    return errs_total;
}
int main(int argc, char **argv)
{
    int procid, nproc, i;
    MPI_Win llist_win;
    llist_ptr_t head_ptr, tail_ptr;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win);

    /* Process 0 creates the head node */
    if (procid == 0)
        head_ptr.disp = alloc_elem(-1, llist_win);

    /* Broadcast the head pointer to everyone */
    head_ptr.rank = 0;
    MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD);
    tail_ptr = head_ptr;

    /* All processes concurrently append NUM_ELEMS elements to the list */
    for (i = 0; i < NUM_ELEMS; i++) {
        llist_ptr_t new_elem_ptr;
        int success;

        /* Create a new list element and register it with the window */
        new_elem_ptr.rank = procid;
        new_elem_ptr.disp = alloc_elem(procid, llist_win);

        /* Append the new node to the list.  This might take multiple attempts
         * if others have already appended and our tail pointer is stale. */
        do {
            llist_ptr_t next_tail_ptr = nil;

            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);
            MPI_Compare_and_swap((void *) &new_elem_ptr.rank, (void *) &nil.rank,
                                 (void *) &next_tail_ptr.rank, MPI_INT, tail_ptr.rank,
                                 (MPI_Aint) &(((llist_elem_t *) tail_ptr.disp)->next.rank),
                                 llist_win);
            MPI_Win_unlock(tail_ptr.rank, llist_win);

            success = (next_tail_ptr.rank == nil.rank);

            if (success) {
                int i, flag;
                MPI_Aint result;

                MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);
                MPI_Fetch_and_op(&new_elem_ptr.disp, &result, MPI_AINT, tail_ptr.rank,
                                 (MPI_Aint) &(((llist_elem_t *) tail_ptr.disp)->next.disp),
                                 MPI_REPLACE, llist_win);
                /* Note: accumulate is faster, since we don't need the result.
                 * Replacing with Fetch_and_op to create a more complete test case. */
                /*
                 * MPI_Accumulate(&new_elem_ptr.disp, 1, MPI_AINT, tail_ptr.rank,
                 *                (MPI_Aint) &(((llist_elem_t *) tail_ptr.disp)->next.disp),
                 *                1, MPI_AINT, MPI_REPLACE, llist_win);
                 */
                MPI_Win_unlock(tail_ptr.rank, llist_win);
                tail_ptr = new_elem_ptr;

                /* For implementations that use pt-to-pt messaging, force progress
                 * for other threads' RMA operations. */
                for (i = 0; i < NPROBE; i++)
                    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag,
                               MPI_STATUS_IGNORE);
            } else {
                /* Tail pointer is stale, fetch the displacement.  May take
                 * multiple tries if it is being updated. */
                do {
                    MPI_Aint junk = 0;

                    MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK,
                                 llist_win);
                    MPI_Fetch_and_op(NULL, &next_tail_ptr.disp, MPI_AINT, tail_ptr.rank,
                                     (MPI_Aint) &(((llist_elem_t *) tail_ptr.disp)->next.disp),
                                     MPI_NO_OP, llist_win);
                    MPI_Win_unlock(tail_ptr.rank, llist_win);
                } while (next_tail_ptr.disp == nil.disp);
                tail_ptr = next_tail_ptr;
            }
        } while (!success);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Traverse the list and verify that all processes inserted exactly the
     * correct number of elements. */
    if (procid == 0) {
        int have_root = 0;
        int errors = 0;
        int *counts, count = 0;

        counts = (int *) malloc(sizeof(int) * nproc);
        assert(counts != NULL);

        for (i = 0; i < nproc; i++)
            counts[i] = 0;

        tail_ptr = head_ptr;

        /* Walk the list and tally up the number of elements inserted by each rank */
        while (tail_ptr.disp != nil.disp) {
            llist_elem_t elem;

            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);
            MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE, tail_ptr.rank,
                    tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE, llist_win);
            MPI_Win_unlock(tail_ptr.rank, llist_win);

            tail_ptr = elem.next;

            /* This is not the root */
            if (have_root) {
                assert(elem.value >= 0 && elem.value < nproc);
                counts[elem.value]++;
                count++;

                if (verbose) {
                    int last_elem = tail_ptr.disp == nil.disp;
                    printf("%2d%s", elem.value, last_elem ? "" : " -> ");
                    if (count % ELEM_PER_ROW == 0 && !last_elem)
                        printf("\n");
                }
            }
            /* This is the root */
            else {
                assert(elem.value == -1);
                have_root = 1;
            }
        }

        if (verbose)
            printf("\n\n");

        /* Verify the counts we collected */
        for (i = 0; i < nproc; i++) {
            int expected = NUM_ELEMS;
            if (counts[i] != expected) {
                printf("Error: Rank %d inserted %d elements, expected %d\n",
                       i, counts[i], expected);
                errors++;
            }
        }

        printf("%s\n", errors == 0 ? " No Errors" : "FAIL");
        free(counts);
    }

    MPI_Win_free(&llist_win);

    /* Free all the elements in the list */
    for (; my_elems_count > 0; my_elems_count--)
        MPI_Free_mem(my_elems[my_elems_count - 1]);

    MPI_Finalize();
    return 0;
}
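/* alloc_elem(), nil, my_elems, and my_elems_count are referenced above but
 * not shown in this excerpt.  The sketch below is a plausible reconstruction
 * consistent with how they are used (allocate an element, expose it through
 * the dynamic window with MPI_Win_attach, and remember it locally so the
 * final loop can free it); treat the exact bookkeeping as an assumption
 * rather than the original helper. */
MPI_Aint alloc_elem(int value, MPI_Win win)
{
    MPI_Aint disp;
    llist_elem_t *elem_ptr;

    /* Allocate the element and initialize its payload and next pointer */
    MPI_Alloc_mem(sizeof(llist_elem_t), MPI_INFO_NULL, &elem_ptr);
    elem_ptr->value = value;
    elem_ptr->next = nil;

    /* Expose the element through the dynamic window */
    MPI_Win_attach(win, elem_ptr, sizeof(llist_elem_t));

    /* Remember it locally so it can be freed after MPI_Win_free; my_elems is
     * assumed here to have room for every element this process allocates */
    my_elems[my_elems_count++] = elem_ptr;

    /* The element's address serves as its displacement in the dynamic window */
    MPI_Get_address(elem_ptr, &disp);
    return disp;
}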
// argc = cpu count, argv = file.cpp
int main(int argc, char *argv[])
{
    // create win object, this is used for locks
    MPI_Win win;

    // needed for MPI
    int namelen = 0;
    int myid, numprocs = 0;

    // processor name
    char processor_name[MPI_MAX_PROCESSOR_NAME];

    // initialize MPI execution environment
    MPI_Init(&argc, &argv);

    // each process gets the total # of processes
    // (the total # of processes specified in mpirun -np n)
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    // each process gets its own id
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    // needed for times
    double program_start = 0;
    double program_end = 0;
    double process_start = 0;
    double process_end = 0;

    // get program start time
    if (myid == 0)
        program_start = MPI_Wtime();

    // get the name of the processor
    MPI_Get_processor_name(processor_name, &namelen);

    // display info
    fprintf(stderr, "process %d on %s\n", myid, processor_name);
    fflush(stderr);

    // create win object for locks
    MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    // array to hold the chars from the words plus \0
    char* arr;
    // list to keep track of the start index of each word
    short* list;
    // size of the entire array
    int arr_size = 0;
    // size of the list
    int list_size = 0;
    // new list of words that are not palindromes
    char* new_words;
    // size of the new array of words each process will have
    // in order to send back to root after finding all
    // non-palindrome words
    int new_size = 0;
    // total size of the non-palindrome words received from each process
    int total_size = 0;
    // temp vector to hold the words in the file
    std::vector<std::string>* words;

    // root does this
    if (myid == 0) {
        // stream to open file
        std::fstream in;

        // vector to dynamically grow as we add strings to it; this way we
        // don't need to open the file twice (once to count the words, and
        // again to read them into an array sized from the first pass)
        words = new std::vector<std::string>();

        // open file as instream
        in.open("Palindromes.txt", std::ios::in);

        // if error opening file
        if (in.fail()) {
            // display message and close
            std::cout << "Error Opening File" << std::endl;
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        // no error while opening file
        else {
            // temp string to hold each word
            std::string temp;

            // grab each word from each line
            while (getline(in, temp)) {
                // put word into vector
                words->push_back(temp);
                // count the word's characters, plus one for the \0
                // that will precede each word
                arr_size += temp.size() + 1;
            }
            // done, close file
            in.close();
        }

        // set size depending on word count
        list_size = words->size();
        // add one, since later in the program we use the next index to mark
        // where the loop stops; without it there is no way to mark the end
        // and the last word never gets processed
        list_size++;
    }

    // get process start time
    if (myid == 0)
        process_start = MPI_Wtime();

    MPI_Barrier(MPI_COMM_WORLD);

    // broadcast the sizes of the char array and the list to the other
    // processes; they are used to allocate the needed space per process
    MPI_Bcast(&arr_size, 1, MPI_INT, 0, MPI_COMM_WORLD);

    MPI_Barrier(MPI_COMM_WORLD);

    // broadcast the list size (list_size is an int, so it must be
    // broadcast as MPI_INT, not MPI_SHORT)
    MPI_Bcast(&list_size, 1, MPI_INT, 0, MPI_COMM_WORLD);

    MPI_Barrier(MPI_COMM_WORLD);

    // allocate list: one entry per word (one \0 per word)
    list = new short[list_size];

    MPI_Barrier(MPI_COMM_WORLD);

    // allocate array
    arr = new char[arr_size];

    MPI_Barrier(MPI_COMM_WORLD);

    // root does this
    if (myid == 0) {
        // put the values into the array using a counter
        int counter = 0;

        // loop over the words and put them sequentially into the array, with
        // a null terminator starting each word; we loop to list_size - 1
        // since list_size was increased by 1 to mark the last element
        for (int i = 0; i < list_size - 1; i++) {
            // mark start of word
            arr[counter] = '\0';
            // put null terminator index into list
            list[i] = counter;
            // increment counter
            counter++;

            // copy the characters of the next word
            for (int j = 0; j < (int) words->at(i).size(); j++) {
                arr[counter++] = words->at(i).at(j);
            }
        }
        // mark the last element to stop loops later in the program
        list[list_size - 1] = counter;

        // free up memory, this object is no longer used
        delete words;
    }

    // broadcast the array of chars (all the words, each starting with \0)
    // and the list of word indexes
    MPI_Barrier(MPI_COMM_WORLD);

    // send list of indexes to all processes
    MPI_Bcast(list, list_size, MPI_SHORT, 0, MPI_COMM_WORLD);

    // send array of words to all processes
    MPI_Bcast(arr, arr_size, MPI_CHAR, 0, MPI_COMM_WORLD);

    // run function for each process to create a new list of non-palindromes;
    // this uses cyclic partitioning
    new_words = markParalindromes(myid, arr_size, list_size, arr, list, numprocs, new_size);

    // serialize access to the output file; an exclusive lock is needed here,
    // since a shared lock would let several processes hold it at once
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 1, 0, win);

    // create out stream object
    std::fstream out;

    // open file
    out.open("Non-Palindromes.txt", std::ios::out | std::ios::app);

    // if there is an error creating/opening
    if (out.fail()) {
        // display message and close
        std::cout << "Error Opening File" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    else {
        // loop non-palindrome words
        for (int i = 0; i < new_size; i++) {
            // if char is a null terminator, replace it with newlines
            if (new_words[i] == '\0') {
                // writing one newline does not work, but two does
                out << std::endl;
                out << std::endl;
            }
            // if not a null terminator, write the char of the current word
            else
                out << new_words[i];
        }
        // needed for end of file
        out << std::endl;
        out << std::endl;
        // close file
        out.close();
    }

    // unlock
    MPI_Win_unlock(1, win);

    MPI_Barrier(MPI_COMM_WORLD);

    // get process end time
    if (myid == 0)
        process_end = MPI_Wtime();

    // clean up and display results
    if (myid == 0) {
        if (arr != NULL)
            delete[] arr;
        if (list != NULL)
            delete[] list;
        if (new_words != NULL)
            delete[] new_words;
    }

    MPI_Barrier(MPI_COMM_WORLD);

    // get program end time
    if (myid == 0)
        program_end = MPI_Wtime();

    if (myid == 0) {
        // total program time
        std::cout << "Program Time: " << (program_end - program_start) << "s" << std::endl;
        // process time
        std::cout << "Process Time: " << (process_end - process_start) << "s" << std::endl;
    }

    // needed to clean up
    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}
numb hashlookup(obj o)
/* Never fill up the hash table!  Returns the number of times a value
 * was seen so far.  If an obj is seen for the first time, it is stored
 * without copying it; do not free the object in this case, ownership
 * goes over to the hash table. */
{
    numb v;
    int destRank, destPos;
    numb localHash, localCount;

    v = f(o);
    localHash = 0;
    localCount = 0;

    while (1) {
        /* work out who and where the hash should go */
        destRank = v / nobj;
        destPos = v - nobj * destRank;

        /* Get the value in the hash table */
        if (destRank != rank) {
            remote++;
        } else {
            local++;
        }
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, destRank, 0, win);
        status = MPI_Get(&localHash, 1, MPI_INT, destRank, destPos, 1, MPI_INT, win);
        MPI_Win_unlock(destRank, win);

        if (localHash) {
            /* if (memcmp(o, hashtab[v], m)) { */
            if (!(*o == localHash)) {
                /* slot holds a different value: linear probing */
                v++;
                destPos++;
                if (v >= hashlen)
                    v = 0;
                if (destPos >= nobj) {
                    /* don't fall off the end, go to the next one */
                    destPos = 0;
                    destRank++;
                    /* and for rank too */
                    if (destRank >= size)
                        destRank = 0;
                }
                collisions++;
            } else {
                /* Found!  Get the count and increment it. */
                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, destRank, 0, win2);
                MPI_Get(&localCount, 1, MPI_INT, destRank, destPos, 1, MPI_INT, win2);
                /* complete the get before using its result; without this
                 * flush, localCount is not yet valid inside the epoch */
                MPI_Win_flush(destRank, win2);
                localCount++;
                MPI_Put(&localCount, 1, MPI_INT, destRank, destPos, 1, MPI_INT, win2);
                MPI_Win_unlock(destRank, win2);
                /* hashcount[v]++;
                 * return hashcount[v]; */
                return localCount;
            }
        } else {
            /* Definitely not found */
            /* hashtab[v] = o; */
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, destRank, 0, win);
            /* What am I copying here? */
            MPI_Put(o, 1, MPI_INT, destRank, destPos, 1, MPI_INT, win);
            MPI_Win_unlock(destRank, win);
            /* hashcount[v] = 1; */
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, destRank, 0, win2);
            localCount = 1;
            MPI_Put(&localCount, 1, MPI_INT, destRank, destPos, 1, MPI_INT, win2);
            MPI_Win_unlock(destRank, win2);
            return localCount;
        }
    }
}
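/* The read-modify-write of the count above predates MPI-3 atomics.  Under
 * MPI-3 the same update can be expressed as a single fetch-and-op, which is
 * atomic with respect to other accumulate-style operations and needs no
 * exclusive lock or flush.  A sketch, assuming the same win2 layout (one
 * MPI_INT count per slot); this helper is an illustration, not part of the
 * original code. */
numb increment_count(int destRank, int destPos)
{
    numb one = 1, prev = 0;
    MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win2);
    /* atomically add 1 and fetch the previous value in one operation */
    MPI_Fetch_and_op(&one, &prev, MPI_INT, destRank, destPos, MPI_SUM, win2);
    MPI_Win_unlock(destRank, win2);
    return prev + 1;    /* the count after our increment */
}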
int main(int argc, char *argv[])
{
    int errs = 0;
    MPI_Win win;
    int *rmabuffer = 0, *getbuf = 0;
    MPI_Aint bufsize = 0, getbufsize = 0;
    int master, partner, next, wrank, wsize, i;
    int ntest = LAST_TEST;
    int *srcbuf;

    MTest_Init(&argc, &argv);

    /* Determine who is responsible for each part of the test */
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
    if (wsize < 3) {
        fprintf(stderr, "This test requires at least 3 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    master = 0;
    partner = 1;
    next = wrank + 1;
    if (next == partner)
        next++;
    if (next >= wsize) {
        next = 0;
        if (next == partner)
            next++;
    }

    /* Determine the last test to run (by default, run them all) */
    for (i = 1; i < argc; i++) {
        if (strcmp("-ntest", argv[i]) == 0) {
            i++;
            if (i < argc) {
                ntest = atoi(argv[i]);
            } else {
                fprintf(stderr, "Missing value for -ntest\n");
                MPI_Abort(MPI_COMM_WORLD, 1);
            }
        }
    }

    MPI_Type_vector(veccount, 1, stride, MPI_INT, &vectype);
    MPI_Type_commit(&vectype);

    /* Create the RMA window */
    bufsize = 0;
    if (wrank == master) {
        bufsize = RMA_SIZE;
        MPI_Alloc_mem(bufsize * sizeof(int), MPI_INFO_NULL, &rmabuffer);
    } else if (wrank == partner) {
        getbufsize = RMA_SIZE;
        getbuf = (int *) malloc(getbufsize * sizeof(int));
        if (!getbuf) {
            fprintf(stderr, "Unable to allocate %d bytes for getbuf\n",
                    (int) getbufsize);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }
    srcbuf = malloc(RMA_SIZE * sizeof(*srcbuf));
    assert(srcbuf);

    MPI_Win_create(rmabuffer, bufsize, sizeof(int), MPI_INFO_NULL,
                   MPI_COMM_WORLD, &win);

    /* Run a sequence of tests */
    for (i = 0; i <= ntest; i++) {
        if (wrank == master) {
            MTestPrintfMsg(0, "Test %d\n", i);
            /* Because this lock is local, it must return only when the
             * lock is acquired */
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, master, 0, win);
            RMATestInit(i, rmabuffer, bufsize);
            MPI_Send(MPI_BOTTOM, 0, MPI_INT, partner, i, MPI_COMM_WORLD);
            MPI_Send(MPI_BOTTOM, 0, MPI_INT, next, i, MPI_COMM_WORLD);
            MPI_Recv(MPI_BOTTOM, 0, MPI_INT, MPI_ANY_SOURCE, i, MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);
            MPI_Win_unlock(master, win);
            MPI_Recv(MPI_BOTTOM, 0, MPI_INT, partner, i, MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);
            errs += RMACheck(i, rmabuffer, bufsize);
        } else if (wrank == partner) {
            MPI_Recv(MPI_BOTTOM, 0, MPI_INT, master, i, MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);
            MPI_Win_lock(MPI_LOCK_EXCLUSIVE, master, 0, win);
            RMATest(i, win, master, srcbuf, RMA_SIZE, getbuf, getbufsize);
            MPI_Win_unlock(master, win);
            errs += RMACheckGet(i, win, getbuf, getbufsize);
            MPI_Send(MPI_BOTTOM, 0, MPI_INT, master, i, MPI_COMM_WORLD);
        } else {
            MPI_Recv(MPI_BOTTOM, 0, MPI_INT, MPI_ANY_SOURCE, i, MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);
            MPI_Send(MPI_BOTTOM, 0, MPI_INT, next, i, MPI_COMM_WORLD);
        }
    }

    if (rmabuffer) {
        MPI_Free_mem(rmabuffer);
    }
    if (getbuf) {
        free(getbuf);
    }
    MPI_Win_free(&win);
    MPI_Type_free(&vectype);

    MTest_Finalize(errs);
    MPI_Finalize();
    return MTestReturnValue(errs);
}
int main(int argc, char *argv[])
{
    int rank, size, i, j, k;
    int errors = 0;
    int origin_shm, origin_am, dest;
    int *orig_buf = NULL, *result_buf = NULL, *compare_buf = NULL,
        *target_buf = NULL, *check_buf = NULL;
    MPI_Win win;
    MPI_Status status;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* this works when MPIR_PARAM_CH3_ODD_EVEN_CLIQUES is set; assign the
     * roles before the size check so 'dest' is defined at exit_test */
    dest = 2;
    origin_shm = 0;
    origin_am = 1;

    if (size != 3) {
        /* run this test with three processes */
        goto exit_test;
    }

    if (rank != dest) {
        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &orig_buf);
        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &result_buf);
        MPI_Alloc_mem(sizeof(int), MPI_INFO_NULL, &compare_buf);
    }

    MPI_Win_allocate(sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD,
                     &target_buf, &win);

    for (k = 0; k < LOOP_SIZE; k++) {

        /* init buffers */
        if (rank == origin_shm) {
            orig_buf[0] = 1;
            compare_buf[0] = 0;
            result_buf[0] = 0;
        } else if (rank == origin_am) {
            orig_buf[0] = 0;
            compare_buf[0] = 1;
            result_buf[0] = 0;
        } else {
            MPI_Win_lock(MPI_LOCK_SHARED, rank, 0, win);
            target_buf[0] = 0;
            MPI_Win_unlock(rank, win);
        }

        MPI_Barrier(MPI_COMM_WORLD);

        /* perform FOP */
        MPI_Win_lock_all(0, win);
        if (rank != dest) {
            MPI_Compare_and_swap(orig_buf, compare_buf, result_buf, MPI_INT, dest, 0, win);
            MPI_Win_flush(dest, win);
        }
        MPI_Win_unlock_all(win);

        MPI_Barrier(MPI_COMM_WORLD);

        /* check results */
        if (rank != dest) {
            MPI_Gather(result_buf, 1, MPI_INT, check_buf, 1, MPI_INT, dest,
                       MPI_COMM_WORLD);
        } else {
            MPI_Alloc_mem(sizeof(int) * 3, MPI_INFO_NULL, &check_buf);
            MPI_Gather(target_buf, 1, MPI_INT, check_buf, 1, MPI_INT, dest,
                       MPI_COMM_WORLD);

            if (!(check_buf[dest] == 0 && check_buf[origin_shm] == 0
                  && check_buf[origin_am] == 1)
                && !(check_buf[dest] == 1 && check_buf[origin_shm] == 0
                     && check_buf[origin_am] == 0)) {
                printf("Wrong results: target result = %d, origin_shm result = %d, "
                       "origin_am result = %d\n",
                       check_buf[dest], check_buf[origin_shm], check_buf[origin_am]);
                printf("Expected results (1): target result = 1, origin_shm result = 0, "
                       "origin_am result = 0\n");
                printf("Expected results (2): target result = 0, origin_shm result = 0, "
                       "origin_am result = 1\n");
                errors++;
            }
            MPI_Free_mem(check_buf);
        }
    }

    MPI_Win_free(&win);

    if (rank == origin_am || rank == origin_shm) {
        MPI_Free_mem(orig_buf);
        MPI_Free_mem(result_buf);
        MPI_Free_mem(compare_buf);
    }

  exit_test:
    if (rank == dest && errors == 0)
        printf(" No Errors\n");

    MPI_Finalize();
    return 0;
}
// functions for task stealing!!!
int read_last_task(task_type_unit *task0, int target_rank, int num_tries)
{
    // return codes:
    //  0 - element read
    //  1 - q is empty
    //  2 - element not read (more tries than <num_tries>)
    int ret = 0;
    int iamfree = 0;
    int my_offset;
    int tries_cntr = 0;

    // try to lock the offs window by putting the -2 sentinel value
    while (iamfree == 0 && (num_tries == 0 || tries_cntr < num_tries)) {
        tries_cntr++;
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target_rank, 0, win_offs);
        MPI_Get(&my_offset, 1, MPI_INT, target_rank, 0, 1, MPI_INT, win_offs);
        // implicitly block the OFFSET win on proc <rank> (block code -2)
        MPI_Put(&lock, 1, MPI_INT, target_rank, 0, 1, MPI_INT, win_offs);
        MPI_Win_unlock(target_rank, win_offs);
        if (my_offset >= -1) { // if the window was not locked before
            iamfree = 1;
        }
    }

    if (iamfree == 0) { // q is still blocked - go further
        ret = 2;
        return ret;
    }

    // the offs window is now locked by me! work!
    if (my_offset == -1) { // q is empty
        ret = 1;
    } else {
        // if (ts_logging == 1)
        {
            sched_log_file = fopen(sched_log, "a");
            fprintf(sched_log_file, "[%f] Take task N %d\n", MPI_Wtime(), my_offset);
            fclose(sched_log_file);
        }
        // task_type_unit buf[task_type_length];
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target_rank, 0, win_q); // lock the q
        MPI_Get(task0, task_type_length, mpi_task_type_unit, target_rank, my_offset,
                task_type_length, mpi_task_type_unit, win_q);
        MPI_Win_unlock(target_rank, win_q);
        // task0[0] = buf[0];
        // task0[1] = buf[1];
        my_offset--;
    }

    // UNBLOCK the OFFSET win (put the proper offs value - either changed or not)
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target_rank, 0, win_offs);
    MPI_Put(&my_offset, 1, MPI_INT, target_rank, 0, 1, MPI_INT, win_offs);
    MPI_Win_unlock(target_rank, win_offs);

    return ret;
}
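/* Note on the idiom above: within a single lock/unlock epoch, MPI does not
 * order an MPI_Get against a following MPI_Put to the same location, so the
 * get-then-put swap is formally a conflicting access.  With MPI-3 the whole
 * test-and-set can be done as one atomic operation.  A sketch, assuming the
 * same globals (win_offs and the `lock' sentinel); this helper is an
 * illustration, not part of the original code. */
static int try_lock_offs(int target_rank)
{
    int my_offset;
    MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, win_offs);
    /* atomically swap the sentinel in and fetch the old offset */
    MPI_Fetch_and_op(&lock, &my_offset, MPI_INT, target_rank, 0, MPI_REPLACE, win_offs);
    MPI_Win_unlock(target_rank, win_offs);
    return my_offset;   /* >= -1 means we now hold the queue */
}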
int main(int argc, char *argv[])
{
    int rank, nprocs, A[SIZE2], B[SIZE2], i;
    MPI_Win win;

    MPI_Init(&argc, &argv);
    Test_Init_No_File();
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (nprocs != 2) {
        printf("Run this program with 2 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (rank == 0) {
        for (i = 0; i < SIZE2; i++)
            A[i] = B[i] = i;
        MPI_Win_create(NULL, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &win);

        for (i = 0; i < SIZE1; i++) {
            MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
            MPI_Put(A + i, 1, MPI_INT, 1, i, 1, MPI_INT, win);
            MPI_Win_unlock(1, win);
        }

        for (i = 0; i < SIZE1; i++) {
            MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);
            MPI_Get(B + i, 1, MPI_INT, 1, SIZE1 + i, 1, MPI_INT, win);
            MPI_Win_unlock(1, win);
        }

        MPI_Win_free(&win);

        for (i = 0; i < SIZE1; i++)
            if (B[i] != (-4) * (i + SIZE1)) {
                printf("Get Error: B[%d] is %d, should be %d\n",
                       i, B[i], (-4) * (i + SIZE1));
                Test_Failed(NULL);
            }
    } else {  /* rank = 1 */
        for (i = 0; i < SIZE2; i++)
            B[i] = (-4) * i;
        MPI_Win_create(B, SIZE2 * sizeof(int), sizeof(int), MPI_INFO_NULL,
                       MPI_COMM_WORLD, &win);

        /* MPI_Win_free is collective: rank 1 waits here while rank 0 runs its
         * passive-target epochs, so the puts and gets above complete first */
        MPI_Win_free(&win);

        for (i = 0; i < SIZE1; i++) {
            if (B[i] != i) {
                printf("Put Error: B[%d] is %d, should be %d\n", i, B[i], i);
                Test_Failed(NULL);
            }
        }
    }

    Test_Waitforall();
    Test_Global_Summary();
    MPI_Finalize();
    return 0;
}
/*
 * Class:     mpi_Win
 * Method:    unlock
 * Signature: (JI)V
 */
JNIEXPORT void JNICALL Java_mpi_Win_unlock(
        JNIEnv *env, jobject jthis, jlong win, jint rank)
{
    int rc = MPI_Win_unlock(rank, (MPI_Win) win);
    ompi_java_exceptionCheck(env, rc);
}
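/* The matching lock wrapper is not part of this excerpt.  A sketch of what
 * it could look like in the same style; the JNI signature (lock type, rank,
 * and assertion passed as jints) is an assumption, not taken from the
 * original bindings. */
JNIEXPORT void JNICALL Java_mpi_Win_lock(
        JNIEnv *env, jobject jthis, jlong win,
        jint lockType, jint rank, jint assertion)
{
    int rc = MPI_Win_lock((int) lockType, (int) rank, (int) assertion, (MPI_Win) win);
    ompi_java_exceptionCheck(env, rc);
}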
int main(int argc, char **argv)
{
    int me, nproc;
    int msg_length, round, i;
    double t_start, t_stop;

    u_int8_t *snd_buf;  // Send buffer (byte array)
    u_int8_t *rcv_buf;  // Receive buffer (byte array)
    MPI_Win window;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (nproc != 2) {
        if (me == 0)
            printf("This benchmark should be run on exactly two processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (me == 0)
        printf("MPI-2 passive ping-pong latency test, performing %d rounds at each xfer size.\n\n",
               NUM_ROUNDS);

    MPI_Alloc_mem(MAX_SIZE, MPI_INFO_NULL, &rcv_buf);
    MPI_Alloc_mem(MAX_SIZE, MPI_INFO_NULL, &snd_buf);

    MPI_Win_create(rcv_buf, MAX_SIZE, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &window);

    for (i = 0; i < MAX_SIZE; i++) {
        snd_buf[i] = 1;
    }

    for (msg_length = 1; msg_length <= MAX_SIZE; msg_length *= 2) {
        MPI_Barrier(MPI_COMM_WORLD);
        t_start = MPI_Wtime();

        // Perform NUM_ROUNDS ping-pongs
        for (round = 0; round < NUM_ROUNDS * 2; round++) {
            // I am the sender
            if (round % 2 == me) {
                // Clear start and end markers for next round
                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, window);
                rcv_buf[0] = 0;
                rcv_buf[msg_length - 1] = 0;
                MPI_Win_unlock(me, window);

                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, (me + 1) % 2, 0, window);
                MPI_Put(snd_buf, msg_length, MPI_BYTE, (me + 1) % 2, 0,
                        msg_length, MPI_BYTE, window);
                MPI_Win_unlock((me + 1) % 2, window);
            }
            // I am the receiver: Poll start and end markers
            else {
                u_int8_t val;

                do {
                    //MPI_Iprobe(0, 0, MPI_COMM_WORLD, &val, MPI_STATUS_IGNORE);
                    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, window);
                    val = ((volatile u_int8_t *) rcv_buf)[0];
                    MPI_Win_unlock(me, window);
                } while (val == 0);

                do {
                    //MPI_Iprobe(0, 0, MPI_COMM_WORLD, &val, MPI_STATUS_IGNORE);
                    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, window);
                    val = ((volatile u_int8_t *) rcv_buf)[msg_length - 1];
                    MPI_Win_unlock(me, window);
                } while (val == 0);
            }
        }

        MPI_Barrier(MPI_COMM_WORLD);
        t_stop = MPI_Wtime();

        if (me == 0)
            printf("%8d bytes \t %12.8f us\n", msg_length,
                   (t_stop - t_start) / NUM_ROUNDS * 1.0e6);
    }

    MPI_Win_free(&window);
    MPI_Free_mem(snd_buf);
    MPI_Free_mem(rcv_buf);
    MPI_Finalize();
    return 0;
}
void add_element_sorted(task_type task0, int rank, int sort_param_num)
{
    int iamfree = 0;
    int my_offset;
    int i;

    // try to lock the offs window by putting the -2 sentinel value
    while (iamfree == 0) {
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win_offs);
        my_offset = OFFSET[0];
        OFFSET[0] = lock;
        MPI_Win_unlock(rank, win_offs);
        if (my_offset >= -1) { // if the window was not locked before
            iamfree = 1;
        }
    }

    // if (ts_logging == 1)
    {
        sched_log_file = fopen(sched_log, "a");
        fprintf(sched_log_file,
                "[%f] Adding task [%3.1f][%6.4f] to my queue, sorted, sort param num %d\n",
                MPI_Wtime(), task0[0], task0[1], sort_param_num);
        fclose(sched_log_file);
    }

    my_offset++;
    task_type_unit local_q[my_offset * task_type_length];

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win_q); // lock the q
    // locally copy the whole queue
    //MPI_Get( local_q, my_offset*task_type_length, mpi_task_type_unit, rank, 0,
    //         my_offset*task_type_length, mpi_task_type_unit, win_q );
    memcpy(local_q, QUEUE, sizeof(task_type_unit) * my_offset * task_type_length);
    MPI_Win_unlock(rank, win_q);

    // seek the item before which we should place our new task
    int my_index = 0;
    task_type_unit my_param = local_q[sort_param_num];
    for (i = sort_param_num;
         (i < my_offset * task_type_length) && (task0[sort_param_num] > my_param);
         i += task_type_length) {
        my_param = local_q[i];
        my_index = i - sort_param_num;
    }
    if (task0[sort_param_num] > my_param)
        my_index += task_type_length;

    // we have to shift the last part of the queue, e.g. element by element as
    // done here, or with memcpy
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win_q); // lock the q
    // put the new element into its place
    for (i = 0; i < task_type_length; i++) {
        QUEUE[my_index + i] = task0[i]; // or use memcpy
    }
    // starting from the last element and ending at my_index, move each param
    // of each element to the next position (reading from the local copy)
    for (i = my_offset * task_type_length - 1; i >= my_index; i--) {
        QUEUE[i + task_type_length] = local_q[i];
    }
    MPI_Win_unlock(rank, win_q);

    // if (ts_logging == 1)
    {
        sched_log_file = fopen(sched_log, "a");
        fprintf(sched_log_file, "[%f] New number of tasks is N %d\n",
                MPI_Wtime(), my_offset);
        fclose(sched_log_file);
    }

    // UNBLOCK the OFFSET win (put the new offset)
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, win_offs);
    OFFSET[0] = my_offset;
    MPI_Win_unlock(rank, win_offs);
}
int main(int argc, char **argv)
{
    int comm_size, comm_rank, i, by_rank, errs = 0;
    int rc;
    char *rma_win_addr, *local_buf;
    char check;
    MPI_Win win;
    MPI_Status status;

    MTest_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);

    if ((comm_size > (MAX_BUF_SIZE / PUT_SIZE)) || (comm_size <= 2))
        MPI_Abort(MPI_COMM_WORLD, 1);

    /* If alloc mem returns an error (e.g. because too much memory was
     * requested), handle it ourselves rather than aborting */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    rc = MPI_Alloc_mem(MAX_BUF_SIZE, MPI_INFO_NULL, (void *) &rma_win_addr);
    if (rc) {
        MTestPrintErrorMsg("Unable to MPI_Alloc_mem space (not an error)", rc);
        MPI_Abort(MPI_COMM_WORLD, 0);
    }

    memset(rma_win_addr, 0, MAX_BUF_SIZE);
    MPI_Win_create((void *) rma_win_addr, MAX_BUF_SIZE, 1, MPI_INFO_NULL,
                   MPI_COMM_WORLD, &win);

    rc = MPI_Alloc_mem(PUT_SIZE, MPI_INFO_NULL, (void *) &local_buf);
    if (rc) {
        MTestPrintErrorMsg("Unable to MPI_Alloc_mem space (not an error)", rc);
        MPI_Abort(MPI_COMM_WORLD, 0);
    }

    for (i = 0; i < PUT_SIZE; i++)
        local_buf[i] = 1;

    MPI_Barrier(MPI_COMM_WORLD);

    if (comm_rank == 0) {       /* target */
        for (i = 0; i < (NUM_TIMES * (comm_size - 2)); i++) {
            /* Wait for a message from the server to notify me that
             * someone put some data in my window */
            MPI_Recv(&by_rank, 1, MPI_INT, 1, 0, MPI_COMM_WORLD, &status);

            /* Got a message from the server that 'by_rank' put some
             * data in my local window.  Check the last byte to make
             * sure we got it correctly. */
            MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
            MPI_Get((void *) &check, 1, MPI_CHAR, 0,
                    ((by_rank + 1) * PUT_SIZE) - 1, 1, MPI_CHAR, win);
            MPI_Win_unlock(0, win);

            /* If this is not the value I expect, count it as an error */
            if (check != 1)
                errs++;

            /* Reset the buffer to zero for the next round */
            memset((void *) (rma_win_addr + (by_rank * PUT_SIZE)), 0, PUT_SIZE);

            /* Tell the origin that I am ready for the next round */
            MPI_Send(NULL, 0, MPI_INT, by_rank, 0, MPI_COMM_WORLD);
        }
    } else if (comm_rank == 1) {        /* server */
        for (i = 0; i < (NUM_TIMES * (comm_size - 2)); i++) {
            /* Wait for a message from any of the origin processes
             * informing me that it has put data to the target process */
            MPI_Recv(NULL, 0, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status);
            by_rank = status.MPI_SOURCE;

            /* Tell the target process that it should be seeing some
             * data in its local buffer */
            MPI_Send(&by_rank, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
        }
    } else {    /* origin */
        for (i = 0; i < NUM_TIMES; i++) {
            /* Put some data in the target window */
            MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
            MPI_Put(local_buf, PUT_SIZE, MPI_CHAR, 0, comm_rank * PUT_SIZE,
                    PUT_SIZE, MPI_CHAR, win);
            MPI_Win_unlock(0, win);

            /* Tell the server that the put has completed */
            MPI_Send(NULL, 0, MPI_INT, 1, 0, MPI_COMM_WORLD);

            /* Wait for a message from the target that it is ready for
             * the next round */
            MPI_Recv(NULL, 0, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        }
    }

    MPI_Win_free(&win);
    MPI_Free_mem(rma_win_addr);
    MPI_Free_mem(local_buf);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
int main(int argc, char **argv)
{
    int i, j, rank, nranks, peer, bufsize, errors;
    double *win_buf, *src_buf;
    MPI_Win buf_win;

    MTest_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);

    if (rank == 0)
        if (verbose)
            printf("MPI RMA Strided Accumulate Test:\n");

    for (i = 0; i < XDIM * YDIM; i++) {
        *(win_buf + i) = -1.0;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank + 1) % nranks;

    /* Perform ITERATIONS strided accumulate operations */
    for (i = 0; i < ITERATIONS; i++) {
        int ndims = 2;
        int src_arr_sizes[2] = { XDIM, YDIM };
        int src_arr_subsizes[2] = { SUB_XDIM, SUB_YDIM };
        int src_arr_starts[2] = { 0, 0 };
        int dst_arr_sizes[2] = { XDIM, YDIM };
        int dst_arr_subsizes[2] = { SUB_XDIM, SUB_YDIM };
        int dst_arr_starts[2] = { 0, 0 };
        MPI_Datatype src_type, dst_type;

        MPI_Type_create_subarray(ndims, src_arr_sizes, src_arr_subsizes, src_arr_starts,
                                 MPI_ORDER_C, MPI_DOUBLE, &src_type);
        MPI_Type_create_subarray(ndims, dst_arr_sizes, dst_arr_subsizes, dst_arr_starts,
                                 MPI_ORDER_C, MPI_DOUBLE, &dst_type);

        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Accumulate(src_buf, 1, src_type, peer, 0, 1, dst_type, MPI_SUM, buf_win);
        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual = *(win_buf + i + j * XDIM);
            const double expected =
                -1.0 + (1.0 + ((rank + nranks - 1) % nranks)) * (ITERATIONS);
            if (fabs(actual - expected) > 1.0e-10) {
                SQUELCH(printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                               rank, j, i, expected, actual););
                errors++;
                fflush(stdout);
            }
        }
    }
    MPI_Win_unlock(rank, buf_win);

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(src_buf);

    MTest_Finalize(errors);
    MPI_Finalize();
    return 0;
}
/* tests passive target RMA on 2 processes. tests the lock-single_op-unlock optimization for less common cases: origin datatype derived, target datatype predefined */ int main(int argc, char *argv[]) { int wrank, nprocs, *srcbuf, *rmabuf, i; int memsize; MPI_Datatype vectype; MPI_Win win; int errs = 0; MTest_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&wrank); if (nprocs < 2) { printf("Run this program with 2 or more processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } memsize = 10 * 4 * nprocs; /* Create and initialize data areas */ srcbuf = (int *)malloc( sizeof(int) * memsize ); MPI_Alloc_mem( sizeof(int) * memsize, MPI_INFO_NULL, &rmabuf ); if (!srcbuf || !rmabuf) { printf( "Unable to allocate srcbuf and rmabuf of size %d\n", memsize ); MPI_Abort( MPI_COMM_WORLD, 1 ); } for (i=0; i<memsize; i++) { rmabuf[i] = -i; srcbuf[i] = i; } MPI_Win_create( rmabuf, memsize*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win ); /* Vector of 10 elements, separated by 4 */ MPI_Type_vector( 10, 1, 4, MPI_INT, &vectype ); MPI_Type_commit( &vectype ); /* Accumulate with a derived origin type and target predefined type*/ if (wrank == 0) { MPI_Barrier( MPI_COMM_WORLD ); MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, 0, win ); for (i=0; i<10; i++) { if (rmabuf[i] != -i + 4*i) { errs++; printf( "Acc: expected rmabuf[%d] = %d but saw %d\n", i, -i + 4*i, rmabuf[i] ); } rmabuf[i] = -i; } for (i=10; i<memsize; i++) { if (rmabuf[i] != -i) { errs++; printf( "Acc: expected rmabuf[%d] = %d but saw %d\n", i, -i, rmabuf[i] ); rmabuf[i] = -i; } } MPI_Win_unlock( 0, win ); } else if (wrank == 1) { MPI_Win_lock( MPI_LOCK_SHARED, 0, 0, win ); MPI_Accumulate( srcbuf, 1, vectype, 0, 0, 10, MPI_INT, MPI_SUM, win ); MPI_Win_unlock( 0, win ); MPI_Barrier( MPI_COMM_WORLD ); } else { MPI_Barrier( MPI_COMM_WORLD ); } MPI_Barrier(MPI_COMM_WORLD); /* Put with a derived origin type and target predefined type*/ if (wrank == 0) { MPI_Barrier( MPI_COMM_WORLD ); MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, 0, win ); for (i=0; i<10; i++) { if (rmabuf[i] != 4*i) { errs++; printf( "Put: expected rmabuf[%d] = %d but saw %d\n", i, 4*i, rmabuf[i] ); } rmabuf[i] = -i; } for (i=10; i<memsize; i++) { if (rmabuf[i] != -i) { errs++; printf( "Put: expected rmabuf[%d] = %d but saw %d\n", i, -i, rmabuf[i] ); rmabuf[i] = -i; } } MPI_Win_unlock( 0, win ); } else if (wrank == 1) { MPI_Win_lock( MPI_LOCK_SHARED, 0, 0, win ); MPI_Put( srcbuf, 1, vectype, 0, 0, 10, MPI_INT, win ); MPI_Win_unlock( 0, win ); MPI_Barrier( MPI_COMM_WORLD ); } else { MPI_Barrier( MPI_COMM_WORLD ); } MPI_Barrier(MPI_COMM_WORLD); /* Put with a derived origin type and target predefined type, with a get (see the move-to-end optimization) */ if (wrank == 0) { MPI_Barrier( MPI_COMM_WORLD ); MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, 0, win ); for (i=0; i<10; i++) { if (rmabuf[i] != 4*i) { errs++; printf( "Put: expected rmabuf[%d] = %d but saw %d\n", i, 4*i, rmabuf[i] ); } rmabuf[i] = -i; } for (i=10; i<memsize; i++) { if (rmabuf[i] != -i) { errs++; printf( "Put: expected rmabuf[%d] = %d but saw %d\n", i, -i, rmabuf[i] ); rmabuf[i] = -i; } } MPI_Win_unlock( 0, win ); } else if (wrank == 1) { int val; MPI_Win_lock( MPI_LOCK_SHARED, 0, 0, win ); MPI_Get( &val, 1, MPI_INT, 0, 10, 1, MPI_INT, win ); MPI_Put( srcbuf, 1, vectype, 0, 0, 10, MPI_INT, win ); MPI_Win_unlock( 0, win ); MPI_Barrier( MPI_COMM_WORLD ); if (val != -10) { errs++; printf( "Get: Expected -10, got %d\n", val ); } } else { MPI_Barrier( MPI_COMM_WORLD ); } MPI_Barrier(MPI_COMM_WORLD); /* Put with a 
derived origin type and target predefined type, with a get already at the end (see the move-to-end optimization) */ if (wrank == 0) { MPI_Barrier( MPI_COMM_WORLD ); MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, 0, win ); for (i=0; i<10; i++) { if (rmabuf[i] != 4*i) { errs++; printf( "Put: expected rmabuf[%d] = %d but saw %d\n", i, 4*i, rmabuf[i] ); } rmabuf[i] = -i; } for (i=10; i<memsize; i++) { if (rmabuf[i] != -i) { errs++; printf( "Put: expected rmabuf[%d] = %d but saw %d\n", i, -i, rmabuf[i] ); rmabuf[i] = -i; } } MPI_Win_unlock( 0, win ); } else if (wrank == 1) { int val; MPI_Win_lock( MPI_LOCK_SHARED, 0, 0, win ); MPI_Put( srcbuf, 1, vectype, 0, 0, 10, MPI_INT, win ); MPI_Get( &val, 1, MPI_INT, 0, 10, 1, MPI_INT, win ); MPI_Win_unlock( 0, win ); MPI_Barrier( MPI_COMM_WORLD ); if (val != -10) { errs++; printf( "Get: Expected -10, got %d\n", val ); } } else { MPI_Barrier( MPI_COMM_WORLD ); } MPI_Win_free( &win ); MPI_Free_mem( rmabuf ); free( srcbuf ); MPI_Type_free( &vectype ); MTest_Finalize(errs); MPI_Finalize(); return 0; }
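The epochs in this test are shaped for two implementation fast paths: an epoch holding exactly one RMA operation can be fused with the lock/unlock protocol into a single network transaction, and a trailing MPI_Get can carry the epoch-closing acknowledgement in its reply (the move-to-end case probed above). A sketch of the second shape, reusing the test's srcbuf, vectype, and win, and relying as the test does on the put and get ranges being disjoint, which is required for well-defined results within one epoch:

/* Epoch shaped for the move-to-end fast path: the get is last, so its
 * reply can double as the unlock acknowledgement. The put and get
 * target ranges must not overlap within the same epoch. */
int val;
MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
MPI_Put(srcbuf, 1, vectype, 0, 0, 10, MPI_INT, win);  /* target ints 0..9 */
MPI_Get(&val, 1, MPI_INT, 0, 10, 1, MPI_INT, win);    /* int 10: disjoint */
MPI_Win_unlock(0, win);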
int main(int argc, char **argv) { int nprocs, mpi_err, *array; int getval, disp, errs=0; MPI_Win win; MPI_Datatype type; MTest_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); if (nprocs != 1) { printf("Run this program with 1 process\n"); MPI_Abort(MPI_COMM_WORLD,1); } /* To improve reporting of problems about operations, we change the error handler to errors return */ MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_RETURN ); /* create an indexed datatype that points to the second integer in an array (the first integer is skipped). */ disp = 1; mpi_err = MPI_Type_create_indexed_block(1, 1, &disp, MPI_INT, &type); if (mpi_err != MPI_SUCCESS) goto err_return; mpi_err = MPI_Type_commit(&type); if (mpi_err != MPI_SUCCESS) goto err_return; /* allocate window of size 2 integers */ mpi_err = MPI_Alloc_mem(2*sizeof(int), MPI_INFO_NULL, &array); if (mpi_err != MPI_SUCCESS) goto err_return; /* create window object */ mpi_err = MPI_Win_create(array, 2*sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win); if (mpi_err != MPI_SUCCESS) goto err_return; /* initialize array */ array[0] = 100; array[1] = 200; getval = 0; /* for the same reason, set the window's error handler to errors return as well */ MPI_Win_set_errhandler( win, MPI_ERRORS_RETURN ); mpi_err = MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win); if (mpi_err != MPI_SUCCESS) goto err_return; /* get the current value of element array[1] */ mpi_err = MPI_Get(&getval, 1, MPI_INT, 0, 0, 1, type, win); if (mpi_err != MPI_SUCCESS) goto err_return; mpi_err = MPI_Win_unlock(0, win); if (mpi_err != MPI_SUCCESS) goto err_return; /* getval should contain the value of array[1] */ if (getval != array[1]) { errs++; printf("getval=%d, should be %d\n", getval, array[1]); } MPI_Free_mem(array); MPI_Win_free(&win); MPI_Type_free(&type); MTest_Finalize(errs); MPI_Finalize(); return 0; err_return: printf("An MPI function returned an error\n"); MTestPrintError( mpi_err ); errs++; MTest_Finalize(errs); MPI_Finalize(); return 1; }
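MPI_Type_create_indexed_block generalizes the single-displacement type used above: with several displacements, one MPI_Get gathers scattered window elements in a single call. An illustrative sketch, assuming a window of at least 6 ints (the displacements here are hypothetical, not from the test):

/* Gather sketch: one get pulls array[1], array[3], array[5] into vals[].
 * A window layout of at least 6 ints is assumed for illustration. */
int vals[3];
int disps[3] = { 1, 3, 5 };
MPI_Datatype gather3;
MPI_Type_create_indexed_block(3, 1, disps, MPI_INT, &gather3);
MPI_Type_commit(&gather3);
MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
MPI_Get(vals, 3, MPI_INT, 0, 0, 1, gather3, win);  /* 3 ints either way */
MPI_Win_unlock(0, win);
MPI_Type_free(&gather3);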
int main(int argc, char *argv[]) { MPI_Win win; int errors = 0; int rank, nproc, i; double *orig_buf; double *tar_buf; MPI_Datatype vector_dtp; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Alloc_mem(sizeof(double) * DATA_SIZE, MPI_INFO_NULL, &orig_buf); MPI_Alloc_mem(sizeof(double) * DATA_SIZE, MPI_INFO_NULL, &tar_buf); for (i = 0; i < DATA_SIZE; i++) { orig_buf[i] = 1.0; tar_buf[i] = 0.5; } MPI_Type_vector(5 /* count */ , 3 /* blocklength */ , 5 /* stride */ , MPI_DOUBLE, &vector_dtp); MPI_Type_commit(&vector_dtp); MPI_Win_create(tar_buf, sizeof(double) * DATA_SIZE, sizeof(double), MPI_INFO_NULL, MPI_COMM_WORLD, &win); if (rank == 0) { MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win); MPI_Accumulate(orig_buf, 1, vector_dtp, 1, 0, 1, vector_dtp, MPI_SUM, win); MPI_Win_unlock(1, win); } MPI_Win_fence(0, win); if (rank == 1) { for (i = 0; i < DATA_SIZE; i++) { if (i % 5 < 3) { if (tar_buf[i] != 1.5) { printf("tar_buf[%d] = %f (expected 1.5)\n", i, tar_buf[i]); errors++; } } else { if (tar_buf[i] != 0.5) { printf("tar_buf[%d] = %f (expected 0.5)\n", i, tar_buf[i]); errors++; } } } } MPI_Type_free(&vector_dtp); MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) { MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win); MPI_Accumulate(orig_buf, DATA_SIZE, MPI_DOUBLE, 1, 0, DATA_SIZE, MPI_DOUBLE, MPI_SUM, win); MPI_Win_unlock(1, win); } MPI_Win_fence(0, win); if (rank == 1) { for (i = 0; i < DATA_SIZE; i++) { if (i % 5 < 3) { if (tar_buf[i] != 2.5) { printf("tar_buf[%d] = %f (expected 2.5)\n", i, tar_buf[i]); errors++; } } else { if (tar_buf[i] != 1.5) { printf("tar_buf[%d] = %f (expected 1.5)\n", i, tar_buf[i]); errors++; } } } } MPI_Win_free(&win); MPI_Free_mem(orig_buf); MPI_Free_mem(tar_buf); if (rank == 1) { if (errors == 0) printf(" No Errors\n"); } MPI_Finalize(); return 0; }
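The validation predicate i % 5 < 3 falls directly out of the vector type's geometry: count 5, blocklength 3, stride 5 touches element b*5 + e for b in 0..4 and e in 0..2, i.e. the first 3 of every 5 elements among the first 25. A host-side sketch of that arithmetic (assuming DATA_SIZE is 25, as the checks imply, and that <assert.h> is included):

/* Footprint of MPI_Type_vector(5, 3, 5, MPI_DOUBLE, ...): element i is
 * touched exactly when i % 5 < 3, for i in [0, 25). Sketch only. */
int covered[25] = { 0 };
for (int b = 0; b < 5; b++)        /* count */
    for (int e = 0; e < 3; e++)    /* blocklength */
        covered[b * 5 + e] = 1;    /* block b starts at b * stride */
for (int i = 0; i < 25; i++)
    assert(covered[i] == (i % 5 < 3 ? 1 : 0));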
int main(int argc, char ** argv) { long Block_order; /* number of columns owned by rank */ long Block_size; /* size of a single block */ long Colblock_size; /* size of column block */ int Tile_order=32; /* default Tile order */ int tiling; /* boolean: true if tiling is used */ int Num_procs; /* number of ranks */ long order; /* order of overall matrix */ int send_to, recv_from; /* ranks with which to communicate */ long bytes; /* combined size of matrices */ int my_ID; /* rank */ int root=0; /* rank of root */ int iterations; /* number of times to do the transpose */ int i, j, it, jt, istart;/* dummies */ int iter; /* index of iteration */ int phase; /* phase inside staged communication */ int colstart; /* starting column for owning rank */ int error; /* error flag */ double RESTRICT *A_p; /* original matrix column block */ double RESTRICT *B_p; /* transposed matrix column block */ double RESTRICT *Work_in_p;/* workspace for transpose function */ double RESTRICT *Work_out_p;/* workspace for transpose function */ double abserr, /* absolute error */ abserr_tot; /* aggregate absolute error */ double epsilon = 1.e-8; /* error tolerance */ double local_trans_time, /* timing parameters */ trans_time, avgtime; MPI_Win rma_win = MPI_WIN_NULL; MPI_Info rma_winfo = MPI_INFO_NULL; int passive_target = 0; /* use passive target RMA sync */ #if MPI_VERSION >= 3 int flush_local = 1; /* flush local (or remote) after put */ int flush_bundle = 1; /* flush every <bundle> put calls */ #endif /********************************************************************* ** Initialize the MPI environment *********************************************************************/ MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_ID); MPI_Comm_size(MPI_COMM_WORLD, &Num_procs); /********************************************************************* ** process, test and broadcast input parameters *********************************************************************/ error = 0; if (my_ID == root) { printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("MPIRMA matrix transpose: B = A^T\n"); if (argc <= 3){ printf("Usage: %s <# iterations> <matrix order> [Tile size] " "[sync (0=fence, 1=flush)] [flush local?] [flush bundle]\n", *argv); error = 1; goto ENDOFTESTS; } iterations = atoi(*++argv); if(iterations < 1){ printf("ERROR: iterations must be >= 1 : %d \n",iterations); error = 1; goto ENDOFTESTS; } order = atol(*++argv); if (order < Num_procs) { printf("ERROR: matrix order %ld should be at least # procs %d\n", order, Num_procs); error = 1; goto ENDOFTESTS; } if (order%Num_procs) { printf("ERROR: matrix order %ld should be divisible by # procs %d\n", order, Num_procs); error = 1; goto ENDOFTESTS; } if (argc >= 4) Tile_order = atoi(*++argv); if (argc >= 5) passive_target = atoi(*++argv); #if MPI_VERSION >= 3 if (argc >= 6) flush_local = atoi(*++argv); if (argc >= 7) flush_bundle = atoi(*++argv); #endif ENDOFTESTS:; } bail_out(error); if (my_ID == root) { printf("Number of ranks = %d\n", Num_procs); printf("Matrix order = %ld\n", order); printf("Number of iterations = %d\n", iterations); if ((Tile_order > 0) && (Tile_order < order)) printf("Tile size = %d\n", Tile_order); else printf("Untiled\n"); if (passive_target) { #if MPI_VERSION < 3 printf("Synchronization = MPI_Win_(un)lock\n"); #else printf("Synchronization = MPI_Win_flush%s (bundle=%d)\n", flush_local ? 
"_local" : "", flush_bundle); #endif } else { printf("Synchronization = MPI_Win_fence\n"); } } /* Broadcast input data to all ranks */ MPI_Bcast (&order, 1, MPI_LONG, root, MPI_COMM_WORLD); MPI_Bcast (&iterations, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast (&Tile_order, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast (&passive_target, 1, MPI_INT, root, MPI_COMM_WORLD); #if MPI_VERSION >= 3 MPI_Bcast (&flush_local, 1, MPI_INT, root, MPI_COMM_WORLD); MPI_Bcast (&flush_bundle, 1, MPI_INT, root, MPI_COMM_WORLD); #endif /* a non-positive tile size means no tiling of the local transpose */ tiling = (Tile_order > 0) && (Tile_order < order); bytes = 2 * sizeof(double) * order * order; /********************************************************************* ** The matrix is broken up into column blocks that are mapped one to a ** rank. Each column block is made up of Num_procs smaller square ** blocks of order block_order. *********************************************************************/ Block_order = order/Num_procs; colstart = Block_order * my_ID; Colblock_size = order * Block_order; Block_size = Block_order * Block_order; /* debug message size effects */ if (my_ID == root) { printf("Block_size = %ld\n", Block_size); } /********************************************************************* ** Create the column block of the test matrix, the row block of the ** transposed matrix, and workspace (workspace only if #procs>1) *********************************************************************/ A_p = (double *)prk_malloc(Colblock_size*sizeof(double)); if (A_p == NULL){ printf(" Error allocating space for original matrix on node %d\n",my_ID); error = 1; } bail_out(error); MPI_Info_create (&rma_winfo); MPI_Info_set (rma_winfo, "no locks", "true"); B_p = (double *)prk_malloc(Colblock_size*sizeof(double)); if (B_p == NULL){ printf(" Error allocating space for transpose matrix on node %d\n",my_ID); error = 1; } bail_out(error); if (Num_procs>1) { Work_out_p = (double *) prk_malloc(Block_size*(Num_procs-1)*sizeof(double)); if (Work_out_p == NULL){ printf(" Error allocating space for work_out on node %d\n",my_ID); error = 1; } bail_out(error); PRK_Win_allocate(Block_size*(Num_procs-1)*sizeof(double), sizeof(double), rma_winfo, MPI_COMM_WORLD, &Work_in_p, &rma_win); if (Work_in_p == NULL){ printf(" Error allocating space for work on node %d\n",my_ID); error = 1; } bail_out(error); } #if MPI_VERSION >= 3 if (passive_target && Num_procs>1) { MPI_Win_lock_all(MPI_MODE_NOCHECK,rma_win); } #endif /* Fill the original column matrix */ istart = 0; for (j=0;j<Block_order;j++) { for (i=0;i<order; i++) { A(i,j) = (double) (order*(j+colstart) + i); B(i,j) = 0.0; } } MPI_Barrier(MPI_COMM_WORLD); for (iter = 0; iter<=iterations; iter++) { /* start timer after a warmup iteration */ if (iter == 1) { MPI_Barrier(MPI_COMM_WORLD); local_trans_time = wtime(); } /* do the local transpose */ istart = colstart; if (!tiling) { for (i=0; i<Block_order; i++) { for (j=0; j<Block_order; j++) { B(j,i) += A(i,j); A(i,j) += 1.0; } } } else { for (i=0; i<Block_order; i+=Tile_order) { for (j=0; j<Block_order; j+=Tile_order) { for (it=i; it<MIN(Block_order,i+Tile_order); it++) { for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) { B(jt,it) += A(it,jt); A(it,jt) += 1.0; } } } } } if (!passive_target && Num_procs>1) { MPI_Win_fence(MPI_MODE_NOSTORE | MPI_MODE_NOPRECEDE, rma_win); } for (phase=1; phase<Num_procs; phase++){ send_to = (my_ID - phase + Num_procs)%Num_procs; istart = send_to*Block_order; if (!tiling) { for (i=0; i<Block_order; i++) 
{ for (j=0; j<Block_order; j++) { Work_out(phase-1,j,i) = A(i,j); A(i,j) += 1.0; } } } else { for (i=0; i<Block_order; i+=Tile_order) { for (j=0; j<Block_order; j+=Tile_order) { for (it=i; it<MIN(Block_order,i+Tile_order); it++) { for (jt=j; jt<MIN(Block_order,j+Tile_order);jt++) { Work_out(phase-1,jt,it) = A(it,jt); A(it,jt) += 1.0; } } } } } #if MPI_VERSION < 3 if (passive_target) { MPI_Win_lock(MPI_LOCK_SHARED, send_to, MPI_MODE_NOCHECK, rma_win); } #endif MPI_Put(Work_out_p+Block_size*(phase-1), Block_size, MPI_DOUBLE, send_to, Block_size*(phase-1), Block_size, MPI_DOUBLE, rma_win); if (passive_target) { #if MPI_VERSION < 3 MPI_Win_unlock(send_to, rma_win); #else if (flush_bundle==1) { if (flush_local==1) { MPI_Win_flush_local(send_to, rma_win); } else { MPI_Win_flush(send_to, rma_win); } } else if ( (phase%flush_bundle) == 0) { /* Too lazy to record all targets, so let MPI do it internally (hopefully) */ if (flush_local==1) { MPI_Win_flush_local_all(rma_win); } else { MPI_Win_flush_all(rma_win); } } #endif } } /* end of phase loop for puts */ if (Num_procs>1) { if (passive_target) { #if MPI_VERSION >= 3 MPI_Win_flush_all(rma_win); #endif MPI_Barrier(MPI_COMM_WORLD); } else { MPI_Win_fence(MPI_MODE_NOSTORE, rma_win); } } for (phase=1; phase<Num_procs; phase++) { recv_from = (my_ID + phase)%Num_procs; istart = recv_from*Block_order; /* scatter received block to transposed matrix; no need to tile */ for (j=0; j<Block_order; j++) { for (i=0; i<Block_order; i++) { B(i,j) += Work_in(phase-1,i,j); } } } /* end of phase loop for scatters */ /* for the flush case we need to make sure we have consumed Work_in before overwriting it in the next iteration */ if (Num_procs>1 && passive_target) { MPI_Barrier(MPI_COMM_WORLD); } } /* end of iterations */ local_trans_time = wtime() - local_trans_time; MPI_Reduce(&local_trans_time, &trans_time, 1, MPI_DOUBLE, MPI_MAX, root, MPI_COMM_WORLD); abserr = 0.0; istart = 0; double addit = ((double)(iterations+1) * (double) (iterations))/2.0; for (j=0;j<Block_order;j++) { for (i=0;i<order; i++) { abserr += ABS(B(i,j) - ((double)(order*i + j+colstart)*(iterations+1)+addit)); } } MPI_Reduce(&abserr, &abserr_tot, 1, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD); if (my_ID == root) { if (abserr_tot < epsilon) { printf("Solution validates\n"); avgtime = trans_time/(double)iterations; printf("Rate (MB/s): %lf Avg time (s): %lf\n",1.0E-06*bytes/avgtime, avgtime); } else { printf("ERROR: Aggregate absolute error %lf exceeds threshold %e\n", abserr_tot, epsilon); error = 1; } } bail_out(error); if (rma_win!=MPI_WIN_NULL) { #if MPI_VERSION >=3 if (passive_target) { MPI_Win_unlock_all(rma_win); } #endif PRK_Win_free(&rma_win); } MPI_Finalize(); exit(EXIT_SUCCESS); } /* end of main */
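The flush_local/flush switch is the crux of the MPI-3 path above: MPI_Win_flush_local only guarantees the origin buffer (Work_out_p) may be reused, while MPI_Win_flush also completes the data at the target, which is why the code still issues MPI_Win_flush_all plus a barrier where a fence would otherwise stand. A compact sketch of the two completion levels inside a lock_all epoch (block, n, peer, disp, win, and comm are illustrative names, not from the kernel):

/* Two completion levels under passive target (inside MPI_Win_lock_all):
 * flush_local => origin buffer reusable; flush => complete at the target. */
MPI_Put(block, n, MPI_DOUBLE, peer, disp, n, MPI_DOUBLE, win);
MPI_Win_flush_local(peer, win);   /* 'block' may be overwritten now */
/* ... refill 'block' with the next tile ... */
MPI_Put(block, n, MPI_DOUBLE, peer, disp + n, n, MPI_DOUBLE, win);
MPI_Win_flush(peer, win);         /* both puts now visible at 'peer' */
MPI_Barrier(comm);                /* target may now read its window */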
int main(int argc, char *argv[]) { int rank, nproc; int i; MPI_Win win; int *tar_buf = NULL; int *orig_buf = NULL; MPI_Datatype derived_dtp; int errors = 0; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (nproc < 3) { fprintf(stderr, "Run this program with at least 3 processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Alloc_mem(sizeof(int) * DATA_SIZE, MPI_INFO_NULL, &orig_buf); MPI_Alloc_mem(sizeof(int) * DATA_SIZE, MPI_INFO_NULL, &tar_buf); for (i = 0; i < DATA_SIZE; i++) { orig_buf[i] = 1; tar_buf[i] = 0; } MPI_Type_vector(COUNT, BLOCKLENGTH - 1, STRIDE, MPI_INT, &derived_dtp); MPI_Type_commit(&derived_dtp); MPI_Win_create(tar_buf, sizeof(int) * DATA_SIZE, sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win); /***** test between rank 0 and rank 1 *****/ if (rank == 1) { MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win); for (i = 0; i < OPS_NUM; i++) { MPI_Accumulate(orig_buf, 1, derived_dtp, 0, 0, DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win); MPI_Win_flush_local(0, win); } MPI_Win_unlock(0, win); } MPI_Barrier(MPI_COMM_WORLD); /* check results */ if (rank == 0) { for (i = 0; i < DATA_SIZE - COUNT; i++) { if (tar_buf[i] != OPS_NUM) { printf("tar_buf[%d] = %d, expected %d\n", i, tar_buf[i], OPS_NUM); errors++; } } } for (i = 0; i < DATA_SIZE; i++) { tar_buf[i] = 0; } MPI_Barrier(MPI_COMM_WORLD); /***** test between rank 0 and rank 2 *****/ if (rank == 2) { MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win); for (i = 0; i < OPS_NUM; i++) { MPI_Accumulate(orig_buf, 1, derived_dtp, 0, 0, DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win); MPI_Win_flush_local(0, win); } MPI_Win_unlock(0, win); } MPI_Barrier(MPI_COMM_WORLD); /* check results */ if (rank == 0) { for (i = 0; i < DATA_SIZE - COUNT; i++) { if (tar_buf[i] != OPS_NUM) { printf("tar_buf[%d] = %d, expected %d\n", i, tar_buf[i], OPS_NUM); errors++; } } if (errors == 0) printf(" No Errors\n"); } MPI_Win_free(&win); MPI_Type_free(&derived_dtp); MPI_Free_mem(orig_buf); MPI_Free_mem(tar_buf); MPI_Finalize(); return 0; }
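A note on the loops above: MPI_Win_flush_local is stronger than strictly necessary there, since orig_buf never changes between accumulates; local completion earns its keep when the origin buffer is rewritten per operation. A sketch of that case, reusing the test's window and type (fill_next is an assumed helper, not part of the test):

/* flush_local is what permits reusing the origin buffer mid-epoch;
 * the unlock (or a final flush) provides remote completion of all ops. */
MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
for (int op = 0; op < OPS_NUM; op++) {
    fill_next(orig_buf, op);      /* assumed helper: rewrite origin buffer */
    MPI_Accumulate(orig_buf, 1, derived_dtp, 0, 0,
                   DATA_SIZE - COUNT, MPI_INT, MPI_SUM, win);
    MPI_Win_flush_local(0, win);  /* orig_buf safe to rewrite again */
}
MPI_Win_unlock(0, win);           /* all contributions complete at target */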
int main(int argc, char ** argv) { MPI_Aint win_size = WIN_SIZE; MPI_Win win; MPI_Group group; char* base; int disp_unit = 1; int rank, size, target_rank, target_disp = 1; int r, flag; /*************************************************************/ /* Init and set values */ /*************************************************************/ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); target_rank = (rank + 1) % size; MPI_Alloc_mem(WIN_SIZE, MPI_INFO_NULL, &base); if ( NULL == base ) { printf("failed to alloc %d\n", WIN_SIZE); exit(16); } /*************************************************************/ /* Win_create */ /*************************************************************/ /* MPI_Win_create(void *base, MPI_Aint size, int disp_unit, MPI_Info info, MPI_Comm comm, MPI_Win *win); */ r = MPI_Win_create(base, win_size, disp_unit, MPI_INFO_NULL, MPI_COMM_WORLD, &win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_create\n", rank); /*************************************************************/ /* First epoch: Tests Put, Get, Get_group, Post, Start, */ /* Complete, Wait, Lock, Unlock */ /*************************************************************/ /* Note: this smoke test only exercises the call sequence; overlapping a post/start epoch with a lock epoch on the same window is not a synchronization pattern to imitate. */ r = MPI_Win_get_group(win, &group); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_get_group\n", rank); r = MPI_Win_post(group, 0, win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_post\n", rank); r = MPI_Win_start(group, 0, win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_start\n", rank); r = MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_lock\n", rank); /* MPI_Put(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win) */ /* transfer WIN_SIZE - target_disp bytes so the access stays inside the target window */ r = MPI_Put(base, WIN_SIZE - target_disp, MPI_BYTE, target_rank, target_disp, WIN_SIZE - target_disp, MPI_BYTE, win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Put\n", rank); r = MPI_Win_unlock(target_rank, win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_unlock\n", rank); /* MPI_Get(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Win win); */ r = MPI_Get(base, WIN_SIZE - target_disp, MPI_BYTE, target_rank, target_disp, WIN_SIZE - target_disp, MPI_BYTE, win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Get\n", rank); r = MPI_Win_complete(win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_complete\n", rank); r = MPI_Win_test(win, &flag); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_test\n", rank); r = MPI_Win_wait(win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_wait\n", rank); /*************************************************************************/ /* Second epoch: Tests Accumulate and Fence */ /*************************************************************************/ r = MPI_Win_fence(0, win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_fence\n", rank); if ( rank == 0 ) { /* MPI_Accumulate(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, int target_rank, MPI_Aint target_disp, int target_count, MPI_Datatype target_datatype, MPI_Op op, MPI_Win win) */ /* MPI_SUM is not defined for MPI_BYTE; MPI_REPLACE is valid with any predefined datatype */ r = MPI_Accumulate(base, WIN_SIZE - target_disp, MPI_BYTE, 0, target_disp, WIN_SIZE - target_disp, MPI_BYTE, MPI_REPLACE, win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Accumulate\n", rank); } r = MPI_Win_fence(0, win); if ( MPI_SUCCESS TEST_OP 
r ) printf("Rank %d failed MPI_Win_fence\n", rank); /*************************************************************/ /* Win_free and Finalize */ /*************************************************************/ r = MPI_Win_free(&win); if ( MPI_SUCCESS TEST_OP r ) printf("Rank %d failed MPI_Win_free\n", rank); free(base); MPI_Finalize(); }