#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int rank, nproc;
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Win win;
    MPI_Aint remote;
    MPI_Aint local;
    int a = 4;  /* exposed by rank 0 through the dynamic window */

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if (rank == 0) {
        /* Attach the local integer and send its absolute address to rank 1. */
        MPI_Win_attach(win, &a, sizeof(int));
        MPI_Get_address(&a, &local);
        MPI_Send(&local, 1, MPI_AINT, 1, 1, MPI_COMM_WORLD);
    } else {
        int val;
        MPI_Status reqstat;
        MPI_Recv(&remote, 1, MPI_AINT, 0, 1, MPI_COMM_WORLD, &reqstat);

        /* The Get must occur inside an access epoch; use a passive-target lock. */
        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
        MPI_Get(&val, 1, MPI_INT, 0, remote, 1, MPI_INT, win);
        MPI_Win_unlock(0, win);
    }

    /* Keep the attached memory exposed until rank 1 has finished its Get. */
    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0)
        MPI_Win_detach(win, &a);

    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}
void allocate_memory(int rank, char *rbuf, int size, WINDOW type, MPI_Win *win)
{
    MPI_Status reqstat;

    switch (type) {
        case WIN_DYNAMIC:
            MPI_CHECK(MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, win));
            MPI_CHECK(MPI_Win_attach(*win, (void *)rbuf, size));
            /* sdisp_local / sdisp_remote are MPI_Aint variables defined at file
             * scope; exchange the attached buffer's absolute address with the peer. */
            MPI_CHECK(MPI_Get_address(rbuf, &sdisp_local));
            if (rank == 0) {
                MPI_CHECK(MPI_Send(&sdisp_local, 1, MPI_AINT, 1, 1, MPI_COMM_WORLD));
                MPI_CHECK(MPI_Recv(&sdisp_remote, 1, MPI_AINT, 1, 1, MPI_COMM_WORLD, &reqstat));
            } else {
                MPI_CHECK(MPI_Recv(&sdisp_remote, 1, MPI_AINT, 0, 1, MPI_COMM_WORLD, &reqstat));
                MPI_CHECK(MPI_Send(&sdisp_local, 1, MPI_AINT, 0, 1, MPI_COMM_WORLD));
            }
            break;
        case WIN_CREATE:
            MPI_CHECK(MPI_Win_create(rbuf, size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, win));
            break;
        default:
            /* MPI_Win_allocate writes the allocated base address through its
             * baseptr argument, so rbuf must refer to a pointer-sized slot here. */
            MPI_CHECK(MPI_Win_allocate(size, 1, MPI_INFO_NULL, MPI_COMM_WORLD, rbuf, win));
            break;
    }
}
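For the WIN_DYNAMIC case above, the displacement exchanged into sdisp_remote is what the peer later passes as the target displacement of its RMA calls. A minimal sketch of that consuming side, assuming the same file-scope sdisp_remote; the names put_to_peer, sbuf, and peer are illustrative and not part of the original benchmark:

/* Sketch only: write 'size' bytes into the peer's dynamically attached buffer.
 * Assumes sdisp_remote was filled in by allocate_memory() above; 'peer' and
 * 'put_to_peer' are hypothetical names for illustration. */
static void put_to_peer(const char *sbuf, int size, int peer, MPI_Win win)
{
    MPI_Win_lock(MPI_LOCK_SHARED, peer, 0, win);
    /* With a dynamic window the target displacement is the absolute address
     * obtained via MPI_Get_address on the target process. */
    MPI_Put(sbuf, size, MPI_CHAR, peer, sdisp_remote, size, MPI_CHAR, win);
    MPI_Win_unlock(peer, win);
}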
JNIEXPORT jlong JNICALL Java_mpi_Win_createDynamicWin(
        JNIEnv *env, jobject jthis, jlong info, jlong comm)
{
    MPI_Win win;
    int rc = MPI_Win_create_dynamic((MPI_Info)info, (MPI_Comm)comm, &win);
    ompi_java_exceptionCheck(env, rc);
    return (jlong)win;
}
int main(int argc, char **argv)
{
    int i, rank, nproc;
    int errors = 0, all_errors = 0;
    int val = 0, one = 1;
    int iter;
    MPI_Aint *val_ptrs;
    MPI_Win dyn_win;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    iter = ITER_PER_RANK * nproc;

    val_ptrs = malloc(nproc * sizeof(MPI_Aint));
    MPI_Get_address(&val, &val_ptrs[rank]);

    MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, val_ptrs, 1, MPI_AINT,
                  MPI_COMM_WORLD);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &dyn_win);
    MPI_Win_attach(dyn_win, &val, sizeof(int));

    for (i = 0; i < iter; i++) {
        MPI_Win_fence(MPI_MODE_NOPRECEDE, dyn_win);
        MPI_Accumulate(&one, 1, MPI_INT, i % nproc, val_ptrs[i % nproc], 1, MPI_INT,
                       MPI_SUM, dyn_win);
        MPI_Win_fence(MPI_MODE_NOSUCCEED, dyn_win);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Read and verify my data */
    if (val != iter) {
        errors++;
        printf("%d -- Got %d, expected %d\n", rank, val, iter);
    }

    MPI_Win_detach(dyn_win, &val);
    MPI_Win_free(&dyn_win);

    MPI_Reduce(&errors, &all_errors, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    if (rank == 0 && all_errors == 0)
        printf(" No Errors\n");

    free(val_ptrs);
    MPI_Finalize();

    return 0;
}
void ompi_win_create_dynamic_f(MPI_Fint *info, MPI_Fint *comm, MPI_Fint *win,
                               MPI_Fint *ierr)
{
    int c_ierr;
    MPI_Win c_win;
    MPI_Info c_info;
    MPI_Comm c_comm;

    c_comm = MPI_Comm_f2c(*comm);
    c_info = MPI_Info_f2c(*info);

    c_ierr = MPI_Win_create_dynamic(c_info, c_comm, &c_win);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    if (MPI_SUCCESS == c_ierr) {
        *win = MPI_Win_c2f(c_win);
    }
}
int main(int argc, char **argv)
{
    int procid, nproc, i;
    MPI_Win llist_win;
    llist_ptr_t head_ptr, tail_ptr;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win);

    /* Process 0 creates the head node */
    if (procid == 0)
        head_ptr.disp = alloc_elem(-1, llist_win);

    /* Broadcast the head pointer to everyone */
    head_ptr.rank = 0;
    MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD);
    tail_ptr = head_ptr;

    /* All processes concurrently append NUM_ELEMS elements to the list */
    for (i = 0; i < NUM_ELEMS; i++) {
        llist_ptr_t new_elem_ptr;
        int success;

        /* Create a new list element and register it with the window */
        new_elem_ptr.rank = procid;
        new_elem_ptr.disp = alloc_elem(procid, llist_win);

        /* Append the new node to the list.  This might take multiple attempts if
           others have already appended and our tail pointer is stale. */
        do {
            llist_ptr_t next_tail_ptr = nil;

            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

            MPI_Compare_and_swap((void*) &new_elem_ptr.rank, (void*) &nil.rank,
                                 (void*) &next_tail_ptr.rank, MPI_INT, tail_ptr.rank,
                                 (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.rank),
                                 llist_win);

            MPI_Win_unlock(tail_ptr.rank, llist_win);
            success = (next_tail_ptr.rank == nil.rank);

            if (success) {
                int i, flag;
                MPI_Aint result;

                MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

                MPI_Fetch_and_op(&new_elem_ptr.disp, &result, MPI_AINT, tail_ptr.rank,
                                 (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp),
                                 MPI_REPLACE, llist_win);

                /* Note: accumulate is faster, since we don't need the result.
                   Replacing with Fetch_and_op to create a more complete test case. */
                /*
                MPI_Accumulate(&new_elem_ptr.disp, 1, MPI_AINT, tail_ptr.rank,
                               (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp),
                               1, MPI_AINT, MPI_REPLACE, llist_win);
                */

                MPI_Win_unlock(tail_ptr.rank, llist_win);
                tail_ptr = new_elem_ptr;

                /* For implementations that use pt-to-pt messaging, force progress
                   for other threads' RMA operations. */
                for (i = 0; i < NPROBE; i++)
                    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag,
                               MPI_STATUS_IGNORE);
            } else {
                /* Tail pointer is stale, fetch the displacement.  May take
                   multiple tries if it is being updated. */
                do {
                    MPI_Aint junk = 0;

                    MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

                    MPI_Fetch_and_op(NULL, &next_tail_ptr.disp, MPI_AINT, tail_ptr.rank,
                                     (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next.disp),
                                     MPI_NO_OP, llist_win);

                    MPI_Win_unlock(tail_ptr.rank, llist_win);
                } while (next_tail_ptr.disp == nil.disp);
                tail_ptr = next_tail_ptr;
            }
        } while (!success);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Traverse the list and verify that all processes inserted exactly the
       correct number of elements. */
    if (procid == 0) {
        int have_root = 0;
        int errors = 0;
        int *counts, count = 0;

        counts = (int*) malloc(sizeof(int) * nproc);
        assert(counts != NULL);

        for (i = 0; i < nproc; i++)
            counts[i] = 0;

        tail_ptr = head_ptr;

        /* Walk the list and tally up the number of elements inserted by each rank */
        while (tail_ptr.disp != nil.disp) {
            llist_elem_t elem;

            MPI_Win_lock(MPI_LOCK_SHARED, tail_ptr.rank, MPI_MODE_NOCHECK, llist_win);

            MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE,
                    tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE,
                    llist_win);

            MPI_Win_unlock(tail_ptr.rank, llist_win);

            tail_ptr = elem.next;

            /* This is not the root */
            if (have_root) {
                assert(elem.value >= 0 && elem.value < nproc);
                counts[elem.value]++;
                count++;

                if (verbose) {
                    int last_elem = tail_ptr.disp == nil.disp;
                    printf("%2d%s", elem.value, last_elem ? "" : " -> ");
                    if (count % ELEM_PER_ROW == 0 && !last_elem)
                        printf("\n");
                }
            }
            /* This is the root */
            else {
                assert(elem.value == -1);
                have_root = 1;
            }
        }

        if (verbose)
            printf("\n\n");

        /* Verify the counts we collected */
        for (i = 0; i < nproc; i++) {
            int expected = NUM_ELEMS;

            if (counts[i] != expected) {
                printf("Error: Rank %d inserted %d elements, expected %d\n",
                       i, counts[i], expected);
                errors++;
            }
        }

        printf("%s\n", errors == 0 ? " No Errors" : "FAIL");
        free(counts);
    }

    MPI_Win_free(&llist_win);

    /* Free all the elements in the list */
    for ( ; my_elems_count > 0; my_elems_count--)
        MPI_Free_mem(my_elems[my_elems_count-1]);

    MPI_Finalize();
    return 0;
}
/**
 * TODO: Should units that belong to teamid and units that do not be
 *       differentiated inside this function or outside of it?
 * FIX:  Outside of it.
 *
 * teamid denotes the parent (super)team from which newteam is created.
 */
dart_ret_t dart_team_create(
  dart_team_t          teamid,
  const dart_group_t * group,
  dart_team_t        * newteam)
{
  MPI_Comm    comm;
  MPI_Comm    subcomm;
  MPI_Win     win;
  uint16_t    index, unique_id;
  size_t      size;
  dart_team_t max_teamid = -1;
  dart_unit_t sub_unit, unit;

  dart_myid(&unit);
  dart_size(&size);
  dart_team_myid(teamid, &sub_unit);

  int result = dart_adapt_teamlist_convert(teamid, &unique_id);
  if (result == -1) {
    return DART_ERR_INVAL;
  }
  comm    = dart_teams[unique_id];
  subcomm = MPI_COMM_NULL;

  MPI_Comm_create(comm, group->mpi_group, &subcomm);

  *newteam = DART_TEAM_NULL;

  /* Get the maximum next_availteamid among all the units belonging to
   * the parent team specified by 'teamid'. */
  MPI_Allreduce(
    &dart_next_availteamid,
    &max_teamid,
    1,
    MPI_INT32_T,
    MPI_MAX,
    comm);
  dart_next_availteamid = max_teamid + 1;

  if (subcomm != MPI_COMM_NULL) {
    int result = dart_adapt_teamlist_alloc(max_teamid, &index);
    if (result == -1) {
      return DART_ERR_OTHER;
    }
    /* max_teamid becomes the ID of the newly created team. */
    *newteam = max_teamid;
    dart_teams[index] = subcomm;
    MPI_Win_create_dynamic(MPI_INFO_NULL, subcomm, &win);
    dart_win_lists[index] = win;
  }

#if 0
  /* Another way of generating the available team ID for the newly created team. */
  if (subcomm != MPI_COMM_NULL) {
    /* Get the maximum next_availteamid among all the units belonging to the
     * created sub-communicator. */
    MPI_Allreduce(&next_availteamid, &max_teamid, 1, MPI_INT, MPI_MAX, subcomm);
    int result = dart_adapt_teamlist_alloc(max_teamid, &index);
    if (result == -1) {
      return DART_ERR_OTHER;
    }
    *newteam = max_teamid;
    teams[index] = subcomm;
    MPI_Comm_rank(subcomm, &rank);
    if (rank == 0) {
      root = sub_unit;
      if (sub_unit != 0) {
        MPI_Send(&root, 1, MPI_INT, 0, 0, comm);
      }
    }
    next_availteamid = max_teamid + 1;
  }

  if (sub_unit == 0) {
    if (root == -1) {
      MPI_Recv(&root, 1, MPI_INT, MPI_ANY_SOURCE, 0, comm, MPI_STATUS_IGNORE);
    }
  }

  MPI_Bcast(&root, 1, MPI_INT, 0, comm);

  /* Broadcast the calculated max_teamid to all the units not belonging to the
   * sub-communicator. */
  MPI_Bcast(&max_teamid, 1, MPI_INT, root, comm);
  if (subcomm == MPI_COMM_NULL) {
    /* 'next_availteamid' is changed iff it is smaller than 'max_teamid + 1'. */
    if (max_teamid + 1 > next_availteamid) {
      next_availteamid = max_teamid + 1;
    }
  }
#endif

  if (subcomm != MPI_COMM_NULL) {
#if !defined(DART_MPI_DISABLE_SHARED_WINDOWS)
    int i;
    size_t n;
    MPI_Comm  sharedmem_comm;
    MPI_Group sharedmem_group, group_all;
    MPI_Comm_split_type(
      subcomm,
      MPI_COMM_TYPE_SHARED,
      1,
      MPI_INFO_NULL,
      &sharedmem_comm);
    dart_sharedmem_comm_list[index] = sharedmem_comm;

    if (sharedmem_comm != MPI_COMM_NULL) {
      MPI_Comm_size(
        sharedmem_comm,
        &(dart_sharedmemnode_size[index]));

      // dart_unit_mapping[index] = (int*)malloc(
      //   dart_sharedmem_size[index] * sizeof(int));

      MPI_Comm_group(sharedmem_comm, &sharedmem_group);
      MPI_Comm_group(MPI_COMM_WORLD, &group_all);

      int * dart_unit_mapping = (int *)malloc(
        dart_sharedmemnode_size[index] * sizeof(int));
      int * sharedmem_ranks   = (int *)malloc(
        dart_sharedmemnode_size[index] * sizeof(int));
      dart_sharedmem_table[index] = (int *)malloc(size * sizeof(int));

      for (i = 0; i < dart_sharedmemnode_size[index]; i++) {
        sharedmem_ranks[i] = i;
      }

      // MPI_Group_translate_ranks(sharedmem_group, dart_sharedmem_size[index],
      //   sharedmem_ranks, group_all, dart_unit_mapping[index]);
      MPI_Group_translate_ranks(
        sharedmem_group,
        dart_sharedmemnode_size[index],
        sharedmem_ranks,
        group_all,
        dart_unit_mapping);

      for (n = 0; n < size; n++) {
        dart_sharedmem_table[index][n] = -1;
      }
      for (i = 0; i < dart_sharedmemnode_size[index]; i++) {
        dart_sharedmem_table[index][dart_unit_mapping[i]] = i;
      }
      free(sharedmem_ranks);
      free(dart_unit_mapping);
    }
#endif
    MPI_Win_lock_all(0, win);

    DART_LOG_DEBUG("%2d: TEAMCREATE - create team %d out of parent team %d",
                   unit, *newteam, teamid);
  }
  return DART_OK;
}
void BoxLib::Initialize (int& argc, char**& argv, bool build_parm_parse, MPI_Comm mpi_comm)
{
    ParallelDescriptor::StartParallel(&argc, &argv, mpi_comm);

#ifndef WIN32
    //
    // Make sure to catch new failures.
    //
    std::set_new_handler(BoxLib::OutOfMemory);

    if (argv[0][0] != '/') {
        char temp[1024];
        getcwd(temp,1024);
        exename = temp;
        exename += "/";
    }
    exename += argv[0];
#endif

#ifdef BL_USE_UPCXX
    upcxx::init(&argc, &argv);
    if (upcxx::myrank() != ParallelDescriptor::MyProc())
        BoxLib::Abort("UPC++ rank != MPI rank");
#endif

#ifdef BL_USE_MPI3
    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &ParallelDescriptor::cp_win);
    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &ParallelDescriptor::fb_win);
    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &ParallelDescriptor::fpb_win);
#endif

    while (!The_Initialize_Function_Stack.empty())
    {
        //
        // Call the registered function.
        //
        (*The_Initialize_Function_Stack.top())();
        //
        // And then remove it from the stack.
        //
        The_Initialize_Function_Stack.pop();
    }

    if(ParallelDescriptor::NProcsSidecar() > 0) {
        if(ParallelDescriptor::InSidecarGroup()) {
            if (ParallelDescriptor::IOProcessor())
                std::cout << "===== SIDECARS INITIALIZED =====" << std::endl;
            ParallelDescriptor::SidecarProcess();
            BoxLib::Finalize();
            return;
        }
    }

    BL_PROFILE_INITIALIZE();

    //
    // Initialize random seed after we're running in parallel.
    //
    BoxLib::InitRandom(ParallelDescriptor::MyProc()+1, ParallelDescriptor::NProcs());

#ifdef BL_USE_MPI
    if (ParallelDescriptor::IOProcessor()) {
        std::cout << "MPI initialized with "
                  << ParallelDescriptor::NProcs()
                  << " MPI processes\n";
    }
#endif

#ifdef _OPENMP
    if (ParallelDescriptor::IOProcessor()) {
        std::cout << "OMP initialized with "
                  << omp_get_max_threads()
                  << " OMP threads\n";
    }
#endif

    signal(SIGSEGV, BLBackTrace::handler); // catch seg fault
    signal(SIGINT,  BLBackTrace::handler);

#ifndef BL_AMRPROF
    if (build_parm_parse)
    {
        if (argc == 1) {
            ParmParse::Initialize(0,0,0);
        } else {
            if (strchr(argv[1],'=')) {
                ParmParse::Initialize(argc-1,argv+1,0);
            } else {
                ParmParse::Initialize(argc-2,argv+2,argv[1]);
            }
        }
    }

    {
        ParmParse pp("boxlib");
        pp.query("v", verbose);
        pp.query("verbose", verbose);

        int invalid = 0, divbyzero = 0, overflow = 0;
        pp.query("fpe_trap_invalid",  invalid);
        pp.query("fpe_trap_zero",     divbyzero);
        pp.query("fpe_trap_overflow", overflow);
        int flags = 0;
        if (invalid)   flags |= FE_INVALID;
        if (divbyzero) flags |= FE_DIVBYZERO;
        if (overflow)  flags |= FE_OVERFLOW;
#if defined(__linux__)
#if !defined(__PGI) || (__PGIC__ >= 16)
        if (flags != 0) {
            feenableexcept(flags);  // trap floating point exceptions
            signal(SIGFPE, BLBackTrace::handler);
        }
#endif
#endif
    }

    ParallelDescriptor::StartTeams();

    ParallelDescriptor::StartSubCommunicator();

    mempool_init();
#endif

    std::cout << std::setprecision(10);

    if (double(std::numeric_limits<long>::max()) < 9.e18) {
        if (ParallelDescriptor::IOProcessor()) {
            std::cout << "!\n! WARNING: Maximum of long int, "
                      << std::numeric_limits<long>::max()
                      << ", might be too small for big runs.\n!\n";
        }
    }

#if defined(BL_USE_FORTRAN_MPI) || defined(BL_USE_F_INTERFACES)
    int fcomm = MPI_Comm_c2f(ParallelDescriptor::Communicator());
    bl_fortran_mpi_comm_init (fcomm);
#endif

#if defined(BL_MEM_PROFILING) && defined(BL_USE_F_BASELIB)
    MemProfiler_f::initialize();
#endif
}
void run_rma_test(int nprocs_per_node)
{
    int myrank, nprocs;
    int mem_rank;
    MPI_Win win;
    int *baseptr;
    MPI_Aint local_size;

    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (nprocs < nprocs_per_node * 2) {
        if (!myrank)
            printf("should start program with at least %d processes\n",
                   nprocs_per_node * 2);
        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    mem_rank = nprocs_per_node + nprocs_per_node / 2;

    local_size = (myrank == mem_rank) ? COUNT : 0;

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    MPI_Win_lock_all(0, win);

    int type_size;
    MPI_Type_size(MPI_INT, &type_size);

    size_t nbytes = COUNT * type_size;

    assert(MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &baseptr) == MPI_SUCCESS);
    assert(MPI_Win_attach(win, baseptr, nbytes) == MPI_SUCCESS);

    MPI_Aint ldisp;
    MPI_Aint *disps = malloc(nprocs * sizeof(MPI_Aint));

    assert(MPI_Get_address(baseptr, &ldisp) == MPI_SUCCESS);
    /* Each rank contributes one MPI_Aint, so the per-rank receive count is 1. */
    assert(MPI_Allgather(&ldisp, 1, MPI_AINT, disps, 1, MPI_AINT,
                         MPI_COMM_WORLD) == MPI_SUCCESS);

    if (myrank == 0) {
        for (size_t idx = 0; idx < COUNT; ++idx) {
            baseptr[idx] = idx * COUNT + 1;
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (myrank == mem_rank) {
        /* NOTE: the literal 10 in the Get and in the check assumes COUNT == 10. */
        assert(MPI_Get(baseptr, 10, MPI_INT, 0, disps[0], 10, MPI_INT, win) == MPI_SUCCESS);
        assert(MPI_Win_flush(0, win) == MPI_SUCCESS);

        for (size_t idx = 0; idx < COUNT; ++idx) {
            assert(baseptr[idx] == idx * 10 + 1);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_unlock_all(win);

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_Win_free(&win);

    MPI_Free_mem(baseptr);
    free(disps);

    printf("Test finished\n");
}
int main(int argc, char **argv)
{
    int rank, nproc;
    int errs = 0;
    int array[1024];
    int val = 0;
    int target_rank;
    MPI_Aint bases[2];
    MPI_Aint disp, offset;
    MPI_Win win;

    MTest_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (rank == 0 && nproc != 2) {
        MTestError("Must run with 2 ranks\n");
    }

    /* Get the base address in the middle of the array */
    if (rank == 0) {
        target_rank = 1;
        array[0] = 1234;
        MPI_Get_address(&array[512], &bases[0]);
    } else if (rank == 1) {
        target_rank = 0;
        array[1023] = 1234;
        MPI_Get_address(&array[512], &bases[1]);
    }

    /* Exchange bases */
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, bases, 1, MPI_AINT, MPI_COMM_WORLD);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    MPI_Win_attach(win, array, sizeof(int) * 1024);

    /* Do MPI_Aint addressing arithmetic */
    if (rank == 0) {
        disp = sizeof(int) * 511;
        offset = MPIX_Aint_add(bases[1], disp);   /* offset points to array[1023] */
    } else if (rank == 1) {
        disp = sizeof(int) * 512;
        offset = MPIX_Aint_diff(bases[0], disp);  /* offset points to array[0] */
    }

    /* Get val and verify it */
    MPI_Win_fence(MPI_MODE_NOPRECEDE, win);
    MPI_Get(&val, 1, MPI_INT, target_rank, offset, 1, MPI_INT, win);
    MPI_Win_fence(MPI_MODE_NOSUCCEED, win);

    if (val != 1234) {
        errs++;
        printf("%d -- Got %d, expected 1234\n", rank, val);
    }

    MPI_Win_detach(win, array);
    MPI_Win_free(&win);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
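MPI-3.1 standardized this address arithmetic as MPI_Aint_add and MPI_Aint_diff; the MPIX_ prefix above is the pre-standard extension spelling. A minimal sketch of the same computation using the standard names, assuming an MPI-3.1 (or newer) library; target_disp_for is an illustrative helper, not part of the test:

/* Sketch only: build a target displacement for a dynamic window with the
 * MPI-3.1 names.  MPI_Aint_add(base, disp) advances an address by disp bytes;
 * MPI_Aint_diff computes the difference.  'target_disp_for' is hypothetical. */
static MPI_Aint target_disp_for(MPI_Aint remote_base, MPI_Aint byte_offset)
{
    return MPI_Aint_add(remote_base, byte_offset);
}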
int main(int argc, char **argv)
{
    int procid, nproc, i, j, my_nelem;
    int pollint = 0;
    double time;
    MPI_Win llist_win;
    llist_ptr_t head_ptr, tail_ptr;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &procid);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &llist_win);

    /* Process 0 creates the head node */
    if (procid == 0)
        head_ptr.disp = alloc_elem(procid, llist_win);

    /* Broadcast the head pointer to everyone */
    head_ptr.rank = 0;
    MPI_Bcast(&head_ptr.disp, 1, MPI_AINT, 0, MPI_COMM_WORLD);
    tail_ptr = head_ptr;

    /* All processes append NUM_ELEMS elements to the list; rank 0 has already
     * appended an element. */
    if (procid == 0)
        i = 1;
    else
        i = 0;

    my_nelem = NUM_ELEMS / nproc;
    if (procid < NUM_ELEMS % nproc)
        my_nelem++;

    MPI_Barrier(MPI_COMM_WORLD);
    time = MPI_Wtime();

    for ( ; i < my_nelem; i++) {
        llist_ptr_t new_elem_ptr;
        int success = 0;

        /* Create a new list element and register it with the window */
        new_elem_ptr.rank = procid;
        new_elem_ptr.disp = alloc_elem(procid, llist_win);

        /* Append the new node to the list.  This might take multiple attempts if
           others have already appended and our tail pointer is stale. */
        do {
            int flag;

            /* The tail is at my left neighbor, append my element. */
            if (tail_ptr.rank == (procid + nproc-1) % nproc) {
                if (verbose)
                    printf("%d: Appending to <%d, %p>\n", procid, tail_ptr.rank,
                           (void*) tail_ptr.disp);

                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tail_ptr.rank, 0, llist_win);
#if USE_ACC
                MPI_Accumulate(&new_elem_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                               (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next),
                               sizeof(llist_ptr_t), MPI_BYTE, MPI_REPLACE, llist_win);
#else
                MPI_Put(&new_elem_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                        (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next),
                        sizeof(llist_ptr_t), MPI_BYTE, llist_win);
#endif
                MPI_Win_unlock(tail_ptr.rank, llist_win);

                success = 1;
                tail_ptr = new_elem_ptr;
            }
            /* Otherwise, chase the tail. */
            else {
                llist_ptr_t next_tail_ptr;

                MPI_Win_lock(MPI_LOCK_EXCLUSIVE, tail_ptr.rank, 0, llist_win);
#if USE_ACC
                MPI_Get_accumulate(NULL, 0, MPI_DATATYPE_NULL, &next_tail_ptr,
                                   sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                                   (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next),
                                   sizeof(llist_ptr_t), MPI_BYTE, MPI_NO_OP, llist_win);
#else
                MPI_Get(&next_tail_ptr, sizeof(llist_ptr_t), MPI_BYTE, tail_ptr.rank,
                        (MPI_Aint) &(((llist_elem_t*)tail_ptr.disp)->next),
                        sizeof(llist_ptr_t), MPI_BYTE, llist_win);
#endif
                MPI_Win_unlock(tail_ptr.rank, llist_win);

                if (next_tail_ptr.rank != nil.rank) {
                    if (verbose)
                        printf("%d: Chasing to <%d, %p>\n", procid, next_tail_ptr.rank,
                               (void*) next_tail_ptr.disp);
                    tail_ptr = next_tail_ptr;
                    pollint = MAX(MIN_NPROBE, pollint/2);
                }
                else {
                    for (j = 0; j < pollint; j++)
                        MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD,
                                   &flag, MPI_STATUS_IGNORE);

                    pollint = MIN(MAX_NPROBE, pollint*2);
                }
            }
        } while (!success);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    time = MPI_Wtime() - time;

    /* Traverse the list and verify that all processes inserted exactly the
       correct number of elements. */
    if (procid == 0) {
        int errors = 0;
        int *counts, count = 0;

        counts = (int*) malloc(sizeof(int) * nproc);
        assert(counts != NULL);

        for (i = 0; i < nproc; i++)
            counts[i] = 0;

        tail_ptr = head_ptr;

        MPI_Win_lock_all(0, llist_win);

        /* Walk the list and tally up the number of elements inserted by each rank */
        while (tail_ptr.disp != nil.disp) {
            llist_elem_t elem;

            MPI_Get(&elem, sizeof(llist_elem_t), MPI_BYTE,
                    tail_ptr.rank, tail_ptr.disp, sizeof(llist_elem_t), MPI_BYTE,
                    llist_win);

            MPI_Win_flush(tail_ptr.rank, llist_win);

            tail_ptr = elem.next;

            assert(elem.value >= 0 && elem.value < nproc);
            counts[elem.value]++;
            count++;

            if (verbose) {
                int last_elem = tail_ptr.disp == nil.disp;
                printf("%2d%s", elem.value, last_elem ? "" : " -> ");
                if (count % ELEM_PER_ROW == 0 && !last_elem)
                    printf("\n");
            }
        }

        MPI_Win_unlock_all(llist_win);

        if (verbose)
            printf("\n\n");

        /* Verify the counts we collected */
        for (i = 0; i < nproc; i++) {
            int expected;

            expected = NUM_ELEMS/nproc;
            if (i < NUM_ELEMS % nproc)
                expected++;

            if (counts[i] != expected) {
                printf("Error: Rank %d inserted %d elements, expected %d\n",
                       i, counts[i], expected);
                errors++;
            }
        }

        printf("%s\n", errors == 0 ? " No Errors" : "FAIL");
        free(counts);
    }

    if (print_perf) {
        double max_time;

        /* Use the slowest rank's elapsed time for the throughput figures. */
        MPI_Reduce(&time, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

        if (procid == 0) {
            printf("Total time = %0.2f sec, elem/sec = %0.2f, sec/elem = %0.2f usec\n",
                   max_time, NUM_ELEMS/max_time, max_time/NUM_ELEMS*1.0e6);
        }
    }

    MPI_Win_free(&llist_win);

    /* Free all the elements in the list */
    for ( ; my_elems_count > 0; my_elems_count--)
        MPI_Free_mem(my_elems[my_elems_count-1]);

    MPI_Finalize();
    return 0;
}