VCComm::~VCComm(void) { #ifdef MPI_C_FOUND // these are declared with "new type[]", so use delete []: // if (updateFieldCounts) delete [] updateFieldCounts; if (updateFieldDisps) delete [] updateFieldDisps; if (updateFieldSendBuf) delete [] updateFieldSendBuf; if (updateFieldRecvBuf) delete [] updateFieldRecvBuf; if (updateFieldSendIDs) delete [] updateFieldSendIDs; if (updateFieldRecvIDs) delete [] updateFieldRecvIDs; if (failBlockSendBuf) delete [] failBlockSendBuf; if (failBlockRecvBuf) delete [] failBlockRecvBuf; if (failBlockCounts) delete [] failBlockCounts; if (failBlockDisps) delete [] failBlockDisps; MPI_Type_free(&block_val_type); MPI_Op_free(&bv_min_op); MPI_Op_free(&bv_max_op); MPI_Op_free(&bv_sum_op); MPI_Type_free(&element_sweep_type); #endif }
int main( int argc, char **argv ) { int *sendbuf; int block_size; int *recvbuf; int size, rank, i; int err = 0; MPI_Comm comm; MPI_Op left_op, right_op, nc_sum_op; MTest_Init( &argc, &argv ); comm = MPI_COMM_WORLD; MPI_Comm_size( comm, &size ); MPI_Comm_rank( comm, &rank ); #if MTEST_HAVE_MIN_MPI_VERSION(2,2) /* MPI_Reduce_scatter_block was added in MPI-2.2 */ MPI_Op_create(&left, 0/*non-commutative*/, &left_op); MPI_Op_create(&right, 0/*non-commutative*/, &right_op); MPI_Op_create(&nc_sum, 0/*non-commutative*/, &nc_sum_op); for (block_size = 1; block_size < MAX_BLOCK_SIZE; block_size *= 2) { sendbuf = (int *) malloc( block_size * size * sizeof(int) ); recvbuf = (int *) malloc( block_size * sizeof(int) ); for (i=0; i<(size*block_size); i++) sendbuf[i] = rank + i; for (i=0; i<block_size; i++) recvbuf[i] = 0xdeadbeef; MPI_Reduce_scatter_block( sendbuf, recvbuf, block_size, MPI_INT, left_op, comm ); for (i = 0; i < block_size; ++i) if (recvbuf[i] != (rank * block_size + i)) ++err; MPI_Reduce_scatter_block( sendbuf, recvbuf, block_size, MPI_INT, right_op, comm ); for (i = 0; i < block_size; ++i) if (recvbuf[i] != ((size - 1) + (rank * block_size) + i)) ++err; MPI_Reduce_scatter_block( sendbuf, recvbuf, block_size, MPI_INT, nc_sum_op, comm ); for (i = 0; i < block_size; ++i) { int x = rank * block_size + i; if (recvbuf[i] != (size*x + (size-1)*size/2)) ++err; } free(recvbuf); free(sendbuf); } MPI_Op_free(&left_op); MPI_Op_free(&right_op); MPI_Op_free(&nc_sum_op); #endif MTest_Finalize( err ); MPI_Finalize( ); return err; }
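The snippet references left, right, and nc_sum without showing them; their behavior is fully determined by the checked results (rank 0's contribution, rank size-1's contribution, and the rank-ordered sum). A minimal sketch consistent with those checks, assuming int data:

/* left(x,y) = x : keep the operand from the lower-ranked partial */
static void left(void *a, void *b, int *count, MPI_Datatype *type)
{
    int *in = (int *) a, *inout = (int *) b;
    int i;
    for (i = 0; i < *count; ++i)
        inout[i] = in[i];
}

/* right(x,y) = y : keep the operand already in inout (a no-op) */
static void right(void *a, void *b, int *count, MPI_Datatype *type)
{
}

/* nc_sum(x,y) = x + y : a plain sum, registered as non-commutative
   above to force a rank-ordered reduction */
static void nc_sum(void *a, void *b, int *count, MPI_Datatype *type)
{
    int *in = (int *) a, *inout = (int *) b;
    int i;
    for (i = 0; i < *count; ++i)
        inout[i] += in[i];
}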
int main(int argc, char *argv[]) { MPI_Op c_uop = MPI_OP_NULL; MPI_Op nc_uop = MPI_OP_NULL; int is_commutative = 0; int errs = 0; MTest_Init(&argc, &argv); /* make sure that user-defined ops work too */ MPI_Op_create(&user_op, 1 /*commute */ , &c_uop); MPI_Op_create(&user_op, 0 /*!commute */ , &nc_uop); #if MTEST_HAVE_MIN_MPI_VERSION(2,2) /* this function was added in MPI-2.2 */ #define CHECK_COMMUTATIVE(op_) \ do { \ MPI_Op_commutative((op_), &is_commutative); \ if (!is_commutative) { ++errs; } \ } while (0) /* Check all predefined reduction operations for commutativity. * This list is from section 5.9.2 of the MPI-2.1 standard */ CHECK_COMMUTATIVE(MPI_MAX); CHECK_COMMUTATIVE(MPI_MIN); CHECK_COMMUTATIVE(MPI_SUM); CHECK_COMMUTATIVE(MPI_PROD); CHECK_COMMUTATIVE(MPI_LAND); CHECK_COMMUTATIVE(MPI_BAND); CHECK_COMMUTATIVE(MPI_LOR); CHECK_COMMUTATIVE(MPI_BOR); CHECK_COMMUTATIVE(MPI_LXOR); CHECK_COMMUTATIVE(MPI_BXOR); CHECK_COMMUTATIVE(MPI_MAXLOC); CHECK_COMMUTATIVE(MPI_MINLOC); #undef CHECK_COMMUTATIVE MPI_Op_commutative(c_uop, &is_commutative); if (!is_commutative) { ++errs; } /* also check our non-commutative user defined operation */ MPI_Op_commutative(nc_uop, &is_commutative); if (is_commutative) { ++errs; } #endif MPI_Op_free(&nc_uop); MPI_Op_free(&c_uop); MTest_Finalize(errs); MPI_Finalize(); return 0; }
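Here user_op exists only so that both a commutative and a non-commutative handle can be queried with MPI_Op_commutative; any valid reduction body would do. A minimal sketch (the real body is not shown, so this is an assumption):

static void user_op(void *invec, void *inoutvec, int *len, MPI_Datatype *dtype)
{
    int *in = (int *) invec, *inout = (int *) inoutvec;
    int i;
    for (i = 0; i < *len; ++i)
        inout[i] += in[i];   /* element-wise integer sum */
}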
void Sort<Hilbert::HilbertIndices,unsigned int>::binsort() { // Find the global min and max from all the // processors. Do this using MPI_Allreduce. Hilbert::HilbertIndices local_min, local_max, global_min, global_max; if (_data.empty()) { local_min.rack0 = local_min.rack1 = local_min.rack2 = static_cast<Hilbert::inttype>(-1); local_max.rack0 = local_max.rack1 = local_max.rack2 = 0; } else { local_min = _data.front(); local_max = _data.back(); } MPI_Op hilbert_max, hilbert_min; MPI_Op_create ((MPI_User_function*)__hilbert_max_op, true, &hilbert_max); MPI_Op_create ((MPI_User_function*)__hilbert_min_op, true, &hilbert_min); // Communicate to determine the global // min and max for all processors. MPI_Allreduce(&local_min, &global_min, 1, Parallel::StandardType<Hilbert::HilbertIndices>(), hilbert_min, this->comm().get()); MPI_Allreduce(&local_max, &global_max, 1, Parallel::StandardType<Hilbert::HilbertIndices>(), hilbert_max, this->comm().get()); MPI_Op_free (&hilbert_max); MPI_Op_free (&hilbert_min); // Bin-Sort based on the global min and max Parallel::BinSorter<Hilbert::HilbertIndices> bs(this->comm(),_data); bs.binsort(_n_procs, global_max, global_min); // Now save the local bin sizes in a vector so // we don't have to keep around the BinSorter. for (processor_id_type i=0; i<_n_procs; ++i) _local_bin_sizes[i] = bs.sizeof_bin(i); }
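__hilbert_max_op and __hilbert_min_op are not shown. Since a HilbertIndices value is a triple (rack0, rack1, rack2) with rack0 assumed most significant, a plausible C-style sketch of the max operator compares lexicographically; the min operator is the mirror image. This is a reconstruction, not libMesh's actual implementation:

/* hypothetical: keep the lexicographically larger Hilbert index */
static void __hilbert_max_op(void *in, void *inout, int *len, MPI_Datatype *dtype)
{
    HilbertIndices *a = (HilbertIndices *) in;
    HilbertIndices *b = (HilbertIndices *) inout;
    int i;
    for (i = 0; i < *len; i++) {
        if (a[i].rack0 > b[i].rack0 ||
            (a[i].rack0 == b[i].rack0 &&
             (a[i].rack1 > b[i].rack1 ||
              (a[i].rack1 == b[i].rack1 && a[i].rack2 > b[i].rack2))))
            b[i] = a[i];
    }
}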
/*@C VecFinalizePackage - This function finalizes everything in the Vec package. It is called from PetscFinalize(). Level: developer .keywords: Vec, initialize, package .seealso: PetscInitialize() @*/ PetscErrorCode VecFinalizePackage(void) { PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscFunctionListDestroy(&VecList);CHKERRQ(ierr); ierr = MPI_Op_free(&PetscSplitReduction_Op);CHKERRQ(ierr); ierr = MPI_Op_free(&VecMax_Local_Op);CHKERRQ(ierr); ierr = MPI_Op_free(&VecMin_Local_Op);CHKERRQ(ierr); VecPackageInitialized = PETSC_FALSE; VecRegisterAllCalled = PETSC_FALSE; PetscFunctionReturn(0); }
int dgraphAllreduceMaxSum2 ( Gnum * reduloctab, /* Pointer to array of local Gnum data */ Gnum * reduglbtab, /* Pointer to array of reduced Gnum data */ int redumaxsumnbr, /* Number of max + sum Gnum operations */ MPI_User_function * redufuncptr, /* Pointer to operator function */ MPI_Comm proccomm) /* Communicator to be used for reduction */ { MPI_Datatype redutypedat; /* Data type for finding best separator */ MPI_Op reduoperdat; /* Handle of MPI operator for finding best separator */ if ((MPI_Type_contiguous (redumaxsumnbr, GNUM_MPI, &redutypedat) != MPI_SUCCESS) || (MPI_Type_commit (&redutypedat) != MPI_SUCCESS) || (MPI_Op_create (redufuncptr, 1, &reduoperdat) != MPI_SUCCESS)) { errorPrint ("dgraphAllreduceMaxSum: communication error (1)"); return (1); } if (MPI_Allreduce (reduloctab, reduglbtab, 1, redutypedat, reduoperdat, proccomm) != MPI_SUCCESS) { errorPrint ("dgraphAllreduceMaxSum: communication error (2)"); return (1); } if ((MPI_Op_free (&reduoperdat) != MPI_SUCCESS) || (MPI_Type_free (&redutypedat) != MPI_SUCCESS)) { errorPrint ("dgraphAllreduceMaxSum: communication error (3)"); return (1); } return (0); }
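The operator body is supplied by the caller (in SCOTCH these are macro-generated per max/sum split). A hedged sketch for a hypothetical split of MAXNBR leading max slots followed by SUMNBR sum slots (both names are placeholders, not SCOTCH identifiers):

/* element-wise max over the first MAXNBR Gnum slots, sum over the
   remaining SUMNBR; *len is 1 because the contiguous datatype built
   in dgraphAllreduceMaxSum2 packs all redumaxsumnbr slots */
static void reduMaxSumOp(void *in, void *inout, int *len, MPI_Datatype *typedat)
{
    Gnum *a = (Gnum *) in;
    Gnum *b = (Gnum *) inout;
    int i;
    for (i = 0; i < MAXNBR; i++)
        if (a[i] > b[i])
            b[i] = a[i];
    for ( ; i < MAXNBR + SUMNBR; i++)
        b[i] += a[i];
}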
int main( int argc, char **argv ) { int rank, size, i; int data; int errors=0; int result = -100; int correct_result; MPI_Op op; MTest_Init( &argc, &argv ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); MPI_Comm_size( MPI_COMM_WORLD, &size ); data = rank; MPI_Op_create( (MPI_User_function *)addem, 1, &op ); MPI_Reduce ( &data, &result, 1, MPI_INT, op, 0, MPI_COMM_WORLD ); MPI_Bcast ( &result, 1, MPI_INT, 0, MPI_COMM_WORLD ); MPI_Op_free( &op ); correct_result = 0; for(i=0;i<size;i++) correct_result += i; if (result != correct_result) errors++; MTest_Finalize( errors ); MPI_Finalize(); return MTestReturnValue( errors ); }
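addem is not shown, but the test expects the op to behave as integer addition (correct_result is the sum of ranks 0..size-1), and the cast to MPI_User_function in MPI_Op_create suggests it is declared with int* parameters. A minimal sketch consistent with that:

static void addem(int *invec, int *inoutvec, int *len, MPI_Datatype *dtype)
{
    int i;
    for (i = 0; i < *len; i++)
        inoutvec[i] += invec[i];
}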
void mpiReduce_pickerV3(float *resDataAbsMaxPaddedGlobal, size_t *resDataMaxIndPaddedGlobal, size_t resSize, eXCorrMerge bAbs) { resSizeMPI = resSize; MPI_Datatype mpiType; MPI_Type_contiguous((int) 2, MPI_FLOAT, &mpiType); MPI_Type_commit(&mpiType); float *resDataGlobalNode = NULL; float *resDataGlobalNodeReduce = NULL; array_new(resDataGlobalNode, 2*resSize); array_new(resDataGlobalNodeReduce, 2*resSize); memcpy(resDataGlobalNode, resDataAbsMaxPaddedGlobal, resSize*sizeof(float)); mpiOp_array_typecast(resDataMaxIndPaddedGlobal, resDataGlobalNode+resSize, resSize); MPI_Op mpiOp; switch (bAbs) { case XCORR_MERGE_NEGATIVE: MPI_Op_create((MPI_User_function *) mpiOp_xcorrMergeResultGlobalV3Abs, 1, // commutative &mpiOp); break; case XCORR_MERGE_POSITIVE: MPI_Op_create((MPI_User_function *) mpiOp_xcorrMergeResultGlobalV3, 1, // commutative &mpiOp); break; default: ERROR("mpiReduce_pickerV3", "unsupported merging mode"); } MPI_Reduce(resDataGlobalNode, resDataGlobalNodeReduce, (int) resSize, // resSize elements of size 2*sizeof(float) mpiType, mpiOp, 0, MPI_COMM_WORLD); MPI_Op_free(&mpiOp); memcpy(resDataAbsMaxPaddedGlobal, resDataGlobalNodeReduce, resSize*sizeof(float)); mpiOp_array_typecast(resDataGlobalNodeReduce+resSize, resDataMaxIndPaddedGlobal, resSize); array_delete(resDataGlobalNode); array_delete(resDataGlobalNodeReduce); MPI_Type_free(&mpiType); }
int main( int argc, char **argv ) { int rank, size; int data; int errors=0; int result = -100; MPI_Op op; MPI_Init( &argc, &argv ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); MPI_Comm_size( MPI_COMM_WORLD, &size ); data = rank; MPI_Op_create( (MPI_User_function*)assoc, 0, &op ); MPI_Reduce ( &data, &result, 1, MPI_INT, op, size-1, MPI_COMM_WORLD ); MPI_Bcast ( &result, 1, MPI_INT, size-1, MPI_COMM_WORLD ); MPI_Op_free( &op ); if (result == BAD_ANSWER) errors++; if (errors) printf( "[%d] done with ERRORS(%d)!\n", rank, errors ); else { if (rank == 0) printf(" No Errors\n"); } MPI_Finalize(); return errors; }
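assoc and BAD_ANSWER are not shown. The op is registered as non-commutative and the test only checks that the result is not BAD_ANSWER, so assoc presumably plants the sentinel when operands arrive in an unexpected order. One plausible shape (a guess, not the verified original):

#define BAD_ANSWER 100000

/* in a rank-ordered non-commutative reduce of data = rank, the
   lower-ranked partial should arrive in invec; flag anything else */
static void assoc(int *invec, int *inoutvec, int *len, MPI_Datatype *dtype)
{
    int i;
    for (i = 0; i < *len; i++) {
        if (inoutvec[i] <= invec[i])
            inoutvec[i] = BAD_ANSWER;
        else
            inoutvec[i] = invec[i];
    }
}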
int main( int argc, char *argv[] ) { int errs = 0; #if MTEST_HAVE_MIN_MPI_VERSION(2,2) int i; int *inbuf = NULL; int *inoutbuf = NULL; int count = -1; MPI_Op uop = MPI_OP_NULL; #endif MTest_Init(&argc, &argv); #if MTEST_HAVE_MIN_MPI_VERSION(2,2) /* this function was added in MPI-2.2 */ inbuf = malloc(sizeof(int) * MAX_BUF_ELEMENTS); inoutbuf = malloc(sizeof(int) * MAX_BUF_ELEMENTS); for (count = 0; count < MAX_BUF_ELEMENTS; count > 0 ? count*=2 : count++) { for (i = 0; i < count; ++i) { inbuf[i] = i; inoutbuf[i] = i; } MPI_Reduce_local(inbuf, inoutbuf, count, MPI_INT, MPI_SUM); for (i = 0; i < count; ++i) { if (inbuf[i] != i) ++errs; if (inoutbuf[i] != (2*i)) ++errs; } } /* make sure that user-defined ops work too */ MPI_Op_create(&user_op, 0/*!commute*/, &uop); for (count = 0; count < MAX_BUF_ELEMENTS; count > 0 ? count*=2 : count++) { for (i = 0; i < count; ++i) { inbuf[i] = i; inoutbuf[i] = i; } MPI_Reduce_local(inbuf, inoutbuf, count, MPI_INT, uop); errs += uop_errs; for (i = 0; i < count; ++i) { if (inbuf[i] != i) ++errs; if (inoutbuf[i] != (3*i)) ++errs; } } MPI_Op_free(&uop); free(inbuf); free(inoutbuf); #endif MTest_Finalize(errs); MPI_Finalize(); return 0; }
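In this test user_op must turn inoutbuf[i] from i into 3*i given inbuf[i] = i, i.e. inout = 2*in + inout, and the errs += uop_errs line suggests the op also validates its datatype argument through a global counter. A sketch consistent with both (the datatype check is an assumption):

static int uop_errs = 0;

static void user_op(void *invec, void *inoutvec, int *len, MPI_Datatype *dtype)
{
    int *in = (int *) invec, *inout = (int *) inoutvec;
    int i;
    if (*dtype != MPI_INT)
        ++uop_errs;            /* folded into errs by the caller */
    for (i = 0; i < *len; ++i)
        inout[i] = 2 * in[i] + inout[i];
}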
int main( int argc, char **argv ) { MPI_Op op; int i, rank, size, bufsize, errcnt = 0, toterr; double *inbuf, *outbuf, value; MPI_Init( &argc, &argv ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); MPI_Comm_size( MPI_COMM_WORLD, &size ); MPI_Op_create( (MPI_User_function *)add, 1, &op ); bufsize = 1; while (bufsize < 100000) { inbuf = (double *)malloc( bufsize * sizeof(double) ); outbuf = (double *)malloc( bufsize * sizeof(double) ); if (! inbuf || ! outbuf) { fprintf( stderr, "Could not allocate buffers for size %d\n", bufsize ); errcnt++; break; } value = (rank & 0x1) ? 1.0 : -1.0; for (i=0; i<bufsize; i++) { inbuf[i] = value; outbuf[i] = 100.0; } MPI_Allreduce( inbuf, outbuf, bufsize, MPI_DOUBLE, op, MPI_COMM_WORLD ); /* Check values */ value = (size & 0x1) ? -1.0 : 0.0; for (i=0; i<bufsize; i++) { if (outbuf[i] != value) { if (errcnt < 10) printf( "outbuf[%d] = %f, should = %f\n", i, outbuf[i], value ); errcnt ++; } } free( inbuf ); free( outbuf ); bufsize *= 2; } MPI_Allreduce( &errcnt, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); if (rank == 0) { if (toterr == 0) printf( " No Errors\n" ); else printf( "*! %d errors!\n", toterr ); } MPI_Op_free( &op ); MPI_Finalize( ); return 0; }
Int128 Comm::add_int128(Int128 x) const { #ifdef OMEGA_H_USE_MPI MPI_Op op; int commute = true; CALL(MPI_Op_create(mpi_add_int128, commute, &op)); CALL(MPI_Allreduce(MPI_IN_PLACE, &x, sizeof(Int128), MPI_PACKED, op, impl_)); CALL(MPI_Op_free(&op)); #endif return x; }
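mpi_add_int128 reduces buffers sent as sizeof(Int128) bytes of MPI_PACKED, so inside the user function *len counts bytes and each buffer holds exactly one 128-bit value. A C-style sketch under an assumed high/low two-word layout (Omega_h's real Int128 is a C++ class, so this is illustrative only):

#include <stdint.h>

typedef struct { int64_t high; uint64_t low; } Int128;  /* assumed layout */

static void mpi_add_int128(void *a, void *b, int *len, MPI_Datatype *dtype)
{
    Int128 *x = (Int128 *) a;
    Int128 *y = (Int128 *) b;
    uint64_t low = y->low + x->low;
    y->high += x->high + (low < x->low);  /* carry out of the low half */
    y->low = low;
}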
/* * Class: mpi_Op * Method: Free * Signature: ()V */ JNIEXPORT void JNICALL Java_mpi_Op_free(JNIEnv *env, jobject jthis) { MPI_Op op = (MPI_Op)((*env)->GetLongField(env, jthis, ompi_java.OpHandle)); if(op != NULL && op != MPI_OP_NULL) { int rc = MPI_Op_free(&op); ompi_java_exceptionCheck(env, rc); ((*env)->SetLongField(env,jthis,ompi_java.OpHandle,(long)MPI_OP_NULL)); } }
~user_op() { if (std::uncaught_exception()) { // Ignore failure cases: there are obviously other problems // already, and we don't want to cause program termination if // MPI_Op_free fails. MPI_Op_free(&mpi_op); } else { BOOST_MPI_CHECK_RESULT(MPI_Op_free, (&mpi_op)); } }
std::pair<MPI_Request, const K*>* buildTwo(Prcndtnr* B, const MPI_Comm& comm) { static_assert(std::is_same<typename Prcndtnr::super&, decltype(*this)>::value || std::is_same<typename Prcndtnr::super::super&, decltype(*this)>::value, "Wrong preconditioner"); std::pair<MPI_Request, const K*>* ret = nullptr; constexpr unsigned short N = std::is_same<typename Prcndtnr::super&, decltype(*this)>::value ? 2 : 3; unsigned short allUniform[N + 1]; allUniform[0] = Subdomain<K>::_map.size(); const Option& opt = *Option::get(); unsigned short nu = allUniform[1] = (_co ? _co->getLocal() : static_cast<unsigned short>(opt["geneo_nu"])); allUniform[2] = static_cast<unsigned short>(~nu); if(N == 3) allUniform[3] = nu > 0 ? nu : std::numeric_limits<unsigned short>::max(); { MPI_Op op; #ifdef __MINGW32__ MPI_Op_create(&f<N>, 1, &op); #else auto f = [](void* in, void* inout, int*, MPI_Datatype*) -> void { HPDDM_LAMBDA_F(in, input, inout, output, N) }; MPI_Op_create(f, 1, &op); #endif MPI_Allreduce(MPI_IN_PLACE, allUniform, N + 1, MPI_UNSIGNED_SHORT, op, comm); MPI_Op_free(&op); } if(nu > 0 || allUniform[1] != 0 || allUniform[2] != std::numeric_limits<unsigned short>::max()) { if(!_co) { _co = new CoarseOperator; _co->setLocal(nu); } double construction = MPI_Wtime(); if(allUniform[1] == nu && allUniform[2] == static_cast<unsigned short>(~nu)) ret = _co->template construction<1, excluded>(Operator(*B, allUniform[0]), comm); else if(N == 3 && allUniform[1] == 0 && allUniform[2] == static_cast<unsigned short>(~allUniform[3])) ret = _co->template construction<2, excluded>(Operator(*B, allUniform[0]), comm); else ret = _co->template construction<0, excluded>(Operator(*B, allUniform[0]), comm); construction = MPI_Wtime() - construction; if(_co->getRank() == 0 && opt.val<int>("verbosity") > 0) { std::stringstream ss; ss << std::setprecision(2) << construction; std::string line = " --- coarse operator transferred and factorized by " + to_string(static_cast<int>(opt["master_p"])) + " process" + (static_cast<int>(opt["master_p"]) == 1 ? "" : "es") + " (in " + ss.str() + "s)"; std::cout << line << std::endl; std::cout << std::right << std::setw(line.size()) << "(criterion = " + to_string(allUniform[1] == nu && allUniform[2] == static_cast<unsigned short>(~nu) ? nu : (N == 3 && allUniform[2] == static_cast<unsigned short>(~allUniform[3]) ? -_co->getLocal() : 0)) + " -- topology = " + to_string(static_cast<int>(opt["master_topology"])) + " -- distribution = " + to_string(static_cast<int>(opt["master_distribution"])) + ")" << std::endl; std::cout.unsetf(std::ios_base::adjustfield); } } else { delete _co; _co = nullptr; } return ret; }
void ADIO_End(int *error_code) { ADIOI_Flatlist_node *curr, *next; ADIOI_Datarep *datarep, *datarep_next; /* FPRINTF(stderr, "reached end\n"); */ /* if a default errhandler was set on MPI_FILE_NULL then we need to ensure * that our reference to that errhandler is released */ /* Open MPI: The call to PMPI_File_set_errhandler has to be done in romio/src/io_romio_file_open.c in routine mca_io_romio_file_close() */ #if 0 PMPI_File_set_errhandler(MPI_FILE_NULL, MPI_ERRORS_RETURN); #endif /* delete the flattened datatype list */ curr = ADIOI_Flatlist; while (curr) { if (curr->blocklens) ADIOI_Free(curr->blocklens); if (curr->indices) ADIOI_Free(curr->indices); next = curr->next; ADIOI_Free(curr); curr = next; } ADIOI_Flatlist = NULL; /* free file and info tables used for Fortran interface */ if (ADIOI_Ftable) ADIOI_Free(ADIOI_Ftable); #ifndef HAVE_MPI_INFO if (MPIR_Infotable) ADIOI_Free(MPIR_Infotable); #endif /* free the memory allocated for a new data representation, if any */ datarep = ADIOI_Datarep_head; while (datarep) { datarep_next = datarep->next; ADIOI_Free(datarep->name); ADIOI_Free(datarep); datarep = datarep_next; } if( ADIOI_syshints != MPI_INFO_NULL) MPI_Info_free(&ADIOI_syshints); MPI_Op_free(&ADIO_same_amode); *error_code = MPI_SUCCESS; }
inline std::pair<MPI_Request, const K*>* buildTwo(Operator&& A, const MPI_Comm& comm, Container& parm) { static_assert(N == 2 || N == 3, "Wrong template parameter"); std::pair<MPI_Request, const K*>* ret = nullptr; unsigned short allUniform[N + 1]; allUniform[0] = Subdomain<K>::_map.size(); allUniform[1] = parm[NU]; allUniform[2] = static_cast<unsigned short>(~parm[NU]); if(N == 3) allUniform[3] = parm[NU] > 0 ? parm[NU] : std::numeric_limits<unsigned short>::max(); { MPI_Op op; #ifndef __MINGW32__ auto f = [](void* in, void* inout, int*, MPI_Datatype*) -> void { HPDDM_LAMBDA_F(in, input, inout, output, N) }; MPI_Op_create(f, 1, &op); #else MPI_Op_create(&f<N>, 1, &op); #endif MPI_Allreduce(MPI_IN_PLACE, allUniform, N + 1, MPI_UNSIGNED_SHORT, op, comm); MPI_Op_free(&op); } A.sparsity(allUniform[0]); if(parm[NU] > 0 || allUniform[1] != 0 || allUniform[2] != std::numeric_limits<unsigned short>::max()) { if(!_co) _co = new CoarseOperator; _co->setLocal(parm[NU]); double construction = MPI_Wtime(); if(allUniform[1] == parm[NU] && allUniform[2] == static_cast<unsigned short>(~parm[NU])) ret = _co->template construction<1, excluded>(A, comm, parm); else if(N == 3 && allUniform[1] == 0 && allUniform[2] == static_cast<unsigned short>(~allUniform[3])) ret = _co->template construction<2, excluded>(A, comm, parm); else ret = _co->template construction<0, excluded>(A, comm, parm); construction = MPI_Wtime() - construction; if(_co->getRank() == 0) { std::cout << " (" << parm[P] << " process" << (parm[P] > 1 ? "es" : "") << " -- topology = " << parm[TOPOLOGY] << " -- distribution = " << _co->getDistribution() << ")" << std::endl; std::cout << std::scientific << " --- coarse operator transferred and factorized (in " << construction << ")" << std::endl; std::cout << " (criterion: " << (allUniform[1] == parm[NU] && allUniform[2] == static_cast<unsigned short>(~parm[NU]) ? parm[NU] : (N == 3 && allUniform[2] == static_cast<unsigned short>(~allUniform[3]) ? -_co->getLocal() : 0)) << ")" << std::endl; } _uc = new K[_co->getSizeRHS()]; } return ret; }
int main(int argc, char *argv[]) { int errs = 0; int rank, size; int minsize = 2, count; MPI_Comm comm; MPI_Op op; int *buf, i; MTest_Init(&argc, &argv); MPI_Op_create(mysum, 0, &op); while (MTestGetIntracommGeneral(&comm, minsize, 1)) { if (comm == MPI_COMM_NULL) continue; MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank); for (count = 1; count < 65000; count = count * 2) { /* Contiguous data */ buf = (int *) malloc(count * sizeof(int)); for (i = 0; i < count; i++) buf[i] = rank + i; MPI_Allreduce(MPI_IN_PLACE, buf, count, MPI_INT, op, comm); /* Check the results */ for (i = 0; i < count; i++) { int result = i * size + (size * (size - 1)) / 2; if (buf[i] != result) { errs++; if (errs < 10) { fprintf(stderr, "buf[%d] = %d expected %d\n", i, buf[i], result); } } } free(buf); } MTestFreeComm(&comm); } MPI_Op_free(&op); MTest_Finalize(errs); return MTestReturnValue(errs); }
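mysum is not shown; the expected value i*size + size*(size-1)/2 is the element-wise sum of rank + i over all ranks, so the op is a plain integer sum, registered non-commutative presumably to exercise the ordered-reduction path with MPI_IN_PLACE. A minimal sketch:

static void mysum(void *cin, void *cinout, int *count, MPI_Datatype *dtype)
{
    const int *in = (const int *) cin;
    int *inout = (int *) cinout;
    int i;
    for (i = 0; i < *count; i++)
        inout[i] += in[i];
}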
/** * Reduce all server timers to server master. * @param timer */ void mpi_reduce_timers(TIMER& timer){ sip::SIPMPIAttr &attr = sip::SIPMPIAttr::get_instance(); sip::check(attr.is_server(), "Trying to reduce timer on a non-server rank !"); long long * timers = timer.get_timers(); long long * timer_counts = timer.get_timer_count(); // Data to send to reduce long long * sendbuf = new long long[2*timer.max_slots + 1]; sendbuf[0] = timer.max_slots; // The data will be structured as // Length of arrays 1 & 2 // Array1 -> timer_switched_ array // Array2 -> timer_list_ array std::copy(timer_counts + 0, timer_counts + timer.max_slots, sendbuf+1); std::copy(timers + 0, timers + timer.max_slots, sendbuf+1+ timer.max_slots); long long * recvbuf = new long long[2*timer.max_slots + 1](); int server_master = attr.COMPANY_MASTER_RANK; MPI_Comm server_company = attr.company_communicator(); MPI_Datatype server_timer_reduce_dt; // MPI Type for timer data to be reduced. MPI_Op server_timer_reduce_op; // MPI OP to reduce timer data. SIPMPIUtils::check_err(MPI_Type_contiguous(timer.max_slots*2+1, MPI_LONG_LONG, &server_timer_reduce_dt)); SIPMPIUtils::check_err(MPI_Type_commit(&server_timer_reduce_dt)); SIPMPIUtils::check_err(MPI_Op_create((MPI_User_function *)server_timer_reduce_op_function, 1, &server_timer_reduce_op)); SIPMPIUtils::check_err(MPI_Reduce(sendbuf, recvbuf, 1, server_timer_reduce_dt, server_timer_reduce_op, server_master, server_company)); if (attr.is_company_master()){ std::copy(recvbuf+1, recvbuf+1+timer.max_slots, timer_counts); std::copy(recvbuf+1+timer.max_slots, recvbuf+1+2*timer.max_slots, timers); } // Cleanup delete [] sendbuf; delete [] recvbuf; SIPMPIUtils::check_err(MPI_Type_free(&server_timer_reduce_dt)); SIPMPIUtils::check_err(MPI_Op_free(&server_timer_reduce_op)); }
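server_timer_reduce_op_function is not shown; the comments pin down the record layout ([max_slots, counts..., timers...]) but not the merge rule. A hedged sketch assuming both arrays are summed element-wise (whether times should instead be maxed is not stated):

/* hypothetical: merge two flattened timer records of the form
   [max_slots, counts[max_slots], timers[max_slots]]; *len is 1
   because the contiguous datatype packs the whole record */
void server_timer_reduce_op_function(void *in, void *inout, int *len, MPI_Datatype *dtype)
{
    long long *a = (long long *) in;
    long long *b = (long long *) inout;
    long long n = a[0];          /* slot count stored in element 0 */
    long long i;
    b[0] = n;
    for (i = 1; i <= 2 * n; i++) /* counts, then accumulated times */
        b[i] += a[i];
}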
int main(int argc, char **argv) { int rank, size; int data; int errors = 0; int result = -100; MPI_Op op; MTest_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); data = rank; MPI_Op_create((MPI_User_function *) assoc, 0, &op); MPI_Reduce(&data, &result, 1, MPI_INT, op, size - 1, MPI_COMM_WORLD); MPI_Bcast(&result, 1, MPI_INT, size - 1, MPI_COMM_WORLD); MPI_Op_free(&op); if (result == BAD_ANSWER) errors++; MTest_Finalize(errors); return MTestReturnValue(errors); }
int split_bin_2level( int bins, int bin, const lwgrp_ring* lev1_ring, const lwgrp_logring* lev1_logring, const lwgrp_ring* lev2_ring, const lwgrp_logring* lev2_logring, lwgrp_ring* new_lev1_ring, lwgrp_logring* new_lev1_logring, lwgrp_ring* new_lev2_ring, lwgrp_logring* new_lev2_logring) { int i; /* initialize new rings and logrings to empty groups, * we'll overwrite these if proc is really in a group */ lwgrp_ring_set_null(new_lev1_ring); lwgrp_ring_set_null(new_lev2_ring); lwgrp_logring_build_from_ring(new_lev1_ring, new_lev1_logring); lwgrp_logring_build_from_ring(new_lev2_ring, new_lev2_logring); if (bins <= 0) { return 0; } /* get our rank within and the size of the parent communicator */ int comm_size; int comm_rank = lev1_ring->comm_rank; MPI_Comm_size(lev1_ring->comm, &comm_size); /* allocate memory to execute collectives */ int* reduce_inbuf = (int*) malloc(bins * sizeof(int)); int* reduce_outbuf = (int*) malloc(bins * sizeof(int)); int* scan_inbuf = (int*) malloc(2 * bins * sizeof(int)); int* scan_recvleft = (int*) malloc(2 * bins * sizeof(int)); int* scan_recvright = (int*) malloc(2 * bins * sizeof(int)); /* initialize all bins to MPI_PROC_NULL, except for our * bin in which case we list our rank within comm */ for (i = 0; i < bins; i++) { /* initialize all bins to size(lev1), would like MPI_PROC_NULL, * but we use size instead so that reduce(min) does the right thing */ reduce_inbuf[i] = comm_size; } if (bin >= 0) { reduce_inbuf[bin] = comm_rank; } /* reduce to node leader to find lowest rank in each bin */ lwgrp_logring_reduce( reduce_inbuf, reduce_outbuf, bins, MPI_INT, MPI_MIN, 0, lev1_ring, lev1_logring ); /* create the scan type (a rank and a count pair) */ MPI_Datatype scan_type; MPI_Type_contiguous(2, MPI_INT, &scan_type); MPI_Type_commit(&scan_type); /* double exscan across node leaders to * build info for new node leader chains */ int lev1_rank = lev1_ring->group_rank; if (lev1_rank == 0) { /* prepare data for input to double scan, for each bin * record the lowest rank and a count of either 0 or 1 */ for (i = 0; i < bins; i++) { if (reduce_outbuf[i] != comm_size) { scan_inbuf[i*2 + SCAN_RANK] = reduce_outbuf[i]; scan_inbuf[i*2 + SCAN_COUNT] = 1; } else { scan_inbuf[i*2 + SCAN_RANK] = MPI_PROC_NULL; scan_inbuf[i*2 + SCAN_COUNT] = 0; } } /* create the scan operation */ MPI_Op scan_op; int commutative = 0; MPI_Op_create(scan_chain, commutative, &scan_op); /* execute the double exclusive scan to get next rank and * count of ranks to either side for each bin */ lwgrp_logring_double_exscan( scan_inbuf, scan_recvright, scan_inbuf, scan_recvleft, bins, scan_type, scan_op, lev2_ring, lev2_logring ); /* if we're on the end of the level 2 group, need to initialize * the recv values */ int lev2_rank = lev2_ring->group_rank; int lev2_size = lev2_ring->group_size; if (lev2_rank == 0) { /* we're on the left end of lev2 group, so we didn't get * anything from the left side */ for (i = 0; i < bins; i++) { scan_recvleft[i*2 + SCAN_RANK] = MPI_PROC_NULL; scan_recvleft[i*2 + SCAN_COUNT] = 0; } } if (lev2_rank == lev2_size-1) { /* we're on the right end of lev2 group, so we didn't get * anything from the right side */ for (i = 0; i < bins; i++) { scan_recvright[i*2 + SCAN_RANK] = MPI_PROC_NULL; scan_recvright[i*2 + SCAN_COUNT] = 0; } } /* free the scan op */ MPI_Op_free(&scan_op); } /* broadcast scan results to local comm */ lwgrp_logring_bcast(scan_recvleft, bins, scan_type, 0, lev1_ring, lev1_logring); lwgrp_logring_bcast(scan_recvright, bins, scan_type, 0, lev1_ring, lev1_logring); /* 
free the scan type */ MPI_Type_free(&scan_type); /* call bin_split on local chain */ lwgrp_ring_split_bin_radix(bins, bin, lev1_ring, new_lev1_ring); lwgrp_logring_build_from_ring(new_lev1_ring, new_lev1_logring); /* for each valid bin, all rank 0 procs of new lev1 groups form new lev2 groups */ if (bin >= 0) { int new_lev1_rank = new_lev1_ring->group_rank; if (new_lev1_rank == 0) { /* extract chain values from scan results */ MPI_Comm comm = new_lev1_ring->comm; int left = scan_recvleft[2*bin + SCAN_RANK]; int right = scan_recvright[2*bin + SCAN_RANK]; int size = scan_recvleft[2*bin + SCAN_COUNT] + scan_recvright[2*bin + SCAN_COUNT] + 1; int rank = scan_recvleft[2*bin + SCAN_COUNT]; /* build chain, then ring, then logring, and finally free chain */ lwgrp_chain tmp_chain; lwgrp_chain_build_from_vals(comm, left, right, size, rank, &tmp_chain); lwgrp_ring_build_from_chain(&tmp_chain, new_lev2_ring); lwgrp_logring_build_from_ring(new_lev2_ring, new_lev2_logring); lwgrp_chain_free(&tmp_chain); } } /* free our temporary memory */ free(scan_recvright); free(scan_recvleft); free(scan_inbuf); free(reduce_outbuf); free(reduce_inbuf); return 0; }
F_VOID_FUNC dgamn2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n, double *A, int *lda, int *rA, int *cA, int *ldia, int *rdest, int *cdest) #endif /* * -- V1.1 BLACS routine -- * University of Tennessee, May 1, 1996 * Written by Clint Whaley. * * Purpose * ======= * Combine amn operation for double precision rectangular matrices. * * Arguments * ========= * * ConTxt (input) Ptr to int * Index into MyConTxts00 (my contexts array). * * SCOPE (input) Ptr to char * Limit the scope of the operation. * = 'R' : Operation is performed by a process row. * = 'C' : Operation is performed by a process column. * = 'A' : Operation is performed by all processes in grid. * * TOP (input) Ptr to char * Controls fashion in which messages flow within the operation. * * M (input) Ptr to int * The number of rows of the matrix A. M >= 0. * * N (input) Ptr to int * The number of columns of the matrix A. N >= 0. * * A (output) Ptr to double precision two dimensional array * The m by n matrix A. Fortran77 (column-major) storage * assumed. * * LDA (input) Ptr to int * The leading dimension of the array A. LDA >= M. * * RA (output) Integer Array, dimension (LDIA, N) * Contains process row that the amn of each element * of A was found on: i.e., rA(1,2) contains the process * row that the amn of A(1,2) was found on. * Values are left on process {rdest, cdest} only, others * may be modified, but not left with interesting data. * If rdest == -1, then result is left on all processes in scope. * If LDIA == -1, this array is not accessed, and need not exist. * * CA (output) Integer Array, dimension (LDIA, N) * Contains process column that the amn of each element * of A was found on: i.e., cA(1,2) contains the process * column that the max/min of A(1,2) was found on. * Values are left on process {rdest, cdest} only, others * may be modified, but not left with interesting data. * If rdest == -1, then result is left on all processes in scope. * If LDIA == -1, this array is not accessed, and need not exist. * * LDIA (input) Ptr to int * If (LDIA == -1), then the arrays RA and CA are not accessed. * ELSE leading dimension of the arrays RA and CA. LDIA >= M. * * RDEST (input) Ptr to int * The process row of the destination of the amn. * If rdest == -1, then result is left on all processes in scope. * * CDEST (input) Ptr to int * The process column of the destination of the amn. * If rdest == -1, then CDEST ignored. 
* * ------------------------------------------------------------------------ */ { void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int, int *, int *); void BI_UpdateBuffs(BLACBUFF *); BLACBUFF *BI_GetBuff(int); int BI_BuffIsFree(BLACBUFF *, int); MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int, MPI_Datatype, int *); BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype); MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int, MPI_Datatype, int *); void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype); void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR, int, int); void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR, int, int); void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR); void BI_dvvamn(int, char *, char *); void BI_dvvamn2(int, char *, char *); void BI_dMPI_amn(void *, void *, int *, MPI_Datatype *); void BI_dMPI_amn2(void *, void *, int *, MPI_Datatype *); /* * Variable Declarations */ VVFUNPTR vvop; BLACBUFF *bp, *bp2; BLACSCONTEXT *ctxt; char ttop, tscope; int i, j, N, dest, idist, length, tlda, tldia, trdest, ierr; int len[2]; MPI_Aint disp[2]; MPI_Datatype dtypes[2]; MPI_Op BlacComb; MPI_Datatype MyType; BI_DistType *dist, mydist; extern BLACBUFF *BI_ActiveQ; extern BLACBUFF BI_AuxBuff; MGetConTxt(Mpval(ConTxt), ctxt); ttop = F2C_CharTrans(top); ttop = Mlowcase(ttop); tscope = F2C_CharTrans(scope); tscope = Mlowcase(tscope); /* * If the user has set the default combine topology, use it instead of * BLACS default */ #ifdef DefCombTop if (ttop == ' ') ttop = DefCombTop; #endif if (Mpval(cdest) == -1) trdest = -1; else trdest = Mpval(rdest); #if (BlacsDebugLvl > 0) BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m), Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest)); if (Mpval(ldia) < Mpval(m)) { if (Mpval(ldia) != -1) BI_BlacsWarn(Mpval(ConTxt), __LINE__, __FILE__, "LDIA too small (LDIA=%d, but M=%d)", Mpval(ldia), Mpval(m)); } #endif if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda); else tlda = Mpval(m); if (Mpval(ldia) < Mpval(m)) tldia = Mpval(m); else tldia = Mpval(ldia); switch(tscope) { case 'r': ctxt->scp = &ctxt->rscp; if (trdest == -1) dest = -1; else dest = Mpval(cdest); break; case 'c': ctxt->scp = &ctxt->cscp; dest = trdest; break; case 'a': ctxt->scp = &ctxt->ascp; if (trdest == -1) dest = -1; else dest = Mvkpnum(ctxt, trdest, Mpval(cdest)); break; default: BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'", tscope); } /* * It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree * topology if we've got one */ if (ttop == ' ') if ( (Mpval(m) < 1) || (Mpval(n) < 1) || (ctxt->TopsRepeat) ) ttop = '1'; N = Mpval(m) * Mpval(n); /* * If process who has amn is to be communicated, must set up distance * vector after value vector */ if (Mpval(ldia) != -1) { vvop = BI_dvvamn; length = N * sizeof(double); i = length % sizeof(BI_DistType); /* ensure dist vec aligned correctly */ if (i) length += sizeof(BI_DistType) - i; idist = length; length += N * sizeof(BI_DistType); /* * For performance, insist second buffer is at least 8-byte aligned */ j = 8; if (sizeof(double) > j) j = sizeof(double); i = length % j; if (i) length += j - i; i = 2 * length; bp = BI_GetBuff(i); bp2 = &BI_AuxBuff; bp2->Buff = &bp->Buff[length]; BI_dmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff); /* * Fill in distance vector */ if (dest == -1) mydist = ctxt->scp->Iam; else mydist = (ctxt->scp->Np + ctxt->scp->Iam - dest) % ctxt->scp->Np; 
dist = (BI_DistType *) &bp->Buff[idist]; for (i=0; i < N; i++) dist[i] = mydist; /* * Create the MPI datatype holding both user's buffer and distance vector */ len[0] = len[1] = N; disp[0] = 0; disp[1] = idist; dtypes[0] = MPI_DOUBLE; dtypes[1] = BI_MpiDistType; #ifdef ZeroByteTypeBug if (N > 0) { #endif i = 2; ierr=MPI_Type_struct(i, len, disp, dtypes, &MyType); ierr=MPI_Type_commit(&MyType); bp->N = bp2->N = 1; bp->dtype = bp2->dtype = MyType; #ifdef ZeroByteTypeBug } else { bp->N = bp2->N = 0; bp->dtype = bp2->dtype = MPI_INT; } #endif } else { vvop = BI_dvvamn2; length = N * sizeof(double); /* * If A is contiguous, we can use it as one of our buffers */ if ( (Mpval(m) == tlda) || (Mpval(n) == 1) ) { bp = &BI_AuxBuff; bp->Buff = (char *) A; bp2 = BI_GetBuff(length); } else { bp = BI_GetBuff(length*2); bp2 = &BI_AuxBuff; bp2->Buff = &bp->Buff[length]; BI_dmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff); } bp->N = bp2->N = N; bp->dtype = bp2->dtype = MPI_DOUBLE; } switch(ttop) { case ' ': /* use MPI's reduction by default */ i = 1; if (Mpval(ldia) == -1) { ierr=MPI_Op_create(BI_dMPI_amn2, i, &BlacComb); } else { ierr=MPI_Op_create(BI_dMPI_amn, i, &BlacComb); BI_AuxBuff.Len = N; /* set this up for the MPI OP wrappers */ } if (trdest != -1) { ierr=MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb, dest, ctxt->scp->comm); if (ctxt->scp->Iam == dest) { BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff); if (Mpval(ldia) != -1) BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia, (BI_DistType *) &bp2->Buff[idist], trdest, Mpval(cdest)); } } else { ierr=MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb, ctxt->scp->comm); BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff); if (Mpval(ldia) != -1) BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia, (BI_DistType *) &bp2->Buff[idist], trdest, Mpval(cdest)); } ierr=MPI_Op_free(&BlacComb); if (Mpval(ldia) != -1) #ifdef ZeroByteTypeBug if (N > 0) #endif ierr=BI_MPI_TYPE_FREE(&MyType); if (BI_ActiveQ) BI_UpdateBuffs(NULL); return; break; case 'i': BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 1); break; case 'd': BI_MringComb(ctxt, bp, bp2, N, vvop, dest, -1); break; case 's': BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 2); break; case 'm': BI_MringComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nr_co); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ttop-47); break; case 'f': BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, FULLCON); break; case 't': BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nb_co); break; case 'h': /* * Use bidirectional exchange if everyone wants answer */ if ( (trdest == -1) && !(ctxt->TopsCohrnt) ) BI_BeComb(ctxt, bp, bp2, N, vvop); else BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, 2); break; default : BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'", ttop); } if (Mpval(ldia) != -1) #ifdef ZeroByteTypeBug if (N > 0) #endif ierr=BI_MPI_TYPE_FREE(&MyType); /* * If I am selected to receive answer */ if ( (ctxt->scp->Iam == dest) || (dest == -1) ) { /* * Translate the distances stored in the latter part of bp->Buff into * process grid coordinates, and output these coordinates in the * arrays rA and cA. */ if (Mpval(ldia) != -1) BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia, dist, trdest, Mpval(cdest)); /* * Unpack the amn array */ if (bp != &BI_AuxBuff) BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff); } }
int main( int argc, char **argv ) { int rank, size, i; int data; int errors=0; int result = -100; int correct_result; MPI_Op op_assoc, op_addem; MPI_Comm comm; MPI_Init( &argc, &argv ); MPI_Op_create( (MPI_User_function *)assoc, 0, &op_assoc ); MPI_Op_create( (MPI_User_function *)addem, 1, &op_addem ); /* Run this for a variety of communicator sizes */ while ((comm = GetNextComm()) != MPI_COMM_NULL) { MPI_Comm_rank( comm, &rank ); MPI_Comm_size( comm, &size ); data = rank; correct_result = 0; for (i=0;i<=rank;i++) correct_result += i; MPI_Scan ( &data, &result, 1, MPI_INT, MPI_SUM, comm ); if (result != correct_result) { fprintf( stderr, "[%d] Error summing ints with scan\n", rank ); errors++; } MPI_Scan ( &data, &result, 1, MPI_INT, MPI_SUM, comm ); if (result != correct_result) { fprintf( stderr, "[%d] Error summing ints with scan (2)\n", rank ); errors++; } data = rank; result = -100; MPI_Scan ( &data, &result, 1, MPI_INT, op_addem, comm ); if (result != correct_result) { fprintf( stderr, "[%d] Error summing ints with scan (userop)\n", rank ); errors++; } MPI_Scan ( &data, &result, 1, MPI_INT, op_addem, comm ); if (result != correct_result) { fprintf( stderr, "[%d] Error summing ints with scan (userop2)\n", rank ); errors++; } /* result = -100;*/ /* data = rank;*/ /* MPI_Scan ( &data, &result, 1, MPI_INT, op_assoc, comm );*/ /* if (result == BAD_ANSWER) {*/ /* fprintf( stderr, "[%d] Error scanning with non-commutative op\n",*/ /* rank );*/ /* errors++;*/ /* }*/ MPI_Comm_free( &comm ); } MPI_Op_free( &op_assoc ); MPI_Op_free( &op_addem ); if (errors) { MPI_Comm_rank( MPI_COMM_WORLD, &rank ); printf( "[%d] done with ERRORS(%d)!\n", rank, errors ); } Test_Waitforall( ); MPI_Finalize(); return errors; }
int Zoltan_LB_Build_PartDist(ZZ *zz) { char *yo = "Zoltan_LB_Build_PartDist"; int ierr = ZOLTAN_OK; int inflag[6], outflag[6] = {0,0,-1,0,0,0}; int global_parts_set = 0; /* number of procs on which NUM_GLOBAL_PARTS parameter was set. */ int local_parts_set = 0; /* number of procs on which NUM_LOCAL_PARTS parameter was set. */ int max_global_parts = 0; /* Max value of Num_Global_Parts_Param on all procs. */ int sum_local_parts = 0; /* Sum of Num_Local_Parts over all procs. Procs on which NUM_LOCAL_PARTS was not set assume zero parts on them. Thus, sum_local_parts may be < max_global_parts. */ int remaining_procs; /* Num of procs not setting NUM_LOCAL_PARTS */ int avail_local_parts; /* max_global_parts - sum_local_parts */ int num_proc = zz->Num_Proc; int *pdist; int local_parts = 0; int *local_parts_params = NULL; int i, j, cnt, pcnt; int frac = 0, mod = 0; MPI_Op op; MPI_User_function Zoltan_PartDist_MPIOp; struct Zoltan_part_info *part_sizes=NULL; float sum_parts, part_total; MPI_Op_create(&Zoltan_PartDist_MPIOp,1,&op); /* Check whether global parts or local parts parameters were used. */ inflag[0] = (zz->LB.Num_Global_Parts_Param != -1); inflag[1] = (zz->LB.Num_Local_Parts_Param != -1); inflag[2] = zz->LB.Num_Global_Parts_Param; inflag[3] = ((zz->LB.Num_Local_Parts_Param == -1) ? 0 : zz->LB.Num_Local_Parts_Param); inflag[4] = (zz->LB.Num_Global_Parts_Param != zz->LB.Prev_Global_Parts_Param); inflag[5] = (zz->LB.Num_Local_Parts_Param != zz->LB.Prev_Local_Parts_Param); MPI_Allreduce(inflag, outflag, 6, MPI_INT, op, zz->Communicator); MPI_Op_free(&op); if (!outflag[4] && !outflag[5]) { /* Parameter values have not changed since last invocation of Zoltan. */ /* Do not have to change PartDist or Num_Global_Parts. */ goto End; } /* Since PartDist is changing, can't reuse old parts. * Free LB.Data_Structure to prevent reuse. * Also free LB.PartDist and LB.ProcDist. */ if (zz->LB.Free_Structure != NULL) zz->LB.Free_Structure(zz); ZOLTAN_FREE(&(zz->LB.PartDist)); ZOLTAN_FREE(&(zz->LB.ProcDist)); zz->LB.Prev_Global_Parts_Param = zz->LB.Num_Global_Parts_Param; zz->LB.Prev_Local_Parts_Param = zz->LB.Num_Local_Parts_Param; global_parts_set = outflag[0]; /* Sum of inflag[0] */ local_parts_set = outflag[1]; /* Sum of inflag[1] */ max_global_parts = outflag[2]; /* Max of inflag[2] */ sum_local_parts = outflag[3]; /* Sum of inflag[3] */ /* Check whether any parameters were set; * No need to build the PartDist array if not. */ if ((!global_parts_set || (max_global_parts==num_proc)) && !local_parts_set) { /* Number of parts == number of procs, uniformly distributed; */ zz->LB.Num_Global_Parts = num_proc; zz->LB.Single_Proc_Per_Part = 1; } else { /* Either NUM_GLOBAL_PARTS is set != num_proc or NUM_LOCAL_PARTS * is set. Build PartDist, distributing parts to processors as * specified. */ /* error checking. 
*/ if (local_parts_set) { if (!global_parts_set) max_global_parts = sum_local_parts; else if (sum_local_parts > max_global_parts) { char emsg[256]; sprintf(emsg, "Sum of NUM_LOCAL_PARTS %d > NUM_GLOBAL_PARTS %d", sum_local_parts, max_global_parts); ZOLTAN_PRINT_ERROR(zz->Proc, yo, emsg); ierr = ZOLTAN_FATAL; goto End; } else if (sum_local_parts < max_global_parts && local_parts_set == num_proc) { char emsg[256]; sprintf(emsg, "Sum of NUM_LOCAL_PARTS %d < NUM_GLOBAL_PARTS %d", sum_local_parts, max_global_parts); ZOLTAN_PRINT_ERROR(zz->Proc, yo, emsg); ierr = ZOLTAN_FATAL; goto End; } } if (max_global_parts == 0) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Zero parts requested"); ierr = ZOLTAN_FATAL; goto End; } /* Allocate space for PartDist. */ zz->LB.PartDist = (int *) ZOLTAN_MALLOC((max_global_parts+1)*sizeof(int)); if (zz->LB.PartDist == NULL) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); ierr = ZOLTAN_MEMERR; goto End; } pdist = zz->LB.PartDist; /* Compute the PartDist array. */ if (!local_parts_set) { part_sizes = zz->LB.Part_Info; if (part_sizes != NULL){ /* Use ratio of part sizes to assign parts to procs */ part_total = 0.0; for (i=0; i < zz->LB.Part_Info_Len; i++){ part_total += part_sizes[i].Size; } pdist[0] = 0; zz->LB.Single_Proc_Per_Part = 1; sum_parts = 0.0; for (i = 1; i < max_global_parts; i++){ sum_parts += (part_sizes[i-1].Size / part_total); pdist[i] = sum_parts * num_proc; if (pdist[i] > pdist[i-1] + 1){ zz->LB.Single_Proc_Per_Part = 0; } } pdist[max_global_parts] = num_proc; } else{ if (max_global_parts > num_proc) { /* NUM_LOCAL_PARTS is not set; NUM_GLOBAL_PARTS > num_proc. */ /* Even distribution of parts to processors. */ zz->LB.Single_Proc_Per_Part = 1; frac = max_global_parts / num_proc; mod = max_global_parts % num_proc; for (cnt = 0, i = 0; i < num_proc; i++) { local_parts = frac + ((num_proc - i) <= mod); for (j = 0; j < local_parts; j++) pdist[cnt++] = i; } pdist[cnt] = num_proc; } else { /* max_global_parts < num_proc */ /* NUM_LOCAL_PARTS is not set; NUM_GLOBAL_PARTS < num_proc. */ /* Even distribution of processors to parts. */ zz->LB.Single_Proc_Per_Part = 0; /* Parts are spread across procs */ pdist[0] = 0; frac = num_proc / max_global_parts; mod = num_proc % max_global_parts; for (i = 1; i < max_global_parts; i++) pdist[i] = pdist[i-1] + frac + ((max_global_parts - i) <= mod); pdist[max_global_parts] = num_proc; } } } else /* local_parts_set */ { /* NUM_LOCAL_PARTS is set on at least some processors. */ /* Distribute parts to processors to match NUM_LOCAL_PARTS where specified; distribute remaining parts to processors that didn't specify NUM_LOCAL_PARTS */ zz->LB.Single_Proc_Per_Part = 1; /* Gather the parameter values from all processors. */ local_parts_params = (int *) ZOLTAN_MALLOC((num_proc+1)* sizeof(int)); MPI_Allgather(&(zz->LB.Num_Local_Parts_Param), 1, MPI_INT, local_parts_params, 1, MPI_INT, zz->Communicator); /* Compute number of parts not specified by NUM_LOCAL_PARTS */ /* In MPI_Allreduce above, processors not specifying NUM_LOCAL_PARTS * contributed zero parts to sum_local_parts. */ remaining_procs = num_proc - local_parts_set; avail_local_parts = max_global_parts - sum_local_parts; if (remaining_procs > 0) { frac = avail_local_parts / remaining_procs; mod = avail_local_parts % remaining_procs; } for (cnt = 0, pcnt = 0, i = 0; i < num_proc; i++) if (local_parts_params[i] != -1) { /* Fill in processor for its NUM_LOCAL_PARTS parts. 
*/ for (j = 0; j < local_parts_params[i]; j++) pdist[cnt++] = i; } else { /* Equally distribute avail_local_parts among remaining_procs. */ local_parts = frac + ((remaining_procs - pcnt) <= mod); for (j = 0; j < local_parts; j++) pdist[cnt++] = i; pcnt++; } pdist[cnt] = num_proc; ZOLTAN_FREE(&local_parts_params); } /* Reset Num_Global_Parts. */ zz->LB.Num_Global_Parts = max_global_parts; if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->LB.PartDist != NULL) { printf("[%1d] Debug: LB.PartDist = ", zz->Proc); for (i=0; i<=zz->LB.Num_Global_Parts; i++) printf("%d ", zz->LB.PartDist[i]); } } End: return ierr; }
static PetscErrorCode TestCellShape(DM dm) { PetscMPIInt rank; PetscInt dim, c, cStart, cEnd, count = 0; ex1_stats_t stats, globalStats; PetscReal *J, *invJ, min = 0, max = 0, mean = 0, stdev = 0; MPI_Comm comm = PetscObjectComm((PetscObject)dm); DM dmCoarse; PetscErrorCode ierr; PetscFunctionBegin; stats.min = PETSC_MAX_REAL; stats.max = PETSC_MIN_REAL; stats.sum = stats.squaresum = 0.; stats.count = 0; ierr = DMGetDimension(dm,&dim);CHKERRQ(ierr); ierr = PetscMalloc2(dim * dim, &J, dim * dim, &invJ);CHKERRQ(ierr); ierr = DMPlexGetHeightStratum(dm,0,&cStart,&cEnd);CHKERRQ(ierr); for (c = cStart; c < cEnd; c++) { PetscInt i; PetscReal frobJ = 0., frobInvJ = 0., cond2, cond, detJ; ierr = DMPlexComputeCellGeometryAffineFEM(dm,c,NULL,J,invJ,&detJ);CHKERRQ(ierr); for (i = 0; i < dim * dim; i++) { frobJ += J[i] * J[i]; frobInvJ += invJ[i] * invJ[i]; } cond2 = frobJ * frobInvJ; cond = PetscSqrtReal(cond2); stats.min = PetscMin(stats.min,cond); stats.max = PetscMax(stats.max,cond); stats.sum += cond; stats.squaresum += cond2; stats.count++; } { PetscMPIInt blockLengths[2] = {4,1}; MPI_Aint blockOffsets[2] = {offsetof(ex1_stats_t,min),offsetof(ex1_stats_t,count)}; MPI_Datatype blockTypes[2] = {MPIU_REAL,MPIU_INT}, statType; MPI_Op statReduce; ierr = MPI_Type_create_struct(2,blockLengths,blockOffsets,blockTypes,&statType);CHKERRQ(ierr); ierr = MPI_Type_commit(&statType);CHKERRQ(ierr); ierr = MPI_Op_create(ex1_stats_reduce, PETSC_TRUE, &statReduce);CHKERRQ(ierr); ierr = MPI_Reduce(&stats,&globalStats,1,statType,statReduce,0,comm);CHKERRQ(ierr); ierr = MPI_Op_free(&statReduce);CHKERRQ(ierr); ierr = MPI_Type_free(&statType);CHKERRQ(ierr); } ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); if (!rank) { count = globalStats.count; min = globalStats.min; max = globalStats.max; mean = globalStats.sum / globalStats.count; stdev = PetscSqrtReal(globalStats.squaresum / globalStats.count - mean * mean); } ierr = PetscPrintf(comm,"Mesh with %d cells, shape condition numbers: min = %g, max = %g, mean = %g, stddev = %g\n", count, (double) min, (double) max, (double) mean, (double) stdev);CHKERRQ(ierr); ierr = PetscFree2(J,invJ);CHKERRQ(ierr); ierr = DMGetCoarseDM(dm,&dmCoarse);CHKERRQ(ierr); if (dmCoarse) { ierr = TestCellShape(dmCoarse);CHKERRQ(ierr); } PetscFunctionReturn(0); }
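ex1_stats_reduce must merge two ex1_stats_t records under the struct datatype built above: min of mins, max of maxes, and sums of sum, squaresum, and count. A sketch consistent with that layout (a reconstruction, not necessarily PETSc's exact source):

static void ex1_stats_reduce(void *a, void *b, int *len, MPI_Datatype *datatype)
{
  PetscInt i;

  for (i = 0; i < *len; i++) {
    ex1_stats_t *A = ((ex1_stats_t *) a) + i;
    ex1_stats_t *B = ((ex1_stats_t *) b) + i;

    B->min        = PetscMin(A->min, B->min);
    B->max        = PetscMax(A->max, B->max);
    B->sum       += A->sum;
    B->squaresum += A->squaresum;
    B->count     += A->count;
  }
}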
int main( int argc, char *argv[] ) { int errs = 0; int rank, size, root; int minsize = 2, count; MPI_Comm comm; int *buf, *bufout; MPI_Op op; MPI_Datatype mattype; MTest_Init( &argc, &argv ); MPI_Op_create( uop, 0, &op ); while (MTestGetIntracommGeneral( &comm, minsize, 1 )) { if (comm == MPI_COMM_NULL) continue; MPI_Comm_size( comm, &size ); MPI_Comm_rank( comm, &rank ); matSize = size; /* used by the user-defined operation */ /* Only one matrix for now */ count = 1; /* A single matrix, the size of the communicator */ MPI_Type_contiguous( size*size, MPI_INT, &mattype ); MPI_Type_commit( &mattype ); buf = (int *)malloc( count * size * size * sizeof(int) ); if (!buf) MPI_Abort( MPI_COMM_WORLD, 1 ); bufout = (int *)malloc( count * size * size * sizeof(int) ); if (!bufout) MPI_Abort( MPI_COMM_WORLD, 1 ); for (root = 0; root < size; root ++) { initMat( comm, buf ); MPI_Reduce( buf, bufout, count, mattype, op, root, comm ); if (rank == root) { errs += isShiftLeft( comm, bufout ); } /* Try the same test, but using MPI_IN_PLACE */ initMat( comm, bufout ); if (rank == root) { MPI_Reduce( MPI_IN_PLACE, bufout, count, mattype, op, root, comm ); } else { MPI_Reduce( bufout, NULL, count, mattype, op, root, comm ); } if (rank == root) { errs += isShiftLeft( comm, bufout ); } #if MTEST_HAVE_MIN_MPI_VERSION(2,2) /* Try one more time without IN_PLACE to make sure we check * aliasing correctly */ if (rank == root) { MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); if (MPI_SUCCESS == MPI_Reduce( bufout, bufout, count, mattype, op, root, comm )) errs++; } #endif } free( buf ); free( bufout ); MPI_Type_free( &mattype ); MTestFreeComm( &comm ); } MPI_Op_free( &op ); MTest_Finalize( errs ); MPI_Finalize(); return 0; }
/* Destroy the two ops that we created */ ~modeler_t () { MPI_Op_free (&MPI_KPI_MAX_OP); MPI_Op_free (&MPI_KPI_MIN_OP); MPI_Type_free (&MPI_Kpi_pair); }
int main(int argc, char *argv[]) { unsigned char str[154]; unsigned int arr[] = {9,2,5,8,4,2,4,1,6,9,1,8,9,9,6,1,5,7,0,7,7,4,3,7,6,3,9,5,4,2,3,0,4,4,1,5,3,3,7,2,3,3,7,0,9,4,5,2,8,4,6,\ 2,1,3,4,1,4,2,6,0,8,5,1,7,3,1,4,4,7,0,5,3,4,4,8,9,1,1,9,8,3,5,1,8,3,4,4,8,3,2,8,1,2,8,7,4,1,8,1,8,0,4,\ 8,4,2,4,4,5,4,9,1,8,3,4,9,5,6,3,3,1,4,6,4,1,0,2,0,2,5,1,4,8,5,9,9,6,9,4,0,3,6,5,5,9,5,4,2,2,3,7,8,5,9,7}; long i; double t1, t2, Itime; int provided; /* Allocation */ v1 = (vector *) malloc (VLEN * sizeof (vector)); v2 = (vector *) malloc (VLEN * sizeof (vector)); v3 = (vector *) malloc (VLEN * sizeof (vector)); fin_sum = (mp_limb_t *) malloc ((2*LIMBS+1) * sizeof (mp_limb_t)); result = (mp_limb_t *) malloc ((2*LIMBS+1) * sizeof (mp_limb_t)); q = (mp_limb_t *) malloc (LIMBS * sizeof (mp_limb_t)); MPI_Init_thread (&argc, &argv, MPI_THREAD_MULTIPLE, &provided); MPI_Comm_rank (MPI_COMM_WORLD, &id); MPI_Comm_size (MPI_COMM_WORLD, &p); MPI_Type_contiguous (2*LIMBS+1, MPI_UNSIGNED_LONG_LONG, &mpntype0); MPI_Type_commit (&mpntype0); MPI_Type_contiguous (LIMBS, MPI_UNSIGNED_LONG_LONG, &mpntype1); MPI_Type_commit (&mpntype1); MPI_Op_create ((MPI_User_function *)addmpn, 1, &mpn_sum); for (i=0; i<154; ++i) str[i] = (unsigned char)arr[i]; mpn_set_str (q, str, 154, 10); //if (!id) gmp_printf ("Modulus: %Nd\n", q, LIMBS); MPI_Barrier (MPI_COMM_WORLD); /* Setting limits for 2 MPI nodes */ VOffset = BLOCK_LOW(id,p,VLEN); VChunk = BLOCK_SIZE(id,p,VLEN); /* Setting limits for NCORES-1 threads */ for (i=0; i<NCORES-1; ++i) { VStart[i] = VOffset + BLOCK_LOW(i,NCORES-1,VChunk); VEnd[i] = VOffset + BLOCK_HIGH(i,NCORES-1,VChunk); } for (i=0; i<VLEN; ++i) mpn_random (v1[i], LIMBS); for (i=0; i<VLEN; ++i) mpn_random (v2[i], LIMBS); for (i=BLOCK_LOW(id,p,VLEN); i<=BLOCK_HIGH(id,p,VLEN); ++i) mpn_random (v3[i], LIMBS); MPI_Barrier (MPI_COMM_WORLD); t1 = MPI_Wtime (); for (i=0; i<NCORES; ++i) pthread_create(&threads[i], &attr, VectMul, (void *) i); for (i=0; i<NCORES; ++i) pthread_join (threads[i], NULL); t2 = MPI_Wtime (); Itime = t2 - t1; if (!id) printf ("Total time taken: %lf\n",Itime); if (!id) gmp_printf ("Result: %Nd\n", cnum, LIMBS); MPI_Op_free(&mpn_sum); MPI_Request_free (&Rrqst); MPI_Request_free (&Srqst); MPI_Finalize (); return 0; }
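addmpn is not shown; mpn_sum reduces elements of mpntype0, i.e. chunks of 2*LIMBS+1 limbs, so the op plausibly adds each chunk with GMP's mpn_add_n (any carry beyond the top limb is discarded). A hedged sketch of what such a body could look like:

/* hypothetical: element-wise multi-precision add of *len chunks,
   each 2*LIMBS+1 limbs wide, matching mpntype0 above */
void addmpn(void *invec, void *inoutvec, int *len, MPI_Datatype *dtype)
{
    mp_limb_t *in = (mp_limb_t *) invec;
    mp_limb_t *inout = (mp_limb_t *) inoutvec;
    long i;
    for (i = 0; i < *len; ++i)
        mpn_add_n (inout + i*(2*LIMBS+1), inout + i*(2*LIMBS+1),
                   in + i*(2*LIMBS+1), 2*LIMBS+1);
}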
HYPRE_Int hypre_MPI_Op_free( hypre_MPI_Op *op ) { return (HYPRE_Int) MPI_Op_free(op); }