inline void all_gathervm_impl(const Communicator& comm, const T* in_values, const int in_n, const int *in_map, T* out_values, const int *out_n, const int *out_map, const int stride ) { // get data type and number of processors Datatype type = PE::get_mpi_datatype(*in_values); int nproc; MPI_CHECK_RESULT(MPI_Comm_size,(comm,&nproc)); // if stride is smaller than one and unsupported functionality cf3_assert( stride>0 ); // compute displacements both on send an receive side // also compute stride-multiplied send and receive counts int *out_nstride=new int[nproc]; int *out_disp=new int[nproc]; out_disp[0]=0; for(int i=0; i<nproc-1; i++) { out_nstride[i]=stride*out_n[i]; out_disp[i+1]=out_disp[i]+out_nstride[i]; } out_nstride[nproc-1]=out_n[nproc-1]*stride; // compute total number of send and receive items const int in_sum=stride*in_n; const int out_sum=out_disp[nproc-1]+stride*out_n[nproc-1]; // set up in_buf T *in_buf=(T*)in_values; if (in_map!=0) { if ( (in_buf=new T[in_sum+1]) == (T*)0 ) throw cf3::common::NotEnoughMemory(FromHere(),"Could not allocate temporary buffer."); // +1 for avoiding possible zero allocation if (stride==1) { for(int i=0; i<in_sum; i++) in_buf[i]=in_values[in_map[i]]; } else { for(int i=0; i<in_sum/stride; i++) memcpy(&in_buf[stride*i],&in_values[stride*in_map[i]],stride*sizeof(T)); } } // set up out_buf T *out_buf=out_values; if ((out_map!=0)||(in_values==out_values)) { if ( (out_buf=new T[out_sum+1]) == (T*)0 ) throw cf3::common::NotEnoughMemory(FromHere(),"Could not allocate temporary buffer."); // +1 for avoiding possible zero allocation } // do the communication MPI_CHECK_RESULT(MPI_Allgatherv, (in_buf, in_sum, type, out_buf, out_nstride, out_disp, type, comm)); // re-populate out_values if (out_map!=0) { if (stride==1) { for(int i=0; i<out_sum; i++) out_values[out_map[i]]=out_buf[i]; } else { for(int i=0; i<out_sum/stride; i++) memcpy(&out_values[stride*out_map[i]],&out_buf[stride*i],stride*sizeof(T)); } delete[] out_buf; } else if 
(in_values==out_values) { memcpy(out_values,out_buf,out_sum*sizeof(T)); delete[] out_buf; } // free internal memory if (in_map!=0) delete[] in_buf; delete[] out_disp; delete[] out_nstride; }
/// Returns the MPI datatype used to communicate objects of type T.
///
/// The handle is kept in a function-local static, one per instantiation. On
/// the first call it is created as sizeof(T) contiguous MPI_BYTE elements and
/// committed; later calls return the stored handle.
/// Note: a check rejecting non-POD types used to live here and is disabled.
template <typename T>
inline Datatype get_mpi_datatype()
{
  static Datatype cached_type(nullptr);

  const bool needs_registration = (cached_type==nullptr);
  if (needs_registration)
  {
    //PEProcessSortedExecute(-1,CFinfo << "Registering type of size " << sizeof(T) << CFendl;);
    //if (!boost::is_pod<T>::value) throw NotSupported(FromHere(),"Non-POD (plain old datatype) is not supported by parallel environment communications.");
    MPI_CHECK_RESULT(MPI_Type_contiguous,(sizeof(T), MPI_BYTE, &cached_type));
    MPI_CHECK_RESULT(MPI_Type_commit,(&cached_type));
  }

  return cached_type;
}
/**
  Support for registering and storing non-built-in operations.
  The handle lives in a function-local static, one per (T, Op) instantiation;
  it is created through MPI_Op_create from Op::func<T> and Op::is_commutative
  on the first call and simply returned afterwards.
  @returns Operation handle for the requested operation and type combination.
**/
template<typename T, typename Op>
Operation get_mpi_op_impl()
{
  static Operation registered_op((Operation)nullptr);

  const bool not_yet_created = (registered_op==(Operation)nullptr);
  if (not_yet_created)
  {
    MPI_CHECK_RESULT(MPI_Op_create, (Op::template func<T>, Op::is_commutative, &registered_op));
  }

  return registered_op;
}
void my_all_to_all(const std::vector<Comm::Buffer>& send, Comm::Buffer& recv) { std::vector<int> send_strides(send.size()); std::vector<int> send_displs(send.size()); for (Uint i=0; i<send.size(); ++i) send_strides[i] = send[i].packed_size(); if (send.size()) send_displs[0] = 0; for (Uint i=1; i<send.size(); ++i) send_displs[i] = send_displs[i-1] + send_strides[i-1]; Comm::Buffer send_linear; send_linear.resize(send_displs.back()+send_strides.back()); for (Uint i=0; i<send.size(); ++i) send_linear.pack(send[i].buffer(),send[i].packed_size()); std::vector<int> recv_strides(Comm::PE::instance().size()); std::vector<int> recv_displs(Comm::PE::instance().size()); Comm::PE::instance().all_to_all(send_strides,recv_strides); if (recv_displs.size()) recv_displs[0] = 0; for (Uint i=1; i<Comm::PE::instance().size(); ++i) recv_displs[i] = recv_displs[i-1] + recv_strides[i-1]; recv.reset(); recv.resize(recv_displs.back()+recv_strides.back()); MPI_CHECK_RESULT(MPI_Alltoallv, ((void*)send_linear.buffer(), &send_strides[0], &send_displs[0], MPI_PACKED, (void*)recv.buffer(), &recv_strides[0], &recv_displs[0], MPI_PACKED, Comm::PE::instance().communicator())); recv.packed_size()=recv_displs.back()+recv_strides.back(); }
/// Queries MPI_Get_version and formats the answer.
/// @returns the reported numbers as text in the form "<version>.<subversion>"
std::string Comm::version() const
{
  int major = 0;
  int minor = 0;
  MPI_CHECK_RESULT(MPI_Get_version,(&major,&minor));

  std::string text( to_str(major) );
  text += ".";
  text += to_str(minor);
  return text;
}
void Comm::init(int argc, char** args) { if ( is_finalized() ) throw SetupError( FromHere(), "Should not call Comm::initialize() after Comm::finalize()" ); if( !is_initialized() && !is_finalized() ) // then initialize { MPI_CHECK_RESULT(MPI_Init,(&argc,&args)); // CFinfo << "MPI (version " << version() << ") -- initiated" << CFendl; } m_comm = MPI_COMM_WORLD; }
void my_all_to_all(const std::vector<std::vector<T> >& send, std::vector<std::vector<T> >& recv) { std::vector<int> send_strides(send.size()); std::vector<int> send_displs(send.size()); for (Uint i=0; i<send.size(); ++i) send_strides[i] = send[i].size(); send_displs[0] = 0; for (Uint i=1; i<send.size(); ++i) send_displs[i] = send_displs[i-1] + send_strides[i-1]; std::vector<T> send_linear(send_displs.back()+send_strides.back()); for (Uint i=0; i<send.size(); ++i) for (Uint j=0; j<send[i].size(); ++j) send_linear[send_displs[i]+j] = send[i][j]; std::vector<int> recv_strides(Comm::PE::instance().size()); std::vector<int> recv_displs(Comm::PE::instance().size()); Comm::PE::instance().all_to_all(send_strides,recv_strides); recv_displs[0] = 0; for (Uint i=1; i<Comm::PE::instance().size(); ++i) recv_displs[i] = recv_displs[i-1] + recv_strides[i-1]; std::vector<Uint> recv_linear(recv_displs.back()+recv_strides.back()); MPI_CHECK_RESULT(MPI_Alltoallv, (&send_linear[0], &send_strides[0], &send_displs[0], Comm::get_mpi_datatype<Uint>(), &recv_linear[0], &recv_strides[0], &recv_displs[0], get_mpi_datatype<Uint>(), Comm::PE::instance().communicator())); recv.resize(recv_strides.size()); for (Uint i=0; i<recv_strides.size(); ++i) { recv[i].resize(recv_strides[i]); for (Uint j=0; j<recv_strides[i]; ++j) { recv[i][j]=recv_linear[recv_displs[i]+j]; } } }