// Variable-size neighbor exchange. Buffers are staged through host memory;
// counts/displacements are given in exclusive-scan form with a trailing total.
template <typename T>
Read<T> Comm::alltoallv(Read<T> sendbuf_dev, Read<LO> sendcounts_dev,
    Read<LO> sdispls_dev, Read<LO> recvcounts_dev,
    Read<LO> rdispls_dev) const {
#ifdef OMEGA_H_USE_MPI
  HostRead<T> sendbuf(sendbuf_dev);
  HostRead<LO> sendcounts(sendcounts_dev);
  HostRead<LO> recvcounts(recvcounts_dev);
  HostRead<LO> sdispls(sdispls_dev);
  HostRead<LO> rdispls(rdispls_dev);
  CHECK(rdispls.size() == recvcounts.size() + 1);
  int nrecvd = rdispls.last();
  HostWrite<T> recvbuf(nrecvd);
  CHECK(sendcounts.size() == host_dsts_.size());
  CHECK(recvcounts.size() == host_srcs_.size());
  CHECK(sdispls.size() == sendcounts.size() + 1);
  CHECK(sendbuf.size() == sdispls.last());
  CALL(Neighbor_alltoallv(host_srcs_, host_dsts_, sendbuf.data(),
      sendcounts.data(), sdispls.data(), MpiTraits<T>::datatype(),
      recvbuf.data(), recvcounts.data(), rdispls.data(),
      MpiTraits<T>::datatype(), impl_));
  return recvbuf.write();
#else
  (void)sendcounts_dev;
  (void)recvcounts_dev;
  (void)sdispls_dev;
  (void)rdispls_dev;
  return sendbuf_dev;
#endif
}
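// The CHECKs above encode the expected layout: sdispls/rdispls are exclusive
// scans of the counts with one extra trailing entry holding the total, so
// displs.last() gives the buffer size. A minimal host-side sketch of building
// such an array from a count list (plain std::vector rather than the Omega_h
// array types, purely for illustration):
#include <vector>

std::vector<int> exclusive_scan_with_total(const std::vector<int>& counts) {
  std::vector<int> displs(counts.size() + 1, 0);
  for (std::size_t i = 0; i < counts.size(); ++i)
    displs[i + 1] = displs[i] + counts[i];  // displs.back() == total element count
  return displs;
}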
// Fixed-size exchange: one element sent to and received from each neighbor.
template <typename T>
Read<T> Comm::alltoall(Read<T> x) const {
#ifdef OMEGA_H_USE_MPI
  HostWrite<T> recvbuf(srcs_.size());
  HostRead<T> sendbuf(x);
  CALL(Neighbor_alltoall(host_srcs_, host_dsts_, sendbuf.data(), 1,
      MpiTraits<T>::datatype(), recvbuf.data(), 1, MpiTraits<T>::datatype(),
      impl_));
  return recvbuf.write();
#else
  return x;
#endif
}
// Gather one value from every source neighbor into a device-readable array.
template <typename T>
Read<T> Comm::allgather(T x) const {
#ifdef OMEGA_H_USE_MPI
  HostWrite<T> recvbuf(srcs_.size());
  CALL(Neighbor_allgather(host_srcs_, host_dsts_, &x, 1,
      MpiTraits<T>::datatype(), recvbuf.data(), 1, MpiTraits<T>::datatype(),
      impl_));
  return recvbuf.write();
#else
  if (srcs_.size() == 1) return Read<T>({x});
  return Read<T>({});
#endif
}
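// All three wrappers above rely on MpiTraits<T>::datatype() to map the C++
// element type onto an MPI datatype. A minimal sketch of what such a traits
// mapping could look like (illustrative only; the names and the set of
// specializations are assumptions, not the library's actual definition):
#include <mpi.h>
#include <cstdint>

template <typename T> struct MpiTraitsSketch;

template <> struct MpiTraitsSketch<int> {
  static MPI_Datatype datatype() { return MPI_INT; }
};
template <> struct MpiTraitsSketch<double> {
  static MPI_Datatype datatype() { return MPI_DOUBLE; }
};
template <> struct MpiTraitsSketch<std::int64_t> {
  static MPI_Datatype datatype() { return MPI_INT64_T; }
};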
#include <cmath>
#include <iostream>
#include <sstream>
#include <vector>
#include <mpi.h>

// Exchange ranks with the four grid neighbors on a 2D Cartesian topology.
// Assumes a square number of MPI tasks (DIM*DIM == numtasks) and an MPI_()
// error-checking macro defined elsewhere (a sketch follows this example).
int main( int argc, char** argv ) {
    int numtasks = 0;
    MPI_( MPI_Init( &argc, &argv ) );
    // MPI_Errhandler_set is deprecated; use MPI_Comm_set_errhandler instead.
    MPI_( MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_RETURN ) );
    MPI_( MPI_Comm_size( MPI_COMM_WORLD, &numtasks ) );
    const int DIM = int( std::sqrt( double( numtasks ) ) );
    std::vector< int > dims( 2, DIM );
    std::vector< int > periods( 2, 0 ); // periodic = false -> non-periodic
    const int reorder = 0;              // false - no rank reordering
    MPI_Comm cartcomm;
    MPI_( MPI_Cart_create( MPI_COMM_WORLD, 2, &dims[ 0 ], &periods[ 0 ],
                           reorder, &cartcomm ) );
    int task = -1;
    MPI_( MPI_Comm_rank( cartcomm, &task ) );
    std::vector< int > coords( 2, -1 );
    MPI_( MPI_Cart_coords( cartcomm, task, 2, &coords[ 0 ] ) );
    std::vector< int > neighbors( 4, -1 );
    enum { UP = 0, DOWN, LEFT, RIGHT };
    // Compute the shifted source and destination ranks, given a shift
    // direction and amount. MPI_Cart_shift is used to find the two "nearby"
    // neighbors of the calling process along a specified direction of an
    // N-dimensional grid. The direction and offset are specified as signed
    // integers: if the displacement is positive, the "source" rank is lower
    // than the destination rank; if it is negative, the opposite is true.
    MPI_( MPI_Cart_shift( cartcomm, 0, 1, &neighbors[ UP ], &neighbors[ DOWN ] ) );
    MPI_( MPI_Cart_shift( cartcomm, 1, 1, &neighbors[ LEFT ], &neighbors[ RIGHT ] ) );
    int sendbuf = task;
    const int tag = 0x01;
    std::vector< int > recvbuf( 4, MPI_PROC_NULL );
    std::vector< MPI_Request > reqs( 2 * 4 );
    for( int i = 0; i != 4; ++i ) {
        int dest = neighbors[ i ];
        int src = neighbors[ i ];
        // reorder == 0, so cartcomm ranks coincide with MPI_COMM_WORLD ranks.
        MPI_( MPI_Isend( &sendbuf, 1, MPI_INT, dest, tag, MPI_COMM_WORLD,
                         &reqs[ i ] ) );
        MPI_( MPI_Irecv( &recvbuf[ i ], 1, MPI_INT, src, tag, MPI_COMM_WORLD,
                         &reqs[ i + 4 ] ) );
    }
    std::vector< MPI_Status > status( 2 * 4 );
    MPI_( MPI_Waitall( 8, &reqs[ 0 ], &status[ 0 ] ) );
    std::ostringstream os;
    os << "rank= " << task << " coords= " << coords[ 0 ] << ',' << coords[ 1 ]
       << " neighbors= " << neighbors[ UP ] << ',' << neighbors[ DOWN ] << ','
       << neighbors[ LEFT ] << ',' << neighbors[ RIGHT ] << '\n';
    std::cout << os.str();
    os.flush();
    MPI_( MPI_Finalize() );
    return 0;
}
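// The example above wraps every call in an MPI_() macro that is not defined
// in the snippet. A minimal sketch of such an error-checking wrapper, assuming
// the intent is to report and abort on any return code other than MPI_SUCCESS
// (the macro name and behavior are assumptions):
#include <cstdio>
#include <mpi.h>

#define MPI_(call)                                                  \
  do {                                                              \
    const int mpi_err_ = (call);                                    \
    if (mpi_err_ != MPI_SUCCESS) {                                  \
      char msg_[MPI_MAX_ERROR_STRING];                              \
      int len_ = 0;                                                 \
      MPI_Error_string(mpi_err_, msg_, &len_);                      \
      std::fprintf(stderr, "%s failed: %s\n", #call, msg_);         \
      MPI_Abort(MPI_COMM_WORLD, mpi_err_);                          \
    }                                                               \
  } while (0)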
//------------------------------------------------------------------------
int mirrorCommPattern(MPI_Comm comm, comm_map* inPattern, comm_map*& outPattern)
{
#ifdef FEI_SER
  (void)inPattern;
  (void)outPattern;
#else
  int localP = localProc(comm);
  int numP = numProcs(comm);

  if (numP < 2) return(0);

  std::vector<int> buf(numP*2, 0);

  int numInProcs = inPattern->getMap().size();
  std::vector<int> inProcs(numInProcs);
  fei::copyKeysToVector(inPattern->getMap(), inProcs);

  std::vector<int> outProcs;

  int err = mirrorProcs(comm, inProcs, outProcs);
  if (err != 0) ERReturn(-1);

  std::vector<int> recvbuf(outProcs.size(), 0);

  outPattern = new comm_map(0,1);

  MPI_Datatype mpi_ttype = fei::mpiTraits<int>::mpi_type();

  //now recv a length (the contents of buf[i]) from each "out-proc", which
  //will be the length of the equation data that will also be recvd from that
  //proc.
  std::vector<MPI_Request> mpiReqs(outProcs.size());
  std::vector<MPI_Status> mpiStss(outProcs.size());
  MPI_Request* requests = &mpiReqs[0];
  MPI_Status* statuses = &mpiStss[0];

  int firsttag = 11117;
  int offset = 0;
  int* outProcsPtr = &outProcs[0];
  for(unsigned i=0; i<outProcs.size(); ++i) {
    if (MPI_Irecv(&(recvbuf[i]), 1, MPI_INT, outProcsPtr[i], firsttag,
                  comm, &requests[offset++]) != MPI_SUCCESS) ERReturn(-1);
  }

  comm_map::map_type& in_row_map = inPattern->getMap();
  comm_map::map_type::iterator
    in_iter = in_row_map.begin(),
    in_end  = in_row_map.end();

  int* inProcsPtr = &inProcs[0];
  for(int ii=0; in_iter!= in_end; ++in_iter, ++ii) {
    comm_map::row_type* in_row = in_iter->second;
    buf[ii] = in_row->size();
    if (MPI_Send(&(buf[ii]), 1, MPI_INT, inProcsPtr[ii], firsttag,
                 comm) != MPI_SUCCESS) ERReturn(-1);
  }

  int numOutProcs = outProcs.size();

  MPI_Waitall(numOutProcs, requests, statuses);

  std::vector<int> lengths(numOutProcs);
  int totalRecvLen = 0;
  offset = 0;
  for(int ii=0; ii<numOutProcs; ++ii) {
    if (recvbuf[ii] > 0) {
      lengths[offset++] = recvbuf[ii];
      totalRecvLen += recvbuf[ii];
    }
  }

  //now we need to create the space into which we'll receive the
  //lists that other procs send to us.
  std::vector<int> recvData(totalRecvLen, 999999);

  int tag2 = 11118;
  offset = 0;

  for(int ii=0; ii<numOutProcs; ++ii) {
    CHK_MPI(MPI_Irecv(&(recvData[offset]), lengths[ii], mpi_ttype,
                      outProcs[ii], tag2, comm, &requests[ii]) );
    offset += lengths[ii];
  }

  std::vector<int> sendList;

  in_iter = in_row_map.begin();

  for(int ii=0; in_iter != in_end; ++in_iter,++ii) {
    if (inProcs[ii] == localP) {
      continue;
    }
    sendList.resize(in_iter->second->size());
    fei::copySetToArray(*(in_iter->second), sendList.size(), &sendList[0]);

    CHK_MPI(MPI_Send(&sendList[0], sendList.size(), mpi_ttype,
                     inProcs[ii], tag2, comm) );
  }

  //our final communication operation is to catch the Irecvs we started above.
  for(int ii=0; ii<numOutProcs; ++ii) {
    MPI_Wait(&requests[ii], &statuses[ii]);
  }

  //now we've completed all the communication, so we're ready to put the data
  //we received into the outPattern object.
  offset = 0;
  for(int ii=0; ii<numOutProcs; ii++) {
    outPattern->addIndices(outProcs[ii], lengths[ii], &(recvData[offset]));
    offset += lengths[ii];
  }
#endif
  return(0);
}
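// mirrorCommPattern above delegates the rank-level mirroring to mirrorProcs():
// given the ranks this process sends to, determine the ranks that will send
// to it. A hypothetical sketch of that step, using MPI_Reduce_scatter_block to
// count how many peers list this rank and point-to-point messages to learn
// who they are (the function name and approach are assumptions, not fei's
// actual implementation):
#include <mpi.h>
#include <vector>
#include <cstddef>

int mirror_procs_sketch(MPI_Comm comm, const std::vector<int>& inProcs,
                        std::vector<int>& outProcs) {
  int numP = 0, localP = 0;
  MPI_Comm_size(comm, &numP);
  MPI_Comm_rank(comm, &localP);

  // indicator[p] == 1 if this rank has p in its inProcs list.
  std::vector<int> indicator(numP, 0);
  for (int p : inProcs) indicator[p] = 1;

  // Each rank receives the sum of the indicators aimed at it: the number of
  // ranks that will expect a message from it.
  int numSenders = 0;
  MPI_Reduce_scatter_block(indicator.data(), &numSenders, 1, MPI_INT,
                           MPI_SUM, comm);

  // Tell each in-proc that we exist, and collect the ranks that told us.
  const int tag = 199;
  std::vector<MPI_Request> reqs(inProcs.size());
  for (std::size_t i = 0; i < inProcs.size(); ++i)
    MPI_Isend(&localP, 1, MPI_INT, inProcs[i], tag, comm, &reqs[i]);

  outProcs.resize(numSenders);
  for (int i = 0; i < numSenders; ++i)
    MPI_Recv(&outProcs[i], 1, MPI_INT, MPI_ANY_SOURCE, tag, comm,
             MPI_STATUS_IGNORE);

  if (!reqs.empty())
    MPI_Waitall(static_cast<int>(reqs.size()), reqs.data(),
                MPI_STATUSES_IGNORE);
  return 0;
}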