//------------------------------------------------------------------------
int mirrorProcs(MPI_Comm comm,
                std::vector<int>& toProcs,
                std::vector<int>& fromProcs)
{
  fromProcs.resize(0);
#ifdef FEI_SER
  fromProcs.push_back(0);
  return(0);
#else
  int num_procs = fei::numProcs(comm);

  //tmpIntData is laid out in three sections of length num_procs:
  //[0,np): send flags, [np,2np): recv area, [2np,3np): recv-counts.
  std::vector<int> tmpIntData(num_procs*3, 0);
  int* buf = &tmpIntData[0];
  int* recvbuf = buf+num_procs;

  //flag each proc we send to with a 1; the reduce-scatter below sums
  //these flags so that each proc learns how many procs will send to it.
  for(unsigned i=0; i<toProcs.size(); ++i) {
    buf[toProcs[i]] = 1;
  }

  //recv-counts for the reduce-scatter: one int per proc.
  for(int ii=2*num_procs; ii<3*num_procs; ++ii) {
    buf[ii] = 1;
  }

  CHK_MPI( MPI_Reduce_scatter(buf, &(buf[num_procs]), &(buf[2*num_procs]),
                              MPI_INT, MPI_SUM, comm) );

  int numRecvProcs = buf[num_procs];

  int tag = 11116;
  std::vector<MPI_Request> mpiReqs(numRecvProcs);

  int offset = 0;
  for(int ii=0; ii<numRecvProcs; ++ii) {
    CHK_MPI( MPI_Irecv(&(recvbuf[ii]), 1, MPI_INT, MPI_ANY_SOURCE,
                       tag, comm, &(mpiReqs[offset++])) );
  }

  for(unsigned i=0; i<toProcs.size(); ++i) {
    CHK_MPI( MPI_Send(&(toProcs[i]), 1, MPI_INT, toProcs[i], tag, comm) );
  }

  //harvest the source ranks from the completed recvs; those ranks are
  //the procs that will be sending to us.
  MPI_Status status;
  for(int ii=0; ii<numRecvProcs; ++ii) {
    int index;
    MPI_Waitany(numRecvProcs, &mpiReqs[0], &index, &status);
    fromProcs.push_back(status.MPI_SOURCE);
  }

  std::sort(fromProcs.begin(), fromProcs.end());

  return(0);
#endif
}
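/* Usage sketch for mirrorProcs (illustrative only, not part of the original
   file; the ring-neighbor choice below is an assumption made just for the
   example). Given the list of procs this proc will send to, mirrorProcs
   fills in the list of procs that will be sending to this proc:

     MPI_Comm comm = MPI_COMM_WORLD;
     int me = fei::localProc(comm);
     int np = fei::numProcs(comm);

     //suppose each proc sends only to its "right" ring neighbor:
     std::vector<int> toProcs(1, (me+1)%np);

     std::vector<int> fromProcs;
     fei::mirrorProcs(comm, toProcs, fromProcs);

     //fromProcs now holds one entry: the "left" neighbor, (me-1+np)%np.
*/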
int fei::Vector_core::scatterToOverlap()
{
  if (fei::numProcs(comm_) == 1 || haveFEVector()) {
    return(0);
  }

#ifndef FEI_SER
  if (!overlapAlreadySet_) {
    setOverlap();
  }

  //...and now the overlap is whatever is in our remotelyOwned_ vectors.

  //first find out which procs we'll be receiving from.
  std::vector<int> recvProcs;
  for(unsigned i=0; i<remotelyOwned_.size(); ++i) {
    if ((int)i == fei::localProc(comm_)) continue;
    if (remotelyOwned_[i]->size() == 0) continue;

    recvProcs.push_back((int)i);
  }

  //find out the send-procs.
  std::vector<int> sendProcs;
  fei::mirrorProcs(comm_, recvProcs, sendProcs);

  //declare arrays to send from, and corresponding sizes
  std::vector<std::vector<int> > send_ints(sendProcs.size());
  std::vector<std::vector<double> > send_doubles(sendProcs.size());
  std::vector<int> send_sizes(sendProcs.size());

  std::vector<MPI_Request> mpiReqs(sendProcs.size()+recvProcs.size());
  std::vector<MPI_Status> mpiStatuses(sendProcs.size()+recvProcs.size());
  int tag1 = 11111;
  int tag2 = 11112;

  //first, the procs we're going to send to have to let us know how much
  //data we're supposed to send. So we have to receive sizes and then
  //indices from the "send"-procs.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    MPI_Irecv(&send_sizes[i], 1, MPI_INT, sendProcs[i],
              tag1, comm_, &mpiReqs[i]);
  }

  //now we'll send the sizes of our remotely-owned data to the procs that
  //we will be receiving the data from, and also the indices that we want
  //to receive.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];

    int size = remotelyOwned_[proc]->size();
    MPI_Send(&size, 1, MPI_INT, proc, tag1, comm_);
  }

  MPI_Waitall(sendProcs.size(), &mpiReqs[0], &mpiStatuses[0]);

  //now resize our send_ints and send_doubles arrays, and post the recvs
  //for the indices that we're supposed to pack.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];
    int size = send_sizes[i];
    send_ints[i].resize(size);
    MPI_Irecv(&(send_ints[i][0]), size, MPI_INT, proc, tag1,
              comm_, &mpiReqs[i]);
    send_doubles[i].resize(size);
  }

  //now send the indices that we want to receive data for.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    int size = remotelyOwned_[proc]->size();
    int* indices = &(remotelyOwned_[proc]->indices())[0];
    MPI_Send(indices, size, MPI_INT, proc, tag1, comm_);
  }

  MPI_Waitall(sendProcs.size(), &mpiReqs[0], &mpiStatuses[0]);

  //now post our recvs for the coefficients.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    int size = remotelyOwned_[proc]->size();
    double* coefs = &(remotelyOwned_[proc]->coefs())[0];
    MPI_Irecv(coefs, size, MPI_DOUBLE, proc, tag2,
              comm_, &mpiReqs[i]);
  }

  //now pack and send the coefs that the other procs need from us.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];

    int num = send_sizes[i];
    int err = copyOutOfUnderlyingVector(num, &(send_ints[i][0]),
                                        &(send_doubles[i][0]), 0);
    if (err != 0) {
      FEI_COUT << "fei::Vector_core::scatterToOverlap ERROR getting data to send."
               << FEI_ENDL;
      return(err);
    }

    MPI_Send(&(send_doubles[i][0]), num, MPI_DOUBLE, proc, tag2, comm_);
  }

  MPI_Waitall(recvProcs.size(), &mpiReqs[0], &mpiStatuses[0]);

#endif  //#ifndef FEI_SER

  return(0);
}
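/* Usage sketch for scatterToOverlap (illustrative only; assumes 'x' is a
   fei::Vector built on a fei::VectorSpace in which some indices are shared
   between procs, and 'globalIndex'/'value' are made-up variables). After
   the owning procs have set or summed coefficients, scatterToOverlap()
   refreshes the ghost copies held in the sharing procs' remotelyOwned_
   vectors:

     x->sumIn(1, &globalIndex, &value);  //contribute on the owning proc
     x->scatterToOverlap();              //owners push current values out
                                         //to the procs that share them
*/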
int fei::Vector_core::gatherFromOverlap(bool accumulate)
{
  if (fei::numProcs(comm_) == 1 || haveFEVector()) {
    return(0);
  }

#ifndef FEI_SER
  //first create the list of procs we'll be sending to.
  std::vector<int> sendProcs;
  for(unsigned i=0; i<remotelyOwned_.size(); ++i) {
    if ((int)i == fei::localProc(comm_)) continue;
    if (remotelyOwned_[i]->size() == 0) continue;

    sendProcs.push_back(i);
  }

  std::vector<int> recvProcs;
  fei::mirrorProcs(comm_, sendProcs, recvProcs);

  //declare arrays to hold the indices and coefs we'll be receiving.
  std::vector<std::vector<int> > recv_ints(recvProcs.size());
  std::vector<std::vector<double> > recv_doubles(recvProcs.size());
  std::vector<int> recv_sizes(recvProcs.size());

  std::vector<MPI_Request> mpiReqs(recvProcs.size()*2);
  std::vector<MPI_Status> mpiStatuses(recvProcs.size()*2);
  int tag1 = 11111;
  int tag2 = 11112;

  //post the recvs for the sizes.
  for(size_t i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    MPI_Irecv(&recv_sizes[i], 1, MPI_INT, proc,
              tag1, comm_, &mpiReqs[i]);
  }

  //send the sizes of data we'll be sending.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];
    int size = remotelyOwned_[proc]->size();
    MPI_Send(&size, 1, MPI_INT, proc, tag1, comm_);
  }

  if (recvProcs.size() > 0) {
    MPI_Waitall(recvProcs.size(), &mpiReqs[0], &mpiStatuses[0]);
  }

  //now post the recvs for the data.
  unsigned offset = 0;
  for(size_t i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    int size = recv_sizes[i];
    std::vector<int>& recv_ints_i = recv_ints[i];
    std::vector<double>& recv_doubles_i = recv_doubles[i];
    recv_ints_i.resize(size);
    recv_doubles_i.resize(size);
    MPI_Irecv(&(recv_ints_i[0]), size, MPI_INT, proc,
              tag1, comm_, &mpiReqs[offset++]);
    MPI_Irecv(&(recv_doubles_i[0]), size, MPI_DOUBLE, proc,
              tag2, comm_, &mpiReqs[offset++]);
  }

  //now send the outgoing data.
  for(size_t i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];
    int size = remotelyOwned_[proc]->size();
    int* indices = &(remotelyOwned_[proc]->indices())[0];
    MPI_Send(indices, size, MPI_INT, proc, tag1, comm_);
    double* coefs = &(remotelyOwned_[proc]->coefs())[0];
    MPI_Send(coefs, size, MPI_DOUBLE, proc, tag2, comm_);

    fei::set_values(*remotelyOwned_[proc], 0.0);
  }

  if (recvProcs.size() > 0) {
    MPI_Waitall(recvProcs.size()*2, &mpiReqs[0], &mpiStatuses[0]);
  }

  //now store the data we've received.
  for(size_t i=0; i<recvProcs.size(); ++i) {
    int num = recv_sizes[i];
    std::vector<int>& recv_ints_i = recv_ints[i];
    std::vector<double>& recv_doubles_i = recv_doubles[i];
    int err = giveToUnderlyingVector(num, &(recv_ints_i[0]),
                                     &(recv_doubles_i[0]), accumulate, 0);
    if (err != 0) {
      FEI_COUT << "fei::Vector_core::gatherFromOverlap ERROR storing recvd data"
               << FEI_ENDL;
      return(err);
    }
  }

#endif  //#ifndef FEI_SER

  return(0);
}
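/* Usage sketch for gatherFromOverlap (illustrative only; same assumptions
   about 'x' as in the scatterToOverlap sketch above, and 'nonLocalIndex'
   is a made-up variable). During assembly, contributions to equations
   owned by other procs accumulate locally in remotelyOwned_;
   gatherFromOverlap ships them to the owners, which add (accumulate==true)
   or overwrite (accumulate==false) them in the underlying vector. Note
   that the local copies are zeroed once they have been sent:

     x->sumIn(1, &nonLocalIndex, &value); //lands in remotelyOwned_
     x->gatherFromOverlap(true);          //send to owner and accumulate
*/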
//------------------------------------------------------------------------
int mirrorCommPattern(MPI_Comm comm, comm_map* inPattern, comm_map*& outPattern)
{
#ifdef FEI_SER
  (void)inPattern;
  (void)outPattern;
#else
  int localP = localProc(comm);
  int numP = numProcs(comm);

  if (numP < 2) return(0);

  std::vector<int> buf(numP*2, 0);

  int numInProcs = inPattern->getMap().size();
  std::vector<int> inProcs(numInProcs);
  fei::copyKeysToVector(inPattern->getMap(), inProcs);

  std::vector<int> outProcs;

  int err = mirrorProcs(comm, inProcs, outProcs);
  if (err != 0) ERReturn(-1);

  std::vector<int> recvbuf(outProcs.size(), 0);

  outPattern = new comm_map(0,1);

  MPI_Datatype mpi_ttype = fei::mpiTraits<int>::mpi_type();

  //now recv a length (the contents of buf[i]) from each "out-proc", which
  //will be the length of the equation data that will also be recvd from
  //that proc.
  std::vector<MPI_Request> mpiReqs(outProcs.size());
  std::vector<MPI_Status> mpiStss(outProcs.size());
  MPI_Request* requests = &mpiReqs[0];
  MPI_Status* statuses = &mpiStss[0];

  int firsttag = 11117;
  int offset = 0;
  int* outProcsPtr = &outProcs[0];
  for(unsigned i=0; i<outProcs.size(); ++i) {
    if (MPI_Irecv(&(recvbuf[i]), 1, MPI_INT, outProcsPtr[i], firsttag,
                  comm, &requests[offset++]) != MPI_SUCCESS) ERReturn(-1);
  }

  comm_map::map_type& in_row_map = inPattern->getMap();
  comm_map::map_type::iterator
    in_iter = in_row_map.begin(),
    in_end = in_row_map.end();

  int* inProcsPtr = &inProcs[0];
  for(int ii=0; in_iter!= in_end; ++in_iter, ++ii) {
    comm_map::row_type* in_row = in_iter->second;
    buf[ii] = in_row->size();
    if (MPI_Send(&(buf[ii]), 1, MPI_INT, inProcsPtr[ii], firsttag,
                 comm) != MPI_SUCCESS) ERReturn(-1);
  }

  int numOutProcs = outProcs.size();

  MPI_Waitall(numOutProcs, requests, statuses);

  std::vector<int> lengths(numOutProcs);
  int totalRecvLen = 0;
  offset = 0;
  for(int ii=0; ii<numOutProcs; ++ii) {
    if (recvbuf[ii] > 0) {
      lengths[offset++] = recvbuf[ii];
      totalRecvLen += recvbuf[ii];
    }
  }

  //now we need to create the space into which we'll receive the
  //lists that other procs send to us.
  std::vector<int> recvData(totalRecvLen, 999999);

  int tag2 = 11118;
  offset = 0;
  for(int ii=0; ii<numOutProcs; ++ii) {
    CHK_MPI(MPI_Irecv(&(recvData[offset]), lengths[ii], mpi_ttype,
                      outProcs[ii], tag2, comm, &requests[ii]) );
    offset += lengths[ii];
  }

  std::vector<int> sendList;

  in_iter = in_row_map.begin();

  for(int ii=0; in_iter != in_end; ++in_iter,++ii) {
    if (inProcs[ii] == localP) {
      continue;
    }
    sendList.resize(in_iter->second->size());
    fei::copySetToArray(*(in_iter->second), sendList.size(), &sendList[0]);

    CHK_MPI(MPI_Send(&sendList[0], sendList.size(), mpi_ttype,
                     inProcs[ii], tag2, comm) );
  }

  //our final communication operation is to catch the Irecvs we started above.
  for(int ii=0; ii<numOutProcs; ++ii) {
    MPI_Wait(&requests[ii], &statuses[ii]);
  }

  //now we've completed all the communication, so we're ready to put the data
  //we received into the outPattern object.
  offset = 0;
  for(int ii=0; ii<numOutProcs; ii++) {
    outPattern->addIndices(outProcs[ii], lengths[ii], &(recvData[offset]));
    offset += lengths[ii];
  }

#endif
  return(0);
}
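/* Usage sketch for mirrorCommPattern (illustrative only; assumes 'comm' is
   an MPI communicator with at least two procs, 'otherProc' is a made-up
   rank, and the index values are invented for the example). inPattern maps
   destination procs to the indices this proc will send them; the mirrored
   outPattern maps source procs to the indices they will send us:

     fei::comm_map sendPattern(0, 1);
     int indices[2] = {10, 11};
     sendPattern.addIndices(otherProc, 2, indices); //we'll send eqns 10,11

     fei::comm_map* recvPattern = NULL;
     fei::mirrorCommPattern(comm, &sendPattern, recvPattern);

     //recvPattern's rows now list, per source proc, the indices that
     //proc will send to us. The caller owns recvPattern:
     delete recvPattern;
*/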