/*! Compute the exchange indices between the source grid and the destination grid
    \param [in] globaIndexWeightFromSrcToDst global index mapping, with interpolation weights, from grid source to grid destination
*/
void CGridTransformation::computeTransformationMapping(const SourceDestinationIndexMap& globaIndexWeightFromSrcToDst)
{
  CContext* context = CContext::getCurrent();
  CContextClient* client = context->client;
  int nbClient = client->clientSize;
  int clientRank = client->clientRank;

  // Recalculate the distribution of the destination grid
  CDistributionClient distributionClientDest(client->clientRank, tmpGridDestination_);
  CDistributionClient::GlobalLocalDataMap& globalLocalIndexGridDestSendToServer = distributionClientDest.getGlobalLocalDataSendToServer();

  // Update the number of local indices for this transformation
  size_t nbLocalIndex = globalLocalIndexGridDestSendToServer.size();
  nbLocalIndexOnGridDest_.push_back(nbLocalIndex);
  localMaskOnGridDest_.push_back(std::vector<bool>());
  std::vector<bool>& tmpMask = localMaskOnGridDest_.back();
  tmpMask.resize(nbLocalIndex, false);

  // Find out how many indices are sent from the source grid and how many are received on the destination grid
  SourceDestinationIndexMap::const_iterator itbIndex = globaIndexWeightFromSrcToDst.begin(),
                                            iteIndex = globaIndexWeightFromSrcToDst.end(), itIndex;
  typedef boost::unordered_map<size_t, std::vector<std::pair<size_t,double> > > SendIndexMap;
  std::map<int,int> sendRankSizeMap, recvRankSizeMap;
  int connectedClient = globaIndexWeightFromSrcToDst.size();
  int* recvCount = new int[nbClient];
  int* displ = new int[nbClient];
  int* sendRankBuff = new int[connectedClient];
  int* sendSizeBuff = new int[connectedClient];

  int n = 0;
  for (itIndex = itbIndex; itIndex != iteIndex; ++itIndex, ++n)
  {
    sendRankBuff[n] = itIndex->first;
    const SendIndexMap& sendIndexMap = itIndex->second;
    SendIndexMap::const_iterator itbSend = sendIndexMap.begin(), iteSend = sendIndexMap.end(), itSend;
    int sendSize = 0;
    for (itSend = itbSend; itSend != iteSend; ++itSend)
    {
      sendSize += itSend->second.size();
    }
    sendSizeBuff[n] = sendSize;
    sendRankSizeMap[itIndex->first] = sendSize;
  }
  MPI_Allgather(&connectedClient, 1, MPI_INT, recvCount, 1, MPI_INT, client->intraComm);

  displ[0] = 0;
  for (int n = 1; n < nbClient; ++n) displ[n] = displ[n-1] + recvCount[n-1];
  int recvSize = displ[nbClient-1] + recvCount[nbClient-1];
  int* recvRankBuff = new int[recvSize];
  int* recvSizeBuff = new int[recvSize];
  MPI_Allgatherv(sendRankBuff, connectedClient, MPI_INT, recvRankBuff, recvCount, displ, MPI_INT, client->intraComm);
  MPI_Allgatherv(sendSizeBuff, connectedClient, MPI_INT, recvSizeBuff, recvCount, displ, MPI_INT, client->intraComm);
  for (int i = 0; i < nbClient; ++i)
  {
    int currentPos = displ[i];
    for (int j = 0; j < recvCount[i]; ++j)
      if (recvRankBuff[currentPos+j] == clientRank)
      {
        recvRankSizeMap[i] = recvSizeBuff[currentPos+j];
      }
  }

  // Send the global indices of the source grid to the corresponding processes, together with the corresponding mask
  std::vector<MPI_Request> requests;
  std::vector<MPI_Status> status;
  boost::unordered_map<int, unsigned char* > recvMaskDst;
  boost::unordered_map<int, unsigned long* > recvGlobalIndexSrc;
  for (std::map<int,int>::const_iterator itRecv = recvRankSizeMap.begin(); itRecv != recvRankSizeMap.end(); ++itRecv)
  {
    int recvRank = itRecv->first;
    int recvSize = itRecv->second;
    recvMaskDst[recvRank] = new unsigned char[recvSize];
    recvGlobalIndexSrc[recvRank] = new unsigned long[recvSize];

    requests.push_back(MPI_Request());
    MPI_Irecv(recvGlobalIndexSrc[recvRank], recvSize, MPI_UNSIGNED_LONG, recvRank, 46, client->intraComm, &requests.back());
    requests.push_back(MPI_Request());
    MPI_Irecv(recvMaskDst[recvRank], recvSize, MPI_UNSIGNED_CHAR, recvRank, 47, client->intraComm, &requests.back());
  }

  boost::unordered_map<int, CArray<size_t,1> > globalIndexDst;
  boost::unordered_map<int, CArray<double,1> > weightDst;
  boost::unordered_map<int, unsigned char* > sendMaskDst;
  boost::unordered_map<int, unsigned long* > sendGlobalIndexSrc;
  for (itIndex = itbIndex; itIndex != iteIndex; ++itIndex)
  {
    int sendRank = itIndex->first;
    int sendSize = sendRankSizeMap[sendRank];
    const SendIndexMap& sendIndexMap = itIndex->second;
    SendIndexMap::const_iterator itbSend = sendIndexMap.begin(), iteSend = sendIndexMap.end(), itSend;
    globalIndexDst[sendRank].resize(sendSize);
    weightDst[sendRank].resize(sendSize);
    sendMaskDst[sendRank] = new unsigned char[sendSize];
    sendGlobalIndexSrc[sendRank] = new unsigned long[sendSize];
    int countIndex = 0;
    for (itSend = itbSend; itSend != iteSend; ++itSend)
    {
      const std::vector<std::pair<size_t,double> >& dstWeight = itSend->second;
      for (int idx = 0; idx < dstWeight.size(); ++idx)
      {
        globalIndexDst[sendRank](countIndex) = dstWeight[idx].first;
        weightDst[sendRank](countIndex) = dstWeight[idx].second;
        if (0 < globalLocalIndexGridDestSendToServer.count(dstWeight[idx].first))
          sendMaskDst[sendRank][countIndex] = 1;
        else
          sendMaskDst[sendRank][countIndex] = 0;
        sendGlobalIndexSrc[sendRank][countIndex] = itSend->first;
        ++countIndex;
      }
    }

    // Send the source global indices and the destination mask
    requests.push_back(MPI_Request());
    MPI_Isend(sendGlobalIndexSrc[sendRank], sendSize, MPI_UNSIGNED_LONG, sendRank, 46, client->intraComm, &requests.back());
    requests.push_back(MPI_Request());
    MPI_Isend(sendMaskDst[sendRank], sendSize, MPI_UNSIGNED_CHAR, sendRank, 47, client->intraComm, &requests.back());
  }

  status.resize(requests.size());
  MPI_Waitall(requests.size(), &requests[0], &status[0]);

  // Now use the mask to identify which source indices need to be sent, and tell the destination which of its requested indices are masked on the source side
  std::vector<MPI_Request>().swap(requests);
  std::vector<MPI_Status>().swap(status);
  // On the destination side, wait for the information about masked source indices
  for (std::map<int,int>::const_iterator itSend = sendRankSizeMap.begin(); itSend != sendRankSizeMap.end(); ++itSend)
  {
    int recvRank = itSend->first;
    int recvSize = itSend->second;
    requests.push_back(MPI_Request());
    MPI_Irecv(sendMaskDst[recvRank], recvSize, MPI_UNSIGNED_CHAR, recvRank, 48, client->intraComm, &requests.back());
  }

  // Fill in the local indices of the source grid (masked indices are accounted for as well)
  CDistributionClient distributionClientSrc(client->clientRank, gridSource_);
  CDistributionClient::GlobalLocalDataMap& globalLocalIndexGridSrcSendToServer = distributionClientSrc.getGlobalLocalDataSendToServer();
  localIndexToSendFromGridSource_.push_back(SendingIndexGridSourceMap());
  SendingIndexGridSourceMap& tmpSend = localIndexToSendFromGridSource_.back();
  for (std::map<int,int>::const_iterator itRecv = recvRankSizeMap.begin(); itRecv != recvRankSizeMap.end(); ++itRecv)
  {
    int recvRank = itRecv->first;
    int recvSize = itRecv->second;
    unsigned char* recvMask = recvMaskDst[recvRank];
    unsigned long* recvIndexSrc = recvGlobalIndexSrc[recvRank];
    int realSendSize = 0;
    for (int idx = 0; idx < recvSize; ++idx)
    {
      if (0 != (*(recvMask+idx)))  // a non-masked destination index requests this value
        if (0 < globalLocalIndexGridSrcSendToServer.count(*(recvIndexSrc+idx)))  // check whether the source index is masked
          ++realSendSize;
        else  // inform the destination that this index is masked
          *(recvMask+idx) = 0;
    }

    tmpSend[recvRank].resize(realSendSize);
    realSendSize = 0;
    for (int idx = 0; idx < recvSize; ++idx)
    {
      if (0 != (*(recvMask+idx)))  // a non-masked destination index requests this value
      {
        tmpSend[recvRank](realSendSize) = globalLocalIndexGridSrcSendToServer[*(recvIndexSrc+idx)];
        ++realSendSize;
      }
    }
    // Inform the destination which source indices are masked
    requests.push_back(MPI_Request());
    MPI_Isend(recvMaskDst[recvRank], recvSize, MPI_UNSIGNED_CHAR, recvRank, 48, client->intraComm, &requests.back());
  }
  status.resize(requests.size());
  MPI_Waitall(requests.size(), &requests[0], &status[0]);

  // Now fill in the local indices of the destination grid (masked indices accounted for)
  localIndexToReceiveOnGridDest_.push_back(RecvIndexGridDestinationMap());
  RecvIndexGridDestinationMap& recvTmp = localIndexToReceiveOnGridDest_.back();
  for (std::map<int,int>::const_iterator itSend = sendRankSizeMap.begin(); itSend != sendRankSizeMap.end(); ++itSend)
  {
    int recvRank = itSend->first;
    int recvSize = itSend->second;
    unsigned char* recvMask = sendMaskDst[recvRank];
    CArray<size_t,1>& recvIndexDst = globalIndexDst[recvRank];
    CArray<double,1>& recvWeightDst = weightDst[recvRank];
    int realRecvSize = 0;
    for (int idx = 0; idx < recvSize; ++idx)
    {
      if (0 != *(recvMask+idx))  // a destination index whose source counterpart is not masked
        ++realRecvSize;
    }

    int localIndexDst;
    recvTmp[recvRank].resize(realRecvSize);
    realRecvSize = 0;
    for (int idx = 0; idx < recvSize; ++idx)
    {
      if (0 != *(recvMask+idx))  // a destination index whose source counterpart is not masked
      {
        recvTmp[recvRank][realRecvSize].first = globalLocalIndexGridDestSendToServer[recvIndexDst(idx)];
        recvTmp[recvRank][realRecvSize].second = recvWeightDst(idx);
        tmpMask[globalLocalIndexGridDestSendToServer[recvIndexDst(idx)]] = true;
        ++realRecvSize;
      }
    }
  }

  delete [] recvCount;
  delete [] displ;
  delete [] sendRankBuff;
  delete [] recvRankBuff;
  delete [] sendSizeBuff;
  delete [] recvSizeBuff;

  boost::unordered_map<int, unsigned char* >::const_iterator itChar;
  for (itChar = sendMaskDst.begin(); itChar != sendMaskDst.end(); ++itChar)
    delete [] itChar->second;
  for (itChar = recvMaskDst.begin(); itChar != recvMaskDst.end(); ++itChar)
    delete [] itChar->second;
  boost::unordered_map<int, unsigned long* >::const_iterator itLong;
  for (itLong = sendGlobalIndexSrc.begin(); itLong != sendGlobalIndexSrc.end(); ++itLong)
    delete [] itLong->second;
  for (itLong = recvGlobalIndexSrc.begin(); itLong != recvGlobalIndexSrc.end(); ++itLong)
    delete [] itLong->second;
}
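// ---------------------------------------------------------------------------
// Standalone sketch (not XIOS code) of the rank-rendezvous pattern used at the
// top of computeTransformationMapping: every process announces which peers it
// will send to and how much each peer will receive, via MPI_Allgather +
// MPI_Allgatherv, so that each process can deduce who will send to it.
// All names (discoverIncomingSizes, sendSizePerRank, ...) are illustrative.
// ---------------------------------------------------------------------------
#include <mpi.h>
#include <cstddef>
#include <map>
#include <vector>

std::map<int,int> discoverIncomingSizes(const std::map<int,int>& sendSizePerRank, MPI_Comm comm)
{
  int nbProc, myRank;
  MPI_Comm_size(comm, &nbProc);
  MPI_Comm_rank(comm, &myRank);

  // Flatten the outgoing plan of this rank: target ranks and per-target sizes
  std::vector<int> targetRanks, targetSizes;
  for (std::map<int,int>::const_iterator it = sendSizePerRank.begin(); it != sendSizePerRank.end(); ++it)
  {
    targetRanks.push_back(it->first);
    targetSizes.push_back(it->second);
  }
  int nbTargets = static_cast<int>(targetRanks.size());

  // Everybody learns how many targets every rank has ...
  std::vector<int> counts(nbProc), displs(nbProc);
  MPI_Allgather(&nbTargets, 1, MPI_INT, &counts[0], 1, MPI_INT, comm);
  displs[0] = 0;
  for (int i = 1; i < nbProc; ++i) displs[i] = displs[i-1] + counts[i-1];
  int total = displs[nbProc-1] + counts[nbProc-1];

  // ... then the concatenated target lists and sizes (assumes total > 0)
  std::vector<int> allRanks(total), allSizes(total);
  MPI_Allgatherv(nbTargets ? &targetRanks[0] : NULL, nbTargets, MPI_INT,
                 &allRanks[0], &counts[0], &displs[0], MPI_INT, comm);
  MPI_Allgatherv(nbTargets ? &targetSizes[0] : NULL, nbTargets, MPI_INT,
                 &allSizes[0], &counts[0], &displs[0], MPI_INT, comm);

  // Keep only the entries that name this rank: sender rank -> size to receive
  std::map<int,int> recvSizePerRank;
  for (int i = 0; i < nbProc; ++i)
    for (int j = 0; j < counts[i]; ++j)
      if (allRanks[displs[i] + j] == myRank)
        recvSizePerRank[i] = allSizes[displs[i] + j];
  return recvSizePerRank;
}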
void connection_handler::handle_messages()
{
    detail::handling_messages hm(handling_messages_);       // reset on exit

    bool bootstrapping = hpx::is_starting();
    bool has_work = true;
    std::size_t k = 0;

    hpx::util::high_resolution_timer t;
    std::list<std::pair<int, MPI_Request> > close_requests;

    // We let the message handling loop spin for another 2 seconds to avoid the
    // costs involved with posting it to asio
    while(bootstrapping || has_work || (!has_work && t.elapsed() < 2.0))
    {
        if(stopped_) break;

        // break the loop if someone requested to pause the parcelport
        if(!enable_parcel_handling_) break;

        // handle all send requests
        {
            hpx::lcos::local::spinlock::scoped_lock l(senders_mtx_);
            for(senders_type::iterator it = senders_.begin();
                !stopped_ && enable_parcel_handling_ && it != senders_.end();
                /**/)
            {
                if((*it)->done())
                    it = senders_.erase(it);
                else
                    ++it;
            }
            has_work = !senders_.empty();
        }

        // Send the pending close requests
        {
            hpx::lcos::local::spinlock::scoped_lock l(close_mtx_);
            typedef std::pair<int, int> pair_type;

            BOOST_FOREACH(pair_type p, pending_close_requests_)
            {
                header close_request = header::close(p.first, p.second);
                close_requests.push_back(std::make_pair(p.first, MPI_Request()));
                MPI_Isend(
                    close_request.data(),       // Data pointer
                    close_request.data_size_,   // Size
                    close_request.type(),       // MPI Datatype
                    close_request.rank(),       // Destination
                    0,                          // Tag
                    communicator_,              // Communicator
                    &close_requests.back().second);
            }
            pending_close_requests_.clear();
        }

        // add new receive requests
        std::pair<bool, header> next(acceptor_.next_header());
        if(next.first)
        {
            boost::shared_ptr<receiver> rcv;
            header h = next.second;

            receivers_tag_map_type & tag_map = receivers_map_[h.rank()];
            receivers_tag_map_type::iterator jt = tag_map.find(h.tag());
            if(jt != tag_map.end())
            {
                rcv = jt->second;
            }
            else
            {
                rcv = boost::make_shared<receiver>(
                    communicator_, get_next_tag(), h.tag(), h.rank(), *this);
                tag_map.insert(std::make_pair(h.tag(), rcv));
            }

            if(h.close_request())
            {
                rcv->close();
            }
            else
            {
                h.assert_valid();
                if (static_cast<std::size_t>(h.size()) > this->get_max_message_size())
                {
                    // report this problem ...
                    HPX_THROW_EXCEPTION(boost::asio::error::operation_not_supported,
                        "mpi::connection_handler::handle_messages",
                        "The size of this message exceeds the maximum inbound data size");
                    return;
                }
                if(rcv->async_read(h))
                {
#ifdef HPX_DEBUG
                    receivers_type::iterator it =
                        std::find(receivers_.begin(), receivers_.end(), rcv);
                    HPX_ASSERT(it == receivers_.end());
#endif
                    receivers_.push_back(rcv);
                }
            }
        }

        // handle all receive requests
        for(receivers_type::iterator it = receivers_.begin(); it != receivers_.end(); /**/)
        {
            boost::shared_ptr<receiver> rcv = *it;
            if(rcv->done())
            {
                HPX_ASSERT(rcv->sender_tag() != -1);
                if(rcv->closing())
                {
                    receivers_tag_map_type & tag_map = receivers_map_[rcv->rank()];
                    receivers_tag_map_type::iterator jt = tag_map.find(rcv->sender_tag());
                    HPX_ASSERT(jt != tag_map.end());
                    tag_map.erase(jt);
                    {
                        hpx::lcos::local::spinlock::scoped_lock l(tag_mtx_);
                        free_tags_.push_back(rcv->tag());
                    }
                }
                it = receivers_.erase(it);
            }
            else
            {
                ++it;
            }
        }
        if(!has_work) has_work = !receivers_.empty();

        // handle completed close requests
        for(std::list<std::pair<int, MPI_Request> >::iterator it = close_requests.begin();
            !stopped_ && enable_parcel_handling_ && it != close_requests.end();
            /**/)
        {
            int completed = 0;
            MPI_Status status;
            int ret = 0;
            ret = MPI_Test(&it->second, &completed, &status);
            HPX_ASSERT(ret == MPI_SUCCESS);
            if(completed && status.MPI_ERROR != MPI_ERR_PENDING)
            {
                hpx::lcos::local::spinlock::scoped_lock l(tag_mtx_);
                free_tags_.push_back(it->first);
                it = close_requests.erase(it);
            }
            else
            {
                ++it;
            }
        }
        if(!has_work) has_work = !close_requests.empty();

        if (bootstrapping)
            bootstrapping = hpx::is_starting();

        if(has_work)
        {
            t.restart();
            k = 0;
        }
        else if(enable_parcel_handling_)
        {
            hpx::lcos::local::spinlock::yield(k);
            ++k;
        }
    }
}
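// ---------------------------------------------------------------------------
// Minimal sketch (not HPX code) of the completion-polling pattern used for
// close_requests above: each outstanding MPI_Request is tested once per loop
// iteration with the non-blocking MPI_Test, and finished entries are erased.
// A std::list is used so that erasing one element leaves the addresses of the
// remaining MPI_Request objects untouched. Names are illustrative.
// ---------------------------------------------------------------------------
#include <mpi.h>
#include <list>
#include <utility>

void poll_pending(std::list<std::pair<int, MPI_Request> >& pending)
{
    for (std::list<std::pair<int, MPI_Request> >::iterator it = pending.begin();
         it != pending.end(); /**/)
    {
        int completed = 0;
        MPI_Status status;
        MPI_Test(&it->second, &completed, &status);   // never blocks
        if (completed)
            it = pending.erase(it);                   // operation finished
        else
            ++it;                                     // still in flight, check later
    }
}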
MPI_Request& DataChannelMPI::RequestMPI::new_request()
{
  _requests.push_back(MPI_Request());
  return _requests.back();
}
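// ---------------------------------------------------------------------------
// Hypothetical, self-contained illustration of the new_request() helper above
// (it is not the actual DataChannelMPI implementation): append a fresh
// MPI_Request and hand back a reference so the caller can pass its address
// straight to a non-blocking MPI call. A std::deque is used here because,
// unlike std::vector, push_back never relocates existing elements, so earlier
// requests keep stable addresses while MPI may still be completing them.
// ---------------------------------------------------------------------------
#include <mpi.h>
#include <cstddef>
#include <deque>

class RequestList
{
public:
    MPI_Request& new_request()
    {
        _requests.push_back(MPI_Request());
        return _requests.back();
    }

    void wait_all()  // a deque is not contiguous, so wait on each request in turn
    {
        for (std::size_t i = 0; i < _requests.size(); ++i)
            MPI_Wait(&_requests[i], MPI_STATUS_IGNORE);
        _requests.clear();
    }

private:
    std::deque<MPI_Request> _requests;
};

// Possible usage:
//   RequestList reqs;
//   MPI_Isend(buf, count, MPI_INT, dest, tag, MPI_COMM_WORLD, &reqs.new_request());
//   reqs.wait_all();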
void connection_handler::handle_messages()
{
    detail::handling_messages hm(handling_messages_);       // reset on exit

    bool bootstrapping = hpx::is_starting();
    bool has_work = true;
    std::size_t k = 0;

    hpx::util::high_resolution_timer t;
    std::list<std::pair<int, MPI_Request> > close_requests;

    // We let the message handling loop spin for another 2 seconds to avoid the
    // costs involved with posting it to asio
    while(bootstrapping || (!stopped_ && has_work) || (!has_work && t.elapsed() < 2.0))
    {
        // break the loop if someone requested to pause the parcelport
        if(!enable_parcel_handling_) break;

        // handle all send requests
        {
            hpx::lcos::local::spinlock::scoped_lock l(senders_mtx_);
            for(senders_type::iterator it = senders_.begin();
                !stopped_ && enable_parcel_handling_ && it != senders_.end();
                /**/)
            {
                if((*it)->done())
                    it = senders_.erase(it);
                else
                    ++it;
            }
            has_work = !senders_.empty();
        }

        // Send the pending close requests
        {
            hpx::lcos::local::spinlock::scoped_lock l(close_mtx_);
            typedef std::pair<int, int> pair_type;

            BOOST_FOREACH(pair_type p, pending_close_requests_)
            {
                header close_request = header::close(p.first, p.second);
                close_requests.push_back(std::make_pair(p.first, MPI_Request()));
                MPI_Isend(
                    close_request.data(),       // Data pointer
                    close_request.data_size_,   // Size
                    close_request.type(),       // MPI Datatype
                    close_request.rank(),       // Destination
                    0,                          // Tag
                    communicator_,              // Communicator
                    &close_requests.back().second);
            }
            pending_close_requests_.clear();
        }

        // add new receive requests
        std::pair<bool, header> next(acceptor_.next_header());
        if(next.first)
        {
            boost::shared_ptr<receiver> rcv;
            receivers_rank_map_type::iterator jt = receivers_map_.find(next.second.rank());
            if(jt != receivers_map_.end())
            {
                receivers_tag_map_type::iterator kt = jt->second.find(next.second.tag());
                if(kt != jt->second.end())
                {
                    if(next.second.close_request())
                    {
                        hpx::lcos::local::spinlock::scoped_lock l(tag_mtx_);
                        free_tags_.push_back(kt->second->tag());
                        jt->second.erase(kt);
                        if(jt->second.empty())
                        {
                            receivers_map_.erase(jt);
                        }
                    }
                    else
                    {
                        rcv = kt->second;
                    }
                }
            }

            if(!next.second.close_request())
            {
                next.second.assert_valid();
                if(!rcv)
                {
                    rcv = boost::make_shared<receiver>(communicator_, get_next_tag());
                }
                rcv->async_read(next.second, *this);
                receivers_.push_back(rcv);
            }
        }

        // handle all receive requests
        for(receivers_type::iterator it = receivers_.begin();
            !stopped_ && enable_parcel_handling_ && it != receivers_.end();
            /**/)
        {
            if((*it)->done(*this))
            {
                HPX_ASSERT(
                    !receivers_map_[(*it)->rank()][(*it)->sender_tag()] ||
                    receivers_map_[(*it)->rank()][(*it)->sender_tag()].get() == it->get());
                receivers_map_[(*it)->rank()][(*it)->sender_tag()] = *it;
                it = receivers_.erase(it);
            }
            else
            {
                ++it;
            }
        }
        if(!has_work) has_work = !receivers_.empty();

        // handle completed close requests
        for(std::list<std::pair<int, MPI_Request> >::iterator it = close_requests.begin();
            !stopped_ && enable_parcel_handling_ && it != close_requests.end();
            /**/)
        {
            int completed = 0;
            MPI_Status status;
            int ret = 0;
            ret = MPI_Test(&it->second, &completed, &status);
            HPX_ASSERT(ret == MPI_SUCCESS);
            if(completed && status.MPI_ERROR != MPI_ERR_PENDING)
            {
                hpx::lcos::local::spinlock::scoped_lock l(tag_mtx_);
                free_tags_.push_back(it->first);
                it = close_requests.erase(it);
            }
            else
            {
                ++it;
            }
        }
        if(!has_work) has_work = !close_requests.empty();

        if (bootstrapping)
            bootstrapping = hpx::is_starting();

        if(has_work)
        {
            t.restart();
            k = 0;
        }
        else if(enable_parcel_handling_)
        {
            hpx::lcos::local::spinlock::yield(k);
            ++k;
        }
    }
}
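// ---------------------------------------------------------------------------
// Standalone illustration (not HPX code) of the spin-then-back-off idiom that
// ends the loop above: keep polling while work keeps arriving, reset the idle
// timer on every piece of work, back off while idle, and stop once nothing has
// happened for ~2 seconds. MPI_Wtime() and POSIX sched_yield() stand in for
// hpx::util::high_resolution_timer and hpx::lcos::local::spinlock::yield(k).
// ---------------------------------------------------------------------------
#include <mpi.h>
#include <sched.h>
#include <cstddef>

template <typename PollFn>
void spin_until_idle(PollFn poll)   // poll() returns true when it found work
{
    std::size_t k = 0;
    double last_work = MPI_Wtime();
    while (MPI_Wtime() - last_work < 2.0)
    {
        if (poll())
        {
            last_work = MPI_Wtime();  // there was work: restart the idle timer
            k = 0;                    // and go back to busy spinning
        }
        else if (k++ > 16)
        {
            sched_yield();            // idle for a while: yield the thread
        }
    }
}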
MPI_Request& add()
{
  m_requests.push_back(MPI_Request());
  return m_requests.back();
}
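// ---------------------------------------------------------------------------
// Sketch of how an add() helper like the one above is typically consumed,
// assuming m_requests is a std::vector<MPI_Request> (an assumption, not taken
// from the original): the returned reference must be used before the next
// push_back, since a vector reallocation would invalidate it; once everything
// is posted, the contiguous storage allows a single MPI_Waitall.
// ---------------------------------------------------------------------------
#include <mpi.h>
#include <vector>

struct RequestPool
{
    std::vector<MPI_Request> m_requests;

    MPI_Request& add()
    {
        m_requests.push_back(MPI_Request());
        return m_requests.back();
    }

    void wait_all()
    {
        if (!m_requests.empty())
            MPI_Waitall(static_cast<int>(m_requests.size()),
                        &m_requests[0], MPI_STATUSES_IGNORE);
        m_requests.clear();
    }
};

// Possible usage: pass &pool.add() directly to the non-blocking call.
//   RequestPool pool;
//   MPI_Irecv(buf, count, MPI_DOUBLE, src, tag, comm, &pool.add());
//   pool.wait_all();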