void recursive_bisection_contoller::run(parallel::hypergraph &hgraph,
                                        MPI_Comm comm) {
  initialize_coarsest_hypergraph(hgraph, comm);
  convToBisectionConstraints();

  progress("[R-B]: %i |", number_of_runs_);

  int i;
  int j;
  int ij;

  int numVertices = hypergraph_->number_of_vertices();
  int *pVector = nullptr;
  int destProcessor;
  int myPartitionIdx = 0;
  int v;

  dynamic_array<int> recvLens(number_of_processors_);
  dynamic_array<int> recvDispls(number_of_processors_);

  bisection *b;

  all_partition_info_.resize(numVertices << 1);

  for (i = 0; i < number_of_runs_; ++i) {
    // runs are assigned to destination processors in round-robin fashion
    destProcessor = i % number_of_processors_;
    sum_of_cuts_ = 0;
    local_vertex_part_info_length_ = 0;

    if (rank_ == destProcessor) {
      pVector = &partition_vector_[partition_vector_offsets_[myPartitionIdx]];
    }

    b = new bisection(hypergraph_, log_k_, 0);
    b->initMap();

    recursively_bisect(*b, comm);

    // ###
    // now recover the partition and
    // partition cutsize
    // ###

    MPI_Reduce(&sum_of_cuts_, &ij, 1, MPI_INT, MPI_SUM, destProcessor, comm);
    MPI_Gather(&local_vertex_part_info_length_, 1, MPI_INT, recvLens.data(), 1,
               MPI_INT, destProcessor, comm);

    if (rank_ == destProcessor) {
      partition_vector_cuts_[myPartitionIdx] = ij;
      ij = 0;

      // displacements are the prefix sums of the gathered lengths
      for (j = 0; j < number_of_processors_; ++j) {
        recvDispls[j] = ij;
        ij += recvLens[j];
      }
    }

    MPI_Gatherv(local_vertex_partition_info_.data(),
                local_vertex_part_info_length_, MPI_INT,
                all_partition_info_.data(), recvLens.data(), recvDispls.data(),
                MPI_INT, destProcessor, comm);

    if (rank_ == destProcessor) {
      ij = numVertices << 1;

      // all_partition_info_ holds (vertex, part) pairs; scatter them into
      // this run's partition vector
      for (j = 0; j < ij;) {
        v = all_partition_info_[j++];
        pVector[v] = all_partition_info_[j++];
      }

      ++myPartitionIdx;
    }
  }

  // ###
  // k-way refine local partitions
  // ###

  hypergraph_->set_number_of_partitions(number_of_partitions_);

  if (number_of_partitions_ > 0) {
    for (i = 0; i < number_of_partitions_; ++i) {
      int *start = &(partition_vector_.data()[partition_vector_offsets_[i]]);
      dynamic_array<int> p_vector(numVertices);
      p_vector.set_data(start, numVertices);
      hypergraph_->copy_in_partition(p_vector, numVertices, i,
                                     partition_vector_cuts_[i]);
    }

    refiner_->rebalance(*hypergraph_);
  }

  // ###
  // project partitions
  // ###

  initialize_serial_partitions(hgraph, comm);

#ifdef DEBUG_CONTROLLER
  hgraph.checkPartitions(numParts, maxPartWt, comm);
#endif
}
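// The run() loop above relies on a common two-step collective idiom: every
// rank owns a variable-length list of (vertex, part) pairs, the destination
// rank first gathers the per-rank lengths with MPI_Gather, turns them into
// displacements via a prefix sum, and then collects the pairs with
// MPI_Gatherv. The following is a minimal, self-contained sketch of that
// idiom only; it is not part of the library, and all identifiers in it
// (gather_pairs_sketch, localPairs, ...) are illustrative assumptions.
#include <mpi.h>
#include <vector>

void gather_pairs_sketch(std::vector<int> &localPairs, int root,
                         MPI_Comm comm) {
  int rank, nprocs;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &nprocs);

  int localLen = static_cast<int>(localPairs.size());
  std::vector<int> lens(nprocs), displs(nprocs);

  // step 1: the root learns how much each rank will contribute
  MPI_Gather(&localLen, 1, MPI_INT, lens.data(), 1, MPI_INT, root, comm);

  std::vector<int> all;
  if (rank == root) {
    int total = 0;
    for (int p = 0; p < nprocs; ++p) {
      displs[p] = total;
      total += lens[p];
    }
    all.resize(total);
  }

  // step 2: variable-length gather of the pairs themselves
  MPI_Gatherv(localPairs.data(), localLen, MPI_INT, all.data(), lens.data(),
              displs.data(), MPI_INT, root, comm);

  // on the root, 'all' now holds the pairs laid out as v0, p0, v1, p1, ...
}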
void recursive_bisection_contoller::initialize_serial_partitions(
    parallel::hypergraph &hgraph, MPI_Comm comm) {
  int i;
  int j;
  int ij;

  int numTotVertices = hypergraph_->number_of_vertices();
  int ijk;
  int startOffset;
  int endOffset;
  int totToSend;

  ds::dynamic_array<int> hGraphPartitionVector;
  ds::dynamic_array<int> hGraphPartVectorOffsets;
  ds::dynamic_array<int> hGraphPartCuts;

  auto hPartitionVector = hypergraph_->partition_vector();
  auto hPartOffsetsVector = hypergraph_->partition_offsets();
  auto hPartitionCutsArray = hypergraph_->partition_cuts();

  dynamic_array<int> numVperProc(number_of_processors_);
  dynamic_array<int> procDispls(number_of_processors_);

  dynamic_array<int> sendLens(number_of_processors_);
  dynamic_array<int> sendDispls(number_of_processors_);
  dynamic_array<int> recvLens(number_of_processors_);
  dynamic_array<int> recvDispls(number_of_processors_);
  dynamic_array<int> sendArray;

  hgraph.set_number_of_partitions(number_of_runs_);

  hGraphPartitionVector = hgraph.partition_vector();
  hGraphPartVectorOffsets = hgraph.partition_offsets();
  hGraphPartCuts = hgraph.partition_cuts();

  // ###
  // communicate partition vector values
  // ###

  // split the vertices evenly; the last processor takes the remainder
  j = number_of_processors_ - 1;
  ij = numTotVertices / number_of_processors_;

  for (i = 0; i < j; ++i)
    numVperProc[i] = ij;

  numVperProc[i] = ij + (numTotVertices % number_of_processors_);

  j = 0;
  ij = 0;

  for (i = 0; i < number_of_processors_; ++i) {
    sendDispls[i] = j;
    procDispls[i] = ij;
    sendLens[i] = numVperProc[i] * number_of_partitions_;
    j += sendLens[i];
    ij += numVperProc[i];
  }

  sendArray.resize(j);
  totToSend = j;

  ij = 0;

  // pack, for each destination processor, its slice of every partition vector
  for (ijk = 0; ijk < number_of_processors_; ++ijk) {
    for (j = 0; j < number_of_partitions_; ++j) {
      startOffset = hPartOffsetsVector[j] + procDispls[ijk];
      endOffset = startOffset + numVperProc[ijk];

      for (i = startOffset; i < endOffset; ++i) {
        sendArray[ij++] = hPartitionVector[i];
      }
    }
  }

#ifdef DEBUG_CONTROLLER
  assert(ij == totToSend);
#endif

  MPI_Alltoall(sendLens.data(), 1, MPI_INT, recvLens.data(), 1, MPI_INT, comm);

  ij = 0;
  for (i = 0; i < number_of_processors_; ++i) {
    recvDispls[i] = ij;
    ij += recvLens[i];
  }

#ifdef DEBUG_CONTROLLER
  assert(ij == hGraphPartVectorOffsets[numSeqRuns]);
#endif

  MPI_Alltoallv(sendArray.data(), sendLens.data(), sendDispls.data(), MPI_INT,
                hGraphPartitionVector.data(), recvLens.data(),
                recvDispls.data(), MPI_INT, comm);

  // ###
  // communicate partition cuts
  // ###

  MPI_Allgather(&number_of_partitions_, 1, MPI_INT, recvLens.data(), 1,
                MPI_INT, comm);

  ij = 0;

  for (i = 0; i < number_of_processors_; ++i) {
    recvDispls[i] = ij;
    ij += recvLens[i];
  }

  MPI_Allgatherv(hPartitionCutsArray.data(), number_of_partitions_, MPI_INT,
                 hGraphPartCuts.data(), recvLens.data(), recvDispls.data(),
                 MPI_INT, comm);

  for (i = 0; i < number_of_runs_; ++i) {
    progress("%i ", hGraphPartCuts[i]);
  }
  progress("\n");
}
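// initialize_serial_partitions() above uses the standard count/displacement
// redistribution idiom: each rank packs one contiguous send buffer, exchanges
// per-destination counts with MPI_Alltoall, builds receive displacements as a
// prefix sum, and moves the payload with a single MPI_Alltoallv. Below is a
// minimal stand-alone sketch of that idiom, not library code; every
// identifier in it is an illustrative assumption.
#include <mpi.h>
#include <vector>

void alltoallv_sketch(std::vector<int> &sendBuf, std::vector<int> &sendLens,
                      MPI_Comm comm) {
  int nprocs;
  MPI_Comm_size(comm, &nprocs);

  std::vector<int> sendDispls(nprocs), recvLens(nprocs), recvDispls(nprocs);

  // send displacements: exclusive prefix sum of the send counts
  for (int p = 0, offset = 0; p < nprocs; ++p) {
    sendDispls[p] = offset;
    offset += sendLens[p];
  }

  // tell every rank how many values it will receive from us
  MPI_Alltoall(sendLens.data(), 1, MPI_INT, recvLens.data(), 1, MPI_INT, comm);

  int totalRecv = 0;
  for (int p = 0; p < nprocs; ++p) {
    recvDispls[p] = totalRecv;
    totalRecv += recvLens[p];
  }
  std::vector<int> recvBuf(totalRecv);

  // one collective delivers every rank's slice of the data
  MPI_Alltoallv(sendBuf.data(), sendLens.data(), sendDispls.data(), MPI_INT,
                recvBuf.data(), recvLens.data(), recvDispls.data(), MPI_INT,
                comm);
}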
template<class T>
int exchangeData(MPI_Comm comm,
                 std::vector<int>& sendProcs,
                 std::vector<std::vector<T>*>& sendData,
                 std::vector<int>& recvProcs,
                 bool recvLengthsKnownOnEntry,
                 std::vector<std::vector<T>*>& recvData)
{
  if (sendProcs.size() == 0 && recvProcs.size() == 0) return(0);
  if (sendProcs.size() != sendData.size()) return(-1);

#ifndef FEI_SER
  int tag = 11115;
  MPI_Datatype mpi_dtype = fei::mpiTraits<T>::mpi_type();
  std::vector<MPI_Request> mpiReqs;

  try {
    mpiReqs.resize(recvProcs.size());

    if (!recvLengthsKnownOnEntry) {
      // exchange message lengths first, then size the receive buffers
      std::vector<int> tmpIntData;
      tmpIntData.resize(sendData.size());
      std::vector<int> recvLens(sendData.size());

      for(unsigned i=0; i<sendData.size(); ++i) {
        tmpIntData[i] = (int)sendData[i]->size();
      }

      if (exchangeIntData(comm, sendProcs, tmpIntData, recvProcs, recvLens) != 0) {
        return(-1);
      }

      for(unsigned i=0; i<recvLens.size(); ++i) {
        recvData[i]->resize(recvLens[i]);
      }
    }
  }
  catch(std::runtime_error& exc) {
    fei::console_out() << exc.what() << FEI_ENDL;
    return(-1);
  }

  //launch Irecv's for recvData:
  size_t numRecvProcs = recvProcs.size();
  int req_offset = 0;
  int localProc = fei::localProc(comm);

  for(unsigned i=0; i<recvProcs.size(); ++i) {
    if (recvProcs[i] == localProc) {--numRecvProcs; continue;}

    size_t len = recvData[i]->size();
    std::vector<T>& rbuf = *recvData[i];

    CHK_MPI( MPI_Irecv(&rbuf[0], (int)len, mpi_dtype, recvProcs[i], tag,
                       comm, &mpiReqs[req_offset++]) );
  }

  //send the sendData:
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    if (sendProcs[i] == localProc) continue;

    std::vector<T>& sbuf = *sendData[i];
    CHK_MPI( MPI_Send(&sbuf[0], (int)sbuf.size(), mpi_dtype, sendProcs[i],
                      tag, comm) );
  }

  //complete the Irecv's:
  for(unsigned i=0; i<numRecvProcs; ++i) {
    if (recvProcs[i] == localProc) continue;

    int index;
    MPI_Status status;
    CHK_MPI( MPI_Waitany((int)numRecvProcs, &mpiReqs[0], &index, &status) );
  }
#endif

  return(0);
}
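// A hedged usage sketch for exchangeData() above. It assumes the FEI
// utilities the template depends on (fei::mpiTraits, exchangeIntData,
// CHK_MPI, fei::localProc) are available in the translation unit, and it
// models the simplest case: a symmetric exchange with a single neighbour
// rank. The function and variable names here are illustrative only.
#include <mpi.h>
#include <vector>

int exchange_with_neighbour_sketch(MPI_Comm comm, int neighbourProc,
                                   std::vector<double> &valuesToSend,
                                   std::vector<double> &valuesReceived) {
  std::vector<int> sendProcs(1, neighbourProc);
  std::vector<int> recvProcs(1, neighbourProc);

  std::vector<std::vector<double>*> sendData(1, &valuesToSend);
  std::vector<std::vector<double>*> recvData(1, &valuesReceived);

  // recvLengthsKnownOnEntry == false: exchangeData first trades message
  // lengths and resizes 'valuesReceived' before posting the MPI_Irecv.
  return exchangeData(comm, sendProcs, sendData, recvProcs,
                      /*recvLengthsKnownOnEntry=*/false, recvData);
}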