void recursive_bisection_contoller::run(parallel::hypergraph &hgraph,
                                        MPI_Comm comm) {
  initialize_coarsest_hypergraph(hgraph, comm);
  convToBisectionConstraints();

  progress("[R-B]: %i |", number_of_runs_);

  int i;
  int j;
  int ij;

  int numVertices = hypergraph_->number_of_vertices();
  int *pVector = nullptr;
  int destProcessor;
  int myPartitionIdx = 0;
  int v;

  dynamic_array<int> recvLens(number_of_processors_);
  dynamic_array<int> recvDispls(number_of_processors_);

  bisection *b;

  all_partition_info_.resize(numVertices << 1);

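  // make number_of_runs_ bisection runs; the partition produced by run i is
  // assembled on processor i % number_of_processors_ (round-robin)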
  for (i = 0; i < number_of_runs_; ++i) {
    destProcessor = i % number_of_processors_;
    sum_of_cuts_ = 0;
    local_vertex_part_info_length_ = 0;

    if (rank_ == destProcessor) {
      pVector = &partition_vector_[partition_vector_offsets_[myPartitionIdx]];
    }

    b = new bisection(hypergraph_, log_k_, 0);
    b->initMap();

    recursively_bisect(*b, comm);

    // ###
    // now recover the partition and
    // partition cutsize
    // ###

    MPI_Reduce(&sum_of_cuts_, &ij, 1, MPI_INT, MPI_SUM, destProcessor, comm);
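    // gather how many (vertex, part) entries each process contributes to this run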
    MPI_Gather(&local_vertex_part_info_length_, 1, MPI_INT, recvLens.data(), 1, MPI_INT,
               destProcessor, comm);

    if (rank_ == destProcessor) {
      partition_vector_cuts_[myPartitionIdx] = ij;
      ij = 0;

      for (j = 0; j < number_of_processors_; ++j) {
        recvDispls[j] = ij;
        ij += recvLens[j];
      }
    }

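    // gather the (vertex, partition) pairs themselves onto the destination processor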
    MPI_Gatherv(local_vertex_partition_info_.data(), local_vertex_part_info_length_, MPI_INT,
                all_partition_info_.data(), recvLens.data(),
                recvDispls.data(), MPI_INT, destProcessor, comm);

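    // unpack the gathered pairs; every vertex appears exactly once, so the
    // buffer holds 2 * numVertices entries of the form (vertex, part)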
    if (rank_ == destProcessor) {
      ij = numVertices << 1;

      for (j = 0; j < ij;) {
        v = all_partition_info_[j++];
        pVector[v] = all_partition_info_[j++];
      }

      ++myPartitionIdx;
    }
  }

  // ###
  // k-way refine local partitions
  // ###

  hypergraph_->set_number_of_partitions(number_of_partitions_);

  if (number_of_partitions_ > 0) {
    for (i = 0; i < number_of_partitions_; ++i) {
      int *start = &(partition_vector_.data()[partition_vector_offsets_[i]]);
      dynamic_array<int> p_vector(numVertices);
      p_vector.set_data(start, numVertices);
      hypergraph_->copy_in_partition(p_vector, numVertices, i, partition_vector_cuts_[i]);
    }

    refiner_->rebalance(*hypergraph_);
  }

  // ###
  // project partitions
  // ###

  initialize_serial_partitions(hgraph, comm);

#ifdef DEBUG_CONTROLLER
  hgraph.checkPartitions(numParts, maxPartWt, comm);
#endif
}

void recursive_bisection_contoller::initialize_serial_partitions(
    parallel::hypergraph &hgraph,
    MPI_Comm comm) {
  int i;
  int j;
  int ij;

  int numTotVertices = hypergraph_->number_of_vertices();
  int ijk;
  int startOffset;
  int endOffset;
  int totToSend;

  ds::dynamic_array<int> hGraphPartitionVector;
  ds::dynamic_array<int> hGraphPartVectorOffsets;
  ds::dynamic_array<int> hGraphPartCuts;

  auto hPartitionVector = hypergraph_->partition_vector();
  auto hPartOffsetsVector = hypergraph_->partition_offsets();
  auto hPartitionCutsArray = hypergraph_->partition_cuts();

  dynamic_array<int> numVperProc(number_of_processors_);
  dynamic_array<int> procDispls(number_of_processors_);

  dynamic_array<int> sendLens(number_of_processors_);
  dynamic_array<int> sendDispls(number_of_processors_);
  dynamic_array<int> recvLens(number_of_processors_);
  dynamic_array<int> recvDispls(number_of_processors_);
  dynamic_array<int> sendArray;

  hgraph.set_number_of_partitions(number_of_runs_);

  hGraphPartitionVector = hgraph.partition_vector();
  hGraphPartVectorOffsets = hgraph.partition_offsets();
  hGraphPartCuts = hgraph.partition_cuts();

  // ###
  // communicate partition vector values
  // ###

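  // spread the vertices evenly over the processors; the last processor also
  // takes the remainder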
  j = number_of_processors_ - 1;
  ij = numTotVertices / number_of_processors_;

  for (i = 0; i < j; ++i)
    numVperProc[i] = ij;

  numVperProc[i] = ij + (numTotVertices % number_of_processors_);

  j = 0;
  ij = 0;

  for (i = 0; i < number_of_processors_; ++i) {
    sendDispls[i] = j;
    procDispls[i] = ij;
    sendLens[i] = numVperProc[i] * number_of_partitions_;
    j += sendLens[i];
    ij += numVperProc[i];
  }

  sendArray.resize(j);
  totToSend = j;

  ij = 0;

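  // pack the local partition values, grouped by destination processor and,
  // within each processor block, by partition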
  for (ijk = 0; ijk < number_of_processors_; ++ijk) {
    for (j = 0; j < number_of_partitions_; ++j) {
      startOffset = hPartOffsetsVector[j] + procDispls[ijk];
      endOffset = startOffset + numVperProc[ijk];

      for (i = startOffset; i < endOffset; ++i) {
        sendArray[ij++] = hPartitionVector[i];
      }
    }
  }
#ifdef DEBUG_CONTROLLER
  assert(ij == totToSend);
#endif

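  // exchange the per-processor send lengths so that each processor can set up
  // its receive displacements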
  MPI_Alltoall(sendLens.data(), 1, MPI_INT, recvLens.data(), 1, MPI_INT,
               comm);

  ij = 0;

  for (i = 0; i < number_of_processors_; ++i) {
    recvDispls[i] = ij;
    ij += recvLens[i];
  }

#ifdef DEBUG_CONTROLLER
  assert(ij == hGraphPartVectorOffsets[number_of_runs_]);
#endif

  MPI_Alltoallv(sendArray.data(), sendLens.data(),
                sendDispls.data(), MPI_INT, hGraphPartitionVector.data(),
                recvLens.data(), recvDispls.data(), MPI_INT, comm);

  // ###
  // communicate partition cuts
  // ###

  MPI_Allgather(&number_of_partitions_, 1, MPI_INT, recvLens.data(), 1, MPI_INT,
                comm);

  ij = 0;

  for (i = 0; i < number_of_processors_; ++i) {
    recvDispls[i] = ij;
    ij += recvLens[i];
  }

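  // gather the cut values so that every processor holds the cuts of all runs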
  MPI_Allgatherv(hPartitionCutsArray.data(), number_of_partitions_, MPI_INT,
                 hGraphPartCuts.data(), recvLens.data(), recvDispls.data(),
                 MPI_INT, comm);

  for (i = 0; i < number_of_runs_; ++i) {
    progress("%i ", hGraphPartCuts[i]);
  }
  progress("\n");
}

template<class T>
int exchangeData(MPI_Comm comm,
                 std::vector<int>& sendProcs,
                 std::vector<std::vector<T>*>& sendData,
                 std::vector<int>& recvProcs,
                 bool recvLengthsKnownOnEntry,
                 std::vector<std::vector<T>*>& recvData)
{
  if (sendProcs.size() == 0 && recvProcs.size() == 0) return(0);
  if (sendProcs.size() != sendData.size()) return(-1);
#ifndef FEI_SER
  int tag = 11115;
  MPI_Datatype mpi_dtype = fei::mpiTraits<T>::mpi_type();
  std::vector<MPI_Request> mpiReqs;

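  // if the receive lengths are not known on entry, first exchange the send
  // lengths so each recvData vector can be sized before the Irecvs are posted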
  try {
  mpiReqs.resize(recvProcs.size());

  if (!recvLengthsKnownOnEntry) {
    std::vector<int> tmpIntData(sendData.size());
    std::vector<int> recvLens(recvProcs.size());
    for(unsigned i=0; i<sendData.size(); ++i) {
      tmpIntData[i] = (int)sendData[i]->size();
    }

    if (exchangeIntData(comm, sendProcs, tmpIntData, recvProcs, recvLens) != 0) {
      return(-1);
    }

    for(unsigned i=0; i<recvLens.size(); ++i) {
      recvData[i]->resize(recvLens[i]);
    }
  }
  }
  catch(std::runtime_error& exc) {
    fei::console_out() << exc.what() << FEI_ENDL;
    return(-1);
  }

  //launch Irecv's for recvData:

  size_t numRecvProcs = recvProcs.size();
  int req_offset = 0;
  int localProc = fei::localProc(comm);
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    if (recvProcs[i] == localProc) {--numRecvProcs; continue;}

    size_t len = recvData[i]->size();
    std::vector<T>& rbuf = *recvData[i];

    CHK_MPI( MPI_Irecv(&rbuf[0], (int)len, mpi_dtype,
                       recvProcs[i], tag, comm, &mpiReqs[req_offset++]) );
  }

  //send the sendData:

  for(unsigned i=0; i<sendProcs.size(); ++i) {
    if (sendProcs[i] == localProc) continue;

    std::vector<T>& sbuf = *sendData[i];
    CHK_MPI( MPI_Send(&sbuf[0], (int)sbuf.size(), mpi_dtype,
                      sendProcs[i], tag, comm) );
  }

  //complete the Irecv's:
  for(unsigned i=0; i<numRecvProcs; ++i) {
    if (recvProcs[i] == localProc) continue;
    int index;
    MPI_Status status;
    CHK_MPI( MPI_Waitany((int)numRecvProcs, &mpiReqs[0], &index, &status) );
  }

#endif
  return(0);
}
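
// ---------------------------------------------------------------------------
// Hypothetical usage sketch, not part of the original sources: each rank sends
// one vector<double> to its right neighbour and receives one from its left
// neighbour through exchangeData(). It assumes a parallel build (FEI_SER not
// defined), that the exchangeData template above is visible in this
// translation unit, and that fei::mpiTraits<double> is specialized (FEI
// provides specializations for the builtin arithmetic types). Meant to be run
// with at least two ranks.
// ---------------------------------------------------------------------------
#include <mpi.h>
#include <vector>

int ringExchangeExample(MPI_Comm comm)
{
  int rank = 0;
  int nprocs = 1;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &nprocs);

  std::vector<int> sendProcs(1, (rank + 1) % nprocs);          // right neighbour
  std::vector<int> recvProcs(1, (rank + nprocs - 1) % nprocs); // left neighbour

  std::vector<double> outgoing(8, static_cast<double>(rank));
  std::vector<std::vector<double>*> sendData(1, &outgoing);

  std::vector<double> incoming;                 // resized inside exchangeData
  std::vector<std::vector<double>*> recvData(1, &incoming);

  // receive lengths are not known up front, so exchangeData first swaps the
  // lengths and resizes recvData before posting the Irecvs
  return exchangeData(comm, sendProcs, sendData, recvProcs,
                      false /*recvLengthsKnownOnEntry*/, recvData);
}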