Esempio n. 1
0
void generatePartitionSizeHistogram(typename std::vector<T>& localVector, std::string filename, MPI_Comm comm = MPI_COMM_WORLD)
{
  /*
   * Approach :
   * 1. Do a global sort by keyLayer (Partition id).
   * 2. Compute the information Partition_Id:Size for boundary partitions (Avoid duplication).
   * 3. Do an all_gather of boundary values plugged with ranks.
   * 4. All boundary partitions should be owned by minimum rank that shares it.
   * 5. Locally compute the largest partition size and do MPI_Allreduce 
   *    with MPI_MAX.
   * 6. Build the histogram locally. 
   * 7. Finally do MPI_Reduce over whole histogram to root node.
   * 8. Write the output to file
   */

  int p;
  int rank;
  MPI_Comm_size(comm, &p);
  MPI_Comm_rank(comm, &rank);

  static layer_comparator<keyLayer, T> pccomp;

  //Sort the vector by each tuple's keyLayer element
  mxx::sort(localVector.begin(), localVector.end(), pccomp, comm, false);

  //Iterate over tuples to compute boundary information
  
  bool iownLeftBucket, iownRightBucket, onlySingleLocalPartition;
  //Type of tuple to communicate : (Partition Id, rank, size)
  typedef std::tuple<uint32_t, int, uint64_t> tupletypeforbucketSize;
  std::vector<tupletypeforbucketSize> toSend;
  toSend.resize(2);

  //Find the left most bucket
  auto leftBucketRange = findRange(localVector.begin(), localVector.end(), *(localVector.begin()), pccomp);
  std::get<0>(toSend[0]) = std::get<keyLayer>(*localVector.begin()); 
  std::get<1>(toSend[0]) = rank;
  std::get<2>(toSend[0]) = leftBucketRange.second - leftBucketRange.first;

  //Find the right most bucket
  auto rightBucketRange = findRange(localVector.rbegin(), localVector.rend(), *(localVector.rbegin()), pccomp);
  std::get<0>(toSend[1]) = std::get<keyLayer>(*localVector.rbegin()); 
  std::get<1>(toSend[1]) = rank;
  std::get<2>(toSend[1]) = rightBucketRange.second - rightBucketRange.first;

  //If we have only single partition, make sure we are not creating duplicates
  if(std::get<0>(toSend[0]) == std::get<0>(toSend[1]))
  {
    //Make second send element's size zero
    std::get<2>(toSend[1]) = 0;
    onlySingleLocalPartition = true;
  }
  else
    onlySingleLocalPartition = false;

  //Gather all the boundary information
  auto allBoundaryPartitionSizes = mxx::allgather_vectors(toSend, comm);

  uint64_t leftBucketSize = 0;
  uint64_t rightBucketSize = 0;

  //Need to parse boundary information that matches the partitionIds we have
  static layer_comparator<0, tupletypeforbucketSize> pccomp2;
  auto leftBucketBoundaryRange = std::equal_range(allBoundaryPartitionSizes.begin(), allBoundaryPartitionSizes.end(), toSend[0], pccomp2);

  //Check if this processor owns this bucket
  if(std::get<1>(*(leftBucketBoundaryRange.first)) == rank)
  {
    iownLeftBucket = true;
    for(auto it = leftBucketBoundaryRange.first; it != leftBucketBoundaryRange.second; it++)
    {
      leftBucketSize += std::get<2>(*it);
    }
  }
  else
    iownLeftBucket = false;

  auto rightBucketBoundaryRange = std::equal_range(allBoundaryPartitionSizes.begin(), allBoundaryPartitionSizes.end(), toSend[1], pccomp2);

  //Check if this processor owns right partition
  if(std::get<1>(*rightBucketBoundaryRange.first) == rank && !onlySingleLocalPartition)
  {
    iownRightBucket = true;
    for(auto it = rightBucketBoundaryRange.first; it != rightBucketBoundaryRange.second; it++)
    {
      rightBucketSize += std::get<2>(*it);
    }
  }
  else
    iownRightBucket = false;

  //Map from partition size to count 
  typedef std::map <uint64_t, uint32_t> MapType;
  MapType localHistMap;

  for(auto it = localVector.begin(); it!= localVector.end();)  // iterate over all segments.
  {
    auto innerLoopBound = findRange(it, localVector.end(), *it, pccomp);

    //Left most bucket
    if (innerLoopBound.first == localVector.begin()) // first
    {
      if(iownLeftBucket)
        insertToHistogram(localHistMap, leftBucketSize);
    }
    //Right most bucket
    else if (innerLoopBound.second == localVector.end()) // first
    {
      if(iownRightBucket)
        insertToHistogram(localHistMap, rightBucketSize);
    }
    //Inner buckets
    else
    {
      insertToHistogram(localHistMap, innerLoopBound.second - innerLoopBound.first);
    }

    it = innerLoopBound.second;
  }

  //Convert map to vector
  using tupleTypeforHist = std::tuple<uint64_t, uint32_t>;
  std::vector<tupleTypeforHist> localHistVector;

  for(MapType::iterator it = localHistMap.begin(); it != localHistMap.end(); ++it ) 
  {
    localHistVector.push_back(std::make_tuple(it->first, it->second));
  }

  //Gather vector from all processors to root
  auto globalHistVector = mxx::gather_vectors(localHistVector, comm);
  static layer_comparator<0, tupleTypeforHist> partition_size_cmp;

  //Write to file
  if(rank == 0)
  {
    //Sort the vector to bring counts with same size adjacent (default comparator will work)
    std::sort(globalHistVector.begin(), globalHistVector.end());

    std::ofstream ofs;
    ofs.open(filename, std::ios_base::out);

    //Iterate over the global vector
    for(auto it = globalHistVector.begin(); it != globalHistVector.end();)
    {
      //Range of counts belonging to same partition size
      auto innerLoopRange = findRange(it, globalHistVector.end(), *it, partition_size_cmp);
      auto p_size = std::get<0>(*it);
      uint32_t sum = 0;

      //Loop over the range
      for(auto it2 = innerLoopRange.first; it2 != innerLoopRange.second; it2++)
        sum += std::get<1>(*it2);

      ofs << p_size << " " << sum <<"\n";
      it = innerLoopRange.second;
    }

    ofs.close();

  }
}