void EmergencyAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector<unsigned>& aggStat, LO& numNonAggregatedNodes) const {
    Monitor m(*this, "BuildAggregates");

    // vertex ids for output
    ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);

    const LO  nRows  = graph.GetNodeNumVertices();
    const int myRank = graph.GetComm()->getRank();

    int              aggIndex = -1;
    size_t           aggSize  =  0;
    std::vector<int> aggList(graph.getNodeMaxNumRowEntries());

    LO nLocalAggregates = aggregates.GetNumAggregates();
    for (LO iNode = 0; iNode < nRows; iNode++) {
      if (aggStat[iNode] != NodeStats::AGGREGATED) {
        aggSize = 0;
        aggregates.SetIsRoot(iNode);

        aggList[aggSize++] = iNode;
        aggIndex = nLocalAggregates++;

        ArrayView<const LO> neighOfINode = graph.getNeighborVertices(iNode);

        for (LO j = 0; j < neighOfINode.size(); j++) {
          LO neigh = neighOfINode[j];

          if (neigh != iNode && graph.isLocalNeighborVertex(neigh) && aggStat[neigh] != NodeStats::AGGREGATED)
            aggList[aggSize++] = neigh;
        }

        // finalize aggregate
        for (size_t k = 0; k < aggSize; k++) {
          aggStat     [aggList[k]] = NodeStats::AGGREGATED;
          vertex2AggId[aggList[k]] = aggIndex;
          procWinner  [aggList[k]] = myRank;
        }

        numNonAggregatedNodes -= aggSize;
      }
    }

    aggregates.SetNumAggregates(nLocalAggregates);
  }
  void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
  BuildAggregates(const ParameterList& params, const LWGraph_kokkos& graph, Aggregates_kokkos& aggregates, std::vector<unsigned>& aggStat,
                  LO& numNonAggregatedNodes) const {
    Monitor m(*this, "BuildAggregates");

    std::string ordering        = params.get<std::string>("aggregation: ordering");
    int maxNeighAlreadySelected = params.get<int>         ("aggregation: max selected neighbors");
    int minNodesPerAggregate    = params.get<int>         ("aggregation: min agg size");
    int maxNodesPerAggregate    = params.get<int>         ("aggregation: max agg size");

    TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError,
                               "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");

    const LO  numRows = graph.GetNodeNumVertices();
    const int myRank  = graph.GetComm()->getRank();

    ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);

    LO numLocalAggregates = aggregates.GetNumAggregates();

    ArrayRCP<LO> randomVector;
    if (ordering == "random") {
      randomVector = arcp<LO>(numRows);
      for (LO i = 0; i < numRows; i++)
        randomVector[i] = i;
      RandomReorder(randomVector);
    }

    int              aggIndex = -1;
    size_t           aggSize  =  0;
    std::vector<int> aggList(graph.getNodeMaxNumRowEntries());

    std::queue<LO>   graphOrderQueue;

    // Main loop over all local rows of graph(A)
    for (LO i = 0; i < numRows; i++) {
      // Step 1: pick the next node to aggregate
      LO rootCandidate = 0;
      if      (ordering == "natural") rootCandidate = i;
      else if (ordering == "random")  rootCandidate = randomVector[i];
      else if (ordering == "graph") {

        if (graphOrderQueue.size() == 0) {
          // Current queue is empty for "graph" ordering, populate with one READY node
          for (LO jnode = 0; jnode < numRows; jnode++)
            if (aggStat[jnode] == READY) {
              graphOrderQueue.push(jnode);
              break;
            }
        }
        if (graphOrderQueue.size() == 0) {
          // There are no more ready nodes, end the phase
          break;
        }
        rootCandidate = graphOrderQueue.front();   // take next node from graph ordering queue
        graphOrderQueue.pop();                     // delete this node in list
      }

      if (aggStat[rootCandidate] != READY)
        continue;

      // Step 2: build tentative aggregate
      aggSize = 0;
      aggList[aggSize++] = rootCandidate;

      // TODO replace me
      //ArrayView<const LO> neighOfINode = graph.getNeighborVertices(rootCandidate);
      ArrayView<const LO> neighOfINode;

      // If the number of neighbors is less than the minimum number of nodes
      // per aggregate, we know this is not going to be a valid root, and we
      // may skip it, but only for "natural" and "random" (for "graph" we still
      // need to fetch the list of local neighbors to continue)
      if ((ordering == "natural" || ordering == "random") &&
          neighOfINode.size() < minNodesPerAggregate) {
        continue;
      }

      LO numAggregatedNeighbours = 0;

      for (int j = 0; j < neighOfINode.size(); j++) {
        LO neigh = neighOfINode[j];

        if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) {

          if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) {
            // If aggregate size does not exceed max size, add node to the
            // tentative aggregate
            // NOTE: We do not exit the loop over all neighbours since we have
            // still to count all aggregated neighbour nodes for the
            // aggregation criteria
            // NOTE: We check here for the maximum aggregation size. If we
            // would do it below with all the other check too big aggregates
            // would not be accepted at all.
            if (aggSize < as<size_t>(maxNodesPerAggregate))
              aggList[aggSize++] = neigh;

          } else {
            numAggregatedNeighbours++;
          }
        }
      }

      // Step 3: check if tentative aggregate is acceptable
      if ((numAggregatedNeighbours <= maxNeighAlreadySelected) &&           // too many connections to other aggregates
          (aggSize                 >= as<size_t>(minNodesPerAggregate))) {  // too few nodes in the tentative aggregate
        // Accept new aggregate
        // rootCandidate becomes the root of the newly formed aggregate
        aggregates.SetIsRoot(rootCandidate);
        aggIndex = numLocalAggregates++;

        for (size_t k = 0; k < aggSize; k++) {
          aggStat     [aggList[k]] = AGGREGATED;
          vertex2AggId[aggList[k]] = aggIndex;
          procWinner  [aggList[k]] = myRank;
        }

        numNonAggregatedNodes -= aggSize;

      } else {
        // Aggregate is not accepted
        aggStat[rootCandidate] = NOTSEL;

        // Need this for the "graph" ordering below
        // The original candidate is always aggList[0]
        aggSize = 1;
      }

      if (ordering == "graph") {
        // Add candidates to the list of nodes
        // NOTE: the code have slightly different meanings depending on context:
        //  - if aggregate was accepted, we add neighbors of neighbors of the original candidate
        //  - if aggregate was not accepted, we add neighbors of the original candidate
        for (size_t k = 0; k < aggSize; k++) {

          // TODO replace this
          //ArrayView<const LO> neighOfJNode = graph.getNeighborVertices(aggList[k]);
          ArrayView<const LO> neighOfJNode;

          for (int j = 0; j < neighOfJNode.size(); j++) {
            LO neigh = neighOfJNode[j];

            if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY)
              graphOrderQueue.push(neigh);
          }
        }
      }
    }

    // Reset all NOTSEL vertices to READY
    // This simplifies other algorithms
    for (LO i = 0; i < numRows; i++)
      if (aggStat[i] == NOTSEL)
        aggStat[i] = READY;

    // update aggregate object
    aggregates.SetNumAggregates(numLocalAggregates);
  }
  void UncoupledAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::
  BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector<unsigned>& aggStat,
                  LO& numNonAggregatedNodes) const {
    Monitor m(*this, "BuildAggregates");

    AggOptions::Ordering ordering    = params.get<AggOptions::Ordering>("Ordering");
    LO MaxNeighAlreadySelected       = params.get<LO>                  ("MaxNeighAlreadySelected");
    LO MinNodesPerAggregate          = params.get<LO>                  ("MinNodesPerAggregate");
    LO MaxNodesPerAggregate          = params.get<LO>                  ("MaxNodesPerAggregate");

    TEUCHOS_TEST_FOR_EXCEPTION(MaxNodesPerAggregate < MinNodesPerAggregate, Exceptions::RuntimeError, "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: MinNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");

    if (ordering != NATURAL && ordering != RANDOM && ordering != GRAPH)
      throw Exceptions::RuntimeError("UncoupledAggregation::BuildAggregates : bad aggregation ordering option");

    const LO  nRows  = graph.GetNodeNumVertices();
    const int myRank = graph.GetComm()->getRank();

    // vertex ids for output
    Teuchos::ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    Teuchos::ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);

    // some internal variables
    LO nLocalAggregates = aggregates.GetNumAggregates();    // number of local aggregates on current proc
    std::queue<LO> graph_ordering_inodes; // inodes for graph ordering

    ArrayRCP<LO> randomVector;
    if (ordering == RANDOM) {
      randomVector = arcp<LO>(nRows);
      for (LO i = 0; i < nRows; i++)
        randomVector[i] = i;
      RandomReorder(randomVector);
    }

    int              aggIndex = -1;
    size_t           aggSize  =  0;
    std::vector<int> aggList(graph.getNodeMaxNumRowEntries());

    // Main loop over all local rows of graph(A)
    for (LO iNode2 = 0; iNode2 < nRows; iNode2++) {
      // Step 1: pick the next node to aggregate
      LO iNode1 = 0;
      if      (ordering == NATURAL) iNode1 = iNode2;
      else if (ordering == RANDOM)  iNode1 = randomVector[iNode2];
      else if (ordering == GRAPH) {

        if (graph_ordering_inodes.size() == 0) {
          // There are no nodes for graph ordering scheme,
          // add exactly one ready node for graph ordering aggregates
          for (LO jnode = 0; jnode < nRows; jnode++)
            if (aggStat[jnode] == NodeStats::READY) {
              graph_ordering_inodes.push(jnode);
              break;
            }
        }
        if (graph_ordering_inodes.size() == 0) {
          // There are no more ready nodes, end the phase
          break;
        }
        iNode1 = graph_ordering_inodes.front();   // take next node from graph ordering queue
        graph_ordering_inodes.pop();              // delete this node in list
      }

      if (aggStat[iNode1] == NodeStats::READY) {
        // Step 2: build tentative aggregate
        aggSize = 0;
        aggList[aggSize++] = iNode1;

        ArrayView<const LO> neighOfINode = graph.getNeighborVertices(iNode1);

        LO numAggregatedNeighbours = 0;

        // NOTE: if neighOfINode.size() < MinNodesPerAggregate, we could skip this loop,
        // but only for NATURAL and RANDOM (for GRAPH we still need the list of local neighbors)
        for (LO j = 0; j < neighOfINode.size(); j++) {
          LO neigh = neighOfINode[j];

          if (neigh != iNode1 && graph.isLocalNeighborVertex(neigh)) {

            if (aggStat[neigh] == NodeStats::READY || aggStat[neigh] == NodeStats::NOTSEL) {
              // Add neighbor node to tentative aggregate
              // but only if aggregate size is not exceeding maximum size
              // NOTE: We do not exit the loop over all neighbours since we have still
              //       to count all aggregated neighbour nodes for the aggregation criteria
              // NOTE: We check here for the maximum aggregation size. If we would do it below
              //       with all the other check too big aggregates would not be accepted at all.
              if (aggSize < as<size_t>(MaxNodesPerAggregate))
                aggList[aggSize++] = neigh;

            } else {
              numAggregatedNeighbours++;
            }
          }
        }

        // Step 3: check if tentative aggregate is acceptable
        if ((numAggregatedNeighbours <= MaxNeighAlreadySelected) &&   // too many connections to other aggregates
            (as<LO>(aggSize)         >= MinNodesPerAggregate)) {      // too few nodes in the tentative aggregate
          // Accept new aggregate
          // iNode1 becomes the root of the newly formed aggregate
          aggregates.SetIsRoot(iNode1);
          aggIndex = nLocalAggregates++;

          for (size_t k = 0; k < aggSize; k++) {
            aggStat     [aggList[k]] = NodeStats::AGGREGATED;
            vertex2AggId[aggList[k]] = aggIndex;
            procWinner  [aggList[k]] = myRank;

            if (ordering == GRAPH) {
              Teuchos::ArrayView<const LO> neighOfJNode = graph.getNeighborVertices(aggList[k]);
              for (int j = 0; j < neighOfJNode.size(); j++) {
                LO neigh = neighOfJNode[j];

                if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == NodeStats::READY)
                  graph_ordering_inodes.push(neigh);
              }
            }
          }

          numNonAggregatedNodes -= aggSize;

        } else {
          // Aggregate is not accepted
          aggStat[iNode1] = NodeStats::NOTSEL;

          if (ordering == GRAPH) {
            // Even though the aggregate around iNode1 is not perfect, we want to try
            // the neighbor nodes of iNode1
            for (int j = 0; j < neighOfINode.size(); j++) {
              LO neigh = neighOfINode[j];

              if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == NodeStats::READY)
                graph_ordering_inodes.push(neigh);
            }
          }
        }
      }
    }

    // update aggregate object
    aggregates.SetNumAggregates(nLocalAggregates);
  }
  void MaxLinkAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::
  BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector<unsigned>& aggStat, LO& numNonAggregatedNodes) const {
    Monitor m(*this, "BuildAggregates");

    LO MaxNodesPerAggregate = params.get<LO>("MaxNodesPerAggregate");

    // vertex ids for output
    ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);
    ArrayRCP<LO> aggSizes     = aggregates.ComputeAggregateSizes(); // contains number of nodes in aggregate with given aggId

    const LO  nRows  = graph.GetNodeNumVertices();
    const int myRank = graph.GetComm()->getRank();

    size_t           aggSize = 0;
    std::vector<int> aggList(graph.getNodeMaxNumRowEntries());

    //bool recomputeAggregateSizes=false; // variable not used TODO remove it

    for (LO iNode = 0; iNode < nRows; iNode++) {
      if (aggStat[iNode] == NodeStats::AGGREGATED)
        continue;

      ArrayView<const LocalOrdinal> neighOfINode = graph.getNeighborVertices(iNode);

      aggSize = 0;
      for (LO j = 0; j < neighOfINode.size(); j++) {
        LO neigh = neighOfINode[j];

        // NOTE: we don't need the check (neigh != iNode), as we work only
        // if aggStat[neigh] == AGGREGATED, which we know is different from aggStat[iNode]
        if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == NodeStats::AGGREGATED)
          aggList[aggSize++] = vertex2AggId[neigh];
      }

      // Ideally, we would have a _fast_ hash table here.
      // But for the absense of that, sorting works just fine.
      std::sort(aggList.begin(), aggList.begin() + aggSize);

      // terminator
      aggList[aggSize] = -1;

      // Find an aggregate id with most connections to
      LO maxNumConnections =  0, curNumConnections = 0;
      LO selectedAggregate = -1;
      for (size_t i = 0; i < aggSize; i++) {
        curNumConnections++;
        if (aggList[i+1] != aggList[i]) {
          if (curNumConnections > maxNumConnections &&         // only select aggregate if it has more connections
              aggSizes[aggList[i]] < MaxNodesPerAggregate) {   // and if it is not too big (i.e. can have one more node)
            maxNumConnections = curNumConnections;
            selectedAggregate = aggList[i];
            //recomputeAggregateSizes=true;
          }
          curNumConnections = 0;
        }
      }

      // Add node iNode to aggregate
      if (selectedAggregate != -1) {
        aggStat[iNode]      = NodeStats::AGGREGATED;
        vertex2AggId[iNode] = selectedAggregate;
        procWinner[iNode]   = myRank;

        numNonAggregatedNodes--;
      }
    }
  }
  void AggregationPhase2aAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector<unsigned>& aggStat, LO& numNonAggregatedNodes) const {
    Monitor m(*this, "BuildAggregates");

    LO minNodesPerAggregate = params.get<LO>("aggregation: min agg size");
    LO maxNodesPerAggregate = params.get<LO>("aggregation: max agg size");

    const LO  numRows = graph.GetNodeNumVertices();
    const int myRank  = graph.GetComm()->getRank();

    ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);

    LO numLocalAggregates = aggregates.GetNumAggregates();

    LO numLocalNodes      = procWinner.size();
    LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;

    const double aggFactor = 0.5;
    double       factor    = as<double>(numLocalAggregated)/(numLocalNodes+1);
    factor = pow(factor, aggFactor);

    int              aggIndex = -1;
    size_t           aggSize  =  0;
    std::vector<int> aggList(graph.getNodeMaxNumRowEntries());

    for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) {
      if (aggStat[rootCandidate] != READY)
        continue;

      aggSize = 0;

      ArrayView<const LocalOrdinal> neighOfINode = graph.getNeighborVertices(rootCandidate);

      LO numNeighbors = 0;
      for (int j = 0; j < neighOfINode.size(); j++) {
        LO neigh = neighOfINode[j];

        if (neigh != rootCandidate) {
          if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) {
            // If aggregate size does not exceed max size, add node to the tentative aggregate
            // NOTE: We do not exit the loop over all neighbours since we have still
            //       to count all aggregated neighbour nodes for the aggregation criteria
            // NOTE: We check here for the maximum aggregation size. If we would do it below
            //       with all the other check too big aggregates would not be accepted at all.
            if (aggSize < as<size_t>(maxNodesPerAggregate))
              aggList[aggSize++] = neigh;
          }

          numNeighbors++;
        }
      }

      // NOTE: ML uses a hardcoded value 3 instead of MinNodesPerAggregate
      if (aggSize > as<size_t>(minNodesPerAggregate) &&
          aggSize > factor*numNeighbors) {
        // Accept new aggregate
        // rootCandidate becomes the root of the newly formed aggregate
        aggregates.SetIsRoot(rootCandidate);
        aggIndex = numLocalAggregates++;

        for (size_t k = 0; k < aggSize; k++) {
          aggStat     [aggList[k]] = AGGREGATED;
          vertex2AggId[aggList[k]] = aggIndex;
          procWinner  [aggList[k]] = myRank;
        }

        numNonAggregatedNodes -= aggSize;
      }
    }

    // update aggregate object
    aggregates.SetNumAggregates(numLocalAggregates);
  }