void UncoupledAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::
  BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector<unsigned>& aggStat,
                  LO& numNonAggregatedNodes) const {
    Monitor m(*this, "BuildAggregates");

    AggOptions::Ordering ordering    = params.get<AggOptions::Ordering>("Ordering");
    LO MaxNeighAlreadySelected       = params.get<LO>                  ("MaxNeighAlreadySelected");
    LO MinNodesPerAggregate          = params.get<LO>                  ("MinNodesPerAggregate");
    LO MaxNodesPerAggregate          = params.get<LO>                  ("MaxNodesPerAggregate");

    TEUCHOS_TEST_FOR_EXCEPTION(MaxNodesPerAggregate < MinNodesPerAggregate, Exceptions::RuntimeError, "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: MinNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");

    if (ordering != NATURAL && ordering != RANDOM && ordering != GRAPH)
      throw Exceptions::RuntimeError("UncoupledAggregation::BuildAggregates : bad aggregation ordering option");

    const LO  nRows  = graph.GetNodeNumVertices();
    const int myRank = graph.GetComm()->getRank();

    // vertex ids for output
    Teuchos::ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    Teuchos::ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);

    // some internal variables
    LO nLocalAggregates = aggregates.GetNumAggregates();    // number of local aggregates on current proc
    std::queue<LO> graph_ordering_inodes; // inodes for graph ordering

    ArrayRCP<LO> randomVector;
    if (ordering == RANDOM) {
      randomVector = arcp<LO>(nRows);
      for (LO i = 0; i < nRows; i++)
        randomVector[i] = i;
      RandomReorder(randomVector);
    }

    int              aggIndex = -1;
    size_t           aggSize  =  0;
    std::vector<int> aggList(graph.getNodeMaxNumRowEntries());

    // Main loop over all local rows of graph(A)
    for (LO iNode2 = 0; iNode2 < nRows; iNode2++) {
      // Step 1: pick the next node to aggregate
      LO iNode1 = 0;
      if      (ordering == NATURAL) iNode1 = iNode2;
      else if (ordering == RANDOM)  iNode1 = randomVector[iNode2];
      else if (ordering == GRAPH) {

        if (graph_ordering_inodes.size() == 0) {
          // There are no nodes for graph ordering scheme,
          // add exactly one ready node for graph ordering aggregates
          for (LO jnode = 0; jnode < nRows; jnode++)
            if (aggStat[jnode] == NodeStats::READY) {
              graph_ordering_inodes.push(jnode);
              break;
            }
        }
        if (graph_ordering_inodes.size() == 0) {
          // There are no more ready nodes, end the phase
          break;
        }
        iNode1 = graph_ordering_inodes.front();   // take next node from graph ordering queue
        graph_ordering_inodes.pop();              // delete this node in list
      }

      if (aggStat[iNode1] == NodeStats::READY) {
        // Step 2: build tentative aggregate
        aggSize = 0;
        aggList[aggSize++] = iNode1;

        ArrayView<const LO> neighOfINode = graph.getNeighborVertices(iNode1);

        LO numAggregatedNeighbours = 0;

        // NOTE: if neighOfINode.size() < MinNodesPerAggregate, we could skip this loop,
        // but only for NATURAL and RANDOM (for GRAPH we still need the list of local neighbors)
        for (LO j = 0; j < neighOfINode.size(); j++) {
          LO neigh = neighOfINode[j];

          if (neigh != iNode1 && graph.isLocalNeighborVertex(neigh)) {

            if (aggStat[neigh] == NodeStats::READY || aggStat[neigh] == NodeStats::NOTSEL) {
              // Add neighbor node to tentative aggregate
              // but only if aggregate size is not exceeding maximum size
              // NOTE: We do not exit the loop over all neighbours since we have still
              //       to count all aggregated neighbour nodes for the aggregation criteria
              // NOTE: We check here for the maximum aggregation size. If we would do it below
              //       with all the other check too big aggregates would not be accepted at all.
              if (aggSize < as<size_t>(MaxNodesPerAggregate))
                aggList[aggSize++] = neigh;

            } else {
              numAggregatedNeighbours++;
            }
          }
        }

        // Step 3: check if tentative aggregate is acceptable
        if ((numAggregatedNeighbours <= MaxNeighAlreadySelected) &&   // too many connections to other aggregates
            (as<LO>(aggSize)         >= MinNodesPerAggregate)) {      // too few nodes in the tentative aggregate
          // Accept new aggregate
          // iNode1 becomes the root of the newly formed aggregate
          aggregates.SetIsRoot(iNode1);
          aggIndex = nLocalAggregates++;

          for (size_t k = 0; k < aggSize; k++) {
            aggStat     [aggList[k]] = NodeStats::AGGREGATED;
            vertex2AggId[aggList[k]] = aggIndex;
            procWinner  [aggList[k]] = myRank;

            if (ordering == GRAPH) {
              Teuchos::ArrayView<const LO> neighOfJNode = graph.getNeighborVertices(aggList[k]);
              for (int j = 0; j < neighOfJNode.size(); j++) {
                LO neigh = neighOfJNode[j];

                if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == NodeStats::READY)
                  graph_ordering_inodes.push(neigh);
              }
            }
          }

          numNonAggregatedNodes -= aggSize;

        } else {
          // Aggregate is not accepted
          aggStat[iNode1] = NodeStats::NOTSEL;

          if (ordering == GRAPH) {
            // Even though the aggregate around iNode1 is not perfect, we want to try
            // the neighbor nodes of iNode1
            for (int j = 0; j < neighOfINode.size(); j++) {
              LO neigh = neighOfINode[j];

              if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == NodeStats::READY)
                graph_ordering_inodes.push(neigh);
            }
          }
        }
      }
    }

    // update aggregate object
    aggregates.SetNumAggregates(nLocalAggregates);
  }
  void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
  BuildAggregates(const ParameterList& params, const LWGraph_kokkos& graph, Aggregates_kokkos& aggregates, std::vector<unsigned>& aggStat,
                  LO& numNonAggregatedNodes) const {
    Monitor m(*this, "BuildAggregates");

    std::string ordering        = params.get<std::string>("aggregation: ordering");
    int maxNeighAlreadySelected = params.get<int>         ("aggregation: max selected neighbors");
    int minNodesPerAggregate    = params.get<int>         ("aggregation: min agg size");
    int maxNodesPerAggregate    = params.get<int>         ("aggregation: max agg size");

    TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate, Exceptions::RuntimeError,
                               "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");

    const LO  numRows = graph.GetNodeNumVertices();
    const int myRank  = graph.GetComm()->getRank();

    ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);

    LO numLocalAggregates = aggregates.GetNumAggregates();

    ArrayRCP<LO> randomVector;
    if (ordering == "random") {
      randomVector = arcp<LO>(numRows);
      for (LO i = 0; i < numRows; i++)
        randomVector[i] = i;
      RandomReorder(randomVector);
    }

    int              aggIndex = -1;
    size_t           aggSize  =  0;
    std::vector<int> aggList(graph.getNodeMaxNumRowEntries());

    std::queue<LO>   graphOrderQueue;

    // Main loop over all local rows of graph(A)
    for (LO i = 0; i < numRows; i++) {
      // Step 1: pick the next node to aggregate
      LO rootCandidate = 0;
      if      (ordering == "natural") rootCandidate = i;
      else if (ordering == "random")  rootCandidate = randomVector[i];
      else if (ordering == "graph") {

        if (graphOrderQueue.size() == 0) {
          // Current queue is empty for "graph" ordering, populate with one READY node
          for (LO jnode = 0; jnode < numRows; jnode++)
            if (aggStat[jnode] == READY) {
              graphOrderQueue.push(jnode);
              break;
            }
        }
        if (graphOrderQueue.size() == 0) {
          // There are no more ready nodes, end the phase
          break;
        }
        rootCandidate = graphOrderQueue.front();   // take next node from graph ordering queue
        graphOrderQueue.pop();                     // delete this node in list
      }

      if (aggStat[rootCandidate] != READY)
        continue;

      // Step 2: build tentative aggregate
      aggSize = 0;
      aggList[aggSize++] = rootCandidate;

      // TODO replace me
      //ArrayView<const LO> neighOfINode = graph.getNeighborVertices(rootCandidate);
      ArrayView<const LO> neighOfINode;

      // If the number of neighbors is less than the minimum number of nodes
      // per aggregate, we know this is not going to be a valid root, and we
      // may skip it, but only for "natural" and "random" (for "graph" we still
      // need to fetch the list of local neighbors to continue)
      if ((ordering == "natural" || ordering == "random") &&
          neighOfINode.size() < minNodesPerAggregate) {
        continue;
      }

      LO numAggregatedNeighbours = 0;

      for (int j = 0; j < neighOfINode.size(); j++) {
        LO neigh = neighOfINode[j];

        if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) {

          if (aggStat[neigh] == READY || aggStat[neigh] == NOTSEL) {
            // If aggregate size does not exceed max size, add node to the
            // tentative aggregate
            // NOTE: We do not exit the loop over all neighbours since we have
            // still to count all aggregated neighbour nodes for the
            // aggregation criteria
            // NOTE: We check here for the maximum aggregation size. If we
            // would do it below with all the other check too big aggregates
            // would not be accepted at all.
            if (aggSize < as<size_t>(maxNodesPerAggregate))
              aggList[aggSize++] = neigh;

          } else {
            numAggregatedNeighbours++;
          }
        }
      }

      // Step 3: check if tentative aggregate is acceptable
      if ((numAggregatedNeighbours <= maxNeighAlreadySelected) &&           // too many connections to other aggregates
          (aggSize                 >= as<size_t>(minNodesPerAggregate))) {  // too few nodes in the tentative aggregate
        // Accept new aggregate
        // rootCandidate becomes the root of the newly formed aggregate
        aggregates.SetIsRoot(rootCandidate);
        aggIndex = numLocalAggregates++;

        for (size_t k = 0; k < aggSize; k++) {
          aggStat     [aggList[k]] = AGGREGATED;
          vertex2AggId[aggList[k]] = aggIndex;
          procWinner  [aggList[k]] = myRank;
        }

        numNonAggregatedNodes -= aggSize;

      } else {
        // Aggregate is not accepted
        aggStat[rootCandidate] = NOTSEL;

        // Need this for the "graph" ordering below
        // The original candidate is always aggList[0]
        aggSize = 1;
      }

      if (ordering == "graph") {
        // Add candidates to the list of nodes
        // NOTE: the code have slightly different meanings depending on context:
        //  - if aggregate was accepted, we add neighbors of neighbors of the original candidate
        //  - if aggregate was not accepted, we add neighbors of the original candidate
        for (size_t k = 0; k < aggSize; k++) {

          // TODO replace this
          //ArrayView<const LO> neighOfJNode = graph.getNeighborVertices(aggList[k]);
          ArrayView<const LO> neighOfJNode;

          for (int j = 0; j < neighOfJNode.size(); j++) {
            LO neigh = neighOfJNode[j];

            if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY)
              graphOrderQueue.push(neigh);
          }
        }
      }
    }

    // Reset all NOTSEL vertices to READY
    // This simplifies other algorithms
    for (LO i = 0; i < numRows; i++)
      if (aggStat[i] == NOTSEL)
        aggStat[i] = READY;

    // update aggregate object
    aggregates.SetNumAggregates(numLocalAggregates);
  }
  void LocalAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::CoarsenUncoupled(GraphBase const & graph, Aggregates & aggregates) const {
    Monitor m(*this, "Coarsen Uncoupled");

    std::string orderingType;
    switch(ordering_) {
      case NATURAL:
        orderingType="Natural";
        break;
      case RANDOM:
        orderingType="Random";
        break;
      case GRAPH:
        orderingType="Graph";
        break;
      default:
        break;
    }
    GetOStream(Runtime1) << "Ordering:                  " << orderingType << std::endl;
    GetOStream(Runtime1) << "Min nodes per aggregate:   " << minNodesPerAggregate_ << std::endl;
    GetOStream(Runtime1) << "Max nbrs already selected: " << maxNeighAlreadySelected_ << std::endl;

    /* Create Aggregation object */
    my_size_t nAggregates = 0;

    /* ============================================================= */
    /* aggStat indicates whether this node has been aggreated, and   */
    /* vertex2AggId stores the aggregate number where this node has  */
    /* been aggregated into.                                         */
    /* ============================================================= */

    Teuchos::ArrayRCP<NodeState> aggStat;
    const my_size_t nRows = graph.GetNodeNumVertices();
    if (nRows > 0) aggStat = Teuchos::arcp<NodeState>(nRows);
    for ( my_size_t i = 0; i < nRows; ++i ) aggStat[i] = READY;

    /* ============================================================= */
    /* Phase 1  :                                                    */
    /*    for all nodes, form a new aggregate with its neighbors     */
    /*    if the number of its neighbors having been aggregated does */
    /*    not exceed a given threshold                               */
    /*    (GetMaxNeighAlreadySelected() = 0 ===> Vanek's scheme) */
    /* ============================================================= */

    /* some general variable declarations */
    Teuchos::ArrayRCP<LO> randomVector;
    RCP<MueLu::LinkedList> nodeList; /* list storing the next node to pick as a root point for ordering_ == GRAPH */
    MueLu_SuperNode  *aggHead=NULL, *aggCurrent=NULL, *supernode=NULL;
    /**/

    if ( ordering_ == RANDOM )       /* random ordering */
      {
        //TODO: could be stored in a class that respect interface of LinkedList

        randomVector = Teuchos::arcp<LO>(nRows); //size_t or int ?-> to be propagated
        for (my_size_t i = 0; i < nRows; ++i) randomVector[i] = i;
        RandomReorder(randomVector);
      }
    else if ( ordering_ == GRAPH )  /* graph ordering */
      {
        nodeList = rcp(new MueLu::LinkedList());
        nodeList->Add(0);
      }

    /* main loop */
    {
      LO iNode  = 0;
      LO iNode2 = 0;

      Teuchos::ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); // output only: contents ignored

      while (iNode2 < nRows)
        {

          /*------------------------------------------------------ */
          /* pick the next node to aggregate                       */
          /*------------------------------------------------------ */

          if      ( ordering_ == NATURAL ) iNode = iNode2++;
          else if ( ordering_ == RANDOM )  iNode = randomVector[iNode2++];
          else if ( ordering_ == GRAPH )
            {
              if ( nodeList->IsEmpty() )
                {
                  for ( int jNode = 0; jNode < nRows; ++jNode )
                    {
                      if ( aggStat[jNode] == READY )
                        {
                          nodeList->Add(jNode); //TODO optim: not necessary to create a node. Can just set iNode value and skip the end
                          break;
                        }
                    }
                }
              if ( nodeList->IsEmpty() ) break; /* end of the while loop */ //TODO: coding style :(

              iNode = nodeList->Pop();
            }
          else {
            throw(Exceptions::RuntimeError("CoarsenUncoupled: bad aggregation ordering option"));
          }

          /*------------------------------------------------------ */
          /* consider further only if the node is in READY mode    */
          /*------------------------------------------------------ */

          if ( aggStat[iNode] == READY )
            {
              // neighOfINode is the neighbor node list of node 'iNode'.
              Teuchos::ArrayView<const LO> neighOfINode = graph.getNeighborVertices(iNode);
              typename Teuchos::ArrayView<const LO>::size_type length = neighOfINode.size();

              supernode = new MueLu_SuperNode;
              try {
                supernode->list = Teuchos::arcp<int>(length+1);
              } catch (std::bad_alloc&) {
                TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::LocalAggregationAlgorithm::CoarsenUncoupled(): Error: couldn't allocate memory for supernode! length=" + Teuchos::toString(length));
              }

              supernode->maxLength = length;
              supernode->length = 1;
              supernode->list[0] = iNode;

              int selectFlag = 1;
              {
                /*--------------------------------------------------- */
                /* count the no. of neighbors having been aggregated  */
                /*--------------------------------------------------- */

                int count = 0;
                for (typename Teuchos::ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it)
                  {
                    int index = *it;
                    if ( index < nRows )
                      {
                        if ( aggStat[index] == READY ||
                             aggStat[index] == NOTSEL )
                          supernode->list[supernode->length++] = index;
                        else count++;

                      }
                  }

                /*--------------------------------------------------- */
                /* if there are too many neighbors aggregated or the  */
                /* number of nodes in the new aggregate is too few,   */
                /* don't do this one                                  */
                /*--------------------------------------------------- */

                if ( count > GetMaxNeighAlreadySelected() ) selectFlag = 0;
              }

              // Note: the supernode length is actually 1 more than the
              //       number of nodes in the candidate aggregate. The
              //       root is counted twice. I'm not sure if this is
              //       a bug or a feature ... so I'll leave it and change
              //       < to <= in the if just below.

              if (selectFlag != 1 ||
                  supernode->length <= GetMinNodesPerAggregate())
                {
                  aggStat[iNode] = NOTSEL;
                  delete supernode;
                  if ( ordering_ == GRAPH ) /* if graph ordering */
                    {
                      for (typename Teuchos::ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it)
                        {
                          int index = *it;
                          if  ( index < nRows && aggStat[index] == READY )
                            {
                              nodeList->Add(index);
                            }
                        }
                    }
                }
              else
                {
                  aggregates.SetIsRoot(iNode);
                  for ( int j = 0; j < supernode->length; ++j )
                    {
                      int jNode = supernode->list[j];
                      aggStat[jNode] = SELECTED;
                      vertex2AggId[jNode] = nAggregates;
                      if ( ordering_ == GRAPH ) /* if graph ordering */
                        {

                          Teuchos::ArrayView<const LO> neighOfJNode = graph.getNeighborVertices(jNode);

                          for (typename Teuchos::ArrayView<const LO>::const_iterator it = neighOfJNode.begin(); it != neighOfJNode.end(); ++it)
                            {
                              int index = *it;
                              if ( index < nRows && aggStat[index] == READY )
                                {
                                  nodeList->Add(index);
                                }
                            }
                        }
                    }
                  supernode->next = NULL;
                  supernode->index = nAggregates;
                  if ( nAggregates == 0 )
                    {
                      aggHead = supernode;
                      aggCurrent = supernode;
                    }
                  else
                    {
                      aggCurrent->next = supernode;
                      aggCurrent = supernode;
                    }
                  nAggregates++;
                  // unused aggCntArray[nAggregates] = supernode->length;
                }
            }
        } // end of 'for'

      // views on distributed vectors are freed here.

    } // end of 'main loop'

    nodeList = Teuchos::null;

    /* Update aggregate object */
    aggregates.SetNumAggregates(nAggregates);

    /* Verbose */
    {
      const RCP<const Teuchos::Comm<int> > & comm = graph.GetComm();

      if (IsPrint(Warnings0)) {
        GO localReady=0, globalReady;

        // Compute 'localReady'
        for ( my_size_t i = 0; i < nRows; ++i )
          if (aggStat[i] == READY) localReady++;

        // Compute 'globalReady'
        sumAll(comm, localReady, globalReady);

        if(globalReady > 0)
          GetOStream(Warnings0) << "Warning: " << globalReady << " READY nodes left" << std::endl;
      }

      if (IsPrint(Statistics1)) {
        // Compute 'localSelected'
        LO localSelected=0;
        for ( my_size_t i = 0; i < nRows; ++i )
          if ( aggStat[i] == SELECTED ) localSelected++;

        // Compute 'globalSelected'
        GO globalSelected; sumAll(comm, (GO)localSelected, globalSelected);

        // Compute 'globalNRows'
        GO globalNRows; sumAll(comm, (GO)nRows, globalNRows);

        GetOStream(Statistics1) << "Nodes aggregated = " << globalSelected << " (" << globalNRows << ")" << std::endl;
      }

      if (IsPrint(Statistics1)) {
        GO nAggregatesGlobal; sumAll(comm, (GO)nAggregates, nAggregatesGlobal);
        GetOStream(Statistics1) << "Total aggregates = " << nAggregatesGlobal << std::endl;
      }

    } // verbose

      /* ------------------------------------------------------------- */
      /* clean up                                                      */
      /* ------------------------------------------------------------- */

    aggCurrent = aggHead;
    while ( aggCurrent != NULL )
      {
        supernode = aggCurrent;
        aggCurrent = aggCurrent->next;
        delete supernode;
      }

  } // CoarsenUncoupled