//! BuildAggregates routine. virtual void PrintAggregationInformation(const std::string phase, GraphBase const & graph, Aggregates & aggregates, Teuchos::ArrayRCP<unsigned int> & aggStat) const { const RCP<const Teuchos::Comm<int> > & comm = graph.GetComm(); const LocalOrdinal nRows = graph.GetNodeNumVertices(); const LocalOrdinal nLocalAggregates = aggregates.GetNumAggregates(); if(IsPrint(Statistics1)) { LO localAggregated = 0; GO globalAggregated = 0; GO globalNRows = 0; for(LO i=0; i<nRows; ++i) if(aggStat[i] == NodeStats::AGGREGATED) localAggregated++; sumAll(comm, (GO)localAggregated, globalAggregated); sumAll(comm, (GO)nRows, globalNRows); GetOStream(Statistics1, 0) << "Aggregation (UC): " << phase << " Nodes aggregated = " << globalAggregated << " out of " << globalNRows << " nodes" << std::endl; GO nAggregatesGlobal = 0; sumAll(comm, (GO)nLocalAggregates, nAggregatesGlobal); GetOStream(Statistics1, 0) << "Aggregation (UC): " << phase << " Total aggregates = " << nAggregatesGlobal << std::endl; } if(IsPrint(Warnings0)) { GO localNotAggregated = 0; GO globalNotAggregated = 0; for(LO i=0; i<nRows; ++i) if(aggStat[i] != NodeStats::AGGREGATED ) localNotAggregated++; sumAll(comm, (GO)localNotAggregated, globalNotAggregated); if(globalNotAggregated > 0) GetOStream(Warnings0,0) << "Aggregation (UC): " << phase << " (WARNING) " << globalNotAggregated << " unaggregated nodes left" << std::endl; } }
// Drop every aggregate with fewer than min_size members and renumber the survivors.
//
//   aggregates  - aggregate data that is renumbered in place
//   min_size    - aggregates whose computed size is below this value are removed
//   distWeights - arbitration weights; set to 1 for every locally won vertex that was aggregated
//   myWidget    - communication helper used to arbitrate the renumbered assignment
//
// Always returns 0.
int LeftoverAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::RemoveSmallAggs(Aggregates& aggregates, int min_size, RCP<Xpetra::Vector<double,LO,GO,NO> > & distWeights, const MueLu::CoupledAggregationCommHelper<LO,GO,NO,LMO> & myWidget) const {
  const int myPid = aggregates.GetMap()->getComm()->getRank();

  const LO oldNumAggregates = aggregates.GetNumAggregates();

  ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()->getDataNonConst(0);
  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
  const LO numVertices = procWinner.size();

  // On entry this array holds the aggregate sizes; it is then overwritten with the
  // old-id -> new-id translation table (MUELU_UNAGGREGATED for removed aggregates).
  ArrayRCP<LO> aggRemap = aggregates.ComputeAggregateSizes();
  ArrayRCP<double> weights = distWeights->getDataNonConst(0);

  LO numKept = 0;
  for (LO agg = 0; agg < oldNumAggregates; ++agg) {
    if (aggRemap[agg] >= min_size)
      aggRemap[agg] = numKept++;
    else
      aggRemap[agg] = MUELU_UNAGGREGATED;
  }

  // Apply the translation to every vertex owned by this process.
  for (LO v = 0; v < numVertices; ++v) {
    if (procWinner[v] != myPid)
      continue;

    if (vertex2AggId[v] != MUELU_UNAGGREGATED) {
      vertex2AggId[v] = aggRemap[vertex2AggId[v]];
      weights[v]      = 1.;
    }
    // A vertex that is (now) unaggregated cannot be a root.
    if (vertex2AggId[v] == MUELU_UNAGGREGATED)
      aggregates.SetIsRoot(v,false);
  }

  //TODO JJH We want to skip this call
  myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true);
  // All tentatively assigned vertices are now definitive

  // procWinner is not set correctly for aggregates which have been eliminated
  for (LO v = 0; v < numVertices; ++v) {
    if (vertex2AggId[v] == MUELU_UNAGGREGATED)
      procWinner[v] = MUELU_UNASSIGNED;
  }

  aggregates.SetNumAggregates(numKept);

  return 0; //TODO
} //RemoveSmallAggs
// Put every vertex that is not yet AGGREGATED into a new aggregate.
//
// Each such vertex becomes the root of a fresh aggregate that also takes all of its
// local neighbours that are not AGGREGATED yet.
//
//   params                - not read by this routine
//   graph                 - supplies vertex count, rank, and neighbour lists
//   aggregates            - receives roots, vertex2AggId / procWinner entries and the new aggregate count
//   aggStat               - per-vertex status; members of new aggregates are set to NodeStats::AGGREGATED
//   numNonAggregatedNodes - decremented by the size of every aggregate that is created
void EmergencyAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector<unsigned>& aggStat, LO& numNonAggregatedNodes) const {
  Monitor m(*this, "BuildAggregates");

  // vertex ids for output
  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
  ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);

  const LO  nRows  = graph.GetNodeNumVertices();
  const int myRank = graph.GetComm()->getRank();

  // The tentative aggregate holds the root plus every neighbour different from the root.
  // If a row has no self-edge, that is one entry more than the row length, so reserve
  // one extra slot to avoid writing past the end of the buffer.
  std::vector<int> aggList(graph.getNodeMaxNumRowEntries() + 1);

  LO nLocalAggregates = aggregates.GetNumAggregates();

  for (LO iNode = 0; iNode < nRows; iNode++) {
    if (aggStat[iNode] == NodeStats::AGGREGATED)
      continue;

    // iNode becomes the root of a new aggregate
    size_t aggSize = 0;
    aggregates.SetIsRoot(iNode);
    aggList[aggSize++] = iNode;
    const int aggIndex = nLocalAggregates++;

    ArrayView<const LO> neighOfINode = graph.getNeighborVertices(iNode);
    for (LO j = 0; j < neighOfINode.size(); j++) {
      LO neigh = neighOfINode[j];

      if (neigh != iNode && graph.isLocalNeighborVertex(neigh) && aggStat[neigh] != NodeStats::AGGREGATED)
        aggList[aggSize++] = neigh;
    }

    // finalize aggregate
    for (size_t k = 0; k < aggSize; k++) {
      aggStat     [aggList[k]] = NodeStats::AGGREGATED;
      vertex2AggId[aggList[k]] = aggIndex;
      procWinner  [aggList[k]] = myRank;
    }

    numNonAggregatedNodes -= aggSize;
  }

  // update aggregate object
  aggregates.SetNumAggregates(nLocalAggregates);
}
// Turn every vertex flagged ONEPT into its own single-vertex aggregate.
//
//   params                - not read by this routine
//   graph                 - supplies the local vertex count and the rank
//   aggregates            - receives roots, vertex2AggId / procWinner entries and the new aggregate count
//   aggStat               - per-vertex status; handled ONEPT vertices are switched to IGNORED
//   numNonAggregatedNodes - decremented once per aggregate that is created
void OnePtAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node>::BuildAggregates(Teuchos::ParameterList const & params, GraphBase const & graph, Aggregates & aggregates, std::vector<unsigned>& aggStat, LO& numNonAggregatedNodes) const {
  Monitor m(*this, "BuildAggregates");

  const LocalOrdinal numLocalRows = graph.GetNodeNumVertices();
  const int          thisRank     = graph.GetComm()->getRank();

  // output views
  Teuchos::ArrayRCP<LocalOrdinal> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
  Teuchos::ArrayRCP<LocalOrdinal> procWinner   = aggregates.GetProcWinner()->getDataNonConst(0);

  // running count of aggregates on this process
  LocalOrdinal aggCount = aggregates.GetNumAggregates();

  // sweep over all local rows of graph(A)
  for (LocalOrdinal node = 0; node < numLocalRows; ++node) {
    if (aggStat[node] != ONEPT)
      continue;

    // the vertex is both the root and the sole member of the new aggregate
    aggregates.SetIsRoot(node);
    const LocalOrdinal newAggId = aggCount++;

    aggStat[node]      = IGNORED;
    vertex2AggId[node] = newAggId;
    procWinner[node]   = thisRank;

    numNonAggregatedNodes -= 1;
  }

  // update aggregate object
  aggregates.SetNumAggregates(aggCount);
}
// Build aggregates from READY vertices of the local graph.
//
// Vertices are visited in NATURAL, RANDOM or GRAPH order (parameter "Ordering"). For a
// READY vertex a tentative aggregate is formed from the vertex and its local neighbours
// that are READY or NOTSEL, capped at "MaxNodesPerAggregate" members. The aggregate is
// accepted if at most "MaxNeighAlreadySelected" of the remaining local neighbours have
// another status and it has at least "MinNodesPerAggregate" members; otherwise the
// vertex is marked NOTSEL.
//
//   params                - read: "Ordering", "MaxNeighAlreadySelected",
//                           "MinNodesPerAggregate", "MaxNodesPerAggregate"
//   graph                 - supplies vertex count, rank, and neighbour lists
//   aggregates            - receives roots, vertex2AggId / procWinner entries and the new aggregate count
//   aggStat               - per-vertex status, updated to AGGREGATED / NOTSEL
//   numNonAggregatedNodes - decremented by the size of every accepted aggregate
//
// Throws Exceptions::RuntimeError if MaxNodesPerAggregate < MinNodesPerAggregate or the
// ordering option is not one of the three supported values.
void UncoupledAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector<unsigned>& aggStat, LO& numNonAggregatedNodes) const {
  Monitor m(*this, "BuildAggregates");

  AggOptions::Ordering ordering    = params.get<AggOptions::Ordering>("Ordering");
  LO MaxNeighAlreadySelected       = params.get<LO>                  ("MaxNeighAlreadySelected");
  LO MinNodesPerAggregate          = params.get<LO>                  ("MinNodesPerAggregate");
  LO MaxNodesPerAggregate          = params.get<LO>                  ("MaxNodesPerAggregate");

  TEUCHOS_TEST_FOR_EXCEPTION(MaxNodesPerAggregate < MinNodesPerAggregate, Exceptions::RuntimeError, "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: MinNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");

  if (ordering != NATURAL && ordering != RANDOM && ordering != GRAPH)
    throw Exceptions::RuntimeError("UncoupledAggregation::BuildAggregates : bad aggregation ordering option");

  const LO  nRows  = graph.GetNodeNumVertices();
  const int myRank = graph.GetComm()->getRank();

  // vertex ids for output
  Teuchos::ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
  Teuchos::ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);

  // some internal variables
  LO nLocalAggregates = aggregates.GetNumAggregates();    // number of local aggregates on current proc
  std::queue<LO> graph_ordering_inodes;                   // inodes for graph ordering

  ArrayRCP<LO> randomVector;
  if (ordering == RANDOM) {
    randomVector = arcp<LO>(nRows);
    for (LO i = 0; i < nRows; i++)
      randomVector[i] = i;
    RandomReorder(randomVector);
  }

  int    aggIndex = -1;
  size_t aggSize  = 0;
  // The tentative aggregate holds the root plus neighbours different from the root.
  // If a row has no self-edge and MaxNodesPerAggregate does not cap it, that is one
  // entry more than the row length, so reserve one extra slot to stay in bounds.
  std::vector<int> aggList(graph.getNodeMaxNumRowEntries() + 1);

  // GRAPH ordering only: first vertex not yet examined by the fallback scan below.
  // No vertex goes back to READY inside this routine, so vertices before this
  // position never need to be looked at again.
  LO firstUnscannedNode = 0;

  // Main loop over all local rows of graph(A)
  for (LO iNode2 = 0; iNode2 < nRows; iNode2++) {
    // Step 1: pick the next node to aggregate
    LO iNode1 = 0;
    if      (ordering == NATURAL) iNode1 = iNode2;
    else if (ordering == RANDOM)  iNode1 = randomVector[iNode2];
    else if (ordering == GRAPH) {

      if (graph_ordering_inodes.empty()) {
        // There are no nodes for graph ordering scheme,
        // add exactly one ready node for graph ordering aggregates
        for (; firstUnscannedNode < nRows; firstUnscannedNode++)
          if (aggStat[firstUnscannedNode] == NodeStats::READY) {
            // This node is popped and processed right below, so the scan can
            // resume behind it next time.
            graph_ordering_inodes.push(firstUnscannedNode++);
            break;
          }
      }
      if (graph_ordering_inodes.empty()) {
        // There are no more ready nodes, end the phase
        break;
      }
      iNode1 = graph_ordering_inodes.front();   // take next node from graph ordering queue
      graph_ordering_inodes.pop();              // delete this node in list
    }

    if (aggStat[iNode1] == NodeStats::READY) {
      // Step 2: build tentative aggregate
      aggSize = 0;
      aggList[aggSize++] = iNode1;

      ArrayView<const LO> neighOfINode = graph.getNeighborVertices(iNode1);

      LO numAggregatedNeighbours = 0;

      // NOTE: if neighOfINode.size() < MinNodesPerAggregate, we could skip this loop,
      // but only for NATURAL and RANDOM (for GRAPH we still need the list of local neighbors)
      for (LO j = 0; j < neighOfINode.size(); j++) {
        LO neigh = neighOfINode[j];

        if (neigh != iNode1 && graph.isLocalNeighborVertex(neigh)) {

          if (aggStat[neigh] == NodeStats::READY || aggStat[neigh] == NodeStats::NOTSEL) {
            // Add neighbor node to tentative aggregate
            // but only if aggregate size is not exceeding maximum size
            // NOTE: We do not exit the loop over all neighbours since we have still
            //       to count all aggregated neighbour nodes for the aggregation criteria
            // NOTE: We check here for the maximum aggregation size. If we would do it below
            //       with all the other check too big aggregates would not be accepted at all.
            if (aggSize < as<size_t>(MaxNodesPerAggregate))
              aggList[aggSize++] = neigh;

          } else {
            numAggregatedNeighbours++;
          }
        }
      }

      // Step 3: check if tentative aggregate is acceptable
      if ((numAggregatedNeighbours <= MaxNeighAlreadySelected) &&   // too many connections to other aggregates
          (as<LO>(aggSize)         >= MinNodesPerAggregate)) {      // too few nodes in the tentative aggregate
        // Accept new aggregate
        // iNode1 becomes the root of the newly formed aggregate
        aggregates.SetIsRoot(iNode1);
        aggIndex = nLocalAggregates++;

        for (size_t k = 0; k < aggSize; k++) {
          aggStat     [aggList[k]] = NodeStats::AGGREGATED;
          vertex2AggId[aggList[k]] = aggIndex;
          procWinner  [aggList[k]] = myRank;

          if (ordering == GRAPH) {
            // Queue the READY local neighbours of every new member
            Teuchos::ArrayView<const LO> neighOfJNode = graph.getNeighborVertices(aggList[k]);
            for (int j = 0; j < neighOfJNode.size(); j++) {
              LO neigh = neighOfJNode[j];

              if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == NodeStats::READY)
                graph_ordering_inodes.push(neigh);
            }
          }
        }

        numNonAggregatedNodes -= aggSize;

      } else {
        // Aggregate is not accepted
        aggStat[iNode1] = NodeStats::NOTSEL;

        if (ordering == GRAPH) {
          // Even though the aggregate around iNode1 is not perfect, we want to try
          // the neighbor nodes of iNode1
          for (int j = 0; j < neighOfINode.size(); j++) {
            LO neigh = neighOfINode[j];

            if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == NodeStats::READY)
              graph_ordering_inodes.push(neigh);
          }
        }
      }
    }
  }

  // update aggregate object
  aggregates.SetNumAggregates(nLocalAggregates);
}
// Build, per aggregate, the list of global DOF ids of the vertices this process won.
//
//   aggregates  - source of the map, procWinner, vertex2AggId and the aggregate count
//   aggStart    - output, size numAggregates+1; aggregate i occupies
//                 [aggStart[i], aggStart[i+1]) in aggToRowMap
//   aggToRowMap - output, global DOF ids grouped by aggregate
//
// With stridedblocksize_ == 1 every owned vertex contributes ComputeGlobalDOF(nodeGID);
// otherwise it contributes each ComputeGlobalDOF(nodeGID, k), k < stridedblocksize_,
// that columnMap_ reports as a node-global element.
void AmalgamationInfo<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::UnamalgamateAggregates(const Aggregates& aggregates, Teuchos::ArrayRCP<LocalOrdinal>& aggStart, Teuchos::ArrayRCP<GlobalOrdinal>& aggToRowMap) const {
  const int thisRank = aggregates.GetMap()->getComm()->getRank();
  Teuchos::ArrayView<const GO> nodeGIDs     = aggregates.GetMap()->getNodeElementList();
  Teuchos::ArrayRCP<LO>        procWinner   = aggregates.GetProcWinner()->getDataNonConst(0);
  Teuchos::ArrayRCP<LO>        vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
  const LO numNodes      = procWinner.size();
  const GO numAggregates = aggregates.GetNumAggregates();

  // First pass: count the DOFs that land in each aggregate.
  std::vector<LO> dofsPerAgg(numAggregates);
  for (LO node = 0; node < numNodes; ++node) {
    if (procWinner[node] != thisRank)
      continue;

    const LO agg = vertex2AggId[node];
    if (stridedblocksize_ == 1) {
      dofsPerAgg[agg] += 1;
    } else {
      const GO nodeGID = nodeGIDs[node];
      for (LocalOrdinal k = 0; k < stridedblocksize_; k++) {
        const GlobalOrdinal dofGID = ComputeGlobalDOF(nodeGID,k);
        if (columnMap_->isNodeGlobalElement(dofGID))
          dofsPerAgg[agg] += 1;
      }
    }
  }

  // Prefix sum of the counts gives the start offset of every aggregate.
  aggStart = ArrayRCP<LO>(numAggregates+1,0);
  aggStart[0] = 0;
  for (GO agg = 0; agg < numAggregates; ++agg)
    aggStart[agg+1] = aggStart[agg] + dofsPerAgg[agg];

  aggToRowMap = ArrayRCP<GO>(aggStart[numAggregates],0);

  // Second pass: fill the slots; filled[agg] is the number of DOFs stored so far.
  Array<LO> filled(numAggregates, 0);
  for (LO node = 0; node < numNodes; ++node) {
    if (procWinner[node] != thisRank)
      continue;

    const LO agg = vertex2AggId[node];
    if (stridedblocksize_ == 1) {
      aggToRowMap[ aggStart[agg] + filled[agg] ] = ComputeGlobalDOF(nodeGIDs[node]);
      ++(filled[agg]);
    } else {
      const GO nodeGID = nodeGIDs[node];
      for (LocalOrdinal k = 0; k < stridedblocksize_; k++) {
        const GlobalOrdinal dofGID = ComputeGlobalDOF(nodeGID,k);
        if (columnMap_->isNodeGlobalElement(dofGID)) {
          aggToRowMap[ aggStart[agg] + filled[agg] ] = dofGID;
          ++(filled[agg]);
        }
      }
    }
  }
  // todo plausibility check: entry filled[k] == aggStart[k+1] - aggStart[k]

} //UnamalgamateAggregates
// Build, per aggregate, the list of local DOF ids of the vertices this process won.
//
//   aggregates  - source of the map, procWinner, vertex2AggId and the aggregate count
//   aggStart    - output, size numAggregates+1; aggregate i occupies
//                 [aggStart[i], aggStart[i+1]) in aggToRowMap
//   aggToRowMap - output, local DOF ids grouped by aggregate
//
// With stridedblocksize_ == 1 every owned vertex contributes its local node id;
// otherwise it contributes lnode*stridedblocksize_ + k for each k < stridedblocksize_
// whose global DOF id columnMap_ reports as a node-global element.
void AmalgamationInfo<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::UnamalgamateAggregatesLO(const Aggregates& aggregates, Teuchos::ArrayRCP<LO>& aggStart, Teuchos::ArrayRCP<LO>& aggToRowMap) const {
  const int thisRank = aggregates.GetMap()->getComm()->getRank();
  Teuchos::ArrayView<const GO> nodeGIDs     = aggregates.GetMap()->getNodeElementList();
  Teuchos::ArrayRCP<LO>        procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);
  Teuchos::ArrayRCP<LO>        vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
  const GO numAggregates = aggregates.GetNumAggregates();

  // FIXME: Do we need to compute size here? Or can we use existing?
  const LO numNodes = procWinner.size();

  // First pass: count the DOFs that land in each aggregate.
  std::vector<LO> dofsPerAgg(numAggregates);
  for (LO node = 0; node < numNodes; node++) {
    if (procWinner[node] != thisRank)
      continue;

    if (stridedblocksize_ == 1) {
      dofsPerAgg[vertex2AggId[node]]++;
    } else {
      const GO nodeGID = nodeGIDs[node];
      for (LO k = 0; k < stridedblocksize_; k++) {
        const GO dofGID = ComputeGlobalDOF(nodeGID, k);
        if (columnMap_->isNodeGlobalElement(dofGID))
          dofsPerAgg[vertex2AggId[node]]++;
      }
    }
  }

  // Prefix sum of the counts gives the start offset of every aggregate.
  aggStart = ArrayRCP<LO>(numAggregates+1); // FIXME: useless initialization with zeros
  aggStart[0] = 0;
  for (GO agg = 0; agg < numAggregates; agg++)
    aggStart[agg+1] = aggStart[agg] + dofsPerAgg[agg];

  aggToRowMap = ArrayRCP<LO>(aggStart[numAggregates], 0);

  // Second pass: fill the slots; filled[agg] is the number of DOFs stored so far.
  Array<LO> filled(numAggregates, 0);
  for (LO node = 0; node < numNodes; ++node) {
    if (procWinner[node] != thisRank)
      continue;

    const LO agg = vertex2AggId[node];
    if (stridedblocksize_ == 1) {
      aggToRowMap[aggStart[agg] + filled[agg]] = node;
      filled[agg]++;
    } else {
      const GO nodeGID = nodeGIDs[node];
      for (LO k = 0; k < stridedblocksize_; k++) {
        const GO dofGID = ComputeGlobalDOF(nodeGID, k);
        if (columnMap_->isNodeGlobalElement(dofGID)) {
          aggToRowMap[aggStart[agg] + filled[agg]] = node*stridedblocksize_ + k;
          filled[agg]++;
        }
      }
    }
  }
  // todo plausibility check: entry filled[k] == aggStart[k+1] - aggStart[k]

} //UnamalgamateAggregatesLO
// Build additional aggregates around READY vertices whose READY neighbourhood is large enough.
//
// For every READY root candidate, its local READY neighbours are collected (at most
// "aggregation: max agg size" of them). The candidate is accepted if the number of
// collected neighbours exceeds both "aggregation: min agg size" and
// factor * (number of neighbours other than the candidate itself), where
// factor = (numLocalAggregated / (numLocalNodes + 1))^0.5.
// On acceptance the candidate and the collected neighbours form a new aggregate.
//
//   params                - read: "aggregation: min agg size", "aggregation: max agg size"
//   graph                 - supplies vertex count, rank, and neighbour lists
//   aggregates            - receives roots, vertex2AggId / procWinner entries and the new aggregate count
//   aggStat               - per-vertex status; members of new aggregates are set to AGGREGATED
//   numNonAggregatedNodes - decremented by the size of every accepted aggregate
void AggregationPhase2aAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildAggregates(const ParameterList& params, const GraphBase& graph, Aggregates& aggregates, std::vector<unsigned>& aggStat, LO& numNonAggregatedNodes) const {
  Monitor m(*this, "BuildAggregates");

  LO minNodesPerAggregate = params.get<LO>("aggregation: min agg size");
  LO maxNodesPerAggregate = params.get<LO>("aggregation: max agg size");

  const LO  numRows = graph.GetNodeNumVertices();
  const int myRank  = graph.GetComm()->getRank();

  ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
  ArrayRCP<LO> procWinner   = aggregates.GetProcWinner()  ->getDataNonConst(0);

  LO numLocalAggregates = aggregates.GetNumAggregates();

  LO numLocalNodes      = procWinner.size();
  LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;

  const double aggFactor = 0.5;
  double       factor    = as<double>(numLocalAggregated)/(numLocalNodes+1);
  factor = pow(factor, aggFactor);

  int    aggIndex = -1;
  size_t aggSize  = 0;
  // Room for a full row of neighbours plus the root, which is appended on acceptance.
  std::vector<int> aggList(graph.getNodeMaxNumRowEntries() + 1);

  for (LO rootCandidate = 0; rootCandidate < numRows; rootCandidate++) {
    if (aggStat[rootCandidate] != READY)
      continue;

    aggSize = 0;

    ArrayView<const LocalOrdinal> neighOfINode = graph.getNeighborVertices(rootCandidate);

    LO numNeighbors = 0;
    for (int j = 0; j < neighOfINode.size(); j++) {
      LO neigh = neighOfINode[j];

      if (neigh != rootCandidate) {
        if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] == READY) {
          // If aggregate size does not exceed max size, add node to the tentative aggregate
          // NOTE: We do not exit the loop over all neighbours since we have still
          //       to count all aggregated neighbour nodes for the aggregation criteria
          // NOTE: We check here for the maximum aggregation size. If we would do it below
          //       with all the other check too big aggregates would not be accepted at all.
          if (aggSize < as<size_t>(maxNodesPerAggregate))
            aggList[aggSize++] = neigh;
        }

        numNeighbors++;
      }
    }

    // NOTE: ML uses a hardcoded value 3 instead of MinNodesPerAggregate
    if (aggSize > as<size_t>(minNodesPerAggregate) &&
        aggSize > factor*numNeighbors) {
      // Accept new aggregate
      // rootCandidate becomes the root of the newly formed aggregate
      aggregates.SetIsRoot(rootCandidate);

      // The root must be a member of its own aggregate; otherwise it would stay READY
      // without a vertex2AggId / procWinner entry although it is flagged as a root.
      // It is added after the acceptance test so that the test keeps counting
      // neighbours only; the aggregate may thus hold maxNodesPerAggregate + 1 vertices.
      aggList[aggSize++] = rootCandidate;

      aggIndex = numLocalAggregates++;

      for (size_t k = 0; k < aggSize; k++) {
        aggStat     [aggList[k]] = AGGREGATED;
        vertex2AggId[aggList[k]] = aggIndex;
        procWinner  [aggList[k]] = myRank;
      }

      numNonAggregatedNodes -= aggSize;
    }
  }

  // update aggregate object
  aggregates.SetNumAggregates(numLocalAggregates);
}
void LeftoverAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::AggregateLeftovers(GraphBase const &graph, Aggregates &aggregates) const { Monitor m(*this, "AggregateLeftovers"); my_size_t nVertices = graph.GetNodeNumVertices(); int exp_nRows = aggregates.GetMap()->getNodeNumElements(); // Tentative fix... was previously exp_nRows = nVertices + graph.GetNodeNumGhost(); int myPid = graph.GetComm()->getRank(); my_size_t nAggregates = aggregates.GetNumAggregates(); int minNodesPerAggregate = GetMinNodesPerAggregate(); const RCP<const Map> nonUniqueMap = aggregates.GetMap(); //column map of underlying graph const RCP<const Map> uniqueMap = graph.GetDomainMap(); MueLu::CoupledAggregationCommHelper<LO,GO,NO,LMO> myWidget(uniqueMap, nonUniqueMap); //TODO JJH We want to skip this call RCP<Xpetra::Vector<double,LO,GO,NO> > distWeights = Xpetra::VectorFactory<double,LO,GO,NO>::Build(nonUniqueMap); // Aggregated vertices not "definitively" assigned to processors are // arbitrated by ArbitrateAndCommunicate(). There is some // additional logic to prevent losing root nodes in arbitration. { ArrayRCP<const LO> vertex2AggId = aggregates.GetVertex2AggId()->getData(0); ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0); ArrayRCP<double> weights = distWeights->getDataNonConst(0); for (size_t i=0;i<nonUniqueMap->getNodeNumElements();i++) { if (procWinner[i] == MUELU_UNASSIGNED) { if (vertex2AggId[i] != MUELU_UNAGGREGATED) { weights[i] = 1.; if (aggregates.IsRoot(i)) weights[i] = 2.; } } } // views on distributed vectors are freed here. } //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true); // All tentatively assigned vertices are now definitive // Tentatively assign any vertex (ghost or local) which neighbors a root // to the aggregate associated with the root. 
{ ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0); ArrayRCP<double> weights = distWeights->getDataNonConst(0); for (my_size_t i = 0; i < nVertices; i++) { if ( aggregates.IsRoot(i) && (procWinner[i] == myPid) ) { // neighOfINode is the neighbor node list of node 'i'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int colj = *it; if (vertex2AggId[colj] == MUELU_UNAGGREGATED) { weights[colj]= 1.; vertex2AggId[colj] = vertex2AggId[i]; } } } } // views on distributed vectors are freed here. } //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true); // All tentatively assigned vertices are now definitive // Record the number of aggregated vertices GO total_phase_one_aggregated = 0; { ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); GO phase_one_aggregated = 0; for (my_size_t i = 0; i < nVertices; i++) { if (vertex2AggId[i] != MUELU_UNAGGREGATED) phase_one_aggregated++; } sumAll(graph.GetComm(), phase_one_aggregated, total_phase_one_aggregated); GO local_nVertices = nVertices, total_nVertices = 0; sumAll(graph.GetComm(), local_nVertices, total_nVertices); /* Among unaggregated points, see if we can make a reasonable size */ /* aggregate out of it. We do this by looking at neighbors and seeing */ /* how many are unaggregated and on my processor. Loosely, */ /* base the number of new aggregates created on the percentage of */ /* unaggregated nodes. 
*/ ArrayRCP<double> weights = distWeights->getDataNonConst(0); double factor = 1.; factor = ((double) total_phase_one_aggregated)/((double)(total_nVertices + 1)); factor = pow(factor, GetPhase3AggCreation()); for (my_size_t i = 0; i < nVertices; i++) { if (vertex2AggId[i] == MUELU_UNAGGREGATED) { // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); int rowi_N = neighOfINode.size(); int nonaggd_neighbors = 0; for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int colj = *it; if (vertex2AggId[colj] == MUELU_UNAGGREGATED && colj < nVertices) nonaggd_neighbors++; } if ( (nonaggd_neighbors > minNodesPerAggregate) && (((double) nonaggd_neighbors)/((double) rowi_N) > factor)) { vertex2AggId[i] = (nAggregates)++; for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int colj = *it; if (vertex2AggId[colj]==MUELU_UNAGGREGATED) { vertex2AggId[colj] = vertex2AggId[i]; if (colj < nVertices) weights[colj] = 2.; else weights[colj] = 1.; } } aggregates.SetIsRoot(i); weights[i] = 2.; } } } // for (i = 0; i < nVertices; i++) // views on distributed vectors are freed here. 
} //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true); //All tentatively assigned vertices are now definitive if (IsPrint(Statistics1)) { GO Nphase1_agg = nAggregates; GO total_aggs; sumAll(graph.GetComm(), Nphase1_agg, total_aggs); GetOStream(Statistics1, 0) << "Phase 1 - nodes aggregated = " << total_phase_one_aggregated << std::endl; GetOStream(Statistics1, 0) << "Phase 1 - total aggregates = " << total_aggs << std::endl; GO i = nAggregates - Nphase1_agg; { GO ii; sumAll(graph.GetComm(),i,ii); i = ii; } GetOStream(Statistics1, 0) << "Phase 3 - additional aggregates = " << i << std::endl; } // Determine vertices that are not shared by setting Temp to all ones // and doing NonUnique2NonUnique(..., ADD). This sums values of all // local copies associated with each Gid. Thus, sums > 1 are shared. // std::cout << "exp_nrows=" << exp_nRows << " (nVertices= " << nVertices << ", numGhost=" << graph.GetNodeNumGhost() << ")" << std::endl; // std::cout << "nonUniqueMap=" << nonUniqueMap->getNodeNumElements() << std::endl; RCP<Xpetra::Vector<double,LO,GO,NO> > temp_ = Xpetra::VectorFactory<double,LO,GO,NO> ::Build(nonUniqueMap,false); //no need to zero out vector in ctor temp_->putScalar(1.); RCP<Xpetra::Vector<double,LO,GO,NO> > tempOutput_ = Xpetra::VectorFactory<double,LO,GO,NO> ::Build(nonUniqueMap); myWidget.NonUnique2NonUnique(*temp_, *tempOutput_, Xpetra::ADD); std::vector<bool> gidNotShared(exp_nRows); { ArrayRCP<const double> tempOutput = tempOutput_->getData(0); for (int i = 0; i < exp_nRows; i++) { if (tempOutput[i] > 1.) gidNotShared[i] = false; else gidNotShared[i] = true; } } // Phase 4. 
double nAggregatesTarget; nAggregatesTarget = ((double) uniqueMap->getGlobalNumElements())* (((double) uniqueMap->getGlobalNumElements())/ ((double) graph.GetGlobalNumEdges())); GO nAggregatesLocal=nAggregates, nAggregatesGlobal; sumAll(graph.GetComm(), nAggregatesLocal, nAggregatesGlobal); LO minNAggs; minAll(graph.GetComm(), nAggregates, minNAggs); LO maxNAggs; maxAll(graph.GetComm(), nAggregates, maxNAggs); // // Only do this phase if things look really bad. THIS // CODE IS PRETTY EXPERIMENTAL // #define MUELU_PHASE4BUCKETS 6 if ((nAggregatesGlobal < graph.GetComm()->getSize()) && (2.5*nAggregatesGlobal < nAggregatesTarget) && (minNAggs ==0) && (maxNAggs <= 1)) { // Modify seed of the random algorithm used by temp_->randomize() { typedef Teuchos::ScalarTraits<double> scalarTrait; // temp_ is of type double. scalarTrait::seedrandom(static_cast<unsigned int>(myPid*2 + (int) (11*scalarTrait::random()))); int k = (int)ceil( (10.*myPid)/graph.GetComm()->getSize()); for (int i = 0; i < k+7; i++) scalarTrait::random(); temp_->setSeed(static_cast<unsigned int>(scalarTrait::random())); } temp_->randomize(); ArrayRCP<double> temp = temp_->getDataNonConst(0); // build a list of candidate root nodes (vertices not adjacent // to aggregated vertices) my_size_t nCandidates = 0; global_size_t nCandidatesGlobal; ArrayRCP<LO> candidates = Teuchos::arcp<LO>(nVertices+1); double priorThreshold = 0.; for (int kkk = 0; kkk < MUELU_PHASE4BUCKETS; kkk++) { { ArrayRCP<const LO> vertex2AggId = aggregates.GetVertex2AggId()->getData(0); ArrayView<const LO> vertex2AggIdView = vertex2AggId(); RootCandidates(nVertices, vertex2AggIdView, graph, candidates, nCandidates, nCandidatesGlobal); // views on distributed vectors are freed here. } double nTargetNewGuys = nAggregatesTarget - nAggregatesGlobal; double threshold = priorThreshold + (1. 
- priorThreshold)*nTargetNewGuys/(nCandidatesGlobal + .001); threshold = (threshold*(kkk+1.))/((double) MUELU_PHASE4BUCKETS); priorThreshold = threshold; { ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); ArrayRCP<double> weights = distWeights->getDataNonConst(0); for (int k = 0; k < nCandidates; k++ ) { int i = candidates[k]; if ((vertex2AggId[i] == MUELU_UNAGGREGATED) && (fabs(temp[i]) < threshold)) { // Note: priorThreshold <= fabs(temp[i]) <= 1 // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); if (neighOfINode.size() > minNodesPerAggregate) { //TODO: check if this test is exactly was we want to do int count = 0; for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int Adjacent = *it; // This might not be true if someone close to i // is chosen as a root via fabs(temp[]) < Threshold if (vertex2AggId[Adjacent] == MUELU_UNAGGREGATED){ count++; vertex2AggId[Adjacent] = nAggregates; weights[Adjacent] = 1.; } } if (count >= minNodesPerAggregate) { vertex2AggId[i] = nAggregates++; weights[i] = 2.; aggregates.SetIsRoot(i); } else { // undo things for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int Adjacent = *it; if (vertex2AggId[Adjacent] == nAggregates){ vertex2AggId[Adjacent] = MUELU_UNAGGREGATED; weights[Adjacent] = 0.; } } } } } } // views on distributed vectors are freed here. 
} //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true); // All tentatively assigned vertices are now definitive nAggregatesLocal=nAggregates; sumAll(graph.GetComm(), nAggregatesLocal, nAggregatesGlobal); // check that there are no aggregates sizes below minNodesPerAggregate aggregates.SetNumAggregates(nAggregates); RemoveSmallAggs(aggregates, minNodesPerAggregate, distWeights, myWidget); nAggregates = aggregates.GetNumAggregates(); } // one possibility } // Initialize things for Phase 5. This includes building the transpose // of the matrix ONLY for transposed rows that correspond to unaggregted // ghost vertices. Further, the transpose is only a local transpose. // Nonzero edges which exist on other processors are not represented. int observedNAgg=-1; //number of aggregates that contain vertices on this process { ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0); for(LO k = 0; k < vertex2AggId.size(); ++k ) if(vertex2AggId[k]>observedNAgg) observedNAgg=vertex2AggId[k]; observedNAgg++; } ArrayRCP<int> Mark = Teuchos::arcp<int>(exp_nRows+1); ArrayRCP<int> agg_incremented = Teuchos::arcp<int>(observedNAgg); ArrayRCP<int> SumOfMarks = Teuchos::arcp<int>(observedNAgg); for (int i = 0; i < exp_nRows; i++) Mark[i] = MUELU_DISTONE_VERTEX_WEIGHT; for (int i = 0; i < agg_incremented.size(); i++) agg_incremented[i] = 0; for (int i = 0; i < SumOfMarks.size(); i++) SumOfMarks[i] = 0; // Grab the transpose matrix graph for unaggregated ghost vertices. // a) count the number of nonzeros per row in the transpose std::vector<int> RowPtr(exp_nRows+1-nVertices); //{ ArrayRCP<const LO> vertex2AggIdCst = aggregates.GetVertex2AggId()->getData(0); for (int i = nVertices; i < exp_nRows; i++) RowPtr[i-nVertices] = 0; for (int i = 0; i < nVertices; i++) { // neighOfINode is the neighbor node list of node 'iNode'. 
ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int j = *it; if ( (j >= nVertices) && (vertex2AggIdCst[j] == MUELU_UNAGGREGATED)){ RowPtr[j-nVertices]++; } } } // b) Convert RowPtr[i] to point to 1st first nnz spot in row i. int iSum = 0, iTemp; for (int i = nVertices; i < exp_nRows; i++) { iTemp = RowPtr[i-nVertices]; RowPtr[i-nVertices] = iSum; iSum += iTemp; } RowPtr[exp_nRows-nVertices] = iSum; std::vector<LO> cols(iSum+1); // c) Traverse matrix and insert entries in proper location. for (int i = 0; i < nVertices; i++) { // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int j = *it; if ( (j >= nVertices) && (vertex2AggIdCst[j] == MUELU_UNAGGREGATED)){ cols[RowPtr[j-nVertices]++] = i; } } } // d) RowPtr[i] points to beginning of row i+1 so shift by one location. for (int i = exp_nRows; i > nVertices; i--) RowPtr[i-nVertices] = RowPtr[i-1-nVertices]; RowPtr[0] = 0; // views on distributed vectors are freed here. vertex2AggIdCst = Teuchos::null; //} int bestScoreCutoff; int thresholds[10] = {300,200,100,50,25,13,7,4,2,0}; // Stick unaggregated vertices into existing aggregates as described above. { int ncalls=0; for (int kk = 0; kk < 10; kk += 2) { bestScoreCutoff = thresholds[kk]; ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0); ArrayRCP<double> weights = distWeights->getDataNonConst(0); for (int i = 0; i < exp_nRows; i++) { if (vertex2AggId[i] == MUELU_UNAGGREGATED) { // neighOfINode is the neighbor node list of node 'iNode'. 
ArrayView<const LO> neighOfINode; // Grab neighboring vertices which is either in graph for local ids // or sits in transposed fragment just constructed above for ghosts. if (i < nVertices) { neighOfINode = graph.getNeighborVertices(i); } else { LO *rowi_col = NULL, rowi_N; rowi_col = &(cols[RowPtr[i-nVertices]]); rowi_N = RowPtr[i+1-nVertices] - RowPtr[i-nVertices]; neighOfINode = ArrayView<const LO>(rowi_col, rowi_N); } for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int Adjacent = *it; int AdjacentAgg = vertex2AggId[Adjacent]; //Adjacent is aggregated and either I own the aggregate // or I could own the aggregate after arbitration. if ((AdjacentAgg != MUELU_UNAGGREGATED) && ((procWinner[Adjacent] == myPid) || (procWinner[Adjacent] == MUELU_UNASSIGNED))){ SumOfMarks[AdjacentAgg] += Mark[Adjacent]; } } int best_score = MUELU_NOSCORE; int best_agg = -1; int BestMark = -1; bool cannotLoseAllFriends=false; // Used to address possible loss of vertices in arbitration of shared nodes discussed above. (Initialized to false only to avoid a compiler warning). for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int Adjacent = *it; int AdjacentAgg = vertex2AggId[Adjacent]; //Adjacent is unaggregated, has some value and no //other processor has definitively claimed him if ((AdjacentAgg != MUELU_UNAGGREGATED) && (SumOfMarks[AdjacentAgg] != 0) && ((procWinner[Adjacent] == myPid) || (procWinner[Adjacent] == MUELU_UNASSIGNED ))) { // first figure out the penalty associated with // AdjacentAgg having already been incremented // during this phase, then compute score. 
double penalty = (double) (INCR_SCALING*agg_incremented[AdjacentAgg]); if (penalty > MUELU_PENALTYFACTOR*((double)SumOfMarks[AdjacentAgg])) penalty = MUELU_PENALTYFACTOR*((double)SumOfMarks[AdjacentAgg]); int score = SumOfMarks[AdjacentAgg]- ((int) floor(penalty)); if (score > best_score) { best_agg = AdjacentAgg; best_score = score; BestMark = Mark[Adjacent]; cannotLoseAllFriends = false; // This address issue mentioned above by checking whether // Adjacent could be lost in arbitration. weight==0 means that // Adjacent was not set during this loop of Phase 5 (and so it // has already undergone arbitration). GidNotShared == true // obviously implies that Adjacent cannot be lost to arbitration if ((weights[Adjacent]== 0.) || (gidNotShared[Adjacent] == true)) cannotLoseAllFriends = true; } // Another vertex within current best aggregate found. // We should have (best_score == score). We need to see // if we can improve BestMark and cannotLoseAllFriends. else if (best_agg == AdjacentAgg) { if ((weights[Adjacent]== 0.) || (gidNotShared[Adjacent] == true)) cannotLoseAllFriends = true; if (Mark[Adjacent] > BestMark) BestMark = Mark[Adjacent]; } } } // Clean up for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int Adjacent = *it; int AdjacentAgg = vertex2AggId[Adjacent]; if (AdjacentAgg >= 0) SumOfMarks[AdjacentAgg] = 0; } // Tentatively assign vertex to best_agg. if ( (best_score >= bestScoreCutoff) && (cannotLoseAllFriends)) { TEUCHOS_TEST_FOR_EXCEPTION(best_agg == -1 || BestMark == -1, MueLu::Exceptions::RuntimeError, "MueLu::CoupledAggregationFactory internal error"); // should never happen vertex2AggId[i] = best_agg; weights[i] = best_score; agg_incremented[best_agg]++; Mark[i] = (int) ceil( ((double) BestMark)/2.); } } // views on distributed vectors are freed here. 
} vertex2AggId = Teuchos::null; procWinner = Teuchos::null; weights = Teuchos::null; ++ncalls; //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true); // All tentatively assigned vertices are now definitive } // if (graph.GetComm()->getRank()==0) // std::cout << "#calls to Arb&Comm=" << ncalls << std::endl; } // Phase 6: Aggregate remain unaggregated vertices and try at all costs // to avoid small aggregates. // One case where we can find ourselves in this situation // is if all vertices vk adjacent to v have already been // put in other processor's aggregates and v does not have // a direct connection to a local vertex in any of these // aggregates. int Nleftover = 0, Nsingle = 0; { ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); ArrayRCP<double> weights = distWeights->getDataNonConst(0); ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0); int count = 0; for (my_size_t i = 0; i < nVertices; i++) { if (vertex2AggId[i] == MUELU_UNAGGREGATED) { Nleftover++; // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); // We don't want too small of an aggregate. So lets see if there is an // unaggregated neighbor that we can also put with this vertex vertex2AggId[i] = nAggregates; weights[i] = 1.; if (count == 0) aggregates.SetIsRoot(i); count++; for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int j = *it; if ((j != i)&&(vertex2AggId[j] == MUELU_UNAGGREGATED)&& (j < nVertices)) { vertex2AggId[j] = nAggregates; weights[j] = 1.; count++; } } if ( count >= minNodesPerAggregate) { nAggregates++; count = 0; } } } // We have something which is under minNodesPerAggregate when if (count != 0) { #ifdef FIXME // Can stick small aggregate with 0th aggregate? 
if (nAggregates > 0) { for (my_size_t i = 0; i < nVertices; i++) { if ((vertex2AggId[i] == nAggregates) && (procWinner[i] == myPid)) { vertex2AggId[i] = 0; aggregates.SetIsRoot(i,false); } } } else { Nsingle++; nAggregates++; } #else // Can stick small aggregate with 0th aggregate? if (nAggregates > 0) { for (my_size_t i = 0; i < nVertices; i++) { // TW: This is not a real fix. This may produce ugly bad aggregates! // I removed the procWinner[i] == myPid check. it makes no sense to me since // it leaves vertex2AggId[i] == nAggregates -> crash in ComputeAggregateSizes(). // Maybe it's better to add the leftovers to the last generated agg on the current proc. // The best solution would be to add them to the "next"/nearest aggregate, that may be // on an other processor if (vertex2AggId[i] == nAggregates) { vertex2AggId[i] = nAggregates-1; //0; aggregates.SetIsRoot(i,false); } } } else { Nsingle++; nAggregates++; } #endif } // views on distributed vectors are freed here. } //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, false); if (IsPrint(Statistics1)) { GO total_Nsingle=0; sumAll(graph.GetComm(), (GO)Nsingle, total_Nsingle); GO total_Nleftover=0; sumAll(graph.GetComm(), (GO)Nleftover, total_Nleftover); // GO total_aggs; sumAll(graph.GetComm(), (GO)nAggregates, total_aggs); // GetOStream(Statistics1, 0) << "Phase 6 - total aggregates = " << total_aggs << std::endl; GetOStream(Statistics1, 0) << "Phase 6 - leftovers = " << total_Nleftover << " and singletons = " << total_Nsingle << std::endl; } aggregates.SetNumAggregates(nAggregates); } //AggregateLeftovers