std::string MHDRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::PrintLoadBalancingInfo(const Matrix& Ac, const std::string& msgTag) {
  std::stringstream ss(std::stringstream::out);

  // TODO: provide an option to skip this (to avoid global communication)
  // TODO: skip if nproc == 1

  // nonzero imbalance
  size_t numMyNnz = Ac.getNodeNumEntries();
  GO maxNnz, minNnz;
  RCP<const Teuchos::Comm<int> > comm = Ac.getRowMap()->getComm();
  maxAll(comm, (GO)numMyNnz, maxNnz);
  // min nnz over all procs (disallow any processors with 0 nnz)
  minAll(comm, (GO)((numMyNnz > 0) ? numMyNnz : maxNnz), minNnz);
  double imbalance = ((double) maxNnz) / minNnz;

  size_t numMyRows = Ac.getNodeNumRows();
  // Check whether Ac is spread over more than one process.
  GO numActiveProcesses = 0;
  sumAll(comm, (GO)((numMyRows > 0) ? 1 : 0), numActiveProcesses);

  // min, max, and avg # rows per proc
  GO minNumRows, maxNumRows;
  double avgNumRows;
  maxAll(comm, (GO)numMyRows, maxNumRows);
  minAll(comm, (GO)((numMyRows > 0) ? numMyRows : maxNumRows), minNumRows);
  assert(numActiveProcesses > 0);
  avgNumRows = Ac.getGlobalNumRows() / numActiveProcesses;

  ss << msgTag << " # processes with rows = " << numActiveProcesses << std::endl;
  ss << msgTag << " min # rows per proc = " << minNumRows << ", max # rows per proc = " << maxNumRows << ", avg # rows per proc = " << avgNumRows << std::endl;
  ss << msgTag << " nonzero imbalance = " << imbalance << std::endl;

  return ss.str();
}
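// The maxAll/minAll/sumAll helpers used by the MueLu snippets in this section
// are not defined here. A minimal sketch, assuming they are thin wrappers over
// Teuchos::reduceAll (the wrapper names below are placeholders, not the MueLu
// originals):

#include <Teuchos_CommHelpers.hpp>
#include <Teuchos_RCP.hpp>

template <class T>
void maxAllSketch(const Teuchos::RCP<const Teuchos::Comm<int> >& comm, T in, T& out) {
  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, in, Teuchos::outArg(out)); // global max
}

template <class T>
void minAllSketch(const Teuchos::RCP<const Teuchos::Comm<int> >& comm, T in, T& out) {
  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, in, Teuchos::outArg(out)); // global min
}

template <class T>
void sumAllSketch(const Teuchos::RCP<const Teuchos::Comm<int> >& comm, T in, T& out) {
  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, in, Teuchos::outArg(out)); // global sum
}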
Max7219::Max7219(byte dataInPin, byte loadPin, byte clockPin, byte numMax) :
  m_dataInPin(dataInPin), m_loadPin(loadPin), m_clockPin(clockPin), m_numMax(numMax)
#ifdef SUPPORT_PERCENTAGE
  , m_percentMaxValue(100)
  , m_percentLastValue(0)
#endif
#ifdef SUPPORT_SCROLLING
  , m_scrollText(0), m_scrollIndex(0), m_currScrollPixRowCol(0), m_inverseScroll(false)
#endif
{
  pinMode(m_dataInPin, OUTPUT);
  pinMode(m_clockPin, OUTPUT);
  pinMode(m_loadPin, OUTPUT);
  digitalWrite(m_clockPin, HIGH); // was digitalWrite(13, HIGH): a leftover hardcoded pin, wrong once the clock pin is configurable

  // initiating the MAX7219
  maxAll(max7219_reg_scanLimit, 0x07);
  maxAll(max7219_reg_decodeMode, 0x00);  // using an LED matrix (not digits)
  maxAll(max7219_reg_shutdown, 0x01);    // not in shutdown mode
  maxAll(max7219_reg_displayTest, 0x00); // no display test
  for (byte e = 1; e <= 8; e++)          // empty registers, turn all LEDs off
    maxAll(e, 0);
  setIntensity(15);
} // ctor
void max72_setup() {
  pinMode(dataIn, OUTPUT);
  pinMode(clock, OUTPUT);
  pinMode(load, OUTPUT);
  //beginSerial(9600);
  digitalWrite(13, HIGH);

  // initiation of the MAX7219
  maxAll(max7219_reg_scanLimit, 0x07);
  maxAll(max7219_reg_decodeMode, 0x00);  // using an LED matrix (not digits)
  maxAll(max7219_reg_shutdown, 0x01);    // not in shutdown mode
  maxAll(max7219_reg_displayTest, 0x00); // no display test
  for (e = 1; e <= 8; e++) { // empty registers, turn all LEDs off
    maxAll(e, 0);
  }
  maxAll(max7219_reg_intensity, 0x0f & 0x0f); // the first 0x0f is the value you can set
                                              // range: 0x00 to 0x0f
}
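// None of the MAX7219 snippets in this section define maxAll itself. A minimal
// sketch of the usual bit-banged implementation, assuming the classic wiring
// (shift a register/value pair out to every cascaded chip, then latch with the
// LOAD pin); putByteSketch/max7219AllSketch are placeholder names, and dataIn,
// clock, load, maxInUse mirror the globals used above:

void putByteSketch(byte data) {
  for (byte i = 8; i > 0; i--) {
    byte mask = 0x01 << (i - 1);                      // MSB first
    digitalWrite(clock, LOW);
    digitalWrite(dataIn, (data & mask) ? HIGH : LOW); // present the bit
    digitalWrite(clock, HIGH);                        // clocked in on the rising edge
  }
}

void max7219AllSketch(byte reg, byte col) { // write the same register on all cascaded chips
  digitalWrite(load, LOW);
  for (int c = 1; c <= maxInUse; c++) {
    putByteSketch(reg); // register address
    putByteSketch(col); // register data
  }
  digitalWrite(load, LOW);
  digitalWrite(load, HIGH); // latch the shifted bits into the chips
}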
// Reset
void IRCar_DotMatrix_Reset(Reset_t Reset) {
  if (RESET_INIT == Reset) {
    pinMode(IO_OUT_SPI_DATA, OUTPUT);
    pinMode(IO_OUT_SPI_LOAD, OUTPUT);
    pinMode(IO_OUT_SPI_CLK, OUTPUT);

    // initiation of the MAX7219
    maxAll(max7219_reg_scanLimit, 0x07);
    maxAll(max7219_reg_decodeMode, 0x00);  // using an LED matrix (not digits)
    maxAll(max7219_reg_shutdown, 0x01);    // not in shutdown mode
    maxAll(max7219_reg_displayTest, 0x00); // no display test
    maxAll(max7219_reg_intensity, 0x0f & 0x0f); // the first 0x0f is the value you can set, range: 0x00 to 0x0f

    // empty registers, turn all LEDs off
    for (U8 e = 1; e <= 8; e++) {
      maxAll(e, 0);
    }
  }
  if (RESET_NONE != Reset) {
    DotMatrix_Cmd();
  } else {
    // Update global time
    IRCar_TimeMs = millis();
    // Restart timeout for 5 sec
    DotMatrix_Timer_Duration = 5*1000UL;
    DotMatrix_TimerON = IRCar_TimeMs;
    // no null timer => 0 is reserved for the stopped state
    if (!DotMatrix_TimerON) DotMatrix_TimerON = 1;
  }
}
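// The timer fields above use a common millis()-based timeout idiom in which a
// timestamp of 0 means "timer stopped", so a freshly started timer is nudged
// to 1 if millis() happens to return 0. A minimal standalone sketch of that
// idiom (names are placeholders; the polling side is not shown in the snippet
// above):

unsigned long timerStartSketch = 0;                      // 0 => stopped
const unsigned long timerDurationSketch = 5UL * 1000UL;  // 5 sec, as above

void timerRestartSketch() {
  timerStartSketch = millis();
  if (!timerStartSketch) timerStartSketch = 1;           // keep 0 reserved for "stopped"
}

bool timerExpiredSketch() {
  if (!timerStartSketch) return false;                   // not running
  // unsigned subtraction stays correct across millis() rollover (~49.7 days)
  return (millis() - timerStartSketch) >= timerDurationSketch;
}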
void RebalanceTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level& coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);

  const ParameterList& pL = GetParameterList();
  int implicit   = !pL.get<bool>("repartition: rebalance P and R");
  int writeStart = pL.get<int>("write start");
  int writeEnd   = pL.get<int>("write end");

  if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd && IsAvailable(fineLevel, "Coordinates")) {
    std::string fileName = "coordinates_level_0.m";
    RCP<MultiVector> fineCoords = fineLevel.Get< RCP<MultiVector> >("Coordinates");
    if (fineCoords != Teuchos::null)
      Utils::Write(fileName, *fineCoords);
  }

  RCP<const Import> importer = Get<RCP<const Import> >(coarseLevel, "Importer");
  if (implicit) {
    // Save the importer, we'll need it for solve
    coarseLevel.Set("Importer", importer, NoFactory::get());
  }

  RCP<ParameterList> params = rcp(new ParameterList());
  params->set("printLoadBalancingInfo", true);
  params->set("printCommInfo",          true);

  std::string transferType = pL.get<std::string>("type");
  if (transferType == "Interpolation") {
    RCP<Matrix> originalP = Get< RCP<Matrix> >(coarseLevel, "P");

    { // This line must be after the Get call
      SubFactoryMonitor m1(*this, "Rebalancing prolongator", coarseLevel);

      if (implicit || importer.is_null()) {
        GetOStream(Runtime0) << "Using original prolongator" << std::endl;
        Set(coarseLevel, "P", originalP);
      } else {
        // P is the transfer operator from the coarse grid to the fine grid.
        // P must transfer the data from the newly reordered coarse A to the
        // (unchanged) fine A. This means that the domain map (coarse) of P
        // must be changed according to the new partition. The range map
        // (fine) is kept unchanged.
        //
        // The domain map of P must match the range map of R. See also the note
        // below about the domain/range map of R and its implications for P.
        //
        // To change the domain map of P, P needs to be fillCompleted again
        // with the new domain map. To achieve this, P is copied into a new
        // matrix that is not fill-completed. The doImport() operation is
        // just used here to make a copy of P: the importer is trivial and
        // there is no data movement involved. The reordering actually
        // happens during the fillComplete() with domainMap == importer->getTargetMap().
        RCP<Matrix> rebalancedP = originalP;
        RCP<const CrsMatrixWrap> crsOp = rcp_dynamic_cast<const CrsMatrixWrap>(originalP);
        TEUCHOS_TEST_FOR_EXCEPTION(crsOp == Teuchos::null, Exceptions::BadCast, "Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");

        RCP<CrsMatrix> rebalancedP2 = crsOp->getCrsMatrix();
        TEUCHOS_TEST_FOR_EXCEPTION(rebalancedP2 == Teuchos::null, std::runtime_error, "Xpetra::CrsMatrixWrap doesn't have a CrsMatrix");

        {
          SubFactoryMonitor subM(*this, "Rebalancing prolongator -- fast map replacement", coarseLevel);
          RCP<const Import> newImporter = ImportFactory::Build(importer->getTargetMap(), rebalancedP->getColMap());
          rebalancedP2->replaceDomainMapAndImporter(importer->getTargetMap(), newImporter);
        }

        ///////////////////////// EXPERIMENTAL
        // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps.
        // That is probably something for an external permutation factory
        //   if (originalP->IsView("stridedMaps"))
        //     rebalancedP->CreateView("stridedMaps", originalP);
        ///////////////////////// EXPERIMENTAL

        Set(coarseLevel, "P", rebalancedP);

        if (IsPrint(Statistics1))
          GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedP, "P (rebalanced)", params);
      }
    }

    if (importer.is_null()) {
      if (IsAvailable(coarseLevel, "Nullspace"))
        Set(coarseLevel, "Nullspace", Get<RCP<MultiVector> >(coarseLevel, "Nullspace"));

      if (pL.isParameter("Coordinates") && pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null)
        if (IsAvailable(coarseLevel, "Coordinates"))
          Set(coarseLevel, "Coordinates", Get< RCP<MultiVector> >(coarseLevel, "Coordinates"));

      return;
    }

    if (pL.isParameter("Coordinates") && pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null && IsAvailable(coarseLevel, "Coordinates")) {
      RCP<MultiVector> coords = Get<RCP<MultiVector> >(coarseLevel, "Coordinates");

      // This line must be after the Get call
      SubFactoryMonitor subM(*this, "Rebalancing coordinates", coarseLevel);

      LO nodeNumElts = coords->getMap()->getNodeNumElements();

      // If a process has no matrix rows, then we can't calculate the blocksize using the formula below.
      LO myBlkSize = 0, blkSize = 0;
      if (nodeNumElts > 0)
        myBlkSize = importer->getSourceMap()->getNodeNumElements() / nodeNumElts;
      maxAll(coords->getMap()->getComm(), myBlkSize, blkSize);

      RCP<const Import> coordImporter;
      if (blkSize == 1) {
        coordImporter = importer;
      } else {
        // NOTE: there is an implicit assumption here: we assume that the DOFs of any node are enumerated consecutively.
        // A proper fix would require using a decomposition similar to how we construct the importer in the
        // RepartitionFactory.
        RCP<const Map> origMap = coords->getMap();
        GO indexBase = origMap->getIndexBase();

        ArrayView<const GO> OEntries = importer->getTargetMap()->getNodeElementList();
        LO numEntries = OEntries.size()/blkSize;
        ArrayRCP<GO> Entries(numEntries);
        for (LO i = 0; i < numEntries; i++)
          Entries[i] = (OEntries[i*blkSize]-indexBase)/blkSize + indexBase;

        RCP<const Map> targetMap = MapFactory::Build(origMap->lib(), origMap->getGlobalNumElements(), Entries(), indexBase, origMap->getComm());
        coordImporter = ImportFactory::Build(origMap, targetMap);
      }

      RCP<MultiVector> permutedCoords = MultiVectorFactory::Build(coordImporter->getTargetMap(), coords->getNumVectors());
      permutedCoords->doImport(*coords, *coordImporter, Xpetra::INSERT);

      if (pL.get<bool>("useSubcomm") == true)
        permutedCoords->replaceMap(permutedCoords->getMap()->removeEmptyProcesses());

      Set(coarseLevel, "Coordinates", permutedCoords);

      std::string fileName = "rebalanced_coordinates_level_" + toString(coarseLevel.GetLevelID()) + ".m";
      if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd && permutedCoords->getMap() != Teuchos::null)
        Utils::Write(fileName, *permutedCoords);
    }

    if (IsAvailable(coarseLevel, "Nullspace")) {
      RCP<MultiVector> nullspace = Get< RCP<MultiVector> >(coarseLevel, "Nullspace");

      // This line must be after the Get call
      SubFactoryMonitor subM(*this, "Rebalancing nullspace", coarseLevel);

      RCP<MultiVector> permutedNullspace = MultiVectorFactory::Build(importer->getTargetMap(), nullspace->getNumVectors());
      permutedNullspace->doImport(*nullspace, *importer, Xpetra::INSERT);

      if (pL.get<bool>("useSubcomm") == true)
        permutedNullspace->replaceMap(permutedNullspace->getMap()->removeEmptyProcesses());

      Set(coarseLevel, "Nullspace", permutedNullspace);
    }

  } else {
    if (pL.get<bool>("transpose: use implicit") == false) {
      RCP<Matrix> originalR = Get< RCP<Matrix> >(coarseLevel, "R");

      SubFactoryMonitor m2(*this, "Rebalancing restriction", coarseLevel);

      if (implicit || importer.is_null()) {
        GetOStream(Runtime0) << "Using original restrictor" << std::endl;
        Set(coarseLevel, "R", originalR);
      } else {
        RCP<Matrix> rebalancedR;
        {
          SubFactoryMonitor subM(*this, "Rebalancing restriction -- fusedImport", coarseLevel);
          RCP<Map> dummy; // meaning: use originalR's domain map.
          rebalancedR = MatrixFactory::Build(originalR, *importer, dummy, importer->getTargetMap());
        }
        Set(coarseLevel, "R", rebalancedR);

        ///////////////////////// EXPERIMENTAL
        // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps.
        // That is probably something for an external permutation factory
        //   if (originalR->IsView("stridedMaps"))
        //     rebalancedR->CreateView("stridedMaps", originalR);
        ///////////////////////// EXPERIMENTAL

        if (IsPrint(Statistics1))
          GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedR, "R (rebalanced)", params);
      }
    }
  }
}
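// The coordinate-rebalancing branch above derives a node map from a DOF map by
// taking every blkSize-th GID and dividing out the block size. A small worked
// example with made-up GIDs (it relies on the same assumption the code comments
// note: the DOFs of a node are numbered consecutively):

#include <cstdio>
#include <vector>

int nodeMapArithmeticSketch() {
  const long indexBase = 0;
  const long blkSize   = 2;
  std::vector<long> dofGids = {6, 7, 10, 11}; // DOF GIDs owned after repartitioning

  // Entries[i] = (OEntries[i*blkSize] - indexBase)/blkSize + indexBase
  std::vector<long> nodeGids(dofGids.size() / blkSize);
  for (std::size_t i = 0; i < nodeGids.size(); i++)
    nodeGids[i] = (dofGids[i*blkSize] - indexBase) / blkSize + indexBase;

  for (long g : nodeGids) std::printf("%ld ", g); // prints: 3 5
  std::printf("\n");
  return 0;
}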
void Max7219::setIntensity(byte intensity) const {
  maxAll(max7219_reg_intensity, intensity bitand 0x0f); // mask to the valid range 0x00..0x0f
}
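// A minimal usage sketch for the Max7219 class above (pin numbers and the chip
// count are arbitrary example values; the heap allocation just avoids running
// the constructor's pinMode calls in a global constructor before the Arduino
// core is initialized):

Max7219* g_matrix = 0;

void setup() {
  g_matrix = new Max7219(12 /*dataIn*/, 10 /*load*/, 11 /*clock*/, 1 /*numMax*/);
  g_matrix->setIntensity(8); // mid brightness; values above 0x0f are masked off
}

void loop() {}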
Int_t main(Int_t argc, Char_t *argv[]) {
  ROOT::Mpi::TEnvironment env(argc, argv);
  ROOT::Mpi::TIntraCommunicator world;

  TVectorT<Double_t> mResult;
  Double_t fScalarResult;

  TVectorT<Double_t> v1(elements);
  TVectorT<Double_t> v2(elements);

  for (Int_t i = 0; i < elements; i++) {
    v1[i] = i + (i + world.Size());
    v2[i] = i * (i + world.Size());
  }

  ////////////////////////////////////////////////
  // Testing methods with results in single rank//
  ////////////////////////////////////////////////
  ROOT::Mpi::Math::TVectorTWrapper<Double_t> add(v1);
  add.Addition(v2, root);

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> sub(v1);
  sub.Subtraction(v2, root);

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> dot(v1);
  dot.Dot(v2, root);

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> norm2Sqr(v1);
  norm2Sqr.Norm2Sqr(root);

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> norm1(v1);
  norm1.Norm1(root);

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> min(v1);
  min.Min(root);

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> max(v1);
  max.Max(root);

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> normalize(v1);
  normalize.Normalize(root);

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> sum(v1);
  sum.Sum(root);

  if (world.Rank() == root) {
    add.GetResult(mResult);
    MpiCompareTVectorTest(mResult, v1 + v2, world.Rank(), "Vector Addition Single");

    sub.GetResult(mResult);
    MpiCompareTVectorTest(mResult, v1 - v2, world.Rank(), "Vector Subtraction Single");

    dot.GetResult(fScalarResult);
    MpiCompareTest(fScalarResult, Dot(v1, v2), world.Rank(), "Vector Dot Product Single");

    norm2Sqr.GetResult(fScalarResult);
    MpiCompareTest(fScalarResult, v1.Norm2Sqr(), world.Rank(), "Vector Norm2Sqr Single");

    norm1.GetResult(fScalarResult);
    MpiCompareTest(fScalarResult, v1.Norm1(), world.Rank(), "Vector Norm1 Single");

    min.GetResult(fScalarResult);
    MpiCompareTest(fScalarResult, v1.Min(), world.Rank(), "Vector Min Single");

    max.GetResult(fScalarResult);
    MpiCompareTest(fScalarResult, v1.Max(), world.Rank(), "Vector Max Single");

    normalize.GetResult(mResult);
    MpiCompareTest(mResult.Norm2Sqr(), ((1 / TMath::Sqrt(v1.Norm2Sqr()))*v1).Norm2Sqr(), world.Rank(), "Vector Normalize Single");

    sum.GetResult(fScalarResult);
    MpiCompareTest(fScalarResult, v1.Sum(), world.Rank(), "Vector Sum Single");
  }

  ////////////////////////////////////////////////
  // Testing methods with results in all ranks  //
  ////////////////////////////////////////////////
  ROOT::Mpi::Math::TVectorTWrapper<Double_t> addAll(v1);
  addAll.Addition(v2); // note: use the *All wrappers here (the original reused add/sub, leaving addAll/subAll unused)

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> subAll(v1);
  subAll.Subtraction(v2);

  addAll.GetResult(mResult);
  MpiCompareTVectorTest(mResult, v1 + v2, world.Rank(), "Vector Addition All");

  subAll.GetResult(mResult);
  MpiCompareTVectorTest(mResult, v1 - v2, world.Rank(), "Vector Subtraction All");

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> dotAll(v1);
  dotAll.Dot(v2);
  dotAll.GetResult(fScalarResult);
  MpiCompareTest(fScalarResult, Dot(v1, v2), world.Rank(), "Vector Dot Product All");

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> norm2SqrAll(v2);
  norm2SqrAll.Norm2Sqr();
  norm2SqrAll.GetResult(fScalarResult);
  MpiCompareTest(fScalarResult, v2.Norm2Sqr(), world.Rank(), "Vector Norm2Sqr All");

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> norm1All(v2);
  norm1All.Norm1();
  norm1All.GetResult(fScalarResult);
  MpiCompareTest(fScalarResult, v2.Norm1(), world.Rank(), "Vector Norm1 All");

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> minAll(v2);
  minAll.Min();
  minAll.GetResult(fScalarResult);
  MpiCompareTest(fScalarResult, v2.Min(), world.Rank(), "Vector Min All");

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> maxAll(v2);
  maxAll.Max();
  maxAll.GetResult(fScalarResult);
  MpiCompareTest(fScalarResult, v2.Max(), world.Rank(), "Vector Max All");

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> normalizeAll(v2);
  normalizeAll.Normalize();
  normalizeAll.GetResult(mResult);
  // if the vector is normalized, the Norm2Sqr of the result is near 1
  MpiCompareTest(mResult.Norm2Sqr(), ((1 / TMath::Sqrt(v2.Norm2Sqr()))*v2).Norm2Sqr(), world.Rank(), "Vector Normalize All");

  ROOT::Mpi::Math::TVectorTWrapper<Double_t> sumAll(v2);
  sumAll.Sum();
  sumAll.GetResult(fScalarResult);
  MpiCompareTest(fScalarResult, v2.Sum(), world.Rank(), "Vector Sum All");

  return 0;
}
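// The "Single" variants above leave the result on the root rank only, while
// the "All" variants make it available everywhere; in raw MPI terms this is
// the difference between MPI_Reduce and MPI_Allreduce. A minimal standalone
// sketch of the "Vector Max All" case using the plain MPI C API:

#include <mpi.h>
#include <algorithm>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  std::vector<double> v = {1.0*rank, 2.0*rank + 1.0}; // made-up local data
  double localMax = *std::max_element(v.begin(), v.end());

  double globalMax = 0.0; // MPI_MAX leaves the global maximum on every rank
  MPI_Allreduce(&localMax, &globalMax, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
  std::printf("rank %d sees global max %g\n", rank, globalMax);

  MPI_Finalize();
  return 0;
}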
void CoupledAggregationCommHelper<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::ArbitrateAndCommunicate(Vector &weight_, LOVector &procWinner_, LOVector *companion, const bool perturb) const {
  const RCP<const Map> weightMap = weight_.getMap();
  const size_t nodeNumElements = weightMap->getNodeNumElements();
  const RCP<const Teuchos::Comm<int> > & comm = weightMap->getComm();
  int MyPid = comm->getRank(); // TODO: remove the getMap() step
  ++numCalls_;

  // short-circuit if only one process
  if (comm->getSize() == 1) {
    ArrayRCP<SC> serialWeight = weight_.getDataNonConst(0);
    ArrayRCP<LO> serialProcWinner = procWinner_.getDataNonConst(0);
    for (size_t i = 0; i < nodeNumElements; ++i) {
      if (serialWeight[i] > 0) {
        serialWeight[i] = 0;
        serialProcWinner[i] = MyPid;
      }
    }
    // companion doesn't change
    return;
  }

#ifdef COMPARE_IN_OUT_VECTORS
  RCP<Vector> in_weight_ = VectorFactory::Build(weight_.getMap());
  {
    ArrayRCP<SC> in_weight = in_weight_->getDataNonConst(0);
    ArrayRCP<SC> weight = weight_.getDataNonConst(0);
    for (size_t i = 0; i < nodeNumElements; ++i) in_weight[i] = weight[i];
  }
  RCP<LOVector> in_procWinner_ = LOVectorFactory::Build(procWinner_.getMap());
  {
    ArrayRCP<LO> in_procWinner = in_procWinner_->getDataNonConst(0);
    ArrayRCP<LO> procWinner = procWinner_.getDataNonConst(0);
    for (size_t i = 0; i < nodeNumElements; ++i) in_procWinner[i] = procWinner[i];
  }
  RCP<LOVector> in_companion;
  {
    if (companion != NULL) {
      in_companion = LOVectorFactory::Build(companion->getMap());
      ArrayRCP<LO> in_comp = in_companion->getDataNonConst(0);
      ArrayRCP<LO> comp = companion->getDataNonConst(0);
      for (size_t i = 0; i < nodeNumElements; ++i) in_comp[i] = comp[i];
    }
  }
#endif

  if (perturb) {
    if (perturbWt_ == Teuchos::null || !perturbWt_->getMap()->isSameAs(*weightMap)) {
      perturbWt_ = VectorFactory::Build(weightMap, false); // no need to zero out because this will be randomized

      // Modify seed of the random algorithm used by perturbWt_->randomize()
      {
        ST::seedrandom(Teuchos::as<unsigned int>(MyPid*47));
        for (int i = 0; i < 10; ++i) ST::random();
      }

      // Note that we must not use perturbWt_->randomize(). This produces the same
      // local random vector on each processor. The whole point of the weights
      // is to provide tie-breaking that isn't based on the highest PID.
      ArrayRCP<SC> lperturbWt = perturbWt_->getDataNonConst(0);
      for (size_t i = 0; i < nodeNumElements; ++i)
        lperturbWt[i] = 1e-7*fabs(ST::random()); // FIXME: this won't work for general SC
#ifdef COMPARE_IN_OUT_VECTORS
      ArrayRCP<SC> locperturbWt = perturbWt_->getDataNonConst(0);
      for (size_t i = 0; i < nodeNumElements; ++i)
        printf("perturbWt[%d] = %15.10e\n", i, locperturbWt[i]);
#endif
    } // if (perturbWt_ == Teuchos::null || ...

    ArrayRCP<SC> weight = weight_.getDataNonConst(0); // TODO: const?
    ArrayRCP<SC> perturbWt = perturbWt_->getDataNonConst(0);

    // Note: maxValue() is not available for Tpetra
    //SC largestGlobalWeight = weight_.maxValue();
    SC largestGlobalWeight = weight_.normInf();
    for (size_t i = 0; i < nodeNumElements; ++i) {
      if (weight[i] != 0.) {
        weight[i] += largestGlobalWeight*perturbWt[i];
      }
    }
    // TODO: is it necessary to return the *perturbed* weights?
  } // if (perturb)

  // Communicate weights and store results in PostComm (which will be copied
  // back into weights later). When multiple processors have different weights
  // for the same GID, we take the largest weight. After this fragment every
  // processor should have the same value for PostComm[] even when multiple
  // copies of the same Gid are involved.
  if (postComm_ == Teuchos::null || !postComm_->getMap()->isSameAs(*weightMap))
    postComm_ = VectorFactory::Build(weightMap);

  // note: postComm_ is zeroed either in the build above, or in the loop below upon last touch
  NonUnique2NonUnique(weight_, *postComm_, Xpetra::ABSMAX);

  // Let every processor know who is the procWinner. For nonunique
  // copies of the same Gid, this corresponds to the processor with
  // the highest Wt[]. When several processors have the same positive value
  // for weight[] (which is also the maximum value), the highest proc id
  // is declared the procWinner.
  //
  // Note: This is accomplished by filling a vector with MyPid+1 if weight[k] is
  // nonzero and PostComm[k]==weight[k]. NonUnique2NonUnique(..., ABSMAX)
  // is invoked to let everyone know the procWinner.
  // One is then subtracted so that procWinner[i] indicates the
  // Pid of the winning processor.
  // When all weights for a GID are zero, the associated procWinners
  // are left untouched.
  if (candidateWinners_ == Teuchos::null || !candidateWinners_->getMap()->isSameAs(*weightMap))
    candidateWinners_ = VectorFactory::Build(weightMap, false); // note: candidateWinners_ is initialized below

  ArrayRCP<SC> weight = weight_.getDataNonConst(0);
  {
    ArrayRCP<SC> candidateWinners = candidateWinners_->getDataNonConst(0);
    ArrayRCP<SC> postComm = postComm_->getDataNonConst(0);
    for (size_t i = 0; i < nodeNumElements; ++i) {
      if (postComm[i] == weight[i]) candidateWinners[i] = (SC) MyPid+1;
      else                          candidateWinners[i] = 0;
      weight[i] = postComm[i];
    }
  }
  NonUnique2NonUnique(*candidateWinners_, *postComm_, Xpetra::ABSMAX);

  // Note:
  //                      associated CandidateWinners[]
  //    weight[i]!=0  ==> on some proc is equal to its   ==> postComm[i]!=0
  //                      MyPid+1.
  int numMyWinners = 0;
  ArrayRCP<LO> procWinner = procWinner_.getDataNonConst(0);
  {
    ArrayRCP<SC> postComm = postComm_->getDataNonConst(0);
    for (size_t i = 0; i < nodeNumElements; ++i) {
      if (weight[i] != 0.)
        procWinner[i] = ((int) (postComm[i])) - 1;
      weight[i] = 0.;   // we are done with weight
      postComm[i] = 0.; // avoids having to initialize postComm_ on next call to ArbitrateAndCommunicate
      if (procWinner[i] == MyPid) ++numMyWinners;
    }
  }

  weight = Teuchos::null; // TODO: why do we do this?

  if (companion != NULL) {
    // Now build a new Map, WinnerMap, which just consists of procWinners.
    // This is done by extracting the Gids for Wt, and shoving
    // the subset that correspond to procWinners in MyWinners.
    // WinnerMap is then constructed using MyWinners.
    //
    // In order to avoid regenerating winnerMap_, the following are checked:
    //   1) Does the local number of entries in MyWinners differ? If so, regenerate/repopulate MyWinners and regenerate winnerMap_.
    //   2) If the local number of entries in MyWinners is the same, do any entries differ? If so, repopulate MyWinners and
    //      regenerate winnerMap_.

    ArrayView<const GO> myGids = weightMap->getNodeElementList(); //== weightMap->MyGlobalElements(myGids);

    bool realloc = false;
    if (numMyWinners != numMyWinners_ || winnerMap_ == Teuchos::null) {
      // The local number of entries in MyWinners_ has changed since the last invocation, so reallocate myWinners_.
      myWinners_ = ArrayRCP<GO>(numMyWinners);
      realloc = true;
      //std::cout << MyPid << ": numMyWinners has changed : (old) " << numMyWinners_ << ", (new) " << numMyWinners << std::endl;
      numMyWinners_ = numMyWinners;
    }

#ifdef JG_DEBUG
    procWinner = Teuchos::null;
    std::cout << MyPid << ": nodeNumElements=" << nodeNumElements << std::endl;
    std::cout << MyPid << ": procWinner=" << procWinner_ << std::endl;
    procWinner = procWinner_.getDataNonConst(0);
#endif

    if (realloc == true) {
      // The local number of entries in MyWinners has changed since the last invocation, so repopulate MyWinners_.
      numMyWinners = 0;
      for (size_t i = 0; i < nodeNumElements; ++i) {
        if (procWinner[i] == MyPid) {
          myWinners_[numMyWinners++] = myGids[i];
        }
      }
    } else {
      // The local number of entries in MyWinners is the same as in the last invocation, but
      // we still must check whether any entries differ from the last invocation.
      bool entryMismatch = false;
      numMyWinners = 0;
      for (size_t i = 0; i < nodeNumElements; ++i) {
        if (procWinner[i] == MyPid) {
          if (myWinners_[numMyWinners++] != myGids[i]) {
            entryMismatch = true;
            break;
          }
        }
      }

      if (entryMismatch == true) {
        // Entries differ from the last invocation, so repopulate myWinners_.
        realloc = true;
        numMyWinners = 0;
        for (size_t i = 0; i < nodeNumElements; ++i) {
          if (procWinner[i] == MyPid) {
            myWinners_[numMyWinners++] = myGids[i];
          }
        }
      }
    } // if (realloc == true) ... else

    procWinner = Teuchos::null;

#ifdef JG_DEBUG
    std::cout << MyPid << ": numMyWinners=" << numMyWinners << std::endl;
    std::cout << MyPid << ": myWinners_" << myWinners_ << std::endl;
    for (int i = 0; i < numMyWinners; i++)
      std::cout << MyPid << ": myWinners_[locId=" << i << "] = " << myWinners_[i] << std::endl;
#endif

#ifdef HAVE_MPI
    // See whether any process has determined that winnerMap_ must be regenerated.
    int irealloc, orealloc;
    if (realloc) irealloc = 1;
    else         irealloc = 0;
    maxAll(comm, irealloc, orealloc);
    if (orealloc == 1) realloc = true;
    else               realloc = false;
#endif

    if (realloc) {
      // Either the number of entries or the values have changed since the last invocation, so regenerate the map.
      const Xpetra::global_size_t GSTI = Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid();
      winnerMap_ = MapFactory::Build(weightMap->lib(), GSTI, myWinners_(), 0, weightMap->getComm());
    }

    // Pull the Winners out of companion
    //     JustWinners <-- companion[Winners];
    RCP<LOVector> justWinners = LOVectorFactory::Build(winnerMap_);

#ifdef JG_DEBUG
    RCP<Teuchos::FancyOStream> out = rcp(new Teuchos::FancyOStream(rcp(&std::cout, false)));
    std::cout << MyPid << ": justWinners(Vector in)=" << *justWinners << std::endl;
    justWinners->describe(*out, Teuchos::VERB_EXTREME);
#endif

    if (winnerImport_ == Teuchos::null
        || !winnerImport_->getSourceMap()->isSameAs(*weightMap)
        || !winnerImport_->getTargetMap()->isSameAs(*winnerMap_))
      winnerImport_ = ImportFactory::Build(weightMap, winnerMap_);
    RCP<const Import> winnerImport = winnerImport_;
    try {
      justWinners->doImport(*companion, *winnerImport, Xpetra::INSERT);
    } catch (std::exception& e) {
      std::cout << MyPid << ": ERR2: An exception occurred." << std::endl;
      throw e;
    }

    // Put the JustWinner values back into companion so that
    // all nonunique copies of the same Gid have the procWinner's
    // version of the companion.
//#define JG_DEBUG
#ifdef JG_DEBUG
    RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
    fos->setOutputToRootOnly(-1);
    if (!weightMap->getComm()->getRank()) std::cout << "------ winnerMap_ ------" << std::endl;
    winnerMap_->describe(*fos, Teuchos::VERB_EXTREME);
    if (!weightMap->getComm()->getRank()) std::cout << "------ weightMap ------" << std::endl;
    weightMap->getComm()->barrier();
    weightMap->describe(*fos, Teuchos::VERB_EXTREME);
    //std::cout << *winnerMap_ << std::endl;
    //std::cout << *weightMap << std::endl;
    sleep(5);
    exit(1);
#endif
#ifdef JG_DEBUG
#undef JG_DEBUG
#endif

    if (pushWinners_ == Teuchos::null
        || !pushWinners_->getSourceMap()->isSameAs(*winnerMap_)
        || !pushWinners_->getTargetMap()->isSameAs(*weightMap))
      pushWinners_ = ImportFactory::Build(winnerMap_, weightMap);
    RCP<Import> pushWinners = pushWinners_;
    //RCP<Import> pushWinners = ImportFactory::Build(winnerMap_, weightMap); // VERSION1
    //RCP<Export> pushWinners = ExportFactory::Build(winnerMap_, weightMap); // VERSION4
    try {
      companion->doImport(*justWinners, *pushWinners, Xpetra::INSERT); // VERSION1 Slow
      //companion->doExport(*justWinners, *winnerImport_, Xpetra::INSERT); // JJH this should work... but exception
      //if (weightMap->lib() == Xpetra::UseEpetra)
      //  justWinners->doExport(*companion, *winnerImport, Xpetra::INSERT); // VERSION2 Tpetra doc is wrong
      //else if (weightMap->lib() == Xpetra::UseTpetra)
      //  companion->doExport(*justWinners, *winnerImport, Xpetra::INSERT); // VERSION3 - TODO: will certainly not work with Epetra? (change Xpetra?)
      //companion->doExport(*justWinners, *pushWinners, Xpetra::INSERT); // VERSION4
      //else throw "lib()";
    } catch (std::exception& e) {
      throw e;
    }

//#define JG_DEBUG
#ifdef JG_DEBUG
    //RCP<Teuchos::FancyOStream> out = rcp(new Teuchos::FancyOStream(rcp(&std::cout, false)));
    //->describe(*out, Teuchos::VERB_EXTREME);
    //std::cout << MyPid << ": ERR3: An exception occurred." << std::endl;
    std::cout << MyPid << ": numMyWinners=" << numMyWinners << std::endl;
    std::cout << MyPid << ": justWinners(Vector in)=" << std::endl;
    justWinners->describe(*out, Teuchos::VERB_EXTREME);
    std::cout << MyPid << ": companion(Vector out)=" << std::endl;
    companion->describe(*out, Teuchos::VERB_EXTREME);
    //std::cout << MyPid << ": pushWinners(Import(winnerMap_, weight_.Map))=" << *pushWinners << std::endl;
    std::cout << MyPid << ": winnerMap_=" << *winnerMap_ << std::endl;
    std::cout << MyPid << ": weight_.Map=" << *weightMap << std::endl;
#endif
    //throw e;
    //throw 1;
  }

#ifdef COMPARE_IN_OUT_VECTORS
  if (MyPid == 0) {
    std::cout << "==============" << std::endl;
    std::cout << "call #" << numCalls_ << " (1-based)" << std::endl;
    std::cout << "==============" << std::endl;
  }
  /*
  bool sameWeight = true;
  bool sameWinner = true;
  bool sameComp = true;
  */
  std::string sameOrDiff;
  {
    ArrayRCP<SC> in_weight = in_weight_->getDataNonConst(0);
    ArrayRCP<SC> weight = weight_.getDataNonConst(0);
    if (MyPid == 0) std::cout << "==============\nweight\n==============\n" << std::endl;
    for (size_t i = 0; i < weight_.getLocalLength(); ++i) {
      if (in_weight[i] - weight[i] != 0) sameOrDiff = " <<<<";
      else                               sameOrDiff = " ";
      std::cout << std::setw(3) << i << ": " << in_weight[i] << " " << weight[i] << sameOrDiff << in_weight[i] - weight[i] << std::endl;
      /*
      if (in_weight[i] != weight[i]) {
        sameWeight = false;
        std::cout << "\n\nin and out weight DIFFER\n\n" << std::endl;
        std::cout << "i=" << i << ", in=" << in_weight[i] << " , out=" << weight[i] << std::endl;
        break;
      }
      */
    }
  }
  {
    ArrayRCP<LO> in_procWinner = in_procWinner_->getDataNonConst(0);
    ArrayRCP<LO> procWinner = procWinner_.getDataNonConst(0);
    if (MyPid == 0) std::cout << "==============\nprocWinner\n==============\n" << std::endl;
    for (size_t i = 0; i < procWinner_.getLocalLength(); ++i) {
      if (in_procWinner[i] != procWinner[i]) sameOrDiff = " <<<<";
      else                                   sameOrDiff = " ";
      std::cout << std::setw(3) << i << ": " << in_procWinner[i] << " " << procWinner[i] << sameOrDiff << std::endl;
      /*
      if (in_procWinner[i] != procWinner[i]) {
        sameWinner = false;
        std::cout << "\n\nin and out procWinner DIFFER\n\n" << std::endl;
        std::cout << "i=" << i << ", in=" << in_procWinner[i] << ", out=" << procWinner[i] << std::endl;
        break;
      }
      */
    }
  }
  {
    if (companion != NULL) {
      ArrayRCP<LO> in_comp = in_companion->getDataNonConst(0);
      ArrayRCP<LO> comp = companion->getDataNonConst(0);
      if (MyPid == 0) std::cout << "==============\ncompanion\n==============\n" << std::endl;
      for (size_t i = 0; i < companion->getLocalLength(); ++i) {
        if (in_comp[i] != comp[i]) sameOrDiff = " <<<<";
        else                       sameOrDiff = " ";
        std::cout << std::setw(3) << i << ": " << in_comp[i] << " " << comp[i] << sameOrDiff << std::endl;
        /*
        if (in_comp[i] != comp[i]) {
          sameComp = false;
          std::cout << "\n\nin and out companion DIFFER\n\n" << std::endl;
          std::cout << "i=" << i << ", in=" << in_comp[i] << ", out=" << comp[i] << std::endl;
          break;
        }
        */
      }
    }
  }
#endif
} //ArbitrateAndCommunicate(Vector&, LOVector&, LOVector*, const bool) const
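// The arbitration trick above (propose MyPid+1 where the local weight matches
// the global max, reduce with ABSMAX, subtract one) is compact but easy to
// miss. A standalone sketch of the same max-weight/highest-PID tie-break over
// plain MPI, for a single shared value:

#include <mpi.h>
#include <cstdio>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  double myWeight = (rank % 2) ? 1.0 : 0.5; // made-up weights; odd ranks tie at the max

  // 1) agree on the winning weight
  double maxWeight = 0.0;
  MPI_Allreduce(&myWeight, &maxWeight, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);

  // 2) holders of the max propose rank+1, everyone else proposes 0
  int candidate = (myWeight == maxWeight) ? rank + 1 : 0;

  // 3) the MAX reduction picks the highest proposing rank...
  int winnerPlusOne = 0;
  MPI_Allreduce(&candidate, &winnerPlusOne, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);

  // 4) ...and subtracting one recovers its PID (-1 would mean all weights were 0)
  if (rank == 0)
    std::printf("winner is rank %d\n", winnerPlusOne - 1);

  MPI_Finalize();
  return 0;
}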
void LeftoverAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::AggregateLeftovers(GraphBase const &graph, Aggregates &aggregates) const {
  Monitor m(*this, "AggregateLeftovers");

  my_size_t nVertices = graph.GetNodeNumVertices();
  int exp_nRows = aggregates.GetMap()->getNodeNumElements(); // Tentative fix... was previously exp_nRows = nVertices + graph.GetNodeNumGhost();
  int myPid = graph.GetComm()->getRank();
  my_size_t nAggregates = aggregates.GetNumAggregates();
  int minNodesPerAggregate = GetMinNodesPerAggregate();

  const RCP<const Map> nonUniqueMap = aggregates.GetMap(); // column map of underlying graph
  const RCP<const Map> uniqueMap = graph.GetDomainMap();

  MueLu::CoupledAggregationCommHelper<LO,GO,NO,LMO> myWidget(uniqueMap, nonUniqueMap);

  //TODO JJH We want to skip this call
  RCP<Xpetra::Vector<double,LO,GO,NO> > distWeights = Xpetra::VectorFactory<double,LO,GO,NO>::Build(nonUniqueMap);

  // Aggregated vertices not "definitively" assigned to processors are
  // arbitrated by ArbitrateAndCommunicate(). There is some
  // additional logic to prevent losing root nodes in arbitration.
  {
    ArrayRCP<const LO> vertex2AggId = aggregates.GetVertex2AggId()->getData(0);
    ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0);
    ArrayRCP<double> weights = distWeights->getDataNonConst(0);

    for (size_t i = 0; i < nonUniqueMap->getNodeNumElements(); i++) {
      if (procWinner[i] == MUELU_UNASSIGNED) {
        if (vertex2AggId[i] != MUELU_UNAGGREGATED) {
          weights[i] = 1.;
          if (aggregates.IsRoot(i)) weights[i] = 2.;
        }
      }
    }

    // views on distributed vectors are freed here
  }

  //TODO JJH We want to skip this call
  myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true);
  // All tentatively assigned vertices are now definitive

  // Tentatively assign any vertex (ghost or local) which neighbors a root
  // to the aggregate associated with the root.
  {
    ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0);
    ArrayRCP<double> weights = distWeights->getDataNonConst(0);

    for (my_size_t i = 0; i < nVertices; i++) {
      if (aggregates.IsRoot(i) && (procWinner[i] == myPid)) {
        // neighOfINode is the neighbor node list of node 'i'.
        ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i);

        for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
          int colj = *it;
          if (vertex2AggId[colj] == MUELU_UNAGGREGATED) {
            weights[colj] = 1.;
            vertex2AggId[colj] = vertex2AggId[i];
          }
        }
      }
    }

    // views on distributed vectors are freed here
  }

  //TODO JJH We want to skip this call
  myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true);
  // All tentatively assigned vertices are now definitive

  // Record the number of aggregated vertices
  GO total_phase_one_aggregated = 0;
  {
    ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    GO phase_one_aggregated = 0;
    for (my_size_t i = 0; i < nVertices; i++) {
      if (vertex2AggId[i] != MUELU_UNAGGREGATED) phase_one_aggregated++;
    }
    sumAll(graph.GetComm(), phase_one_aggregated, total_phase_one_aggregated);

    GO local_nVertices = nVertices, total_nVertices = 0;
    sumAll(graph.GetComm(), local_nVertices, total_nVertices);

    /* Among unaggregated points, see if we can make a reasonably sized   */
    /* aggregate out of them. We do this by looking at neighbors and      */
    /* seeing how many are unaggregated and on my processor. Loosely,     */
    /* base the number of new aggregates created on the percentage of     */
    /* unaggregated nodes.                                                */
    ArrayRCP<double> weights = distWeights->getDataNonConst(0);

    double factor = 1.;
    factor = ((double) total_phase_one_aggregated)/((double)(total_nVertices + 1));
    factor = pow(factor, GetPhase3AggCreation());

    for (my_size_t i = 0; i < nVertices; i++) {
      if (vertex2AggId[i] == MUELU_UNAGGREGATED) {
        // neighOfINode is the neighbor node list of node 'iNode'.
        ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i);
        int rowi_N = neighOfINode.size();

        int nonaggd_neighbors = 0;
        for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
          int colj = *it;
          if (vertex2AggId[colj] == MUELU_UNAGGREGATED && colj < nVertices)
            nonaggd_neighbors++;
        }
        if ((nonaggd_neighbors > minNodesPerAggregate) &&
            (((double) nonaggd_neighbors)/((double) rowi_N) > factor)) {
          vertex2AggId[i] = (nAggregates)++;
          for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
            int colj = *it;
            if (vertex2AggId[colj] == MUELU_UNAGGREGATED) {
              vertex2AggId[colj] = vertex2AggId[i];
              if (colj < nVertices) weights[colj] = 2.;
              else                  weights[colj] = 1.;
            }
          }
          aggregates.SetIsRoot(i);
          weights[i] = 2.;
        }
      }
    } // for (i = 0; i < nVertices; i++)

    // views on distributed vectors are freed here
  }

  //TODO JJH We want to skip this call
  myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true);
  // All tentatively assigned vertices are now definitive

  if (IsPrint(Statistics1)) {
    GO Nphase1_agg = nAggregates;
    GO total_aggs;
    sumAll(graph.GetComm(), Nphase1_agg, total_aggs);
    GetOStream(Statistics1, 0) << "Phase 1 - nodes aggregated = " << total_phase_one_aggregated << std::endl;
    GetOStream(Statistics1, 0) << "Phase 1 - total aggregates = " << total_aggs << std::endl;
    GO i = nAggregates - Nphase1_agg;
    { GO ii; sumAll(graph.GetComm(), i, ii); i = ii; }
    GetOStream(Statistics1, 0) << "Phase 3 - additional aggregates = " << i << std::endl;
  }

  // Determine vertices that are not shared by setting Temp to all ones
  // and doing NonUnique2NonUnique(..., ADD). This sums values of all
  // local copies associated with each Gid. Thus, sums > 1 are shared.

  //std::cout << "exp_nrows=" << exp_nRows << " (nVertices= " << nVertices << ", numGhost=" << graph.GetNodeNumGhost() << ")" << std::endl;
  //std::cout << "nonUniqueMap=" << nonUniqueMap->getNodeNumElements() << std::endl;

  RCP<Xpetra::Vector<double,LO,GO,NO> > temp_ = Xpetra::VectorFactory<double,LO,GO,NO>::Build(nonUniqueMap, false); // no need to zero out vector in ctor
  temp_->putScalar(1.);

  RCP<Xpetra::Vector<double,LO,GO,NO> > tempOutput_ = Xpetra::VectorFactory<double,LO,GO,NO>::Build(nonUniqueMap);

  myWidget.NonUnique2NonUnique(*temp_, *tempOutput_, Xpetra::ADD);

  std::vector<bool> gidNotShared(exp_nRows);
  {
    ArrayRCP<const double> tempOutput = tempOutput_->getData(0);
    for (int i = 0; i < exp_nRows; i++) {
      if (tempOutput[i] > 1.) gidNotShared[i] = false;
      else                    gidNotShared[i] = true;
    }
  }

  // Phase 4.
  double nAggregatesTarget;
  nAggregatesTarget = ((double) uniqueMap->getGlobalNumElements()) * (((double) uniqueMap->getGlobalNumElements()) / ((double) graph.GetGlobalNumEdges()));

  GO nAggregatesLocal = nAggregates, nAggregatesGlobal;
  sumAll(graph.GetComm(), nAggregatesLocal, nAggregatesGlobal);

  LO minNAggs; minAll(graph.GetComm(), nAggregates, minNAggs);
  LO maxNAggs; maxAll(graph.GetComm(), nAggregates, maxNAggs);

  //
  // Only do this phase if things look really bad. THIS
  // CODE IS PRETTY EXPERIMENTAL
  //
#define MUELU_PHASE4BUCKETS 6
  if ((nAggregatesGlobal < graph.GetComm()->getSize()) &&
      (2.5*nAggregatesGlobal < nAggregatesTarget) &&
      (minNAggs == 0) && (maxNAggs <= 1)) {

    // Modify seed of the random algorithm used by temp_->randomize()
    {
      typedef Teuchos::ScalarTraits<double> scalarTrait; // temp_ is of type double.
      scalarTrait::seedrandom(static_cast<unsigned int>(myPid*2 + (int) (11*scalarTrait::random())));
      int k = (int)ceil( (10.*myPid)/graph.GetComm()->getSize());
      for (int i = 0; i < k+7; i++) scalarTrait::random();
      temp_->setSeed(static_cast<unsigned int>(scalarTrait::random()));
    }

    temp_->randomize();

    ArrayRCP<double> temp = temp_->getDataNonConst(0);

    // build a list of candidate root nodes (vertices not adjacent
    // to aggregated vertices)
    my_size_t nCandidates = 0;
    global_size_t nCandidatesGlobal;

    ArrayRCP<LO> candidates = Teuchos::arcp<LO>(nVertices+1);

    double priorThreshold = 0.;
    for (int kkk = 0; kkk < MUELU_PHASE4BUCKETS; kkk++) {
      {
        ArrayRCP<const LO> vertex2AggId = aggregates.GetVertex2AggId()->getData(0);
        ArrayView<const LO> vertex2AggIdView = vertex2AggId();
        RootCandidates(nVertices, vertex2AggIdView, graph, candidates, nCandidates, nCandidatesGlobal);
        // views on distributed vectors are freed here
      }

      double nTargetNewGuys = nAggregatesTarget - nAggregatesGlobal;
      double threshold      = priorThreshold + (1. - priorThreshold)*nTargetNewGuys/(nCandidatesGlobal + .001);

      threshold = (threshold*(kkk+1.))/((double) MUELU_PHASE4BUCKETS);
      priorThreshold = threshold;

      {
        ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
        ArrayRCP<double> weights = distWeights->getDataNonConst(0);

        for (int k = 0; k < nCandidates; k++) {
          int i = candidates[k];
          if ((vertex2AggId[i] == MUELU_UNAGGREGATED) && (fabs(temp[i]) < threshold)) {
            // Note: priorThreshold <= fabs(temp[i]) <= 1

            // neighOfINode is the neighbor node list of node 'iNode'.
            ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i);

            if (neighOfINode.size() > minNodesPerAggregate) { //TODO: check if this test is exactly what we want to do
              int count = 0;
              for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
                int Adjacent = *it;
                // This might not be true if someone close to i
                // is chosen as a root via fabs(temp[]) < Threshold
                if (vertex2AggId[Adjacent] == MUELU_UNAGGREGATED) {
                  count++;
                  vertex2AggId[Adjacent] = nAggregates;
                  weights[Adjacent] = 1.;
                }
              }
              if (count >= minNodesPerAggregate) {
                vertex2AggId[i] = nAggregates++;
                weights[i] = 2.;
                aggregates.SetIsRoot(i);
              } else { // undo things
                for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
                  int Adjacent = *it;
                  if (vertex2AggId[Adjacent] == nAggregates) {
                    vertex2AggId[Adjacent] = MUELU_UNAGGREGATED;
                    weights[Adjacent] = 0.;
                  }
                }
              }
            }
          }
        }

        // views on distributed vectors are freed here
      }

      //TODO JJH We want to skip this call
      myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true);
      // All tentatively assigned vertices are now definitive
      nAggregatesLocal = nAggregates;
      sumAll(graph.GetComm(), nAggregatesLocal, nAggregatesGlobal);

      // check that there are no aggregate sizes below minNodesPerAggregate
      aggregates.SetNumAggregates(nAggregates);

      RemoveSmallAggs(aggregates, minNodesPerAggregate, distWeights, myWidget);

      nAggregates = aggregates.GetNumAggregates();
    } // one possibility
  }

  // Initialize things for Phase 5. This includes building the transpose
  // of the matrix ONLY for transposed rows that correspond to unaggregated
  // ghost vertices. Further, the transpose is only a local transpose.
  // Nonzero edges which exist on other processors are not represented.

  int observedNAgg = -1; // number of aggregates that contain vertices on this process
  {
    ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0);
    for (LO k = 0; k < vertex2AggId.size(); ++k)
      if (vertex2AggId[k] > observedNAgg) observedNAgg = vertex2AggId[k];
    observedNAgg++;
  }

  ArrayRCP<int> Mark = Teuchos::arcp<int>(exp_nRows+1);
  ArrayRCP<int> agg_incremented = Teuchos::arcp<int>(observedNAgg);
  ArrayRCP<int> SumOfMarks = Teuchos::arcp<int>(observedNAgg);

  for (int i = 0; i < exp_nRows; i++) Mark[i] = MUELU_DISTONE_VERTEX_WEIGHT;
  for (int i = 0; i < agg_incremented.size(); i++) agg_incremented[i] = 0;
  for (int i = 0; i < SumOfMarks.size(); i++) SumOfMarks[i] = 0;

  // Grab the transpose matrix graph for unaggregated ghost vertices.
  //   a) count the number of nonzeros per row in the transpose
  std::vector<int> RowPtr(exp_nRows+1-nVertices);
  //{
  ArrayRCP<const LO> vertex2AggIdCst = aggregates.GetVertex2AggId()->getData(0);

  for (int i = nVertices; i < exp_nRows; i++) RowPtr[i-nVertices] = 0;
  for (int i = 0; i < nVertices; i++) {
    // neighOfINode is the neighbor node list of node 'iNode'.
    ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i);

    for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
      int j = *it;
      if ((j >= nVertices) && (vertex2AggIdCst[j] == MUELU_UNAGGREGATED)) {
        RowPtr[j-nVertices]++;
      }
    }
  }

  //   b) Convert RowPtr[i] to point to the first nnz spot in row i.
  int iSum = 0, iTemp;
  for (int i = nVertices; i < exp_nRows; i++) {
    iTemp = RowPtr[i-nVertices];
    RowPtr[i-nVertices] = iSum;
    iSum += iTemp;
  }
  RowPtr[exp_nRows-nVertices] = iSum;
  std::vector<LO> cols(iSum+1);

  //   c) Traverse the matrix and insert entries in the proper locations.
  for (int i = 0; i < nVertices; i++) {
    // neighOfINode is the neighbor node list of node 'iNode'.
    ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i);

    for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
      int j = *it;
      if ((j >= nVertices) && (vertex2AggIdCst[j] == MUELU_UNAGGREGATED)) {
        cols[RowPtr[j-nVertices]++] = i;
      }
    }
  }

  //   d) RowPtr[i] points to the beginning of row i+1, so shift by one location.
  for (int i = exp_nRows; i > nVertices; i--)
    RowPtr[i-nVertices] = RowPtr[i-1-nVertices];
  RowPtr[0] = 0;

  // views on distributed vectors are freed here
  vertex2AggIdCst = Teuchos::null;
  //}

  int bestScoreCutoff;
  int thresholds[10] = {300, 200, 100, 50, 25, 13, 7, 4, 2, 0};

  // Stick unaggregated vertices into existing aggregates as described above.
  {
    int ncalls = 0;

    for (int kk = 0; kk < 10; kk += 2) {
      bestScoreCutoff = thresholds[kk];

      ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
      ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0);
      ArrayRCP<double> weights = distWeights->getDataNonConst(0);

      for (int i = 0; i < exp_nRows; i++) {
        if (vertex2AggId[i] == MUELU_UNAGGREGATED) {
          // neighOfINode is the neighbor node list of node 'iNode'.
          ArrayView<const LO> neighOfINode;

          // Grab neighboring vertices, which are either in the graph for local ids
          // or sit in the transposed fragment just constructed above for ghosts.
          if (i < nVertices) {
            neighOfINode = graph.getNeighborVertices(i);
          } else {
            LO *rowi_col = NULL, rowi_N;
            rowi_col = &(cols[RowPtr[i-nVertices]]);
            rowi_N   = RowPtr[i+1-nVertices] - RowPtr[i-nVertices];
            neighOfINode = ArrayView<const LO>(rowi_col, rowi_N);
          }

          for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
            int Adjacent    = *it;
            int AdjacentAgg = vertex2AggId[Adjacent];

            // Adjacent is aggregated and either I own the aggregate
            // or I could own the aggregate after arbitration.
            if ((AdjacentAgg != MUELU_UNAGGREGATED) &&
                ((procWinner[Adjacent] == myPid) || (procWinner[Adjacent] == MUELU_UNASSIGNED))) {
              SumOfMarks[AdjacentAgg] += Mark[Adjacent];
            }
          }

          int best_score = MUELU_NOSCORE;
          int best_agg = -1;
          int BestMark = -1;
          bool cannotLoseAllFriends = false; // Used to address possible loss of vertices in arbitration of shared nodes discussed above. (Initialized to false only to avoid a compiler warning.)

          for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
            int Adjacent    = *it;
            int AdjacentAgg = vertex2AggId[Adjacent];

            // Adjacent is aggregated, has some value, and no
            // other processor has definitively claimed him
            if ((AdjacentAgg != MUELU_UNAGGREGATED) &&
                (SumOfMarks[AdjacentAgg] != 0) &&
                ((procWinner[Adjacent] == myPid) || (procWinner[Adjacent] == MUELU_UNASSIGNED))) {

              // first figure out the penalty associated with
              // AdjacentAgg having already been incremented
              // during this phase, then compute the score.
              double penalty = (double) (INCR_SCALING*agg_incremented[AdjacentAgg]);
              if (penalty > MUELU_PENALTYFACTOR*((double)SumOfMarks[AdjacentAgg]))
                penalty = MUELU_PENALTYFACTOR*((double)SumOfMarks[AdjacentAgg]);
              int score = SumOfMarks[AdjacentAgg] - ((int) floor(penalty));

              if (score > best_score) {
                best_agg   = AdjacentAgg;
                best_score = score;
                BestMark   = Mark[Adjacent];
                cannotLoseAllFriends = false;

                // This addresses the issue mentioned above by checking whether
                // Adjacent could be lost in arbitration. weight==0 means that
                // Adjacent was not set during this loop of Phase 5 (and so it
                // has already undergone arbitration). gidNotShared == true
                // obviously implies that Adjacent cannot be lost to arbitration.
                if ((weights[Adjacent] == 0.) || (gidNotShared[Adjacent] == true))
                  cannotLoseAllFriends = true;
              }
              // Another vertex within the current best aggregate found.
              // We should have (best_score == score). We need to see
              // if we can improve BestMark and cannotLoseAllFriends.
              else if (best_agg == AdjacentAgg) {
                if ((weights[Adjacent] == 0.) || (gidNotShared[Adjacent] == true))
                  cannotLoseAllFriends = true;
                if (Mark[Adjacent] > BestMark) BestMark = Mark[Adjacent];
              }
            }
          }

          // Clean up
          for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
            int Adjacent    = *it;
            int AdjacentAgg = vertex2AggId[Adjacent];
            if (AdjacentAgg >= 0) SumOfMarks[AdjacentAgg] = 0;
          }

          // Tentatively assign vertex to best_agg.
          if ((best_score >= bestScoreCutoff) && (cannotLoseAllFriends)) {
            TEUCHOS_TEST_FOR_EXCEPTION(best_agg == -1 || BestMark == -1, MueLu::Exceptions::RuntimeError, "MueLu::CoupledAggregationFactory internal error"); // should never happen

            vertex2AggId[i] = best_agg;
            weights[i] = best_score;
            agg_incremented[best_agg]++;
            Mark[i] = (int) ceil(((double) BestMark)/2.);
          }
        }

        // views on distributed vectors are freed here
      }

      vertex2AggId = Teuchos::null;
      procWinner   = Teuchos::null;
      weights      = Teuchos::null;
      ++ncalls;

      //TODO JJH We want to skip this call
      myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true);
      // All tentatively assigned vertices are now definitive
    }

    //if (graph.GetComm()->getRank() == 0)
    //  std::cout << "#calls to Arb&Comm=" << ncalls << std::endl;
  }

  // Phase 6: Aggregate remaining unaggregated vertices and try at all costs
  //          to avoid small aggregates.
  //          One case where we can find ourselves in this situation
  //          is if all vertices vk adjacent to v have already been
  //          put in other processors' aggregates and v does not have
  //          a direct connection to a local vertex in any of these
  //          aggregates.
  int Nleftover = 0, Nsingle = 0;
  {
    ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
    ArrayRCP<double> weights = distWeights->getDataNonConst(0);
    ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0);

    int count = 0;
    for (my_size_t i = 0; i < nVertices; i++) {
      if (vertex2AggId[i] == MUELU_UNAGGREGATED) {
        Nleftover++;

        // neighOfINode is the neighbor node list of node 'iNode'.
        ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i);

        // We don't want too small an aggregate. So let's see if there is an
        // unaggregated neighbor that we can also put with this vertex.
        vertex2AggId[i] = nAggregates;
        weights[i] = 1.;
        if (count == 0) aggregates.SetIsRoot(i);
        count++;
        for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) {
          int j = *it;
          if ((j != i) && (vertex2AggId[j] == MUELU_UNAGGREGATED) && (j < nVertices)) {
            vertex2AggId[j] = nAggregates;
            weights[j] = 1.;
            count++;
          }
        }
        if (count >= minNodesPerAggregate) {
          nAggregates++;
          count = 0;
        }
      }
    }

    // We have something which is under minNodesPerAggregate when
    if (count != 0) {
#ifdef FIXME
      // Can we stick the small aggregate with the 0th aggregate?
      if (nAggregates > 0) {
        for (my_size_t i = 0; i < nVertices; i++) {
          if ((vertex2AggId[i] == nAggregates) && (procWinner[i] == myPid)) {
            vertex2AggId[i] = 0;
            aggregates.SetIsRoot(i, false);
          }
        }
      } else {
        Nsingle++;
        nAggregates++;
      }
#else
      // Can we stick the small aggregate with the 0th aggregate?
      if (nAggregates > 0) {
        for (my_size_t i = 0; i < nVertices; i++) {
          // TW: This is not a real fix. This may produce ugly bad aggregates!
          // I removed the procWinner[i] == myPid check. It makes no sense to me since
          // it leaves vertex2AggId[i] == nAggregates -> crash in ComputeAggregateSizes().
          // Maybe it's better to add the leftovers to the last generated agg on the current proc.
          // The best solution would be to add them to the "next"/nearest aggregate, which may be
          // on another processor.
          if (vertex2AggId[i] == nAggregates) {
            vertex2AggId[i] = nAggregates - 1; //0;
            aggregates.SetIsRoot(i, false);
          }
        }
      } else {
        Nsingle++;
        nAggregates++;
      }
#endif
    }

    // views on distributed vectors are freed here
  }

  //TODO JJH We want to skip this call
  myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, false);

  if (IsPrint(Statistics1)) {
    GO total_Nsingle = 0;   sumAll(graph.GetComm(), (GO)Nsingle,   total_Nsingle);
    GO total_Nleftover = 0; sumAll(graph.GetComm(), (GO)Nleftover, total_Nleftover);
    //GO total_aggs; sumAll(graph.GetComm(), (GO)nAggregates, total_aggs);
    //GetOStream(Statistics1, 0) << "Phase 6 - total aggregates = " << total_aggs << std::endl;
    GetOStream(Statistics1, 0) << "Phase 6 - leftovers = " << total_Nleftover << " and singletons = " << total_Nsingle << std::endl;
  }

  aggregates.SetNumAggregates(nAggregates);
} //AggregateLeftovers
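// The Phase 5 scoring rule above is the piece of arithmetic most easily gotten
// wrong when re-implementing: the penalty grows with how often an aggregate
// has already been extended, but is capped at a fixed fraction of the
// aggregate's mark sum. A standalone sketch with made-up constants
// (INCR_SCALING and MUELU_PENALTYFACTOR are defined elsewhere in MueLu; the
// values below are illustrative guesses only):

#include <cmath>
#include <cstdio>

int scoreSketch(int sumOfMarks, int timesIncremented) {
  const double INCR_SCALING_GUESS   = 3.0; // assumption: real value lives in MueLu headers
  const double PENALTY_FACTOR_GUESS = 0.3; // assumption: caps penalty at 30% of the marks

  // score = SumOfMarks[agg] - floor(min(INCR_SCALING*incr, PENALTYFACTOR*SumOfMarks[agg]))
  double penalty = INCR_SCALING_GUESS * timesIncremented;
  if (penalty > PENALTY_FACTOR_GUESS * sumOfMarks)
    penalty = PENALTY_FACTOR_GUESS * sumOfMarks; // cap the penalty
  return sumOfMarks - (int) std::floor(penalty);
}

int main() {
  // an aggregate with mark sum 100: the penalty saturates after ~10 extensions
  for (int inc : {0, 5, 10, 20})
    std::printf("incremented %2d times -> score %d\n", inc, scoreSketch(100, inc));
  return 0;
}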