void RigidBodyModeFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::DeclareInput(Level &currentLevel) const { if (currentLevel.IsAvailable(nspName_, NoFactory::get()) == false && currentLevel.GetLevelID() == 0) { Input(currentLevel, "A"); //Input(currentLevel,"Coordinates"); } if (currentLevel.GetLevelID() != 0) { currentLevel.DeclareInput("Nullspace", GetFactory(nspName_).get(), this); /* ! "Nullspace" and nspName_ mismatch possible here */ } }
void TogglePFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& fineLevel, Level &coarseLevel) const { FactoryMonitor m(*this, "Prolongator toggle", coarseLevel); std::ostringstream levelstr; levelstr << coarseLevel.GetLevelID(); typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude; TEUCHOS_TEST_FOR_EXCEPTION(nspFacts_.size() != prolongatorFacts_.size(), Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: The number of provided prolongator factories and coarse nullspace factories must be identical."); TEUCHOS_TEST_FOR_EXCEPTION(nspFacts_.size() != 2, Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: TogglePFactory needs two different transfer operator strategies for toggling."); // TODO adapt this/weaken this as soon as other toggling strategies are introduced. // decision routine which prolongator factory to be used int nProlongatorFactory = 0; // default behavior: use first prolongator in list // extract user parameters const Teuchos::ParameterList & pL = GetParameterList(); std::string mode = Teuchos::as<std::string>(pL.get<std::string>("toggle: mode")); int semicoarsen_levels = Teuchos::as<int>(pL.get<int>("semicoarsen: number of levels")); TEUCHOS_TEST_FOR_EXCEPTION(mode!="semicoarsen", Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: The 'toggle: mode' parameter must be set to 'semicoarsen'. No other mode supported, yet."); LO NumZDir = -1; if(fineLevel.IsAvailable("NumZLayers", NoFactory::get())) { NumZDir = fineLevel.Get<LO>("NumZLayers", NoFactory::get()); //obtain info GetOStream(Runtime1) << "Number of layers for semicoarsening: " << NumZDir << std::endl; } // Make a decision which prolongator to be used. if(fineLevel.GetLevelID() >= semicoarsen_levels || NumZDir == 1) { nProlongatorFactory = 1; } else { nProlongatorFactory = 0; } RCP<Matrix> P = Teuchos::null; RCP<MultiVector> coarseNullspace = Teuchos::null; // call Build for selected transfer operator GetOStream(Runtime0) << "TogglePFactory: call transfer factory: " << (prolongatorFacts_[nProlongatorFactory])->description() << std::endl; prolongatorFacts_[nProlongatorFactory]->CallBuild(coarseLevel); P = coarseLevel.Get< RCP<Matrix> >("P", (prolongatorFacts_[nProlongatorFactory]).get()); coarseNullspace = coarseLevel.Get< RCP<MultiVector> >("Nullspace", (nspFacts_[nProlongatorFactory]).get()); // Release dependencies of all prolongator and coarse level null spaces for(size_t t=0; t<nspFacts_.size(); ++t) { coarseLevel.Release(*(prolongatorFacts_[t])); coarseLevel.Release(*(nspFacts_[t])); } // store prolongator with this factory identification. Set(coarseLevel, "P", P); Set(coarseLevel, "Nullspace", coarseNullspace); } //Build()
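For context, a minimal sketch (values illustrative, not defaults) of the two parameters the toggling logic above reads from its parameter list; with these settings the first (semicoarsening) prolongator strategy is used on levels 0 and 1, and the factory toggles to the second strategy from level 2 on, or as soon as only one z-layer remains (NumZDir == 1):

#include <Teuchos_ParameterList.hpp>

// Hedged sketch: parameter names taken from the Build() above, values illustrative.
Teuchos::ParameterList makeToggleParams() {
  Teuchos::ParameterList pL;
  pL.set("toggle: mode", "semicoarsen");        // only supported mode (see exception above)
  pL.set("semicoarsen: number of levels", 2);   // illustrative value
  return pL;
}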
void NullspacePresmoothFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const { FactoryMonitor m(*this, "Nullspace presmooth factory", currentLevel); RCP<MultiVector> newB; if (currentLevel.GetLevelID() == 0) { RCP<Matrix> A = Get< RCP<Matrix> > (currentLevel, "A"); RCP<MultiVector> B = Get< RCP<MultiVector> >(currentLevel, "Nullspace"); newB = MultiVectorFactory::Build(B->getMap(), B->getNumVectors()); Teuchos::ArrayRCP<SC> D = Utils::GetMatrixDiagonal(*A); SC damping = 4./3; damping /= Utils::PowerMethod(*A, true, (LO) 10, 1e-4); A->apply(*B, *newB, Teuchos::NO_TRANS); size_t numVec = newB->getNumVectors(); LO numElements = newB->getLocalLength(); for (size_t j = 0; j < numVec; j++) { Teuchos::ArrayRCP<const SC> Bj = B->getData(j); Teuchos::ArrayRCP<SC> newBj = newB->getDataNonConst(j); for (LO i = 0; i < numElements; i++) newBj[i] = Bj[i] - damping*newBj[i]/D[i]; } } else { newB = Get< RCP<MultiVector> >(currentLevel, "Nullspace"); } // provide "Nullspace" variable on current level Set(currentLevel, "Nullspace", newB); } // Build
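In effect, on the finest level the loop above applies one damped-Jacobi sweep to each nullspace vector; a sketch of the computed update, with \(\hat{\lambda}\) denoting the power-method estimate used for the damping:

\[
  B_{\text{new}} = \bigl(I - \omega\, D^{-1} A\bigr)\, B, \qquad \omega = \frac{4/3}{\hat{\lambda}}.
\]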
//! virtual void CallDeclareInput(Level & requestedLevel) const { if (requestedLevel.GetPreviousLevel() == Teuchos::null) { std::ostringstream errStr; errStr << "LevelID = " << requestedLevel.GetLevelID(); throw Exceptions::DependencyError(errStr.str()); } DeclareInput(*requestedLevel.GetPreviousLevel(), requestedLevel); }
void UserAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const { FactoryMonitor m(*this, "Build", currentLevel); const ParameterList & pL = GetParameterList(); RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm(); const int myRank = comm->getRank(); std::string fileName = pL.get<std::string>("filePrefix") + toString(currentLevel.GetLevelID()) + "_" + toString(myRank) + "." + pL.get<std::string>("fileExt"); std::ifstream ifs(fileName.c_str()); if (!ifs.good()) throw Exceptions::RuntimeError("Cannot read data from \"" + fileName + "\""); LO numVertices, numAggregates; ifs >> numVertices >> numAggregates; // FIXME: what is the map? Xpetra::UnderlyingLib lib = Xpetra::UseEpetra; const int indexBase = 0; RCP<Map> map = MapFactory::Build(lib, numVertices, indexBase, comm); RCP<Aggregates> aggregates = rcp(new Aggregates(map)); aggregates->setObjectLabel("User"); aggregates->SetNumAggregates(numAggregates); Teuchos::ArrayRCP<LO> vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0); Teuchos::ArrayRCP<LO> procWinner = aggregates->GetProcWinner() ->getDataNonConst(0); for (LO i = 0; i < numAggregates; i++) { int aggSize = 0; ifs >> aggSize; std::vector<LO> list(aggSize); for (int k = 0; k < aggSize; k++) { // FIXME: File contains GIDs, we need LIDs // for now, works on a single processor ifs >> list[k]; } // Mark first node as root node for the aggregate aggregates->SetIsRoot(list[0]); // Fill vertex2AggId and procWinner structure with information for (int k = 0; k < aggSize; k++) { vertex2AggId[list[k]] = i; procWinner [list[k]] = myRank; } } // FIXME: do the proper check whether aggregates cross interprocessor boundary aggregates->AggregatesCrossProcessors(false); Set(currentLevel, "Aggregates", aggregates); GetOStream(Statistics0, 0) << aggregates->description() << std::endl; }
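An input file matching the stream reads above could look as follows (numbers illustrative): the first two values give the number of vertices and aggregates, followed by one record per aggregate listing its size and then its node IDs (currently interpreted as local IDs, per the FIXME); only whitespace separation matters for the >> reads:

9 3
3  0 1 2
3  3 4 5
3  6 7 8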
//! virtual void CallBuild(Level& requestedLevel) const { int levelID = requestedLevel.GetLevelID(); #ifdef HAVE_MUELU_DEBUG // We cannot call Build method twice for the same level, but we can call it multiple times for different levels TEUCHOS_TEST_FOR_EXCEPTION((multipleCallCheck_ == ENABLED) && (multipleCallCheckGlobal_ == ENABLED) && (lastLevelID_ == levelID), Exceptions::RuntimeError, this->ShortClassName() << "::Build() called twice for the same level (levelID=" << levelID << "). This is likely due to a configuration error."); if (multipleCallCheck_ == FIRSTCALL) multipleCallCheck_ = ENABLED; lastLevelID_ = levelID; #endif TEUCHOS_TEST_FOR_EXCEPTION(requestedLevel.GetPreviousLevel() == Teuchos::null, Exceptions::RuntimeError, "LevelID = " << levelID); #ifdef HAVE_MUELU_TIMER_SYNCHRONIZATION RCP<const Teuchos::Comm<int> > comm = requestedLevel.GetComm(); if (comm.is_null()) { // Some factories are called before we constructed Ac, and therefore, // before we set the level communicator. For such factories we can get // the comm from the previous level, as all processes go there RCP<Level>& prevLevel = requestedLevel.GetPreviousLevel(); if (!prevLevel.is_null()) comm = prevLevel->GetComm(); } // Synchronization timer std::string syncTimer = this->ShortClassName() + ": Build sync (level=" + toString(requestedLevel.GetLevelID()) + ")"; if (!comm.is_null()) { TimeMonitor timer(*this, syncTimer); comm->barrier(); } #endif Build(*requestedLevel.GetPreviousLevel(), requestedLevel); #ifdef HAVE_MUELU_TIMER_SYNCHRONIZATION // Synchronization timer if (!comm.is_null()) { TimeMonitor timer(*this, syncTimer); comm->barrier(); } #endif GetOStream(Test) << *RemoveFactoriesFromList(GetParameterList()) << std::endl; }
void RebalanceAcFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const { FactoryMonitor m(*this, "Computing Ac", coarseLevel); RCP<Matrix> originalAc = Get< RCP<Matrix> >(coarseLevel, "A"); RCP<const Import> rebalanceImporter = Get< RCP<const Import> >(coarseLevel, "Importer"); if (rebalanceImporter != Teuchos::null) { RCP<Matrix> rebalancedAc; { SubFactoryMonitor subM(*this, "Rebalancing existing Ac", coarseLevel); RCP<const Map> targetMap = rebalanceImporter->getTargetMap(); const ParameterList & pL = GetParameterList(); ParameterList XpetraList; if (pL.get<bool>("useSubcomm") == true) { GetOStream(Runtime0,0) << "Replacing maps with a subcommunicator" << std::endl; XpetraList.set("Restrict Communicator",true); } // NOTE: If the communicator is restricted away, Build returns Teuchos::null. rebalancedAc = MatrixFactory::Build(originalAc, *rebalanceImporter, targetMap, targetMap, rcp(&XpetraList,false)); if (!rebalancedAc.is_null()) rebalancedAc->SetFixedBlockSize(originalAc->GetFixedBlockSize()); Set(coarseLevel, "A", rebalancedAc); } if (!rebalancedAc.is_null()) { RCP<ParameterList> params = rcp(new ParameterList()); params->set("printLoadBalancingInfo", true); GetOStream(Statistics0, 0) << Utils::PrintMatrixInfo(*rebalancedAc, "Ac (rebalanced)", params); } } else { // Ac already built by the load balancing process and no load balancing needed GetOStream(Warnings0, 0) << "No rebalancing" << std::endl; GetOStream(Warnings0, 0) << "Jamming A into Level " << coarseLevel.GetLevelID() << " w/ generating factory " << this << std::endl; Set(coarseLevel, "A", originalAc); } } //Build()
void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::BuildP(Level& fineLevel, Level& coarseLevel) const { FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel); // Add debugging information DeviceType::execution_space::print_configuration(GetOStream(Runtime1)); typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude; // Get default tentative prolongator factory // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory. // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere! RCP<const FactoryBase> initialPFact = GetFactory("P"); if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); } // Level Get RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A"); RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get()); if(restrictionMode_) { SubFactoryMonitor m2(*this, "Transpose A", coarseLevel); A = Utilities_kokkos::Transpose(*A, true); // build transpose of A explicitly } //Build final prolongator RCP<Matrix> finalP; // output // Reuse pattern if available RCP<ParameterList> APparams = rcp(new ParameterList); if (coarseLevel.IsAvailable("AP reuse data", this)) { GetOStream(static_cast<MsgType>(Runtime0 | Test)) << "Reusing previous AP data" << std::endl; APparams = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", this); if (APparams->isParameter("graph")) finalP = APparams->get< RCP<Matrix> >("graph"); } const ParameterList& pL = GetParameterList(); SC dampingFactor = as<SC>(pL.get<double>("sa: damping factor")); LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations")); bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate"); if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) { SC lambdaMax; { SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel); lambdaMax = A->GetMaxEigenvalueEstimate(); if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) { GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations << ")" << std::endl; Magnitude stopTol = 1e-4; lambdaMax = Utilities_kokkos::PowerMethod(*A, true, maxEigenIterations, stopTol); A->SetMaxEigenvalueEstimate(lambdaMax); } else { GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl; } GetOStream(Statistics0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl; } { SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel); RCP<Vector> invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A); SC omega = dampingFactor / lambdaMax; // finalP = Ptent + (I - \omega D^{-1}A) Ptent finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()), APparams); } } else { finalP = Ptent; } // Level Set if (!restrictionMode_) { // prolongation factory is in prolongation mode Set(coarseLevel, "P", finalP); // NOTE: EXPERIMENTAL if (Ptent->IsView("stridedMaps")) finalP->CreateView("stridedMaps", Ptent); } else { // prolongation factory is in restriction mode RCP<Matrix> R = Utilities_kokkos::Transpose(*finalP, true); Set(coarseLevel, "R", R); // NOTE: EXPERIMENTAL if (Ptent->IsView("stridedMaps")) R->CreateView("stridedMaps", Ptent, true); } if 
(IsPrint(Statistics1)) { RCP<ParameterList> params = rcp(new ParameterList()); params->set("printLoadBalancingInfo", true); params->set("printCommInfo", true); GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params); } } //Build()
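As the "Fused (I-omega*D^{-1} A)*Ptent" monitor label indicates, the prolongator computed above is the standard smoothed-aggregation operator, built from the tentative prolongator and the damped Jacobi sweep:

\[
  P = \bigl(I - \omega\, D^{-1} A\bigr)\, P_{\text{tent}}, \qquad \omega = \frac{\text{dampingFactor}}{\lambda_{\max}}.
\]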
void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& fineLevel, Level& coarseLevel) const { { FactoryMonitor m(*this, "Computing Ac", coarseLevel); std::ostringstream levelstr; levelstr << coarseLevel.GetLevelID(); TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_==false, Exceptions::RuntimeError, "MueLu::RAPFactory::Build(): CallDeclareInput has not been called before Build!"); // Set "Keeps" from params const Teuchos::ParameterList& pL = GetParameterList(); if (pL.get<bool>("Keep AP Pattern")) coarseLevel.Keep("AP Pattern", this); if (pL.get<bool>("Keep RAP Pattern")) coarseLevel.Keep("RAP Pattern", this); RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A"); RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P"), AP, Ac; // Reuse pattern if available (multiple solve) if (coarseLevel.IsAvailable("AP Pattern", this)) { GetOStream(Runtime0) << "Ac: Using previous AP pattern" << std::endl; AP = Get< RCP<Matrix> >(coarseLevel, "AP Pattern"); } { SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel); AP = Utils::Multiply(*A, false, *P, false, AP, GetOStream(Statistics2),true,true,std::string("MueLu::A*P-")+levelstr.str()); } if (pL.get<bool>("Keep AP Pattern")) Set(coarseLevel, "AP Pattern", AP); // Reuse coarse matrix memory if available (multiple solve) if (coarseLevel.IsAvailable("RAP Pattern", this)) { GetOStream(Runtime0) << "Ac: Using previous RAP pattern" << std::endl; Ac = Get< RCP<Matrix> >(coarseLevel, "RAP Pattern"); // Some eigenvalue may have been cached with the matrix in the previous run. // As the matrix values will be updated, we need to reset the eigenvalue. Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one()); } // If we do not modify matrix later, allow optimization of storage. // This is necessary for new faster Epetra MM kernels. bool doOptimizeStorage = !pL.get<bool>("RepairMainDiagonal"); const bool doTranspose = true; const bool doFillComplete = true; if (pL.get<bool>("transpose: use implicit") == true) { SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel); Ac = Utils::Multiply(*P, doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), doFillComplete, doOptimizeStorage,std::string("MueLu::R*(AP)-implicit-")+levelstr.str()); } else { RCP<Matrix> R = Get< RCP<Matrix> >(coarseLevel, "R"); SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel); Ac = Utils::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), doFillComplete, doOptimizeStorage,std::string("MueLu::R*(AP)-explicit-")+levelstr.str()); } CheckRepairMainDiagonal(Ac); if (IsPrint(Statistics1)) { RCP<ParameterList> params = rcp(new ParameterList());; params->set("printLoadBalancingInfo", true); params->set("printCommInfo", true); GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params); } Set(coarseLevel, "A", Ac); if (pL.get<bool>("Keep RAP Pattern")) Set(coarseLevel, "RAP Pattern", Ac); } if (transferFacts_.begin() != transferFacts_.end()) { SubFactoryMonitor m(*this, "Projections", coarseLevel); // call Build of all user-given transfer factories for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) { RCP<const FactoryBase> fac = *it; GetOStream(Runtime0) << "RAPFactory: call transfer factory: " << fac->description() << std::endl; fac->CallBuild(coarseLevel); // Coordinates transfer is marginally different from all other operations // because it is *optional*, and not required. 
For instance, we may need // coordinates only on level 4 if we start repartitioning from that level, // but we don't need them on level 1,2,3. As our current Hierarchy setup // assumes propagation of dependencies only through three levels, this // means that we need to rely on other methods to propagate optional data. // // The method currently used is through RAP transfer factories, which are // simply factories which are called at the end of RAP with a single goal: // transfer some fine data to coarser level. Because these factories are // kind of outside of the mainline factories, they behave differently. In // particular, we call their Build method explicitly, rather than through // Get calls. This difference is significant, as the Get call is smart // enough to know when to release all factory dependencies, and Build is // dumb. This led to the following CoordinatesTransferFactory sequence: // 1. Request level 0 // 2. Request level 1 // 3. Request level 0 // 4. Release level 0 // 5. Release level 1 // // The problem is missing "6. Release level 0". Because it was missing, // we had outstanding requests on "Coordinates", "Aggregates" and // "CoarseMap" on level 0. // // This was fixed by explicitly calling Release on transfer factories in // RAPFactory. I am still unsure how exactly it works, but now we have // clear data requests for all levels. coarseLevel.Release(*fac); } } }
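The coarse operator assembled above is the usual Galerkin triple product, formed in two sparse matrix-matrix multiplies:

\[
  A_c = R\,(A\,P), \qquad R = P^{T} \ \text{when "transpose: use implicit" is enabled.}
\]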
void IfpackSmoother::Setup(Level &currentLevel) { FactoryMonitor m(*this, "Setup Smoother", currentLevel); if (SmootherPrototype::IsSetup() == true) GetOStream(Warnings0, 0) << "Warning: MueLu::IfpackSmoother::Setup(): Setup() has already been called"; A_ = Factory::Get< RCP<Matrix> >(currentLevel, "A"); double lambdaMax = -1.0; if (type_ == "Chebyshev") { std::string maxEigString = "chebyshev: max eigenvalue"; std::string eigRatioString = "chebyshev: ratio eigenvalue"; try { lambdaMax = Teuchos::getValue<Scalar>(this->GetParameter(maxEigString)); this->GetOStream(Statistics1, 0) << maxEigString << " (cached with smoother parameter list) = " << lambdaMax << std::endl; } catch (Teuchos::Exceptions::InvalidParameterName) { lambdaMax = A_->GetMaxEigenvalueEstimate(); if (lambdaMax != -1.0) { this->GetOStream(Statistics1, 0) << maxEigString << " (cached with matrix) = " << lambdaMax << std::endl; this->SetParameter(maxEigString, ParameterEntry(lambdaMax)); } } // Calculate the eigenvalue ratio const Scalar defaultEigRatio = 20; Scalar ratio = defaultEigRatio; try { ratio = Teuchos::getValue<Scalar>(this->GetParameter(eigRatioString)); } catch (Teuchos::Exceptions::InvalidParameterName) { this->SetParameter(eigRatioString, ParameterEntry(ratio)); } if (currentLevel.GetLevelID()) { // Update ratio to be // ratio = max(number of fine DOFs / number of coarse DOFs, defaultValue) // // NOTE: We don't need to request previous level matrix as we know for sure it was constructed RCP<const Matrix> fineA = currentLevel.GetPreviousLevel()->Get<RCP<Matrix> >("A"); size_t nRowsFine = fineA->getGlobalNumRows(); size_t nRowsCoarse = A_->getGlobalNumRows(); ratio = std::max(ratio, as<Scalar>(nRowsFine)/nRowsCoarse); this->GetOStream(Statistics1, 0) << eigRatioString << " (computed) = " << ratio << std::endl; this->SetParameter(eigRatioString, ParameterEntry(ratio)); } } RCP<Epetra_CrsMatrix> epA = Utils::Op2NonConstEpetraCrs(A_); Ifpack factory; prec_ = rcp(factory.Create(type_, &(*epA), overlap_)); TEUCHOS_TEST_FOR_EXCEPTION(prec_.is_null(), Exceptions::RuntimeError, "Could not create an Ifpack preconditioner with type = \"" << type_ << "\""); SetPrecParameters(); prec_->Compute(); SmootherPrototype::IsSetup(true); if (type_ == "Chebyshev" && lambdaMax == -1.0) { Teuchos::RCP<Ifpack_Chebyshev> chebyPrec = rcp_dynamic_cast<Ifpack_Chebyshev>(prec_); if (chebyPrec != Teuchos::null) { lambdaMax = chebyPrec->GetLambdaMax(); A_->SetMaxEigenvalueEstimate(lambdaMax); this->GetOStream(Statistics1, 0) << "chebyshev: max eigenvalue (calculated by Ifpack)" << " = " << lambdaMax << std::endl; } TEUCHOS_TEST_FOR_EXCEPTION(lambdaMax == -1.0, Exceptions::RuntimeError, "MueLu::IfpackSmoother::Setup(): no maximum eigenvalue estimate"); } this->GetOStream(Statistics0, 0) << description() << std::endl; }
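The eigenvalue-ratio update performed above for Chebyshev on coarser levels amounts to:

\[
  \text{ratio} = \max\!\left(\text{ratio}_{\text{param}},\ \frac{n_{\text{rows,fine}}}{n_{\text{rows,coarse}}}\right).
\]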
void SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildP(Level &fineLevel, Level &coarseLevel) const { FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel); std::ostringstream levelstr; levelstr << coarseLevel.GetLevelID(); typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude; // Get default tentative prolongator factory // Getting it that way ensures that the same factory instance will be used for both SaPFactory and NullspaceFactory. // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere! RCP<const FactoryBase> initialPFact = GetFactory("P"); if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); } // Level Get RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A"); RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get()); if(restrictionMode_) { SubFactoryMonitor m2(*this, "Transpose A", coarseLevel); A = Utils2::Transpose(*A, true); // build transpose of A explicitly } //Build final prolongator RCP<Matrix> finalP; // output const ParameterList & pL = GetParameterList(); Scalar dampingFactor = as<Scalar>(pL.get<double>("sa: damping factor")); LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations")); bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate"); if (dampingFactor != Teuchos::ScalarTraits<Scalar>::zero()) { Scalar lambdaMax; { SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel); lambdaMax = A->GetMaxEigenvalueEstimate(); if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) { GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations << ")" << std::endl; Magnitude stopTol = 1e-4; lambdaMax = Utils::PowerMethod(*A, true, maxEigenIterations, stopTol); A->SetMaxEigenvalueEstimate(lambdaMax); } else { GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl; } GetOStream(Statistics0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl; } { SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel); Teuchos::RCP<Vector> invDiag = Utils::GetMatrixDiagonalInverse(*A); SC omega = dampingFactor / lambdaMax; // finalP = Ptent + (I - \omega D^{-1}A) Ptent finalP = Utils::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2),std::string("MueLu::SaP-")+levelstr.str()); } } else { finalP = Ptent; } // Level Set if (!restrictionMode_) { // prolongation factory is in prolongation mode Set(coarseLevel, "P", finalP); // NOTE: EXPERIMENTAL if (Ptent->IsView("stridedMaps")) finalP->CreateView("stridedMaps", Ptent); } else { // prolongation factory is in restriction mode RCP<Matrix> R = Utils2::Transpose(*finalP, true); // use Utils2 -> specialization for double Set(coarseLevel, "R", R); // NOTE: EXPERIMENTAL if (Ptent->IsView("stridedMaps")) R->CreateView("stridedMaps", Ptent, true); } if (IsPrint(Statistics1)) { RCP<ParameterList> params = rcp(new ParameterList()); params->set("printLoadBalancingInfo", true); params->set("printCommInfo", true); GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params); } } //Build()
//! virtual void CallDeclareInput(Level & requestedLevel) const { TEUCHOS_TEST_FOR_EXCEPTION(requestedLevel.GetPreviousLevel() == Teuchos::null, Exceptions::RuntimeError, "LevelID = " << requestedLevel.GetLevelID()); DeclareInput(*requestedLevel.GetPreviousLevel(), requestedLevel); }
void RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const { FactoryMonitor m(*this, "Build", currentLevel); const Teuchos::ParameterList & pL = GetParameterList(); // Access parameters here to make sure that we set the parameter entry flag to "used" even in case of short-circuit evaluation. // TODO (JG): I don't really know if we want to do this. const int startLevel = pL.get<int> ("repartition: start level"); const LO minRowsPerProcessor = pL.get<LO> ("repartition: min rows per proc"); const double nonzeroImbalance = pL.get<double>("repartition: max imbalance"); const bool remapPartitions = pL.get<bool> ("repartition: remap parts"); // TODO: We only need a CrsGraph. This class does not have to be templated on Scalar types. RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A"); // ====================================================================================================== // Determine whether partitioning is needed // ====================================================================================================== // NOTE: most tests include some global communication, which is why we currently only do tests until we make // a decision on whether to repartition. However, there is value in knowing how "close" we are to having to // rebalance an operator. So, it would probably be beneficial to do and report *all* tests. // Test1: skip repartitioning if current level is less than the specified minimum level for repartitioning if (currentLevel.GetLevelID() < startLevel) { GetOStream(Statistics0) << "Repartitioning? NO:" << "\n current level = " << Teuchos::toString(currentLevel.GetLevelID()) << ", first level where repartitioning can happen is " + Teuchos::toString(startLevel) << std::endl; Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null); return; } RCP<const Map> rowMap = A->getRowMap(); // NOTE: Teuchos::MPIComm::duplicate() calls MPI_Bcast inside, so this is // a synchronization point. However, as we do MueLu_sumAll afterwards anyway, it // does not matter. RCP<const Teuchos::Comm<int> > origComm = rowMap->getComm(); RCP<const Teuchos::Comm<int> > comm = origComm->duplicate(); // Test 2: check whether A is actually distributed, i.e. more than one processor owns part of A // TODO: this global communication can be avoided if we store the information with the matrix (it is known when matrix is created) // TODO: further improvements could be achieved when we use subcommunicator for the active set. Then we only need to check its size { int numActiveProcesses = 0; MueLu_sumAll(comm, Teuchos::as<int>((A->getNodeNumRows() > 0) ? 1 : 0), numActiveProcesses); if (numActiveProcesses == 1) { GetOStream(Statistics0) << "Repartitioning? NO:" << "\n # processes with rows = " << Teuchos::toString(numActiveProcesses) << std::endl; Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null); return; } } bool test3 = false, test4 = false; std::string msg3, msg4; // Test3: check whether number of rows on any processor satisfies the minimum number of rows requirement // NOTE: Test2 ensures that repartitioning is not done when there is only one processor (it may or may not satisfy Test3) if (minRowsPerProcessor > 0) { LO numMyRows = Teuchos::as<LO>(A->getNodeNumRows()), minNumRows, LOMAX = Teuchos::OrdinalTraits<LO>::max(); LO haveFewRows = (numMyRows < minRowsPerProcessor ? 1 : 0), numWithFewRows = 0; MueLu_sumAll(comm, haveFewRows, numWithFewRows); MueLu_minAll(comm, (numMyRows > 0 ?
numMyRows : LOMAX), minNumRows); // TODO: we could change it to repartition only if the number of processors with numRows < minNumRows is larger than some // percentage of the total number. This way, we won't repartition if 2 out of 1000 processors don't have enough elements. // I'm thinking maybe 20% threshold. To implement, simply add " && numWithFewRows < .2*numProcs" to the if statement. if (numWithFewRows > 0) test3 = true; msg3 = "\n min # rows per proc = " + Teuchos::toString(minNumRows) + ", min allowable = " + Teuchos::toString(minRowsPerProcessor); } // Test4: check whether the balance in the number of nonzeros per processor is greater than threshold if (!test3) { GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getNodeNumEntries()); MueLu_maxAll(comm, numMyNnz, maxNnz); MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz); // min nnz over all active processors double imbalance = Teuchos::as<double>(maxNnz)/minNnz; if (imbalance > nonzeroImbalance) test4 = true; msg4 = "\n nonzero imbalance = " + Teuchos::toString(imbalance) + ", max allowable = " + Teuchos::toString(nonzeroImbalance); } if (!test3 && !test4) { GetOStream(Statistics0) << "Repartitioning? NO:" << msg3 + msg4 << std::endl; Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null); return; } GetOStream(Statistics0) << "Repartitioning? YES:" << msg3 + msg4 << std::endl; GO indexBase = rowMap->getIndexBase(); Xpetra::UnderlyingLib lib = rowMap->lib(); int myRank = comm->getRank(); int numProcs = comm->getSize(); RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm); TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object."); RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm(); // ====================================================================================================== // Calculate number of partitions // ====================================================================================================== // FIXME Quick way to figure out how many partitions there should be (same algorithm as ML) // FIXME Should take into account nnz? Perhaps only when user is using min #nnz per row threshold. GO numPartitions; if (currentLevel.IsAvailable("number of partitions")) { numPartitions = currentLevel.Get<GO>("number of partitions"); GetOStream(Warnings0) << "Using user-provided \"number of partitions\", the performance is unknown" << std::endl; } else { if (Teuchos::as<GO>(A->getGlobalNumRows()) < minRowsPerProcessor) { // System is too small, migrate it to a single processor numPartitions = 1; } else { // Make sure that each processor has approximately minRowsPerProcessor numPartitions = A->getGlobalNumRows() / minRowsPerProcessor; } numPartitions = std::min(numPartitions, Teuchos::as<GO>(numProcs)); currentLevel.Set("number of partitions", numPartitions, NoFactory::get()); } GetOStream(Statistics0) << "Number of partitions to use = " << numPartitions << std::endl; // ====================================================================================================== // Construct decomposition vector // ====================================================================================================== RCP<GOVector> decomposition; if (numPartitions == 1) { // Trivial case: decomposition is the trivial one, all zeros. 
We skip the call to Zoltan_Interface // (this is mostly done to avoid extra output messages, as even if we didn't skip there is a shortcut // in Zoltan[12]Interface). // TODO: We can probably skip more work in this case (like building all extra data structures) GetOStream(Warnings0) << "Only one partition: Skip call to the repartitioner." << std::endl; decomposition = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(A->getRowMap(), true); } else { decomposition = Get<RCP<GOVector> >(currentLevel, "Partition"); if (decomposition.is_null()) { GetOStream(Warnings0) << "No repartitioning necessary: partitions were left unchanged by the repartitioner" << std::endl; Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null); return; } } // ====================================================================================================== // Remap if necessary // ====================================================================================================== // From a user perspective, we want the user to not care about remapping, thinking of it as only a performance feature. // There are two problems, however. // (1) Next level aggregation depends on the order of GIDs in the vector, if one uses "natural" or "random" orderings. // This also means that remapping affects next level aggregation, despite the fact that the _set_ of GIDs for // each partition is the same. // (2) Even with the fixed order of GIDs, the remapping may influence the aggregation for the next-next level. // Let us consider the following example. Let's assume that when we don't do remapping, processor 0 would have // GIDs {0,1,2}, and processor 1 GIDs {3,4,5}, and if we do remapping processor 0 would contain {3,4,5} and // processor 1 {0,1,2}. Now, when we run the repartitioning algorithm on the next level (say Zoltan1 RCB), it may // be dependent on whether it is [{0,1,2}, {3,4,5}] or [{3,4,5}, {0,1,2}]. Specifically, the tie-breaking // algorithm can resolve these differently. For instance, running // mpirun -np 5 ./MueLu_ScalingTestParamList.exe --xml=easy_sa.xml --nx=12 --ny=12 --nz=12 // with // <ParameterList name="MueLu"> // <Parameter name="coarse: max size" type="int" value="1"/> // <Parameter name="repartition: enable" type="bool" value="true"/> // <Parameter name="repartition: min rows per proc" type="int" value="2"/> // <ParameterList name="level 1"> // <Parameter name="repartition: remap parts" type="bool" value="false/true"/> // </ParameterList> // </ParameterList> // produces different repartitioning for level 2. // This different repartitioning may then escalate into different aggregation for the next level. // // We fix (1) by fixing the order of GIDs in a vector by sorting the resulting vector. // Fixing (2) is more complicated. // FIXME: Fixing (2) in Zoltan may not be enough, as we may use some arbitration in MueLu, // for instance with CoupledAggregation. What we really need to do is to use the same order of processors containing // the same order of GIDs. To achieve that, the newly created subcommunicator must be conforming with the order. For // instance, if we have [{0,1,2}, {3,4,5}], we create a subcommunicator where processor 0 gets rank 0, and processor 1 // gets rank 1. If, on the other hand, we have [{3,4,5}, {0,1,2}], we assign rank 1 to processor 0, and rank 0 to processor 1. // This rank permutation requires help from Epetra/Tpetra, both of which have no such API in place.
// One should also be concerned that if we had such API in place, rank 0 in subcommunicator may no longer be rank 0 in // MPI_COMM_WORLD, which may lead to issues for logging. if (remapPartitions) { SubFactoryMonitor m1(*this, "DeterminePartitionPlacement", currentLevel); DeterminePartitionPlacement(*A, *decomposition, numPartitions); } // ====================================================================================================== // Construct importer // ====================================================================================================== // At this point, the following is true: // * Each processor owns 0 or 1 partitions // * If a processor owns a partition, that partition number is equal to the processor rank // * The decomposition vector contains the partition ids that the corresponding GIDs belong to ArrayRCP<const GO> decompEntries; if (decomposition->getLocalLength() > 0) decompEntries = decomposition->getData(0); #ifdef HAVE_MUELU_DEBUG // Test range of partition ids int incorrectRank = -1; for (int i = 0; i < decompEntries.size(); i++) if (decompEntries[i] >= numProcs || decompEntries[i] < 0) { incorrectRank = myRank; break; } int incorrectGlobalRank = -1; MueLu_maxAll(comm, incorrectRank, incorrectGlobalRank); TEUCHOS_TEST_FOR_EXCEPTION(incorrectGlobalRank > -1, Exceptions::RuntimeError, "pid " + Teuchos::toString(incorrectGlobalRank) + " encountered a partition number that is out-of-range"); #endif Array<GO> myGIDs; myGIDs.reserve(decomposition->getLocalLength()); // Step 0: Construct mapping // part number -> GIDs I own which belong to this part // NOTE: my own part GIDs are not part of the map typedef std::map<GO, Array<GO> > map_type; map_type sendMap; for (LO i = 0; i < decompEntries.size(); i++) { GO id = decompEntries[i]; GO GID = rowMap->getGlobalElement(i); if (id == myRank) myGIDs .push_back(GID); else sendMap[id].push_back(GID); } decompEntries = Teuchos::null; if (IsPrint(Statistics2)) { GO numLocalKept = myGIDs.size(), numGlobalKept, numGlobalRows = A->getGlobalNumRows(); MueLu_sumAll(comm,numLocalKept, numGlobalKept); GetOStream(Statistics2) << "Unmoved rows: " << numGlobalKept << " / " << numGlobalRows << " (" << 100*Teuchos::as<double>(numGlobalKept)/numGlobalRows << "%)" << std::endl; } int numSend = sendMap.size(), numRecv; // Arrayify map keys Array<GO> myParts(numSend), myPart(1); int cnt = 0; myPart[0] = myRank; for (typename map_type::const_iterator it = sendMap.begin(); it != sendMap.end(); it++) myParts[cnt++] = it->first; // Step 1: Find out how many processors send me data // partsIndexBase starts from zero, as the processor ids start from zero GO partsIndexBase = 0; RCP<Map> partsIHave = MapFactory ::Build(lib, Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), myParts(), partsIndexBase, comm); RCP<Map> partsIOwn = MapFactory ::Build(lib, numProcs, myPart(), partsIndexBase, comm); RCP<Export> partsExport = ExportFactory::Build(partsIHave, partsIOwn); RCP<GOVector> partsISend = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIHave); RCP<GOVector> numPartsIRecv = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIOwn); if (numSend) { ArrayRCP<GO> partsISendData = partsISend->getDataNonConst(0); for (int i = 0; i < numSend; i++) partsISendData[i] = 1; } (numPartsIRecv->getDataNonConst(0))[0] = 0; numPartsIRecv->doExport(*partsISend, *partsExport, Xpetra::ADD); numRecv = (numPartsIRecv->getData(0))[0]; // Step 2: Get my GIDs from everybody else MPI_Datatype MpiType = MpiTypeTraits<GO>::getType(); int msgTag = 12345; //
TODO: use Comm::dup for all internal messaging // Post sends Array<MPI_Request> sendReqs(numSend); cnt = 0; for (typename map_type::iterator it = sendMap.begin(); it != sendMap.end(); it++) MPI_Isend(static_cast<void*>(it->second.getRawPtr()), it->second.size(), MpiType, Teuchos::as<GO>(it->first), msgTag, *rawMpiComm, &sendReqs[cnt++]); map_type recvMap; size_t totalGIDs = myGIDs.size(); for (int i = 0; i < numRecv; i++) { MPI_Status status; MPI_Probe(MPI_ANY_SOURCE, msgTag, *rawMpiComm, &status); // Get rank and number of elements from status int fromRank = status.MPI_SOURCE, count; MPI_Get_count(&status, MpiType, &count); recvMap[fromRank].resize(count); MPI_Recv(static_cast<void*>(recvMap[fromRank].getRawPtr()), count, MpiType, fromRank, msgTag, *rawMpiComm, &status); totalGIDs += count; } // Do waits on send requests if (numSend) { Array<MPI_Status> sendStatuses(numSend); MPI_Waitall(numSend, sendReqs.getRawPtr(), sendStatuses.getRawPtr()); } // Merge GIDs myGIDs.reserve(totalGIDs); for (typename map_type::const_iterator it = recvMap.begin(); it != recvMap.end(); it++) { int offset = myGIDs.size(), len = it->second.size(); if (len) { myGIDs.resize(offset + len); memcpy(myGIDs.getRawPtr() + offset, it->second.getRawPtr(), len*sizeof(GO)); } } // NOTE 2: The general sorting algorithm could be sped up by using the knowledge that original myGIDs and all received chunks // (i.e. it->second) are sorted. Therefore, a merge sort would work well in this situation. std::sort(myGIDs.begin(), myGIDs.end()); // Step 3: Construct importer RCP<Map> newRowMap = MapFactory ::Build(lib, rowMap->getGlobalNumElements(), myGIDs(), indexBase, origComm); RCP<const Import> rowMapImporter; { SubFactoryMonitor m1(*this, "Import construction", currentLevel); rowMapImporter = ImportFactory::Build(rowMap, newRowMap); } Set(currentLevel, "Importer", rowMapImporter); // ====================================================================================================== // Print some data // ====================================================================================================== if (pL.get<bool>("repartition: print partition distribution") && IsPrint(Statistics2)) { // Print the grid of processors GetOStream(Statistics2) << "Partition distribution over cores (ownership is indicated by '+')" << std::endl; char amActive = (myGIDs.size() ? 1 : 0); std::vector<char> areActive(numProcs, 0); MPI_Gather(&amActive, 1, MPI_CHAR, &areActive[0], 1, MPI_CHAR, 0, *rawMpiComm); int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100); for (int proc = 0; proc < numProcs; proc += rowWidth) { for (int j = 0; j < rowWidth; j++) if (proc + j < numProcs) GetOStream(Statistics2) << (areActive[proc + j] ? "+" : "."); else GetOStream(Statistics2) << " "; GetOStream(Statistics2) << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl; } } } // Build
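Test 4 above triggers repartitioning when the nonzero imbalance over the active processes exceeds the "repartition: max imbalance" threshold:

\[
  \text{imbalance} = \frac{\max_{p}\,\mathrm{nnz}_p}{\min_{p:\,\mathrm{nnz}_p>0}\,\mathrm{nnz}_p} > \text{maxImbalance}.
\]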
void RebalanceTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level& coarseLevel) const { FactoryMonitor m(*this, "Build", coarseLevel); const ParameterList& pL = GetParameterList(); int implicit = !pL.get<bool>("repartition: rebalance P and R"); int writeStart = pL.get<int> ("write start"); int writeEnd = pL.get<int> ("write end"); if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd && IsAvailable(fineLevel, "Coordinates")) { std::string fileName = "coordinates_level_0.m"; RCP<MultiVector> fineCoords = fineLevel.Get< RCP<MultiVector> >("Coordinates"); if (fineCoords != Teuchos::null) Utils::Write(fileName, *fineCoords); } RCP<const Import> importer = Get<RCP<const Import> >(coarseLevel, "Importer"); if (implicit) { // Save the importer, we'll need it for solve coarseLevel.Set("Importer", importer, NoFactory::get()); } RCP<ParameterList> params = rcp(new ParameterList());; params->set("printLoadBalancingInfo", true); params->set("printCommInfo", true); std::string transferType = pL.get<std::string>("type"); if (transferType == "Interpolation") { RCP<Matrix> originalP = Get< RCP<Matrix> >(coarseLevel, "P"); { // This line must be after the Get call SubFactoryMonitor m1(*this, "Rebalancing prolongator", coarseLevel); if (implicit || importer.is_null()) { GetOStream(Runtime0) << "Using original prolongator" << std::endl; Set(coarseLevel, "P", originalP); } else { // P is the transfer operator from the coarse grid to the fine grid. // P must transfer the data from the newly reordered coarse A to the // (unchanged) fine A. This means that the domain map (coarse) of P // must be changed according to the new partition. The range map // (fine) is kept unchanged. // // The domain map of P must match the range map of R. See also note // below about domain/range map of R and its implications for P. // // To change the domain map of P, P needs to be fillCompleted again // with the new domain map. To achieve this, P is copied into a new // matrix that is not fill-completed. The doImport() operation is // just used here to make a copy of P: the importer is trivial and // there is no data movement involved. The reordering actually // happens during the fillComplete() with domainMap == importer->getTargetMap(). RCP<Matrix> rebalancedP = originalP; RCP<const CrsMatrixWrap> crsOp = rcp_dynamic_cast<const CrsMatrixWrap>(originalP); TEUCHOS_TEST_FOR_EXCEPTION(crsOp == Teuchos::null, Exceptions::BadCast, "Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed"); RCP<CrsMatrix> rebalancedP2 = crsOp->getCrsMatrix(); TEUCHOS_TEST_FOR_EXCEPTION(rebalancedP2 == Teuchos::null, std::runtime_error, "Xpetra::CrsMatrixWrap doesn't have a CrsMatrix"); { SubFactoryMonitor subM(*this, "Rebalancing prolongator -- fast map replacement", coarseLevel); RCP<const Import> newImporter = ImportFactory::Build(importer->getTargetMap(), rebalancedP->getColMap()); rebalancedP2->replaceDomainMapAndImporter(importer->getTargetMap(), newImporter); } ///////////////////////// EXPERIMENTAL // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps. 
// That is probably something for an external permutation factory // if (originalP->IsView("stridedMaps")) // rebalancedP->CreateView("stridedMaps", originalP); ///////////////////////// EXPERIMENTAL Set(coarseLevel, "P", rebalancedP); if (IsPrint(Statistics1)) GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedP, "P (rebalanced)", params); } } if (importer.is_null()) { if (IsAvailable(coarseLevel, "Nullspace")) Set(coarseLevel, "Nullspace", Get<RCP<MultiVector> >(coarseLevel, "Nullspace")); if (pL.isParameter("Coordinates") && pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null) if (IsAvailable(coarseLevel, "Coordinates")) Set(coarseLevel, "Coordinates", Get< RCP<MultiVector> >(coarseLevel, "Coordinates")); return; } if (pL.isParameter("Coordinates") && pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null && IsAvailable(coarseLevel, "Coordinates")) { RCP<MultiVector> coords = Get<RCP<MultiVector> >(coarseLevel, "Coordinates"); // This line must be after the Get call SubFactoryMonitor subM(*this, "Rebalancing coordinates", coarseLevel); LO nodeNumElts = coords->getMap()->getNodeNumElements(); // If a process has no matrix rows, then we can't calculate blocksize using the formula below. LO myBlkSize = 0, blkSize = 0; if (nodeNumElts > 0) myBlkSize = importer->getSourceMap()->getNodeNumElements() / nodeNumElts; maxAll(coords->getMap()->getComm(), myBlkSize, blkSize); RCP<const Import> coordImporter; if (blkSize == 1) { coordImporter = importer; } else { // NOTE: there is an implicit assumption here: we assume that the DOFs of any node are enumerated consecutively // Proper fix would require using decomposition similar to how we construct importer in the // RepartitionFactory RCP<const Map> origMap = coords->getMap(); GO indexBase = origMap->getIndexBase(); ArrayView<const GO> OEntries = importer->getTargetMap()->getNodeElementList(); LO numEntries = OEntries.size()/blkSize; ArrayRCP<GO> Entries(numEntries); for (LO i = 0; i < numEntries; i++) Entries[i] = (OEntries[i*blkSize]-indexBase)/blkSize + indexBase; RCP<const Map> targetMap = MapFactory::Build(origMap->lib(), origMap->getGlobalNumElements(), Entries(), indexBase, origMap->getComm()); coordImporter = ImportFactory::Build(origMap, targetMap); } RCP<MultiVector> permutedCoords = MultiVectorFactory::Build(coordImporter->getTargetMap(), coords->getNumVectors()); permutedCoords->doImport(*coords, *coordImporter, Xpetra::INSERT); if (pL.get<bool>("useSubcomm") == true) permutedCoords->replaceMap(permutedCoords->getMap()->removeEmptyProcesses()); Set(coarseLevel, "Coordinates", permutedCoords); std::string fileName = "rebalanced_coordinates_level_" + toString(coarseLevel.GetLevelID()) + ".m"; if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd && permutedCoords->getMap() != Teuchos::null) Utils::Write(fileName, *permutedCoords); } if (IsAvailable(coarseLevel, "Nullspace")) { RCP<MultiVector> nullspace = Get< RCP<MultiVector> >(coarseLevel, "Nullspace"); // This line must be after the Get call SubFactoryMonitor subM(*this, "Rebalancing nullspace", coarseLevel); RCP<MultiVector> permutedNullspace = MultiVectorFactory::Build(importer->getTargetMap(), nullspace->getNumVectors()); permutedNullspace->doImport(*nullspace, *importer, Xpetra::INSERT); if (pL.get<bool>("useSubcomm") == true) permutedNullspace->replaceMap(permutedNullspace->getMap()->removeEmptyProcesses()); Set(coarseLevel, "Nullspace", permutedNullspace); } } else { if (pL.get<bool>("transpose: use implicit") == false) {
RCP<Matrix> originalR = Get< RCP<Matrix> >(coarseLevel, "R"); SubFactoryMonitor m2(*this, "Rebalancing restriction", coarseLevel); if (implicit || importer.is_null()) { GetOStream(Runtime0) << "Using original restrictor" << std::endl; Set(coarseLevel, "R", originalR); } else { RCP<Matrix> rebalancedR; { SubFactoryMonitor subM(*this, "Rebalancing restriction -- fusedImport", coarseLevel); RCP<Map> dummy; // meaning: use originalR's domain map. rebalancedR = MatrixFactory::Build(originalR, *importer, dummy, importer->getTargetMap()); } Set(coarseLevel, "R", rebalancedR); ///////////////////////// EXPERIMENTAL // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps. // That is probably something for an external permutation factory // if (originalR->IsView("stridedMaps")) // rebalancedR->CreateView("stridedMaps", originalR); ///////////////////////// EXPERIMENTAL if (IsPrint(Statistics1)) GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedR, "R (rebalanced)", params); } } } }
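For completeness, the node map constructed above when rebalancing coordinates with blkSize > 1 derives each node GID from the first DOF GID of that node (relying on the consecutive per-node DOF numbering noted in the code):

\[
  \mathrm{nodeGID}_i = \frac{\mathrm{dofGID}_{\,i\cdot\mathrm{blkSize}} - \mathrm{indexBase}}{\mathrm{blkSize}} + \mathrm{indexBase}.
\]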
void NullspacePresmoothFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::DeclareInput(Level &currentLevel) const { Input(currentLevel, "Nullspace"); if (currentLevel.GetLevelID() == 0) Input(currentLevel, "A"); }
//! virtual void CallBuild(Level & requestedLevel) const { #ifdef HAVE_MUELU_DEBUG TEUCHOS_TEST_FOR_EXCEPTION((multipleCallCheck_ == ENABLED) && (multipleCallCheckGlobal_ == ENABLED) && (lastLevel_ == &requestedLevel), Exceptions::RuntimeError, this->ShortClassName() << "::Build() called twice for the same level (levelID=" << requestedLevel.GetLevelID() << "). This is likely due to a configuration error."); if (multipleCallCheck_ == FIRSTCALL) multipleCallCheck_ = ENABLED; lastLevel_ = &requestedLevel; #endif TEUCHOS_TEST_FOR_EXCEPTION(requestedLevel.GetPreviousLevel() == Teuchos::null, Exceptions::RuntimeError, "LevelID = " << requestedLevel.GetLevelID()); Build(*requestedLevel.GetPreviousLevel(), requestedLevel); }
void CoarseMapFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const { FactoryMonitor m(*this, "Build", currentLevel); RCP<Aggregates> aggregates = Get< RCP<Aggregates> >(currentLevel, "Aggregates"); RCP<MultiVector> nullspace = Get< RCP<MultiVector> >(currentLevel, "Nullspace"); GlobalOrdinal numAggs = aggregates->GetNumAggregates(); const size_t NSDim = nullspace->getNumVectors(); RCP<const Teuchos::Comm<int> > comm = aggregates->GetMap()->getComm(); // read in offset information from parameter list and fill the internal member variable GlobalOrdinal domainGidOffset = 0; std::vector<GlobalOrdinal> domainGidOffsets; domainGidOffsets.clear(); const ParameterList & pL = GetParameterList(); if(pL.isParameter("Domain GID offsets")) { std::string strDomainGIDs = pL.get<std::string>("Domain GID offsets"); if(strDomainGIDs.empty() == false) { Teuchos::Array<GlobalOrdinal> arrayVal = Teuchos::fromStringToArray<GlobalOrdinal>(strDomainGIDs); domainGidOffsets = Teuchos::createVector(arrayVal); if(currentLevel.GetLevelID() < Teuchos::as<int>(domainGidOffsets.size()) ) { domainGidOffset = domainGidOffsets[currentLevel.GetLevelID()]; } } } LocalOrdinal stridedBlockId = pL.get<LocalOrdinal>("Strided block id"); // read in stridingInfo from parameter list and fill the internal member variable // read the data only if the parameter "Striding info" exists and is non-empty //const ParameterList & pL = GetParameterList(); if(pL.isParameter("Striding info")) { std::string strStridingInfo = pL.get<std::string>("Striding info"); if(strStridingInfo.empty() == false) { Teuchos::Array<size_t> arrayVal = Teuchos::fromStringToArray<size_t>(strStridingInfo); stridingInfo_ = Teuchos::createVector(arrayVal); } } // check for consistency of striding information with NSDim and nCoarseDofs if (stridedBlockId== -1) { // this means we have no real strided map but only a block map with constant blockSize "NSDim" TEUCHOS_TEST_FOR_EXCEPTION(stridingInfo_.size() > 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() > 1, but it must be at most one"); stridingInfo_.clear(); stridingInfo_.push_back(NSDim); TEUCHOS_TEST_FOR_EXCEPTION(stridingInfo_.size() != 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() != 1, but it must be exactly one"); } else { // stridedBlockId > -1, set by user TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockId > Teuchos::as<LO>(stridingInfo_.size() - 1) , Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridedBlockId must be smaller than stridingInfo_.size()."); size_t stridedBlockSize = stridingInfo_[stridedBlockId]; TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockSize != NSDim , Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): dimension of strided block != NSDim."); } GetOStream(Statistics2) << "domainGIDOffset: " << domainGidOffset << " block size: " << getFixedBlockSize() << " stridedBlockId: " << stridedBlockId << std::endl; // number of coarse level dofs (fixed by number of aggregates and blocksize data) GlobalOrdinal nCoarseDofs = numAggs * getFixedBlockSize(); GlobalOrdinal indexBase = aggregates->GetMap()->getIndexBase(); RCP<const Map> coarseMap = StridedMapFactory::Build(aggregates->GetMap()->lib(), Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), nCoarseDofs, indexBase, stridingInfo_, comm, stridedBlockId, domainGidOffset); Set(currentLevel, "CoarseMap", coarseMap); } // Build
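A hedged sketch of the "Striding info" parsing done above, isolated as a standalone helper; the header location and example string syntax are assumptions, the calls mirror the code above:

#include <string>
#include <vector>
#include <Teuchos_Array.hpp>

// Converts a Teuchos-style array string, e.g. "{ 2, 1 }", into the striding vector {2, 1}.
std::vector<size_t> parseStridingInfo(const std::string& strStridingInfo) {
  Teuchos::Array<size_t> arrayVal = Teuchos::fromStringToArray<size_t>(strStridingInfo);
  return Teuchos::createVector(arrayVal);
}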
void RigidBodyModeFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const { FactoryMonitor m(*this, "Rigid body mode factory", currentLevel); RCP<MultiVector> nullspace; if (currentLevel.GetLevelID() == 0) { if (currentLevel.IsAvailable(nspName_, NoFactory::get())) { nullspace = currentLevel.Get< RCP<MultiVector> >(nspName_, NoFactory::get()); GetOStream(Runtime1, 0) << "Use user-given rigid body modes " << nspName_ << ": nullspace dimension=" << nullspace->getNumVectors() << " nullspace length=" << nullspace->getGlobalLength() << std::endl; } else { RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A"); GetOStream(Runtime1, 0) << "Generating rigid body modes: dimension = " << numPDEs_ << std::endl; RCP<const Map> xmap=A->getDomainMap(); if(numPDEs_==1) { nullspace = MultiVectorFactory::Build(xmap, 1); } else if(numPDEs_==2) { nullspace = MultiVectorFactory::Build(xmap, 3); } else if(numPDEs_==3) { nullspace = MultiVectorFactory::Build(xmap, 6); } Scalar zero(0.0); nullspace -> putScalar(zero); RCP<MultiVector> Coords = Get< RCP<MultiVector> >(currentLevel,"Coordinates"); ArrayRCP<Scalar> xnodes, ynodes, znodes; Scalar cx, cy, cz; ArrayRCP<Scalar> nsValues0, nsValues1, nsValues2, nsValues3, nsValues4, nsValues5; int nDOFs=xmap->getNodeNumElements(); if(numPDEs_==1) { nsValues0 = nullspace->getDataNonConst(0); for(int j=0; j<nDOFs; j++) { // constant null space for scalar PDE nsValues0[j]=1.0; } } else if(numPDEs_==2) { xnodes = Coords->getDataNonConst(0); ynodes = Coords->getDataNonConst(1); cx = Coords->getVector(0)->meanValue(); cy = Coords->getVector(1)->meanValue(); nsValues0 = nullspace->getDataNonConst(0); nsValues1 = nullspace->getDataNonConst(1); nsValues2 = nullspace->getDataNonConst(2); for (int j=0; j<nDOFs; j+=numPDEs_) { // translation nsValues0[j+0] = 1.0; nsValues1[j+1] = 1.0; // rotate around z-axis (x-y plane) nsValues2[j+0] = -(ynodes[j]-cy); nsValues2[j+1] = (xnodes[j]-cx); } } else if(numPDEs_==3) { xnodes = Coords->getDataNonConst(0); ynodes = Coords->getDataNonConst(1); znodes = Coords->getDataNonConst(2); cx = Coords->getVector(0)->meanValue(); cy = Coords->getVector(1)->meanValue(); cz = Coords->getVector(2)->meanValue(); nsValues0 = nullspace->getDataNonConst(0); nsValues1 = nullspace->getDataNonConst(1); nsValues2 = nullspace->getDataNonConst(2); nsValues3 = nullspace->getDataNonConst(3); nsValues4 = nullspace->getDataNonConst(4); nsValues5 = nullspace->getDataNonConst(5); for (int j=0; j<nDOFs; j+=numPDEs_) { // translation nsValues0[j+0] = 1.0; nsValues1[j+1] = 1.0; nsValues2[j+2] = 1.0; // rotate around z-axis (x-y plane) nsValues3[j+0] = -(ynodes[j]-cy); nsValues3[j+1] = (xnodes[j]-cx); // rotate around x-axis (y-z plane) nsValues4[j+1] = -(znodes[j]-cz); nsValues4[j+2] = (ynodes[j]-cy); // rotate around y-axis (x-z plane) nsValues5[j+0] = (znodes[j]-cz); nsValues5[j+2] = -(xnodes[j]-cx); } } } // end if "Nullspace" not available } else { nullspace = currentLevel.Get< RCP<MultiVector> >("Nullspace", GetFactory(nspName_).get()); } Set(currentLevel, "Nullspace", nullspace); }
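Written per node with coordinates (x, y, z) and coordinate means (c_x, c_y, c_z), the six modes filled in above for numPDEs_ == 3 are three translations and three rotations about the coordinate axes:

\[
\begin{pmatrix}1\\0\\0\end{pmatrix},\;
\begin{pmatrix}0\\1\\0\end{pmatrix},\;
\begin{pmatrix}0\\0\\1\end{pmatrix},\;
\begin{pmatrix}-(y-c_y)\\ x-c_x\\ 0\end{pmatrix},\;
\begin{pmatrix}0\\ -(z-c_z)\\ y-c_y\end{pmatrix},\;
\begin{pmatrix}z-c_z\\ 0\\ -(x-c_x)\end{pmatrix}.
\]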
/*! @brief Constructor @param[in] object Reference to the class instance that is creating this SubMonitor. @param[in] msg String that indicates what the SubMonitor is monitoring, e.g., "Build". @param[in] level The MueLu Level object. @param[in] msgLevel Governs whether information should be printed. @param[in] timerLevel Governs whether timing information should be *gathered*. Setting this to NoTimeReport prevents the creation of timers. TODO: code factorization */ FactoryMonitor(const BaseClass& object, const std::string & msg, const Level & level, MsgType msgLevel = static_cast<MsgType>(Test | Runtime0), MsgType timerLevel = Timings0) : Monitor(object, msg, msgLevel, timerLevel), timerMonitorExclusive_(object, object.ShortClassName() + " : " + msg, timerLevel) { if (IsPrint(TimingsByLevel)) { levelTimeMonitor_ = rcp(new TimeMonitor(object, object.ShortClassName() + ": " + msg + " (total, level=" + Teuchos::Utils::toString(level.GetLevelID()) + ")", timerLevel)); levelTimeMonitorExclusive_ = rcp(new MutuallyExclusiveTimeMonitor<Level>(object, object.ShortClassName() + " " + MUELU_TIMER_AS_STRING + " : " + msg + " (level=" + Teuchos::Utils::toString(level.GetLevelID()) + ")", timerLevel)); } }
void Ifpack2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::SetupChebyshev(Level& currentLevel) { if (this->IsSetup() == true) this->GetOStream(Warnings0) << "MueLu::Ifpack2Smoother::Setup(): Setup() has already been called" << std::endl; typedef Teuchos::ScalarTraits<SC> STS; SC negone = -STS::one(); SC lambdaMax = negone; { std::string maxEigString = "chebyshev: max eigenvalue"; std::string eigRatioString = "chebyshev: ratio eigenvalue"; ParameterList& paramList = const_cast<ParameterList&>(this->GetParameterList()); // Get/calculate the maximum eigenvalue if (paramList.isParameter(maxEigString)) { if (paramList.isType<double>(maxEigString)) lambdaMax = paramList.get<double>(maxEigString); else lambdaMax = paramList.get<SC>(maxEigString); this->GetOStream(Statistics1) << maxEigString << " (cached with smoother parameter list) = " << lambdaMax << std::endl; } else { lambdaMax = A_->GetMaxEigenvalueEstimate(); if (lambdaMax != negone) { this->GetOStream(Statistics1) << maxEigString << " (cached with matrix) = " << lambdaMax << std::endl; paramList.set(maxEigString, lambdaMax); } } // Calculate the eigenvalue ratio const SC defaultEigRatio = 20; SC ratio = defaultEigRatio; if (paramList.isParameter(eigRatioString)) { if (paramList.isType<double>(eigRatioString)) ratio = paramList.get<double>(eigRatioString); else ratio = paramList.get<SC>(eigRatioString); } if (currentLevel.GetLevelID()) { // Update ratio to be // ratio = max(number of fine DOFs / number of coarse DOFs, defaultValue) // // NOTE: We don't need to request previous level matrix as we know for sure it was constructed RCP<const Matrix> fineA = currentLevel.GetPreviousLevel()->Get<RCP<Matrix> >("A"); size_t nRowsFine = fineA->getGlobalNumRows(); size_t nRowsCoarse = A_->getGlobalNumRows(); SC levelRatio = as<SC>(as<float>(nRowsFine)/nRowsCoarse); if (STS::magnitude(levelRatio) > STS::magnitude(ratio)) ratio = levelRatio; } this->GetOStream(Statistics1) << eigRatioString << " (computed) = " << ratio << std::endl; paramList.set(eigRatioString, ratio); } RCP<const Tpetra::RowMatrix<SC, LO, GO, NO> > tpA = Utilities::Op2NonConstTpetraRow(A_); prec_ = Ifpack2::Factory::create(type_, tpA, overlap_); SetPrecParameters(); prec_->initialize(); prec_->compute(); if (lambdaMax == negone) { typedef Tpetra::RowMatrix<SC, LO, GO, NO> MatrixType; Teuchos::RCP<Ifpack2::Chebyshev<MatrixType> > chebyPrec = rcp_dynamic_cast<Ifpack2::Chebyshev<MatrixType> >(prec_); if (chebyPrec != Teuchos::null) { lambdaMax = chebyPrec->getLambdaMaxForApply(); A_->SetMaxEigenvalueEstimate(lambdaMax); this->GetOStream(Statistics1) << "chebyshev: max eigenvalue (calculated by Ifpack2)" << " = " << lambdaMax << std::endl; } TEUCHOS_TEST_FOR_EXCEPTION(lambdaMax == negone, Exceptions::RuntimeError, "MueLu::Ifpack2Smoother::Setup(): no maximum eigenvalue estimate"); } }
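On coarse levels, the block above raises "chebyshev: ratio eigenvalue" to at least the fine-to-coarse row ratio before handing the parameter list to Ifpack2. A minimal sketch of that heuristic, using hypothetical row counts and plain doubles in place of the Scalar/magnitude machinery:

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main() {
  const double defaultEigRatio = 20.0;                   // default when nothing better is known
  std::size_t nRowsFine = 1000000, nRowsCoarse = 12000;  // hypothetical level sizes
  double levelRatio = static_cast<double>(nRowsFine) / static_cast<double>(nRowsCoarse);
  double ratio = std::max(defaultEigRatio, levelRatio);  // keep the larger of the two, as above
  std::printf("chebyshev: ratio eigenvalue = %.1f\n", ratio);  // ~83.3 here, not 20
  return 0;
}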
/*! @brief Constructor @param[in] object Reference to the class instance that is creating this SubMonitor. @param[in] msg String that indicates what the SubMonitor is monitoring, e.g., "Build" @param[in] level The MueLu Level object. @param[in] msgLevel Governs whether information should be printed. @param[in] timerLevel Governs whether timing information should be *gathered*. Setting this to NoTimeReport prevents the creation of timers. */ SubFactoryMonitor(const BaseClass& object, const std::string & msg, const Level & level, MsgType msgLevel = Runtime1, MsgType timerLevel = Timings1) : SubMonitor(object, msg, msgLevel, timerLevel) { if (IsPrint(TimingsByLevel)) { levelTimeMonitor_ = rcp(new TimeMonitor(object, object.ShortClassName() + ": " + msg + " (sub, total, level=" + Teuchos::Utils::toString(level.GetLevelID()) + ")", timerLevel)); } }
void CoordinatesTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & fineLevel, Level &coarseLevel) const { FactoryMonitor m(*this, "Build", coarseLevel); GetOStream(Runtime0, 0) << "Transferring coordinates" << std::endl; const ParameterList & pL = GetParameterList(); int writeStart = pL.get< int >("write start"); int writeEnd = pL.get< int >("write end"); RCP<Aggregates> aggregates = Get< RCP<Aggregates> > (fineLevel, "Aggregates"); RCP<MultiVector> fineCoords = Get< RCP<MultiVector> >(fineLevel, "Coordinates"); RCP<const Map> coarseMap = Get< RCP<const Map> > (fineLevel, "CoarseMap"); // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to // logical blocks in the matrix ArrayView<const GO> elementAList = coarseMap->getNodeElementList(); LO blkSize = 1; if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize(); GO indexBase = coarseMap->getIndexBase(); size_t numElements = elementAList.size() / blkSize; Array<GO> elementList(numElements); // Amalgamate the map for (LO i = 0; i < Teuchos::as<LO>(numElements); i++) elementList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase; RCP<const Map> coarseCoordMap = MapFactory ::Build(coarseMap->lib(), Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), elementList, indexBase, coarseMap->getComm()); RCP<MultiVector> coarseCoords = MultiVectorFactory::Build(coarseCoordMap, fineCoords->getNumVectors()); // Maps RCP<const Map> uniqueMap = fineCoords->getMap(); RCP<const Map> nonUniqueMap = aggregates->GetMap(); // Create overlapped fine coordinates to reduce global communication RCP<const Import> importer = ImportFactory ::Build(uniqueMap, nonUniqueMap); RCP<MultiVector> ghostedCoords = MultiVectorFactory::Build(nonUniqueMap, fineCoords->getNumVectors()); ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT); // Get some info about aggregates int myPID = uniqueMap->getComm()->getRank(); LO numAggs = aggregates->GetNumAggregates(); ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizes(); const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0); const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0); // Fill in coarse coordinates for (size_t j = 0; j < fineCoords->getNumVectors(); j++) { ArrayRCP<const Scalar> fineCoordsData = ghostedCoords->getData(j); ArrayRCP<Scalar> coarseCoordsData = coarseCoords->getDataNonConst(j); for (LO lnode = 0; lnode < vertex2AggID.size(); lnode++) if (procWinner[lnode] == myPID) coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode]; for (LO agg = 0; agg < numAggs; agg++) coarseCoordsData[agg] /= aggSizes[agg]; } Set<RCP<MultiVector> >(coarseLevel, "Coordinates", coarseCoords); if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd) { std::ostringstream buf; buf << fineLevel.GetLevelID(); std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; Utils::Write(fileName,*fineCoords); } if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd) { std::ostringstream buf; buf << coarseLevel.GetLevelID(); std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m"; Utils::Write(fileName,*coarseCoords); } } // Build
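The amalgamation loop above reduces the DOF-based coarse map to one GID per node by taking every blkSize-th GID and dividing out the block size relative to the index base. A stand-alone sketch with a made-up GID list (plain C++, not tied to Xpetra maps):

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const int blkSize = 3, indexBase = 0;                           // hypothetical block size
  std::vector<long long> elementAList = {0,1,2, 3,4,5, 9,10,11};  // made-up DOF GIDs on one rank
  std::vector<long long> elementList(elementAList.size() / blkSize);
  for (std::size_t i = 0; i < elementList.size(); ++i)
    elementList[i] = (elementAList[i*blkSize] - indexBase) / blkSize + indexBase;
  for (long long gid : elementList) std::printf("%lld ", gid);    // prints: 0 1 3
  std::printf("\n");
  return 0;
}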