//! @brief Plausibility check for the coarse matrix: look for zeros on the main diagonal.
//!
//! Extracts the local diagonal of Ac and counts the entries that compare equal to zero.
//! When repairZeroDiagonals_ is set, a one is inserted at the diagonal position of every
//! such row. If Warnings0 output is enabled, the local counts are summed over the
//! communicator of the row map and the global count is reported.
//!
//! @param Ac  matrix whose diagonal is inspected (and possibly modified).
void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::CheckMainDiagonal(RCP<Matrix> & Ac) const {
  // local copy of the main diagonal
  RCP<Vector> diagVec = VectorFactory::Build(Ac->getRowMap());
  Ac->getLocalDiagCopy(*diagVec);
  Teuchos::ArrayRCP< Scalar > diagVal = diagVec->getDataNonConst(0);

  const SC zero = Teuchos::ScalarTraits<SC>::zero();
  const SC one  = Teuchos::ScalarTraits<SC>::one();

  LO lZeroDiags = 0;
  for (size_t r = 0; r < Ac->getRowMap()->getNodeNumElements(); r++) {
    if (!(diagVal[r] == zero))
      continue;

    lZeroDiags++;
    if (!repairZeroDiagonals_)
      continue;

    // put a one on the diagonal: translate the row GID into the local column index
    const GO grid = Ac->getRowMap()->getGlobalElement(r);
    const LO lcid = Ac->getColMap()->getLocalElement(grid);
    Teuchos::ArrayRCP<LO> indout(1, lcid);
    Teuchos::ArrayRCP<SC> valout(1, one);
    Ac->insertLocalValues(r, indout.view(0, indout.size()), valout.view(0, valout.size()));
  }

  if (!IsPrint(Warnings0))
    return;

  const RCP<const Teuchos::Comm<int> > & comm = Ac->getRowMap()->getComm();
  GO lZeroDiagsGO = Teuchos::as<GO>(lZeroDiags); /* LO->GO conversion */
  GO gZeroDiags   = 0;
  sumAll(comm, lZeroDiagsGO, gZeroDiags);

  GetOStream(Warnings0,0) << (repairZeroDiagonals_ ? "RAPFactory (WARNING): repaired " : "RAPFactory (WARNING): found ")
                          << gZeroDiags << " zeros on main diagonal of Ac." << std::endl;
}
//! @brief Clones an existing "Partition" vector onto the row map of "A".
//!
//! Reads the matrix "A" and the decomposition vector "Partition" from currentLevel and
//! stores a new "Partition" vector defined on the row map of A. With blkSize being the
//! fixed block size of A, all blkSize rows of local node i receive the input entry at
//! index i*inBlkSize, where inBlkSize = (local input length) / (number of local nodes).
//!
//! If the input "Partition" is null, a null "Importer" is stored and nothing else is done.
//!
//! @param currentLevel  level the data is read from and written to.
//! @throws MueLu::Exceptions::RuntimeError if the local number of rows of A is not a
//!         multiple of its block size, or if the local input length is not a multiple of
//!         the number of local nodes.
void CloneRepartitionInterface<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);
  currentLevel.print(GetOStream(Statistics0,0));

  // extract blocked operator A from current level
  Teuchos::RCP<Matrix> A = Get< Teuchos::RCP<Matrix> > (currentLevel, "A");

  // number of Partitions only used for a shortcut.
  if (currentLevel.IsAvailable("number of partitions")) {
    // The value is not used below; it is still fetched exactly as before.
    const GO numPartitions = currentLevel.Get<GO>("number of partitions");
    (void)numPartitions;
    GetOStream(Warnings0) << "Using user-provided \"number of partitions\", the performance is unknown" << std::endl;
  }

  // ======================================================================================================
  // Construct decomposition vector
  // ======================================================================================================

  // extract decomposition vector
  RCP<GOVector> decomposition = Get<RCP<GOVector> >(currentLevel, "Partition");
  if (decomposition.is_null()) {
    GetOStream(Warnings0) << "No repartitioning necessary: partitions were left unchanged by the repartitioner" << std::endl;
    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }

  // Only touch the vector data once we know the vector exists.
  ArrayRCP<const GO> decompEntries = decomposition->getData(0);

  // create new decomposition vector
  Teuchos::RCP<GOVector> ret = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(A->getRowMap(), false);
  ArrayRCP<GO> retDecompEntries = ret->getDataNonConst(0);

  // block size of output vector
  LocalOrdinal blkSize = A->GetFixedBlockSize();

  // plausibility check!
  size_t inLocalLength  = decomposition->getLocalLength();
  size_t outLocalLength = A->getRowMap()->getNodeNumElements();
  size_t numLocalNodes  = outLocalLength / blkSize;

  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(outLocalLength % blkSize) != 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: inconsistent number of local DOFs (" << outLocalLength << ") and degrees of freedoms ("<<blkSize<<")");

  if (numLocalNodes > 0) {
    TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(inLocalLength % numLocalNodes) != 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: inconsistent number of local DOFs (" << inLocalLength << ") and number of local nodes (" << numLocalNodes << ")");
    LocalOrdinal inBlkSize = Teuchos::as<LocalOrdinal>(inLocalLength / numLocalNodes);
    //TEUCHOS_TEST_FOR_EXCEPTION(blkSize != inBlkSize, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: input block size = " << inBlkSize << " outpub block size = " << blkSize << ". They should be the same.");

    // every row of node i gets the input entry stored for the first DOF of that node
    for (LO i = 0; i < Teuchos::as<LO>(numLocalNodes); i++) {
      for (LO j = 0; j < blkSize; j++) {
        retDecompEntries[i*blkSize + j] = Teuchos::as<GO>(decompEntries[i*inBlkSize]);
      }
    }
  } // end if numLocalNodes > 0

  Set(currentLevel, "Partition", ret);
} //Build()
//! @brief Registers @c factory as the default factory for @c varName and returns it.
//!
//! Prints a warning that no factory was specified for @c varName, describes the default
//! factory on the Warnings00 stream, and stores it in defaultFactoryTable_.
//!
//! @param varName  name of the variable the factory builds.
//! @param factory  default factory to register; must not be null.
//! @return the factory stored in defaultFactoryTable_ for @c varName.
//! @throws Exceptions::RuntimeError if @c factory is null.
const RCP<const FactoryBase> FactoryManager<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::SetAndReturnDefaultFactory(const std::string & varName, const RCP<const FactoryBase> & factory) const {
  // Name the offending variable instead of throwing with an empty message.
  TEUCHOS_TEST_FOR_EXCEPTION(factory == Teuchos::null, Exceptions::RuntimeError, "The default factory for building '" << varName << "' is null");

  GetOStream(Warnings0, 0) << "Attention: No factory has been specified for building '" << varName << "'." << std::endl;
  GetOStream(Warnings00, 0) << " Using default factory ";
  {
    // indent the factory description relative to the message above
    Teuchos::OSTab tab(getOStream(), 7);
    factory->describe(GetOStream(Warnings00), GetVerbLevel());
  }

  defaultFactoryTable_[varName] = factory;
  return defaultFactoryTable_[varName];
}
//! @brief Chooses one of the two registered prolongator factories and publishes its result.
//!
//! The second factory (index 1) is used when the fine level ID is at least
//! "semicoarsen: number of levels" or when "NumZLayers" on the fine level equals 1;
//! otherwise the first factory (index 0) is used. The chosen factory is built on the
//! coarse level, its "P" and the matching "Nullspace" are fetched, all registered
//! prolongator/nullspace factories are released, and "P" and "Nullspace" are stored
//! as generated by this factory.
//!
//! @param fineLevel    fine level (read: level ID, "NumZLayers").
//! @param coarseLevel  coarse level (read/write: "P", "Nullspace").
//! @throws Exceptions::RuntimeError if the numbers of prolongator and nullspace factories
//!         differ, if there are not exactly two of them, or if "toggle: mode" is not
//!         "semicoarsen".
void TogglePFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& fineLevel, Level &coarseLevel) const {
  FactoryMonitor m(*this, "Prolongator toggle", coarseLevel);

  TEUCHOS_TEST_FOR_EXCEPTION(nspFacts_.size() != prolongatorFacts_.size(), Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: The number of provided prolongator factories and coarse nullspace factories must be identical.");
  TEUCHOS_TEST_FOR_EXCEPTION(nspFacts_.size() != 2, Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: TogglePFactory needs two different transfer operator strategies for toggling."); // TODO adapt this/weaken this as soon as other toggling strategies are introduced.

  // extract user parameters
  const Teuchos::ParameterList & pL = GetParameterList();
  const std::string mode             = pL.get<std::string>("toggle: mode");
  const int semicoarsen_levels       = pL.get<int>("semicoarsen: number of levels");

  TEUCHOS_TEST_FOR_EXCEPTION(mode!="semicoarsen", Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: The 'toggle: mode' parameter must be set to 'semicoarsen'. No other mode supported, yet.");

  LO NumZDir = -1;
  if (fineLevel.IsAvailable("NumZLayers", NoFactory::get())) {
    NumZDir = fineLevel.Get<LO>("NumZLayers", NoFactory::get()); //obtain info
    GetOStream(Runtime1) << "Number of layers for semicoarsening: " << NumZDir << std::endl;
  }

  // Make a decision which prolongator to be used (default: first prolongator in list).
  const int nProlongatorFactory = (fineLevel.GetLevelID() >= semicoarsen_levels || NumZDir == 1) ? 1 : 0;

  // call Build for selected transfer operator
  GetOStream(Runtime0) << "TogglePFactory: call transfer factory: " << (prolongatorFacts_[nProlongatorFactory])->description() << std::endl;
  prolongatorFacts_[nProlongatorFactory]->CallBuild(coarseLevel);
  RCP<Matrix>      P               = coarseLevel.Get< RCP<Matrix> >("P", (prolongatorFacts_[nProlongatorFactory]).get());
  RCP<MultiVector> coarseNullspace = coarseLevel.Get< RCP<MultiVector> >("Nullspace", (nspFacts_[nProlongatorFactory]).get());

  // Release dependencies of all prolongator and coarse level null spaces
  for (size_t t = 0; t < nspFacts_.size(); ++t) {
    coarseLevel.Release(*(prolongatorFacts_[t]));
    coarseLevel.Release(*(nspFacts_[t]));
  }

  // store prolongator with this factory identification.
  Set(coarseLevel, "P", P);
  Set(coarseLevel, "Nullspace", coarseNullspace);
} //Build()
//! @brief Builds the prolongator by running the CG-based minimization on an initial guess.
//!
//! The initial guess is "P0" of this factory if it is available on the coarse level
//! (iteration count "Reuse Niterations"), otherwise "P" (iteration count "Niterations").
//! Likewise the constraint is "Constraint0" of this factory if available, otherwise
//! "Constraint". The result is stored as "P" and "P0"; the constraint as "Constraint0".
//!
//! @param fineLevel    fine level (read: "A", "Nullspace").
//! @param coarseLevel  coarse level (read/write: "P", "P0", "Constraint", "Constraint0").
void EminPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildP(Level& fineLevel, Level& coarseLevel) const {
  FactoryMonitor m(*this, "Prolongator minimization", coarseLevel);

  const ParameterList & pL = GetParameterList();

  // Set keep flags
  if (pL.isParameter("Keep P0")) {
    if (pL.get<bool>("Keep P0"))
      coarseLevel.Keep("P0",this);
  }
  if (pL.isParameter("Keep Constraint0")) {
    if (pL.get<bool>("Keep Constraint0"))
      coarseLevel.Keep("Constraint0",this);
  }

  // Fine level operator and nullspace
  RCP<Matrix>      A = Get< RCP<Matrix> >     (fineLevel, "A");
  RCP<MultiVector> B = Get< RCP<MultiVector> >(fineLevel, "Nullspace");

  // Initial guess: fresh P unless a P0 from an earlier run can be reused
  RCP<Matrix> P0;
  int Niterations;
  if (!coarseLevel.IsAvailable("P0", this)) {
    P0          = Get< RCP<Matrix> >(coarseLevel, "P");
    Niterations = pL.get<int>("Niterations");
  } else {
    P0          = coarseLevel.Get<RCP<Matrix> >("P0", this);
    Niterations = pL.get<int>("Reuse Niterations");
    GetOStream(Runtime0, 0) << "EminPFactory: Reusing P0"<<std::endl;
  }

  // Constraint: fresh Constraint unless a Constraint0 from an earlier run can be reused
  RCP<Constraint> X;
  if (!coarseLevel.IsAvailable("Constraint0", this)) {
    X = Get< RCP<Constraint> > (coarseLevel, "Constraint");
  } else {
    X = coarseLevel.Get<RCP<Constraint> >("Constraint0", this);
    GetOStream(Runtime0, 0) << "EminPFactory: Reusing Constraint0"<<std::endl;
  }

  // Minimization
  RCP<Matrix> P;
  CGSolver EminSolver(Niterations);
  EminSolver.Iterate(*A, *X, *P0, *B, P);

  Set(coarseLevel, "Constraint0", X);
  Set(coarseLevel, "P",           P);
  Set(coarseLevel, "P0",          P);

  RCP<ParameterList> params = rcp(new ParameterList());
  params->set("printLoadBalancingInfo", true);
  GetOStream(Statistics0,0) << Utils::PrintMatrixInfo(*P, "P", params);
}
//! @brief Builds a permutation of "A" using the strategy named by "PermutationStrategy".
//!
//! The row map the permutation acts on is read from the level under the name given by
//! "PermutationRowMapName" (generated by "PermutationRowMapFactory"); if that name is
//! empty the full row map of A is used. Supported strategies are "Algebraic" and "Local".
//!
//! @param currentLevel  level holding "A" (and the optional row map).
//! @throws std::logic_error if "PermutationStrategy" has any other value.
void PermutationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & currentLevel) const {
  FactoryMonitor m(*this, "Permutation Factory ", currentLevel);

  Teuchos::RCP<Matrix> A = Get< Teuchos::RCP<Matrix> > (currentLevel, "A");

  const ParameterList & pL = GetParameterList();
  std::string mapName                        = pL.get<std::string> ("PermutationRowMapName");
  Teuchos::RCP<const FactoryBase> mapFactory = GetFactory          ("PermutationRowMapFactory");

  // an empty map name selects the full row map of A
  Teuchos::RCP<const Map> permRowMap = Teuchos::null;
  if (mapName.length() > 0)
    permRowMap = currentLevel.Get<RCP<const Map> >(mapName,mapFactory.get());
  else
    permRowMap = A->getRowMap();

  std::string strStrategy = pL.get<std::string> ("PermutationStrategy");
  const bool useAlgebraic = (strStrategy == "Algebraic");
  const bool useLocal     = !useAlgebraic && (strStrategy == "Local");

  TEUCHOS_TEST_FOR_EXCEPTION(!useAlgebraic && !useLocal,
                             std::logic_error,
                             "`PermutationStrategy' has incorrect value (" << strStrategy << ") in input to PermutationFactory."
                             << "Check the documentation for a list of valid choices");

  if (useAlgebraic) {
    Teuchos::RCP<AlgebraicPermutationStrategy> permStrat = Teuchos::rcp(new AlgebraicPermutationStrategy());
    permStrat->BuildPermutation(A,permRowMap,currentLevel,this);
  } else {
    Teuchos::RCP<LocalPermutationStrategy> permStrat = Teuchos::rcp(new LocalPermutationStrategy());
    permStrat->BuildPermutation(A,permRowMap,currentLevel,this);
  }

  GetOStream(Runtime0, 0) << "Using " << strStrategy << " permutation strategy." << std::endl;
}
//! @brief Publishes the coarse "Coordinates" of the transfer path selected by "Chosen P".
//!
//! Reads the integer "Chosen P" from the coarse level, fetches the coarse "Coordinates"
//! generated by the coordinate transfer factory with that index, stores them as
//! generated by this factory, and releases all registered coordinate transfer factories.
//!
//! @param fineLevel    fine level (not accessed by this method).
//! @param coarseLevel  coarse level (read: "Chosen P", "Coordinates"; write: "Coordinates").
//! @throws Exceptions::RuntimeError if there are not exactly two coordinate transfer
//!         factories or if "Chosen P" is not a valid index into them.
void ToggleCoordinatesTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level & fineLevel, Level &coarseLevel) const {
  FactoryMonitor m(*this, "Coordinate transfer toggle", coarseLevel);

  typedef Xpetra::MultiVector<double,LO,GO,NO> xdMV;

  TEUCHOS_TEST_FOR_EXCEPTION(coordFacts_.size() != 2, Exceptions::RuntimeError, "MueLu::ToggleCoordinatesTransferFactory::Build: ToggleCoordinatesTransferFactory needs two different transfer operator strategies for toggling.");

  int chosenP = Get< int > (coarseLevel, "Chosen P");

  // chosenP is used as an index below; reject values outside the registered factories.
  TEUCHOS_TEST_FOR_EXCEPTION(chosenP < 0 || static_cast<size_t>(chosenP) >= coordFacts_.size(), Exceptions::RuntimeError,
                             "MueLu::ToggleCoordinatesTransferFactory::Build: \"Chosen P\" (" << chosenP << ") is not a valid index into the "
                             << coordFacts_.size() << " registered coordinate transfer factories.");

  GetOStream(Runtime1) << "Transfer Coordinates" << chosenP << " to coarse level" << std::endl;
  RCP<xdMV> coarseCoords = coarseLevel.Get< RCP<xdMV> >("Coordinates",(coordFacts_[chosenP]).get());
  Set(coarseLevel, "Coordinates", coarseCoords);

  // NOTE: an earlier variant (previously kept here as commented-out code) looped over all
  // coordinate transfer factories and picked the one whose "Coordinates" were available
  // on the coarse level instead of relying on "Chosen P".

  // Release dependencies of all coordinate transfer factories
  for (size_t t = 0; t < coordFacts_.size(); ++t) {
    coarseLevel.Release(*(coordFacts_[t]));
  }

  //TODO: exception if coarseCoords == Teuchos::null
}
//! @brief Builds "Aggregates" for the "Graph" stored on the current level.
//!
//! Creates an Aggregates object for the graph, runs algo1_.CoarsenUncoupled() followed by
//! algo2_.AggregateLeftovers() on it, marks the aggregates as crossing processors, and
//! stores the result as "Aggregates". The aggregates are described on the Statistics0
//! stream when that verbosity is enabled.
//!
//! @param currentLevel  level holding "Graph"; receives "Aggregates".
void CoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  RCP<Aggregates> aggregates;
  {
    //TODO check for reuse of aggregates here

    // Level Get
    RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");

    // Build
    aggregates = rcp(new Aggregates(*graph));
    aggregates->setObjectLabel("UC");

    algo1_.CoarsenUncoupled(*graph, *aggregates);
    algo2_.AggregateLeftovers(*graph, *aggregates);
  }

  aggregates->AggregatesCrossProcessors(true);

  // Level Set
  Set(currentLevel, "Aggregates", aggregates);

  if (IsPrint(Statistics0)) {
    aggregates->describe(GetOStream(Statistics0, 0), getVerbLevel());
  }
}
//! @brief Records @c factory as the default factory for @c varName and hands it back.
//!
//! @param varName  name of the variable the factory builds.
//! @param factory  default factory to record; must not be null.
//! @return the entry stored in defaultFactoryTable_ for @c varName.
//! @throws Exceptions::RuntimeError if @c factory is null.
const RCP<const FactoryBase> FactoryManager<Scalar, LocalOrdinal, GlobalOrdinal, Node>::SetAndReturnDefaultFactory(const std::string& varName, const RCP<const FactoryBase>& factory) const {
  TEUCHOS_TEST_FOR_EXCEPTION(factory.is_null(), Exceptions::RuntimeError,
                             "The default factory for building '" << varName << "' is null");

  GetOStream(Runtime1) << "Using default factory (" << factory->description() << ") for building '" << varName << "'." << std::endl;

  // a single lookup serves both the store and the returned value
  RCP<const FactoryBase>& tableEntry = defaultFactoryTable_[varName];
  tableEntry = factory;
  return tableEntry;
}
//! @brief Reads user-provided aggregates from a text file and stores them as "Aggregates".
//!
//! The file name is "<filePrefix><levelID>_<rank>.<fileExt>". The file starts with the
//! number of vertices and the number of aggregates; for every aggregate it then contains
//! the aggregate size followed by that many vertex ids. The first vertex of each
//! aggregate is marked as its root.
//!
//! @param currentLevel  level whose ID selects the file; receives "Aggregates".
//! @throws Exceptions::RuntimeError if the file cannot be opened, if a value cannot be
//!         read, if an aggregate size is not positive, or if a vertex id is outside the
//!         local range of the aggregate vectors.
void UserAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  const ParameterList & pL = GetParameterList();

  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  const int myRank = comm->getRank();

  std::string fileName = pL.get<std::string>("filePrefix") + toString(currentLevel.GetLevelID()) + "_" + toString(myRank) + "." + pL.get<std::string>("fileExt");
  std::ifstream ifs(fileName.c_str());
  if (!ifs.good())
    throw Exceptions::RuntimeError("Cannot read data from \"" + fileName + "\"");

  LO numVertices = 0, numAggregates = 0;
  ifs >> numVertices >> numAggregates;
  if (ifs.fail() || numVertices < 0 || numAggregates < 0)
    throw Exceptions::RuntimeError("Invalid number of vertices/aggregates in \"" + fileName + "\"");

  // FIXME: what is the map?
  Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;
  const int indexBase = 0;
  RCP<Map> map = MapFactory::Build(lib, numVertices, indexBase, comm);

  RCP<Aggregates> aggregates = rcp(new Aggregates(map));
  aggregates->setObjectLabel("User");
  aggregates->SetNumAggregates(numAggregates);

  Teuchos::ArrayRCP<LO> vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0);
  Teuchos::ArrayRCP<LO> procWinner   = aggregates->GetProcWinner()  ->getDataNonConst(0);

  // ids read from the file are used as indices into vertex2AggId/procWinner
  const LO numLocalVertices = static_cast<LO>(vertex2AggId.size());

  for (LO i = 0; i < numAggregates; i++) {
    int aggSize = 0;
    ifs >> aggSize;
    // list[0] is the root below, so an aggregate needs at least one vertex
    if (ifs.fail() || aggSize <= 0)
      throw Exceptions::RuntimeError("Invalid size of aggregate " + toString(i) + " in \"" + fileName + "\"");

    std::vector<LO> list(aggSize);
    for (int k = 0; k < aggSize; k++) {
      // FIXME: File contains GIDs, we need LIDs
      // for now, works on a single processor
      ifs >> list[k];
      if (ifs.fail() || list[k] < 0 || list[k] >= numLocalVertices)
        throw Exceptions::RuntimeError("Invalid vertex id in aggregate " + toString(i) + " in \"" + fileName + "\"");
    }

    // Mark first node as root node for the aggregate
    aggregates->SetIsRoot(list[0]);

    // Fill vertex2AggId and procWinner structure with information
    for (int k = 0; k < aggSize; k++) {
      vertex2AggId[list[k]] = i;
      procWinner  [list[k]] = myRank;
    }
  }

  // FIXME: do the proper check whether aggregates cross interprocessor boundary
  aggregates->AggregatesCrossProcessors(false);

  Set(currentLevel, "Aggregates", aggregates);

  GetOStream(Statistics0, 0) << aggregates->description() << std::endl;
}
//! @brief Rebalances the coarse matrix "A" with the "Importer" found on the coarse level.
//!
//! With a null importer the original matrix is stored unchanged. Otherwise a new matrix
//! is built from the original one and the importer, using the importer's target map as
//! both map arguments; it inherits the fixed block size and is stored as "A". With
//! "useSubcomm" set, "Restrict Communicator" is passed to the matrix build, which may
//! then return null.
//!
//! @param fineLevel    fine level (not accessed by this method).
//! @param coarseLevel  coarse level (read: "A", "Importer"; write: "A").
void RebalanceAcFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const {
  FactoryMonitor m(*this, "Computing Ac", coarseLevel);

  RCP<Matrix>       originalAc        = Get< RCP<Matrix> >(coarseLevel, "A");
  RCP<const Import> rebalanceImporter = Get< RCP<const Import> >(coarseLevel, "Importer");

  if (rebalanceImporter == Teuchos::null) {
    // Ac already built by the load balancing process and no load balancing needed
    GetOStream(Warnings0, 0) << "No rebalancing" << std::endl;
    GetOStream(Warnings0, 0) << "Jamming A into Level " << coarseLevel.GetLevelID() << " w/ generating factory "
                             << this << std::endl;
    Set(coarseLevel, "A", originalAc);
    return;
  }

  RCP<Matrix> rebalancedAc;
  {
    SubFactoryMonitor subM(*this, "Rebalancing existing Ac", coarseLevel);
    RCP<const Map> targetMap = rebalanceImporter->getTargetMap();

    const ParameterList & pL = GetParameterList();
    ParameterList XpetraList;
    if (pL.get<bool>("useSubcomm")) {
      GetOStream(Runtime0,0) << "Replacing maps with a subcommunicator" << std::endl;
      XpetraList.set("Restrict Communicator",true);
    }

    // NOTE: If the communicator is restricted away, Build returns Teuchos::null.
    rebalancedAc = MatrixFactory::Build(originalAc, *rebalanceImporter, targetMap, targetMap, rcp(&XpetraList,false));

    if (!rebalancedAc.is_null())
      rebalancedAc->SetFixedBlockSize(originalAc->GetFixedBlockSize());

    Set(coarseLevel, "A", rebalancedAc);
  }

  if (rebalancedAc.is_null())
    return;

  RCP<ParameterList> params = rcp(new ParameterList());
  params->set("printLoadBalancingInfo", true);
  GetOStream(Statistics0, 0) << Utils::PrintMatrixInfo(*rebalancedAc, "Ac (rebalanced)", params);
} //Build()
//!Destructor ~MutuallyExclusiveTime() { // This timer can only be destroyed if it is not in the stack if (isPaused()) { // error message because cannot throw an exception in destructor GetOStream(Errors) << "MutuallyExclusiveTime::~MutuallyExclusiveTime(): Error: destructor called on a paused timer." << std::endl; //TODO: Even if timing results will be wrong, the timer can be removed from the stack to avoid a segmentation fault. } stop(); // if isRunning(), remove from the stack, resume previous timer }
//! @brief Starts the timer. If a MutuallyExclusiveTime timer is running, it will be stopped. //! @pre Timer is not already paused. //! @post Timer is running. Other MutuallyExclusiveTime timers are paused or stopped. void start(bool reset=false) { TEUCHOS_TEST_FOR_EXCEPTION(isPaused(), Exceptions::RuntimeError, "MueLu::MutuallyExclusiveTime::start(): timer is paused. Use resume()."); if (isRunning()) { return; } // If timer is already running, do not pause/push-in-the-stack/start the timer. // Otherwise, something bad will happen when this.stop() will be called // pause currently running timer if (!timerStack_.empty()) { GetOStream(Debug) << "pausing parent timer " << timerStack_.top()->name_ << std::endl; timerStack_.top()->pause(); GetOStream(Debug) << "starting child timer " << this->name_ << std::endl; myParent_[this->name_] = timerStack_.top()->name_; } else { GetOStream(Debug) << "starting orphan timer " << this->name_ << std::endl; myParent_[this->name_] = "no parent"; } // start this timer timer_->start(reset); timerStack_.push(this); }
//! @brief Prints the user and the default factory tables on the Debug stream.
//!
//! Each entry is printed as "<variable name> -> <factory pointer>".
void FactoryManager<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Print() const {
  typedef std::map<std::string, RCP<const FactoryBase> >::const_iterator TableIterator;

  Teuchos::FancyOStream& fancy = GetOStream(Debug);

  fancy << "Users factory table (factoryTable_):" << std::endl;
  for (TableIterator entry = factoryTable_.begin(); entry != factoryTable_.end(); ++entry)
    fancy << " " << entry->first << " -> " << Teuchos::toString(entry->second.get()) << std::endl;

  fancy << "Default factory table (defaultFactoryTable_):" << std::endl;
  for (TableIterator entry = defaultFactoryTable_.begin(); entry != defaultFactoryTable_.end(); ++entry)
    fancy << " " << entry->first << " -> " << Teuchos::toString(entry->second.get()) << std::endl;
}
//! Constructor PrintMonitor(const BaseClass& object, const std::string& msg, MsgType msgLevel = Runtime0) { // Inherit verbosity from 'object' SetVerbLevel(object.GetVerbLevel()); setOStream(object.getOStream()); // Print description and new indent if (IsPrint(msgLevel)) { GetOStream(msgLevel, 0) << msg << std::endl; tab_ = rcp(new Teuchos::OSTab(getOStream())); } }
//! @brief Transfers a user-provided map from the fine to the coarse level.
//!
//! For every local row of the prolongator "P" whose global row id belongs to the fine
//! level map (mapName_, generated by mapFact_), the global ids of all columns in that row
//! are collected. The sorted, duplicate-free list of these ids defines the coarse map,
//! which is stored on the coarse level under the same name and factory.
//!
//! @param fineLevel    fine level holding the map to transfer.
//! @param coarseLevel  coarse level holding "P"; receives the coarse map.
//! @throws Exceptions::RuntimeError if "P" is not available on the coarse level.
void MapTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & fineLevel, Level & coarseLevel) const {
  typedef Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> OperatorClass; //TODO
  typedef Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node> MapClass;
  typedef Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node> MapFactoryClass;

  Monitor m(*this, "Contact Map transfer factory");

  if (fineLevel.IsAvailable(mapName_, mapFact_.get())==false) {
    GetOStream(Runtime0, 0) << "MapTransferFactory::Build: User provided map " << mapName_ << " not found in Level class." << std::endl;
  }

  // fetch map extractor from level
  RCP<const MapClass> transferMap = fineLevel.Get<RCP<const MapClass> >(mapName_,mapFact_.get());

  // Get default tentative prolongator factory
  // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
  // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
  RCP<const FactoryBase> tentPFact = GetFactory("P");
  if (tentPFact == Teuchos::null) { tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
  TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("P",tentPFact.get()),Exceptions::RuntimeError, "MueLu::MapTransferFactory::Build(): P (generated by TentativePFactory) not available.");
  RCP<OperatorClass> Ptent = coarseLevel.Get<RCP<OperatorClass> >("P", tentPFact.get());

  std::vector<GlobalOrdinal > coarseMapGids;

  // loop over local rows of Ptent
  for (size_t row = 0; row < Ptent->getNodeNumRows(); row++) {
    GlobalOrdinal grid = Ptent->getRowMap()->getGlobalElement(row);

    if (transferMap->isNodeGlobalElement(grid)) {
      Teuchos::ArrayView<const LocalOrdinal> indices;
      Teuchos::ArrayView<const Scalar> vals;
      Ptent->getLocalRowView(row, indices, vals);

      for (size_t i = 0; i < (size_t)indices.size(); i++) {
        // mark all columns in Ptent(grid,*) to be coarse Dofs of next level transferMap
        GlobalOrdinal gcid = Ptent->getColMap()->getGlobalElement(indices[i]);
        coarseMapGids.push_back(gcid);
      }
    } // end if isNodeGlobalElement(grid)
  }

  // build column maps
  std::sort(coarseMapGids.begin(), coarseMapGids.end());
  coarseMapGids.erase(std::unique(coarseMapGids.begin(), coarseMapGids.end()), coarseMapGids.end());

  // A process may own no entry of the transfer map; taking &coarseMapGids[0] of an empty
  // vector is undefined, so the view stays default-constructed (empty) in that case.
  Teuchos::ArrayView<GlobalOrdinal> coarseMapGidsView;
  if (!coarseMapGids.empty())
    coarseMapGidsView = Teuchos::ArrayView<GlobalOrdinal>(&coarseMapGids[0], coarseMapGids.size());

  Teuchos::RCP<const MapClass> coarseTransferMap = MapFactoryClass::Build(Ptent->getColMap()->lib(), -1, coarseMapGidsView, Ptent->getColMap()->getIndexBase(), Ptent->getColMap()->getComm());

  // store map extractor in coarse level
  coarseLevel.Set(mapName_, coarseTransferMap, mapFact_.get());
}
//! @brief Builds a filtered copy of "A" and stores it as "A" of this factory.
//!
//! For every local row, an entry is copied to the output matrix when the row and column
//! global ids have the same remainder modulo the fixed block size of A and the magnitude
//! test below passes. The output matrix is completed with the domain and range map of the
//! input and inherits its fixed block size.
//!
//! NOTE(review): the magnitude test compares magnitude(value) >= magnitude(zero); as
//! written this looks like it holds for every entry, so no entry appears to be dropped
//! because of its value. Behavior is left unchanged here -- confirm the intended rule.
//!
//! @param currentLevel  level holding "A"; receives the filtered "A".
void DropNegativeEntriesFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Matrix filtering (springs)", currentLevel);

  typedef Teuchos::ScalarTraits<Scalar> ScalarTraitsType;

  RCP<Matrix> Ain = Get< RCP<Matrix> >(currentLevel, "A");
  const LocalOrdinal nDofsPerNode = Ain->GetFixedBlockSize();

  // create new empty Operator
  Teuchos::RCP<Matrix> Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries(), Xpetra::StaticProfile);

  const size_t numLocalRows = Ain->getNodeNumRows();
  for (size_t row = 0; row < numLocalRows; ++row) {
    const GlobalOrdinal grid = Ain->getRowMap()->getGlobalElement(row);
    const int rDofID = Teuchos::as<int>(grid % nDofsPerNode);

    // extract row information from input matrix
    Teuchos::ArrayView<const LocalOrdinal> indices;
    Teuchos::ArrayView<const Scalar> vals;
    Ain->getLocalRowView(row, indices, vals);

    // output buffers sized for the full row, shrunk after filtering
    Teuchos::ArrayRCP<GlobalOrdinal> indout(indices.size(),Teuchos::ScalarTraits<GlobalOrdinal>::zero());
    Teuchos::ArrayRCP<Scalar>        valout(indices.size(),ScalarTraitsType::zero());

    size_t nNonzeros = 0;
    const size_t rowLength = (size_t)indices.size();
    for (size_t i = 0; i < rowLength; ++i) {
      const GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id
      const int cDofID = Teuchos::as<int>(gcid % nDofsPerNode);

      if (rDofID != cDofID)
        continue;

      if (ScalarTraitsType::magnitude(vals[i]) >= ScalarTraitsType::magnitude(ScalarTraitsType::zero())) {
        indout[nNonzeros] = gcid;
        valout[nNonzeros] = vals[i];
        nNonzeros++;
      }
    }
    indout.resize(nNonzeros);
    valout.resize(nNonzeros);

    Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size()));
  }

  Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap());

  // copy block size information
  Aout->SetFixedBlockSize(nDofsPerNode);

  GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl;

  Set(currentLevel, "A", Aout);
}
//! @brief Reads a user-provided prolongator from file and derives the coarse level data.
//!
//! The coarse map is read from "mapFileName" and the prolongator from "matrixFileName".
//! The coarse nullspace is obtained by applying the transpose of P to the fine nullspace.
//! Coarse coordinates are generated for an n x n grid, where n*n must equal the global
//! number of coarse map elements: entry GID (relative to the index base) gets
//! x = GID % n and y = GID / n.
//!
//! @param fineLevel    fine level (read: "A", "Nullspace").
//! @param coarseLevel  coarse level (write: "CoarseMap", "P", "Nullspace", "Coordinates").
//! @throws Exceptions::RuntimeError if the fixed block size of A is not 1 or if the
//!         global number of coarse elements is not a square number.
void UserPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildP(Level& fineLevel, Level& coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);

  RCP<Matrix>      A             = Get< RCP<Matrix> >      (fineLevel, "A");
  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");

  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError, "Block size > 1 has not been implemented");

  const Teuchos::ParameterList& pL = GetParameterList();

  // Coarse map
  std::string    mapFile   = pL.get<std::string>("mapFileName");
  RCP<const Map> rowMap    = A->getRowMap();
  RCP<const Map> coarseMap = Utils2::ReadMap(mapFile, rowMap->lib(), rowMap->getComm());
  Set(coarseLevel, "CoarseMap", coarseMap);

  // Prolongator
  std::string matrixFile = pL.get<std::string>("matrixFileName");
  RCP<Matrix> P          = Utils::Read(matrixFile, rowMap, coarseMap, coarseMap, rowMap);
#if 1
  Set(coarseLevel, "P", P);
#else
  // Expand column map by 1
  RCP<Matrix> P1 = Utils::Multiply(*A, false, *P, false);
  P = Utils::Read(matrixFile, rowMap, P1->getColMap(), coarseMap, rowMap);
  Set(coarseLevel, "P", P);
#endif

  // Coarse nullspace = P^T * fine nullspace
  RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
  P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
  Set(coarseLevel, "Nullspace", coarseNullspace);

  // Coordinates transfer
  size_t n = Teuchos::as<size_t>(sqrt(coarseMap->getGlobalNumElements()));
  TEUCHOS_TEST_FOR_EXCEPTION(n*n != coarseMap->getGlobalNumElements(), Exceptions::RuntimeError, "Unfortunately, this is not the case, don't know what to do");

  RCP<MultiVector> coarseCoords = MultiVectorFactory::Build(coarseMap, 2);
  ArrayRCP<Scalar> x = coarseCoords->getDataNonConst(0);
  ArrayRCP<Scalar> y = coarseCoords->getDataNonConst(1);
  for (size_t LID = 0; LID < coarseMap->getNodeNumElements(); ++LID) {
    GlobalOrdinal GID    = coarseMap->getGlobalElement(LID) - coarseMap->getIndexBase();
    GlobalOrdinal xIndex = GID % n;
    GlobalOrdinal yIndex = GID/n;

    x[LID] = xIndex;
    y[LID] = yIndex;
  }
  Set(coarseLevel, "Coordinates", coarseCoords);

  if (!IsPrint(Statistics1))
    return;

  RCP<ParameterList> params = rcp(new ParameterList());
  params->set("printLoadBalancingInfo", true);
  GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params);
}
//! virtual void CallBuild(Level& requestedLevel) const { int levelID = requestedLevel.GetLevelID(); #ifdef HAVE_MUELU_DEBUG // We cannot call Build method twice for the same level, but we can call it multiple times for different levels TEUCHOS_TEST_FOR_EXCEPTION((multipleCallCheck_ == ENABLED) && (multipleCallCheckGlobal_ == ENABLED) && (lastLevelID_ == levelID), Exceptions::RuntimeError, this->ShortClassName() << "::Build() called twice for the same level (levelID=" << levelID << "). This is likely due to a configuration error."); if (multipleCallCheck_ == FIRSTCALL) multipleCallCheck_ = ENABLED; lastLevelID_ = levelID; #endif TEUCHOS_TEST_FOR_EXCEPTION(requestedLevel.GetPreviousLevel() == Teuchos::null, Exceptions::RuntimeError, "LevelID = " << levelID); #ifdef HAVE_MUELU_TIMER_SYNCHRONIZATION RCP<const Teuchos::Comm<int> > comm = requestedLevel.GetComm(); if (comm.is_null()) { // Some factories are called before we constructed Ac, and therefore, // before we set the level communicator. For such factories we can get // the comm from the previous level, as all processes go there RCP<Level>& prevLevel = requestedLevel.GetPreviousLevel(); if (!prevLevel.is_null()) comm = prevLevel->GetComm(); } // Synchronization timer std::string syncTimer = this->ShortClassName() + ": Build sync (level=" + toString(requestedLevel.GetLevelID()) + ")"; if (!comm.is_null()) { TimeMonitor timer(*this, syncTimer); comm->barrier(); } #endif Build(*requestedLevel.GetPreviousLevel(), requestedLevel); #ifdef HAVE_MUELU_TIMER_SYNCHRONIZATION // Synchronization timer if (!comm.is_null()) { TimeMonitor timer(*this, syncTimer); comm->barrier(); } #endif GetOStream(Test) << *RemoveFactoriesFromList(GetParameterList()) << std::endl; }
//! @brief Stops the timer. //! The previous MutuallyExclusiveTime that has been paused when this timer was started will be resumed. //! This method can be called on an already stopped timer or on the currently running timer. double stop() { TEUCHOS_TEST_FOR_EXCEPTION(isPaused(), Exceptions::RuntimeError, "MueLu::MutuallyExclusiveTime::start(): timer is paused. Use resume()."); if (!isRunning()) { return timer_->stop(); } // stop() can be called on stopped timer // Here, timer is running, so it is the head of the stack TopOfTheStack(); timerStack_.pop(); double r = timer_->stop(); if (!timerStack_.empty()) { GetOStream(Debug) << "resuming timer " << timerStack_.top()->name_ << std::endl; timerStack_.top()->resume(); } return r; }
void Set(const std::string& ename, const T& entry, const FactoryBase* factory = NoFactory::get()) { const FactoryBase* fac = GetFactory(ename, factory); if (fac == NoFactory::get()) { // Any data set with a NoFactory gets UserData keep flag by default AddKeepFlag(ename, NoFactory::get(), MueLu::UserData); } // Store entry only if data have been requested (or any keep flag) if (IsRequested(ename, factory) || GetKeepFlag(ename, factory) != 0) { TEUCHOS_TEST_FOR_EXCEPTION(!IsKey(factory, ename), Exceptions::RuntimeError, "" + ename + " not found in"); map_[factory][ename]->SetData(entry); } else { GetOStream(Warnings0) << "Level::Set: unable to store \"" << ename << "\" generated by factory " << factory << " on level " << toString(GetLevelID()) << ", as it has not been requested and no keep flags were set for it" << std::endl; } } // Set
//! Destructor: stops the timer (if there is one) and, when RuntimeTimings output is
//! enabled, prints the statistics returned by ReduceMaxMinAvg for the total elapsed time
//! over the default communicator (labelled max, min and avg in the output).
~TimeMonitor() {
  // No timer attached: nothing to stop or to report
  if (timer_ == Teuchos::null)
    return;

  // Stop the timer
  timer_->stop();

  if (!IsPrint(RuntimeTimings))
    return;

  //FIXME: creates lot of barriers. An option to report time of proc0 only instead would be nice
  //FIXME: MPI_COMM_WORLD only... BTW, it is also the case in Teuchos::TimeMonitor...
  //
  // mfh 11 Nov 2012: Actually, Teuchos::TimeMonitor::summarize() has multiple overloads that take a Teuchos::Comm.
  ArrayRCP<double> stats = ReduceMaxMinAvg(timer_->totalElapsedTime(), *Teuchos::DefaultComm<int>::getComm ());

  //FIXME: Not very important for now, but timer will be printed even if verboseLevel of Monitor/Object changed
  //       between Monitor constructor and destructor.
  GetOStream(RuntimeTimings, 0) << "Timer: " << " max=" << stats[0] << " min=" << stats[1] << " avg=" << stats[2] << std::endl;
}
// Builds the coarse map from the "Aggregates" and "Nullspace" data of currentLevel and
// stores it on the level as "CoarseMap".
//
// The striding information is checked against the nullspace dimension first:
//  - stridedBlockId_ == -1: stridingInfo_ is reset to the single entry NSDim.
//  - otherwise:             stridedBlockId_ must be a valid index into stridingInfo_ and the
//                           selected block size must equal NSDim.
// Throws Exceptions::RuntimeError if one of these checks fails.
void CoarseMapFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  RCP<Aggregates>  aggregates = Get< RCP<Aggregates> >(currentLevel, "Aggregates");
  RCP<MultiVector> nullspace  = Get< RCP<MultiVector> >(currentLevel, "Nullspace");

  GlobalOrdinal numAggs = aggregates->GetNumAggregates();
  const size_t  NSDim   = nullspace->getNumVectors();
  RCP<const Teuchos::Comm<int> > comm = aggregates->GetMap()->getComm();

  // check for consistency of striding information with NSDim and nCoarseDofs
  if (stridedBlockId_ == -1) {
    // this means we have no real strided map but only a block map with constant blockSize "NSDim"
    TEUCHOS_TEST_FOR_EXCEPTION(stridingInfo_.size() > 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() but must be one");
    stridingInfo_.clear();
    stridingInfo_.push_back(NSDim);
    TEUCHOS_TEST_FOR_EXCEPTION(stridingInfo_.size() != 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() but must be one");

  } else {
    // stridedBlockId_ is set by user: it must be a valid index into stridingInfo_.
    // The test is written without "size() - 1" so that an empty stridingInfo_ cannot underflow,
    // and negative ids other than -1 are rejected instead of being used as an index.
    TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockId_ < 0 || Teuchos::as<size_t>(stridedBlockId_) >= Teuchos::as<size_t>(stridingInfo_.size()),
                               Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): it is stridingInfo_.size() <= stridedBlockId_. error.");

    size_t stridedBlockSize = stridingInfo_[stridedBlockId_];
    TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockSize != NSDim, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): dimension of strided block != NSDim. error.");
  }

  GetOStream(Statistics1, 0) << "domainGIDOffset: " << domainGidOffset_ << " block size: " << getFixedBlockSize() << " stridedBlockId: " << stridedBlockId_ << std::endl;

  // number of coarse level dofs (fixed by number of aggregates and blocksize data)
  GlobalOrdinal nCoarseDofs = numAggs * getFixedBlockSize();
  GlobalOrdinal indexBase   = aggregates->GetMap()->getIndexBase();

  RCP<const Map> coarseMap = StridedMapFactory::Build(aggregates->GetMap()->lib(),
                                                      Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
                                                      nCoarseDofs,
                                                      indexBase,
                                                      stridingInfo_,
                                                      comm,
                                                      stridedBlockId_,
                                                      domainGidOffset_);

  Set(currentLevel, "CoarseMap", coarseMap);
} // Build
// Stores the sparsity graph of the prolongator pattern on coarseLevel as "Ppattern".
//
// Starting from the coarse-level "P", the fine-level "A" is multiplied onto it
// "emin: pattern order" times (P <- A*P per step). With an order <= 0 the graph of P
// itself is used and "A" is not fetched.
void PatternFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level& coarseLevel) const {
  FactoryMonitor m(*this, "Ppattern", coarseLevel);

  RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");

  const ParameterList& params = GetParameterList();
  const int patternOrder = params.get<int>("emin: pattern order");

  if (patternOrder > 0) {
    RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");

    bool doFillComplete  = true;
    bool optimizeStorage = true;

    RCP<Matrix> product;
    int step = 0;
    while (step < patternOrder) {
      product = Utils::Multiply(*A, false, *P, false, GetOStream(Statistics2), doFillComplete, optimizeStorage);
      // P now holds the new product; the previous P lives on in "product" until overwritten
      P.swap(product);
      ++step;
    }
  }

  Set(coarseLevel, "Ppattern", P->getCrsGraph());
}
// Transfers the multivector named by the "Vector name" parameter from fineLevel to
// coarseLevel by applying the coarse-level operator "R" to it, and stores the result on
// coarseLevel under the same name.
//
// Throws Exceptions::RuntimeError if the vector name is "Coordinates"
// (CoordinatesTransferFactory has to be used for those).
void MultiVectorTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & fineLevel, Level &coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);

  const ParameterList & pL = GetParameterList();
  const std::string vectorName = pL.get<std::string>("Vector name");

  // Reject coordinates up front, before any data is fetched or any product is computed
  TEUCHOS_TEST_FOR_EXCEPTION(vectorName == "Coordinates", Exceptions::RuntimeError, "Use CoordinatesTransferFactory to transfer coordinates instead of MultiVectorTransferFactory.");

  RCP<MultiVector> fineVector = fineLevel.Get< RCP<MultiVector> >(vectorName, GetFactory("Vector factory").get());
  RCP<Matrix>      transferOp = Get<RCP<Matrix> >(coarseLevel, "R");

  // The coarse vector lives on the range map of the transfer operator and has as many columns as the fine one
  RCP<MultiVector> coarseVector = MultiVectorFactory::Build(transferOp->getRangeMap(), fineVector->getNumVectors());
  GetOStream(Runtime0, 0) << "Transferring multivector \"" << vectorName << "\"" << std::endl;

  // coarseVector = transferOp * fineVector
  // (the former row-sum product with a vector of ones was never read and has been dropped)
  transferOp->apply(*fineVector, *coarseVector);

  Set<RCP<MultiVector> >(coarseLevel, vectorName, coarseVector);
} // Build
// Computes the coarse operator Ac = R*A*P (or P'*A*P with implicit transpose) for the full
// matrix "A" and for each of its 3x3 sub-blocks "A00".."A22", and stores the results on
// coarseLevel under the same names.
//
// Block (i,j) is multiplied from the right with the j-th entry of the PV/PP/PM triple and from
// the left with the i-th entry of PV/PP/PM (transposed) resp. RV/RP/RM.
void MHDRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const
  {
    FactoryMonitor m(*this, "Computing Ac", coarseLevel);

    // Level keys, listed in the order in which the data is fetched, multiplied and stored
    static const char* const blockKey[3][3] = {
      {"A00", "A01", "A02"},
      {"A10", "A11", "A12"},
      {"A20", "A21", "A22"}
    };
    static const char* const prolongatorKey[3] = {"PV", "PP", "PM"};
    static const char* const restrictorKey[3]  = {"RV", "RP", "RM"};

    //
    // Inputs: A, P
    //

    //DEBUG
    //Teuchos::FancyOStream fout(*GetOStream(Runtime1));
    //coarseLevel.print(fout,Teuchos::VERB_HIGH);

    RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
    RCP<Matrix> Ablk[3][3];
    for (int i = 0; i < 3; ++i)
      for (int j = 0; j < 3; ++j)
        Ablk[i][j] = Get< RCP<Matrix> >(fineLevel, blockKey[i][j]);

    RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");
    RCP<Matrix> Pblk[3];
    for (int j = 0; j < 3; ++j)
      Pblk[j] = Get< RCP<Matrix> >(coarseLevel, prolongatorKey[j]);

    //
    // Build Ac = RAP
    //

    // Step 1: right products A x P (column index j selects the prolongator block)
    RCP<Matrix> AP;
    RCP<Matrix> APblk[3][3];
    {
      SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);

      AP = Utils::Multiply(*A, false, *P, false, AP, GetOStream(Statistics2));
      for (int i = 0; i < 3; ++i)
        for (int j = 0; j < 3; ++j)
          APblk[i][j] = Utils::Multiply(*Ablk[i][j], false, *Pblk[j], false, APblk[i][j], GetOStream(Statistics2));
    }

    // Step 2: left products (row index i selects the prolongator/restrictor block)
    RCP<Matrix> Ac;
    RCP<Matrix> Acblk[3][3];

    if (implicitTranspose_) {
      SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);

      Ac = Utils::Multiply(*P, true, *AP, false, Ac, GetOStream(Statistics2));
      for (int i = 0; i < 3; ++i)
        for (int j = 0; j < 3; ++j)
          Acblk[i][j] = Utils::Multiply(*Pblk[i], true, *APblk[i][j], false, Acblk[i][j], GetOStream(Statistics2));

    } else {
      SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);

      RCP<Matrix> R = Get< RCP<Matrix> >(coarseLevel, "R");
      RCP<Matrix> Rblk[3];
      for (int i = 0; i < 3; ++i)
        Rblk[i] = Get< RCP<Matrix> >(coarseLevel, restrictorKey[i]);

      Ac = Utils::Multiply(*R, false, *AP, false, Ac, GetOStream(Statistics2));
      for (int i = 0; i < 3; ++i)
        for (int j = 0; j < 3; ++j)
          Acblk[i][j] = Utils::Multiply(*Rblk[i], false, *APblk[i][j], false, Acblk[i][j], GetOStream(Statistics2));
    }
    // FINISHED MAKING COARSE BLOCKS

    Set(coarseLevel, "A", Ac);
    for (int i = 0; i < 3; ++i)
      for (int j = 0; j < 3; ++j)
        Set(coarseLevel, blockKey[i][j], Acblk[i][j]);
  }
}
void AlgebraicPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map> permRowMap, Level & currentLevel, const FactoryBase* genFactory) const { #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT const Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm(); int numProcs = comm->getSize(); int myRank = comm->getRank(); /*if( permRowMap == Teuchos::null ) { permRowMap = A->getRowMap(); // use full row map of A }*/ size_t nDofsPerNode = 1; if (A->IsView("stridedMaps")) { Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps"); nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize(); } //GetOStream(Runtime0, 0) << "Perform generation of permutation operators on " << mapName_ << " map with " << permRowMap->getGlobalNumElements() << " elements" << std::endl; std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates; std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries; std::vector<Scalar> Weights; // loop over all local rows in matrix A and keep diagonal entries if corresponding // matrix rows are not contained in permRowMap for (size_t row = 0; row < A->getRowMap()->getNodeNumElements(); row++) { GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row); if(permRowMap->isNodeGlobalElement(grow) == true) continue; size_t nnz = A->getNumEntriesInLocalRow(row); // extract local row information from matrix Teuchos::ArrayView<const LocalOrdinal> indices; Teuchos::ArrayView<const Scalar> vals; A->getLocalRowView(row, indices, vals); TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? 
Error."); // find column entry with max absolute value GlobalOrdinal gMaxValIdx = 0; Scalar norm1 = 0.0; Scalar maxVal = 0.0; for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) { norm1 += std::abs(vals[j]); if(std::abs(vals[j]) > maxVal) { maxVal = std::abs(vals[j]); gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]); } } if(grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!! keepDiagonalEntries.push_back(std::make_pair(grow,grow)); } ////////// // handle rows that are marked to be relevant for permutations for (size_t row = 0; row < permRowMap->getNodeNumElements(); row++) { GlobalOrdinal grow = permRowMap->getGlobalElement(row); LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow); size_t nnz = A->getNumEntriesInLocalRow(lArow); // extract local row information from matrix Teuchos::ArrayView<const LocalOrdinal> indices; Teuchos::ArrayView<const Scalar> vals; A->getLocalRowView(lArow, indices, vals); TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error."); // find column entry with max absolute value GlobalOrdinal gMaxValIdx = 0; Scalar norm1 = 0.0; Scalar maxVal = 0.0; for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) { norm1 += std::abs(vals[j]); if(std::abs(vals[j]) > maxVal) { maxVal = std::abs(vals[j]); gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]); } } if(std::abs(maxVal) > 0.0) { // keep only max Entries \neq 0.0 permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx)); Weights.push_back(maxVal/(norm1*Teuchos::as<Scalar>(nnz))); } else { std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" 
<< std::endl; } } // sort Weights in descending order std::vector<int> permutation; sortingPermutation(Weights,permutation); // create new vector with exactly one possible entry for each column // each processor which requests the global column id gcid adds 1 to gColVec // gColVec will be summed up over all processors and communicated to gDomVec // which is based on the non-overlapping domain map of A. Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap()); Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap()); gColVec->putScalar(0.0); gDomVec->putScalar(0.0); // put in all keep diagonal entries for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) { gColVec->sumIntoGlobalValue((*p).second,1.0); } Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap()); gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); // communicate blocked gcolids to all procs gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory std::map<GlobalOrdinal, Scalar> gColId2Weight; Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0); for(size_t i = 0; i < permutedDiagCandidates.size(); ++i) { // loop over all candidates std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]]; GlobalOrdinal grow = pp.first; GlobalOrdinal gcol = pp.second; LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol); //Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0); if(ddata[lcol] > 0.0){ continue; // skip lcol: column already handled by another row } // mark column as already taken ddata[lcol]++; permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol)); gColId2Weight[gcol] = Weights[permutation[i]]; } // communicate how often each column index is requested by the different procs 
gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); // probably not needed // TODO check me //***************************************************************************************** // first communicate ALL global ids of column indices which are requested by more // than one proc to all other procs // detect which global col indices are requested by more than one proc // and store them in the multipleColRequests vector std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also // requested by another processor. This is possible, since they are stored // in gDomVec which is based on the nonoverlapping domain map. That is, each // global col id is handled by exactly one proc. std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) { Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0); if(arrDomVec[sz] > 1.0) { multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz)); } else if(arrDomVec[sz] == 0.0) { unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz)); } } // communicate the global number of column indices which are requested by more than one proc LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size()); LocalOrdinal globalMultColRequests = 0; // sum up all entries in multipleColRequests over all processors sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests); if(globalMultColRequests > 0) { // special handling: two processors request the same global column id. 
// decide which processor gets it // distribute number of multipleColRequests to all processors // each processor stores how many column ids for exchange are handled by the cur proc std::vector<GlobalOrdinal> numMyMultColRequests(numProcs,0); std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs,0); numMyMultColRequests[myRank] = localMultColRequests; Teuchos::reduceAll(*comm,Teuchos::REDUCE_MAX,numProcs,&numMyMultColRequests[0],&numGlobalMultColRequests[0]); // communicate multipleColRequests entries to all processors int nMyOffset = 0; for (int i=0; i<myRank-1; i++) nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights GlobalOrdinal zero=0; std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests,zero); std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests,zero); // loop over all local column GIDs that are also requested by other procs for(size_t i = 0; i < multipleColRequests.size(); i++) { procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ? } // template ordinal, package (double) Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests), &procMultRequestedColIds[0], &global_procMultRequestedColIds[0]); // loop over global_procOverlappingWeights and eliminate wrong entries... 
for (size_t k = 0; k<global_procMultRequestedColIds.size(); k++) { GlobalOrdinal globColId = global_procMultRequestedColIds[k]; std::vector<Scalar> MyWeightForColId(numProcs,0); std::vector<Scalar> GlobalWeightForColId(numProcs,0); if(gColVec->getMap()->isNodeGlobalElement(globColId)) { MyWeightForColId[myRank] = gColId2Weight[globColId]; } else { MyWeightForColId[myRank] = 0.0; } Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &MyWeightForColId[0], &GlobalWeightForColId[0]); if(gColVec->getMap()->isNodeGlobalElement(globColId)) { // note: 2 procs could have the same weight for a column index. // pick the first one. Scalar winnerValue = 0.0; int winnerProcRank = 0; for (int proc = 0; proc < numProcs; proc++) { if(GlobalWeightForColId[proc] > winnerValue) { winnerValue = GlobalWeightForColId[proc]; winnerProcRank = proc; } } // winnerProcRank is the winner for handling globColId. // winnerProcRank is unique (even if two procs have the same weight for a column index) if(myRank != winnerProcRank) { // remove corresponding entry from permutedDiagCandidatesFiltered typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin(); while(p != permutedDiagCandidatesFiltered.end() ) { if((*p).second == globColId) p = permutedDiagCandidatesFiltered.erase(p); else p++; } } } // end if isNodeGlobalElement } // end loop over global_procOverlappingWeights and eliminate wrong entries... 
} // end if globalMultColRequests > 0 // put together all pairs: //size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size(); std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs; RowColPairs.insert( RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end()); RowColPairs.insert( RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end()); #ifdef DEBUG_OUTPUT //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& // plausibility check gColVec->putScalar(0.0); gDomVec->putScalar(0.0); typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin(); while(pl != RowColPairs.end() ) { //GlobalOrdinal ik = (*pl).first; GlobalOrdinal jk = (*pl).second; gColVec->sumIntoGlobalValue(jk,1.0); pl++; } gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) { Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0); if(arrDomVec[sz] > 1.0) { GetOStream(Runtime0,0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl; } else if(arrDomVec[sz] == 0.0) { GetOStream(Runtime0,0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl; } } //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& #endif ////////////////////////////////////////////////// // assumption: on each processor RowColPairs now contains // a valid set of (row,column) pairs, where the row entries // are a subset of the processor's rows and the column entries // are unique throughout all processors. // Note: the RowColPairs are only defined for a subset of all rows, // so there might be rows without an entry in RowColPairs. // It can be, that some rows seem to be missing in RowColPairs, since // the entry in that row with maximum absolute value has been reserved // by another row already (e.g. as already diagonal dominant row outside // of perRowMap). 
// In fact, the RowColPairs vector only defines the (row,column) pairs // that will be definitely moved to the diagonal after permutation. #ifdef DEBUG_OUTPUT // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) { // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl; // } // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) // { //// if((*p).first != (*p).second) std::cout << "difference: " << (*p).first << " " << (*p).second << std::endl; // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl; // } // std::cout << "\n"; #endif // vectors to store permutation information Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap()); Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map) Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map) Teuchos::ArrayRCP< Scalar > PpermData = Pperm->getDataNonConst(0); Teuchos::ArrayRCP< Scalar > QpermData = Qperm->getDataNonConst(0); Pperm->putScalar(0.0); Qperm->putScalar(0.0); lQperm->putScalar(0.0); // setup exporter for Qperm Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap()); Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap()); Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map) Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map) Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use Teuchos::ArrayRCP< Scalar > RowIdStatusArray = RowIdStatus->getDataNonConst(0); Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0); 
Teuchos::ArrayRCP< Scalar > lColIdStatusArray = lColIdStatus->getDataNonConst(0); Teuchos::ArrayRCP< Scalar > ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this RowIdStatus->putScalar(0.0); ColIdStatus->putScalar(0.0); lColIdStatus->putScalar(0.0); ColIdUsed->putScalar(0.0); // no column ids are used // count wide-range permutations // a wide-range permutation is defined as a permutation of rows/columns which do not // belong to the same node LocalOrdinal lWideRangeRowPermutations = 0; GlobalOrdinal gWideRangeRowPermutations = 0; LocalOrdinal lWideRangeColPermutations = 0; GlobalOrdinal gWideRangeColPermutations = 0; // run 1: mark all "identity" permutations typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin(); while(p != RowColPairs.end() ) { GlobalOrdinal ik = (*p).first; GlobalOrdinal jk = (*p).second; LocalOrdinal lik = A->getRowMap()->getLocalElement(ik); LocalOrdinal ljk = A->getColMap()->getLocalElement(jk); if(RowIdStatusArray[lik] == 0.0) { RowIdStatusArray[lik] = 1.0; // use this row id lColIdStatusArray[ljk] = 1.0; // use this column id Pperm->replaceLocalValue(lik, ik); lQperm->replaceLocalValue(ljk, ik); // use column map ColIdUsed->replaceGlobalValue(ik,1.0); // ik is now used p = RowColPairs.erase(p); // detect wide range permutations if(floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) { lWideRangeColPermutations++; } } else p++; } // communicate column map -> domain map Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX); // plausibility check if(RowColPairs.size()>0) GetOStream(Warnings0,0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" 
<< std::endl; // TODO fix me // close Pperm // count, how many row permutations are missing on current proc size_t cntFreeRowIdx = 0; std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) { if(RowIdStatusArray[lik] == 0.0) { cntFreeRowIdx++; qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik)); } } // fix Pperm for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) { if(RowIdStatusArray[lik] == 0.0) { RowIdStatusArray[lik] = 1.0; // use this row id Pperm->replaceLocalValue(lik, qFreeGRowIdx.front()); // detect wide range permutations if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) { lWideRangeRowPermutations++; } qFreeGRowIdx.pop(); } } // close Qperm (free permutation entries in Qperm) size_t cntFreeColIdx = 0; std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { if(ColIdStatusArray[ljk] == 0.0) { cntFreeColIdx++; qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk)); } } size_t cntUnusedColIdx = 0; std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) { if(ColIdUsedArray[ljk] == 0.0) { cntUnusedColIdx++; qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk)); } } // fix Qperm with local entries for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // stop if no (local) unused column idx are left if(cntUnusedColIdx == 0) break; if(ColIdStatusArray[ljk] == 0.0) { ColIdStatusArray[ljk] = 1.0; // use this row id Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map) ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),1.0); // ljk is now used, too // detect wide range permutations 
if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) { lWideRangeColPermutations++; } qUnusedGColIdx.pop(); cntUnusedColIdx--; cntFreeColIdx--; } } //Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally //ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX); // count, how many unused column idx are needed on current processor // to complete Qperm cntFreeColIdx = 0; for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop if(ColIdStatusArray[ljk] == 0.0) { cntFreeColIdx++; } } GlobalOrdinal global_cntFreeColIdx = 0; LocalOrdinal local_cntFreeColIdx = cntFreeColIdx; sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx); #ifdef DEBUG_OUTPUT std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl; #endif // avoid global communication if possible if(global_cntFreeColIdx > 0) { // 1) count how many unused column ids are left GlobalOrdinal global_cntUnusedColIdx = 0; LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx; sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx); #ifdef DEBUG_OUTPUT std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl; #endif // 2) communicate how many unused column ids are available on procs std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs); std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs); local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx; Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_UnusedColIdxOnProc[0], &global_UnusedColIdxOnProc[0]); #ifdef DEBUG_OUTPUT std::cout << "PROC " << myRank << " global num unused indices per proc: "; for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) { std::cout << " " << global_UnusedColIdxOnProc[ljk]; } std::cout << std::endl; #endif // 3) 
build array of length global_cntUnusedColIdx to globally replicate unused column idx std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx)); std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx)); GlobalOrdinal global_cntUnusedColIdxStartIter = 0; for(int proc=0; proc<myRank; proc++) { global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc]; } for(GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter+local_cntUnusedColIdx; k++) { local_UnusedColIdxVector[k] = qUnusedGColIdx.front(); qUnusedGColIdx.pop(); } Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx), &local_UnusedColIdxVector[0], &global_UnusedColIdxVector[0]); #ifdef DEBUG_OUTPUT std::cout << "PROC " << myRank << " global UnusedGColIdx: "; for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) { std::cout << " " << global_UnusedColIdxVector[ljk]; } std::cout << std::endl; #endif // 4) communicate, how many column idx are needed on each processor // to complete Qperm std::vector<LocalOrdinal> local_EmptyColIdxOnProc (numProcs); std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs); local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx; Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_EmptyColIdxOnProc[0], &global_EmptyColIdxOnProc[0]); #ifdef DEBUG_OUTPUT std::cout << "PROC " << myRank << " global num of needed column indices: "; for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) { std::cout << " " << global_EmptyColIdxOnProc[ljk]; } std::cout << std::endl; #endif // 5) determine first index in global_UnusedColIdxVector for unused column indices, // that are marked to be used by this processor GlobalOrdinal global_UnusedColStartIdx = 0; for(int proc=0; proc<myRank; proc++) { global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc]; } #ifdef DEBUG_OUTPUT GetOStream(Statistics0,0) << "PROC " << 
myRank << " is allowd to use the following column gids: "; for(GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) { GetOStream(Statistics0,0) << global_UnusedColIdxVector[k] << " "; } GetOStream(Statistics0,0) << std::endl; #endif // 6.) fix Qperm with global entries GlobalOrdinal array_iter = 0; for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { if(ColIdStatusArray[ljk] == 0.0) { ColIdStatusArray[ljk] = 1.0; // use this row id Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]); ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter],1.0); // detect wide range permutations if(floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) { lWideRangeColPermutations++; } array_iter++; //cntUnusedColIdx--; // check me } } } // end if global_cntFreeColIdx > 0 /////////////////// Qperm should be fine now... 
// create new empty Matrix Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile)); Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile)); for(size_t row=0; row<A->getNodeNumRows(); row++) { Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1,Teuchos::as<GO>(PpermData[row])); // column idx for Perm^T Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1,Teuchos::as<GO>(QpermData[row])); // column idx for Qperm Teuchos::ArrayRCP<Scalar> valout(1,1.0); permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0,indoutP.size()), valout.view(0,valout.size())); permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.view(0,indoutQ.size()), valout.view(0,valout.size())); } permPTmatrix->fillComplete(); permQTmatrix->fillComplete(); Teuchos::RCP<Matrix> permPmatrix = Utils2::Transpose(permPTmatrix,true); for(size_t row=0; row<permPTmatrix->getNodeNumRows(); row++) { if(permPTmatrix->getNumEntriesInLocalRow(row) != 1) GetOStream(Warnings0,0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl; if(permPmatrix->getNumEntriesInLocalRow(row) != 1) GetOStream(Warnings0,0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl; if(permQTmatrix->getNumEntriesInLocalRow(row) != 1) GetOStream(Warnings0,0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl; } // build permP * A * permQT Teuchos::RCP<Matrix> ApermQt = Utils::Multiply(*A, false, *permQTmatrix, false); Teuchos::RCP<Matrix> permPApermQt = Utils::Multiply(*permPmatrix, false, *ApermQt, false); /* MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("A.mat", *A); MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permP.mat", *permPmatrix); 
MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permQt.mat", *permQTmatrix); MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permPApermQt.mat", *permPApermQt); */ // build scaling matrix Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(),true); Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(),true); Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0); Teuchos::ArrayRCP< Scalar > invDiagVecData = invDiagVec->getDataNonConst(0); permPApermQt->getLocalDiagCopy(*diagVec); for(size_t i = 0; i<diagVec->getMap()->getNodeNumElements(); ++i) { if(diagVecData[i] != 0.0) invDiagVecData[i] = 1/diagVecData[i]; else { invDiagVecData[i] = 1.0; GetOStream(Statistics0,0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl; } } Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1,Xpetra::StaticProfile)); for(size_t row=0; row<A->getNodeNumRows(); row++) { Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]); diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size())); } diagScalingOp->fillComplete(); Teuchos::RCP<Matrix> scaledA = Utils::Multiply(*diagScalingOp, false, *permPApermQt, false); currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory/*this*/); currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory/*this*/); // TODO careful with this!!! 
currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory/*this*/); currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory/*this*/); currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory/*this*/); //// count row permutations // count zeros on diagonal in P -> number of row permutations Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true); permPmatrix->getLocalDiagCopy(*diagPVec); Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0); LocalOrdinal lNumRowPermutations = 0; GlobalOrdinal gNumRowPermutations = 0; for(size_t i = 0; i<diagPVec->getMap()->getNodeNumElements(); ++i) { if(diagPVecData[i] == 0.0) { lNumRowPermutations++; } } // sum up all entries in multipleColRequests over all processors sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations); //// count column permutations // count zeros on diagonal in Q^T -> number of column permutations Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true); permQTmatrix->getLocalDiagCopy(*diagQTVec); Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0); LocalOrdinal lNumColPermutations = 0; GlobalOrdinal gNumColPermutations = 0; for(size_t i = 0; i<diagQTVec->getMap()->getNodeNumElements(); ++i) { if(diagQTVecData[i] == 0.0) { lNumColPermutations++; } } // sum up all entries in multipleColRequests over all processors sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations); currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/); currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/); currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory/*this*/); currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory/*this*/); 
GetOStream(Statistics0, 0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl; GetOStream(Statistics0, 0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl; GetOStream(Runtime1, 0) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl; #else #warning PermutationFactory not compiling/working for Scalar==complex. #endif // #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT }
// Builds the coarse-level blocked operator Ac and stores it as "A" on coarseLevel.
//
// Inputs read from the levels:
//   - "A" from fineLevel and "P" from coarseLevel; both must be BlockedCrsMatrix objects.
//   - "R" from coarseLevel, only when the parameter "transpose: use implicit" is false.
//
// Steps:
//   1. AP = A * P
//   2. Ac = P^T * AP (implicit transpose) or Ac = R * AP (explicit restrictor)
//   3. optional main diagonal check of Ac (if checkAc_ is set)
//   4. call Build of all user-given transfer factories on coarseLevel
//
// Throws Exceptions::BadCast if one of the operators is not a BlockedCrsMatrix or if the
// block dimensions of the operators do not match.
void BlockedRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &fineLevel, Level &coarseLevel) const { //FIXME make fineLevel const!!
  FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel);

  const Teuchos::ParameterList& pL = GetParameterList();

  RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
  RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");

  RCP<BlockedCrsMatrix> bA = rcp_dynamic_cast<BlockedCrsMatrix>(A);
  RCP<BlockedCrsMatrix> bP = rcp_dynamic_cast<BlockedCrsMatrix>(P);
  // Only A and P are tested here; R is tested below, and only if it is actually used.
  TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null() || bP.is_null(), Exceptions::BadCast,
                             "Matrices A and P must be of type BlockedCrsMatrix.");

  RCP<BlockedCrsMatrix> bAP;
  RCP<BlockedCrsMatrix> bAc;
  {
    SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);

    // Triple matrix product for BlockedCrsMatrixClass
    TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast,
                               "Block matrix dimensions do not match: "
                               "A is " << bA->Rows() << "x" << bA->Cols() <<
                               ", P is " << bP->Rows() << "x" << bP->Cols());

    // NOTE(review): the two trailing 'true' arguments presumably are the fillComplete and
    // optimizeStorage flags (cf. the named flags used for the second product) - confirm.
    bAP = Utils::TwoMatrixMultiplyBlock(*bA, false, *bP, false, GetOStream(Statistics2), true, true);
  }

  // If we do not modify matrix later, allow optimization of storage.
  // This is necessary for new faster Epetra MM kernels.
  bool doOptimizeStorage = !checkAc_;

  const bool doTranspose    = true;
  const bool doFillComplete = true;
  if (pL.get<bool>("transpose: use implicit")) {
    // Implicit restriction: use P^T instead of an explicitly stored R.
    SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);
    bAc = Utils::TwoMatrixMultiplyBlock(*bP, doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage);

  } else {
    // Explicit restriction: R is requested from the coarse level.
    RCP<Matrix>           R  = Get< RCP<Matrix> >(coarseLevel, "R");
    RCP<BlockedCrsMatrix> bR = rcp_dynamic_cast<BlockedCrsMatrix>(R);
    TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast,
                               "Matrix R must be of type BlockedCrsMatrix.");

    TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast,
                               "Block matrix dimensions do not match: "
                               "R is " << bR->Rows() << "x" << bR->Cols() <<
                               ", A is " << bA->Rows() << "x" << bA->Cols());

    SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);
    bAc = Utils::TwoMatrixMultiplyBlock(*bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage);
  }

  if (checkAc_)
    CheckMainDiagonal(bAc);

  GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)");

  // static int run = 1;
  // RCP<CrsMatrixWrap> A11 = rcp(new CrsMatrixWrap(bAc->getMatrix(0,0)));
  // Utils::Write(toString(run) + "_A_11.mm", *A11);
  // if (!bAc->getMatrix(1,1).is_null()) {
  //   RCP<CrsMatrixWrap> A22 = rcp(new CrsMatrixWrap(bAc->getMatrix(1,1)));
  //   Utils::Write(toString(run) + "_A_22.mm", *A22);
  // }
  // RCP<CrsMatrixWrap> Am = rcp(new CrsMatrixWrap(bAc->Merge()));
  // Utils::Write(toString(run) + "_A.mm", *Am);
  // run++;

  Set<RCP <Matrix> >(coarseLevel, "A", bAc);

  if (transferFacts_.begin() != transferFacts_.end()) {
    SubFactoryMonitor m1(*this, "Projections", coarseLevel);

    // call Build of all user-given transfer factories
    for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) {
      RCP<const FactoryBase> fac = *it;

      GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl;

      fac->CallBuild(coarseLevel);

      // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid
      // of dangling data for CoordinatesTransferFactory
      coarseLevel.Release(*fac);
    }
  }
}
// Computes the coarse-level operator Ac from the fine-level "A" and the coarse-level "P"
// (plus "R" when no implicit transpose is used) and stores it on coarseLevel as "A" and
// as "RAP Pattern". The intermediate product A*P is stored as "AP Pattern". Afterwards the
// Build routines of all user-given transfer factories are called on coarseLevel.
void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const
  {
    FactoryMonitor buildMonitor(*this, "Computing Ac", coarseLevel);

    // Translate the "Keep ..." parameters into keep flags on the coarse level
    const Teuchos::ParameterList& paramList = GetParameterList();
    if (paramList.isParameter("Keep AP Pattern")) {
      if (paramList.get<bool>("Keep AP Pattern"))
        coarseLevel.Keep("AP Pattern", this);
    }
    if (paramList.isParameter("Keep RAP Pattern")) {
      if (paramList.get<bool>("Keep RAP Pattern"))
        coarseLevel.Keep("RAP Pattern", this);
    }

    // Input operators: fine level matrix and prolongator
    RCP<Matrix> fineA        = Get< RCP<Matrix> >(fineLevel,   "A");
    RCP<Matrix> prolongator  = Get< RCP<Matrix> >(coarseLevel, "P");

    // First product: A * P. A previously stored result is handed to the
    // multiplication routine if it is available (multiple solve).
    RCP<Matrix> productAP;
    if (coarseLevel.IsAvailable("AP Pattern", this)) {
      GetOStream(Runtime0, 0) << "Ac: Using previous AP pattern"<<std::endl;
      productAP = Get< RCP<Matrix> >(coarseLevel, "AP Pattern");
    }

    {
      SubFactoryMonitor apMonitor(*this, "MxM: A x P", coarseLevel);
      productAP = Utils::Multiply(*fineA, false, *prolongator, false, productAP);
      Set(coarseLevel, "AP Pattern", productAP);
    }

    // Storage may only be optimized if the matrix is not modified afterwards
    // (the main diagonal check may insert local values).
    // This is necessary for new faster Epetra MM kernels.
    bool optimizeStorage = !checkAc_;

    // Second product: R * (AP) or P' * (AP). A previously stored coarse matrix is
    // handed to the multiplication routine if it is available (multiple solve).
    RCP<Matrix> coarseA;
    if (coarseLevel.IsAvailable("RAP Pattern", this)) {
      GetOStream(Runtime0, 0) << "Ac: Using previous RAP pattern" << std::endl;
      coarseA = Get< RCP<Matrix> >(coarseLevel, "RAP Pattern");
    }

    if (!implicitTranspose_) {
      SubFactoryMonitor rapMonitor(*this, "MxM: R x (AP) (explicit)", coarseLevel);
      RCP<Matrix> restrictor = Get< RCP<Matrix> >(coarseLevel, "R");
      coarseA = Utils::Multiply(*restrictor, false, *productAP, false, coarseA, true, optimizeStorage);

    } else {
      SubFactoryMonitor rapMonitor(*this, "MxM: P' x (AP) (implicit)", coarseLevel);
      coarseA = Utils::Multiply(*prolongator, true, *productAP, false, coarseA, true, optimizeStorage);
    }

    if (checkAc_) {
      CheckMainDiagonal(coarseA);
    }

    // Print matrix statistics including load balancing information
    RCP<ParameterList> printOptions = rcp(new ParameterList());
    printOptions->set("printLoadBalancingInfo", true);
    GetOStream(Statistics0, 0) << Utils::PrintMatrixInfo(*coarseA, "Ac", printOptions);

    Set(coarseLevel, "A",           coarseA);
    Set(coarseLevel, "RAP Pattern", coarseA);
  }

  if (!transferFacts_.empty()) {
    SubFactoryMonitor projectionMonitor(*this, "Projections", coarseLevel);

    // Run the Build routine of every user-given transfer factory
    typedef std::vector<RCP<const FactoryBase> >::const_iterator TransferFactIter;
    for (TransferFactIter facIt = transferFacts_.begin(); facIt != transferFacts_.end(); ++facIt) {
      const RCP<const FactoryBase>& transferFact = *facIt;

      GetOStream(Runtime0, 0) << "Ac: call transfer factory " << transferFact.get() << ": " << transferFact->description() << std::endl;
      transferFact->CallBuild(coarseLevel);
    }
  }
}
// Extracts one sub-block of a blocked operator and stores it as "A" on currentLevel.
//
// The block position is read from the parameters "block row" and "block col". The input "A"
// on currentLevel must be a BlockedCrsMatrix. The extracted block is wrapped in a
// CrsMatrixWrap and gets a "stridedMaps" view built from the range/domain map extractors.
//
// Throws Exceptions::BadCast if the input matrix is not a BlockedCrsMatrix or if no strided
// map information can be found, and Exceptions::RuntimeError if the requested block indices
// are out of range or the "stridedMaps" view could not be created.
void SubBlockAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & currentLevel) const {
  typedef Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> OMatrix; //TODO
  typedef Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixClass; //TODO
  typedef Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixWrapClass; //TODO
  typedef Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> BlockedCrsOMatrix; //TODO
  typedef Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> MapExtractorClass;

  const ParameterList & pL = GetParameterList();
  size_t row = Teuchos::as<size_t>(pL.get<int>("block row"));
  size_t col = Teuchos::as<size_t>(pL.get<int>("block col"));

  RCP<OMatrix> Ain = Teuchos::null;
  Ain = Get< RCP<OMatrix> >(currentLevel, "A");

  RCP<BlockedCrsOMatrix> bA = Teuchos::rcp_dynamic_cast<BlockedCrsOMatrix>(Ain);

  TEUCHOS_TEST_FOR_EXCEPTION(bA==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: input matrix A is not of type BlockedCrsMatrix! error.");
  // Block indices are zero-based, so the valid ranges are [0, Rows()) and [0, Cols()).
  TEUCHOS_TEST_FOR_EXCEPTION(row >= bA->Rows(), Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: block row " << row << " is out of range, A has only " << bA->Rows() << " block rows! error.");
  TEUCHOS_TEST_FOR_EXCEPTION(col >= bA->Cols(), Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: block col " << col << " is out of range, A has only " << bA->Cols() << " block cols! error.");

  Teuchos::RCP<CrsMatrixClass> A = bA->getMatrix(row, col);

  Teuchos::RCP<CrsMatrixWrapClass> Op = Teuchos::rcp(new CrsMatrixWrapClass(A));

  //////////////// EXPERIMENTAL
  // extract striding information from RangeMapExtractor

  Teuchos::RCP<const MapExtractorClass> rgMapExtractor = bA->getRangeMapExtractor();
  Teuchos::RCP<const MapExtractorClass> doMapExtractor = bA->getDomainMapExtractor();

  Teuchos::RCP<const Map> rgMap = rgMapExtractor->getMap(row);
  Teuchos::RCP<const Map> doMap = doMapExtractor->getMap(col);

  Teuchos::RCP<const StridedMap> srgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rgMap);
  Teuchos::RCP<const StridedMap> sdoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(doMap);

  // The block range map carries no striding information itself:
  // rebuild it from the striding data of the full range map.
  if(srgMap == Teuchos::null) {
    Teuchos::RCP<const Map> fullRgMap = rgMapExtractor->getFullMap();
    Teuchos::RCP<const StridedMap> sFullRgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(fullRgMap);
    TEUCHOS_TEST_FOR_EXCEPTION(sFullRgMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: full rangeMap is not a strided map");
    std::vector<size_t> stridedData = sFullRgMap->getStridingData();
    if(stridedData.size() == 1 && row > 0) // we have block matrices. use striding block information 0
      srgMap = StridedMapFactory::Build(rgMap, stridedData, 0, sFullRgMap->getOffset());
    else // we have strided matrices. use striding information of the corresponding block
      srgMap = StridedMapFactory::Build(rgMap, stridedData, row, sFullRgMap->getOffset());
  }

  // Same procedure for the block domain map.
  if(sdoMap == Teuchos::null) {
    Teuchos::RCP<const Map> fullDoMap = doMapExtractor->getFullMap();
    Teuchos::RCP<const StridedMap> sFullDoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(fullDoMap);
    TEUCHOS_TEST_FOR_EXCEPTION(sFullDoMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: full domainMap is not a strided map");
    std::vector<size_t> stridedData2 = sFullDoMap->getStridingData();
    if(stridedData2.size() == 1 && col > 0) // we have block matrices. use striding block information 0
      sdoMap = StridedMapFactory::Build(doMap, stridedData2, 0, sFullDoMap->getOffset());
    else // we have strided matrices. use striding information of the corresponding block
      sdoMap = StridedMapFactory::Build(doMap, stridedData2, col, sFullDoMap->getOffset());
  }

  TEUCHOS_TEST_FOR_EXCEPTION(srgMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: rangeMap " << row << " is not a strided map");
  TEUCHOS_TEST_FOR_EXCEPTION(sdoMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: domainMap " << col << " is not a strided map");

  GetOStream(Statistics1) << "A(" << row << "," << col << ") has strided maps: range map fixed block size=" << srgMap->getFixedBlockSize() << " strided block id = " << srgMap->getStridedBlockId() << ", domain map fixed block size=" << sdoMap->getFixedBlockSize() << ", strided block id=" << sdoMap->getStridedBlockId() << std::endl;

  // Replace an already existing "stridedMaps" view by the one built above
  if(Op->IsView("stridedMaps") == true) Op->RemoveView("stridedMaps");
  Op->CreateView("stridedMaps", srgMap, sdoMap);
  TEUCHOS_TEST_FOR_EXCEPTION(Op->IsView("stridedMaps")==false, Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: failed to set stridedMaps");

  //////////////// EXPERIMENTAL

  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<OMatrix>(Op), this);
}