TEUCHOS_UNIT_TEST(CoarseMap, StandardCase) {
  out << "version: " << MueLu::Version() << std::endl;

  // Set up a level holding a small 1D Poisson matrix.
  Level level;
  level.SetLevelID(0);
  RCP<Matrix> A = TestHelpers::TestFactory<SC, LO, GO, NO, LMO>::Build1DPoisson(15);
  level.Set("A", A);

  // Dummy aggregate structure: 10 local aggregates on the row map of A.
  Teuchos::RCP<Aggregates> aggregates = Teuchos::rcp(new Aggregates(A->getRowMap()));
  aggregates->SetNumAggregates(10); // set (local!) number of aggregates
  level.Set("Aggregates", aggregates);

  // Dummy constant nullspace vector (one column).
  Teuchos::RCP<MultiVector> nullspace = MultiVectorFactory::Build(A->getRowMap(), 1);
  nullspace->putScalar(1.0);
  level.Set("Nullspace", nullspace);

  // Build the coarse map from the user-provided aggregates/nullspace.
  RCP<CoarseMapFactory> coarseMapFact = Teuchos::rcp(new CoarseMapFactory());
  level.Request("CoarseMap", coarseMapFact.get());
  coarseMapFact->SetFactory("Aggregates", MueLu::NoFactory::getRCP());
  coarseMapFact->SetFactory("Nullspace",  MueLu::NoFactory::getRCP());
  coarseMapFact->Build(level);

  Teuchos::RCP<const Map> coarseMap = level.Get<Teuchos::RCP<const Map> >("CoarseMap", coarseMapFact.get());

  // 10 aggregates x 1 nullspace vector => local coarse GIDs 0..9.
  TEST_EQUALITY(coarseMap->getMinAllGlobalIndex() == 0, true);
  TEST_EQUALITY(coarseMap->getMaxLocalIndex() == 9, true);
}
void TopRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level & fineLevel, Level & coarseLevel) const {
  // Promote "P"/"R"/"A" from their generating factory to the NoFactory slot so they
  // can later be retrieved by name alone.  The data gets the Final keep flag
  // (otherwise it would be erased as soon as all requests are released), and the
  // UserData flag -- implicitly attached by Level::Set() with NoFactory -- is
  // stripped again.
  // FIXME2: Order of Remove/Add matters (data removed otherwise). Should do something about this.
  // FIXME:  Removing UserData is a hack; Level::Set() behavior should change instead.
  // FIXME3: The flag should not be removed if it was already there.
  auto promote = [&coarseLevel](const std::string& name, const RCP<const FactoryBase>& fact) {
    RCP<Operator> op  = coarseLevel.Get<RCP<Operator> >(name, fact.get());
    RCP<Matrix>   mat = rcp_dynamic_cast<Matrix>(op);
    // Re-register under the concrete Matrix type when possible, Operator otherwise.
    if (!mat.is_null()) coarseLevel.Set(name, mat, NoFactory::get());
    else                coarseLevel.Set(name, op,  NoFactory::get());
    coarseLevel.AddKeepFlag   (name, NoFactory::get(), MueLu::Final);
    coarseLevel.RemoveKeepFlag(name, NoFactory::get(), MueLu::UserData);
  };

  if ((PFact_  != Teuchos::null) && (PFact_  != NoFactory::getRCP())) promote("P", PFact_);
  if ((RFact_  != Teuchos::null) && (RFact_  != NoFactory::getRCP())) promote("R", RFact_);
  if ((AcFact_ != Teuchos::null) && (AcFact_ != NoFactory::getRCP())) promote("A", AcFact_);
}
void TopSmootherFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level & level) const {
  if (preSmootherFact_.is_null() && postSmootherFact_.is_null())
    return;

  // NOTE 1: We need to set at least some keep flag for the smoothers, otherwise they are
  //         removed as soon as all requests are released. We choose the Final flag, and
  //         additionally make the data retrievable by name alone via NoFactory. Data set
  //         with NoFactory gets the UserData flag by default, which we don't want, so it
  //         is removed again.
  // NOTE 2: Some smoother factories are tricky (see comments in MueLu::SmootherFactory):
  //         in general we cannot tell whether a factory generates "PreSmoother" or
  //         "PostSmoother". For SmootherFactory specifically we can check its prototypes.
  auto transfer = [&level](const RCP<const FactoryBase>& fact, const std::string& name, bool wantPre) {
    // Checking for null is not sufficient: SmootherFactory(null, something) does not
    // generate "PreSmoother", and SmootherFactory(something, null) does not generate
    // "PostSmoother".
    bool isAble = true;
    RCP<const SmootherFactory> s = rcp_dynamic_cast<const SmootherFactory>(fact);
    if (!s.is_null()) {
      RCP<SmootherPrototype> pre, post;
      s->GetSmootherPrototypes(pre, post);
      if ((wantPre ? pre : post).is_null())
        isAble = false;
    }
    // If the factory is not a SmootherFactory, we assume it *is* able to generate the smoother.
    if (isAble) {
      RCP<SmootherBase> smoother = level.Get<RCP<SmootherBase> >(name, fact.get());
      level.Set           (name, smoother, NoFactory::get());
      level.AddKeepFlag   (name, NoFactory::get(), MueLu::Final);
      level.RemoveKeepFlag(name, NoFactory::get(), MueLu::UserData);
    }
  };

  if (!preSmootherFact_.is_null())  transfer(preSmootherFact_,  "PreSmoother",  true);
  if (!postSmootherFact_.is_null()) transfer(postSmootherFact_, "PostSmoother", false);
}
// Rebalances a user-supplied (partial) map according to the repartitioning importer
// stored as "Importer" in the level, and stores the rebalanced map back under the
// same name and generating factory.
void RebalanceMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(Level &level) const {
  FactoryMonitor m(*this, "Build", level);
  //Teuchos::RCP<Teuchos::FancyOStream> fos = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout));

  // extract data from Level object
  const Teuchos::ParameterList & pL = GetParameterList();
  std::string mapName                        = pL.get<std::string>("Map name");
  Teuchos::RCP<const FactoryBase> mapFactory = GetFactory("Map factory");

  RCP<const Import> rebalanceImporter = Get<RCP<const Import> >(level, "Importer");
  if (rebalanceImporter != Teuchos::null) {
    // input map (not rebalanced)
    RCP<const Map> map = level.Get<RCP<const Map> >(mapName, mapFactory.get());

    // Create an indicator vector on the input map.
    // Note that the map can be a part only of the full map stored in rebalanceImporter.getSourceMap()
    RCP<Vector> v = VectorFactory::Build(map);
    v->putScalar(1.0);

    // import the partial map information to the full source map
    RCP<const Import> blowUpImporter = ImportFactory::Build(map, rebalanceImporter->getSourceMap());
    RCP<Vector> pv = VectorFactory::Build(rebalanceImporter->getSourceMap());
    pv->doImport(*v, *blowUpImporter, Xpetra::INSERT);

    // do rebalancing using rebalanceImporter
    RCP<Vector> ptv = VectorFactory::Build(rebalanceImporter->getTargetMap());
    ptv->doImport(*pv, *rebalanceImporter, Xpetra::INSERT);

    if (pL.get<bool>("repartition: use subcommunicators") == true)
      ptv->replaceMap(ptv->getMap()->removeEmptyProcesses());

    // Reconstruct the rebalanced partial map: every entry flagged with 1.0 belongs to it.
    Teuchos::ArrayRCP<const Scalar> ptvData = ptv->getData(0);
    std::vector<GlobalOrdinal> localGIDs;  // GIDs that are stored on current proc
    for (size_t k = 0; k < ptv->getLocalLength(); k++) {
      if (ptvData[k] == 1.0)
        localGIDs.push_back(ptv->getMap()->getGlobalElement(k));
    }

    // BUGFIX: use data() instead of &localGIDs[0] -- after rebalancing a process may
    // own no GIDs at all, and indexing element 0 of an empty std::vector is undefined
    // behavior. data() is well-defined (possibly null) for an empty vector.
    const Teuchos::ArrayView<const GlobalOrdinal> localGIDs_view(localGIDs.data(), localGIDs.size());

    // NOTE(review): OrdinalTraits<int>::invalid() is passed where a *global element
    // count* is expected; OrdinalTraits<Xpetra::global_size_t>::invalid() looks
    // intended -- confirm before changing, as it alters the computed global size.
    Teuchos::RCP<const Map> localGIDsMap = MapFactory::Build(
        map->lib(),
        Teuchos::OrdinalTraits<int>::invalid(),
        localGIDs_view,
        0,
        ptv->getMap()->getComm()); // use correct communicator here!

    // store rebalanced partial map using the same name and generating factory as the
    // original map in the level class
    level.Set(mapName, localGIDsMap, mapFactory.get());
  }
} //Build()
// Promotes the transfer operators ("P", "R") and the coarse matrix ("A") from their
// generating factories to the NoFactory slot so that they can be retrieved by name
// alone after setup, pinned with the Final keep flag.
// NOTE(review): unlike the non-LocalMatOps overload of TopRAPFactory::Build, the P and
// R branches here do not additionally guard against PFact_/RFact_ being
// NoFactory::getRCP() -- only the AcFact_ branch does. Confirm this asymmetry is intended.
void TopRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & fineLevel, Level & coarseLevel) const {
  if (PFact_ != Teuchos::null) {
    RCP<Matrix> P = coarseLevel.Get<RCP<Matrix> >("P", PFact_.get());
    // Re-register under NoFactory; Final keeps the data alive after all requests are
    // released, and the implicitly-added UserData flag is stripped again.
    coarseLevel.Set           ("P", P, NoFactory::get());
    coarseLevel.AddKeepFlag   ("P", NoFactory::get(), MueLu::Final);    // FIXME2: Order of Remove/Add matter (data removed otherwise). Should do something about this
    coarseLevel.RemoveKeepFlag("P", NoFactory::get(), MueLu::UserData); // FIXME: This is a hack, I should change behavior of Level::Set() instead. FIXME3: Should not be removed if flag was there already
  }
  if (RFact_ != Teuchos::null) {
    RCP<Matrix> R = coarseLevel.Get<RCP<Matrix> >("R", RFact_.get());
    coarseLevel.Set           ("R", R, NoFactory::get());
    coarseLevel.AddKeepFlag   ("R", NoFactory::get(), MueLu::Final);
    coarseLevel.RemoveKeepFlag("R", NoFactory::get(), MueLu::UserData); // FIXME: This is a hack
  }
  if ((AcFact_ != Teuchos::null) && (AcFact_ != NoFactory::getRCP())) {
    RCP<Matrix> Ac = coarseLevel.Get<RCP<Matrix> >("A", AcFact_.get());
    coarseLevel.Set           ("A", Ac, NoFactory::get());
    coarseLevel.AddKeepFlag   ("A", NoFactory::get(), MueLu::Final);
    coarseLevel.RemoveKeepFlag("A", NoFactory::get(), MueLu::UserData); // FIXME: This is a hack
  }
}
// Transfers a user-provided (partial) map from the fine level to the coarse level:
// every coarse DOF reachable through a row of the tentative prolongator whose row GID
// belongs to the fine transfer map becomes part of the coarse transfer map.
void MapTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & fineLevel, Level & coarseLevel) const {
  typedef Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> OperatorClass; //TODO
  typedef Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node> MapClass;
  typedef Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node> MapFactoryClass;

  Monitor m(*this, "Contact Map transfer factory");

  // Warn (but do not abort) if the user-provided map is missing; the Get below will throw.
  if (fineLevel.IsAvailable(mapName_, mapFact_.get())==false) {
    GetOStream(Runtime0, 0) << "MapTransferFactory::Build: User provided map " << mapName_ << " not found in Level class." << std::endl;
  }

  // fetch map extractor from level
  RCP<const MapClass> transferMap = fineLevel.Get<RCP<const MapClass> >(mapName_, mapFact_.get());

  // Get default tentative prolongator factory
  // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
  // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
  RCP<const FactoryBase> tentPFact = GetFactory("P");
  if (tentPFact == Teuchos::null) {
    tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
  }

  TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("P",tentPFact.get()), Exceptions::RuntimeError,
                             "MueLu::MapTransferFactory::Build(): P (generated by TentativePFactory) not available.");
  RCP<OperatorClass> Ptent = coarseLevel.Get<RCP<OperatorClass> >("P", tentPFact.get());

  // Collect the global column ids of all Ptent rows whose GID lies in the transfer map.
  std::vector<GlobalOrdinal> coarseMapGids;
  for (size_t row = 0; row < Ptent->getNodeNumRows(); row++) {
    GlobalOrdinal grid = Ptent->getRowMap()->getGlobalElement(row);
    if (transferMap->isNodeGlobalElement(grid)) {
      Teuchos::ArrayView<const LocalOrdinal> indices;
      Teuchos::ArrayView<const Scalar> vals;
      Ptent->getLocalRowView(row, indices, vals);
      for (size_t i = 0; i < (size_t)indices.size(); i++) {
        // mark all columns in Ptent(grid,*) to be coarse Dofs of next level transferMap
        GlobalOrdinal gcid = Ptent->getColMap()->getGlobalElement(indices[i]);
        coarseMapGids.push_back(gcid);
      }
    } // end if isNodeGlobalElement(grid)
  }

  // build column maps: sort and deduplicate the collected GIDs
  std::sort(coarseMapGids.begin(), coarseMapGids.end());
  coarseMapGids.erase(std::unique(coarseMapGids.begin(), coarseMapGids.end()), coarseMapGids.end());

  // BUGFIX: use data() instead of &coarseMapGids[0] -- a process may collect no GIDs
  // at all (no row of Ptent matches the transfer map), and indexing element 0 of an
  // empty std::vector is undefined behavior. data() is well-defined for empty vectors.
  Teuchos::ArrayView<GlobalOrdinal> coarseMapGidsView(coarseMapGids.data(), coarseMapGids.size());
  Teuchos::RCP<const MapClass> coarseTransferMap =
      MapFactoryClass::Build(Ptent->getColMap()->lib(), -1, coarseMapGidsView,
                             Ptent->getColMap()->getIndexBase(), Ptent->getColMap()->getComm());

  // store map extractor in coarse level
  coarseLevel.Set(mapName_, coarseTransferMap, mapFact_.get());
}
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoarseMap_kokkos, StandardCase, Scalar, LocalOrdinal, GlobalOrdinal, Node) { # include "MueLu_UseShortNames.hpp" RUN_EPETRA_ONLY_WITH_SERIAL_NODE(Node); MueLu::VerboseObject::SetDefaultOStream(Teuchos::rcpFromRef(out)); out << "version: " << MueLu::Version() << std::endl; Level fineLevel; TestHelpers_kokkos::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(fineLevel); RCP<Matrix> A = TestHelpers_kokkos::TestFactory<SC, LO, GO, NO>::Build1DPoisson(15); fineLevel.Set("A", A); // build dummy aggregate structure RCP<Aggregates_kokkos> aggs = Teuchos::rcp(new Aggregates_kokkos(A->getRowMap())); aggs->SetNumAggregates(10); // set (local!) number of aggregates fineLevel.Set("Aggregates", aggs); // build dummy nullspace vector RCP<MultiVector> nsp = MultiVectorFactory::Build(A->getRowMap(),1); nsp->putScalar(1.0); fineLevel.Set("Nullspace", nsp); RCP<CoarseMapFactory_kokkos> coarseMapFactory = Teuchos::rcp(new CoarseMapFactory_kokkos()); coarseMapFactory->SetFactory("Aggregates", MueLu::NoFactory::getRCP()); coarseMapFactory->SetFactory("Nullspace", MueLu::NoFactory::getRCP()); fineLevel.Request("CoarseMap", coarseMapFactory.get()); coarseMapFactory->Build(fineLevel); auto myCoarseMap = fineLevel.Get<Teuchos::RCP<const Map> >("CoarseMap", coarseMapFactory.get()); TEST_EQUALITY(myCoarseMap->getMinAllGlobalIndex() == 0, true); TEST_EQUALITY(myCoarseMap->getMaxLocalIndex() == 9, true); }
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(ThresholdAFilterFactory, Basic, Scalar, LocalOrdinal, GlobalOrdinal, Node) { # include <MueLu_UseShortNames.hpp> MUELU_TESTING_SET_OSTREAM; MUELU_TESTING_LIMIT_EPETRA_SCOPE(Scalar,GlobalOrdinal,Node); out << "version: " << MueLu::Version() << std::endl; Level aLevel; TestHelpers::TestFactory<SC, LO, GO, NO>::createSingleLevelHierarchy(aLevel); RCP<Matrix> A = TestHelpers::TestFactory<SC, LO, GO, NO>::Build1DPoisson(20); //can be an empty operator RCP<ThresholdAFilterFactory> AfilterFactory0 = rcp(new ThresholdAFilterFactory("A",0.1)); // keep all RCP<ThresholdAFilterFactory> AfilterFactory1 = rcp(new ThresholdAFilterFactory("A",1.1)); // keep only diagonal RCP<ThresholdAFilterFactory> AfilterFactory2 = rcp(new ThresholdAFilterFactory("A",3)); // keep only diagonal aLevel.Set("A",A); aLevel.Request("A",AfilterFactory0.get()); AfilterFactory0->Build(aLevel); TEST_EQUALITY(aLevel.IsAvailable("A",AfilterFactory0.get()), true); RCP<Matrix> A0 = aLevel.Get< RCP<Matrix> >("A",AfilterFactory0.get()); aLevel.Release("A",AfilterFactory0.get()); TEST_EQUALITY(aLevel.IsAvailable("A",AfilterFactory0.get()), false); TEST_EQUALITY(A0->getNodeNumEntries(), A->getNodeNumEntries()); TEST_EQUALITY(A0->getGlobalNumEntries(), A->getGlobalNumEntries()); aLevel.Request("A",AfilterFactory1.get()); AfilterFactory1->Build(aLevel); TEST_EQUALITY(aLevel.IsAvailable("A",AfilterFactory1.get()), true); RCP<Matrix> A1 = aLevel.Get< RCP<Matrix> >("A",AfilterFactory1.get()); aLevel.Release("A",AfilterFactory1.get()); TEST_EQUALITY(aLevel.IsAvailable("A",AfilterFactory1.get()), false); TEST_EQUALITY(A1->getGlobalNumEntries(), A1->getGlobalNumRows()); aLevel.Request("A",AfilterFactory2.get()); AfilterFactory2->Build(aLevel); TEST_EQUALITY(aLevel.IsAvailable("A",AfilterFactory2.get()), true); RCP<Matrix> A2 = aLevel.Get< RCP<Matrix> >("A",AfilterFactory2.get()); aLevel.Release("A",AfilterFactory2.get()); TEST_EQUALITY(aLevel.IsAvailable("A",AfilterFactory2.get()), 
false); TEST_EQUALITY(A2->getGlobalNumEntries(), A2->getGlobalNumRows()); }
Teuchos::RCP<MueLu::Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Node>> gimmeUncoupledAggregates(const Teuchos::RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>>& A, Teuchos::RCP<MueLu::AmalgamationInfo<LocalOrdinal, GlobalOrdinal, Node>>& amalgInfo, bool bPhase1 = true, bool bPhase2a = true, bool bPhase2b = true, bool bPhase3 = true) { # include "MueLu_UseShortNames.hpp" Level level; TestHelpers_kokkos::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(level); level.Set("A", A); RCP<AmalgamationFactory> amalgFact = rcp(new AmalgamationFactory()); RCP<CoalesceDropFactory_kokkos> dropFact = rcp(new CoalesceDropFactory_kokkos()); dropFact->SetFactory("UnAmalgamationInfo", amalgFact); using Teuchos::ParameterEntry; // Setup aggregation factory (use default factory for graph) RCP<UncoupledAggregationFactory_kokkos> aggFact = rcp(new UncoupledAggregationFactory_kokkos()); aggFact->SetFactory("Graph", dropFact); aggFact->SetParameter("aggregation: max agg size", ParameterEntry(3)); aggFact->SetParameter("aggregation: min agg size", ParameterEntry(3)); aggFact->SetParameter("aggregation: max selected neighbors", ParameterEntry(0)); aggFact->SetParameter("aggregation: ordering", ParameterEntry(std::string("natural"))); aggFact->SetParameter("aggregation: enable phase 1", ParameterEntry(bPhase1)); aggFact->SetParameter("aggregation: enable phase 2a", ParameterEntry(bPhase2a)); aggFact->SetParameter("aggregation: enable phase 2b", ParameterEntry(bPhase2b)); aggFact->SetParameter("aggregation: enable phase 3", ParameterEntry(bPhase3)); level.Request("Aggregates", aggFact.get()); level.Request("UnAmalgamationInfo", amalgFact.get()); level.Request(*aggFact); aggFact->Build(level); auto aggregates = level.Get<RCP<Aggregates_kokkos> >("Aggregates", aggFact.get()); amalgInfo = level.Get<RCP<AmalgamationInfo> >("UnAmalgamationInfo", amalgFact.get()); level.Release("UnAmalgamationInfo", amalgFact.get()); level.Release("Aggregates", aggFact.get()); return aggregates; }
// Verifies that the Zoltan partitioning of a matrix with a random number of nonzeros
// per row and 3 DOFs per mesh node matches precomputed per-rank nonzero counts
// (available for 1 to 5 MPI ranks).
TEUCHOS_UNIT_TEST(Zoltan, Build3PDEs)
{
  typedef Teuchos::ScalarTraits<Scalar> ST;

  out << "version: " << MueLu::Version() << std::endl;
  out << std::endl;
  out << "This tests that the partitioning produced by Zoltan is \"reasonable\" for a matrix" << std::endl;
  out << "that has a random number of nonzeros per row and 3 DOFs per mesh point. Good results have been precomputed" << std::endl;
  out << "for up to 5 processors. The results are the number of nonzeros in the local matrix" << std::endl;
  out << "once the Zoltan repartitioning has been applied." << std::endl;
  out << "The results can be viewed in Paraview by enabling code guarded by the macro MUELU_VISUALIZE_REPARTITIONING" << std::endl;

  RCP<const Teuchos::Comm<int> > comm = TestHelpers::Parameters::getDefaultComm();

  // Expected results exist only for 1-5 ranks; trivially pass otherwise.
  if (comm->getSize() > 5) {
    out << std::endl;
    out << "This test must be run on 1 to 5 processes." << std::endl;
    TEST_EQUALITY(true, true);
    return;
  }

  Level level;
  RCP<FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
  level.SetFactoryManager(factoryHandler);

  // 9x9 mesh with 3 DOFs per mesh node.
  int nx=9;
  int ny=nx;
  int dofsPerNode = 3;
  GO numGlobalElements = nx*ny*dofsPerNode;
  size_t maxEntriesPerRow=30;

  RCP<const Map> map;
  int numMyNodes = numGlobalElements / dofsPerNode;
  if (comm->getSize() > 1) {
    // In parallel, make sure that the dof's associated with a node all
    // reside on the same processor.
    int numNodes = numGlobalElements / dofsPerNode;
    TEUCHOS_TEST_FOR_EXCEPTION( (numGlobalElements - numNodes * dofsPerNode) != 0, MueLu::Exceptions::RuntimeError,
                                "Number of matrix rows is not divisible by #dofs" );
    int nproc = comm->getSize();
    // The last rank takes the remainder of the nodes.
    if (comm->getRank() < nproc-1) numMyNodes = numNodes / nproc;
    else numMyNodes = numNodes - (numNodes/nproc) * (nproc-1);
    map = MapFactory::createContigMap(TestHelpers::Parameters::getLib(), numGlobalElements, numMyNodes*dofsPerNode, comm);
  } else {
    map = MapFactory::createUniformContigMap(TestHelpers::Parameters::getLib(), numGlobalElements, comm);
  }

  const size_t numMyElements = map->getNodeNumElements();
  Teuchos::ArrayView<const GlobalOrdinal> myGlobalElements = map->getNodeElementList();

  RCP<Matrix> A = rcp(new CrsMatrixWrap(map, 1)); // Force underlying linear algebra library to allocate more
                                                  // memory on the fly. While not super efficient, this
                                                  // ensures that no zeros are being stored. Thus, from
                                                  // Zoltan's perspective the matrix is imbalanced.

  // Populate CrsMatrix with random number of entries (up to maxEntriesPerRow) per row.
  // Create a vector with random integer entries in [1,maxEntriesPerRow].
  ST::seedrandom(666*comm->getRank());
  RCP<Xpetra::Vector<LO,LO,GO,NO> > entriesPerRow = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(map,false);
  Teuchos::ArrayRCP<LO> eprData = entriesPerRow->getDataNonConst(0);
  for (Teuchos::ArrayRCP<LO>::iterator i=eprData.begin(); i!=eprData.end(); ++i) {
    *i = (LO)(std::floor(((ST::random()+1)*0.5*maxEntriesPerRow)+1));
  }

  RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  fos->setOutputToRootOnly(-1);

  // Fill each row i with eprData[i] ones, in a contiguous band of valid column indices
  // ending at the diagonal.
  Teuchos::Array<Scalar> vals(maxEntriesPerRow);
  Teuchos::Array<GO> cols(maxEntriesPerRow);
  for (size_t i = 0; i < numMyElements; ++i) {
    Teuchos::ArrayView<SC> av(&vals[0],eprData[i]);
    Teuchos::ArrayView<GO> iv(&cols[0],eprData[i]);
    //stick in ones for values
    for (LO j=0; j< eprData[i]; ++j) vals[j] = ST::one();
    //figure out valid column indices
    GO start = std::max(myGlobalElements[i]-eprData[i]+1,0);
    for (LO j=0; j< eprData[i]; ++j) cols[j] = start+j;
    A->insertGlobalValues(myGlobalElements[i], iv, av);
  }
  A->fillComplete();

  // Now treat the matrix as if it has 3 DOFs per node.
  A->SetFixedBlockSize(dofsPerNode);
  level.Set("A",A);

  //build coordinates
  Teuchos::ParameterList list;
  list.set("nx",nx);
  list.set("ny",ny);
  RCP<const Map> coalescedMap = MapFactory::createContigMap(TestHelpers::Parameters::getLib(), numGlobalElements/dofsPerNode, numMyNodes, comm);
  RCP<MultiVector> XYZ = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("2D",coalescedMap,list);

  // XYZ are the "coalesce" coordinates as it has been generated for 1 DOF/node and we are using them for 3 DOFS/node
  // level.Set("Coordinates",XYZ); "Coordinates" == uncoalesce. "X,Y,ZCoordinates" == coalesce
  {
    RCP<MultiVector> coordinates = XYZ;

    // making a copy because I don't want to keep 'open' the Xpetra_MultiVector
    if (coordinates->getNumVectors() >= 1) {
      Teuchos::ArrayRCP<const SC> coord = coordinates->getData(0);
      Teuchos::ArrayRCP<SC> coordCpy(coord.size());
      for(int i=0; i<coord.size(); i++) {
        coordCpy[i] = coord[i];
      }
      level.Set("XCoordinates", coordCpy);
      //std::cout << coordCpy << std::endl;
    }

    if (coordinates->getNumVectors() >= 2) {
      Teuchos::ArrayRCP<const SC> coord = coordinates->getData(1);
      Teuchos::ArrayRCP<SC> coordCpy(coord.size());
      for(int i=0; i<coord.size(); i++) {
        coordCpy[i] = coord[i];
      }
      level.Set("YCoordinates", coordCpy);
    }

    /*if (coordinates->getNumVectors() >= 3) {
      Teuchos::ArrayRCP<const SC> coord = coordinates->getData(2);
      Teuchos::ArrayRCP<SC> coordCpy(coord.size());
      for(int i=0; i<coord.size(); i++) {
        coordCpy[i] = coord[i];
      }
      level.Set("ZCoordinates", coordCpy);
    }*/
  }

  //coalescedMap->describe(*fos,Teuchos::VERB_EXTREME);
  //sleep(1); comm->barrier();
  //XYZ->describe(*fos,Teuchos::VERB_EXTREME);

  // One partition per MPI rank.
  LO numPartitions = comm->getSize();
  level.Set("number of partitions",numPartitions);
  RCP<ZoltanInterface> zoltan = rcp(new ZoltanInterface());
  //zoltan->SetOutputLevel(0); //options are 0=none, 1=summary, 2=every pid prints
  level.Request("Partition",zoltan.get());
  zoltan->Build(level);

  RCP<Xpetra::Vector<GO,LO,GO,NO> > decomposition = level.Get<RCP<Xpetra::Vector<GO,LO,GO,NO> > >("Partition",zoltan.get());
  /* //temporary code to have the trivial decomposition (no change)
  ArrayRCP<GO> decompEntries = decomposition->getDataNonConst(0);
  for (ArrayRCP<GO>::iterator i = decompEntries.begin(); i != decompEntries.end(); ++i)
    *i = comm->getRank();
  decompEntries=Teuchos::null;
  */

  //Create vector whose local length is the global number of partitions.
  //This vector will record the local number of nonzeros associated with each partition.
  Teuchos::Array<GO> parts(numPartitions);
  for (int i=0; i<numPartitions; ++i) parts[i] = i;
  Teuchos::ArrayView<GO> partsView(&parts[0],numPartitions);
  RCP<const Map> partitionMap = MapFactory::Build(TestHelpers::Parameters::getLib(),
                                                  Teuchos::OrdinalTraits<global_size_t>::invalid(), partsView,
                                                  map->getIndexBase(),comm);
  RCP<Xpetra::Vector<LO,LO,GO,NO> > localPartsVec = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(partitionMap);

  RCP<Xpetra::Vector<LO,LO,GO,NO> > nnzPerRow = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(A->getRowMap());
  // NOTE(review): nnzPerRow is a Vector<LO,...> but its data is bound to ArrayRCP<GO>
  // below -- this presumably only compiles when LO and GO are the same type; verify.
  Teuchos::ArrayRCP<GO> nnzData = nnzPerRow->getDataNonConst(0);

  //For the local rows in each partition, tally up the number of nonzeros. This is what
  //Zoltan should be load-balancing.
  Teuchos::ArrayRCP<GO> lpvData = localPartsVec->getDataNonConst(0);
  Teuchos::ArrayRCP<const GO> decompData = decomposition->getData(0);
  for (size_t i=0; i<decomposition->getLocalLength();++i) {
    Teuchos::ArrayView<const LO> c;
    Teuchos::ArrayView<const SC> v;
    A->getLocalRowView(i,c,v);
    lpvData[decompData[i]] += v.size();
    nnzData[i] = v.size();
  }

  // Release the views before communicating.
  lpvData = Teuchos::null;
  decompData = Teuchos::null;
  nnzData = Teuchos::null;

  /*
  if (comm->getRank() == 0) std::cout << "nnz per row" << std::endl;
  nnzPerRow->describe(*fos,Teuchos::VERB_EXTREME);
  if (comm->getRank() == 0) std::cout << "Row-to-partition assignment (from Zoltan)" << std::endl;
  decomposition->describe(*fos,Teuchos::VERB_EXTREME);
  if (comm->getRank() == 0) std::cout << "#nonzeros per partition" << std::endl;
  localPartsVec->describe(*fos,Teuchos::VERB_EXTREME);
  */

  //Send the local nnz tallies to pid 0, which can report the global sums.
  size_t mysize=1;
  if (comm->getRank() == 0) mysize = numPartitions;
  RCP<const Map> globalTallyMap = MapFactory::Build(TestHelpers::Parameters::getLib(),
                                                    Teuchos::OrdinalTraits<global_size_t>::invalid(),
                                                    mysize,
                                                    map->getIndexBase(),
                                                    comm);
  RCP<Xpetra::Vector<LO,LO,GO,NO> > globalTallyVec = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(globalTallyMap);
  RCP<const Export> exporter = ExportFactory::Build( partitionMap, globalTallyMap);
  globalTallyVec->doExport(*localPartsVec,*exporter,Xpetra::ADD);

  // Precomputed per-partition nonzero counts for 1-5 ranks.
  ArrayRCP<GO> expectedResults(numPartitions);
  switch (comm->getSize()) {
    case 1:
      expectedResults[0] = 3951;
      break;
    case 2:
      expectedResults[0] = 1955;
      expectedResults[1] = 1910;
      break;
    case 3:
      expectedResults[0] = 1326;
      expectedResults[1] = 1340;
      expectedResults[2] = 1321;
      break;
    case 4:
      expectedResults[0] = 950;
      expectedResults[1] = 922;
      expectedResults[2] = 908;
      expectedResults[3] = 936;
      break;
    case 5:
      expectedResults[0] = 774;
      expectedResults[1] = 735;
      expectedResults[2] = 726;
      expectedResults[3] = 771;
      expectedResults[4] = 759;
      break;
    default:
      break;
  };

  ArrayRCP<const LO> gtvData = globalTallyVec->getData(0);

#ifdef __linux__
  // Only rank 0 holds the global tallies (mysize == numPartitions there).
  out << "Checking results..." << std::endl;
  for (int i=0; i<numPartitions; ++i) {
    if (comm->getRank() == 0)
      TEST_EQUALITY( expectedResults[i], gtvData[i]);
  }
#endif

#ifdef MUELU_VISUALIZE_REPARTITIONING
  //
  //Now write everything to a comma-separated list that ParaView can grok
  //
  Teuchos::ArrayRCP<const Scalar> X = XYZ->getData(0);
  Teuchos::ArrayRCP<const Scalar> Y = XYZ->getData(1);
  Teuchos::ArrayRCP<const GO> D = decomposition->getData(0);
  RCP<std::ofstream> outFile;
  std::string fileName = "zoltanResults.csv";

  //write header information
  if (comm->getRank() == 0) {
    outFile = rcp(new std::ofstream(fileName.c_str()));
    *outFile << "x coord, y coord, z coord, partition, row weight" << std::endl;
  }
  comm->barrier();

  //append coordinates
  nnzData = nnzPerRow->getDataNonConst(0);
  for (int j=0; j<comm->getSize(); ++j) {
    int mypid = comm->getRank();
    if (mypid == j) {
      outFile = rcp(new std::ofstream(fileName.c_str(),std::ios::app));
      int blockSize = A->GetFixedBlockSize();
      //Coordinates are for coalesced system, D is for uncoalesced
      for (int i=0; i < D.size()/blockSize; ++i) {
        int nnz=0;
        for (int k=0; k<blockSize; ++k) nnz += nnzData[i*blockSize+k];
        *outFile << X[i] << ", " << Y[i] << ", " << ST::zero() << ", "
                 << D[i*blockSize] << ", " << nnz << std::endl;
      }
    }
  } //for (int i=0; i<comm->getSize(); ++i)

  out << std::endl;
  out << "You can view the Zoltan decomposition in ParaView 3.10.1 or later:" << std::endl;
  out << " 1) Load the data file " << fileName << "." << std::endl;
  out << " 2) Run the filter Filters/ Alphabetical/ Table To Points." << std::endl;
  out << " 3) Tell ParaView what columns are the X, Y and Z coordinates." << std::endl;
  out << " 4) Split screen horizontally (Icon, top right)." << std::endl;
  out << " 5) Click on the eyeball in the Pipeline Browser to see the points." << std::endl;
  out << " 6) Under the Display tab, you can color points by scalar value and resize them." << std::endl;
  out << std::endl;
  out << " To display row weights next to each point:" << std::endl;
  out << " 1) Click the \"Select Points Through\" button (2nd row) and select all points." << std::endl;
  out << " 2) Under View pull-down menu, choose the \"Selection Inspector\"." << std::endl;
  out << " 3) Under the Point Label, check the Visible box and set the Label Mode to \"row weight\"." << std::endl;
#endif
} //Build3PDEs
// Builds a block-diagonal prolongator (or restrictor, in restriction mode) for a
// BlockedCrsMatrix A: each sub-factory-manager produces one diagonal block, and the
// full range/domain maps are assembled from the sub-block maps.
void BlockedPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level &coarseLevel) const {
  typedef Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> MatrixClass;
  typedef Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixClass;
  typedef Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixWrapClass;
  typedef Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> BlockedCrsOMatrix;
  typedef Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node> MapClass;
  typedef Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node> MapFactoryClass;
  typedef Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> MapExtractorClass;
  typedef Xpetra::MapExtractorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node> MapExtractorFactoryClass;

  //Teuchos::RCP<Teuchos::FancyOStream> fos = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout));
  //std::ostringstream buf; buf << coarseLevel.GetLevelID();

  // Level Get
  //RCP<Matrix> A = fineLevel. Get< RCP<Matrix> >("A", AFact_.get());

  // IMPORTANT: use main factory manager for getting A
  RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
  RCP<BlockedCrsOMatrix> bA = Teuchos::rcp_dynamic_cast<BlockedCrsOMatrix>(A);
  TEUCHOS_TEST_FOR_EXCEPTION(bA==Teuchos::null, Exceptions::BadCast, "MueLu::BlockedPFactory::Build: input matrix A is not of type BlockedCrsMatrix! error.");

  // plausibility check: one sub-factory-manager per block row/column
  TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != FactManager_.size(), Exceptions::RuntimeError, "MueLu::BlockedPFactory::Build: number of block rows of A does not match number of SubFactoryManagers. error.");
  TEUCHOS_TEST_FOR_EXCEPTION(bA->Cols() != FactManager_.size(), Exceptions::RuntimeError, "MueLu::BlockedPFactory::Build: number of block cols of A does not match number of SubFactoryManagers. error.");

  // build blocked prolongator
  std::vector<RCP<Matrix> > subBlockP;
  std::vector<RCP<const MapClass> > subBlockPRangeMaps;
  std::vector<RCP<const MapClass > > subBlockPDomainMaps;
  std::vector<GO> fullRangeMapVector;   // all range-map GIDs of all sub-blocks
  std::vector<GO> fullDomainMapVector;  // all domain-map GIDs of all sub-blocks
  subBlockP.reserve(FactManager_.size());           // reserve size for block P operators
  subBlockPRangeMaps.reserve(FactManager_.size());  // reserve size for block P operators
  subBlockPDomainMaps.reserve(FactManager_.size()); // reserve size for block P operators

  // build and store the subblocks and the corresponding range and domain maps
  // since we put together the full range and domain map from the submaps we do not have
  // to use the maps from blocked A
  std::vector<Teuchos::RCP<const FactoryManagerBase> >::const_iterator it;
  for(it = FactManager_.begin(); it!=FactManager_.end(); ++it) {
    // Temporarily make *it the active factory manager on both levels (scope-bound).
    SetFactoryManager fineSFM  (rcpFromRef(fineLevel),   *it);
    SetFactoryManager coarseSFM(rcpFromRef(coarseLevel), *it);

    if(!restrictionMode_) {
      subBlockP.push_back(coarseLevel.Get<RCP<Matrix> >("P", (*it)->GetFactory("P").get())); // create and return block P operator
    } else {
      subBlockP.push_back(coarseLevel.Get<RCP<Matrix> >("R", (*it)->GetFactory("R").get())); // create and return block R operator
    }

    // check if prolongator/restrictor operators have strided maps
    TEUCHOS_TEST_FOR_EXCEPTION(subBlockP.back()->IsView("stridedMaps")==false, Exceptions::BadCast, "MueLu::BlockedPFactory::Build: subBlock P operator has no strided map information. error.");

    // append strided row map (= range map) to list of range maps.
    Teuchos::RCP<const Map> rangeMap = subBlockP.back()->getRowMap("stridedMaps"); /* getRangeMap(); //*/
    subBlockPRangeMaps.push_back(rangeMap);

    // use plain range map to determine the DOF ids
    Teuchos::ArrayView< const GlobalOrdinal > nodeRangeMap = subBlockP.back()->getRangeMap()->getNodeElementList(); //subBlockPRangeMaps.back()->getNodeElementList();
    fullRangeMapVector.insert(fullRangeMapVector.end(), nodeRangeMap.begin(), nodeRangeMap.end());
    // NOTE(review): re-sorting the whole accumulated vector on every loop iteration;
    // a single sort after the loop would presumably suffice -- verify before changing.
    sort(fullRangeMapVector.begin(), fullRangeMapVector.end());

    // append strided col map (= domain map) to list of range maps.
    Teuchos::RCP<const Map> domainMap = subBlockP.back()->getColMap("stridedMaps"); /* getDomainMap(); //*/
    subBlockPDomainMaps.push_back(domainMap);

    // use plain domain map to determine the DOF ids
    Teuchos::ArrayView< const GlobalOrdinal > nodeDomainMap = subBlockP.back()->getDomainMap()->getNodeElementList(); //subBlockPDomainMaps.back()->getNodeElementList();
    fullDomainMapVector.insert(fullDomainMapVector.end(), nodeDomainMap.begin(), nodeDomainMap.end());
    sort(fullDomainMapVector.begin(), fullDomainMapVector.end());
  }

  // extract map index base from maps of blocked A
  GO rangeIndexBase = 0;
  GO domainIndexBase = 0;
  if(!restrictionMode_) {
    // prolongation mode: just use index base of range and domain map of bA
    rangeIndexBase = bA->getRangeMap()->getIndexBase();
    domainIndexBase= bA->getDomainMap()->getIndexBase();
  } else {
    // restriction mode: switch range and domain map for blocked restriction operator
    rangeIndexBase = bA->getDomainMap()->getIndexBase();
    domainIndexBase= bA->getRangeMap()->getIndexBase();
  }

  // build full range map.
  // If original range map has striding information, then transfer it to the new range map
  RCP<const MapExtractorClass> rangeAMapExtractor = bA->getRangeMapExtractor();
  // NOTE(review): &fullRangeMapVector[0] is undefined behavior if the vector is empty
  // (conceivable on a rank owning no rows); .data() would be safe -- verify.
  Teuchos::ArrayView<GO> fullRangeMapGIDs(&fullRangeMapVector[0],fullRangeMapVector.size());
  Teuchos::RCP<const StridedMap> stridedRgFullMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rangeAMapExtractor->getFullMap());
  Teuchos::RCP<const Map > fullRangeMap = Teuchos::null;
  if(stridedRgFullMap != Teuchos::null) {
    std::vector<size_t> stridedData = stridedRgFullMap->getStridingData();
    fullRangeMap =
      StridedMapFactory::Build(
        bA->getRangeMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        fullRangeMapGIDs,
        rangeIndexBase,
        stridedData,
        bA->getRangeMap()->getComm(),
        stridedRgFullMap->getStridedBlockId(),
        stridedRgFullMap->getOffset());
  } else {
    fullRangeMap =
      MapFactory::Build(
        bA->getRangeMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        fullRangeMapGIDs,
        rangeIndexBase,
        bA->getRangeMap()->getComm());
  }

  // build full domain map (same scheme as above)
  RCP<const MapExtractorClass> domainAMapExtractor = bA->getDomainMapExtractor();
  Teuchos::ArrayView<GO> fullDomainMapGIDs(&fullDomainMapVector[0],fullDomainMapVector.size());
  Teuchos::RCP<const StridedMap> stridedDoFullMap = Teuchos::rcp_dynamic_cast<const StridedMap>(domainAMapExtractor->getFullMap());
  Teuchos::RCP<const Map > fullDomainMap = Teuchos::null;
  if(stridedDoFullMap != Teuchos::null) {
    // NOTE(review): this check can never fire -- we are already inside the
    // stridedDoFullMap != null branch.
    TEUCHOS_TEST_FOR_EXCEPTION(stridedDoFullMap==Teuchos::null, Exceptions::BadCast, "MueLu::BlockedPFactory::Build: full map in domain map extractor has no striding information! error.");
    std::vector<size_t> stridedData2 = stridedDoFullMap->getStridingData();
    fullDomainMap =
      StridedMapFactory::Build(
        bA->getDomainMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        fullDomainMapGIDs,
        domainIndexBase,
        stridedData2,
        bA->getDomainMap()->getComm(),
        stridedDoFullMap->getStridedBlockId(),
        stridedDoFullMap->getOffset());
  } else {
    fullDomainMap =
      MapFactory::Build(
        bA->getDomainMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        fullDomainMapGIDs,
        domainIndexBase,
        bA->getDomainMap()->getComm());
  }

  // build map extractors
  Teuchos::RCP<const MapExtractorClass> rangeMapExtractor  = MapExtractorFactoryClass::Build(fullRangeMap,  subBlockPRangeMaps);
  Teuchos::RCP<const MapExtractorClass> domainMapExtractor = MapExtractorFactoryClass::Build(fullDomainMap, subBlockPDomainMaps);

  // Assemble the block-diagonal operator from the sub-blocks.
  Teuchos::RCP<BlockedCrsOMatrix> bP = Teuchos::rcp(new BlockedCrsOMatrix(rangeMapExtractor,domainMapExtractor,10));
  for(size_t i = 0; i<subBlockPRangeMaps.size(); i++) {
    Teuchos::RCP<CrsMatrixWrapClass> crsOpii = Teuchos::rcp_dynamic_cast<CrsMatrixWrapClass>(subBlockP[i]);
    Teuchos::RCP<CrsMatrixClass> crsMatii = crsOpii->getCrsMatrix();
    bP->setMatrix(i,i,crsMatii);
  }
  bP->fillComplete();
  //bP->describe(*fos,Teuchos::VERB_EXTREME);

  // Level Set
  if(!restrictionMode_) {
    // prolongation factory is in prolongation mode
    coarseLevel.Set("P", Teuchos::rcp_dynamic_cast<MatrixClass>(bP), this);
  } else {
    // prolongation factory is in restriction mode
    // we do not have to transpose the blocked R operator since the subblocks on the diagonal
    // are already valid R subblocks
    coarseLevel.Set("R", Teuchos::rcp_dynamic_cast<MatrixClass>(bP), this);
  }
}
void SubBlockAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & currentLevel) const {
  typedef Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>           OMatrix;            //TODO
  typedef Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>        CrsMatrixClass;     //TODO
  typedef Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>    CrsMatrixWrapClass; //TODO
  typedef Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> BlockedCrsOMatrix;  //TODO
  typedef Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node>                  MapExtractorClass;

  // Extracts the (row,col) subblock of a blocked operator "A" stored on the level,
  // attaches (or reconstructs) strided map information to it, and republishes it as
  // this factory's "A" output.

  // Requested block position within the blocked operator.
  const ParameterList & pL = GetParameterList();
  size_t row = Teuchos::as<size_t>(pL.get<int>("block row"));
  size_t col = Teuchos::as<size_t>(pL.get<int>("block col"));

  RCP<OMatrix> Ain = Get< RCP<OMatrix> >(currentLevel, "A");
  RCP<BlockedCrsOMatrix> bA = Teuchos::rcp_dynamic_cast<BlockedCrsOMatrix>(Ain);

  TEUCHOS_TEST_FOR_EXCEPTION(bA==Teuchos::null, Exceptions::BadCast,
      "MueLu::SubBlockAFactory::Build: input matrix A is not of type BlockedCrsMatrix! error.");
  // Valid block indices are 0..Rows()-1 and 0..Cols()-1, so the bounds checks must
  // use >=. (The former checks used > and let row==Rows() / col==Cols() slip through
  // to getMatrix().)
  TEUCHOS_TEST_FOR_EXCEPTION(row >= bA->Rows(), Exceptions::RuntimeError,
      "MueLu::SubBlockAFactory::Build: block row index " << row << " out of range [0.." << bA->Rows()-1 << "]! error.");
  TEUCHOS_TEST_FOR_EXCEPTION(col >= bA->Cols(), Exceptions::RuntimeError,
      "MueLu::SubBlockAFactory::Build: block col index " << col << " out of range [0.." << bA->Cols()-1 << "]! error.");

  // Extract the requested subblock and wrap it so it can be handled as a plain Matrix.
  Teuchos::RCP<CrsMatrixClass> A = bA->getMatrix(row, col);
  Teuchos::RCP<CrsMatrixWrapClass> Op = Teuchos::rcp(new CrsMatrixWrapClass(A));

  //////////////// EXPERIMENTAL
  // extract striding information from RangeMapExtractor
  Teuchos::RCP<const MapExtractorClass> rgMapExtractor = bA->getRangeMapExtractor();
  Teuchos::RCP<const MapExtractorClass> doMapExtractor = bA->getDomainMapExtractor();

  Teuchos::RCP<const Map> rgMap = rgMapExtractor->getMap(row);
  Teuchos::RCP<const Map> doMap = doMapExtractor->getMap(col);

  Teuchos::RCP<const StridedMap> srgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rgMap);
  Teuchos::RCP<const StridedMap> sdoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(doMap);

  // If the sub-range-map carries no striding information of its own, rebuild a strided
  // map from the striding data of the full range map.
  if(srgMap == Teuchos::null) {
    Teuchos::RCP<const Map> fullRgMap = rgMapExtractor->getFullMap();
    Teuchos::RCP<const StridedMap> sFullRgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(fullRgMap);
    TEUCHOS_TEST_FOR_EXCEPTION(sFullRgMap==Teuchos::null, Exceptions::BadCast,
        "MueLu::SubBlockAFactory::Build: full rangeMap is not a strided map");
    std::vector<size_t> stridedData = sFullRgMap->getStridingData();
    if(stridedData.size() == 1 && row > 0)
      // we have block matrices. use striding block information 0
      srgMap = StridedMapFactory::Build(rgMap, stridedData, 0, sFullRgMap->getOffset());
    else
      // we have strided matrices. use striding information of the corresponding block
      srgMap = StridedMapFactory::Build(rgMap, stridedData, row, sFullRgMap->getOffset());
  }

  // Same reconstruction for the domain side.
  if(sdoMap == Teuchos::null) {
    Teuchos::RCP<const Map> fullDoMap = doMapExtractor->getFullMap();
    Teuchos::RCP<const StridedMap> sFullDoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(fullDoMap);
    TEUCHOS_TEST_FOR_EXCEPTION(sFullDoMap==Teuchos::null, Exceptions::BadCast,
        "MueLu::SubBlockAFactory::Build: full domainMap is not a strided map");
    std::vector<size_t> stridedData2 = sFullDoMap->getStridingData();
    if(stridedData2.size() == 1 && col > 0)
      // we have block matrices. use striding block information 0
      sdoMap = StridedMapFactory::Build(doMap, stridedData2, 0, sFullDoMap->getOffset());
    else
      // we have strided matrices. use striding information of the corresponding block
      sdoMap = StridedMapFactory::Build(doMap, stridedData2, col, sFullDoMap->getOffset());
  }

  TEUCHOS_TEST_FOR_EXCEPTION(srgMap==Teuchos::null, Exceptions::BadCast,
      "MueLu::SubBlockAFactory::Build: rangeMap " << row << " is not a strided map");
  TEUCHOS_TEST_FOR_EXCEPTION(sdoMap==Teuchos::null, Exceptions::BadCast,
      "MueLu::SubBlockAFactory::Build: domainMap " << col << " is not a strided map");

  GetOStream(Statistics1) << "A(" << row << "," << col << ") has strided maps: range map fixed block size="
      << srgMap->getFixedBlockSize() << " strided block id = " << srgMap->getStridedBlockId()
      << ", domain map fixed block size=" << sdoMap->getFixedBlockSize()
      << ", strided block id=" << sdoMap->getStridedBlockId() << std::endl;

  // Replace any stale view before attaching the (re)built strided maps.
  if(Op->IsView("stridedMaps") == true) Op->RemoveView("stridedMaps");
  Op->CreateView("stridedMaps", srgMap, sdoMap);
  TEUCHOS_TEST_FOR_EXCEPTION(Op->IsView("stridedMaps")==false, Exceptions::RuntimeError,
      "MueLu::SubBlockAFactory::Build: failed to set stridedMaps");
  //////////////// EXPERIMENTAL

  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<OMatrix>(Op), this);
}
void AlgebraicPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map> permRowMap, Level & currentLevel, const FactoryBase* genFactory) const { #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT const Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm(); int numProcs = comm->getSize(); int myRank = comm->getRank(); /*if( permRowMap == Teuchos::null ) { permRowMap = A->getRowMap(); // use full row map of A }*/ size_t nDofsPerNode = 1; if (A->IsView("stridedMaps")) { Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps"); nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize(); } //GetOStream(Runtime0, 0) << "Perform generation of permutation operators on " << mapName_ << " map with " << permRowMap->getGlobalNumElements() << " elements" << std::endl; std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates; std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries; std::vector<Scalar> Weights; // loop over all local rows in matrix A and keep diagonal entries if corresponding // matrix rows are not contained in permRowMap for (size_t row = 0; row < A->getRowMap()->getNodeNumElements(); row++) { GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row); if(permRowMap->isNodeGlobalElement(grow) == true) continue; size_t nnz = A->getNumEntriesInLocalRow(row); // extract local row information from matrix Teuchos::ArrayView<const LocalOrdinal> indices; Teuchos::ArrayView<const Scalar> vals; A->getLocalRowView(row, indices, vals); TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? 
Error."); // find column entry with max absolute value GlobalOrdinal gMaxValIdx = 0; Scalar norm1 = 0.0; Scalar maxVal = 0.0; for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) { norm1 += std::abs(vals[j]); if(std::abs(vals[j]) > maxVal) { maxVal = std::abs(vals[j]); gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]); } } if(grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!! keepDiagonalEntries.push_back(std::make_pair(grow,grow)); } ////////// // handle rows that are marked to be relevant for permutations for (size_t row = 0; row < permRowMap->getNodeNumElements(); row++) { GlobalOrdinal grow = permRowMap->getGlobalElement(row); LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow); size_t nnz = A->getNumEntriesInLocalRow(lArow); // extract local row information from matrix Teuchos::ArrayView<const LocalOrdinal> indices; Teuchos::ArrayView<const Scalar> vals; A->getLocalRowView(lArow, indices, vals); TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error."); // find column entry with max absolute value GlobalOrdinal gMaxValIdx = 0; Scalar norm1 = 0.0; Scalar maxVal = 0.0; for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) { norm1 += std::abs(vals[j]); if(std::abs(vals[j]) > maxVal) { maxVal = std::abs(vals[j]); gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]); } } if(std::abs(maxVal) > 0.0) { // keep only max Entries \neq 0.0 permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx)); Weights.push_back(maxVal/(norm1*Teuchos::as<Scalar>(nnz))); } else { std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" 
<< std::endl; } } // sort Weights in descending order std::vector<int> permutation; sortingPermutation(Weights,permutation); // create new vector with exactly one possible entry for each column // each processor which requests the global column id gcid adds 1 to gColVec // gColVec will be summed up over all processors and communicated to gDomVec // which is based on the non-overlapping domain map of A. Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap()); Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap()); gColVec->putScalar(0.0); gDomVec->putScalar(0.0); // put in all keep diagonal entries for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) { gColVec->sumIntoGlobalValue((*p).second,1.0); } Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap()); gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); // communicate blocked gcolids to all procs gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory std::map<GlobalOrdinal, Scalar> gColId2Weight; Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0); for(size_t i = 0; i < permutedDiagCandidates.size(); ++i) { // loop over all candidates std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]]; GlobalOrdinal grow = pp.first; GlobalOrdinal gcol = pp.second; LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol); //Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0); if(ddata[lcol] > 0.0){ continue; // skip lcol: column already handled by another row } // mark column as already taken ddata[lcol]++; permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol)); gColId2Weight[gcol] = Weights[permutation[i]]; } // communicate how often each column index is requested by the different procs 
gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); // probably not needed // TODO check me //***************************************************************************************** // first communicate ALL global ids of column indices which are requested by more // than one proc to all other procs // detect which global col indices are requested by more than one proc // and store them in the multipleColRequests vector std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also // requested by another processor. This is possible, since they are stored // in gDomVec which is based on the nonoverlapping domain map. That is, each // global col id is handled by exactly one proc. std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) { Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0); if(arrDomVec[sz] > 1.0) { multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz)); } else if(arrDomVec[sz] == 0.0) { unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz)); } } // communicate the global number of column indices which are requested by more than one proc LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size()); LocalOrdinal globalMultColRequests = 0; // sum up all entries in multipleColRequests over all processors sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests); if(globalMultColRequests > 0) { // special handling: two processors request the same global column id. 
// decide which processor gets it // distribute number of multipleColRequests to all processors // each processor stores how many column ids for exchange are handled by the cur proc std::vector<GlobalOrdinal> numMyMultColRequests(numProcs,0); std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs,0); numMyMultColRequests[myRank] = localMultColRequests; Teuchos::reduceAll(*comm,Teuchos::REDUCE_MAX,numProcs,&numMyMultColRequests[0],&numGlobalMultColRequests[0]); // communicate multipleColRequests entries to all processors int nMyOffset = 0; for (int i=0; i<myRank-1; i++) nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights GlobalOrdinal zero=0; std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests,zero); std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests,zero); // loop over all local column GIDs that are also requested by other procs for(size_t i = 0; i < multipleColRequests.size(); i++) { procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ? } // template ordinal, package (double) Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests), &procMultRequestedColIds[0], &global_procMultRequestedColIds[0]); // loop over global_procOverlappingWeights and eliminate wrong entries... 
for (size_t k = 0; k<global_procMultRequestedColIds.size(); k++) { GlobalOrdinal globColId = global_procMultRequestedColIds[k]; std::vector<Scalar> MyWeightForColId(numProcs,0); std::vector<Scalar> GlobalWeightForColId(numProcs,0); if(gColVec->getMap()->isNodeGlobalElement(globColId)) { MyWeightForColId[myRank] = gColId2Weight[globColId]; } else { MyWeightForColId[myRank] = 0.0; } Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &MyWeightForColId[0], &GlobalWeightForColId[0]); if(gColVec->getMap()->isNodeGlobalElement(globColId)) { // note: 2 procs could have the same weight for a column index. // pick the first one. Scalar winnerValue = 0.0; int winnerProcRank = 0; for (int proc = 0; proc < numProcs; proc++) { if(GlobalWeightForColId[proc] > winnerValue) { winnerValue = GlobalWeightForColId[proc]; winnerProcRank = proc; } } // winnerProcRank is the winner for handling globColId. // winnerProcRank is unique (even if two procs have the same weight for a column index) if(myRank != winnerProcRank) { // remove corresponding entry from permutedDiagCandidatesFiltered typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin(); while(p != permutedDiagCandidatesFiltered.end() ) { if((*p).second == globColId) p = permutedDiagCandidatesFiltered.erase(p); else p++; } } } // end if isNodeGlobalElement } // end loop over global_procOverlappingWeights and eliminate wrong entries... 
} // end if globalMultColRequests > 0 // put together all pairs: //size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size(); std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs; RowColPairs.insert( RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end()); RowColPairs.insert( RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end()); #ifdef DEBUG_OUTPUT //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& // plausibility check gColVec->putScalar(0.0); gDomVec->putScalar(0.0); typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin(); while(pl != RowColPairs.end() ) { //GlobalOrdinal ik = (*pl).first; GlobalOrdinal jk = (*pl).second; gColVec->sumIntoGlobalValue(jk,1.0); pl++; } gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) { Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0); if(arrDomVec[sz] > 1.0) { GetOStream(Runtime0,0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl; } else if(arrDomVec[sz] == 0.0) { GetOStream(Runtime0,0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl; } } //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& #endif ////////////////////////////////////////////////// // assumption: on each processor RowColPairs now contains // a valid set of (row,column) pairs, where the row entries // are a subset of the processor's rows and the column entries // are unique throughout all processors. // Note: the RowColPairs are only defined for a subset of all rows, // so there might be rows without an entry in RowColPairs. // It can be, that some rows seem to be missing in RowColPairs, since // the entry in that row with maximum absolute value has been reserved // by another row already (e.g. as already diagonal dominant row outside // of perRowMap). 
// In fact, the RowColPairs vector only defines the (row,column) pairs // that will be definitely moved to the diagonal after permutation. #ifdef DEBUG_OUTPUT // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) { // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl; // } // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) // { //// if((*p).first != (*p).second) std::cout << "difference: " << (*p).first << " " << (*p).second << std::endl; // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl; // } // std::cout << "\n"; #endif // vectors to store permutation information Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap()); Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map) Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map) Teuchos::ArrayRCP< Scalar > PpermData = Pperm->getDataNonConst(0); Teuchos::ArrayRCP< Scalar > QpermData = Qperm->getDataNonConst(0); Pperm->putScalar(0.0); Qperm->putScalar(0.0); lQperm->putScalar(0.0); // setup exporter for Qperm Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap()); Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap()); Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map) Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map) Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use Teuchos::ArrayRCP< Scalar > RowIdStatusArray = RowIdStatus->getDataNonConst(0); Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0); 
Teuchos::ArrayRCP< Scalar > lColIdStatusArray = lColIdStatus->getDataNonConst(0); Teuchos::ArrayRCP< Scalar > ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this RowIdStatus->putScalar(0.0); ColIdStatus->putScalar(0.0); lColIdStatus->putScalar(0.0); ColIdUsed->putScalar(0.0); // no column ids are used // count wide-range permutations // a wide-range permutation is defined as a permutation of rows/columns which do not // belong to the same node LocalOrdinal lWideRangeRowPermutations = 0; GlobalOrdinal gWideRangeRowPermutations = 0; LocalOrdinal lWideRangeColPermutations = 0; GlobalOrdinal gWideRangeColPermutations = 0; // run 1: mark all "identity" permutations typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin(); while(p != RowColPairs.end() ) { GlobalOrdinal ik = (*p).first; GlobalOrdinal jk = (*p).second; LocalOrdinal lik = A->getRowMap()->getLocalElement(ik); LocalOrdinal ljk = A->getColMap()->getLocalElement(jk); if(RowIdStatusArray[lik] == 0.0) { RowIdStatusArray[lik] = 1.0; // use this row id lColIdStatusArray[ljk] = 1.0; // use this column id Pperm->replaceLocalValue(lik, ik); lQperm->replaceLocalValue(ljk, ik); // use column map ColIdUsed->replaceGlobalValue(ik,1.0); // ik is now used p = RowColPairs.erase(p); // detect wide range permutations if(floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) { lWideRangeColPermutations++; } } else p++; } // communicate column map -> domain map Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX); // plausibility check if(RowColPairs.size()>0) GetOStream(Warnings0,0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" 
<< std::endl; // TODO fix me // close Pperm // count, how many row permutations are missing on current proc size_t cntFreeRowIdx = 0; std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) { if(RowIdStatusArray[lik] == 0.0) { cntFreeRowIdx++; qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik)); } } // fix Pperm for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) { if(RowIdStatusArray[lik] == 0.0) { RowIdStatusArray[lik] = 1.0; // use this row id Pperm->replaceLocalValue(lik, qFreeGRowIdx.front()); // detect wide range permutations if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) { lWideRangeRowPermutations++; } qFreeGRowIdx.pop(); } } // close Qperm (free permutation entries in Qperm) size_t cntFreeColIdx = 0; std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { if(ColIdStatusArray[ljk] == 0.0) { cntFreeColIdx++; qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk)); } } size_t cntUnusedColIdx = 0; std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) { if(ColIdUsedArray[ljk] == 0.0) { cntUnusedColIdx++; qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk)); } } // fix Qperm with local entries for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // stop if no (local) unused column idx are left if(cntUnusedColIdx == 0) break; if(ColIdStatusArray[ljk] == 0.0) { ColIdStatusArray[ljk] = 1.0; // use this row id Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map) ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),1.0); // ljk is now used, too // detect wide range permutations 
if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) { lWideRangeColPermutations++; } qUnusedGColIdx.pop(); cntUnusedColIdx--; cntFreeColIdx--; } } //Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally //ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX); // count, how many unused column idx are needed on current processor // to complete Qperm cntFreeColIdx = 0; for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop if(ColIdStatusArray[ljk] == 0.0) { cntFreeColIdx++; } } GlobalOrdinal global_cntFreeColIdx = 0; LocalOrdinal local_cntFreeColIdx = cntFreeColIdx; sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx); #ifdef DEBUG_OUTPUT std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl; #endif // avoid global communication if possible if(global_cntFreeColIdx > 0) { // 1) count how many unused column ids are left GlobalOrdinal global_cntUnusedColIdx = 0; LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx; sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx); #ifdef DEBUG_OUTPUT std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl; #endif // 2) communicate how many unused column ids are available on procs std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs); std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs); local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx; Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_UnusedColIdxOnProc[0], &global_UnusedColIdxOnProc[0]); #ifdef DEBUG_OUTPUT std::cout << "PROC " << myRank << " global num unused indices per proc: "; for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) { std::cout << " " << global_UnusedColIdxOnProc[ljk]; } std::cout << std::endl; #endif // 3) 
build array of length global_cntUnusedColIdx to globally replicate unused column idx std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx)); std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx)); GlobalOrdinal global_cntUnusedColIdxStartIter = 0; for(int proc=0; proc<myRank; proc++) { global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc]; } for(GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter+local_cntUnusedColIdx; k++) { local_UnusedColIdxVector[k] = qUnusedGColIdx.front(); qUnusedGColIdx.pop(); } Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx), &local_UnusedColIdxVector[0], &global_UnusedColIdxVector[0]); #ifdef DEBUG_OUTPUT std::cout << "PROC " << myRank << " global UnusedGColIdx: "; for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) { std::cout << " " << global_UnusedColIdxVector[ljk]; } std::cout << std::endl; #endif // 4) communicate, how many column idx are needed on each processor // to complete Qperm std::vector<LocalOrdinal> local_EmptyColIdxOnProc (numProcs); std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs); local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx; Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_EmptyColIdxOnProc[0], &global_EmptyColIdxOnProc[0]); #ifdef DEBUG_OUTPUT std::cout << "PROC " << myRank << " global num of needed column indices: "; for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) { std::cout << " " << global_EmptyColIdxOnProc[ljk]; } std::cout << std::endl; #endif // 5) determine first index in global_UnusedColIdxVector for unused column indices, // that are marked to be used by this processor GlobalOrdinal global_UnusedColStartIdx = 0; for(int proc=0; proc<myRank; proc++) { global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc]; } #ifdef DEBUG_OUTPUT GetOStream(Statistics0,0) << "PROC " << 
myRank << " is allowd to use the following column gids: "; for(GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) { GetOStream(Statistics0,0) << global_UnusedColIdxVector[k] << " "; } GetOStream(Statistics0,0) << std::endl; #endif // 6.) fix Qperm with global entries GlobalOrdinal array_iter = 0; for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { if(ColIdStatusArray[ljk] == 0.0) { ColIdStatusArray[ljk] = 1.0; // use this row id Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]); ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter],1.0); // detect wide range permutations if(floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) { lWideRangeColPermutations++; } array_iter++; //cntUnusedColIdx--; // check me } } } // end if global_cntFreeColIdx > 0 /////////////////// Qperm should be fine now... 
// create new empty Matrix Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile)); Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile)); for(size_t row=0; row<A->getNodeNumRows(); row++) { Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1,Teuchos::as<GO>(PpermData[row])); // column idx for Perm^T Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1,Teuchos::as<GO>(QpermData[row])); // column idx for Qperm Teuchos::ArrayRCP<Scalar> valout(1,1.0); permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0,indoutP.size()), valout.view(0,valout.size())); permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.view(0,indoutQ.size()), valout.view(0,valout.size())); } permPTmatrix->fillComplete(); permQTmatrix->fillComplete(); Teuchos::RCP<Matrix> permPmatrix = Utils2::Transpose(permPTmatrix,true); for(size_t row=0; row<permPTmatrix->getNodeNumRows(); row++) { if(permPTmatrix->getNumEntriesInLocalRow(row) != 1) GetOStream(Warnings0,0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl; if(permPmatrix->getNumEntriesInLocalRow(row) != 1) GetOStream(Warnings0,0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl; if(permQTmatrix->getNumEntriesInLocalRow(row) != 1) GetOStream(Warnings0,0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl; } // build permP * A * permQT Teuchos::RCP<Matrix> ApermQt = Utils::Multiply(*A, false, *permQTmatrix, false); Teuchos::RCP<Matrix> permPApermQt = Utils::Multiply(*permPmatrix, false, *ApermQt, false); /* MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("A.mat", *A); MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permP.mat", *permPmatrix); 
MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permQt.mat", *permQTmatrix); MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permPApermQt.mat", *permPApermQt); */ // build scaling matrix Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(),true); Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(),true); Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0); Teuchos::ArrayRCP< Scalar > invDiagVecData = invDiagVec->getDataNonConst(0); permPApermQt->getLocalDiagCopy(*diagVec); for(size_t i = 0; i<diagVec->getMap()->getNodeNumElements(); ++i) { if(diagVecData[i] != 0.0) invDiagVecData[i] = 1/diagVecData[i]; else { invDiagVecData[i] = 1.0; GetOStream(Statistics0,0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl; } } Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1,Xpetra::StaticProfile)); for(size_t row=0; row<A->getNodeNumRows(); row++) { Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]); diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size())); } diagScalingOp->fillComplete(); Teuchos::RCP<Matrix> scaledA = Utils::Multiply(*diagScalingOp, false, *permPApermQt, false); currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory/*this*/); currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory/*this*/); // TODO careful with this!!! 
currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory/*this*/); currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory/*this*/); currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory/*this*/); //// count row permutations // count zeros on diagonal in P -> number of row permutations Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true); permPmatrix->getLocalDiagCopy(*diagPVec); Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0); LocalOrdinal lNumRowPermutations = 0; GlobalOrdinal gNumRowPermutations = 0; for(size_t i = 0; i<diagPVec->getMap()->getNodeNumElements(); ++i) { if(diagPVecData[i] == 0.0) { lNumRowPermutations++; } } // sum up all entries in multipleColRequests over all processors sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations); //// count column permutations // count zeros on diagonal in Q^T -> number of column permutations Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true); permQTmatrix->getLocalDiagCopy(*diagQTVec); Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0); LocalOrdinal lNumColPermutations = 0; GlobalOrdinal gNumColPermutations = 0; for(size_t i = 0; i<diagQTVec->getMap()->getNodeNumElements(); ++i) { if(diagQTVecData[i] == 0.0) { lNumColPermutations++; } } // sum up all entries in multipleColRequests over all processors sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations); currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/); currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/); currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory/*this*/); currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory/*this*/); 
GetOStream(Statistics0, 0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl; GetOStream(Statistics0, 0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl; GetOStream(Runtime1, 0) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl; #else #warning PermutationFactory not compiling/working for Scalar==complex. #endif // #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT }
/// Extracts the (row,col) sub-block from the blocked operator "A" on the current
/// level, attaches strided range/domain map information (reconstructing it from
/// the full maps when the sub-maps are not strided), and publishes the result as
/// this factory's "A".
///
/// \param currentLevel  Level holding the input BlockedCrsMatrix "A"; receives
///                      the extracted sub-block under the key "A".
/// \throws Exceptions::BadCast      if "A" is not a BlockedCrsMatrix, or the
///                                  full range/domain maps are not strided.
/// \throws Exceptions::RuntimeError if the requested block indices are out of
///                                  range, or the "stridedMaps" view cannot be set.
void SubBlockAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
  const ParameterList& pL = GetParameterList();

  // Block coordinates of the requested sub-matrix (valid range: 0..Rows()-1 / 0..Cols()-1).
  size_t row = Teuchos::as<size_t>(pL.get<int>("block row"));
  size_t col = Teuchos::as<size_t>(pL.get<int>("block col"));

  RCP<Matrix>           Ain = Get<RCP<Matrix> >(currentLevel, "A");
  RCP<BlockedCrsMatrix> A   = rcp_dynamic_cast<BlockedCrsMatrix>(Ain);

  TEUCHOS_TEST_FOR_EXCEPTION(A.is_null(), Exceptions::BadCast, "Input matrix A is not a BlockedCrsMatrix.");
  // NOTE: blocks are indexed 0..Rows()-1 / 0..Cols()-1, so row == Rows() is already
  // out of range. The previous check used '>' and let that case through to getMatrix().
  TEUCHOS_TEST_FOR_EXCEPTION(row >= A->Rows(), Exceptions::RuntimeError, "row [" << row << "] >= A.Rows() [" << A->Rows() << "].");
  TEUCHOS_TEST_FOR_EXCEPTION(col >= A->Cols(), Exceptions::RuntimeError, "col [" << col << "] >= A.Cols() [" << A->Cols() << "].");

  RCP<CrsMatrixWrap> Op = Teuchos::rcp(new CrsMatrixWrap(A->getMatrix(row, col)));

  //////////////// EXPERIMENTAL
  // extract striding information from RangeMapExtractor
  RCP<const MapExtractor> rangeMapExtractor  = A->getRangeMapExtractor();
  RCP<const MapExtractor> domainMapExtractor = A->getDomainMapExtractor();

  RCP<const Map> rangeMap  = rangeMapExtractor ->getMap(row);
  RCP<const Map> domainMap = domainMapExtractor->getMap(col);

  RCP<const StridedMap> srangeMap  = rcp_dynamic_cast<const StridedMap>(rangeMap);
  RCP<const StridedMap> sdomainMap = rcp_dynamic_cast<const StridedMap>(domainMap);

  // If the sub-block's range map carries no striding information, rebuild a strided
  // map for it from the full range map's striding data.
  if (srangeMap.is_null()) {
    RCP<const Map>        fullRangeMap  = rangeMapExtractor->getFullMap();
    RCP<const StridedMap> sFullRangeMap = rcp_dynamic_cast<const StridedMap>(fullRangeMap);
    TEUCHOS_TEST_FOR_EXCEPTION(sFullRangeMap.is_null(), Exceptions::BadCast, "Full rangeMap is not a strided map.");

    std::vector<size_t> stridedData = sFullRangeMap->getStridingData();
    if (stridedData.size() == 1 && row > 0) {
      // We have block matrices. use striding block information 0
      srangeMap = StridedMapFactory::Build(rangeMap, stridedData, 0, sFullRangeMap->getOffset());

    } else {
      // We have strided matrices. use striding information of the corresponding block
      srangeMap = StridedMapFactory::Build(rangeMap, stridedData, row, sFullRangeMap->getOffset());
    }
  }

  // Same reconstruction for the domain map of the sub-block.
  if (sdomainMap.is_null()) {
    RCP<const Map>        fullDomainMap  = domainMapExtractor->getFullMap();
    RCP<const StridedMap> sFullDomainMap = rcp_dynamic_cast<const StridedMap>(fullDomainMap);
    TEUCHOS_TEST_FOR_EXCEPTION(sFullDomainMap.is_null(), Exceptions::BadCast, "Full domainMap is not a strided map");

    std::vector<size_t> stridedData = sFullDomainMap->getStridingData();
    if (stridedData.size() == 1 && col > 0) {
      // We have block matrices. use striding block information 0
      sdomainMap = StridedMapFactory::Build(domainMap, stridedData, 0, sFullDomainMap->getOffset());

    } else {
      // We have strided matrices. use striding information of the corresponding block
      sdomainMap = StridedMapFactory::Build(domainMap, stridedData, col, sFullDomainMap->getOffset());
    }
  }

  TEUCHOS_TEST_FOR_EXCEPTION(srangeMap.is_null(),  Exceptions::BadCast, "rangeMap " << row << " is not a strided map.");
  TEUCHOS_TEST_FOR_EXCEPTION(sdomainMap.is_null(), Exceptions::BadCast, "domainMap " << col << " is not a strided map.");

  GetOStream(Statistics1) << "A(" << row << "," << col << ") has strided maps:"
      << "\n range map fixed block size = " << srangeMap ->getFixedBlockSize() << ", strided block id = " << srangeMap ->getStridedBlockId()
      << "\n domain map fixed block size = " << sdomainMap->getFixedBlockSize() << ", strided block id = " << sdomainMap->getStridedBlockId() << std::endl;

  // Replace any stale view before attaching the freshly built strided maps.
  if (Op->IsView("stridedMaps") == true)
    Op->RemoveView("stridedMaps");
  Op->CreateView("stridedMaps", srangeMap, sdomainMap);
  TEUCHOS_TEST_FOR_EXCEPTION(Op->IsView("stridedMaps") == false, Exceptions::RuntimeError, "Failed to set \"stridedMaps\" view.");
  //////////////// EXPERIMENTAL

  currentLevel.Set("A", rcp_dynamic_cast<Matrix>(Op), this);
}
// Unit test: build a matrix with a random number of nonzeros per row, run the
// ZoltanInterface repartitioner on it, and (on Linux, rank 0) compare the
// resulting per-partition nonzero tallies against precomputed reference values
// for 1-5 processes.
TEUCHOS_UNIT_TEST(Zoltan, Build) {
  typedef Teuchos::ScalarTraits<Scalar> ST;

  out << "version: " << MueLu::Version() << std::endl;
  out << std::endl;
  out << "This tests that the partitioning produced by Zoltan is \"reasonable\" for a matrix" << std::endl;
  out << "that has a random number of nonzeros per row. Good results have been precomputed" << std::endl;
  out << "for up to 5 processors. The results are the number of nonzeros in the local matrix" << std::endl;
  out << "once the Zoltan repartitioning has been applied." << std::endl;
  out << "The results can be viewed in Paraview by enabling code guarded by the macro MUELU_VISUALIZE_REPARTITIONING" << std::endl;

  RCP<const Teuchos::Comm<int> > comm = TestHelpers::Parameters::getDefaultComm();

  // Reference results only exist for up to 5 ranks; pass trivially otherwise.
  if (comm->getSize() > 5) {
    out << std::endl;
    out << "This test must be run on 1 to 5 processes." << std::endl;
    TEST_EQUALITY(true, true);
    return;
  }

  Level level;
  RCP<FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
  level.SetFactoryManager(factoryHandler);

  // 7x7 structured mesh; rows get between 1 and maxEntriesPerRow entries.
  int nx=7;
  int ny=nx;
  GO numGlobalElements = nx*ny;
  size_t maxEntriesPerRow=30;

  // Populate CrsMatrix with random number of entries (up to maxEntriesPerRow) per row.
  RCP<const Map> map = MapFactory::createUniformContigMap(TestHelpers::Parameters::getLib(), numGlobalElements, comm);
  const size_t numMyElements = map->getNodeNumElements();
  Teuchos::ArrayView<const GlobalOrdinal> myGlobalElements = map->getNodeElementList();

  RCP<Matrix> A = rcp(new CrsMatrixWrap(map, 1)); // Force underlying linear algebra library to allocate more
                                                  // memory on the fly. While not super efficient, this
                                                  // ensures that no zeros are being stored. Thus, from
                                                  // Zoltan's perspective the matrix is imbalanced.
  // Create a vector with random integer entries in [1,maxEntriesPerRow].
  // Each rank seeds differently so row lengths (and hence nnz) differ across ranks.
  ST::seedrandom(666*comm->getRank());
  RCP<Xpetra::Vector<LO,LO,GO,NO> > entriesPerRow = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(map,false);
  Teuchos::ArrayRCP<LO> eprData = entriesPerRow->getDataNonConst(0);
  for (Teuchos::ArrayRCP<LO>::iterator i=eprData.begin(); i!=eprData.end(); ++i) {
    *i = (LO)(std::floor(((ST::random()+1)*0.5*maxEntriesPerRow)+1));
  }

  RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  fos->setOutputToRootOnly(-1);

  // Fill each local row i with eprData[i] ones, in a contiguous column window
  // ending at the row's own global index.
  Teuchos::Array<Scalar> vals(maxEntriesPerRow);
  Teuchos::Array<GO> cols(maxEntriesPerRow);
  for (size_t i = 0; i < numMyElements; ++i) {
    Teuchos::ArrayView<SC> av(&vals[0],eprData[i]);
    Teuchos::ArrayView<GO> iv(&cols[0],eprData[i]);
    //stick in ones for values
    for (LO j=0; j< eprData[i]; ++j) vals[j] = ST::one();
    //figure out valid column indices
    GO start = std::max(myGlobalElements[i]-eprData[i]+1,0);
    for (LO j=0; j< eprData[i]; ++j) cols[j] = start+j;
    A->insertGlobalValues(myGlobalElements[i], iv, av);
  }
  A->fillComplete();
  level.Set("A",A);

  //build coordinates
  RCP<const Map> rowMap = A->getRowMap();
  Teuchos::ParameterList list;
  list.set("nx",nx);
  list.set("ny",ny);
  RCP<MultiVector> XYZ = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("2D",rowMap,list);
  level.Set("Coordinates",XYZ);

  // One partition per process.
  LO numPartitions = comm->getSize();
  level.Set("number of partitions",numPartitions);
  RCP<ZoltanInterface> zoltan = rcp(new ZoltanInterface());
  //zoltan->SetNumberOfPartitions(numPartitions);
  //zoltan->SetOutputLevel(0); //options are 0=none, 1=summary, 2=every pid prints
  level.Request("Partition",zoltan.get());
  zoltan->Build(level);

  RCP<Xpetra::Vector<GO,LO,GO,NO> > decomposition = level.Get<RCP<Xpetra::Vector<GO,LO,GO,NO> > >("Partition",zoltan.get());
  /* //TODO temporary code to have the trivial decomposition (no change)
  ArrayRCP<GO> decompEntries = decomposition->getDataNonConst(0);
  for (ArrayRCP<GO>::iterator i = decompEntries.begin(); i != decompEntries.end(); ++i)
    *i = comm->getRank();
  decompEntries=Teuchos::null;
  */ //TODO end of temporary code

  //Create vector whose local length is the global number of partitions.
  //This vector will record the local number of nonzeros associated with each partition.
  Teuchos::Array<GO> parts(numPartitions);
  for (int i=0; i<numPartitions; ++i) parts[i] = i;
  Teuchos::ArrayView<GO> partsView(&parts[0],numPartitions);
  RCP<const Map> partitionMap = MapFactory::Build(TestHelpers::Parameters::getLib(),
                                                  Teuchos::OrdinalTraits<global_size_t>::invalid(), partsView,
                                                  map->getIndexBase(),comm);
  RCP<Xpetra::Vector<LO,LO,GO,NO> > localPartsVec = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(partitionMap);

  //For the local rows in each partition, tally up the number of nonzeros. This is what
  //Zoltan should be load-balancing.
  Teuchos::ArrayRCP<GO> lpvData = localPartsVec->getDataNonConst(0);
  Teuchos::ArrayRCP<const GO> decompData = decomposition->getData(0);
  for (size_t i=0; i<decomposition->getLocalLength();++i) {
    Teuchos::ArrayView<const LO> c;
    Teuchos::ArrayView<const SC> v;
    A->getLocalRowView(i,c,v);
    lpvData[decompData[i]] += v.size();
  }

  lpvData = Teuchos::null;
  decompData = Teuchos::null;

  //localPartsVec->describe(*fos,Teuchos::VERB_EXTREME);

  //Send the local nnz tallies to pid 0, which can report the global sums.
  // NOTE(review): mysize is 1 (not 0) on non-root ranks to work around a hang in
  // getData on zero-length vectors -- see the FIXME comments below.
  size_t mysize=1;
  if (comm->getRank() == 0) mysize = numPartitions;
  RCP<const Map> globalTallyMap = MapFactory::Build(TestHelpers::Parameters::getLib(),
                                                    Teuchos::OrdinalTraits<global_size_t>::invalid(),
                                                    mysize,
                                                    map->getIndexBase(), comm);
  RCP<Xpetra::Vector<LO,LO,GO,NO> > globalTallyVec = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(globalTallyMap);
  RCP<const Export> exporter = ExportFactory::Build( partitionMap, globalTallyMap);
  globalTallyVec->doExport(*localPartsVec,*exporter,Xpetra::ADD);

  // Precomputed per-partition nonzero counts for 1-5 ranks.
  ArrayRCP<GO> expectedResults(numPartitions);
  switch (comm->getSize()) {
    case 1:
      expectedResults[0] = 807;
      break;
    case 2:
      expectedResults[0] = 364;
      expectedResults[1] = 363;
      break;
    case 3:
      expectedResults[0] = 277;
      expectedResults[1] = 261;
      expectedResults[2] = 269;
      break;
    case 4:
      expectedResults[0] = 195;
      expectedResults[1] = 186;
      expectedResults[2] = 177;
      expectedResults[3] = 168;
      break;
    case 5:
      expectedResults[0] = 161;
      expectedResults[1] = 145;
      expectedResults[2] = 148;
      expectedResults[3] = 159;
      expectedResults[4] = 157;
      break;
    default:
      break;
  };

  //FIXME cool ... this next line causes a hang if locally the globalyTallyVec has no data.
  //FIXME I get around this by making mysize (above) 1 instead of 0. Is this a bug or feature
  //FIXME in getData?
  ArrayRCP<const LO> gtvData = globalTallyVec->getData(0);

#ifdef __linux__
  out << "Checking results..." << std::endl;
  for (int i=0; i<numPartitions; ++i) {
    if (comm->getRank() == 0) TEST_EQUALITY( expectedResults[i], gtvData[i]);
  }
#endif

#ifdef MUELU_VISUALIZE_REPARTITIONING
  //
  //Now write everything to a comma-separate list that ParaView can grok
  //
  Teuchos::ArrayRCP<const Scalar> X = XYZ->getData(0);
  Teuchos::ArrayRCP<const Scalar> Y = XYZ->getData(1);
  Teuchos::ArrayRCP<const GO> D = decomposition->getData(0);
  RCP<std::ofstream> outFile;
  std::string fileName = "zoltanResults.csv";

  //write header information
  if (comm->getRank() == 0) {
    outFile = rcp(new std::ofstream(fileName.c_str()));
    *outFile << "x coord, y coord, z coord, scalar" << std::endl;
  }
  comm->barrier();

  //append coordinates (ranks take turns appending to the same file)
  for (int j=0; j<comm->getSize(); ++j) {
    int mypid = comm->getRank();
    if (mypid == j) {
      outFile = rcp(new std::ofstream(fileName.c_str(),std::ios::app));
      for (int i=0; i < D.size(); ++i) {
        *outFile << X[i] << ", " << Y[i] << ", " << ST::zero() << ", " << D[i] << std::endl;
      }
    }
  } //for (int i=0; i<comm->getSize(); ++i)

  out << std::endl;
  out << "You can view the Zoltan decomposition in ParaView 3.10.1 or later:" << std::endl;
  out << " 1) Load the data file " << fileName << "." << std::endl;
  out << " 2) Run the filter Filters/ Alphabetical/ Table To Points." << std::endl;
  out << " 3) Tell ParaView what columns are the X, Y and Z coordinates." << std::endl;
  out << " 4) Split screen horizontally (Icon, top right)." << std::endl;
  out << " 5) Click on the eyeball in the Pipeline Browser to see the points." << std::endl;
  out << " 6) Under the Display tab, you can color points by scalar value and resize them." << std::endl;
  out << std::endl;
  out << " To display row weights next to each point:" << std::endl;
  out << " 1) Click the \"Select Points Through\" button (2nd row) and select all points." << std::endl;
  out << " 2) Under View pull-down menu, choose the \"Selection Inspector\"." << std::endl;
  out << " 3) Under the Point Label, check the Visible box and set the Label Mode to \"row weight\"." << std::endl;
#endif
} //Build
// SmootherPrototype helper function void setupSmoother(RCP<Matrix>& A, SmootherPrototype & smoother, Teuchos::FancyOStream & out, bool & success) { Level level; TestHelpers::TestFactory<SC,LO,GO,NO,LMO>::createSingleLevelHierarchy(level); level.Set("A", A); smoother.Setup(level); }
/// Decides whether the level's operator A needs rebalancing and, if so, builds
/// the row-map Import ("Importer") that migrates data to the new distribution.
/// Every early-exit ("no repartitioning") path sets a null "Importer".
void RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  const Teuchos::ParameterList & pL = GetParameterList();
  // Access parameters here to make sure that we set the parameter entry flag to "used" even in case of short-circuit evaluation.
  // TODO (JG): I don't really know if we want to do this.
  const int    startLevel          = pL.get<int>   ("repartition: start level");
  const LO     minRowsPerProcessor = pL.get<LO>    ("repartition: min rows per proc");
  const double nonzeroImbalance    = pL.get<double>("repartition: max imbalance");
  const bool   remapPartitions     = pL.get<bool>  ("repartition: remap parts");

  // TODO: We only need a CrsGraph. This class does not have to be templated on Scalar types.
  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");

  // ======================================================================================================
  // Determine whether partitioning is needed
  // ======================================================================================================
  // NOTE: most tests include some global communication, which is why we currently only do tests until we make
  // a decision on whether to repartition. However, there is value in knowing how "close" we are to having to
  // rebalance an operator. So, it would probably be beneficial to do and report *all* tests.

  // Test1: skip repartitioning if current level is less than the specified minimum level for repartitioning
  if (currentLevel.GetLevelID() < startLevel) {
    GetOStream(Statistics0) << "Repartitioning? NO:" <<
        "\n current level = " << Teuchos::toString(currentLevel.GetLevelID()) <<
        ", first level where repartitioning can happen is " + Teuchos::toString(startLevel) << std::endl;
    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }

  RCP<const Map> rowMap = A->getRowMap();

  // NOTE: Teuchos::MPIComm::duplicate() calls MPI_Bcast inside, so this is
  // a synchronization point. However, as we do MueLu_sumAll afterwards anyway, it
  // does not matter.
  RCP<const Teuchos::Comm<int> > origComm = rowMap->getComm();
  RCP<const Teuchos::Comm<int> > comm     = origComm->duplicate();

  // Test 2: check whether A is actually distributed, i.e. more than one processor owns part of A
  // TODO: this global communication can be avoided if we store the information with the matrix (it is known when matrix is created)
  // TODO: further improvements could be achieved when we use subcommunicator for the active set. Then we only need to check its size
  {
    int numActiveProcesses = 0;
    MueLu_sumAll(comm, Teuchos::as<int>((A->getNodeNumRows() > 0) ? 1 : 0), numActiveProcesses);

    if (numActiveProcesses == 1) {
      GetOStream(Statistics0) << "Repartitioning? NO:" <<
          "\n # processes with rows = " << Teuchos::toString(numActiveProcesses) << std::endl;
      Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
      return;
    }
  }

  bool test3 = false, test4 = false;
  std::string msg3, msg4;

  // Test3: check whether number of rows on any processor satisfies the minimum number of rows requirement
  // NOTE: Test2 ensures that repartitionning is not done when there is only one processor (it may or may not satisfy Test3)
  if (minRowsPerProcessor > 0) {
    LO numMyRows = Teuchos::as<LO>(A->getNodeNumRows()), minNumRows, LOMAX = Teuchos::OrdinalTraits<LO>::max();
    LO haveFewRows = (numMyRows < minRowsPerProcessor ? 1 : 0), numWithFewRows = 0;
    MueLu_sumAll(comm, haveFewRows, numWithFewRows);
    MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);

    // TODO: we could change it to repartition only if the number of processors with numRows < minNumRows is larger than some
    // percentage of the total number. This way, we won't repartition if 2 out of 1000 processors don't have enough elements.
    // I'm thinking maybe 20% threshold. To implement, simply add " && numWithFewRows < .2*numProcs" to the if statement.
    if (numWithFewRows > 0)
      test3 = true;

    msg3 = "\n min # rows per proc = " + Teuchos::toString(minNumRows) + ", min allowable = " + Teuchos::toString(minRowsPerProcessor);
  }

  // Test4: check whether the balance in the number of nonzeros per processor is greater than threshold
  if (!test3) {
    GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getNodeNumEntries());
    MueLu_maxAll(comm, numMyNnz, maxNnz);
    MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz); // min nnz over all active processors
    double imbalance = Teuchos::as<double>(maxNnz)/minNnz;

    if (imbalance > nonzeroImbalance)
      test4 = true;

    msg4 = "\n nonzero imbalance = " + Teuchos::toString(imbalance) + ", max allowable = " + Teuchos::toString(nonzeroImbalance);
  }

  if (!test3 && !test4) {
    GetOStream(Statistics0) << "Repartitioning? NO:" << msg3 + msg4 << std::endl;
    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }

  GetOStream(Statistics0) << "Repartitioning? YES:" << msg3 + msg4 << std::endl;

  GO indexBase = rowMap->getIndexBase();
  Xpetra::UnderlyingLib lib = rowMap->lib();
  int myRank   = comm->getRank();
  int numProcs = comm->getSize();

  // Raw MPI communicator is needed for the point-to-point exchange below.
  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();

  // ======================================================================================================
  // Calculate number of partitions
  // ======================================================================================================
  // FIXME Quick way to figure out how many partitions there should be (same algorithm as ML)
  // FIXME Should take into account nnz? Perhaps only when user is using min #nnz per row threshold.
  GO numPartitions;
  if (currentLevel.IsAvailable("number of partitions")) {
    numPartitions = currentLevel.Get<GO>("number of partitions");
    GetOStream(Warnings0) << "Using user-provided \"number of partitions\", the performance is unknown" << std::endl;

  } else {
    if (Teuchos::as<GO>(A->getGlobalNumRows()) < minRowsPerProcessor) {
      // System is too small, migrate it to a single processor
      numPartitions = 1;

    } else {
      // Make sure that each processor has approximately minRowsPerProcessor
      numPartitions = A->getGlobalNumRows() / minRowsPerProcessor;
    }
    numPartitions = std::min(numPartitions, Teuchos::as<GO>(numProcs));

    currentLevel.Set("number of partitions", numPartitions, NoFactory::get());
  }
  GetOStream(Statistics0) << "Number of partitions to use = " << numPartitions << std::endl;

  // ======================================================================================================
  // Construct decomposition vector
  // ======================================================================================================
  RCP<GOVector> decomposition;
  if (numPartitions == 1) {
    // Trivial case: decomposition is the trivial one, all zeros. We skip the call to Zoltan_Interface
    // (this is mostly done to avoid extra output messages, as even if we didn't skip there is a shortcut
    // in Zoltan[12]Interface).
    // TODO: We can probably skip more work in this case (like building all extra data structures)
    GetOStream(Warnings0) << "Only one partition: Skip call to the repartitioner." << std::endl;
    decomposition = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(A->getRowMap(), true);

  } else {
    decomposition = Get<RCP<GOVector> >(currentLevel, "Partition");

    if (decomposition.is_null()) {
      GetOStream(Warnings0) << "No repartitioning necessary: partitions were left unchanged by the repartitioner" << std::endl;
      Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
      return;
    }
  }

  // ======================================================================================================
  // Remap if necessary
  // ======================================================================================================
  // From a user perspective, we want user to not care about remapping, thinking of it as only a performance feature.
  // There are two problems, however.
  // (1) Next level aggregation depends on the order of GIDs in the vector, if one uses "natural" or "random" orderings.
  //     This also means that remapping affects next level aggregation, despite the fact that the _set_ of GIDs for
  //     each partition is the same.
  // (2) Even with the fixed order of GIDs, the remapping may influence the aggregation for the next-next level.
  //     Let us consider the following example. Lets assume that when we don't do remapping, processor 0 would have
  //     GIDs {0,1,2}, and processor 1 GIDs {3,4,5}, and if we do remapping processor 0 would contain {3,4,5} and
  //     processor 1 {0,1,2}. Now, when we run repartitioning algorithm on the next level (say Zoltan1 RCB), it may
  //     be dependent on whether whether it is [{0,1,2}, {3,4,5}] or [{3,4,5}, {0,1,2}]. Specifically, the tie-breaking
  //     algorithm can resolve these differently. For instance, running
  //         mpirun -np 5 ./MueLu_ScalingTestParamList.exe --xml=easy_sa.xml --nx=12 --ny=12 --nz=12
  //     with
  //         <ParameterList name="MueLu">
  //           <Parameter name="coarse: max size" type="int" value="1"/>
  //           <Parameter name="repartition: enable" type="bool" value="true"/>
  //           <Parameter name="repartition: min rows per proc" type="int" value="2"/>
  //           <ParameterList name="level 1">
  //             <Parameter name="repartition: remap parts" type="bool" value="false/true"/>
  //           </ParameterList>
  //         </ParameterList>
  //     produces different repartitioning for level 2.
  //     This different repartitioning may then escalate into different aggregation for the next level.
  //
  // We fix (1) by fixing the order of GIDs in a vector by sorting the resulting vector.
  // Fixing (2) is more complicated.
  // FIXME: Fixing (2) in Zoltan may not be enough, as we may use some arbitration in MueLu,
  // for instance with CoupledAggregation. What we really need to do is to use the same order of processors containing
  // the same order of GIDs. To achieve that, the newly created subcommunicator must be conforming with the order. For
  // instance, if we have [{0,1,2}, {3,4,5}], we create a subcommunicator where processor 0 gets rank 0, and processor 1
  // gets rank 1. If, on the other hand, we have [{3,4,5}, {0,1,2}], we assign rank 1 to processor 0, and rank 0 to processor 1.
  // This rank permutation requires help from Epetra/Tpetra, both of which have no such API in place.
  // One should also be concerned that if we had such API in place, rank 0 in subcommunicator may no longer be rank 0 in
  // MPI_COMM_WORLD, which may lead to issues for logging.
  if (remapPartitions) {
    SubFactoryMonitor m1(*this, "DeterminePartitionPlacement", currentLevel);
    DeterminePartitionPlacement(*A, *decomposition, numPartitions);
  }

  // ======================================================================================================
  // Construct importer
  // ======================================================================================================
  // At this point, the following is true:
  //  * Each processors owns 0 or 1 partitions
  //  * If a processor owns a partition, that partition number is equal to the processor rank
  //  * The decomposition vector contains the partitions ids that the corresponding GID belongs to
  ArrayRCP<const GO> decompEntries;
  if (decomposition->getLocalLength() > 0)
    decompEntries = decomposition->getData(0);

#ifdef HAVE_MUELU_DEBUG
  // Test range of partition ids
  int incorrectRank = -1;
  for (int i = 0; i < decompEntries.size(); i++)
    if (decompEntries[i] >= numProcs || decompEntries[i] < 0) {
      incorrectRank = myRank;
      break;
    }

  int incorrectGlobalRank = -1;
  MueLu_maxAll(comm, incorrectRank, incorrectGlobalRank);
  // NOTE(review): the message text "a partition number is that out-of-range" is garbled
  // (should read "that is"); left untouched here since it is a runtime string.
  TEUCHOS_TEST_FOR_EXCEPTION(incorrectGlobalRank > -1, Exceptions::RuntimeError, "pid " + Teuchos::toString(incorrectGlobalRank) + " encountered a partition number is that out-of-range");
#endif

  Array<GO> myGIDs;
  myGIDs.reserve(decomposition->getLocalLength());

  // Step 0: Construct mapping
  //    part number -> GIDs I own which belong to this part
  // NOTE: my own part GIDs are not part of the map
  typedef std::map<GO, Array<GO> > map_type;
  map_type sendMap;
  for (LO i = 0; i < decompEntries.size(); i++) {
    GO id  = decompEntries[i];
    GO GID = rowMap->getGlobalElement(i);

    if (id == myRank)
      myGIDs .push_back(GID);
    else
      sendMap[id].push_back(GID);
  }
  decompEntries = Teuchos::null;

  if (IsPrint(Statistics2)) {
    GO numLocalKept = myGIDs.size(), numGlobalKept, numGlobalRows = A->getGlobalNumRows();
    MueLu_sumAll(comm,numLocalKept, numGlobalKept);
    GetOStream(Statistics2) << "Unmoved rows: " << numGlobalKept << " / " << numGlobalRows << " (" << 100*Teuchos::as<double>(numGlobalKept)/numGlobalRows << "%)" << std::endl;
  }

  int numSend = sendMap.size(), numRecv;

  // Arrayify map keys
  Array<GO> myParts(numSend), myPart(1);
  int cnt = 0;
  myPart[0] = myRank;
  for (typename map_type::const_iterator it = sendMap.begin(); it != sendMap.end(); it++)
    myParts[cnt++] = it->first;

  // Step 1: Find out how many processors send me data
  // partsIndexBase starts from zero, as the processors ids start from zero
  GO partsIndexBase = 0;
  RCP<Map>    partsIHave  = MapFactory   ::Build(lib, Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), myParts(), partsIndexBase, comm);
  RCP<Map>    partsIOwn   = MapFactory   ::Build(lib, numProcs, myPart(), partsIndexBase, comm);
  RCP<Export> partsExport = ExportFactory::Build(partsIHave, partsIOwn);

  RCP<GOVector> partsISend    = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIHave);
  RCP<GOVector> numPartsIRecv = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIOwn);
  if (numSend) {
    ArrayRCP<GO> partsISendData = partsISend->getDataNonConst(0);
    for (int i = 0; i < numSend; i++)
      partsISendData[i] = 1;
  }
  (numPartsIRecv->getDataNonConst(0))[0] = 0;

  // Export of ones accumulates, per owned part, the number of senders targeting it.
  numPartsIRecv->doExport(*partsISend, *partsExport, Xpetra::ADD);
  numRecv = (numPartsIRecv->getData(0))[0];

  // Step 2: Get my GIDs from everybody else
  MPI_Datatype MpiType = MpiTypeTraits<GO>::getType();
  int msgTag = 12345; // TODO: use Comm::dup for all internal messaging

  // Post sends
  Array<MPI_Request> sendReqs(numSend);
  cnt = 0;
  for (typename map_type::iterator it = sendMap.begin(); it != sendMap.end(); it++)
    MPI_Isend(static_cast<void*>(it->second.getRawPtr()), it->second.size(), MpiType, Teuchos::as<GO>(it->first), msgTag, *rawMpiComm, &sendReqs[cnt++]);

  // Probe-then-receive: message size is unknown until MPI_Probe reports it.
  map_type recvMap;
  size_t totalGIDs = myGIDs.size();
  for (int i = 0; i < numRecv; i++) {
    MPI_Status status;
    MPI_Probe(MPI_ANY_SOURCE, msgTag, *rawMpiComm, &status);

    // Get rank and number of elements from status
    int fromRank = status.MPI_SOURCE, count;
    MPI_Get_count(&status, MpiType, &count);

    recvMap[fromRank].resize(count);
    MPI_Recv(static_cast<void*>(recvMap[fromRank].getRawPtr()), count, MpiType, fromRank, msgTag, *rawMpiComm, &status);

    totalGIDs += count;
  }

  // Do waits on send requests
  if (numSend) {
    Array<MPI_Status> sendStatuses(numSend);
    MPI_Waitall(numSend, sendReqs.getRawPtr(), sendStatuses.getRawPtr());
  }

  // Merge GIDs
  myGIDs.reserve(totalGIDs);
  for (typename map_type::const_iterator it = recvMap.begin(); it != recvMap.end(); it++) {
    int offset = myGIDs.size(), len = it->second.size();
    if (len) {
      myGIDs.resize(offset + len);
      memcpy(myGIDs.getRawPtr() + offset, it->second.getRawPtr(), len*sizeof(GO));
    }
  }
  // NOTE 2: The general sorting algorithm could be sped up by using the knowledge that original myGIDs and all received chunks
  // (i.e. it->second) are sorted. Therefore, a merge sort would work well in this situation.
  std::sort(myGIDs.begin(), myGIDs.end());

  // Step 3: Construct importer
  RCP<Map> newRowMap = MapFactory::Build(lib, rowMap->getGlobalNumElements(), myGIDs(), indexBase, origComm);
  RCP<const Import> rowMapImporter;
  {
    SubFactoryMonitor m1(*this, "Import construction", currentLevel);
    rowMapImporter = ImportFactory::Build(rowMap, newRowMap);
  }

  Set(currentLevel, "Importer", rowMapImporter);

  // ======================================================================================================
  // Print some data
  // ======================================================================================================
  if (pL.get<bool>("repartition: print partition distribution") && IsPrint(Statistics2)) {
    // Print the grid of processors
    GetOStream(Statistics2) << "Partition distribution over cores (ownership is indicated by '+')" << std::endl;

    char amActive = (myGIDs.size() ? 1 : 0);
    std::vector<char> areActive(numProcs, 0);
    MPI_Gather(&amActive, 1, MPI_CHAR, &areActive[0], 1, MPI_CHAR, 0, *rawMpiComm);

    int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
    for (int proc = 0; proc < numProcs; proc += rowWidth) {
      for (int j = 0; j < rowWidth; j++)
        if (proc + j < numProcs)
          GetOStream(Statistics2) << (areActive[proc + j] ? "+" : ".");
        else
          GetOStream(Statistics2) << " ";

      GetOStream(Statistics2) << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
    }
  }
} // Build
// Extracts sub-block A(row,col) from the blocked input operator "A" and registers it
// as "A" on the current level, equipping single-block results with strided maps.
//
// Striding information is determined with the following precedence:
//   1. user-specified striding info from the parameter list (range AND domain),
//   2. striding information already attached to the sub-maps,
//   3. striding information recovered from the full maps of the map extractors.
//
// Throws Exceptions::BadCast      if "A" is not a BlockedCrsMatrix or the sub-block
//                                 has an unexpected type.
// Throws Exceptions::RuntimeError if "block row"/"block col" are out of range or the
//                                 user specified only one of range/domain striding.
void SubBlockAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  const ParameterList& pL = GetParameterList();
  size_t row = Teuchos::as<size_t>(pL.get<int>("block row"));
  size_t col = Teuchos::as<size_t>(pL.get<int>("block col"));

  RCP<Matrix> Ain = currentLevel.Get< RCP<Matrix> >("A", this->GetFactory("A").get());
  RCP<BlockedCrsMatrix> A = rcp_dynamic_cast<BlockedCrsMatrix>(Ain);

  TEUCHOS_TEST_FOR_EXCEPTION(A.is_null(), Exceptions::BadCast, "Input matrix A is not a BlockedCrsMatrix.");
  // NOTE: valid block indices are 0..Rows()-1 and 0..Cols()-1, so the checks must be ">=".
  // (The previous checks used ">", which let the invalid indices row == Rows() / col == Cols() through.)
  TEUCHOS_TEST_FOR_EXCEPTION(row >= A->Rows(), Exceptions::RuntimeError, "row [" << row << "] >= A.Rows() [" << A->Rows() << "].");
  TEUCHOS_TEST_FOR_EXCEPTION(col >= A->Cols(), Exceptions::RuntimeError, "col [" << col << "] >= A.Cols() [" << A->Cols() << "].");

  // get sub-matrix
  RCP<Matrix> Op = A->getMatrix(row, col);

  // Check if it is a BlockedCrsMatrix object
  // If it is a BlockedCrsMatrix object (most likely a ReorderedBlockedCrsMatrix)
  // we have to distinguish whether it is a 1x1 leaf block in the ReorderedBlockedCrsMatrix
  // or a nxm block. If it is a 1x1 leaf block, we "unpack" it and return the underlying
  // CrsMatrixWrap object.
  RCP<BlockedCrsMatrix> bOp = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(Op);
  if (bOp != Teuchos::null) {
    // check if it is a 1x1 leaf block
    if (bOp->Rows() == 1 && bOp->Cols() == 1) {
      // return the unwrapped CrsMatrixWrap object underneath
      Op = bOp->getCrsMatrix();
      TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(Op) == Teuchos::null, Exceptions::BadCast,
                                 "SubBlockAFactory::Build: sub block A[" << row << "," << col << "] must be a single block CrsMatrixWrap object!");
    } else {
      // If it is a regular nxm blocked operator, just return it.
      // We do not set any kind of striding or blocking information as this
      // usually would not make sense for general blocked operators
      GetOStream(Statistics1) << "A(" << row << "," << col << ") is a " << bOp->Rows() << "x" << bOp->Cols() << " block matrix" << std::endl;
      GetOStream(Statistics2) << "with altogether " << bOp->getGlobalNumRows() << "x" << bOp->getGlobalNumCols() << " rows and columns." << std::endl;
      currentLevel.Set("A", Op, this);
      return;
    }
  }

  // The sub-block is not a BlockedCrsMatrix object, that is, we expect
  // it to be of type CrsMatrixWrap allowing direct access to the corresponding
  // data. For a single block CrsMatrixWrap type matrix we can/should set the
  // corresponding striding/blocking information for the algorithms to work
  // properly
  //
  // TAW: In fact, a 1x1 BlockedCrsMatrix object also allows to access the data
  //      directly, but this feature is nowhere really used in the algorithms.
  //      So let's keep checking for the CrsMatrixWrap class to avoid screwing
  //      things up
  //
  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::rcp_dynamic_cast<CrsMatrixWrap>(Op) == Teuchos::null, Exceptions::BadCast,
                             "SubBlockAFactory::Build: sub block A[" << row << "," << col << "] is NOT a BlockedCrsMatrix but also NOT a CrsMatrixWrap object? This cannot be.");

  // strided maps for range and domain map of sub matrix
  RCP<const StridedMap> srangeMap  = Teuchos::null;
  RCP<const StridedMap> sdomainMap = Teuchos::null;

  // check for user-specified striding information from XML file
  std::vector<size_t> rangeStridingInfo;
  std::vector<size_t> domainStridingInfo;
  LocalOrdinal rangeStridedBlockId  = 0;
  LocalOrdinal domainStridedBlockId = 0;
  bool bRangeUserSpecified  = CheckForUserSpecifiedBlockInfo(true,  rangeStridingInfo,  rangeStridedBlockId);
  bool bDomainUserSpecified = CheckForUserSpecifiedBlockInfo(false, domainStridingInfo, domainStridedBlockId);
  TEUCHOS_TEST_FOR_EXCEPTION(bRangeUserSpecified != bDomainUserSpecified, Exceptions::RuntimeError,
                             "MueLu::SubBlockAFactory[" << row << "," << col << "]: the user has to specify either both domain and range map or none.");

  // extract map information from MapExtractor
  RCP<const MapExtractor> rangeMapExtractor  = A->getRangeMapExtractor();
  RCP<const MapExtractor> domainMapExtractor = A->getDomainMapExtractor();

  RCP<const Map> rangeMap  = rangeMapExtractor ->getMap(row);
  RCP<const Map> domainMap = domainMapExtractor->getMap(col);

  // use user-specified striding information if available. Otherwise try to use internal striding information from the submaps!
  if (bRangeUserSpecified)
    srangeMap = Teuchos::rcp(new StridedMap(rangeMap, rangeStridingInfo, rangeMap->getIndexBase(), rangeStridedBlockId, 0));
  else
    srangeMap = rcp_dynamic_cast<const StridedMap>(rangeMap);

  if (bDomainUserSpecified)
    sdomainMap = Teuchos::rcp(new StridedMap(domainMap, domainStridingInfo, domainMap->getIndexBase(), domainStridedBlockId, 0));
  else
    sdomainMap = rcp_dynamic_cast<const StridedMap>(domainMap);

  // In case that both user-specified and internal striding information from the submaps
  // does not contain valid striding information, try to extract it from the global maps
  // in the map extractor.
  if (srangeMap.is_null()) {
    RCP<const Map>        fullRangeMap  = rangeMapExtractor->getFullMap();
    RCP<const StridedMap> sFullRangeMap = rcp_dynamic_cast<const StridedMap>(fullRangeMap);
    TEUCHOS_TEST_FOR_EXCEPTION(sFullRangeMap.is_null(), Exceptions::BadCast, "Full rangeMap is not a strided map.");

    std::vector<size_t> stridedData = sFullRangeMap->getStridingData();
    if (stridedData.size() == 1 && row > 0) {
      // We have block matrices. use striding block information 0
      srangeMap = StridedMapFactory::Build(rangeMap, stridedData, 0, sFullRangeMap->getOffset());
    } else {
      // We have strided matrices. use striding information of the corresponding block
      srangeMap = StridedMapFactory::Build(rangeMap, stridedData, row, sFullRangeMap->getOffset());
    }
  }

  if (sdomainMap.is_null()) {
    RCP<const Map>        fullDomainMap  = domainMapExtractor->getFullMap();
    RCP<const StridedMap> sFullDomainMap = rcp_dynamic_cast<const StridedMap>(fullDomainMap);
    TEUCHOS_TEST_FOR_EXCEPTION(sFullDomainMap.is_null(), Exceptions::BadCast, "Full domainMap is not a strided map");

    std::vector<size_t> stridedData = sFullDomainMap->getStridingData();
    if (stridedData.size() == 1 && col > 0) {
      // We have block matrices. use striding block information 0
      sdomainMap = StridedMapFactory::Build(domainMap, stridedData, 0, sFullDomainMap->getOffset());
    } else {
      // We have strided matrices. use striding information of the corresponding block
      sdomainMap = StridedMapFactory::Build(domainMap, stridedData, col, sFullDomainMap->getOffset());
    }
  }

  TEUCHOS_TEST_FOR_EXCEPTION(srangeMap.is_null(),  Exceptions::BadCast, "rangeMap "  << row << " is not a strided map.");
  TEUCHOS_TEST_FOR_EXCEPTION(sdomainMap.is_null(), Exceptions::BadCast, "domainMap " << col << " is not a strided map.");

  GetOStream(Statistics1) << "A(" << row << "," << col << ") is a single block and has strided maps:"
                          << "\n range map fixed block size = " << srangeMap ->getFixedBlockSize() << ", strided block id = " << srangeMap ->getStridedBlockId()
                          << "\n domain map fixed block size = " << sdomainMap->getFixedBlockSize() << ", strided block id = " << sdomainMap->getStridedBlockId() << std::endl;
  GetOStream(Statistics2) << "A(" << row << "," << col << ") has " << Op->getGlobalNumRows() << "x" << Op->getGlobalNumCols() << " rows and columns." << std::endl;

  // TODO do we really need that? we moved the code to getMatrix...
  if (Op->IsView("stridedMaps") == true)
    Op->RemoveView("stridedMaps");
  Op->CreateView("stridedMaps", srangeMap, sdomainMap);

  TEUCHOS_TEST_FOR_EXCEPTION(Op->IsView("stridedMaps") == false, Exceptions::RuntimeError, "Failed to set \"stridedMaps\" view.");

  currentLevel.Set("A", Op, this);
}
// Dropping on a matrix with fixed block size 3: a tridiagonal matrix with a tiny
// off-diagonal coupling (0.00001) is built, and with a drop tolerance of 1.0 the
// factory is expected to drop all off-node connections and report filtering == true.
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicBlockWithFiltering, Scalar, LocalOrdinal, GlobalOrdinal, Node) {
# include "MueLu_UseShortNames.hpp"
  MUELU_TESTING_SET_OSTREAM;
  MUELU_TESTING_LIMIT_SCOPE(Scalar,GlobalOrdinal,NO);
  out << "version: " << MueLu::Version() << std::endl;

  RCP<const Teuchos::Comm<int> > comm = Parameters::getDefaultComm();
  Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib();

  Level fineLevel;
  TestHelpers_kokkos::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(fineLevel);

  // build dummy problem: 3 DOFs per process, tridiagonal with off-diagonal values -1.0 and 0.00001
  auto dofMap = MapFactory::Build(lib, 3*comm->getSize(), 0, comm);
  auto mtx = TestHelpers_kokkos::TestFactory<SC,LO,GO,NO>::BuildTridiag(dofMap, 2.0, -1.0, 0.00001);

  mtx->SetFixedBlockSize(3, 0); // block size 3, offset 0
  fineLevel.Set("A", mtx);

  CoalesceDropFactory_kokkos dropFact = CoalesceDropFactory_kokkos();
  // aggressive tolerance: drops the weak connections (verified by the graph checks below)
  dropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(1.0));

  fineLevel.Request("Graph", &dropFact);
  fineLevel.Request("DofsPerNode", &dropFact);
  fineLevel.Request("Filtering", &dropFact);

  dropFact.Build(fineLevel);

  auto graph         = fineLevel.Get<RCP<LWGraph_kokkos> >("Graph", &dropFact);
  auto myDofsPerNode = fineLevel.Get<LO> ("DofsPerNode", &dropFact);
  auto filtering     = fineLevel.Get<bool> ("Filtering", &dropFact);

  TEST_EQUALITY(as<int>(myDofsPerNode) == 3, true);  // block size must be carried through
  TEST_EQUALITY(filtering, true);                    // entries were dropped
  // amalgamation collapses the 3 DOFs per process into one graph node per process
  TEST_EQUALITY(as<int>(graph->GetDomainMap()->getGlobalNumElements()) == comm->getSize(), true);
  // after dropping, node 0 is connected only to itself
  TEST_EQUALITY(graph->getNeighborVertices(0).size(), 1);

  auto myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping!
  auto myDomainMap = graph->GetDomainMap();

  TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0);
  // import map includes ghost nodes: 2 extra entries per inter-process interface
  TEST_EQUALITY(myImportMap->getGlobalNumElements(), as<size_t>(comm->getSize()+2*(comm->getSize()-1)));
  if (comm->getSize() > 1) {
    size_t numLocalRowMapElts = graph->GetNodeNumVertices();
    size_t numLocalImportElts = myImportMap->getNodeNumElements();
    // end ranks have one neighbor process (one ghost node), interior ranks have two
    if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) {
      TEST_EQUALITY(numLocalImportElts, numLocalRowMapElts+1);
    } else {
      TEST_EQUALITY(numLocalImportElts, numLocalRowMapElts+2);
    }
  }

  // domain map: exactly one (amalgamated) node per process, no overlap
  TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myDomainMap->getMaxLocalIndex(), 0);
  TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0);
  TEST_EQUALITY(myDomainMap->getGlobalNumElements(), as<size_t>(comm->getSize()));
  TEST_EQUALITY(myDomainMap->getNodeNumElements(), 1);
}
int main(int argc, char *argv[]) { using Teuchos::RCP; Teuchos::oblackholestream blackhole; Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole); RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm(); RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); out->setOutputToRootOnly(0); #ifndef HAVE_TEUCHOS_LONG_LONG_INT *out << "Warning: scaling test was not compiled with long long int support" << std::endl; #endif /**********************************************************************************/ /* SET TEST PARAMETERS */ /**********************************************************************************/ // Note: use --help to list available options. Teuchos::CommandLineProcessor clp(false); // Default is Laplace1D with nx = 8748. // It's a nice size for 1D and perfect aggregation. (6561=3^8) //Nice size for 1D and perfect aggregation on small numbers of processors. (8748=4*3^7) Galeri::Xpetra::Parameters<GO> matrixParameters(clp, 8748); // manage parameters of the test case Xpetra::Parameters xpetraParameters(clp); // manage parameters of xpetra // custom parameters std::string aggOrdering = "natural"; int minPerAgg=2; int maxNbrAlreadySelected=0; clp.setOption("aggOrdering",&aggOrdering,"aggregation ordering strategy (natural,random,graph)"); clp.setOption("minPerAgg",&minPerAgg,"minimum #DOFs per aggregate"); clp.setOption("maxNbrSel",&maxNbrAlreadySelected,"maximum # of nbrs allowed to be in other aggregates"); switch (clp.parse(argc,argv)) { case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } matrixParameters.check(); xpetraParameters.check(); // TODO: check custom parameters if (comm->getRank() == 0) { std::cout << matrixParameters << xpetraParameters << std::endl; 
// TODO: print custom parameters } /**********************************************************************************/ /* CREATE INITIAL MATRIX */ /**********************************************************************************/ const RCP<const Map> map = MapFactory::Build(xpetraParameters.GetLib(), matrixParameters.GetNumGlobalElements(), 0, comm); Teuchos::RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr = Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); //TODO: Matrix vs. CrsMatrixWrap RCP<Matrix> A = Pr->BuildMatrix(); // return EXIT_SUCCESS; /**********************************************************************************/ /* */ /**********************************************************************************/ Level Finest; Finest.SetLevelID(0); // must be level 0 for NullspaceFactory Finest.Set("A", A); Finest.SetFactoryManager( rcp( new FactoryManager() )); CoupledAggregationFactory CoupledAggFact; Finest.Request(CoupledAggFact); *out << "========================= Aggregate option summary =========================" << std::endl; *out << "min DOFs per aggregate : " << minPerAgg << std::endl; *out << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl; CoupledAggFact.SetMinNodesPerAggregate(minPerAgg); //TODO should increase if run anything other than 1D CoupledAggFact.SetMaxNeighAlreadySelected(maxNbrAlreadySelected); std::transform(aggOrdering.begin(), aggOrdering.end(), aggOrdering.begin(), ::tolower); if (aggOrdering == "natural") { *out << "aggregate ordering : NATURAL" << std::endl; CoupledAggFact.SetOrdering(MueLu::AggOptions::NATURAL); } else if (aggOrdering == "random") { *out << "aggregate ordering : RANDOM" << std::endl; CoupledAggFact.SetOrdering(MueLu::AggOptions::RANDOM); } else if (aggOrdering == "graph") { *out << "aggregate ordering : GRAPH" << std::endl; 
CoupledAggFact.SetOrdering(MueLu::AggOptions::GRAPH); } else { std::string msg = "main: bad aggregation option """ + aggOrdering + """."; throw(MueLu::Exceptions::RuntimeError(msg)); } CoupledAggFact.SetPhase3AggCreation(0.5); *out << "=============================================================================" << std::endl; CoupledAggFact.Build(Finest); return EXIT_SUCCESS; }
// Tests two sweeps of ILUT in Ifpack2
// Applies the same ILUT smoother to the same RHS twice -- once with a zero initial
// guess, once with a random initial guess -- and (serial only) asserts that the two
// final residuals differ, i.e. that Apply() actually honors the initial-guess flag.
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Ifpack2Smoother, ILU_TwoSweeps, Scalar, LocalOrdinal, GlobalOrdinal, Node)
{
# include <MueLu_UseShortNames.hpp>
  MUELU_TESTING_SET_OSTREAM;
  MUELU_TESTING_LIMIT_EPETRA_SCOPE(Scalar,GlobalOrdinal,Node);
  typedef typename Teuchos::ScalarTraits<SC>::magnitudeType magnitude_type;
  MUELU_TEST_ONLY_FOR(Xpetra::UseTpetra)
  {
    //FIXME this will probably fail in parallel b/c it becomes block Jacobi
    Teuchos::ParameterList paramList;
    Ifpack2Smoother smoother("ILUT",paramList);

    //I don't use the testApply infrastructure because it has no provision for an initial guess.
    Teuchos::RCP<Matrix> A = TestHelpers::TestFactory<SC, LO, GO, NO>::Build1DPoisson(125);
    Level level;
    TestHelpers::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(level);
    level.Set("A", A);
    smoother.Setup(level);

    RCP<MultiVector> X   = MultiVectorFactory::Build(A->getDomainMap(),1);
    RCP<MultiVector> RHS = MultiVectorFactory::Build(A->getRangeMap(),1);

    // Random X (fixed seed)
    X->setSeed(846930886);
    X->randomize();

    // Normalize X
    Array<magnitude_type> norms(1);
    X->norm2(norms);
    X->scale(1/norms[0]);

    // Compute RHS corresponding to X
    A->apply(*X,*RHS, Teuchos::NO_TRANS,(SC)1.0,(SC)0.0);

    // Reset X to 0
    X->putScalar((SC) 0.0);

    RHS->norm2(norms);
    out << "||RHS|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << norms[0] << std::endl;

    // --- First sweep: zero initial guess ---
    out << "solve with zero initial guess" << std::endl;
    Teuchos::Array<magnitude_type> initialNorms(1);
    X->norm2(initialNorms);
    out << " ||X_initial|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << initialNorms[0] << std::endl;

    smoother.Apply(*X, *RHS, true);  //zero initial guess

    Teuchos::Array<magnitude_type> finalNorms(1);
    X->norm2(finalNorms);
    Teuchos::Array<magnitude_type> residualNorm1 = Utilities::ResidualNorm(*A, *X, *RHS);
    out << " ||Residual_final|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(20) << residualNorm1[0] << std::endl;
    out << " ||X_final|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << finalNorms[0] << std::endl;

    // --- Second sweep: random (nonzero) initial guess, same RHS ---
    out << "solve with random initial guess" << std::endl;
    X->randomize();
    X->norm2(initialNorms);
    out << " ||X_initial|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << initialNorms[0] << std::endl;

    smoother.Apply(*X, *RHS, false); //nonzero initial guess

    X->norm2(finalNorms);
    Teuchos::Array<magnitude_type> residualNorm2 = Utilities::ResidualNorm(*A, *X, *RHS);
    out << " ||Residual_final|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(20) << residualNorm2[0] << std::endl;
    out << " ||X_final|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << finalNorms[0] << std::endl;

    RCP<const Teuchos::Comm<int> > comm = TestHelpers::Parameters::getDefaultComm();
    if (comm->getSize() == 1) {
      //TEST_EQUALITY(residualNorms < 1e-10, true);
      // Different initial guesses must lead to different final residuals,
      // otherwise the initial guess was ignored.
      TEST_EQUALITY(residualNorm1[0] != residualNorm2[0], true);
    } else {
      out << "Pass/Fail is only checked in serial." << std::endl;
    }
  }
} // ILU
// Amalgamation with strided maps and the lightweight wrap: block size 3 split as
// (2,1), the "stridedMaps" view wraps strided block 0 with no offset. No drop
// tolerance is set, so the graph reflects the full tridiagonal coupling pattern.
TEUCHOS_UNIT_TEST(CoalesceDropFactory, AmalgamationStrided2LW) {
# include "MueLu_UseShortNames.hpp"
  MUELU_TESTING_SET_OSTREAM;
  MUELU_TESTING_LIMIT_SCOPE(Scalar,GlobalOrdinal,NO);
  out << "version: " << MueLu::Version() << std::endl;

  // unit test for block size 3 = (2,1). wrap block 0
  // lightweight wrap = true
  RCP<const Teuchos::Comm<int> > comm = Parameters::getDefaultComm();
  Xpetra::UnderlyingLib lib = TestHelpers::Parameters::getLib();

  // create strided map information
  std::vector<size_t> stridingInfo;
  stridingInfo.push_back(as<size_t>(2));
  stridingInfo.push_back(as<size_t>(1));
  LocalOrdinal stridedBlockId = 0;

  int blockSize=3;

  // 3 DOFs per process, wrapped as strided block 0
  RCP<const StridedMap> dofMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, blockSize*comm->getSize(), 0, stridingInfo, comm, stridedBlockId /*blockId*/, 0 /*offset*/);

  /////////////////////////////////////////////////////

  // tridiagonal matrix with off-diagonal values -1.0 and -1.0 (nothing weak to drop)
  Teuchos::RCP<Matrix> mtx = TestHelpers::TestFactory<SC,LO,GO,NO>::BuildTridiag(dofMap, 2.0, -1.0, -1.0);

  Level fineLevel;
  TestHelpers::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(fineLevel);

  // attach the strided range/domain maps as the "stridedMaps" view
  RCP<const Xpetra::StridedMap<LocalOrdinal, GlobalOrdinal, Node> > stridedRangeMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build( mtx->getRangeMap(), stridingInfo, stridedBlockId, 0 /*offset*/ );
  RCP<const Map> stridedDomainMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build( mtx->getDomainMap(), stridingInfo, stridedBlockId, 0 /*offset*/ );
  if(mtx->IsView("stridedMaps") == true) mtx->RemoveView("stridedMaps");
  mtx->CreateView("stridedMaps", stridedRangeMap, stridedDomainMap);

  fineLevel.Set("A", mtx);

  CoalesceDropFactory dropFact = CoalesceDropFactory();
  dropFact.SetParameter("lightweight wrap",Teuchos::ParameterEntry(true));

  fineLevel.Request("Graph", &dropFact);
  fineLevel.Request("DofsPerNode", &dropFact);

  dropFact.Build(fineLevel);

  fineLevel.print(out);
  RCP<GraphBase> graph = fineLevel.Get<RCP<GraphBase> >("Graph", &dropFact);

  LO myDofsPerNode = fineLevel.Get<LO>("DofsPerNode", &dropFact);
  // amalgamation: one graph node per process (blockSize DOFs -> 1 node)
  TEST_EQUALITY(as<int>(graph->GetDomainMap()->getGlobalNumElements()) == comm->getSize(), true);
  TEST_EQUALITY(as<int>(myDofsPerNode) == blockSize, true);
  bool bCorrectGraph = false;
  if (comm->getSize() == 1 && graph->getNeighborVertices(0).size() == 1) {
    bCorrectGraph = true;
  } else {
    // parallel: end ranks see one neighbor node, interior ranks see more
    if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) {
      if (graph->getNeighborVertices(0).size() == 2) bCorrectGraph = true;
    } else {
      if (graph->getNeighborVertices(0).size() == blockSize) bCorrectGraph = true;
    }
  }
  TEST_EQUALITY(bCorrectGraph, true);

  const RCP<const Map> myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping!
  const RCP<const Map> myDomainMap = graph->GetDomainMap();

  TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myImportMap->getMinLocalIndex(),0);
  // import map includes ghost nodes: 2 extra entries per inter-process interface
  TEST_EQUALITY(myImportMap->getGlobalNumElements(),as<size_t>(comm->getSize()+2*(comm->getSize()-1)));
  if (comm->getSize()>1) {
    size_t numLocalRowMapElts = graph->GetNodeNumVertices();
    size_t numLocalImportElts = myImportMap->getNodeNumElements();
    // end ranks have one ghost node, interior ranks have two
    if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) {
      TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+1), true);
    } else {
      TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+2), true);
    }
  }
  if (comm->getSize()>1) {
    size_t numLocalRowMapElts = graph->GetNodeNumVertices();
    size_t maxLocalIndex = myImportMap->getMaxLocalIndex();
    // local index range scales with blockSize (DOF-level column map)
    if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) {
      TEST_EQUALITY(as<bool>(maxLocalIndex==numLocalRowMapElts*blockSize-2), true);
    } else {
      TEST_EQUALITY(as<bool>(maxLocalIndex==numLocalRowMapElts*blockSize-1), true);
    }
  }

  // domain map: exactly one amalgamated node per process
  TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myDomainMap->getMinLocalIndex(),0);
  TEST_EQUALITY(myDomainMap->getMaxLocalIndex(),0);
  TEST_EQUALITY(myDomainMap->getGlobalNumElements(),as<size_t>(comm->getSize()));
  TEST_EQUALITY(as<bool>(myDomainMap->getNodeNumElements()==1), true);
} // AmalgamationStrided2LW
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicScalarWithoutFiltering, Scalar, LocalOrdinal, GlobalOrdinal, Node) { # include "MueLu_UseShortNames.hpp" MUELU_TESTING_SET_OSTREAM; MUELU_TESTING_LIMIT_SCOPE(Scalar,GlobalOrdinal,NO); out << "version: " << MueLu::Version() << std::endl; RCP<const Teuchos::Comm<int> > comm = Parameters::getDefaultComm(); Level fineLevel; TestHelpers_kokkos::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(fineLevel); RCP<Matrix> A = TestHelpers_kokkos::TestFactory<SC,LO,GO,NO>::Build1DPoisson(36); fineLevel.Set("A", A); CoalesceDropFactory_kokkos dropFact; fineLevel.Request("Graph", &dropFact); fineLevel.Request("DofsPerNode", &dropFact); fineLevel.Request("Filtering", &dropFact); dropFact.Build(fineLevel); auto graph = fineLevel.Get<RCP<LWGraph_kokkos> >("Graph", &dropFact); auto myDofsPerNode = fineLevel.Get<LO> ("DofsPerNode", &dropFact); auto filtering = fineLevel.Get<bool> ("Filtering", &dropFact); TEST_EQUALITY(as<int>(myDofsPerNode) == 1, true); TEST_EQUALITY(filtering, false); bool bCorrectGraph = false; if (comm->getSize() == 1) { auto v0 = graph->getNeighborVertices(0); auto v1 = graph->getNeighborVertices(1); auto v2 = graph->getNeighborVertices(2); if (v0.size() == 2 && ((v0(0) == 0 && v0(1) == 1) || (v0(0) == 1 && v0(1) == 0)) && v1.size() == 3 && v2.size() == 3) bCorrectGraph = true; } else { if (comm->getRank() == 0 ) { if (graph->getNeighborVertices(0).size() == 2) bCorrectGraph = true; } else { if (graph->getNeighborVertices(0).size() == 3) bCorrectGraph = true; } } TEST_EQUALITY(bCorrectGraph, true); auto myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! 
auto myDomainMap = graph->GetDomainMap(); TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); TEST_EQUALITY(myImportMap->getGlobalNumElements(), as<size_t>(36 + (comm->getSize()-1)*2)); TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); }
// Amalgamation with a strided, offset map plus dropping (lightweight wrap):
// block size 9 split as (2,3,4), the view wraps strided block 1 with GID offset 19.
// The tridiagonal matrix has off-diagonal values 1.0 and 0.0001; with a drop
// tolerance of 0.3 the weak couplings are expected to be dropped (see graph checks).
TEUCHOS_UNIT_TEST(CoalesceDropFactory, AmalgamationStridedOffsetDropping2LW) {
  // unit test for block size 9 = (2,3,4). wrap block 1.
  // drop small entries
  // lightweight wrap = true
  out << "version: " << MueLu::Version() << std::endl;

  RCP<const Teuchos::Comm<int> > comm = Parameters::getDefaultComm();
  Xpetra::UnderlyingLib lib = TestHelpers::Parameters::getLib();

  // create strided map information
  std::vector<size_t> stridingInfo;
  stridingInfo.push_back(as<size_t>(2));
  stridingInfo.push_back(as<size_t>(3));
  stridingInfo.push_back(as<size_t>(4));
  LocalOrdinal stridedBlockId = 1;
  GlobalOrdinal offset = 19;

  // 9 DOFs per process, wrapped as strided block 1 with offset 19
  RCP<const StridedMap> dofMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, 9*comm->getSize(), 0, stridingInfo, comm, stridedBlockId, offset);

  /////////////////////////////////////////////////////

  // tridiagonal matrix with off-diagonal values 1.0 and 0.0001
  Teuchos::RCP<Matrix> mtx = TestHelpers::TestFactory<SC,LO,GO,NO>::BuildTridiag(dofMap, 2.0, 1.0, 0.0001);

  Level fineLevel;
  TestHelpers::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(fineLevel);

  // attach the strided range/domain maps as the "stridedMaps" view
  RCP<const Map> stridedRangeMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build( mtx->getRangeMap(), stridingInfo, stridedBlockId, offset );
  RCP<const Map> stridedDomainMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build( mtx->getDomainMap(), stridingInfo, stridedBlockId, offset );

  if(mtx->IsView("stridedMaps") == true) mtx->RemoveView("stridedMaps");
  mtx->CreateView("stridedMaps", stridedRangeMap, stridedDomainMap);

  fineLevel.Set("A", mtx);

  CoalesceDropFactory dropFact = CoalesceDropFactory();
  dropFact.SetParameter("lightweight wrap",Teuchos::ParameterEntry(true));
  dropFact.SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(0.3));

  fineLevel.Request("Graph", &dropFact);
  fineLevel.Request("DofsPerNode", &dropFact);

  dropFact.Build(fineLevel);

  fineLevel.print(out);
  RCP<GraphBase> graph = fineLevel.Get<RCP<GraphBase> >("Graph", &dropFact);

  LO myDofsPerNode = fineLevel.Get<LO>("DofsPerNode", &dropFact);
  // amalgamation: one graph node per process (9 DOFs -> 1 node)
  TEST_EQUALITY(as<int>(graph->GetDomainMap()->getGlobalNumElements()) == comm->getSize(), true);
  TEST_EQUALITY(as<int>(myDofsPerNode) == 9, true);
  bool bCorrectGraph = false;
  if (comm->getSize() == 1 && graph->getNeighborVertices(0).size() == 1) {
    bCorrectGraph = true;
  } else {
    // with dropping, node 0 keeps fewer neighbors than in the no-drop tests above
    if (comm->getRank() == 0) {
      if (graph->getNeighborVertices(0).size() == 1) bCorrectGraph = true;
    } else {
      if (graph->getNeighborVertices(0).size() == 2) bCorrectGraph = true;
    }
  }
  TEST_EQUALITY(bCorrectGraph, true);

  const RCP<const Map> myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping!
  const RCP<const Map> myDomainMap = graph->GetDomainMap();

  TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myImportMap->getMinLocalIndex(),0);
  // import map includes ghost nodes: 2 extra entries per inter-process interface
  TEST_EQUALITY(myImportMap->getGlobalNumElements(),as<size_t>(comm->getSize()+2*(comm->getSize()-1)));
  if (comm->getSize()>1) {
    size_t numLocalRowMapElts = graph->GetNodeNumVertices();
    size_t numLocalImportElts = myImportMap->getNodeNumElements();
    // end ranks have one ghost node, interior ranks have two
    if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) {
      TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+1), true);
    } else {
      TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+2), true);
    }
  }

  // domain map: exactly one amalgamated node per process
  TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myDomainMap->getMinLocalIndex(),0);
  TEST_EQUALITY(myDomainMap->getGlobalNumElements(),as<size_t>(comm->getSize()));
  TEST_EQUALITY(as<bool>(myDomainMap->getNodeNumElements()==1), true);
} // AmalgamationStridedOffsetDropping2LW
// Rebalance the grid-transfer operators (P and/or R) plus the auxiliary data
// (coordinates, nullspace) of the coarse level according to the "Importer"
// produced by the repartitioning factory.
//
// Behavior:
//  - In "implicit" mode (repartition: rebalance P and R == false) the original
//    P/R are kept and the importer is stashed on the level for the solve phase.
//  - Otherwise P gets its domain map (and importer) replaced in place, R is
//    rebuilt via a fused import, and coordinates/nullspace are permuted with
//    doImport().
void RebalanceTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level& coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);

  const ParameterList& pL = GetParameterList();
  // implicit == true means: do NOT explicitly rebalance P and R; only the
  // importer is saved so the solve phase can apply the permutation on the fly.
  const bool implicit   = !pL.get<bool>("repartition: rebalance P and R");
  int        writeStart = pL.get<int> ("write start");
  int        writeEnd   = pL.get<int> ("write end");

  // Optionally dump the (pre-rebalancing) level-0 coordinates to a MATLAB file.
  if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd && IsAvailable(fineLevel, "Coordinates")) {
    std::string fileName = "coordinates_level_0.m";
    RCP<MultiVector> fineCoords = fineLevel.Get< RCP<MultiVector> >("Coordinates");
    if (fineCoords != Teuchos::null)
      Utils::Write(fileName, *fineCoords);
  }

  RCP<const Import> importer = Get<RCP<const Import> >(coarseLevel, "Importer");
  if (implicit) {
    // Save the importer, we'll need it for solve
    coarseLevel.Set("Importer", importer, NoFactory::get());
  }

  // Options controlling the verbosity of PrintMatrixInfo below.
  RCP<ParameterList> params = rcp(new ParameterList());  // NOTE: stray ";;" removed
  params->set("printLoadBalancingInfo", true);
  params->set("printCommInfo",          true);

  std::string transferType = pL.get<std::string>("type");
  if (transferType == "Interpolation") {
    RCP<Matrix> originalP = Get< RCP<Matrix> >(coarseLevel, "P");

    { // This line must be after the Get call
      SubFactoryMonitor m1(*this, "Rebalancing prolongator", coarseLevel);

      if (implicit || importer.is_null()) {
        GetOStream(Runtime0) << "Using original prolongator" << std::endl;
        Set(coarseLevel, "P", originalP);

      } else {
        // P is the transfer operator from the coarse grid to the fine grid.
        // P must transfer the data from the newly reordered coarse A to the
        // (unchanged) fine A. This means that the domain map (coarse) of P
        // must be changed according to the new partition. The range map
        // (fine) is kept unchanged.
        //
        // The domain map of P must match the range map of R. See also note
        // below about domain/range map of R and its implications for P.
        //
        // To change the domain map of P, P needs to be fillCompleted again
        // with the new domain map. To achieve this, P is copied into a new
        // matrix that is not fill-completed. The doImport() operation is
        // just used here to make a copy of P: the importer is trivial and
        // there is no data movement involved. The reordering actually
        // happens during the fillComplete() with domainMap == importer->getTargetMap().
        RCP<Matrix> rebalancedP = originalP;
        RCP<const CrsMatrixWrap> crsOp = rcp_dynamic_cast<const CrsMatrixWrap>(originalP);
        TEUCHOS_TEST_FOR_EXCEPTION(crsOp == Teuchos::null, Exceptions::BadCast, "Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");

        RCP<CrsMatrix> rebalancedP2 = crsOp->getCrsMatrix();
        TEUCHOS_TEST_FOR_EXCEPTION(rebalancedP2 == Teuchos::null, std::runtime_error, "Xpetra::CrsMatrixWrap doesn't have a CrsMatrix");

        {
          SubFactoryMonitor subM(*this, "Rebalancing prolongator -- fast map replacement", coarseLevel);

          // Swap in the repartitioned domain map without re-fillCompleting P.
          RCP<const Import> newImporter = ImportFactory::Build(importer->getTargetMap(), rebalancedP->getColMap());
          rebalancedP2->replaceDomainMapAndImporter(importer->getTargetMap(), newImporter);
        }

        ///////////////////////// EXPERIMENTAL
        // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps.
        // That is probably something for an external permutation factory
        //   if (originalP->IsView("stridedMaps"))
        //     rebalancedP->CreateView("stridedMaps", originalP);
        ///////////////////////// EXPERIMENTAL

        Set(coarseLevel, "P", rebalancedP);

        if (IsPrint(Statistics1))
          GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedP, "P (rebalanced)", params);
      }
    }

    // No repartitioning took place: pass nullspace/coordinates through unchanged.
    if (importer.is_null()) {
      if (IsAvailable(coarseLevel, "Nullspace"))
        Set(coarseLevel, "Nullspace", Get<RCP<MultiVector> >(coarseLevel, "Nullspace"));

      if (pL.isParameter("Coordinates") && pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null)
        if (IsAvailable(coarseLevel, "Coordinates"))
          Set(coarseLevel, "Coordinates", Get< RCP<MultiVector> >(coarseLevel, "Coordinates"));

      return;
    }

    if (pL.isParameter("Coordinates") &&
        pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null &&
        IsAvailable(coarseLevel, "Coordinates")) {
      RCP<MultiVector> coords = Get<RCP<MultiVector> >(coarseLevel, "Coordinates");

      // This line must be after the Get call
      SubFactoryMonitor subM(*this, "Rebalancing coordinates", coarseLevel);

      LO nodeNumElts = coords->getMap()->getNodeNumElements();

      // If a process has no matrix rows, then we can't calculate blocksize using the formula below.
      LO myBlkSize = 0, blkSize = 0;
      if (nodeNumElts > 0)
        myBlkSize = importer->getSourceMap()->getNodeNumElements() / nodeNumElts;
      maxAll(coords->getMap()->getComm(), myBlkSize, blkSize);

      RCP<const Import> coordImporter;
      if (blkSize == 1) {
        // One dof per node: the matrix importer permutes coordinates directly.
        coordImporter = importer;

      } else {
        // NOTE: there is an implicit assumption here: we assume that dof any node are enumerated consequently
        // Proper fix would require using decomposition similar to how we construct importer in the
        // RepartitionFactory
        RCP<const Map> origMap   = coords->getMap();
        GO             indexBase = origMap->getIndexBase();

        // Collapse the dof map down to a node map: one GID per block of blkSize dofs.
        ArrayView<const GO> OEntries   = importer->getTargetMap()->getNodeElementList();
        LO                  numEntries = OEntries.size()/blkSize;
        ArrayRCP<GO> Entries(numEntries);
        for (LO i = 0; i < numEntries; i++)
          Entries[i] = (OEntries[i*blkSize]-indexBase)/blkSize + indexBase;

        RCP<const Map> targetMap = MapFactory::Build(origMap->lib(), origMap->getGlobalNumElements(), Entries(), indexBase, origMap->getComm());
        coordImporter = ImportFactory::Build(origMap, targetMap);
      }

      RCP<MultiVector> permutedCoords = MultiVectorFactory::Build(coordImporter->getTargetMap(), coords->getNumVectors());
      permutedCoords->doImport(*coords, *coordImporter, Xpetra::INSERT);

      if (pL.get<bool>("useSubcomm") == true)
        permutedCoords->replaceMap(permutedCoords->getMap()->removeEmptyProcesses());

      Set(coarseLevel, "Coordinates", permutedCoords);

      std::string fileName = "rebalanced_coordinates_level_" + toString(coarseLevel.GetLevelID()) + ".m";
      if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd && permutedCoords->getMap() != Teuchos::null)
        Utils::Write(fileName, *permutedCoords);
    }

    if (IsAvailable(coarseLevel, "Nullspace")) {
      RCP<MultiVector> nullspace = Get< RCP<MultiVector> >(coarseLevel, "Nullspace");

      // This line must be after the Get call
      SubFactoryMonitor subM(*this, "Rebalancing nullspace", coarseLevel);

      RCP<MultiVector> permutedNullspace = MultiVectorFactory::Build(importer->getTargetMap(), nullspace->getNumVectors());
      permutedNullspace->doImport(*nullspace, *importer, Xpetra::INSERT);

      if (pL.get<bool>("useSubcomm") == true)
        permutedNullspace->replaceMap(permutedNullspace->getMap()->removeEmptyProcesses());

      Set(coarseLevel, "Nullspace", permutedNullspace);
    }

  } else {
    // transferType != "Interpolation" => handle the restriction operator.
    if (pL.get<bool>("transpose: use implicit") == false) {
      RCP<Matrix> originalR = Get< RCP<Matrix> >(coarseLevel, "R");

      SubFactoryMonitor m2(*this, "Rebalancing restriction", coarseLevel);

      if (implicit || importer.is_null()) {
        GetOStream(Runtime0) << "Using original restrictor" << std::endl;
        Set(coarseLevel, "R", originalR);

      } else {
        RCP<Matrix> rebalancedR;
        {
          SubFactoryMonitor subM(*this, "Rebalancing restriction -- fusedImport", coarseLevel);

          RCP<Map> dummy; // meaning: use originalR's domain map.
          rebalancedR = MatrixFactory::Build(originalR, *importer, dummy, importer->getTargetMap());
        }
        Set(coarseLevel, "R", rebalancedR);

        ///////////////////////// EXPERIMENTAL
        // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps.
        // That is probably something for an external permutation factory
        //   if (originalR->IsView("stridedMaps"))
        //     rebalancedR->CreateView("stridedMaps", originalR);
        ///////////////////////// EXPERIMENTAL

        if (IsPrint(Statistics1))
          GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedR, "R (rebalanced)", params);
      }
    }
  }
}
int main(int argc, char *argv[]) { using Teuchos::RCP; // reference count pointers using Teuchos::rcp; using Teuchos::TimeMonitor; // // MPI initialization using Teuchos // Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm(); // // Parameters // Teuchos::CommandLineProcessor clp(false); // Note: GO nx,ny,nz; nx=500; ny=500; nz=100; Galeri::Xpetra::Parameters<GO> matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra std::string xmlFileName = "scalingTest.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file. Otherwise, this example uses by default 'scalingTest.xml'"); int amgAsPrecond=1; clp.setOption("precond",&amgAsPrecond,"apply multigrid as preconditioner"); int amgAsSolver=0; clp.setOption("fixPoint",&amgAsSolver,"apply multigrid as solver"); bool printTimings=true; clp.setOption("timings","notimings",&printTimings,"print timings to screen"); switch (clp.parse(argc,argv)) { case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } if (comm->getRank() == 0) { std::cout << "========================================================" << std::endl << xpetraParameters << matrixParameters; } RCP<TimeMonitor> globalTimeMonitor = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time"))); // read aggregation options from file Teuchos::FileInputSource fileSrc(xmlFileName); Teuchos::XMLObject fileXML = fileSrc.getObject(); Teuchos::XMLParameterListReader listReader; Teuchos::ParameterList aggList = listReader.toParameterList(fileXML); //std::cout << "===========aggList start===========" << std::endl; //aggList.print(std::cout); 
//std::cout << "===========aggList end===========" << std::endl; // instantiate aggregate factory, set options from parameter list RCP<MueLu::SingleLevelFactoryBase> aggFact; if (aggList.name() == "UncoupledAggregationFactory") { RCP<UncoupledAggregationFactory> ucFact = rcp( new UncoupledAggregationFactory() ); //ucFact->SetParameterList(aggList); //FIXME hack until UCAgg uses PL interface std::string ordering = aggList.get<std::string>("Ordering"); MueLu::AggOptions::Ordering eordering; if (ordering=="Natural") eordering = MueLu::AggOptions::NATURAL; if (ordering=="Graph") eordering = MueLu::AggOptions::GRAPH; if (ordering=="Random") eordering = MueLu::AggOptions::RANDOM; ucFact->SetOrdering(eordering); ucFact->SetMaxNeighAlreadySelected(aggList.get<int>("MaxNeighAlreadySelected")); ucFact->SetMinNodesPerAggregate(aggList.get<int>("MinNodesPerAggregate")); aggFact = ucFact; } else if (aggList.name() == "CoupledAggregationFactory") { RCP<CoupledAggregationFactory> cFact = rcp( new CoupledAggregationFactory() ); //cFact->SetParameterList(aggList); //FIXME hack until CoupledAgg uses PL interface //cFact->SetOrdering(aggList.get<std::string>("Ordering")); cFact->SetMaxNeighAlreadySelected(aggList.get<int>("MaxNeighAlreadySelected")); cFact->SetMinNodesPerAggregate(aggList.get<int>("MinNodesPerAggregate")); aggFact = cFact; } else { throw(MueLu::Exceptions::RuntimeError("List's name does not correspond to a known aggregation factory.")); } //Teuchos::ParameterList tlist = aggFact->GetParameterList(); //std::cout << "===========verify List start===========" << std::endl; //tlist.print(std::cout); //std::cout << "===========verify List end===========" << std::endl; // build matrix RCP<TimeMonitor> tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build"))); RCP<const Map> map; RCP<MultiVector> coordinates; // Retrieve matrix parameters (they may have been changed on the command line), and pass them to Galeri. 
// Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g., // d1 d2 d3 // d4 d5 d6 // d7 d8 d9 // d10 d11 d12 // A perfect distribution is only possible when the #processors is a perfect square. // This *will* result in "strip" distribution if the #processors is a prime number or if the factors are very different in // size. For example, np=14 will give a 7-by-2 distribution. // If you don't want Galeri to do this, specify mx or my on the galeriList. Teuchos::ParameterList pl = matrixParameters.GetParameterList(); Teuchos::ParameterList galeriList; galeriList.set("nx", pl.get("nx",nx)); galeriList.set("ny", pl.get("ny",ny)); //galeriList.set("mx", comm->getSize()); //galeriList.set("my", 1); if (matrixParameters.GetMatrixType() == "Laplace1D") { map = MapFactory::Build(xpetraParameters.GetLib(), matrixParameters.GetNumGlobalElements(), 0, comm); coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("1D",map,matrixParameters.GetParameterList()); } else if (matrixParameters.GetMatrixType() == "Laplace2D" || matrixParameters.GetMatrixType() == "Star2D") { map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList); coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("2D",map,matrixParameters.GetParameterList()); } else if (matrixParameters.GetMatrixType() == "Laplace3D") { coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("3D",map,matrixParameters.GetParameterList()); //map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList); //TODO when available in Galeri map = MapFactory::Build(xpetraParameters.GetLib(), matrixParameters.GetNumGlobalElements(), 0, comm); } if (comm->getRank() == 0) { GO mx = galeriList.get("mx", -1); GO my = galeriList.get("my", -1); std::cout << "Processor subdomains in x direction: " << mx << std::endl << 
"Processor subdomains in y direction: " << my << std::endl << "========================================================" << std::endl; } RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr = Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); RCP<Matrix> A = Pr->BuildMatrix(); tm = Teuchos::null; Level level; RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager()); level.SetFactoryManager(factoryHandler); level.SetLevelID(0); level.Set("A", A); level.Request("Aggregates", aggFact.get()); level.Request(*aggFact); level.setVerbLevel(Teuchos::VERB_NONE); aggFact->setVerbLevel(Teuchos::VERB_NONE); tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("aggregation time"))); aggFact->Build(level); tm = Teuchos::null; globalTimeMonitor = Teuchos::null; if (printTimings) TimeMonitor::summarize(); } //main