// Builds a filtered copy of the level's matrix "A" and stores it back on the
// level under the same name.
//
// For every local row, an entry (row, col) is copied to the output matrix only
// if the row and column belong to the same DOF within a node, i.e.
// (global row id % block size) == (global column id % block size), and the
// magnitude test below passes.
//
// NOTE(review): the magnitude test compares |value| against |0|. Assuming
// ScalarTraits::magnitude returns an absolute value, this holds for every
// non-NaN entry, so in practice only the DOF-id test filters anything.
// Confirm whether a sign test on the value was intended.
void DropNegativeEntriesFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Matrix filtering (springs)", currentLevel);

  typedef Teuchos::ScalarTraits<Scalar> STS;

  RCP<Matrix> Ain = Get< RCP<Matrix> >(currentLevel, "A");
  const LocalOrdinal nDofsPerNode = Ain->GetFixedBlockSize();

  // Output operator: same row map and per-row capacity as the input.
  Teuchos::RCP<Matrix> Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries(), Xpetra::StaticProfile);

  // Threshold used by the magnitude test.
  const typename STS::magnitudeType zeroMagnitude = STS::magnitude(STS::zero());

  const size_t numLocalRows = Ain->getNodeNumRows();
  for (size_t localRow = 0; localRow < numLocalRows; ++localRow) {
    const GlobalOrdinal globalRow = Ain->getRowMap()->getGlobalElement(localRow);
    const int rowDofId = Teuchos::as<int>(globalRow % nDofsPerNode);

    // View of the current row of the input matrix (local column indices).
    Teuchos::ArrayView<const LocalOrdinal> colLids;
    Teuchos::ArrayView<const Scalar>       rowVals;
    Ain->getLocalRowView(localRow, colLids, rowVals);

    // Scratch buffers sized for the full row; shrunk to the kept count below.
    Teuchos::ArrayRCP<GlobalOrdinal> keptCols(colLids.size(), Teuchos::ScalarTraits<GlobalOrdinal>::zero());
    Teuchos::ArrayRCP<Scalar>        keptVals(colLids.size(), STS::zero());

    size_t numKept = 0;
    const size_t rowLength = (size_t)colLids.size();
    for (size_t k = 0; k < rowLength; ++k) {
      const GlobalOrdinal globalCol = Ain->getColMap()->getGlobalElement(colLids[k]);
      const int colDofId = Teuchos::as<int>(globalCol % nDofsPerNode);

      if (rowDofId != colDofId)
        continue;
      if (!(STS::magnitude(rowVals[k]) >= zeroMagnitude))
        continue;

      keptCols[numKept] = globalCol;
      keptVals[numKept] = rowVals[k];
      ++numKept;
    }

    keptCols.resize(numKept);
    keptVals.resize(numKept);

    Aout->insertGlobalValues(globalRow,
                             keptCols.view(0, keptCols.size()),
                             keptVals.view(0, keptVals.size()));
  }

  Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap());

  // Carry the block size over to the filtered matrix.
  Aout->SetFixedBlockSize(nDofsPerNode);

  GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries()
                             << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl;

  Set(currentLevel, "A", Aout);
}
// Builds the prolongator from user-supplied files.
//
// Reads the coarse map from the file named by parameter "mapFileName" and the
// prolongator P from the file named by "matrixFileName", then stores on the
// coarse level:
//   - "CoarseMap"   : the map read from file
//   - "P"           : the matrix read from file
//   - "Nullspace"   : P^T applied to the fine-level nullspace
//   - "Coordinates" : 2D integer grid coordinates derived from the coarse GIDs
//
// Throws Exceptions::RuntimeError if the fine matrix block size is not 1, or
// if the global number of coarse elements is not a perfect square.
void UserPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildP(Level& fineLevel, Level& coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);

  RCP<Matrix>      A             = Get< RCP<Matrix> >     (fineLevel, "A");
  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> >(fineLevel, "Nullspace");

  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError, "Block size > 1 has not been implemented");

  const Teuchos::ParameterList& pL = GetParameterList();

  // Coarse map
  std::string     mapFileName = pL.get<std::string>("mapFileName");
  RCP<const Map>  rowMap      = A->getRowMap();
  RCP<const Map>  coarseMap   = Utils2::ReadMap(mapFileName, rowMap->lib(), rowMap->getComm());
  Set(coarseLevel, "CoarseMap", coarseMap);

  // Prolongator
  std::string matrixFileName = pL.get<std::string>("matrixFileName");
  RCP<Matrix> P = Utils::Read(matrixFileName, rowMap, coarseMap, coarseMap, rowMap);
#if 1
  Set(coarseLevel, "P", P);
#else
  // Expand column map by 1
  RCP<Matrix> P1 = Utils::Multiply(*A, false, *P, false);
  P = Utils::Read(matrixFileName, rowMap, P1->getColMap(), coarseMap, rowMap);
  Set(coarseLevel, "P", P);
#endif

  // Coarse nullspace: coarseNullspace = P^T * fineNullspace
  RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
  P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS,
           Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
  Set(coarseLevel, "Nullspace", coarseNullspace);

  // Coordinates transfer: the coarse GIDs are laid out on an n x n grid,
  // which requires the global coarse size to be a perfect square.
  size_t n = Teuchos::as<size_t>(sqrt(coarseMap->getGlobalNumElements()));
  TEUCHOS_TEST_FOR_EXCEPTION(n*n != coarseMap->getGlobalNumElements(), Exceptions::RuntimeError,
                             "Unfortunately, this is not the case, don't know what to do");

  RCP<MultiVector> coarseCoords = MultiVectorFactory::Build(coarseMap, 2);
  ArrayRCP<Scalar> xCoord = coarseCoords->getDataNonConst(0);
  ArrayRCP<Scalar> yCoord = coarseCoords->getDataNonConst(1);

  for (size_t lid = 0; lid < coarseMap->getNodeNumElements(); ++lid) {
    // Zero-based global id, split into (column, row) position on the grid
    GlobalOrdinal gid = coarseMap->getGlobalElement(lid) - coarseMap->getIndexBase();
    GlobalOrdinal col = gid % n;
    GlobalOrdinal row = gid / n;

    xCoord[lid] = col;
    yCoord[lid] = row;
  }
  Set(coarseLevel, "Coordinates", coarseCoords);

  if (IsPrint(Statistics1)) {
    RCP<ParameterList> params = rcp(new ParameterList());
    params->set("printLoadBalancingInfo", true);
    GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params);
  }
}
// Rebalances the coarse-level matrix "A" using the level's "Importer".
//
// If the importer is null, the existing coarse matrix is re-registered
// unchanged under "A". Otherwise a redistributed matrix is built on the
// importer's target map (optionally restricting the communicator when
// parameter "useSubcomm" is true) and stored under "A"; that result may be
// null on ranks dropped from the restricted communicator.
void RebalanceAcFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const {
  FactoryMonitor m(*this, "Computing Ac", coarseLevel);

  RCP<Matrix>       originalAc        = Get< RCP<Matrix> >      (coarseLevel, "A");
  RCP<const Import> rebalanceImporter = Get< RCP<const Import> >(coarseLevel, "Importer");

  if (rebalanceImporter.is_null()) {
    // Ac already built by the load balancing process and no load balancing needed
    GetOStream(Warnings0, 0) << "No rebalancing" << std::endl;
    GetOStream(Warnings0, 0) << "Jamming A into Level " << coarseLevel.GetLevelID() << " w/ generating factory "
                             << this << std::endl;
    Set(coarseLevel, "A", originalAc);
    return;
  }

  RCP<Matrix> rebalancedAc;
  {
    SubFactoryMonitor subM(*this, "Rebalancing existing Ac", coarseLevel);

    RCP<const Map> targetMap = rebalanceImporter->getTargetMap();

    const ParameterList& pL = GetParameterList();

    ParameterList XpetraList;
    if (pL.get<bool>("useSubcomm")) {
      GetOStream(Runtime0,0) << "Replacing maps with a subcommunicator" << std::endl;
      XpetraList.set("Restrict Communicator",true);
    }

    // NOTE: If the communicator is restricted away, Build returns Teuchos::null.
    // XpetraList is passed as a non-owning RCP; it outlives the call.
    rebalancedAc = MatrixFactory::Build(originalAc, *rebalanceImporter, targetMap, targetMap, rcp(&XpetraList,false));

    if (!rebalancedAc.is_null())
      rebalancedAc->SetFixedBlockSize(originalAc->GetFixedBlockSize());

    Set(coarseLevel, "A", rebalancedAc);
  }

  // Only ranks that still own a matrix can report statistics.
  if (!rebalancedAc.is_null()) {
    RCP<ParameterList> params = rcp(new ParameterList());
    params->set("printLoadBalancingInfo", true);
    GetOStream(Statistics0, 0) << Utils::PrintMatrixInfo(*rebalancedAc, "Ac (rebalanced)", params);
  }
} //Build()
// Builds a filtered version of the level's matrix "A" and stores it as "A".
//
// If the level's "Filtering" flag is false, the original matrix is passed
// through untouched. Otherwise a matrix sharing A's graph is created and, row
// by row, every entry whose column is not marked by the amalgamated "Graph"
// neighbours of that row's node is set to zero. With parameter "lumping" set,
// the dropped off-diagonal values are added to the diagonal entry (when the
// row has one among the dropped/kept positions visited).
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Matrix filtering", currentLevel);

  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");

  const bool filteringDone = currentLevel.Get<bool>("Filtering", currentLevel.GetFactoryManager()->GetFactory("Filtering").get());
  if (!filteringDone) {
    GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl;
    Set(currentLevel, "A", A);
    return;
  }

  size_t blkSize = A->GetFixedBlockSize();

  const ParameterList& pL = GetParameterList();
  const bool lumping = pL.get<bool>("lumping");
  if (lumping)
    GetOStream(Runtime0) << "Lumping dropped entries" << std::endl;

  RCP<GraphBase> G = Get< RCP<GraphBase> >(currentLevel, "Graph");

  SC zero = Teuchos::ScalarTraits<SC>::zero();

  // Both Epetra and Tpetra matrix-matrix multiply use the following trick:
  // if an entry of the left matrix is zero, it does not compute or store the
  // zero value.
  //
  // This trick allows us to bypass constructing a new matrix. Instead, we
  // make a deep copy of the original one, and fill it in with zeros, which
  // are ignored during the prolongator smoothing.
  RCP<Matrix> filteredA = MatrixFactory::Build(A->getCrsGraph());
  filteredA->resumeFill();

  ArrayView<const LO> inds;
  ArrayView<const SC> valsA;
#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
  ArrayView<SC>       vals;
#else
  Array<SC>           vals;
#endif

  // One flag per (import-map node, dof): nonzero means "keep this column".
  Array<char> filter(blkSize * G->GetImportMap()->getNodeNumElements(), 0);

  size_t numGRows = G->GetNodeNumVertices();
  for (size_t node = 0; node < numGRows; node++) {
    ArrayView<const LO> neighbors = G->getNeighborVertices(node);
    const size_t numNeighbors = as<size_t>(neighbors.size());

    // Mark all dofs of all graph neighbours of this node
    for (size_t n = 0; n < numNeighbors; n++)
      for (size_t dof = 0; dof < blkSize; dof++)
        filter[neighbors[n]*blkSize+dof] = 1;

    for (size_t dof = 0; dof < blkSize; dof++) {
      LO row = node*blkSize + dof;

      A->getLocalRowView(row, inds, valsA);

      size_t nnz = inds.size();
      if (nnz == 0)
        continue;

#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
      // Obtain a writable view of the row of filteredA (casting away const)
      // and seed it with the values of A
      ArrayView<const SC> vals1;
      filteredA->getLocalRowView(row, inds, vals1);
      vals = ArrayView<SC>(const_cast<SC*>(vals1.getRawPtr()), nnz);

      memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz*sizeof(SC));
#else
      // Work on a private copy of the row values
      vals = Array<SC>(valsA);
#endif

      if (lumping) {
        LO diagIndex = -1;
        SC diagExtra = zero;

        for (size_t j = 0; j < nnz; j++) {
          if (!filter[inds[j]]) {
            if (inds[j] == row) {
              // Remember diagonal position
              diagIndex = j;
            } else {
              // Accumulate the dropped off-diagonal value
              diagExtra += vals[j];
            }
            vals[j] = zero;
          }
        }

        // Lump dropped entries
        // NOTE
        //  * Does it make sense to lump for elasticity?
        //  * Is it different for diffusion and elasticity?
        if (diagIndex != -1)
          vals[diagIndex] += diagExtra;

      } else {
        // Plain dropping: zero out every unmarked column
        for (size_t j = 0; j < nnz; j++)
          if (!filter[inds[j]])
            vals[j] = zero;
      }

#ifndef ASSUME_DIRECT_ACCESS_TO_ROW
      // filteredA was built from A's graph, so the row already has these
      // local column indices and its values can simply be replaced.
      filteredA->replaceLocalValues(row, inds, vals);
#endif
    }

    // Clear the marks set for this node before moving to the next one
    for (size_t n = 0; n < numNeighbors; n++)
      for (size_t dof = 0; dof < blkSize; dof++)
        filter[neighbors[n]*blkSize+dof] = 0;
  }

  RCP<ParameterList> fillCompleteParams(new ParameterList);
  fillCompleteParams->set("No Nonlocal Changes", true);
  filteredA->fillComplete(fillCompleteParams);

  filteredA->SetFixedBlockSize(blkSize);

  if (pL.get<bool>("filtered matrix: reuse eigenvalue")) {
    // Reuse max eigenvalue from A
    // It is unclear what eigenvalue is the best for the smoothing, but we already may have
    // the D^{-1}A estimate in A, may as well use it.
    // NOTE: ML does that too
    filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
  }

  Set(currentLevel, "A", filteredA);
}
int main(int argc, char *argv[]) { #include <MueLu_UseShortNames.hpp> using Teuchos::RCP; // reference count pointers using Teuchos::rcp; using Teuchos::TimeMonitor; // ========================================================================= // MPI initialization using Teuchos // ========================================================================= Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); bool success = false; bool verbose = true; try { RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm(); // ========================================================================= // Convenient definitions // ========================================================================= SC zero = Teuchos::ScalarTraits<SC>::zero(), one = Teuchos::ScalarTraits<SC>::one(); // Instead of checking each time for rank, create a rank 0 stream RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); Teuchos::FancyOStream& fancyout = *fancy; fancyout.setOutputToRootOnly(0); // ========================================================================= // Parameters initialization // ========================================================================= Teuchos::CommandLineProcessor clp(false); //GO nx = 100, ny = 100, nz = 100; //Galeri::Xpetra::Parameters<GO> matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra std::string xmlFileName = "driver.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file. 
Otherwise, this example uses by default 'scalingTest.xml'"); int amgAsPrecond = 1; clp.setOption("precond", &amgAsPrecond, "apply multigrid as preconditioner"); int amgAsSolver = 0; clp.setOption("fixPoint", &amgAsSolver, "apply multigrid as solver"); bool printTimings = true; clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); int writeMatricesOPT = -2; clp.setOption("write", &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)"); double tol = 1e-6; clp.setOption("tol", &tol, "solver convergence tolerance"); std::string krylovMethod = "gmres"; clp.setOption("krylov", &krylovMethod, "outer Krylov method"); int maxIts = 100; clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); int output = 1; clp.setOption("output", &output, "how often to print Krylov residual history"); std::string matrixFileName = "crada1/crada_A.mm"; clp.setOption("matrixfile", &matrixFileName, "matrix market file containing matrix"); std::string rhsFileName = "crada1/crada_b.mm"; clp.setOption("rhsfile", &rhsFileName, "matrix market file containing right-hand side"); std::string nspFileName = "crada1/crada_ns.mm"; clp.setOption("nspfile", &nspFileName, "matrix market file containing fine level null space"); std::string cooFileName = "crada1/crada_coordinates.mm"; clp.setOption("coordinatesfile",&cooFileName, "matrix market file containing fine level coordinates"); std::string spcFileName = "crada1/crada_special.mm"; clp.setOption("specialfile", &spcFileName, "matrix market file containing fine level special dofs"); int nPDE = 3; clp.setOption("numpdes", &nPDE, "number of PDE equations"); int nNspVectors = 3; clp.setOption("numnsp", &nNspVectors, "number of nullspace vectors. Only used if null space is read from file. 
Must be smaller or equal than the number of null space vectors read in from file."); std::string convType = "r0"; clp.setOption("convtype", &convType, "convergence type (r0 or none)"); switch (clp.parse(argc,argv)) { case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } // ========================================================================= // Problem construction // ========================================================================= RCP<TimeMonitor> globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("MatrixRead: S - Global Time"))), tm; comm->barrier(); tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 1 - Matrix Build"))); RCP<Matrix> A = Teuchos::null; if (matrixFileName != "") { fancyout << "Read matrix from file " << matrixFileName << std::endl; A = Xpetra::IO<SC,LO,GO,Node>::Read(std::string(matrixFileName), xpetraParameters.GetLib(), comm); } RCP<const Map> map = A->getRowMap(); RCP<MultiVector> nullspace = MultiVectorFactory::Build(A->getDomainMap(),nPDE); A->SetFixedBlockSize(nPDE); fancyout << "#pdes = " << A->GetFixedBlockSize() << std::endl; if (nspFileName != "") { fancyout << "Read null space from file " << nspFileName << std::endl; nullspace = Xpetra::IO<SC,LO,GO,Node>::ReadMultiVector(std::string(nspFileName), A->getRowMap()); fancyout << "Found " << nullspace->getNumVectors() << " null space vectors" << std::endl; if (nNspVectors > Teuchos::as<int>(nullspace->getNumVectors())) { fancyout << "Set number of null space vectors from " << nNspVectors << " to " << nullspace->getNumVectors() << " as only " << nullspace->getNumVectors() << " are provided by " << nspFileName << std::endl; nNspVectors = nullspace->getNumVectors(); } if (nNspVectors < 1) { fancyout << "Set number of null space 
vectors from " << nNspVectors << " to " << nullspace->getNumVectors() << ". Note: we need at least one null space vector!!!" << std::endl; nNspVectors = nullspace->getNumVectors(); } if (nNspVectors < Teuchos::as<int>(nullspace->getNumVectors())) { RCP<MultiVector> temp = MultiVectorFactory::Build(A->getDomainMap(),nNspVectors); for(int j=0; j<nNspVectors; j++) { Teuchos::ArrayRCP<SC> tempData = temp->getDataNonConst(j); Teuchos::ArrayRCP<const SC> nsData = nullspace->getData(j); for (int i=0; i<nsData.size(); ++i) { tempData[i] = nsData[i]; } } nullspace = Teuchos::null; nullspace = temp; } } else { if (nPDE == 1) nullspace->putScalar( Teuchos::ScalarTraits<SC>::one() ); else { for (int i=0; i<nPDE; ++i) { Teuchos::ArrayRCP<SC> nsData = nullspace->getDataNonConst(i); for (int j=0; j<nsData.size(); ++j) { GO gel = A->getDomainMap()->getGlobalElement(j) - A->getDomainMap()->getIndexBase(); if ((gel-i) % nPDE == 0) nsData[j] = Teuchos::ScalarTraits<SC>::one(); } } } } RCP<MultiVector> coordinates = Teuchos::null; //MultiVectorFactory::Build(A->getDomainMap(),1); if (cooFileName != "") { std::vector<GO> myGIDs (map->getNodeNumElements() / A->GetFixedBlockSize()); // reconstruct map for coordinates for(LO r = 0; r < Teuchos::as<LO>(map->getNodeNumElements() / A->GetFixedBlockSize()); ++r) { GO gid = map->getGlobalElement(r * A->GetFixedBlockSize()); myGIDs[r] = gid; } GO gCntGIDs = 0; GO glCntGIDs = Teuchos::as<GlobalOrdinal>(myGIDs.size()); MueLu_sumAll(comm,glCntGIDs,gCntGIDs); Teuchos::Array<GlobalOrdinal> eltList(myGIDs); RCP<const Map> myCoordMap = MapFactory::Build (xpetraParameters.GetLib(),gCntGIDs,eltList(),0,comm); fancyout << "Read fine level coordinates from file " << cooFileName << std::endl; coordinates = Xpetra::IO<SC,LO,GO,Node>::ReadMultiVector(std::string(cooFileName), myCoordMap); fancyout << "Found " << nullspace->getNumVectors() << " null space vectors of length " << myCoordMap->getGlobalNumElements() << std::endl; } RCP<Map> mySpecialMap = 
Teuchos::null; if (spcFileName != "") { // read file on each processor and pick out the special dof numbers which belong to the current proc std::ifstream infile(spcFileName); std::string line; std::vector<GlobalOrdinal> mySpecialGids; GlobalOrdinal cnt = 0; // count overall number of gids GlobalOrdinal mycnt = 0; // count only local gids while ( std::getline(infile, line)) { if(0 == line.find("%")) continue; if(0 == line.find(" ")) { cnt++; GlobalOrdinal gid; std::istringstream iss(line); iss >> gid; gid--; // note, that the matlab vector starts counting at 1 and not 0! if(map->isNodeGlobalElement(gid)) { mySpecialGids.push_back(gid); mycnt++; } } } Teuchos::Array<GlobalOrdinal> eltList(mySpecialGids); mySpecialMap = MapFactory::Build (xpetraParameters.GetLib(),cnt,eltList(),0,comm); // empty processors std::vector<size_t> lelePerProc(comm->getSize(),0); std::vector<size_t> gelePerProc(comm->getSize(),0); lelePerProc[comm->getRank()] = mySpecialMap->getNodeNumElements(); Teuchos::reduceAll(*comm,Teuchos::REDUCE_MAX,comm->getSize(),&lelePerProc[0],&gelePerProc[0]); if(comm->getRank() == 0) { fancyout << "Distribution of " << cnt << " special dofs over processors:" << std::endl; fancyout << "Proc #DOFs" << std::endl; for(int i=0; i<comm->getSize(); i++) { fancyout << i << " " << gelePerProc[i] << std::endl; } } }
int main(int argc, char *argv[]) { #include <MueLu_UseShortNames.hpp> using Teuchos::RCP; // reference count pointers using Teuchos::rcp; using Teuchos::TimeMonitor; // ========================================================================= // MPI initialization using Teuchos // ========================================================================= Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL); RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm(); // ========================================================================= // Convenient definitions // ========================================================================= SC zero = Teuchos::ScalarTraits<SC>::zero(), one = Teuchos::ScalarTraits<SC>::one(); // Instead of checking each time for rank, create a rank 0 stream RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); Teuchos::FancyOStream& fancyout = *fancy; fancyout.setOutputToRootOnly(0); // ========================================================================= // Parameters initialization // ========================================================================= Teuchos::CommandLineProcessor clp(false); GO nx = 100, ny = 100, nz = 100; Galeri::Xpetra::Parameters<GO> matrixParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra std::string xmlFileName = "scalingTest.xml"; clp.setOption("xml", &xmlFileName, "read parameters from a file. 
Otherwise, this example uses by default 'scalingTest.xml'"); int amgAsPrecond = 1; clp.setOption("precond", &amgAsPrecond, "apply multigrid as preconditioner"); int amgAsSolver = 0; clp.setOption("fixPoint", &amgAsSolver, "apply multigrid as solver"); bool printTimings = true; clp.setOption("timings", "notimings", &printTimings, "print timings to screen"); int writeMatricesOPT = -2; clp.setOption("write", &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)"); double tol = 1e-12; clp.setOption("tol", &tol, "solver convergence tolerance"); std::string krylovMethod = "cg"; clp.setOption("krylov", &krylovMethod, "outer Krylov method"); int maxIts = 100; clp.setOption("maxits", &maxIts, "maximum number of Krylov iterations"); int output = 1; clp.setOption("output", &output, "how often to print Krylov residual history"); std::string matrixFileName = "A.mm"; clp.setOption("matrixfile", &matrixFileName, "matrix market file containing matrix"); std::string rhsFileName = ""; clp.setOption("rhsfile", &rhsFileName, "matrix market file containing right-hand side"); int nPDE = 1; clp.setOption("numpdes", &nPDE, "number of PDE equations"); std::string convType = "r0"; clp.setOption("convtype", &convType, "convergence type (r0 or none)"); switch (clp.parse(argc,argv)) { case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } fancyout << "========================================================\n" << xpetraParameters << matrixParameters; // ========================================================================= // Problem construction // ========================================================================= RCP<TimeMonitor> globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("MatrixRead: S - Global Time"))), 
tm; comm->barrier(); tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build"))); RCP<Matrix> A = Utils::Read(string(matrixFileName), xpetraParameters.GetLib(), comm); RCP<const Map> map = A->getRowMap(); RCP<MultiVector> nullspace = MultiVectorFactory::Build(A->getDomainMap(),nPDE); //RCP<MultiVector> fakeCoordinates = MultiVectorFactory::Build(A->getDomainMap(),1); A->SetFixedBlockSize(nPDE); std::cout << "#pdes = " << A->GetFixedBlockSize() << std::endl; if (nPDE == 1) nullspace->putScalar( Teuchos::ScalarTraits<SC>::one() ); else { for (int i=0; i<nPDE; ++i) { Teuchos::ArrayRCP<SC> nsData = nullspace->getDataNonConst(i); for (int j=0; j<nsData.size(); ++j) { GO gel = A->getDomainMap()->getGlobalElement(j) - A->getDomainMap()->getIndexBase(); if ((gel-i) % nPDE == 0) nsData[j] = Teuchos::ScalarTraits<SC>::one(); } } } comm->barrier(); tm = Teuchos::null; fancyout << "Galeri complete.\n========================================================" << std::endl; // ========================================================================= // Preconditioner construction // ========================================================================= comm->barrier(); tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1.5 - MueLu read XML"))); ParameterListInterpreter mueLuFactory(xmlFileName, *comm); comm->barrier(); tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 2 - MueLu Setup"))); RCP<Hierarchy> H = mueLuFactory.CreateHierarchy(); // By default, we use Extreme. However, typically the xml file contains verbosity parameter // which is used instead H->SetDefaultVerbLevel(MueLu::Extreme); H->GetLevel(0)->Set("A", A); H->GetLevel(0)->Set("Nullspace", nullspace); //H->GetLevel(0)->Set("Coordinates", fakeCoordinates); mueLuFactory.SetupHierarchy(*H); comm->barrier(); tm = Teuchos::null; // Print out the hierarchy stats. 
We should not need this line, but for some reason the // print out in the hierarchy construction does not work. H->print(fancyout); // ========================================================================= // System solution (Ax = b) // ========================================================================= comm->barrier(); tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3 - LHS and RHS initialization"))); RCP<Vector> X = VectorFactory::Build(map,1); RCP<MultiVector> B = VectorFactory::Build(map,1); if (rhsFileName != "") B = Utils2::ReadMultiVector(string(rhsFileName), A->getRowMap()); else { // we set seed for reproducibility X->setSeed(846930886); bool useSameRandomGen = false; X->randomize(useSameRandomGen); A->apply(*X, *B, Teuchos::NO_TRANS, one, zero); Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> norms(1); B->norm2(norms); //B->scale(1.0/norms[0]); } X->putScalar(zero); tm = Teuchos::null; if (writeMatricesOPT > -2) H->Write(writeMatricesOPT, writeMatricesOPT); comm->barrier(); if (amgAsSolver) { tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 4 - Fixed Point Solve"))); H->IsPreconditioner(false); Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> norms(1); norms = Utils::ResidualNorm(*A,*X,*B); std::cout << " iter: 0 residual = " << norms[0] << std::endl; for (int i=0; i< maxIts; ++i) { H->Iterate(*B, *X); norms = Utils::ResidualNorm(*A,*X,*B); std::cout << " iter: " << i+1 << " residual = " << norms[0] << std::endl; } } else if (amgAsPrecond) { #ifdef HAVE_MUELU_BELOS tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Belos Solve"))); // Operator and Multivector type that will be used with Belos typedef MultiVector MV; typedef Belos::OperatorT<MV> OP; H->IsPreconditioner(true); // Define Operator and Preconditioner Teuchos::RCP<OP> belosOp = Teuchos::rcp(new Belos::XpetraOp<SC, LO, GO, NO, LMO>(A)); // Turns a Xpetra::Matrix object into a Belos operator Teuchos::RCP<OP> belosPrec 
= Teuchos::rcp(new Belos::MueLuOp<SC, LO, GO, NO, LMO>(H)); // Turns a MueLu::Hierarchy object into a Belos operator // Construct a Belos LinearProblem object RCP< Belos::LinearProblem<SC, MV, OP> > belosProblem = rcp(new Belos::LinearProblem<SC, MV, OP>(belosOp, X, B)); belosProblem->setRightPrec(belosPrec); bool set = belosProblem->setProblem(); if (set == false) { fancyout << "\nERROR: Belos::LinearProblem failed to set up correctly!" << std::endl; return EXIT_FAILURE; } // Belos parameter list Teuchos::ParameterList belosList; belosList.set("Maximum Iterations", maxIts); // Maximum number of iterations allowed belosList.set("Convergence Tolerance", tol); // Relative convergence tolerance requested belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails); belosList.set("Output Frequency", output); belosList.set("Output Style", Belos::Brief); //belosList.set("Orthogonalization", "ICGS"); if (convType == "none") { belosList.set("Explicit Residual Scaling", "None"); belosList.set("Implicit Residual Scaling", "None"); } // Create an iterative solver manager RCP< Belos::SolverManager<SC, MV, OP> > solver; if (krylovMethod == "cg") { solver = rcp(new Belos::BlockCGSolMgr<SC, MV, OP>(belosProblem, rcp(&belosList, false))); } else if (krylovMethod == "gmres") { solver = rcp(new Belos::BlockGmresSolMgr<SC, MV, OP>(belosProblem, rcp(&belosList, false))); } else { TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Invalid Krylov method. Options are \"cg\" or \" gmres\"."); } // Perform solve Belos::ReturnType ret = Belos::Unconverged; try { ret = solver->solve(); // Get the number of iterations for this solve. fancyout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl; } catch(...) { fancyout << std::endl << "ERROR: Belos threw an error! " << std::endl; } // Check convergence if (ret != Belos::Converged) fancyout << std::endl << "ERROR: Belos did not converge! 
" << std::endl; else fancyout << std::endl << "SUCCESS: Belos converged!" << std::endl; #endif //ifdef HAVE_MUELU_BELOS } comm->barrier(); tm = Teuchos::null; globalTimeMonitor = Teuchos::null; if (printTimings) { TimeMonitor::summarize(A->getRowMap()->getComm().ptr(), std::cout, false, true, false, Teuchos::Union); MueLu::MutuallyExclusiveTime<MueLu::BaseClass>::PrintParentChildPairs(); } return 0; } //main
// Computes a partition of the rows of the level's matrix "A" with Zoltan2,
// using the level's "Coordinates" and per-node weights, and stores the result
// on the level as "Partition" (one part number per matrix row).
//
// Special cases for the level variable "number of partitions":
//   1  : a zero-initialized vector is stored (everything in part 0);
//   -1 : a null vector is stored (no repartitioning).
//
// Throws Exceptions::Incompatible if the local coordinate length differs from
// the local number of block rows of A, and Exceptions::RuntimeError if the
// "algorithm" parameter is neither "multijagged" nor "rcb".
void Zoltan2Interface<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& level) const {
  FactoryMonitor m(*this, "Build", level);

  RCP<Matrix>    A      = Get<RCP<Matrix> >(level, "A");
  RCP<const Map> rowMap = A->getRowMap();

  typedef Xpetra::MultiVector<double, LocalOrdinal, GlobalOrdinal, Node> dMultiVector;
  RCP<dMultiVector> coords = Get<RCP<dMultiVector> >(level, "Coordinates");
  RCP<const Map>    map    = coords->getMap();

  GO numElements = map->getNodeNumElements();

  LO blkSize = A->GetFixedBlockSize();

  // Check that the number of local coordinates is consistent with the #rows in A
  TEUCHOS_TEST_FOR_EXCEPTION(rowMap->getNodeNumElements()/blkSize != coords->getLocalLength(), Exceptions::Incompatible,
                             "Coordinate vector length (" << coords->getLocalLength()
                             << ") is incompatible with number of block rows in A (" << rowMap->getNodeNumElements()/blkSize
                             << "). The vector length should be the same as the number of mesh points.");

#ifdef HAVE_MUELU_DEBUG
  GO indexBase = rowMap->getIndexBase();
  GetOStream(Runtime0) << "Checking consistence of row and coordinates maps" << std::endl;
  // Make sure that logical blocks in row map coincide with logical nodes in coordinates map
  ArrayView<const GO> rowElements    = rowMap->getNodeElementList();
  ArrayView<const GO> coordsElements = map   ->getNodeElementList();
  for (LO i = 0; i < Teuchos::as<LO>(numElements); i++)
    TEUCHOS_TEST_FOR_EXCEPTION((coordsElements[i]-indexBase)*blkSize + indexBase != rowElements[i*blkSize],
                               Exceptions::RuntimeError, "i = " << i << ", coords GID = " << coordsElements[i]
                               << ", row GID = " << rowElements[i*blkSize] << ", blkSize = " << blkSize << std::endl);
#endif

  int numParts = Get<int>(level, "number of partitions");
  if (numParts == 1) {
    // Single processor, decomposition is trivial: all zeros
    RCP<Xpetra::Vector<GO,LO,GO,NO> > decomposition = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(rowMap, true);
    Set(level, "Partition", decomposition);
    return;

  } else if (numParts == -1) {
    // No repartitioning
    RCP<Xpetra::Vector<GO,LO,GO,NO> > decomposition = Teuchos::null; //Xpetra::VectorFactory<GO, LO, GO, NO>::Build(rowMap, true);
    //decomposition->putScalar(Teuchos::as<Scalar>(rowMap->getComm()->getRank()));
    Set(level, "Partition", decomposition);
    return;
  }

  const ParameterList& pL = GetParameterList();

  RCP<const ParameterList> providedList = pL.get<RCP<const ParameterList> >("ParameterList");
  ParameterList Zoltan2Params;
  if (providedList != Teuchos::null)
    Zoltan2Params = *providedList;

  // Merge default Zoltan2 parameters with user provided
  // If default and user parameters contain the same parameter name, user one is always preferred
  for (ParameterList::ConstIterator param = defaultZoltan2Params->begin(); param != defaultZoltan2Params->end(); param++) {
    const std::string& pName = defaultZoltan2Params->name(param);
    if (!Zoltan2Params.isParameter(pName))
      Zoltan2Params.set(pName, defaultZoltan2Params->get<std::string>(pName));
  }
  Zoltan2Params.set("num_global_parts", Teuchos::as<int>(numParts));

  GetOStream(Runtime0) << "Zoltan2 parameters:\n----------\n" << Zoltan2Params << "----------" << std::endl;

  const std::string& algo = Zoltan2Params.get<std::string>("algorithm");
  TEUCHOS_TEST_FOR_EXCEPTION(algo != "multijagged" &&
                             algo != "rcb",
                             Exceptions::RuntimeError, "Unknown partitioning algorithm: \"" << algo << "\"");

  typedef Zoltan2::XpetraMultiVectorAdapter<dMultiVector> InputAdapterType;
  typedef Zoltan2::PartitioningProblem<InputAdapterType>  ProblemType;

  int rowWeight = pL.get<int>("rowWeight");
  GetOStream(Runtime0) << "Using weights formula: nnz + " << rowWeight << std::endl;

  // Weight of a node = sum over its blkSize rows of (row nnz + rowWeight)
  Array<double> weightsPerRow(numElements);
  for (LO i = 0; i < numElements; i++) {
    weightsPerRow[i] = 0.0;
    for (LO j = 0; j < blkSize; j++) {
      weightsPerRow[i] += A->getNumEntriesInLocalRow(i*blkSize+j);
      // Zoltan2 pqJagged gets as good partitioning as Zoltan RCB in terms of nnz
      // but Zoltan also gets a good partioning in rows, which sometimes does not
      // happen for Zoltan2. So here is an attempt to get a better row partitioning
      // without significantly screwing up nnz partitioning
      // NOTE: no good heuristic here, the value was chosen almost randomly
      weightsPerRow[i] += rowWeight;
    }
  }

  std::vector<int>           strides;
  std::vector<const double*> weights(1, weightsPerRow.getRawPtr());

  // Zoltan2 is given the raw MPI communicator of a duplicate of the row map's communicator
  RCP<const Teuchos::MpiComm<int> >            dupMpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(rowMap->getComm()->duplicate());
  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > zoltanComm = dupMpiComm->getRawMpiComm();

  InputAdapterType adapter(coords, weights, strides);
  RCP<ProblemType> problem(new ProblemType(&adapter, &Zoltan2Params, (*zoltanComm)()));

  {
    SubFactoryMonitor m1(*this, "Zoltan2 " + toString(algo), level);
    problem->solve();
  }

  // Expand the per-node part list to one entry per matrix row
  RCP<Xpetra::Vector<GO,LO,GO,NO> > decomposition = Xpetra::VectorFactory<GO,LO,GO,NO>::Build(rowMap, false);
  ArrayRCP<GO>                      decompEntries = decomposition->getDataNonConst(0);

  const typename InputAdapterType::part_t * parts = problem->getSolution().getPartListView();

  for (GO i = 0; i < numElements; i++) {
    int partNum = parts[i];

    for (LO j = 0; j < blkSize; j++)
      decompEntries[i*blkSize + j] = partNum;
  }

  Set(level, "Partition", decomposition);
}
void ZoltanInterface<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& level) const {
  // Asks Zoltan for a row-to-partition assignment of matrix "A" and publishes it on the
  // level as "Partition".
  //
  // Read from the level : "A", "Coordinates" and "number of partitions" (read as GO).
  // Set on the level    : "Partition", an RCP<Xpetra::Vector<GO,LO,GO,NO> > over A's row map.
  //                       The RCP is left null when Zoltan reports (via newDecomp) that the
  //                       decomposition did not change.
  // Throws Exceptions::RuntimeError if the row map's communicator is not an MPI communicator,
  // if a mandatory Zoltan parameter cannot be set, or if LB_Partition fails.
  FactoryMonitor m(*this, "Build", level);

  RCP<Matrix>      A      = Get< RCP<Matrix> >(level, "A");
  RCP<const Map>   rowMap = A->getRowMap();
  RCP<MultiVector> Coords = Get< RCP<MultiVector> >(level, "Coordinates");
  size_t           dim    = Coords->getNumVectors();

  GO numParts = level.Get<GO>("number of partitions");
  if (numParts == 1) {
    // Only one partition requested, so the decomposition is the trivial one, all zeros.
    RCP<Xpetra::Vector<GO, LO, GO, NO> > decomposition = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(rowMap, true);
    Set(level, "Partition", decomposition);
    return;
  }

  float zoltanVersion_;
  Zoltan_Initialize(0, NULL, &zoltanVersion_);

  // Zoltan works on a raw MPI_Comm; hand it a duplicate of the row map's communicator.
  RCP<const Teuchos::MpiComm<int> > dupMpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(rowMap->getComm()->duplicate());
  if (dupMpiComm == Teuchos::null)
    throw Exceptions::RuntimeError("MueLu::Zoltan : the communicator of A's row map is not a Teuchos::MpiComm");
  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > zoltanComm = dupMpiComm->getRawMpiComm();

  // Extract the underlying MPI_Comm handle and create a Zoltan object
  RCP<Zoltan> zoltanObj_ = rcp(new Zoltan((*zoltanComm)()));
  if (zoltanObj_ == Teuchos::null)
    throw Exceptions::RuntimeError("MueLu::Zoltan : Unable to create Zoltan data structure");

  // Tell Zoltan what kind of local/global IDs we will use.
  // In our case, each GID is a single entry and there are no local ids.
  int rv;
  if ((rv = zoltanObj_->Set_Param("num_gid_entries", "1")) != ZOLTAN_OK)
    throw Exceptions::RuntimeError("MueLu::Zoltan::Setup : setting parameter 'num_gid_entries' returned error code " + Teuchos::toString(rv));
  if ((rv = zoltanObj_->Set_Param("num_lid_entries", "0") ) != ZOLTAN_OK)
    throw Exceptions::RuntimeError("MueLu::Zoltan::Setup : setting parameter 'num_lid_entries' returned error code " + Teuchos::toString(rv));
  if ((rv = zoltanObj_->Set_Param("obj_weight_dim", "1") ) != ZOLTAN_OK)
    throw Exceptions::RuntimeError("MueLu::Zoltan::Setup : setting parameter 'obj_weight_dim' returned error code " + Teuchos::toString(rv));

  // The debug level only affects Zoltan's own output, so a failure here is not fatal.
  if (GetVerbLevel() & Statistics1) zoltanObj_->Set_Param("debug_level", "1");
  else                              zoltanObj_->Set_Param("debug_level", "0");

  // The requested number of parts must reach Zoltan, otherwise the partition numbers
  // written to "Partition" would not match what the caller asked for.
  if ((rv = zoltanObj_->Set_Param("num_global_partitions", toString(numParts))) != ZOLTAN_OK)
    throw Exceptions::RuntimeError("MueLu::Zoltan::Setup : setting parameter 'num_global_partitions' returned error code " + Teuchos::toString(rv));

  // Query callbacks; the last argument is the user data handed back to each callback.
  zoltanObj_->Set_Num_Obj_Fn(GetLocalNumberOfRows,      static_cast<void *>(A.get()));
  zoltanObj_->Set_Obj_List_Fn(GetLocalNumberOfNonzeros, static_cast<void *>(A.get()));
  zoltanObj_->Set_Num_Geom_Fn(GetProblemDimension,      static_cast<void *>(&dim));
  zoltanObj_->Set_Geom_Multi_Fn(GetProblemGeometry,     static_cast<void *>(Coords.get()));

  // Data pointers that Zoltan requires.
  ZOLTAN_ID_PTR import_gids    = NULL;  // Global nums of objs to be imported
  ZOLTAN_ID_PTR import_lids    = NULL;  // Local indices to objs to be imported
  int          *import_procs   = NULL;  // Proc IDs of procs owning objs to be imported.
  int          *import_to_part = NULL;  // Partition #s to which imported objs should be assigned.
  ZOLTAN_ID_PTR export_gids    = NULL;  // Global nums of objs to be exported
  ZOLTAN_ID_PTR export_lids    = NULL;  // local indices to objs to be exported
  int          *export_procs   = NULL;  // Proc IDs of destination procs for objs to be exported.
  int          *export_to_part = NULL;  // Partition #s for objs to be exported.
  // Output scalars are zero-initialized so they hold defined values even if Zoltan
  // returns early without writing them.
  int           num_imported    = 0;    // Number of objs to be imported.
  int           num_exported    = 0;    // Number of objs to be exported.
  int           newDecomp       = 0;    // Flag indicating whether the decomposition has changed
  int           num_gid_entries = 0;    // Number of array entries in a global ID.
  int           num_lid_entries = 0;    // Number of array entries in a local ID.

  {
    SubFactoryMonitor m1(*this, "Zoltan RCB", level);
    rv = zoltanObj_->LB_Partition(newDecomp, num_gid_entries, num_lid_entries,
                                  num_imported, import_gids, import_lids, import_procs, import_to_part,
                                  num_exported, export_gids, export_lids, export_procs, export_to_part);
    // Both ZOLTAN_FATAL and ZOLTAN_MEMERR mean that the output arrays cannot be trusted.
    if (rv == ZOLTAN_FATAL || rv == ZOLTAN_MEMERR)
      throw Exceptions::RuntimeError("Zoltan::LB_Partition() returned error code " + Teuchos::toString(rv));
  }

  // TODO check that A's row map is 1-1. Zoltan requires this.

  RCP<Xpetra::Vector<GO, LO, GO, NO> > decomposition;
  if (newDecomp) {
    decomposition = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(rowMap, false); // Don't initialize, will be overwritten
    ArrayRCP<GO> decompEntries = decomposition->getDataNonConst(0);

    // Every row stays on the calling process unless Zoltan lists it for export.
    int mypid = rowMap->getComm()->getRank();
    for (typename ArrayRCP<GO>::iterator it = decompEntries.begin(); it != decompEntries.end(); ++it)
      *it = mypid;

    LO blockSize = A->GetFixedBlockSize();
    for (int i = 0; i < num_exported; ++i) {
      // We have assigned Zoltan gids to first row GID in the block
      // NOTE: Zoltan GIDs are different from GIDs in the Coordinates vector
      LO  localEl = rowMap->getLocalElement(export_gids[i]);
      int partNum = export_to_part[i];
      // All rows of the block follow the block's first row.
      for (LO j = 0; j < blockSize; ++j)
        decompEntries[localEl + j] = partNum;
    }
  }

  Set(level, "Partition", decomposition);

  // Release the arrays that Zoltan allocated in LB_Partition.
  zoltanObj_->LB_Free_Part(&import_gids, &import_lids, &import_procs, &import_to_part);
  zoltanObj_->LB_Free_Part(&export_gids, &export_lids, &export_procs, &export_to_part);
} //Build()
// Regression test for the Zoltan repartitioning of a matrix with 3 DOFs per mesh node:
// builds a 9x9-node problem whose rows hold a pseudo-random number of nonzeros, runs
// ZoltanInterface::Build and compares the number of nonzeros assigned to each partition
// against values precomputed for 1 to 5 MPI processes (comparison enabled on Linux only).
TEUCHOS_UNIT_TEST(Zoltan, Build3PDEs)
{
  typedef Teuchos::ScalarTraits<Scalar> ST;

  out << "version: " << MueLu::Version() << std::endl;
  out << std::endl;
  out << "This tests that the partitioning produced by Zoltan is \"reasonable\" for a matrix" << std::endl;
  out << "that has a random number of nonzeros per row and 3 DOFs per mesh point. Good results have been precomputed" << std::endl;
  out << "for up to 5 processors. The results are the number of nonzeros in the local matrix" << std::endl;
  out << "once the Zoltan repartitioning has been applied." << std::endl;
  out << "The results can be viewed in Paraview by enabling code guarded by the macro MUELU_VISUALIZE_REPARTITIONING" << std::endl;

  RCP<const Teuchos::Comm<int> > comm = TestHelpers::Parameters::getDefaultComm();

  // Reference results exist only for 1..5 processes; anything larger passes trivially.
  if (comm->getSize() > 5) {
    out << std::endl;
    out << "This test must be run on 1 to 5 processes." << std::endl;
    TEST_EQUALITY(true, true);
    return;
  }

  Level level;
  RCP<FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
  level.SetFactoryManager(factoryHandler);

  int    nx                = 9;
  int    ny                = nx;
  int    dofsPerNode       = 3;
  GO     numGlobalElements = nx*ny*dofsPerNode;
  size_t maxEntriesPerRow  = 30;

  RCP<const Map> map;
  int numMyNodes = numGlobalElements / dofsPerNode;
  if (comm->getSize() > 1) {
    // In parallel, make sure that the dof's associated with a node all
    // reside on the same processor.
    int numNodes = numGlobalElements / dofsPerNode;
    TEUCHOS_TEST_FOR_EXCEPTION( (numGlobalElements - numNodes * dofsPerNode) != 0, MueLu::Exceptions::RuntimeError,
                                "Number of matrix rows is not divisible by #dofs" );
    int nproc = comm->getSize();
    // The last rank takes whatever is left over after the even split.
    if (comm->getRank() < nproc-1) numMyNodes = numNodes / nproc;
    else                           numMyNodes = numNodes - (numNodes/nproc) * (nproc-1);
    map = MapFactory::createContigMap(TestHelpers::Parameters::getLib(), numGlobalElements, numMyNodes*dofsPerNode, comm);
  } else {
    map = MapFactory::createUniformContigMap(TestHelpers::Parameters::getLib(), numGlobalElements, comm);
  }

  const size_t numMyElements = map->getNodeNumElements();
  Teuchos::ArrayView<const GlobalOrdinal> myGlobalElements = map->getNodeElementList();

  // Force underlying linear algebra library to allocate more
  // memory on the fly.  While not super efficient, this
  // ensures that no zeros are being stored.  Thus, from
  // Zoltan's perspective the matrix is imbalanced.
  RCP<Matrix> A = rcp(new CrsMatrixWrap(map, 1));

  // Populate CrsMatrix with random number of entries (up to maxEntriesPerRow) per row.
  // Create a vector with random integer entries in [1,maxEntriesPerRow].
  ST::seedrandom(666*comm->getRank());
  RCP<Xpetra::Vector<LO,LO,GO,NO> > entriesPerRow = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(map,false);
  Teuchos::ArrayRCP<LO> eprData = entriesPerRow->getDataNonConst(0);
  const LO maxRowLength = static_cast<LO>(maxEntriesPerRow);
  for (Teuchos::ArrayRCP<LO>::iterator i=eprData.begin(); i!=eprData.end(); ++i) {
    *i = (LO)(std::floor(((ST::random()+1)*0.5*maxEntriesPerRow)+1));
    // The formula yields maxEntriesPerRow+1 should random() return exactly 1; clamp so the
    // row length never exceeds the capacity of the vals/cols scratch arrays used below.
    if (*i > maxRowLength)
      *i = maxRowLength;
  }

  RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  fos->setOutputToRootOnly(-1);

  Teuchos::Array<Scalar> vals(maxEntriesPerRow);
  Teuchos::Array<GO>     cols(maxEntriesPerRow);
  for (size_t i = 0; i < numMyElements; ++i) {
    const LO rowLength = eprData[i];
    Teuchos::ArrayView<SC> av(&vals[0],rowLength);
    Teuchos::ArrayView<GO> iv(&cols[0],rowLength);
    //stick in ones for values
    for (LO j=0; j< rowLength; ++j) vals[j] = ST::one();
    //figure out valid column indices
    // (explicit <GO> so that this also compiles when GO is wider than int)
    GO start = std::max<GO>(myGlobalElements[i]-rowLength+1,0);
    for (LO j=0; j< rowLength; ++j) cols[j] = start+j;
    A->insertGlobalValues(myGlobalElements[i], iv, av);
  }

  A->fillComplete();

  // Now treat the matrix as if it has 3 DOFs per node.
  A->SetFixedBlockSize(dofsPerNode);
  level.Set("A",A);

  //build coordinates
  Teuchos::ParameterList list;
  list.set("nx",nx);
  list.set("ny",ny);
  RCP<const Map> coalescedMap = MapFactory::createContigMap(TestHelpers::Parameters::getLib(), numGlobalElements/dofsPerNode, numMyNodes, comm);
  RCP<MultiVector> XYZ = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("2D",coalescedMap,list);

  // XYZ are the "coalesce" coordinates as it has been generated for 1 DOF/node and we are using them for 3 DOFS/node
  // level.Set("Coordinates",XYZ); "Coordinates" == uncoalesce. "X,Y,ZCoordinates" == coalesce
  // NOTE(review): only "XCoordinates"/"YCoordinates" are published here; confirm that the
  // ZoltanInterface under test does not expect a "Coordinates" multivector on the level.
  {
    RCP<MultiVector> coordinates = XYZ;

    // making a copy because I don't want to keep 'open' the Xpetra_MultiVector
    if (coordinates->getNumVectors() >= 1) {
      Teuchos::ArrayRCP<const SC> coord = coordinates->getData(0);
      Teuchos::ArrayRCP<SC> coordCpy(coord.size());
      for(int i=0; i<coord.size(); i++) {
        coordCpy[i] = coord[i];
      }
      level.Set("XCoordinates", coordCpy);
      //std::cout << coordCpy << std::endl;
    }

    if (coordinates->getNumVectors() >= 2) {
      Teuchos::ArrayRCP<const SC> coord = coordinates->getData(1);
      Teuchos::ArrayRCP<SC> coordCpy(coord.size());
      for(int i=0; i<coord.size(); i++) {
        coordCpy[i] = coord[i];
      }
      level.Set("YCoordinates", coordCpy);
    }

    /*if (coordinates->getNumVectors() >= 3) {
      Teuchos::ArrayRCP<const SC> coord = coordinates->getData(2);
      Teuchos::ArrayRCP<SC> coordCpy(coord.size());
      for(int i=0; i<coord.size(); i++) {
        coordCpy[i] = coord[i];
      }
      level.Set("ZCoordinates", coordCpy);
    }*/
  }

  //coalescedMap->describe(*fos,Teuchos::VERB_EXTREME);
  //sleep(1); comm->barrier();
  //XYZ->describe(*fos,Teuchos::VERB_EXTREME);

  // Stored as GO: ZoltanInterface::Build reads this entry with level.Get<GO>().
  GO numPartitions = comm->getSize();
  level.Set("number of partitions",numPartitions);
  RCP<ZoltanInterface> zoltan = rcp(new ZoltanInterface());
  //zoltan->SetOutputLevel(0); //options are 0=none, 1=summary, 2=every pid prints
  level.Request("Partition",zoltan.get());
  zoltan->Build(level);

  RCP<Xpetra::Vector<GO,LO,GO,NO> > decomposition = level.Get<RCP<Xpetra::Vector<GO,LO,GO,NO> > >("Partition",zoltan.get());
  /* //temporary code to have the trivial decomposition (no change)
  ArrayRCP<GO> decompEntries = decomposition->getDataNonConst(0);
  for (ArrayRCP<GO>::iterator i = decompEntries.begin(); i != decompEntries.end(); ++i)
    *i = comm->getRank();
  decompEntries=Teuchos::null;
  */

  //Create vector whose local length is the global number of partitions.
  //This vector will record the local number of nonzeros associated with each partition.
  Teuchos::Array<GO> parts(numPartitions);
  for (int i=0; i<numPartitions; ++i) parts[i] = i;
  Teuchos::ArrayView<GO> partsView(&parts[0],numPartitions);
  RCP<const Map> partitionMap = MapFactory::Build(TestHelpers::Parameters::getLib(),
                                                  Teuchos::OrdinalTraits<global_size_t>::invalid(), partsView,
                                                  map->getIndexBase(),comm);
  RCP<Xpetra::Vector<LO,LO,GO,NO> > localPartsVec = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(partitionMap);

  // Both tally vectors store LO scalars, so their data views must be ArrayRCP<LO>.
  RCP<Xpetra::Vector<LO,LO,GO,NO> > nnzPerRow = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(A->getRowMap());
  Teuchos::ArrayRCP<LO> nnzData = nnzPerRow->getDataNonConst(0);

  //For the local rows in each partition, tally up the number of nonzeros.  This is what
  //Zoltan should be load-balancing.
  Teuchos::ArrayRCP<LO> lpvData = localPartsVec->getDataNonConst(0);
  Teuchos::ArrayRCP<const GO> decompData = decomposition->getData(0);
  for (size_t i=0; i<decomposition->getLocalLength();++i) {
    Teuchos::ArrayView<const LO> c;
    Teuchos::ArrayView<const SC> v;
    A->getLocalRowView(i,c,v);
    lpvData[decompData[i]] += v.size();
    nnzData[i] = v.size();
  }

  // Release the data views before the vectors are used in the export below.
  lpvData = Teuchos::null;
  decompData = Teuchos::null;
  nnzData = Teuchos::null;

  /*
  if (comm->getRank() == 0)
    std::cout << "nnz per row" << std::endl;
  nnzPerRow->describe(*fos,Teuchos::VERB_EXTREME);

  if (comm->getRank() == 0)
    std::cout << "Row-to-partition assignment (from Zoltan)" << std::endl;
  decomposition->describe(*fos,Teuchos::VERB_EXTREME);

  if (comm->getRank() == 0)
    std::cout << "#nonzeros per partition" << std::endl;
  localPartsVec->describe(*fos,Teuchos::VERB_EXTREME);
  */

  //Send the local nnz tallies to pid 0, which can report the global sums.
  size_t mysize=1;
  if (comm->getRank() == 0) mysize = numPartitions;
  RCP<const Map> globalTallyMap = MapFactory::Build(TestHelpers::Parameters::getLib(),
                                                    Teuchos::OrdinalTraits<global_size_t>::invalid(),
                                                    mysize,
                                                    map->getIndexBase(),
                                                    comm);
  RCP<Xpetra::Vector<LO,LO,GO,NO> > globalTallyVec = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(globalTallyMap);
  RCP<const Export> exporter = ExportFactory::Build( partitionMap, globalTallyMap);
  globalTallyVec->doExport(*localPartsVec,*exporter,Xpetra::ADD);

  // Precomputed number of nonzeros per partition, indexed by the number of processes.
  ArrayRCP<LO> expectedResults(numPartitions);
  switch (comm->getSize()) {
    case 1:
      expectedResults[0] = 3951;
      break;

    case 2:
      expectedResults[0] = 1955;
      expectedResults[1] = 1910;
      break;

    case 3:
      expectedResults[0] = 1326;
      expectedResults[1] = 1340;
      expectedResults[2] = 1321;
      break;

    case 4:
      expectedResults[0] = 950;
      expectedResults[1] = 922;
      expectedResults[2] = 908;
      expectedResults[3] = 936;
      break;

    case 5:
      expectedResults[0] = 774;
      expectedResults[1] = 735;
      expectedResults[2] = 726;
      expectedResults[3] = 771;
      expectedResults[4] = 759;
      break;

    default:
      break;
  }

  ArrayRCP<const LO> gtvData = globalTallyVec->getData(0);

#ifdef __linux__
  out << "Checking results..." << std::endl;
  // Only rank 0 holds all numPartitions tallies (mysize above), so only it compares.
  for (int i=0; i<numPartitions; ++i) {
    if (comm->getRank() == 0) TEST_EQUALITY( expectedResults[i], gtvData[i]);
  }
#endif

#ifdef MUELU_VISUALIZE_REPARTITIONING
  //
  //Now write everything to a comma-separated list that ParaView can grok
  //
  Teuchos::ArrayRCP<const Scalar> X = XYZ->getData(0);
  Teuchos::ArrayRCP<const Scalar> Y = XYZ->getData(1);
  Teuchos::ArrayRCP<const GO> D = decomposition->getData(0);
  RCP<std::ofstream> outFile;
  std::string fileName = "zoltanResults.csv";

  //write header information
  if (comm->getRank() == 0) {
    outFile = rcp(new std::ofstream(fileName.c_str()));
    *outFile << "x coord, y coord, z coord, partition, row weight" << std::endl;
    outFile = Teuchos::null; // close the file before any other rank appends to it
  }
  comm->barrier();

  //append coordinates
  nnzData = nnzPerRow->getDataNonConst(0);
  const int mypid = comm->getRank();
  for (int j=0; j<comm->getSize(); ++j) {
    if (mypid == j) {
      outFile = rcp(new std::ofstream(fileName.c_str(),std::ios::app));
      int blockSize = A->GetFixedBlockSize();
      //Coordinates are for coalesced system, D is for uncoalesced
      for (int i=0; i < D.size()/blockSize; ++i) {
        int nnz=0;
        for (int k=0; k<blockSize; ++k) nnz += nnzData[i*blockSize+k];
        *outFile << X[i] << ", " << Y[i] << ", " << ST::zero() << ", "
                 << D[i*blockSize] << ", " << nnz << std::endl;
      }
      outFile = Teuchos::null; // close before the next rank takes its turn
    }
    // Ranks take turns in rank order; without this barrier they would all append at once.
    comm->barrier();
  } //for (int i=0; i<comm->getSize(); ++i)

  out << std::endl;
  out << "You can view the Zoltan decomposition in ParaView 3.10.1 or later:" << std::endl;
  out << " 1) Load the data file " << fileName << "." << std::endl;
  out << " 2) Run the filter Filters/ Alphabetical/ Table To Points." << std::endl;
  out << " 3) Tell ParaView what columns are the X, Y and Z coordinates." << std::endl;
  out << " 4) Split screen horizontally (Icon, top right)." << std::endl;
  out << " 5) Click on the eyeball in the Pipeline Browser to see the points." << std::endl;
  out << " 6) Under the Display tab, you can color points by scalar value and resize them." << std::endl;
  out << std::endl;
  out << " To display row weights next to each point:" << std::endl;
  out << " 1) Click the \"Select Points Through\" button (2nd row) and select all points." << std::endl;
  out << " 2) Under View pull-down menu, choose the \"Selection Inspector\"." << std::endl;
  out << " 3) Under the Point Label, check the Visible box and set the Label Mode to \"row weight\"." << std::endl;
#endif

} //Build3PDEs
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& currentLevel) const {
  // Builds a filtered copy of matrix "A": within each row only the entries whose column
  // belongs to a neighbor (in "Graph") of the row's graph vertex are kept. With the
  // "lumping" parameter set, the sum of a row's dropped values is added to that row's
  // diagonal entry.
  //
  // Read from the level : "A", "Graph" and the boolean "Filtering".
  // Set on the level    : "A" - the input matrix itself when "Filtering" is false,
  //                       otherwise the newly built filtered matrix.
  // Throws Exceptions::RuntimeError when lumping is requested for a row that has dropped
  // values but no diagonal entry among the kept ones.
  using Teuchos::as;

  FactoryMonitor m(*this, "Matrix filtering", currentLevel);

  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
  if (currentLevel.Get<bool>("Filtering", currentLevel.GetFactoryManager()->GetFactory("Filtering").get()) == false) {
    GetOStream(Runtime0,0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl;
    Set(currentLevel, "A", A);
    return;
  }

  const ParameterList& pL = GetParameterList();
  RCP<GraphBase> G        = Get< RCP<GraphBase> >(currentLevel, "Graph");
  const bool     lumping  = pL.get<bool>("lumping");
  const size_t   blkSize  = A->GetFixedBlockSize();
  if (lumping)
    GetOStream(Runtime0,0) << "Lumping dropped entries" << std::endl;

  // The filtered matrix shares A's row and column maps and never holds more entries per
  // row than A does.
  RCP<Matrix> filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getNodeMaxNumRowEntries(), Xpetra::StaticProfile);

  const SC     zero   = Teuchos::ScalarTraits<SC>::zero();
  const size_t noDiag = static_cast<size_t>(-1);   // marks "no diagonal entry kept in this row"

  Array<LO>   newInds;
  Array<SC>   newVals;
  // filter[c] != 0 <=> local column c may be kept for the graph vertex being processed
  Array<char> filter(blkSize*G->GetImportMap()->getNodeNumElements(), 0);

  const size_t numGRows = G->GetNodeNumVertices();
  for (size_t i = 0; i < numGRows; i++) {
    // Set up filtering array: flag every DOF column of every neighbor of vertex i
    Teuchos::ArrayView<const LO> indsG = G->getNeighborVertices(i);
    const size_t numNeighbors = as<size_t>(indsG.size());
    for (size_t j = 0; j < numNeighbors; j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG[j]*blkSize+k] = 1;

    // Filter each of the blkSize matrix rows that belong to vertex i
    for (size_t k = 0; k < blkSize; k++) {
      LocalOrdinal row = i*blkSize+k;
      ArrayView<const LO> oldInds;
      ArrayView<const SC> oldVals;
      A->getLocalRowView(row, oldInds, oldVals);

      size_t diagIndex = noDiag;
      SC     diagExtra = zero;    // sum of the values dropped from this row
      size_t numInds   = 0;       // number of entries kept so far

      // Size for the worst case (nothing dropped); shrunk to numInds below.
      newInds.resize(oldInds.size());
      newVals.resize(oldVals.size());
      for (size_t j = 0; j < as<size_t>(oldInds.size()); j++) {
        if (filter[oldInds[j]]) {
          newInds[numInds] = oldInds[j];
          newVals[numInds] = oldVals[j];

          // Remember diagonal position
          if (newInds[numInds] == row)
            diagIndex = numInds;
          numInds++;

        } else {
          diagExtra += oldVals[j];
        }
      }

      // Lump dropped entries
      // NOTE
      //  * Does it make sense to lump for elasticity?
      //  * Is it different for diffusion and elasticity?
      if (lumping) {
        const bool hasDiag = (diagIndex != noDiag);
        // Without a kept diagonal entry there is nowhere to put the dropped values;
        // indexing newVals with the sentinel would write out of bounds.
        TEUCHOS_TEST_FOR_EXCEPTION(!hasDiag && diagExtra != zero, Exceptions::RuntimeError,
                                   "MueLu::FilteredAFactory::Build: cannot lump dropped entries of local row " << row
                                   << ": the row has no diagonal entry among the kept entries");
        if (hasDiag)
          newVals[diagIndex] += diagExtra;
      }

      newInds.resize(numInds);
      newVals.resize(numInds);

      // Because we used a column map in the construction of the matrix
      // we can just use insertLocalValues here instead of insertGlobalValues
      filteredA->insertLocalValues(row, newInds, newVals);
    }

    // Clean up filtering array so it is all zeros again for the next vertex
    for (size_t j = 0; j < numNeighbors; j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG[j]*blkSize+k] = 0;
  }

  RCP<ParameterList> fillCompleteParams(new ParameterList);
  fillCompleteParams->set("No Nonlocal Changes", true);
  filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);

  filteredA->SetFixedBlockSize(blkSize);

  // TODO: Can we reuse max eigenvalue from A?
  // filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());

  Set(currentLevel, "A", filteredA);
}