// Build the filtered operator for this level and publish it as "A".
//
// Reads "A", the "Filtering" flag and "Graph" from currentLevel, and the
// "lumping" / "filtered matrix: reuse eigenvalue" entries from this factory's
// parameter list.  If "Filtering" is false the input matrix is passed through
// unchanged.  Otherwise a matrix with the same CRS graph as A is created and,
// for every row, entries whose column is not flagged by the graph row of the
// owning node are set to zero.  With lumping enabled, the sum of the zeroed
// off-diagonal entries is added to the diagonal entry of the row.
//
// currentLevel: level that supplies the inputs and receives the result.
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Matrix filtering", currentLevel);

  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
  if (currentLevel.Get<bool>("Filtering", currentLevel.GetFactoryManager()->GetFactory("Filtering").get()) == false) {
    GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl;
    Set(currentLevel, "A", A);
    return;
  }

  size_t blkSize = A->GetFixedBlockSize();

  const ParameterList& pL = GetParameterList();
  bool lumping = pL.get<bool>("lumping");
  if (lumping)
    GetOStream(Runtime0) << "Lumping dropped entries" << std::endl;

  RCP<GraphBase> G = Get< RCP<GraphBase> >(currentLevel, "Graph");

  SC zero = Teuchos::ScalarTraits<SC>::zero();

  // Both Epetra and Tpetra matrix-matrix multiply use the following trick:
  // if an entry of the left matrix is zero, it does not compute or store the
  // zero value.
  //
  // This trick allows us to bypass constructing a new sparsity pattern.
  // Instead, we build a matrix on the graph of the original one, copy the
  // values row by row, and overwrite the dropped ones with zeros, which are
  // ignored during the prolongator smoothing.
  RCP<Matrix> filteredA = MatrixFactory::Build(A->getCrsGraph());
  filteredA->resumeFill();

  ArrayView<const LO> inds;
  ArrayView<const SC> valsA;
#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
  ArrayView<SC> vals;
#else
  Array<SC> vals;
#endif

  // filter[c] != 0  <=>  local column c belongs to a node adjacent (in G) to
  // the node currently being processed.  One flag per dof of the import map.
  Array<char> filter(blkSize * G->GetImportMap()->getNodeNumElements(), 0);

  size_t numGRows = G->GetNodeNumVertices();
  for (size_t i = 0; i < numGRows; i++) {
    // Set up filtering array
    ArrayView<const LO> indsG = G->getNeighborVertices(i);
    for (size_t j = 0; j < as<size_t>(indsG.size()); j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG[j]*blkSize+k] = 1;

    for (size_t k = 0; k < blkSize; k++) {
      LO row = i*blkSize + k;

      A->getLocalRowView(row, inds, valsA);

      size_t nnz = inds.size();
      if (nnz == 0)
        continue;

#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
      // Transform ArrayView<const SC> into ArrayView<SC> so that the row of
      // filteredA can be edited in place
      ArrayView<const SC> vals1;
      filteredA->getLocalRowView(row, inds, vals1);
      vals = ArrayView<SC>(const_cast<SC*>(vals1.getRawPtr()), nnz);

      memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz*sizeof(SC));
#else
      vals = Array<SC>(valsA);
#endif

      if (lumping == false) {
        for (size_t j = 0; j < nnz; j++)
          if (!filter[inds[j]])
            vals[j] = zero;

      } else {
        LO diagIndex = -1;
        SC diagExtra = zero;

        for (size_t j = 0; j < nnz; j++) {
          // Remember the diagonal position whether or not the diagonal
          // survives filtering.  Recording it only for dropped entries left
          // diagIndex at -1 whenever the diagonal was kept, so the dropped
          // mass was never lumped.
          // NOTE(review): comparing a local column index with the local row
          // index assumes both maps number the owned dofs identically.
          const bool isDiag = (inds[j] == row);
          if (isDiag)
            diagIndex = as<LO>(j);

          if (filter[inds[j]])
            continue;

          // Entry is dropped: accumulate off-diagonals for lumping
          if (!isDiag)
            diagExtra += vals[j];

          vals[j] = zero;
        }

        // Lump dropped entries
        // NOTE
        //  * Does it make sense to lump for elasticity?
        //  * Is it different for diffusion and elasticity?
        if (diagIndex != -1)
          vals[diagIndex] += diagExtra;
      }

#ifndef ASSUME_DIRECT_ACCESS_TO_ROW
      // The matrix was built on A's graph, so the row already has these
      // local column indices and the values only need to be replaced
      filteredA->replaceLocalValues(row, inds, vals);
#endif
    }

    // Reset filtering array
    for (size_t j = 0; j < as<size_t> (indsG.size()); j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG[j]*blkSize+k] = 0;
  }

  RCP<ParameterList> fillCompleteParams(new ParameterList);
  fillCompleteParams->set("No Nonlocal Changes", true);
  filteredA->fillComplete(fillCompleteParams);

  filteredA->SetFixedBlockSize(blkSize);

  if (pL.get<bool>("filtered matrix: reuse eigenvalue")) {
    // Reuse max eigenvalue from A
    // It is unclear what eigenvalue is the best for the smoothing, but we already may have
    // the D^{-1}A estimate in A, may as well use it.
    // NOTE: ML does that too
    filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
  }

  Set(currentLevel, "A", filteredA);
}
TEUCHOS_UNIT_TEST(CoalesceDropFactory, AmalgamationStrided2LW) { # include "MueLu_UseShortNames.hpp" MUELU_TESTING_SET_OSTREAM; MUELU_TESTING_LIMIT_SCOPE(Scalar,GlobalOrdinal,NO); out << "version: " << MueLu::Version() << std::endl; // unit test for block size 3 = (2,1). wrap block 0 // lightweight wrap = true RCP<const Teuchos::Comm<int> > comm = Parameters::getDefaultComm(); Xpetra::UnderlyingLib lib = TestHelpers::Parameters::getLib(); // create strided map information std::vector<size_t> stridingInfo; stridingInfo.push_back(as<size_t>(2)); stridingInfo.push_back(as<size_t>(1)); LocalOrdinal stridedBlockId = 0; int blockSize=3; RCP<const StridedMap> dofMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, blockSize*comm->getSize(), 0, stridingInfo, comm, stridedBlockId /*blockId*/, 0 /*offset*/); ///////////////////////////////////////////////////// Teuchos::RCP<Matrix> mtx = TestHelpers::TestFactory<SC,LO,GO,NO>::BuildTridiag(dofMap, 2.0, -1.0, -1.0); Level fineLevel; TestHelpers::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(fineLevel); RCP<const Xpetra::StridedMap<LocalOrdinal, GlobalOrdinal, Node> > stridedRangeMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build( mtx->getRangeMap(), stridingInfo, stridedBlockId, 0 /*offset*/ ); RCP<const Map> stridedDomainMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build( mtx->getDomainMap(), stridingInfo, stridedBlockId, 0 /*offset*/ ); if(mtx->IsView("stridedMaps") == true) mtx->RemoveView("stridedMaps"); mtx->CreateView("stridedMaps", stridedRangeMap, stridedDomainMap); fineLevel.Set("A", mtx); CoalesceDropFactory dropFact = CoalesceDropFactory(); dropFact.SetParameter("lightweight wrap",Teuchos::ParameterEntry(true)); fineLevel.Request("Graph", &dropFact); fineLevel.Request("DofsPerNode", &dropFact); dropFact.Build(fineLevel); fineLevel.print(out); RCP<GraphBase> graph = fineLevel.Get<RCP<GraphBase> >("Graph", &dropFact); LO myDofsPerNode = 
fineLevel.Get<LO>("DofsPerNode", &dropFact); TEST_EQUALITY(as<int>(graph->GetDomainMap()->getGlobalNumElements()) == comm->getSize(), true); TEST_EQUALITY(as<int>(myDofsPerNode) == blockSize, true); bool bCorrectGraph = false; if (comm->getSize() == 1 && graph->getNeighborVertices(0).size() == 1) { bCorrectGraph = true; } else { if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) { if (graph->getNeighborVertices(0).size() == 2) bCorrectGraph = true; } else { if (graph->getNeighborVertices(0).size() == blockSize) bCorrectGraph = true; } } TEST_EQUALITY(bCorrectGraph, true); const RCP<const Map> myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! const RCP<const Map> myDomainMap = graph->GetDomainMap(); TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), comm->getSize()-1); TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); TEST_EQUALITY(myImportMap->getMinLocalIndex(),0); TEST_EQUALITY(myImportMap->getGlobalNumElements(),as<size_t>(comm->getSize()+2*(comm->getSize()-1))); if (comm->getSize()>1) { size_t numLocalRowMapElts = graph->GetNodeNumVertices(); size_t numLocalImportElts = myImportMap->getNodeNumElements(); if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) { TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+1), true); } else { TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+2), true); } } if (comm->getSize()>1) { size_t numLocalRowMapElts = graph->GetNodeNumVertices(); size_t maxLocalIndex = myImportMap->getMaxLocalIndex(); if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) { TEST_EQUALITY(as<bool>(maxLocalIndex==numLocalRowMapElts*blockSize-2), true); } else { TEST_EQUALITY(as<bool>(maxLocalIndex==numLocalRowMapElts*blockSize-1), true); } } TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), comm->getSize()-1); TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); 
TEST_EQUALITY(myDomainMap->getMinLocalIndex(),0); TEST_EQUALITY(myDomainMap->getMaxLocalIndex(),0); TEST_EQUALITY(myDomainMap->getGlobalNumElements(),as<size_t>(comm->getSize())); TEST_EQUALITY(as<bool>(myDomainMap->getNodeNumElements()==1), true); } // AmalgamationStrided2LW
TEUCHOS_UNIT_TEST(CoalesceDropFactory, AmalgamationStridedOffsetDropping2LW)
{
  // Scenario: striding (2,3,4) with total block size 9, strided block 1 is
  // selected, the maps carry a global offset, small entries are dropped
  // ("aggregation: drop tol" = 0.3) and "lightweight wrap" is enabled.
  out << "version: " << MueLu::Version() << std::endl;

  typedef Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node> StridedMapBuilder;
  typedef TestHelpers::TestFactory<SC,LO,GO,NO>                        MatrixTestHelper;

  RCP<const Teuchos::Comm<int> > comm = Parameters::getDefaultComm();
  Xpetra::UnderlyingLib xpetraLib = TestHelpers::Parameters::getLib();

  // Striding description shared by every strided map below
  std::vector<size_t> blockLayout;
  blockLayout.push_back(as<size_t>(2));
  blockLayout.push_back(as<size_t>(3));
  blockLayout.push_back(as<size_t>(4));
  LocalOrdinal wrappedBlock = 1;
  GlobalOrdinal gidOffset = 19;

  RCP<const StridedMap> rowDofMap =
      StridedMapBuilder::Build(xpetraLib, 9*comm->getSize(), 0, blockLayout, comm,
                               wrappedBlock, gidOffset);

  // Tridiagonal test operator with one strong (1.0) and one weak (0.0001)
  // off-diagonal
  Teuchos::RCP<Matrix> tridiag = MatrixTestHelper::BuildTridiag(rowDofMap, 2.0, 1.0, 0.0001);

  Level level;
  MatrixTestHelper::createSingleLevelHierarchy(level);

  // Attach the striding information to the operator as its "stridedMaps" view
  RCP<const Map> rangeStrided =
      StridedMapBuilder::Build(tridiag->getRangeMap(), blockLayout, wrappedBlock, gidOffset);
  RCP<const Map> domainStrided =
      StridedMapBuilder::Build(tridiag->getDomainMap(), blockLayout, wrappedBlock, gidOffset);

  if (tridiag->IsView("stridedMaps") == true)
    tridiag->RemoveView("stridedMaps");
  tridiag->CreateView("stridedMaps", rangeStrided, domainStrided);

  level.Set("A", tridiag);

  // Run the factory under test and fetch what it produced
  CoalesceDropFactory coalesceDrop = CoalesceDropFactory();
  coalesceDrop.SetParameter("lightweight wrap",Teuchos::ParameterEntry(true));
  coalesceDrop.SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(0.3));
  level.Request("Graph", &coalesceDrop);
  level.Request("DofsPerNode", &coalesceDrop);

  coalesceDrop.Build(level);

  level.print(out);
  RCP<GraphBase> graph = level.Get<RCP<GraphBase> >("Graph", &coalesceDrop);
  LO myDofsPerNode = level.Get<LO>("DofsPerNode", &coalesceDrop);

  TEST_EQUALITY(as<int>(graph->GetDomainMap()->getGlobalNumElements()) == comm->getSize(), true);
  TEST_EQUALITY(as<int>(myDofsPerNode) == 9, true);

  // Number of neighbours of local vertex 0: 1 on a single process or on
  // rank 0, and 2 on every other rank.
  bool bCorrectGraph;
  if (comm->getSize() == 1 && graph->getNeighborVertices(0).size() == 1)
    bCorrectGraph = true;
  else if (comm->getRank() == 0)
    bCorrectGraph = (graph->getNeighborVertices(0).size() == 1);
  else
    bCorrectGraph = (graph->getNeighborVertices(0).size() == 2);
  TEST_EQUALITY(bCorrectGraph, true);

  // Note: the import map comes from the column map of A, i.e. WITHOUT dropping
  const RCP<const Map> myImportMap = graph->GetImportMap();
  const RCP<const Map> myDomainMap = graph->GetDomainMap();

  TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myImportMap->getMinLocalIndex(),0);
  TEST_EQUALITY(myImportMap->getGlobalNumElements(),as<size_t>(comm->getSize()+2*(comm->getSize()-1)));

  // Local size of the import map: one ghost on boundary ranks, two elsewhere
  if (comm->getSize()>1) {
    size_t numLocalRowMapElts = graph->GetNodeNumVertices();
    size_t numLocalImportElts = myImportMap->getNodeNumElements();
    const bool onBoundaryRank = (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1);
    if (onBoundaryRank) {
      TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+1), true);
    } else {
      TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+2), true);
    }
  }

  // Domain map of the graph: exactly one vertex per process
  TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myDomainMap->getMinLocalIndex(),0);
  TEST_EQUALITY(myDomainMap->getGlobalNumElements(),as<size_t>(comm->getSize()));
  TEST_EQUALITY(as<bool>(myDomainMap->getNodeNumElements()==1), true);
} // AmalgamationStridedOffsetDropping2LW
// Build the filtered operator for this level and publish it as "A".
//
// Reads "A", the "Filtering" flag and "Graph" from currentLevel and the
// "lumping" entry from this factory's parameter list.  If "Filtering" is
// false the input matrix is passed through unchanged.  Otherwise a new matrix
// on A's row and column maps is assembled row by row from the entries of A
// whose column is flagged by the graph row of the owning node.  With lumping
// enabled, the sum of the entries that were left out is added to the diagonal
// entry of the row, provided the diagonal entry itself was kept.
//
// currentLevel: level that supplies the inputs and receives the result.
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& currentLevel) const {
  using Teuchos::as;

  FactoryMonitor m(*this, "Matrix filtering", currentLevel);

  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
  if (currentLevel.Get<bool>("Filtering", currentLevel.GetFactoryManager()->GetFactory("Filtering").get()) == false) {
    GetOStream(Runtime0,0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl;
    Set(currentLevel, "A", A);
    return;
  }

  const ParameterList& pL = GetParameterList();
  RCP<GraphBase> G = Get< RCP<GraphBase> >(currentLevel, "Graph");
  bool lumping = pL.get<bool>("lumping");
  size_t blkSize = A->GetFixedBlockSize();

  if (lumping)
    GetOStream(Runtime0,0) << "Lumping dropped entries" << std::endl;

  // Every filtered row is a subset of the corresponding row of A, so the
  // maximum row length of A is a valid static allocation bound
  RCP<Matrix> filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getNodeMaxNumRowEntries(), Xpetra::StaticProfile);

  // Scratch buffers for the row being assembled; reused across rows
  Array<LO> newInds;
  Array<SC> newVals;

  // filter[c] != 0  <=>  local column c belongs to a node adjacent (in G) to
  // the node currently being processed.  One flag per dof of the import map.
  Array<char> filter(blkSize*G->GetImportMap()->getNodeNumElements(), 0);

  // Sentinel for "no diagonal entry was kept in this row".  A plain
  // static_cast is used so the sentinel does not depend on how Teuchos::as
  // treats a negative-to-unsigned conversion.
  const size_t noDiag = static_cast<size_t>(-1);

  size_t numGRows = G->GetNodeNumVertices();
  for (size_t i = 0; i < numGRows; i++) {
    // Set up filtering array
    Teuchos::ArrayView<const LO> indsG = G->getNeighborVertices(i);
    for (size_t j = 0; j < as<size_t> (indsG.size()); j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG[j]*blkSize+k] = 1;

    for (size_t k = 0; k < blkSize; k++) {
      LocalOrdinal row = i*blkSize+k;

      ArrayView<const LO> oldInds;
      ArrayView<const SC> oldVals;
      A->getLocalRowView(row, oldInds, oldVals);

      size_t diagIndex = noDiag;
      SC     diagExtra = Teuchos::ScalarTraits<SC>::zero();

      // Size the buffers for the worst case (nothing dropped); they are
      // trimmed to the number of kept entries below
      newInds.resize(oldInds.size());
      newVals.resize(oldVals.size());
      size_t numInds = 0;
      for (size_t j = 0; j < as<size_t> (oldInds.size()); j++)
        if (filter[oldInds[j]]) {
          newInds[numInds] = oldInds[j];
          newVals[numInds] = oldVals[j];

          // Remember diagonal position
          if (newInds[numInds] == row)
            diagIndex = numInds;
          numInds++;

        } else {
          diagExtra += oldVals[j];
        }

      // Lump dropped entries
      // NOTE
      //  * Does it make sense to lump for elasticity?
      //  * Is it different for diffusion and elasticity?
      //
      // Only lump when a diagonal entry was actually kept: otherwise
      // diagIndex is still the sentinel and indexing newVals with it would be
      // out of range.
      if (lumping && diagIndex != noDiag)
        newVals[diagIndex] += diagExtra;

      newInds.resize(numInds);
      newVals.resize(numInds);

      // Because we used a column map in the construction of the matrix
      // we can just use insertLocalValues here instead of insertGlobalValues
      filteredA->insertLocalValues(row, newInds, newVals);
    }

    // Clean up filtering array
    for (size_t j = 0; j < as<size_t> (indsG.size()); j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG[j]*blkSize+k] = 0;
  }

  RCP<ParameterList> fillCompleteParams(new ParameterList);
  fillCompleteParams->set("No Nonlocal Changes", true);
  filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);

  filteredA->SetFixedBlockSize(blkSize);

  // TODO: Can we reuse max eigenvalue from A?
  // filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());

  Set(currentLevel, "A", filteredA);
}