// Builds the coarse-level operator Ac and stores it as "A" on coarseLevel.
//
// Ac is formed in two sparse products: AP = A * P first, then either
// P' * (AP) when "transpose: use implicit" is set, or R * (AP) with an
// explicitly requested restrictor "R" otherwise. Previously kept "AP Pattern"
// and "RAP Pattern" data on coarseLevel are handed to the products when
// available. Afterwards every registered transfer factory is built and
// released on coarseLevel.
//
// Throws Exceptions::RuntimeError when input was not declared before Build.
void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& fineLevel, Level& coarseLevel) const {
  {
    FactoryMonitor m(*this, "Computing Ac", coarseLevel);

    // The level id is appended to the labels passed to the matrix products.
    std::ostringstream levelIdStream;
    levelIdStream << coarseLevel.GetLevelID();
    const std::string levelId = levelIdStream.str();

    TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_==false, Exceptions::RuntimeError,
                               "MueLu::RAPFactory::Build(): CallDeclareInput has not been called before Build!");

    // Mark the patterns as kept on the coarse level when the parameters ask for it.
    const Teuchos::ParameterList& paramList = GetParameterList();
    const bool keepAPPattern = paramList.get<bool>("Keep AP Pattern");
    if (keepAPPattern)
      coarseLevel.Keep("AP Pattern", this);
    const bool keepRAPPattern = paramList.get<bool>("Keep RAP Pattern");
    if (keepRAPPattern)
      coarseLevel.Keep("RAP Pattern", this);

    RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
    RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");
    RCP<Matrix> AP;
    RCP<Matrix> Ac;

    // Multiple solve: pick up the AP pattern stored by a previous run, if any.
    if (coarseLevel.IsAvailable("AP Pattern", this)) {
      GetOStream(Runtime0) << "Ac: Using previous AP pattern" << std::endl;
      AP = Get< RCP<Matrix> >(coarseLevel, "AP Pattern");
    }

    {
      SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);
      AP = Utils::Multiply(*A, false, *P, false, AP, GetOStream(Statistics2), true, true,
                           std::string("MueLu::A*P-") + levelId);
    }
    if (keepAPPattern)
      Set(coarseLevel, "AP Pattern", AP);

    // Multiple solve: pick up the coarse matrix stored by a previous run, if any.
    if (coarseLevel.IsAvailable("RAP Pattern", this)) {
      GetOStream(Runtime0) << "Ac: Using previous RAP pattern" << std::endl;
      Ac = Get< RCP<Matrix> >(coarseLevel, "RAP Pattern");

      // An eigenvalue estimate may have been cached with the matrix during the
      // previous run. The matrix values are about to change, so reset it.
      Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
    }

    // Storage may only be optimized if the matrix is not modified afterwards.
    // This is necessary for the new faster Epetra MM kernels.
    const bool doOptimizeStorage = !paramList.get<bool>("RepairMainDiagonal");
    const bool doTranspose       = true;
    const bool doFillComplete    = true;

    if (paramList.get<bool>("transpose: use implicit")) {
      SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);
      Ac = Utils::Multiply(*P, doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2),
                           doFillComplete, doOptimizeStorage,
                           std::string("MueLu::R*(AP)-implicit-") + levelId);

    } else {
      RCP<Matrix> R = Get< RCP<Matrix> >(coarseLevel, "R");

      SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);
      Ac = Utils::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2),
                           doFillComplete, doOptimizeStorage,
                           std::string("MueLu::R*(AP)-explicit-") + levelId);
    }

    CheckRepairMainDiagonal(Ac);

    // Only gather the matrix statistics when they are actually going to be printed.
    if (IsPrint(Statistics1)) {
      RCP<ParameterList> printParams = rcp(new ParameterList());
      printParams->set("printLoadBalancingInfo", true);
      printParams->set("printCommInfo",          true);
      GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", printParams);
    }

    Set(coarseLevel, "A", Ac);
    if (keepRAPPattern)
      Set(coarseLevel, "RAP Pattern", Ac);
  }

  if (!transferFacts_.empty()) {
    SubFactoryMonitor m(*this, "Projections", coarseLevel);

    // Run Build of every user-given transfer factory, in registration order.
    for (size_t i = 0; i < transferFacts_.size(); ++i) {
      RCP<const FactoryBase> fac = transferFacts_[i];

      GetOStream(Runtime0) << "RAPFactory: call transfer factory: " << fac->description() << std::endl;
      fac->CallBuild(coarseLevel);

      // Why the explicit Release below is needed:
      //
      // Coordinates transfer is marginally different from all other operations
      // because it is *optional*, not required. For instance, coordinates may
      // be needed only on level 4 if repartitioning starts from that level,
      // but not on levels 1, 2, 3. As the current Hierarchy setup assumes that
      // dependencies propagate only through three levels, optional data has to
      // be propagated by other means.
      //
      // The method currently used is RAP transfer factories: factories that
      // are called at the end of RAP with a single goal, namely to transfer
      // some fine data to the coarser level. Because these factories are kind
      // of outside of the mainline factories, they behave differently. In
      // particular, their Build method is called explicitly rather than
      // through Get calls. This difference is significant, as a Get call is
      // smart enough to know when to release all factory dependencies, and
      // Build is not. This led to the following CoordinatesTransferFactory
      // sequence:
      //   1. Request level 0
      //   2. Request level 1
      //   3. Request level 0
      //   4. Release level 0
      //   5. Release level 1
      //
      // The problem is the missing "6. Release level 0". Because it was
      // missing, there were outstanding requests on "Coordinates",
      // "Aggregates" and "CoarseMap" on level 0.
      //
      // This was fixed by explicitly calling Release on transfer factories in
      // RAPFactory. (Original author's note: it is still unclear how exactly
      // it works, but the data requests are now clean for all levels.)
      coarseLevel.Release(*fac);
    }
  }
}
// Builds the blocked coarse-level operator Ac and stores it as "A" on
// coarseLevel.
//
// The fine-level "A" and the coarse-level "P" must both be BlockedCrsMatrix
// objects. Ac is formed as P' * (A * P) when "transpose: use implicit" is set,
// and as R * (A * P) with an explicitly requested blocked "R" otherwise.
// Afterwards every registered transfer factory is built and released on
// coarseLevel.
//
// Throws Exceptions::BadCast when A, P (or R on the explicit path) is not a
// BlockedCrsMatrix, or when the block dimensions of the operands do not match.
void BlockedRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &fineLevel, Level &coarseLevel) const { //FIXME make fineLevel const!!
  FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel);

  const Teuchos::ParameterList& pL = GetParameterList();

  RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
  RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");

  RCP<BlockedCrsMatrix> bA = rcp_dynamic_cast<BlockedCrsMatrix>(A);
  RCP<BlockedCrsMatrix> bP = rcp_dynamic_cast<BlockedCrsMatrix>(P);
  // Only A and P are checked here; R is requested and checked further down,
  // and only when the explicit restrictor is used.
  TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null() || bP.is_null(), Exceptions::BadCast,
                             "Matrices A and P must be of type BlockedCrsMatrix.");

  RCP<BlockedCrsMatrix> bAP;
  RCP<BlockedCrsMatrix> bAc;
  {
    SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);

    // Triple matrix product for BlockedCrsMatrixClass
    TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast,
                               "Block matrix dimensions do not match: "
                               "A is " << bA->Rows() << "x" << bA->Cols() << ", "
                               "P is " << bP->Rows() << "x" << bP->Cols());

    bAP = Utils::TwoMatrixMultiplyBlock(*bA, false, *bP, false, GetOStream(Statistics2), true, true);
  }

  // If we do not modify matrix later, allow optimization of storage.
  // This is necessary for new faster Epetra MM kernels.
  bool doOptimizeStorage = !checkAc_;

  const bool doTranspose    = true;
  const bool doFillComplete = true;
  if (pL.get<bool>("transpose: use implicit") == true) {
    SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);
    bAc = Utils::TwoMatrixMultiplyBlock(*bP, doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage);

  } else {
    RCP<Matrix>           R  = Get< RCP<Matrix> >(coarseLevel, "R");
    RCP<BlockedCrsMatrix> bR = rcp_dynamic_cast<BlockedCrsMatrix>(R);
    TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast,
                               "Matrix R must be of type BlockedCrsMatrix.");

    TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast,
                               "Block matrix dimensions do not match: "
                               "R is " << bR->Rows() << "x" << bR->Cols() << ", "
                               "A is " << bA->Rows() << "x" << bA->Cols());

    SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);
    bAc = Utils::TwoMatrixMultiplyBlock(*bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage);
  }

  if (checkAc_)
    CheckMainDiagonal(bAc);

  // Gather the matrix statistics only when they are going to be printed;
  // otherwise the result would just be streamed into a discarded message.
  if (IsPrint(Statistics1))
    GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)");

  // Debugging aid (disabled): dump the blocks and the merged operator to file.
  // static int run = 1;
  // RCP<CrsMatrixWrap> A11 = rcp(new CrsMatrixWrap(bAc->getMatrix(0,0)));
  // Utils::Write(toString(run) + "_A_11.mm", *A11);
  // if (!bAc->getMatrix(1,1).is_null()) {
  //   RCP<CrsMatrixWrap> A22 = rcp(new CrsMatrixWrap(bAc->getMatrix(1,1)));
  //   Utils::Write(toString(run) + "_A_22.mm", *A22);
  // }
  // RCP<CrsMatrixWrap> Am = rcp(new CrsMatrixWrap(bAc->Merge()));
  // Utils::Write(toString(run) + "_A.mm", *Am);
  // run++;

  Set<RCP <Matrix> >(coarseLevel, "A", bAc);

  if (transferFacts_.begin() != transferFacts_.end()) {
    SubFactoryMonitor m1(*this, "Projections", coarseLevel);

    // call Build of all user-given transfer factories
    for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) {
      RCP<const FactoryBase> fac = *it;

      GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl;

      fac->CallBuild(coarseLevel);

      // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid
      // of dangling data for CoordinatesTransferFactory
      coarseLevel.Release(*fac);
    }
  }
}
// Builds the blocked coarse-level operator Ac = R * (A * P) and stores it as
// "A" on coarseLevel.
//
// The coarse-level "R" and "P" and the fine-level "A" must all be
// BlockedCrsMatrix objects with matching block dimensions. Afterwards every
// registered transfer factory is built and released on coarseLevel.
//
// Throws Exceptions::BadCast when one of R, A, P is not a BlockedCrsMatrix or
// when the block dimensions do not match.
void BlockedRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const { //FIXME make fineLevel const!!
  typedef Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> BlockedCrsMatrixClass; // TODO move me

  FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel);

  //
  // Inputs: R, A, P
  //

  RCP<Matrix> R = Get< RCP<Matrix> >(coarseLevel, "R");
  RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
  RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");

  //
  // Dynamic casts
  //

  RCP<BlockedCrsMatrixClass> bR, bA, bP;

  try {
    /* using rcp_dynamic_cast with throw_on_fail = true */
    bR = Teuchos::rcp_dynamic_cast<BlockedCrsMatrixClass>(R, true);
    bA = Teuchos::rcp_dynamic_cast<BlockedCrsMatrixClass>(A, true);
    bP = Teuchos::rcp_dynamic_cast<BlockedCrsMatrixClass>(P, true);
  } catch (const std::bad_cast& e) {
    // Catch by const reference: catching by value would slice an exception
    // derived from std::bad_cast, so e.what() would no longer return the
    // message of the exception that was actually thrown.
    TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::BadCast, "MueLu::BlockedRAPFactory::Build(): matrices R, A and P must be of type BlockedCrsMatrix. " << e.what());
  }

  /*Utils::Write( "A00.m", CrsMatrixWrap(bA->getMatrix(0,0)) );
    Utils::Write( "A11.m", CrsMatrixWrap(bA->getMatrix(1,1)) );
    Utils::Write( "A01.m", CrsMatrixWrap(bA->getMatrix(0,1)) );
    Utils::Write( "A10.m", CrsMatrixWrap(bA->getMatrix(1,0)) );

    Utils::Write( "P00.m", CrsMatrixWrap(bP->getMatrix(0,0)) );
    Utils::Write( "P11.m", CrsMatrixWrap(bP->getMatrix(1,1)) );*/

  //
  // Build Ac = RAP
  //

  // Triple matrix product for BlockedCrsMatrixClass
  TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()) || (bA->Rows() != bR->Cols()), Exceptions::BadCast, "MueLu::BlockedRAPFactory::Build(): block matrix dimensions do not match.");
  RCP<BlockedCrsMatrixClass> bAP = Utils::TwoMatrixMultiplyBlock(*bA, false, *bP,  false, true, true);
  RCP<BlockedCrsMatrixClass> bAc = Utils::TwoMatrixMultiplyBlock(*bR, false, *bAP, false, true, true);

  if (checkAc_)
    CheckMainDiagonal(bAc);

  GetOStream(Statistics1, 0) << Utils::PrintMatrixInfo(*bAc, "Ac (blocked)");

  Set<RCP <Matrix> >(coarseLevel, "A", bAc);

  if (transferFacts_.begin() != transferFacts_.end()) {
    SubFactoryMonitor m1(*this, "Projections", coarseLevel);

    // call Build of all user-given transfer factories
    for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) {
      RCP<const FactoryBase> fac = *it;

      GetOStream(Runtime0, 0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl;

      fac->CallBuild(coarseLevel);

      // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid
      // of dangling data for CoordinatesTransferFactory
      coarseLevel.Release(*fac);
    }
  }
}