Example #1
0
      // Main algorithm
      //
      // - We do not want to set default parameters inside of the algorithm.
      //  => Otherwise, it's difficult to track the defaults. Cf. ML.
      //  => use ParameterList::get() without the default value input parameter.
      void Build() {

        if (GetParameterList().get<double>("ParamA") == 0.5) { } // change "[used]"/["unused"] flag
        if (GetParameterList().get<double>("ParamC") == 0.5) { }

        // statsParamList_.set(...);

      }
Example #2
0
    //! Default implementation of FactoryAcceptor::GetFactory()
    const RCP<const FactoryBase> GetFactory(const std::string & varName) const {
        if (!GetParameterList().isParameter(varName) && GetValidParameterList() == Teuchos::null) {
            // If the parameter is not on the list and there is not validator, the defaults values for 'varName' is not set.
            // Failback by using directly the FactoryManager
            // NOTE: call to GetValidParameterList() can be costly for classes that validate parameters.
            // But it get called only (lazy '&&' operator) if the parameter 'varName' is not on the paramlist and
            // the parameter 'varName' is always on the list when validator is present and 'varName' is valid (at least the default value is set).
            return Teuchos::null;
        }

        return GetParameterList().get< RCP<const FactoryBase> >(varName);
    }
Example #3
0
  void PatternFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::DeclareInput(Level& fineLevel, Level& coarseLevel) const {
    Input(coarseLevel, "P");

    const ParameterList& pL = GetParameterList();
    if (pL.get<int>("emin: pattern order") > 0)
      Input(fineLevel, "A");
  }
      //! Print the object with some verbosity level to an FancyOStream object.
      void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel = verbLevel_default) const {
        int vl = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
        if (vl == Teuchos::VERB_NONE)
          return;

        if (vl == Teuchos::VERB_LOW)
          out << description() << std::endl;
        else
          out << Teuchos::Describable::description() << std::endl;

        if (vl == Teuchos:: VERB_MEDIUM || vl == Teuchos::VERB_HIGH || vl == Teuchos::VERB_EXTREME) {
          Teuchos::OSTab tab1(out);

          const Teuchos::ParameterList& paramList = GetParameterList();
          std::string matrixType = paramList.get<std::string>("matrixType");
          GO nx = paramList.get<GO>("nx");
          GO ny = paramList.get<GO>("ny");
          GO nz = paramList.get<GO>("nz");

          out << "Matrix type: "  << matrixType << std::endl
              << "Problem size: " << GetNumGlobalElements();

          if      (matrixType == "Laplace2D" || matrixType == "Elasticity2D" || matrixType == "Helmholtz2D")  out << " (" << nx << "x" << ny << ")";
          else if (matrixType == "Laplace3D" || matrixType == "Elasticity3D" || matrixType == "Helmholtz3D")  out << " (" << nx << "x" << ny << "x" << nz << ")";

          out << std::endl;
        }
      }
Example #5
0
  void MultiVectorTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::DeclareInput(Level &fineLevel, Level &coarseLevel) const {
    const ParameterList & pL = GetParameterList();
    std::string vectorName   = pL.get<std::string>("Vector name");

    fineLevel.DeclareInput(vectorName, GetFactory("Vector factory").get(), this);
    Input(coarseLevel, "R");
  }
void PermutationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & currentLevel) const {
  FactoryMonitor m(*this, "Permutation Factory ", currentLevel);

  Teuchos::RCP<Matrix> A = Get< Teuchos::RCP<Matrix> > (currentLevel, "A");

  const ParameterList & pL = GetParameterList();
  std::string mapName                        = pL.get<std::string> ("PermutationRowMapName");
  Teuchos::RCP<const FactoryBase> mapFactory = GetFactory          ("PermutationRowMapFactory");

  Teuchos::RCP<const Map> permRowMap = Teuchos::null;
  if(mapName.length() > 0 ) {
    permRowMap = currentLevel.Get<RCP<const Map> >(mapName,mapFactory.get());
  } else {
    permRowMap = A->getRowMap(); // use full row map of A
  }

  std::string strStrategy = pL.get<std::string> ("PermutationStrategy");
  if( strStrategy == "Algebraic" ) {
    Teuchos::RCP<AlgebraicPermutationStrategy> permStrat = Teuchos::rcp(new AlgebraicPermutationStrategy());
    permStrat->BuildPermutation(A,permRowMap,currentLevel,this);
  } else if( strStrategy == "Local" ) {
    Teuchos::RCP<LocalPermutationStrategy> permStrat = Teuchos::rcp(new LocalPermutationStrategy());
    permStrat->BuildPermutation(A,permRowMap,currentLevel,this);
  } else
    TEUCHOS_TEST_FOR_EXCEPTION(true,
                                  std::logic_error,
                                  "`PermutationStrategy' has incorrect value (" << strStrategy << ") in input to PermutationFactory."
                                  << "Check the documentation for a list of valid choices");

  GetOStream(Runtime0, 0) << "Using " << strStrategy << " permutation strategy." << std::endl;

}
  void RebalanceMapFactory<LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level & currentLevel) const {
    const Teuchos::ParameterList & pL = GetParameterList();
    std::string mapName                        = pL.get<std::string> ("Map name");
    Teuchos::RCP<const FactoryBase> mapFactory = GetFactory          ("Map factory");
    currentLevel.DeclareInput(mapName,mapFactory.get(),this);

    Input(currentLevel, "Importer");
  } //DeclareInput()
  void RebalanceMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(Level &level) const {
    FactoryMonitor m(*this, "Build", level);

    //Teuchos::RCP<Teuchos::FancyOStream> fos = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout));

    // extract data from Level object
    const Teuchos::ParameterList & pL = GetParameterList();
    std::string mapName = pL.get<std::string> ("Map name");
    Teuchos::RCP<const FactoryBase> mapFactory = GetFactory ("Map factory");

    RCP<const Import> rebalanceImporter = Get<RCP<const Import> >(level, "Importer");

    if(rebalanceImporter != Teuchos::null) {
      // input map (not rebalanced)
      RCP<const Map> map = level.Get< RCP<const Map> >(mapName,mapFactory.get());

      // create vector based on input map
      // Note, that the map can be a part only of the full map stored in rebalanceImporter.getSourceMap()
      RCP<Vector> v = VectorFactory::Build(map);
      v->putScalar(1.0);

      // create a new vector based on the full rebalanceImporter.getSourceMap()
      // import the partial map information to the full source map
      RCP<const Import> blowUpImporter = ImportFactory::Build(map, rebalanceImporter->getSourceMap());
      RCP<Vector> pv = VectorFactory::Build(rebalanceImporter->getSourceMap());
      pv->doImport(*v,*blowUpImporter,Xpetra::INSERT);

      // do rebalancing using rebalanceImporter
      RCP<Vector> ptv = VectorFactory::Build(rebalanceImporter->getTargetMap());
      ptv->doImport(*pv,*rebalanceImporter,Xpetra::INSERT);

      if (pL.get<bool>("repartition: use subcommunicators") == true)
        ptv->replaceMap(ptv->getMap()->removeEmptyProcesses());

      // reconstruct rebalanced partial map
      Teuchos::ArrayRCP< const Scalar > ptvData = ptv->getData(0);
      std::vector<GlobalOrdinal> localGIDs;  // vector with GIDs that are stored on current proc

      for (size_t k = 0; k < ptv->getLocalLength(); k++) {
        if(ptvData[k] == 1.0) {
          localGIDs.push_back(ptv->getMap()->getGlobalElement(k));
        }
      }

      const Teuchos::ArrayView<const GlobalOrdinal> localGIDs_view(&localGIDs[0],localGIDs.size());

      Teuchos::RCP<const Map> localGIDsMap = MapFactory::Build(
          map->lib(),
          Teuchos::OrdinalTraits<int>::invalid(),
          localGIDs_view,
          0, ptv->getMap()->getComm());  // use correct communicator here!

      // store rebalanced partial map using the same name and generating factory as the original map
      // in the level class
      level.Set(mapName, localGIDsMap, mapFactory.get());
    }
  } //Build()
Example #9
0
void UserAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  const ParameterList & pL = GetParameterList();

  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  const int myRank = comm->getRank();

  std::string fileName = pL.get<std::string>("filePrefix") + toString(currentLevel.GetLevelID()) + "_" + toString(myRank) + "." + pL.get<std::string>("fileExt");
  std::ifstream ifs(fileName.c_str());
  if (!ifs.good())
    throw Exceptions::RuntimeError("Cannot read data from \"" + fileName + "\"");

  LO numVertices, numAggregates;
  ifs >> numVertices >> numAggregates;

  // FIXME: what is the map?
  Xpetra::UnderlyingLib  lib       = Xpetra::UseEpetra;
  const int              indexBase = 0;
  RCP<Map> map = MapFactory::Build(lib, numVertices, indexBase, comm);

  RCP<Aggregates> aggregates = rcp(new Aggregates(map));
  aggregates->setObjectLabel("User");

  aggregates->SetNumAggregates(numAggregates);

  Teuchos::ArrayRCP<LO> vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0);
  Teuchos::ArrayRCP<LO> procWinner   = aggregates->GetProcWinner()  ->getDataNonConst(0);

  for (LO i = 0; i < numAggregates; i++) {
    int aggSize = 0;
    ifs >> aggSize;

    std::vector<LO> list(aggSize);
    for (int k = 0; k < aggSize; k++) {
      // FIXME: File contains GIDs, we need LIDs
      // for now, works on a single processor
      ifs >> list[k];
    }

    // Mark first node as root node for the aggregate
    aggregates->SetIsRoot(list[0]);

    // Fill vertex2AggId and procWinner structure with information
    for (int k = 0; k < aggSize; k++) {
      vertex2AggId[list[k]] = i;
      procWinner  [list[k]] = myRank;
    }
  }

  // FIXME: do the proper check whether aggregates cross interprocessor boundary
  aggregates->AggregatesCrossProcessors(false);

  Set(currentLevel, "Aggregates", aggregates);

  GetOStream(Statistics0, 0) << aggregates->description() << std::endl;
}
 void ToggleCoordinatesTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& fineLevel, Level& coarseLevel) const {
   const ParameterList& pL = GetParameterList();
   TEUCHOS_TEST_FOR_EXCEPTION(!pL.isParameter("Chosen P"), Exceptions::RuntimeError, "MueLu::ToggleCoordinatesTransferFactory::DeclareInput: You have to set the 'Chosen P' parameter to a factory name of type TogglePFactory. The ToggleCoordinatesTransferFactory must be used together with a TogglePFactory!");
   Input(coarseLevel,"Chosen P");
   for (std::vector<RCP<const FactoryBase> >::const_iterator it = coordFacts_.begin(); it != coordFacts_.end(); ++it) {
     coarseLevel.DeclareInput("Coordinates", (*it).get(), this);  // request/release coarse coordinates
     (*it)->CallDeclareInput(coarseLevel); // request dependencies
   }
   hasDeclaredInput_ = true;
 }
void PermutationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::DeclareInput(Level &currentLevel) const {
  Input(currentLevel, "A");

  const ParameterList & pL = GetParameterList();
  std::string mapName                        = pL.get<std::string> ("PermutationRowMapName");
  Teuchos::RCP<const FactoryBase> mapFactory = GetFactory          ("PermutationRowMapFactory");

  if(mapName.length() > 0 ) {
    currentLevel.DeclareInput(mapName,mapFactory.get(),this);
  }
}
Example #12
0
  void EminPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildP(Level& fineLevel, Level& coarseLevel) const {
    FactoryMonitor m(*this, "Prolongator minimization", coarseLevel);

    const ParameterList & pL = GetParameterList();

    // Set keep flags
    if (pL.isParameter("Keep P0") && pL.get<bool>("Keep P0"))
      coarseLevel.Keep("P0",this);
    if (pL.isParameter("Keep Constraint0") && pL.get<bool>("Keep Constraint0"))
      coarseLevel.Keep("Constraint0",this);

    // Reuse
    int Niterations;

    // Get A, B
    RCP<Matrix>      A = Get< RCP<Matrix> >     (fineLevel,   "A");
    RCP<MultiVector> B = Get< RCP<MultiVector> >(fineLevel,   "Nullspace");

    // Get P0 or make P
    RCP<Matrix>      P0;
    if (coarseLevel.IsAvailable("P0", this)) {
      P0          = coarseLevel.Get<RCP<Matrix> >("P0", this);
      Niterations = pL.get<int>("Reuse Niterations");
      GetOStream(Runtime0, 0) << "EminPFactory: Reusing P0"<<std::endl;

    } else {
      P0          = Get< RCP<Matrix> >(coarseLevel, "P");
      Niterations = pL.get<int>("Niterations");
    }

    // Get Constraint0 or make Constraint
    RCP<Constraint> X;
    if (coarseLevel.IsAvailable("Constraint0", this)) {
      X = coarseLevel.Get<RCP<Constraint> >("Constraint0", this);
      GetOStream(Runtime0, 0) << "EminPFactory: Reusing Constraint0"<<std::endl;

    } else {
      X = Get< RCP<Constraint> > (coarseLevel, "Constraint");
    }


    RCP<Matrix> P;
    CGSolver EminSolver(Niterations);
    EminSolver.Iterate(*A, *X, *P0, *B, P);

    Set(coarseLevel, "Constraint0", X);
    Set(coarseLevel, "P",           P);
    Set(coarseLevel, "P0",          P);

    RCP<ParameterList> params = rcp(new ParameterList());
    params->set("printLoadBalancingInfo", true);
    GetOStream(Statistics0,0) << Utils::PrintMatrixInfo(*P, "P", params);
  }
  void UncoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::DeclareInput(Level& currentLevel) const {
    Input(currentLevel, "Graph");
    Input(currentLevel, "DofsPerNode");

    const ParameterList& pL = GetParameterList();

    std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
    if (mapOnePtName.length() > 0) {
      RCP<const FactoryBase> mapOnePtFact = GetFactory("OnePt aggregate map factory");
      currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get());
    }
  }
  void BlockedRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level &fineLevel, Level &coarseLevel) const {
    const Teuchos::ParameterList& pL = GetParameterList();
    if (pL.get<bool>("transpose: use implicit") == false)
      Input(coarseLevel, "R");

    Input(fineLevel,   "A");
    Input(coarseLevel, "P");

    // call DeclareInput of all user-given transfer factories
    for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it)
      (*it)->CallDeclareInput(coarseLevel);
  }
  void TogglePFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Prolongator toggle", coarseLevel);
    std::ostringstream levelstr;
    levelstr << coarseLevel.GetLevelID();

    typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;

    TEUCHOS_TEST_FOR_EXCEPTION(nspFacts_.size() != prolongatorFacts_.size(), Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: The number of provided prolongator factories and coarse nullspace factories must be identical.");
    TEUCHOS_TEST_FOR_EXCEPTION(nspFacts_.size() != 2, Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: TogglePFactory needs two different transfer operator strategies for toggling."); // TODO adapt this/weaken this as soon as other toggling strategies are introduced.

    // decision routine which prolongator factory to be used
    int nProlongatorFactory = 0; // default behavior: use first prolongator in list

    // extract user parameters
    const Teuchos::ParameterList & pL = GetParameterList();
    std::string mode = Teuchos::as<std::string>(pL.get<std::string>("toggle: mode"));
    int semicoarsen_levels = Teuchos::as<int>(pL.get<int>("semicoarsen: number of levels"));

    TEUCHOS_TEST_FOR_EXCEPTION(mode!="semicoarsen", Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: The 'toggle: mode' parameter must be set to 'semicoarsen'. No other mode supported, yet.");

    LO NumZDir = -1;
    if(fineLevel.IsAvailable("NumZLayers", NoFactory::get())) {
      NumZDir = fineLevel.Get<LO>("NumZLayers", NoFactory::get()); //obtain info
      GetOStream(Runtime1) << "Number of layers for semicoarsening: " << NumZDir << std::endl;
    }

    // Make a decision which prolongator to be used.
    if(fineLevel.GetLevelID() >= semicoarsen_levels || NumZDir == 1) {
      nProlongatorFactory = 1;
    } else {
      nProlongatorFactory = 0;
    }

    RCP<Matrix> P = Teuchos::null;
    RCP<MultiVector> coarseNullspace = Teuchos::null;

    // call Build for selected transfer operator
    GetOStream(Runtime0) << "TogglePFactory: call transfer factory: " << (prolongatorFacts_[nProlongatorFactory])->description() << std::endl;
    prolongatorFacts_[nProlongatorFactory]->CallBuild(coarseLevel);
    P = coarseLevel.Get< RCP<Matrix> >("P", (prolongatorFacts_[nProlongatorFactory]).get());
    coarseNullspace = coarseLevel.Get< RCP<MultiVector> >("Nullspace", (nspFacts_[nProlongatorFactory]).get());

    // Release dependencies of all prolongator and coarse level null spaces
    for(size_t t=0; t<nspFacts_.size(); ++t) {
      coarseLevel.Release(*(prolongatorFacts_[t]));
      coarseLevel.Release(*(nspFacts_[t]));
    }

    // store prolongator with this factory identification.
    Set(coarseLevel, "P", P);
    Set(coarseLevel, "Nullspace", coarseNullspace);

  } //Build()
Example #16
0
  void UserPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildP(Level& fineLevel, Level& coarseLevel) const {
    FactoryMonitor m(*this, "Build", coarseLevel);

    RCP<Matrix>      A             = Get< RCP<Matrix> >      (fineLevel, "A");
    RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");

    TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError, "Block size > 1 has not been implemented");

    const Teuchos::ParameterList& pL = GetParameterList();

    std::string    mapFile   = pL.get<std::string>("mapFileName");
    RCP<const Map> rowMap    = A->getRowMap();
    RCP<const Map> coarseMap = Utils2::ReadMap(mapFile, rowMap->lib(), rowMap->getComm());
    Set(coarseLevel, "CoarseMap", coarseMap);

    std::string matrixFile = pL.get<std::string>("matrixFileName");
    RCP<Matrix> P          = Utils::Read(matrixFile, rowMap, coarseMap, coarseMap, rowMap);
#if 1
    Set(coarseLevel, "P", P);
#else
    // Expand column map by 1
    RCP<Matrix> P1 = Utils::Multiply(*A, false, *P, false);
    P = Utils::Read(matrixFile, rowMap, P1->getColMap(), coarseMap, rowMap);
    Set(coarseLevel, "P", P);
#endif

    RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
    P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
    Set(coarseLevel, "Nullspace", coarseNullspace);

    // Coordinates transfer
    size_t n = Teuchos::as<size_t>(sqrt(coarseMap->getGlobalNumElements()));
    TEUCHOS_TEST_FOR_EXCEPTION(n*n != coarseMap->getGlobalNumElements(), Exceptions::RuntimeError, "Unfortunately, this is not the case, don't know what to do");

    RCP<MultiVector> coarseCoords = MultiVectorFactory::Build(coarseMap, 2);
    ArrayRCP<Scalar> x = coarseCoords->getDataNonConst(0), y = coarseCoords->getDataNonConst(1);
    for (size_t LID = 0; LID < coarseMap->getNodeNumElements(); ++LID) {
      GlobalOrdinal GID = coarseMap->getGlobalElement(LID) - coarseMap->getIndexBase();
      GlobalOrdinal i = GID % n, j = GID/n;
      x[LID] = i;
      y[LID] = j;
    }
    Set(coarseLevel, "Coordinates", coarseCoords);

    if (IsPrint(Statistics1)) {
      RCP<ParameterList> params = rcp(new ParameterList());
      params->set("printLoadBalancingInfo", true);
      GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params);
    }
  }
  void RebalanceTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::DeclareInput(Level& fineLevel, Level& coarseLevel) const {
    const ParameterList& pL = GetParameterList();

    if (pL.get<std::string>("type") == "Interpolation") {
      Input(coarseLevel, "P");
      Input(coarseLevel, "Nullspace");
      if (pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null)
        Input(coarseLevel, "Coordinates");

    } else {
      if (pL.get<bool>("transpose: use implicit") == false)
        Input(coarseLevel, "R");
    }

    Input(coarseLevel, "Importer");
  }
    //!
    virtual void CallBuild(Level& requestedLevel) const {
      int levelID = requestedLevel.GetLevelID();

#ifdef HAVE_MUELU_DEBUG
      // We cannot call Build method twice for the same level, but we can call it multiple times for different levels
      TEUCHOS_TEST_FOR_EXCEPTION((multipleCallCheck_ == ENABLED) && (multipleCallCheckGlobal_ == ENABLED) && (lastLevelID_ == levelID),
                                 Exceptions::RuntimeError,
                                 this->ShortClassName() << "::Build() called twice for the same level (levelID=" << levelID
                                 << "). This is likely due to a configuration error.");
      if (multipleCallCheck_ == FIRSTCALL)
        multipleCallCheck_ = ENABLED;

      lastLevelID_ = levelID;
#endif
      TEUCHOS_TEST_FOR_EXCEPTION(requestedLevel.GetPreviousLevel() == Teuchos::null, Exceptions::RuntimeError, "LevelID = " << levelID);

#ifdef HAVE_MUELU_TIMER_SYNCHRONIZATION
      RCP<const Teuchos::Comm<int> > comm = requestedLevel.GetComm();
      if (comm.is_null()) {
        // Some factories are called before we constructed Ac, and therefore,
        // before we set the level communicator. For such factories we can get
        // the comm from the previous level, as all processes go there
        RCP<Level>& prevLevel = requestedLevel.GetPreviousLevel();
        if (!prevLevel.is_null())
          comm = prevLevel->GetComm();
      }

      // Synchronization timer
      std::string syncTimer = this->ShortClassName() + ": Build sync (level=" + toString(requestedLevel.GetLevelID()) + ")";
      if (!comm.is_null()) {
        TimeMonitor timer(*this, syncTimer);
        comm->barrier();
      }
#endif

      Build(*requestedLevel.GetPreviousLevel(), requestedLevel);

#ifdef HAVE_MUELU_TIMER_SYNCHRONIZATION
      // Synchronization timer
      if (!comm.is_null()) {
        TimeMonitor timer(*this, syncTimer);
        comm->barrier();
      }
#endif

      GetOStream(Test) << *RemoveFactoriesFromList(GetParameterList()) << std::endl;
    }
  void UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const {
    Input(currentLevel, "Graph");
    Input(currentLevel, "DofsPerNode");

    const ParameterList& pL = GetParameterList();

    // request special data necessary for OnePtAggregationAlgorithm
    std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
    if (mapOnePtName.length() > 0) {
      std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
      if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
        currentLevel.DeclareInput(mapOnePtName, NoFactory::get());
      } else {
        RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
        currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get());
      }
    }
  }
Example #20
0
  void RebalanceAcFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Computing Ac", coarseLevel);

    RCP<Matrix> originalAc = Get< RCP<Matrix> >(coarseLevel, "A");

    RCP<const Import> rebalanceImporter = Get< RCP<const Import> >(coarseLevel, "Importer");

    if (rebalanceImporter != Teuchos::null) {
      RCP<Matrix> rebalancedAc;
      {
        SubFactoryMonitor subM(*this, "Rebalancing existing Ac", coarseLevel);
        RCP<const Map> targetMap = rebalanceImporter->getTargetMap();

        const ParameterList & pL = GetParameterList();

        ParameterList XpetraList;
        if (pL.get<bool>("useSubcomm") == true) {
          GetOStream(Runtime0,0) << "Replacing maps with a subcommunicator" << std::endl;
          XpetraList.set("Restrict Communicator",true);
        }
        // NOTE: If the communicator is restricted away, Build returns Teuchos::null.
        rebalancedAc = MatrixFactory::Build(originalAc, *rebalanceImporter, targetMap, targetMap, rcp(&XpetraList,false));

        if (!rebalancedAc.is_null())
          rebalancedAc->SetFixedBlockSize(originalAc->GetFixedBlockSize());

        Set(coarseLevel, "A", rebalancedAc);
      }

      if (!rebalancedAc.is_null()) {
        RCP<ParameterList> params = rcp(new ParameterList());
        params->set("printLoadBalancingInfo", true);
        GetOStream(Statistics0, 0) << Utils::PrintMatrixInfo(*rebalancedAc, "Ac (rebalanced)", params);
      }

    } else {
      // Ac already built by the load balancing process and no load balancing needed
      GetOStream(Warnings0, 0) << "No rebalancing" << std::endl;
      GetOStream(Warnings0, 0) << "Jamming A into Level " << coarseLevel.GetLevelID() << " w/ generating factory "
                               << this << std::endl;
      Set(coarseLevel, "A", originalAc);
    }

  } //Build()
Example #21
0
  void PatternFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level& coarseLevel) const {
    FactoryMonitor m(*this, "Ppattern", coarseLevel);

    RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");

    const ParameterList& pL = GetParameterList();
    int k = pL.get<int>("emin: pattern order");

    if (k > 0) {
      RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
      RCP<Matrix> AP;

      bool doFillComplete  = true;
      bool optimizeStorage = true;

      for (int i = 0; i < k; i++) {
        AP = Utils::Multiply(*A, false, *P, false, GetOStream(Statistics2), doFillComplete, optimizeStorage);
        P.swap(AP);
      }
    }

    Set(coarseLevel, "Ppattern", P->getCrsGraph());
  }
      GO GetNumGlobalElements() const {
        const Teuchos::ParameterList& pL = GetParameterList();

        const std::string& matrixType = pL.get<std::string>("matrixType");
        const GO nx = pL.get<GO>("nx");
        const GO ny = pL.get<GO>("ny");
        const GO nz = pL.get<GO>("nz");

        GO numGlobalElements = -1;
        if (matrixType == "ADR1D")
          numGlobalElements = nx;

        else if (matrixType == "ADR2D")
          numGlobalElements = nx*ny;

        else if (matrixType == "ADR3D")
          numGlobalElements = nx*ny*nz;

        TEUCHOS_TEST_FOR_EXCEPTION(numGlobalElements < 0, std::runtime_error,
                                   "Gallery: numGlobalElements < 0 (did you forget --ny (or --nz) for 2D (3D) problems?)");

        return numGlobalElements;
      }
Example #23
0
  void MultiVectorTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Build", coarseLevel);

    const ParameterList & pL = GetParameterList();
    std::string vectorName   = pL.get<std::string>("Vector name");

    RCP<MultiVector> fineVector = fineLevel.Get< RCP<MultiVector> >(vectorName, GetFactory("Vector factory").get());
    RCP<Matrix>      transferOp = Get<RCP<Matrix> >(coarseLevel, "R");

    RCP<MultiVector> coarseVector = MultiVectorFactory::Build(transferOp->getRangeMap(), fineVector->getNumVectors());
    GetOStream(Runtime0, 0) << "Transferring multivector \"" << vectorName << "\"" << std::endl;

    RCP<MultiVector> onesVector = MultiVectorFactory::Build(transferOp->getDomainMap(), 1);
    onesVector->putScalar(Teuchos::ScalarTraits<Scalar>::one());
    RCP<MultiVector> rowSumVector = MultiVectorFactory::Build(transferOp->getRangeMap(), 1);
    transferOp->apply(*onesVector, *rowSumVector);
    transferOp->apply(*fineVector, *coarseVector);

    if (vectorName == "Coordinates")
      TEUCHOS_TEST_FOR_EXCEPTION(true,Exceptions::RuntimeError,"Use CoordinatesTransferFactory to transfer coordinates instead of MultiVectorTransferFactory.");

    Set<RCP<MultiVector> >(coarseLevel, vectorName, coarseVector);

  } // Build
  bool SubBlockAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::CheckForUserSpecifiedBlockInfo(bool bRange, std::vector<size_t>& stridingInfo, LocalOrdinal& stridedBlockId) const {
    const ParameterList & pL = GetParameterList();

    if(bRange == true)
      stridedBlockId = pL.get<LocalOrdinal>("Range map: Strided block id");
    else
      stridedBlockId = pL.get<LocalOrdinal>("Domain map: Strided block id");

    // read in stridingInfo from parameter list and fill the internal member variable
    // read the data only if the parameter "Striding info" exists and is non-empty
    std::string str;
    if(bRange == true) str = std::string("Range map: Striding info");
    else               str = std::string("Domain map: Striding info");
    if(pL.isParameter(str)) {
      std::string strStridingInfo = pL.get<std::string>(str);
      if(strStridingInfo.empty() == false) {
        Teuchos::Array<size_t> arrayVal = Teuchos::fromStringToArray<size_t>(strStridingInfo);
        stridingInfo = Teuchos::createVector(arrayVal);
      }
    }

    if(stridingInfo.size() > 0) return true;
    return false;
  }
  void SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildP(Level &fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);
    std::ostringstream levelstr;
    levelstr << coarseLevel.GetLevelID();

    typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;

    // Get default tentative prolongator factory
    // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
    // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
    RCP<const FactoryBase> initialPFact = GetFactory("P");
    if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }

    // Level Get
    RCP<Matrix> A     = Get< RCP<Matrix> >(fineLevel, "A");
    RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get());

    if(restrictionMode_) {
      SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
      A = Utils2::Transpose(*A, true); // build transpose of A explicitely
    }

    //Build final prolongator
    RCP<Matrix> finalP; // output

    const ParameterList & pL = GetParameterList();
    Scalar dampingFactor = as<Scalar>(pL.get<double>("sa: damping factor"));
    LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
    bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
    if (dampingFactor != Teuchos::ScalarTraits<Scalar>::zero()) {

      Scalar lambdaMax;
      {
        SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
        lambdaMax = A->GetMaxEigenvalueEstimate();
        if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
          GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations << ")" << std::endl;
          Magnitude stopTol = 1e-4;
          lambdaMax = Utils::PowerMethod(*A, true, maxEigenIterations, stopTol);
          A->SetMaxEigenvalueEstimate(lambdaMax);
        } else {
          GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
        }
        GetOStream(Statistics0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
      }

      {
        SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
        Teuchos::RCP<Vector> invDiag = Utils::GetMatrixDiagonalInverse(*A);

        SC omega = dampingFactor / lambdaMax;

        // finalP = Ptent + (I - \omega D^{-1}A) Ptent
        finalP = Utils::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2),std::string("MueLu::SaP-")+levelstr.str());
      }

    } else {
      finalP = Ptent;
    }

    // Level Set
    if (!restrictionMode_) {
      // prolongation factory is in prolongation mode
      Set(coarseLevel, "P", finalP);

      // NOTE: EXPERIMENTAL
      if (Ptent->IsView("stridedMaps"))
        finalP->CreateView("stridedMaps", Ptent);

    } else {
      // prolongation factory is in restriction mode
      RCP<Matrix> R = Utils2::Transpose(*finalP, true); // use Utils2 -> specialization for double
      Set(coarseLevel, "R", R);

      // NOTE: EXPERIMENTAL
      if (Ptent->IsView("stridedMaps"))
        R->CreateView("stridedMaps", Ptent, true);
    }

    if (IsPrint(Statistics1)) {
      RCP<ParameterList> params = rcp(new ParameterList());
      params->set("printLoadBalancingInfo", true);
      params->set("printCommInfo",          true);
      GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params);
    }

  } //Build()
Example #26
0
  void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const
    {
      FactoryMonitor m(*this, "Computing Ac", coarseLevel);

      // Set "Keeps" from params
      const Teuchos::ParameterList& pL = GetParameterList();
      if (pL.isParameter("Keep AP Pattern")  && pL.get<bool>("Keep AP Pattern"))
        coarseLevel.Keep("AP Pattern",  this);
      if (pL.isParameter("Keep RAP Pattern") && pL.get<bool>("Keep RAP Pattern"))
        coarseLevel.Keep("RAP Pattern", this);

      //
      // Inputs: A, P
      //

      RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
      RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");

      //
      // Build Ac = RAP
      //

      RCP<Matrix> AP;

      // Reuse pattern if available (multiple solve)
      if (coarseLevel.IsAvailable("AP Pattern", this)){
        GetOStream(Runtime0, 0) << "Ac: Using previous AP pattern"<<std::endl;
        AP = Get< RCP<Matrix> >(coarseLevel, "AP Pattern");
      }

      {
        SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);
        AP = Utils::Multiply(*A, false, *P, false, AP);
        Set(coarseLevel, "AP Pattern", AP);
      }

      bool doOptimizedStorage = !checkAc_; // Optimization storage option. If not modifying matrix later (inserting local values), allow optimization of storage.
                                           // This is necessary for new faster Epetra MM kernels.

      RCP<Matrix> Ac;

      // Reuse coarse matrix memory if available (multiple solve)
      if (coarseLevel.IsAvailable("RAP Pattern", this)) {
        GetOStream(Runtime0, 0) << "Ac: Using previous RAP pattern" << std::endl;
        Ac = Get< RCP<Matrix> >(coarseLevel, "RAP Pattern");
      }

      if (implicitTranspose_) {
        SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);

        Ac = Utils::Multiply(*P, true, *AP, false, Ac, true, doOptimizedStorage);

      } else {

        SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);

        RCP<Matrix> R = Get< RCP<Matrix> >(coarseLevel, "R");
        Ac = Utils::Multiply(*R, false, *AP, false, Ac, true, doOptimizedStorage);

      }

      if (checkAc_)
        CheckMainDiagonal(Ac);

      RCP<ParameterList> params = rcp(new ParameterList());;
      params->set("printLoadBalancingInfo", true);
      GetOStream(Statistics0, 0) << Utils::PrintMatrixInfo(*Ac, "Ac", params);

      Set(coarseLevel, "A",           Ac);
      Set(coarseLevel, "RAP Pattern", Ac);
    }

    if (transferFacts_.begin() != transferFacts_.end()) {
      SubFactoryMonitor m(*this, "Projections", coarseLevel);

      // call Build of all user-given transfer factories
      for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) {
        GetOStream(Runtime0, 0) << "Ac: call transfer factory " << (*it).get() << ": " << (*it)->description() << std::endl;
        (*it)->CallBuild(coarseLevel);
      }
    }

  }
  void SubBlockAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & currentLevel) const {
    typedef Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> OMatrix; //TODO
    typedef Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixClass; //TODO
    typedef Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixWrapClass; //TODO
    typedef Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> BlockedCrsOMatrix; //TODO
    typedef Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> MapExtractorClass;

    const ParameterList & pL = GetParameterList();
    size_t row = Teuchos::as<size_t>(pL.get<int>("block row"));
    size_t col = Teuchos::as<size_t>(pL.get<int>("block col"));

    RCP<OMatrix> Ain = Teuchos::null;
    Ain = Get< RCP<OMatrix> >(currentLevel, "A");

    RCP<BlockedCrsOMatrix> bA = Teuchos::rcp_dynamic_cast<BlockedCrsOMatrix>(Ain);

    TEUCHOS_TEST_FOR_EXCEPTION(bA==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: input matrix A is not of type BlockedCrsMatrix! error.");
    TEUCHOS_TEST_FOR_EXCEPTION(row > bA->Rows(), Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: A.Rows() > rows_! error.");
    TEUCHOS_TEST_FOR_EXCEPTION(col > bA->Cols(), Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: A.Cols() > cols_! error.");

    Teuchos::RCP<CrsMatrixClass> A = bA->getMatrix(row, col);

    Teuchos::RCP<CrsMatrixWrapClass> Op = Teuchos::rcp(new CrsMatrixWrapClass(A));

    //////////////// EXPERIMENTAL
    // extract striding information from RangeMapExtractor

    Teuchos::RCP<const MapExtractorClass> rgMapExtractor = bA->getRangeMapExtractor();
    Teuchos::RCP<const MapExtractorClass> doMapExtractor = bA->getDomainMapExtractor();

    Teuchos::RCP<const Map> rgMap = rgMapExtractor->getMap(row);
    Teuchos::RCP<const Map> doMap = doMapExtractor->getMap(col);

    Teuchos::RCP<const StridedMap> srgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rgMap);
    Teuchos::RCP<const StridedMap> sdoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(doMap);

    if(srgMap == Teuchos::null) {
      Teuchos::RCP<const Map> fullRgMap = rgMapExtractor->getFullMap();
      Teuchos::RCP<const StridedMap> sFullRgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(fullRgMap);
      TEUCHOS_TEST_FOR_EXCEPTION(sFullRgMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: full rangeMap is not a strided map");
      std::vector<size_t> stridedData = sFullRgMap->getStridingData();
      if(stridedData.size() == 1 && row > 0) // we have block matrices. use striding block information 0
        srgMap = StridedMapFactory::Build(rgMap, stridedData, 0, sFullRgMap->getOffset());
      else // we have strided matrices. use striding information of the corresponding block
        srgMap = StridedMapFactory::Build(rgMap, stridedData, row, sFullRgMap->getOffset());
    }

    if(sdoMap == Teuchos::null) {
      Teuchos::RCP<const Map> fullDoMap = doMapExtractor->getFullMap();
      Teuchos::RCP<const StridedMap> sFullDoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(fullDoMap);
      TEUCHOS_TEST_FOR_EXCEPTION(sFullDoMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: full domainMap is not a strided map");
      std::vector<size_t> stridedData2 = sFullDoMap->getStridingData();
      if(stridedData2.size() == 1 && col > 0) // we have block matrices. use striding block information 0
        sdoMap = StridedMapFactory::Build(doMap, stridedData2, 0, sFullDoMap->getOffset());
      else // we have strided matrices. use striding information of the corresponding block
        sdoMap = StridedMapFactory::Build(doMap, stridedData2, col, sFullDoMap->getOffset());
    }

    TEUCHOS_TEST_FOR_EXCEPTION(srgMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: rangeMap " << row << " is not a strided map");
    TEUCHOS_TEST_FOR_EXCEPTION(sdoMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: domainMap " << col << " is not a strided map");

    GetOStream(Statistics1) << "A(" << row << "," << col << ") has strided maps: range map fixed block size=" << srgMap->getFixedBlockSize() << " strided block id = " << srgMap->getStridedBlockId() << ", domain map fixed block size=" << sdoMap->getFixedBlockSize() << ", strided block id=" << sdoMap->getStridedBlockId() << std::endl;

    if(Op->IsView("stridedMaps") == true) Op->RemoveView("stridedMaps");
    Op->CreateView("stridedMaps", srgMap, sdoMap);
    TEUCHOS_TEST_FOR_EXCEPTION(Op->IsView("stridedMaps")==false, Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: failed to set stridedMaps");

    //////////////// EXPERIMENTAL

    currentLevel.Set("A", Teuchos::rcp_dynamic_cast<OMatrix>(Op), this);
  }
  void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& fineLevel, Level& coarseLevel) const {
    {
      FactoryMonitor m(*this, "Computing Ac", coarseLevel);
      std::ostringstream levelstr;
      levelstr << coarseLevel.GetLevelID();

      TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_==false, Exceptions::RuntimeError,
                                 "MueLu::RAPFactory::Build(): CallDeclareInput has not been called before Build!");

      // Set "Keeps" from params
      const Teuchos::ParameterList& pL = GetParameterList();
      if (pL.get<bool>("Keep AP Pattern"))
        coarseLevel.Keep("AP Pattern",  this);
      if (pL.get<bool>("Keep RAP Pattern"))
        coarseLevel.Keep("RAP Pattern", this);

      RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
      RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P"), AP, Ac;

      // Reuse pattern if available (multiple solve)
      if (coarseLevel.IsAvailable("AP Pattern", this)) {
        GetOStream(Runtime0) << "Ac: Using previous AP pattern" << std::endl;

        AP = Get< RCP<Matrix> >(coarseLevel, "AP Pattern");
      }

      {
        SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);

        AP = Utils::Multiply(*A, false, *P, false, AP, GetOStream(Statistics2),true,true,std::string("MueLu::A*P-")+levelstr.str());
      }
      if (pL.get<bool>("Keep AP Pattern"))
        Set(coarseLevel, "AP Pattern", AP);

      // Reuse coarse matrix memory if available (multiple solve)
      if (coarseLevel.IsAvailable("RAP Pattern", this)) {
        GetOStream(Runtime0) << "Ac: Using previous RAP pattern" << std::endl;

        Ac = Get< RCP<Matrix> >(coarseLevel, "RAP Pattern");
        // Some eigenvalue may have been cached with the matrix in the previous run.
        // As the matrix values will be updated, we need to reset the eigenvalue.
        Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
      }

      // If we do not modify matrix later, allow optimization of storage.
      // This is necessary for new faster Epetra MM kernels.
      bool doOptimizeStorage = !pL.get<bool>("RepairMainDiagonal");

      const bool doTranspose    = true;
      const bool doFillComplete = true;
      if (pL.get<bool>("transpose: use implicit") == true) {
        SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);
        Ac = Utils::Multiply(*P,  doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), doFillComplete, doOptimizeStorage,std::string("MueLu::R*(AP)-implicit-")+levelstr.str());

      } else {
        RCP<Matrix> R = Get< RCP<Matrix> >(coarseLevel, "R");

        SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);
        Ac = Utils::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), doFillComplete, doOptimizeStorage,std::string("MueLu::R*(AP)-explicit-")+levelstr.str());
      }

      CheckRepairMainDiagonal(Ac);

      if (IsPrint(Statistics1)) {
        RCP<ParameterList> params = rcp(new ParameterList());;
        params->set("printLoadBalancingInfo", true);
        params->set("printCommInfo",          true);
        GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params);
      }

      Set(coarseLevel, "A",           Ac);
      if (pL.get<bool>("Keep RAP Pattern"))
        Set(coarseLevel, "RAP Pattern", Ac);
    }

    if (transferFacts_.begin() != transferFacts_.end()) {
      SubFactoryMonitor m(*this, "Projections", coarseLevel);

      // call Build of all user-given transfer factories
      for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) {
        RCP<const FactoryBase> fac = *it;
        GetOStream(Runtime0) << "RAPFactory: call transfer factory: " << fac->description() << std::endl;
        fac->CallBuild(coarseLevel);
        // Coordinates transfer is marginally different from all other operations
        // because it is *optional*, and not required. For instance, we may need
        // coordinates only on level 4 if we start repartitioning from that level,
        // but we don't need them on level 1,2,3. As our current Hierarchy setup
        // assumes propagation of dependencies only through three levels, this
        // means that we need to rely on other methods to propagate optional data.
        //
        // The method currently used is through RAP transfer factories, which are
        // simply factories which are called at the end of RAP with a single goal:
        // transfer some fine data to coarser level. Because these factories are
        // kind of outside of the mainline factories, they behave different. In
        // particular, we call their Build method explicitly, rather than through
        // Get calls. This difference is significant, as the Get call is smart
        // enough to know when to release all factory dependencies, and Build is
        // dumb. This led to the following CoordinatesTransferFactory sequence:
        // 1. Request level 0
        // 2. Request level 1
        // 3. Request level 0
        // 4. Release level 0
        // 5. Release level 1
        //
        // The problem is missing "6. Release level 0". Because it was missing,
        // we had outstanding request on "Coordinates", "Aggregates" and
        // "CoarseMap" on level 0.
        //
        // This was fixed by explicitly calling Release on transfer factories in
        // RAPFactory. I am still unsure how exactly it works, but now we have
        // clear data requests for all levels.
        coarseLevel.Release(*fac);
      }
    }

  }
  void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::CheckRepairMainDiagonal(RCP<Matrix>& Ac) const {
    const Teuchos::ParameterList& pL = GetParameterList();
    bool repairZeroDiagonals = pL.get<bool>("RepairMainDiagonal");
    bool checkAc             = pL.get<bool>("CheckMainDiagonal");

    if (!checkAc && !repairZeroDiagonals)
      return;

    SC zero = Teuchos::ScalarTraits<SC>::zero(), one = Teuchos::ScalarTraits<SC>::one();

    Teuchos::RCP<Teuchos::ParameterList> p = Teuchos::rcp(new Teuchos::ParameterList());
    p->set("DoOptimizeStorage", true);

    RCP<const Map> rowMap = Ac->getRowMap();
    RCP<Vector> diagVec = VectorFactory::Build(rowMap);
    Ac->getLocalDiagCopy(*diagVec);

    LO lZeroDiags = 0;
    Teuchos::ArrayRCP< Scalar > diagVal = diagVec->getDataNonConst(0);

    for (size_t i = 0; i < rowMap->getNodeNumElements(); i++) {
      if (diagVal[i] == zero) {
        lZeroDiags++;
      }
    }
    GO gZeroDiags;
    MueLu_sumAll(rowMap->getComm(), Teuchos::as<GO>(lZeroDiags), gZeroDiags);

    if (repairZeroDiagonals && gZeroDiags > 0) {
      // TAW: If Ac has empty rows, put a 1 on the diagonal of Ac. Be aware that Ac might have empty rows AND columns.
      // The columns might not exist in the column map at all.
      //
      // It would be nice to add the entries to the original matrix Ac. But then we would have to use
      // insertLocalValues. However we cannot add new entries for local column indices that do not exist in the column map
      // of Ac (at least Epetra is not able to do this).
      //
      // Here we build a diagonal matrix with zeros on the diagonal and ones on the diagonal for the rows where Ac has empty rows
      // We have to build a new matrix to be able to use insertGlobalValues. Then we add the original matrix Ac to our new block
      // diagonal matrix and use the result as new (non-singular) matrix Ac.
      // This is very inefficient.
      //
      // If you know something better, please let me know.
      RCP<Matrix> fixDiagMatrix = Teuchos::null;
      fixDiagMatrix = MatrixFactory::Build(rowMap, 1);
      for (size_t r = 0; r < rowMap->getNodeNumElements(); r++) {
        if (diagVal[r] == zero) {
          GO grid = rowMap->getGlobalElement(r);
          Teuchos::ArrayRCP<GO> indout(1,grid);
          Teuchos::ArrayRCP<SC> valout(1, one);
          fixDiagMatrix->insertGlobalValues(grid,indout.view(0, 1), valout.view(0, 1));
        }
      }
      {
        Teuchos::TimeMonitor m1(*Teuchos::TimeMonitor::getNewTimer("CheckRepairMainDiagonal: fillComplete1"));
        Ac->fillComplete(p);
      }

      MueLu::Utils2<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*Ac, false, 1.0, *fixDiagMatrix, 1.0);
      if (Ac->IsView("stridedMaps"))
        fixDiagMatrix->CreateView("stridedMaps", Ac);

      Ac = Teuchos::null;     // free singular coarse level matrix
      Ac = fixDiagMatrix;     // set fixed non-singular coarse level matrix
    }

    // call fillComplete with optimized storage option set to true
    // This is necessary for new faster Epetra MM kernels.
    {
      Teuchos::TimeMonitor m1(*Teuchos::TimeMonitor::getNewTimer("CheckRepairMainDiagonal: fillComplete2"));
      Ac->fillComplete(p);
    }

    // print some output
    if (IsPrint(Warnings0))
      GetOStream(Warnings0) << "RAPFactory (WARNING): " << (repairZeroDiagonals ? "repaired " : "found ")
          << gZeroDiags << " zeros on main diagonal of Ac." << std::endl;

#ifdef HAVE_MUELU_DEBUG // only for debugging
    // check whether Ac has been repaired...
    Ac->getLocalDiagCopy(*diagVec);
    Teuchos::ArrayRCP< Scalar > diagVal2 = diagVec->getDataNonConst(0);
    for (size_t r = 0; r < Ac->getRowMap()->getNodeNumElements(); r++) {
      if (diagVal2[r] == zero) {
        GetOStream(Errors,-1) << "Error: there are zeros left on diagonal after repair..." << std::endl;
        break;
      }
    }
#endif
  }
  void BlockedRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &fineLevel, Level &coarseLevel) const {  //FIXME make fineLevel const!!
    FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel);

    const Teuchos::ParameterList& pL = GetParameterList();

    RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
    RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");

    RCP<BlockedCrsMatrix> bA = rcp_dynamic_cast<BlockedCrsMatrix>(A);
    RCP<BlockedCrsMatrix> bP = rcp_dynamic_cast<BlockedCrsMatrix>(P);
    TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null() || bP.is_null(), Exceptions::BadCast, "Matrices R, A and P must be of type BlockedCrsMatrix.");

    RCP<BlockedCrsMatrix> bAP;
    RCP<BlockedCrsMatrix> bAc;
    {
      SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);

      // Triple matrix product for BlockedCrsMatrixClass
      TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast,
                               "Block matrix dimensions do not match: "
                               "A is " << bA->Rows() << "x" << bA->Cols() <<
                               "P is " << bP->Rows() << "x" << bP->Cols());

      bAP = Utils::TwoMatrixMultiplyBlock(*bA, false, *bP,  false, GetOStream(Statistics2), true, true);
    }


    // If we do not modify matrix later, allow optimization of storage.
    // This is necessary for new faster Epetra MM kernels.
    bool doOptimizeStorage = !checkAc_;

    const bool doTranspose    = true;
    const bool doFillComplete = true;
    if (pL.get<bool>("transpose: use implicit") == true) {
      SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);
      bAc = Utils::TwoMatrixMultiplyBlock(*bP,  doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage);

    } else {
      RCP<Matrix>           R  = Get< RCP<Matrix> >(coarseLevel, "R");
      RCP<BlockedCrsMatrix> bR = rcp_dynamic_cast<BlockedCrsMatrix>(R);
      TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast, "Matrix R must be of type BlockedCrsMatrix.");

      TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast,
                                 "Block matrix dimensions do not match: "
                                 "R is " << bR->Rows() << "x" << bR->Cols() <<
                                 "A is " << bA->Rows() << "x" << bA->Cols());

      SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);
      bAc = Utils::TwoMatrixMultiplyBlock(*bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage);
    }


    if (checkAc_)
      CheckMainDiagonal(bAc);

    GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)");

    // static int run = 1;
    // RCP<CrsMatrixWrap> A11 = rcp(new CrsMatrixWrap(bAc->getMatrix(0,0)));
    // Utils::Write(toString(run) + "_A_11.mm", *A11);
    // if (!bAc->getMatrix(1,1).is_null()) {
      // RCP<CrsMatrixWrap> A22 = rcp(new CrsMatrixWrap(bAc->getMatrix(1,1)));
      // Utils::Write(toString(run) + "_A_22.mm", *A22);
    // }
    // RCP<CrsMatrixWrap> Am = rcp(new CrsMatrixWrap(bAc->Merge()));
    // Utils::Write(toString(run) + "_A.mm", *Am);
    // run++;

    Set<RCP <Matrix> >(coarseLevel, "A", bAc);

    if (transferFacts_.begin() != transferFacts_.end()) {
      SubFactoryMonitor m1(*this, "Projections", coarseLevel);

      // call Build of all user-given transfer factories
      for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) {
        RCP<const FactoryBase> fac = *it;
        GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl;
        fac->CallBuild(coarseLevel);
        // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid
        // of dangling data for CoordinatesTransferFactory
        coarseLevel.Release(*fac);
      }
    }
  }