コード例 #1
0
  // Plausibility check on Ac: counts the entries of the local diagonal copy that compare
  // equal to zero. When repairZeroDiagonals_ is set, a unit value is inserted at the diagonal
  // position of every such row. If Warnings0 output is enabled, the local counts are summed
  // over the row map's communicator and the total is reported.
  void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::CheckMainDiagonal(RCP<Matrix> & Ac) const {
    const SC zero = Teuchos::ScalarTraits<SC>::zero();
    const SC one  = Teuchos::ScalarTraits<SC>::one();

    // Fetch a local copy of the diagonal
    RCP<Vector> diagonal = VectorFactory::Build(Ac->getRowMap());
    Ac->getLocalDiagCopy(*diagonal);
    Teuchos::ArrayRCP< Scalar > diagEntries = diagonal->getDataNonConst(0);

    LO numLocalZeros = 0;
    const size_t numLocalRows = Ac->getRowMap()->getNodeNumElements();
    for (size_t row = 0; row < numLocalRows; ++row) {
      if (!(diagEntries[row] == zero))
        continue;

      ++numLocalZeros;
      if (!repairZeroDiagonals_)
        continue;

      // The diagonal entry sits in the column whose global id equals the row's global id
      const GO globalRow = Ac->getRowMap()->getGlobalElement(row);
      const LO localCol  = Ac->getColMap()->getLocalElement(globalRow);
      Teuchos::ArrayRCP<LO> cols(1, localCol);
      Teuchos::ArrayRCP<SC> vals(1, one);

      Ac->insertLocalValues(row, cols.view(0, cols.size()), vals.view(0, vals.size()));
    }

    if (!IsPrint(Warnings0))
      return;

    // Report the global number of zero diagonal entries
    const RCP<const Teuchos::Comm<int> > & comm = Ac->getRowMap()->getComm();
    GO localCount  = Teuchos::as<GO>(numLocalZeros); /* LO->GO conversion */
    GO globalCount = 0;
    sumAll(comm, localCount, globalCount);

    const char* const prefix = repairZeroDiagonals_ ? "RAPFactory (WARNING): repaired "
                                                    : "RAPFactory (WARNING): found ";
    GetOStream(Warnings0,0) << prefix << globalCount << " zeros on main diagonal of Ac." << std::endl;
  }
  // Builds a new "Partition" vector living on the row map of A. For every local node, the
  // entry found at the node's first position in the input "Partition" vector is copied to
  // all blkSize DOF rows of that node. If the input partition is null, a null "Importer"
  // is stored on the level and nothing else is done.
  //
  // Throws MueLu::Exceptions::RuntimeError if the fixed block size of A is not positive, if
  // the local row count of A is not a multiple of the block size, or if the local length of
  // the input partition is not a multiple of the local node count.
  void CloneRepartitionInterface<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &currentLevel) const {
    FactoryMonitor m(*this, "Build", currentLevel);
    currentLevel.print(GetOStream(Statistics0,0));
    // extract blocked operator A from current level
    Teuchos::RCP<Matrix> A = Get< Teuchos::RCP<Matrix> >     (currentLevel, "A");

    // number of Partitions only used for a shortcut.
    GO numPartitions = 0;
    if (currentLevel.IsAvailable("number of partitions")) {
      numPartitions = currentLevel.Get<GO>("number of partitions");
      GetOStream(Warnings0) << "Using user-provided \"number of partitions\", the performance is unknown" << std::endl;
    }
    (void)numPartitions; // value is currently not used any further

    // ======================================================================================================
    // Construct decomposition vector
    // ======================================================================================================

    // extract decomposition vector
    RCP<GOVector> decomposition = Get<RCP<GOVector> >(currentLevel, "Partition");

    // The null check has to come before any access to the vector's data
    if (decomposition.is_null()) {
      GetOStream(Warnings0) << "No repartitioning necessary: partitions were left unchanged by the repartitioner" << std::endl;
      Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
      return;
    }
    ArrayRCP<const GO> decompEntries = decomposition->getData(0);

    // create new decomposition vector
    Teuchos::RCP<GOVector> ret = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(A->getRowMap(), false);
    ArrayRCP<GO> retDecompEntries = ret->getDataNonConst(0);

    // block size of output vector
    LocalOrdinal blkSize = A->GetFixedBlockSize();
    // blkSize is used as a divisor below
    TEUCHOS_TEST_FOR_EXCEPTION(blkSize <= 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: block size of A must be positive, but is " << blkSize);

    // plausibility check!
    size_t inLocalLength  = decomposition->getLocalLength();
    size_t outLocalLength = A->getRowMap()->getNodeNumElements();

    size_t numLocalNodes = outLocalLength / blkSize;
    TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(outLocalLength  % blkSize) != 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: inconsistent number of local DOFs (" << outLocalLength << ") and degrees of freedoms ("<<blkSize<<")");

    if (numLocalNodes > 0) {
      TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(inLocalLength  % numLocalNodes) != 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: inconsistent number of local DOFs (" << inLocalLength << ") and number of local nodes (" << numLocalNodes << ")");
      // number of input entries per node; only the first entry of each node is read
      LocalOrdinal inBlkSize = Teuchos::as<LocalOrdinal>(inLocalLength / numLocalNodes);
      //TEUCHOS_TEST_FOR_EXCEPTION(blkSize != inBlkSize, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: input block size = " << inBlkSize << " outpub block size = " << blkSize << ". They should be the same.");

      for(LO i = 0; i<Teuchos::as<LO>(numLocalNodes); i++) {
        for(LO j = 0; j < blkSize; j++) {
          retDecompEntries[i*blkSize + j] = Teuchos::as<GO>(decompEntries[i*inBlkSize]);
        }
      }
    } // end if numLocalNodes > 0
    Set(currentLevel, "Partition", ret);
  } //Build()
コード例 #3
0
  // Registers 'factory' as the default factory for 'varName' in defaultFactoryTable_, prints
  // a notice that no factory was specified by the user, and returns the stored entry.
  // Throws Exceptions::RuntimeError if 'factory' is null.
  const RCP<const FactoryBase> FactoryManager<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::SetAndReturnDefaultFactory(const std::string & varName, const RCP<const FactoryBase> & factory) const {
    // Name the variable in the message so the failure can be traced back
    TEUCHOS_TEST_FOR_EXCEPTION(factory == Teuchos::null, Exceptions::RuntimeError, "The default factory for building '" << varName << "' is null");

    GetOStream(Warnings0,  0) << "Attention: No factory has been specified for building '" << varName << "'." << std::endl;
    GetOStream(Warnings00, 0) << "           Using default factory ";
    // The tab only lives for the duration of the describe() call
    { Teuchos::OSTab tab(getOStream(), 7); factory->describe(GetOStream(Warnings00), GetVerbLevel());}

    defaultFactoryTable_[varName] = factory;

    return defaultFactoryTable_[varName];
  }
コード例 #4
0
  // Selects one of the two registered prolongator factories, calls its build, and re-exports
  // the resulting "P" and "Nullspace" under this factory. The second factory is selected if
  // the fine level id has reached "semicoarsen: number of levels" or if "NumZLayers" on the
  // fine level equals 1; otherwise the first one is used.
  void TogglePFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Prolongator toggle", coarseLevel);

    TEUCHOS_TEST_FOR_EXCEPTION(nspFacts_.size() != prolongatorFacts_.size(), Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: The number of provided prolongator factories and coarse nullspace factories must be identical.");
    TEUCHOS_TEST_FOR_EXCEPTION(nspFacts_.size() != 2, Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: TogglePFactory needs two different transfer operator strategies for toggling."); // TODO adapt this/weaken this as soon as other toggling strategies are introduced.

    // user parameters
    const Teuchos::ParameterList & params = GetParameterList();
    const std::string toggleMode    = Teuchos::as<std::string>(params.get<std::string>("toggle: mode"));
    const int         numSemiLevels = Teuchos::as<int>(params.get<int>("semicoarsen: number of levels"));

    TEUCHOS_TEST_FOR_EXCEPTION(toggleMode!="semicoarsen", Exceptions::RuntimeError, "MueLu::TogglePFactory::Build: The 'toggle: mode' parameter must be set to 'semicoarsen'. No other mode supported, yet.");

    // number of layers, if provided on the fine level (-1 otherwise)
    LO numZLayers = -1;
    if (fineLevel.IsAvailable("NumZLayers", NoFactory::get())) {
      numZLayers = fineLevel.Get<LO>("NumZLayers", NoFactory::get());
      GetOStream(Runtime1) << "Number of layers for semicoarsening: " << numZLayers << std::endl;
    }

    // index of the transfer strategy to use (0 is the default)
    const int selected = (fineLevel.GetLevelID() >= numSemiLevels || numZLayers == 1) ? 1 : 0;

    // build with the selected transfer operator factory and fetch its results
    GetOStream(Runtime0) << "TogglePFactory: call transfer factory: " << (prolongatorFacts_[selected])->description() << std::endl;
    prolongatorFacts_[selected]->CallBuild(coarseLevel);
    RCP<Matrix>      P               = coarseLevel.Get< RCP<Matrix> >("P", (prolongatorFacts_[selected]).get());
    RCP<MultiVector> coarseNullspace = coarseLevel.Get< RCP<MultiVector> >("Nullspace", (nspFacts_[selected]).get());

    // release the dependencies on every prolongator / nullspace factory, selected or not
    for (size_t idx = 0; idx < nspFacts_.size(); ++idx) {
      coarseLevel.Release(*(prolongatorFacts_[idx]));
      coarseLevel.Release(*(nspFacts_[idx]));
    }

    // store the results with this factory as generating factory
    Set(coarseLevel, "P", P);
    Set(coarseLevel, "Nullspace", coarseNullspace);

  } //Build()
コード例 #5
0
  // Runs the CG-based minimization on an initial prolongator and stores the result as "P".
  // The initial guess is a kept "P0" if one is available (iteration count taken from
  // "Reuse Niterations"), otherwise the level's "P" (iteration count from "Niterations").
  // Likewise a kept "Constraint0" is preferred over "Constraint". After the solve,
  // "Constraint0", "P" and "P0" are set on the coarse level.
  void EminPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildP(Level& fineLevel, Level& coarseLevel) const {
    FactoryMonitor m(*this, "Prolongator minimization", coarseLevel);

    const ParameterList & paramList = GetParameterList();

    // Keep flags requested through the parameter list
    if (paramList.isParameter("Keep P0") && paramList.get<bool>("Keep P0")) {
      coarseLevel.Keep("P0",this);
    }
    if (paramList.isParameter("Keep Constraint0") && paramList.get<bool>("Keep Constraint0")) {
      coarseLevel.Keep("Constraint0",this);
    }

    // Fine level matrix and nullspace
    RCP<Matrix>      A = Get< RCP<Matrix> >     (fineLevel,   "A");
    RCP<MultiVector> B = Get< RCP<MultiVector> >(fineLevel,   "Nullspace");

    // Initial guess and iteration count
    int         numIterations;
    RCP<Matrix> initialP;
    if (!coarseLevel.IsAvailable("P0", this)) {
      initialP      = Get< RCP<Matrix> >(coarseLevel, "P");
      numIterations = paramList.get<int>("Niterations");
    } else {
      initialP      = coarseLevel.Get<RCP<Matrix> >("P0", this);
      numIterations = paramList.get<int>("Reuse Niterations");
      GetOStream(Runtime0, 0) << "EminPFactory: Reusing P0"<<std::endl;
    }

    // Constraint
    RCP<Constraint> constraint;
    if (!coarseLevel.IsAvailable("Constraint0", this)) {
      constraint = Get< RCP<Constraint> > (coarseLevel, "Constraint");
    } else {
      constraint = coarseLevel.Get<RCP<Constraint> >("Constraint0", this);
      GetOStream(Runtime0, 0) << "EminPFactory: Reusing Constraint0"<<std::endl;
    }

    // Minimization
    RCP<Matrix> finalP;
    CGSolver solver(numIterations);
    solver.Iterate(*A, *constraint, *initialP, *B, finalP);

    Set(coarseLevel, "Constraint0", constraint);
    Set(coarseLevel, "P",           finalP);
    Set(coarseLevel, "P0",          finalP);

    RCP<ParameterList> printParams = rcp(new ParameterList());
    printParams->set("printLoadBalancingInfo", true);
    GetOStream(Statistics0,0) << Utils::PrintMatrixInfo(*finalP, "P", printParams);
  }
コード例 #6
0
// Builds a permutation for matrix "A" with the strategy named by the "PermutationStrategy"
// parameter ("Algebraic" or "Local"). The rows to permute are described by the map named in
// "PermutationRowMapName"; if that name is empty, the full row map of A is used.
// Throws std::logic_error for any other strategy name.
void PermutationFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & currentLevel) const {
  FactoryMonitor m(*this, "Permutation Factory ", currentLevel);

  Teuchos::RCP<Matrix> A = Get< Teuchos::RCP<Matrix> > (currentLevel, "A");

  const ParameterList & params = GetParameterList();
  std::string rowMapName                        = params.get<std::string> ("PermutationRowMapName");
  Teuchos::RCP<const FactoryBase> rowMapFactory = GetFactory              ("PermutationRowMapFactory");

  // Row map restricting the permutation
  Teuchos::RCP<const Map> permRowMap = Teuchos::null;
  if (rowMapName.length() == 0) {
    permRowMap = A->getRowMap(); // use full row map of A
  } else {
    permRowMap = currentLevel.Get<RCP<const Map> >(rowMapName,rowMapFactory.get());
  }

  const std::string strategyName = params.get<std::string> ("PermutationStrategy");
  const bool useAlgebraic = (strategyName == "Algebraic");
  const bool useLocal     = !useAlgebraic && (strategyName == "Local");

  if (useAlgebraic) {
    Teuchos::RCP<AlgebraicPermutationStrategy> strategy = Teuchos::rcp(new AlgebraicPermutationStrategy());
    strategy->BuildPermutation(A,permRowMap,currentLevel,this);
  } else if (useLocal) {
    Teuchos::RCP<LocalPermutationStrategy> strategy = Teuchos::rcp(new LocalPermutationStrategy());
    strategy->BuildPermutation(A,permRowMap,currentLevel,this);
  } else {
    TEUCHOS_TEST_FOR_EXCEPTION(true,
                               std::logic_error,
                               "`PermutationStrategy' has incorrect value (" << strategyName << ") in input to PermutationFactory."
                               << "Check the documentation for a list of valid choices");
  }

  GetOStream(Runtime0, 0) << "Using " << strategyName << " permutation strategy." << std::endl;

}
  // Re-exports the coarse "Coordinates" produced by one of the two registered coordinate
  // transfer factories under this factory. Which one is used is given by the integer
  // "Chosen P" read from the coarse level. Afterwards the dependencies on all registered
  // coordinate transfer factories are released.
  //
  // Throws Exceptions::RuntimeError if not exactly two factories are registered or if
  // "Chosen P" is not a valid index into coordFacts_.
  void ToggleCoordinatesTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level & fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Coordinate transfer toggle", coarseLevel);

    typedef Xpetra::MultiVector<double,LO,GO,NO> xdMV;

    TEUCHOS_TEST_FOR_EXCEPTION(coordFacts_.size() != 2, Exceptions::RuntimeError, "MueLu::ToggleCoordinatesTransferFactory::Build: ToggleCoordinatesTransferFactory needs two different transfer operator strategies for toggling.");

    int chosenP = Get< int >      (coarseLevel, "Chosen P");
    // chosenP is used as an index below; reject values outside of coordFacts_
    TEUCHOS_TEST_FOR_EXCEPTION(chosenP < 0 || static_cast<size_t>(chosenP) >= coordFacts_.size(), Exceptions::RuntimeError, "MueLu::ToggleCoordinatesTransferFactory::Build: 'Chosen P' (" << chosenP << ") is not a valid index into the list of coordinate transfer factories.");

    GetOStream(Runtime1) << "Transfer Coordinates" << chosenP << " to coarse level" << std::endl;
    RCP<xdMV> coarseCoords = coarseLevel.Get< RCP<xdMV> >("Coordinates",(coordFacts_[chosenP]).get());
    Set(coarseLevel, "Coordinates", coarseCoords);

    // Release dependencies of all coordinate transfer factories
    for(size_t t=0; t<coordFacts_.size(); ++t) {
      coarseLevel.Release(*(coordFacts_[t]));
    }

    //TODO: exception if coarseCoords == Teuchos::null
  }
コード例 #8
0
  // Builds aggregates for the level's "Graph": algo1_ performs the uncoupled coarsening and
  // algo2_ aggregates the leftovers. The result is flagged as crossing processors and
  // stored as "Aggregates"; a description is printed at Statistics0 verbosity.
  void CoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const
  {
    FactoryMonitor m(*this, "Build", currentLevel);

    RCP<Aggregates> aggs;
    {
      //TODO check for reuse of aggregates here

      // The graph handle is only needed while the aggregates are being built
      RCP<const GraphBase> inputGraph = Get< RCP<GraphBase> >(currentLevel, "Graph");

      aggs = rcp(new Aggregates(*inputGraph));
      aggs->setObjectLabel("UC");

      algo1_.CoarsenUncoupled(*inputGraph, *aggs);
      algo2_.AggregateLeftovers(*inputGraph, *aggs);
    }

    aggs->AggregatesCrossProcessors(true);

    // Store the result on the level
    Set(currentLevel, "Aggregates", aggs);

    if (!IsPrint(Statistics0))
      return;

    aggs->describe(GetOStream(Statistics0, 0), getVerbLevel());
  }
コード例 #9
0
  // Stores 'factory' as the default factory for 'varName' in defaultFactoryTable_ and
  // returns the stored entry. Throws Exceptions::RuntimeError if 'factory' is null.
  const RCP<const FactoryBase> FactoryManager<Scalar, LocalOrdinal, GlobalOrdinal, Node>::SetAndReturnDefaultFactory(const std::string& varName, const RCP<const FactoryBase>& factory) const {
    TEUCHOS_TEST_FOR_EXCEPTION(factory.is_null(), Exceptions::RuntimeError, "The default factory for building '" << varName << "' is null");

    GetOStream(Runtime1) << "Using default factory (" << factory->description() << ") for building '" << varName << "'." << std::endl;

    // Single lookup: write through the table slot and hand back its content
    RCP<const FactoryBase>& slot = defaultFactoryTable_[varName];
    slot = factory;
    return slot;
  }
コード例 #10
0
// Reads user-defined aggregates from the text file
//   <filePrefix><levelID>_<rank>.<fileExt>
// and stores them as "Aggregates" on the level. The file is read as: number of vertices,
// number of aggregates, then for each aggregate its size followed by that many vertex ids.
// The first vertex of each aggregate is marked as its root.
//
// Throws Exceptions::RuntimeError if the file cannot be opened, if a value cannot be read,
// if an aggregate has a non-positive size, or if a vertex id is outside of the local
// vertex range.
void UserAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  const ParameterList & pL = GetParameterList();

  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  const int myRank = comm->getRank();

  std::string fileName = pL.get<std::string>("filePrefix") + toString(currentLevel.GetLevelID()) + "_" + toString(myRank) + "." + pL.get<std::string>("fileExt");
  std::ifstream ifs(fileName.c_str());
  if (!ifs.good())
    throw Exceptions::RuntimeError("Cannot read data from \"" + fileName + "\"");

  LO numVertices = 0, numAggregates = 0;
  ifs >> numVertices >> numAggregates;
  // A failed extraction would otherwise leave the counts meaningless
  if (ifs.fail() || numVertices < 0 || numAggregates < 0)
    throw Exceptions::RuntimeError("Invalid header (number of vertices / aggregates) in \"" + fileName + "\"");

  // FIXME: what is the map?
  Xpetra::UnderlyingLib  lib       = Xpetra::UseEpetra;
  const int              indexBase = 0;
  RCP<Map> map = MapFactory::Build(lib, numVertices, indexBase, comm);

  RCP<Aggregates> aggregates = rcp(new Aggregates(map));
  aggregates->setObjectLabel("User");

  aggregates->SetNumAggregates(numAggregates);

  Teuchos::ArrayRCP<LO> vertex2AggId = aggregates->GetVertex2AggId()->getDataNonConst(0);
  Teuchos::ArrayRCP<LO> procWinner   = aggregates->GetProcWinner()  ->getDataNonConst(0);

  // Vertex ids from the file are used as indices into the two arrays above
  // (assumes both arrays have the same length -- TODO confirm)
  const size_t numLocalVertices = static_cast<size_t>(vertex2AggId.size());

  for (LO i = 0; i < numAggregates; i++) {
    int aggSize = 0;
    ifs >> aggSize;
    // The first vertex of each aggregate is accessed below, so empty aggregates are rejected
    if (ifs.fail() || aggSize <= 0)
      throw Exceptions::RuntimeError("Invalid size for aggregate " + toString(i) + " in \"" + fileName + "\"");

    std::vector<LO> list(aggSize);
    for (int k = 0; k < aggSize; k++) {
      // FIXME: File contains GIDs, we need LIDs
      // for now, works on a single processor
      ifs >> list[k];
      if (ifs.fail() || list[k] < 0 || static_cast<size_t>(list[k]) >= numLocalVertices)
        throw Exceptions::RuntimeError("Invalid vertex id in aggregate " + toString(i) + " in \"" + fileName + "\"");
    }

    // Mark first node as root node for the aggregate
    aggregates->SetIsRoot(list[0]);

    // Fill vertex2AggId and procWinner structure with information
    for (int k = 0; k < aggSize; k++) {
      vertex2AggId[list[k]] = i;
      procWinner  [list[k]] = myRank;
    }
  }

  // FIXME: do the proper check whether aggregates cross interprocessor boundary
  aggregates->AggregatesCrossProcessors(false);

  Set(currentLevel, "Aggregates", aggregates);

  GetOStream(Statistics0, 0) << aggregates->description() << std::endl;
}
コード例 #11
0
  // Rebalances the coarse level matrix "A" using the level's "Importer". If the importer is
  // null, the original matrix is stored again as "A" under this factory. Otherwise a new
  // matrix is built on the importer's target map (optionally restricting the communicator
  // when "useSubcomm" is set), inherits the fixed block size, and is stored as "A".
  void RebalanceAcFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Computing Ac", coarseLevel);

    RCP<Matrix>       originalAc = Get< RCP<Matrix> >(coarseLevel, "A");
    RCP<const Import> importer   = Get< RCP<const Import> >(coarseLevel, "Importer");

    if (importer == Teuchos::null) {
      // Ac already built by the load balancing process and no load balancing needed
      GetOStream(Warnings0, 0) << "No rebalancing" << std::endl;
      GetOStream(Warnings0, 0) << "Jamming A into Level " << coarseLevel.GetLevelID() << " w/ generating factory "
                               << this << std::endl;
      Set(coarseLevel, "A", originalAc);
      return;
    }

    RCP<Matrix> rebalancedAc;
    {
      SubFactoryMonitor subM(*this, "Rebalancing existing Ac", coarseLevel);
      RCP<const Map> targetMap = importer->getTargetMap();

      const ParameterList & params = GetParameterList();

      ParameterList xpetraParams;
      if (params.get<bool>("useSubcomm")) {
        GetOStream(Runtime0,0) << "Replacing maps with a subcommunicator" << std::endl;
        xpetraParams.set("Restrict Communicator",true);
      }

      // NOTE: If the communicator is restricted away, Build returns Teuchos::null.
      // The parameter list lives on the stack, hence the non-owning RCP.
      rebalancedAc = MatrixFactory::Build(originalAc, *importer, targetMap, targetMap, rcp(&xpetraParams,false));

      if (!rebalancedAc.is_null()) {
        rebalancedAc->SetFixedBlockSize(originalAc->GetFixedBlockSize());
      }

      Set(coarseLevel, "A", rebalancedAc);
    }

    // Statistics are only printed if this process still owns a part of the matrix
    if (rebalancedAc.is_null())
      return;

    RCP<ParameterList> printParams = rcp(new ParameterList());
    printParams->set("printLoadBalancingInfo", true);
    GetOStream(Statistics0, 0) << Utils::PrintMatrixInfo(*rebalancedAc, "Ac (rebalanced)", printParams);

  } //Build()
コード例 #12
0
    //! Destructor. Stops the timer unless it is paused; a paused timer only produces an error message.
    ~MutuallyExclusiveTime() {
      // This timer can only be destroyed if it is not in the stack
      if (isPaused()) {
        // error message because cannot throw an exception in destructor
        GetOStream(Errors) << "MutuallyExclusiveTime::~MutuallyExclusiveTime(): Error: destructor called on a paused timer." << std::endl;
        //TODO: Even if timing results will be wrong, the timer can be removed from the stack to avoid a segmentation fault.

        // stop() throws when called on a paused timer, which must not happen in a destructor
        return;
      }

      stop(); // if isRunning(), remove from the stack, resume previous timer
    }
コード例 #13
0
    //! @brief Starts this timer and pushes it onto the timer stack.
    //! If another timer is on top of the stack, it is paused first and recorded as this timer's parent.
    //! Calling start() on an already running timer is a no-op.
    //! @pre Timer is not paused (otherwise Exceptions::RuntimeError is thrown).
    //! @post Timer is running and on top of the stack.
    void start(bool reset=false) {
      TEUCHOS_TEST_FOR_EXCEPTION(isPaused(), Exceptions::RuntimeError, "MueLu::MutuallyExclusiveTime::start(): timer is paused. Use resume().");

      // A running timer must not be paused/pushed/started again,
      // otherwise a later stop() would find the stack in a bad state
      if (isRunning())
        return;

      if (timerStack_.empty()) {
        // nothing to pause
        GetOStream(Debug) << "starting orphan timer " << this->name_ << std::endl;
        myParent_[this->name_] = "no parent";
      } else {
        // pause the timer that is currently running and remember it as parent
        GetOStream(Debug) << "pausing parent timer " << timerStack_.top()->name_ << std::endl;
        timerStack_.top()->pause();
        GetOStream(Debug) << "starting child timer " << this->name_ << std::endl;
        myParent_[this->name_] = timerStack_.top()->name_;
      }

      // start this timer and make it the top of the stack
      timer_->start(reset);
      timerStack_.push(this);
    }
コード例 #14
0
  // Prints the content of the user factory table and of the default factory table to the
  // Debug stream, one "name -> factory pointer" line per entry.
  void FactoryManager<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Print() const {
    typedef std::map<std::string, RCP<const FactoryBase> >::const_iterator TableIterator;

    Teuchos::FancyOStream& out = GetOStream(Debug);

    out << "Users factory table (factoryTable_):" << std::endl;
    for (TableIterator entry = factoryTable_.begin(); entry != factoryTable_.end(); ++entry) {
      out << "  " << entry->first << " -> " << Teuchos::toString(entry->second.get()) << std::endl;
    }

    out << "Default factory table (defaultFactoryTable_):" << std::endl;
    for (TableIterator entry = defaultFactoryTable_.begin(); entry != defaultFactoryTable_.end(); ++entry) {
      out << "  " << entry->first << " -> " << Teuchos::toString(entry->second.get()) << std::endl;
    }
  }
コード例 #15
0
    //! Constructor
    PrintMonitor(const BaseClass& object, const std::string& msg, MsgType msgLevel = Runtime0) {

      // Inherit verbosity from 'object'
      SetVerbLevel(object.GetVerbLevel());
      setOStream(object.getOStream());

      // Print description and new indent
      if (IsPrint(msgLevel)) {
        GetOStream(msgLevel, 0) << msg << std::endl;
        tab_ = rcp(new Teuchos::OSTab(getOStream()));
      }

    }
コード例 #16
0
  // Transfers the map stored under (mapName_, mapFact_) from the fine to the coarse level.
  // For every local row of the prolongator whose global id belongs to the fine map, the
  // global ids of all columns in that row are collected. The sorted, duplicate-free list of
  // these ids defines the coarse map, which is stored under the same name/factory on the
  // coarse level.
  //
  // Throws Exceptions::RuntimeError if "P" of the tentative prolongator factory is not
  // available on the coarse level.
  void MapTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & fineLevel, Level & coarseLevel) const {
    typedef Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> OperatorClass; //TODO
    typedef Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node> MapClass;
    typedef Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node> MapFactoryClass;

    Monitor m(*this, "Contact Map transfer factory");

    if (fineLevel.IsAvailable(mapName_, mapFact_.get())==false) {
        GetOStream(Runtime0, 0) << "MapTransferFactory::Build: User provided map " << mapName_ << " not found in Level class." << std::endl;
    }

    // fetch map extractor from level
    RCP<const MapClass> transferMap = fineLevel.Get<RCP<const MapClass> >(mapName_,mapFact_.get());

    // Get default tentative prolongator factory
    // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
    // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
    RCP<const FactoryBase> tentPFact = GetFactory("P");
    if (tentPFact == Teuchos::null) { tentPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }
    TEUCHOS_TEST_FOR_EXCEPTION(!coarseLevel.IsAvailable("P",tentPFact.get()),Exceptions::RuntimeError, "MueLu::MapTransferFactory::Build(): P (generated by TentativePFactory) not available.");
    RCP<OperatorClass> Ptent = coarseLevel.Get<RCP<OperatorClass> >("P", tentPFact.get());

    std::vector<GlobalOrdinal > coarseMapGids;

    // loop over local rows of Ptent
    for(size_t row=0; row<Ptent->getNodeNumRows(); row++) {

      GlobalOrdinal grid = Ptent->getRowMap()->getGlobalElement(row);
      if(transferMap->isNodeGlobalElement(grid)) {

        Teuchos::ArrayView<const LocalOrdinal> indices;
        Teuchos::ArrayView<const Scalar> vals;
        Ptent->getLocalRowView(row, indices, vals);

        for(size_t i=0; i<(size_t)indices.size(); i++) {
          // mark all columns in Ptent(grid,*) to be coarse Dofs of next level transferMap
          GlobalOrdinal gcid = Ptent->getColMap()->getGlobalElement(indices[i]);
          coarseMapGids.push_back(gcid);
        }
      } // end if isNodeGlobalElement(grid)
    }

    // build column maps
    std::sort(coarseMapGids.begin(), coarseMapGids.end());
    coarseMapGids.erase(std::unique(coarseMapGids.begin(), coarseMapGids.end()), coarseMapGids.end());
    // Construct the view from the vector itself: the list may be empty on this process,
    // in which case taking the address of its first element is not allowed
    Teuchos::ArrayView<GlobalOrdinal> coarseMapGidsView (coarseMapGids);
    Teuchos::RCP<const MapClass> coarseTransferMap = MapFactoryClass::Build(Ptent->getColMap()->lib(), -1, coarseMapGidsView, Ptent->getColMap()->getIndexBase(), Ptent->getColMap()->getComm());

    // store map extractor in coarse level
    coarseLevel.Set(mapName_, coarseTransferMap, mapFact_.get());
  }
// Builds a filtered copy of "A" and stores it as "A" under this factory. An entry is kept
// only if its row and column belong to the same DOF (global id modulo the fixed block
// size) and the real part of its value is not negative. The block size of the input
// matrix is passed on to the output matrix.
void DropNegativeEntriesFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
    FactoryMonitor m(*this, "Matrix filtering (springs)", currentLevel);

    RCP<Matrix> Ain = Get< RCP<Matrix> >(currentLevel, "A");

    LocalOrdinal nDofsPerNode = Ain->GetFixedBlockSize();

    // create new empty Operator
    Teuchos::RCP<Matrix> Aout = MatrixFactory::Build(Ain->getRowMap(), Ain->getGlobalMaxNumRowEntries(), Xpetra::StaticProfile);

    // Entries are compared by their real part against zero
    const typename Teuchos::ScalarTraits<Scalar>::magnitudeType realZero = Teuchos::ScalarTraits<Scalar>::real(Teuchos::ScalarTraits<Scalar>::zero());

    size_t numLocalRows = Ain->getNodeNumRows();
    for(size_t row=0; row<numLocalRows; row++) {
        GlobalOrdinal grid = Ain->getRowMap()->getGlobalElement(row);

        int rDofID = Teuchos::as<int>(grid % nDofsPerNode);

        // extract row information from input matrix
        Teuchos::ArrayView<const LocalOrdinal> indices;
        Teuchos::ArrayView<const Scalar> vals;
        Ain->getLocalRowView(row, indices, vals);

        // output buffers, sized for the case that every entry is kept
        Teuchos::ArrayRCP<GlobalOrdinal> indout(indices.size(),Teuchos::ScalarTraits<GlobalOrdinal>::zero());
        Teuchos::ArrayRCP<Scalar>        valout(indices.size(),Teuchos::ScalarTraits<Scalar>::zero());

        size_t nNonzeros = 0;
        for(size_t i=0; i<(size_t)indices.size(); i++) {
            GlobalOrdinal gcid = Ain->getColMap()->getGlobalElement(indices[i]); // global column id

            int cDofID = Teuchos::as<int>(gcid % nDofsPerNode);
            // A magnitude is never negative, so the sign has to be tested on the real part
            if(rDofID == cDofID && Teuchos::ScalarTraits<Scalar>::real(vals[i]) >= realZero) {
                indout [nNonzeros] = gcid;
                valout [nNonzeros] = vals[i];
                nNonzeros++;
            }
        }
        // shrink the buffers to the number of entries actually kept
        indout.resize(nNonzeros);
        valout.resize(nNonzeros);

        Aout->insertGlobalValues(grid, indout.view(0,indout.size()), valout.view(0,valout.size()));
    }

    Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap());

    // copy block size information
    Aout->SetFixedBlockSize(nDofsPerNode);

    GetOStream(Statistics0, 0) << "Nonzeros in A (input): " << Ain->getGlobalNumEntries() << ", Nonzeros after filtering A: " << Aout->getGlobalNumEntries() << std::endl;

    Set(currentLevel, "A", Aout);
}
コード例 #18
0
  // Builds the transfer data from user files: reads the coarse map ("mapFileName") and the
  // prolongator ("matrixFileName"), computes the coarse nullspace as P^T applied to the
  // fine nullspace, and generates two-component coarse coordinates from the coarse global
  // ids, which requires the global coarse size to be a perfect square.
  // Sets "CoarseMap", "P", "Nullspace" and "Coordinates" on the coarse level.
  // Throws Exceptions::RuntimeError if the block size of A is not 1 or if the global
  // coarse size is not a perfect square.
  void UserPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildP(Level& fineLevel, Level& coarseLevel) const {
    FactoryMonitor m(*this, "Build", coarseLevel);

    RCP<Matrix>      A             = Get< RCP<Matrix> >      (fineLevel, "A");
    RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");

    TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError, "Block size > 1 has not been implemented");

    const Teuchos::ParameterList& params = GetParameterList();

    // Coarse map
    RCP<const Map> fineMap     = A->getRowMap();
    std::string    mapFileName = params.get<std::string>("mapFileName");
    RCP<const Map> coarseMap   = Utils2::ReadMap(mapFileName, fineMap->lib(), fineMap->getComm());
    Set(coarseLevel, "CoarseMap", coarseMap);

    // Prolongator
    std::string matrixFileName = params.get<std::string>("matrixFileName");
    RCP<Matrix> P              = Utils::Read(matrixFileName, fineMap, coarseMap, coarseMap, fineMap);
#if 1
    Set(coarseLevel, "P", P);
#else
    // Expand column map by 1
    RCP<Matrix> P1 = Utils::Multiply(*A, false, *P, false);
    P = Utils::Read(matrixFileName, fineMap, P1->getColMap(), coarseMap, fineMap);
    Set(coarseLevel, "P", P);
#endif

    // Coarse nullspace
    const SC one  = Teuchos::ScalarTraits<SC>::one();
    const SC zero = Teuchos::ScalarTraits<SC>::zero();
    RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
    P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, one, zero);
    Set(coarseLevel, "Nullspace", coarseNullspace);

    // Coordinates transfer
    size_t n = Teuchos::as<size_t>(sqrt(coarseMap->getGlobalNumElements()));
    TEUCHOS_TEST_FOR_EXCEPTION(n*n != coarseMap->getGlobalNumElements(), Exceptions::RuntimeError, "Unfortunately, this is not the case, don't know what to do");

    RCP<MultiVector> coarseCoords = MultiVectorFactory::Build(coarseMap, 2);
    ArrayRCP<Scalar> xCoords = coarseCoords->getDataNonConst(0);
    ArrayRCP<Scalar> yCoords = coarseCoords->getDataNonConst(1);
    for (size_t lid = 0; lid < coarseMap->getNodeNumElements(); ++lid) {
      // zero-based global id, split into remainder and quotient with respect to n
      GlobalOrdinal gid = coarseMap->getGlobalElement(lid) - coarseMap->getIndexBase();
      GlobalOrdinal remainder = gid % n;
      GlobalOrdinal quotient  = gid/n;
      xCoords[lid] = remainder;
      yCoords[lid] = quotient;
    }
    Set(coarseLevel, "Coordinates", coarseCoords);

    if (!IsPrint(Statistics1))
      return;

    RCP<ParameterList> printParams = rcp(new ParameterList());
    printParams->set("printLoadBalancingInfo", true);
    GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", printParams);
  }
コード例 #19
0
    //! Calls Build() with the previous level of 'requestedLevel' as first and 'requestedLevel' as second argument.
    //! Throws Exceptions::RuntimeError if 'requestedLevel' has no previous level. With HAVE_MUELU_DEBUG,
    //! also throws if Build() is called twice in a row for the same level id while the multiple-call check is enabled.
    virtual void CallBuild(Level& requestedLevel) const {
      int levelID = requestedLevel.GetLevelID();

#ifdef HAVE_MUELU_DEBUG
      // We cannot call Build method twice for the same level, but we can call it multiple times for different levels
      TEUCHOS_TEST_FOR_EXCEPTION((multipleCallCheck_ == ENABLED) && (multipleCallCheckGlobal_ == ENABLED) && (lastLevelID_ == levelID),
                                 Exceptions::RuntimeError,
                                 this->ShortClassName() << "::Build() called twice for the same level (levelID=" << levelID
                                 << "). This is likely due to a configuration error.");
      // The first call switches the check from FIRSTCALL to ENABLED
      if (multipleCallCheck_ == FIRSTCALL)
        multipleCallCheck_ = ENABLED;

      // Remember the level id for the check in the next call
      lastLevelID_ = levelID;
#endif
      // Build() below dereferences the previous level
      TEUCHOS_TEST_FOR_EXCEPTION(requestedLevel.GetPreviousLevel() == Teuchos::null, Exceptions::RuntimeError, "LevelID = " << levelID);

#ifdef HAVE_MUELU_TIMER_SYNCHRONIZATION
      RCP<const Teuchos::Comm<int> > comm = requestedLevel.GetComm();
      if (comm.is_null()) {
        // Some factories are called before we constructed Ac, and therefore,
        // before we set the level communicator. For such factories we can get
        // the comm from the previous level, as all processes go there
        RCP<Level>& prevLevel = requestedLevel.GetPreviousLevel();
        if (!prevLevel.is_null())
          comm = prevLevel->GetComm();
      }

      // Synchronization timer
      // A barrier is timed before Build(); it is skipped if no communicator could be found
      std::string syncTimer = this->ShortClassName() + ": Build sync (level=" + toString(requestedLevel.GetLevelID()) + ")";
      if (!comm.is_null()) {
        TimeMonitor timer(*this, syncTimer);
        comm->barrier();
      }
#endif

      Build(*requestedLevel.GetPreviousLevel(), requestedLevel);

#ifdef HAVE_MUELU_TIMER_SYNCHRONIZATION
      // Synchronization timer
      // Second timed barrier after Build(), under the same timer name
      if (!comm.is_null()) {
        TimeMonitor timer(*this, syncTimer);
        comm->barrier();
      }
#endif

      // Print this factory's parameter list, as returned by RemoveFactoriesFromList, to the Test stream
      GetOStream(Test) << *RemoveFactoriesFromList(GetParameterList()) << std::endl;
    }
コード例 #20
0
    //! @brief Stops the timer.
    //! The previous MutuallyExclusiveTime that has been paused when this timer was started will be resumed.
    //! This method can be called on an already stopped timer or on the currently running timer.
    //! @return The value returned by stop() of the underlying timer.
    //! @throws Exceptions::RuntimeError if the timer is currently paused.
    double stop() {
      // A paused timer must be resumed before it can be stopped.
      TEUCHOS_TEST_FOR_EXCEPTION(isPaused(), Exceptions::RuntimeError, "MueLu::MutuallyExclusiveTime::stop(): timer is paused. Use resume().");
      if (!isRunning()) { return timer_->stop(); } // stop() can be called on stopped timer

      // Here, timer is running, so it is the head of the stack
      // NOTE(review): TopOfTheStack() presumably verifies that assumption - confirm.
      TopOfTheStack();

      // Take this timer off the stack before stopping it
      timerStack_.pop();
      const double r = timer_->stop();

      // Hand control back to the timer that is now on top of the stack, if any
      if (!timerStack_.empty()) {
        GetOStream(Debug) << "resuming timer " << timerStack_.top()->name_ << std::endl;
        timerStack_.top()->resume();
      }

      return r;
    }
コード例 #21
0
    void Set(const std::string& ename, const T& entry, const FactoryBase* factory = NoFactory::get()) {
      const FactoryBase* fac = GetFactory(ename, factory);

      if (fac == NoFactory::get()) {
        // Any data set with a NoFactory gets UserData keep flag by default
        AddKeepFlag(ename, NoFactory::get(), MueLu::UserData);
      }

      // Store entry only if data have been requested (or any keep flag)
      if (IsRequested(ename, factory) || GetKeepFlag(ename, factory) != 0) {
        TEUCHOS_TEST_FOR_EXCEPTION(!IsKey(factory, ename), Exceptions::RuntimeError, "" + ename + " not found in");
        map_[factory][ename]->SetData(entry);

      } else {
        GetOStream(Warnings0) << "Level::Set: unable to store \"" << ename << "\" generated by factory " << factory
            << " on level " << toString(GetLevelID()) << ", as it has not been requested and no keep flags were set for it" << std::endl;
      }
    } // Set
コード例 #22
0
    //! Stops the monitored timer and, when RuntimeTimings output is enabled,
    //! prints the max/min/avg of its total elapsed time.
    ~TimeMonitor() {
      // Nothing to stop or report when timer_ is null
      if (timer_ == Teuchos::null)
        return;

      timer_->stop();

      if (!IsPrint(RuntimeTimings))
        return;

      //FIXME: creates lot of barriers. An option to report time of proc0 only instead would be nice
      //FIXME: MPI_COMM_WORLD only... BTW, it is also the case in Teuchos::TimeMonitor...
      //
      // mfh 11 Nov 2012: Actually, Teuchos::TimeMonitor::summarize() has multiple overloads that take a Teuchos::Comm.
      ArrayRCP<double> reduced = ReduceMaxMinAvg(timer_->totalElapsedTime(), *Teuchos::DefaultComm<int>::getComm ());

      //FIXME: Not very important for now, but timer will be printed even if verboseLevel of Monitor/Object changed
      //       between Monitor constructor and destructor.
      GetOStream(RuntimeTimings, 0) << "Timer: " << " max=" << reduced[0]
                                    << " min=" << reduced[1]
                                    << " avg=" << reduced[2] << std::endl;
    }
コード例 #23
0
  /*!
    Builds the coarse map from the "Aggregates" and "Nullspace" data of
    @c currentLevel and stores it on that level as "CoarseMap".

    The striding information is checked against the nullspace dimension (NSDim):
    - stridedBlockId_ == -1: plain block map with constant block size NSDim;
      stridingInfo_ is reset to the single entry NSDim.
    - otherwise: stridedBlockId_ must be a valid index into stridingInfo_ and the
      selected block size must equal NSDim.

    Throws Exceptions::RuntimeError if one of these checks fails.
  */
  void CoarseMapFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
    FactoryMonitor m(*this, "Build", currentLevel);

    RCP<Aggregates>  aggregates = Get< RCP<Aggregates> >(currentLevel, "Aggregates");
    RCP<MultiVector> nullspace  = Get< RCP<MultiVector> >(currentLevel, "Nullspace");

    GlobalOrdinal                  numAggs = aggregates->GetNumAggregates();
    const size_t                   NSDim   = nullspace->getNumVectors();
    RCP<const Teuchos::Comm<int> > comm    = aggregates->GetMap()->getComm();

    // check for consistency of striding information with NSDim and nCoarseDofs
    if (stridedBlockId_== -1) {
      // this means we have no real strided map but only a block map with constant blockSize "NSDim"
      TEUCHOS_TEST_FOR_EXCEPTION(stridingInfo_.size() > 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() but must be one");
      // After clear() + push_back() the striding info holds exactly one entry.
      stridingInfo_.clear();
      stridingInfo_.push_back(NSDim);

    } else {
      // stridedBlockId_ set by user: it has to be a valid index into stridingInfo_.
      // The sign is tested first so that the unsigned comparison is only done for
      // non-negative ids; comparing against size() (instead of size() - 1) avoids
      // the unsigned wrap-around for an empty stridingInfo_.
      TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockId_ < 0 || Teuchos::as<size_t>(stridedBlockId_) >= stridingInfo_.size(), Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): it is stridingInfo_.size() <= stridedBlockId_. error.");
      const size_t stridedBlockSize = stridingInfo_[stridedBlockId_];
      TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockSize != NSDim , Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): dimension of strided block != NSDim. error.");
    }

    GetOStream(Statistics1, 0) << "domainGIDOffset: " << domainGidOffset_ << " block size: " << getFixedBlockSize() << " stridedBlockId: " << stridedBlockId_ << std::endl;

    // number of coarse level dofs (fixed by number of aggregates and blocksize data)
    GlobalOrdinal nCoarseDofs = numAggs * getFixedBlockSize();
    GlobalOrdinal indexBase   = aggregates->GetMap()->getIndexBase();

    // The global number of elements is passed as "invalid"; only the local count
    // nCoarseDofs is given explicitly.
    RCP<const Map> coarseMap = StridedMapFactory::Build(aggregates->GetMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        nCoarseDofs,
        indexBase,
        stridingInfo_,
        comm,
        stridedBlockId_,
        domainGidOffset_);

    Set(currentLevel, "CoarseMap", coarseMap);
  } // Build
コード例 #24
0
  /*!
    Stores the sparsity graph "Ppattern" on @c coarseLevel.

    The graph is taken from the coarse level "P". If the parameter
    "emin: pattern order" is k > 0, P is first replaced by the product of the
    fine level "A" with it, k times in a row.
  */
  void PatternFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level& coarseLevel) const {
    FactoryMonitor m(*this, "Ppattern", coarseLevel);

    RCP<Matrix> pattern = Get< RCP<Matrix> >(coarseLevel, "P");

    const ParameterList& pL = GetParameterList();
    const int patternOrder = pL.get<int>("emin: pattern order");

    // "A" is only fetched from the fine level when at least one product is needed
    if (patternOrder > 0) {
      RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");

      bool doFillComplete  = true;
      bool optimizeStorage = true;

      for (int power = 0; power < patternOrder; ++power) {
        // pattern <- A * pattern
        RCP<Matrix> product = Utils::Multiply(*A, false, *pattern, false, GetOStream(Statistics2), doFillComplete, optimizeStorage);
        pattern.swap(product);
      }
    }

    Set(coarseLevel, "Ppattern", pattern->getCrsGraph());
  }
コード例 #25
0
  /*!
    Transfers a multivector from @c fineLevel to @c coarseLevel.

    The vector name is read from the parameter "Vector name", the fine vector is
    fetched from the factory registered as "Vector factory", and the coarse level
    "R" is applied to it. The result is stored on the coarse level under the
    same name.

    Throws Exceptions::RuntimeError if the vector name is "Coordinates".
  */
  void MultiVectorTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Build", coarseLevel);

    const ParameterList & pL = GetParameterList();
    std::string vectorName   = pL.get<std::string>("Vector name");

    RCP<MultiVector> fineVector = fineLevel.Get< RCP<MultiVector> >(vectorName, GetFactory("Vector factory").get());
    RCP<Matrix>      transferOp = Get<RCP<Matrix> >(coarseLevel, "R");

    // Reject coordinates before any vector is allocated or any operator is applied.
    TEUCHOS_TEST_FOR_EXCEPTION(vectorName == "Coordinates", Exceptions::RuntimeError, "Use CoordinatesTransferFactory to transfer coordinates instead of MultiVectorTransferFactory.");

    RCP<MultiVector> coarseVector = MultiVectorFactory::Build(transferOp->getRangeMap(), fineVector->getNumVectors());
    GetOStream(Runtime0, 0) << "Transferring multivector \"" << vectorName << "\"" << std::endl;

    // coarseVector = R * fineVector
    transferOp->apply(*fineVector, *coarseVector);

    Set<RCP<MultiVector> >(coarseLevel, vectorName, coarseVector);

  } // Build
コード例 #26
0
  /*!
    Computes the coarse operator Ac = R A P and its nine sub-blocks.

    Inputs:  "A", "A00".."A22" from @c fineLevel; "P", "PV", "PP", "PM" from
             @c coarseLevel; additionally "R", "RV", "RP", "RM" from @c coarseLevel
             when the transpose is not applied implicitly.
    Outputs: "A", "A00".."A22" on @c coarseLevel.

    For block (i,j) the product uses the j-th of (PV, PP, PM) on the right and the
    i-th of (PV, PP, PM) transposed, or of (RV, RP, RM), on the left.
  */
  void MHDRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const
    FactoryMonitor m(*this, "Computing Ac", coarseLevel);

    //DEBUG
    //Teuchos::FancyOStream fout(*GetOStream(Runtime1));
    //coarseLevel.print(fout,Teuchos::VERB_HIGH);

    // Level keys of the sub-blocks, indexed [block row][block column]
    const std::string blockKey[3][3] = { { "A00", "A01", "A02" },
                                         { "A10", "A11", "A12" },
                                         { "A20", "A21", "A22" } };

    //
    // Inputs: A, P
    //
    RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
    RCP<Matrix> Ablock[3][3];
    for (int i = 0; i < 3; ++i)
      for (int j = 0; j < 3; ++j)
        Ablock[i][j] = Get< RCP<Matrix> >(fineLevel, blockKey[i][j]);

    RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");
    RCP<Matrix> Pblock[3];
    Pblock[0] = Get< RCP<Matrix> >(coarseLevel, "PV");
    Pblock[1] = Get< RCP<Matrix> >(coarseLevel, "PP");
    Pblock[2] = Get< RCP<Matrix> >(coarseLevel, "PM");

    //
    // Build Ac = RAP
    //

    // Step 1: AP = A * P (full operator first, then block by block)
    RCP<Matrix> AP;
    RCP<Matrix> APblock[3][3];
    {
      SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);

      AP = Utils::Multiply(*A, false, *P, false, AP, GetOStream(Statistics2));
      for (int i = 0; i < 3; ++i)
        for (int j = 0; j < 3; ++j)
          APblock[i][j] = Utils::Multiply(*Ablock[i][j], false, *Pblock[j], false, APblock[i][j], GetOStream(Statistics2));
    }

    // Step 2: Ac = P' * AP (implicit transpose) or Ac = R * AP (explicit restrictor)
    RCP<Matrix> Ac;
    RCP<Matrix> Acblock[3][3];
    if (implicitTranspose_) {
      SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);

      Ac = Utils::Multiply(*P, true, *AP, false, Ac, GetOStream(Statistics2));
      for (int i = 0; i < 3; ++i)
        for (int j = 0; j < 3; ++j)
          Acblock[i][j] = Utils::Multiply(*Pblock[i], true, *APblock[i][j], false, Acblock[i][j], GetOStream(Statistics2));

    } else {
      SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);

      // The restrictors are only fetched when they are actually used
      RCP<Matrix> R = Get< RCP<Matrix> >(coarseLevel, "R");
      RCP<Matrix> Rblock[3];
      Rblock[0] = Get< RCP<Matrix> >(coarseLevel, "RV");
      Rblock[1] = Get< RCP<Matrix> >(coarseLevel, "RP");
      Rblock[2] = Get< RCP<Matrix> >(coarseLevel, "RM");

      Ac = Utils::Multiply(*R, false, *AP, false, Ac, GetOStream(Statistics2));
      for (int i = 0; i < 3; ++i)
        for (int j = 0; j < 3; ++j)
          Acblock[i][j] = Utils::Multiply(*Rblock[i], false, *APblock[i][j], false, Acblock[i][j], GetOStream(Statistics2));
    }
    // FINISHED MAKING COARSE BLOCKS

    // Publish the coarse operator and its blocks on the coarse level
    Set(coarseLevel, "A", Ac);
    for (int i = 0; i < 3; ++i)
      for (int j = 0; j < 3; ++j)
        Set(coarseLevel, blockKey[i][j], Acblock[i][j]);
  }
コード例 #27
0
  void AlgebraicPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map> permRowMap, Level & currentLevel, const FactoryBase* genFactory) const {
#ifndef HAVE_MUELU_INST_COMPLEX_INT_INT

  const Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm();
  int numProcs = comm->getSize();
  int myRank   = comm->getRank();

  /*if( permRowMap == Teuchos::null ) {
    permRowMap = A->getRowMap(); // use full row map of A
  }*/

  size_t nDofsPerNode = 1;
  if (A->IsView("stridedMaps")) {
    Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
    nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
  }

  //GetOStream(Runtime0, 0) << "Perform generation of permutation operators on " << mapName_ << " map with " << permRowMap->getGlobalNumElements() << " elements" << std::endl;

  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
  std::vector<Scalar> Weights;

  // loop over all local rows in matrix A and keep diagonal entries if corresponding
  // matrix rows are not contained in permRowMap
  for (size_t row = 0; row < A->getRowMap()->getNodeNumElements(); row++) {
    GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row);

    if(permRowMap->isNodeGlobalElement(grow) == true) continue;

    size_t nnz = A->getNumEntriesInLocalRow(row);

    // extract local row information from matrix
    Teuchos::ArrayView<const LocalOrdinal> indices;
    Teuchos::ArrayView<const Scalar> vals;
    A->getLocalRowView(row, indices, vals);

    TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");

    // find column entry with max absolute value
    GlobalOrdinal gMaxValIdx = 0;
    Scalar norm1 = 0.0;
    Scalar maxVal = 0.0;
    for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
      norm1 += std::abs(vals[j]);
      if(std::abs(vals[j]) > maxVal) {
        maxVal = std::abs(vals[j]);
        gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
      }
    }

    if(grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!!
      keepDiagonalEntries.push_back(std::make_pair(grow,grow));
  }

  //////////
  // handle rows that are marked to be relevant for permutations
  for (size_t row = 0; row < permRowMap->getNodeNumElements(); row++) {
    GlobalOrdinal grow = permRowMap->getGlobalElement(row);
    LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
    size_t nnz = A->getNumEntriesInLocalRow(lArow);

    // extract local row information from matrix
    Teuchos::ArrayView<const LocalOrdinal> indices;
    Teuchos::ArrayView<const Scalar> vals;
    A->getLocalRowView(lArow, indices, vals);

    TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");

    // find column entry with max absolute value
    GlobalOrdinal gMaxValIdx = 0;
    Scalar norm1 = 0.0;
    Scalar maxVal = 0.0;
    for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
      norm1 += std::abs(vals[j]);
      if(std::abs(vals[j]) > maxVal) {
        maxVal = std::abs(vals[j]);
        gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
      }
    }

    if(std::abs(maxVal) > 0.0) { // keep only max Entries \neq 0.0
      permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx));
      Weights.push_back(maxVal/(norm1*Teuchos::as<Scalar>(nnz)));
    } else {
      std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" << std::endl;
    }

  }

  // sort Weights in descending order
  std::vector<int> permutation;
  sortingPermutation(Weights,permutation);

  // create new vector with exactly one possible entry for each column

  // each processor which requests the global column id gcid adds 1 to gColVec
  // gColVec will be summed up over all processors and communicated to gDomVec
  // which is based on the non-overlapping domain map of A.

  Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap());
  Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap());
  gColVec->putScalar(0.0);
  gDomVec->putScalar(0.0);

  // put in all keep diagonal entries
  for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
    gColVec->sumIntoGlobalValue((*p).second,1.0);
  }

  Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap());
  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);  // communicate blocked gcolids to all procs
  gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT);

  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory
  std::map<GlobalOrdinal, Scalar> gColId2Weight;

  Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0);
  for(size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
    // loop over all candidates
    std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
    GlobalOrdinal grow = pp.first;
    GlobalOrdinal gcol = pp.second;

    LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
    //Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0);
    if(ddata[lcol] > 0.0){
      continue; // skip lcol: column already handled by another row
    }

    // mark column as already taken
    ddata[lcol]++;

    permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol));
    gColId2Weight[gcol] = Weights[permutation[i]];
  }

  // communicate how often each column index is requested by the different procs
  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
  gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); // probably not needed // TODO check me

  //*****************************************************************************************
  // first communicate ALL global ids of column indices which are requested by more
  // than one proc to all other procs
  // detect which global col indices are requested by more than one proc
  // and store them in the multipleColRequests vector
  std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also
                                                  // requested by another processor. This is possible, since they are stored
                                                  // in gDomVec which is based on the nonoverlapping domain map. That is, each
                                                  // global col id is handled by exactly one proc.
  std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor

  for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) {
    Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
    if(arrDomVec[sz] > 1.0) {
      multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
    } else if(arrDomVec[sz] == 0.0) {
      unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
    }
  }

  // communicate the global number of column indices which are requested by more than one proc
  LocalOrdinal localMultColRequests  = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
  LocalOrdinal globalMultColRequests = 0;

  // sum up all entries in multipleColRequests over all processors
  sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests);

  if(globalMultColRequests > 0) {
    // special handling: two processors request the same global column id.
    // decide which processor gets it

    // distribute number of multipleColRequests to all processors
    // each processor stores how many column ids for exchange are handled by the cur proc
    std::vector<GlobalOrdinal> numMyMultColRequests(numProcs,0);
    std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs,0);
    numMyMultColRequests[myRank] = localMultColRequests;
    Teuchos::reduceAll(*comm,Teuchos::REDUCE_MAX,numProcs,&numMyMultColRequests[0],&numGlobalMultColRequests[0]);

    // communicate multipleColRequests entries to all processors
    int nMyOffset = 0;
    for (int i=0; i<myRank-1; i++)
      nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights

    GlobalOrdinal zero=0;
    std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests,zero);
    std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests,zero);

    // loop over all local column GIDs that are also requested by other procs
    for(size_t i = 0; i < multipleColRequests.size(); i++) {
      procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ?
    }

    // template ordinal, package (double)
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests), &procMultRequestedColIds[0], &global_procMultRequestedColIds[0]);

    // loop over global_procOverlappingWeights and eliminate wrong entries...
    for (size_t k = 0; k<global_procMultRequestedColIds.size(); k++) {
      GlobalOrdinal globColId = global_procMultRequestedColIds[k];

      std::vector<Scalar> MyWeightForColId(numProcs,0);
      std::vector<Scalar> GlobalWeightForColId(numProcs,0);

      if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
        MyWeightForColId[myRank] = gColId2Weight[globColId];
      } else {
        MyWeightForColId[myRank] = 0.0;
      }

      Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &MyWeightForColId[0], &GlobalWeightForColId[0]);

      if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
        // note: 2 procs could have the same weight for a column index.
        // pick the first one.
        Scalar winnerValue = 0.0;
        int winnerProcRank = 0;
        for (int proc = 0; proc < numProcs; proc++) {
          if(GlobalWeightForColId[proc] > winnerValue) {
            winnerValue = GlobalWeightForColId[proc];
            winnerProcRank = proc;
          }
        }

        // winnerProcRank is the winner for handling globColId.
        // winnerProcRank is unique (even if two procs have the same weight for a column index)

        if(myRank != winnerProcRank) {
          // remove corresponding entry from permutedDiagCandidatesFiltered
          typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
          while(p != permutedDiagCandidatesFiltered.end() )
          {
            if((*p).second == globColId)
              p = permutedDiagCandidatesFiltered.erase(p);
            else
              p++;
          }
        }

      } // end if isNodeGlobalElement
    } // end loop over global_procOverlappingWeights and eliminate wrong entries...
  } // end if globalMultColRequests > 0

  // put together all pairs:
  //size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size();
  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
  RowColPairs.insert( RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
  RowColPairs.insert( RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());

#ifdef DEBUG_OUTPUT
  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
  // plausibility check
  gColVec->putScalar(0.0);
  gDomVec->putScalar(0.0);
  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
  while(pl != RowColPairs.end() )
  {
    //GlobalOrdinal ik = (*pl).first;
    GlobalOrdinal jk = (*pl).second;

    gColVec->sumIntoGlobalValue(jk,1.0);
    pl++;
  }
  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
  for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) {
    Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
    if(arrDomVec[sz] > 1.0) {
      GetOStream(Runtime0,0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl;
    } else if(arrDomVec[sz] == 0.0) {
      GetOStream(Runtime0,0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl;
    }
  }
  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
#endif

  //////////////////////////////////////////////////
  // assumption: on each processor RowColPairs now contains
  // a valid set of (row,column) pairs, where the row entries
  // are a subset of the processor's rows and the column entries
  // are unique throughout all processors.
  // Note: the RowColPairs are only defined for a subset of all rows,
  // so there might be rows without an entry in RowColPairs.
  // It can be, that some rows seem to be missing in RowColPairs, since
  // the entry in that row with maximum absolute value has been reserved
  // by another row already (e.g. as already diagonal dominant row outside
  // of perRowMap).
  // In fact, the RowColPairs vector only defines the (row,column) pairs
  // that will be definitely moved to the diagonal after permutation.

#ifdef DEBUG_OUTPUT
  //  for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
  //    std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl;
  //  }
  //    for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p)
  //    {
  ////      if((*p).first != (*p).second) std::cout << "difference: " << (*p).first << " " << (*p).second << std::endl;
  //      std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl;
  //    }
  //    std::cout << "\n";
#endif

  // vectors to store permutation information
  Teuchos::RCP<Vector> Pperm  = VectorFactory::Build(A->getRowMap());
  Teuchos::RCP<Vector> Qperm  = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
  Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap());  // local variant (based on column map)

  Teuchos::ArrayRCP< Scalar > PpermData = Pperm->getDataNonConst(0);
  Teuchos::ArrayRCP< Scalar > QpermData = Qperm->getDataNonConst(0);

  Pperm->putScalar(0.0);
  Qperm->putScalar(0.0);
  lQperm->putScalar(0.0);

  // setup exporter for Qperm
  Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap());

  Teuchos::RCP<Vector> RowIdStatus  = VectorFactory::Build(A->getRowMap());
  Teuchos::RCP<Vector> ColIdStatus  = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
  Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
  Teuchos::RCP<Vector> ColIdUsed   = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use
  Teuchos::ArrayRCP< Scalar > RowIdStatusArray = RowIdStatus->getDataNonConst(0);
  Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0);
  Teuchos::ArrayRCP< Scalar > lColIdStatusArray = lColIdStatus->getDataNonConst(0);
  Teuchos::ArrayRCP< Scalar > ColIdUsedArray   = ColIdUsed->getDataNonConst(0); // not sure about this
  RowIdStatus->putScalar(0.0);
  ColIdStatus->putScalar(0.0);
  lColIdStatus->putScalar(0.0);
  ColIdUsed->putScalar(0.0);   // no column ids are used

  // count wide-range permutations
  // a wide-range permutation is defined as a permutation of rows/columns which do not
  // belong to the same node
  LocalOrdinal lWideRangeRowPermutations = 0;
  GlobalOrdinal gWideRangeRowPermutations = 0;
  LocalOrdinal lWideRangeColPermutations = 0;
  GlobalOrdinal gWideRangeColPermutations = 0;

  // run 1: mark all "identity" permutations
  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
  while(p != RowColPairs.end() )
  {
    GlobalOrdinal ik = (*p).first;
    GlobalOrdinal jk = (*p).second;

    LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
    LocalOrdinal ljk = A->getColMap()->getLocalElement(jk);

    if(RowIdStatusArray[lik] == 0.0) {
      RowIdStatusArray[lik] = 1.0; // use this row id
      lColIdStatusArray[ljk] = 1.0; // use this column id
      Pperm->replaceLocalValue(lik, ik);
      lQperm->replaceLocalValue(ljk, ik); // use column map
      ColIdUsed->replaceGlobalValue(ik,1.0); // ik is now used
      p = RowColPairs.erase(p);

      // detect wide range permutations
      if(floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) {
        lWideRangeColPermutations++;
      }
    }
    else
      p++;
  }

  // communicate column map -> domain map
  Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX);
  ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);

  // plausibility check
  if(RowColPairs.size()>0) GetOStream(Warnings0,0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl; // TODO fix me

  // close Pperm

  // count, how many row permutations are missing on current proc
  size_t cntFreeRowIdx = 0;
  std::queue<GlobalOrdinal> qFreeGRowIdx;  // store global row ids of "free" rows
  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
    if(RowIdStatusArray[lik] == 0.0) {
      cntFreeRowIdx++;
      qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
    }
  }

  // fix Pperm
  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
    if(RowIdStatusArray[lik] == 0.0) {
      RowIdStatusArray[lik] = 1.0; // use this row id
      Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
      // detect wide range permutations
      if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) {
        lWideRangeRowPermutations++;
      }
      qFreeGRowIdx.pop();
    }
  }

  // close Qperm (free permutation entries in Qperm)
  size_t cntFreeColIdx = 0;
  std::queue<GlobalOrdinal> qFreeGColIdx;  // store global column ids of "free" available columns
  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
    if(ColIdStatusArray[ljk] == 0.0) {
      cntFreeColIdx++;
      qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
    }
  }

  size_t cntUnusedColIdx = 0;
  std::queue<GlobalOrdinal> qUnusedGColIdx;  // store global column ids of "free" available columns
  for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
    if(ColIdUsedArray[ljk] == 0.0) {
      cntUnusedColIdx++;
      qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
    }
  }

  // fix Qperm with local entries
  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
    // stop if no (local) unused column idx are left
    if(cntUnusedColIdx == 0) break;

    if(ColIdStatusArray[ljk] == 0.0) {
      ColIdStatusArray[ljk] = 1.0; // use this row id
      Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map)
      ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),1.0); // ljk is now used, too
      // detect wide range permutations
      if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
        lWideRangeColPermutations++;
      }
      qUnusedGColIdx.pop();
      cntUnusedColIdx--;
      cntFreeColIdx--;
    }
  }

  //Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally
  //ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);

  // count, how many unused column idx are needed on current processor
  // to complete Qperm
  cntFreeColIdx = 0;
  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop
    if(ColIdStatusArray[ljk] == 0.0) {
      cntFreeColIdx++;
    }
  }

  GlobalOrdinal global_cntFreeColIdx = 0;
  LocalOrdinal  local_cntFreeColIdx = cntFreeColIdx;
  sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
#ifdef DEBUG_OUTPUT
  std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
#endif

  // avoid global communication if possible
  if(global_cntFreeColIdx > 0) {

    // 1) count how many unused column ids are left
    GlobalOrdinal global_cntUnusedColIdx = 0;
    LocalOrdinal  local_cntUnusedColIdx = cntUnusedColIdx;
    sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
#ifdef DEBUG_OUTPUT
    std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
#endif

    // 2) communicate how many unused column ids are available on procs
    std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs);
    std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
    local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_UnusedColIdxOnProc[0], &global_UnusedColIdxOnProc[0]);

#ifdef DEBUG_OUTPUT
    std::cout << "PROC " << myRank << " global num unused indices per proc: ";
    for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
      std::cout << " " << global_UnusedColIdxOnProc[ljk];
    }
    std::cout << std::endl;
#endif

    // 3) build array of length global_cntUnusedColIdx to globally replicate unused column idx
    std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
    std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
    GlobalOrdinal global_cntUnusedColIdxStartIter = 0;
    for(int proc=0; proc<myRank; proc++) {
      global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
    }
    for(GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter+local_cntUnusedColIdx; k++) {
      local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
      qUnusedGColIdx.pop();
    }
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx), &local_UnusedColIdxVector[0], &global_UnusedColIdxVector[0]);
#ifdef DEBUG_OUTPUT
    std::cout << "PROC " << myRank << " global UnusedGColIdx: ";
    for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
      std::cout << " " << global_UnusedColIdxVector[ljk];
    }
    std::cout << std::endl;
#endif



    // 4) communicate, how many column idx are needed on each processor
    //    to complete Qperm
    std::vector<LocalOrdinal> local_EmptyColIdxOnProc (numProcs);
    std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
    local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_EmptyColIdxOnProc[0], &global_EmptyColIdxOnProc[0]);

#ifdef DEBUG_OUTPUT
    std::cout << "PROC " << myRank << " global num of needed column indices: ";
    for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
      std::cout << " " << global_EmptyColIdxOnProc[ljk];
    }
    std::cout << std::endl;
#endif

    // 5) determine first index in global_UnusedColIdxVector for unused column indices,
    //    that are marked to be used by this processor
    GlobalOrdinal global_UnusedColStartIdx = 0;
    for(int proc=0; proc<myRank; proc++) {
      global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
    }

#ifdef DEBUG_OUTPUT
    GetOStream(Statistics0,0) << "PROC " << myRank << " is allowd to use the following column gids: ";
    for(GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
      GetOStream(Statistics0,0) << global_UnusedColIdxVector[k] << " ";
    }
    GetOStream(Statistics0,0) << std::endl;
#endif

    // 6.) fix Qperm with global entries
    GlobalOrdinal array_iter = 0;
    for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {

      if(ColIdStatusArray[ljk] == 0.0) {
        ColIdStatusArray[ljk] = 1.0; // use this row id
        Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
        ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter],1.0);
        // detect wide range permutations
        if(floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
          lWideRangeColPermutations++;
        }
        array_iter++;
        //cntUnusedColIdx--; // check me
      }
    }
  } // end if global_cntFreeColIdx > 0
  /////////////////// Qperm should be fine now...


  // create new empty Matrix
  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile));
  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile));

  for(size_t row=0; row<A->getNodeNumRows(); row++) {
    Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1,Teuchos::as<GO>(PpermData[row])); // column idx for Perm^T
    Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1,Teuchos::as<GO>(QpermData[row])); // column idx for Qperm
    Teuchos::ArrayRCP<Scalar> valout(1,1.0);
    permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0,indoutP.size()), valout.view(0,valout.size()));
    permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.view(0,indoutQ.size()), valout.view(0,valout.size()));
  }

  permPTmatrix->fillComplete();
  permQTmatrix->fillComplete();

  Teuchos::RCP<Matrix> permPmatrix = Utils2::Transpose(permPTmatrix,true);

  for(size_t row=0; row<permPTmatrix->getNodeNumRows(); row++) {
    if(permPTmatrix->getNumEntriesInLocalRow(row) != 1)
      GetOStream(Warnings0,0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
    if(permPmatrix->getNumEntriesInLocalRow(row) != 1)
      GetOStream(Warnings0,0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
    if(permQTmatrix->getNumEntriesInLocalRow(row) != 1)
      GetOStream(Warnings0,0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
  }

  // build permP * A * permQT
  Teuchos::RCP<Matrix> ApermQt = Utils::Multiply(*A, false, *permQTmatrix, false);
  Teuchos::RCP<Matrix> permPApermQt = Utils::Multiply(*permPmatrix, false, *ApermQt, false);

  /*
  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("A.mat", *A);
  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permP.mat", *permPmatrix);
  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permQt.mat", *permQTmatrix);
  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permPApermQt.mat", *permPApermQt);
  */
  // build scaling matrix
  Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
  Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0);
  Teuchos::ArrayRCP< Scalar > invDiagVecData = invDiagVec->getDataNonConst(0);

  permPApermQt->getLocalDiagCopy(*diagVec);
  for(size_t i = 0; i<diagVec->getMap()->getNodeNumElements(); ++i) {
    if(diagVecData[i] != 0.0)
      invDiagVecData[i] = 1/diagVecData[i];
    else {
      invDiagVecData[i] = 1.0;
      GetOStream(Statistics0,0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
    }
  }

  Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1,Xpetra::StaticProfile));

  for(size_t row=0; row<A->getNodeNumRows(); row++) {
    Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
    Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]);
    diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size()));
  }
  diagScalingOp->fillComplete();

  Teuchos::RCP<Matrix> scaledA = Utils::Multiply(*diagScalingOp, false, *permPApermQt, false);
  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory/*this*/);

  currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory/*this*/);  // TODO careful with this!!!
  currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory/*this*/);
  currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory/*this*/);
  currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory/*this*/);

  //// count row permutations
  // count zeros on diagonal in P -> number of row permutations
  Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true);
  permPmatrix->getLocalDiagCopy(*diagPVec);
  Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0);
  LocalOrdinal lNumRowPermutations = 0;
  GlobalOrdinal gNumRowPermutations = 0;
  for(size_t i = 0; i<diagPVec->getMap()->getNodeNumElements(); ++i) {
    if(diagPVecData[i] == 0.0) {
      lNumRowPermutations++;
    }
  }

  // sum up all entries in multipleColRequests over all processors
  sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);

  //// count column permutations
  // count zeros on diagonal in Q^T -> number of column permutations
  Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true);
  permQTmatrix->getLocalDiagCopy(*diagQTVec);
  Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0);
  LocalOrdinal lNumColPermutations = 0;
  GlobalOrdinal gNumColPermutations = 0;
  for(size_t i = 0; i<diagQTVec->getMap()->getNodeNumElements(); ++i) {
    if(diagQTVecData[i] == 0.0) {
      lNumColPermutations++;
    }
  }

  // sum up all entries in multipleColRequests over all processors
  sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);

  currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/);
  currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/);
  currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory/*this*/);
  currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory/*this*/);

  GetOStream(Statistics0, 0) << "#Row    permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
  GetOStream(Statistics0, 0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
  GetOStream(Runtime1, 0) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl;

#else
#warning PermutationFactory not compiling/working for Scalar==complex.
#endif // #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT


  }
コード例 #28
0
  // Builds the coarse-level blocked operator Ac and stores it as "A" on coarseLevel.
  //
  // Inputs:  "A" from fineLevel and "P" from coarseLevel; additionally "R" from
  //          coarseLevel when the parameter "transpose: use implicit" is false.
  //          All of them must be BlockedCrsMatrix objects.
  // Output:  "A" on coarseLevel, computed as P' x (A x P) (implicit transpose) or
  //          R x (A x P) (explicit restrictor).
  // Throws:  Exceptions::BadCast if an input is not a BlockedCrsMatrix or if the
  //          block dimensions of the factors do not match.
  // After Ac is set, every user-given transfer factory is built on coarseLevel
  // and released again.
  void BlockedRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &fineLevel, Level &coarseLevel) const {  //FIXME make fineLevel const!!
    FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel);

    const Teuchos::ParameterList& pL = GetParameterList();

    RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
    RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");

    RCP<BlockedCrsMatrix> bA = rcp_dynamic_cast<BlockedCrsMatrix>(A);
    RCP<BlockedCrsMatrix> bP = rcp_dynamic_cast<BlockedCrsMatrix>(P);
    // Only A and P are checked here; R (if needed) is checked further below.
    TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null() || bP.is_null(), Exceptions::BadCast, "Matrices A and P must be of type BlockedCrsMatrix.");

    RCP<BlockedCrsMatrix> bAP;
    RCP<BlockedCrsMatrix> bAc;
    {
      SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);

      // Triple matrix product for BlockedCrsMatrixClass
      TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast,
                               "Block matrix dimensions do not match: "
                               "A is " << bA->Rows() << "x" << bA->Cols() <<
                               ", P is " << bP->Rows() << "x" << bP->Cols());

      // The two trailing 'true' arguments sit in the same positions as
      // doFillComplete / doOptimizeStorage in the calls below.
      bAP = Utils::TwoMatrixMultiplyBlock(*bA, false, *bP,  false, GetOStream(Statistics2), true, true);
    }


    // If we do not modify matrix later, allow optimization of storage.
    // This is necessary for new faster Epetra MM kernels.
    bool doOptimizeStorage = !checkAc_;

    const bool doTranspose    = true;
    const bool doFillComplete = true;
    if (pL.get<bool>("transpose: use implicit")) {
      // Implicit restriction: use P transposed instead of an explicit R.
      SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);
      bAc = Utils::TwoMatrixMultiplyBlock(*bP,  doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage);

    } else {
      // Explicit restriction: R is requested from the coarse level.
      RCP<Matrix>           R  = Get< RCP<Matrix> >(coarseLevel, "R");
      RCP<BlockedCrsMatrix> bR = rcp_dynamic_cast<BlockedCrsMatrix>(R);
      TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast, "Matrix R must be of type BlockedCrsMatrix.");

      TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast,
                                 "Block matrix dimensions do not match: "
                                 "R is " << bR->Rows() << "x" << bR->Cols() <<
                                 ", A is " << bA->Rows() << "x" << bA->Cols());

      SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);
      bAc = Utils::TwoMatrixMultiplyBlock(*bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage);
    }


    // Optional plausibility check of the coarse operator (storage was left
    // unoptimized above in this case).
    if (checkAc_)
      CheckMainDiagonal(bAc);

    GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)");

    // static int run = 1;
    // RCP<CrsMatrixWrap> A11 = rcp(new CrsMatrixWrap(bAc->getMatrix(0,0)));
    // Utils::Write(toString(run) + "_A_11.mm", *A11);
    // if (!bAc->getMatrix(1,1).is_null()) {
      // RCP<CrsMatrixWrap> A22 = rcp(new CrsMatrixWrap(bAc->getMatrix(1,1)));
      // Utils::Write(toString(run) + "_A_22.mm", *A22);
    // }
    // RCP<CrsMatrixWrap> Am = rcp(new CrsMatrixWrap(bAc->Merge()));
    // Utils::Write(toString(run) + "_A.mm", *Am);
    // run++;

    Set<RCP <Matrix> >(coarseLevel, "A", bAc);

    if (transferFacts_.begin() != transferFacts_.end()) {
      SubFactoryMonitor m1(*this, "Projections", coarseLevel);

      // call Build of all user-given transfer factories
      for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) {
        RCP<const FactoryBase> fac = *it;
        GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl;
        fac->CallBuild(coarseLevel);
        // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid
        // of dangling data for CoordinatesTransferFactory
        coarseLevel.Release(*fac);
      }
    }
  }
コード例 #29
0
  // Computes the coarse operator Ac from the fine-level "A" and the coarse-level
  // "P" (and "R" unless the transpose of P is used implicitly), stores it on
  // coarseLevel as both "A" and "RAP Pattern", and finally runs all user-given
  // transfer factories on coarseLevel.
  void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const
    {
      FactoryMonitor m(*this, "Computing Ac", coarseLevel);

      // Translate the "Keep ..." parameters into keep flags on the coarse level.
      const Teuchos::ParameterList& paramList = GetParameterList();
      if (paramList.isParameter("Keep AP Pattern")) {
        if (paramList.get<bool>("Keep AP Pattern"))
          coarseLevel.Keep("AP Pattern",  this);
      }
      if (paramList.isParameter("Keep RAP Pattern")) {
        if (paramList.get<bool>("Keep RAP Pattern"))
          coarseLevel.Keep("RAP Pattern", this);
      }

      // Required inputs: fine operator and prolongator.
      RCP<Matrix> fineA        = Get< RCP<Matrix> >(fineLevel,   "A");
      RCP<Matrix> prolongator  = Get< RCP<Matrix> >(coarseLevel, "P");

      // ---- first product: A * P ----
      // A previously stored product is handed to Multiply for reuse (multiple solve).
      RCP<Matrix> productAP;
      if (coarseLevel.IsAvailable("AP Pattern", this)){
        GetOStream(Runtime0, 0) << "Ac: Using previous AP pattern"<<std::endl;
        productAP = Get< RCP<Matrix> >(coarseLevel, "AP Pattern");
      }

      {
        SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);
        productAP = Utils::Multiply(*fineA, false, *prolongator, false, productAP);
        Set(coarseLevel, "AP Pattern", productAP);
      }

      // Storage may only be optimized when the matrix is not modified later
      // (the diagonal check may insert local values). Optimized storage is
      // necessary for the new faster Epetra MM kernels.
      const bool optimizeStorage = !checkAc_;

      // ---- second product: R * (AP) or P' * (AP) ----
      // A previously stored coarse matrix is handed to Multiply for reuse (multiple solve).
      RCP<Matrix> coarseA;
      if (coarseLevel.IsAvailable("RAP Pattern", this)) {
        GetOStream(Runtime0, 0) << "Ac: Using previous RAP pattern" << std::endl;
        coarseA = Get< RCP<Matrix> >(coarseLevel, "RAP Pattern");
      }

      if (!implicitTranspose_) {
        SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);

        RCP<Matrix> restrictor = Get< RCP<Matrix> >(coarseLevel, "R");
        coarseA = Utils::Multiply(*restrictor, false, *productAP, false, coarseA, true, optimizeStorage);
      } else {
        SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);

        coarseA = Utils::Multiply(*prolongator, true, *productAP, false, coarseA, true, optimizeStorage);
      }

      if (checkAc_) {
        CheckMainDiagonal(coarseA);
      }

      // Print matrix statistics including load balancing information.
      RCP<ParameterList> printParams = rcp(new ParameterList());
      printParams->set("printLoadBalancingInfo", true);
      GetOStream(Statistics0, 0) << Utils::PrintMatrixInfo(*coarseA, "Ac", printParams);

      // The same matrix object serves as coarse operator and as reusable pattern.
      Set(coarseLevel, "A",           coarseA);
      Set(coarseLevel, "RAP Pattern", coarseA);
    }

    if (!transferFacts_.empty()) {
      SubFactoryMonitor m(*this, "Projections", coarseLevel);

      // Invoke Build of every user-given transfer factory, in registration order.
      for (size_t i = 0; i < transferFacts_.size(); ++i) {
        const RCP<const FactoryBase>& transferFact = transferFacts_[i];
        GetOStream(Runtime0, 0) << "Ac: call transfer factory " << transferFact.get() << ": " << transferFact->description() << std::endl;
        transferFact->CallBuild(coarseLevel);
      }
    }

  }
コード例 #30
0
  // Extracts one sub-block of a blocked operator and stores it as "A" on
  // currentLevel.
  //
  // The block is selected by the integer parameters "block row" and "block col"
  // (0-based block indices). The input "A" on currentLevel must be a
  // BlockedCrsMatrix. The extracted block is wrapped in a CrsMatrixWrap and gets a
  // "stridedMaps" view built from the range/domain map extractors of the
  // blocked operator.
  //
  // Throws Exceptions::BadCast if "A" is not a BlockedCrsMatrix or if no strided
  // map information can be obtained, and Exceptions::RuntimeError if the
  // requested block indices are out of range or the view cannot be created.
  void SubBlockAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & currentLevel) const {
    typedef Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> OMatrix; //TODO
    typedef Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixClass; //TODO
    typedef Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixWrapClass; //TODO
    typedef Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> BlockedCrsOMatrix; //TODO
    typedef Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> MapExtractorClass;

    const ParameterList & pL = GetParameterList();
    size_t row = Teuchos::as<size_t>(pL.get<int>("block row"));
    size_t col = Teuchos::as<size_t>(pL.get<int>("block col"));

    RCP<OMatrix> Ain = Get< RCP<OMatrix> >(currentLevel, "A");

    RCP<BlockedCrsOMatrix> bA = Teuchos::rcp_dynamic_cast<BlockedCrsOMatrix>(Ain);

    TEUCHOS_TEST_FOR_EXCEPTION(bA==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: input matrix A is not of type BlockedCrsMatrix! error.");
    // Block indices are 0-based, so an index equal to the number of blocks is
    // already out of range and must be rejected before calling getMatrix().
    TEUCHOS_TEST_FOR_EXCEPTION(row >= bA->Rows(), Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: block row " << row << " is out of range, A has " << bA->Rows() << " block rows! error.");
    TEUCHOS_TEST_FOR_EXCEPTION(col >= bA->Cols(), Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: block col " << col << " is out of range, A has " << bA->Cols() << " block cols! error.");

    Teuchos::RCP<CrsMatrixClass> A = bA->getMatrix(row, col);

    Teuchos::RCP<CrsMatrixWrapClass> Op = Teuchos::rcp(new CrsMatrixWrapClass(A));

    //////////////// EXPERIMENTAL
    // extract striding information from RangeMapExtractor

    Teuchos::RCP<const MapExtractorClass> rgMapExtractor = bA->getRangeMapExtractor();
    Teuchos::RCP<const MapExtractorClass> doMapExtractor = bA->getDomainMapExtractor();

    Teuchos::RCP<const Map> rgMap = rgMapExtractor->getMap(row);
    Teuchos::RCP<const Map> doMap = doMapExtractor->getMap(col);

    // If the sub-maps are already strided maps, they are used directly.
    Teuchos::RCP<const StridedMap> srgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rgMap);
    Teuchos::RCP<const StridedMap> sdoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(doMap);

    // Otherwise, rebuild a strided range map from the striding data of the full range map.
    if(srgMap == Teuchos::null) {
      Teuchos::RCP<const Map> fullRgMap = rgMapExtractor->getFullMap();
      Teuchos::RCP<const StridedMap> sFullRgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(fullRgMap);
      TEUCHOS_TEST_FOR_EXCEPTION(sFullRgMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: full rangeMap is not a strided map");
      std::vector<size_t> stridedData = sFullRgMap->getStridingData();
      if(stridedData.size() == 1 && row > 0) // we have block matrices. use striding block information 0
        srgMap = StridedMapFactory::Build(rgMap, stridedData, 0, sFullRgMap->getOffset());
      else // we have strided matrices. use striding information of the corresponding block
        srgMap = StridedMapFactory::Build(rgMap, stridedData, row, sFullRgMap->getOffset());
    }

    // Same fallback for the domain map, using the block column.
    if(sdoMap == Teuchos::null) {
      Teuchos::RCP<const Map> fullDoMap = doMapExtractor->getFullMap();
      Teuchos::RCP<const StridedMap> sFullDoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(fullDoMap);
      TEUCHOS_TEST_FOR_EXCEPTION(sFullDoMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: full domainMap is not a strided map");
      std::vector<size_t> stridedData2 = sFullDoMap->getStridingData();
      if(stridedData2.size() == 1 && col > 0) // we have block matrices. use striding block information 0
        sdoMap = StridedMapFactory::Build(doMap, stridedData2, 0, sFullDoMap->getOffset());
      else // we have strided matrices. use striding information of the corresponding block
        sdoMap = StridedMapFactory::Build(doMap, stridedData2, col, sFullDoMap->getOffset());
    }

    TEUCHOS_TEST_FOR_EXCEPTION(srgMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: rangeMap " << row << " is not a strided map");
    TEUCHOS_TEST_FOR_EXCEPTION(sdoMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: domainMap " << col << " is not a strided map");

    GetOStream(Statistics1) << "A(" << row << "," << col << ") has strided maps: range map fixed block size=" << srgMap->getFixedBlockSize() << " strided block id = " << srgMap->getStridedBlockId() << ", domain map fixed block size=" << sdoMap->getFixedBlockSize() << ", strided block id=" << sdoMap->getStridedBlockId() << std::endl;

    // Replace any existing "stridedMaps" view with the freshly determined maps.
    if(Op->IsView("stridedMaps") == true) Op->RemoveView("stridedMaps");
    Op->CreateView("stridedMaps", srgMap, sdoMap);
    TEUCHOS_TEST_FOR_EXCEPTION(Op->IsView("stridedMaps")==false, Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: failed to set stridedMaps");

    //////////////// EXPERIMENTAL

    currentLevel.Set("A", Teuchos::rcp_dynamic_cast<OMatrix>(Op), this);
  }