void RebalanceBlockRestrictionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::DeclareInput(Level &fineLevel, Level &coarseLevel) const {
  Input(coarseLevel, "R");

  std::vector<Teuchos::RCP<const FactoryManagerBase> >::const_iterator it;
  for(it = FactManager_.begin(); it!=FactManager_.end(); ++it) {
    SetFactoryManager fineSFM  (rcpFromRef(fineLevel),   *it);
    SetFactoryManager coarseSFM(rcpFromRef(coarseLevel), *it);

    coarseLevel.DeclareInput("Importer",(*it)->GetFactory("Importer").get(), this);
    coarseLevel.DeclareInput("Nullspace",(*it)->GetFactory("Nullspace").get(), this);
  }
}
void BlockedPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::DeclareInput(Level &fineLevel, Level &coarseLevel) const {
  Input(fineLevel, "A");
  //fineLevel.DeclareInput("A",AFact_.get(),this);

  //Teuchos::RCP<Teuchos::FancyOStream> fos = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout));

  std::vector<Teuchos::RCP<const FactoryManagerBase> >::const_iterator it;
  for(it = FactManager_.begin(); it!=FactManager_.end(); ++it) {
    SetFactoryManager fineSFM  (rcpFromRef(fineLevel),   *it);
    SetFactoryManager coarseSFM(rcpFromRef(coarseLevel), *it);

    if (!restrictionMode_)
      coarseLevel.DeclareInput("P",(*it)->GetFactory("P").get(), this);
    else
      coarseLevel.DeclareInput("R",(*it)->GetFactory("R").get(), this);
  }

}
void BlockedPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level &coarseLevel) const {
  typedef Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> MatrixClass;
  typedef Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixClass;
  typedef Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixWrapClass;
  typedef Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> BlockedCrsOMatrix;
  typedef Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node> MapClass;
  typedef Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node> MapFactoryClass;
  typedef Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> MapExtractorClass;
  typedef Xpetra::MapExtractorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node> MapExtractorFactoryClass;


  //Teuchos::RCP<Teuchos::FancyOStream> fos = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout));
  //std::ostringstream buf; buf << coarseLevel.GetLevelID();

  // Level Get
  //RCP<Matrix> A     = fineLevel.  Get< RCP<Matrix> >("A", AFact_.get()); // IMPORTANT: use main factory manager for getting A
  RCP<Matrix> A     = Get< RCP<Matrix> >(fineLevel, "A");

  RCP<BlockedCrsOMatrix> bA = Teuchos::rcp_dynamic_cast<BlockedCrsOMatrix>(A);
  TEUCHOS_TEST_FOR_EXCEPTION(bA==Teuchos::null, Exceptions::BadCast, "MueLu::BlockedPFactory::Build: input matrix A is not of type BlockedCrsMatrix! error.");

  // plausibility check
  TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != FactManager_.size(), Exceptions::RuntimeError, "MueLu::BlockedPFactory::Build: number of block rows of A does not match number of SubFactoryManagers. error.");
  TEUCHOS_TEST_FOR_EXCEPTION(bA->Cols() != FactManager_.size(), Exceptions::RuntimeError, "MueLu::BlockedPFactory::Build: number of block cols of A does not match number of SubFactoryManagers. error.");

  // build blocked prolongator
  std::vector<RCP<Matrix> > subBlockP;
  std::vector<RCP<const MapClass> >  subBlockPRangeMaps;
  std::vector<RCP<const MapClass    > > subBlockPDomainMaps;
  std::vector<GO> fullRangeMapVector;
  std::vector<GO> fullDomainMapVector;
  subBlockP.reserve(FactManager_.size());       // reserve size for block P operators
  subBlockPRangeMaps.reserve(FactManager_.size());       // reserve size for block P operators
  subBlockPDomainMaps.reserve(FactManager_.size());       // reserve size for block P operators

  // build and store the subblocks and the corresponding range and domain maps
  // since we put together the full range and domain map from the submaps we do not have
  // to use the maps from blocked A
  std::vector<Teuchos::RCP<const FactoryManagerBase> >::const_iterator it;
  for(it = FactManager_.begin(); it!=FactManager_.end(); ++it) {
    SetFactoryManager fineSFM  (rcpFromRef(fineLevel),   *it);
    SetFactoryManager coarseSFM(rcpFromRef(coarseLevel), *it);
    if(!restrictionMode_) {
      subBlockP.push_back(coarseLevel.Get<RCP<Matrix> >("P", (*it)->GetFactory("P").get())); // create and return block P operator
    }
    else {
      subBlockP.push_back(coarseLevel.Get<RCP<Matrix> >("R", (*it)->GetFactory("R").get())); // create and return block R operator
    }

    // check if prolongator/restrictor operators have strided maps
    TEUCHOS_TEST_FOR_EXCEPTION(subBlockP.back()->IsView("stridedMaps")==false, Exceptions::BadCast, "MueLu::BlockedPFactory::Build: subBlock P operator has no strided map information. error.");

    // append strided row map (= range map) to list of range maps.
    Teuchos::RCP<const Map> rangeMap = subBlockP.back()->getRowMap("stridedMaps"); /* getRangeMap(); //*/
    subBlockPRangeMaps.push_back(rangeMap);

    // use plain range map to determine the DOF ids
    Teuchos::ArrayView< const GlobalOrdinal > nodeRangeMap = subBlockP.back()->getRangeMap()->getNodeElementList(); //subBlockPRangeMaps.back()->getNodeElementList();
    fullRangeMapVector.insert(fullRangeMapVector.end(), nodeRangeMap.begin(), nodeRangeMap.end());
    sort(fullRangeMapVector.begin(), fullRangeMapVector.end());

    // append strided col map (= domain map) to list of range maps.
    Teuchos::RCP<const Map> domainMap = subBlockP.back()->getColMap("stridedMaps"); /* getDomainMap(); //*/
    subBlockPDomainMaps.push_back(domainMap);

    // use plain domain map to determine the DOF ids
    Teuchos::ArrayView< const GlobalOrdinal > nodeDomainMap = subBlockP.back()->getDomainMap()->getNodeElementList(); //subBlockPDomainMaps.back()->getNodeElementList();
    fullDomainMapVector.insert(fullDomainMapVector.end(), nodeDomainMap.begin(), nodeDomainMap.end());
    sort(fullDomainMapVector.begin(), fullDomainMapVector.end());

  }

  // extract map index base from maps of blocked A
  GO rangeIndexBase  = 0;
  GO domainIndexBase = 0;
  if(!restrictionMode_) { // prolongation mode: just use index base of range and domain map of bA
    rangeIndexBase = bA->getRangeMap()->getIndexBase();
    domainIndexBase= bA->getDomainMap()->getIndexBase();
  } else { // restriction mode: switch range and domain map for blocked restriction operator
    rangeIndexBase = bA->getDomainMap()->getIndexBase();
    domainIndexBase= bA->getRangeMap()->getIndexBase();
  }

  // build full range map.
  // If original range map has striding information, then transfer it to the new range map
  RCP<const MapExtractorClass> rangeAMapExtractor = bA->getRangeMapExtractor();
  Teuchos::ArrayView<GO> fullRangeMapGIDs(&fullRangeMapVector[0],fullRangeMapVector.size());
  Teuchos::RCP<const StridedMap> stridedRgFullMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rangeAMapExtractor->getFullMap());
  Teuchos::RCP<const Map > fullRangeMap = Teuchos::null;
  if(stridedRgFullMap != Teuchos::null) {
    std::vector<size_t> stridedData = stridedRgFullMap->getStridingData();
    fullRangeMap =
        StridedMapFactory::Build(
            bA->getRangeMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullRangeMapGIDs,
            rangeIndexBase,
            stridedData,
            bA->getRangeMap()->getComm(),
            stridedRgFullMap->getStridedBlockId(),
            stridedRgFullMap->getOffset());
  } else {
    fullRangeMap =
        MapFactory::Build(
            bA->getRangeMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullRangeMapGIDs,
            rangeIndexBase,
            bA->getRangeMap()->getComm());
  }

  RCP<const MapExtractorClass> domainAMapExtractor = bA->getDomainMapExtractor();
  Teuchos::ArrayView<GO> fullDomainMapGIDs(&fullDomainMapVector[0],fullDomainMapVector.size());
  Teuchos::RCP<const StridedMap> stridedDoFullMap = Teuchos::rcp_dynamic_cast<const StridedMap>(domainAMapExtractor->getFullMap());
  Teuchos::RCP<const Map > fullDomainMap = Teuchos::null;
  if(stridedDoFullMap != Teuchos::null) {
    TEUCHOS_TEST_FOR_EXCEPTION(stridedDoFullMap==Teuchos::null, Exceptions::BadCast, "MueLu::BlockedPFactory::Build: full map in domain map extractor has no striding information! error.");
    std::vector<size_t> stridedData2 = stridedDoFullMap->getStridingData();
    fullDomainMap =
        StridedMapFactory::Build(
            bA->getDomainMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullDomainMapGIDs,
            domainIndexBase,
            stridedData2,
            bA->getDomainMap()->getComm(),
            stridedDoFullMap->getStridedBlockId(),
            stridedDoFullMap->getOffset());
  } else {

    fullDomainMap =
        MapFactory::Build(
            bA->getDomainMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullDomainMapGIDs,
            domainIndexBase,
            bA->getDomainMap()->getComm());
  }

  // build map extractors
  Teuchos::RCP<const MapExtractorClass> rangeMapExtractor  = MapExtractorFactoryClass::Build(fullRangeMap,  subBlockPRangeMaps);
  Teuchos::RCP<const MapExtractorClass> domainMapExtractor = MapExtractorFactoryClass::Build(fullDomainMap, subBlockPDomainMaps);

  Teuchos::RCP<BlockedCrsOMatrix> bP = Teuchos::rcp(new BlockedCrsOMatrix(rangeMapExtractor,domainMapExtractor,10));
  for(size_t i = 0; i<subBlockPRangeMaps.size(); i++) {
    Teuchos::RCP<CrsMatrixWrapClass> crsOpii = Teuchos::rcp_dynamic_cast<CrsMatrixWrapClass>(subBlockP[i]);
    Teuchos::RCP<CrsMatrixClass> crsMatii = crsOpii->getCrsMatrix();
    bP->setMatrix(i,i,crsMatii);
  }

  bP->fillComplete();

  //bP->describe(*fos,Teuchos::VERB_EXTREME);

  // Level Set
  if(!restrictionMode_)
  {
    // prolongation factory is in prolongation mode
    coarseLevel.Set("P", Teuchos::rcp_dynamic_cast<MatrixClass>(bP), this);
  }
  else
  {
    // prolongation factory is in restriction mode
    // we do not have to transpose the blocked R operator since the subblocks on the diagonal
    // are already valid R subblocks
    coarseLevel.Set("R", Teuchos::rcp_dynamic_cast<MatrixClass>(bP), this);
  }

}
void RebalanceBlockRestrictionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);
  //const Teuchos::ParameterList & pL = GetParameterList();

  RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));

  Teuchos::RCP<Matrix> originalTransferOp = Teuchos::null;
  originalTransferOp = Get< RCP<Matrix> >(coarseLevel, "R");

  RCP<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> > bOriginalTransferOp = Teuchos::rcp_dynamic_cast<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> >(originalTransferOp);
  TEUCHOS_TEST_FOR_EXCEPTION(bOriginalTransferOp==Teuchos::null, Exceptions::BadCast, "MueLu::RebalanceBlockTransferFactory::Build: input matrix P or R is not of type BlockedCrsMatrix! error.");

  // plausibility check
  TEUCHOS_TEST_FOR_EXCEPTION(bOriginalTransferOp->Rows() != 2,Exceptions::RuntimeError, "MueLu::RebalanceBlockTransferFactory::Build: number of block rows of transfer operator is not equal 2. error.");
  TEUCHOS_TEST_FOR_EXCEPTION(bOriginalTransferOp->Cols() != 2,Exceptions::RuntimeError, "MueLu::RebalanceBlockTransferFactory::Build: number of block columns of transfer operator is not equal 2. error.");

  // rebuild rebalanced blocked P operator
  std::vector<GO> fullRangeMapVector;
  std::vector<GO> fullDomainMapVector;
  std::vector<RCP<const Map> > subBlockRRangeMaps;
  std::vector<RCP<const Map> > subBlockRDomainMaps;
  subBlockRRangeMaps.reserve(bOriginalTransferOp->Rows());       // reserve size for block P operators
  subBlockRDomainMaps.reserve(bOriginalTransferOp->Cols());       // reserve size for block P operators

  std::vector<Teuchos::RCP<Matrix> > subBlockRebR;
  subBlockRebR.reserve(bOriginalTransferOp->Cols());

  int curBlockId = 0;
  Teuchos::RCP<const Import> rebalanceImporter = Teuchos::null;
  std::vector<Teuchos::RCP<const FactoryManagerBase> >::const_iterator it;
  for (it = FactManager_.begin(); it != FactManager_.end(); ++it) {
    // begin SubFactoryManager environment
    SetFactoryManager fineSFM  (rcpFromRef(fineLevel),   *it);
    SetFactoryManager coarseSFM(rcpFromRef(coarseLevel), *it);

    rebalanceImporter = coarseLevel.Get<Teuchos::RCP<const Import> >("Importer", (*it)->GetFactory("Importer").get());

    // extract matrix block
    Teuchos::RCP<CrsMatrix> Rmii = bOriginalTransferOp->getMatrix(curBlockId, curBlockId);
    Teuchos::RCP<CrsMatrixWrap> Rwii = Teuchos::rcp(new CrsMatrixWrap(Rmii));
    Teuchos::RCP<Matrix> Rii = Teuchos::rcp_dynamic_cast<Matrix>(Rwii);

    Teuchos::RCP<Matrix> rebRii;
    if(rebalanceImporter != Teuchos::null) {
      std::stringstream ss; ss << "Rebalancing restriction block R(" << curBlockId << "," << curBlockId << ")";
      SubFactoryMonitor m1(*this, ss.str(), coarseLevel);
      {
        SubFactoryMonitor subM(*this, "Rebalancing restriction -- fusedImport", coarseLevel);
        // Note: The 3rd argument says to use originalR's domain map.

        RCP<Map> dummy;
        rebRii = MatrixFactory::Build(Rii,*rebalanceImporter,dummy,rebalanceImporter->getTargetMap());
      }

      RCP<ParameterList> params = rcp(new ParameterList());
      params->set("printLoadBalancingInfo", true);
      std::stringstream ss2; ss2 << "R(" << curBlockId << "," << curBlockId << ") rebalanced:";
      GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*rebRii, ss2.str(), params);
    } else {
      rebRii = Rii;
      RCP<ParameterList> params = rcp(new ParameterList());
      params->set("printLoadBalancingInfo", true);
      std::stringstream ss2; ss2 << "R(" << curBlockId << "," << curBlockId << ") not rebalanced:";
      GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*rebRii, ss2.str(), params);
    }

    // fix striding information for rebalanced diagonal block rebRii
    RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > rgRMapExtractor = bOriginalTransferOp->getRangeMapExtractor(); // original map extractor
    Teuchos::RCP<const StridedMap> orig_stridedRgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rgRMapExtractor->getMap(Teuchos::as<size_t>(curBlockId)));
    Teuchos::RCP<const Map> stridedRgMap = Teuchos::null;
    if(orig_stridedRgMap != Teuchos::null) {
      std::vector<size_t> stridingData = orig_stridedRgMap->getStridingData();
      Teuchos::ArrayView< const GlobalOrdinal > nodeRangeMapii = rebRii->getRangeMap()->getNodeElementList();
      stridedRgMap = StridedMapFactory::Build(
          originalTransferOp->getRangeMap()->lib(),
          Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
          nodeRangeMapii,
          rebRii->getRangeMap()->getIndexBase(),
          stridingData,
          originalTransferOp->getRangeMap()->getComm(),
          orig_stridedRgMap->getStridedBlockId(),
          orig_stridedRgMap->getOffset());
    }
    RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > doRMapExtractor = bOriginalTransferOp->getDomainMapExtractor(); // original map extractor
    Teuchos::RCP<const StridedMap> orig_stridedDoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(doRMapExtractor->getMap(Teuchos::as<size_t>(curBlockId)));
    Teuchos::RCP<const Map> stridedDoMap = Teuchos::null;
    if(orig_stridedDoMap != Teuchos::null) {
      std::vector<size_t> stridingData = orig_stridedDoMap->getStridingData();
      Teuchos::ArrayView< const GlobalOrdinal > nodeDomainMapii = rebRii->getDomainMap()->getNodeElementList();
      stridedDoMap = StridedMapFactory::Build(
          originalTransferOp->getDomainMap()->lib(),
          Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
          nodeDomainMapii,
          rebRii->getDomainMap()->getIndexBase(),
          stridingData,
          originalTransferOp->getDomainMap()->getComm(),
          orig_stridedDoMap->getStridedBlockId(),
          orig_stridedDoMap->getOffset());
    }

    TEUCHOS_TEST_FOR_EXCEPTION(stridedRgMap == Teuchos::null,Exceptions::RuntimeError, "MueLu::RebalanceBlockRestrictionFactory::Build: failed to generate striding information. error.");
    TEUCHOS_TEST_FOR_EXCEPTION(stridedDoMap == Teuchos::null,Exceptions::RuntimeError, "MueLu::RebalanceBlockRestrictionFactory::Build: failed to generate striding information. error.");

    // replace stridedMaps view in diagonal sub block
    if(rebRii->IsView("stridedMaps")) rebRii->RemoveView("stridedMaps");
    rebRii->CreateView("stridedMaps", stridedRgMap, stridedDoMap);

    // store rebalanced subblock
    subBlockRebR.push_back(rebRii);

    // append strided row map (= range map) to list of range maps.
    Teuchos::RCP<const Map> rangeMapii = rebRii->getRowMap("stridedMaps"); //rebRii->getRangeMap();
    subBlockRRangeMaps.push_back(rangeMapii);
    Teuchos::ArrayView< const GlobalOrdinal > nodeRangeMapii = rebRii->getRangeMap()->getNodeElementList();
    fullRangeMapVector.insert(fullRangeMapVector.end(), nodeRangeMapii.begin(), nodeRangeMapii.end());
    sort(fullRangeMapVector.begin(), fullRangeMapVector.end());

    // append strided col map (= domain map) to list of range maps.
    Teuchos::RCP<const Map> domainMapii = rebRii->getColMap("stridedMaps"); //rebRii->getDomainMap();
    subBlockRDomainMaps.push_back(domainMapii);
    Teuchos::ArrayView< const GlobalOrdinal > nodeDomainMapii = rebRii->getDomainMap()->getNodeElementList();
    fullDomainMapVector.insert(fullDomainMapVector.end(), nodeDomainMapii.begin(), nodeDomainMapii.end());
    sort(fullDomainMapVector.begin(), fullDomainMapVector.end());

    ////////////////////////////////////////////////////////////

    // rebalance null space
    if(rebalanceImporter != Teuchos::null)
    { // rebalance null space
      std::stringstream ss2; ss2 << "Rebalancing nullspace block(" << curBlockId << "," << curBlockId << ")";
      SubFactoryMonitor subM(*this, ss2.str(), coarseLevel);

      RCP<MultiVector> nullspace = coarseLevel.Get<RCP<MultiVector> >("Nullspace", (*it)->GetFactory("Nullspace").get());
      RCP<MultiVector> permutedNullspace = MultiVectorFactory::Build(rebalanceImporter->getTargetMap(), nullspace->getNumVectors());
      permutedNullspace->doImport(*nullspace, *rebalanceImporter, Xpetra::INSERT);

      // TODO think about this
      //if (pL.get<bool>("useSubcomm") == true) // TODO either useSubcomm is enabled everywhere or nowhere
      //permutedNullspace->replaceMap(permutedNullspace->getMap()->removeEmptyProcesses());

      coarseLevel.Set<RCP<MultiVector> >("Nullspace", permutedNullspace, (*it)->GetFactory("Nullspace").get());

    } // end rebalance null space
    else { // do nothing
      RCP<MultiVector> nullspace = coarseLevel.Get<RCP<MultiVector> >("Nullspace", (*it)->GetFactory("Nullspace").get());
      coarseLevel.Set<RCP<MultiVector> >("Nullspace", nullspace, (*it)->GetFactory("Nullspace").get());
    }

    ////////////////////////////////////////////////////////////

    curBlockId++;
  } // end for loop

  // extract map index base from maps of blocked P
  GO rangeIndexBase = originalTransferOp->getRangeMap()->getIndexBase();
  GO domainIndexBase= originalTransferOp->getDomainMap()->getIndexBase();

  // check this
  RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > rangeRMapExtractor = bOriginalTransferOp->getRangeMapExtractor(); // original map extractor
  Teuchos::ArrayView<GO> fullRangeMapGIDs(&fullRangeMapVector[0],fullRangeMapVector.size());
  Teuchos::RCP<const StridedMap> stridedRgFullMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rangeRMapExtractor->getFullMap());
  Teuchos::RCP<const Map > fullRangeMap = Teuchos::null;
  if(stridedRgFullMap != Teuchos::null) {
    std::vector<size_t> stridedData = stridedRgFullMap->getStridingData();
    fullRangeMap =
        StridedMapFactory::Build(
            originalTransferOp->getRangeMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullRangeMapGIDs,
            rangeIndexBase,
            stridedData,
            originalTransferOp->getRangeMap()->getComm(),
            stridedRgFullMap->getStridedBlockId(),
            stridedRgFullMap->getOffset());
  } else {
    fullRangeMap =
        MapFactory::Build(
            originalTransferOp->getRangeMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullRangeMapGIDs,
            rangeIndexBase,
            originalTransferOp->getRangeMap()->getComm());
  }

  RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > domainAMapExtractor = bOriginalTransferOp->getDomainMapExtractor();
  Teuchos::ArrayView<GO> fullDomainMapGIDs(&fullDomainMapVector[0],fullDomainMapVector.size());
  Teuchos::RCP<const StridedMap> stridedDoFullMap = Teuchos::rcp_dynamic_cast<const StridedMap>(domainAMapExtractor->getFullMap());
  Teuchos::RCP<const Map > fullDomainMap = Teuchos::null;
  if(stridedDoFullMap != Teuchos::null) {
    TEUCHOS_TEST_FOR_EXCEPTION(stridedDoFullMap==Teuchos::null, Exceptions::BadCast, "MueLu::BlockedPFactory::Build: full map in domain map extractor has no striding information! error.");
    std::vector<size_t> stridedData2 = stridedDoFullMap->getStridingData();
    fullDomainMap =
        StridedMapFactory::Build(
            originalTransferOp->getDomainMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullDomainMapGIDs,
            domainIndexBase,
            stridedData2,
            originalTransferOp->getDomainMap()->getComm(),
            stridedDoFullMap->getStridedBlockId(),
            stridedDoFullMap->getOffset());
  } else {

    fullDomainMap =
        MapFactory::Build(
            originalTransferOp->getDomainMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullDomainMapGIDs,
            domainIndexBase,
            originalTransferOp->getDomainMap()->getComm());
  }

  // build map extractors
  Teuchos::RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > rangeMapExtractor  =
      Xpetra::MapExtractorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(fullRangeMap,  subBlockRRangeMaps);
  Teuchos::RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > domainMapExtractor =
      Xpetra::MapExtractorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(fullDomainMap, subBlockRDomainMaps);

  Teuchos::RCP<BlockedCrsMatrix> bRebR = Teuchos::rcp(new BlockedCrsMatrix(rangeMapExtractor,domainMapExtractor,10));
  for(size_t i = 0; i<subBlockRRangeMaps.size(); i++) {
    Teuchos::RCP<const CrsMatrixWrap> crsOpii = Teuchos::rcp_dynamic_cast<const CrsMatrixWrap>(subBlockRebR[i]);
    Teuchos::RCP<CrsMatrix> crsMatii = crsOpii->getCrsMatrix();
    bRebR->setMatrix(i,i,crsMatii);
  }

  bRebR->fillComplete();

  Set(coarseLevel, "R", Teuchos::rcp_dynamic_cast<Matrix>(bRebR)); // do nothing  // TODO remove this!

} // Build