void CloneRepartitionInterface<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &currentLevel) const {
    FactoryMonitor m(*this, "Build", currentLevel);
    currentLevel.print(GetOStream(Statistics0,0));
    // extract blocked operator A from current level
    Teuchos::RCP<Matrix> A = Get< Teuchos::RCP<Matrix> >     (currentLevel, "A");
    Teuchos::RCP<const Teuchos::Comm< int > > comm = A->getRowMap()->getComm();

    // number of Partitions only used for a shortcut.
    GO numPartitions = 0;
    if (currentLevel.IsAvailable("number of partitions")) {
      numPartitions = currentLevel.Get<GO>("number of partitions");
      GetOStream(Warnings0) << "Using user-provided \"number of partitions\", the performance is unknown" << std::endl;

    }

    // ======================================================================================================
    // Construct decomposition vector
    // ======================================================================================================
    RCP<GOVector> decomposition = Teuchos::null;

    // extract decomposition vector
    decomposition = Get<RCP<GOVector> >(currentLevel, "Partition");
    ArrayRCP<const GO> decompEntries = decomposition->getData(0);

    if (decomposition.is_null()) {
      GetOStream(Warnings0) << "No repartitioning necessary: partitions were left unchanged by the repartitioner" << std::endl;
      Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
      return;
    }

    // create new decomposition vector
    Teuchos::RCP<GOVector> ret = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(A->getRowMap(), false);
    ArrayRCP<GO> retDecompEntries = ret->getDataNonConst(0);

    // block size of output vector
    LocalOrdinal blkSize = A->GetFixedBlockSize();

    // plausibility check!
    size_t inLocalLength  = decomposition->getLocalLength();
    size_t outLocalLength = A->getRowMap()->getNodeNumElements();

    size_t numLocalNodes = outLocalLength / blkSize;
    TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(outLocalLength  % blkSize) != 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: inconsistent number of local DOFs (" << outLocalLength << ") and degrees of freedoms ("<<blkSize<<")");

    if (numLocalNodes > 0) {
      TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(inLocalLength  % numLocalNodes) != 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: inconsistent number of local DOFs (" << inLocalLength << ") and number of local nodes (" << numLocalNodes << ")");
      LocalOrdinal inBlkSize = Teuchos::as<LocalOrdinal>(inLocalLength / numLocalNodes);
      //TEUCHOS_TEST_FOR_EXCEPTION(blkSize != inBlkSize, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: input block size = " << inBlkSize << " outpub block size = " << blkSize << ". They should be the same.");

      for(LO i = 0; i<Teuchos::as<LO>(numLocalNodes); i++) {
        for(LO j = 0; j < blkSize; j++) {
          retDecompEntries[i*blkSize + j] = Teuchos::as<GO>(decompEntries[i*inBlkSize]);
        }
      }
    } // end if numLocalNodes > 0
    Set(currentLevel, "Partition", ret);
  } //Build()
  void CloneRepartitionInterface<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &currentLevel) const {
    FactoryMonitor m(*this, "Build", currentLevel);
    currentLevel.print(GetOStream(Statistics0,0));
    // extract blocked operator A from current level
    Teuchos::RCP<Matrix> A = Get< Teuchos::RCP<Matrix> >     (currentLevel, "A");
    Teuchos::RCP<const Teuchos::Comm< int > > comm = A->getRowMap()->getComm();

    // number of Partitions only used for a shortcut.
    GO numPartitions = 0;
    if (currentLevel.IsAvailable("number of partitions")) {
      numPartitions = currentLevel.Get<GO>("number of partitions");
      GetOStream(Warnings0) << "Using user-provided \"number of partitions\", the performance is unknown" << std::endl;

    }

    // ======================================================================================================
    // Construct decomposition vector
    // ======================================================================================================
    RCP<GOVector> decomposition = Teuchos::null;

    // extract decomposition vector
    decomposition = Get<RCP<GOVector> >(currentLevel, "Partition");
    ArrayRCP<const GO> decompEntries = decomposition->getData(0);

    if (decomposition.is_null()) {
      GetOStream(Warnings0) << "No repartitioning necessary: partitions were left unchanged by the repartitioner" << std::endl;
      Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
      return;
    }

    // create new decomposition vector
    Teuchos::RCP<GOVector> ret = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(A->getRowMap(), false);
    ArrayRCP<GO> retDecompEntries = ret->getDataNonConst(0);

    // block size of output vector
    LocalOrdinal blkSize = 1;

    // check for blocking/striding information
    if(A->IsView("stridedMaps") &&
       Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
      Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
      RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
      TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CloneRepartitionInterface::Build: cast to strided row map failed.");
      LocalOrdinal stridedBlock = strMap->getStridedBlockId();
      if (stridedBlock == -1)
        blkSize = strMap->getFixedBlockSize();
      else {
        std::vector<size_t> strInfo = strMap->getStridingData();
        blkSize = strInfo[stridedBlock];
      }
      oldView = A->SwitchToView(oldView);
      GetOStream(Statistics1) << "CloneRepartitionInterface::Build():" << " found blockdim=" << blkSize << " from strided maps."<< std::endl;
    } else {
      GetOStream(Statistics1) << "CloneRepartitionInterface::Build(): no striding information available. Use blockdim=" << blkSize << " (DofsPerNode)." << std::endl;
      blkSize = A->GetFixedBlockSize();
    }

    // plausibility check!
    size_t inLocalLength  = decomposition->getLocalLength();
    size_t outLocalLength = A->getRowMap()->getNodeNumElements();

    // only for non-strided maps
    size_t numLocalNodes = outLocalLength / blkSize;
    TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(outLocalLength  % blkSize) != 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: inconsistent number of local DOFs (" << outLocalLength << ") and degrees of freedoms (" << blkSize <<")");

    if (numLocalNodes > 0) {
      TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(inLocalLength  % numLocalNodes) != 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: inconsistent number of local DOFs (" << inLocalLength << ") and number of local nodes (" << numLocalNodes << ")");
      LocalOrdinal inBlkSize = Teuchos::as<LocalOrdinal>(inLocalLength / numLocalNodes);
      //TEUCHOS_TEST_FOR_EXCEPTION(blkSize != inBlkSize, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: input block size = " << inBlkSize << " outpub block size = " << blkSize << ". They should be the same.");

      for(LO i = 0; i<Teuchos::as<LO>(numLocalNodes); i++) {
        for(LO j = 0; j < blkSize; j++) {
          retDecompEntries[i*blkSize + j] = Teuchos::as<GO>(decompEntries[i*inBlkSize]);
        }
      }
    } // end if numLocalNodes > 0
    Set(currentLevel, "Partition", ret);
  } //Build()