void BlockedCoarseMapFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &currentLevel) const {
    FactoryMonitor m(*this, "BlockedCoarseMap factory", currentLevel);

    RCP<const FactoryBase> prevCoarseMapFact = this->GetFactory("CoarseMap");
    RCP<const Map> subPDomainMap = currentLevel.Get<RCP<const Map> >("CoarseMap", prevCoarseMapFact.get() /*prevCoarseMapFact_.get()*/);

    GlobalOrdinal maxGlobalIndex = subPDomainMap->getMaxAllGlobalIndex();

    RCP<Aggregates> aggregates = Factory::Get< RCP<Aggregates> >(currentLevel, "Aggregates");
    GlobalOrdinal numAggs = aggregates->GetNumAggregates();

    // extract communicator
    RCP<const Teuchos::Comm<int> > comm = aggregates->GetMap()->getComm();

    // determine nullspace dimension
    RCP<MultiVector> nullspace  = Factory::Get< RCP<MultiVector> >(currentLevel, "Nullspace");
    const size_t NSDim = nullspace->getNumVectors();

    LocalOrdinal stridedBlockId = CoarseMapFactory::getStridedBlockId();

    // check for consistency of striding information with NSDim and nCoarseDofs
    if( stridedBlockId== -1 ) {
      // this means we have no real strided map but only a block map with constant blockSize "NSDim"
      TEUCHOS_TEST_FOR_EXCEPTION(CoarseMapFactory::stridingInfo_.size() > 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() but must be one");
      CoarseMapFactory::stridingInfo_.clear();
      CoarseMapFactory::stridingInfo_.push_back(NSDim);
      TEUCHOS_TEST_FOR_EXCEPTION(CoarseMapFactory::stridingInfo_.size() != 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() but must be one");
    } else {
      // stridedBlockId_ > -1, set by user
      TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockId > Teuchos::as<LO>(CoarseMapFactory::stridingInfo_.size() - 1) , Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): it is stridingInfo_.size() <= stridedBlockId_. error.");
      size_t stridedBlockSize = CoarseMapFactory::stridingInfo_[stridedBlockId];
      TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockSize != NSDim , Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): dimension of strided block != NSDim. error.");
    }

    CoarseMapFactory::GetOStream(Statistics2) << "domainGIDOffset: " << maxGlobalIndex + 1 << " block size: " << CoarseMapFactory::getFixedBlockSize() << " stridedBlockId: " << stridedBlockId << std::endl;

    // number of coarse level dofs (fixed by number of aggregates and blocksize data)
    GlobalOrdinal nCoarseDofs = numAggs * CoarseMapFactory::getFixedBlockSize();
    GlobalOrdinal indexBase   = aggregates->GetMap()->getIndexBase();

    RCP<const Map> coarseMap = StridedMapFactory::Build(aggregates->GetMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        nCoarseDofs,
        indexBase,
        CoarseMapFactory::stridingInfo_,
        comm,
        stridedBlockId,
        maxGlobalIndex + 1);

    this->Set(currentLevel, "CoarseMap", coarseMap);
  } // Build
Exemplo n.º 2
0
  void CoarseMapFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
    FactoryMonitor m(*this, "Build", currentLevel);

    RCP<Aggregates>  aggregates = Get< RCP<Aggregates> >(currentLevel, "Aggregates");
    RCP<MultiVector> nullspace  = Get< RCP<MultiVector> >(currentLevel, "Nullspace");

    GlobalOrdinal                  numAggs = aggregates->GetNumAggregates();
    const size_t                   NSDim   = nullspace->getNumVectors();
    RCP<const Teuchos::Comm<int> > comm    = aggregates->GetMap()->getComm();

    // check for consistency of striding information with NSDim and nCoarseDofs
    if (stridedBlockId_== -1) {
      // this means we have no real strided map but only a block map with constant blockSize "NSDim"
      TEUCHOS_TEST_FOR_EXCEPTION(stridingInfo_.size() > 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() but must be one");
      stridingInfo_.clear();
      stridingInfo_.push_back(NSDim);
      TEUCHOS_TEST_FOR_EXCEPTION(stridingInfo_.size() != 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() but must be one");

    } else {
      // stridedBlockId_ > -1, set by user
      TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockId_ > Teuchos::as<LO>(stridingInfo_.size() - 1) , Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): it is stridingInfo_.size() <= stridedBlockId_. error.");
      size_t stridedBlockSize = stridingInfo_[stridedBlockId_];
      TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockSize != NSDim , Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): dimension of strided block != NSDim. error.");
    }

    GetOStream(Statistics1, 0) << "domainGIDOffset: " << domainGidOffset_ << " block size: " << getFixedBlockSize() << " stridedBlockId: " << stridedBlockId_ << std::endl;

    // number of coarse level dofs (fixed by number of aggregates and blocksize data)
    GlobalOrdinal nCoarseDofs = numAggs * getFixedBlockSize();
    GlobalOrdinal indexBase   = aggregates->GetMap()->getIndexBase();

    RCP<const Map> coarseMap = StridedMapFactory::Build(aggregates->GetMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        nCoarseDofs,
        indexBase,
        stridingInfo_,
        comm,
        stridedBlockId_,
        domainGidOffset_);

    Set(currentLevel, "CoarseMap", coarseMap);
  } // Build
Exemplo n.º 3
0
  TEUCHOS_UNIT_TEST(Aggregates, UncoupledPhase3)
  {
    out << "version: " << MueLu::Version() << std::endl;

    RCP<Matrix> A = TestHelpers::TestFactory<SC, LO, GO, NO>::Build1DPoisson(36);
    RCP<const Map> rowmap = A->getRowMap();
    RCP<AmalgamationInfo> amalgInfo;
    RCP<Aggregates> aggregates = gimmeUncoupledAggregates(A, amalgInfo,false,false,false,true);
    GO numAggs = aggregates->GetNumAggregates();
    RCP<const Teuchos::Comm<int> > comm = TestHelpers::Parameters::getDefaultComm();

    TEST_EQUALITY(aggregates->AggregatesCrossProcessors(),false);

    ArrayRCP<LO> aggSizes = Teuchos::ArrayRCP<LO>(numAggs);
    ArrayRCP<LO> aggStart;
    ArrayRCP<GO> aggToRowMap;
    amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
    for (LO i = 0; i < numAggs; ++i)
      aggSizes[i] = aggStart[i+1] - aggStart[i];

    bool foundAggNotSize2=false;
    for (int i=0; i<aggSizes.size(); ++i)
      if (aggSizes[i] != 2) {
        foundAggNotSize2=true;
        break;
      }

    switch (comm->getSize()) {

      case 1 :
        TEST_EQUALITY(numAggs, 18);
        TEST_EQUALITY(foundAggNotSize2, false);
        break;

      case 2:
        TEST_EQUALITY(numAggs, 9);
        TEST_EQUALITY(foundAggNotSize2, false);
        break;

      case 3:
        TEST_EQUALITY(numAggs, 6);
        TEST_EQUALITY(foundAggNotSize2, false);
        break;

      case 4:
        TEST_EQUALITY(numAggs, 4);
        TEST_EQUALITY(foundAggNotSize2, true);
        break;

      default:
        std::string msg = "Only 1-4 MPI processes are supported.";
        //throw(MueLu::Exceptions::NotImplemented(msg));
        out << msg << std::endl;
        break;
    }

    //ArrayRCP< ArrayRCP<GO> > aggToRowMap(numAggs);
    int root = out.getOutputToRootOnly();
    out.setOutputToRootOnly(-1);
    for (int j=0; j<comm->getSize(); ++j) {
      if (comm->getRank() == j) {
        out << "++ pid " << j << " ++" << std::endl;
        out << "   num local DOFs = " << rowmap->getNodeNumElements() << std::endl;
        for (int i=0; i< numAggs; ++i) {
          out << "   aggregate " << i << ": ";
          for (int k=aggStart[i]; k< aggStart[i+1]; ++k)
            out << aggToRowMap[k] << " ";
          out << std::endl;
        }
      }
      comm->barrier();
    }
    out.setOutputToRootOnly(root);

  } //UncoupledPhase3
  void UncoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
    FactoryMonitor m(*this, "Build", currentLevel);

    ParameterList pL = GetParameterList();
    bDefinitionPhase_ = false;  // definition phase is finished, now all aggregation algorithm information is fixed

    // define aggregation algorithms
    RCP<const FactoryBase> graphFact = GetFactory("Graph");

    // TODO Can we keep different aggregation algorithms over more Build calls?
    algos_.clear();
    if (pL.get<std::string>("aggregation: mode") == "old") {
      if (pL.get<bool>("UseOnePtAggregationAlgorithm")             == true)   algos_.push_back(rcp(new OnePtAggregationAlgorithm             (graphFact)));
      if (pL.get<bool>("UsePreserveDirichletAggregationAlgorithm") == true)   algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm (graphFact)));
      if (pL.get<bool>("UseUncoupledAggregationAlgorithm")         == true)   algos_.push_back(rcp(new AggregationPhase1Algorithm            (graphFact)));
      if (pL.get<bool>("UseMaxLinkAggregationAlgorithm")           == true)   algos_.push_back(rcp(new MaxLinkAggregationAlgorithm           (graphFact)));
      if (pL.get<bool>("UseEmergencyAggregationAlgorithm")         == true)   algos_.push_back(rcp(new EmergencyAggregationAlgorithm         (graphFact)));
                                                                              algos_.push_back(rcp(new IsolatedNodeAggregationAlgorithm      (graphFact)));

    } else {
      if (pL.get<bool>("aggregation: preserve Dirichlet points")   == true)   algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm (graphFact)));
      if (pL.get<bool>("aggregation: enable phase 1" )             == true)   algos_.push_back(rcp(new AggregationPhase1Algorithm            (graphFact)));
      if (pL.get<bool>("aggregation: enable phase 2a")             == true)   algos_.push_back(rcp(new AggregationPhase2aAlgorithm           (graphFact)));
      if (pL.get<bool>("aggregation: enable phase 2b")             == true)   algos_.push_back(rcp(new AggregationPhase2bAlgorithm           (graphFact)));
      if (pL.get<bool>("aggregation: enable phase 3" )             == true)   algos_.push_back(rcp(new AggregationPhase3Algorithm            (graphFact)));
                                                                              algos_.push_back(rcp(new IsolatedNodeAggregationAlgorithm      (graphFact)));
    }

    std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
    RCP<const Map> OnePtMap;
    if (mapOnePtName.length()) {
      RCP<const FactoryBase> mapOnePtFact = GetFactory("OnePt aggregate map factory");
      OnePtMap = currentLevel.Get<RCP<const Map> >(mapOnePtName, mapOnePtFact.get());
    }

    RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");

    // Build
    RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
    aggregates->setObjectLabel("UC");

    const LO numRows = graph->GetNodeNumVertices();

    // construct aggStat information
    std::vector<unsigned> aggStat(numRows, READY);

    ArrayRCP<const bool> dirichletBoundaryMap = graph->GetBoundaryNodeMap();
    if (dirichletBoundaryMap != Teuchos::null)
      for (LO i = 0; i < numRows; i++)
        if (dirichletBoundaryMap[i] == true)
          aggStat[i] = BOUNDARY;

    LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
    GO indexBase = graph->GetDomainMap()->getIndexBase();
    if (OnePtMap != Teuchos::null) {
      for (LO i = 0; i < numRows; i++) {
        // reconstruct global row id (FIXME only works for contiguous maps)
        GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;

        for (LO kr = 0; kr < nDofsPerNode; kr++)
          if (OnePtMap->isNodeGlobalElement(grid + kr))
            aggStat[i] = ONEPT;
      }
    }


    const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
    GO numGlobalRows = 0;
    if (IsPrint(Statistics1))
      sumAll(comm, as<GO>(numRows), numGlobalRows);

    LO numNonAggregatedNodes = numRows;
    GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
    for (size_t a = 0; a < algos_.size(); a++) {
      std::string phase = algos_[a]->description();
      SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel);

      algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);

      if (IsPrint(Statistics1)) {

        GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0;
        GO numLocalAggs       = aggregates->GetNumAggregates(),  numGlobalAggs = 0;
        sumAll(comm, numLocalAggregated, numGlobalAggregated);
        sumAll(comm, numLocalAggs,       numGlobalAggs);

        double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
        if (aggPercent > 99.99 && aggPercent < 100.00) {
          // Due to round off (for instance, for 140465733/140466897), we could
          // get 100.00% display even if there are some remaining nodes. This
          // is bad from the users point of view. It is much better to change
          // it to display 99.99%.
          aggPercent = 99.99;
        }
        GetOStream(Statistics1) << "  aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
                                   << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
                                   << "  remaining  : " << numGlobalRows - numGlobalAggregated << "\n"
                                   << "  aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
        numGlobalAggregatedPrev = numGlobalAggregated;
        numGlobalAggsPrev       = numGlobalAggs;
      }
    }

    TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");

    aggregates->AggregatesCrossProcessors(false);

    Set(currentLevel, "Aggregates", aggregates);

    GetOStream(Statistics0) << aggregates->description() << std::endl;
  }
  void UncoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
    FactoryMonitor m(*this, "Build", currentLevel);

    const ParameterList& pL = GetParameterList();
    bDefinitionPhase_ = false;  // definition phase is finished, now all aggregation algorithm information is fixed

    bool bUseOnePtAggregationAlgorithm             = pL.get<bool>("UseOnePtAggregationAlgorithm");
    bool bUseSmallAggregationAlgorithm             = pL.get<bool>("UseSmallAggregatesAggregationAlgorithm");
    bool bUsePreserveDirichletAggregationAlgorithm = pL.get<bool>("UsePreserveDirichletAggregationAlgorithm");
    bool bUseUncoupledAggregationAglorithm         = pL.get<bool>("UseUncoupledAggregationAlgorithm");
    bool bUseMaxLinkAggregationAlgorithm           = pL.get<bool>("UseMaxLinkAggregationAlgorithm");
    bool bUseIsolatedNodeAggregationAglorithm      = pL.get<bool>("UseIsolatedNodeAggregationAlgorithm");
    bool bUseEmergencyAggregationAlgorithm         = pL.get<bool>("UseEmergencyAggregationAlgorithm");

    // define aggregation algorithms
    RCP<const FactoryBase> graphFact = GetFactory("Graph");

    // TODO Can we keep different aggregation algorithms over more Build calls?
    algos_.clear();
    if (bUseOnePtAggregationAlgorithm)             algos_.push_back(rcp(new OnePtAggregationAlgorithm             (graphFact)));
    if (bUseSmallAggregationAlgorithm)             algos_.push_back(rcp(new SmallAggregationAlgorithm             (graphFact)));
    if (bUseUncoupledAggregationAglorithm)         algos_.push_back(rcp(new UncoupledAggregationAlgorithm         (graphFact)));
    if (bUseMaxLinkAggregationAlgorithm)           algos_.push_back(rcp(new MaxLinkAggregationAlgorithm           (graphFact)));
    if (bUsePreserveDirichletAggregationAlgorithm) algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm (graphFact)));
    if (bUseIsolatedNodeAggregationAglorithm)      algos_.push_back(rcp(new IsolatedNodeAggregationAlgorithm      (graphFact)));
    if (bUseEmergencyAggregationAlgorithm)         algos_.push_back(rcp(new EmergencyAggregationAlgorithm         (graphFact)));


    std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name"), mapSmallAggName = pL.get<std::string>("SmallAgg aggregate map name");
    RCP<const Map> OnePtMap, SmallAggMap;
    if (mapOnePtName.length()) {
      RCP<const FactoryBase> mapOnePtFact = GetFactory("OnePt aggregate map factory");
      OnePtMap = currentLevel.Get<RCP<const Map> >(mapOnePtName, mapOnePtFact.get());
    }
    if (mapSmallAggName.length()) {
      RCP<const FactoryBase> mapSmallAggFact = GetFactory("SmallAgg aggregate map factory");
      SmallAggMap = currentLevel.Get<RCP<const Map> >(mapSmallAggName, mapSmallAggFact.get());
    }

    RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");

    // Build
    RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
    aggregates->setObjectLabel("UC");

    const LO nRows = graph->GetNodeNumVertices();

    // construct aggStat information
    std::vector<unsigned> aggStat(nRows, NodeStats::READY);

    ArrayRCP<const bool> dirichletBoundaryMap = graph->GetBoundaryNodeMap();
    if (dirichletBoundaryMap != Teuchos::null) {
      for (LO i = 0; i < nRows; i++)
        if (dirichletBoundaryMap[i] == true)
          aggStat[i] = NodeStats::BOUNDARY;
    }

    LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
    GO indexBase = graph->GetDomainMap()->getIndexBase();
    if (SmallAggMap != Teuchos::null || OnePtMap != Teuchos::null) {
      for (LO i = 0; i < nRows; i++) {
        // reconstruct global row id (FIXME only works for contiguous maps)
        GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;

        if (SmallAggMap != null) {
          for (LO kr = 0; kr < nDofsPerNode; kr++) {
            if (SmallAggMap->isNodeGlobalElement(grid + kr))
              aggStat[i] = MueLu::NodeStats::SMALLAGG;
          }
        }

        if (OnePtMap != null) {
          for (LO kr = 0; kr < nDofsPerNode; kr++) {
            if (OnePtMap->isNodeGlobalElement(grid + kr))
              aggStat[i] = MueLu::NodeStats::ONEPT;
          }
        }
      }
    }


    const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
    GO numGlobalRows = 0;
    if (IsPrint(Statistics1))
      sumAll(comm, as<GO>(nRows), numGlobalRows);

    LO numNonAggregatedNodes = nRows;
    GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
    for (size_t a = 0; a < algos_.size(); a++) {
      std::string phase = algos_[a]->description();
      SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel);

      algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);

      if (IsPrint(Statistics1)) {

        GO numLocalAggregated = nRows - numNonAggregatedNodes,  numGlobalAggregated = 0;
        GO numLocalAggs       = aggregates->GetNumAggregates(), numGlobalAggs = 0;
        sumAll(comm, numLocalAggregated, numGlobalAggregated);
        sumAll(comm, numLocalAggs,       numGlobalAggs);

        double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
        GetOStream(Statistics1) << "  aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
                                   << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
                                   << "  remaining  : " << numGlobalRows - numGlobalAggregated << "\n"
                                   << "  aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
        numGlobalAggregatedPrev = numGlobalAggregated;
        numGlobalAggsPrev       = numGlobalAggs;
      }
    }

    TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");

    aggregates->AggregatesCrossProcessors(false);

    Set(currentLevel, "Aggregates", aggregates);

    GetOStream(Statistics0) << aggregates->description() << std::endl;
  }
  void CoordinatesTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Build", coarseLevel);

    GetOStream(Runtime0, 0) << "Transferring coordinates" << std::endl;

    const ParameterList  & pL = GetParameterList();
    int                 writeStart = pL.get< int >("write start");
    int                 writeEnd   = pL.get< int >("write end");

    RCP<Aggregates>     aggregates = Get< RCP<Aggregates> > (fineLevel, "Aggregates");
    RCP<MultiVector>    fineCoords = Get< RCP<MultiVector> >(fineLevel, "Coordinates");
    RCP<const Map>      coarseMap  = Get< RCP<const Map> >  (fineLevel, "CoarseMap");

    // coarseMap is being used to set up the domain map of tentative P, and therefore, the row map of Ac
    // Therefore, if we amalgamate coarseMap, logical nodes in the coordinates vector would correspond to
    // logical blocks in the matrix

    ArrayView<const GO> elementAList = coarseMap->getNodeElementList();
    LO                  blkSize      = 1;
    if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null)
      blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();

    GO                  indexBase    = coarseMap->getIndexBase();
    size_t              numElements  = elementAList.size() / blkSize;
    Array<GO>           elementList(numElements);

    // Amalgamate the map
    for (LO i = 0; i < Teuchos::as<LO>(numElements); i++)
      elementList[i] = (elementAList[i*blkSize]-indexBase)/blkSize + indexBase;

    RCP<const Map> coarseCoordMap = MapFactory        ::Build(coarseMap->lib(), Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), elementList, indexBase, coarseMap->getComm());
    RCP<MultiVector> coarseCoords = MultiVectorFactory::Build(coarseCoordMap, fineCoords->getNumVectors());

    // Maps
    RCP<const Map> uniqueMap    = fineCoords->getMap();
    RCP<const Map> nonUniqueMap = aggregates->GetMap();

    // Create overlapped fine coordinates to reduce global communication
    RCP<const Import>     importer = ImportFactory     ::Build(uniqueMap, nonUniqueMap);
    RCP<MultiVector> ghostedCoords = MultiVectorFactory::Build(nonUniqueMap, fineCoords->getNumVectors());
    ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);

    // Get some info about aggregates
    int                         myPID        = uniqueMap->getComm()->getRank();
    LO                          numAggs      = aggregates->GetNumAggregates();
    ArrayRCP<LO>                aggSizes     = aggregates->ComputeAggregateSizes();
    const ArrayRCP<const LO>    vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
    const ArrayRCP<const LO>    procWinner   = aggregates->GetProcWinner()->getData(0);

    // Fill in coarse coordinates
    for (size_t j = 0; j < fineCoords->getNumVectors(); j++) {
      ArrayRCP<const Scalar> fineCoordsData = ghostedCoords->getData(j);
      ArrayRCP<Scalar>     coarseCoordsData = coarseCoords->getDataNonConst(j);

      for (LO lnode = 0; lnode < vertex2AggID.size(); lnode++)
        if (procWinner[lnode] == myPID)
          coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];

      for (LO agg = 0; agg < numAggs; agg++)
        coarseCoordsData[agg] /= aggSizes[agg];
    }

    Set<RCP<MultiVector> >(coarseLevel, "Coordinates", coarseCoords);
    if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd) {
      std::ostringstream buf;
      buf << fineLevel.GetLevelID();
      std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m";
      Utils::Write(fileName,*fineCoords);
    }
    if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd) {
      std::ostringstream buf;
      buf << coarseLevel.GetLevelID();
      std::string fileName = "coordinates_before_rebalance_level_" + buf.str() + ".m";
      Utils::Write(fileName,*coarseCoords);
    }

  } // Build
  void CoarseMapFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
    FactoryMonitor m(*this, "Build", currentLevel);

    RCP<Aggregates>  aggregates = Get< RCP<Aggregates> >(currentLevel, "Aggregates");
    RCP<MultiVector> nullspace  = Get< RCP<MultiVector> >(currentLevel, "Nullspace");

    GlobalOrdinal                  numAggs = aggregates->GetNumAggregates();
    const size_t                   NSDim   = nullspace->getNumVectors();
    RCP<const Teuchos::Comm<int> > comm    = aggregates->GetMap()->getComm();

    // read in offset information from parameter list and fill the internal member variable
    GlobalOrdinal domainGidOffset = 0;
    std::vector<GlobalOrdinal> domainGidOffsets;
    domainGidOffsets.clear();
    const ParameterList & pL = GetParameterList();
    if(pL.isParameter("Domain GID offsets")) {
      std::string strDomainGIDs = pL.get<std::string>("Domain GID offsets");
      if(strDomainGIDs.empty() == false) {
        Teuchos::Array<GlobalOrdinal> arrayVal = Teuchos::fromStringToArray<GlobalOrdinal>(strDomainGIDs);
        domainGidOffsets = Teuchos::createVector(arrayVal);
        if(currentLevel.GetLevelID() < Teuchos::as<int>(domainGidOffsets.size()) ) {
          domainGidOffset = domainGidOffsets[currentLevel.GetLevelID()];
        }
      }
    }

    LocalOrdinal stridedBlockId = pL.get<LocalOrdinal>("Strided block id");

    // read in stridingInfo from parameter list and fill the internal member variable
    // read the data only if the parameter "Striding info" exists and is non-empty
    //const ParameterList & pL = GetParameterList();
    if(pL.isParameter("Striding info")) {
      std::string strStridingInfo = pL.get<std::string>("Striding info");
      if(strStridingInfo.empty() == false) {
        Teuchos::Array<size_t> arrayVal = Teuchos::fromStringToArray<size_t>(strStridingInfo);
        stridingInfo_ = Teuchos::createVector(arrayVal);
      }
    }


    // check for consistency of striding information with NSDim and nCoarseDofs
    if (stridedBlockId== -1) {
      // this means we have no real strided map but only a block map with constant blockSize "NSDim"
      TEUCHOS_TEST_FOR_EXCEPTION(stridingInfo_.size() > 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() but must be one");
      stridingInfo_.clear();
      stridingInfo_.push_back(NSDim);
      TEUCHOS_TEST_FOR_EXCEPTION(stridingInfo_.size() != 1, Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): stridingInfo_.size() but must be one");

    } else {
      // stridedBlockId > -1, set by user
      TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockId > Teuchos::as<LO>(stridingInfo_.size() - 1) , Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): it is stridingInfo_.size() <= stridedBlockId_. error.");
      size_t stridedBlockSize = stridingInfo_[stridedBlockId];
      TEUCHOS_TEST_FOR_EXCEPTION(stridedBlockSize != NSDim , Exceptions::RuntimeError, "MueLu::CoarseMapFactory::Build(): dimension of strided block != NSDim. error.");
    }

    GetOStream(Statistics2) << "domainGIDOffset: " << domainGidOffset << " block size: " << getFixedBlockSize() << " stridedBlockId: " << stridedBlockId << std::endl;

    // number of coarse level dofs (fixed by number of aggregates and blocksize data)
    GlobalOrdinal nCoarseDofs = numAggs * getFixedBlockSize();
    GlobalOrdinal indexBase   = aggregates->GetMap()->getIndexBase();

    RCP<const Map> coarseMap = StridedMapFactory::Build(aggregates->GetMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        nCoarseDofs,
        indexBase,
        stridingInfo_,
        comm,
        stridedBlockId,
        domainGidOffset);

    Set(currentLevel, "CoarseMap", coarseMap);
  } // Build