예제 #1
0
void Tempo::subS(int S){
	segundos -= S;
	if(segundos < 0){
		// segundos = segundos%60;
		segundos = 60+segundos;
		subM(1);
	}
}
예제 #2
0
  void RebalanceAcFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Computing Ac", coarseLevel);

    RCP<Matrix> originalAc = Get< RCP<Matrix> >(coarseLevel, "A");

    RCP<const Import> rebalanceImporter = Get< RCP<const Import> >(coarseLevel, "Importer");

    if (rebalanceImporter != Teuchos::null) {
      RCP<Matrix> rebalancedAc;
      {
        SubFactoryMonitor subM(*this, "Rebalancing existing Ac", coarseLevel);
        RCP<const Map> targetMap = rebalanceImporter->getTargetMap();

        const ParameterList & pL = GetParameterList();

        ParameterList XpetraList;
        if (pL.get<bool>("useSubcomm") == true) {
          GetOStream(Runtime0,0) << "Replacing maps with a subcommunicator" << std::endl;
          XpetraList.set("Restrict Communicator",true);
        }
        // NOTE: If the communicator is restricted away, Build returns Teuchos::null.
        rebalancedAc = MatrixFactory::Build(originalAc, *rebalanceImporter, targetMap, targetMap, rcp(&XpetraList,false));

        if (!rebalancedAc.is_null())
          rebalancedAc->SetFixedBlockSize(originalAc->GetFixedBlockSize());

        Set(coarseLevel, "A", rebalancedAc);
      }

      if (!rebalancedAc.is_null()) {
        RCP<ParameterList> params = rcp(new ParameterList());
        params->set("printLoadBalancingInfo", true);
        GetOStream(Statistics0, 0) << Utils::PrintMatrixInfo(*rebalancedAc, "Ac (rebalanced)", params);
      }

    } else {
      // Ac already built by the load balancing process and no load balancing needed
      GetOStream(Warnings0, 0) << "No rebalancing" << std::endl;
      GetOStream(Warnings0, 0) << "Jamming A into Level " << coarseLevel.GetLevelID() << " w/ generating factory "
                               << this << std::endl;
      Set(coarseLevel, "A", originalAc);
    }

  } //Build()
예제 #3
0
  void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const
    {
      FactoryMonitor m(*this, "Computing Ac", coarseLevel);

      // Set "Keeps" from params
      const Teuchos::ParameterList& pL = GetParameterList();
      if (pL.isParameter("Keep AP Pattern")  && pL.get<bool>("Keep AP Pattern"))
        coarseLevel.Keep("AP Pattern",  this);
      if (pL.isParameter("Keep RAP Pattern") && pL.get<bool>("Keep RAP Pattern"))
        coarseLevel.Keep("RAP Pattern", this);

      //
      // Inputs: A, P
      //

      RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
      RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");

      //
      // Build Ac = RAP
      //

      RCP<Matrix> AP;

      // Reuse pattern if available (multiple solve)
      if (coarseLevel.IsAvailable("AP Pattern", this)){
        GetOStream(Runtime0, 0) << "Ac: Using previous AP pattern"<<std::endl;
        AP = Get< RCP<Matrix> >(coarseLevel, "AP Pattern");
      }

      {
        SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);
        AP = Utils::Multiply(*A, false, *P, false, AP);
        Set(coarseLevel, "AP Pattern", AP);
      }

      bool doOptimizedStorage = !checkAc_; // Optimization storage option. If not modifying matrix later (inserting local values), allow optimization of storage.
                                           // This is necessary for new faster Epetra MM kernels.

      RCP<Matrix> Ac;

      // Reuse coarse matrix memory if available (multiple solve)
      if (coarseLevel.IsAvailable("RAP Pattern", this)) {
        GetOStream(Runtime0, 0) << "Ac: Using previous RAP pattern" << std::endl;
        Ac = Get< RCP<Matrix> >(coarseLevel, "RAP Pattern");
      }

      if (implicitTranspose_) {
        SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);

        Ac = Utils::Multiply(*P, true, *AP, false, Ac, true, doOptimizedStorage);

      } else {

        SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);

        RCP<Matrix> R = Get< RCP<Matrix> >(coarseLevel, "R");
        Ac = Utils::Multiply(*R, false, *AP, false, Ac, true, doOptimizedStorage);

      }

      if (checkAc_)
        CheckMainDiagonal(Ac);

      RCP<ParameterList> params = rcp(new ParameterList());;
      params->set("printLoadBalancingInfo", true);
      GetOStream(Statistics0, 0) << Utils::PrintMatrixInfo(*Ac, "Ac", params);

      Set(coarseLevel, "A",           Ac);
      Set(coarseLevel, "RAP Pattern", Ac);
    }

    if (transferFacts_.begin() != transferFacts_.end()) {
      SubFactoryMonitor m(*this, "Projections", coarseLevel);

      // call Build of all user-given transfer factories
      for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) {
        GetOStream(Runtime0, 0) << "Ac: call transfer factory " << (*it).get() << ": " << (*it)->description() << std::endl;
        (*it)->CallBuild(coarseLevel);
      }
    }

  }
  void BlockedRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &fineLevel, Level &coarseLevel) const {  //FIXME make fineLevel const!!
    FactoryMonitor m(*this, "Computing Ac (block)", coarseLevel);

    const Teuchos::ParameterList& pL = GetParameterList();

    RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
    RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");

    RCP<BlockedCrsMatrix> bA = rcp_dynamic_cast<BlockedCrsMatrix>(A);
    RCP<BlockedCrsMatrix> bP = rcp_dynamic_cast<BlockedCrsMatrix>(P);
    TEUCHOS_TEST_FOR_EXCEPTION(bA.is_null() || bP.is_null(), Exceptions::BadCast, "Matrices R, A and P must be of type BlockedCrsMatrix.");

    RCP<BlockedCrsMatrix> bAP;
    RCP<BlockedCrsMatrix> bAc;
    {
      SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);

      // Triple matrix product for BlockedCrsMatrixClass
      TEUCHOS_TEST_FOR_EXCEPTION((bA->Cols() != bP->Rows()), Exceptions::BadCast,
                               "Block matrix dimensions do not match: "
                               "A is " << bA->Rows() << "x" << bA->Cols() <<
                               "P is " << bP->Rows() << "x" << bP->Cols());

      bAP = Utils::TwoMatrixMultiplyBlock(*bA, false, *bP,  false, GetOStream(Statistics2), true, true);
    }


    // If we do not modify matrix later, allow optimization of storage.
    // This is necessary for new faster Epetra MM kernels.
    bool doOptimizeStorage = !checkAc_;

    const bool doTranspose    = true;
    const bool doFillComplete = true;
    if (pL.get<bool>("transpose: use implicit") == true) {
      SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);
      bAc = Utils::TwoMatrixMultiplyBlock(*bP,  doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage);

    } else {
      RCP<Matrix>           R  = Get< RCP<Matrix> >(coarseLevel, "R");
      RCP<BlockedCrsMatrix> bR = rcp_dynamic_cast<BlockedCrsMatrix>(R);
      TEUCHOS_TEST_FOR_EXCEPTION(bR.is_null(), Exceptions::BadCast, "Matrix R must be of type BlockedCrsMatrix.");

      TEUCHOS_TEST_FOR_EXCEPTION(bA->Rows() != bR->Cols(), Exceptions::BadCast,
                                 "Block matrix dimensions do not match: "
                                 "R is " << bR->Rows() << "x" << bR->Cols() <<
                                 "A is " << bA->Rows() << "x" << bA->Cols());

      SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);
      bAc = Utils::TwoMatrixMultiplyBlock(*bR, !doTranspose, *bAP, !doTranspose, GetOStream(Statistics2), doFillComplete, doOptimizeStorage);
    }


    if (checkAc_)
      CheckMainDiagonal(bAc);

    GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*bAc, "Ac (blocked)");

    // static int run = 1;
    // RCP<CrsMatrixWrap> A11 = rcp(new CrsMatrixWrap(bAc->getMatrix(0,0)));
    // Utils::Write(toString(run) + "_A_11.mm", *A11);
    // if (!bAc->getMatrix(1,1).is_null()) {
      // RCP<CrsMatrixWrap> A22 = rcp(new CrsMatrixWrap(bAc->getMatrix(1,1)));
      // Utils::Write(toString(run) + "_A_22.mm", *A22);
    // }
    // RCP<CrsMatrixWrap> Am = rcp(new CrsMatrixWrap(bAc->Merge()));
    // Utils::Write(toString(run) + "_A.mm", *Am);
    // run++;

    Set<RCP <Matrix> >(coarseLevel, "A", bAc);

    if (transferFacts_.begin() != transferFacts_.end()) {
      SubFactoryMonitor m1(*this, "Projections", coarseLevel);

      // call Build of all user-given transfer factories
      for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) {
        RCP<const FactoryBase> fac = *it;
        GetOStream(Runtime0) << "BlockRAPFactory: call transfer factory: " << fac->description() << std::endl;
        fac->CallBuild(coarseLevel);
        // AP (11/11/13): I am not sure exactly why we need to call Release, but we do need it to get rid
        // of dangling data for CoordinatesTransferFactory
        coarseLevel.Release(*fac);
      }
    }
  }
  void MHDRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &fineLevel, Level &coarseLevel) const { // FIXME make fineLevel const
    {
      FactoryMonitor m(*this, "Computing Ac", coarseLevel);

      //
      // Inputs: A, P
      //

      //DEBUG
      //Teuchos::FancyOStream fout(*GetOStream(Runtime1));
      //coarseLevel.print(fout,Teuchos::VERB_HIGH);


      RCP<Matrix> A   = Get< RCP<Matrix> >(fineLevel, "A"  );
      RCP<Matrix> A00 = Get< RCP<Matrix> >(fineLevel, "A00");
      RCP<Matrix> A01 = Get< RCP<Matrix> >(fineLevel, "A01");
      RCP<Matrix> A02 = Get< RCP<Matrix> >(fineLevel, "A02");
      RCP<Matrix> A10 = Get< RCP<Matrix> >(fineLevel, "A10");
      RCP<Matrix> A11 = Get< RCP<Matrix> >(fineLevel, "A11");
      RCP<Matrix> A12 = Get< RCP<Matrix> >(fineLevel, "A12");
      RCP<Matrix> A20 = Get< RCP<Matrix> >(fineLevel, "A20");
      RCP<Matrix> A21 = Get< RCP<Matrix> >(fineLevel, "A21");
      RCP<Matrix> A22 = Get< RCP<Matrix> >(fineLevel, "A22");

      RCP<Matrix> P  = Get< RCP<Matrix> >(coarseLevel, "P" );
      RCP<Matrix> PV = Get< RCP<Matrix> >(coarseLevel, "PV");
      RCP<Matrix> PP = Get< RCP<Matrix> >(coarseLevel, "PP");
      RCP<Matrix> PM = Get< RCP<Matrix> >(coarseLevel, "PM");

      //
      // Build Ac = RAP
      //

      RCP<Matrix> AP;
      RCP<Matrix> AP00;
      RCP<Matrix> AP01;
      RCP<Matrix> AP02;
      RCP<Matrix> AP10;
      RCP<Matrix> AP11;
      RCP<Matrix> AP12;
      RCP<Matrix> AP20;
      RCP<Matrix> AP21;
      RCP<Matrix> AP22;

      {
        SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);

        AP   = Utils::Multiply(*A  , false, *P , false, AP, GetOStream(Statistics2));
        AP00 = Utils::Multiply(*A00, false, *PV, false, AP00, GetOStream(Statistics2));
        AP01 = Utils::Multiply(*A01, false, *PP, false, AP01, GetOStream(Statistics2));
        AP02 = Utils::Multiply(*A02, false, *PM, false, AP02, GetOStream(Statistics2));
        AP10 = Utils::Multiply(*A10, false, *PV, false, AP10, GetOStream(Statistics2));
        AP11 = Utils::Multiply(*A11, false, *PP, false, AP11, GetOStream(Statistics2));
        AP12 = Utils::Multiply(*A12, false, *PM, false, AP12, GetOStream(Statistics2));
        AP20 = Utils::Multiply(*A20, false, *PV, false, AP20, GetOStream(Statistics2));
        AP21 = Utils::Multiply(*A21, false, *PP, false, AP21, GetOStream(Statistics2));
        AP22 = Utils::Multiply(*A22, false, *PM, false, AP22, GetOStream(Statistics2));
      }

      RCP<Matrix> Ac;
      RCP<Matrix> Ac00;
      RCP<Matrix> Ac01;
      RCP<Matrix> Ac02;
      RCP<Matrix> Ac10;
      RCP<Matrix> Ac11;
      RCP<Matrix> Ac12;
      RCP<Matrix> Ac20;
      RCP<Matrix> Ac21;
      RCP<Matrix> Ac22;

      if (implicitTranspose_)
        {
          SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);

          Ac   = Utils::Multiply(*P , true, *AP  , false, Ac, GetOStream(Statistics2));
          Ac00 = Utils::Multiply(*PV, true, *AP00, false, Ac00, GetOStream(Statistics2));
          Ac01 = Utils::Multiply(*PV, true, *AP01, false, Ac01, GetOStream(Statistics2));
          Ac02 = Utils::Multiply(*PV, true, *AP02, false, Ac02, GetOStream(Statistics2));
          Ac10 = Utils::Multiply(*PP, true, *AP10, false, Ac10, GetOStream(Statistics2));
          Ac11 = Utils::Multiply(*PP, true, *AP11, false, Ac11, GetOStream(Statistics2));
          Ac12 = Utils::Multiply(*PP, true, *AP12, false, Ac12, GetOStream(Statistics2));
          Ac20 = Utils::Multiply(*PM, true, *AP20, false, Ac20, GetOStream(Statistics2));
          Ac21 = Utils::Multiply(*PM, true, *AP21, false, Ac21, GetOStream(Statistics2));
          Ac22 = Utils::Multiply(*PM, true, *AP22, false, Ac22, GetOStream(Statistics2));

        }
      else
        {

          SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);

          RCP<Matrix> R  = Get< RCP<Matrix> >(coarseLevel, "R" );
          RCP<Matrix> RV = Get< RCP<Matrix> >(coarseLevel, "RV");
          RCP<Matrix> RP = Get< RCP<Matrix> >(coarseLevel, "RP");
          RCP<Matrix> RM = Get< RCP<Matrix> >(coarseLevel, "RM");

          Ac   = Utils::Multiply(*R , false, *AP  , false, Ac, GetOStream(Statistics2));
          Ac00 = Utils::Multiply(*RV, false, *AP00, false, Ac00, GetOStream(Statistics2));
          Ac01 = Utils::Multiply(*RV, false, *AP01, false, Ac01, GetOStream(Statistics2));
          Ac02 = Utils::Multiply(*RV, false, *AP02, false, Ac02, GetOStream(Statistics2));
          Ac10 = Utils::Multiply(*RP, false, *AP10, false, Ac10, GetOStream(Statistics2));
          Ac11 = Utils::Multiply(*RP, false, *AP11, false, Ac11, GetOStream(Statistics2));
          Ac12 = Utils::Multiply(*RP, false, *AP12, false, Ac12, GetOStream(Statistics2));
          Ac20 = Utils::Multiply(*RM, false, *AP20, false, Ac20, GetOStream(Statistics2));
          Ac21 = Utils::Multiply(*RM, false, *AP21, false, Ac21, GetOStream(Statistics2));
          Ac22 = Utils::Multiply(*RM, false, *AP22, false, Ac22, GetOStream(Statistics2));

        }
      // FINISHED MAKING COARSE BLOCKS

      Set(coarseLevel, "A"  , Ac  );
      Set(coarseLevel, "A00", Ac00);
      Set(coarseLevel, "A01", Ac01);
      Set(coarseLevel, "A02", Ac02);
      Set(coarseLevel, "A10", Ac10);
      Set(coarseLevel, "A11", Ac11);
      Set(coarseLevel, "A12", Ac12);
      Set(coarseLevel, "A20", Ac20);
      Set(coarseLevel, "A21", Ac21);
      Set(coarseLevel, "A22", Ac22);


    }


  }
  void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& fineLevel, Level& coarseLevel) const {
    {
      FactoryMonitor m(*this, "Computing Ac", coarseLevel);
      std::ostringstream levelstr;
      levelstr << coarseLevel.GetLevelID();

      TEUCHOS_TEST_FOR_EXCEPTION(hasDeclaredInput_==false, Exceptions::RuntimeError,
                                 "MueLu::RAPFactory::Build(): CallDeclareInput has not been called before Build!");

      // Set "Keeps" from params
      const Teuchos::ParameterList& pL = GetParameterList();
      if (pL.get<bool>("Keep AP Pattern"))
        coarseLevel.Keep("AP Pattern",  this);
      if (pL.get<bool>("Keep RAP Pattern"))
        coarseLevel.Keep("RAP Pattern", this);

      RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
      RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P"), AP, Ac;

      // Reuse pattern if available (multiple solve)
      if (coarseLevel.IsAvailable("AP Pattern", this)) {
        GetOStream(Runtime0) << "Ac: Using previous AP pattern" << std::endl;

        AP = Get< RCP<Matrix> >(coarseLevel, "AP Pattern");
      }

      {
        SubFactoryMonitor subM(*this, "MxM: A x P", coarseLevel);

        AP = Utils::Multiply(*A, false, *P, false, AP, GetOStream(Statistics2),true,true,std::string("MueLu::A*P-")+levelstr.str());
      }
      if (pL.get<bool>("Keep AP Pattern"))
        Set(coarseLevel, "AP Pattern", AP);

      // Reuse coarse matrix memory if available (multiple solve)
      if (coarseLevel.IsAvailable("RAP Pattern", this)) {
        GetOStream(Runtime0) << "Ac: Using previous RAP pattern" << std::endl;

        Ac = Get< RCP<Matrix> >(coarseLevel, "RAP Pattern");
        // Some eigenvalue may have been cached with the matrix in the previous run.
        // As the matrix values will be updated, we need to reset the eigenvalue.
        Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
      }

      // If we do not modify matrix later, allow optimization of storage.
      // This is necessary for new faster Epetra MM kernels.
      bool doOptimizeStorage = !pL.get<bool>("RepairMainDiagonal");

      const bool doTranspose    = true;
      const bool doFillComplete = true;
      if (pL.get<bool>("transpose: use implicit") == true) {
        SubFactoryMonitor m2(*this, "MxM: P' x (AP) (implicit)", coarseLevel);
        Ac = Utils::Multiply(*P,  doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), doFillComplete, doOptimizeStorage,std::string("MueLu::R*(AP)-implicit-")+levelstr.str());

      } else {
        RCP<Matrix> R = Get< RCP<Matrix> >(coarseLevel, "R");

        SubFactoryMonitor m2(*this, "MxM: R x (AP) (explicit)", coarseLevel);
        Ac = Utils::Multiply(*R, !doTranspose, *AP, !doTranspose, Ac, GetOStream(Statistics2), doFillComplete, doOptimizeStorage,std::string("MueLu::R*(AP)-explicit-")+levelstr.str());
      }

      CheckRepairMainDiagonal(Ac);

      if (IsPrint(Statistics1)) {
        RCP<ParameterList> params = rcp(new ParameterList());;
        params->set("printLoadBalancingInfo", true);
        params->set("printCommInfo",          true);
        GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", params);
      }

      Set(coarseLevel, "A",           Ac);
      if (pL.get<bool>("Keep RAP Pattern"))
        Set(coarseLevel, "RAP Pattern", Ac);
    }

    if (transferFacts_.begin() != transferFacts_.end()) {
      SubFactoryMonitor m(*this, "Projections", coarseLevel);

      // call Build of all user-given transfer factories
      for (std::vector<RCP<const FactoryBase> >::const_iterator it = transferFacts_.begin(); it != transferFacts_.end(); ++it) {
        RCP<const FactoryBase> fac = *it;
        GetOStream(Runtime0) << "RAPFactory: call transfer factory: " << fac->description() << std::endl;
        fac->CallBuild(coarseLevel);
        // Coordinates transfer is marginally different from all other operations
        // because it is *optional*, and not required. For instance, we may need
        // coordinates only on level 4 if we start repartitioning from that level,
        // but we don't need them on level 1,2,3. As our current Hierarchy setup
        // assumes propagation of dependencies only through three levels, this
        // means that we need to rely on other methods to propagate optional data.
        //
        // The method currently used is through RAP transfer factories, which are
        // simply factories which are called at the end of RAP with a single goal:
        // transfer some fine data to coarser level. Because these factories are
        // kind of outside of the mainline factories, they behave different. In
        // particular, we call their Build method explicitly, rather than through
        // Get calls. This difference is significant, as the Get call is smart
        // enough to know when to release all factory dependencies, and Build is
        // dumb. This led to the following CoordinatesTransferFactory sequence:
        // 1. Request level 0
        // 2. Request level 1
        // 3. Request level 0
        // 4. Release level 0
        // 5. Release level 1
        //
        // The problem is missing "6. Release level 0". Because it was missing,
        // we had outstanding request on "Coordinates", "Aggregates" and
        // "CoarseMap" on level 0.
        //
        // This was fixed by explicitly calling Release on transfer factories in
        // RAPFactory. I am still unsure how exactly it works, but now we have
        // clear data requests for all levels.
        coarseLevel.Release(*fac);
      }
    }

  }
  void RebalanceTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level& coarseLevel) const {
    FactoryMonitor m(*this, "Build", coarseLevel);

    const ParameterList& pL = GetParameterList();

    int implicit   = !pL.get<bool>("repartition: rebalance P and R");
    int writeStart = pL.get<int> ("write start");
    int writeEnd   = pL.get<int> ("write end");

    if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd && IsAvailable(fineLevel, "Coordinates")) {
      std::string fileName = "coordinates_level_0.m";
      RCP<MultiVector> fineCoords = fineLevel.Get< RCP<MultiVector> >("Coordinates");
      if (fineCoords != Teuchos::null)
        Utils::Write(fileName, *fineCoords);
    }

    RCP<const Import> importer = Get<RCP<const Import> >(coarseLevel, "Importer");
    if (implicit) {
      // Save the importer, we'll need it for solve
      coarseLevel.Set("Importer", importer, NoFactory::get());
    }

    RCP<ParameterList> params = rcp(new ParameterList());;
    params->set("printLoadBalancingInfo", true);
    params->set("printCommInfo",          true);

    std::string transferType = pL.get<std::string>("type");
    if (transferType == "Interpolation") {
      RCP<Matrix> originalP = Get< RCP<Matrix> >(coarseLevel, "P");

      {
        // This line must be after the Get call
        SubFactoryMonitor m1(*this, "Rebalancing prolongator", coarseLevel);

        if (implicit || importer.is_null()) {
          GetOStream(Runtime0) << "Using original prolongator" << std::endl;
          Set(coarseLevel, "P", originalP);

        } else {
          // P is the transfer operator from the coarse grid to the fine grid.
          // P must transfer the data from the newly reordered coarse A to the
          // (unchanged) fine A.  This means that the domain map (coarse) of P
          // must be changed according to the new partition. The range map
          // (fine) is kept unchanged.
          //
          // The domain map of P must match the range map of R.  See also note
          // below about domain/range map of R and its implications for P.
          //
          // To change the domain map of P, P needs to be fillCompleted again
          // with the new domain map.  To achieve this, P is copied into a new
          // matrix that is not fill-completed.  The doImport() operation is
          // just used here to make a copy of P: the importer is trivial and
          // there is no data movement involved.  The reordering actually
          // happens during the fillComplete() with domainMap == importer->getTargetMap().
          RCP<Matrix> rebalancedP = originalP;
          RCP<const CrsMatrixWrap> crsOp = rcp_dynamic_cast<const CrsMatrixWrap>(originalP);
          TEUCHOS_TEST_FOR_EXCEPTION(crsOp == Teuchos::null, Exceptions::BadCast, "Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");

          RCP<CrsMatrix> rebalancedP2 = crsOp->getCrsMatrix();
          TEUCHOS_TEST_FOR_EXCEPTION(rebalancedP2 == Teuchos::null, std::runtime_error, "Xpetra::CrsMatrixWrap doesn't have a CrsMatrix");

          {
            SubFactoryMonitor subM(*this, "Rebalancing prolongator -- fast map replacement", coarseLevel);

            RCP<const Import> newImporter = ImportFactory::Build(importer->getTargetMap(), rebalancedP->getColMap());
            rebalancedP2->replaceDomainMapAndImporter(importer->getTargetMap(), newImporter);
          }

          ///////////////////////// EXPERIMENTAL
          // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps.
          // That is probably something for an external permutation factory
          //   if (originalP->IsView("stridedMaps"))
          //     rebalancedP->CreateView("stridedMaps", originalP);
          ///////////////////////// EXPERIMENTAL

          Set(coarseLevel, "P", rebalancedP);

          if (IsPrint(Statistics1))
            GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedP, "P (rebalanced)", params);
        }
      }

      if (importer.is_null()) {
        if (IsAvailable(coarseLevel, "Nullspace"))
          Set(coarseLevel, "Nullspace", Get<RCP<MultiVector> >(coarseLevel, "Nullspace"));

        if (pL.isParameter("Coordinates") && pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null)
          if (IsAvailable(coarseLevel, "Coordinates"))
            Set(coarseLevel, "Coordinates", Get< RCP<MultiVector> >(coarseLevel, "Coordinates"));

        return;
      }

      if (pL.isParameter("Coordinates") &&
          pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null &&
          IsAvailable(coarseLevel, "Coordinates")) {
        RCP<MultiVector> coords = Get<RCP<MultiVector> >(coarseLevel, "Coordinates");

        // This line must be after the Get call
        SubFactoryMonitor subM(*this, "Rebalancing coordinates", coarseLevel);

        LO nodeNumElts = coords->getMap()->getNodeNumElements();

        // If a process has no matrix rows, then we can't calculate blocksize using the formula below.
        LO myBlkSize = 0, blkSize = 0;
        if (nodeNumElts > 0)
          myBlkSize = importer->getSourceMap()->getNodeNumElements() / nodeNumElts;
        maxAll(coords->getMap()->getComm(), myBlkSize, blkSize);

        RCP<const Import> coordImporter;
        if (blkSize == 1) {
          coordImporter = importer;

        } else {
          // NOTE: there is an implicit assumption here: we assume that dof any node are enumerated consequently
          // Proper fix would require using decomposition similar to how we construct importer in the
          // RepartitionFactory
          RCP<const Map> origMap   = coords->getMap();
          GO             indexBase = origMap->getIndexBase();

          ArrayView<const GO> OEntries   = importer->getTargetMap()->getNodeElementList();
          LO                  numEntries = OEntries.size()/blkSize;
          ArrayRCP<GO> Entries(numEntries);
          for (LO i = 0; i < numEntries; i++)
            Entries[i] = (OEntries[i*blkSize]-indexBase)/blkSize + indexBase;

          RCP<const Map> targetMap = MapFactory::Build(origMap->lib(), origMap->getGlobalNumElements(), Entries(), indexBase, origMap->getComm());
          coordImporter = ImportFactory::Build(origMap, targetMap);
        }

        RCP<MultiVector> permutedCoords  = MultiVectorFactory::Build(coordImporter->getTargetMap(), coords->getNumVectors());
        permutedCoords->doImport(*coords, *coordImporter, Xpetra::INSERT);

        if (pL.get<bool>("useSubcomm") == true)
          permutedCoords->replaceMap(permutedCoords->getMap()->removeEmptyProcesses());

        Set(coarseLevel, "Coordinates", permutedCoords);

        std::string fileName = "rebalanced_coordinates_level_" + toString(coarseLevel.GetLevelID()) + ".m";
        if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd && permutedCoords->getMap() != Teuchos::null)
          Utils::Write(fileName, *permutedCoords);
      }

      if (IsAvailable(coarseLevel, "Nullspace")) {
        RCP<MultiVector> nullspace = Get< RCP<MultiVector> >(coarseLevel, "Nullspace");

        // This line must be after the Get call
        SubFactoryMonitor subM(*this, "Rebalancing nullspace", coarseLevel);

        RCP<MultiVector> permutedNullspace = MultiVectorFactory::Build(importer->getTargetMap(), nullspace->getNumVectors());
        permutedNullspace->doImport(*nullspace, *importer, Xpetra::INSERT);

        if (pL.get<bool>("useSubcomm") == true)
          permutedNullspace->replaceMap(permutedNullspace->getMap()->removeEmptyProcesses());

        Set(coarseLevel, "Nullspace", permutedNullspace);
      }

    } else {
      if (pL.get<bool>("transpose: use implicit") == false) {
        RCP<Matrix> originalR = Get< RCP<Matrix> >(coarseLevel, "R");

        SubFactoryMonitor m2(*this, "Rebalancing restriction", coarseLevel);

        if (implicit || importer.is_null()) {
          GetOStream(Runtime0) << "Using original restrictor" << std::endl;
          Set(coarseLevel, "R", originalR);

        } else {
          RCP<Matrix> rebalancedR;
          {
            SubFactoryMonitor subM(*this, "Rebalancing restriction -- fusedImport", coarseLevel);

            RCP<Map> dummy;         // meaning: use originalR's domain map.
            rebalancedR = MatrixFactory::Build(originalR, *importer, dummy, importer->getTargetMap());
          }
          Set(coarseLevel, "R", rebalancedR);

          ///////////////////////// EXPERIMENTAL
          // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps.
          // That is probably something for an external permutation factory
          // if (originalR->IsView("stridedMaps"))
          //   rebalancedR->CreateView("stridedMaps", originalR);
          ///////////////////////// EXPERIMENTAL

          if (IsPrint(Statistics1))
            GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedR, "R (rebalanced)", params);
        }
      }
    }
  }
int main(int argc, char *argv[])
{
    //for random number generator
    srand((unsigned)time(NULL));

    //preprocessing
    if(argc != 5){
        std::cout<<"No enough arugments\n";
        exit(0);
    }
    int k, n_threads;
    double lambda, wall_timer;
    std::string data_dir, meta_dir, train_dir, test_dir;
    std::string line;
    int row, col, train_size, test_size;

    std::cout<<"----------------------------------------------"<<std::endl;
    std::cout<<"exec filename: "<<argv[0]<<std::endl;
    wall_timer = omp_get_wtime();
    k = atoi(argv[1]);
    lambda = atof(argv[2]);
    n_threads = atoi(argv[3]);
    data_dir = argv[4];

    //set number of threads
    omp_set_num_threads(n_threads); 
   
    std::vector<std::string> files(3,"");
    files.reserve(3);
    getdir(data_dir, files);

    meta_dir = data_dir+files[0];
    train_dir = data_dir+files[1];
    test_dir = data_dir+files[2];
 

    //std::cout<<"Using [rank, lambda, n_threads, directory]->>: "<<"[ "<<k<<", "<<lambda<<", "<<n_threads<<", "<<data_dir<<"* ]\n";
    //std::cout<<meta_dir<<" "<<train_dir<<" "<<test_dir<<" "<<"\n";

    //read meta file
    std::ifstream meta_file(meta_dir.c_str());
        //get rows and clos
    std::getline(meta_file, line);
    std::stringstream meta_ss(line);
    meta_ss >> row >> col;
        //get size of train
    std::getline(meta_file, line);
    std::stringstream train_ss(line);
    train_ss >> train_size;
        //get size of test
    std::getline(meta_file, line);
    std::stringstream test_ss(line);
    test_ss >> test_size;
    //std::cout<<"row: "<<row<<" col: "<<col<<" train size: "<<train_size<<" test size: "<<test_size<<"\n";


    //read from training file, and construct matrix
    int *Ix = new int[train_size];
    int *Jx = new int[train_size];
    double *xx = new double[train_size];

    int *Iy = new int[train_size];
    int *Jy = new int[train_size];
    double *yy = new double[train_size];

    int *cc = new int[col](); //number of non zeros in each column
    int *rc = new int[row]();


    std::ifstream train_file(train_dir.c_str());
    std::vector<T> train_list;
    train_list.reserve(train_size);

    for(int i=0;i<train_size;i++){
	train_file >> Ix[i];
	train_file >> Jx[i];
	train_file >> xx[i];
        Ix[i] = Ix[i] - offset;
        Jx[i] = Jx[i] - offset;
        train_list.push_back(T(Ix[i], Jx[i], xx[i]));
        cc[Jx[i]] = cc[Jx[i]] + 1;
        rc[Ix[i]]= rc[Ix[i]] + 1;
    }
    Eigen::SparseMatrix<double> R(row, col);
    R.setFromTriplets(train_list.begin(), train_list.end());



    Eigen::SparseMatrix<double> R_tsp = R.transpose();
    int i_count = 0;
    for(int i=0;i<R_tsp.outerSize(); i++){
        for(Eigen::SparseMatrix<double>::InnerIterator it(R_tsp, i); it; ++it){
            Iy[i_count] = it.row();
            Jy[i_count] = it.col();
            yy[i_count] = it.value();
            i_count++;
        }
    }
    
    //std::cout<<"cc(1)=2->>: "<<cc[1]<<" cc(14)=1->>: "<<cc[14]<<" cc(32)=8->>: "<<cc[32]<<"\n";
    //std::cout<<"xx(0)=4->>: "<<xx[0]<<" xx(7)=5->>: "<<xx[7]<<" xx(38)=3->>: "<<xx[7]<<"\n";
    //std::cout<<"rc(4)=23->>: "<<rc[4]<<" rc(6)=1->>: "<<rc[6]<<" rc[12]=148->>: "<<rc[12]<<"\n";
    //std::cout<<"yy(2)=5->>: "<<yy[2]<<" yy(8)=4->>: "<<yy[8]<<" yy(19)=3->>: "<<yy[19]<<"\n";

    //read from testing file, and construct matrix
    
    int *Ixt = new int[test_size];
    int *Jxt = new int[test_size];
    double *xxt = new double[test_size];

    std::ifstream test_file(test_dir.c_str());
    std::vector<T> test_list;
    test_list.reserve(test_size);

    for(int i=0;i<test_size;i++){
    	test_file >> Ixt[i];
        test_file >> Jxt[i];
        test_file >> xxt[i];
        Ixt[i] = Ixt[i] - offset;
        Jxt[i] = Jxt[i] - offset;
        test_list.push_back(T(Ixt[i], Jxt[i], xx[i]));
    }
    Eigen::SparseMatrix<double> Rt(row, col);
    Rt.setFromTriplets(test_list.begin(), test_list.end());

    int maxiter = 10;

    Eigen::MatrixXd U(k, row);
    Eigen::MatrixXd M(k, col);

    //generate random numbers within 0 and 1 for U, and M
    #pragma omp parallel for
    for(int i=0;i<k;i++){
        for(int j=0;j<row;j++){
            U(i,j) = (double) rand() / (double) RAND_MAX;
        }
        for(int j=0;j<col;j++){
            M(i,j) = (double) rand() / (double) RAND_MAX;
        }
    }
    //preprocessing for parallelization
    int *cci = new int[col];
    int *rci = new int[row];
    int pre_count;

    pre_count = 0;
    for(int i=0;i<col;i++){
        cci[i] = pre_count;
        pre_count = cci[i] + cc[i];
    }

    pre_count = 0;
    for(int i=0;i<row;i++){
       rci[i] = pre_count;
       pre_count = rci[i] + rc[i];
    }


    std::cout<<"walltime spent on preprocessing data: "<<omp_get_wtime() - wall_timer<<"\n";

    //std::cout<<"R_tsp: "<<R_tsp.size()<<" R_tsp(4999, 1825) = 3, the output is: "<<R_tsp.coeffRef(4999,1825)<<"\n";

    //for small
    //std::cout<<"R size: "<<R.size()<<" R(1825, 4999) = 3, the output is: "<<R.coeffRef(1825,4999)<<"\n";
    //std::cout<<"Rt size: "<<Rt.size()<<" Rt(1395, 4999) = 3, the output is: "<<Rt.coeffRef(1395,4999)<<"\n";

    //for medium
    //std::cout<<"R size: "<<R.size()<<" R(4750, 3951) is 4, the output is:  "<<R.coeffRef(4750,3951)<<"\n";
    //std::cout<<"Rt size: "<<Rt.size()<<" R(2128, 3951) is 3, the output is: "<<Rt.coeffRef(2128,3951)<<"\n";

//------------------------------begin processing----------------------------------------------//
    double accu_sum=0;
    double rmse_test=0;
    double rmse_train=0;
    Eigen::MatrixXd U_tps = U.transpose();
    #pragma omp parallel for reduction(+:accu_sum) 
    for(int i=0;i<train_size;i++){
        accu_sum += pow(U_tps.row(Ix[i])*M.col(Jx[i]) - xx[i], 2);
    }
    rmse_train = sqrt(accu_sum/train_size);

    accu_sum = 0;
    #pragma omp parallel for reduction(+:accu_sum)
    for(int i=0;i<test_size;i++){
        accu_sum += pow(U_tps.row(Ixt[i])*M.col(Jxt[i]) - xxt[i], 2);
    }
    rmse_test = sqrt(accu_sum/test_size);

    Eigen::MatrixXd iden = Eigen::MatrixXd::Identity(k,k);

    std::cout<<"start with rmse on train: "<< rmse_train <<" rmse on test: "<< rmse_test << " n_threads: "<<n_threads<<std::endl;
    double total_timer, end_timer;
    for(int t=0;t<maxiter;t++){
	printf("iter: %d\n",t+1);

	printf("Minimize M while fixing U ...");
        wall_timer = omp_get_wtime();
        //minimize M while fixing U
	#pragma omp parallel for schedule(dynamic, 1)
        for(int i=0;i<col;i++){
            if( cc[i]>0 ){
                //construct subU, and subR
                Eigen::MatrixXd subU(k, cc[i]);
                Eigen::VectorXd subR(cc[i]);
		int j=cci[i];
                for(int l=0; l<cc[i];l++){
                    subU.col(l) = U.col(Ix[j+l]);
		    subR[l] = xx[j+l];
                }
                M.col(i) = (lambda*iden+subU*subU.transpose()).llt().solve((subU*subR));
            }else{
                M.col(i) = Eigen::VectorXd::Zero(k);
            }
        
        }
        end_timer = omp_get_wtime();
	total_timer += end_timer-wall_timer;	
        printf("%0.2f seconds\n", end_timer - wall_timer);

	printf("Minimize U whilt fixing M ...");
	wall_timer = omp_get_wtime();
        //minimize U while fixing M
        #pragma omp parallel for schedule(dynamic, 1)
        for(int i=0;i<row;i++){
            if( rc[i] > 0){
                //construct subM, and subR
                Eigen::MatrixXd subM(k, rc[i]);
                Eigen::VectorXd subR(rc[i]);
		int j=rci[i];
                for(int l=0;l<rc[i];l++){
                    subM.col(l) = M.col(Iy[j+l]);
		    subR[l] = yy[j+l];
                }
                U.col(i) = (lambda*iden+subM*subM.transpose()).llt().solve((subM*subR));
            }else{
                U.col(i) = Eigen::VectorXd::Zero(k);
            }
        }
	end_timer = omp_get_wtime();
	total_timer += end_timer-wall_timer;
	printf("%0.2f seconds\n", end_timer - wall_timer);

	Eigen::MatrixXd U_tps = U.transpose();
    
        accu_sum = 0;
        #pragma omp parallel for reduction(+:accu_sum)
        for(int i=0;i<train_size;i++){
            accu_sum += pow(U_tps.row(Ix[i])*M.col(Jx[i]) - xx[i], 2);
        }
        rmse_train = sqrt(accu_sum/train_size);
        
        accu_sum = 0;
        #pragma omp parallel for reduction(+:accu_sum)
        for(int i=0;i<test_size;i++){
            accu_sum += pow(U_tps.row(Ixt[i])*M.col(Jxt[i]) - xxt[i], 2);
        }
        rmse_test = sqrt(accu_sum/test_size);

        printf("rmse on train: %0.6f, rmse on test: %0.6f\n",rmse_train, rmse_test);
    }
    printf("total running time: %0.2f\n",total_timer);

    //free variables
    delete[] Ix;
    delete[] Jx;
    delete[] xx;
    delete[] Iy;
    delete[] Jy;
    delete[] yy;
    delete[] Ixt;
    delete[] Jxt;
    delete[] xxt;
    delete[] rci;
    delete[] cci;
}
예제 #9
0
void Tempo::sub(Tempo* tempo){
	subH(tempo->getHoras());
	subM(tempo->getMinutos());
	subS(tempo->getSegundos());
}
void RebalanceBlockRestrictionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);
  //const Teuchos::ParameterList & pL = GetParameterList();

  RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));

  Teuchos::RCP<Matrix> originalTransferOp = Teuchos::null;
  originalTransferOp = Get< RCP<Matrix> >(coarseLevel, "R");

  RCP<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> > bOriginalTransferOp = Teuchos::rcp_dynamic_cast<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> >(originalTransferOp);
  TEUCHOS_TEST_FOR_EXCEPTION(bOriginalTransferOp==Teuchos::null, Exceptions::BadCast, "MueLu::RebalanceBlockTransferFactory::Build: input matrix P or R is not of type BlockedCrsMatrix! error.");

  // plausibility check
  TEUCHOS_TEST_FOR_EXCEPTION(bOriginalTransferOp->Rows() != 2,Exceptions::RuntimeError, "MueLu::RebalanceBlockTransferFactory::Build: number of block rows of transfer operator is not equal 2. error.");
  TEUCHOS_TEST_FOR_EXCEPTION(bOriginalTransferOp->Cols() != 2,Exceptions::RuntimeError, "MueLu::RebalanceBlockTransferFactory::Build: number of block columns of transfer operator is not equal 2. error.");

  // rebuild rebalanced blocked P operator
  std::vector<GO> fullRangeMapVector;
  std::vector<GO> fullDomainMapVector;
  std::vector<RCP<const Map> > subBlockRRangeMaps;
  std::vector<RCP<const Map> > subBlockRDomainMaps;
  subBlockRRangeMaps.reserve(bOriginalTransferOp->Rows());       // reserve size for block P operators
  subBlockRDomainMaps.reserve(bOriginalTransferOp->Cols());       // reserve size for block P operators

  std::vector<Teuchos::RCP<Matrix> > subBlockRebR;
  subBlockRebR.reserve(bOriginalTransferOp->Cols());

  int curBlockId = 0;
  Teuchos::RCP<const Import> rebalanceImporter = Teuchos::null;
  std::vector<Teuchos::RCP<const FactoryManagerBase> >::const_iterator it;
  for (it = FactManager_.begin(); it != FactManager_.end(); ++it) {
    // begin SubFactoryManager environment
    SetFactoryManager fineSFM  (rcpFromRef(fineLevel),   *it);
    SetFactoryManager coarseSFM(rcpFromRef(coarseLevel), *it);

    rebalanceImporter = coarseLevel.Get<Teuchos::RCP<const Import> >("Importer", (*it)->GetFactory("Importer").get());

    // extract matrix block
    Teuchos::RCP<CrsMatrix> Rmii = bOriginalTransferOp->getMatrix(curBlockId, curBlockId);
    Teuchos::RCP<CrsMatrixWrap> Rwii = Teuchos::rcp(new CrsMatrixWrap(Rmii));
    Teuchos::RCP<Matrix> Rii = Teuchos::rcp_dynamic_cast<Matrix>(Rwii);

    Teuchos::RCP<Matrix> rebRii;
    if(rebalanceImporter != Teuchos::null) {
      std::stringstream ss; ss << "Rebalancing restriction block R(" << curBlockId << "," << curBlockId << ")";
      SubFactoryMonitor m1(*this, ss.str(), coarseLevel);
      {
        SubFactoryMonitor subM(*this, "Rebalancing restriction -- fusedImport", coarseLevel);
        // Note: The 3rd argument says to use originalR's domain map.

        RCP<Map> dummy;
        rebRii = MatrixFactory::Build(Rii,*rebalanceImporter,dummy,rebalanceImporter->getTargetMap());
      }

      RCP<ParameterList> params = rcp(new ParameterList());
      params->set("printLoadBalancingInfo", true);
      std::stringstream ss2; ss2 << "R(" << curBlockId << "," << curBlockId << ") rebalanced:";
      GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*rebRii, ss2.str(), params);
    } else {
      rebRii = Rii;
      RCP<ParameterList> params = rcp(new ParameterList());
      params->set("printLoadBalancingInfo", true);
      std::stringstream ss2; ss2 << "R(" << curBlockId << "," << curBlockId << ") not rebalanced:";
      GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*rebRii, ss2.str(), params);
    }

    // fix striding information for rebalanced diagonal block rebRii
    RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > rgRMapExtractor = bOriginalTransferOp->getRangeMapExtractor(); // original map extractor
    Teuchos::RCP<const StridedMap> orig_stridedRgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rgRMapExtractor->getMap(Teuchos::as<size_t>(curBlockId)));
    Teuchos::RCP<const Map> stridedRgMap = Teuchos::null;
    if(orig_stridedRgMap != Teuchos::null) {
      std::vector<size_t> stridingData = orig_stridedRgMap->getStridingData();
      Teuchos::ArrayView< const GlobalOrdinal > nodeRangeMapii = rebRii->getRangeMap()->getNodeElementList();
      stridedRgMap = StridedMapFactory::Build(
          originalTransferOp->getRangeMap()->lib(),
          Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
          nodeRangeMapii,
          rebRii->getRangeMap()->getIndexBase(),
          stridingData,
          originalTransferOp->getRangeMap()->getComm(),
          orig_stridedRgMap->getStridedBlockId(),
          orig_stridedRgMap->getOffset());
    }
    RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > doRMapExtractor = bOriginalTransferOp->getDomainMapExtractor(); // original map extractor
    Teuchos::RCP<const StridedMap> orig_stridedDoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(doRMapExtractor->getMap(Teuchos::as<size_t>(curBlockId)));
    Teuchos::RCP<const Map> stridedDoMap = Teuchos::null;
    if(orig_stridedDoMap != Teuchos::null) {
      std::vector<size_t> stridingData = orig_stridedDoMap->getStridingData();
      Teuchos::ArrayView< const GlobalOrdinal > nodeDomainMapii = rebRii->getDomainMap()->getNodeElementList();
      stridedDoMap = StridedMapFactory::Build(
          originalTransferOp->getDomainMap()->lib(),
          Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
          nodeDomainMapii,
          rebRii->getDomainMap()->getIndexBase(),
          stridingData,
          originalTransferOp->getDomainMap()->getComm(),
          orig_stridedDoMap->getStridedBlockId(),
          orig_stridedDoMap->getOffset());
    }

    TEUCHOS_TEST_FOR_EXCEPTION(stridedRgMap == Teuchos::null,Exceptions::RuntimeError, "MueLu::RebalanceBlockRestrictionFactory::Build: failed to generate striding information. error.");
    TEUCHOS_TEST_FOR_EXCEPTION(stridedDoMap == Teuchos::null,Exceptions::RuntimeError, "MueLu::RebalanceBlockRestrictionFactory::Build: failed to generate striding information. error.");

    // replace stridedMaps view in diagonal sub block
    if(rebRii->IsView("stridedMaps")) rebRii->RemoveView("stridedMaps");
    rebRii->CreateView("stridedMaps", stridedRgMap, stridedDoMap);

    // store rebalanced subblock
    subBlockRebR.push_back(rebRii);

    // append strided row map (= range map) to list of range maps.
    Teuchos::RCP<const Map> rangeMapii = rebRii->getRowMap("stridedMaps"); //rebRii->getRangeMap();
    subBlockRRangeMaps.push_back(rangeMapii);
    Teuchos::ArrayView< const GlobalOrdinal > nodeRangeMapii = rebRii->getRangeMap()->getNodeElementList();
    fullRangeMapVector.insert(fullRangeMapVector.end(), nodeRangeMapii.begin(), nodeRangeMapii.end());
    sort(fullRangeMapVector.begin(), fullRangeMapVector.end());

    // append strided col map (= domain map) to list of range maps.
    Teuchos::RCP<const Map> domainMapii = rebRii->getColMap("stridedMaps"); //rebRii->getDomainMap();
    subBlockRDomainMaps.push_back(domainMapii);
    Teuchos::ArrayView< const GlobalOrdinal > nodeDomainMapii = rebRii->getDomainMap()->getNodeElementList();
    fullDomainMapVector.insert(fullDomainMapVector.end(), nodeDomainMapii.begin(), nodeDomainMapii.end());
    sort(fullDomainMapVector.begin(), fullDomainMapVector.end());

    ////////////////////////////////////////////////////////////

    // rebalance null space
    if(rebalanceImporter != Teuchos::null)
    { // rebalance null space
      std::stringstream ss2; ss2 << "Rebalancing nullspace block(" << curBlockId << "," << curBlockId << ")";
      SubFactoryMonitor subM(*this, ss2.str(), coarseLevel);

      RCP<MultiVector> nullspace = coarseLevel.Get<RCP<MultiVector> >("Nullspace", (*it)->GetFactory("Nullspace").get());
      RCP<MultiVector> permutedNullspace = MultiVectorFactory::Build(rebalanceImporter->getTargetMap(), nullspace->getNumVectors());
      permutedNullspace->doImport(*nullspace, *rebalanceImporter, Xpetra::INSERT);

      // TODO think about this
      //if (pL.get<bool>("useSubcomm") == true) // TODO either useSubcomm is enabled everywhere or nowhere
      //permutedNullspace->replaceMap(permutedNullspace->getMap()->removeEmptyProcesses());

      coarseLevel.Set<RCP<MultiVector> >("Nullspace", permutedNullspace, (*it)->GetFactory("Nullspace").get());

    } // end rebalance null space
    else { // do nothing
      RCP<MultiVector> nullspace = coarseLevel.Get<RCP<MultiVector> >("Nullspace", (*it)->GetFactory("Nullspace").get());
      coarseLevel.Set<RCP<MultiVector> >("Nullspace", nullspace, (*it)->GetFactory("Nullspace").get());
    }

    ////////////////////////////////////////////////////////////

    curBlockId++;
  } // end for loop

  // extract map index base from maps of blocked P
  GO rangeIndexBase = originalTransferOp->getRangeMap()->getIndexBase();
  GO domainIndexBase= originalTransferOp->getDomainMap()->getIndexBase();

  // check this
  RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > rangeRMapExtractor = bOriginalTransferOp->getRangeMapExtractor(); // original map extractor
  Teuchos::ArrayView<GO> fullRangeMapGIDs(&fullRangeMapVector[0],fullRangeMapVector.size());
  Teuchos::RCP<const StridedMap> stridedRgFullMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rangeRMapExtractor->getFullMap());
  Teuchos::RCP<const Map > fullRangeMap = Teuchos::null;
  if(stridedRgFullMap != Teuchos::null) {
    std::vector<size_t> stridedData = stridedRgFullMap->getStridingData();
    fullRangeMap =
        StridedMapFactory::Build(
            originalTransferOp->getRangeMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullRangeMapGIDs,
            rangeIndexBase,
            stridedData,
            originalTransferOp->getRangeMap()->getComm(),
            stridedRgFullMap->getStridedBlockId(),
            stridedRgFullMap->getOffset());
  } else {
    fullRangeMap =
        MapFactory::Build(
            originalTransferOp->getRangeMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullRangeMapGIDs,
            rangeIndexBase,
            originalTransferOp->getRangeMap()->getComm());
  }

  RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > domainAMapExtractor = bOriginalTransferOp->getDomainMapExtractor();
  Teuchos::ArrayView<GO> fullDomainMapGIDs(&fullDomainMapVector[0],fullDomainMapVector.size());
  Teuchos::RCP<const StridedMap> stridedDoFullMap = Teuchos::rcp_dynamic_cast<const StridedMap>(domainAMapExtractor->getFullMap());
  Teuchos::RCP<const Map > fullDomainMap = Teuchos::null;
  if(stridedDoFullMap != Teuchos::null) {
    TEUCHOS_TEST_FOR_EXCEPTION(stridedDoFullMap==Teuchos::null, Exceptions::BadCast, "MueLu::BlockedPFactory::Build: full map in domain map extractor has no striding information! error.");
    std::vector<size_t> stridedData2 = stridedDoFullMap->getStridingData();
    fullDomainMap =
        StridedMapFactory::Build(
            originalTransferOp->getDomainMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullDomainMapGIDs,
            domainIndexBase,
            stridedData2,
            originalTransferOp->getDomainMap()->getComm(),
            stridedDoFullMap->getStridedBlockId(),
            stridedDoFullMap->getOffset());
  } else {

    fullDomainMap =
        MapFactory::Build(
            originalTransferOp->getDomainMap()->lib(),
            Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
            fullDomainMapGIDs,
            domainIndexBase,
            originalTransferOp->getDomainMap()->getComm());
  }

  // build map extractors
  Teuchos::RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > rangeMapExtractor  =
      Xpetra::MapExtractorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(fullRangeMap,  subBlockRRangeMaps);
  Teuchos::RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > domainMapExtractor =
      Xpetra::MapExtractorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(fullDomainMap, subBlockRDomainMaps);

  Teuchos::RCP<BlockedCrsMatrix> bRebR = Teuchos::rcp(new BlockedCrsMatrix(rangeMapExtractor,domainMapExtractor,10));
  for(size_t i = 0; i<subBlockRRangeMaps.size(); i++) {
    Teuchos::RCP<const CrsMatrixWrap> crsOpii = Teuchos::rcp_dynamic_cast<const CrsMatrixWrap>(subBlockRebR[i]);
    Teuchos::RCP<CrsMatrix> crsMatii = crsOpii->getCrsMatrix();
    bRebR->setMatrix(i,i,crsMatii);
  }

  bRebR->fillComplete();

  Set(coarseLevel, "R", Teuchos::rcp_dynamic_cast<Matrix>(bRebR)); // do nothing  // TODO remove this!

} // Build