void TpetraOperatorWrapper::apply(const Tpetra::MultiVector<ST,LO,GO,NT>& X, Tpetra::MultiVector<ST,LO,GO,NT>& Y, Teuchos::ETransp mode, ST alpha, ST beta) const
{
   if (!useTranspose_)
   {
       // allocate space for each vector
       RCP<Thyra::MultiVectorBase<ST> > tX;
       RCP<Thyra::MultiVectorBase<ST> > tY; 

       tX = Thyra::createMembers(thyraOp_->domain(),X.getNumVectors()); 
       tY = Thyra::createMembers(thyraOp_->range(),X.getNumVectors());

       Thyra::assign(tX.ptr(),0.0);
       Thyra::assign(tY.ptr(),0.0);

       // copy Tpetra X into Thyra X
       mapStrategy_->copyTpetraIntoThyra(X, tX.ptr());
       mapStrategy_->copyTpetraIntoThyra(Y, tY.ptr()); // if this matrix isn't block square, this probably won't work!

       // perform matrix vector multiplication
       thyraOp_->apply(Thyra::NOTRANS,*tX,tY.ptr(),alpha,beta);

       // copy Thyra Y back into Tpetra Y
       mapStrategy_->copyThyraIntoTpetra(tY, Y);
   }
   else
   {
       TEUCHOS_ASSERT(false);
   }
}
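
A minimal usage sketch of the alpha/beta semantics of the apply() above. The wrapper instance op and the maps domainMap/rangeMap are assumptions taken from a hypothetical surrounding application; ST, LO, GO, NT are the same typedefs used in the signature.

// Hedged usage sketch: "op", "domainMap", and "rangeMap" are assumed to come
// from the surrounding application code, not from this example.
typedef Tpetra::MultiVector<ST,LO,GO,NT> MV;
MV X (domainMap, 3);   // three right-hand-side columns
MV Y (rangeMap, 3);
X.putScalar (Teuchos::ScalarTraits<ST>::one ());
Y.putScalar (Teuchos::ScalarTraits<ST>::zero ());
// Y := alpha*Op*X + beta*Y; with alpha = 1 and beta = 0 this is a plain mat-vec.
op.apply (X, Y, Teuchos::NO_TRANS,
          Teuchos::ScalarTraits<ST>::one (),
          Teuchos::ScalarTraits<ST>::zero ());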
Example #2
void OverlappingRowMatrix<MatrixType>::applyTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> &X, 
						  Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 
						  Teuchos::ETransp mode, 
						  RangeScalar alpha,
						  RangeScalar beta) const
{
  // Note: This isn't AztecOO compliant.  But neither was Ifpack's version.
  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
			     "Ifpack2::OverlappingRowMatrix::apply ERROR: X.getNumVectors() != Y.getNumVectors().");

  RangeScalar zero = Teuchos::ScalarTraits<RangeScalar>::zero();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > x_ptr = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        y_ptr = Y.get2dViewNonConst();
  Y.putScalar(zero);
  size_t NumVectors = Y.getNumVectors();

  for (size_t i = 0 ; i < NumMyRowsA_ ; ++i) {
    size_t Nnz;
    // Use this class's getrow to make the below code simpler
    A_->getLocalRowCopy(i,Indices_(),Values_(),Nnz);
    if (mode==Teuchos::NO_TRANS){
      for (size_t j = 0 ; j < Nnz ; ++j) 
	for (size_t k = 0 ; k < NumVectors ; ++k)
	  y_ptr[k][i] += (RangeScalar)Values_[j] * (RangeScalar)x_ptr[k][Indices_[j]];      
    }
    else if (mode==Teuchos::TRANS){
      for (size_t j = 0 ; j < Nnz ; ++j) 
	for (size_t k = 0 ; k < NumVectors ; ++k)
	  y_ptr[k][Indices_[j]] += (RangeScalar)Values_[j] * (RangeScalar)x_ptr[k][i];
    }
    else { //mode==Teuchos::CONJ_TRANS
      for (size_t j = 0 ; j < Nnz ; ++j) 
	for (size_t k = 0 ; k < NumVectors ; ++k)
	  y_ptr[k][Indices_[j]] += Teuchos::ScalarTraits<RangeScalar>::conjugate((RangeScalar)Values_[j]) * (RangeScalar)x_ptr[k][i];
    }
  }

 for (size_t i = 0 ; i < NumMyRowsB_ ; ++i) {
    size_t Nnz;
    // Use this class's getrow to make the below code simpler
    ExtMatrix_->getLocalRowCopy(i,Indices_(),Values_(),Nnz);
    if (mode==Teuchos::NO_TRANS){
      for (size_t j = 0 ; j < Nnz ; ++j) 
	for (size_t k = 0 ; k < NumVectors ; ++k)
	  y_ptr[k][NumMyRowsA_+i] += (RangeScalar)Values_[j] * (RangeScalar)x_ptr[k][Indices_[j]];      
    }
    else if (mode==Teuchos::TRANS){
      for (size_t j = 0 ; j < Nnz ; ++j) 
	for (size_t k = 0 ; k < NumVectors ; ++k)
	  y_ptr[k][NumMyRowsA_+Indices_[j]] += (RangeScalar)Values_[j] * (RangeScalar)x_ptr[k][i];
    }
    else { //mode==Teuchos::CONJ_TRANS
      for (size_t j = 0 ; j < Nnz ; ++j) 
	for (size_t k = 0 ; k < NumVectors ; ++k)
	  y_ptr[k][NumMyRowsA_+Indices_[j]] += Teuchos::ScalarTraits<RangeScalar>::conjugate((RangeScalar)Values_[j]) * (RangeScalar)x_ptr[k][i];
    }
  }
}
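
The loops above are the standard row-wise sparse mat-vec pattern: copy each local row with getLocalRowCopy(), then accumulate into y. A stripped-down sketch of the NO_TRANS case for a single column, assuming a Tpetra::RowMatrix A and multivectors X and Y (with the usual Scalar/LocalOrdinal typedefs) from the surrounding code:

// Hedged sketch of the row-wise kernel (single column, NO_TRANS only);
// "A", "X", and "Y" are assumed to exist in the surrounding code.
Teuchos::Array<LocalOrdinal> ind (A->getNodeMaxNumRowEntries ());
Teuchos::Array<Scalar>       val (A->getNodeMaxNumRowEntries ());
Teuchos::ArrayRCP<const Scalar> x = X.getData (0);
Teuchos::ArrayRCP<Scalar>       y = Y.getDataNonConst (0);
for (size_t i = 0; i < A->getNodeNumRows (); ++i) {
  size_t nnz;
  A->getLocalRowCopy (i, ind (), val (), nnz);
  for (size_t j = 0; j < nnz; ++j)
    y[i] += val[j] * x[ind[j]];   // y(i) += A(i,:) * x
}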
void 
Chebyshev<MatrixType>::
applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
	  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
	  Teuchos::ETransp mode) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
   "Ifpack2::Chebyshev::applyMat(): X.getNumVectors() != Y.getNumVectors().");
  impl_.getMatrix ()->apply (X, Y, mode);
}
Example #4
void TomBlockRelaxation<MatrixType,ContainerType>::apply(
          const Tpetra::MultiVector<typename MatrixType::scalar_type,
                                    typename MatrixType::local_ordinal_type,
                                    typename MatrixType::global_ordinal_type,
                                    typename MatrixType::node_type>& X,
                Tpetra::MultiVector<typename MatrixType::scalar_type,
                                    typename MatrixType::local_ordinal_type,
                                    typename MatrixType::global_ordinal_type,
                                    typename MatrixType::node_type>& Y,
                Teuchos::ETransp mode,
                 Scalar alpha,
                 Scalar beta) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(isComputed() == false, std::runtime_error,
     "Ifpack2::TomBlockRelaxation::apply ERROR: isComputed() must be true prior to calling apply.");

  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
     "Ifpack2::TomBlockRelaxation::apply ERROR: X.getNumVectors() != Y.getNumVectors().");

  TEUCHOS_TEST_FOR_EXCEPTION(mode != Teuchos::NO_TRANS, std::runtime_error,
			     "Ifpack2::TomBlockRelaxation::apply ERORR: transpose modes not supported.");

  Time_->start(true);

  // If X and Y are pointing to the same memory location,
  // we need to create an auxiliary vector, Xcopy
  Teuchos::RCP< const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Xcopy;
  if (X.getLocalMV().getValues() == Y.getLocalMV().getValues())
    Xcopy = Teuchos::rcp( new Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(X) );
  else
    Xcopy = Teuchos::rcp( &X, false );

  if (ZeroStartingSolution_)
    Y.putScalar(0.0);

  // Flops are updated in each of the following.
  switch (PrecType_) {
  case Ifpack2::JACOBI:
    ApplyInverseJacobi(*Xcopy,Y);
    break;
  case Ifpack2::GS:
    ApplyInverseGS(*Xcopy,Y);
    break;
  case Ifpack2::SGS:
    ApplyInverseSGS(*Xcopy,Y);
    break;
  default:
    throw std::runtime_error("Ifpack2::TomBlockRelaxation::apply internal logic error.");
  }

  ++NumApply_;
  Time_->stop();
  ApplyTime_ += Time_->totalElapsedTime();
}
void ReorderFilter<MatrixType>::permuteOriginalToReorderedTempl(const Tpetra::MultiVector<DomainScalar,local_ordinal_type,global_ordinal_type,node_type> &originalX,
                                                                Tpetra::MultiVector<RangeScalar,local_ordinal_type,global_ordinal_type,node_type> &reorderedY) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(originalX.getNumVectors() != reorderedY.getNumVectors(), std::runtime_error,
                             "Ifpack2::ReorderFilter::permuteOriginalToReordered ERROR: X.getNumVectors() != Y.getNumVectors().");

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > x_ptr = originalX.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        y_ptr = reorderedY.get2dViewNonConst();

  for(size_t k=0; k < originalX.getNumVectors(); k++)
    for(local_ordinal_type i=0; (size_t)i< originalX.getLocalLength(); i++)
      y_ptr[k][perm_[i]] = (RangeScalar)x_ptr[k][i];
}
Example #6
void ReorderFilter<MatrixType>::permuteReorderedToOriginalTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> &reorderedX, 
								Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &originalY) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(reorderedX.getNumVectors() != originalY.getNumVectors(), std::runtime_error,
			     "Ifpack2::ReorderFilter::permuteReorderedToOriginal ERROR: X.getNumVectors() != Y.getNumVectors().");

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > x_ptr = reorderedX.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        y_ptr = originalY.get2dViewNonConst();

  for(size_t k=0; k < reorderedX.getNumVectors(); k++)
    for(LocalOrdinal i=0; (size_t)i< reorderedX.getLocalLength(); i++)
      y_ptr[k][reverseperm_[i]] = (RangeScalar)x_ptr[k][i];
}
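
The two permutation routines are inverses of each other (reverseperm_[perm_[i]] == i), so a round trip should reproduce the input. A hypothetical check, assuming the filter's non-templated public wrappers (which typically forward to the Templ variants shown here) and multivectors X, Xreordered, Xback built on the same map:

// Hedged round-trip sketch; "reorder_filter", "X", "Xreordered", and "Xback"
// are assumed from the surrounding code.
reorder_filter.permuteOriginalToReordered (X, Xreordered);
reorder_filter.permuteReorderedToOriginal (Xreordered, Xback);
// Xback should now match X entry for entry, because applying the two
// permutations in sequence is the identity.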
void Hiptmair<MatrixType>::
applyHiptmairSmoother(const Tpetra::MultiVector<typename MatrixType::scalar_type,
                      typename MatrixType::local_ordinal_type,
                      typename MatrixType::global_ordinal_type,
                      typename MatrixType::node_type>& X,
                      Tpetra::MultiVector<typename MatrixType::scalar_type,
                      typename MatrixType::local_ordinal_type,
                      typename MatrixType::global_ordinal_type,
                      typename MatrixType::node_type>& Y) const
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
    global_ordinal_type, node_type> MV;
  const scalar_type ZERO = STS::zero ();
  const scalar_type ONE = STS::one ();

  RCP<MV> res1 = rcp (new MV (A_->getRowMap (), X.getNumVectors ()));
  RCP<MV> vec1 = rcp (new MV (A_->getRowMap (), X.getNumVectors ()));
  RCP<MV> res2 = rcp (new MV (PtAP_->getRowMap (), X.getNumVectors ()));
  RCP<MV> vec2 = rcp (new MV (PtAP_->getRowMap (), X.getNumVectors ()));

  if (preOrPost_ == "pre" || preOrPost_ == "both") {
    // apply initial relaxation to primary space
    A_->apply (Y, *res1);
    res1->update (ONE, X, -ONE);
    vec1->putScalar (ZERO);
    ifpack2_prec1_->apply (*res1, *vec1);
    Y.update (ONE, *vec1, ONE);
  }

  // project to auxiliary space and smooth
  A_->apply (Y, *res1);
  res1->update (ONE, X, -ONE);
  P_->apply (*res1, *res2, Teuchos::TRANS);
  vec2->putScalar (ZERO);
  ifpack2_prec2_->apply (*res2, *vec2);
  P_->apply (*vec2, *vec1, Teuchos::NO_TRANS);
  Y.update (ONE,*vec1,ONE);

  if (preOrPost_ == "post" || preOrPost_ == "both") {
    // smooth again on primary space
    A_->apply (Y, *res1);
    res1->update (ONE, X, -ONE);
    vec1->putScalar (ZERO);
    ifpack2_prec1_->apply (*res1, *vec1);
    Y.update (ONE, *vec1, ONE);
  }
}
void BorderedOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node >::apply(
     const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node >& X,
           Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node >& Y,
     Teuchos::ETransp mode, 
     Scalar coefAx, 
     Scalar coefY ) const 
{
  //bool opHasTrans = A_->hasTransposeApply();
  //TEUCHOS_TEST_FOR_EXCEPTION( mode  &&  !opHasTrans, std::runtime_error,
  //"Ifpack2::BorderedOperator::apply() ERROR: The operator does not implement transpose.");
  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
     "Ifpack2::BorderedOperator::apply() ERROR: X.getNumVectors() != Y.getNumVectors().");
  A_->apply(X, Y, mode, coefAx, coefY );
}
void ReorderFilter<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
       Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  typedef Teuchos::ScalarTraits<scalar_type> STS;

  // Note: This isn't AztecOO compliant.  But neither was Ifpack's version.
  // Note: The localized maps mean the matvec is trivial (and has no import)
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::ReorderFilter::apply: X.getNumVectors() != Y.getNumVectors().");

  const scalar_type zero = STS::zero ();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const scalar_type> > x_ptr = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<scalar_type> > y_ptr = Y.get2dViewNonConst();

  Y.putScalar (zero);
  const size_t NumVectors = Y.getNumVectors ();

  for (size_t i = 0; i < A_->getNodeNumRows (); ++i) {
    size_t Nnz;
    // Use this class's getrow to make the below code simpler
    getLocalRowCopy (i, Indices_ (), Values_ (), Nnz);
    if (mode == Teuchos::NO_TRANS) {
      for (size_t j = 0; j < Nnz; ++j) {
        for (size_t k = 0; k < NumVectors; ++k) {
          y_ptr[k][i] += Values_[j] * x_ptr[k][Indices_[j]];
        }
      }
    }
    else if (mode == Teuchos::TRANS) {
      for (size_t j = 0; j < Nnz; ++j) {
        for (size_t k = 0; k < NumVectors; ++k) {
          y_ptr[k][Indices_[j]] += Values_[j] * x_ptr[k][i];
        }
      }
    }
    else { //mode==Teuchos::CONJ_TRANS
      for (size_t j = 0; j < Nnz; ++j) {
        for (size_t k = 0; k < NumVectors; ++k) {
          y_ptr[k][Indices_[j]] += STS::conjugate(Values_[j]) * x_ptr[k][i];
        }
      }
    }
  }
}
Example #10
void TomBlockRelaxation<MatrixType,ContainerType>::DoJacobi(const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X, Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
{
  size_t NumVectors = X.getNumVectors();
  Scalar one=Teuchos::ScalarTraits<Scalar>::one();
  // Note: Flop counts copied naively from Ifpack.

  if (OverlapLevel_ == 0) {
    // Non-overlapping Jacobi
    for (LocalOrdinal i = 0 ; i < NumLocalBlocks_ ; i++) {     
      // may happen that a partition is empty
      if (Containers_[i]->getNumRows() == 0) continue;
      Containers_[i]->apply(X,Y,Teuchos::NO_TRANS,DampingFactor_,one);     
      ApplyFlops_ += NumVectors * 2 * NumGlobalRows_;
    }
  }
  else {
    // Overlapping Jacobi
    for (LocalOrdinal i = 0 ; i < NumLocalBlocks_ ; i++) {
      // may happen that a partition is empty
      if (Containers_[i]->getNumRows() == 0) continue;
      Containers_[i]->weightedApply(X,Y,*W_,Teuchos::NO_TRANS,DampingFactor_,one);
      // NOTE: do not count (for simplicity) the flops due to overlapping rows
      ApplyFlops_ += NumVectors * 4 * NumGlobalRows_;
    }
  }
}
void SingletonFilter<MatrixType>::CreateReducedRHSTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& LHS,
                                                        const Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& RHS,
                                                        Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& ReducedRHS)
{
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const RangeScalar > > RHS_ptr = RHS.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > LHS_ptr = LHS.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        ReducedRHS_ptr = ReducedRHS.get2dViewNonConst();

  size_t NumVectors = LHS.getNumVectors();

  for (size_t i = 0 ; i < NumRows_ ; ++i)
    for (size_t k = 0 ; k < NumVectors ; ++k)
      ReducedRHS_ptr[k][i] = RHS_ptr[k][InvReorder_[i]];

  for (size_t i = 0 ; i < NumRows_ ; ++i) {
    LocalOrdinal ii = InvReorder_[i];
    size_t Nnz;
    A_->getLocalRowCopy(ii,Indices_(),Values_(),Nnz);

    for (size_t j = 0 ; j < Nnz ; ++j) {
      if (Reorder_[Indices_[j]] == -1) {
        for (size_t k = 0 ; k < NumVectors ; ++k)
          ReducedRHS_ptr[k][i] -= (RangeScalar)Values_[j] * (RangeScalar)LHS_ptr[k][Indices_[j]];
      }
    }
  }
}
Example #12
void TomBlockRelaxation<MatrixType,ContainerType>::applyMat(
          const Tpetra::MultiVector<typename MatrixType::scalar_type,
                                    typename MatrixType::local_ordinal_type,
                                    typename MatrixType::global_ordinal_type,
                                    typename MatrixType::node_type>& X,
                Tpetra::MultiVector<typename MatrixType::scalar_type,
                                    typename MatrixType::local_ordinal_type,
                                    typename MatrixType::global_ordinal_type,
                                    typename MatrixType::node_type>& Y,
             Teuchos::ETransp mode) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(isComputed() == false, std::runtime_error,
     "Ifpack2::TomBlockRelaxation::applyMat() ERROR: isComputed() must be true prior to calling applyMat().");
  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
     "Ifpack2::TomBlockRelaxation::applyMat() ERROR: X.getNumVectors() != Y.getNumVectors().");
  A_->apply(X, Y, mode);
}
void
Chebyshev<MatrixType>::
applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
          Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
          Teuchos::ETransp mode) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
    "Ifpack2::Chebyshev::applyMat: X.getNumVectors() != Y.getNumVectors().");

  Teuchos::RCP<const row_matrix_type> A = impl_.getMatrix ();
  TEUCHOS_TEST_FOR_EXCEPTION(
    A.is_null (), std::runtime_error, "Ifpack2::Chebyshev::applyMat: The input "
    "matrix A is null.  Please call setMatrix() with a nonnull input matrix "
    "before calling this method.");

  A->apply (X, Y, mode);
}
void 
Chebyshev<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
       Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const 
{
  {
    Teuchos::TimeMonitor timeMon (*Time_);

    // compute() calls initialize() if it hasn't already been called.
    // Thus, we only need to check isComputed().
    TEUCHOS_TEST_FOR_EXCEPTION(! isComputed(), std::runtime_error, 
      "Ifpack2::Chebyshev::apply(): You must call the compute() method before "
      "you may call apply().");
    TEUCHOS_TEST_FOR_EXCEPTION(
       X.getNumVectors() != Y.getNumVectors(), 
       std::runtime_error,
       "Ifpack2::Chebyshev::apply(): X and Y must have the same number of "
       "columns.  X.getNumVectors() = " << X.getNumVectors() << " != "
       << "Y.getNumVectors() = " << Y.getNumVectors() << ".");
#ifdef HAVE_TEUCHOS_DEBUG
    {
      // The relation 'isSameAs' is transitive.  It's also a collective,
      // so we don't have to do a "shared" test for exception (i.e., a
      // global reduction on the test value).
      TEUCHOS_TEST_FOR_EXCEPTION(
         ! X.getMap ()->isSameAs (*getDomainMap ()),
         std::runtime_error,
         "Ifpack2::Chebyshev: The domain Map of the matrix must be the same as "
	 "the Map of the input vector(s) X.");
      TEUCHOS_TEST_FOR_EXCEPTION(
         ! Y.getMap ()->isSameAs (*getRangeMap ()),
         std::runtime_error,
         "Ifpack2::Chebyshev: The range Map of the matrix must be the same as "
	 "the Map of the output vector(s) Y.");
    }
#endif // HAVE_TEUCHOS_DEBUG
    applyImpl (X, Y, mode, alpha, beta);
  }
  ++NumApply_;
  ApplyTime_ += Time_->totalElapsedTime ();
}
void SingletonFilter<MatrixType>::UpdateLHSTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& ReducedLHS,
                                                 Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& LHS)
{

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        LHS_ptr = LHS.get2dViewNonConst();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> >  ReducedLHS_ptr = ReducedLHS.get2dView();

  for (size_t i = 0 ; i < NumRows_ ; ++i)
    for (size_t k = 0 ; k < LHS.getNumVectors() ; ++k)
      LHS_ptr[k][InvReorder_[i]] = (RangeScalar)ReducedLHS_ptr[k][i];
}
void SingletonFilter<MatrixType>::apply(const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 
				       Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 
				       Teuchos::ETransp mode, 
				       Scalar alpha,
				       Scalar beta) const
{  
  // Note: This isn't AztecOO compliant.  But neither was Ifpack's version.

  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
			     "Ifpack2::SingletonFilter::apply ERROR: X.getNumVectors() != Y.getNumVectors().");

  Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > x_ptr = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >       y_ptr = Y.get2dViewNonConst();

  Y.putScalar(zero);
  size_t NumVectors = Y.getNumVectors();


  for (size_t i = 0 ; i < NumRows_ ; ++i) {
    size_t Nnz;
    // Use this class's getrow to make the below code simpler
    getLocalRowCopy(i,Indices_(),Values_(),Nnz);
    if (mode==Teuchos::NO_TRANS){
      for (size_t j = 0 ; j < Nnz ; ++j) 
	for (size_t k = 0 ; k < NumVectors ; ++k)
	  y_ptr[k][i] += Values_[j] * x_ptr[k][Indices_[j]];      
    }
    else if (mode==Teuchos::TRANS){
      for (size_t j = 0 ; j < Nnz ; ++j) 
	for (size_t k = 0 ; k < NumVectors ; ++k)
	  y_ptr[k][Indices_[j]] += Values_[j] * x_ptr[k][i];
    }
    else { //mode==Teuchos::CONJ_TRANS
      for (size_t j = 0 ; j < Nnz ; ++j) 
	for (size_t k = 0 ; k < NumVectors ; ++k)
	  y_ptr[k][Indices_[j]] += Teuchos::ScalarTraits<Scalar>::conjugate(Values_[j]) * x_ptr[k][i];
    }
  }
}
Example #17
void RILUK<MatrixType>::generateXY(Teuchos::ETransp mode,
    const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Xin,
    const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Yin,
    Teuchos::RCP<const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> >& Xout,
    Teuchos::RCP<Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> >& Yout) const {

  // Generate an X and Y suitable for performing Solve() and Multiply() methods

  TEUCHOS_TEST_FOR_EXCEPTION(Xin.getNumVectors()!=Yin.getNumVectors(), std::runtime_error,
       "Ifpack2::RILUK::GenerateXY ERROR: X and Y not the same size");

  //cout << "Xin = " << Xin << endl;
  Xout = Teuchos::rcp( (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> *) &Xin, false );
  Yout = Teuchos::rcp( (Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> *) &Yin, false );
  if (!isOverlapped_) return; // Nothing more to do

  if (isOverlapped_) {
    // Make sure the number of vectors in the multivector is the same as before.
    if (OverlapX_!=Teuchos::null) {
      if (OverlapX_->getNumVectors()!=Xin.getNumVectors()) {
        OverlapX_ = Teuchos::null;
        OverlapY_ = Teuchos::null;
      }
    }
    if (OverlapX_==Teuchos::null) { // Need to allocate space for overlap X and Y
      OverlapX_ = Teuchos::rcp( new Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>(U_->getColMap(), Xout->getNumVectors()) );
      OverlapY_ = Teuchos::rcp( new Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>(L_->getRowMap(), Yout->getNumVectors()) );
    }
    if (mode == Teuchos::NO_TRANS) {
      OverlapX_->doImport(*Xout,*U_->getGraph()->getImporter(), Tpetra::INSERT); // Import X values for solve
    }
    else {
      OverlapX_->doImport(*Xout,*L_->getGraph()->getExporter(), Tpetra::INSERT); // Import X values for solve
    }
    Xout = OverlapX_;
    Yout = OverlapY_; // Set pointers for Xout and Yout to point to overlap space
    //cout << "OverlapX_ = " << *OverlapX_ << endl;
  }
}
void
Chebyshev<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
       Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  const std::string timerName ("Ifpack2::Chebyshev::apply");
  Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
  if (timer.is_null ()) {
    timer = Teuchos::TimeMonitor::getNewCounter (timerName);
  }

  // Start timing here.
  {
    Teuchos::TimeMonitor timeMon (*timer);

    // compute() calls initialize() if it hasn't already been called.
    // Thus, we only need to check isComputed().
    TEUCHOS_TEST_FOR_EXCEPTION(
      ! isComputed (), std::runtime_error,
      "Ifpack2::Chebyshev::apply(): You must call the compute() method before "
      "you may call apply().");
    TEUCHOS_TEST_FOR_EXCEPTION(
      X.getNumVectors () != Y.getNumVectors (), std::runtime_error,
      "Ifpack2::Chebyshev::apply(): X and Y must have the same number of "
      "columns.  X.getNumVectors() = " << X.getNumVectors() << " != "
      << "Y.getNumVectors() = " << Y.getNumVectors() << ".");
    applyImpl (X, Y, mode, alpha, beta);
  }
  ++NumApply_;

  // timer->totalElapsedTime() returns the total time over all timer
  // calls.  Thus, we use = instead of +=.
  ApplyTime_ = timer->totalElapsedTime ();
}
void SingletonFilter<MatrixType>::SolveSingletonsTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& RHS,
                                                       Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& LHS)
{
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > RHS_ptr = RHS.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        LHS_ptr = LHS.get2dViewNonConst();

  for (size_t i = 0 ; i < NumSingletons_ ; ++i) {
    LocalOrdinal ii = SingletonIndex_[i];
    // get the diagonal value for the singleton
    size_t Nnz;
    A_->getLocalRowCopy(ii,Indices_(),Values_(),Nnz);
    for (size_t j = 0 ; j < Nnz ; ++j) {
      if (Indices_[j] == ii) {
        for (size_t k = 0 ; k < LHS.getNumVectors() ; ++k)
          LHS_ptr[k][ii] = (RangeScalar)RHS_ptr[k][ii] / (RangeScalar)Values_[j];
      }
    }
  }
}
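
Taken together, the SingletonFilter pieces shown here support the usual reduced-system workflow: solve the singleton rows directly, remove their coupling from the right-hand side, solve the reduced problem, then scatter the reduced solution back. A hypothetical driver sketch, assuming non-templated wrappers (SolveSingletons, CreateReducedRHS, UpdateLHS) that forward to the Templ variants above and a caller-supplied reduced_solver:

// Hedged driver sketch; "filter", "RHS", "LHS", "ReducedRHS", "ReducedLHS",
// and "reduced_solver" are all assumed from the surrounding code.
filter.SolveSingletons (RHS, LHS);              // solve the 1x1 singleton rows
filter.CreateReducedRHS (LHS, RHS, ReducedRHS); // subtract their coupling from the RHS
reduced_solver.apply (ReducedRHS, ReducedLHS);  // solve the remaining reduced system
filter.UpdateLHS (ReducedLHS, LHS);             // scatter the reduced solution back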
Example #20
 void
 apply(
     const Tpetra::MultiVector<double,int,int> & X,
     Tpetra::MultiVector<double,int,int> & Y,
     Teuchos::ETransp mode = Teuchos::NO_TRANS,
     double alpha = Teuchos::ScalarTraits<double>::one(),
     double beta = Teuchos::ScalarTraits<double>::zero()
     ) const
 {
   for (size_t k = 0; k < Y.getNumVectors(); k++) {
     const auto x_data = X.getData(k);
     const auto x0_data = x0_.getData();
     auto y_data = Y.getDataNonConst(k);
     for (size_t i = 0; i < y_data.size(); i++) {
       y_data[i] = 2 * x0_data[i] * x_data[i];
     }
   }
   return;
 }
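
This small operator is the action of the Jacobian of f(x) = x.^2 evaluated at x0_: each column of Y becomes 2*x0 .* X(:,k). A hedged usage sketch, where jac is an instance of the enclosing class and x0 the Tpetra vector it was built from (both assumed):

// Hedged usage sketch; "jac" and "x0" are assumed from the surrounding code.
Tpetra::MultiVector<double,int,int> X (x0.getMap (), 1);
Tpetra::MultiVector<double,int,int> Y (x0.getMap (), 1);
X.putScalar (1.0);
jac.apply (X, Y);   // Y now holds 2*x0, the Jacobian of x.^2 applied to a vector of ones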
void IdentitySolver<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
       Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
       Teuchos::ETransp /*mode*/,
       scalar_type alpha,
       scalar_type beta) const
{
  using Teuchos::RCP;
  typedef Teuchos::ScalarTraits<scalar_type> STS;
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
                              global_ordinal_type, node_type> MV;

  TEUCHOS_TEST_FOR_EXCEPTION(
    ! isComputed (), std::runtime_error,
    "Ifpack2::IdentitySolver::apply: If compute() has not yet been called, "
    "or if you have changed the matrix via setMatrix(), "
    "you must call compute() before you may call this method.");

  // "Identity solver" does what it says: it's the identity operator.
  // We have to Export if the domain and range Maps are not the same.
  // Otherwise, this operator would be a permutation, not the identity.
  if (export_.is_null ()) {
    Y.update (alpha, X, beta);
  }
  else {
    if (alpha == STS::one () && beta == STS::zero ()) { // the common case
      Y.doExport (X, *export_, Tpetra::REPLACE);
    }
    else {
      // We know that the domain and range Maps are compatible.  First
      // bring X into the range Map via Export.  Then compute in place
      // in Y.
      MV X_tmp (Y.getMap (), Y.getNumVectors ());
      X_tmp.doExport (X, *export_, Tpetra::REPLACE);
      Y.update (alpha, X_tmp, beta);
    }
  }
  ++numApply_;
}
Example #22
void TomBlockRelaxation<MatrixType,ContainerType>::ApplyInverseJacobi(
        const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X, 
              Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
{
  size_t NumVectors = X.getNumVectors();
  Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> AY( Y.getMap(),NumVectors );
  
  // Initial matvec not needed
  int starting_iteration=0;
  if(ZeroStartingSolution_) {
    DoJacobi(X,Y);
    starting_iteration=1;
  }

  for (int j = starting_iteration; j < NumSweeps_ ; j++) {       
    applyMat(Y,AY);
    AY.update(1.0,X,-1.0);
    DoJacobi(AY,Y);

    // Flops for matrix apply & update
    ApplyFlops_ += NumVectors * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
  }

}
Example #23
void PrecPrecomputed::applyTempl(
           const Tpetra::MultiVector<DomainScalar, Ordinal, Ordinal, Node>& X,
                 Tpetra::MultiVector<RangeScalar, Ordinal, Ordinal, Node>& Z,
                 Teuchos::ETransp mode,
               RangeScalar alpha,
               RangeScalar beta) const
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  typedef Tpetra::MultiVector<DomainScalar, Ordinal, Ordinal, Node> MV;

  Teuchos::Time timer ("ILUT::apply");
  { // Timer scope for timing apply()
    Teuchos::TimeMonitor timeMon (timer, true);

    TEUCHOS_TEST_FOR_EXCEPTION(
      ! isComputed (), std::runtime_error,
      "Ifpack2::ILUT::apply: You must call compute() to compute the incomplete "
      "factorization, before calling apply().");

    TEUCHOS_TEST_FOR_EXCEPTION(
      X.getNumVectors() != Z.getNumVectors(), std::runtime_error,
      "Ifpack2::ILUT::apply: X and Y must have the same number of columns.  "
      "X has " << X.getNumVectors () << " columns, but Y has "
      << Z.getNumVectors () << " columns.");

    TEUCHOS_TEST_FOR_EXCEPTION(
      beta != STS::zero (), std::logic_error,
      "Ifpack2::ILUT::apply: This method does not currently work when beta != 0.");

    // If X and Y are pointing to the same memory location,
    // we need to create an auxiliary vector, Xcopy
    RCP<const MV> Xcopy;
    if (X.getLocalMV ().getValues () == Z.getLocalMV ().getValues ()) {
      Xcopy = rcp (new MV (X));
    }
    else {
      Xcopy = rcpFromRef (X);
      //ZACH always taken
    }

    //if (mode == Teuchos::NO_TRANS) { // Solve L U Y = X
      /*
      L_->template localSolve<RangeScalar,DomainScalar> (*Xcopy, Y, Teuchos::NO_TRANS);
      U_->template localSolve<RangeScalar,RangeScalar> (Y, Y, Teuchos::NO_TRANS);
      */
      
      //dump(X, "##X");
      //dump(*Xcopy, "##Xcopy");
      //ZACH the problem is here: This actually needs to be a global triangular solve
      L_->template localSolve<RangeScalar,DomainScalar> (*Xcopy, *Y_, Teuchos::NO_TRANS);
      //dump(*Y_, "##Y");
      U_->template localSolve<RangeScalar,RangeScalar> (*Y_, Z, Teuchos::NO_TRANS);
      
      
      //U_->template localSolve<RangeScalar,RangeScalar> (*Xcopy, Z, Teuchos::NO_TRANS);
      //dump(Z, "##Z");
      //U_->template localSolve<RangeScalar,DomainScalar> (*Xcopy, Y, Teuchos::NO_TRANS);
      //U_->template localSolve<RangeScalar,RangeScalar> (Y, Y, Teuchos::TRANS);
      
    //}
    //else { // Solve U^* L^* Y = X
      /*
      U_->template localSolve<RangeScalar,DomainScalar> (*Xcopy, Y, mode);
      L_->template localSolve<RangeScalar,RangeScalar> (Y, Y, mode);
      */
    //}

    if (alpha != STS::one ()) {
      Z.scale (alpha);
    }
  }
  ++NumApply_;
  ApplyTime_ += timer.totalElapsedTime ();
}
void Hiptmair<MatrixType>::
apply (const Tpetra::MultiVector<typename MatrixType::scalar_type,
       typename MatrixType::local_ordinal_type,
       typename MatrixType::global_ordinal_type,
       typename MatrixType::node_type>& X,
       Tpetra::MultiVector<typename MatrixType::scalar_type,
                           typename MatrixType::local_ordinal_type,
                           typename MatrixType::global_ordinal_type,
                           typename MatrixType::node_type>& Y,
       Teuchos::ETransp mode,
       typename MatrixType::scalar_type alpha,
       typename MatrixType::scalar_type beta) const
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
                              global_ordinal_type, node_type> MV;
  TEUCHOS_TEST_FOR_EXCEPTION(
    ! isComputed (), std::runtime_error,
    "Ifpack2::Hiptmair::apply: You must call compute() before you may call apply().");
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
    "Ifpack2::Hiptmair::apply: The MultiVector inputs X and Y do not have the "
    "same number of columns.  X.getNumVectors() = " << X.getNumVectors ()
    << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");

  // Catch unimplemented cases: alpha != 1, beta != 0, mode != NO_TRANS.
  TEUCHOS_TEST_FOR_EXCEPTION(
    alpha != STS::one (), std::logic_error,
    "Ifpack2::Hiptmair::apply: alpha != 1 has not been implemented.");
  TEUCHOS_TEST_FOR_EXCEPTION(
    beta != STS::zero (), std::logic_error,
    "Ifpack2::Hiptmair::apply: zero != 0 has not been implemented.");
  TEUCHOS_TEST_FOR_EXCEPTION(
    mode != Teuchos::NO_TRANS, std::logic_error,
    "Ifpack2::Hiptmair::apply: mode != Teuchos::NO_TRANS has not been implemented.");

  Teuchos::Time timer ("apply");
  { // The body of code to time
    Teuchos::TimeMonitor timeMon (timer);

    // If X and Y are pointing to the same memory location,
    // we need to create an auxiliary vector, Xcopy
    RCP<const MV> Xcopy;
    {
      auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
      auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
      if (X_lcl_host.ptr_on_device () == Y_lcl_host.ptr_on_device ()) {
        Xcopy = rcp (new MV (X, Teuchos::Copy));
      } else {
        Xcopy = rcpFromRef (X);
      }
    }

    RCP<MV> Ycopy = rcpFromRef (Y);
    if (ZeroStartingSolution_) {
      Ycopy->putScalar (STS::zero ());
    }

    // apply Hiptmair Smoothing
    applyHiptmairSmoother (*Xcopy, *Ycopy);

  }
  ++NumApply_;
  ApplyTime_ += timer.totalElapsedTime ();
}
size_t findUniqueGids(
  Tpetra::MultiVector<gno_t, lno_t, gno_t> &keys,
  Tpetra::Vector<gno_t, lno_t, gno_t> &gids
)
{
  // Input:  Tpetra MultiVector of keys; key length = numVectors()
  //         May contain duplicate keys within a processor.
  //         May contain duplicate keys across processors.
  // Input:  Empty Tpetra Vector with same map for holding the results
  // Output: Filled gids vector, containing unique global numbers for
  //         each unique key.  Global numbers are in range [0,#UniqueKeys).

  size_t num_keys = keys.getLocalLength();
  size_t num_entries = keys.getNumVectors();

#ifdef HAVE_ZOLTAN2_MPI
  MPI_Comm mpicomm = Teuchos::getRawMpiComm(*(keys.getMap()->getComm()));
#else
  // Zoltan's siMPI will be used here
  {
    int flag;
    MPI_Initialized(&flag);
    if (!flag) {
      int narg = 0;
      char **argv = NULL;
      MPI_Init(&narg, &argv);
    }
  }
  MPI_Comm mpicomm = MPI_COMM_WORLD;  // Will get MPI_COMM_WORLD from siMPI
#endif

  int num_gid = sizeof(gno_t)/sizeof(ZOLTAN_ID_TYPE) * num_entries;
  int num_user = sizeof(gno_t);

  // Buffer the keys for Zoltan_DD
  Teuchos::ArrayRCP<const gno_t> *tmpKeyVecs =
           new Teuchos::ArrayRCP<const gno_t>[num_entries];
  for (size_t v = 0; v < num_entries; v++) tmpKeyVecs[v] = keys.getData(v);

  ZOLTAN_ID_PTR ddkeys = new ZOLTAN_ID_TYPE[num_gid * num_keys];
  size_t idx = 0;
  for (size_t i = 0; i < num_keys; i++) {
    for (size_t v = 0; v < num_entries; v++) {
      ZOLTAN_ID_PTR ddkey = &(ddkeys[idx]);
      TPL_Traits<ZOLTAN_ID_PTR,gno_t>::ASSIGN(ddkey, tmpKeyVecs[v][i]);
      idx += TPL_Traits<ZOLTAN_ID_PTR,gno_t>::NUM_ID;
    }
  }
  delete [] tmpKeyVecs;

  // Allocate memory for the result
  char *ddnewgids = new char[num_user * num_keys];
  
  // Compute the new GIDs
  size_t nUnique = findUniqueGidsCommon<gno_t>(num_keys, num_gid,
                                               ddkeys, ddnewgids, mpicomm);

  // Copy the result into the output vector
  gno_t *result = (gno_t *)ddnewgids;
  for (size_t i = 0; i < num_keys; i++)
    gids.replaceLocalValue(i, result[i]);

  // Clean up
  delete [] ddkeys;
  delete [] ddnewgids;

  return nUnique;
}
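
A hypothetical usage sketch for findUniqueGids, assuming a Tpetra::Map named map from the surrounding test or driver code. Each local row of keys holds one multi-part key; equal keys anywhere in the communicator receive the same new global number:

// Hedged usage sketch; "map" is assumed from the surrounding code.
Tpetra::MultiVector<gno_t, lno_t, gno_t> keys (map, 3); // 3-part keys
Tpetra::Vector<gno_t, lno_t, gno_t> gids (map);
// ... fill keys: one (k0,k1,k2) tuple per local row, duplicates allowed ...
size_t nUnique = findUniqueGids (keys, gids);
// gids[i] is now a number in [0, nUnique); equal key tuples map to equal numbers.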
void DenseContainer<MatrixType,LocalScalarType>::
weightedApply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
               Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
               const Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& D,
               Teuchos::ETransp mode,
               scalar_type alpha,
               scalar_type beta) const
{
    using Teuchos::ArrayRCP;
    using Teuchos::ArrayView;
    using Teuchos::Range1D;
    using Teuchos::RCP;
    using Teuchos::rcp;
    using Teuchos::rcp_const_cast;
    using std::cerr;
    using std::endl;
    typedef Teuchos::ScalarTraits<scalar_type> STS;

    // The local operator template parameter might have a different
    // Scalar type than MatrixType.  This means that we might have to
    // convert X and Y to the Tpetra::MultiVector specialization that
    // the local operator wants.  This class' X_ and Y_ internal fields
    // are of the right type for the local operator, so we can use those
    // as targets.

    // Tpetra::MultiVector specialization corresponding to the global operator.
    typedef Tpetra::MultiVector<scalar_type,
            local_ordinal_type, global_ordinal_type, node_type> global_mv_type;
    // Tpetra::Vector specialization corresponding to the local
    // operator.  We will use this for the subset permutation of the
    // diagonal scaling D.
    typedef Tpetra::Vector<local_scalar_type, local_ordinal_type,
            global_ordinal_type, node_type> local_vec_type;

    const char prefix[] = "Ifpack2::DenseContainer::weightedApply: ";
    TEUCHOS_TEST_FOR_EXCEPTION(
        ! IsComputed_, std::runtime_error, prefix << "You must have called the "
        "compute() method before you may call this method.  You may call "
        "weightedApply() as many times as you want after calling compute() once, "
        "but you must have called compute() at least once first.");
    const size_t numVecs = X.getNumVectors ();
    TEUCHOS_TEST_FOR_EXCEPTION(
        numVecs != Y.getNumVectors (), std::runtime_error,
        prefix << "X and Y have different numbers of vectors (columns).  X has "
        << X.getNumVectors () << ", but Y has " << X.getNumVectors () << ".");

    if (numVecs == 0) {
        return; // done! nothing to do
    }

    // The local operator works on a permuted subset of the local parts
    // of X and Y.  The subset and permutation are defined by the index
    // array returned by getLocalRows().  If the permutation is trivial
    // and the subset is exactly equal to the local indices, then we
    // could use the local parts of X and Y exactly, without needing to
    // permute.  Otherwise, we have to use temporary storage to permute
    // X and Y.  For now, we always use temporary storage.
    //
    // Create temporary permuted versions of the input and output.
    // (Re)allocate X_ and/or Y_ only if necessary.  We'll use them to
    // store the permuted versions of X resp. Y.  Note that X_local has
    // the domain Map of the operator, which may be a permuted subset of
    // the local Map corresponding to X.getMap().  Similarly, Y_local
    // has the range Map of the operator, which may be a permuted subset
    // of the local Map corresponding to Y.getMap().  numRows_ here
    // gives the number of rows in the row Map of the local operator.
    //
    // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
    // here that the row Map and the range Map of that operator are
    // the same.
    //
    // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
    // really belongs in Tpetra.

    if (X_.is_null ()) {
        X_ = rcp (new local_mv_type (localMap_, numVecs));
    }
    RCP<local_mv_type> X_local = X_;
    TEUCHOS_TEST_FOR_EXCEPTION(
        X_local->getLocalLength () != numRows_, std::logic_error,
        "Ifpack2::DenseContainer::weightedApply: "
        "X_local has length " << X_local->getLocalLength () << ", which does "
        "not match numRows_ = " << numRows_ << ".  Please report this bug to "
        "the Ifpack2 developers.");
    ArrayView<const local_ordinal_type> localRows = this->getLocalRows ();

    Details::MultiVectorLocalGatherScatter<global_mv_type, local_mv_type> mvgs;
    mvgs.gather (*X_local, X, localRows);

    // We must gather the output multivector Y even on input to
    // applyImpl(), since the local operator might use it as an initial
    // guess for a linear solve.  We have no way of knowing whether it
    // does or does not.

    if (Y_.is_null ()) {
        Y_ = rcp (new local_mv_type (localMap_, numVecs));
    }
    RCP<local_mv_type> Y_local = Y_;
    TEUCHOS_TEST_FOR_EXCEPTION(
        Y_local->getLocalLength () != numRows_, std::logic_error,
        "Ifpack2::DenseContainer::weightedApply: "
        "Y_local has length " << X_local->getLocalLength () << ", which does "
        "not match numRows_ = " << numRows_ << ".  Please report this bug to "
        "the Ifpack2 developers.");
    mvgs.gather (*Y_local, Y, localRows);

    // Apply the diagonal scaling D to the input X.  It's our choice
    // whether the result has the original input Map of X, or the
    // permuted subset Map of X_local.  If the latter, we also need to
    // gather D into the permuted subset Map.  We choose the latter, to
    // save memory and computation.  Thus, we do the following:
    //
    // 1. Gather D into a temporary vector D_local.
    // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
    // 3. Compute X_scaled := diag(D_local) * X_local.

    local_vec_type D_local (localMap_);
    TEUCHOS_TEST_FOR_EXCEPTION(
        D_local.getLocalLength () != numRows_, std::logic_error,
        "Ifpack2::DenseContainer::weightedApply: "
        "D_local has length " << D_local.getLocalLength () << ", which does "
        "not match numRows_ = " << numRows_ << ".  Please report this bug to "
        "the Ifpack2 developers.");
    mvgs.gather (D_local, D, localRows);
    local_mv_type X_scaled (localMap_, numVecs);
    X_scaled.elementWiseMultiply (STS::one (), D_local, *X_local, STS::zero ());

    // Y_temp will hold the result of M^{-1}*X_scaled.  If beta == 0, we
    // can write the result of Inverse_->apply() directly to Y_local, so
    // Y_temp may alias Y_local.  Otherwise, if beta != 0, we need
    // temporary storage for M^{-1}*X_scaled, so Y_temp must be
    // different than Y_local.
    RCP<local_mv_type> Y_temp;
    if (beta == STS::zero ()) {
        Y_temp = Y_local;
    } else {
        Y_temp = rcp (new local_mv_type (localMap_, numVecs));
    }

    // Apply the local operator: Y_temp := M^{-1} * X_scaled
    this->applyImpl (X_scaled, *Y_temp, mode, STS::one (), STS::zero ());
    // Y_local := beta * Y_local + alpha * diag(D_local) * Y_temp.
    //
    // Note that we still use the permuted subset scaling D_local here,
    // because Y_temp has the same permuted subset Map.  That's good, in
    // fact, because it's a subset: less data to read and multiply.
    Y_local->elementWiseMultiply (alpha, D_local, *Y_temp, beta);

    // Copy the permuted subset output vector Y_local into the original
    // output multivector Y.
    mvgs.scatter (Y, *Y_local, localRows);
}
void DenseContainer<MatrixType, LocalScalarType>::
apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
       Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
    using Teuchos::ArrayView;
    using Teuchos::as;
    using Teuchos::RCP;
    using Teuchos::rcp;

    // The local operator might have a different Scalar type than
    // MatrixType.  This means that we might have to convert X and Y to
    // the Tpetra::MultiVector specialization that the local operator
    // wants.  This class' X_ and Y_ internal fields are of the right
    // type for the local operator, so we can use those as targets.

    // Tpetra::MultiVector specialization corresponding to the global operator.
    typedef Tpetra::MultiVector<scalar_type,
            local_ordinal_type, global_ordinal_type, node_type> global_mv_type;

    const char prefix[] = "Ifpack2::DenseContainer::weightedApply: ";
    TEUCHOS_TEST_FOR_EXCEPTION(
        ! IsComputed_, std::runtime_error, prefix << "You must have called the "
        "compute() method before you may call this method.  You may call "
        "apply() as many times as you want after calling compute() once, "
        "but you must have called compute() at least once first.");
    const size_t numVecs = X.getNumVectors ();
    TEUCHOS_TEST_FOR_EXCEPTION(
        numVecs != Y.getNumVectors (), std::runtime_error,
        prefix << "X and Y have different numbers of vectors (columns).  X has "
        << X.getNumVectors () << ", but Y has " << X.getNumVectors () << ".");

    if (numVecs == 0) {
        return; // done! nothing to do
    }

    // The local operator works on a permuted subset of the local parts
    // of X and Y.  The subset and permutation are defined by the index
    // array returned by getLocalRows().  If the permutation is trivial
    // and the subset is exactly equal to the local indices, then we
    // could use the local parts of X and Y exactly, without needing to
    // permute.  Otherwise, we have to use temporary storage to permute
    // X and Y.  For now, we always use temporary storage.
    //
    // Create temporary permuted versions of the input and output.
    // (Re)allocate X_ and/or Y_ only if necessary.  We'll use them to
    // store the permuted versions of X resp. Y.  Note that X_local has
    // the domain Map of the operator, which may be a permuted subset of
    // the local Map corresponding to X.getMap().  Similarly, Y_local
    // has the range Map of the operator, which may be a permuted subset
    // of the local Map corresponding to Y.getMap().  numRows_ here
    // gives the number of rows in the row Map of the local Inverse_
    // operator.
    //
    // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
    // here that the row Map and the range Map of that operator are
    // the same.
    //
    // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
    // really belongs in Tpetra.

    if (X_.is_null ()) {
        X_ = rcp (new local_mv_type (localMap_, numVecs));
    }
    RCP<local_mv_type> X_local = X_;
    TEUCHOS_TEST_FOR_EXCEPTION(
        X_local->getLocalLength () != numRows_, std::logic_error,
        "Ifpack2::DenseContainer::apply: "
        "X_local has length " << X_local->getLocalLength () << ", which does "
        "not match numRows_ = " << numRows_ << ".  Please report this bug to "
        "the Ifpack2 developers.");
    ArrayView<const local_ordinal_type> localRows = this->getLocalRows ();

    Details::MultiVectorLocalGatherScatter<global_mv_type, local_mv_type> mvgs;
    mvgs.gather (*X_local, X, localRows);

    // We must gather the contents of the output multivector Y even on
    // input to applyImpl(), since the inverse operator might use it as
    // an initial guess for a linear solve.  We have no way of knowing
    // whether it does or does not.

    if (Y_.is_null ()) {
        Y_ = rcp (new local_mv_type (localMap_, numVecs));
    }
    RCP<local_mv_type> Y_local = Y_;
    TEUCHOS_TEST_FOR_EXCEPTION(
        Y_local->getLocalLength () != numRows_, std::logic_error,
        "Ifpack2::DenseContainer::apply: "
        "Y_local has length " << X_local->getLocalLength () << ", which does "
        "not match numRows_ = " << numRows_ << ".  Please report this bug to "
        "the Ifpack2 developers.");
    mvgs.gather (*Y_local, Y, localRows);

    // Apply the local operator:
    // Y_local := beta*Y_local + alpha*M^{-1}*X_local
    this->applyImpl (*X_local, *Y_local, mode, as<local_scalar_type> (alpha),
                     as<local_scalar_type> (beta));

    // Scatter the permuted subset output vector Y_local back into the
    // original output multivector Y.
    mvgs.scatter (Y, *Y_local, localRows);
}
void
SupportGraph<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,
                                 local_ordinal_type,
                                 global_ordinal_type,
                                 node_type>& X,
       Tpetra::MultiVector<scalar_type,
                           local_ordinal_type,
                           global_ordinal_type,
                           node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::Time;
  using Teuchos::TimeMonitor;
  typedef scalar_type DomainScalar;
  typedef scalar_type RangeScalar;
  typedef Tpetra::MultiVector<DomainScalar, local_ordinal_type,
    global_ordinal_type, node_type> MV;

  RCP<FancyOStream> out = getFancyOStream(rcpFromRef(std::cout));

  // Create a timer for this method, if it doesn't exist already.
  // TimeMonitor::getNewCounter registers the timer, so that
  // TimeMonitor's class methods like summarize() will report the
  // total time spent in successful calls to this method.
  const std::string timerName ("Ifpack2::SupportGraph::apply");
  RCP<Time> timer = TimeMonitor::lookupCounter(timerName);
  if (timer.is_null()) {
    timer = TimeMonitor::getNewCounter(timerName);
  }

  { // Start timing here.
    Teuchos::TimeMonitor timeMon (*timer);

    TEUCHOS_TEST_FOR_EXCEPTION(
      ! isComputed(), std::runtime_error,
      "Ifpack2::SupportGraph::apply: You must call compute() to compute the "
      "incomplete factorization, before calling apply().");

    TEUCHOS_TEST_FOR_EXCEPTION(
      X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
      "Ifpack2::SupportGraph::apply: X and Y must have the same number of "
      "columns.  X has " << X.getNumVectors() << " columns, but Y has "
      << Y.getNumVectors() << " columns.");

    TEUCHOS_TEST_FOR_EXCEPTION(
      beta != STS::zero(), std::logic_error,
      "Ifpack2::SupportGraph::apply: This method does not currently work when "
      "beta != 0.");

    // If X and Y are pointing to the same memory location,
    // we need to create an auxiliary vector, Xcopy
    RCP<const MV> Xcopy;
    if (X.getLocalMV().getValues() == Y.getLocalMV().getValues()) {
      Xcopy = rcp (new MV(X));
    }
    else {
      Xcopy = rcpFromRef(X);
    }

    if (alpha != STS::one()) {
      Y.scale(alpha);
    }

    RCP<MV> Ycopy = rcpFromRef(Y);

    solver_->setB(Xcopy);
    solver_->setX(Ycopy);

    solver_->solve ();
  } // Stop timing here.

  ++NumApply_;

  // timer->totalElapsedTime() returns the total time over all timer
  // calls.  Thus, we use = instead of +=.
  ApplyTime_ = timer->totalElapsedTime();
}
void
OverlappingRowMatrix<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
       Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  using Teuchos::ArrayRCP;
  using Teuchos::as;
  typedef scalar_type RangeScalar;
  typedef scalar_type DomainScalar;

  typedef Teuchos::ScalarTraits<RangeScalar> STRS;
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::OverlappingRowMatrix::apply: The input X and the output Y must "
    "have the same number of columns.  X.getNumVectors() = "
    << X.getNumVectors() << " != Y.getNumVectors() = " << Y.getNumVectors()
    << ".");

  // FIXME (mfh 13 July 2013) This would be a good candidate for a
  // Kokkos local parallel operator implementation.  That would
  // obviate the need for getting views of the data and make the code
  // below a lot simpler.

  const RangeScalar zero = STRS::zero ();
  ArrayRCP<ArrayRCP<const DomainScalar> > x_ptr = X.get2dView();
  ArrayRCP<ArrayRCP<RangeScalar> >        y_ptr = Y.get2dViewNonConst();
  Y.putScalar(zero);
  size_t NumVectors = Y.getNumVectors();

  const size_t numMyRowsA = A_->getNodeNumRows ();
  for (size_t i = 0; i < numMyRowsA; ++i) {
    size_t Nnz;
    // Use this class's getrow to make the below code simpler
    A_->getLocalRowCopy (i, Indices_ (),Values_ (), Nnz);
    if (mode == Teuchos::NO_TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][i] += as<RangeScalar> (Values_[j]) *
            as<RangeScalar> (x_ptr[k][Indices_[j]]);
    }
    else if (mode == Teuchos::TRANS){
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] += as<RangeScalar> (Values_[j]) *
            as<RangeScalar> (x_ptr[k][i]);
    }
    else { // mode == Teuchos::CONJ_TRANS
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] +=
            STRS::conjugate (as<RangeScalar> (Values_[j])) *
            as<RangeScalar> (x_ptr[k][i]);
    }
  }

  const size_t numMyRowsB = ExtMatrix_->getNodeNumRows ();
  for (size_t i = 0 ; i < numMyRowsB ; ++i) {
    size_t Nnz;
    // Use this class's getrow to make the below code simpler
    ExtMatrix_->getLocalRowCopy (i, Indices_ (), Values_ (), Nnz);
    if (mode == Teuchos::NO_TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][numMyRowsA+i] += as<RangeScalar> (Values_[j]) *
            as<RangeScalar> (x_ptr[k][Indices_[j]]);
    }
    else if (mode == Teuchos::TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][numMyRowsA+Indices_[j]] += as<RangeScalar> (Values_[j]) *
            as<RangeScalar> (x_ptr[k][i]);
    }
    else { // mode == Teuchos::CONJ_TRANS
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][numMyRowsA+Indices_[j]] +=
            STRS::conjugate (as<RangeScalar> (Values_[j])) *
            as<RangeScalar> (x_ptr[k][i]);
    }
  }
}
Example #30
void TomBlockRelaxation<MatrixType,ContainerType>::DoSGS(const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X, 
						      Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xcopy, 
						      Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
{
  Scalar one=Teuchos::ScalarTraits<Scalar>::one();
  int Length = A_->getNodeMaxNumRowEntries(); 
  int NumVectors = X.getNumVectors();
  Teuchos::Array<Scalar>         Values;
  Teuchos::Array<LocalOrdinal>   Indices;
  Values.resize(Length);
  Indices.resize(Length);

  // an additional vector is needed for parallel computations
  // (note that applications through Ifpack2_AdditiveSchwarz
  // are always seen as serial)
  Teuchos::RCP< Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node > > Y2;
  if (IsParallel_)
    Y2 = Teuchos::rcp( new Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(Importer_->getTargetMap(), NumVectors) );
  else
    Y2 = Teuchos::rcp( &Y, false );

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > x_ptr       = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >       xcopy_ptr   = Xcopy.get2dViewNonConst();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >       y_ptr       = Y.get2dViewNonConst();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >       y2_ptr      = Y2->get2dViewNonConst();

  // data exchange is here, once per sweep
  if (IsParallel_)  Y2->doImport(Y,*Importer_,Tpetra::INSERT);

  // Forward Sweep
  for(LocalOrdinal i = 0 ; i < NumLocalBlocks_ ; i++) {
    // may happen that a partition is empty
    if (Containers_[i]->getNumRows() == 0) continue;
    LocalOrdinal LID;

    // update from previous block
    for(size_t j = 0 ; j < Containers_[i]->getNumRows(); j++) {
      LID = Containers_[i]->ID(j);
      size_t NumEntries;
      A_->getLocalRowCopy(LID,Indices(),Values(),NumEntries);

      for (size_t k = 0 ; k < NumEntries ; k++) {
        LocalOrdinal col = Indices[k];
        for (int kk = 0 ; kk < NumVectors ; kk++) 
          xcopy_ptr[kk][LID] -= Values[k] * y2_ptr[kk][col];
      }
    }
    // solve with this block
    // Note: I'm abusing the ordering information, knowing that X/Y and Y2 have the same ordering for on-proc unknowns.
    // Note: Add flop counts for inverse apply
    Containers_[i]->apply(Xcopy,*Y2,Teuchos::NO_TRANS,DampingFactor_,one);

    // operations for all getrow's
    ApplyFlops_ += NumVectors * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
  }// end forward sweep

  // Reverse Sweep
  Xcopy = X;
  for(LocalOrdinal i = NumLocalBlocks_-1; i >=0 ; i--) {
    // may happen that a partition is empty
    if (Containers_[i]->getNumRows() == 0) continue;
    LocalOrdinal LID;

    // update from previous block
    for(size_t j = 0 ; j < Containers_[i]->getNumRows(); j++) {
      LID = Containers_[i]->ID(j);
      size_t NumEntries;
      A_->getLocalRowCopy(LID,Indices(),Values(),NumEntries);

      for (size_t k = 0 ; k < NumEntries ; k++) {
        LocalOrdinal col = Indices[k];
        for (int kk = 0 ; kk < NumVectors ; kk++) 
          xcopy_ptr[kk][LID] -= Values[k] * y2_ptr[kk][col];
      }
    }
    
    // solve with this block
    // Note: I'm abusing the ordering information, knowing that X/Y and Y2 have the same ordering for on-proc unknowns.
    // Note: Add flop counts for inverse apply
    Containers_[i]->apply(Xcopy,*Y2,Teuchos::NO_TRANS,DampingFactor_,one);

    // operations for all getrow's
    ApplyFlops_ += NumVectors * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
  } //end reverse sweep


  // Attention: this is delicate... Not all combinations
  // of Y2 and Y will always work (though for ML it should be ok)
  if (IsParallel_)
    for (int m = 0 ; m < NumVectors ; ++m) 
      for (size_t i = 0 ; i < NumMyRows_ ; ++i)
        y_ptr[m][i] = y2_ptr[m][i];
}