Esempio n. 1
0
int main(int argc, char* argv[])
{
#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
#endif

  // Creating an instance of the LAPACK class for double-precision routines looks like:
  Teuchos::LAPACK<int, double> lapack;

  // This instance provides the access to all the LAPACK routines.
  Teuchos::SerialDenseMatrix<int, double> My_Matrix(4,4);
  Teuchos::SerialDenseVector<int, double> My_Vector(4);
  My_Matrix.random();
  My_Vector.random();

  // Perform an LU factorization of this matrix. 
  int ipiv[4], info;
  char TRANS = 'N';
  lapack.GETRF( 4, 4, My_Matrix.values(), My_Matrix.stride(), ipiv, &info ); 
  
  // Solve the linear system.
  lapack.GETRS( TRANS, 4, 1, My_Matrix.values(), My_Matrix.stride(), 
		ipiv, My_Vector.values(), My_Vector.stride(), &info );  

  // Print out the solution.
  cout << My_Vector << endl;

#ifdef HAVE_MPI
  MPI_Finalize();
#endif
  return 0;
}
 void solve_state(std::vector<std::vector<Real> > &U, const std::vector<Real> &z) {
   // Get Diagonal and Off-Diagonal Entries of PDE Jacobian
   std::vector<Real> d(nx_,4.0*dx_/6.0 + dt_*2.0/dx_);
   d[0]           = dx_/3.0 + dt_/dx_;
   d[nx_-1] = dx_/3.0 + dt_/dx_;
   std::vector<Real> o(nx_-1,dx_/6.0 - dt_/dx_);
   // Perform LDL factorization
   Teuchos::LAPACK<int,Real> lp;
   int info;
   int ldb  = nx_;
   int nhrs = 1;
   lp.PTTRF(nx_,&d[0],&o[0],&info);
   // Initialize State Storage
   U.clear();
   U.resize(nt_+1);
   (U[0]).assign(u0_.begin(),u0_.end());
   // Time Step Using Implicit Euler
   std::vector<Real> b(nx_,0.0);
   for ( uint t = 0; t < nt_; t++ ) {
     // Get Right Hand Side
     apply_mass(b,U[t]);
     b[nx_-1] += dt_*z[t];
     // Solve Tridiagonal System Using LAPACK's SPD Tridiagonal Solver
     lp.PTTRS(nx_,nhrs,&d[0],&o[0],&b[0],ldb,&info);
     // Update State Storage
     (U[t+1]).assign(b.begin(),b.end());
   }
 }
 void solve_adjoint_sensitivity(std::vector<std::vector<Real> > &P, const std::vector<std::vector<Real> > &U) {
   // Get Diagonal and Off-Diagonal Entries of PDE Jacobian
   std::vector<Real> d(nx_,4.0*dx_/6.0 + dt_*2.0/dx_);
   d[0]           = dx_/3.0 + dt_/dx_;
   d[nx_-1] = dx_/3.0 + dt_/dx_;
   std::vector<Real> o(nx_-1,dx_/6.0 - dt_/dx_);
   // Perform LDL factorization
   Teuchos::LAPACK<int,Real> lp;
   int info;
   int ldb  = nx_;
   int nhrs = 1;
   lp.PTTRF(nx_,&d[0],&o[0],&info);
   // Initialize State Storage
   P.clear();
   P.resize(nt_);
   // Time Step Using Implicit Euler
   std::vector<Real> b(nx_,0.0);
   for ( uint t = nt_; t > 0; t-- ) {
     // Get Right Hand Side
     if ( t == nt_ ) {
       apply_mass(b,U[t-1]);
     }
     else {
       apply_mass(b,P[t]);
     }
     // Solve Tridiagonal System Using LAPACK's SPD Tridiagonal Solver
     lp.PTTRS(nx_,nhrs,&d[0],&o[0],&b[0],ldb,&info);
     // Update State Storage
     (P[t-1]).assign(b.begin(),b.end());
   }
 }
  KOKKOS_INLINE_FUNCTION
  int
  Chol<Uplo::Upper,
       AlgoChol::ExternalLapack,Variant::One>
  ::invoke(PolicyType &policy,
           const MemberType &member,
           DenseExecViewTypeA &A) {
    // static_assert( Kokkos::Impl::is_same<
    //                typename DenseMatrixTypeA::space_type,
    //                Kokkos::Cuda
    //                >::value,
    //                "Cuda space is not available for calling external BLAS" );

    //typedef typename DenseExecViewTypeA::space_type   space_type;
    typedef typename DenseExecViewTypeA::ordinal_type ordinal_type;
    typedef typename DenseExecViewTypeA::value_type   value_type;

    int r_val = 0;      
    if (member.team_rank() == 0) {
#ifdef HAVE_SHYLUTACHO_TEUCHOS
      Teuchos::LAPACK<ordinal_type,value_type> lapack;

      lapack.POTRF('U',
                   A.NumRows(),
                   A.ValuePtr(), A.BaseObject().ColStride(),
                   &r_val);
#else
    TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif
    }

    return r_val;
  }
Esempio n. 5
0
MxDimMatrix<T, DIM> MxDimMatrix<T, DIM>::inv() const {

  MxDimMatrix<T, DIM> thisT = this->transpose(); 
  MxDimMatrix<T, DIM> solnT(MxDimVector<T, DIM>(1));

  int info;
  int ipiv[DIM];
  Teuchos::LAPACK<int, T> lapack;
  lapack.GESV(DIM, DIM, &thisT(0, 0), DIM, ipiv, &solnT(0, 0), DIM, &info);
  
  if (info != 0)
    std::cout << "MxDimMatrix::inv(): error in lapack routine. Return code: " << info << ".\n";

  return solnT.transpose();
}
Esempio n. 6
0
int MxDimMatrix<T, DIM>::eig(MxDimVector<std::complex<T>, DIM> & evals,
MxDimMatrix<std::complex<T>, DIM> & levecs,
MxDimMatrix<std::complex<T>, DIM> & revecs) const {

  MxDimMatrix<T, DIM> thisT = this->transpose(); 

  MxDimVector<T, DIM> rva, iva;
  MxDimMatrix<T, DIM> rve, lve;

  int info;
  int lwork = 5 * DIM;
  T work[lwork];
  Teuchos::LAPACK<int, T> lapack;
  lapack.GEEV('V', 'V', DIM, &thisT(0, 0), DIM, &rva[0], &iva[0], &lve(0, 0), DIM, &rve(0, 0), DIM, work, lwork, &info);
  
  if (info != 0)
    std::cout << "MxDimMatrix::eig(): error in lapack routine. Return code: " << info << ".\n";

  // gather results
  for (size_t i = 0; i < DIM; ++i) {
    if (iva[i] != 0) {
      for (size_t k = 0; k < 2; ++k) {
        evals[i + k].real() = rva[i + k];
        evals[i + k].imag() = iva[i + k];
        for (size_t j = 0; j < DIM; ++j) {
          levecs(i + k, j).real() = lve(j, i);
          levecs(i + k, j).imag() = (k == 0 ? 1.0 : -1.0) * lve(j, i + 1);
          revecs(i + k, j).real() = rve(j, i);
          revecs(i + k, j).imag() = (k == 0 ? 1.0 : -1.0) * rve(j, i + 1);
        }
      }
      i++;
    }
    else {
      evals[i].real() = rva[i];
      evals[i].imag() = iva[i];
      for (size_t j = 0; j < DIM; ++j) {
        levecs(i, j).real() = lve(j, i);
        revecs(i, j).real() = rve(j, i);
      }
    }
  }

  return info;
}
Esempio n. 7
0
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
    Ppattern_ = Ppattern;

    const RCP<const Map> uniqueMap    = Ppattern_->getDomainMap();
    const RCP<const Map> nonUniqueMap = Ppattern_->getColMap();
    RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);

    const size_t NSDim = Bc.getNumVectors();
    X_ = MultiVectorFactory::Build(nonUniqueMap, NSDim);
    X_->doImport(Bc, *importer, Xpetra::INSERT);

    size_t numRows = Ppattern_->getNodeNumRows();
    XXtInv_.resize(numRows);
    Teuchos::SerialDenseVector<LO,SC> BcRow(NSDim, false);
    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices;
      Ppattern_->getLocalRowView(i, indices);

      size_t nnz = indices.size();

      Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false);
      for (size_t j = 0; j < nnz; j++) {
        for (size_t k = 0; k < NSDim; k++)
          BcRow[k] = X_->getData(k)[indices[j]];

        Teuchos::setCol(BcRow, (LO)j, locX);
      }

      XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false);

      Teuchos::BLAS<LO,SC> blas;
      blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                Teuchos::ScalarTraits<SC>::one(), locX.values(), locX.stride(),
                locX.values(), locX.stride(), Teuchos::ScalarTraits<SC>::zero(),
                XXtInv_[i].values(), XXtInv_[i].stride());

      Teuchos::LAPACK<LO,SC> lapack;
      LO info, lwork = 3*NSDim;
      ArrayRCP<LO> IPIV(NSDim);
      ArrayRCP<SC> WORK(lwork);
      lapack.GETRF(NSDim, NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), &info);
      lapack.GETRI(NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), WORK.get(), lwork, &info);
    }
  }
Esempio n. 8
0
int MxDimMatrix<T, DIM>::solve(MxDimVector<T, DIM> & x, const MxDimVector<T, DIM> & b) const {
  x = b;

  MxDimMatrix<T, DIM> copy(*this);

  Teuchos::LAPACK<int, T> lapack;
  MxDimVector<int, DIM> ipiv;
  //int ipiv[DIM];
  int info;

  lapack.GETRF(DIM, DIM, &copy(0, 0), DIM, &ipiv[0], &info);
  if (info != 0)
    std::cout << "MxDimMatrix::solve(...): error in lapack routine getrf. Return code: " << info << ".\n";

  lapack.GETRS('T', DIM, 1, &copy(0, 0), DIM, &ipiv[0], &x[0], DIM, &info);
  if (info != 0)
    std::cout << "MxDimMatrix::solve(...): error in lapack routine getrs. Return code: " << info << ".\n";

  return info;
}
void updateGuess(Teuchos::SerialDenseVector<int, std::complex<double> >& myCurrentGuess,
		Teuchos::SerialDenseVector<int, std::complex<double> >& myTargetsCalculated,
		Teuchos::SerialDenseMatrix<int, std::complex<double> >& myJacobian, 
		Teuchos::LAPACK<int, std::complex<double> >& myLAPACK
		 )
{
	//v = J(inverse) * (-F(x))
	//new guess = v + old guess
	myTargetsCalculated *= -1.0;

	//Perform an LU factorization of this matrix. 
	int ipiv[NUMDIMENSIONS], info;
	char TRANS = 'N';
	myLAPACK.GETRF( NUMDIMENSIONS, NUMDIMENSIONS, myJacobian.values(), myJacobian.stride(), ipiv, &info ); 

	// Solve the linear system.
	myLAPACK.GETRS( TRANS, NUMDIMENSIONS, 1, myJacobian.values(), myJacobian.stride(),
		       	ipiv, myTargetsCalculated.values(), myTargetsCalculated.stride(), &info );  

	//We have overwritten myTargetsCalculated with guess update values
	//myBLAS.AXPY(NUMDIMENSIONS, 1.0, myGuessAdjustment.values(), 1, myCurrentGuess.values(), 1);
	myCurrentGuess += myTargetsCalculated;
}
void DenseContainer<MatrixType, LocalScalarType>::factor ()
{
    Teuchos::LAPACK<int, local_scalar_type> lapack;
    int INFO = 0;
    lapack.GETRF (diagBlock_.numRows (), diagBlock_.numCols (),
                  diagBlock_.values (), diagBlock_.stride (),
                  ipiv_.getRawPtr (), &INFO);
    // INFO < 0 is a bug.
    TEUCHOS_TEST_FOR_EXCEPTION(
        INFO < 0, std::logic_error, "Ifpack2::DenseContainer::factor: "
        "LAPACK's _GETRF (LU factorization with partial pivoting) was called "
        "incorrectly.  INFO = " << INFO << " < 0.  "
        "Please report this bug to the Ifpack2 developers.");
    // INFO > 0 means the matrix is singular.  This is probably an issue
    // either with the choice of rows the rows we extracted, or with the
    // input matrix itself.
    TEUCHOS_TEST_FOR_EXCEPTION(
        INFO > 0, std::runtime_error, "Ifpack2::DenseContainer::factor: "
        "LAPACK's _GETRF (LU factorization with partial pivoting) reports that the "
        "computed U factor is exactly singular.  U(" << INFO << "," << INFO << ") "
        "(one-based index i) is exactly zero.  This probably means that the input "
        "matrix has a singular diagonal block.");
}
NOX::Abstract::Group::ReturnType
LOCA::EigenvalueSort::LargestMagnitude::sort(int n, double* r_evals,
                         double* i_evals,
                         std::vector<int>* perm) const
{
  int i, j;
  int tempord = 0;
  double temp, tempr, tempi;
  Teuchos::LAPACK<int,double> lapack;

  //
  // Reset the index
  if (perm) {
    for (i=0; i < n; i++) {
      (*perm)[i] = i;
    }
  }

  //---------------------------------------------------------------
  // Sort eigenvalues in decreasing order of magnitude
  //---------------------------------------------------------------
  for (j=1; j < n; ++j) {
    tempr = r_evals[j]; tempi = i_evals[j];
    if (perm)
      tempord = (*perm)[j];
    temp=lapack.LAPY2(r_evals[j],i_evals[j]);
    for (i=j-1; i>=0 && lapack.LAPY2(r_evals[i],i_evals[i])<temp; --i) {
      r_evals[i+1]=r_evals[i]; i_evals[i+1]=i_evals[i];
      if (perm)
    (*perm)[i+1]=(*perm)[i];
    }
    r_evals[i+1] = tempr; i_evals[i+1] = tempi;
    if (perm)
      (*perm)[i+1] = tempord;
  }
  return NOX::Abstract::Group::Ok;
}
void gauss( const Teuchos::LAPACK<int,Real> &lapack,
            const ROL::Vector<Real> &a,
            const ROL::Vector<Real> &b,
            ROL::Vector<Real> &x,
            ROL::Vector<Real> &w ) {
    int INFO;

    Teuchos::RCP<const std::vector<Real> > ap = 
        (Teuchos::dyn_cast<ROL::StdVector<Real> >(const_cast<ROL::Vector<Real> &>(a))).getVector();
    Teuchos::RCP<const std::vector<Real> > bp = 
        (Teuchos::dyn_cast<ROL::StdVector<Real> >(const_cast<ROL::Vector<Real> &>(b))).getVector();
    Teuchos::RCP<std::vector<Real> > xp = 
        Teuchos::rcp_const_cast<std::vector<Real> >((Teuchos::dyn_cast<ROL::StdVector<Real> >(x)).getVector()); 
    Teuchos::RCP<std::vector<Real> > wp = 
        Teuchos::rcp_const_cast<std::vector<Real> >((Teuchos::dyn_cast<ROL::StdVector<Real> >(w)).getVector()); 

    const int N = ap->size();  
    const int LDZ = N;
    const char COMPZ = 'I';

    Teuchos::RCP<std::vector<Real> > Dp = Teuchos::rcp(new std::vector<Real>(N,0.0));
    Teuchos::RCP<std::vector<Real> > Ep = Teuchos::rcp(new std::vector<Real>(N,0.0));
    Teuchos::RCP<std::vector<Real> > WORKp = Teuchos::rcp(new std::vector<Real>(4*N,0.0));

    // Column-stacked matrix of eigenvectors needed for weights
    Teuchos::RCP<std::vector<Real> > Zp = Teuchos::rcp(new std::vector<Real>(N*N,0));

    // D = a
    std::copy(ap->begin(),ap->end(),Dp->begin());
     
    for(int i=0;i<N-1;++i) {
        (*Ep)[i] = sqrt((*bp)[i+1]);  
    }

    // Eigenvalue Decomposition 
    lapack.STEQR(COMPZ,N,&(*Dp)[0],&(*Ep)[0],&(*Zp)[0],LDZ,&(*WORKp)[0],&INFO);

    for(int i=0;i<N;++i){
        (*xp)[i] = (*Dp)[i];
        (*wp)[i] = (*bp)[0]*pow((*Zp)[N*i],2);
    } 
}
Stokhos::MonoProjPCEBasis<ordinal_type, value_type>::
MonoProjPCEBasis(
   ordinal_type p,
   const Stokhos::OrthogPolyApprox<ordinal_type, value_type>& pce,
   const Stokhos::Quadrature<ordinal_type, value_type>& quad,
   const Stokhos::Sparse3Tensor<ordinal_type, value_type>& Cijk,
   bool limit_integration_order_) :
  RecurrenceBasis<ordinal_type, value_type>("Monomial Projection", p, true),
  limit_integration_order(limit_integration_order_),
  pce_sz(pce.basis()->size()),
  pce_norms(pce.basis()->norm_squared()),
  a(pce_sz), 
  b(pce_sz),
  basis_vecs(pce_sz, p+1),
  new_pce(p+1)
{
  // If the original basis is normalized, we can use the standard QR
  // factorization.  For simplicity, we renormalize the PCE coefficients
  // for a normalized basis
  Stokhos::OrthogPolyApprox<ordinal_type, value_type> normalized_pce(pce);
  for (ordinal_type i=0; i<pce_sz; i++) {
    pce_norms[i] = std::sqrt(pce_norms[i]);
    normalized_pce[i] *= pce_norms[i];
  }

  // Evaluate PCE at quad points
  ordinal_type nqp = quad.size();
  Teuchos::Array<value_type> pce_vals(nqp);
  const Teuchos::Array<value_type>& weights = quad.getQuadWeights();
  const Teuchos::Array< Teuchos::Array<value_type> >& quad_points =
    quad.getQuadPoints();
  const Teuchos::Array< Teuchos::Array<value_type> >& basis_values =
    quad.getBasisAtQuadPoints();
  for (ordinal_type i=0; i<nqp; i++) {
    pce_vals[i] = normalized_pce.evaluate(quad_points[i], basis_values[i]);
  }

  // Form Kylov matrix up to order pce_sz
  matrix_type K(pce_sz, pce_sz);

  // Compute matrix
  matrix_type A(pce_sz, pce_sz);
  typedef Stokhos::Sparse3Tensor<ordinal_type, value_type> Cijk_type;
  for (typename Cijk_type::k_iterator k_it = Cijk.k_begin();
       k_it != Cijk.k_end(); ++k_it) {
    ordinal_type k = index(k_it);
    for (typename Cijk_type::kj_iterator j_it = Cijk.j_begin(k_it); 
	 j_it != Cijk.j_end(k_it); ++j_it) {
      ordinal_type j = index(j_it);
      value_type val = 0;
      for (typename Cijk_type::kji_iterator i_it = Cijk.i_begin(j_it);
	   i_it != Cijk.i_end(j_it); ++i_it) {
	ordinal_type i = index(i_it);
	value_type c = value(i_it) / (pce_norms[j]*pce_norms[k]);
	val += pce[i]*c;
      }
      A(k,j) = val;
    }
  }

  // Each column i is given by projection of the i-th order monomial 
  // onto original basis
  vector_type u0 = Teuchos::getCol(Teuchos::View, K, 0);
  u0(0) = 1.0;
  for (ordinal_type i=1; i<pce_sz; i++)
    u0(i) = 0.0;
  for (ordinal_type k=1; k<pce_sz; k++) {
    vector_type u = Teuchos::getCol(Teuchos::View, K, k);
    vector_type up = Teuchos::getCol(Teuchos::View, K, k-1);
    u.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, A, up, 0.0);
  }
  /*
  for (ordinal_type j=0; j<pce_sz; j++) {
    for (ordinal_type i=0; i<pce_sz; i++) {
      value_type val = 0.0;
      for (ordinal_type k=0; k<nqp; k++)
	val += weights[k]*std::pow(pce_vals[k],j)*basis_values[k][i];
      K(i,j) = val;
    }
  }
  */

  std::cout << K << std::endl << std::endl;

  // Compute QR factorization of K
  ordinal_type ws_size, info;
  value_type ws_size_query;
  Teuchos::Array<value_type> tau(pce_sz);
  Teuchos::LAPACK<ordinal_type,value_type> lapack;
  lapack.GEQRF(pce_sz, pce_sz, K.values(), K.stride(), &tau[0], 
	       &ws_size_query, -1, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 
		     "GEQRF returned value " << info);
  ws_size = static_cast<ordinal_type>(ws_size_query);
  Teuchos::Array<value_type> work(ws_size);
  lapack.GEQRF(pce_sz, pce_sz, K.values(), K.stride(), &tau[0], 
	       &work[0], ws_size, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 
		     "GEQRF returned value " << info);
  
  // Get Q
  lapack.ORGQR(pce_sz, pce_sz, pce_sz, K.values(), K.stride(), &tau[0], 
	       &ws_size_query, -1, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 
		     "ORGQR returned value " << info);
  ws_size = static_cast<ordinal_type>(ws_size_query);
  work.resize(ws_size);
  lapack.ORGQR(pce_sz, pce_sz, pce_sz, K.values(), K.stride(), &tau[0], 
	       &work[0], ws_size, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 
		     "ORGQR returned value " << info);

  // Get basis vectors
  for (ordinal_type j=0; j<p+1; j++)
    for (ordinal_type i=0; i<pce_sz; i++)
      basis_vecs(i,j) = K(i,j);

  
  // Compute T = Q'*A*Q
  matrix_type prod(pce_sz,pce_sz);
  prod.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, K, A, 0.0);
  matrix_type T(pce_sz,pce_sz);
  T.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, prod, K, 0.0);

  //std::cout << T << std::endl;

  // Recursion coefficients are diagonal and super diagonal
  b[0] = 1.0;
  for (ordinal_type i=0; i<pce_sz-1; i++) {
    a[i] = T(i,i);
    b[i+1] = T(i,i+1);
  }
  a[pce_sz-1] = T(pce_sz-1,pce_sz-1);

  // Setup rest of basis
  this->setup();

  // Project original PCE into the new basis
  vector_type u(pce_sz);
  for (ordinal_type i=0; i<pce_sz; i++)
    u[i] = normalized_pce[i];
  new_pce.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, basis_vecs, u, 
		   0.0);
  for (ordinal_type i=0; i<=p; i++)
    new_pce[i] /= this->norms[i];
}
Esempio n. 14
0
int main(int argc, char *argv[]) {
  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  Kokkos::initialize();
  // This little trick lets us print to std::cout only if
  // a (dummy) command-line argument is provided.
  int iprint = argc - 1;
  Teuchos::RCP<std::ostream> outStream;
  Teuchos::oblackholestream bhs; // outputs nothing
  if (iprint > 0)
    outStream = Teuchos::rcp(&std::cout, false);
  else
    outStream = Teuchos::rcp(&bhs, false);
  
  // Save the format state of the original std::cout.
  Teuchos::oblackholestream oldFormatState;
  oldFormatState.copyfmt(std::cout);
  
  *outStream \
    << "===============================================================================\n" \
    << "|                                                                             |\n" \
    << "|                  Unit Test (Basis_HGRAD_HEX_In_FEM)                         |\n" \
    << "|                                                                             |\n" \
    << "| 1) Patch test involving H(div) matrices                                     |\n" \
    << "|    for the Dirichlet problem on a hexahedron                                |\n" \
    << "|    Omega with boundary Gamma.                                               |\n" \
    << "|                                                                             |\n" \
    << "|   Questions? Contact Pavel Bochev ([email protected]),                     |\n" \
    << "|                      Robert Kirby ([email protected]),                 |\n" \
    << "|                      Denis Ridzal ([email protected]),                     |\n" \
    << "|                      Kara Peterson ([email protected]).                    |\n" \
    << "|                                                                             |\n" \
    << "|  Intrepid's website: http://trilinos.sandia.gov/packages/intrepid           |\n" \
    << "|  Trilinos website:   http://trilinos.sandia.gov                             |\n" \
    << "|                                                                             |\n" \
    << "===============================================================================\n" \
    << "| TEST 1: Patch test                                                          |\n" \
    << "===============================================================================\n";
  
  
  int errorFlag = 0;
  
  outStream -> precision(16);
  
  try {
    DefaultCubatureFactory<double> cubFactory;                                           // create cubature factory
    shards::CellTopology cell(shards::getCellTopologyData< shards::Hexahedron<> >());    // create parent cell topology
    shards::CellTopology side(shards::getCellTopologyData< shards::Quadrilateral<> >()); // create relevant subcell (side) topology
    shards::CellTopology line(shards::getCellTopologyData< shards::Line<> >() );         // for getting points to construct the basis
    
    int cellDim = cell.getDimension();
    int sideDim = side.getDimension();
    
    int min_order = 0;
    int max_order = 3;
    
    int numIntervals = 2;
    int numInterpPoints = (numIntervals + 1)*(numIntervals + 1)*(numIntervals+1);
    FieldContainer<double> interp_points_ref(numInterpPoints, cellDim);
    int counter = 0;
    for (int k=0;k<numIntervals;k++) {
      for (int j=0; j<=numIntervals; j++) {
	for (int i=0; i<=numIntervals; i++) {
          interp_points_ref(counter,0) = i*(1.0/numIntervals);
          interp_points_ref(counter,1) = j*(1.0/numIntervals);
          interp_points_ref(counter,2) = k*(1.0/numIntervals);
          counter++;
        }
      }
    }
    
    for (int basis_order=min_order;basis_order<=max_order;basis_order++) {
      // create bases
      // get the points for the vector basis
      Teuchos::RCP<Basis<double,FieldContainer<double> > > vectorBasis =
        Teuchos::rcp(new Basis_HDIV_HEX_In_FEM<double,FieldContainer<double> >(basis_order+1,POINTTYPE_SPECTRAL) );

      Teuchos::RCP<Basis<double,FieldContainer<double> > > scalarBasis =
	Teuchos::rcp(new Basis_HGRAD_HEX_Cn_FEM<double,FieldContainer<double> >(basis_order,POINTTYPE_SPECTRAL) );
      
      int numVectorFields = vectorBasis->getCardinality();
      int numScalarFields = scalarBasis->getCardinality();
      int numTotalFields = numVectorFields + numScalarFields;
      
      // create cubatures
      Teuchos::RCP<Cubature<double> > cellCub = cubFactory.create(cell, 2*(basis_order+1));
      Teuchos::RCP<Cubature<double> > sideCub = cubFactory.create(side, 2*(basis_order+1));
      
      int numCubPointsCell = cellCub->getNumPoints();
      int numCubPointsSide = sideCub->getNumPoints();
      
      // hold cubature information
      FieldContainer<double> cub_points_cell(numCubPointsCell, cellDim);
      FieldContainer<double> cub_weights_cell(numCubPointsCell);
      FieldContainer<double> cub_points_side( numCubPointsSide, sideDim );
      FieldContainer<double> cub_weights_side( numCubPointsSide );
      FieldContainer<double> cub_points_side_refcell( numCubPointsSide , cellDim );
      
      // hold basis function information on refcell
      FieldContainer<double> value_of_v_basis_at_cub_points_cell(numVectorFields, numCubPointsCell, cellDim );
      FieldContainer<double> w_value_of_v_basis_at_cub_points_cell(1, numVectorFields, numCubPointsCell, cellDim);
      FieldContainer<double> div_of_v_basis_at_cub_points_cell( numVectorFields, numCubPointsCell );
      FieldContainer<double> w_div_of_v_basis_at_cub_points_cell( 1, numVectorFields , numCubPointsCell );
      FieldContainer<double> value_of_s_basis_at_cub_points_cell(numScalarFields,numCubPointsCell);
      FieldContainer<double> w_value_of_s_basis_at_cub_points_cell(1,numScalarFields,numCubPointsCell);
      
      // containers for side integration:
      // I just need the normal component of the vector basis
      // and the exact solution at the cub points
      FieldContainer<double> value_of_v_basis_at_cub_points_side(numVectorFields,numCubPointsSide,cellDim);
      FieldContainer<double> n_of_v_basis_at_cub_points_side(numVectorFields,numCubPointsSide);
      FieldContainer<double> w_n_of_v_basis_at_cub_points_side(1,numVectorFields,numCubPointsSide);
      FieldContainer<double> diri_data_at_cub_points_side(1,numCubPointsSide);
      FieldContainer<double> side_normal(cellDim);
      
      // holds rhs data
      FieldContainer<double> rhs_at_cub_points_cell(1,numCubPointsCell);
      
      // FEM matrices and vectors
      FieldContainer<double> fe_matrix_M(1,numVectorFields,numVectorFields);
      FieldContainer<double> fe_matrix_B(1,numVectorFields,numScalarFields);
      FieldContainer<double> fe_matrix(1,numTotalFields,numTotalFields);
      
      FieldContainer<double> rhs_vector_vec(1,numVectorFields);
      FieldContainer<double> rhs_vector_scal(1,numScalarFields);
      FieldContainer<double> rhs_and_soln_vec(1,numTotalFields);
      
      FieldContainer<int> ipiv(numTotalFields);
      FieldContainer<double> value_of_s_basis_at_interp_points( numScalarFields , numInterpPoints);
      FieldContainer<double> interpolant( 1 , numInterpPoints );
      
      // set test tolerance
      double zero = (basis_order+1)*(basis_order+1)*1000.0*INTREPID_TOL;
      
      // build matrices outside the loop, and then just do the rhs
      // for each iteration
      
      cellCub->getCubature(cub_points_cell, cub_weights_cell);
      sideCub->getCubature(cub_points_side, cub_weights_side);
      
      // need the vector basis & its divergences
      vectorBasis->getValues(value_of_v_basis_at_cub_points_cell,
                             cub_points_cell,
                             OPERATOR_VALUE);
      vectorBasis->getValues(div_of_v_basis_at_cub_points_cell,
                             cub_points_cell,
                             OPERATOR_DIV);
      
      // need the scalar basis as well
      scalarBasis->getValues(value_of_s_basis_at_cub_points_cell,
                             cub_points_cell,
                             OPERATOR_VALUE);
      
      // construct mass matrix
      cub_weights_cell.resize(1,numCubPointsCell);
      FunctionSpaceTools::multiplyMeasure<double>(w_value_of_v_basis_at_cub_points_cell ,
                                                  cub_weights_cell ,
                                                  value_of_v_basis_at_cub_points_cell ); 
      cub_weights_cell.resize(numCubPointsCell);
      
      
      value_of_v_basis_at_cub_points_cell.resize( 1 , numVectorFields , numCubPointsCell , cellDim );
      FunctionSpaceTools::integrate<double>(fe_matrix_M,
                                            w_value_of_v_basis_at_cub_points_cell ,
                                            value_of_v_basis_at_cub_points_cell ,
                                            COMP_BLAS );
      value_of_v_basis_at_cub_points_cell.resize( numVectorFields , numCubPointsCell , cellDim );
      
      // div matrix
      cub_weights_cell.resize(1,numCubPointsCell);
      FunctionSpaceTools::multiplyMeasure<double>(w_div_of_v_basis_at_cub_points_cell,
                                                  cub_weights_cell,
                                                  div_of_v_basis_at_cub_points_cell);
      cub_weights_cell.resize(numCubPointsCell);
      
      value_of_s_basis_at_cub_points_cell.resize(1,numScalarFields,numCubPointsCell);
      FunctionSpaceTools::integrate<double>(fe_matrix_B,
                                            w_div_of_v_basis_at_cub_points_cell ,
                                            value_of_s_basis_at_cub_points_cell ,
                                            COMP_BLAS );
      value_of_s_basis_at_cub_points_cell.resize(numScalarFields,numCubPointsCell);
      
      for (int x_order=0;x_order<=basis_order;x_order++) {
        for (int y_order=0;y_order<=basis_order;y_order++) {
          for (int z_order=0;z_order<=basis_order;z_order++) {
            
            
            // reset global matrix since I destroyed it in LU factorization.
            fe_matrix.initialize();
            // insert mass matrix into global matrix
            for (int i=0;i<numVectorFields;i++) {
              for (int j=0;j<numVectorFields;j++) {
                fe_matrix(0,i,j) = fe_matrix_M(0,i,j);
              }
            }
            
            // insert div matrix into global matrix
            for (int i=0;i<numVectorFields;i++) {
              for (int j=0;j<numScalarFields;j++) {
                fe_matrix(0,i,numVectorFields+j)=-fe_matrix_B(0,i,j);
                fe_matrix(0,j+numVectorFields,i)=fe_matrix_B(0,i,j);
              }
            }
            
            // clear old vector data
            rhs_vector_vec.initialize();
            rhs_vector_scal.initialize();
            rhs_and_soln_vec.initialize();
            
            // now get rhs vector
            // rhs_vector_scal is just (rhs,w) for w in the scalar basis
            // I already have the scalar basis tabulated.
            cub_points_cell.resize(1,numCubPointsCell,cellDim);
            rhsFunc(rhs_at_cub_points_cell,
                    cub_points_cell,
                    x_order,
                    y_order,
                    z_order);
            
            cub_points_cell.resize(numCubPointsCell,cellDim);
            
            cub_weights_cell.resize(1,numCubPointsCell);
            FunctionSpaceTools::multiplyMeasure<double>(w_value_of_s_basis_at_cub_points_cell,
                                                        cub_weights_cell,
                                                        value_of_s_basis_at_cub_points_cell);
            cub_weights_cell.resize(numCubPointsCell);
            FunctionSpaceTools::integrate<double>(rhs_vector_scal,
                                                  rhs_at_cub_points_cell,
                                                  w_value_of_s_basis_at_cub_points_cell,
                                                  COMP_BLAS);

            for (int i=0;i<numScalarFields;i++) {
              rhs_and_soln_vec(0,numVectorFields+i) = rhs_vector_scal(0,i);
            }
            
            
            // now get <u,v.n> on boundary
            for (unsigned side_cur=0;side_cur<6;side_cur++) {
              // map side cubature to current side
              CellTools<double>::mapToReferenceSubcell( cub_points_side_refcell ,
                                                        cub_points_side ,
                                                        sideDim ,
                                                        (int)side_cur ,
                                                        cell );
              // Evaluate dirichlet data
              cub_points_side_refcell.resize(1,numCubPointsSide,cellDim);
              u_exact(diri_data_at_cub_points_side,
                      cub_points_side_refcell,x_order,y_order,z_order);

              cub_points_side_refcell.resize(numCubPointsSide,cellDim);
              
              // get normal direction, this has the edge weight factored into it already
              CellTools<double>::getReferenceSideNormal(side_normal , 
                                                        (int)side_cur,cell );

              // v.n at cub points on side
              vectorBasis->getValues(value_of_v_basis_at_cub_points_side ,
                                    cub_points_side_refcell ,
                                    OPERATOR_VALUE );

              for (int i=0;i<numVectorFields;i++) {
                for (int j=0;j<numCubPointsSide;j++) {
                  n_of_v_basis_at_cub_points_side(i,j) = 0.0;
                  for (int k=0;k<cellDim;k++) {
                    n_of_v_basis_at_cub_points_side(i,j) += side_normal(k) * 
                      value_of_v_basis_at_cub_points_side(i,j,k);
                  }
                } 
              }
              
              cub_weights_side.resize(1,numCubPointsSide);
              FunctionSpaceTools::multiplyMeasure<double>(w_n_of_v_basis_at_cub_points_side,
                                                          cub_weights_side,
                                                          n_of_v_basis_at_cub_points_side);
              cub_weights_side.resize(numCubPointsSide);
              
              FunctionSpaceTools::integrate<double>(rhs_vector_vec,
                                                    diri_data_at_cub_points_side,
                                                    w_n_of_v_basis_at_cub_points_side,
                                                    COMP_BLAS,
                                                    false);

              for (int i=0;i<numVectorFields;i++) {
                rhs_and_soln_vec(0,i) -= rhs_vector_vec(0,i);
              }
              
            }
            
            // solve linear system
            int info = 0;
            Teuchos::LAPACK<int, double> solver;
            solver.GESV(numTotalFields, 1, &fe_matrix(0,0,0), numTotalFields, &ipiv(0), &rhs_and_soln_vec(0,0), 
                        numTotalFields, &info);
            
            // compute interpolant; the scalar entries are last
            scalarBasis->getValues(value_of_s_basis_at_interp_points,
                                  interp_points_ref,
                                  OPERATOR_VALUE);
            for (int pt=0;pt<numInterpPoints;pt++) {
              interpolant(0,pt)=0.0;
              for (int i=0;i<numScalarFields;i++) {
                interpolant(0,pt) += rhs_and_soln_vec(0,numVectorFields+i)
                  * value_of_s_basis_at_interp_points(i,pt);
              }
            }
            
            interp_points_ref.resize(1,numInterpPoints,cellDim);
            // get exact solution for comparison
            FieldContainer<double> exact_solution(1,numInterpPoints);
            u_exact( exact_solution , interp_points_ref , x_order, y_order, z_order);
            interp_points_ref.resize(numInterpPoints,cellDim);

            RealSpaceTools<double>::add(interpolant,exact_solution);
            
            double nrm= RealSpaceTools<double>::vectorNorm(&interpolant(0,0),interpolant.dimension(1), NORM_TWO);

            *outStream << "\nNorm-2 error between scalar components of exact solution of order ("
                       << x_order << ", " << y_order << ", " << z_order
                       << ") and finite element interpolant of order " << basis_order << ": "
                       << nrm << "\n";

            if (nrm > zero) {
              *outStream << "\n\nPatch test failed for solution polynomial order ("
                         << x_order << ", " << y_order << ", " << z_order << ") and basis order (scalar, vector)  ("
                         << basis_order << ", " << basis_order+1 << ")\n\n";
              errorFlag++;
            }
            
          }
        }
      }
    }
    
  }
  
  catch (std::logic_error err) {
    *outStream << err.what() << "\n\n";
    errorFlag = -1000;
  };
  
  if (errorFlag != 0)
    std::cout << "End Result: TEST FAILED\n";
  else
    std::cout << "End Result: TEST PASSED\n";
  
  // reset format state of std::cout
  std::cout.copyfmt(oldFormatState);
  Kokkos::finalize();
  return errorFlag;
}
NOX::StatusTest::StatusType NOX::Solver::AndersonAcceleration::step()
{
  prePostOperator.runPreIterate(*this);
  Teuchos::ParameterList lsParams = paramsPtr->sublist("Direction").sublist("Newton").sublist("Linear Solver");

  // On the first step, do some initializations
  if (nIter == 0) {
    // Compute F of initital guess
    NOX::Abstract::Group::ReturnType rtype = solnPtr->computeF();
    if (rtype != NOX::Abstract::Group::Ok) {
      utilsPtr->out() << "NOX::Solver::AndersonAcceleration::init - "
              << "Unable to compute F" << std::endl;
      throw "NOX Error";
    }

    // Test the initial guess
    status = testPtr->checkStatus(*this, checkType);
    if ((status == NOX::StatusTest::Converged) &&
    (utilsPtr->isPrintType(NOX::Utils::Warning))) {
      utilsPtr->out() << "Warning: NOX::Solver::AndersonAcceleration::init() - "
              << "The solution passed into the solver (either "
              << "through constructor or reset method) "
              << "is already converged!  The solver wil not "
              << "attempt to solve this system since status is "
              << "flagged as converged." << std::endl;
    }
    printUpdate();

    // First check status
    if (status != NOX::StatusTest::Unconverged) {
      prePostOperator.runPostIterate(*this);
      printUpdate();
      return status;
    }

    // Apply preconditioner if enabled
    if (precond) {
      if (recomputeJacobian)
        solnPtr->computeJacobian();
      solnPtr->applyRightPreconditioning(false, lsParams, solnPtr->getF(), *oldPrecF);
    }
    else
      *oldPrecF = solnPtr->getF();

    // Copy initial guess to old soln
    *oldSolnPtr = *solnPtr;

    // Compute step then first iterate with a line search.
    workVec->update(mixParam,*oldPrecF);
    bool ok = lineSearchPtr->compute(*solnPtr, stepSize, *workVec, *this);
    if (!ok)
    {
      if (stepSize == 0.0)
      {
        utilsPtr->out() << "NOX::Solver::AndersonAcceleratino::iterate - line search failed" << std::endl;
        status = NOX::StatusTest::Failed;
        prePostOperator.runPostIterate(*this);
        printUpdate();
        return status;
      }
      else if (utilsPtr->isPrintType(NOX::Utils::Warning))
        utilsPtr->out() << "NOX::Solver::AndersonAcceleration::iterate - using recovery step for line search" << std::endl;
    }

    // Compute F for the first iterate in case it isn't in the line search
    rtype = solnPtr->computeF();
    if (rtype != NOX::Abstract::Group::Ok)
    {
      utilsPtr->out() << "NOX::Solver::AndersonAcceleration::iterate - unable to compute F" << std::endl;
      status = NOX::StatusTest::Failed;
      prePostOperator.runPostIterate(*this);
      printUpdate();
      return status;
    }

    // Evaluate the current status.
    status = testPtr->checkStatus(*this, checkType);

    //Update iteration count
    nIter++;

    prePostOperator.runPostIterate(*this);
    printUpdate();
    return status;
  }

  // First check status
  if (status != NOX::StatusTest::Unconverged) {
    prePostOperator.runPostIterate(*this);
    printUpdate();
    return status;
  }

  // Apply preconditioner if enabled
  if (precond) {
    if (recomputeJacobian)
      solnPtr->computeJacobian();
    solnPtr->applyRightPreconditioning(false, lsParams, solnPtr->getF(), *precF);
  }
  else
    *precF = solnPtr->getF();

  // Manage the matrices of past iterates and QR factors
  if (storeParam > 0) {
    if (nIter == accelerationStartIteration) {
      // Initialize
      nStore = 0;
      rMat.shape(0,0);
      oldPrecF->update(1.0, *precF, -1.0);
      qrAdd(*oldPrecF);
      xMat[0]->update(1.0, solnPtr->getX(), -1.0, oldSolnPtr->getX(), 0.0);
    }
    else if (nIter > accelerationStartIteration) {
      if (nStore == storeParam) {
        Teuchos::RCP<NOX::Abstract::Vector> tempPtr = xMat[0];
        for (int ii = 0; ii<nStore-1; ii++)
          xMat[ii] = xMat[ii+1];
        xMat[nStore-1] = tempPtr;
        qrDelete();
      }
      oldPrecF->update(1.0, *precF, -1.0);
      qrAdd(*oldPrecF);
      xMat[nStore-1]->update(1.0, solnPtr->getX(), -1.0, oldSolnPtr->getX(), 0.0);
    }
  }

  // Reorthogonalize 
  if ( (nStore > 1) && (orthoFrequency > 0) )
    if (nIter % orthoFrequency == 0)
      reorthogonalize();

  // Copy current soln to the old soln.
  *oldSolnPtr = *solnPtr;
  *oldPrecF = *precF;

  // Adjust for condition number
  if (nStore > 0) {
    Teuchos::LAPACK<int,double> lapack;
    char normType = '1';
    double invCondNum = 0.0;
    int info = 0;
    if ( WORK.size() < static_cast<std::size_t>(4*nStore) )
      WORK.resize(4*nStore);
    if (IWORK.size() < static_cast<std::size_t>(nStore))
      IWORK.resize(nStore);
    lapack.GECON(normType,nStore,rMat.values(),nStore,rMat.normOne(),&invCondNum,&WORK[0],&IWORK[0],&info);
    if (utilsPtr->isPrintType(Utils::Details))
      utilsPtr->out() << "    R condition number estimate ("<< nStore << ") = " << 1.0/invCondNum << std::endl;

    if (adjustForConditionNumber) {
      while ( (1.0/invCondNum > dropTolerance) && (nStore > 1)  ) {
        Teuchos::RCP<NOX::Abstract::Vector> tempPtr = xMat[0];
        for (int ii = 0; ii<nStore-1; ii++)
          xMat[ii] = xMat[ii+1];
        xMat[nStore-1] = tempPtr;
        qrDelete();
        lapack.GECON(normType,nStore,rMat.values(),nStore,rMat.normOne(),&invCondNum,&WORK[0],&IWORK[0],&info);
        if (utilsPtr->isPrintType(Utils::Details))
          utilsPtr->out() << "    Adjusted R condition number estimate ("<< nStore << ") = " << 1.0/invCondNum << std::endl;
      }
    }
  }

  // Solve the least-squares problem.
  Teuchos::SerialDenseMatrix<int,double> gamma(nStore,1), RHS(nStore,1), Rgamma(nStore,1);
  for (int ii = 0; ii<nStore; ii++)
    RHS(ii,0) = precF->innerProduct( *(qMat[ii]) );

  //Back-solve for gamma
  for (int ii = nStore-1; ii>=0; ii--) {
    gamma(ii,0) = RHS(ii,0);
    for (int jj = ii+1; jj<nStore; jj++) {
      gamma(ii,0) -= rMat(ii,jj)*gamma(jj,0);
    }
    gamma(ii,0) /= rMat(ii,ii);
  }

  if (nStore > 0)
    Rgamma.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,mixParam,rMat,gamma,0.0);

  // Compute the step and new solution using the line search.
  workVec->update(mixParam,*precF);
  for (int ii=0; ii<nStore; ii++)
    workVec->update(-gamma(ii,0), *(xMat[ii]), -Rgamma(ii,0),*(qMat[ii]),1.0);
  bool ok = lineSearchPtr->compute(*solnPtr, stepSize, *workVec, *this);
  if (!ok)
  {
    if (stepSize == 0.0)
    {
      utilsPtr->out() << "NOX::Solver::AndersonAcceleratino::iterate - line search failed" << std::endl;
      status = NOX::StatusTest::Failed;
      prePostOperator.runPostIterate(*this);
      printUpdate();
      return status;
    }
    else if (utilsPtr->isPrintType(NOX::Utils::Warning))
      utilsPtr->out() << "NOX::Solver::AndersonAcceleration::iterate - using recovery step for line search" << std::endl;
  }

  // Compute F for new current solution in case the line search didn't .
  NOX::Abstract::Group::ReturnType rtype = solnPtr->computeF();
  if (rtype != NOX::Abstract::Group::Ok)
  {
    utilsPtr->out() << "NOX::Solver::AndersonAcceleration::iterate - unable to compute F" << std::endl;
    status = NOX::StatusTest::Failed;
    prePostOperator.runPostIterate(*this);
    printUpdate();
    return status;
  }

  // Update iteration count
  nIter++;

  // Evaluate the current status.
  status = testPtr->checkStatus(*this, checkType);

  prePostOperator.runPostIterate(*this);

  printUpdate();
  return status;
}
Esempio n. 16
0
//==============================================================================
int Ifpack_Polynomial::Compute()
{
  if (!IsInitialized())
    IFPACK_CHK_ERR(Initialize());

  Time_->ResetStartTime();

  // reset values
  IsComputed_ = false;
  Condest_ = -1.0;

  if (PolyDegree_ <= 0)
    IFPACK_CHK_ERR(-2); // at least one application

#ifdef HAVE_IFPACK_EPETRAEXT
  // Check to see if we can run in block mode
  if(IsRowMatrix_ && InvDiagonal_ == Teuchos::null && UseBlockMode_){
    const Epetra_CrsMatrix *CrsMatrix=dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);

    // If we don't have CrsMatrix, we can't use the block preconditioner
    if(!CrsMatrix) UseBlockMode_=false;
    else{
      int ierr;
      InvBlockDiagonal_=Teuchos::rcp(new EpetraExt_PointToBlockDiagPermute(*CrsMatrix));
      if(InvBlockDiagonal_==Teuchos::null) IFPACK_CHK_ERR(-6);

      ierr=InvBlockDiagonal_->SetParameters(BlockList_);
      if(ierr) IFPACK_CHK_ERR(-7);

      ierr=InvBlockDiagonal_->Compute();
      if(ierr) IFPACK_CHK_ERR(-8);
    }
  }
#endif
  if (IsRowMatrix_ && InvDiagonal_ == Teuchos::null && !UseBlockMode_)
  {
    InvDiagonal_ = Teuchos::rcp( new Epetra_Vector(Matrix().Map()) );

    if (InvDiagonal_ == Teuchos::null)
      IFPACK_CHK_ERR(-5);

    IFPACK_CHK_ERR(Matrix().ExtractDiagonalCopy(*InvDiagonal_));

    // Inverse diagonal elements
    // Replace zeros with 1.0
    for (int i = 0 ; i < NumMyRows_ ; ++i) {
      double diag = (*InvDiagonal_)[i];
      if (IFPACK_ABS(diag) < MinDiagonalValue_)
        (*InvDiagonal_)[i] = MinDiagonalValue_;
      else
        (*InvDiagonal_)[i] = 1.0 / diag;
    }
  }

  // Automatically compute maximum eigenvalue estimate of D^{-1}A if user hasn't provided one
  double lambda_real_min, lambda_real_max, lambda_imag_min, lambda_imag_max;
  if (LambdaRealMax_ == -1) {
    //PowerMethod(Matrix(), *InvDiagonal_, EigMaxIters_, lambda_max);
    GMRES(Matrix(), *InvDiagonal_, EigMaxIters_, lambda_real_min, lambda_real_max, lambda_imag_min, lambda_imag_max);
    LambdaRealMin_=lambda_real_min;  LambdaImagMin_=lambda_imag_min;
    LambdaRealMax_=lambda_real_max;  LambdaImagMax_=lambda_imag_max;
    //std::cout<<"LambdaRealMin: "<<LambdaRealMin_<<std::endl;
    //std::cout<<"LambdaRealMax: "<<LambdaRealMax_<<std::endl;
    //std::cout<<"LambdaImagMin: "<<LambdaImagMin_<<std::endl;
    //std::cout<<"LambdaImagMax: "<<LambdaImagMax_<<std::endl;
  }

  // find least squares polynomial for (LSPointsReal_*LSPointsImag_) zeros
  // on a rectangle in the complex plane defined as
  // [LambdaRealMin_,LambdaRealMax_] x [LambdaImagMin_,LambdaImagMax_]

  const std::complex<double> zero(0.0,0.0);

  // Compute points in complex plane
  double lenx = LambdaRealMax_-LambdaRealMin_;
  int      nx = ceil(lenx*((double) LSPointsReal_));
  if (nx<2) { nx = 2; }
  double   hx = lenx/((double) nx);
  std::vector<double> xs;
  if(abs(lenx)>1.0e-8) {
    for( int pt=0; pt<=nx; pt++ ) {
      xs.push_back(hx*pt+LambdaRealMin_);
    }
  }
  else {
    xs.push_back(LambdaRealMax_);
    nx=1;
  }
  double leny = LambdaImagMax_-LambdaImagMin_;
  int      ny = ceil(leny*((double) LSPointsImag_));
  if (ny<2) { ny = 2; }
  double   hy = leny/((double) ny);
  std::vector<double> ys;
  if(abs(leny)>1.0e-8) {
    for( int pt=0; pt<=ny; pt++ ) {
      ys.push_back(hy*pt+LambdaImagMin_);
    }
  }
  else {
    ys.push_back(LambdaImagMax_);
    ny=1;
  }
  std::vector< std::complex<double> > cpts;
  for( int jj=0; jj<ny; jj++ ) {
    for( int ii=0; ii<nx; ii++ ) {
      std::complex<double> cpt(xs[ii],ys[jj]);
      cpts.push_back(cpt);
    }
  }
  cpts.push_back(zero);

#ifdef HAVE_TEUCHOS_COMPLEX
  const std::complex<double> one(1.0,0.0);

  // Construct overdetermined Vandermonde matrix
  Teuchos::SerialDenseMatrix<int, std::complex<double> > Vmatrix(cpts.size(),PolyDegree_+1);
  Vmatrix.putScalar(zero);
  for (int jj = 0; jj <= PolyDegree_; ++jj) {
    for (int ii = 0; ii < static_cast<int> (cpts.size ()) - 1; ++ii) {
      if (jj > 0) {
        Vmatrix(ii,jj) = pow(cpts[ii],jj);
      }
      else {
        Vmatrix(ii,jj) = one;
      }
    }
  }
  Vmatrix(cpts.size()-1,0)=one;

  // Right hand side: all zero except last entry
  Teuchos::SerialDenseMatrix< int,std::complex<double> > RHS(cpts.size(),1);
  RHS.putScalar(zero);
  RHS(cpts.size()-1,0)=one;

  // Solve least squares problem using LAPACK
  Teuchos::LAPACK< int, std::complex<double> > lapack;
  const int N = Vmatrix.numCols();
  Teuchos::Array<double> singularValues(N);
  Teuchos::Array<double> rwork(1);
  rwork.resize (std::max (1, 5 * N));
  std::complex<double> lworkScalar(1.0,0.0);
  int info = 0;
  lapack.GELS('N', Vmatrix.numRows(), Vmatrix.numCols(), RHS.numCols(),
                   Vmatrix.values(),  Vmatrix.numRows(), RHS.values(),    RHS.numRows(),
                   &lworkScalar, -1, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
                             "_GELSS workspace query returned INFO = "
                             << info << " != 0.");
  const int lwork = static_cast<int> (real(lworkScalar));
  TEUCHOS_TEST_FOR_EXCEPTION(lwork < 0, std::logic_error,
                             "_GELSS workspace query returned LWORK = "
                             << lwork << " < 0.");
  // Allocate workspace.  Size > 0 means &work[0] makes sense.
  Teuchos::Array< std::complex<double> > work (std::max (1, lwork));
  // Solve the least-squares problem.
  lapack.GELS('N', Vmatrix.numRows(), Vmatrix.numCols(),  RHS.numCols(),
                   Vmatrix.values(),  Vmatrix.numRows(),  RHS.values(),   RHS.numRows(),
                   &work[0], lwork, &info);

  coeff_.resize(PolyDegree_+1);
  std::complex<double> c0=RHS(0,0);
  for(int ii=0; ii<=PolyDegree_; ii++) {
    // test that the imaginary part is nonzero
    //TEUCHOS_TEST_FOR_EXCEPTION(abs(imag(RHS(ii,0))) > 1e-8, std::logic_error,
    //                         "imaginary part of polynomial coefficients is nonzero! coeff = "
    //                         << RHS(ii,0));
    coeff_[ii]=real(RHS(ii,0)/c0);
    //std::cout<<"coeff["<<ii<<"]="<<coeff_[ii]<<std::endl;
  }

#else

  // Construct overdetermined Vandermonde matrix
  Teuchos::SerialDenseMatrix< int, double > Vmatrix(xs.size()+1,PolyDegree_+1);
  Vmatrix.putScalar(0.0);
  for( int jj=0; jj<=PolyDegree_; jj++) {
    for( std::vector<double>::size_type ii=0; ii<xs.size(); ii++) {
      if(jj>0) {
        Vmatrix(ii,jj)=pow(xs[ii],jj);
      }
      else {
        Vmatrix(ii,jj)=1.0;
      }
    }
  }
  Vmatrix(xs.size(),0)=1.0;

  // Right hand side: all zero except last entry
  Teuchos::SerialDenseMatrix< int, double > RHS(xs.size()+1,1);
  RHS.putScalar(0.0);
  RHS(xs.size(),0)=1.0;

  // Solve least squares problem using LAPACK
  Teuchos::LAPACK< int, double > lapack;
  const int N = Vmatrix.numCols();
  Teuchos::Array<double> singularValues(N);
  Teuchos::Array<double> rwork(1);
  rwork.resize (std::max (1, 5 * N));
  double lworkScalar(1.0);
  int info = 0;
  lapack.GELS('N', Vmatrix.numRows(), Vmatrix.numCols(), RHS.numCols(),
                   Vmatrix.values(),  Vmatrix.numRows(), RHS.values(),    RHS.numRows(),
                   &lworkScalar, -1, &info);
  TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
                             "_GELSS workspace query returned INFO = "
                             << info << " != 0.");
  const int lwork = static_cast<int> (lworkScalar);
  TEUCHOS_TEST_FOR_EXCEPTION(lwork < 0, std::logic_error,
                             "_GELSS workspace query returned LWORK = "
                             << lwork << " < 0.");
  // Allocate workspace.  Size > 0 means &work[0] makes sense.
  Teuchos::Array< double > work (std::max (1, lwork));
  // Solve the least-squares problem.
  lapack.GELS('N', Vmatrix.numRows(), Vmatrix.numCols(),  RHS.numCols(),
                   Vmatrix.values(),  Vmatrix.numRows(),  RHS.values(),   RHS.numRows(),
                   &work[0], lwork, &info);

  coeff_.resize(PolyDegree_+1);
  double c0=RHS(0,0);
  for(int ii=0; ii<=PolyDegree_; ii++) {
    // test that the imaginary part is nonzero
    //TEUCHOS_TEST_FOR_EXCEPTION(abs(imag(RHS(ii,0))) > 1e-8, std::logic_error,
    //                         "imaginary part of polynomial coefficients is nonzero! coeff = "
    //                         << RHS(ii,0));
    coeff_[ii]=RHS(ii,0)/c0;
  }

#endif

#ifdef IFPACK_FLOPCOUNTERS
  ComputeFlops_ += NumMyRows_;
#endif

  ++NumCompute_;
  ComputeTime_ += Time_->ElapsedTime();
  IsComputed_ = true;

  return(0);
}
Esempio n. 17
0
int main(int argc, char *argv[]) {

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);

  // This little trick lets us print to std::cout only if
  // a (dummy) command-line argument is provided.
  int iprint     = argc - 1;
  Teuchos::RCP<std::ostream> outStream;
  Teuchos::oblackholestream bhs; // outputs nothing
  if (iprint > 0)
    outStream = Teuchos::rcp(&std::cout, false);
  else
    outStream = Teuchos::rcp(&bhs, false);

  // Save the format state of the original std::cout.
  Teuchos::oblackholestream oldFormatState;
  oldFormatState.copyfmt(std::cout);

  *outStream \
    << "===============================================================================\n" \
    << "|                                                                             |\n" \
    << "|                    Unit Test (Basis_HGRAD_QUAD_C2_FEM)                      |\n" \
    << "|                                                                             |\n" \
    << "|     1) Patch test involving mass and stiffness matrices,                    |\n" \
    << "|        for the Neumann problem on a physical parallelogram                  |\n" \
    << "|        AND a reference quad Omega with boundary Gamma.                      |\n" \
    << "|                                                                             |\n" \
    << "|        - div (grad u) + u = f  in Omega,  (grad u) . n = g  on Gamma        |\n" \
    << "|                                                                             |\n" \
    << "|        For a generic parallelogram, the basis recovers a complete           |\n" \
    << "|        polynomial space of order 2. On a (scaled and/or translated)         |\n" \
    << "|        reference quad, the basis recovers a complete tensor product         |\n" \
    << "|        space of order 2 (i.e. incl. the x^2*y, x*y^2, x^2*y^2 terms).       |\n" \
    << "|                                                                             |\n" \
    << "|  Questions? Contact  Pavel Bochev  ([email protected]),                    |\n" \
    << "|                      Denis Ridzal  ([email protected]),                    |\n" \
    << "|                      Kara Peterson ([email protected]).                    |\n" \
    << "|                                                                             |\n" \
    << "|  Intrepid's website: http://trilinos.sandia.gov/packages/intrepid           |\n" \
    << "|  Trilinos website:   http://trilinos.sandia.gov                             |\n" \
    << "|                                                                             |\n" \
    << "===============================================================================\n"\
    << "| TEST 1: Patch test                                                          |\n"\
    << "===============================================================================\n";

  
  int errorFlag = 0;

  outStream -> precision(16);


  try {

    int max_order = 2;                                                                    // max total order of polynomial solution
    DefaultCubatureFactory<double>  cubFactory;                                           // create cubature factory
    shards::CellTopology cell(shards::getCellTopologyData< shards::Quadrilateral<> >());  // create parent cell topology
    shards::CellTopology side(shards::getCellTopologyData< shards::Line<> >());           // create relevant subcell (side) topology
    int cellDim = cell.getDimension();
    int sideDim = side.getDimension();

    // Define array containing points at which the solution is evaluated, in reference cell.
    int numIntervals = 10;
    int numInterpPoints = (numIntervals + 1)*(numIntervals + 1);
    FieldContainer<double> interp_points_ref(numInterpPoints, 2);
    int counter = 0;
    for (int j=0; j<=numIntervals; j++) {
      for (int i=0; i<=numIntervals; i++) {
        interp_points_ref(counter,0) = i*(2.0/numIntervals)-1.0;
        interp_points_ref(counter,1) = j*(2.0/numIntervals)-1.0;
        counter++;
      }
    }

    /* Parent cell definition. */
    FieldContainer<double> cell_nodes[2];
    cell_nodes[0].resize(1, 4, cellDim);
    cell_nodes[1].resize(1, 4, cellDim);

    // Generic parallelogram.
    cell_nodes[0](0, 0, 0) = -5.0;
    cell_nodes[0](0, 0, 1) = -1.0;
    cell_nodes[0](0, 1, 0) = 4.0;
    cell_nodes[0](0, 1, 1) = 1.0;
    cell_nodes[0](0, 2, 0) = 8.0;
    cell_nodes[0](0, 2, 1) = 3.0;
    cell_nodes[0](0, 3, 0) = -1.0;
    cell_nodes[0](0, 3, 1) = 1.0;
    // Reference quad. 
    cell_nodes[1](0, 0, 0) = -1.0;
    cell_nodes[1](0, 0, 1) = -1.0;
    cell_nodes[1](0, 1, 0) = 1.0;
    cell_nodes[1](0, 1, 1) = -1.0;
    cell_nodes[1](0, 2, 0) = 1.0;
    cell_nodes[1](0, 2, 1) = 1.0;
    cell_nodes[1](0, 3, 0) = -1.0;
    cell_nodes[1](0, 3, 1) = 1.0;

    std::stringstream mystream[2];
    mystream[0].str("\n>> Now testing basis on a generic parallelogram ...\n");
    mystream[1].str("\n>> Now testing basis on the reference quad ...\n");

    for (int pcell = 0; pcell < 2; pcell++) {
      *outStream << mystream[pcell].str();
      FieldContainer<double> interp_points(1, numInterpPoints, cellDim);
      CellTools<double>::mapToPhysicalFrame(interp_points, interp_points_ref, cell_nodes[pcell], cell);
      interp_points.resize(numInterpPoints, cellDim);

      for (int x_order=0; x_order <= max_order; x_order++) {
        int max_y_order = max_order;
        if (pcell == 0) {
          max_y_order -= x_order;
        }
        for (int y_order=0; y_order <= max_y_order; y_order++) {

          // evaluate exact solution
          FieldContainer<double> exact_solution(1, numInterpPoints);
          u_exact(exact_solution, interp_points, x_order, y_order);

          int basis_order = 2;

          // set test tolerance
          double zero = basis_order*basis_order*100*INTREPID_TOL;

          //create basis
          Teuchos::RCP<Basis<double,FieldContainer<double> > > basis =
            Teuchos::rcp(new Basis_HGRAD_QUAD_C2_FEM<double,FieldContainer<double> >() );
          int numFields = basis->getCardinality();

          // create cubatures
          Teuchos::RCP<Cubature<double> > cellCub = cubFactory.create(cell, 2*basis_order);
          Teuchos::RCP<Cubature<double> > sideCub = cubFactory.create(side, 2*basis_order);
          int numCubPointsCell = cellCub->getNumPoints();
          int numCubPointsSide = sideCub->getNumPoints();

          /* Computational arrays. */
          /* Section 1: Related to parent cell integration. */
          FieldContainer<double> cub_points_cell(numCubPointsCell, cellDim);
          FieldContainer<double> cub_points_cell_physical(1, numCubPointsCell, cellDim);
          FieldContainer<double> cub_weights_cell(numCubPointsCell);
          FieldContainer<double> jacobian_cell(1, numCubPointsCell, cellDim, cellDim);
          FieldContainer<double> jacobian_inv_cell(1, numCubPointsCell, cellDim, cellDim);
          FieldContainer<double> jacobian_det_cell(1, numCubPointsCell);
          FieldContainer<double> weighted_measure_cell(1, numCubPointsCell);

          FieldContainer<double> value_of_basis_at_cub_points_cell(numFields, numCubPointsCell);
          FieldContainer<double> transformed_value_of_basis_at_cub_points_cell(1, numFields, numCubPointsCell);
          FieldContainer<double> weighted_transformed_value_of_basis_at_cub_points_cell(1, numFields, numCubPointsCell);
          FieldContainer<double> grad_of_basis_at_cub_points_cell(numFields, numCubPointsCell, cellDim);
          FieldContainer<double> transformed_grad_of_basis_at_cub_points_cell(1, numFields, numCubPointsCell, cellDim);
          FieldContainer<double> weighted_transformed_grad_of_basis_at_cub_points_cell(1, numFields, numCubPointsCell, cellDim);
          FieldContainer<double> fe_matrix(1, numFields, numFields);

          FieldContainer<double> rhs_at_cub_points_cell_physical(1, numCubPointsCell);
          FieldContainer<double> rhs_and_soln_vector(1, numFields);

          /* Section 2: Related to subcell (side) integration. */
          unsigned numSides = 4;
          FieldContainer<double> cub_points_side(numCubPointsSide, sideDim);
          FieldContainer<double> cub_weights_side(numCubPointsSide);
          FieldContainer<double> cub_points_side_refcell(numCubPointsSide, cellDim);
          FieldContainer<double> cub_points_side_physical(1, numCubPointsSide, cellDim);
          FieldContainer<double> jacobian_side_refcell(1, numCubPointsSide, cellDim, cellDim);
          FieldContainer<double> jacobian_det_side_refcell(1, numCubPointsSide);
          FieldContainer<double> weighted_measure_side_refcell(1, numCubPointsSide);

          FieldContainer<double> value_of_basis_at_cub_points_side_refcell(numFields, numCubPointsSide);
          FieldContainer<double> transformed_value_of_basis_at_cub_points_side_refcell(1, numFields, numCubPointsSide);
          FieldContainer<double> weighted_transformed_value_of_basis_at_cub_points_side_refcell(1, numFields, numCubPointsSide);
          FieldContainer<double> neumann_data_at_cub_points_side_physical(1, numCubPointsSide);
          FieldContainer<double> neumann_fields_per_side(1, numFields);

          /* Section 3: Related to global interpolant. */
          FieldContainer<double> value_of_basis_at_interp_points(numFields, numInterpPoints);
          FieldContainer<double> transformed_value_of_basis_at_interp_points(1, numFields, numInterpPoints);
          FieldContainer<double> interpolant(1, numInterpPoints);

          FieldContainer<int> ipiv(numFields);



          /******************* START COMPUTATION ***********************/

          // get cubature points and weights
          cellCub->getCubature(cub_points_cell, cub_weights_cell);

          // compute geometric cell information
          CellTools<double>::setJacobian(jacobian_cell, cub_points_cell, cell_nodes[pcell], cell);
          CellTools<double>::setJacobianInv(jacobian_inv_cell, jacobian_cell);
          CellTools<double>::setJacobianDet(jacobian_det_cell, jacobian_cell);

          // compute weighted measure
          FunctionSpaceTools::computeCellMeasure<double>(weighted_measure_cell, jacobian_det_cell, cub_weights_cell);

          ///////////////////////////
          // Computing mass matrices:
          // tabulate values of basis functions at (reference) cubature points
          basis->getValues(value_of_basis_at_cub_points_cell, cub_points_cell, OPERATOR_VALUE);

          // transform values of basis functions
          FunctionSpaceTools::HGRADtransformVALUE<double>(transformed_value_of_basis_at_cub_points_cell,
                                                          value_of_basis_at_cub_points_cell);

          // multiply with weighted measure
          FunctionSpaceTools::multiplyMeasure<double>(weighted_transformed_value_of_basis_at_cub_points_cell,
                                                      weighted_measure_cell,
                                                      transformed_value_of_basis_at_cub_points_cell);

          // compute mass matrices
          FunctionSpaceTools::integrate<double>(fe_matrix,
                                                transformed_value_of_basis_at_cub_points_cell,
                                                weighted_transformed_value_of_basis_at_cub_points_cell,
                                                COMP_BLAS);
          ///////////////////////////

          ////////////////////////////////
          // Computing stiffness matrices:
          // tabulate gradients of basis functions at (reference) cubature points
          basis->getValues(grad_of_basis_at_cub_points_cell, cub_points_cell, OPERATOR_GRAD);

          // transform gradients of basis functions
          FunctionSpaceTools::HGRADtransformGRAD<double>(transformed_grad_of_basis_at_cub_points_cell,
                                                         jacobian_inv_cell,
                                                         grad_of_basis_at_cub_points_cell);

          // multiply with weighted measure
          FunctionSpaceTools::multiplyMeasure<double>(weighted_transformed_grad_of_basis_at_cub_points_cell,
                                                      weighted_measure_cell,
                                                      transformed_grad_of_basis_at_cub_points_cell);

          // compute stiffness matrices and sum into fe_matrix
          FunctionSpaceTools::integrate<double>(fe_matrix,
                                                transformed_grad_of_basis_at_cub_points_cell,
                                                weighted_transformed_grad_of_basis_at_cub_points_cell,
                                                COMP_BLAS,
                                                true);
          ////////////////////////////////

          ///////////////////////////////
          // Computing RHS contributions:
          // map cell (reference) cubature points to physical space
          CellTools<double>::mapToPhysicalFrame(cub_points_cell_physical, cub_points_cell, cell_nodes[pcell], cell);

          // evaluate rhs function
          rhsFunc(rhs_at_cub_points_cell_physical, cub_points_cell_physical, x_order, y_order);

          // compute rhs
          FunctionSpaceTools::integrate<double>(rhs_and_soln_vector,
                                                rhs_at_cub_points_cell_physical,
                                                weighted_transformed_value_of_basis_at_cub_points_cell,
                                                COMP_BLAS);

          // compute neumann b.c. contributions and adjust rhs
          sideCub->getCubature(cub_points_side, cub_weights_side);
          for (unsigned i=0; i<numSides; i++) {
            // compute geometric cell information
            CellTools<double>::mapToReferenceSubcell(cub_points_side_refcell, cub_points_side, sideDim, (int)i, cell);
            CellTools<double>::setJacobian(jacobian_side_refcell, cub_points_side_refcell, cell_nodes[pcell], cell);
            CellTools<double>::setJacobianDet(jacobian_det_side_refcell, jacobian_side_refcell);

            // compute weighted edge measure
            FunctionSpaceTools::computeEdgeMeasure<double>(weighted_measure_side_refcell,
                                                           jacobian_side_refcell,
                                                           cub_weights_side,
                                                           i,
                                                           cell);

            // tabulate values of basis functions at side cubature points, in the reference parent cell domain
            basis->getValues(value_of_basis_at_cub_points_side_refcell, cub_points_side_refcell, OPERATOR_VALUE);
            // transform
            FunctionSpaceTools::HGRADtransformVALUE<double>(transformed_value_of_basis_at_cub_points_side_refcell,
                                                            value_of_basis_at_cub_points_side_refcell);

            // multiply with weighted measure
            FunctionSpaceTools::multiplyMeasure<double>(weighted_transformed_value_of_basis_at_cub_points_side_refcell,
                                                        weighted_measure_side_refcell,
                                                        transformed_value_of_basis_at_cub_points_side_refcell);

            // compute Neumann data
            // map side cubature points in reference parent cell domain to physical space
            CellTools<double>::mapToPhysicalFrame(cub_points_side_physical, cub_points_side_refcell, cell_nodes[pcell], cell);
            // now compute data
            neumann(neumann_data_at_cub_points_side_physical, cub_points_side_physical, jacobian_side_refcell,
                    cell, (int)i, x_order, y_order);

            FunctionSpaceTools::integrate<double>(neumann_fields_per_side,
                                                  neumann_data_at_cub_points_side_physical,
                                                  weighted_transformed_value_of_basis_at_cub_points_side_refcell,
                                                  COMP_BLAS);

            // adjust RHS
            RealSpaceTools<double>::add(rhs_and_soln_vector, neumann_fields_per_side);;
          }
          ///////////////////////////////

          /////////////////////////////
          // Solution of linear system:
          int info = 0;
          Teuchos::LAPACK<int, double> solver;
          solver.GESV(numFields, 1, &fe_matrix[0], numFields, &ipiv(0), &rhs_and_soln_vector[0], numFields, &info);
          /////////////////////////////

          ////////////////////////
          // Building interpolant:
          // evaluate basis at interpolation points
          basis->getValues(value_of_basis_at_interp_points, interp_points_ref, OPERATOR_VALUE);
          // transform values of basis functions
          FunctionSpaceTools::HGRADtransformVALUE<double>(transformed_value_of_basis_at_interp_points,
                                                          value_of_basis_at_interp_points);
          FunctionSpaceTools::evaluate<double>(interpolant, rhs_and_soln_vector, transformed_value_of_basis_at_interp_points);
          ////////////////////////

          /******************* END COMPUTATION ***********************/
      
          RealSpaceTools<double>::subtract(interpolant, exact_solution);

          *outStream << "\nRelative norm-2 error between exact solution polynomial of order ("
                     << x_order << ", " << y_order << ") and finite element interpolant of order " << basis_order << ": "
                     << RealSpaceTools<double>::vectorNorm(&interpolant[0], interpolant.dimension(1), NORM_TWO) /
                        RealSpaceTools<double>::vectorNorm(&exact_solution[0], exact_solution.dimension(1), NORM_TWO) << "\n";

          if (RealSpaceTools<double>::vectorNorm(&interpolant[0], interpolant.dimension(1), NORM_TWO) /
              RealSpaceTools<double>::vectorNorm(&exact_solution[0], exact_solution.dimension(1), NORM_TWO) > zero) {
            *outStream << "\n\nPatch test failed for solution polynomial order ("
                       << x_order << ", " << y_order << ") and basis order " << basis_order << "\n\n";
            errorFlag++;
          }
        } // end for y_order
      } // end for x_order
    } // end for pcell

  }
  // Catch unexpected errors
  catch (std::logic_error err) {
    *outStream << err.what() << "\n\n";
    errorFlag = -1000;
  };

  if (errorFlag != 0)
    std::cout << "End Result: TEST FAILED\n";
  else
    std::cout << "End Result: TEST PASSED\n";

  // reset format state of std::cout
  std::cout.copyfmt(oldFormatState);

  return errorFlag;
}
Esempio n. 18
0
  void RCGIter<ScalarType,MV,OP>::iterate()
  {
    TEUCHOS_TEST_FOR_EXCEPTION( initialized_ == false, RCGIterFailure,
                        "Belos::RCGIter::iterate(): RCGIter class not initialized." );
    
    // We'll need LAPACK
    Teuchos::LAPACK<int,ScalarType> lapack;

    // Create convenience variables for zero and one.
    ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
    ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();

    // Allocate memory for scalars
    std::vector<int> index(1);
    Teuchos::SerialDenseMatrix<int,ScalarType> pAp(1,1), rTz(1,1);

    // Get the current solution std::vector.
    Teuchos::RCP<MV> cur_soln_vec = lp_->getCurrLHSVec();
 
    // Check that the current solution std::vector only has one column.
    TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*cur_soln_vec) != 1, RCGIterFailure,
                        "Belos::RCGIter::iterate(): current linear system has more than one std::vector!" );
    
    // Compute the current search dimension. 
    int searchDim = numBlocks_+1;

    // index of iteration within current cycle
    int i_ = 0;

    ////////////////////////////////////////////////////////////////
    // iterate until the status test tells us to stop.
    //
    // also break if our basis is full
    //
    Teuchos::RCP<const MV> p_ = Teuchos::null;
    Teuchos::RCP<MV> pnext_ = Teuchos::null;
    while (stest_->checkStatus(this) != Passed && curDim_+1 <= searchDim) {

      // Ap = A*p;
      index.resize( 1 );
      index[0] = i_;
      p_  = MVT::CloneView( *P_,  index );
      lp_->applyOp( *p_, *Ap_ );

      // d = p'*Ap;
      MVT::MvTransMv( one, *p_, *Ap_, pAp );
      (*D_)(i_,0) = pAp(0,0);

      // alpha = rTz_old / pAp
      (*Alpha_)(i_,0) = (*rTz_old_)(0,0) / pAp(0,0);

      // Check that alpha is a positive number
      TEUCHOS_TEST_FOR_EXCEPTION( SCT::real(pAp(0,0)) <= zero, RCGIterFailure, "Belos::RCGIter::iterate(): non-positive value for p^H*A*p encountered!" );

      // x = x + (alpha * p);
      MVT::MvAddMv( one, *cur_soln_vec, (*Alpha_)(i_,0), *p_, *cur_soln_vec );
      lp_->updateSolution();

      // r = r - (alpha * Ap);
      MVT::MvAddMv( one, *r_, -(*Alpha_)(i_,0), *Ap_, *r_ );

      std::vector<MagnitudeType> norm(1);
      MVT::MvNorm( *r_, norm );
//printf("i = %i\tnorm(r) = %e\n",i_,norm[0]);

      // z = M\r
      if ( lp_->getLeftPrec() != Teuchos::null ) {
        lp_->applyLeftPrec( *r_, *z_ );
      } 
      else if ( lp_->getRightPrec() != Teuchos::null ) {
        lp_->applyRightPrec( *r_, *z_ );
      }
      else {
        z_ = r_;
      }

      // rTz_new = r'*z;
      MVT::MvTransMv( one, *r_, *z_, rTz );

      // beta = rTz_new/rTz_old;
      (*Beta_)(i_,0) = rTz(0,0) / (*rTz_old_)(0,0);

      // rTz_old = rTz_new;
      (*rTz_old_)(0,0) = rTz(0,0);

      // get pointer for next p
      index.resize( 1 );
      index[0] = i_+1;
      pnext_ = MVT::CloneViewNonConst( *P_,  index );

      if (existU_) {
        // mu = UTAU \ (AU'*z);
        Teuchos::SerialDenseMatrix<int,ScalarType> mu( Teuchos::View, *Delta_, recycleBlocks_, 1, 0, i_ );
        MVT::MvTransMv( one, *AU_, *z_, mu );
        char TRANS = 'N';
        int info;
        lapack.GETRS( TRANS, recycleBlocks_, 1, LUUTAU_->values(), LUUTAU_->stride(), &(*ipiv_)[0], mu.values(), mu.stride(), &info );
        TEUCHOS_TEST_FOR_EXCEPTION(info != 0, RCGIterLAPACKFailure,
                           "Belos::RCGIter::solve(): LAPACK GETRS failed to compute a solution.");
        // p = -(U*mu) + (beta*p) + z (in two steps)
        // p = (beta*p) + z;
        MVT::MvAddMv( (*Beta_)(i_,0), *p_, one, *z_, *pnext_ );
        // pnext = -(U*mu) + (one)*pnext;
        MVT::MvTimesMatAddMv( -one, *U_, mu, one, *pnext_ );
      }
      else {
        // p = (beta*p) + z;
        MVT::MvAddMv( (*Beta_)(i_,0), *p_, one, *z_, *pnext_ );
      }

      // Done with this view; release pointer
      p_ = Teuchos::null;
      pnext_ = Teuchos::null;

      // increment iteration count and dimension index  
      i_++;
      iter_++;
      curDim_++;

    } // end while (statusTest == false)
    
   }
Esempio n. 19
0
// ***********************************************************
int DG_Prob::Eigenvectors(const double Dt,
                          const Epetra_Map & Map)
{
  printf("Entrou em Eigenvectors\n");
 
#ifdef HAVE_MPI
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  //MPI::COMM_WORLD.Barrier();
  Comm.Barrier();
  Teuchos::RCP<Epetra_FECrsMatrix> M = Teuchos::rcp(new Epetra_FECrsMatrix(Copy, Map,0));//&NNz[0]);
  Teuchos::RCP<Epetra_FEVector> RHS = Teuchos::rcp(new Epetra_FEVector(Map,1));
  
  DG_MatrizVetor_Epetra(Dt,M,RHS);

  Teuchos::RCP<Epetra_CrsMatrix> A = Teuchos::rcp(new Epetra_CrsMatrix(Copy, Map,0
                                                                       /* &NNz[0]*/) );
  Epetra_Export Exporter(Map,Map);
  A->PutScalar(0.0);
  A->Export(*(M.ptr()),Exporter,Add);
  A->FillComplete();

  using std::cout;
 // int nx = 5;
  bool boolret;
  int MyPID = Comm.MyPID();
  
  bool verbose = true;
  bool debug = false;
  std::string which("LR");
  
  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
  cmdp.setOption("debug","nodebug",&debug,"Print debugging information.");
  cmdp.setOption("sort",&which,"Targetted eigenvalues (SM,LM,SR,LR,SI,or LI).");
  
  typedef double ScalarType;
  typedef Teuchos::ScalarTraits<ScalarType>          SCT;
  typedef SCT::magnitudeType               MagnitudeType;
  typedef Epetra_MultiVector                          MV;
  typedef Epetra_Operator                             OP;
  typedef Anasazi::MultiVecTraits<ScalarType,MV>     MVT;
  typedef Anasazi::OperatorTraits<ScalarType,MV,OP>  OPT;
  
 
 // double rho = 2*nx+1;
  
  // Compute coefficients for discrete convection-diffution operator
 // const double one = 1.0;
  
 // int NumEntries, info;
  
  //************************************
  // Start the block Arnoldi iteration
  //***********************************
  //
  //  Variables used for the Block Krylov Schur Method
  //    
  int nev = 10;
  int blockSize = 1;
  int numBlocks = 20;
  int maxRestarts = 500;
  //int stepSize = 5;
  double tol = 1e-8;
  
  // Create a sort manager to pass into the block Krylov-Schur solver manager
  // -->  Make sure the reference-counted pointer is of type Anasazi::SortManager<>
  // -->  The block Krylov-Schur solver manager uses Anasazi::BasicSort<> by default,
  //      so you can also pass in the parameter "Which", instead of a sort manager.
  Teuchos::RCP<Anasazi::SortManager<MagnitudeType> > MySort =     
    Teuchos::rcp( new Anasazi::BasicSort<MagnitudeType>( which ) );
  
  // Set verbosity level
  int verbosity = Anasazi::Errors + Anasazi::Warnings;
  if (verbose) {
    verbosity += Anasazi::FinalSummary + Anasazi::TimingDetails;
  }
  if (debug) {
    verbosity += Anasazi::Debug;
  }
  //
  // Create parameter list to pass into solver manager
  //
  Teuchos::ParameterList MyPL;
  MyPL.set( "Verbosity", verbosity );
  MyPL.set( "Sort Manager", MySort );
  //MyPL.set( "Which", which );  
  MyPL.set( "Block Size", blockSize );
  MyPL.set( "Num Blocks", numBlocks );
  MyPL.set( "Maximum Restarts", maxRestarts );
  //MyPL.set( "Step Size", stepSize );
  MyPL.set( "Convergence Tolerance", tol );
  
  // Create an Epetra_MultiVector for an initial vector to start the solver.
  // Note:  This needs to have the same number of columns as the blocksize.
  Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(Map, blockSize) );
  ivec->Random();
  
  // Create the eigenproblem.
  Teuchos::RCP<Anasazi::BasicEigenproblem<double, MV, OP> > MyProblem =
    Teuchos::rcp( new Anasazi::BasicEigenproblem<double, MV, OP>(A, ivec) );
  
  // Inform the eigenproblem that the operator A is symmetric
  //MyProblem->setHermitian(rho==0.0); 
  
  // Set the number of eigenvalues requested
  MyProblem->setNEV( nev );
  
  // Inform the eigenproblem that you are finishing passing it information
  boolret = MyProblem->setProblem();
  if (boolret != true) {
    if (verbose && MyPID == 0) {
      cout << "Anasazi::BasicEigenproblem::setProblem() returned with error." << endl;
    }
#ifdef HAVE_MPI
    MPI_Finalize() ;
#endif
    return -1;
  }
  
  // Initialize the Block Arnoldi solver
  Anasazi::BlockKrylovSchurSolMgr<double, MV, OP> MySolverMgr(MyProblem, MyPL);
  
  // Solve the problem to the specified tolerances or length
  Anasazi::ReturnType returnCode = MySolverMgr.solve();
  if (returnCode != Anasazi::Converged && MyPID==0 && verbose) {
    cout << "Anasazi::EigensolverMgr::solve() returned unconverged." << endl;
  }
  
  // Get the Ritz values from the eigensolver
  std::vector<Anasazi::Value<double> > ritzValues = MySolverMgr.getRitzValues();
  
  // Output computed eigenvalues and their direct residuals
  if (verbose && MyPID==0) {
    int numritz = (int)ritzValues.size();
    cout.setf(std::ios_base::right, std::ios_base::adjustfield);
    cout<<endl<< "Computed Ritz Values"<< endl;
    if (MyProblem->isHermitian()) {
      cout<< std::setw(16) << "Real Part"
	  << endl;
      cout<<"-----------------------------------------------------------"<<endl;
      for (int i=0; i<numritz; i++) {
        cout<< std::setw(16) << ritzValues[i].realpart 
	    << endl;
      }  
      cout<<"-----------------------------------------------------------"<<endl;
    } 
    else {
      cout<< std::setw(16) << "Real Part"
	  << std::setw(16) << "Imag Part"
	  << endl;
      cout<<"-----------------------------------------------------------"<<endl;
      for (int i=0; i<numritz; i++) {
        cout<< std::setw(16) << ritzValues[i].realpart 
	    << std::setw(16) << ritzValues[i].imagpart 
	    << endl;
      }  
      cout<<"-----------------------------------------------------------"<<endl;
    }  
  }
  
  // Get the eigenvalues and eigenvectors from the eigenproblem
  Anasazi::Eigensolution<ScalarType,MV> sol = MyProblem->getSolution();
  std::vector<Anasazi::Value<ScalarType> > evals = sol.Evals;
  Teuchos::RCP<MV> evecs = sol.Evecs;
  std::vector<int> index = sol.index;
  int numev = sol.numVecs;
  
  if (numev > 0) {
    // Compute residuals.
    Teuchos::LAPACK<int,double> lapack;
    std::vector<double> normA(numev);
    
    if (MyProblem->isHermitian()) {
      // Get storage
      Epetra_MultiVector Aevecs(Map,numev);
      Teuchos::SerialDenseMatrix<int,double> B(numev,numev);
      B.putScalar(0.0); 
      for (int i=0; i<numev; i++) {B(i,i) = evals[i].realpart;}
      
      // Compute A*evecs
      OPT::Apply( *A, *evecs, Aevecs );
      
      // Compute A*evecs - lambda*evecs and its norm
      MVT::MvTimesMatAddMv( -1.0, *evecs, B, 1.0, Aevecs );
      MVT::MvNorm( Aevecs, normA );
      
      // Scale the norms by the eigenvalue
      for (int i=0; i<numev; i++) {
        normA[i] /= Teuchos::ScalarTraits<double>::magnitude( evals[i].realpart );
      }
    } else {
      // The problem is non-Hermitian.
      int i=0;
      std::vector<int> curind(1);
      std::vector<double> resnorm(1), tempnrm(1);
      Teuchos::RCP<MV> tempAevec;
      Teuchos::RCP<const MV> evecr, eveci;
      Epetra_MultiVector Aevec(Map,numev);
      
      // Compute A*evecs
      OPT::Apply( *A, *evecs, Aevec );
      
      Teuchos::SerialDenseMatrix<int,double> Breal(1,1), Bimag(1,1);
      while (i<numev) {
        if (index[i]==0) {
          // Get a view of the current eigenvector (evecr)
          curind[0] = i;
          evecr = MVT::CloneView( *evecs, curind );
	  
          // Get a copy of A*evecr
          tempAevec = MVT::CloneCopy( Aevec, curind );
	  
          // Compute A*evecr - lambda*evecr
          Breal(0,0) = evals[i].realpart;
          MVT::MvTimesMatAddMv( -1.0, *evecr, Breal, 1.0, *tempAevec );
	  
          // Compute the norm of the residual and increment counter
          MVT::MvNorm( *tempAevec, resnorm );
          normA[i] = resnorm[0]/Teuchos::ScalarTraits<MagnitudeType>::magnitude( evals[i].realpart );
          i++;
        } else {
          // Get a view of the real part of the eigenvector (evecr)
          curind[0] = i;
          evecr = MVT::CloneView( *evecs, curind );
	  
          // Get a copy of A*evecr
          tempAevec = MVT::CloneCopy( Aevec, curind );
	  
          // Get a view of the imaginary part of the eigenvector (eveci)
          curind[0] = i+1;
          eveci = MVT::CloneView( *evecs, curind );
	  
          // Set the eigenvalue into Breal and Bimag
          Breal(0,0) = evals[i].realpart;
          Bimag(0,0) = evals[i].imagpart;
	  
          // Compute A*evecr - evecr*lambdar + eveci*lambdai
          MVT::MvTimesMatAddMv( -1.0, *evecr, Breal, 1.0, *tempAevec );
          MVT::MvTimesMatAddMv( 1.0, *eveci, Bimag, 1.0, *tempAevec );
          MVT::MvNorm( *tempAevec, tempnrm );
	  
          // Get a copy of A*eveci
          tempAevec = MVT::CloneCopy( Aevec, curind );
	  
          // Compute A*eveci - eveci*lambdar - evecr*lambdai
          MVT::MvTimesMatAddMv( -1.0, *evecr, Bimag, 1.0, *tempAevec );
          MVT::MvTimesMatAddMv( -1.0, *eveci, Breal, 1.0, *tempAevec );
          MVT::MvNorm( *tempAevec, resnorm );
	  
          // Compute the norms and scale by magnitude of eigenvalue
          normA[i] = lapack.LAPY2( tempnrm[i], resnorm[i] ) /
            lapack.LAPY2( evals[i].realpart, evals[i].imagpart );
          normA[i+1] = normA[i];
	  
          i=i+2;
        }
      }
    }
    
    // Output computed eigenvalues and their direct residuals
    if (verbose && MyPID==0) {
      cout.setf(std::ios_base::right, std::ios_base::adjustfield);
      cout<<endl<< "Actual Residuals"<<endl;
      if (MyProblem->isHermitian()) {
        cout<< std::setw(16) << "Real Part"
	    << std::setw(20) << "Direct Residual"<< endl;
        cout<<"-----------------------------------------------------------"<<endl;
        for (int i=0; i<numev; i++) {
          cout<< std::setw(16) << evals[i].realpart 
	      << std::setw(20) << normA[i] << endl;
        }  
        cout<<"-----------------------------------------------------------"<<endl;
      } 
      else {
        cout<< std::setw(16) << "Real Part"
	    << std::setw(16) << "Imag Part"
	    << std::setw(20) << "Direct Residual"<< endl;
        cout<<"-----------------------------------------------------------"<<endl;
        for (int i=0; i<numev; i++) {
          cout<< std::setw(16) << evals[i].realpart 
	      << std::setw(16) << evals[i].imagpart 
	      << std::setw(20) << normA[i] << endl;
        }  
        cout<<"-----------------------------------------------------------"<<endl;
      }  
    }
  }
  
#ifdef EPETRA_MPI
  MPI_Finalize();
#endif  
  return 0;
}
int main(int argc, char *argv[]) {
  int i, j, info;
  const double one = 1.0;
  const double zero = 0.0;
  Teuchos::LAPACK<int,double> lapack;

#ifdef EPETRA_MPI
  // Initialize MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  int MyPID = Comm.MyPID();

  //  Dimension of the matrix
  int m = 500;
  int n = 100;

  // Construct a Map that puts approximately the same number of
  // equations on each processor.

  Epetra_Map RowMap(m, 0, Comm);
  Epetra_Map ColMap(n, 0, Comm);

  // Get update list and number of local equations from newly created Map.

  int NumMyRowElements = RowMap.NumMyElements();
  
  std::vector<int> MyGlobalRowElements(NumMyRowElements);
  RowMap.MyGlobalElements(&MyGlobalRowElements[0]);

  /* We are building an m by n matrix with entries
    
              A(i,j) = k*(si)*(tj - 1) if i <= j
               = k*(tj)*(si - 1) if i  > j
  
     where si = i/(m+1) and tj = j/(n+1) and k = 1/(n+1).
  */

  // Create an Epetra_Matrix
  Teuchos::RCP<Epetra_CrsMatrix> A = Teuchos::rcp( new Epetra_CrsMatrix(Copy, RowMap, n) );

  // Compute coefficients for discrete integral operator
  std::vector<double> Values(n);
  std::vector<int> Indices(n);
  double inv_mp1 = one/(m+1);
  double inv_np1 = one/(n+1);
  for (i=0; i<n; i++) { Indices[i] = i; }
  
  for (i=0; i<NumMyRowElements; i++) {
    //
    for (j=0; j<n; j++) {
      //
      if ( MyGlobalRowElements[i] <= j ) {
        Values[j] = inv_np1 * ( (MyGlobalRowElements[i]+one)*inv_mp1 ) * ( (j+one)*inv_np1 - one );  // k*(si)*(tj-1)
      }
      else {
        Values[j] = inv_np1 * ( (j+one)*inv_np1 ) * ( (MyGlobalRowElements[i]+one)*inv_mp1 - one );  // k*(tj)*(si-1)
      }
    }
    info = A->InsertGlobalValues(MyGlobalRowElements[i], n, &Values[0], &Indices[0]);
    assert( info==0 );
  }

  // Finish up
  info = A->FillComplete(ColMap, RowMap);
  assert( info==0 );
  info = A->OptimizeStorage();
  assert( info==0 );
  A->SetTracebackMode(1); // Shutdown Epetra Warning tracebacks

  //************************************
  // Start the block Arnoldi iteration
  //***********************************
  //
  //  Variables used for the Block Arnoldi Method
  //
  int nev = 4;
  int blockSize = 1;
  int numBlocks = 10;
  int maxRestarts = 20;
  int verbosity = Anasazi::Errors + Anasazi::Warnings + Anasazi::FinalSummary;
  double tol = lapack.LAMCH('E');
  std::string which = "LM";
  //
  // Create parameter list to pass into solver
  //
  Teuchos::ParameterList MyPL;
  MyPL.set( "Verbosity", verbosity );
  MyPL.set( "Which", which );
  MyPL.set( "Block Size", blockSize );
  MyPL.set( "Num Blocks", numBlocks );
  MyPL.set( "Maximum Restarts", maxRestarts );
  MyPL.set( "Convergence Tolerance", tol );

  typedef Anasazi::MultiVec<double> MV;
  typedef Anasazi::Operator<double> OP;

  // Create an Anasazi::EpetraMultiVec for an initial vector to start the solver. 
  // Note:  This needs to have the same number of columns as the blocksize.
  Teuchos::RCP<Anasazi::EpetraMultiVec> ivec = Teuchos::rcp( new Anasazi::EpetraMultiVec(ColMap, blockSize) );
  ivec->MvRandom();

  // Call the constructor for the (A^T*A) operator
  Teuchos::RCP<Anasazi::EpetraSymOp>  Amat = Teuchos::rcp( new Anasazi::EpetraSymOp(A) );  
  Teuchos::RCP<Anasazi::BasicEigenproblem<double, MV, OP> > MyProblem =
    Teuchos::rcp( new Anasazi::BasicEigenproblem<double, MV, OP>(Amat, ivec) );

  // Inform the eigenproblem that the matrix A is symmetric
  MyProblem->setHermitian(true);

  // Set the number of eigenvalues requested and the blocksize the solver should use
  MyProblem->setNEV( nev );

  // Inform the eigenproblem that you are finished passing it information
  bool boolret = MyProblem->setProblem();
  if (boolret != true) {
    if (MyPID == 0) {
      cout << "Anasazi::BasicEigenproblem::setProblem() returned with error." << endl;
    }
#ifdef HAVE_MPI
    MPI_Finalize() ;
#endif
    return -1;
  }

  // Initialize the Block Arnoldi solver
  Anasazi::BlockKrylovSchurSolMgr<double, MV, OP> MySolverMgr(MyProblem, MyPL);
  
  // Solve the problem to the specified tolerances or length
  Anasazi::ReturnType returnCode = MySolverMgr.solve();
  if (returnCode != Anasazi::Converged && MyPID==0) {
    cout << "Anasazi::EigensolverMgr::solve() returned unconverged." << endl;
  }

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Anasazi::Eigensolution<double,MV> sol = MyProblem->getSolution();
  std::vector<Anasazi::Value<double> > evals = sol.Evals;
  int numev = sol.numVecs;

  if (numev > 0) {
    
    // Compute singular values/vectors and direct residuals.
    //
    // Compute singular values which are the square root of the eigenvalues
    if (MyPID==0) {
      cout<<"------------------------------------------------------"<<endl;
      cout<<"Computed Singular Values: "<<endl;
      cout<<"------------------------------------------------------"<<endl;
    }
    for (i=0; i<numev; i++) { evals[i].realpart = Teuchos::ScalarTraits<double>::squareroot( evals[i].realpart ); }
    //
    // Compute left singular vectors :  u = Av/sigma
    //
    std::vector<double> tempnrm(numev), directnrm(numev);
    std::vector<int> index(numev);
    for (i=0; i<numev; i++) { index[i] = i; }
    Anasazi::EpetraMultiVec Av(RowMap,numev), u(RowMap,numev);
    Anasazi::EpetraMultiVec* evecs = dynamic_cast<Anasazi::EpetraMultiVec* >(sol.Evecs->CloneViewNonConst( index ));
    Teuchos::SerialDenseMatrix<int,double> S(numev,numev);
    A->Apply( *evecs, Av );
    Av.MvNorm( tempnrm );
    for (i=0; i<numev; i++) { S(i,i) = one/tempnrm[i]; };
    u.MvTimesMatAddMv( one, Av, S, zero );
    //
    // Compute direct residuals : || Av - sigma*u ||
    //
    for (i=0; i<numev; i++) { S(i,i) = evals[i].realpart; }
    Av.MvTimesMatAddMv( -one, u, S, one );
    Av.MvNorm( directnrm );
    if (MyPID==0) {
      cout.setf(std::ios_base::right, std::ios_base::adjustfield);
      cout<<std::setw(16)<<"Singular Value"
        <<std::setw(20)<<"Direct Residual"
        <<endl;
      cout<<"------------------------------------------------------"<<endl;
      for (i=0; i<numev; i++) {
        cout<<std::setw(16)<<evals[i].realpart
          <<std::setw(20)<<directnrm[i] 
          <<endl;
      }  
      cout<<"------------------------------------------------------"<<endl;
    }
  }
  
#ifdef EPETRA_MPI
    MPI_Finalize() ;
#endif
  
  return 0;
}
void DenseContainer<MatrixType, LocalScalarType>::
applyImpl (const local_mv_type& X,
           local_mv_type& Y,
           Teuchos::ETransp mode,
           LocalScalarType alpha,
           LocalScalarType beta) const
{
    using Teuchos::ArrayRCP;
    using Teuchos::RCP;
    using Teuchos::rcp;
    using Teuchos::rcpFromRef;

    TEUCHOS_TEST_FOR_EXCEPTION(
        X.getLocalLength () != Y.getLocalLength (),
        std::logic_error, "Ifpack2::DenseContainer::applyImpl: X and Y have "
        "incompatible dimensions (" << X.getLocalLength () << " resp. "
        << Y.getLocalLength () << ").  Please report this bug to "
        "the Ifpack2 developers.");
    TEUCHOS_TEST_FOR_EXCEPTION(
        localMap_->getNodeNumElements () != X.getLocalLength (),
        std::logic_error, "Ifpack2::DenseContainer::applyImpl: The inverse "
        "operator and X have incompatible dimensions (" <<
        localMap_->getNodeNumElements () << " resp. "
        << X.getLocalLength () << ").  Please report this bug to "
        "the Ifpack2 developers.");
    TEUCHOS_TEST_FOR_EXCEPTION(
        localMap_->getNodeNumElements () != Y.getLocalLength (),
        std::logic_error, "Ifpack2::DenseContainer::applyImpl: The inverse "
        "operator and Y have incompatible dimensions (" <<
        localMap_->getNodeNumElements () << " resp. "
        << Y.getLocalLength () << ").  Please report this bug to "
        "the Ifpack2 developers.");
    TEUCHOS_TEST_FOR_EXCEPTION(
        X.getLocalLength () != static_cast<size_t> (diagBlock_.numRows ()),
        std::logic_error, "Ifpack2::DenseContainer::applyImpl: The input "
        "multivector X has incompatible dimensions from those of the "
        "inverse operator (" << X.getLocalLength () << " vs. "
        << (mode == Teuchos::NO_TRANS ? diagBlock_.numCols () : diagBlock_.numRows ())
        << ").  Please report this bug to the Ifpack2 developers.");
    TEUCHOS_TEST_FOR_EXCEPTION(
        X.getLocalLength () != static_cast<size_t> (diagBlock_.numRows ()),
        std::logic_error, "Ifpack2::DenseContainer::applyImpl: The output "
        "multivector Y has incompatible dimensions from those of the "
        "inverse operator (" << Y.getLocalLength () << " vs. "
        << (mode == Teuchos::NO_TRANS ? diagBlock_.numRows () : diagBlock_.numCols ())
        << ").  Please report this bug to the Ifpack2 developers.");

    typedef Teuchos::ScalarTraits<local_scalar_type> STS;
    const int numVecs = static_cast<int> (X.getNumVectors ());
    if (alpha == STS::zero ()) { // don't need to solve the linear system
        if (beta == STS::zero ()) {
            // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
            // any Inf or NaN values in Y (rather than multiplying them by
            // zero, resulting in NaN values).
            Y.putScalar (STS::zero ());
        }
        else { // beta != 0
            Y.scale (beta);
        }
    }
    else { // alpha != 0; must solve the linear system
        Teuchos::LAPACK<int, local_scalar_type> lapack;
        // If beta is nonzero or Y is not constant stride, we have to use
        // a temporary output multivector.  It gets a (deep) copy of X,
        // since GETRS overwrites its (multi)vector input with its output.
        RCP<local_mv_type> Y_tmp;
        if (beta == STS::zero () ) {
            Tpetra::deep_copy (Y, X);
            Y_tmp = rcpFromRef (Y);
        }
        else {
            Y_tmp = rcp (new local_mv_type (X, Teuchos::Copy));
        }
        const int Y_stride = static_cast<int> (Y_tmp->getStride ());
        ArrayRCP<local_scalar_type> Y_view = Y_tmp->get1dViewNonConst ();
        local_scalar_type* const Y_ptr = Y_view.getRawPtr ();

        int INFO = 0;
        const char trans =
            (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
        lapack.GETRS (trans, diagBlock_.numRows (), numVecs,
                      diagBlock_.values (), diagBlock_.stride (),
                      ipiv_.getRawPtr (), Y_ptr, Y_stride, &INFO);
        TEUCHOS_TEST_FOR_EXCEPTION(
            INFO != 0, std::runtime_error, "Ifpack2::DenseContainer::applyImpl: "
            "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
            "failed with INFO = " << INFO << " != 0.");

        if (beta != STS::zero ()) {
            Y.update (alpha, *Y_tmp, beta);
        }
        else if (! Y.isConstantStride ()) {
            Tpetra::deep_copy (Y, *Y_tmp);
        }
    }
}
// Solves turning point equations via Phipps modified bordering
// The first m columns of input_x and input_null store the RHS while
// the last column stores df/dp, d(Jn)/dp respectively.  Note however
// input_param has only m columns (not m+1).  result_x, result_null,
// are result_param have the same dimensions as their input counterparts
NOX::Abstract::Group::ReturnType 
LOCA::TurningPoint::MooreSpence::PhippsBordering::solveContiguous(
		  Teuchos::ParameterList& params,
		  const NOX::Abstract::MultiVector& input_x,
		  const NOX::Abstract::MultiVector& input_null,
	          const NOX::Abstract::MultiVector::DenseMatrix& input_param,
		  NOX::Abstract::MultiVector& result_x,
		  NOX::Abstract::MultiVector& result_null,
	          NOX::Abstract::MultiVector::DenseMatrix& result_param) const
{
  std::string callingFunction = 
    "LOCA::TurningPoint::MooreSpence::PhippsBordering::solveContiguous()";
  NOX::Abstract::Group::ReturnType finalStatus = NOX::Abstract::Group::Ok;
  NOX::Abstract::Group::ReturnType status;

  int m = input_x.numVectors()-2;
  std::vector<int> index_input(m);
  std::vector<int> index_input_dp(m+1);
  std::vector<int> index_null(1);
  std::vector<int> index_dp(1);
  for (int i=0; i<m; i++) {
    index_input[i] = i;
    index_input_dp[i] = i;
  }
  index_input_dp[m] = m;
  index_dp[0] = m;
  index_null[0] = m+1;

  NOX::Abstract::MultiVector::DenseMatrix tmp_mat_1(1, m+1);
  NOX::Abstract::MultiVector::DenseMatrix tmp_mat_2(1, m+2);

  // Create view of first m+1 columns of input_x, result_x
  Teuchos::RCP<NOX::Abstract::MultiVector> input_x_view = 
      input_x.subView(index_input_dp);
  Teuchos::RCP<NOX::Abstract::MultiVector> result_x_view = 
      result_x.subView(index_input_dp);

  // verify underlying Jacobian is valid
  if (!group->isJacobian()) {
    status = group->computeJacobian();
    finalStatus = 
      globalData->locaErrorCheck->combineAndCheckReturnTypes(status, 
							     finalStatus,
							     callingFunction);
  }
  
  // Solve  |J   u||A B| = |F df/dp|
  //        |v^T 0||a b|   |0   0  |
  status = borderedSolver->applyInverse(params, input_x_view.get(), NULL, 
					*result_x_view, tmp_mat_1);
  finalStatus = 
    globalData->locaErrorCheck->combineAndCheckReturnTypes(status, finalStatus,
							   callingFunction);
  Teuchos::RCP<NOX::Abstract::MultiVector> A = 
    result_x.subView(index_input);
  Teuchos::RCP<NOX::Abstract::MultiVector> B = 
    result_x.subView(index_dp);
  double b = tmp_mat_1(0,m);

  // compute (Jv)_x[A B v]
  result_x[m+1] = *nullVector;
  Teuchos::RCP<NOX::Abstract::MultiVector> tmp = 
    result_x.clone(NOX::ShapeCopy);
  status = group->computeDJnDxaMulti(*nullVector, *JnVector, result_x,
				     *tmp);
  finalStatus = 
    globalData->locaErrorCheck->combineAndCheckReturnTypes(status, finalStatus,
							   callingFunction);

  // compute (Jv)_x[A B v] - [G d(Jn)/dp 0]
  tmp->update(-1.0, input_null, 1.0);

  // verify underlying Jacobian is valid
  if (!group->isJacobian()) {
    status = group->computeJacobian();
    finalStatus = 
      globalData->locaErrorCheck->combineAndCheckReturnTypes(status, 
							     finalStatus,
							     callingFunction);
  }

  // Solve  |J   u||C D E| = |(Jv)_x A - G  (Jv)_x B - d(Jv)/dp  (Jv)_x v|
  //        |v^T 0||c d e|   |         0             0               0   |
  status = borderedSolver->applyInverse(params, tmp.get(), NULL, result_null,
					tmp_mat_2);
  finalStatus = 
    globalData->locaErrorCheck->combineAndCheckReturnTypes(status, finalStatus,
							   callingFunction);
  Teuchos::RCP<NOX::Abstract::MultiVector> C = 
    result_null.subView(index_input);
  Teuchos::RCP<NOX::Abstract::MultiVector> D = 
    result_null.subView(index_dp);
  Teuchos::RCP<NOX::Abstract::MultiVector> E = 
    result_null.subView(index_null);
  double d = tmp_mat_2(0, m);
  double e = tmp_mat_2(0, m+1);

  // Fill coefficient arrays
  double M[9];
  M[0] = s;   M[1] =  e;  M[2] = -tpGroup->lTransNorm((*E)[0]);
  M[3] = 0.0; M[4] =  s;  M[5] =  tpGroup->lTransNorm(*nullVector);
  M[6] = b;   M[7] = -d;  M[8] =  tpGroup->lTransNorm((*D)[0]);

  // compute h + phi^T C
  tpGroup->lTransNorm(*C, result_param);
  result_param += input_param;

  double *R = new double[3*m];
  for (int i=0; i<m; i++) {
    R[3*i]   =  tmp_mat_1(0,i);
    R[3*i+1] = -tmp_mat_2(0,i);
    R[3*i+2] =  result_param(0,i);
  }

  // Solve M*P = R
  int three = 3;
  int piv[3];
  int info;
  Teuchos::LAPACK<int,double> L;
  L.GESV(three, m, M, three, piv, R, three, &info);
  if (info != 0) {
    globalData->locaErrorCheck->throwError(
				    callingFunction,
				    "Solve of 3x3 coefficient matrix failed!");
    return NOX::Abstract::Group::Failed;
  }

  NOX::Abstract::MultiVector::DenseMatrix alpha(1,m);
  NOX::Abstract::MultiVector::DenseMatrix beta(1,m);
  for (int i=0; i<m; i++) {
    alpha(0,i)        = R[3*i];
    beta(0,i)         = R[3*i+1];
    result_param(0,i) = R[3*i+2];
  }

  // compute A = A - B*z + v*alpha (remember A is a sub-view of result_x)
  A->update(Teuchos::NO_TRANS, -1.0, *B, result_param, 1.0);
  A->update(Teuchos::NO_TRANS, 1.0, *nullMultiVector, alpha, 1.0);

  // compute C = -C + d*z - E*alpha + v*beta 
  // (remember C is a sub-view of result_null)
  C->update(Teuchos::NO_TRANS, 1.0, *D, result_param, -1.0);
  C->update(Teuchos::NO_TRANS, -1.0, *E, alpha, 1.0);
  C->update(Teuchos::NO_TRANS, 1.0, *nullMultiVector, beta, 1.0);

  delete [] R;

  return finalStatus;
}
  Basis_HGRAD_LINE_Cn_FEM<SpT,OT,PT>::
  Basis_HGRAD_LINE_Cn_FEM( const ordinal_type order,
                           const EPointType   pointType ) {
    this->basisCardinality_  = order+1;
    this->basisDegree_       = order;
    this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Line<2> >() );
    this->basisType_         = BASIS_FEM_FIAT;
    this->basisCoordinates_  = COORDINATES_CARTESIAN;

    const ordinal_type card = this->basisCardinality_;
    
    // points are computed in the host and will be copied 
    Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace>
      dofCoords("Hgrad::Line::Cn::dofCoords", card, 1);


    switch (pointType) {
    case POINTTYPE_EQUISPACED:
    case POINTTYPE_WARPBLEND: {
      // lattice ordering 
      {
        const ordinal_type offset = 0;
        PointTools::getLattice( dofCoords,
                                this->basisCellTopology_, 
                                order, offset, 
                                pointType );
        
      }
      // topological order
      // { 
      //   // two vertices
      //   dofCoords(0,0) = -1.0;
      //   dofCoords(1,0) =  1.0;
        
      //   // internal points
      //   typedef Kokkos::pair<ordinal_type,ordinal_type> range_type;
      //   auto pts = Kokkos::subview(dofCoords, range_type(2, card), Kokkos::ALL());
        
      //   const auto offset = 1;
      //   PointTools::getLattice( pts,
      //                           this->basisCellTopology_, 
      //                           order, offset, 
      //                           pointType );
      // }
      break;
    }
    case POINTTYPE_GAUSS: {
      // internal points only
      PointTools::getGaussPoints( dofCoords, 
                                  order );
      break;
    }
    default: {
      INTREPID2_TEST_FOR_EXCEPTION( !isValidPointType(pointType),
                                    std::invalid_argument , 
                                    ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) invalid pointType." );
    }
    }

    this->dofCoords_ = Kokkos::create_mirror_view(typename SpT::memory_space(), dofCoords);
    Kokkos::deep_copy(this->dofCoords_, dofCoords);
    
    // form Vandermonde matrix; actually, this is the transpose of the VDM,
    // this matrix is used in LAPACK so it should be column major and left layout
    const ordinal_type lwork = card*card;
    Kokkos::DynRankView<typename scalarViewType::value_type,Kokkos::LayoutLeft,Kokkos::HostSpace>
      vmat("Hgrad::Line::Cn::vmat", card, card), 
      work("Hgrad::Line::Cn::work", lwork),
      ipiv("Hgrad::Line::Cn::ipiv", card);

    const double alpha = 0.0, beta = 0.0;
    Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI::
      getValues<Kokkos::HostSpace::execution_space,Parameters::MaxNumPtsPerBasisEval>
      (vmat, dofCoords, order, alpha, beta, OPERATOR_VALUE);

    ordinal_type info = 0;
    Teuchos::LAPACK<ordinal_type,typename scalarViewType::value_type> lapack;

    lapack.GETRF(card, card, 
                 vmat.data(), vmat.stride_1(),
                 (ordinal_type*)ipiv.data(),
                 &info);

    INTREPID2_TEST_FOR_EXCEPTION( info != 0,
                                  std::runtime_error , 
                                  ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRF returns nonzero info." );

    lapack.GETRI(card, 
                 vmat.data(), vmat.stride_1(),
                 (ordinal_type*)ipiv.data(),
                 work.data(), lwork,
                 &info);

    INTREPID2_TEST_FOR_EXCEPTION( info != 0,
                                  std::runtime_error , 
                                  ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRI returns nonzero info." );
    
    // create host mirror 
    Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace>
      vinv("Hgrad::Line::Cn::vinv", card, card);

    for (ordinal_type i=0;i<card;++i) 
      for (ordinal_type j=0;j<card;++j) 
        vinv(i,j) = vmat(j,i);

    this->vinv_ = Kokkos::create_mirror_view(typename SpT::memory_space(), vinv);
    Kokkos::deep_copy(this->vinv_ , vinv);

    // initialize tags
    {
      const bool is_vertex_included = (pointType != POINTTYPE_GAUSS);

      // Basis-dependent initializations
      const ordinal_type tagSize  = 4;        // size of DoF tag, i.e., number of fields in the tag
      const ordinal_type posScDim = 0;        // position in the tag, counting from 0, of the subcell dim 
      const ordinal_type posScOrd = 1;        // position in the tag, counting from 0, of the subcell ordinal
      const ordinal_type posDfOrd = 2;        // position in the tag, counting from 0, of DoF ordinal relative to the subcell
      

      ordinal_type tags[Parameters::MaxOrder+1][4];

      // now we check the points for association 
      if (is_vertex_included) {
        // lattice order
        {
          const auto v0 = 0;
          tags[v0][0] = 0; // vertex dof
          tags[v0][1] = 0; // vertex id
          tags[v0][2] = 0; // local dof id
          tags[v0][3] = 1; // total number of dofs in this vertex
          
          const ordinal_type iend = card - 2;
          for (ordinal_type i=0;i<iend;++i) {
            const auto e = i + 1;
            tags[e][0] = 1;    // edge dof
            tags[e][1] = 0;    // edge id
            tags[e][2] = i;    // local dof id
            tags[e][3] = iend; // total number of dofs in this edge
          }

          const auto v1 = card -1;
          tags[v1][0] = 0; // vertex dof
          tags[v1][1] = 1; // vertex id
          tags[v1][2] = 0; // local dof id
          tags[v1][3] = 1; // total number of dofs in this vertex
        }

        // topological order
        // {
        //   tags[0][0] = 0; // vertex dof
        //   tags[0][1] = 0; // vertex id
        //   tags[0][2] = 0; // local dof id
        //   tags[0][3] = 1; // total number of dofs in this vertex
          
        //   tags[1][0] = 0; // vertex dof
        //   tags[1][1] = 1; // vertex id
        //   tags[1][2] = 0; // local dof id
        //   tags[1][3] = 1; // total number of dofs in this vertex
          
        //   const ordinal_type iend = card - 2;
        //   for (ordinal_type i=0;i<iend;++i) {
        //     const auto ii = i + 2;
        //     tags[ii][0] = 1;    // edge dof
        //     tags[ii][1] = 0;    // edge id
        //     tags[ii][2] = i;    // local dof id
        //     tags[ii][3] = iend; // total number of dofs in this edge
        //   }
        // }
      } else {
        for (ordinal_type i=0;i<card;++i) {
          tags[i][0] = 1;    // edge dof
          tags[i][1] = 0;    // edge id
          tags[i][2] = i;    // local dof id
          tags[i][3] = card; // total number of dofs in this edge
        }
      }

      ordinal_type_array_1d_host tagView(&tags[0][0], card*4);

      // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
      // tags are constructed on host
      this->setOrdinalTagData(this->tagToOrdinal_,
                              this->ordinalToTag_,
                              tagView,
                              this->basisCardinality_,
                              tagSize,
                              posScDim,
                              posScOrd,
                              posDfOrd);
    }  
  }
// Solves turning point equations via classic Salinger bordering
// The first m columns of input_x and input_null store the RHS while
// the last column stores df/dp, d(Jn)/dp respectively.  Note however
// input_param has only m columns (not m+1).  result_x, result_null,
// are result_param have the same dimensions as their input counterparts
NOX::Abstract::Group::ReturnType 
LOCA::TurningPoint::MooreSpence::PhippsBordering::solveTransposeContiguous(
		  Teuchos::ParameterList& params,
		  const NOX::Abstract::MultiVector& input_x,
		  const NOX::Abstract::MultiVector& input_null,
	          const NOX::Abstract::MultiVector::DenseMatrix& input_param,
		  NOX::Abstract::MultiVector& result_x,
		  NOX::Abstract::MultiVector& result_null,
	          NOX::Abstract::MultiVector::DenseMatrix& result_param) const
{
  std::string callingFunction = 
    "LOCA::TurningPoint::MooreSpence::PhippsBordering::solveTransposeContiguous()";
  NOX::Abstract::Group::ReturnType finalStatus = NOX::Abstract::Group::Ok;
  NOX::Abstract::Group::ReturnType status;

  int m = input_x.numVectors()-2;
  std::vector<int> index_input(m);
  std::vector<int> index_input_dp(m+1);
  std::vector<int> index_null(1);
  std::vector<int> index_dp(1);
  for (int i=0; i<m; i++) {
    index_input[i] = i;
    index_input_dp[i] = i;
  }
  index_input_dp[m] = m;
  index_dp[0] = m;
  index_null[0] = m+1;

  NOX::Abstract::MultiVector::DenseMatrix tmp_mat_1(1, m+1);
  NOX::Abstract::MultiVector::DenseMatrix tmp_mat_2(1, m+2);

  // Create view of first m+1 columns of input_null, result_null
  Teuchos::RCP<NOX::Abstract::MultiVector> input_null_view = 
      input_null.subView(index_input_dp);
  Teuchos::RCP<NOX::Abstract::MultiVector> result_null_view = 
      result_null.subView(index_input_dp);

  // verify underlying Jacobian is valid
  if (!group->isJacobian()) {
    status = group->computeJacobian();
    finalStatus = 
      globalData->locaErrorCheck->combineAndCheckReturnTypes(status, 
							     finalStatus,
							     callingFunction);
  }

  // Solve  |J^T v||A B| = |G -phi|
  //        |u^T 0||a b|   |0   0 |
  status =
    transposeBorderedSolver->applyInverseTranspose(params, 
						   input_null_view.get(), 
						   NULL, 
						   *result_null_view, 
						   tmp_mat_1);
  finalStatus = 
    globalData->locaErrorCheck->combineAndCheckReturnTypes(status, finalStatus,
							   callingFunction);
  Teuchos::RCP<NOX::Abstract::MultiVector> A = 
    result_null.subView(index_input);
  Teuchos::RCP<NOX::Abstract::MultiVector> B = 
    result_null.subView(index_dp);
  double b = tmp_mat_1(0,m);

  // compute (Jv)_x^T[A B u]
  result_null[m+1] = *uVector;
  Teuchos::RCP<NOX::Abstract::MultiVector> tmp = 
    result_null.clone(NOX::ShapeCopy);
  status = group->computeDwtJnDxMulti(result_null, *nullVector, *tmp);
  finalStatus = 
    globalData->locaErrorCheck->combineAndCheckReturnTypes(status, 
							   finalStatus,
							   callingFunction);

  // compute [F 0 0] - (Jv)_x^T[A B u]
  tmp->update(1.0, input_x, -1.0);

  // verify underlying Jacobian is valid
  if (!group->isJacobian()) {
    status = group->computeJacobian();
    finalStatus = 
      globalData->locaErrorCheck->combineAndCheckReturnTypes(status, 
							     finalStatus,
							     callingFunction);
  }

  // Solve  |J^T v||C D E| = |F - (Jv)_x^T A  -(Jv)_x^T B  -(Jv)_x^T u|
  //        |u^T 0||c d e|   |         0             0            0   |
  status = 
    transposeBorderedSolver->applyInverseTranspose(params, 
						   tmp.get(), 
						   NULL, 
						   result_x,
						   tmp_mat_2);
  finalStatus = 
    globalData->locaErrorCheck->combineAndCheckReturnTypes(status, finalStatus,
							   callingFunction);
  Teuchos::RCP<NOX::Abstract::MultiVector> C = 
    result_x.subView(index_input);
  Teuchos::RCP<NOX::Abstract::MultiVector> D = 
    result_x.subView(index_dp);
  Teuchos::RCP<NOX::Abstract::MultiVector> E = 
    result_x.subView(index_null);
  double d = tmp_mat_2(0, m);
  double e = tmp_mat_2(0, m+1);

  // compute (Jv)_p^T*[A B u]
  NOX::Abstract::MultiVector::DenseMatrix t1(1,m+2);
  result_null.multiply(1.0, *dJndp, t1);

  // compute f_p^T*[C D E]
  NOX::Abstract::MultiVector::DenseMatrix t2(1,m+2);
  result_x.multiply(1.0, *dfdp, t2);

  // compute f_p^T*u
  double fptu = uVector->innerProduct((*dfdp)[0]);

  // Fill coefficient arrays
  double M[9];
  M[0] = st;   M[1] =  -e;   M[2] = t1(0,m+1) + t2(0,m+1);
  M[3] = 0.0;  M[4] =   st;  M[5] = fptu;
  M[6] = -b;   M[7] =  -d;   M[8] = t1(0,m) + t2(0,m);

  // Compute RHS
  double *R = new double[3*m];
  for (int i=0; i<m; i++) {
    R[3*i]   = tmp_mat_1(0,i);
    R[3*i+1] = tmp_mat_2(0,i);
    R[3*i+2] = result_param(0,i) - t1(0,i) - t2(0,i);
  }

  // Solve M*P = R
  int three = 3;
  int piv[3];
  int info;
  Teuchos::LAPACK<int,double> L;
  L.GESV(three, m, M, three, piv, R, three, &info);
  if (info != 0) {
    globalData->locaErrorCheck->throwError(
				    callingFunction,
				    "Solve of 3x3 coefficient matrix failed!");
    return NOX::Abstract::Group::Failed;
  }

  NOX::Abstract::MultiVector::DenseMatrix alpha(1,m);
  NOX::Abstract::MultiVector::DenseMatrix beta(1,m);
  for (int i=0; i<m; i++) {
    alpha(0,i)        = R[3*i];
    beta(0,i)         = R[3*i+1];
    result_param(0,i) = R[3*i+2];
  }

  // compute A = A + B*z + alpha*u (remember A is a sub-view of result_null)
  A->update(Teuchos::NO_TRANS, 1.0, *B, result_param, 1.0);
  A->update(Teuchos::NO_TRANS, 1.0, *uMultiVector, alpha, 1.0);

  // compute C = C + D*z + alpha*E + beta*u 
  // (remember C is a sub-view of result_x)
  C->update(Teuchos::NO_TRANS, 1.0, *D, result_param, 1.0);
  C->update(Teuchos::NO_TRANS, 1.0, *E, alpha, 1.0);
  C->update(Teuchos::NO_TRANS, 1.0, *uMultiVector, beta, 1.0);

  delete [] R;

  return finalStatus;
}
NOX::Abstract::Group::ReturnType 
LOCA::BorderedSolver::LowerTriangularBlockElimination::
solve(Teuchos::ParameterList& params,
      const LOCA::BorderedSolver::AbstractOperator& op,
      const LOCA::MultiContinuation::ConstraintInterface& B,
      const NOX::Abstract::MultiVector::DenseMatrix& C,
      const NOX::Abstract::MultiVector* F,
      const NOX::Abstract::MultiVector::DenseMatrix* G,
      NOX::Abstract::MultiVector& X,
      NOX::Abstract::MultiVector::DenseMatrix& Y) const
{
  string callingFunction = 
    "LOCA::BorderedSolver::LowerTriangularBlockElimination::solve()";
  NOX::Abstract::Group::ReturnType finalStatus = NOX::Abstract::Group::Ok;
  NOX::Abstract::Group::ReturnType status;

  // Determine if X or Y is zero
  bool isZeroF = (F == NULL);
  bool isZeroG = (G == NULL);
  bool isZeroB = B.isDXZero();
  bool isZeroX = isZeroF;
  bool isZeroY = isZeroG && (isZeroB  || isZeroX);

  // First compute X
  if (isZeroX)
    X.init(0.0);
  else {
    // Solve X = J^-1 F, note F must be nonzero
    status = op.applyInverse(params, *F, X);
    finalStatus = 
      globalData->locaErrorCheck->combineAndCheckReturnTypes(status, 
							     finalStatus,
							     callingFunction);
  }

  // Now compute Y
  if (isZeroY)
    Y.putScalar(0.0);
  else {
    // Compute G - B^T*X and store in Y
    if (isZeroG) 
      B.multiplyDX(-1.0, X, Y);
    else {
      Y.assign(*G);
      if (!isZeroB && !isZeroX) {
	NOX::Abstract::MultiVector::DenseMatrix T(Y.numRows(),Y.numCols());
	B.multiplyDX(1.0, X, T);
	Y -= T;
      }
    }

    // Overwrite Y with Y = C^-1 * (G - B^T*X)
    NOX::Abstract::MultiVector::DenseMatrix M(C);
    int *ipiv = new int[M.numRows()];
    Teuchos::LAPACK<int,double> L;
    int info;
    L.GETRF(M.numRows(), M.numCols(), M.values(), M.stride(), ipiv, &info);
    if (info != 0) {
      status = NOX::Abstract::Group::Failed;
      finalStatus = 
	globalData->locaErrorCheck->combineAndCheckReturnTypes(
							      status, 
							      finalStatus,
							      callingFunction);
    }
    L.GETRS('N', M.numRows(), Y.numCols(), M.values(), M.stride(), ipiv, 
	    Y.values(), Y.stride(), &info);
    delete [] ipiv;
    if (info != 0) {
      status = NOX::Abstract::Group::Failed;
      finalStatus = 
	globalData->locaErrorCheck->combineAndCheckReturnTypes(
							     status, 
							     finalStatus,
							     callingFunction);
    }
  }

  return finalStatus;
}
Esempio n. 26
0
// ----------------------------------------------------------------------
// Main
//
//
int main(int argc, char *argv[]) {
  Teuchos::GlobalMPISession mpiSession(&argc, &argv);

  Kokkos::initialize();

  // This little trick lets us print to std::cout only if a (dummy) command-line argument is provided.
  int iprint = argc - 1;

  for (int i=0;i<argc;++i) {
    if ((strcmp(argv[i],"--nelement")          == 0)) { nelement = atoi(argv[++i]); continue;}
    if ((strcmp(argv[i],"--apply-orientation") == 0)) { apply_orientation  = atoi(argv[++i]); continue;}
    if ((strcmp(argv[i],"--verbose")           == 0)) { verbose  = atoi(argv[++i]); continue;}
    if ((strcmp(argv[i],"--maxp")              == 0)) { maxp     = atoi(argv[++i]); continue;}
  }

  Teuchos::RCP<std::ostream> outStream;
  Teuchos::oblackholestream bhs; // outputs nothing

  if (iprint > 0)
    outStream = Teuchos::rcp(&std::cout, false);
  else
    outStream = Teuchos::rcp(&bhs, false);

  // Save the format state of the original std::cout.
  Teuchos::oblackholestream oldFormatState;
  oldFormatState.copyfmt(std::cout);
  *outStream << std::scientific;
  *outStream \
    << "===============================================================================\n" \
    << "|                                                                             |\n" \
    << "|                    Unit Test (Basis_HGRAD_TRI_Cn_FEM)                       |\n" \
    << "|                                                                             |\n" \
    << "|     1) Patch test involving mass and stiffness matrices,                    |\n" \
    << "|        for the Neumann problem on a triangular patch                        |\n" \
    << "|        Omega with boundary Gamma.                                           |\n" \
    << "|                                                                             |\n" \
    << "|        - div (grad u) + u = f  in Omega,  (grad u) . n = g  on Gamma        |\n" \
    << "|                                                                             |\n" \
    << "|  Questions? Contact  Pavel Bochev  ([email protected]),                    |\n" \
    << "|                      Denis Ridzal  ([email protected]),                    |\n" \
    << "|                      Kara Peterson ([email protected]).                    |\n" \
    << "|                      Kyungjoo Kim  ([email protected]).                     |\n" \
    << "|                                                                             |\n" \
    << "|  Intrepid's website: http://trilinos.sandia.gov/packages/intrepid           |\n" \
    << "|  Trilinos website:   http://trilinos.sandia.gov                             |\n" \
    << "|                                                                             |\n" \
    << "===============================================================================\n" \
    << "| TEST 4: Patch test for high order assembly                                  |\n" \
    << "===============================================================================\n";

  int r_val = 0;

  // precision control
  outStream->precision(3);

#if defined( INTREPID_USING_EXPERIMENTAL_HIGH_ORDER )

  try {
    // test setup
    const int ndim = 2;
    FieldContainer<value_type> base_nodes(1, 4, ndim);
    base_nodes(0, 0, 0) = 0.0;
    base_nodes(0, 0, 1) = 0.0;

    base_nodes(0, 1, 0) = 1.0;
    base_nodes(0, 1, 1) = 0.0;

    base_nodes(0, 2, 0) = 0.0;
    base_nodes(0, 2, 1) = 1.0;

    base_nodes(0, 3, 0) = 1.0;
    base_nodes(0, 3, 1) = 1.0;

    // element 0 has globally permuted edge node
    const int elt_0[2][3] = { { 0, 1, 2 },
                              { 0, 2, 1 } };
    
    // element 1 is locally permuted
    int elt_1[3] = { 1, 2, 3 };

    DefaultCubatureFactory<value_type> cubature_factory;

    // for all test orders
    for (int nx=0;nx<=maxp;++nx) {
      for (int ny=0;ny<=maxp-nx;++ny) {
        // polynomial order of approximation
        const int minp = std::max(nx+ny, 1);

        // test for all basis above order p
        const EPointType pointtype[] = { POINTTYPE_EQUISPACED, POINTTYPE_WARPBLEND };
        for (int ptype=0;ptype<2;++ptype) {
          for (int p=minp;p<=maxp;++p) {
            *outStream << "\n"                                              \
                       << "===============================================================================\n" \
                       << "  Order (nx,ny,p) = " << nx << ", " << ny << ", " << p << " , PointType = " << EPointTypeToString(pointtype[ptype]) << "\n" \
                       << "===============================================================================\n";

            BasisSet_HGRAD_TRI_Cn_FEM<value_type,FieldContainer<value_type> > basis_set(p, pointtype[ptype]);
            const auto& basis = basis_set.getCellBasis();
            const shards::CellTopology cell = basis.getBaseCellTopology();

            const int nbf = basis.getCardinality();

            const int nvert = cell.getVertexCount();
            const int nedge = cell.getEdgeCount();

            FieldContainer<value_type> nodes(1, 4, ndim);
            FieldContainer<value_type> cell_nodes(1, nvert, ndim);

            // ignore the subdimension; the matrix is always considered as 1D array
            FieldContainer<value_type> A(1, nbf, nbf), b(1, nbf);

            // ***** Test for different orientations *****
            for (int conf0=0;conf0<2;++conf0) {
              for (int ino=0;ino<3;++ino) {
                nodes(0, elt_0[conf0][ino], 0) = base_nodes(0, ino, 0);
                nodes(0, elt_0[conf0][ino], 1) = base_nodes(0, ino, 1);
              }
              nodes(0, 3, 0) = base_nodes(0, 3, 0);
              nodes(0, 3, 1) = base_nodes(0, 3, 1);

              // reset element connectivity
              elt_1[0] = 1;
              elt_1[1] = 2;
              elt_1[2] = 3;

              // for all permuations of element 1
              for (int conf1=0;conf1<6;++conf1) {
                // filter out left handed element
                fill_cell_nodes(cell_nodes, 
                                nodes, 
                                elt_1,
                                nvert, ndim);
                if (OrientationTools<value_type>::isLeftHandedCell(cell_nodes)) {
                  // skip left handed
                } else {
                  const int *element[2] = { elt_0[conf0], elt_1 };
                  *outStream << "\n"                                \
                             << "  Element 0 is configured " << conf0 << " "
                             << "(" << element[0][0] << ","<< element[0][1] << "," << element[0][2] << ")"
                             << "  Element 1 is configured " << conf1 << " "
                             << "(" << element[1][0] << ","<< element[1][1] << "," << element[1][2] << ")"
                             << "\n";

                  if (verbose) {
                    *outStream << " - Element nodal connectivity - \n";
                    for (int iel=0;iel<nelement;++iel)
                      *outStream << " iel = " << std::setw(4) << iel
                                 << ", nodes = "
                                 << std::setw(4) << element[iel][0]
                                 << std::setw(4) << element[iel][1]
                                 << std::setw(4) << element[iel][2]
                                 << "\n";
                  }

                  // Step 0: count one-to-one mapping between high order nodes and dofs
                  Example::ToyMesh mesh;
                  int local2global[2][8][2], boundary[2][3], off_global = 0;

                  const int nnodes_per_element
                    = cell.getVertexCount()
                    + cell.getEdgeCount()
                    + 1;

                  for (int iel=0;iel<nelement;++iel)
                    mesh.getLocalToGlobalMap(local2global[iel], off_global, basis, element[iel]);

                  for (int iel=0;iel<nelement;++iel)
                    mesh.getBoundaryFlags(boundary[iel], cell, element[iel]);

                  if (verbose) {
                    *outStream << " - Element one-to-one local2global map -\n";
                    for (int iel=0;iel<nelement;++iel) {
                      *outStream << " iel = " << std::setw(4) << iel << "\n";
                      for (int i=0;i<(nnodes_per_element+1);++i) {
                        *outStream << "   local = " << std::setw(4) << local2global[iel][i][0]
                                   << "   global = " << std::setw(4) << local2global[iel][i][1]
                                   << "\n";
                      }
                    }
                    *outStream << " - Element boundary flags -\n";
                    const int nside = cell.getSideCount();
                    for (int iel=0;iel<nelement;++iel) {
                      *outStream << " iel = " << std::setw(4) << iel << "\n";
                      for (int i=0;i<nside;++i) {
                        *outStream << "   side = " << std::setw(4) << i
                                   << "   boundary = " << std::setw(4) << boundary[iel][i]
                                   << "\n";
                      }
                    }
                  }

                  // Step 1: assembly
                  const int ndofs = off_global;
                  FieldContainer<value_type> A_asm(1, ndofs, ndofs), b_asm(1, ndofs);

                  for (int iel=0;iel<nelement;++iel) {
                    // Step 1.1: create element matrices
                    Orientation ort = Orientation::getOrientation(cell, element[iel]);

                    // set element nodal coordinates
                    fill_cell_nodes(cell_nodes, 
                                    nodes, 
                                    element[iel], 
                                    nvert, ndim);

                    build_element_matrix_and_rhs(A, b,
                                                 cubature_factory,
                                                 basis_set,
                                                 element[iel],
                                                 boundary[iel],
                                                 cell_nodes,
                                                 ort,
                                                 nx, ny);
                    // if p is bigger than 4, not worth to look at the matrix
                    if (verbose && p < 5) {
                      *outStream << " - Element matrix and rhs, iel = " << iel << "\n";
                      *outStream << std::showpos;
                      for (int i=0;i<nbf;++i) {
                        for (int j=0;j<nbf;++j)
                          *outStream << MatVal(A, i, j) << " ";
                        *outStream << ":: " << MatVal(b, i, 0) << "\n";
                      }
                      *outStream << std::noshowpos;
                    }

                    // Step 1.2: assemble high order elements
                    assemble_element_matrix_and_rhs(A_asm, b_asm,
                                                    A, b,
                                                    local2global[iel],
                                                    nnodes_per_element);
                  }

                  if (verbose && p < 5) {
                    *outStream << " - Assembled element matrix and rhs -\n";
                    *outStream << std::showpos;
                    for (int i=0;i<ndofs;++i) {
                      for (int j=0;j<ndofs;++j)
                        *outStream << MatVal(A_asm, i, j) << " ";
                      *outStream << ":: " << MatVal(b_asm, i, 0) << "\n";
                    }
                    *outStream << std::noshowpos;
                  }

                  // Step 2: solve the system of equations
                  int info = 0;
                  Teuchos::LAPACK<int,value_type> lapack;
                  FieldContainer<int> ipiv(ndofs);
                  lapack.GESV(ndofs, 1, &A_asm(0,0,0), ndofs, &ipiv(0,0), &b_asm(0,0), ndofs, &info);
                  TEUCHOS_TEST_FOR_EXCEPTION( info != 0, std::runtime_error,
                                              ">>> ERROR (Intrepid::HGRAD_TRI_Cn::Test 04): " \
                                              "LAPACK solve fails");

                  // Step 3: construct interpolant and check solutions
                  magnitude_type interpolation_error = 0, solution_norm =0;
                  for (int iel=0;iel<nelement;++iel) {
                    retrieve_element_solution(b,
                                              b_asm,
                                              local2global[iel],
                                              nnodes_per_element);

                    if (verbose && p < 5) {
                      *outStream << " - Element solution, iel = " << iel << "\n";
                      *outStream << std::showpos;
                      for (int i=0;i<nbf;++i) {
                        *outStream << MatVal(b, i, 0) << "\n";
                      }
                      *outStream << std::noshowpos;
                    }

                    magnitude_type
                      element_interpolation_error = 0,
                      element_solution_norm = 0;

                    Orientation ort = Orientation::getOrientation(cell, element[iel]);

                    // set element nodal coordinates
                    fill_cell_nodes(cell_nodes, 
                                    nodes, 
                                    element[iel], 
                                    nvert, ndim);

                    compute_element_error(element_interpolation_error,
                                          element_solution_norm,
                                          element[iel],
                                          cell_nodes,
                                          basis_set,
                                          b,
                                          ort,
                                          nx, ny);

                    interpolation_error += element_interpolation_error;
                    solution_norm       += element_solution_norm;

                    {
                      int edge_orts[3];
                      ort.getEdgeOrientation(edge_orts, nedge);
                      *outStream << "   iel = " << std::setw(4) << iel
                                 << ", orientation = "
                                 << edge_orts[0]
                                 << edge_orts[1]
                                 << edge_orts[2]
                                 << " , error = " << element_interpolation_error
                                 << " , solution norm = " << element_solution_norm
                                 << " , relative error = " << (element_interpolation_error/element_solution_norm)
                                 << "\n";
                    }
                    const magnitude_type relative_error = interpolation_error/solution_norm;
                    const magnitude_type tol = p*p*100*INTREPID_TOL;

                    if (relative_error > tol) {
                      ++r_val;
                      *outStream << "\n\nPatch test failed: \n"
                                 << "    exact polynomial (nx, ny) = " << std::setw(4) << nx << ", " << std::setw(4) << ny << "\n"
                                 << "    basis order               = " << std::setw(4) << p << "\n"
                                 << "    orientation configuration = " << std::setw(4) << conf0 << std::setw(4) << conf1 << "\n"
                                 << "    relative error            = " << std::setw(4) << relative_error << "\n"
                                 << "    tolerance                 = " << std::setw(4) << tol << "\n";
                    }
                  }
                } 

                // for next iteration
                std::next_permutation(elt_1, elt_1+3);
              } // end of conf1
            } // end of conf0
          } // end of p
        } // end of point type
      } // end of ny
    } // end of nx
  }
  catch (std::logic_error err) {
    *outStream << err.what() << "\n\n";
    r_val = -1000;
  };
#else
  *outStream << "\t This test is for high order element assembly. \n"
             << "\t Use -D INTREPID_USING_EXPERIMENTAL_HIGH_ORDER in CMAKE_CXX_FLAGS \n";
#endif

  if (r_val != 0)
    std::cout << "End Result: TEST FAILED  :: r_val = " << r_val << "\n";
  else
    std::cout << "End Result: TEST PASSED\n";

  // reset format state of std::cout
  std::cout.copyfmt(oldFormatState);

  Kokkos::finalize();

  return r_val;
}
Esempio n. 27
0
int main(int argc, char *argv[]) {

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
 Kokkos::initialize();
  // This little trick lets us print to std::cout only if
  // a (dummy) command-line argument is provided.
  int iprint     = argc - 1;
  Teuchos::RCP<std::ostream> outStream;
  Teuchos::oblackholestream bhs; // outputs nothing
  if (iprint > 0)
    outStream = Teuchos::rcp(&std::cout, false);
  else
    outStream = Teuchos::rcp(&bhs, false);

  // Save the format state of the original std::cout.
  Teuchos::oblackholestream oldFormatState;
  oldFormatState.copyfmt(std::cout);

  *outStream \
    << "===============================================================================\n" \
    << "|                                                                             |\n" \
    << "|               Unit Test (Basis_HGRAD_LINE_C1_FEM)                           |\n" \
    << "|                                                                             |\n" \
    << "|     1) Patch test involving mass and stiffness matrices,                    |\n" \
    << "|        for the Neumann problem on a REFERENCE line:                         |\n" \
    << "|                                                                             |\n" \
    << "|            - u'' + u = f  in (-1,1),  u' = g at -1,1                        |\n" \
    << "|                                                                             |\n" \
    << "|  Questions? Contact  Pavel Bochev  ([email protected]),                    |\n" \
    << "|                      Denis Ridzal  ([email protected]),                    |\n" \
    << "|                      Kara Peterson ([email protected]).                    |\n" \
    << "|                                                                             |\n" \
    << "|  Intrepid's website: http://trilinos.sandia.gov/packages/intrepid           |\n" \
    << "|  Trilinos website:   http://trilinos.sandia.gov                             |\n" \
    << "|                                                                             |\n" \
    << "===============================================================================\n"\
    << "| TEST 1: Patch test                                                          |\n"\
    << "===============================================================================\n";

  
  int errorFlag = 0;
  double zero = 100*INTREPID_TOL;
  outStream -> precision(20);


  try {

    int max_order = 1;  // max total order of polynomial solution

    // Define array containing points at which the solution is evaluated
    int numIntervals = 100;
    int numInterpPoints = numIntervals + 1;
    FieldContainer<double> interp_points(numInterpPoints, 1);
    for (int i=0; i<numInterpPoints; i++) {
      interp_points(i,0) = -1.0+(2.0*(double)i)/(double)numIntervals;
    }
    
    DefaultCubatureFactory<double>  cubFactory;                                   // create factory
    shards::CellTopology line(shards::getCellTopologyData< shards::Line<> >());   // create cell topology

    //create basis
    Teuchos::RCP<Basis<double,FieldContainer<double> > > lineBasis =
      Teuchos::rcp(new Basis_HGRAD_LINE_C1_FEM<double,FieldContainer<double> >() );
    int numFields = lineBasis->getCardinality();
    int basis_order = lineBasis->getDegree();

    // create cubature
    Teuchos::RCP<Cubature<double> > lineCub = cubFactory.create(line, 2);
    int numCubPoints = lineCub->getNumPoints();
    int spaceDim = lineCub->getDimension();

    for (int soln_order=0; soln_order <= max_order; soln_order++) {

      // evaluate exact solution
      FieldContainer<double> exact_solution(1, numInterpPoints);
      u_exact(exact_solution, interp_points, soln_order);

      /* Computational arrays. */
      FieldContainer<double> cub_points(numCubPoints, spaceDim);
      FieldContainer<double> cub_points_physical(1, numCubPoints, spaceDim);
      FieldContainer<double> cub_weights(numCubPoints);
      FieldContainer<double> cell_nodes(1, 2, spaceDim);
      FieldContainer<double> jacobian(1, numCubPoints, spaceDim, spaceDim);
      FieldContainer<double> jacobian_inv(1, numCubPoints, spaceDim, spaceDim);
      FieldContainer<double> jacobian_det(1, numCubPoints);
      FieldContainer<double> weighted_measure(1, numCubPoints);

      FieldContainer<double> value_of_basis_at_cub_points(numFields, numCubPoints);
      FieldContainer<double> transformed_value_of_basis_at_cub_points(1, numFields, numCubPoints);
      FieldContainer<double> weighted_transformed_value_of_basis_at_cub_points(1, numFields, numCubPoints);
      FieldContainer<double> grad_of_basis_at_cub_points(numFields, numCubPoints, spaceDim);
      FieldContainer<double> transformed_grad_of_basis_at_cub_points(1, numFields, numCubPoints, spaceDim);
      FieldContainer<double> weighted_transformed_grad_of_basis_at_cub_points(1, numFields, numCubPoints, spaceDim);
      FieldContainer<double> fe_matrix(1, numFields, numFields);

      FieldContainer<double> rhs_at_cub_points_physical(1, numCubPoints);
      FieldContainer<double> rhs_and_soln_vector(1, numFields);

      FieldContainer<double> one_point(1, 1);
      FieldContainer<double> value_of_basis_at_one(numFields, 1);
      FieldContainer<double> value_of_basis_at_minusone(numFields, 1);
      FieldContainer<double> bc_neumann(2, numFields);

      FieldContainer<double> value_of_basis_at_interp_points(numFields, numInterpPoints);
      FieldContainer<double> transformed_value_of_basis_at_interp_points(1, numFields, numInterpPoints);
      FieldContainer<double> interpolant(1, numInterpPoints);

      FieldContainer<int> ipiv(numFields);

      /******************* START COMPUTATION ***********************/

      // get cubature points and weights
      lineCub->getCubature(cub_points, cub_weights);

      // fill cell vertex array
      cell_nodes(0, 0, 0) = -1.0;
      cell_nodes(0, 1, 0) = 1.0;

      // compute geometric cell information
      CellTools<double>::setJacobian(jacobian, cub_points, cell_nodes, line);
      CellTools<double>::setJacobianInv(jacobian_inv, jacobian);
      CellTools<double>::setJacobianDet(jacobian_det, jacobian);

      // compute weighted measure
      FunctionSpaceTools::computeCellMeasure<double>(weighted_measure, jacobian_det, cub_weights);

      ///////////////////////////
      // Computing mass matrices:
      // tabulate values of basis functions at (reference) cubature points
      lineBasis->getValues(value_of_basis_at_cub_points, cub_points, OPERATOR_VALUE);

      // transform values of basis functions
      FunctionSpaceTools::HGRADtransformVALUE<double>(transformed_value_of_basis_at_cub_points,
                                                      value_of_basis_at_cub_points);

      // multiply with weighted measure
      FunctionSpaceTools::multiplyMeasure<double>(weighted_transformed_value_of_basis_at_cub_points,
                                                  weighted_measure,
                                                  transformed_value_of_basis_at_cub_points);

      // compute mass matrices
      FunctionSpaceTools::integrate<double>(fe_matrix,
                                            transformed_value_of_basis_at_cub_points,
                                            weighted_transformed_value_of_basis_at_cub_points,
                                            COMP_CPP);
      ///////////////////////////

      ////////////////////////////////
      // Computing stiffness matrices:
      // tabulate gradients of basis functions at (reference) cubature points
      lineBasis->getValues(grad_of_basis_at_cub_points, cub_points, OPERATOR_GRAD);

      // transform gradients of basis functions
      FunctionSpaceTools::HGRADtransformGRAD<double>(transformed_grad_of_basis_at_cub_points,
                                                     jacobian_inv,
                                                     grad_of_basis_at_cub_points);

      // multiply with weighted measure
      FunctionSpaceTools::multiplyMeasure<double>(weighted_transformed_grad_of_basis_at_cub_points,
                                                  weighted_measure,
                                                  transformed_grad_of_basis_at_cub_points);

      // compute stiffness matrices and sum into fe_matrix
      FunctionSpaceTools::integrate<double>(fe_matrix,
                                            transformed_grad_of_basis_at_cub_points,
                                            weighted_transformed_grad_of_basis_at_cub_points,
                                            COMP_CPP,
                                            true);
      ////////////////////////////////

      ///////////////////////////////
      // Computing RHS contributions:
      // map (reference) cubature points to physical space
      CellTools<double>::mapToPhysicalFrame(cub_points_physical, cub_points, cell_nodes, line);

      // evaluate rhs function
      rhsFunc(rhs_at_cub_points_physical, cub_points_physical, soln_order);

      // compute rhs
      FunctionSpaceTools::integrate<double>(rhs_and_soln_vector,
                                            rhs_at_cub_points_physical,
                                            weighted_transformed_value_of_basis_at_cub_points,
                                            COMP_CPP);

      // compute neumann b.c. contributions and adjust rhs
      one_point(0,0) = 1.0;   lineBasis->getValues(value_of_basis_at_one, one_point, OPERATOR_VALUE);
      one_point(0,0) = -1.0;  lineBasis->getValues(value_of_basis_at_minusone, one_point, OPERATOR_VALUE);
      neumann(bc_neumann, value_of_basis_at_minusone, value_of_basis_at_one, soln_order);
      for (int i=0; i<numFields; i++) {
        rhs_and_soln_vector(0, i) -= bc_neumann(0, i);
        rhs_and_soln_vector(0, i) += bc_neumann(1, i);
      }
      ///////////////////////////////

      /////////////////////////////
      // Solution of linear system:
      int info = 0;
      Teuchos::LAPACK<int, double> solver;
      //solver.GESV(numRows, 1, &fe_mat(0,0), numRows, &ipiv(0), &fe_vec(0), numRows, &info);
      solver.GESV(numFields, 1, &fe_matrix[0], numFields, &ipiv(0), &rhs_and_soln_vector[0], numFields, &info);
      /////////////////////////////

      ////////////////////////
      // Building interpolant:
      // evaluate basis at interpolation points
      lineBasis->getValues(value_of_basis_at_interp_points, interp_points, OPERATOR_VALUE);
      // transform values of basis functions
      FunctionSpaceTools::HGRADtransformVALUE<double>(transformed_value_of_basis_at_interp_points,
                                                      value_of_basis_at_interp_points);
      FunctionSpaceTools::evaluate<double>(interpolant, rhs_and_soln_vector, transformed_value_of_basis_at_interp_points);
      ////////////////////////

      /******************* END COMPUTATION ***********************/
    
      RealSpaceTools<double>::subtract(interpolant, exact_solution);

      *outStream << "\nNorm-2 difference between exact solution polynomial of order "
                 << soln_order << " and finite element interpolant of order " << basis_order << ": "
                 << RealSpaceTools<double>::vectorNorm(&interpolant[0], interpolant.dimension(1), NORM_TWO) << "\n";

      if (RealSpaceTools<double>::vectorNorm(&interpolant[0], interpolant.dimension(1), NORM_TWO) > zero) {
        *outStream << "\n\nPatch test failed for solution polynomial order "
                   << soln_order << " and basis order " << basis_order << "\n\n";
        errorFlag++;
      }

    } // end for soln_order

  }
  // Catch unexpected errors
  catch (std::logic_error err) {
    *outStream << err.what() << "\n\n";
    errorFlag = -1000;
  };

  if (errorFlag != 0)
    std::cout << "End Result: TEST FAILED\n";
  else
    std::cout << "End Result: TEST PASSED\n";

  // reset format state of std::cout
  std::cout.copyfmt(oldFormatState);
 Kokkos::finalize();
  return errorFlag;
}
Esempio n. 28
0
int main(int argc, char *argv[]) {

#ifdef EPETRA_MPI
  // Initialize MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  bool testFailed;
  bool boolret;
  int MyPID = Comm.MyPID();

  bool verbose = true;
  bool debug = false;
  std::string which("SM");

  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
  cmdp.setOption("debug","nodebug",&debug,"Print debugging information.");
  cmdp.setOption("sort",&which,"Targetted eigenvalues (SM,LM,SR,LR,SI,or LI).");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
#ifdef HAVE_MPI
    MPI_Finalize();
#endif
    return -1;
  }

  typedef double ScalarType;
  typedef Teuchos::ScalarTraits<ScalarType>          ScalarTypeTraits;
  typedef ScalarTypeTraits::magnitudeType            MagnitudeType;
  typedef Epetra_MultiVector                         MV;
  typedef Epetra_Operator                            OP;
  typedef Anasazi::MultiVecTraits<ScalarType,MV>     MVTraits;
  typedef Anasazi::OperatorTraits<ScalarType,MV,OP>  OpTraits;

  //  Dimension of the matrix
  int nx = 10;        // Discretization points in any one direction.
  int NumGlobalElements = nx*nx;  // Size of matrix nx*nx

  // Construct a Map that puts approximately the same number of
  // equations on each processor.

  Epetra_Map Map(NumGlobalElements, 0, Comm);

  // Get update list and number of local equations from newly created Map.

  int NumMyElements = Map.NumMyElements();

  std::vector<int> MyGlobalElements(NumMyElements);
  Map.MyGlobalElements(&MyGlobalElements[0]);

  // Create an integer vector NumNz that is used to build the Petra Matrix.
  // NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation
  // on this processor
  std::vector<int> NumNz(NumMyElements);

  /* We are building a matrix of block structure:

      | T -I          |
      |-I  T -I       |
      |   -I  T       |
      |        ...  -I|
      |           -I T|

   where each block is dimension nx by nx and the matrix is on the order of
   nx*nx.  The block T is a tridiagonal matrix.
  */

  for (int i=0; i<NumMyElements; i++) {
    if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1 ||
        MyGlobalElements[i] == nx-1 || MyGlobalElements[i] == nx*(nx-1) ) {
      NumNz[i] = 3;
    }
    else if (MyGlobalElements[i] < nx || MyGlobalElements[i] > nx*(nx-1) ||
             MyGlobalElements[i]%nx == 0 || (MyGlobalElements[i]+1)%nx == 0) {
      NumNz[i] = 4;
    }
    else {
      NumNz[i] = 5;
    }
  }

  // Create an Epetra_Matrix

  Teuchos::RCP<Epetra_CrsMatrix> A = Teuchos::rcp( new Epetra_CrsMatrix(Copy, Map, &NumNz[0]) );

  // Diffusion coefficient, can be set by user.
  // When rho*h/2 <= 1, the discrete convection-diffusion operator has real eigenvalues.
  // When rho*h/2 > 1, the operator has complex eigenvalues.
  double rho = 2*(nx+1);

  // Compute coefficients for discrete convection-diffution operator
  const double one = 1.0;
  std::vector<double> Values(4);
  std::vector<int> Indices(4);
  double h = one /(nx+1);
  double h2 = h*h;
  double c = 5.0e-01*rho/ h;
  Values[0] = -one/h2 - c; Values[1] = -one/h2 + c; Values[2] = -one/h2; Values[3]= -one/h2;
  double diag = 4.0 / h2;
  int NumEntries, info;

  for (int i=0; i<NumMyElements; i++)
  {
    if (MyGlobalElements[i]==0)
    {
      Indices[0] = 1;
      Indices[1] = nx;
      NumEntries = 2;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] == nx*(nx-1))
    {
      Indices[0] = nx*(nx-1)+1;
      Indices[1] = nx*(nx-2);
      NumEntries = 2;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] == nx-1)
    {
      Indices[0] = nx-2;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
      Indices[0] = 2*nx-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] == NumGlobalElements-1)
    {
      Indices[0] = NumGlobalElements-2;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
      Indices[0] = nx*(nx-1)-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] < nx)
    {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i] > nx*(nx-1))
    {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      NumEntries = 3;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    else if (MyGlobalElements[i]%nx == 0)
    {
      Indices[0] = MyGlobalElements[i]+1;
      Indices[1] = MyGlobalElements[i]-nx;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    }
    else if ((MyGlobalElements[i]+1)%nx == 0)
    {
      Indices[0] = MyGlobalElements[i]-nx;
      Indices[1] = MyGlobalElements[i]+nx;
      NumEntries = 2;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
      Indices[0] = MyGlobalElements[i]-1;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    else
    {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      Indices[3] = MyGlobalElements[i]+nx;
      NumEntries = 4;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    }
    // Put in the diagonal entry
    info = A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, &MyGlobalElements[i]);
    assert( info==0 );
  }

  // Finish up
  info = A->FillComplete();
  assert( info==0 );
  A->SetTracebackMode(1); // Shutdown Epetra Warning tracebacks

  //************************************
  // Start the block Davidson iteration
  //***********************************
  //
  //  Variables used for the Generalized Davidson Method
  //
  int nev = 4;
  int blockSize = 1;
  int maxDim = 50;
  int restartDim = 10;
  int maxRestarts = 500;
  double tol = 1e-10;

  // Set verbosity level
  int verbosity = Anasazi::Errors + Anasazi::Warnings;
  if (verbose) {
    verbosity += Anasazi::FinalSummary + Anasazi::TimingDetails;
  }
  if (debug) {
    verbosity += Anasazi::Debug;
  }
  //
  // Create parameter list to pass into solver manager
  //
  Teuchos::ParameterList MyPL;
  MyPL.set( "Verbosity", verbosity );
  MyPL.set( "Which", which );
  MyPL.set( "Block Size", blockSize );
  MyPL.set( "Maximum Subspace Dimension", maxDim);
  MyPL.set( "Restart Dimension", restartDim);
  MyPL.set( "Maximum Restarts", maxRestarts );
  MyPL.set( "Convergence Tolerance", tol );
  MyPL.set( "Relative Convergence Tolerance", true );
  MyPL.set( "Initial Guess", "User" );

  // Create an Epetra_MultiVector for an initial vector to start the solver.
  // Note:  This needs to have the same number of columns as the blocksize.
  Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(Map, blockSize) );
  ivec->Random();

  // Create the eigenproblem.
  Teuchos::RCP<Anasazi::BasicEigenproblem<double, MV, OP> > MyProblem = Teuchos::rcp(
    new Anasazi::BasicEigenproblem<double,MV,OP>() );
  MyProblem->setA(A);
  MyProblem->setInitVec(ivec);

  // Inform the eigenproblem that the operator A is non-Hermitian
  MyProblem->setHermitian(false);

  // Set the number of eigenvalues requested
  MyProblem->setNEV( nev );

  // Inform the eigenproblem that you are finishing passing it information
  boolret = MyProblem->setProblem();
  if (boolret != true) {
    if (verbose && MyPID == 0) {
      std::cout << "Anasazi::BasicEigenproblem::setProblem() returned with error." << std::endl;
    }
#ifdef HAVE_MPI
    MPI_Finalize() ;
#endif
    return -1;
  }

  // Initialize the Block Arnoldi solver
  Anasazi::GeneralizedDavidsonSolMgr<double, MV, OP> MySolverMgr(MyProblem, MyPL);

  // Solve the problem to the specified tolerances or length
  Anasazi::ReturnType returnCode = MySolverMgr.solve();
  testFailed = false;
  if (returnCode != Anasazi::Converged && MyPID==0 && verbose) {
    testFailed = true;
  }

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Anasazi::Eigensolution<ScalarType,MV> sol = MyProblem->getSolution();
  std::vector<Anasazi::Value<ScalarType> > evals = sol.Evals;
  Teuchos::RCP<MV> evecs = sol.Evecs;
  std::vector<int> index = sol.index;
  int numev = sol.numVecs;

  // Output computed eigenvalues and their direct residuals
  if (verbose && MyPID==0) {
    int numritz = (int)evals.size();
    std::cout.setf(std::ios_base::right, std::ios_base::adjustfield);
    std::cout<<std::endl<< "Computed Ritz Values"<< std::endl;
    std::cout<< std::setw(16) << "Real Part"
        << std::setw(16) << "Imag Part"
        << std::endl;
    std::cout<<"-----------------------------------------------------------"<<std::endl;
    for (int i=0; i<numritz; i++) {
      std::cout<< std::setw(16) << evals[i].realpart
          << std::setw(16) << evals[i].imagpart
          << std::endl;
    }
    std::cout<<"-----------------------------------------------------------"<<std::endl;
  }

  if (numev > 0) {
    // Compute residuals.
    Teuchos::LAPACK<int,double> lapack;
    std::vector<double> normA(numev);

    // The problem is non-Hermitian.
    int i=0;
    std::vector<int> curind(1);
    std::vector<double> resnorm(1), tempnrm(1);
    Teuchos::RCP<MV> tempAevec;
    Teuchos::RCP<const MV> evecr, eveci;
    Epetra_MultiVector Aevec(Map,numev);

    // Compute A*evecs
    OpTraits::Apply( *A, *evecs, Aevec );

    Teuchos::SerialDenseMatrix<int,double> Breal(1,1), Bimag(1,1);
    while (i<numev) {
      if (index[i]==0) {
        // Get a view of the current eigenvector (evecr)
        curind[0] = i;
        evecr = MVTraits::CloneView( *evecs, curind );

        // Get a copy of A*evecr
        tempAevec = MVTraits::CloneCopy( Aevec, curind );

        // Compute A*evecr - lambda*evecr
        Breal(0,0) = evals[i].realpart;
        MVTraits::MvTimesMatAddMv( -1.0, *evecr, Breal, 1.0, *tempAevec );

        // Compute the norm of the residual and increment counter
        MVTraits::MvNorm( *tempAevec, resnorm );
        normA[i] = resnorm[0] / Teuchos::ScalarTraits<MagnitudeType>::magnitude( evals[i].realpart );
        i++;
      } else {
        // Get a view of the real part of the eigenvector (evecr)
        curind[0] = i;
        evecr = MVTraits::CloneView( *evecs, curind );

        // Get a copy of A*evecr
        tempAevec = MVTraits::CloneCopy( Aevec, curind );

        // Get a view of the imaginary part of the eigenvector (eveci)
        curind[0] = i+1;
        eveci = MVTraits::CloneView( *evecs, curind );

        // Set the eigenvalue into Breal and Bimag
        Breal(0,0) = evals[i].realpart;
        Bimag(0,0) = evals[i].imagpart;

        // Compute A*evecr - evecr*lambdar + eveci*lambdai
        MVTraits::MvTimesMatAddMv( -1.0, *evecr, Breal, 1.0, *tempAevec );
        MVTraits::MvTimesMatAddMv( 1.0, *eveci, Bimag, 1.0, *tempAevec );
        MVTraits::MvNorm( *tempAevec, tempnrm );

        // Get a copy of A*eveci
        tempAevec = MVTraits::CloneCopy( Aevec, curind );

        // Compute A*eveci - eveci*lambdar - evecr*lambdai
        MVTraits::MvTimesMatAddMv( -1.0, *evecr, Bimag, 1.0, *tempAevec );
        MVTraits::MvTimesMatAddMv( -1.0, *eveci, Breal, 1.0, *tempAevec );
        MVTraits::MvNorm( *tempAevec, resnorm );

        // Compute the norms and scale by magnitude of eigenvalue
        normA[i] = lapack.LAPY2( tempnrm[0], resnorm[0] ) /
          lapack.LAPY2( evals[i].realpart, evals[i].imagpart );
        normA[i+1] = normA[i];

        i=i+2;
      }
    }

    // Output computed eigenvalues and their direct residuals
    if (verbose && MyPID==0) {
      std::cout.setf(std::ios_base::right, std::ios_base::adjustfield);
      std::cout<<std::endl<< "Actual Residuals"<<std::endl;
      std::cout<< std::setw(16) << "Real Part"
          << std::setw(16) << "Imag Part"
          << std::setw(20) << "Direct Residual"<< std::endl;
      std::cout<<"-----------------------------------------------------------"<<std::endl;
      for (int j=0; j<numev; j++) {
        std::cout<< std::setw(16) << evals[j].realpart
            << std::setw(16) << evals[j].imagpart
            << std::setw(20) << normA[j] << std::endl;
        if ( normA[j] > tol ) {
          testFailed = true;
        }
      }
      std::cout<<"-----------------------------------------------------------"<<std::endl;
    }
  }

#ifdef EPETRA_MPI
  MPI_Finalize();
#endif

  if (testFailed) {
    if (verbose && MyPID==0) {
      std::cout << "End Result: TEST FAILED" << std::endl;
    }
    return -1;
  }
  //
  // Default return value
  //
  if (verbose && MyPID==0) {
    std::cout << "End Result: TEST PASSED" << std::endl;
  }

  return 0;
}
Esempio n. 29
0
int main(int argc, char *argv[]) {

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
 Kokkos::initialize();
  // This little trick lets us print to std::cout only if
  // a (dummy) command-line argument is provided.
  int iprint     = argc - 1;
  Teuchos::RCP<std::ostream> outStream;
  Teuchos::oblackholestream bhs; // outputs nothing
  if (iprint > 0)
    outStream = Teuchos::rcp(&std::cout, false);
  else
    outStream = Teuchos::rcp(&bhs, false);

  // Save the format state of the original std::cout.
  Teuchos::oblackholestream oldFormatState;
  oldFormatState.copyfmt(std::cout);

  *outStream \
    << "===============================================================================\n" \
    << "|                                                                             |\n" \
    << "|                    Unit Test (Basis_HGRAD_TET_Cn_FEM)                       |\n" \
    << "|                                                                             |\n" \
    << "|     1) Patch test involving mass and stiffness matrices,                    |\n" \
    << "|        for the Neumann problem on a tetrahedral patch                       |\n" \
    << "|        Omega with boundary Gamma.                                           |\n" \
    << "|                                                                             |\n" \
    << "|        - div (grad u) + u = f  in Omega,  (grad u) . n = g  on Gamma        |\n" \
    << "|                                                                             |\n" \
    << "|  Questions? Contact  Pavel Bochev  ([email protected]),                    |\n" \
    << "|                      Denis Ridzal  ([email protected]),                    |\n" \
    << "|                      Kara Peterson ([email protected]).                    |\n" \
    << "|                                                                             |\n" \
    << "|  Intrepid's website: http://trilinos.sandia.gov/packages/intrepid           |\n" \
    << "|  Trilinos website:   http://trilinos.sandia.gov                             |\n" \
    << "|                                                                             |\n" \
    << "===============================================================================\n"\
    << "| TEST 1: Patch test                                                          |\n"\
    << "===============================================================================\n";

  
  int errorFlag = 0;

  outStream -> precision(16);


  try {

    int max_order = 5;                                                                  // max total order of polynomial solution
    DefaultCubatureFactory<double>  cubFactory;                                         // create factory
    shards::CellTopology cell(shards::getCellTopologyData< shards::Tetrahedron<> >());  // create parent cell topology
    shards::CellTopology side(shards::getCellTopologyData< shards::Triangle<> >());     // create relevant subcell (side) topology
    int cellDim = cell.getDimension();
    int sideDim = side.getDimension();

    // Define array containing points at which the solution is evaluated, on the reference tet.
    int numIntervals = 10;
    int numInterpPoints = ((numIntervals + 1)*(numIntervals + 2)*(numIntervals + 3))/6;
    FieldContainer<double> interp_points_ref(numInterpPoints, 3);
    int counter = 0;
    for (int k=0; k<=numIntervals; k++) {
      for (int j=0; j<=numIntervals; j++) {
        for (int i=0; i<=numIntervals; i++) {
          if (i+j+k <= numIntervals) {
            interp_points_ref(counter,0) = i*(1.0/numIntervals);
            interp_points_ref(counter,1) = j*(1.0/numIntervals);
            interp_points_ref(counter,2) = k*(1.0/numIntervals);
            counter++;
          }
        }
      }
    }

    /* Definition of parent cell. */
    FieldContainer<double> cell_nodes(1, 4, cellDim);
    // funky tet
    cell_nodes(0, 0, 0) = -1.0;
    cell_nodes(0, 0, 1) = -2.0;
    cell_nodes(0, 0, 2) = 0.0;
    cell_nodes(0, 1, 0) = 6.0;
    cell_nodes(0, 1, 1) = 2.0;
    cell_nodes(0, 1, 2) = 0.0;
    cell_nodes(0, 2, 0) = -5.0;
    cell_nodes(0, 2, 1) = 1.0;
    cell_nodes(0, 2, 2) = 0.0;
    cell_nodes(0, 3, 0) = -4.0;
    cell_nodes(0, 3, 1) = -1.0;
    cell_nodes(0, 3, 2) = 3.0;
    // perturbed reference tet
    /*cell_nodes(0, 0, 0) = 0.1;
    cell_nodes(0, 0, 1) = -0.1;
    cell_nodes(0, 0, 2) = 0.2;
    cell_nodes(0, 1, 0) = 1.2;
    cell_nodes(0, 1, 1) = -0.1;
    cell_nodes(0, 1, 2) = 0.05;
    cell_nodes(0, 2, 0) = 0.0;
    cell_nodes(0, 2, 1) = 0.9;
    cell_nodes(0, 2, 2) = 0.1;
    cell_nodes(0, 3, 0) = 0.1;
    cell_nodes(0, 3, 1) = -0.1;
    cell_nodes(0, 3, 2) = 1.1;*/
    // reference tet
    /*cell_nodes(0, 0, 0) = 0.0;
    cell_nodes(0, 0, 1) = 0.0;
    cell_nodes(0, 0, 2) = 0.0;
    cell_nodes(0, 1, 0) = 1.0;
    cell_nodes(0, 1, 1) = 0.0;
    cell_nodes(0, 1, 2) = 0.0;
    cell_nodes(0, 2, 0) = 0.0;
    cell_nodes(0, 2, 1) = 1.0;
    cell_nodes(0, 2, 2) = 0.0;
    cell_nodes(0, 3, 0) = 0.0;
    cell_nodes(0, 3, 1) = 0.0;
    cell_nodes(0, 3, 2) = 1.0;*/

    FieldContainer<double> interp_points(1, numInterpPoints, cellDim);
    CellTools<double>::mapToPhysicalFrame(interp_points, interp_points_ref, cell_nodes, cell);
    interp_points.resize(numInterpPoints, cellDim);

    // we test two types of bases
    EPointType pointtype[] = {POINTTYPE_EQUISPACED, POINTTYPE_WARPBLEND};
    for (int ptype=0; ptype < 2; ptype++) {

      *outStream << "\nTesting bases with " << EPointTypeToString(pointtype[ptype]) << ":\n";

      for (int x_order=0; x_order <= max_order; x_order++) {
        for (int y_order=0; y_order <= max_order-x_order; y_order++) {
          for (int z_order=0; z_order <= max_order-x_order-y_order; z_order++) {

            // evaluate exact solution
            FieldContainer<double> exact_solution(1, numInterpPoints);
            u_exact(exact_solution, interp_points, x_order, y_order, z_order);

            int total_order = std::max(x_order + y_order + z_order, 1);

            for (int basis_order=total_order; basis_order <= max_order; basis_order++) {

              // set test tolerance;
              double zero = basis_order*basis_order*basis_order*100*INTREPID_TOL;

              //create basis
              Teuchos::RCP<Basis<double,FieldContainer<double> > > basis =
                Teuchos::rcp(new Basis_HGRAD_TET_Cn_FEM<double,FieldContainer<double> >(basis_order, pointtype[ptype]) );
              int numFields = basis->getCardinality();

              // create cubatures
              Teuchos::RCP<Cubature<double> > cellCub = cubFactory.create(cell, 2*basis_order);
              Teuchos::RCP<Cubature<double> > sideCub = cubFactory.create(side, 2*basis_order);
              int numCubPointsCell = cellCub->getNumPoints();
              int numCubPointsSide = sideCub->getNumPoints();

              /* Computational arrays. */
              /* Section 1: Related to parent cell integration. */
              FieldContainer<double> cub_points_cell(numCubPointsCell, cellDim);
              FieldContainer<double> cub_points_cell_physical(1, numCubPointsCell, cellDim);
              FieldContainer<double> cub_weights_cell(numCubPointsCell);
              FieldContainer<double> jacobian_cell(1, numCubPointsCell, cellDim, cellDim);
              FieldContainer<double> jacobian_inv_cell(1, numCubPointsCell, cellDim, cellDim);
              FieldContainer<double> jacobian_det_cell(1, numCubPointsCell);
              FieldContainer<double> weighted_measure_cell(1, numCubPointsCell);

              FieldContainer<double> value_of_basis_at_cub_points_cell(numFields, numCubPointsCell);
              FieldContainer<double> transformed_value_of_basis_at_cub_points_cell(1, numFields, numCubPointsCell);
              FieldContainer<double> weighted_transformed_value_of_basis_at_cub_points_cell(1, numFields, numCubPointsCell);
              FieldContainer<double> grad_of_basis_at_cub_points_cell(numFields, numCubPointsCell, cellDim);
              FieldContainer<double> transformed_grad_of_basis_at_cub_points_cell(1, numFields, numCubPointsCell, cellDim);
              FieldContainer<double> weighted_transformed_grad_of_basis_at_cub_points_cell(1, numFields, numCubPointsCell, cellDim);
              FieldContainer<double> fe_matrix(1, numFields, numFields);

              FieldContainer<double> rhs_at_cub_points_cell_physical(1, numCubPointsCell);
              FieldContainer<double> rhs_and_soln_vector(1, numFields);

              /* Section 2: Related to subcell (side) integration. */
              unsigned numSides = 4;
              FieldContainer<double> cub_points_side(numCubPointsSide, sideDim);
              FieldContainer<double> cub_weights_side(numCubPointsSide);
              FieldContainer<double> cub_points_side_refcell(numCubPointsSide, cellDim);
              FieldContainer<double> cub_points_side_physical(1, numCubPointsSide, cellDim);
              FieldContainer<double> jacobian_side_refcell(1, numCubPointsSide, cellDim, cellDim);
              FieldContainer<double> jacobian_det_side_refcell(1, numCubPointsSide);
              FieldContainer<double> weighted_measure_side_refcell(1, numCubPointsSide);

              FieldContainer<double> value_of_basis_at_cub_points_side_refcell(numFields, numCubPointsSide);
              FieldContainer<double> transformed_value_of_basis_at_cub_points_side_refcell(1, numFields, numCubPointsSide);
              FieldContainer<double> weighted_transformed_value_of_basis_at_cub_points_side_refcell(1, numFields, numCubPointsSide);
              FieldContainer<double> neumann_data_at_cub_points_side_physical(1, numCubPointsSide);
              FieldContainer<double> neumann_fields_per_side(1, numFields);

              /* Section 3: Related to global interpolant. */
              FieldContainer<double> value_of_basis_at_interp_points_ref(numFields, numInterpPoints);
              FieldContainer<double> transformed_value_of_basis_at_interp_points_ref(1, numFields, numInterpPoints);
              FieldContainer<double> interpolant(1, numInterpPoints);

              FieldContainer<int> ipiv(numFields);



              /******************* START COMPUTATION ***********************/

              // get cubature points and weights
              cellCub->getCubature(cub_points_cell, cub_weights_cell);

              // compute geometric cell information
              CellTools<double>::setJacobian(jacobian_cell, cub_points_cell, cell_nodes, cell);
              CellTools<double>::setJacobianInv(jacobian_inv_cell, jacobian_cell);
              CellTools<double>::setJacobianDet(jacobian_det_cell, jacobian_cell);

              // compute weighted measure
              FunctionSpaceTools::computeCellMeasure<double>(weighted_measure_cell, jacobian_det_cell, cub_weights_cell);

              ///////////////////////////
              // Computing mass matrices:
              // tabulate values of basis functions at (reference) cubature points
              basis->getValues(value_of_basis_at_cub_points_cell, cub_points_cell, OPERATOR_VALUE);

              // transform values of basis functions 
              FunctionSpaceTools::HGRADtransformVALUE<double>(transformed_value_of_basis_at_cub_points_cell,
                                                              value_of_basis_at_cub_points_cell);

              // multiply with weighted measure
              FunctionSpaceTools::multiplyMeasure<double>(weighted_transformed_value_of_basis_at_cub_points_cell,
                                                          weighted_measure_cell,
                                                          transformed_value_of_basis_at_cub_points_cell);

              // compute mass matrices
              FunctionSpaceTools::integrate<double>(fe_matrix,
                                                    transformed_value_of_basis_at_cub_points_cell,
                                                    weighted_transformed_value_of_basis_at_cub_points_cell,
                                                    COMP_BLAS);
              ///////////////////////////

              ////////////////////////////////
              // Computing stiffness matrices:
              // tabulate gradients of basis functions at (reference) cubature points
              basis->getValues(grad_of_basis_at_cub_points_cell, cub_points_cell, OPERATOR_GRAD);

              // transform gradients of basis functions 
              FunctionSpaceTools::HGRADtransformGRAD<double>(transformed_grad_of_basis_at_cub_points_cell,
                                                             jacobian_inv_cell,
                                                             grad_of_basis_at_cub_points_cell);

              // multiply with weighted measure
              FunctionSpaceTools::multiplyMeasure<double>(weighted_transformed_grad_of_basis_at_cub_points_cell,
                                                          weighted_measure_cell,
                                                          transformed_grad_of_basis_at_cub_points_cell);

              // compute stiffness matrices and sum into fe_matrix
              FunctionSpaceTools::integrate<double>(fe_matrix,
                                                    transformed_grad_of_basis_at_cub_points_cell,
                                                    weighted_transformed_grad_of_basis_at_cub_points_cell,
                                                    COMP_BLAS,
                                                    true);
              ////////////////////////////////

              ///////////////////////////////
              // Computing RHS contributions:
              // map cell (reference) cubature points to physical space
              CellTools<double>::mapToPhysicalFrame(cub_points_cell_physical, cub_points_cell, cell_nodes, cell);

              // evaluate rhs function
              rhsFunc(rhs_at_cub_points_cell_physical, cub_points_cell_physical, x_order, y_order, z_order);

              // compute rhs
              FunctionSpaceTools::integrate<double>(rhs_and_soln_vector,
                                                    rhs_at_cub_points_cell_physical,
                                                    weighted_transformed_value_of_basis_at_cub_points_cell,
                                                    COMP_BLAS);

              // compute neumann b.c. contributions and adjust rhs
              sideCub->getCubature(cub_points_side, cub_weights_side);
              for (unsigned i=0; i<numSides; i++) {
                // compute geometric cell information
                CellTools<double>::mapToReferenceSubcell(cub_points_side_refcell, cub_points_side, sideDim, (int)i, cell);
                CellTools<double>::setJacobian(jacobian_side_refcell, cub_points_side_refcell, cell_nodes, cell);
                CellTools<double>::setJacobianDet(jacobian_det_side_refcell, jacobian_side_refcell);

                // compute weighted face measure
                FunctionSpaceTools::computeFaceMeasure<double>(weighted_measure_side_refcell,
                                                               jacobian_side_refcell,
                                                               cub_weights_side,
                                                               i,
                                                               cell);

                // tabulate values of basis functions at side cubature points, in the reference parent cell domain
                basis->getValues(value_of_basis_at_cub_points_side_refcell, cub_points_side_refcell, OPERATOR_VALUE);
                // transform 
                FunctionSpaceTools::HGRADtransformVALUE<double>(transformed_value_of_basis_at_cub_points_side_refcell,
                                                                value_of_basis_at_cub_points_side_refcell);

                // multiply with weighted measure
                FunctionSpaceTools::multiplyMeasure<double>(weighted_transformed_value_of_basis_at_cub_points_side_refcell,
                                                            weighted_measure_side_refcell,
                                                            transformed_value_of_basis_at_cub_points_side_refcell);

                // compute Neumann data
                // map side cubature points in reference parent cell domain to physical space
                CellTools<double>::mapToPhysicalFrame(cub_points_side_physical, cub_points_side_refcell, cell_nodes, cell);
                // now compute data
                neumann(neumann_data_at_cub_points_side_physical, cub_points_side_physical, jacobian_side_refcell,
                        cell, (int)i, x_order, y_order, z_order);

                FunctionSpaceTools::integrate<double>(neumann_fields_per_side,
                                                      neumann_data_at_cub_points_side_physical,
                                                      weighted_transformed_value_of_basis_at_cub_points_side_refcell,
                                                      COMP_BLAS);

                // adjust RHS
                RealSpaceTools<double>::add(rhs_and_soln_vector, neumann_fields_per_side);;
              }
              ///////////////////////////////

              /////////////////////////////
              // Solution of linear system:
              int info = 0;
              Teuchos::LAPACK<int, double> solver;
              solver.GESV(numFields, 1, &fe_matrix[0], numFields, &ipiv(0), &rhs_and_soln_vector[0], numFields, &info);
              /////////////////////////////

              ////////////////////////
              // Building interpolant:
              // evaluate basis at interpolation points
              basis->getValues(value_of_basis_at_interp_points_ref, interp_points_ref, OPERATOR_VALUE);
              // transform values of basis functions 
              FunctionSpaceTools::HGRADtransformVALUE<double>(transformed_value_of_basis_at_interp_points_ref,
                                                              value_of_basis_at_interp_points_ref);
              FunctionSpaceTools::evaluate<double>(interpolant, rhs_and_soln_vector, transformed_value_of_basis_at_interp_points_ref);
              ////////////////////////

              /******************* END COMPUTATION ***********************/
          
              RealSpaceTools<double>::subtract(interpolant, exact_solution);

              *outStream << "\nRelative norm-2 error between exact solution polynomial of order ("
                         << x_order << ", " << y_order << ", " << z_order
                         << ") and finite element interpolant of order " << basis_order << ": "
                         << RealSpaceTools<double>::vectorNorm(&interpolant[0], interpolant.dimension(1), NORM_TWO) /
                            RealSpaceTools<double>::vectorNorm(&exact_solution[0], exact_solution.dimension(1), NORM_TWO) << "\n";

              if (RealSpaceTools<double>::vectorNorm(&interpolant[0], interpolant.dimension(1), NORM_TWO) /
                  RealSpaceTools<double>::vectorNorm(&exact_solution[0], exact_solution.dimension(1), NORM_TWO) > zero) {
                *outStream << "\n\nPatch test failed for solution polynomial order ("
                           << x_order << ", " << y_order << ", " << z_order << ") and basis order " << basis_order << "\n\n";
                errorFlag++;
              }
            } // end for basis_order
          } // end for z_order
        } // end for y_order
      } // end for x_order
    } // end for ptype

  }
  // Catch unexpected errors
  catch (std::logic_error err) {
    *outStream << err.what() << "\n\n";
    errorFlag = -1000;
  };

  if (errorFlag != 0)
    std::cout << "End Result: TEST FAILED\n";
  else
    std::cout << "End Result: TEST PASSED\n";

  // reset format state of std::cout
  std::cout.copyfmt(oldFormatState);
 Kokkos::finalize();
  return errorFlag;
}
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
    const size_t NSDim = Bc.getNumVectors();

    Ppattern_ = Ppattern;

    size_t numRows = Ppattern_->getNodeNumRows();
    XXtInv_.resize(numRows);

    RCP<const Import> importer = Ppattern_->getImporter();

    X_ = MultiVectorFactory::Build(Ppattern_->getColMap(), NSDim);
    if (!importer.is_null())
      X_->doImport(Bc, *importer, Xpetra::INSERT);
    else
      *X_ = Bc;

    std::vector<const SC*> Xval(NSDim);
    for (size_t j = 0; j < NSDim; j++)
      Xval[j] = X_->getData(j).get();

    SC zero = Teuchos::ScalarTraits<SC>::zero();
    SC one  = Teuchos::ScalarTraits<SC>::one();

    Teuchos::BLAS  <LO,SC> blas;
    Teuchos::LAPACK<LO,SC> lapack;
    LO lwork = 3*NSDim;
    ArrayRCP<LO> IPIV(NSDim);
    ArrayRCP<SC> WORK(lwork);

    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices;
      Ppattern_->getLocalRowView(i, indices);

      size_t nnz = indices.size();

      XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false/*zeroOut*/);
      Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i];

      if (NSDim == 1) {
        SC d = zero;
        for (size_t j = 0; j < nnz; j++)
          d += Xval[0][indices[j]] * Xval[0][indices[j]];
        XXtInv(0,0) = one/d;

      } else {
        Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false/*zeroOut*/);
        for (size_t j = 0; j < nnz; j++)
          for (size_t k = 0; k < NSDim; k++)
            locX(k,j) = Xval[k][indices[j]];

        // XXtInv_ = (locX*locX^T)^{-1}
        blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                   one,   locX.values(),   locX.stride(),
                          locX.values(),   locX.stride(),
                  zero, XXtInv.values(), XXtInv.stride());

        LO info;
        // Compute LU factorization using partial pivoting with row exchanges
        lapack.GETRF(NSDim, NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), &info);
        // Use the computed factorization to compute the inverse
        lapack.GETRI(NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), WORK.get(), lwork, &info);
      }
    }
  }