KOKKOS_INLINE_FUNCTION
  int
  Chol<Uplo::Upper,
       AlgoChol::ExternalLapack,Variant::One>
  ::invoke(PolicyType &policy,
           const MemberType &member,
           DenseExecViewTypeA &A) {
    // static_assert( Kokkos::Impl::is_same<
    //                typename DenseMatrixTypeA::space_type,
    //                Kokkos::Cuda
    //                >::value,
    //                "Cuda space is not available for calling external BLAS" );

    //typedef typename DenseExecViewTypeA::space_type   space_type;
    typedef typename DenseExecViewTypeA::ordinal_type ordinal_type;
    typedef typename DenseExecViewTypeA::value_type   value_type;

    int r_val = 0;      
    if (member.team_rank() == 0) {
#ifdef HAVE_SHYLUTACHO_TEUCHOS
      Teuchos::LAPACK<ordinal_type,value_type> lapack;

      lapack.POTRF('U',
                   A.NumRows(),
                   A.ValuePtr(), A.BaseObject().ColStride(),
                   &r_val);
#else
    TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif
    }

    return r_val;
  }
Beispiel #2
0
int main(int argc, char *argv[]) {
  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  
  // This little trick lets us print to std::cout only if
  // a (dummy) command-line argument is provided.
  int iprint = argc - 1;
  Teuchos::RCP<std::ostream> outStream;
  Teuchos::oblackholestream bhs; // outputs nothing
  if (iprint > 0)
    outStream = Teuchos::rcp(&std::cout, false);
  else
    outStream = Teuchos::rcp(&bhs, false);
  
  // Save the format state of the original std::cout.
  Teuchos::oblackholestream oldFormatState;
  oldFormatState.copyfmt(std::cout);
  
  *outStream \
    << "===============================================================================\n" \
    << "|                                                                             |\n" \
    << "|                  Unit Test (Basis_HCURL_TET_In_FEM)                         |\n" \
    << "|                                                                             |\n" \
    << "| 1) Patch test involving H(curl) matrices                                    |\n" \
    << "|                                                                             |\n" \
    << "|   Questions? Contact Pavel Bochev ([email protected]),                     |\n" \
    << "|                      Robert Kirby ([email protected]),                 |\n" \
    << "|                      Denis Ridzal ([email protected]),                     |\n" \
    << "|                      Kara Peterson ([email protected]).                    |\n" \
    << "|                                                                             |\n" \
    << "|  Intrepid's website: http://trilinos.sandia.gov/packages/intrepid           |\n" \
    << "|  Trilinos website:   http://trilinos.sandia.gov                             |\n" \
    << "|                                                                             |\n" \
    << "===============================================================================\n" \
    << "| TEST 2: Patch test for mass matrices                                        |\n" \
    << "===============================================================================\n";
  
  
  int errorFlag = 0;
  
  outStream -> precision(16);
  
  try {
    DefaultCubatureFactory<double> cubFactory;                                          // create cubature factory
    shards::CellTopology cell(shards::getCellTopologyData< shards::Tetrahedron<> >());  // create parent cell topology
    
    int cellDim = cell.getDimension();
    
    int min_order = 1;
    int max_order = 5;
    
    int numIntervals = max_order;
    int numInterpPoints = ((numIntervals + 1)*(numIntervals + 2)*(numIntervals+3))/6;
    FieldContainer<double> interp_points_ref(numInterpPoints, cellDim);
    int counter = 0;
    for (int j=0; j<=numIntervals; j++) {
      for (int i=0; i<=numIntervals-j; i++) {
        for (int k=0;k<numIntervals-j-i;k++) {
          interp_points_ref(counter,0) = i*(1.0/numIntervals);
          interp_points_ref(counter,1) = j*(1.0/numIntervals);
          interp_points_ref(counter,2) = k*(1.0/numIntervals);
          counter++;
        }
      }
    }
    
    for (int basis_order=min_order;basis_order<=max_order;basis_order++) {
      // create basis
      Teuchos::RCP<Basis<double,FieldContainer<double> > > basis =
        Teuchos::rcp(new Basis_HCURL_TET_In_FEM<double,FieldContainer<double> >(basis_order,POINTTYPE_EQUISPACED) );
      
      int numFields = basis->getCardinality();
      
      // create cubatures
      Teuchos::RCP<Cubature<double> > cellCub = cubFactory.create(cell, 2*(basis_order+1));
      
      int numCubPointsCell = cellCub->getNumPoints();
      
      // hold cubature information
      FieldContainer<double> cub_points_cell(numCubPointsCell, cellDim);
      FieldContainer<double> cub_weights_cell(numCubPointsCell);
      
      // hold basis function information on refcell
      FieldContainer<double> value_of_basis_at_cub_points_cell(numFields, numCubPointsCell, cellDim );
      FieldContainer<double> w_value_of_basis_at_cub_points_cell(1, numFields, numCubPointsCell, cellDim);

      // holds rhs data
      FieldContainer<double> rhs_at_cub_points_cell(1,numCubPointsCell,cellDim);
      
      // FEM mass matrix
      FieldContainer<double> fe_matrix_bak(1,numFields,numFields);
      FieldContainer<double> fe_matrix(1,numFields,numFields);
      FieldContainer<double> rhs_and_soln_vec(1,numFields);
      
      FieldContainer<int> ipiv(numFields);
      FieldContainer<double> value_of_basis_at_interp_points( numFields , numInterpPoints , cellDim);
      FieldContainer<double> interpolant( 1, numInterpPoints , cellDim );

      int info = 0;
      Teuchos::LAPACK<int, double> solver;
      
      // set test tolerance
      double zero = (basis_order+1)*(basis_order+1)*1000*INTREPID_TOL;
      
      // build matrices outside the loop, and then just do the rhs
      // for each iteration
      cellCub->getCubature(cub_points_cell, cub_weights_cell);
      
      // need the vector basis
      basis->getValues(value_of_basis_at_cub_points_cell,
		       cub_points_cell,
		       OPERATOR_VALUE);
      basis->getValues( value_of_basis_at_interp_points ,
			interp_points_ref ,
			OPERATOR_VALUE );
			
			


      // construct mass matrix
      cub_weights_cell.resize(1,numCubPointsCell);
      FunctionSpaceTools::multiplyMeasure<double>(w_value_of_basis_at_cub_points_cell ,
                                                  cub_weights_cell ,
                                                  value_of_basis_at_cub_points_cell ); 
      cub_weights_cell.resize(numCubPointsCell);
      
      
      value_of_basis_at_cub_points_cell.resize( 1 , numFields , numCubPointsCell , cellDim );
      FunctionSpaceTools::integrate<double>(fe_matrix_bak,
                                            w_value_of_basis_at_cub_points_cell ,
                                            value_of_basis_at_cub_points_cell ,
                                            COMP_BLAS );
      value_of_basis_at_cub_points_cell.resize( numFields , numCubPointsCell , cellDim );
      

      //std::cout << fe_matrix_bak << std::endl;

      for (int x_order=0;x_order<basis_order;x_order++) {
        for (int y_order=0;y_order<basis_order-x_order;y_order++) {
          for (int z_order=0;z_order<basis_order-x_order-y_order;z_order++) {
	    for (int comp=0;comp<cellDim;comp++) {
	      fe_matrix.initialize();
	      // copy mass matrix 
	      for (int i=0;i<numFields;i++) {
		for (int j=0;j<numFields;j++) {
		  fe_matrix(0,i,j) = fe_matrix_bak(0,i,j);
		}
	      }
	      
	      // clear old vector data
	      rhs_and_soln_vec.initialize();
	      
	      // now get rhs vector
	      
	      cub_points_cell.resize(1,numCubPointsCell,cellDim);
	     
	      rhs_at_cub_points_cell.initialize();
	      rhsFunc(rhs_at_cub_points_cell,
		      cub_points_cell,
		      comp, 
		      x_order,
		      y_order,
		      z_order);
            
	      cub_points_cell.resize(numCubPointsCell,cellDim);

	      cub_weights_cell.resize(numCubPointsCell);

	      FunctionSpaceTools::integrate<double>(rhs_and_soln_vec,
						    rhs_at_cub_points_cell,
						    w_value_of_basis_at_cub_points_cell,
						    COMP_BLAS);
	      
	      // solve linear system

// 	      solver.GESV(numFields, 1, &fe_matrix[0], numFields, &ipiv(0), &rhs_and_soln_vec[0], 
// 			  numFields, &info);
	      solver.POTRF('L',numFields,&fe_matrix[0],numFields,&info);
	      solver.POTRS('L',numFields,1,&fe_matrix[0],numFields,&rhs_and_soln_vec[0],numFields,&info);
	      
	      interp_points_ref.resize(1,numInterpPoints,cellDim);
	      // get exact solution for comparison
	      FieldContainer<double> exact_solution(1,numInterpPoints,cellDim);
	      exact_solution.initialize();
	      u_exact( exact_solution , interp_points_ref , comp , x_order, y_order, z_order);
	      interp_points_ref.resize(numInterpPoints,cellDim);

	      // compute interpolant
	      // first evaluate basis at interpolation points
	      value_of_basis_at_interp_points.resize(1,numFields,numInterpPoints,cellDim);
	      FunctionSpaceTools::evaluate<double>( interpolant , 
						    rhs_and_soln_vec ,
						    value_of_basis_at_interp_points );
	      value_of_basis_at_interp_points.resize(numFields,numInterpPoints,cellDim);
	      
	      RealSpaceTools<double>::subtract(interpolant,exact_solution);
	      
	      double nrm= RealSpaceTools<double>::vectorNorm(&interpolant[0],interpolant.dimension(1), NORM_TWO);
	      
	      *outStream << "\nNorm-2 error between scalar components of exact solution of order ("
			 << x_order << ", " << y_order << ", " << z_order
			 << ") in component " << comp 
			 << " and finite element interpolant of order " << basis_order << ": "
			 << nrm << "\n";
	      
	      if (nrm > zero) {
		*outStream << "\n\nPatch test failed for solution polynomial order ("
			   << x_order << ", " << y_order << ", " << z_order << ") and basis order (scalar, vector)  ("
			   << basis_order << ", " << basis_order+1 << ")\n\n";
		errorFlag++;
	      }
            }
          }
        }
      }
    }
    
  }
  
  catch (std::logic_error err) {
    *outStream << err.what() << "\n\n";
    errorFlag = -1000;
  };
  
  if (errorFlag != 0)
    std::cout << "End Result: TEST FAILED\n";
  else
    std::cout << "End Result: TEST PASSED\n";
  
  // reset format state of std::cout
  std::cout.copyfmt(oldFormatState);
  
  return errorFlag;
}
  void BlockCGIter<ScalarType,MV,OP>::iterate()
  {
    //
    // Allocate/initialize data structures
    //
    if (initialized_ == false) {
      initialize();
    }
    // Allocate data needed for LAPACK work.
    int info = 0;
    char UPLO = 'U';
    Teuchos::LAPACK<int,ScalarType> lapack;

    // Allocate memory for scalars.
    Teuchos::SerialDenseMatrix<int,ScalarType> alpha( blockSize_, blockSize_ );
    Teuchos::SerialDenseMatrix<int,ScalarType> beta( blockSize_, blockSize_ );
    Teuchos::SerialDenseMatrix<int,ScalarType> rHz( blockSize_, blockSize_ ), 
      rHz_old( blockSize_, blockSize_ ), pAp( blockSize_, blockSize_ );

    // Create convenience variables for zero and one.
    const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
    
    // Get the current solution std::vector.
    Teuchos::RCP<MV> cur_soln_vec = lp_->getCurrLHSVec();

    // Check that the current solution std::vector has blockSize_ columns. 
    TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*cur_soln_vec) != blockSize_, CGIterateFailure,
                        "Belos::BlockCGIter::iterate(): current linear system does not have the right number of vectors!" );
    int rank = ortho_->normalize( *P_, Teuchos::null );
    TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,CGIterationOrthoFailure,
                         "Belos::BlockCGIter::iterate(): Failed to compute initial block of orthonormal direction vectors.");


    ////////////////////////////////////////////////////////////////
    // Iterate until the status test tells us to stop.
    //
    while (stest_->checkStatus(this) != Passed) {
        
      // Increment the iteration
      iter_++;
    
      // Multiply the current direction std::vector by A and store in Ap_
      lp_->applyOp( *P_, *AP_ );
      
      // Compute alpha := <P_,R_> / <P_,AP_>
      // 1) Compute P^T * A * P = pAp and P^T * R 
      // 2) Compute the Cholesky Factorization of pAp
      // 3) Back and forward solves to compute alpha
      //
      MVT::MvTransMv( one, *P_, *R_, alpha );
      MVT::MvTransMv( one, *P_, *AP_, pAp );      
     
      // Compute Cholesky factorization of pAp
      lapack.POTRF(UPLO, blockSize_, pAp.values(), blockSize_, &info);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,CGIterationLAPACKFailure,
                         "Belos::BlockCGIter::iterate(): Failed to compute Cholesky factorization using LAPACK routine POTRF.");

      // Compute alpha by performing a back and forward solve with the Cholesky factorization in pAp.
      lapack.POTRS(UPLO, blockSize_, blockSize_, pAp.values(), blockSize_, 
		   alpha.values(), blockSize_, &info);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,CGIterationLAPACKFailure,
                         "Belos::BlockCGIter::iterate(): Failed to compute alpha using Cholesky factorization (POTRS).");
      
      //
      // Update the solution std::vector X := X + alpha * P_
      //
      MVT::MvTimesMatAddMv( one, *P_, alpha, one, *cur_soln_vec );
      lp_->updateSolution();
      //
      // Compute the new residual R_ := R_ - alpha * AP_
      //
      MVT::MvTimesMatAddMv( -one, *AP_, alpha, one, *R_ );
      //
      // Compute the new preconditioned residual, Z_.
      if ( lp_->getLeftPrec() != Teuchos::null ) {
        lp_->applyLeftPrec( *R_, *Z_ );
        if ( lp_->getRightPrec() != Teuchos::null ) {
          Teuchos::RCP<MV> tmp = MVT::Clone( *Z_, blockSize_ );
          lp_->applyRightPrec( *Z_, *tmp );
          Z_ = tmp;
        }
      }
      else if ( lp_->getRightPrec() != Teuchos::null ) {
        lp_->applyRightPrec( *R_, *Z_ );
      } 
      else {
        Z_ = R_;
      }
      //
      // Compute beta := <AP_,Z_> / <P_,AP_> 
      // 1) Compute AP_^T * Z_ 
      // 2) Compute the Cholesky Factorization of pAp (already have)
      // 3) Back and forward solves to compute beta

      // Compute <AP_,Z>
      MVT::MvTransMv( -one, *AP_, *Z_, beta );
      //
      lapack.POTRS(UPLO, blockSize_, blockSize_, pAp.values(), blockSize_, 
		   beta.values(), blockSize_, &info);
      TEUCHOS_TEST_FOR_EXCEPTION(info != 0,CGIterationLAPACKFailure,
                         "Belos::BlockCGIter::iterate(): Failed to compute beta using Cholesky factorization (POTRS).");
      //
      // Compute the new direction vectors P_ = Z_ + P_ * beta 
      //
      Teuchos::RCP<MV> Pnew = MVT::CloneCopy( *Z_ );
      MVT::MvTimesMatAddMv(one, *P_, beta, one, *Pnew);
      P_ = Pnew;

      // Compute orthonormal block of new direction vectors.
      rank = ortho_->normalize( *P_, Teuchos::null );
      TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,CGIterationOrthoFailure,
                         "Belos::BlockCGIter::iterate(): Failed to compute block of orthonormal direction vectors.");
      
    } // end while (sTest_->checkStatus(this) != Passed)
  }