KOKKOS_INLINE_FUNCTION int Chol<Uplo::Upper, AlgoChol::ExternalLapack,Variant::One> ::invoke(PolicyType &policy, const MemberType &member, DenseExecViewTypeA &A) { // static_assert( Kokkos::Impl::is_same< // typename DenseMatrixTypeA::space_type, // Kokkos::Cuda // >::value, // "Cuda space is not available for calling external BLAS" ); //typedef typename DenseExecViewTypeA::space_type space_type; typedef typename DenseExecViewTypeA::ordinal_type ordinal_type; typedef typename DenseExecViewTypeA::value_type value_type; int r_val = 0; if (member.team_rank() == 0) { #ifdef HAVE_SHYLUTACHO_TEUCHOS Teuchos::LAPACK<ordinal_type,value_type> lapack; lapack.POTRF('U', A.NumRows(), A.ValuePtr(), A.BaseObject().ColStride(), &r_val); #else TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") ); #endif } return r_val; }
int main(int argc, char *argv[]) { Teuchos::GlobalMPISession mpiSession(&argc, &argv); // This little trick lets us print to std::cout only if // a (dummy) command-line argument is provided. int iprint = argc - 1; Teuchos::RCP<std::ostream> outStream; Teuchos::oblackholestream bhs; // outputs nothing if (iprint > 0) outStream = Teuchos::rcp(&std::cout, false); else outStream = Teuchos::rcp(&bhs, false); // Save the format state of the original std::cout. Teuchos::oblackholestream oldFormatState; oldFormatState.copyfmt(std::cout); *outStream \ << "===============================================================================\n" \ << "| |\n" \ << "| Unit Test (Basis_HCURL_TET_In_FEM) |\n" \ << "| |\n" \ << "| 1) Patch test involving H(curl) matrices |\n" \ << "| |\n" \ << "| Questions? Contact Pavel Bochev ([email protected]), |\n" \ << "| Robert Kirby ([email protected]), |\n" \ << "| Denis Ridzal ([email protected]), |\n" \ << "| Kara Peterson ([email protected]). |\n" \ << "| |\n" \ << "| Intrepid's website: http://trilinos.sandia.gov/packages/intrepid |\n" \ << "| Trilinos website: http://trilinos.sandia.gov |\n" \ << "| |\n" \ << "===============================================================================\n" \ << "| TEST 2: Patch test for mass matrices |\n" \ << "===============================================================================\n"; int errorFlag = 0; outStream -> precision(16); try { DefaultCubatureFactory<double> cubFactory; // create cubature factory shards::CellTopology cell(shards::getCellTopologyData< shards::Tetrahedron<> >()); // create parent cell topology int cellDim = cell.getDimension(); int min_order = 1; int max_order = 5; int numIntervals = max_order; int numInterpPoints = ((numIntervals + 1)*(numIntervals + 2)*(numIntervals+3))/6; FieldContainer<double> interp_points_ref(numInterpPoints, cellDim); int counter = 0; for (int j=0; j<=numIntervals; j++) { for (int i=0; i<=numIntervals-j; i++) { for (int k=0;k<numIntervals-j-i;k++) { interp_points_ref(counter,0) = i*(1.0/numIntervals); interp_points_ref(counter,1) = j*(1.0/numIntervals); interp_points_ref(counter,2) = k*(1.0/numIntervals); counter++; } } } for (int basis_order=min_order;basis_order<=max_order;basis_order++) { // create basis Teuchos::RCP<Basis<double,FieldContainer<double> > > basis = Teuchos::rcp(new Basis_HCURL_TET_In_FEM<double,FieldContainer<double> >(basis_order,POINTTYPE_EQUISPACED) ); int numFields = basis->getCardinality(); // create cubatures Teuchos::RCP<Cubature<double> > cellCub = cubFactory.create(cell, 2*(basis_order+1)); int numCubPointsCell = cellCub->getNumPoints(); // hold cubature information FieldContainer<double> cub_points_cell(numCubPointsCell, cellDim); FieldContainer<double> cub_weights_cell(numCubPointsCell); // hold basis function information on refcell FieldContainer<double> value_of_basis_at_cub_points_cell(numFields, numCubPointsCell, cellDim ); FieldContainer<double> w_value_of_basis_at_cub_points_cell(1, numFields, numCubPointsCell, cellDim); // holds rhs data FieldContainer<double> rhs_at_cub_points_cell(1,numCubPointsCell,cellDim); // FEM mass matrix FieldContainer<double> fe_matrix_bak(1,numFields,numFields); FieldContainer<double> fe_matrix(1,numFields,numFields); FieldContainer<double> rhs_and_soln_vec(1,numFields); FieldContainer<int> ipiv(numFields); FieldContainer<double> value_of_basis_at_interp_points( numFields , numInterpPoints , cellDim); FieldContainer<double> interpolant( 1, numInterpPoints , cellDim ); int info = 0; Teuchos::LAPACK<int, double> solver; // set test tolerance double zero = (basis_order+1)*(basis_order+1)*1000*INTREPID_TOL; // build matrices outside the loop, and then just do the rhs // for each iteration cellCub->getCubature(cub_points_cell, cub_weights_cell); // need the vector basis basis->getValues(value_of_basis_at_cub_points_cell, cub_points_cell, OPERATOR_VALUE); basis->getValues( value_of_basis_at_interp_points , interp_points_ref , OPERATOR_VALUE ); // construct mass matrix cub_weights_cell.resize(1,numCubPointsCell); FunctionSpaceTools::multiplyMeasure<double>(w_value_of_basis_at_cub_points_cell , cub_weights_cell , value_of_basis_at_cub_points_cell ); cub_weights_cell.resize(numCubPointsCell); value_of_basis_at_cub_points_cell.resize( 1 , numFields , numCubPointsCell , cellDim ); FunctionSpaceTools::integrate<double>(fe_matrix_bak, w_value_of_basis_at_cub_points_cell , value_of_basis_at_cub_points_cell , COMP_BLAS ); value_of_basis_at_cub_points_cell.resize( numFields , numCubPointsCell , cellDim ); //std::cout << fe_matrix_bak << std::endl; for (int x_order=0;x_order<basis_order;x_order++) { for (int y_order=0;y_order<basis_order-x_order;y_order++) { for (int z_order=0;z_order<basis_order-x_order-y_order;z_order++) { for (int comp=0;comp<cellDim;comp++) { fe_matrix.initialize(); // copy mass matrix for (int i=0;i<numFields;i++) { for (int j=0;j<numFields;j++) { fe_matrix(0,i,j) = fe_matrix_bak(0,i,j); } } // clear old vector data rhs_and_soln_vec.initialize(); // now get rhs vector cub_points_cell.resize(1,numCubPointsCell,cellDim); rhs_at_cub_points_cell.initialize(); rhsFunc(rhs_at_cub_points_cell, cub_points_cell, comp, x_order, y_order, z_order); cub_points_cell.resize(numCubPointsCell,cellDim); cub_weights_cell.resize(numCubPointsCell); FunctionSpaceTools::integrate<double>(rhs_and_soln_vec, rhs_at_cub_points_cell, w_value_of_basis_at_cub_points_cell, COMP_BLAS); // solve linear system // solver.GESV(numFields, 1, &fe_matrix[0], numFields, &ipiv(0), &rhs_and_soln_vec[0], // numFields, &info); solver.POTRF('L',numFields,&fe_matrix[0],numFields,&info); solver.POTRS('L',numFields,1,&fe_matrix[0],numFields,&rhs_and_soln_vec[0],numFields,&info); interp_points_ref.resize(1,numInterpPoints,cellDim); // get exact solution for comparison FieldContainer<double> exact_solution(1,numInterpPoints,cellDim); exact_solution.initialize(); u_exact( exact_solution , interp_points_ref , comp , x_order, y_order, z_order); interp_points_ref.resize(numInterpPoints,cellDim); // compute interpolant // first evaluate basis at interpolation points value_of_basis_at_interp_points.resize(1,numFields,numInterpPoints,cellDim); FunctionSpaceTools::evaluate<double>( interpolant , rhs_and_soln_vec , value_of_basis_at_interp_points ); value_of_basis_at_interp_points.resize(numFields,numInterpPoints,cellDim); RealSpaceTools<double>::subtract(interpolant,exact_solution); double nrm= RealSpaceTools<double>::vectorNorm(&interpolant[0],interpolant.dimension(1), NORM_TWO); *outStream << "\nNorm-2 error between scalar components of exact solution of order (" << x_order << ", " << y_order << ", " << z_order << ") in component " << comp << " and finite element interpolant of order " << basis_order << ": " << nrm << "\n"; if (nrm > zero) { *outStream << "\n\nPatch test failed for solution polynomial order (" << x_order << ", " << y_order << ", " << z_order << ") and basis order (scalar, vector) (" << basis_order << ", " << basis_order+1 << ")\n\n"; errorFlag++; } } } } } } } catch (std::logic_error err) { *outStream << err.what() << "\n\n"; errorFlag = -1000; }; if (errorFlag != 0) std::cout << "End Result: TEST FAILED\n"; else std::cout << "End Result: TEST PASSED\n"; // reset format state of std::cout std::cout.copyfmt(oldFormatState); return errorFlag; }
void BlockCGIter<ScalarType,MV,OP>::iterate() { // // Allocate/initialize data structures // if (initialized_ == false) { initialize(); } // Allocate data needed for LAPACK work. int info = 0; char UPLO = 'U'; Teuchos::LAPACK<int,ScalarType> lapack; // Allocate memory for scalars. Teuchos::SerialDenseMatrix<int,ScalarType> alpha( blockSize_, blockSize_ ); Teuchos::SerialDenseMatrix<int,ScalarType> beta( blockSize_, blockSize_ ); Teuchos::SerialDenseMatrix<int,ScalarType> rHz( blockSize_, blockSize_ ), rHz_old( blockSize_, blockSize_ ), pAp( blockSize_, blockSize_ ); // Create convenience variables for zero and one. const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one(); // Get the current solution std::vector. Teuchos::RCP<MV> cur_soln_vec = lp_->getCurrLHSVec(); // Check that the current solution std::vector has blockSize_ columns. TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*cur_soln_vec) != blockSize_, CGIterateFailure, "Belos::BlockCGIter::iterate(): current linear system does not have the right number of vectors!" ); int rank = ortho_->normalize( *P_, Teuchos::null ); TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,CGIterationOrthoFailure, "Belos::BlockCGIter::iterate(): Failed to compute initial block of orthonormal direction vectors."); //////////////////////////////////////////////////////////////// // Iterate until the status test tells us to stop. // while (stest_->checkStatus(this) != Passed) { // Increment the iteration iter_++; // Multiply the current direction std::vector by A and store in Ap_ lp_->applyOp( *P_, *AP_ ); // Compute alpha := <P_,R_> / <P_,AP_> // 1) Compute P^T * A * P = pAp and P^T * R // 2) Compute the Cholesky Factorization of pAp // 3) Back and forward solves to compute alpha // MVT::MvTransMv( one, *P_, *R_, alpha ); MVT::MvTransMv( one, *P_, *AP_, pAp ); // Compute Cholesky factorization of pAp lapack.POTRF(UPLO, blockSize_, pAp.values(), blockSize_, &info); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,CGIterationLAPACKFailure, "Belos::BlockCGIter::iterate(): Failed to compute Cholesky factorization using LAPACK routine POTRF."); // Compute alpha by performing a back and forward solve with the Cholesky factorization in pAp. lapack.POTRS(UPLO, blockSize_, blockSize_, pAp.values(), blockSize_, alpha.values(), blockSize_, &info); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,CGIterationLAPACKFailure, "Belos::BlockCGIter::iterate(): Failed to compute alpha using Cholesky factorization (POTRS)."); // // Update the solution std::vector X := X + alpha * P_ // MVT::MvTimesMatAddMv( one, *P_, alpha, one, *cur_soln_vec ); lp_->updateSolution(); // // Compute the new residual R_ := R_ - alpha * AP_ // MVT::MvTimesMatAddMv( -one, *AP_, alpha, one, *R_ ); // // Compute the new preconditioned residual, Z_. if ( lp_->getLeftPrec() != Teuchos::null ) { lp_->applyLeftPrec( *R_, *Z_ ); if ( lp_->getRightPrec() != Teuchos::null ) { Teuchos::RCP<MV> tmp = MVT::Clone( *Z_, blockSize_ ); lp_->applyRightPrec( *Z_, *tmp ); Z_ = tmp; } } else if ( lp_->getRightPrec() != Teuchos::null ) { lp_->applyRightPrec( *R_, *Z_ ); } else { Z_ = R_; } // // Compute beta := <AP_,Z_> / <P_,AP_> // 1) Compute AP_^T * Z_ // 2) Compute the Cholesky Factorization of pAp (already have) // 3) Back and forward solves to compute beta // Compute <AP_,Z> MVT::MvTransMv( -one, *AP_, *Z_, beta ); // lapack.POTRS(UPLO, blockSize_, blockSize_, pAp.values(), blockSize_, beta.values(), blockSize_, &info); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,CGIterationLAPACKFailure, "Belos::BlockCGIter::iterate(): Failed to compute beta using Cholesky factorization (POTRS)."); // // Compute the new direction vectors P_ = Z_ + P_ * beta // Teuchos::RCP<MV> Pnew = MVT::CloneCopy( *Z_ ); MVT::MvTimesMatAddMv(one, *P_, beta, one, *Pnew); P_ = Pnew; // Compute orthonormal block of new direction vectors. rank = ortho_->normalize( *P_, Teuchos::null ); TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,CGIterationOrthoFailure, "Belos::BlockCGIter::iterate(): Failed to compute block of orthonormal direction vectors."); } // end while (sTest_->checkStatus(this) != Passed) }