void TPSLaplaceResid<EvalT, Traits>:: evaluateFields(typename Traits::EvalData workset) { // Note that all the integration operations are ScalarT! We need the partials of the Jacobian to show up in the // system Jacobian as the solution is a function of coordinates (it IS the coordinates!). // This adds significant time to the compile Intrepid2::CellTools<ScalarT>::setJacobian(jacobian, refPoints, solnVec, *cellType); // Since Intrepid2 will perform calculations on the entire workset size and not // just the used portion, we must fill the excess with reasonable values. // Leaving this out leads to a floating point exception in // Intrepid2::RealSpaceTools<Scalar>::det(ArrayDet & detArray, // const ArrayIn & inMats). for (std::size_t cell = workset.numCells; cell < worksetSize; ++cell) for (std::size_t qp = 0; qp < numQPs; ++qp) for (std::size_t i = 0; i < numDims; ++i) jacobian(cell, qp, i, i) = 1.0; Intrepid2::CellTools<ScalarT>::setJacobianDet(jacobian_det, jacobian); // Straight Laplace's equation evaluation for the nodal coord solution for(std::size_t cell = 0; cell < workset.numCells; ++cell) { for(std::size_t node_a = 0; node_a < numNodes; ++node_a) { for(std::size_t eq = 0; eq < numDims; eq++) { solnResidual(cell, node_a, eq) = 0.0; } for(std::size_t qp = 0; qp < numQPs; ++qp) { for(std::size_t node_b = 0; node_b < numNodes; ++node_b) { ScalarT kk = 0.0; for(std::size_t i = 0; i < numDims; i++) { kk += grad_at_cub_points(node_a, qp, i) * grad_at_cub_points(node_b, qp, i); } for(std::size_t eq = 0; eq < numDims; eq++) { solnResidual(cell, node_a, eq) += kk * solnVec(cell, node_b, eq) * jacobian_det(cell, qp) * refWeights(qp); } } } } } }
void LaplaceResid<EvalT, Traits>:: evaluateFields(typename Traits::EvalData workset) { // setJacobian only needs to be RealType since the data type is only // used internally for Basis Fns on reference elements, which are // not functions of coordinates. This save 18min of compile time!!! Intrepid::CellTools<MeshScalarT>::setJacobian(jacobian, refPoints, coordVec, *cellType); // Since Intrepid will perform calculations on the entire workset size and not // just the used portion, we must fill the excess with reasonable values. // Leaving this out leads to a floating point exception in // Intrepid::RealSpaceTools<Scalar>::det(ArrayDet & detArray, // const ArrayIn & inMats). for (std::size_t cell = workset.numCells; cell < worksetSize; ++cell) for (std::size_t qp = 0; qp < numQPs; ++qp) for (std::size_t i = 0; i < numDims; ++i) jacobian(cell, qp, i, i) = 1.0; Intrepid::CellTools<MeshScalarT>::setJacobianDet(jacobian_det, jacobian); // Straight Laplace's equation evaluation for the nodal coord solution for(std::size_t cell = 0; cell < workset.numCells; ++cell) { for(std::size_t node_a = 0; node_a < numNodes; ++node_a) { for(std::size_t eq = 0; eq < numDims; eq++) { solnResidual(cell, node_a, eq) = 0.0; } for(std::size_t qp = 0; qp < numQPs; ++qp) { for(std::size_t node_b = 0; node_b < numNodes; ++node_b) { ScalarT kk = 0.0; for(std::size_t i = 0; i < numDims; i++) { kk += grad_at_cub_points(node_a, qp, i) * grad_at_cub_points(node_b, qp, i); } for(std::size_t eq = 0; eq < numDims; eq++) { solnResidual(cell, node_a, eq) += kk * solnVec(cell, node_b, eq) * jacobian_det(cell, qp) * refWeights(qp); } } } } } }
void SurfaceBasis<EvalT, Traits>::computeJacobian( const PHX::MDField<MeshScalarT, Cell, QuadPoint, Dim, Dim> basis, const PHX::MDField<MeshScalarT, Cell, QuadPoint, Dim, Dim> dualBasis, PHX::MDField<MeshScalarT, Cell, QuadPoint> area) { const std::size_t worksetSize = basis.dimension(0); for (std::size_t cell(0); cell < worksetSize; ++cell) { for (std::size_t pt(0); pt < numQPs; ++pt) { Intrepid::Tensor<MeshScalarT> dPhiInv(3, &dualBasis(cell, pt, 0, 0)); Intrepid::Tensor<MeshScalarT> dPhi(3, &basis(cell, pt, 0, 0)); Intrepid::Vector<MeshScalarT> G_2(3, &basis(cell, pt, 2, 0)); MeshScalarT j0 = Intrepid::det(dPhi); MeshScalarT jacobian = j0 * std::sqrt( Intrepid::dot(Intrepid::dot(G_2, Intrepid::transpose(dPhiInv) * dPhiInv), G_2)); area(cell, pt) = jacobian * refWeights(pt); } } }
int ComputeBasis_HGRAD_Vector(const ordinal_type nworkset, const ordinal_type C, const ordinal_type order, const bool verbose) { typedef Vector<VectorTagType> VectorType; typedef typename VectorTagType::value_type ValueType; constexpr int VectorLength = VectorTagType::length; Teuchos::RCP<std::ostream> verboseStream; Teuchos::oblackholestream bhs; // outputs nothing if (verbose) verboseStream = Teuchos::rcp(&std::cout, false); else verboseStream = Teuchos::rcp(&bhs, false); Teuchos::oblackholestream oldFormatState; oldFormatState.copyfmt(std::cout); typedef typename Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ; *verboseStream << "DeviceSpace:: "; DeviceSpaceType::print_configuration(*verboseStream, false); *verboseStream << "HostSpace:: "; HostSpaceType::print_configuration(*verboseStream, false); *verboseStream << "VectorLength:: " << (VectorLength) << "\n"; using BasisTypeHost = Basis_HGRAD_HEX_C1_FEM<HostSpaceType,ValueType,ValueType>; using ImplBasisType = Impl::Basis_HGRAD_HEX_C1_FEM; using range_type = Kokkos::pair<ordinal_type,ordinal_type>; constexpr size_t LLC_CAPACITY = 32*1024*1024; Intrepid2::Test::Flush<LLC_CAPACITY,DeviceSpaceType> flush; Kokkos::Impl::Timer timer; double t_vectorize = 0; int errorFlag = 0; BasisTypeHost hostBasis; const auto cellTopo = hostBasis.getBaseCellTopology(); auto cubature = DefaultCubatureFactory::create<DeviceSpaceType,ValueType,ValueType>(cellTopo, order); const ordinal_type numCells = C, numCellsAdjusted = C/VectorLength + (C%VectorLength > 0), numVerts = cellTopo.getVertexCount(), numDofs = hostBasis.getCardinality(), numPoints = cubature->getNumPoints(), spaceDim = cubature->getDimension(); Kokkos::DynRankView<ValueType,HostSpaceType> dofCoordsHost("dofCoordsHost", numDofs, spaceDim); hostBasis.getDofCoords(dofCoordsHost); const auto refNodesHost = Kokkos::subview(dofCoordsHost, range_type(0, numVerts), Kokkos::ALL()); // pertub nodes Kokkos::DynRankView<VectorType,HostSpaceType> worksetCellsHost("worksetCellsHost", numCellsAdjusted, numVerts, spaceDim); for (ordinal_type cell=0;cell<numCells;++cell) { for (ordinal_type i=0;i<numVerts;++i) for (ordinal_type j=0;j<spaceDim;++j) { ValueType val = (rand()/(RAND_MAX + 1.0))*0.2 -0.1; worksetCellsHost(cell/VectorLength, i, j)[cell%VectorLength] = refNodesHost(i, j) + val; } } auto worksetCells = Kokkos::create_mirror_view(typename DeviceSpaceType::memory_space(), worksetCellsHost); Kokkos::deep_copy(worksetCells, worksetCellsHost); Kokkos::DynRankView<ValueType,DeviceSpaceType> refPoints("refPoints", numPoints, spaceDim), refWeights("refWeights", numPoints); cubature->getCubature(refPoints, refWeights); std::cout << "===============================================================================\n" << " Performance Test evaluating ComputeBasis \n" << " # of workset = " << nworkset << "\n" << " Test Array Structure (C,F,P,D) = " << numCells << ", " << numDofs << ", " << numPoints << ", " << spaceDim << "\n" << "===============================================================================\n"; *verboseStream << "\n" << "===============================================================================\n" << "TEST 1: evaluateFields vector version\n" << "===============================================================================\n"; try { Kokkos::DynRankView<ValueType,DeviceSpaceType> refBasisValues("refBasisValues", numDofs, numPoints), refBasisGrads ("refBasisGrads", numDofs, numPoints, spaceDim); ImplBasisType::getValues<DeviceSpaceType>(refBasisValues, refPoints, OPERATOR_VALUE); ImplBasisType::getValues<DeviceSpaceType>(refBasisGrads, refPoints, OPERATOR_GRAD); const ordinal_type ibegin = -3; // testing vertical approach { Kokkos::DynRankView<VectorType,DeviceSpaceType> weightedBasisValues("weightedBasisValues", numCellsAdjusted, numDofs, numPoints), weightedBasisGrads ("weightedBasisGrads", numCellsAdjusted, numDofs, numPoints, spaceDim); typedef F_hgrad_eval<VectorType,ValueType,DeviceSpaceType> FunctorType; using range_policy_type = Kokkos::Experimental::MDRangePolicy < DeviceSpaceType, Kokkos::Experimental::Rank<2>, Kokkos::IndexType<ordinal_type> >; range_policy_type policy( { 0, 0 }, { numCellsAdjusted, numPoints } ); FunctorType functor(weightedBasisValues, weightedBasisGrads, refBasisGrads, worksetCells, refWeights, refBasisValues, refBasisGrads); for (ordinal_type iwork=ibegin;iwork<nworkset;++iwork) { flush.run(); DeviceSpaceType::fence(); timer.reset(); Kokkos::parallel_for(policy, functor); DeviceSpaceType::fence(); t_vectorize += (iwork >= 0)*timer.seconds(); } } } catch (std::exception err) { *verboseStream << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; *verboseStream << err.what() << '\n'; *verboseStream << "-------------------------------------------------------------------------------" << "\n\n"; errorFlag = -1000; } std::cout << "TEST HGRAD " << " t_vectorize = " << (t_vectorize/nworkset) << std::endl; if (errorFlag != 0) std::cout << "End Result: TEST FAILED\n"; else std::cout << "End Result: TEST PASSED\n"; // reset format state of std::cout std::cout.copyfmt(oldFormatState); return errorFlag; }