// Transforms a coefficient vector through the Gram-Schmidt matrix.
//
// Copies the `sz` entries of `in` into `out`, then overwrites `out` with the
// solution x of  gs_mat^T * x = in,  where gs_mat is read as a lower
// triangular matrix with an implicit unit diagonal.
//
//   in  : input coefficients, at least `sz` entries are read
//   out : output coefficients, at least `sz` entries are written
void
Stokhos::GramSchmidtBasis<ordinal_type, value_type>::
transformCoeffs(const value_type *in, value_type *out) const
{
  // TRSM solves in place, so seed the output buffer with the input first.
  ordinal_type idx = 0;
  while (idx < sz) {
    out[idx] = in[idx];
    ++idx;
  }

  Teuchos::BLAS<ordinal_type, value_type> blas;
  blas.TRSM(Teuchos::LEFT_SIDE,   // triangular matrix multiplies from the left
            Teuchos::LOWER_TRI,   // only the lower triangle is referenced
            Teuchos::TRANS,       // solve with the transpose
            Teuchos::UNIT_DIAG,   // diagonal entries are taken to be one
            sz,                   // rows of the right-hand side
            1,                    // a single right-hand-side column
            1.0,                  // no scaling of the right-hand side
            gs_mat.values(), sz,  // matrix data and its leading dimension
            out, sz);             // right-hand side / solution, leading dimension
}
// Left-sided, upper-triangular, non-transposed triangular solve that forwards
// to an external BLAS through Teuchos::BLAS.
//
// Only the team member with rank 0 issues the BLAS call; every other member
// returns immediately. When HAVE_SHYLUTACHO_TEUCHOS is not defined, rank 0
// reaches TACHO_TEST_FOR_ABORT instead.
//
//   policy : not used by this specialization
//   member : team handle; only its team_rank() is consulted
//   diagA  : compared against Diag::Unit to pick unit / non-unit diagonal
//   alpha  : scalar passed to TRSM as the right-hand-side scaling
//   A      : triangular matrix view (upper triangle is referenced)
//   B      : right-hand side view, overwritten by TRSM
//
// Always returns 0.
KOKKOS_INLINE_FUNCTION
int
Trsm<Side::Left,Uplo::Upper,Trans::NoTranspose,
     AlgoTrsm::ExternalBlas,Variant::One>
::invoke(PolicyType &policy,
         const MemberType &member,
         const int diagA,
         const ScalarType alpha,
         DenseExecViewTypeA &A,
         DenseExecViewTypeB &B) {
  // Disabled compile-time checks, retained for reference:
  //
  // static_assert( Kokkos::Impl::is_same<
  //                typename DenseMatrixTypeA::space_type,
  //                Kokkos::Cuda
  //                >::value,
  //                "Cuda space is not available for calling external BLAS" );
  //
  // static_assert( Kokkos::Impl::is_same<
  //                typename DenseMatrixTypeA::space_type,
  //                typename DenseMatrixTypeB::space_type
  //                >::value,
  //                "Space type of input matrices does not match" );
  //
  // typedef typename DenseExecViewTypeA::space_type space_type;

  typedef typename DenseExecViewTypeA::ordinal_type ordinal_type;
  typedef typename DenseExecViewTypeA::value_type   value_type;

  // Non-leader team members have nothing to do.
  if (member.team_rank() != 0)
    return 0;

#ifdef HAVE_SHYLUTACHO_TEUCHOS
  const Teuchos::EDiag diag_flag =
    (diagA == Diag::Unit) ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG;

  const ordinal_type num_rows = A.NumRows();
  const ordinal_type num_rhs  = B.NumCols();

  Teuchos::BLAS<ordinal_type,value_type> blas;
  blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
            diag_flag,
            num_rows, num_rhs,
            alpha,
            A.ValuePtr(), A.BaseObject().ColStride(),
            B.ValuePtr(), B.BaseObject().ColStride());
#else
  TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif

  return 0;
}
// Times a dense triangular solve (Teuchos::BLAS::TRSM) over a range of
// matrix sizes and prints one performance line per size.
//
// For every m in {mmin, mmin+minc, ...} with m <= mmax:
//   1. AA (m x m) gets 10.0 on the diagonal and rand()-based values in
//      [-1, 1] strictly above it.
//   2. BB (m x k) is filled with ones and then replaced by AA^H * BB, so the
//      subsequent solve AA^H * X = BB has a well-defined right-hand side.
//   3. The TRSM call is timed and the rate flop/t/1e9 is printed, where flop
//      comes from get_flop_trsm_upper<value_type>(m, k).
//
//   mmin, mmax : inclusive range of matrix dimensions to test
//   minc       : increment between tested dimensions; must be >= 1 whenever
//                mmin <= mmax, otherwise the sweep could never terminate
//   k          : number of right-hand-side columns
//   verbose    : accepted for interface compatibility; not used here
//
// Returns 0 on success, -1 if the sweep parameters cannot make progress.
KOKKOS_INLINE_FUNCTION
int exampleDenseTrsmMKL(const OrdinalType mmin,
                        const OrdinalType mmax,
                        const OrdinalType minc,
                        const OrdinalType k,
                        const bool verbose) {
  typedef ValueType   value_type;
  typedef OrdinalType ordinal_type;
  typedef SizeType    size_type;

  typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;

  int r_val = 0;

  Kokkos::Impl::Timer timer;
  double t = 0.0;

  cout << "DenseTrsmMKL:: test matrices "
       <<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc << " , k = "<< k << endl;

  // A non-positive increment would make the size sweep below loop forever
  // (or walk into negative dimensions). Only reject it when the loop would
  // actually be entered, so an empty range keeps behaving as a no-op.
  if (mmin <= mmax && minc < 1) {
    cout << "DenseTrsmMKL:: invalid increment, minc = " << minc
         << " ; minc must be positive when mmin <= mmax" << endl;
    return -1;
  }

  ostringstream os;
  os.precision(3);
  os << scientific;

  // One BLAS handle serves both the setup GEMM and the timed TRSM.
  Teuchos::BLAS<ordinal_type,value_type> blas;

  for (ordinal_type m=mmin;m<=mmax;m+=minc) {
    os.str("");

    DenseMatrixBaseType AA("AA", m, m), BB("BB", m, k), BC("BC", m, k);

    // Upper triangular test matrix: dominant diagonal, random entries above.
    // NOTE(review): the GEMM below reads all of AA, so this relies on the
    // strictly lower part being zero after construction -- confirm.
    for (ordinal_type j=0;j<AA.NumCols();++j) {
      AA.Value(j,j) = 10.0;
      for (ordinal_type i=0;i<j;++i)
        AA.Value(i,j) = 2.0*(static_cast<value_type>(rand())/(RAND_MAX)) - 1.0;
    }

    // Start from a block of ones ...
    for (ordinal_type j=0;j<BB.NumCols();++j)
      for (ordinal_type i=0;i<BB.NumRows();++i)
        BB.Value(i,j) = 1.0;

    // ... and overwrite the right-hand side with AA^H * BB.
    blas.GEMM(Teuchos::CONJ_TRANS, Teuchos::NO_TRANS,
              m, k, m,
              1.0,
              AA.ValuePtr(), AA.ColStride(),
              BB.ValuePtr(), BB.ColStride(),
              0.0,
              BC.ValuePtr(), BC.ColStride());
    BB.copy(BC);

    const double flop = get_flop_trsm_upper<value_type>(m, k);

    os << "DenseTrsmMKL:: m = " << m << " k = " << k;

    {
      const ordinal_type mm = AA.NumRows();
      const ordinal_type nn = BB.NumCols();

      // Time only the solve itself.
      timer.reset();
      blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::CONJ_TRANS,
                Teuchos::NON_UNIT_DIAG,
                mm, nn,
                1.0,
                AA.ValuePtr(), AA.ColStride(),
                BB.ValuePtr(), BB.ColStride());
      t = timer.seconds();

      os << ":: MKL Performance = " << (flop/t/1.0e9) << " [GFLOPs]  ";
    }

    cout << os.str() << endl;
  }

  return r_val;
}