void
Stokhos::GramSchmidtBasis<ordinal_type, value_type>::
transformCoeffs(const value_type *in, value_type *out) const
{
  Teuchos::BLAS<ordinal_type, value_type> blas;
  for (ordinal_type i=0; i<sz; i++)
    out[i] = in[i];
  blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::TRANS,
	    Teuchos::UNIT_DIAG, sz, 1, 1.0, gs_mat.values(), sz, out, sz);
}
  KOKKOS_INLINE_FUNCTION
  int
  Trsm<Side::Left,Uplo::Upper,Trans::NoTranspose,
       AlgoTrsm::ExternalBlas,Variant::One>
  ::invoke(PolicyType &policy,
           const MemberType &member,
           const int diagA,
           const ScalarType alpha,
           DenseExecViewTypeA &A,
           DenseExecViewTypeB &B) {
    // static_assert( Kokkos::Impl::is_same<
    //                typename DenseMatrixTypeA::space_type,
    //                Kokkos::Cuda
    //                >::value,
    //                "Cuda space is not available for calling external BLAS" );

    // static_assert( Kokkos::Impl::is_same<
    //                typename DenseMatrixTypeA::space_type,
    //                typename DenseMatrixTypeB::space_type
    //                >::value,
    //                "Space type of input matrices does not match" );

    //typedef typename DenseExecViewTypeA::space_type   space_type;
    typedef typename DenseExecViewTypeA::ordinal_type ordinal_type;
    typedef typename DenseExecViewTypeA::value_type   value_type;

    if (member.team_rank() == 0) {
#ifdef HAVE_SHYLUTACHO_TEUCHOS
      Teuchos::BLAS<ordinal_type,value_type> blas;

      const ordinal_type m = A.NumRows();
      const ordinal_type n = B.NumCols();

      blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
                (diagA == Diag::Unit ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG),
                m, n,
                alpha,
                A.ValuePtr(), A.BaseObject().ColStride(),
                B.ValuePtr(), B.BaseObject().ColStride());
#else
    TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif
    }

    return 0;
  }
예제 #3
0
  KOKKOS_INLINE_FUNCTION
  int exampleDenseTrsmMKL(const OrdinalType mmin,
                          const OrdinalType mmax,
                          const OrdinalType minc,
                          const OrdinalType k,
                          const bool verbose) {
    typedef ValueType   value_type;
    typedef OrdinalType ordinal_type;
    typedef SizeType    size_type;

    typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;

    int r_val = 0;

    Kokkos::Impl::Timer timer;
    double t = 0.0;

    cout << "DenseGemmMKL:: test matrices "
         <<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc << " , k = "<< k << endl;

    ostringstream os;
    os.precision(3);
    os << scientific;

    for (ordinal_type m=mmin;m<=mmax;m+=minc) {
      os.str("");

      DenseMatrixBaseType AA("AA", m, m), BB("BB", m, k), BC("BC", m, k);
      
      // setup upper triangular
      for (ordinal_type j=0;j<AA.NumCols();++j) {
        AA.Value(j,j) = 10.0;
        for (ordinal_type i=0;i<j;++i)
          AA.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
      }

      // setup one and right hand side is going to be overwritten by the product of AB
      for (ordinal_type j=0;j<BB.NumCols();++j)
        for (ordinal_type i=0;i<BB.NumRows();++i)
          BB.Value(i,j) = 1.0;

      Teuchos::BLAS<ordinal_type,value_type> blas;

      blas.GEMM(Teuchos::CONJ_TRANS, Teuchos::NO_TRANS,
                m, k, m,
                1.0,
                AA.ValuePtr(), AA.ColStride(),
                BB.ValuePtr(), BB.ColStride(),
                0.0,
                BC.ValuePtr(), BC.ColStride());
      BB.copy(BC);

      const double flop = get_flop_trsm_upper<value_type>(m, k);

      os << "DenseTrsmMKL:: m = " << m << " k = " << k;
      {
        timer.reset();
        Teuchos::BLAS<ordinal_type,value_type> blas;

        const ordinal_type mm = AA.NumRows();
        const ordinal_type nn = BB.NumCols();

        blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::CONJ_TRANS,
                  Teuchos::NON_UNIT_DIAG,
                  mm, nn,
                  1.0,
                  AA.ValuePtr(), AA.ColStride(),
                  BB.ValuePtr(), BB.ColStride());
        t = timer.seconds();
        os << ":: MKL Performance = " << (flop/t/1.0e9) << " [GFLOPs]  ";
      }
      cout << os.str() << endl;
    }

    return r_val;
  }