// Dense Cholesky (upper) that hands the whole matrix to LAPACK POTRF through
// Teuchos::LAPACK.
//
// Only the team member whose team_rank() is 0 issues the call; every other
// member falls straight through. Without HAVE_SHYLUTACHO_TEUCHOS the rank-0
// member hits TACHO_TEST_FOR_ABORT instead.
//
// Returns the POTRF info code on team rank 0 and 0 on all other members.
// `policy` is not used by this specialization.
KOKKOS_INLINE_FUNCTION
int
Chol<Uplo::Upper,
     AlgoChol::ExternalLapack,Variant::One>
::invoke(PolicyType &policy,
         const MemberType &member,
         DenseExecViewTypeA &A) {
  using ordinal_type = typename DenseExecViewTypeA::ordinal_type;
  using value_type   = typename DenseExecViewTypeA::value_type;

  // LAPACK status; stays 0 for members that do not call POTRF.
  int info = 0;

  const bool is_team_lead = (member.team_rank() == 0);
  if (is_team_lead) {
#ifdef HAVE_SHYLUTACHO_TEUCHOS
    Teuchos::LAPACK<ordinal_type,value_type> potrf_driver;
    potrf_driver.POTRF('U',
                       A.NumRows(),
                       A.ValuePtr(), A.BaseObject().ColStride(),
                       &info);
#else
    TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif
  }

  return info;
}
// Herk on supernodal sparse blocks: wraps the hierarchical matrices of A and C
// in DenseMatrixView objects and forwards to the DenseByBlocks Herk variant.
//
// Only team rank 0 does the forwarding; the return value of the nested call is
// discarded and this function always returns 0.
// Both dense views are typed with CrsExecViewTypeA::hier_mat_base_type,
// including the one built from C.
KOKKOS_INLINE_FUNCTION
int
Herk<Uplo::Upper,Trans::ConjTranspose,
     AlgoHerk::SparseSparseSuperNodesByBlocks,Variant::One>
::invoke(PolicyType &policy,
         MemberType &member,
         const ScalarType alpha,
         CrsExecViewTypeA &A,
         const ScalarType beta,
         CrsExecViewTypeC &C) {
  // Members other than the team lead have nothing to do here.
  if (member.team_rank() != 0)
    return 0;

  typedef DenseMatrixView<typename CrsExecViewTypeA::hier_mat_base_type> hier_dense_view_type;

  hier_dense_view_type dense_A(A.Hier());
  hier_dense_view_type dense_C(C.Hier());

  Herk<Uplo::Upper,Trans::ConjTranspose,
       AlgoHerk::DenseByBlocks,Variant::One>
    ::invoke(policy, member,
             alpha, dense_A, beta, dense_C);

  return 0;
}
// Gemm on supernodal sparse blocks: wraps the flat matrices of A, B and C in
// DenseMatrixView objects and forwards to the ExternalBlas Gemm variant.
//
// Only team rank 0 does the forwarding; the return value of the nested call is
// discarded and this function always returns 0.
// All three dense views are typed with CrsExecViewTypeA::flat_mat_base_type,
// including the ones built from B and C.
KOKKOS_INLINE_FUNCTION
int
Gemm<Trans::ConjTranspose,Trans::NoTranspose,
     AlgoGemm::SparseSparseSuperNodes,Variant::One>
::invoke(PolicyType &policy,
         MemberType &member,
         const ScalarType alpha,
         CrsExecViewTypeA &A,
         CrsExecViewTypeB &B,
         const ScalarType beta,
         CrsExecViewTypeC &C) {
  // Members other than the team lead have nothing to do here.
  if (member.team_rank() != 0)
    return 0;

  typedef DenseMatrixView<typename CrsExecViewTypeA::flat_mat_base_type> flat_dense_view_type;

  flat_dense_view_type dense_A(A.Flat());
  flat_dense_view_type dense_B(B.Flat());
  flat_dense_view_type dense_C(C.Flat());

  Gemm<Trans::ConjTranspose,Trans::NoTranspose,
       AlgoGemm::ExternalBlas,Variant::One>
    ::invoke(policy, member,
             alpha, dense_A, dense_B, beta, dense_C);

  return 0;
}
// Tries to log in with the credentials carried by *_User.
//
// Looks up the record whose id equals _User->getId(), starting from
// UserDB.GetFirstList(), and compares the stored password with
// _User->getPw().
//
// _User  in/out: on success it is re-pointed at the record found in the
//        database; on failure it is left untouched. The object it pointed to
//        before is not freed here.
// Returns true when the id exists and the passwords are equal; false
// otherwise (a message is printed to cout for the two lookup failures).
// NOTE(review): passwords are compared as stored, in plain form.
bool server::login(MemberType* & _User)
{
    // A null credential object can never match anything.
    if (_User == nullptr)
        return false;

    // Initialised so a failed lookup can never leave a wild pointer behind.
    MemberType* found = nullptr;

    const bool idKnown =
        UserDB.GetFirstList()->ToRightFindUserByID(found, _User->getId());
    if (!idKnown || found == nullptr) {
        cout << "Id you wrote down is not in DB" << endl;
        return false;
    }

    if (_User->getPw() == found->getPw()) {
        _User = found;
        return true;
    }

    cout << "Password you wrote down is not same as DB" << endl;
    return false;
}
void server::retrieveByName() { MemberType* tmpUser = new MemberType; DoublyList<MemberType>* tmpUserDB = & UserDB; while (1) { switch (PrintFriendSearchMenu()) { case 1: // by name tmpUser->setName(InputNameToSearch()); for (;;) { if (tmpUserDB->getItem()->getName() == tmpUser->getName()) { PrintDetailsOfUser(tmpUserDB->getItem()->getId(), tmpUserDB->getItem()->getPn(), tmpUserDB->getItem()->getPw(), tmpUserDB->getItem()->getName(), tmpUserDB->getItem()->getMsg(), 1); break; } else { if (tmpUserDB->GetNextList(tmpUserDB) == false) { cout << "I can't find that user! Sorry!" << endl; cin.get(); cin.get(); break; } } } break; case 2: // by ID tmpUser->setId(InputIdByUser()); for (;;) { if (tmpUserDB->getItem()->getId() == tmpUser->getId()) { PrintDetailsOfUser(tmpUserDB->getItem()->getId(), tmpUserDB->getItem()->getPn(), tmpUserDB->getItem()->getPw(), tmpUserDB->getItem()->getName(), tmpUserDB->getItem()->getMsg(), 1); break; } else { if (tmpUserDB->GetNextList(tmpUserDB) == false) { cout << "I can't find that user! Sorry!" << endl; cin.get(); cin.get(); break; } } } break; case 0: return; } } }
// Sparse-sparse Gemm, unblocked: C = beta*C + alpha*A'*B restricted to the
// entries already present in C.
//
// C is first scaled through ScaleCrsMatrix. Then, for every row k of A (and
// the same row k of B), each nonzero a(k,i) is paired with each nonzero
// b(k,j) and the product is added into row i of C wherever c.Index() reports
// a non-negative position for column j. The nonzeros of a row of A are
// distributed over the team with TeamThreadRange, followed by a team barrier.
// Always returns 0.
KOKKOS_INLINE_FUNCTION
int
Gemm<Trans::ConjTranspose,Trans::NoTranspose,
     AlgoGemm::SparseSparseUnblocked,Variant::One>
::invoke(PolicyType &policy,
         MemberType &member,
         const ScalarType alpha,
         CrsExecViewTypeA &A,
         CrsExecViewTypeB &B,
         const ScalarType beta,
         CrsExecViewTypeC &C) {
  using ordinal_type  = typename CrsExecViewTypeA::ordinal_type;
  using value_type    = typename CrsExecViewTypeA::value_type;
  using row_view_type = typename CrsExecViewTypeA::row_view_type;

  // C <- beta * C
  ScaleCrsMatrix::invoke(policy, member,
                         beta, C);

  // C(i,j) += alpha * conj(A(k,i)) * B(k,j)
  const ordinal_type num_rows_A = A.NumRows();
  for (ordinal_type row=0;row<num_rows_A;++row) {
    row_view_type &a_row = A.RowView(row);
    const ordinal_type a_count = a_row.NumNonZeros();

    row_view_type &b_row = B.RowView(row);
    const ordinal_type b_count = b_row.NumNonZeros();

    // Nothing to multiply when either row is empty.
    if (a_count <= 0 || !b_count)
      continue;

    Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, a_count),
                         [&](const ordinal_type ia) {
                           const ordinal_type c_row_id = a_row.Col(ia);
                           const value_type   a_conj   = Util::conj(a_row.Value(ia));

                           row_view_type &c_row = C.RowView(c_row_id);

                           // `pos` is both the last lookup result and the hint handed
                           // to the next lookup; a value of -2 or below ends the scan.
                           ordinal_type pos = 0;
                           for (ordinal_type jb=0;jb<b_count;++jb) {
                             if (pos <= -2)
                               break;

                             const ordinal_type b_col = b_row.Col(jb);
                             const value_type   b_val = b_row.Value(jb);

                             pos = c_row.Index(b_col, pos);
                             if (pos >= 0)
                               c_row.Value(pos) += alpha*a_conj*b_val;
                           }
                         });
    member.team_barrier();
  }

  return 0;
}
// Dense Gemm that delegates to Teuchos::BLAS::GEMM with CONJ_TRANS on A and
// NO_TRANS on B.
//
// Only team rank 0 issues the call, and only when m (rows of C), n (columns
// of C) and k (rows of B) are all positive. The BLAS path is compiled only
// when both HAVE_SHYLUTACHO_TEUCHOS and
// KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST are defined; otherwise rank 0 hits
// TACHO_TEST_FOR_ABORT. Always returns 0. `policy` is not used.
KOKKOS_INLINE_FUNCTION
int
Gemm<Trans::ConjTranspose,Trans::NoTranspose,
     AlgoGemm::ExternalBlas,Variant::One>
::invoke(PolicyType &policy,
         MemberType &member,
         const ScalarType alpha,
         DenseExecViewTypeA &A,
         DenseExecViewTypeB &B,
         const ScalarType beta,
         DenseExecViewTypeC &C) {
  // Members other than the team lead have nothing to do here.
  if (member.team_rank() != 0)
    return 0;

#if defined( HAVE_SHYLUTACHO_TEUCHOS ) && defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
  using ordinal_type = typename DenseExecViewTypeA::ordinal_type;
  using value_type   = typename DenseExecViewTypeA::value_type;

  Teuchos::BLAS<ordinal_type,value_type> blas;

  const ordinal_type
    m = C.NumRows(),
    n = C.NumCols(),
    k = B.NumRows();

  const bool has_work = (m > 0 && n > 0 && k > 0);
  if (has_work) {
    blas.GEMM(Teuchos::CONJ_TRANS, Teuchos::NO_TRANS,
              m, n, k,
              alpha,
              A.ValuePtr(), A.BaseObject().ColStride(),
              B.ValuePtr(), B.BaseObject().ColStride(),
              beta,
              C.ValuePtr(), C.BaseObject().ColStride());
  }
#else
  TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif

  return 0;
}
// Dense triangular solve that delegates to Teuchos::BLAS::TRSM with
// LEFT_SIDE, UPPER_TRI and NO_TRANS.
//
// diagA selects UNIT_DIAG when it equals Diag::Unit and NON_UNIT_DIAG
// otherwise. m is taken from the rows of A and n from the columns of B.
// Only team rank 0 issues the call; without HAVE_SHYLUTACHO_TEUCHOS it hits
// TACHO_TEST_FOR_ABORT instead. Always returns 0. `policy` is not used.
KOKKOS_INLINE_FUNCTION
int
Trsm<Side::Left,Uplo::Upper,Trans::NoTranspose,
     AlgoTrsm::ExternalBlas,Variant::One>
::invoke(PolicyType &policy,
         const MemberType &member,
         const int diagA,
         const ScalarType alpha,
         DenseExecViewTypeA &A,
         DenseExecViewTypeB &B) {
  using ordinal_type = typename DenseExecViewTypeA::ordinal_type;
  using value_type   = typename DenseExecViewTypeA::value_type;

  // Members other than the team lead have nothing to do here.
  if (member.team_rank() != 0)
    return 0;

#ifdef HAVE_SHYLUTACHO_TEUCHOS
  Teuchos::BLAS<ordinal_type,value_type> blas;

  const ordinal_type num_rows = A.NumRows();
  const ordinal_type num_rhs  = B.NumCols();

  const auto diag_mode = (diagA == Diag::Unit ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG);

  blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
            diag_mode,
            num_rows, num_rhs,
            alpha,
            A.ValuePtr(), A.BaseObject().ColStride(),
            B.ValuePtr(), B.BaseObject().ColStride());
#else
  TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif

  return 0;
}
// Trsm with a supernodal sparse A and a dense B: wraps the flat matrix of A
// in a DenseMatrixView and forwards to the ExternalBlas Trsm variant, passing
// B through unchanged.
//
// Only team rank 0 does the forwarding; the return value of the nested call is
// discarded and this function always returns 0.
KOKKOS_INLINE_FUNCTION
int
Trsm<Side::Left,Uplo::Upper,Trans::NoTranspose,
     AlgoTrsm::SparseDenseSuperNodes,Variant::One>
::invoke(PolicyType &policy,
         MemberType &member,
         const int diagA,
         const ScalarType alpha,
         CrsExecViewTypeA &A,
         DenseExecViewTypeB &B) {
  // Members other than the team lead have nothing to do here.
  if (member.team_rank() != 0)
    return 0;

  typedef DenseMatrixView<typename CrsExecViewTypeA::flat_mat_base_type> flat_dense_view_type;
  flat_dense_view_type dense_A(A.Flat());

  Trsm<Side::Left,Uplo::Upper,Trans::NoTranspose,
       AlgoTrsm::ExternalBlas,Variant::One>
    ::invoke(policy, member,
             diagA, alpha, dense_A, B);

  return 0;
}
// Unblocked sparse Cholesky (upper), row by row, executed by a Kokkos team.
//
// For every row k:
//   1. team rank 0 checks that the first stored entry of the row is the
//      diagonal (aborting otherwise), replaces a non-positive diagonal by
//      1.0e-8 after printing a warning to stderr, and takes its square root;
//   2. the remaining entries of the row are divided by that diagonal;
//   3. the trailing rows referenced by row k receive the hermitian rank
//      update, limited to entries for which Index() returns a non-negative
//      position.
// Steps 2 and 3 are spread over the team with TeamThreadRange.
//
// Always returns 0. `policy` is not used.
// NOTE(review): the first stored entry is read even when the row holds no
// entries at all; callers presumably guarantee a stored diagonal - confirm.
KOKKOS_INLINE_FUNCTION
int
Chol<Uplo::Upper,
     AlgoChol::Unblocked,Variant::One>
::invoke(PolicyType &policy,
         const MemberType &member,
         CrsExecViewTypeA &A) {
  typedef typename CrsExecViewTypeA::value_type    value_type;
  typedef typename CrsExecViewTypeA::ordinal_type  ordinal_type;
  typedef typename CrsExecViewTypeA::row_view_type row_view_type;

  for (ordinal_type k=0;k<A.NumRows();++k) {
    row_view_type &r1t = A.RowView(k);

    // extract diagonal from alpha11
    value_type &alpha = r1t.Value(0);

    if (member.team_rank() == 0) {
      // the diagonal has to be the first stored entry of the row
      TACHO_TEST_FOR_ABORT( r1t.Col(0) != k, "Chol::Unblocked:: Diagonal does not exist");
      if (Util::real(alpha) <= 0.0) {
        // warning message; the casts make the variadic arguments match the
        // %f / %d conversions whatever value_type and ordinal_type are
        fprintf(stderr, " diagonal = %f, local col = %d, global col = %d\n",
                static_cast<double>(Util::real(alpha)),
                static_cast<int>(k),
                static_cast<int>(r1t.OffsetCols() + k));
        // proceed with epsilon; for incomplete factorization, the Cholesky
        // factor may not exist
        alpha = 1.0e-8;
      }

      // error handling should be more carefully designed

      // sqrt on diag
      alpha = sqrt(Util::real(alpha));
    }
    // every member must see the final diagonal before scaling with it
    member.team_barrier();

    const ordinal_type nnz_r1t = r1t.NumNonZeros();

    if (nnz_r1t) {
      // inverse scale
      Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t),
                           [&](const ordinal_type j) {
                             r1t.Value(j) /= alpha;
                           });

      member.team_barrier();

      // hermitian rank update
      Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t),
                           [&](const ordinal_type i) {
                             const ordinal_type row_at_i = r1t.Col(i);
                             const value_type   val_at_i = Util::conj(r1t.Value(i));

                             row_view_type &r2t = A.RowView(row_at_i);
                             ordinal_type idx = 0;

                             for (ordinal_type j=i;j<nnz_r1t && (idx > -2);++j) {
                               const ordinal_type col_at_j = r1t.Col(j);
                               idx = r2t.Index(col_at_j, idx);

                               if (idx >= 0) {
                                 const value_type val_at_j = r1t.Value(j);
                                 r2t.Value(idx) -= val_at_i*val_at_j;
                               }
                             }
                           });

      // The update above writes rows that later iterations factor. Without
      // this barrier rank 0 could start on row k+1 (read and overwrite its
      // diagonal) while other members are still updating that row.
      member.team_barrier();
    }
  }
  return 0;
}
// Dense Gemm without an external BLAS: C = beta*C + alpha*A'*B, where A' is
// the conjugate transpose of A.
//
// m and n are the rows and columns of C, k the rows of B; A is read as
// A(l, i) with l in [0,k) and i in [0,m). Columns of C are distributed over
// the team with TeamThreadRange, so every team member has to call this
// function (the team-level loops and barriers must not sit behind a
// rank-0-only guard: that would execute only rank 0's share of the columns
// and leave rank 0 alone at the barrier).
//
// Returns 0. Returns immediately when C is empty or when the call would
// leave C unchanged (beta == 1 and either alpha == 0 or k == 0).
// `policy` is not used.
KOKKOS_INLINE_FUNCTION
int
Gemm<Trans::ConjTranspose,Trans::NoTranspose,
     AlgoGemm::InternalBlas,Variant::One>
::invoke(PolicyType &policy,
         MemberType &member,
         const ScalarType alpha,
         DenseExecViewTypeA &A,
         DenseExecViewTypeB &B,
         const ScalarType beta,
         DenseExecViewTypeC &C) {
  typedef typename DenseExecViewTypeA::ordinal_type ordinal_type;
  typedef typename DenseExecViewTypeA::value_type   value_type;

  const ordinal_type m = C.NumRows();
  const ordinal_type n = C.NumCols();
  const ordinal_type k = B.NumRows();

  // quick return; the condition is the same for every team member
  if (m == 0 || n == 0 || ((alpha == 0 || k == 0) && (beta == 1)))
    return 0;

  // C <- beta * C (beta == 0 overwrites instead of multiplying)
  if (beta == 0.0) {
    Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, n),
                         [&](const ordinal_type j) {
                           for (ordinal_type i=0;i<m;++i)
                             C.Value(i, j) = 0.0;
                         });
  } else if (beta != 1.0) {
    Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, n),
                         [&](const ordinal_type j) {
                           for (ordinal_type i=0;i<m;++i)
                             C.Value(i, j) = beta*C.Value(i, j);
                         });
  }
  member.team_barrier();

  // nothing to accumulate when alpha vanishes
  if (alpha == 0)
    return 0;

  // C(i,j) += alpha * conj(A(l,i)) * B(l,j); for now simple implementation
  for (ordinal_type l=0;l<k;++l) {
    Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, n),
                         [&](const ordinal_type j) {
                           // alpha is folded into the B entry once per column
                           const value_type tmp = alpha*B.Value(l, j);
                           for (ordinal_type i=0;i<m;++i)
                             C.Value(i, j) += Util::conj(A.Value(l, i))*tmp;
                         });
    member.team_barrier();
  }

  return 0;
}
// Dense Gemm without an external BLAS: C = beta*C + alpha*A*B.
//
// m and n are the rows and columns of C, k the rows of B. All work is done
// sequentially by team rank 0. The product is accumulated block by block
// (mc x kc panels of A against kc x nr panels of B, the latter swept in
// strips of nnr columns); blocks at the matrix edges are clipped to the
// matrix size so that every (i, l, j) combination is visited exactly once,
// whatever the dimensions are.
//
// Returns 0. Returns immediately when C is empty or when the call would
// leave C unchanged (beta == 1 and either alpha == 0 or k == 0).
// `policy` is not used.
KOKKOS_INLINE_FUNCTION
int
Gemm<Trans::NoTranspose,Trans::NoTranspose,
     AlgoGemm::InternalBlas,Variant::One>
::invoke(PolicyType &policy,
         MemberType &member,
         const ScalarType alpha,
         DenseExecViewTypeA &A,
         DenseExecViewTypeB &B,
         const ScalarType beta,
         DenseExecViewTypeC &C) {
  typedef typename DenseExecViewTypeA::ordinal_type ordinal_type;
  typedef typename DenseExecViewTypeA::value_type   value_type;

  const ordinal_type m = C.NumRows();
  const ordinal_type n = C.NumCols();
  const ordinal_type k = B.NumRows();

  // for now simple implementation
  if (m == 0 || n == 0 || ((alpha == 0 || k == 0) && (beta == 1)))
    return 0;

  // C = beta C + alpha AB
  if (member.team_rank() == 0) {
    // scale beta (beta == 0 overwrites instead of multiplying)
    if (beta == 0.0) {
      for (ordinal_type j=0;j<n;++j)
        for (ordinal_type i=0;i<m;++i)
          C.Value(i, j) = 0.0;
    } else if (beta != 1.0) {
      for (ordinal_type j=0;j<n;++j)
        for (ordinal_type i=0;i<m;++i)
          C.Value(i, j) = beta*C.Value(i, j);
    }

    // nothing to accumulate when alpha vanishes
    if (alpha == 0)
      return 0;

    // gemm blocked
    constexpr ordinal_type mc = 128, nr = 128, kc = 32, nnr = 16;

    for (ordinal_type l0=0;l0<k;l0+=kc) {
      const ordinal_type l1 = (k - l0 < kc ? k : l0 + kc);

      for (ordinal_type i0=0;i0<m;i0+=mc) {
        const ordinal_type i1 = (m - i0 < mc ? m : i0 + mc);

        for (ordinal_type j0=0;j0<n;j0+=nr) {
          const ordinal_type j1 = (n - j0 < nr ? n : j0 + nr);

          // GEBP : C_ij += alpha A_il B_lj, one nnr-wide strip at a time
          for (ordinal_type p0=j0;p0<j1;p0+=nnr) {
            const ordinal_type p1 = (j1 - p0 < nnr ? j1 : p0 + nnr);

            for (ordinal_type ll=l0;ll<l1;++ll)
              for (ordinal_type ii=i0;ii<i1;++ii) {
                // alpha is folded into the A entry once per (ii, ll)
                const value_type a_il = alpha*A.Value(ii, ll);
                for (ordinal_type jj=p0;jj<p1;++jj)
                  C.Value(ii, jj) += a_il*B.Value(ll, jj);
              }
          }
        }
      }
    }
  }

  return 0;
}