/// Upper Cholesky factorization of a dense view, delegated to LAPACK POTRF.
///
/// Only the team member whose team_rank() is 0 calls into LAPACK; every other
/// member returns 0 without touching A.
///
/// \param policy  not referenced by this specialization
/// \param member  team handle; only its rank is queried
/// \param A       dense view whose NumRows(), ValuePtr() and base-object
///                ColStride() are handed to POTRF with uplo = 'U'
/// \return the info value written by POTRF on rank 0; 0 on all other ranks
///
/// When HAVE_SHYLUTACHO_TEUCHOS is not defined, rank 0 triggers
/// TACHO_TEST_FOR_ABORT instead of factoring.
// NOTE(review): a previously disabled static_assert hinted that this external
// library path is not meant for the Cuda space -- confirm before enabling on
// device.
KOKKOS_INLINE_FUNCTION
  int
  Chol<Uplo::Upper,
       AlgoChol::ExternalLapack,Variant::One>
  ::invoke(PolicyType &policy,
           const MemberType &member,
           DenseExecViewTypeA &A) {
    using ordinal_type = typename DenseExecViewTypeA::ordinal_type;
    using value_type   = typename DenseExecViewTypeA::value_type;

    // Members other than rank 0 have nothing to do here.
    if (member.team_rank() != 0)
      return 0;

    // LAPACK status code; stays 0 unless POTRF overwrites it.
    int info = 0;
#ifdef HAVE_SHYLUTACHO_TEUCHOS
    Teuchos::LAPACK<ordinal_type,value_type> lapack;

    lapack.POTRF('U',
                 A.NumRows(),
                 A.ValuePtr(), A.BaseObject().ColStride(),
                 &info);
#else
    TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif

    return info;
  }
  /// Hermitian rank update on supernodal sparse views, forwarded to the
  /// dense by-blocks Herk kernel.
  ///
  /// Rank 0 of the team wraps A.Hier() and C.Hier() in DenseMatrixView objects
  /// and calls Herk<Upper, ConjTranspose, DenseByBlocks, Variant::One>::invoke
  /// with the same policy, member, alpha and beta. Other ranks do nothing.
  ///
  /// \return always 0; the value returned by the dense kernel is not propagated
  KOKKOS_INLINE_FUNCTION
  int
  Herk<Uplo::Upper,Trans::ConjTranspose,
       AlgoHerk::SparseSparseSuperNodesByBlocks,Variant::One>
  ::invoke(PolicyType &policy,
           MemberType &member,
           const ScalarType alpha,
           CrsExecViewTypeA &A,
           const ScalarType beta,
           CrsExecViewTypeC &C) {
    using hier_view_type  = DenseMatrixView<typename CrsExecViewTypeA::hier_mat_base_type>;
    using dense_herk_type = Herk<Uplo::Upper,Trans::ConjTranspose,
                                 AlgoHerk::DenseByBlocks,Variant::One>;

    // Only one member of the team issues the by-blocks call.
    if (member.team_rank() != 0)
      return 0;

    // Both views use A's hierarchical base type, as in the original code.
    hier_view_type AA(A.Hier());
    hier_view_type CC(C.Hier());

    dense_herk_type::invoke(policy, member, alpha, AA, beta, CC);

    return 0;
  }
  /// Matrix-matrix product on supernodal sparse views, forwarded to the
  /// external-BLAS dense Gemm kernel.
  ///
  /// Rank 0 of the team wraps A.Flat(), B.Flat() and C.Flat() in
  /// DenseMatrixView objects and calls
  /// Gemm<ConjTranspose, NoTranspose, ExternalBlas, Variant::One>::invoke with
  /// the same policy, member, alpha and beta. Other ranks do nothing.
  ///
  /// \return always 0; the value returned by the dense kernel is not propagated
  KOKKOS_INLINE_FUNCTION
  int
  Gemm<Trans::ConjTranspose,Trans::NoTranspose,
       AlgoGemm::SparseSparseSuperNodes,Variant::One>
  ::invoke(PolicyType &policy,
           MemberType &member,
           const ScalarType alpha,
           CrsExecViewTypeA &A,
           CrsExecViewTypeB &B,
           const ScalarType beta,
           CrsExecViewTypeC &C) {
    // NOTE(review): B and C are wrapped with A's flat base type, exactly as
    // the original did; presumably all three share it -- confirm.
    using flat_view_type  = DenseMatrixView<typename CrsExecViewTypeA::flat_mat_base_type>;
    using dense_gemm_type = Gemm<Trans::ConjTranspose,Trans::NoTranspose,
                                 AlgoGemm::ExternalBlas,Variant::One>;

    // Only one member of the team issues the dense call.
    if (member.team_rank() != 0)
      return 0;

    flat_view_type AA(A.Flat());
    flat_view_type BB(B.Flat());
    flat_view_type CC(C.Flat());

    dense_gemm_type::invoke(policy, member, alpha, AA, BB, beta, CC);

    return 0;
  }
Esempio n. 4
0
/// Check the credentials carried by _User against the user database.
///
/// The lookup starts at UserDB.GetFirstList() and searches for an entry whose
/// id equals _User->getId(). If one is found and its password compares equal
/// to _User->getPw(), _User is re-pointed at the stored entry.
///
/// \param _User  in/out: on entry the candidate (id and password are read);
///               on success it is replaced by the pointer to the stored entry.
///               A null pointer is rejected.
/// \return true on a successful login; false when _User is null, the id is not
///         found, or the passwords differ (a message is printed for the last
///         two cases).
// NOTE(review): on success the object _User pointed to on entry is not
// released here; presumably the caller owns it -- confirm.
bool server::login(MemberType* & _User) {
	// Nothing to look up without a candidate; avoids dereferencing null below.
	if (_User == nullptr) {
		return false;
	}

	// Initialised so a failed lookup can never leave a garbage pointer behind.
	MemberType* stored = nullptr;
	if (!UserDB.GetFirstList()->ToRightFindUserByID(stored, _User->getId()) || stored == nullptr) {
		cout << "Id you wrote down is not in DB" << endl;
		return false;
	}

	if (!(_User->getPw() == stored->getPw())) {
		cout << "Password you wrote down is not same as DB" << endl;
		return false;
	}

	_User = stored;
	return true;
}
Esempio n. 5
0
void server::retrieveByName() {
	MemberType* tmpUser = new MemberType;
	DoublyList<MemberType>* tmpUserDB = & UserDB;
	while (1) {
		switch (PrintFriendSearchMenu()) {
		case 1: // by name
			tmpUser->setName(InputNameToSearch());
			for (;;) {
				if (tmpUserDB->getItem()->getName() == tmpUser->getName()) {
					PrintDetailsOfUser(tmpUserDB->getItem()->getId(), tmpUserDB->getItem()->getPn(), tmpUserDB->getItem()->getPw(), tmpUserDB->getItem()->getName(), tmpUserDB->getItem()->getMsg(), 1);
					break;
				}
				else {
					if (tmpUserDB->GetNextList(tmpUserDB) == false) {
						cout << "I can't find that user! Sorry!" << endl;
						cin.get(); cin.get();
						break;
					}
				}
			}
			break;
		case 2: // by ID
			tmpUser->setId(InputIdByUser());
			for (;;) {
				if (tmpUserDB->getItem()->getId() == tmpUser->getId()) {
					PrintDetailsOfUser(tmpUserDB->getItem()->getId(), tmpUserDB->getItem()->getPn(), tmpUserDB->getItem()->getPw(), tmpUserDB->getItem()->getName(), tmpUserDB->getItem()->getMsg(), 1);
					break;
				}
				else {
					if (tmpUserDB->GetNextList(tmpUserDB) == false) {
						cout << "I can't find that user! Sorry!" << endl;
						cin.get(); cin.get();
						break;
					}
				}
			}
			break;
		case 0:
			return;
		}
	}
}
  /// Sparse-sparse update C = beta*C + alpha*A'*B restricted to the entries
  /// already present in C (A' is the conjugate transpose of A).
  ///
  /// C is first scaled through ScaleCrsMatrix::invoke. Then, for every row k of
  /// A, the nonzeros of A's row k are distributed over the team with
  /// TeamThreadRange; each nonzero a(k,i) selects row i of C, and every nonzero
  /// b(k,j) of B's row k is accumulated into C(i,j) when c.Index() locates that
  /// column. A team barrier follows each row's parallel loop.
  ///
  /// \return always 0
  KOKKOS_INLINE_FUNCTION
  int
  Gemm<Trans::ConjTranspose,Trans::NoTranspose,
       AlgoGemm::SparseSparseUnblocked,Variant::One>
  ::invoke(PolicyType &policy,
           MemberType &member,
           const ScalarType alpha,
           CrsExecViewTypeA &A,
           CrsExecViewTypeB &B,
           const ScalarType beta,
           CrsExecViewTypeC &C) {
    using ordinal_type  = typename CrsExecViewTypeA::ordinal_type;
    using value_type    = typename CrsExecViewTypeA::value_type;
    using row_view_type = typename CrsExecViewTypeA::row_view_type;

    // C := beta*C
    ScaleCrsMatrix::invoke(policy, member,
                           beta, C);

    // C(i,j) += alpha*conj(A(k,i))*B(k,j), one row k at a time
    const ordinal_type num_rows_a = A.NumRows();
    for (ordinal_type k=0;k<num_rows_a;++k) {
      row_view_type &row_a = A.RowView(k);
      const ordinal_type nnz_a = row_a.NumNonZeros();

      row_view_type &row_b = B.RowView(k);
      const ordinal_type nnz_b = row_b.NumNonZeros();

      // Nothing to contribute when either row is empty.
      if (nnz_a <= 0 || nnz_b == 0)
        continue;

      Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 0, nnz_a),
                           [&](const ordinal_type i) {
                             const ordinal_type target_row = row_a.Col(i);
                             const value_type   a_conj     = Util::conj(row_a.Value(i));

                             row_view_type &row_c = C.RowView(target_row);

                             // Position hint passed back into Index(); the scan
                             // stops once Index() returns a value <= -2
                             // (presumably "no further match possible" -- confirm).
                             ordinal_type pos = 0;
                             ordinal_type j = 0;
                             while (j < nnz_b && pos > -2) {
                               const ordinal_type col_b = row_b.Col(j);
                               const value_type   val_b = row_b.Value(j);

                               pos = row_c.Index(col_b, pos);
                               if (pos >= 0)
                                 row_c.Value(pos) += alpha*a_conj*val_b;
                               ++j;
                             }
                           });
      member.team_barrier();
    }

    return 0;
  }
  /// Dense C = beta*C + alpha*A'*B delegated to Teuchos::BLAS::GEMM
  /// (CONJ_TRANS on A, NO_TRANS on B).
  ///
  /// Only team rank 0 makes the call. Dimensions are taken as m = C.NumRows(),
  /// n = C.NumCols(), k = B.NumRows(); GEMM is skipped unless all three are
  /// positive. Leading dimensions come from each view's BaseObject().ColStride().
  ///
  /// When HAVE_SHYLUTACHO_TEUCHOS or KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
  /// is not defined, rank 0 triggers TACHO_TEST_FOR_ABORT instead.
  ///
  /// \return always 0
  // NOTE(review): because k == 0 skips the call entirely, C is not scaled by
  // beta in that case -- confirm this is intended.
  KOKKOS_INLINE_FUNCTION
  int
  Gemm<Trans::ConjTranspose,Trans::NoTranspose,
       AlgoGemm::ExternalBlas,Variant::One>
  ::invoke(PolicyType &policy,
           MemberType &member,
           const ScalarType alpha,
           DenseExecViewTypeA &A,
           DenseExecViewTypeB &B,
           const ScalarType beta,
           DenseExecViewTypeC &C) {
    // Members other than rank 0 have nothing to do here.
    if (member.team_rank() != 0)
      return 0;

#if                                                     \
  defined( HAVE_SHYLUTACHO_TEUCHOS ) &&                 \
  defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
    using ordinal_type = typename DenseExecViewTypeA::ordinal_type;
    using value_type   = typename DenseExecViewTypeA::value_type;

    Teuchos::BLAS<ordinal_type,value_type> blas;

    const ordinal_type m = C.NumRows();
    const ordinal_type n = C.NumCols();
    const ordinal_type k = B.NumRows();

    const bool any_dim_empty = (m <= 0 || n <= 0 || k <= 0);
    if (!any_dim_empty) {
      blas.GEMM(Teuchos::CONJ_TRANS, Teuchos::NO_TRANS,
                m, n, k,
                alpha,
                A.ValuePtr(), A.BaseObject().ColStride(),
                B.ValuePtr(), B.BaseObject().ColStride(),
                beta,
                C.ValuePtr(), C.BaseObject().ColStride());
    }
#else
    TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif

    return 0;
  }
  /// Dense triangular solve delegated to Teuchos::BLAS::TRSM with
  /// LEFT_SIDE, UPPER_TRI and NO_TRANS.
  ///
  /// Only team rank 0 makes the call. The sizes passed are m = A.NumRows() and
  /// n = B.NumCols(); leading dimensions come from BaseObject().ColStride().
  ///
  /// \param diagA  compared against Diag::Unit to choose UNIT_DIAG, otherwise
  ///               NON_UNIT_DIAG is used
  /// \param alpha  scalar forwarded to TRSM
  /// \param A      triangular operand
  /// \param B      right-hand side view handed to TRSM
  /// \return always 0
  ///
  /// When HAVE_SHYLUTACHO_TEUCHOS is not defined, rank 0 triggers
  /// TACHO_TEST_FOR_ABORT instead.
  KOKKOS_INLINE_FUNCTION
  int
  Trsm<Side::Left,Uplo::Upper,Trans::NoTranspose,
       AlgoTrsm::ExternalBlas,Variant::One>
  ::invoke(PolicyType &policy,
           const MemberType &member,
           const int diagA,
           const ScalarType alpha,
           DenseExecViewTypeA &A,
           DenseExecViewTypeB &B) {
    using ordinal_type = typename DenseExecViewTypeA::ordinal_type;
    using value_type   = typename DenseExecViewTypeA::value_type;

    // Members other than rank 0 have nothing to do here.
    if (member.team_rank() != 0)
      return 0;

#ifdef HAVE_SHYLUTACHO_TEUCHOS
    Teuchos::BLAS<ordinal_type,value_type> blas;

    const ordinal_type m = A.NumRows();
    const ordinal_type n = B.NumCols();

    // Translate the Tacho diagonal tag into the Teuchos one.
    const auto diag_flag = (diagA == Diag::Unit ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG);

    blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
              diag_flag,
              m, n,
              alpha,
              A.ValuePtr(), A.BaseObject().ColStride(),
              B.ValuePtr(), B.BaseObject().ColStride());
#else
    TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif

    return 0;
  }
  /// Triangular solve with a supernodal sparse A and a dense B, forwarded to
  /// the external-BLAS Trsm kernel.
  ///
  /// Rank 0 of the team wraps A.Flat() in a DenseMatrixView and calls
  /// Trsm<Left, Upper, NoTranspose, ExternalBlas, Variant::One>::invoke with
  /// the same policy, member, diagA, alpha and B. Other ranks do nothing.
  ///
  /// \return always 0; the value returned by the dense kernel is not propagated
  KOKKOS_INLINE_FUNCTION
  int
  Trsm<Side::Left,Uplo::Upper,Trans::NoTranspose,
       AlgoTrsm::SparseDenseSuperNodes,Variant::One>
  ::invoke(PolicyType &policy,
           MemberType &member,
           const int diagA,
           const ScalarType alpha,
           CrsExecViewTypeA &A,
           DenseExecViewTypeB &B) {
    using flat_view_type  = DenseMatrixView<typename CrsExecViewTypeA::flat_mat_base_type>;
    using dense_trsm_type = Trsm<Side::Left,Uplo::Upper,Trans::NoTranspose,
                                 AlgoTrsm::ExternalBlas,Variant::One>;

    // Only one member of the team issues the dense call.
    if (member.team_rank() != 0)
      return 0;

    flat_view_type AA(A.Flat());
    dense_trsm_type::invoke(policy, member, diagA, alpha, AA, B);

    return 0;
  }
  /// Unblocked upper Cholesky factorization of a sparse (CRS) view, performed
  /// in place one row at a time by a Kokkos team.
  ///
  /// For each row k:
  ///   1. rank 0 checks that the first stored entry is the diagonal
  ///      (aborts otherwise), replaces a non-positive diagonal by 1.0e-8 after
  ///      printing a warning to stderr, and takes its square root;
  ///   2. the team divides the remaining entries of the row by the diagonal;
  ///   3. the team subtracts the outer product of the row from the rows it
  ///      references, touching only entries that r2t.Index() can locate.
  ///
  /// \param policy  not referenced by this specialization
  /// \param member  team handle used for rank query, TeamThreadRange and barriers
  /// \param A       matrix view, overwritten with the factor
  /// \return always 0 (a non-positive diagonal is patched, not reported)
  ///
  /// Changes from the previous version:
  ///   - a team barrier now follows the rank update. Without it rank 0 could
  ///     start the next row and read/overwrite its diagonal while other
  ///     threads were still updating that same entry.
  ///   - the fprintf arguments are cast to the types the format string names,
  ///     since ordinal_type need not be int.
  KOKKOS_INLINE_FUNCTION
  int
  Chol<Uplo::Upper,
       AlgoChol::Unblocked,Variant::One>
  ::invoke(PolicyType &policy,
           const MemberType &member,
           CrsExecViewTypeA &A) {

    typedef typename CrsExecViewTypeA::value_type    value_type;
    typedef typename CrsExecViewTypeA::ordinal_type  ordinal_type;
    typedef typename CrsExecViewTypeA::row_view_type row_view_type;

    for (ordinal_type k=0;k<A.NumRows();++k) {
      row_view_type &r1t = A.RowView(k);

      // extract diagonal from alpha11
      value_type &alpha = r1t.Value(0);

      if (member.team_rank() == 0) {
        // the first stored entry of the row must be the diagonal
        TACHO_TEST_FOR_ABORT( r1t.Col(0) != k, "Chol::Unblocked:: Diagonal does not exist");
        if (Util::real(alpha) <= 0.0) {
          // warning message; casts match the %f/%d conversions below
          fprintf(stderr, "   diagonal = %f, local col = %d, global col = %d\n",
                  static_cast<double>(Util::real(alpha)),
                  static_cast<int>(k),
                  static_cast<int>(r1t.OffsetCols() + k));
          // proceed with epsilon; for incomplete factorization, the Cholesky
          // factor may not exist
          alpha = 1.0e-8;
        }

        // error handling should be more carefully designed

        // sqrt on diag
        alpha = sqrt(Util::real(alpha));
      }
      // everyone waits until the diagonal is final
      member.team_barrier();

      const ordinal_type nnz_r1t = r1t.NumNonZeros();

      if (nnz_r1t) {
        // inverse scale
        Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t),
                             [&](const ordinal_type j) {
                               r1t.Value(j) /= alpha;
                             });

        // the rank update below reads the scaled row
        member.team_barrier();

        // hermitian rank update
        Kokkos::parallel_for(Kokkos::TeamThreadRange(member, 1, nnz_r1t),
                             [&](const ordinal_type i) {
                               const ordinal_type row_at_i = r1t.Col(i);
                               const value_type   val_at_i = Util::conj(r1t.Value(i));

                               row_view_type &r2t = A.RowView(row_at_i);
                               ordinal_type idx = 0;

                               // the scan stops once Index() returns <= -2
                               for (ordinal_type j=i;j<nnz_r1t && (idx > -2);++j) {
                                 const ordinal_type col_at_j = r1t.Col(j);
                                 idx = r2t.Index(col_at_j, idx);

                                 if (idx >= 0) {
                                   const value_type val_at_j = r1t.Value(j);
                                   r2t.Value(idx) -= val_at_i*val_at_j;
                                 }
                               }
                             });

        // The update may have modified the diagonal of a later row; make sure
        // it has completed before rank 0 processes the next row.
        member.team_barrier();
      }
    }
    return 0;
  }
  /// Dense C = beta*C + alpha*A'*B computed with plain loops, where A' is the
  /// conjugate transpose of A.
  ///
  /// Dimensions are m = C.NumRows(), n = C.NumCols(), k = B.NumRows(); entry
  /// (l, i) of A contributes to row i of C. All work is done serially by team
  /// rank 0; other ranks return immediately.
  ///
  /// \param alpha  scale applied to the product
  /// \param beta   scale applied to C before accumulation (0 overwrites C)
  /// \return always 0
  ///
  /// Changes from the previous version:
  ///   - the loops were TeamThreadRange parallel_for calls and a team_barrier
  ///     placed inside the rank-0 guard, i.e. team-collective operations
  ///     entered by one thread only. They are now ordinary serial loops,
  ///     matching the NoTranspose internal kernel.
  ///   - alpha is now multiplied into the product (it was only tested for 0).
  ///   - A is now conjugated, as the ConjTranspose tag requires.
  KOKKOS_INLINE_FUNCTION
  int
  Gemm<Trans::ConjTranspose,Trans::NoTranspose,
       AlgoGemm::InternalBlas,Variant::One>
  ::invoke(PolicyType &policy,
           MemberType &member,
           const ScalarType alpha,
           DenseExecViewTypeA &A,
           DenseExecViewTypeB &B,
           const ScalarType beta,
           DenseExecViewTypeC &C) {
    typedef typename DenseExecViewTypeA::ordinal_type ordinal_type;
    typedef typename DenseExecViewTypeA::value_type   value_type;

    if (member.team_rank() == 0) {
      const ordinal_type m = C.NumRows();
      const ordinal_type n = C.NumCols();
      const ordinal_type k = B.NumRows();

      // quick return: empty C, or nothing to add and nothing to scale
      if (m == 0 || n == 0 || ((alpha == 0 || k == 0) && (beta == 1))) return 0;

      // C := beta*C (beta == 0 overwrites so stale values never propagate)
      if (beta == 0) {
        for (ordinal_type j=0;j<n;++j)
          for (ordinal_type i=0;i<m;++i)
            C.Value(i, j) = 0.0;
      } else if (!(beta == 1)) {
        for (ordinal_type j=0;j<n;++j)
          for (ordinal_type i=0;i<m;++i)
            C.Value(i, j) = beta*C.Value(i, j);
      }

      // C(i,j) += alpha*conj(A(l,i))*B(l,j)
      if (!(alpha == 0)) {
        for (ordinal_type l=0;l<k;++l)
          for (ordinal_type j=0;j<n;++j) {
            const value_type scaled_b = alpha*B.Value(l, j);
            for (ordinal_type i=0;i<m;++i)
              C.Value(i, j) += Util::conj(A.Value(l, i))*scaled_b;
          }
      }
    }

    return 0;
  }
  /// Dense C = beta*C + alpha*A*B computed with cache-blocked plain loops.
  ///
  /// Dimensions are m = C.NumRows(), n = C.NumCols(), k = B.NumRows(). All work
  /// is done serially by team rank 0; other ranks return 0 without touching C.
  ///
  /// \param alpha  scale applied to the product
  /// \param beta   scale applied to C before accumulation (0 overwrites C)
  /// \return always 0
  ///
  /// Changes from the previous version:
  ///   - the old "remainder" loop only visited indices where l, i and j were
  ///     all in the tail past the last full block, so most of C was not
  ///     updated whenever a dimension was not a multiple of its block size.
  ///     Block edges are now clamped, so every (l, i, j) is visited exactly
  ///     once; for exact multiples the visiting order is unchanged.
  ///   - alpha is now multiplied into the product (it was only tested for 0).
  KOKKOS_INLINE_FUNCTION
  int
  Gemm<Trans::NoTranspose,Trans::NoTranspose,
       AlgoGemm::InternalBlas,Variant::One>
  ::invoke(PolicyType &policy,
           MemberType &member,
           const ScalarType alpha,
           DenseExecViewTypeA &A,
           DenseExecViewTypeB &B,
           const ScalarType beta,
           DenseExecViewTypeC &C) {
    typedef typename DenseExecViewTypeA::ordinal_type ordinal_type;
    typedef typename DenseExecViewTypeA::value_type   value_type;

    const ordinal_type m = C.NumRows();
    const ordinal_type n = C.NumCols();
    const ordinal_type k = B.NumRows();

    // quick return: empty C, or nothing to add and nothing to scale
    if (m == 0 || n == 0 || ((alpha == 0 || k == 0) && (beta == 1))) return 0;

    if (member.team_rank() == 0) {
      // C := beta*C (beta == 0 overwrites so stale values never propagate)
      if (beta == 0) {
        for (ordinal_type j=0;j<n;++j)
          for (ordinal_type i=0;i<m;++i)
            C.Value(i, j) = 0.0;
      } else if (!(beta == 1)) {
        for (ordinal_type j=0;j<n;++j)
          for (ordinal_type i=0;i<m;++i)
            C.Value(i, j) = beta*C.Value(i, j);
      }

      // C += alpha*A*B, blocked as (kc x mc x nr) with nnr-wide column panels
      if (!(alpha == 0)) {
        constexpr ordinal_type mc = 128, nr = 128, kc = 32, nnr = 16;

        for (ordinal_type loff=0;loff<k;loff+=kc) {
          // each *end is the block's exclusive upper bound, clamped to the
          // matrix edge
          const ordinal_type lend = (k - loff < kc ? k : loff + kc);
          for (ordinal_type moff=0;moff<m;moff+=mc) {
            const ordinal_type mend = (m - moff < mc ? m : moff + mc);
            for (ordinal_type noff=0;noff<n;noff+=nr) {
              const ordinal_type nend = (n - noff < nr ? n : noff + nr);

              // GEBP : C_ij += alpha A_il B_lj, one column panel at a time
              for (ordinal_type poff=noff;poff<nend;poff+=nnr) {
                const ordinal_type pend = (nend - poff < nnr ? nend : poff + nnr);
                for (ordinal_type ll=loff;ll<lend;++ll)
                  for (ordinal_type ii=moff;ii<mend;++ii) {
                    const value_type scaled_a = alpha*A.Value(ii, ll);
                    for (ordinal_type jj=poff;jj<pend;++jj)
                      C.Value(ii, jj) += scaled_a*B.Value(ll, jj);
                  }
              }
            }
          }
        }
      }
    }

    return 0;
  }