Multiply( const matrix_type & A , const size_type nrow , const size_type ncol , const vector_type & x , const vector_type & y ) { CudaSparseSingleton & s = CudaSparseSingleton::singleton(); const scalar_type alpha = 1 , beta = 0 ; cusparseStatus_t status = cusparseScsrmv( s.handle , CUSPARSE_OPERATION_NON_TRANSPOSE , nrow , ncol , A.coefficients.dimension_0() , &alpha , s.descra , A.coefficients.ptr_on_device() , A.graph.row_map.ptr_on_device() , A.graph.entries.ptr_on_device() , x.ptr_on_device() , &beta , y.ptr_on_device() ); if ( CUSPARSE_STATUS_SUCCESS != status ) { throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) ); } }
bool compareRank1(const vector_type& y, const vector_type& y_exp, const scalar_type rel_tol, const scalar_type abs_tol, Teuchos::FancyOStream& out) { typedef typename vector_type::size_type size_type; typename vector_type::HostMirror hy = Kokkos::create_mirror_view(y); typename vector_type::HostMirror hy_exp = Kokkos::create_mirror_view(y_exp); Kokkos::deep_copy(hy, y); Kokkos::deep_copy(hy_exp, y_exp); size_type num_rows = y.dimension_0(); bool success = true; for (size_type i=0; i<num_rows; ++i) { for (size_type j=0; j<y.sacado_size(); ++j) { scalar_type diff = std::abs( hy(i).fastAccessCoeff(j) - hy_exp(i).fastAccessCoeff(j) ); scalar_type tol = rel_tol*std::abs(hy_exp(i).fastAccessCoeff(j)) + abs_tol; bool s = diff < tol; out << "y_expected(" << i << ").coeff(" << j << ") - " << "y(" << i << ").coeff(" << j << ") = " << hy_exp(i).fastAccessCoeff(j) << " - " << hy(i).fastAccessCoeff(j) << " == " << diff << " < " << tol << " : "; if (s) out << "passed"; else out << "failed"; out << std::endl; success = success && s; } } return success; }
static void apply( const matrix_type & A , const vector_type & x , const vector_type & y ) { CudaSparseSingleton & s = CudaSparseSingleton::singleton(); const double alpha = 1 , beta = 0 ; const int n = A.graph.row_map.dimension_0() - 1 ; const int nz = A.graph.entries.dimension_0(); cusparseStatus_t status = cusparseDcsrmv( s.handle , CUSPARSE_OPERATION_NON_TRANSPOSE , n , n , nz , &alpha , s.descra , A.values.ptr_on_device() , A.graph.row_map.ptr_on_device() , A.graph.entries.ptr_on_device() , x.ptr_on_device() , &beta , y.ptr_on_device() ); if ( CUSPARSE_STATUS_SUCCESS != status ) { throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) ); } }
/// Computes b = A * x through PETSc's MatMult and then closes `b`.
///
/// All three arguments are down-cast to their PETSc implementations
/// (dynamic_cast on references, so a mismatching type throws std::bad_cast).
/// When the global size of the matrix column map's communicator differs from
/// that of the map of `x`, `x` is first duplicated into a vector built on the
/// matrix column map.
///
/// @param A  Matrix operand.
/// @param x  Vector operand.
/// @param b  Result vector; closed before returning.
void prod( sparse_matrix_type const& A, vector_type const& x, vector_type& b ) const
{
    petsc_sparse_matrix_type const& petscA = dynamic_cast<petsc_sparse_matrix_type const&>( A );
    petsc_vector_type const& petscX = dynamic_cast<petsc_vector_type const&>( x );
    petsc_vector_type const& petscB = dynamic_cast<petsc_vector_type const&>( b );

    bool const sameGlobalSize =
        petscA.mapCol().worldComm().globalSize() == x.map().worldComm().globalSize();

    if ( sameGlobalSize )
    {
        // Standard path: x can be used as it is.
        int const ierr = MatMult( petscA.mat(), petscX.vec(), petscB.vec() );
        CHKERRABORT( petscA.comm().globalComm(), ierr );
    }
    else
    {
        // Conversion path: rebuild x on the column map of A before multiplying.
        petscMPI_vector_type xConverted( petscA.mapColPtr() );
        xConverted.duplicateFromOtherPartition( x );
        xConverted.close();

        int const ierr = MatMult( petscA.mat(), xConverted.vec(), petscB.vec() );
        CHKERRABORT( petscA.comm().globalComm(), ierr );
    }

    b.close();
}
/// Two boxes are equal when their lower corners and their upper corners match
/// element-wise.
///
/// The comparison walks this box's coordinate ranges, so `b2` is expected to
/// hold at least as many coordinates as this box.
bool operator==(box const &b2) const
{
    bool const same_lower =
        std::equal(m_lower.begin(), m_lower.end(), b2.m_lower.begin());
    if (!same_lower)
        return false;

    return std::equal(m_upper.begin(), m_upper.end(), b2.m_upper.begin());
}
/// Adds up the `m_key` member of every element of `input`.
///
/// @param input  Container to scan; only read through const iterators.
/// @return Sum of all keys, each converted to `unsigned_type` before adding.
unsigned_type checksum(vector_type& input)
{
    unsigned_type total = 0;

    vector_type::const_iterator pos = input.begin();
    while (pos != input.end())
    {
        total += (unsigned_type)((*pos).m_key);
        ++pos;
    }

    return total;
}
/*!
 * \brief Add \p value to the entry of \p vector stored at global row
 * \p global_row.
 *
 * \param vector      Vector to modify.
 * \param global_row  Global row index; MCLS_REQUIRE checks that the vector's
 *                    map reports it as a global element of this node.
 * \param value       Amount added to the existing entry.
 */
static void sumIntoGlobalValue( vector_type& vector,
                                global_ordinal_type global_row,
                                const scalar_type& value )
{
    MCLS_REQUIRE( vector.getMap()->isNodeGlobalElement( global_row ) );
    vector.sumIntoGlobalValue( global_row, value );
}
/*!
 * \brief Add \p value to the entry of \p vector stored at local row
 * \p local_row.
 *
 * \param vector     Vector to modify.
 * \param local_row  Local row index; MCLS_REQUIRE checks that the vector's
 *                   map reports it as a local element of this node.
 * \param value      Amount added to the existing entry.
 */
static void sumIntoLocalValue( vector_type& vector,
                               local_ordinal_type local_row,
                               const scalar_type& value )
{
    MCLS_REQUIRE( vector.getMap()->isNodeLocalElement( local_row ) );
    vector.sumIntoLocalValue( local_row, value );
}
public:
/**
 * \brief Simulate the model output for input \a u and noise \a e.
 *
 * For every sample index k below the cut-off the output is
 *   y(k) = - sum_i a_(i)*y(k-1-i) + sum_i b_(i)*u(k-i) + e_var_*e(k)
 * where each sum is truncated to the samples available so far.
 * The cut-off is n_obs - min(d_*ts_, n_obs); samples at or beyond it keep
 * \a na_value.
 *
 * \param u Input samples.
 * \param e Noise samples; asserted to have the same size as \a u.
 * \param na_value Value stored in outputs that are not computed.
 * \return The simulated output, one entry per input sample.
 */
vector_type simulate(vector_type const& u, vector_type const& e, real_type na_value = real_type(0)) const
{
	namespace ublas = ::boost::numeric::ublas;
	namespace ublasx = ::boost::numeric::ublasx;

	size_type const n_obs = u.size();
	size_type const n_e = e.size();

	DCS_ASSERT(
		n_obs == n_e,
		throw ::std::logic_error("Size of Input Data and Noise Data does not match.")
	);

	size_type const len_a = ublasx::size(a_); // number of coefficients in a_
	size_type const len_b = ublasx::size(b_); // number of coefficients in b_
	size_type const k_begin = 0;
	size_type const k_end = n_obs - ::std::min(static_cast<size_type>(d_*ts_), n_obs);

	vector_type y(n_obs, na_value);

	for (size_type k = k_begin; k < k_end; ++k)
	{
		y(k) = 0;

		// Contribution of past outputs: walk y backwards from k-1.
		if (len_a > 0 && k > 0)
		{
			size_type const terms_a = ::std::min(len_a, k);

			y(k) -= ublas::inner_prod(
						ublas::subrange(a_, 0, terms_a),
						ublas::subslice(y, k-1, -1, terms_a)
				);
		}

		// Contribution of current and past inputs: walk u backwards from k.
		if (len_b > 0)
		{
			size_type const terms_b = ::std::min(len_b, k+1);

			y(k) += ublas::inner_prod(
						ublas::subrange(b_, 0, terms_b),
						ublas::subslice(u, k, -1, terms_b)
				);
		}

		// Contribution of the noise sample.
		y(k) += e_var_*e(k);
	}

	return y;
}
std::pair<const_iterator, const_iterator> get_all(const TId id) const { const element_type element { id, osmium::index::empty_value<TValue>() }; return std::equal_range(m_vector.cbegin(), m_vector.cend(), element, [](const element_type& a, const element_type& b) { return a.first < b.first; }); }
typename vector_type::const_iterator find_id(const TId id) const noexcept { const element_type element { id, osmium::index::empty_value<TValue>() }; return std::lower_bound(m_vector.begin(), m_vector.end(), element, [](const element_type& a, const element_type& b) { return a.first < b.first; }); }
/// Convenience overload: allocates a square matrix of order `x.size()`, lets
/// the two-argument `Reflector` overload fill it from `x`, and returns it.
///
/// @param x  Vector the reflector is built from.
/// @return The matrix filled in by the two-argument overload.
matrix_type Reflector(const vector_type& x)
{
    using namespace boost::numeric::ublas;

    matrix_type reflector(x.size(), x.size());
    Reflector<matrix_type, vector_type>(x, reflector);

    return reflector;
}
/**
 * Store the lower and upper bounds and cache their difference.
 *
 * After the call `M_lb` holds \p __lb, `M_ub` holds \p __up and `M_lb_ub`
 * holds `__up - __lb`.
 *
 * Two assertions guard the input:
 *  - both vectors must have the same size;
 *  - every component of `__up - __lb` must be non-negative.
 *
 * Empty bound vectors trivially satisfy the second condition. The check skips
 * the minimum lookup in that case, because std::min_element returns the end
 * iterator for an empty range and dereferencing it is undefined behaviour.
 *
 * \param __lb lower bounds
 * \param __up upper bounds
 */
void setBounds( vector_type const& __lb, vector_type const& __up )
{
    GST_SMART_ASSERT( __lb.size() == __up.size() )( __lb )( __up )( "inconsistent bounds definition" );

    M_lb = __lb;
    M_ub = __up;
    M_lb_ub = __up - __lb;

    GST_SMART_ASSERT( M_lb_ub.begin() == M_lb_ub.end() ||
                      *std::min_element( M_lb_ub.begin(), M_lb_ub.end() ) >= 0 )
    ( M_lb )( M_ub )( "lower and upper bounds are not properly defined" );
}
/// Wraps the tuple `d` and exposes the elements selected by `v`.
///
/// @param d  Tuple being decorated; moved into `m_decorated`.
/// @param v  Element mapping; copied into `m_mapping`.
///
/// The CPPA_REQUIRE checks state the preconditions: `v` has exactly
/// sizeof...(ElementTypes) entries, the decorated tuple holds at least that
/// many elements, and the largest entry of `v` is smaller than the decorated
/// tuple's size. `ptr` is only declared when CPPA_DEBUG is defined, so
/// CPPA_REQUIRE presumably expands to nothing otherwise -- confirm against
/// its definition.
decorated_tuple(cow_pointer_type d, const vector_type& v)
    : super(tuple_impl_info::statically_typed)
    , m_decorated(std::move(d))
    , m_mapping(v)
{
#   ifdef CPPA_DEBUG
    const cow_pointer_type& ptr = m_decorated; // prevent detaching
#   endif
    CPPA_REQUIRE(ptr->size() >= sizeof...(ElementTypes));
    CPPA_REQUIRE(v.size() == sizeof...(ElementTypes));
    CPPA_REQUIRE(*(std::max_element(v.begin(), v.end())) < ptr->size());
}
/**
 * @test Test MaRC::Vector magnitude (norm) calculation.
 *
 * Builds the integer vector (3, 4, 5), computes the reference magnitude as
 * the square root of the sum of squared components, and compares it with
 * @c magnitude() through @c MaRC::almost_equal.
 *
 * @return @c true when both values agree within @c ulps.
 */
bool test_vector_magnitude()
{
    using vector_type = MaRC::Vector<int, 3>;

    vector_type const v{ 3, 4, 5 };

    auto const x = v[0];
    auto const y = v[1];
    auto const z = v[2];
    auto const sum_of_squares = x * x + y * y + z * z;

    double const expected = std::sqrt(sum_of_squares);

    return MaRC::almost_equal(v.magnitude(), expected, ulps);
}
/// Wraps the tuple `d` and exposes sizeof...(ElementTypes) consecutive
/// elements starting at index `offset`.
///
/// @param d       Tuple being decorated; moved into `m_decorated`.
/// @param offset  Index of the first exposed element.
///
/// The CPPA_REQUIRE checks state the preconditions: `offset` is positive and
/// the decorated tuple holds at least sizeof...(ElementTypes) elements past
/// it. `ptr` is only declared when CPPA_DEBUG is defined, so CPPA_REQUIRE
/// presumably expands to nothing otherwise -- confirm against its definition.
decorated_tuple(cow_pointer_type d, size_t offset)
    : super(tuple_impl_info::statically_typed)
    , m_decorated(std::move(d))
{
#   ifdef CPPA_DEBUG
    const cow_pointer_type& ptr = m_decorated; // prevent detaching
#   endif
    CPPA_REQUIRE((ptr->size() - offset) >= sizeof...(ElementTypes));
    CPPA_REQUIRE(offset > 0);

    // Fill the mapping with offset, offset + 1, offset + 2, ...
    m_mapping.resize(sizeof...(ElementTypes));
    size_t next_index = offset;
    for (auto& slot : m_mapping) {
        slot = next_index++;
    }
}
/// Looks up the value stored for `id`.
///
/// The lookup is a binary search comparing only keys (`first`), so `m_vector`
/// must already be ordered by key. When no entry carries `id`,
/// `not_found_error(id)` is called; there is no return statement after it, so
/// it is expected not to return.
const TValue get(const TId id) const final
{
    const auto key_less = [](const element_type& lhs, const element_type& rhs) {
        return lhs.first < rhs.first;
    };

    // Only the key of the probe takes part in the comparison.
    const element_type probe {
        id,
        osmium::index::empty_value<TValue>()
    };

    const auto pos = std::lower_bound(m_vector.begin(), m_vector.end(), probe, key_less);

    const bool found = (pos != m_vector.end()) && (pos->first == id);
    if (found) {
        return pos->second;
    }

    not_found_error(id);
}
// =================================================== // Methods // =================================================== Int SolverAztecOO::solve ( vector_type& solution, const vector_type& rhs ) { M_solver.SetLHS ( &solution.epetraVector() ); // The Solver from Aztecoo takes a non const (because of rescaling?) // We should be careful if you use scaling Epetra_FEVector* rhsVectorPtr ( const_cast<Epetra_FEVector*> (&rhs.epetraVector() ) ); M_solver.SetRHS ( rhsVectorPtr ); Int maxiter (M_maxIter); Real mytol (M_tolerance); Int status; if ( isPreconditionerSet() && M_preconditioner->preconditionerType().compare ("AztecOO") ) { M_solver.SetPrecOperator (M_preconditioner->preconditioner() ); } status = M_solver.Iterate (maxiter, mytol); #ifdef HAVE_LIFEV_DEBUG M_displayer->comm()->Barrier(); M_displayer->leaderPrint ( " o- Number of iterations = ", M_solver.NumIters() ); M_displayer->leaderPrint ( " o- Norm of the true residual = ", M_solver.TrueResidual() ); M_displayer->leaderPrint ( " o- Norm of the true ratio = ", M_solver.ScaledResidual() ); #endif /* try to solve again (reason may be: -2 "Aztec status AZ_breakdown: numerical breakdown" -3 "Aztec status AZ_loss: loss of precision" -4 "Aztec status AZ_ill_cond: GMRES hessenberg ill-conditioned" */ if ( status <= -2 ) { maxiter = M_maxIter; mytol = M_tolerance; Int oldIter = M_solver.NumIters(); status = M_solver.Iterate (maxiter, mytol); #ifdef HAVE_LIFEV_DEBUG M_displayer->comm()->Barrier(); M_displayer->leaderPrint ( " o- Second run: number of iterations = ", M_solver.NumIters() ); M_displayer->leaderPrint ( " o- Norm of the true residual = ", M_solver.TrueResidual() ); M_displayer->leaderPrint ( " o- Norm of the true ratio = ", M_solver.ScaledResidual() ); #endif return ( M_solver.NumIters() + oldIter ); } return ( M_solver.NumIters() ); }
/*
  Compute the 2-norm of the residual of the linear system.

  The solver's user matrix is applied to `solution`, the product is combined
  with a copy of `rhs` using the coefficients (1, -1), and the 2-norm of the
  result is returned. With Epetra's Update convention this is
  ||A*solution - rhs||_2 -- confirm against the Epetra documentation.

  solution : vector the matrix is applied to.
  rhs      : right-hand side of the system; copied, not modified.
*/
Real SolverAztecOO::computeResidual ( vector_type& solution, vector_type& rhs )
{
    // Product of the user matrix with the solution.
    vector_type matrixTimesSolution ( solution.map() );
    M_solver.GetUserMatrix()->Apply ( solution.epetraVector(),
                                      matrixTimesSolution.epetraVector() );

    // Working copy of the right-hand side that becomes the residual vector.
    vector_type residualVector ( rhs );
    residualVector.epetraVector().Update ( 1, matrixTimesSolution.epetraVector(), -1 );

    Real residualNorm;
    residualVector.norm2 ( &residualNorm );

    return residualNorm;
}
/// A box is empty as soon as, along any axis, its lower bound is greater than
/// or equal to its upper bound.
///
/// @return true if at least one axis has `m_lower >= m_upper`.
bool is_empty() const
{
    unsigned axis = 0;
    while (axis < m_lower.size())
    {
        if (m_lower[axis] >= m_upper[axis])
        {
            return true;
        }
        ++axis;
    }
    return false;
}
/**
 * Apply the block preconditioner: Y receives the result of solving the two
 * diagonal blocks with the two components of X as right-hand sides.
 *
 * Steps, in order:
 *  -# copy X into the work vector U and split it into its components 0 and 1;
 *  -# solve with M_11 for component 0 (backend named M_prefix_11);
 *  -# solve with M_L for component 1 (backend named M_prefix_22);
 *  -# write both solutions back into U and copy U into Y.
 *
 * \param X input vector
 * \param Y output vector
 * \return always 0
 */
int
PreconditionerBlockMS<space_type>::applyInverse ( const vector_type& X, vector_type& Y ) const
{
    tic();

    // Split the input into its two components.
    U = X;
    U.close();
    *M_uin = U.template element<0>();
    M_uin->close();
    *M_pin = U.template element<1>();
    M_pin->close();

    // Solve eq (12)
    // Eq 15: Pm v = c
    backend( _name=M_prefix_11 )->solve( _matrix=M_11, _rhs=M_uin, _solution=M_uout );
    M_uout->close();

    // Eq 16
    backend( _name=M_prefix_22 )->solve( _matrix=M_L, _rhs=M_pin, _solution=M_pout );
    M_pout->close();

    // Reassemble the two solutions and hand them back through Y.
    U.template element<0>() = *M_uout;
    U.template element<1>() = *M_pout;
    U.close();
    Y = U;
    Y.close();

    toc( "[PreconditionerBlockMS] applyInverse update solution", FLAGS_v>0 );
    return 0;
}
public:
/**
 * \brief Build a generator holding one uniform distribution per component.
 *
 * Component i gets a distribution constructed from (u_min[i], u_max[i]).
 * The members \c ub_ and \c lb_ start at +infinity and -infinity.
 *
 * \param u_min Per-component first distribution parameter.
 * \param u_max Per-component second distribution parameter; asserted to have
 *  the same size as \a u_min.
 * \param rng Random number generator, stored in \c rng_.
 */
uniform_signal_generator(vector_type const& u_min, vector_type const& u_max, random_generator_type& rng)
: rng_(rng),
  ub_( ::std::numeric_limits<value_type>::infinity()),
  lb_(-::std::numeric_limits<value_type>::infinity())
{
	// pre: size(u_min) == size(u_max)
	DCS_ASSERT(u_min.size() == u_max.size(),
			   DCS_EXCEPTION_THROW(::std::invalid_argument,
								   "Size of min and max vectors does not match"));

	::std::size_t const count = u_min.size();
	for (::std::size_t idx = 0; idx != count; ++idx)
	{
		uniform_distribution_type const distr(u_min[idx], u_max[idx]);
		distrs_.push_back(distr);
	}
}
/*!
 * \brief Build a new vector with the same content as \p vector.
 *
 * A vector obtained from clone() is filled by deep-copying the source's data
 * view into the clone's non-const data view.
 *
 * \param vector  Vector to duplicate; not modified.
 * \return Reference-counted pointer to the copy.
 */
static Teuchos::RCP<vector_type> deepCopy( const vector_type& vector )
{
    Teuchos::RCP<vector_type> duplicate = clone( vector );

    // Overwrite the clone's entries with the source entries.
    duplicate->getDataNonConst().deepCopy( vector.getData()() );

    return duplicate;
}
/// Tests whether `pt` lies inside the box, with each bound loosened by
/// `threshold`.
///
/// Along every axis the point must satisfy
/// `m_lower - threshold <= pt < m_upper + threshold`.
///
/// @param pt         Point to test; indexed like the box bounds.
/// @param threshold  Tolerance applied to both bounds.
/// @return true if the point satisfies the condition on every axis.
bool contains(VecType const &pt, double threshold=1e-10) const
{
    for (unsigned axis = 0; axis < m_lower.size(); ++axis)
    {
        if (pt[axis] < m_lower[axis] - threshold)
        {
            return false;
        }
        if (pt[axis] >= m_upper[axis]+threshold)
        {
            return false;
        }
    }
    return true;
}
/// Returns the value stored for `id`.
///
/// @throws osmium::not_found when `find_id` yields no entry whose key equals
///         `id`.
TValue get(const TId id) const final
{
    const auto pos = find_id(id);

    const bool found = (pos != m_vector.end()) && (pos->first == id);
    if (found) {
        return pos->second;
    }

    throw osmium::not_found{id};
}
/// Returns the value stored for `id`, or
/// `osmium::index::empty_value<TValue>()` when `find_id` yields no entry
/// whose key equals `id`.
TValue get_noexcept(const TId id) const noexcept final
{
    const auto pos = find_id(id);

    const bool found = (pos != m_vector.end()) && (pos->first == id);
    if (found) {
        return pos->second;
    }

    return osmium::index::empty_value<TValue>();
}
static void apply( const value_type& alpha , const vector_type & x , const value_type & beta , const vector_type & y ) { const size_t row_count = x.dimension_0() ; parallel_for( row_count , Update(alpha,x,beta,y) ); }
void recv( const vector_type & v ) { const size_t recv_msg_count = m_recv_request.size(); const std::pair<unsigned,unsigned> recv_range( m_map.count_owned , m_map.count_owned + m_map.count_receive ); const vector_type vrecv = subview<vector_type>( v , recv_range ); // Wait for receives and verify: for ( size_t i = 0 ; i < recv_msg_count ; ++i ) { MPI_Status recv_status ; int recv_which = 0 ; int recv_size = 0 ; MPI_Waitany( recv_msg_count , & m_recv_request[0] , & recv_which , & recv_status ); const int recv_proc = recv_status.MPI_SOURCE ; MPI_Get_count( & recv_status , MPI_BYTE , & recv_size ); // Verify message properly received: const int expected_proc = m_map.host_recv(recv_which,0); const int expected_size = m_map.host_recv(recv_which,1) * m_chunk * sizeof(scalar_type); if ( ( expected_proc != recv_proc ) || ( expected_size != recv_size ) ) { std::ostringstream msg ; msg << "MatrixMultiply communication error:" << " P" << comm::rank( m_map.machine ) << " received from P" << recv_proc << " size " << recv_size << " expected " << expected_size << " from P" << expected_proc ; throw std::runtime_error( msg.str() ); } } // Copy received data to device memory. Impl::DeepCopy<typename Device::memory_space,HostSpace>( vrecv.ptr_on_device() , m_host_recv_buffer.ptr_on_device() , m_map.count_receive * m_chunk * sizeof(scalar_type) ); }
public:
/**
 * \brief Simulate the noise-free model output for input \a u.
 *
 * For every sample index k below the cut-off the output is
 *   y(k) = - sum_i a_(i)*y(k-1-i) + sum_i b_(i)*u(k-i)
 * where each sum is truncated to the samples available so far.
 * The cut-off is n_obs - min(d_*ts_, n_obs); samples at or beyond it keep
 * \a na_value.
 *
 * \param u Input samples.
 * \param na_value Value stored in outputs that are not computed.
 * \return The simulated output, one entry per input sample.
 */
vector_type simulate(vector_type const& u, real_type na_value = real_type(0)) const
{
	namespace ublas = ::boost::numeric::ublas;
	namespace ublasx = ::boost::numeric::ublasx;

	size_type const n_obs = u.size(); // number of samples
	size_type const len_a = ublasx::size(a_); // number of coefficients in a_
	size_type const len_b = ublasx::size(b_); // number of coefficients in b_
	size_type const k_begin = 0;
	size_type const k_end = n_obs - ::std::min(static_cast<size_type>(d_*ts_), n_obs);

	// Every output starts at the N/A value; computed ones are overwritten.
	vector_type y(n_obs, na_value);

	for (size_type k = k_begin; k < k_end; ++k)
	{
		y(k) = 0;

		// Contribution of past outputs: walk y backwards from k-1.
		if (len_a > 0 && k > 0)
		{
			size_type const terms_a = ::std::min(len_a, k);

			y(k) -= ublas::inner_prod(
						ublas::subrange(a_, 0, terms_a),
						ublas::subslice(y, k-1, -1, terms_a)
				);
		}

		// Contribution of current and past inputs: walk u backwards from k.
		if (len_b > 0)
		{
			size_type const terms_b = ::std::min(len_b, k+1);

			y(k) += ublas::inner_prod(
						ublas::subrange(b_, 0, terms_b),
						ublas::subslice(u, k, -1, terms_b)
				);
		}
	}

	return y;
}
/**
 * Recompute the scaling matrix, and optionally its jacobian, at point \p __x.
 *
 * Both matrices are resized to n x n (n = size of \p __x) and `M_zeta` is
 * refreshed from `zeta( __x )`.
 *
 * Scaling matrix `M_value`:
 *  - if `M_zeta * min( norm_inf(distance to LB), norm_inf(distance to UB) )`
 *    exceeds \p __Delta, it is the identity;
 *  - otherwise `M_trust_region_active` is set and diagonal entry i becomes
 *    `M_zeta * min( 1, d_i ) / __Delta`, where d_i is the distance to the
 *    lower bound when `__s(i) < 0` and to the upper bound otherwise.
 *
 * Jacobian `M_jacobian` (only when \p __mode is WITH_JACOBIAN), entry i:
 *  - `M_zeta / __Delta` if `__s(i) < 0` and `__x(i) < M_lb(i) + __Delta`;
 *  - `-M_zeta / __Delta` if `__s(i) > 0` and `__x(i) > M_ub(i) - __Delta`;
 *  - 0 otherwise.
 *
 * \param __Delta trust region radius
 * \param __x current point; must have the size of the stored bounds
 * \param __s direction whose sign selects the bound considered per component
 * \param __mode whether the jacobian is updated as well
 */
void
DirScalingMatrix<NumType>::update( value_type const& __Delta,
                                   vector_type const& __x,
                                   vector_type const& __s,
                                   mode_type __mode )
{
    GST_SMART_ASSERT( __x.size() == M_lb.size() )( __x )( M_lb )( "inconsistent bounds definition" );
    GST_SMART_ASSERT( __x.size() == M_ub.size() )( __x )( M_ub )( "inconsistent bounds definition" );

    size_t const dim = __x.size();

    M_value.resize( dim, dim, 0, 0 );
    M_jacobian.resize( dim, dim, 0, 0 );

    M_zeta = zeta( __x );

    vector_type toLower = distanceToLB( __x );
    vector_type toUpper = distanceToUB( __x );

    bool const scaledDistanceExceedsDelta =
        M_zeta * std::min( norm_inf( toLower ), norm_inf( toUpper ) ) > __Delta;

    if ( scaledDistanceExceedsDelta )
    {
        // we are in the trust region
        M_value = identity_matrix<value_type>( M_value.size1(), M_value.size2() );
    }
    else
    {
        M_trust_region_active = true;

        for ( size_t i = 0; i < dim; ++i )
        {
            // Pick the distance to the bound the direction points towards.
            value_type const gap = ( __s( i ) < 0 ) ? toLower( i ) : toUpper( i );
            M_value( i, i ) = M_zeta * std::min( 1. , gap ) / __Delta;
        }
    }

    if ( __mode != WITH_JACOBIAN )
    {
        return;
    }

    for ( size_t i = 0; i < dim; ++i )
    {
        bool const towardsLower = ( __s( i ) < 0 ) && ( __x( i ) < M_lb( i ) + __Delta );
        bool const towardsUpper = !towardsLower &&
                                  ( __s( i ) > 0 ) && ( __x( i ) > M_ub( i ) - __Delta );

        if ( towardsLower )
        {
            M_jacobian( i, i ) = M_zeta / __Delta;
        }
        else if ( towardsUpper )
        {
            M_jacobian( i, i ) = -M_zeta / __Delta;
        }
        else
        {
            M_jacobian( i, i ) = 0;
        }
    }
}