Ipp32f EuclidianDistance<Ipp32f>::doCalculate(const VectorT<Ipp32f>& v1, const VectorT<Ipp32f>& v2) const
{
  if (v1.size() != v2.size())
    fthrow(Exception, "Input vectors must have the same size.");

  Ipp32f dist = 0;

#ifdef NICE_USELIB_IPP
  VectorT<Ipp32f> res(v1.size());
  ippsSub_32f(v1.getDataPointer(), v2.getDataPointer(), res.getDataPointer(), v1.size());
  ippsSqr_32f(res.getDataPointer(), res.getDataPointer(), res.size());
  dist = res.Sum();
#else // NICE_USELIB_IPP
  const Ipp32f* pSrc1 = v1.getDataPointer();
  const Ipp32f* pSrc2 = v2.getDataPointer();
  for (Ipp32u i = 0; i < v1.size(); ++i, ++pSrc1, ++pSrc2)
    dist += (*pSrc1 - *pSrc2) * (*pSrc1 - *pSrc2);
#endif // NICE_USELIB_IPP

  return std::sqrt(dist);
}
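// Illustration only, not part of NICE: the same computation on std::vector,
// mirroring the non-IPP fallback branch above (sum of squared differences,
// then the square root).
#include <cmath>
#include <vector>

static float euclideanDistanceSketch(const std::vector<float>& a,
                                     const std::vector<float>& b)
{
  float dist = 0.0f;
  for (std::size_t i = 0; i < a.size() && i < b.size(); ++i)
    dist += (a[i] - b[i]) * (a[i] - b[i]);   // squared difference per entry
  return std::sqrt(dist);
}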
Ipp32f BhattacharyyaDistance<Ipp32f>::doCalculate(const VectorT<Ipp32f>& v1, const VectorT<Ipp32f>& v2) const
{
  if (v1.size() != v2.size())
    fthrow(Exception, "Input vectors must have the same size.");

  Ipp32f B;

#ifdef NICE_USELIB_IPP
  VectorT<Ipp32f> v1f(v1);
  v1f *= v2;
  ippsSqrt(v1f.getDataPointer(), v1f.getDataPointer(), v1f.size());
  ippsSum(v1f.getDataPointer(), v1f.size(), &B);
#else // NICE_USELIB_IPP
  B = 0.0;
  for (uint i = 0; i < v1.size(); ++i)
    B += std::sqrt(v1[i] * v2[i]);
#endif // NICE_USELIB_IPP

  return std::sqrt(1 - B);
}
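// Illustration only: the Bhattacharyya coefficient B = sum_i sqrt(p_i * q_i)
// and the distance sqrt(1 - B) returned above, on std::vector. The inputs
// are assumed to be normalized histograms (entries summing to 1); otherwise
// 1 - B can become negative.
#include <cmath>
#include <vector>

static float bhattacharyyaSketch(const std::vector<float>& p,
                                 const std::vector<float>& q)
{
  float coeff = 0.0f;                          // Bhattacharyya coefficient
  for (std::size_t i = 0; i < p.size() && i < q.size(); ++i)
    coeff += std::sqrt(p[i] * q[i]);
  return std::sqrt(1.0f - coeff);              // distance, as returned above
}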
void vector_test(const VectorT& A, const VectorT& B, Accumulator& Result)
{
  const Uint sizeA = A.size();
  const Uint sizeB = B.size();

  for (Uint i = 0, j = 0; i != sizeA && j != sizeB; ++i, ++j)
    test(A[i], B[j], Result);

  Result.exact(sizeA == sizeB);
}
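// Hypothetical minimal Accumulator showing the interface vector_test relies
// on; the test framework's real accumulator and test() overloads are not
// shown in this snippet.
struct CountingAccumulator
{
  unsigned failures = 0;
  void exact(bool ok) { if (!ok) ++failures; }  // records the size check
};
// test(A[i], B[j], Result) is assumed to compare one element pair and
// report the outcome to the accumulator in the same way.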
void set_row(const Uint array_idx, const VectorT& row)
{
  if (row.size() != row_size(array_idx))
    m_array[array_idx].resize(row.size());

  Uint j = 0;
  boost_foreach(const typename VectorT::value_type& v, row)
    m_array[array_idx][j++] = v;
}
void set_row(const Uint array_idx, const VectorT& row)
{
  cf3_assert(row.size() == row_size());

  Row row_to_set = m_array[array_idx];

  for (Uint j = 0; j < row.size(); ++j)
    row_to_set[j] = row[j];
}
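// The two set_row overloads above differ only in how they treat a size
// mismatch: the first resizes the stored row, the second asserts a fixed
// row size. A sketch of the element-wise copy they share, on plain
// std::vector (illustration only):
#include <vector>

template <typename Row, typename Vec>
void copy_row_sketch(Row& dst, const Vec& src)
{
  // the caller has already resized dst (variant 1) or asserted equal
  // sizes (variant 2)
  for (std::size_t j = 0; j < src.size(); ++j)
    dst[j] = src[j];
}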
MatrixT diag(VectorT const &v,
             typename MatrixT::ScalarType zero = static_cast<typename MatrixT::ScalarType>(0))
{
    MatrixT diag(v.size(), v.size(), zero);
    diag.set_zero(zero);

    // populate the diagonal: row and column index lists are identical
    std::vector<IndexType> indices;
    for (graphblas::IndexType ix = 0; ix < v.size(); ++ix)
    {
        indices.push_back(ix);
    }

    graphblas::buildmatrix(diag, indices.begin(), indices.begin(),
                           v.begin(), v.size());
    return diag;
}
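// Illustration only: the same construction on a dense representation. A
// diagonal matrix stores v[i] at position (i, i), so the row and column
// index lists coincide -- which is why indices.begin() is passed twice to
// buildmatrix above.
#include <vector>

static std::vector<std::vector<double> > diagSketch(const std::vector<double>& v)
{
    std::vector<std::vector<double> > m(v.size(),
                                        std::vector<double>(v.size(), 0.0));
    for (std::size_t i = 0; i < v.size(); ++i)
        m[i][i] = v[i];                     // populate the diagonal
    return m;
}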
void check_v(VectorT vec, IndexObj const& obj)
{
  for (index_type p = 0; p < vec.size(); ++p)
    test_assert(vec(p) == obj(p));
}
void set_v(VectorT vec, IndexObj const& obj)
{
  for (index_type p = 0; p < vec.size(); ++p)
    vec(p) = obj(p);
}
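// Hypothetical IndexObj for the two helpers above: any functor mapping an
// index to a value fits the interface (this Ramp type is illustrative, not
// the test suite's actual object).
#include <cstddef>

struct Ramp
{
  double operator()(std::size_t p) const { return 2.0 * p + 1.0; }
};
// set_v(view, Ramp());    // fills the view with 1, 3, 5, ...
// check_v(view, Ramp());  // asserts the same pattern reads back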
void print_vector(StreamT& stream, const VectorT& vector, const std::string& sep = " ",
                  const std::string& prefix = "", const std::string& suffix = "")
{
  stream << prefix;
  const Uint vector_size = vector.size();
  for (Uint i = 0; i != vector_size; ++i)
  {
    stream << (i != 0 ? sep : "") << vector[i];
  }
  stream << suffix;
}
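// Usage sketch, assuming std::ostream works as StreamT and std::vector as
// VectorT (the template only needs operator<<, size() and operator[]):
//
//   std::vector<int> v = {1, 2, 3};
//   print_vector(std::cout, v, ", ", "[", "]");   // prints: [1, 2, 3]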
typename VectorT::value_type
norm_2(VectorT const& v1,
       typename viennacl::tools::enable_if<
           viennacl::is_stl<typename viennacl::traits::tag_of<VectorT>::type>::value
       >::type* dummy = 0)
{
  typename VectorT::value_type result = 0;
  for (typename VectorT::size_type i = 0; i < v1.size(); ++i)
    result += v1[i] * v1[i];
  return std::sqrt(result);
}
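// Usage sketch: the enable_if/tag_of dispatch selects this overload for
// STL-like containers, while viennacl::vector arguments resolve to a
// device-side overload elsewhere in the library.
//
//   std::vector<double> v;
//   v.push_back(3.0);
//   v.push_back(4.0);
//   double n = norm_2(v);   // 5.0, via the CPU loop above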
void FilterT<float,float,float>::filterX ( const ImageT<float>& src, const VectorT<float>& kernel, ImageT<float> &result, const int& anchor )
{
  if ( result.width() != src.width() || result.height() != src.height() )
  {
    result = ImageT<float> ( src.width(), src.height() );
  }

  uint kernelanch = ( anchor < 0 ) ? ( kernel.size() / 2 ) : anchor;

#ifdef NICE_USELIB_IPP
  IppiSize ippiSize = { (int)(src.width() - ( kernel.size() - 1 )), (int)(src.height()) };
  IppStatus ret = ippiFilterRow_C1R ( src.getPixelPointerXY ( kernel.size() - 1 - kernelanch, 0 ),
                                      src.getStepsize(),
                                      result.getPixelPointerXY ( kernelanch, 0 ),
                                      result.getStepsize(),
                                      ippiSize,
                                      kernel.getDataPointer(),
                                      kernel.size(),
                                      kernelanch );
  if ( ret != ippStsNoErr )
    fthrow ( ImageException, ippGetStatusString ( ret ) );
#else // NICE_USELIB_IPP
  double sum = 0.0;
  int ka = kernelanch + 1;
  uint xstart = kernelanch;
  uint xend = src.width() - ( kernel.size() - 1 - kernelanch );
  int i;
  const float* pSrc;
  float* pDst;
  for ( int y = 0; y < src.height(); ++y )
  {
    pSrc = src.getPixelPointerXY ( kernelanch, y );
    pDst = result.getPixelPointerXY ( kernelanch, y );
    for ( uint x = xstart; x < xend; ++x, ++pSrc, ++pDst )
    {
      sum = 0.0;
      i = kernel.size();
      do
      {
        sum += * ( pSrc - ka + i ) * kernel[kernel.size()-i];
        --i;
      } while ( i != 0 );
      *pDst = static_cast<float> ( sum );
    }
  }
#endif // NICE_USELIB_IPP
}
void FilterT<float,float,float>::filterY ( const ImageT<float>& src, const VectorT<float>& kernel, ImageT<float> &result, const int& anchor )
{
  if ( result.width() != src.width() || result.height() != src.height() )
  {
    result = ImageT<float> ( src.width(), src.height() );
  }

  uint kernelanch = ( anchor < 0 ) ? ( kernel.size() / 2 ) : anchor;

#ifdef NICE_USELIB_IPP
  IppiSize ippiSize = { (int)(src.width()), (int)(src.height() - ( kernel.size() - 1 )) };
  IppStatus ret = ippiFilterColumn_C1R ( src.getPixelPointerXY ( 0, kernel.size() - 1 - kernelanch ),
                                         src.getStepsize(),
                                         result.getPixelPointerXY ( 0, kernelanch ),
                                         result.getStepsize(),
                                         ippiSize,
                                         kernel.getDataPointer(),
                                         kernel.size(),
                                         kernelanch );
  if ( ret != ippStsNoErr )
    fthrow ( ImageException, ippGetStatusString ( ret ) );
#else // NICE_USELIB_IPP
  double sum = 0.0;
  int ks = kernel.size() - 1;
  int i;
  for ( uint y = kernelanch; y < src.height() - ( kernel.size() - 1 - kernelanch ); ++y )
  {
    for ( int x = 0; x < src.width(); ++x )
    {
      sum = 0.0;
      i = ks;
      do
      {
        // TODO: optimizable similar to filterX, old version was buggy
        sum += src ( x, y + kernelanch - ks + i ) * kernel[ks-i];
        --i;
      } while ( i >= 0 );
      result ( x, y ) = static_cast<float> ( sum );
    }
  }
#endif // NICE_USELIB_IPP
}
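// Illustration of separable filtering with the two routines above: a 2D
// smoothing kernel that factors into two 1D passes is applied row-wise,
// then column-wise. The 3-tap binomial kernel, the variable names, and the
// assumption that FilterT instances are default-constructible are all
// illustrative, not taken from the library.
//
//   NICE::VectorT<float> kernel(3);
//   kernel[0] = 0.25f; kernel[1] = 0.5f; kernel[2] = 0.25f;
//   FilterT<float,float,float> filter;
//   filter.filterX(src, kernel, tmp, -1);      // horizontal pass
//   filter.filterY(tmp, kernel, result, -1);   // vertical pass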
std::vector< typename viennacl::result_of::cpu_value_type<typename VectorT::value_type>::type >
bisect(VectorT const & alphas, VectorT const & betas)
{
  typedef typename viennacl::result_of::value_type<VectorT>::type           ScalarType;
  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;

  std::size_t size = betas.size();
  std::vector<CPU_ScalarType> x_temp(size);
  std::vector<CPU_ScalarType> beta_bisect;
  std::vector<CPU_ScalarType> wu;

  double rel_error = std::numeric_limits<CPU_ScalarType>::epsilon();
  beta_bisect.push_back(0);

  for (std::size_t i = 1; i < size; i++)
    beta_bisect.push_back(betas[i] * betas[i]);

  // Gershgorin bounds for the spectrum of the tridiagonal matrix
  double xmin = alphas[size - 1] - std::fabs(betas[size - 1]);
  double xmax = alphas[size - 1] + std::fabs(betas[size - 1]);

  for (std::size_t i = 0; i < size - 1; i++)
  {
    double h = std::fabs(betas[i]) + std::fabs(betas[i + 1]);
    if (alphas[i] + h > xmax)
      xmax = alphas[i] + h;
    if (alphas[i] - h < xmin)
      xmin = alphas[i] - h;
  }

  double eps1 = 1e-6;
  /*double eps2 = (xmin + xmax > 0) ? (rel_error * xmax) : (-rel_error * xmin);
  if (eps1 <= 0)
    eps1 = eps2;
  else
    eps2 = 0.5 * eps1 + 7.0 * eps2; */

  double x0 = xmax;

  for (std::size_t i = 0; i < size; i++)
  {
    x_temp[i] = xmax;
    wu.push_back(xmin);
  }

  for (long k = size - 1; k >= 0; --k)
  {
    double xu = xmin;
    for (long i = k; i >= 0; --i)
    {
      if (xu < wu[i])
      {
        xu = wu[i];
        break;
      }
    }

    if (x0 > x_temp[k])
      x0 = x_temp[k];

    double x1 = (xu + x0) / 2.0;
    while (x0 - xu > 2.0 * rel_error * (std::fabs(xu) + std::fabs(x0)) + eps1)
    {
      // Sturm sequence: count eigenvalues below the current midpoint x1
      std::size_t a = 0;
      double q = 1;
      for (std::size_t i = 0; i < size; i++)
      {
        if (q != 0)
          q = alphas[i] - x1 - beta_bisect[i] / q;
        else
          q = alphas[i] - x1 - std::fabs(betas[i] / rel_error);

        if (q < 0)
          a++;
      }

      if (a <= static_cast<std::size_t>(k))
      {
        xu = x1;
        if (a < 1)
          wu[0] = x1;
        else
        {
          wu[a] = x1;
          if (x_temp[a - 1] > x1)
            x_temp[a - 1] = x1;
        }
      }
      else
        x0 = x1;

      x1 = (xu + x0) / 2.0;
    }
    x_temp[k] = x1;
  }
  return x_temp;
}
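// Illustration only: calling bisect for a small symmetric tridiagonal matrix
// (diagonal in alphas, subdiagonal in betas with betas[0] = 0 unused), using
// std::vector as VectorT. The 2x2 matrix [[2, 1], [1, 2]] has eigenvalues
// 1 and 3, which the Gershgorin bounds plus Sturm-count bisection above
// recover to within eps1.
//
//   std::vector<double> alphas(2, 2.0);
//   std::vector<double> betas(2, 0.0);
//   betas[1] = 1.0;
//   std::vector<double> eig = bisect(alphas, betas);  // approx. {1.0, 3.0}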
void operator()(SystemType pde_system,
                DomainType & domain,
                MatrixT & system_matrix,
                VectorT & load_vector) const
{
  typedef typename viennagrid::result_of::cell_tag<DomainType>::type                  CellTag;
  typedef typename viennagrid::result_of::point<DomainType>::type                     PointType;
  typedef typename viennagrid::result_of::element<DomainType, CellTag>::type          CellType;
  typedef typename viennagrid::result_of::element_range<DomainType, CellTag>::type    CellContainer;
  typedef typename viennagrid::result_of::iterator<CellContainer>::type               CellIterator;
  typedef typename SystemType::equation_type                                          EquationType;

#ifdef VIENNAFEM_DEBUG
  std::cout << "Strong form: " << pde_system.pde(0) << std::endl;
#endif
  log_strong_form(pde_system);
  EquationType weak_form_general = viennafem::make_weak_form(pde_system.pde(0));
#ifdef VIENNAFEM_DEBUG
  std::cout << "* pde_solver::operator(): Using weak form general: " << weak_form_general << std::endl;
#endif
  std::vector<EquationType> temp(1);
  temp[0] = weak_form_general;
  log_weak_form(temp, pde_system);

  EquationType weak_form = viennamath::apply_coordinate_system(viennamath::cartesian< PointType::dim >(), weak_form_general);
  //EquationType weak_form = viennamath::apply_coordinate_system(viennamath::cartesian<Config::coordinate_system_tag::dim>(), weak_form_general);
  temp[0] = weak_form;
  log_coordinated_weak_form(temp, pde_system);

#ifdef VIENNAFEM_DEBUG
  std::cout << "* pde_solver::operator(): Using weak form " << weak_form << std::endl;
  std::cout << "* pde_solver::operator(): Write dt_dx coefficients" << std::endl;
#endif

  typedef typename reference_cell_for_basis<CellTag, viennafem::lagrange_tag<1> >::type    ReferenceCell;

  //
  // Create accessors for performance in the subsequent dt_dx_handler step
  //
  //viennafem::dtdx_assigner<DomainType, StorageType, ReferenceCell>::apply(domain, storage);
  viennafem::dt_dx_handler<DomainType, StorageType, ReferenceCell> dt_dx_handler(domain, storage);

  // fill with cell quantities
  CellContainer cells = viennagrid::elements<CellType>(domain);
  for (CellIterator cell_iter = cells.begin(); cell_iter != cells.end(); ++cell_iter)
  {
    //cell_iter->print_short();
    //viennadata::access<example_key, double>()(*cell_iter) = i;
    //viennafem::dt_dx_handler<ReferenceCell>::apply(storage, *cell_iter);
    dt_dx_handler(*cell_iter);
  }

#ifdef VIENNAFEM_DEBUG
  std::cout << "* pde_solver::operator(): Create Mapping:" << std::endl;
#endif
  std::size_t map_index = create_mapping(storage, pde_system, domain);

#ifdef VIENNAFEM_DEBUG
  std::cout << "* pde_solver::operator(): Assigned degrees of freedom in domain so far: " << map_index << std::endl;
#endif

  // resize global system matrix and load vector if needed:
  // TODO: This can be a performance bottleneck for large numbers of segments! (lots of resize operations...)
  if (map_index > system_matrix.size1())
  {
    MatrixT temp = system_matrix;
    ////std::cout << "Resizing system matrix..." << std::endl;
    system_matrix.resize(map_index, map_index, false);
    system_matrix.clear();
    system_matrix.resize(map_index, map_index, false);
    for (typename MatrixT::iterator1 row_it = temp.begin1(); row_it != temp.end1(); ++row_it)
    {
      for (typename MatrixT::iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
        system_matrix(col_it.index1(), col_it.index2()) = *col_it;
    }
  }

  if (map_index > load_vector.size())
  {
    VectorT temp = load_vector;
#ifdef VIENNAFEM_DEBUG
    std::cout << "Resizing load vector..." << std::endl;
#endif
    load_vector.resize(map_index, false);
    load_vector.clear();
    load_vector.resize(map_index, false);
    for (std::size_t i = 0; i < temp.size(); ++i)
      load_vector(i) = temp(i);
  }

#ifdef VIENNAFEM_DEBUG
  std::cout << "* pde_solver::operator(): Transform to reference element" << std::endl;
#endif
  EquationType transformed_weak_form = viennafem::transform_to_reference_cell<CellType>(storage, weak_form, pde_system);
  temp[0] = transformed_weak_form;
  log_transformed_weak_form<CellType, StorageType>(temp, pde_system);

  std::cout << "* pde_solver::operator(): Transformed weak form:" << std::endl;
  std::cout << transformed_weak_form << std::endl;
  //std::cout << std::endl;

#ifdef VIENNAFEM_DEBUG
  std::cout << "* pde_solver::operator(): Assemble system" << std::endl;
#endif
  typedef detail::equation_wrapper<MatrixT, VectorT>    wrapper_type;
  wrapper_type wrapper(system_matrix, load_vector);

  detail::pde_assembler_internal()(storage, transformed_weak_form, pde_system, domain, wrapper);
  //pde_assembler_internal()(transformed_weak_form, pde_system, domain, system_matrix, load_vector);
}
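// The resize blocks above use a grow-and-copy idiom: resize(..., false)
// discards the existing entries (presumably because preserving contents on
// resize is fragile for the sparse types used here), so the old values are
// saved in a temporary and written back. The same idea on a plain vector
// (illustration only):
#include <cstddef>
#include <vector>

template <typename T>
void grow_preserving(std::vector<T>& v, std::size_t new_size)
{
  std::vector<T> temp = v;               // save old entries
  v.assign(new_size, T());               // resize + clear
  for (std::size_t i = 0; i < temp.size() && i < new_size; ++i)
    v[i] = temp[i];                      // copy back
}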
std::vector<int> flatnonzero(const VectorT& x)
{
  std::vector<int> out;
  for (int i = 0; i < x.size(); ++i)
    if (x[i] != 0)
      out.push_back(i);
  return out;
}
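// Usage sketch (the helper mirrors NumPy's flatnonzero: indices of the
// nonzero entries, in order):
//
//   std::vector<double> x = {0.0, 1.5, 0.0, -2.0};
//   std::vector<int> idx = flatnonzero(x);   // idx == {1, 3}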
// construct table from length vector
template <typename VectorT> explicit RangeTable(const VectorT &lengths)
{
    const unsigned number_of_blocks = [&lengths]() {
        unsigned num = (lengths.size() + 1) / (BLOCK_SIZE + 1);
        if ((lengths.size() + 1) % (BLOCK_SIZE + 1) != 0)
        {
            num += 1;
        }
        return num;
    }();

    block_offsets.reserve(number_of_blocks);
    diff_blocks.reserve(number_of_blocks);

    unsigned last_length = 0;
    unsigned lengths_prefix_sum = 0;
    unsigned block_idx = 0;
    unsigned block_counter = 0;
    BlockT block;
    unsigned block_sum = 0;
    for (const unsigned l : lengths)
    {
        // first entry of a block: encode absolute offset
        if (block_idx == 0)
        {
            block_offsets.push_back(lengths_prefix_sum);
            block_sum = 0;
        }
        else
        {
            block[block_idx - 1] = last_length;
            block_sum += last_length;
        }

        BOOST_ASSERT((block_idx == 0 && block_offsets[block_counter] == lengths_prefix_sum) ||
                     lengths_prefix_sum == (block_offsets[block_counter] + block_sum));

        // block is full
        if (BLOCK_SIZE == block_idx)
        {
            diff_blocks.push_back(block);
            block_counter++;
        }

        // we can only store lengths up to 255
        BOOST_ASSERT(l <= 255);

        lengths_prefix_sum += l;
        last_length = l;

        block_idx = (block_idx + 1) % (BLOCK_SIZE + 1);
    }

    // Last block can't be finished because we didn't add the sentinel
    BOOST_ASSERT(block_counter == (number_of_blocks - 1));

    // one block missing: starts with guard value
    if (0 == block_idx)
    {
        // the last value is used as sentinel
        block_offsets.push_back(lengths_prefix_sum);
        block_idx = 1;
        last_length = 0;
    }

    while (0 != block_idx)
    {
        block[block_idx - 1] = last_length;
        last_length = 0;
        block_idx = (block_idx + 1) % (BLOCK_SIZE + 1);
    }
    diff_blocks.push_back(block);

    BOOST_ASSERT(diff_blocks.size() == number_of_blocks && block_offsets.size() == number_of_blocks);

    sum_lengths = lengths_prefix_sum;
}
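// Illustration only, not OSRM's actual accessor: recovering the prefix sum
// at entry `id` from the compressed layout built above -- one absolute
// offset per block plus 8-bit per-entry length deltas.
unsigned prefix_sum_sketch(unsigned id) const
{
    const unsigned block = id / (BLOCK_SIZE + 1);     // which block
    const unsigned internal = id % (BLOCK_SIZE + 1);  // position inside it
    unsigned sum = block_offsets[block];              // absolute offset
    for (unsigned i = 0; i < internal; ++i)
        sum += diff_blocks[block][i];                 // add the deltas
    return sum;
}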