void split_calc_distance(std::vector<double>& to_sort,viennacl::ocl::context* p_context, int num_splits, naive_knn& knn, viennacl::vector<double>& distances, dense_sliding_window& sliding_window, int num_instances, viennacl::vector<double>& sample) { int len = num_instances / num_splits; auto gpu_begin = distances.begin(); auto gpu_end = gpu_begin + len; int last = num_instances - len * num_splits; int current = 0; knn.calc_distance(distances, sliding_window, current, current+len, sample); current += len; for (; current < num_instances; current += len) { p_context->get_queue().finish(); viennacl::copy(gpu_begin, gpu_end, to_sort.begin()); knn.calc_distance(distances, sliding_window, current, current+len, sample); std::sort(to_sort.begin(), to_sort.end()); } p_context->get_queue().finish(); viennacl::copy(gpu_begin, gpu_end, to_sort.begin()); std::sort(to_sort.begin(), to_sort.end()); if (last > 0) { //knn.calc_distance(distances, sliding_window, current -len, current + last, sample); } }
/**
 * Entry-wise relative comparison of a host reference vector with a device vector.
 *
 * The device vector is copied to the host, and every entry is replaced by
 * |v2[i] - v1[i]| / max(|v2[i]|, |v1[i]|), or 0 when both values are zero.
 * If any relative error exceeds 1e-4, the offending pair is printed and the
 * process terminates with EXIT_FAILURE.
 *
 * @param v1  Reference values on the host.
 * @param v2  Values on the device; only the first v1.size() entries are compared.
 * @return    Infinity norm of the relative-error vector.
 *
 * NOTE(review): the loop runs over v1.size() while the host copy has
 * v2.size() entries -- callers presumably pass vectors of equal size.
 */
ScalarType diff(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
{
  ublas::vector<ScalarType> v2_cpu(v2.size());
  viennacl::backend::finish();
  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());

  for (unsigned int i=0;i<v1.size(); ++i)
  {
    // Keep the device value: v2_cpu[i] is overwritten with the relative error
    // below, and the diagnostic must show the value that was actually computed.
    const ScalarType device_value = v2_cpu[i];

    if ( std::max( std::fabs(device_value), std::fabs(v1[i]) ) > 0 )
      v2_cpu[i] = std::fabs(device_value - v1[i]) / std::max( std::fabs(device_value), std::fabs(v1[i]) );
    else
      v2_cpu[i] = 0.0;

    if (v2_cpu[i] > 0.0001)
    {
      std::cout << "Error at entry " << i << ": " << v1[i] << " vs. " << device_value << std::endl;
      exit(EXIT_FAILURE);
    }
  }

  return norm_inf(v2_cpu);
}
/**
 * Relative 2-norm distance between a host vector and a device vector.
 *
 * @param v1  Reference values on the host.
 * @param v2  Device vector; copied to the host before comparison.
 * @return    norm_2(v1 - v2) / norm_2(v1). No guard against norm_2(v1) == 0.
 */
ScalarType diff_2(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
{
  // Bring the device data to the host so uBLAS can operate on it.
  ublas::vector<ScalarType> host_copy(v2.size());
  viennacl::copy(v2.begin(), v2.end(), host_copy.begin());

  return norm_2(v1 - host_copy)
       / norm_2(v1);
}
/**
 * Entry-wise relative comparison of a host std::vector with a device vector.
 *
 * Each entry of the host copy of v2 is replaced by the relative error
 * |v2[i] - v1[i]| / max(|v2[i]|, |v1[i]|) (0 when both are zero). A relative
 * error above 1e-4 prints the offending pair and exits with EXIT_FAILURE.
 *
 * @param v1  Reference values on the host.
 * @param v2  Device vector to check.
 * @return    Largest relative error over all entries of the host copy.
 */
NumericT diff(std::vector<NumericT> const & v1, viennacl::vector<NumericT> const & v2)
{
  std::vector<NumericT> rel_err(v2.size());
  viennacl::backend::finish();
  viennacl::copy(v2.begin(), v2.end(), rel_err.begin());

  for (std::size_t idx = 0; idx < v1.size(); ++idx)
  {
    NumericT       & entry     = rel_err[idx];
    NumericT const & reference = v1[idx];

    if ( !(std::max( std::fabs(entry), std::fabs(reference) ) > 0) )
      entry = 0.0;
    else
      entry = std::fabs(entry - reference) / std::max( std::fabs(entry), std::fabs(reference) );

    if (entry > 0.0001)
    {
      // Report the untouched device value (read through v2), not the relative error.
      std::cout << "Error at entry " << idx << ": " << reference << " vs. " << v2[idx] << std::endl;
      exit(EXIT_FAILURE);
    }
  }

  // Infinity norm of the relative errors.
  NumericT largest = 0;
  std::size_t pos = 0;
  while (pos < rel_err.size())
  {
    largest = std::max<NumericT>(largest, std::fabs(rel_err[pos]));
    ++pos;
  }
  return largest;
}
/**
 * Fills a device vector with values T(rand()) / T(RAND_MAX).
 *
 * A host buffer of x.internal_size() entries is generated and transferred
 * with a single fast_copy() starting at x.begin().
 *
 * @param x  Device vector to overwrite.
 */
void init_random(viennacl::vector<T> & x)
{
  std::vector<T> host_values(x.internal_size());

  std::size_t pos = 0;
  while (pos < host_values.size())
  {
    host_values[pos] = T(rand()) / T(RAND_MAX);
    ++pos;
  }

  // fast_copy() takes a raw pointer range on the host side.
  T * first = &host_values[0];
  T * last  = &host_values[0] + host_values.size();
  viennacl::fast_copy(first, last, x.begin());
}
/**
 * Largest entry-wise relative error between a host vector and a device vector.
 *
 * @param v1  Reference values on the host.
 * @param v2  Device vector; copied to the host before comparison.
 * @return    Infinity norm of |v2[i] - v1[i]| / max(|v2[i]|, |v1[i]|),
 *            where an entry counts as 0 when both values are zero.
 */
ScalarType diff ( ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType,Alignment> & v2 )
{
  ublas::vector<ScalarType> host_copy ( v2.size() );
  viennacl::copy( v2.begin(), v2.end(), host_copy.begin() );

  for ( unsigned int idx = 0; idx < v1.size(); ++idx )
  {
    // Both magnitudes zero (or not comparable): define the relative error as 0.
    if ( !( std::max ( fabs ( host_copy[idx] ), fabs ( v1[idx] ) ) > 0 ) )
    {
      host_copy[idx] = 0.0;
      continue;
    }

    host_copy[idx] = fabs ( host_copy[idx] - v1[idx] ) / std::max ( fabs ( host_copy[idx] ), fabs ( v1[idx] ) );
  }

  return norm_inf ( host_copy );
}
void prepare_householder_vector( viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& A, viennacl::vector<SCALARTYPE, ALIGNMENT>& D, vcl_size_t size, vcl_size_t row_start, vcl_size_t col_start, vcl_size_t start, bool is_column ) { boost::numeric::ublas::vector<SCALARTYPE> tmp = boost::numeric::ublas::scalar_vector<SCALARTYPE>(size, 0); copy_vec(A, D, row_start, col_start, is_column); fast_copy(D.begin(), D.begin() + vcl_ptrdiff_t(size - start), tmp.begin() + start); //std::cout << "1: " << tmp << "\n"; detail::householder_vector(tmp, start); fast_copy(tmp, D); //std::cout << "2: " << D << "\n"; }
/**
 * Largest entry-wise relative error between a host vector and a device vector.
 *
 * @param v1  Reference values on the host.
 * @param v2  Device vector; copied to the host before comparison.
 * @return    Infinity norm of |v2[i] - v1[i]| / max(|v2[i]|, |v1[i]|),
 *            where an entry counts as 0 when both values are zero.
 */
ScalarType diff(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
{
  ublas::vector<ScalarType> rel_err(v2.size());

  // Workaround for a bug in APP SDK 2.7 on Trinity APUs (with Catalyst 12.8):
  // finish all pending backend work before reading back.
  viennacl::backend::finish();
  viennacl::copy(v2.begin(), v2.end(), rel_err.begin());

  std::size_t idx = 0;
  while (idx < v1.size())
  {
    if ( !(std::max( std::fabs(rel_err[idx]), std::fabs(v1[idx]) ) > 0) )
      rel_err[idx] = 0.0;
    else
      rel_err[idx] = std::fabs(rel_err[idx] - v1[idx]) / std::max( std::fabs(rel_err[idx]), std::fabs(v1[idx]) );
    ++idx;
  }

  return norm_inf(rel_err);
}
/**
 * Matrix-vector product result = mat * vec for a Toeplitz matrix, evaluated
 * through an FFT-based convolution.
 *
 * The matrix elements and the input vector are converted to the complex
 * layout used by the FFT helpers, convolved, converted back to real values,
 * and the first vec.size() entries are written to `result`.
 *
 * @param mat     Toeplitz matrix; mat.size1() must equal result.size() and
 *                mat.size2() must equal vec.size() (checked via assert).
 * @param vec     Input vector.
 * @param result  Output vector.
 */
void prod_impl(const viennacl::toeplitz_matrix<SCALARTYPE, ALIGNMENT> & mat, const viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & vec, viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> & result)
{
  typedef viennacl::vector<SCALARTYPE, VECTOR_ALIGNMENT> buffer_type;

  assert(mat.size1() == result.size());
  assert(mat.size2() == vec.size());

  // Work buffers: four times the input length each.
  buffer_type work_a(vec.size() * 4);
  work_a.clear();
  buffer_type work_b(vec.size() * 4);

  // Matrix elements in complex layout (two entries per element).
  buffer_type mat_complex(mat.elements().size() * 2);
  viennacl::detail::fft::real_to_complex(mat.elements(), mat_complex, mat.elements().size());

  // Input vector copied into the cleared buffer, then converted to complex layout.
  copy(vec, work_a);
  viennacl::detail::fft::real_to_complex(work_a, work_b, vec.size() * 2);

  // Convolution, then back to real values.
  viennacl::linalg::convolve(mat_complex, work_b, work_a);
  viennacl::detail::fft::complex_to_real(work_a, work_b, vec.size() * 2);

  // Only the leading vec.size() entries form the product.
  copy(work_b.begin(), work_b.begin() + vec.size(), result.begin());
}