/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'. * * @param src_buffer A smart pointer to the begin of an allocated buffer * @param dst_buffer A smart pointer to the end of an allocated buffer * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes) * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes) * @param bytes_to_copy Number of bytes to be copied */ inline void memory_copy(handle_type const & src_buffer, handle_type & dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy) { assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!")); assert( (src_buffer.get() != NULL) && bool("Memory not initialized!")); #ifdef VIENNACL_WITH_OPENMP #pragma omp parallel for #endif for (long i=0; i<long(bytes_to_copy); ++i) dst_buffer.get()[vcl_size_t(i)+dst_offset] = src_buffer.get()[vcl_size_t(i) + src_offset]; }
/** @brief Returns the number of entries of the vector obtained from a diag(A, k) expression.
 *
 * The left operand of the proxy is the matrix A, the right operand is the integer offset k.
 * A positive k shortens the usable column range (A_size2 - k), a negative k shortens the
 * usable row range (A_size1 + k); the result is the smaller of the two remaining extents.
 *
 * If |k| is so large that no entry is left, the computed depth is zero or negative.
 * The result is clamped to zero in that case, because converting a negative int to
 * vcl_size_t would otherwise wrap around to a huge size.
 *
 * @param proxy  The diag() expression (matrix on the left, offset k on the right)
 * @return Number of entries on the selected diagonal, 0 if the offset lies outside the matrix
 */
vcl_size_t size(vector_expression<LHS, const int, op_matrix_diag> const & proxy)
{
  int k = proxy.rhs();
  int A_size1 = static_cast<int>(size1(proxy.lhs()));
  int A_size2 = static_cast<int>(size2(proxy.lhs()));

  int row_depth = std::min(A_size1, A_size1 + k);
  int col_depth = std::min(A_size2, A_size2 - k);

  // Clamp before the unsigned conversion: an out-of-range offset yields an empty diagonal.
  return vcl_size_t(std::max(0, std::min(row_depth, col_depth)));
}
void copy_to_complex_array(std::complex<NumericT> * input_complex, viennacl::vector<NumericT, AlignmentV> const & in, vcl_size_t size) { #ifdef VIENNACL_WITH_OPENMP #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE) #endif for (long i2 = 0; i2 < long(size * 2); i2 += 2) { //change array to complex array vcl_size_t i = vcl_size_t(i2); input_complex[i / 2] = std::complex<NumericT>(in[i], in[i + 1]); } }
/** @brief Sets the first 'size' entries of two arrays to zero.
 *
 * @param input1  First array to be zeroed
 * @param input2  Second array to be zeroed
 * @param size    Number of entries to reset in each array
 */
void zero2(NumericT *input1, NumericT *input2, vcl_size_t size)
{
  long const entry_count = long(size);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
  for (long pos = 0; pos < entry_count; ++pos)
  {
    input1[pos] = 0;
    input2[pos] = 0;
  }
}
void copy_to_vector(std::complex<NumericT> * input_complex, NumericT * in, vcl_size_t size) { #ifdef VIENNACL_WITH_OPENMP #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE) #endif for (long i2 = 0; i2 < long(size); i2++) { vcl_size_t i = vcl_size_t(i2); in[i * 2] = static_cast<NumericT>(std::real(input_complex[i])); in[i * 2 + 1] = static_cast<NumericT>(std::imag(input_complex[i])); } }
/** @brief Reads data from a buffer back to main RAM. * * @param src_buffer A smart pointer to the beginning of an allocated source buffer * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_ * @param bytes_to_copy Number of bytes to be read * @param ptr Location in main RAM where to read data should be written to */ inline void memory_read(handle_type const & src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_copy, void * ptr, bool /*async*/) { assert( (src_buffer.get() != NULL) && bool("Memory not initialized!")); #ifdef VIENNACL_WITH_OPENMP #pragma omp parallel for #endif for (long i=0; i<long(bytes_to_copy); ++i) static_cast<char *>(ptr)[i] = src_buffer.get()[vcl_size_t(i)+src_offset]; }
/** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer' * * @param dst_buffer A smart pointer to the beginning of an allocated buffer * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes) * @param bytes_to_copy Number of bytes to be copied * @param ptr Pointer to the first byte to be written */ inline void memory_write(handle_type & dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_copy, const void * ptr, bool /*async*/) { assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!")); #ifdef VIENNACL_WITH_OPENMP #pragma omp parallel for #endif for (long i=0; i<long(bytes_to_copy); ++i) dst_buffer.get()[vcl_size_t(i)+dst_offset] = static_cast<const char *>(ptr)[i]; }
void copy_to_vector(std::complex<NumericT> * input_complex, viennacl::vector_base<NumericT> & in, vcl_size_t size) { std::vector<NumericT> temp(2 * size); #ifdef VIENNACL_WITH_OPENMP #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE) #endif for (long i2 = 0; i2 < long(size); i2++) { vcl_size_t i = vcl_size_t(i2); temp[i * 2] = static_cast<NumericT>(std::real(input_complex[i])); temp[i * 2 + 1] = static_cast<NumericT>(std::imag(input_complex[i])); } viennacl::copy(temp, in); }
void svd_qr_shift(MatrixType & vcl_u, MatrixType & vcl_v, CPU_VectorType & q, CPU_VectorType & e) { typedef typename MatrixType::value_type ScalarType; typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType; vcl_size_t n = q.size(); int m = static_cast<int>(vcl_u.size1()); detail::transpose(vcl_u); detail::transpose(vcl_v); std::vector<CPU_ScalarType> signs_v(n, 1); std::vector<CPU_ScalarType> cs1(n), ss1(n), cs2(n), ss2(n); viennacl::vector<CPU_ScalarType> tmp1(n, viennacl::traits::context(vcl_u)), tmp2(n, viennacl::traits::context(vcl_u)); bool goto_test_conv = false; for (int k = static_cast<int>(n) - 1; k >= 0; k--) { // std::cout << "K = " << k << std::endl; vcl_size_t iter = 0; for (iter = 0; iter < detail::ITER_MAX; iter++) { // test for split int l; for (l = k; l >= 0; l--) { goto_test_conv = false; if (std::fabs(e[vcl_size_t(l)]) <= detail::EPS) { // set it goto_test_conv = true; break; } if (std::fabs(q[vcl_size_t(l) - 1]) <= detail::EPS) { // goto break; } } if (!goto_test_conv) { CPU_ScalarType c = 0.0; CPU_ScalarType s = 1.0; //int l1 = l - 1; //int l2 = k; for (int i = l; i <= k; i++) { CPU_ScalarType f = s * e[vcl_size_t(i)]; e[vcl_size_t(i)] = c * e[vcl_size_t(i)]; if (std::fabs(f) <= detail::EPS) { //l2 = i - 1; break; } CPU_ScalarType g = q[vcl_size_t(i)]; CPU_ScalarType h = detail::pythag(f, g); q[vcl_size_t(i)] = h; c = g / h; s = -f / h; cs1[vcl_size_t(i)] = c; ss1[vcl_size_t(i)] = s; } // std::cout << "Hitted!" 
<< l1 << " " << l2 << "\n"; // for (int i = l; i <= l2; i++) // { // for (int j = 0; j < m; j++) // { // CPU_ScalarType y = u(j, l1); // CPU_ScalarType z = u(j, i); // u(j, l1) = y * cs1[i] + z * ss1[i]; // u(j, i) = -y * ss1[i] + z * cs1[i]; // } // } } CPU_ScalarType z = q[vcl_size_t(k)]; if (l == k) { if (z < 0) { q[vcl_size_t(k)] = -z; signs_v[vcl_size_t(k)] *= -1; } break; } if (iter >= detail::ITER_MAX - 1) break; CPU_ScalarType x = q[vcl_size_t(l)]; CPU_ScalarType y = q[vcl_size_t(k) - 1]; CPU_ScalarType g = e[vcl_size_t(k) - 1]; CPU_ScalarType h = e[vcl_size_t(k)]; CPU_ScalarType f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2 * h * y); g = detail::pythag<CPU_ScalarType>(f, 1); if (f < 0) { f = ((x - z) * (x + z) + h * (y / (f - g) - h)) / x; } else { f = ((x - z) * (x + z) + h * (y / (f + g) - h)) / x; } CPU_ScalarType c = 1; CPU_ScalarType s = 1; for (vcl_size_t i = static_cast<vcl_size_t>(l) + 1; i <= static_cast<vcl_size_t>(k); i++) { g = e[i]; y = q[i]; h = s * g; g = c * g; CPU_ScalarType z2 = detail::pythag(f, h); e[i - 1] = z2; c = f / z2; s = h / z2; f = x * c + g * s; g = -x * s + g * c; h = y * s; y = y * c; cs1[i] = c; ss1[i] = s; z2 = detail::pythag(f, h); q[i - 1] = z2; c = f / z2; s = h / z2; f = c * g + s * y; x = -s * g + c * y; cs2[i] = c; ss2[i] = s; } { viennacl::copy(cs1, tmp1); viennacl::copy(ss1, tmp2); givens_prev(vcl_v, tmp1, tmp2, static_cast<int>(n), l, k); } { viennacl::copy(cs2, tmp1); viennacl::copy(ss2, tmp2); givens_prev(vcl_u, tmp1, tmp2, m, l, k); } e[vcl_size_t(l)] = 0.0; e[vcl_size_t(k)] = f; q[vcl_size_t(k)] = x; } } viennacl::copy(signs_v, tmp1); change_signs(vcl_v, tmp1, static_cast<int>(n)); // transpose singular matrices again detail::transpose(vcl_u); detail::transpose(vcl_v); }
/** @brief Implicit QL iteration on a symmetric tridiagonal matrix, eigenvalues only.
 *
 * On entry d holds the diagonal and e the subdiagonal in e[1..n-1]; e is first shifted down
 * by one position (e[i-1] = e[i]) and e[n-1] is set to zero. On return d holds the computed
 * eigenvalues (unsorted) and e has been overwritten.
 *
 * The convergence tolerance is fixed at 1e-6 (relative to the running maximum of
 * |d[l]| + |e[l]|), and there is no cap on the number of iterations per eigenvalue.
 *
 * @param n  Number of rows/columns of the tridiagonal matrix; nothing is done for n == 0
 * @param d  Diagonal entries on input, eigenvalues on output
 * @param e  Subdiagonal entries on input (in e[1..n-1]), destroyed on output
 */
void tql1(vcl_size_t n, VectorType & d, VectorType & e)
{
  // Nothing to do for an empty problem; e[n - 1] below would otherwise index with a wrapped-around value.
  if (n == 0)
    return;

  for (vcl_size_t i = 1; i < n; i++)
    e[i - 1] = e[i];

  e[n - 1] = 0;

  SCALARTYPE f = 0.;
  SCALARTYPE tst1 = 0.;
  SCALARTYPE eps = static_cast<SCALARTYPE>(1e-6);

  for (vcl_size_t l = 0; l < n; l++)
  {
    // Find small subdiagonal element.
    tst1 = std::max<SCALARTYPE>(tst1, std::fabs(d[l]) + std::fabs(e[l]));
    vcl_size_t m = l;
    while (m < n)
    {
      // e[n - 1] == 0 ends this search at m == n - 1 at the latest.
      if (std::fabs(e[m]) <= eps * tst1)
        break;
      m++;
    }

    // If m == l, d[l] is an eigenvalue, otherwise, iterate.
    if (m > l)
    {
      do
      {
        // (Could check iteration count here.)

        // Compute implicit shift
        SCALARTYPE g = d[l];
        SCALARTYPE p = (d[l + 1] - g) / (2 * e[l]);
        SCALARTYPE r = viennacl::linalg::detail::pythag<SCALARTYPE>(p, 1);
        if (p < 0)
        {
          r = -r;
        }

        d[l] = e[l] / (p + r);
        d[l + 1] = e[l] * (p + r);
        SCALARTYPE h = g - d[l];
        for (vcl_size_t i = l + 2; i < n; i++)
        {
          d[i] -= h;
        }

        f = f + h;

        // Implicit QL transformation.
        p = d[m];
        SCALARTYPE c = 1;
        SCALARTYPE s = 0;
        for (int i = int(m - 1); i >= int(l); i--)
        {
          g = c * e[vcl_size_t(i)];
          h = c * p;
          r = viennacl::linalg::detail::pythag<SCALARTYPE>(p, e[vcl_size_t(i)]);
          e[vcl_size_t(i) + 1] = s * r;
          s = e[vcl_size_t(i)] / r;
          c = p / r;
          p = c * d[vcl_size_t(i)] - s * g;
          d[vcl_size_t(i) + 1] = h + s * (c * g + s * d[vcl_size_t(i)]);
        }
        e[l] = s * p;
        d[l] = c * p;

        // Check for convergence.
      }
      while (std::fabs(e[l]) > eps * tst1);
    }

    // Undo the accumulated shift.
    d[l] = d[l] + f;
    e[l] = 0;
  }
}
void tql2(matrix_base<SCALARTYPE, F> & Q, VectorType & d, VectorType & e) { vcl_size_t n = static_cast<vcl_size_t>(viennacl::traits::size1(Q)); //boost::numeric::ublas::vector<SCALARTYPE> cs(n), ss(n); std::vector<SCALARTYPE> cs(n), ss(n); viennacl::vector<SCALARTYPE> tmp1(n), tmp2(n); for (vcl_size_t i = 1; i < n; i++) e[i - 1] = e[i]; e[n - 1] = 0; SCALARTYPE f = 0; SCALARTYPE tst1 = 0; SCALARTYPE eps = static_cast<SCALARTYPE>(viennacl::linalg::detail::EPS); for (vcl_size_t l = 0; l < n; l++) { // Find small subdiagonal element. tst1 = std::max<SCALARTYPE>(tst1, std::fabs(d[l]) + std::fabs(e[l])); vcl_size_t m = l; while (m < n) { if (std::fabs(e[m]) <= eps * tst1) break; m++; } // If m == l, d[l) is an eigenvalue, otherwise, iterate. if (m > l) { vcl_size_t iter = 0; do { iter = iter + 1; // (Could check iteration count here.) // Compute implicit shift SCALARTYPE g = d[l]; SCALARTYPE p = (d[l + 1] - g) / (2 * e[l]); SCALARTYPE r = viennacl::linalg::detail::pythag<SCALARTYPE>(p, 1); if (p < 0) { r = -r; } d[l] = e[l] / (p + r); d[l + 1] = e[l] * (p + r); SCALARTYPE dl1 = d[l + 1]; SCALARTYPE h = g - d[l]; for (vcl_size_t i = l + 2; i < n; i++) { d[i] -= h; } f = f + h; // Implicit QL transformation. p = d[m]; SCALARTYPE c = 1; SCALARTYPE c2 = c; SCALARTYPE c3 = c; SCALARTYPE el1 = e[l + 1]; SCALARTYPE s = 0; SCALARTYPE s2 = 0; for (int i = int(m - 1); i >= int(l); i--) { c3 = c2; c2 = c; s2 = s; g = c * e[vcl_size_t(i)]; h = c * p; r = viennacl::linalg::detail::pythag(p, e[vcl_size_t(i)]); e[vcl_size_t(i) + 1] = s * r; s = e[vcl_size_t(i)] / r; c = p / r; p = c * d[vcl_size_t(i)] - s * g; d[vcl_size_t(i) + 1] = h + s * (c * g + s * d[vcl_size_t(i)]); cs[vcl_size_t(i)] = c; ss[vcl_size_t(i)] = s; } p = -s * s2 * c3 * el1 * e[l] / dl1; e[l] = s * p; d[l] = c * p; viennacl::copy(cs, tmp1); viennacl::copy(ss, tmp2); viennacl::linalg::givens_next(Q, tmp1, tmp2, int(l), int(m)); // Check for convergence. 
} while (std::fabs(e[l]) > eps * tst1); } d[l] = d[l] + f; e[l] = 0; } // Sort eigenvalues and corresponding vectors. /* for (int i = 0; i < n-1; i++) { int k = i; SCALARTYPE p = d[i]; for (int j = i+1; j < n; j++) { if (d[j] > p) { k = j; p = d[j); } } if (k != i) { d[k] = d[i]; d[i] = p; for (int j = 0; j < n; j++) { p = Q(j, i); Q(j, i) = Q(j, k); Q(j, k) = p; } } } */ }
/** @brief Computes the eigenvalues of a symmetric tridiagonal matrix by bisection.
 *
 * The search interval [xmin, xmax] is obtained from alphas[i] -/+ (|betas[i]| + |betas[i+1]|).
 * For k = size-1 down to 0 an interval [xu, x0] is halved until
 *   x0 - xu <= 2 * rel_error * (|xu| + |x0|) + eps1
 * where rel_error is the machine epsilon of the CPU scalar type and eps1 = 1e-6.
 * In each step the recurrence q = alphas[i] - x1 - betas[i]^2 / q is evaluated and the number
 * of negative q values decides which half is kept. Lower bounds found along the way are
 * cached in wu, upper bounds in the result vector, so that later values of k start from a
 * tighter interval.
 *
 * @param alphas  Diagonal entries of the tridiagonal matrix
 * @param betas   Off-diagonal entries; betas[0] is not used in the recurrence (its square is replaced by 0)
 * @return Vector with betas.size() entries holding the computed eigenvalue approximations;
 *         empty if betas is empty
 */
std::vector< typename viennacl::result_of::cpu_value_type<typename VectorT::value_type>::type >
bisect(VectorT const & alphas, VectorT const & betas)
{
  typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
  typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;

  vcl_size_t size = betas.size();
  std::vector<CPU_NumericType> x_temp(size);

  // Empty input: alphas[size - 1] and the 'size - 1' loop bound below would wrap around.
  if (size == 0)
    return x_temp;

  std::vector<CPU_NumericType> beta_bisect;
  std::vector<CPU_NumericType> wu;

  double rel_error = std::numeric_limits<CPU_NumericType>::epsilon();

  // Squared off-diagonal entries; the first one is defined as zero.
  beta_bisect.reserve(size);
  beta_bisect.push_back(0);
  for (vcl_size_t i = 1; i < size; i++)
    beta_bisect.push_back(betas[i] * betas[i]);

  // Interval enclosing all eigenvalues.
  double xmin = alphas[size - 1] - std::fabs(betas[size - 1]);
  double xmax = alphas[size - 1] + std::fabs(betas[size - 1]);

  for (vcl_size_t i = 0; i < size - 1; i++)
  {
    double h = std::fabs(betas[i]) + std::fabs(betas[i + 1]);
    if (alphas[i] + h > xmax)
      xmax = alphas[i] + h;
    if (alphas[i] - h < xmin)
      xmin = alphas[i] - h;
  }

  double eps1 = 1e-6;
  /*double eps2 = (xmin + xmax > 0) ? (rel_error * xmax) : (-rel_error * xmin);
  if (eps1 <= 0)
    eps1 = eps2;
  else
    eps2 = 0.5 * eps1 + 7.0 * eps2; */

  // Initially every eigenvalue is only known to lie in [xmin, xmax].
  double x0 = xmax;
  wu.reserve(size);
  for (vcl_size_t i = 0; i < size; i++)
  {
    x_temp[i] = xmax;
    wu.push_back(xmin);
  }

  for (long k = static_cast<long>(size) - 1; k >= 0; --k)
  {
    // Best lower bound recorded so far for an index <= k.
    // The test and the assignment must refer to the same entry wu[i]; testing wu[k-i]
    // could stop the search at an entry that is still xmin and discard a tighter bound.
    double xu = xmin;
    for (long i = k; i >= 0; --i)
    {
      if (xu < wu[vcl_size_t(i)])
      {
        xu = wu[vcl_size_t(i)];
        break;
      }
    }

    if (x0 > x_temp[vcl_size_t(k)])
      x0 = x_temp[vcl_size_t(k)];

    double x1 = (xu + x0) / 2.0;
    while (x0 - xu > 2.0 * rel_error * (std::fabs(xu) + std::fabs(x0)) + eps1)
    {
      // a counts the negative terms of the recurrence at the trial value x1.
      vcl_size_t a = 0;
      double q = 1;
      for (vcl_size_t i = 0; i < size; i++)
      {
        if (q > 0 || q < 0)
          q = alphas[i] - x1 - beta_bisect[i] / q;
        else
          q = alphas[i] - x1 - std::fabs(betas[i] / rel_error);  // q == 0: avoid the division by zero

        if (q < 0)
          a++;
      }

      if (a <= static_cast<vcl_size_t>(k))
      {
        // x1 becomes the new lower end and is cached for later values of k.
        xu = x1;
        if (a < 1)
          wu[0] = x1;
        else
        {
          wu[a] = x1;
          if (x_temp[a - 1] > x1)
            x_temp[a - 1] = x1;
        }
      }
      else
        x0 = x1;

      x1 = (xu + x0) / 2.0;
    }
    x_temp[vcl_size_t(k)] = x1;
  }
  return x_temp;
}