/** @brief Creates a strided sub-view of a vector.
*
* Entry i of the view maps to
* v[ v.start() + v.stride() * (entry_slice.start() + i * entry_slice.stride()) ],
* i.e. offsets and strides of the underlying vector and the slice compose.
* The buffer of 'v' is shared, not copied.
*/
vector_slice(VectorType & v, slice const & entry_slice)
  : base_type(v.handle(),                                     // shared memory buffer
              entry_slice.size(),                             // number of visible entries
              v.start() + v.stride() * entry_slice.start(),   // composed start offset
              v.stride() * entry_slice.stride())              // composed stride
{}
/** @brief Creates a contiguous sub-view of a vector over an index range.
*
* Entry i of the view maps to v[ v.start() + v.stride() * (entry_range.start() + i) ];
* a range is dense, so the stride of 'v' is inherited unchanged.
* The buffer of 'v' is shared, not copied.
*/
vector_range(VectorType & v, range const & entry_range)
  : base_type(v.handle(),                                     // shared memory buffer
              entry_range.size(),                             // number of visible entries
              v.start() + v.stride() * entry_range.start(),   // composed start offset
              v.stride())                                     // stride carried over as-is
{}
/** @brief Mixed-precision conjugate gradient solver (OpenCL backend).
*
* Runs the CG inner iterations on single-precision (float) copies of the matrix
* and vectors, and periodically performs a double-precision correction step:
* the low-precision update is accumulated into the double-precision result and
* the true residual r = b - A*x is recomputed in double precision.
*
* @param matrix  Double-precision system matrix (compressed sparse row layout assumed
*                for the handle1/handle2 index copies below)
* @param rhs     Double-precision right-hand side b
* @param tag     Solver configuration: outer/inner tolerances, max. iterations;
*                receives iteration count and final error estimate
* @return        Approximate solution x of matrix * x = rhs
*/
VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag)
{
  //typedef typename VectorType::value_type      ScalarType;
  typedef typename viennacl::result_of::value_type<VectorType>::type        ScalarType;
  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;

  //TODO: Assert CPU_ScalarType == double

  vcl_size_t problem_size = viennacl::traits::size(rhs);
  VectorType result(rhs);
  viennacl::traits::clear(result);
  VectorType residual = rhs;

  CPU_ScalarType ip_rr           = viennacl::linalg::inner_prod(rhs, rhs);
  CPU_ScalarType new_ip_rr       = 0;
  CPU_ScalarType norm_rhs_squared = ip_rr;

  if (norm_rhs_squared == 0) //solution is zero if RHS norm is zero
    return result;

  // Register the double<->float conversion kernels with the OpenCL context once.
  // BUGFIX: 'first' was never reset, so the guard was dead and the program was
  // re-added (recompiled) on every call to solve().
  // NOTE(review): a function-local static is only thread-safe from C++11 on —
  // confirm against the project's threading requirements.
  static bool first = true;
  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
  if (first)
  {
    first = false;
    ctx.add_program(double_float_conversion_program, "double_float_conversion_program");
  }

  // Single-precision work vectors for the inner iterations:
  viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs));
  viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs));
  viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs));
  viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs));

  float inner_ip_rr = static_cast<float>(ip_rr);
  float new_inner_ip_rr = 0;
  float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr);
  float alpha;
  float beta;

  viennacl::ocl::kernel & assign_double_to_float      = ctx.get_kernel("double_float_conversion_program", "assign_double_to_float");
  viennacl::ocl::kernel & inplace_add_float_to_double = ctx.get_kernel("double_float_conversion_program", "inplace_add_float_to_double");

  // transfer rhs to single precision:
  viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
                                                 rhs.handle().opencl_handle(),
                                                 cl_uint(rhs.size())
                                                ) );
  residual_low_precision = p_low_precision;

  // transfer matrix to single precision (index arrays copied verbatim,
  // value array converted double -> float by the kernel):
  viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs));
  viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, sizeof(cl_uint) * (matrix.size1() + 1) );
  viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, sizeof(cl_uint) * (matrix.nnz()) );
  viennacl::ocl::enqueue( assign_double_to_float(matrix_low_precision.handle().opencl_handle(),
                                                 matrix.handle().opencl_handle(),
                                                 cl_uint(matrix.nnz())
                                                ) );

  for (unsigned int i = 0; i < tag.max_iterations(); ++i)
  {
    tag.iters(i+1);

    // lower precision 'inner iteration':
    tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision);
    alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision);
    result_low_precision   += alpha * p_low_precision;
    residual_low_precision -= alpha * tmp_low_precision;
    new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision);
    beta = new_inner_ip_rr / inner_ip_rr;
    inner_ip_rr = new_inner_ip_rr;
    p_low_precision = residual_low_precision + beta * p_low_precision;

    // Outer (double-precision) correction once the inner residual dropped far
    // enough, and unconditionally on the last allowed iteration:
    if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1)
    {
      // result += result_low_precision; (with float -> double conversion)
      viennacl::ocl::enqueue( inplace_add_float_to_double(result.handle().opencl_handle(),
                                                          result_low_precision.handle().opencl_handle(),
                                                          cl_uint(result.size())
                                                         ) );

      // residual = b - Ax (without introducing a temporary)
      residual = viennacl::linalg::prod(matrix, result);
      residual = rhs - residual;

      new_ip_rr = viennacl::linalg::inner_prod(residual, residual);
      if (new_ip_rr / norm_rhs_squared < tag.tolerance() * tag.tolerance()) //squared norms involved here
        break;

      // restart the inner iteration from the true residual:
      // p_low_precision = residual; (with double -> float conversion)
      viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
                                                     residual.handle().opencl_handle(),
                                                     cl_uint(residual.size())
                                                    ) );
      result_low_precision.clear();
      residual_low_precision = p_low_precision;
      initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr);
      inner_ip_rr = static_cast<float>(new_ip_rr);
    }
  }

  //store last error estimate:
  tag.error(std::sqrt(new_ip_rr / norm_rhs_squared));

  return result;
}