Esempio n. 1
0
 vector_slice(VectorType & v, slice const & entry_slice)
     : base_type(v.handle(), entry_slice.size(), v.start() + v.stride() * entry_slice.start(), v.stride() * entry_slice.stride()) {}
Esempio n. 2
0
 vector_range(VectorType & v, range const & entry_range)
  : base_type(v.handle(), entry_range.size(), v.start() + v.stride() * entry_range.start(), v.stride()) {}
    VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag)
    {
      //typedef typename VectorType::value_type      ScalarType;
      typedef typename viennacl::result_of::value_type<VectorType>::type        ScalarType;
      typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;

      //TODO: Assert CPU_ScalarType == double

      //std::cout << "Starting CG" << std::endl;
      vcl_size_t problem_size = viennacl::traits::size(rhs);
      VectorType result(rhs);
      viennacl::traits::clear(result);

      VectorType residual = rhs;

      CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs);
      CPU_ScalarType new_ip_rr = 0;
      CPU_ScalarType norm_rhs_squared = ip_rr;

      if (norm_rhs_squared == 0) //solution is zero if RHS norm is zero
        return result;

      static bool first = true;

      viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
      if (first)
      {
        ctx.add_program(double_float_conversion_program, "double_float_conversion_program");
      }

      viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs));
      viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs));
      viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs));
      viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs));
      float inner_ip_rr = static_cast<float>(ip_rr);
      float new_inner_ip_rr = 0;
      float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr);
      float alpha;
      float beta;

      viennacl::ocl::kernel & assign_double_to_float      = ctx.get_kernel("double_float_conversion_program", "assign_double_to_float");
      viennacl::ocl::kernel & inplace_add_float_to_double = ctx.get_kernel("double_float_conversion_program", "inplace_add_float_to_double");

      // transfer rhs to single precision:
      viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
                                                     rhs.handle().opencl_handle(),
                                                     cl_uint(rhs.size())
                                                    ) );
      //std::cout << "copying p_low_precision..." << std::endl;
      //assign_double_to_float(p_low_precision.handle(), residual.handle(), residual.size());
      residual_low_precision = p_low_precision;

      // transfer matrix to single precision:
      viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs));
      viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, sizeof(cl_uint) * (matrix.size1() + 1) );
      viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, sizeof(cl_uint) * (matrix.nnz()) );

      viennacl::ocl::enqueue( assign_double_to_float(matrix_low_precision.handle().opencl_handle(),
                                                     matrix.handle().opencl_handle(),
                                                     cl_uint(matrix.nnz())
                                                    ) );
      //std::cout << "copying matrix_low_precision..." << std::endl;
      //assign_double_to_float(const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle()), matrix.handle(), matrix.nnz());

      //std::cout << "Starting CG solver iterations... " << std::endl;


      for (unsigned int i = 0; i < tag.max_iterations(); ++i)
      {
        tag.iters(i+1);

        // lower precision 'inner iteration'
        tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision);

        alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision);
        result_low_precision += alpha * p_low_precision;
        residual_low_precision -= alpha * tmp_low_precision;

        new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision);

        beta = new_inner_ip_rr / inner_ip_rr;
        inner_ip_rr = new_inner_ip_rr;

        p_low_precision = residual_low_precision + beta * p_low_precision;



        if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1)
        {
          //std::cout << "outer correction at i=" << i << std::endl;
          //result += result_low_precision;
          viennacl::ocl::enqueue( inplace_add_float_to_double(result.handle().opencl_handle(),
                                                              result_low_precision.handle().opencl_handle(),
                                                              cl_uint(result.size())
                                                             ) );

          // residual = b - Ax  (without introducing a temporary)
          residual = viennacl::linalg::prod(matrix, result);
          residual = rhs - residual;

          new_ip_rr = viennacl::linalg::inner_prod(residual, residual);
          if (new_ip_rr / norm_rhs_squared < tag.tolerance() *  tag.tolerance())//squared norms involved here
            break;

          // p_low_precision = residual;
          viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
                                                         residual.handle().opencl_handle(),
                                                         cl_uint(residual.size())
                                                        ) );
          result_low_precision.clear();
          residual_low_precision = p_low_precision;
          initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr);
          inner_ip_rr = static_cast<float>(new_ip_rr);
        }
      }

      //store last error estimate:
      tag.error(std::sqrt(new_ip_rr / norm_rhs_squared));

      return result;
    }