コード例 #1
0
ファイル: bicgstab.hpp プロジェクト: denis14/ViennaCL-1.5.2
    /** @brief Pipelined BiCGStab solver (no preconditioner) for ViennaCL vectors.
      *
      * Every iteration calls the fused pipelined_bicgstab_* routines, which leave partial inner products
      * in a device buffer. That buffer is copied to the host once per iteration and reduced there.
      *
      * @param A    System matrix.
      * @param rhs  Right hand side vector.
      * @param tag  Solver settings (max_iterations(), tolerance()). tag.iters() is set to the number of
      *             iterations started, tag.error() to the last relative residual estimate.
      * @return     The approximate solution; the zero vector if the norm of rhs is not positive.
      */
    viennacl::vector<ScalarType> solve(MatrixType const & A,
                                       viennacl::vector<ScalarType> const & rhs,
                                       bicgstab_tag const & tag,
                                       viennacl::linalg::no_precond)
    {
      viennacl::vector<ScalarType> result = viennacl::zero_vector<ScalarType>(rhs.size(), viennacl::traits::context(rhs));

      // All work vectors start as copies of rhs (initial guess is zero, hence r_0 = p_0 = r_0^* = rhs).
      viennacl::vector<ScalarType> residual = rhs;
      viennacl::vector<ScalarType> p = rhs;
      viennacl::vector<ScalarType> r0star = rhs;
      viennacl::vector<ScalarType> Ap = rhs;
      viennacl::vector<ScalarType> s  = rhs;
      viennacl::vector<ScalarType> As = rhs;

      ScalarType norm_rhs_host = viennacl::linalg::norm_2(residual);
      ScalarType residual_norm = norm_rhs_host;

      if (norm_rhs_host <= 0) //solution is zero if RHS norm is zero
        return result;

      // Layout of temporary buffer:
      //  chunk 0: <residual, r_0^*>
      //  chunk 1: <As, As>
      //  chunk 2: <As, s>
      //  chunk 3: <Ap, r_0^*>
      //  chunk 4: <As, r_0^*>
      //  chunk 5: <s, s>
      vcl_size_t buffer_size_per_vector = 256;
      vcl_size_t num_buffer_chunks = 6;
      viennacl::vector<ScalarType> inner_prod_buffer = viennacl::zero_vector<ScalarType>(num_buffer_chunks*buffer_size_per_vector, viennacl::traits::context(rhs)); // temporary buffer
      std::vector<ScalarType>      host_inner_prod_buffer(inner_prod_buffer.size());

      // Seed chunk 0 with <r_0, r_0^*> = ||rhs||^2; the remaining entries of the chunk are zero.
      inner_prod_buffer[0] = norm_rhs_host * norm_rhs_host;

      for (vcl_size_t i = 0; i < tag.max_iterations(); ++i)
      {
        tag.iters(i+1);

        // Ap = A*p_j
        // Ap_dot_r0 = <Ap, r_0^*>  (partial results go to chunk 3)
        viennacl::linalg::pipelined_bicgstab_prod(A, p, Ap, r0star,
                                                  inner_prod_buffer, buffer_size_per_vector, 3*buffer_size_per_vector);

        //////// first (weak) synchronization point ////

        // alpha is not needed on the host here: the routine below forms
        //   s = r - alpha * Ap
        // on the device and writes the first stage of <s, s> to chunk 5.
        viennacl::linalg::pipelined_bicgstab_update_s(s, residual, Ap,
                                                      inner_prod_buffer, buffer_size_per_vector, 5*buffer_size_per_vector);

        // As = A*s_j
        // As_dot_As = <As, As>
        // As_dot_s  = <As, s>
        // As_dot_r0 = <As, r_0^*>  (partial results go to chunk 4)
        viennacl::linalg::pipelined_bicgstab_prod(A, s, As, r0star,
                                                  inner_prod_buffer, buffer_size_per_vector, 4*buffer_size_per_vector);

        //////// second (strong) synchronization point ////

        viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());

        // Second reduction stage on the host: sum up each chunk.
        ScalarType const  r_dot_r0 = std::accumulate(host_inner_prod_buffer.begin(),                              host_inner_prod_buffer.begin() +     buffer_size_per_vector, ScalarType(0));
        ScalarType const As_dot_As = std::accumulate(host_inner_prod_buffer.begin() +     buffer_size_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, ScalarType(0));
        ScalarType const As_dot_s  = std::accumulate(host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 3 * buffer_size_per_vector, ScalarType(0));
        ScalarType const Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + 3 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 4 * buffer_size_per_vector, ScalarType(0));
        ScalarType const As_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + 4 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 5 * buffer_size_per_vector, ScalarType(0));
        ScalarType const  s_dot_s  = std::accumulate(host_inner_prod_buffer.begin() + 5 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 6 * buffer_size_per_vector, ScalarType(0));

        ScalarType const alpha =         r_dot_r0 / Ap_dot_r0;
        ScalarType const beta  = -1.0 * As_dot_r0 / Ap_dot_r0;
        ScalarType const omega =        As_dot_s  / As_dot_As;

        // x_{j+1} = x_j + alpha * p_j + omega * s_j
        // r_{j+1} = s_j - omega * t_j
        // p_{j+1} = r_{j+1} + beta * (p_j - omega * q_j)
        // and compute first stage of r_dot_r0 = <r_{j+1}, r_o^*> for use in next iteration
        //
        // This has to happen BEFORE the convergence test: the residual norm below belongs to x_{j+1},
        // so leaving the loop first would return x_j, i.e. an iterate that did not pass the test.
        viennacl::linalg::pipelined_bicgstab_vector_update(result, alpha, p, omega, s,
                                                           residual, As,
                                                           beta, Ap,
                                                           r0star, inner_prod_buffer, buffer_size_per_vector);

        // ||r_{j+1}||^2 = <s,s> - 2 omega <As,s> + omega^2 <As,As>.
        // Round-off can make this slightly negative close to convergence; fabs() keeps sqrt() from
        // returning NaN in that case while still propagating a genuine NaN.
        residual_norm = std::sqrt(std::fabs(s_dot_s - 2.0 * omega * As_dot_s + omega * omega *  As_dot_As));
        if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance())
          break;
      }

      //store last error estimate:
      tag.error(residual_norm / norm_rhs_host);

      return result;
    }
コード例 #2
0
ファイル: bicgstab.hpp プロジェクト: GnsP/viennacl-dev
    /** @brief BiCGStab solver without preconditioner for generic matrix and vector types.
      *
      * The iteration is restarted from the current iterate whenever the recurrence degenerates or
      * tag.max_iterations_before_restart() iterations have passed since the last restart.
      *
      * @param matrix  System matrix.
      * @param rhs     Right hand side vector.
      * @param tag     Solver settings; receives the iteration count via tag.iters() and the final
      *                relative residual via tag.error().
      * @return        The approximate solution; the zero vector if the norm of rhs is zero.
      */
    VectorType solve(const MatrixType & matrix, VectorType const & rhs, bicgstab_tag const & tag)
    {
      typedef typename viennacl::result_of::value_type<VectorType>::type       ValueType;
      typedef typename viennacl::result_of::cpu_value_type<ValueType>::type    HostScalar;

      // Initial guess is zero: copy rhs to get the right size, then clear.
      VectorType result = rhs;
      viennacl::traits::clear(result);

      // Work vectors, all sized like rhs.
      VectorType residual = rhs;
      VectorType search_dir = rhs;       // p
      VectorType shadow_residual = rhs;  // r_0^*
      VectorType A_p = rhs;              // A * p
      VectorType A_s = rhs;              // A * s
      VectorType s = rhs;

      HostScalar const norm_rhs_host = viennacl::linalg::norm_2(residual);
      HostScalar rho = norm_rhs_host * norm_rhs_host;   // <r, r_0^*>
      HostScalar residual_norm = norm_rhs_host;

      // Zero right hand side: the zero vector is the solution.
      if (norm_rhs_host == 0)
        return result;

      bool needs_restart = true;
      vcl_size_t restart_iter = 0;
      for (vcl_size_t iter = 0; iter < tag.max_iterations(); ++iter)
      {
        if (needs_restart)
        {
          // Rebuild the recurrence from the true residual of the current iterate.
          residual = rhs;
          residual -= viennacl::linalg::prod(matrix, result);
          search_dir = residual;
          shadow_residual = residual;
          HostScalar const restart_norm = viennacl::linalg::norm_2(residual);
          rho = restart_norm * restart_norm;
          needs_restart = false;
          restart_iter = iter;
        }

        tag.iters(iter+1);

        A_p = viennacl::linalg::prod(matrix, search_dir);
        HostScalar const alpha = rho / viennacl::linalg::inner_prod(A_p, shadow_residual);

        s = residual - alpha*A_p;

        A_s = viennacl::linalg::prod(matrix, s);
        HostScalar const norm_A_s = viennacl::linalg::norm_2(A_s);
        HostScalar const omega = viennacl::linalg::inner_prod(A_s, s) / (norm_A_s * norm_A_s);

        result += alpha * search_dir + omega * s;
        residual = s - omega * A_s;

        HostScalar const rho_next = viennacl::linalg::inner_prod(residual, shadow_residual);
        residual_norm = viennacl::linalg::norm_2(residual);
        if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance())
          break;

        HostScalar const beta = rho_next / rho * alpha/omega;
        rho = rho_next;

        // A degenerate search direction or too many iterations since the last restart
        // triggers a restart at the top of the next iteration.
        needs_restart = (rho == 0 || omega == 0 || iter - restart_iter > tag.max_iterations_before_restart());

        // p = residual + beta * (p - omega * A_p), done in two steps to avoid a temporary vector.
        search_dir -= omega * A_p;
        search_dir = residual + beta * search_dir;
      }

      // Report the last relative residual.
      tag.error(residual_norm / norm_rhs_host);

      return result;
    }
コード例 #3
0
ファイル: bicgstab.hpp プロジェクト: denis14/ViennaCL-1.5.2
    /** @brief Preconditioned BiCGStab solver for generic matrix and vector types.
      *
      * The preconditioner is applied in place (precond.apply) to the restart residual and to every
      * matrix-vector product. The iteration restarts from the current iterate whenever the recurrence
      * degenerates or tag.max_iterations_before_restart() iterations have passed since the last restart.
      *
      * @param matrix   System matrix.
      * @param rhs      Right hand side vector.
      * @param tag      Solver settings; receives the iteration count via tag.iters() and the final
      *                 residual norm relative to the norm of rhs via tag.error().
      * @param precond  Preconditioner object providing apply(VectorType &).
      * @return         The approximate solution; the zero vector if the norm of rhs is zero.
      */
    VectorType solve(const MatrixType & matrix, VectorType const & rhs, bicgstab_tag const & tag, PreconditionerType const & precond)
    {
      typedef typename viennacl::result_of::value_type<VectorType>::type       ValueType;
      typedef typename viennacl::result_of::cpu_value_type<ValueType>::type    HostScalar;

      // Initial guess is zero: copy rhs to get the right size, then clear.
      VectorType result = rhs;
      viennacl::traits::clear(result);

      // Work vectors, all sized like rhs.
      VectorType residual = rhs;
      VectorType shadow_residual = residual;  // r_0^*, can be chosen arbitrarily in fact
      VectorType A_p = rhs;                   // preconditioned A * p
      VectorType A_s = rhs;                   // preconditioned A * s
      VectorType s = rhs;

      VectorType search_dir = residual;       // p

      // rho holds <r, r_0^*>; it is recomputed by the restart block in the first iteration.
      HostScalar rho = viennacl::linalg::norm_2(residual);
      HostScalar const norm_rhs_host = viennacl::linalg::norm_2(residual);
      HostScalar residual_norm = norm_rhs_host;

      // Zero right hand side: the zero vector is the solution.
      if (norm_rhs_host == 0)
        return result;

      bool needs_restart = true;
      vcl_size_t restart_iter = 0;
      for (unsigned int iter = 0; iter < tag.max_iterations(); ++iter)
      {
        if (needs_restart)
        {
          // Rebuild the recurrence from the preconditioned residual of the current iterate.
          residual = rhs;
          residual -= viennacl::linalg::prod(matrix, result);
          precond.apply(residual);
          search_dir = residual;
          shadow_residual = residual;
          HostScalar const restart_norm = viennacl::linalg::norm_2(residual);
          rho = restart_norm * restart_norm;
          needs_restart = false;
          restart_iter = iter;
        }

        tag.iters(iter+1);

        A_p = viennacl::linalg::prod(matrix, search_dir);
        precond.apply(A_p);
        HostScalar const alpha = rho / viennacl::linalg::inner_prod(A_p, shadow_residual);

        s = residual - alpha*A_p;

        A_s = viennacl::linalg::prod(matrix, s);
        precond.apply(A_s);
        HostScalar const norm_A_s = viennacl::linalg::norm_2(A_s);
        HostScalar const omega = viennacl::linalg::inner_prod(A_s, s) / (norm_A_s * norm_A_s);

        result += alpha * search_dir + omega * s;
        residual = s - omega * A_s;

        residual_norm = viennacl::linalg::norm_2(residual);
        if (residual_norm / norm_rhs_host < tag.tolerance())
          break;

        HostScalar const rho_next = viennacl::linalg::inner_prod(residual, shadow_residual);

        HostScalar const beta = rho_next / rho * alpha/omega;
        rho = rho_next;

        // A degenerate search direction or too many iterations since the last restart
        // triggers a restart at the top of the next iteration.
        needs_restart = (rho == 0 || omega == 0 || iter - restart_iter > tag.max_iterations_before_restart());

        // p = residual + beta * (p - omega * A_p), done in two steps to avoid a temporary vector.
        search_dir -= omega * A_p;
        search_dir = residual + beta * search_dir;
      }

      // Report the last residual norm relative to the norm of rhs.
      tag.error(residual_norm / norm_rhs_host);

      return result;
    }