Example #1
0
    // Preconditioned conjugate gradient solver for matrix * x = rhs.
    //
    // matrix : symmetric positive definite system matrix
    // rhs    : right-hand side b
    // tag    : cg_tag providing max_iterations() and tolerance(); receives the
    //          iteration count and the final relative error estimate
    // precond: preconditioner exposing apply(VectorType&)
    // returns: approximate solution (the zero vector if ||rhs|| vanishes)
    VectorType solve(const MatrixType & matrix, VectorType const & rhs, cg_tag const & tag, PreconditionerType const & precond)
    {
      typedef typename viennacl::result_of::value_type<VectorType>::type        ScalarType;
      typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;

      unsigned int vec_size = viennacl::traits::size(rhs);

      // initial guess: zero vector
      VectorType result(vec_size);
      viennacl::traits::clear(result);

      VectorType residual = rhs;    // r_0 = b - A * 0 = b
      VectorType Ap(vec_size);      // receives A * p each iteration
      VectorType z = rhs;           // preconditioned residual M^{-1} r

      precond.apply(z);
      VectorType p = z;             // initial search direction

      // host-side scalar recurrences of the CG iteration
      CPU_ScalarType inner_r_z      = viennacl::linalg::inner_prod(residual, z);
      CPU_ScalarType step;                // alpha
      CPU_ScalarType inner_r_z_next = 0;  // <r,z> of the next iterate
      CPU_ScalarType direction_coeff;     // beta
      CPU_ScalarType rhs_norm_sq    = inner_r_z;
      CPU_ScalarType rel_decrease_sq;

      if (rhs_norm_sq == 0) // zero right-hand side implies zero solution
        return result;

      for (unsigned int iter = 0; iter < tag.max_iterations(); ++iter)
      {
        tag.iters(iter + 1);

        Ap = viennacl::linalg::prod(matrix, p);
        step = inner_r_z / viennacl::linalg::inner_prod(Ap, p);

        result   += step * p;
        residual -= step * Ap;

        z = residual;
        precond.apply(z);

        inner_r_z_next = viennacl::linalg::inner_prod(residual, z);

        // convergence check operates on squared norms
        rel_decrease_sq = inner_r_z_next / rhs_norm_sq;
        if (std::fabs(rel_decrease_sq) < tag.tolerance() * tag.tolerance())
          break;

        direction_coeff = inner_r_z_next / inner_r_z;
        inner_r_z = inner_r_z_next;

        p = z + direction_coeff * p;
      }

      // record the last relative error estimate
      tag.error(std::sqrt(std::fabs(inner_r_z_next / rhs_norm_sq)));

      return result;
    }
Example #2
0
    // Solves matrix * x = rhs using the preconditioned conjugate gradient method.
    //
    // matrix : symmetric positive definite system matrix
    // rhs    : right-hand side vector b
    // tag    : cg_tag supplying iterations() and tolerance(); receives the
    //          iteration count and the final error estimate
    // precond: preconditioner exposing apply(VectorType&)
    // returns: approximate solution (zero vector if ||rhs|| == 0)
    VectorType solve(const MatrixType & matrix, VectorType const & rhs, cg_tag const & tag, PreconditionerType const & precond)
    {
      typedef typename VectorType::value_type      ScalarType;
      typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER<ScalarType>::ResultType    CPU_ScalarType;
      
      VectorType result(rhs.size());
      result.clear();
      
      VectorType residual = rhs;
      VectorType tmp(rhs.size());
      VectorType z = rhs;

      precond.apply(z);
      VectorType p = z;

      // Keep scalar recurrences on the host (CPU_ScalarType, previously unused)
      // instead of (potentially device-resident) ScalarType: avoids a blocking
      // GPU read-back in every scalar expression below.
      CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(residual, z);
      CPU_ScalarType alpha;
      CPU_ScalarType new_ip_rr = 0;
      CPU_ScalarType beta;
      CPU_ScalarType norm_rhs = ip_rr;
      
      if (norm_rhs == 0) // zero RHS: solution is zero; also guards the divisions below
        return result;
      
      for (unsigned int i = 0; i < tag.iterations(); ++i)
      {
        tag.iters(i+1);
        tmp = viennacl::linalg::prod(matrix, p);
        alpha = ip_rr / viennacl::linalg::inner_prod(tmp, p);
        result += alpha * p;
        residual -= alpha * tmp;
        z = residual;
        precond.apply(z);
        
        new_ip_rr = viennacl::linalg::inner_prod(residual, z);
        if (std::fabs(new_ip_rr / norm_rhs) < tag.tolerance() *  tag.tolerance())    //squared norms involved here
          break;
        
        beta = new_ip_rr / ip_rr;
        ip_rr = new_ip_rr;
        p = z + beta*p;
      } 

      //store last error estimate (std::sqrt: the previous unqualified sqrt relied on ADL/the C global)
      tag.error(std::sqrt(std::fabs(new_ip_rr / norm_rhs)));

      return result;
    }
Example #3
0
    // Restarted GMRES solver for matrix * x = rhs.
    //
    // The Krylov basis is represented implicitly through Householder reflectors
    // U[0..k] rather than an explicitly stored orthonormal basis; R holds the
    // triangular factor in transposed (column-wise) layout (see the back-
    // substitution loop below). The preconditioner is applied to the residual
    // and to every matrix-vector product (left preconditioning).
    //
    // matrix : system matrix (only matrix-vector products are needed)
    // rhs    : right-hand side vector b
    // tag    : gmres_tag providing krylov_dim(), max_restarts(), tolerance();
    //          receives the iteration count and final error estimate
    // precond: preconditioner exposing apply(VectorType&)
    // returns: approximate solution (zero vector if ||rhs|| == 0)
    VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag, PreconditionerType const & precond)
    {
      typedef typename viennacl::result_of::value_type<VectorType>::type        ScalarType;
      typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;
      unsigned int problem_size = viennacl::traits::size(rhs);
      VectorType result(problem_size);
      viennacl::traits::clear(result);
      unsigned int krylov_dim = tag.krylov_dim();
      if (problem_size < tag.krylov_dim())
        krylov_dim = problem_size; //A Krylov space larger than the matrix would lead to seg-faults (mathematically, error is certain to be zero already)
      
      VectorType res(problem_size);            // current (preconditioned) residual
      VectorType v_k_tilde(problem_size);      // candidate Krylov vector before reflection
      VectorType v_k_tilde_temp(problem_size);
      
      // R: transposed triangular factor; projection_rhs: projected RHS of the
      // small least-squares problem; U[k]: k-th Householder vector.
      std::vector< std::vector<CPU_ScalarType> > R(krylov_dim);
      std::vector<CPU_ScalarType> projection_rhs(krylov_dim);
      std::vector<VectorType> U(krylov_dim);

      const CPU_ScalarType gpu_scalar_minus_1 = static_cast<CPU_ScalarType>(-1);    //representing the scalar '-1' on the GPU. Prevents blocking write operations
      const CPU_ScalarType gpu_scalar_1 = static_cast<CPU_ScalarType>(1);    //representing the scalar '1' on the GPU. Prevents blocking write operations
      const CPU_ScalarType gpu_scalar_2 = static_cast<CPU_ScalarType>(2);    //representing the scalar '2' on the GPU. Prevents blocking write operations
      
      CPU_ScalarType norm_rhs = viennacl::linalg::norm_2(rhs);
      
      if (norm_rhs == 0) //solution is zero if RHS norm is zero
        return result;
      
      unsigned int k;
      // NOTE(review): resizes R[k] to tag.krylov_dim() here but to the possibly
      // smaller krylov_dim inside the restart loop below — harmless (the former
      // is >= the latter) but inconsistent; confirm which was intended.
      for (k = 0; k < krylov_dim; ++k)
      {
        R[k].resize(tag.krylov_dim()); 
        viennacl::traits::resize(U[k], problem_size);
      }

      //std::cout << "Starting GMRES..." << std::endl;
      tag.iters(0);
      
      // Outer restart loop: each pass builds a Krylov space of dimension up to
      // krylov_dim and applies the resulting correction to 'result'.
      for (unsigned int it = 0; it <= tag.max_restarts(); ++it)
      {
        //std::cout << "-- GMRES Start " << it << " -- " << std::endl;
        
        // residual of the current iterate, then preconditioned
        res = rhs;
        res -= viennacl::linalg::prod(matrix, result);  //initial guess zero
        precond.apply(res);
        //std::cout << "Residual: " << res << std::endl;
        
        CPU_ScalarType rho_0 = viennacl::linalg::norm_2(res); 
        CPU_ScalarType rho = static_cast<CPU_ScalarType>(1.0);
        //std::cout << "rho_0: " << rho_0 << std::endl;

        // already converged (or degenerate zero RHS): report and exit
        if (rho_0 / norm_rhs < tag.tolerance() || (norm_rhs == CPU_ScalarType(0.0)) )
        {
          //std::cout << "Allowed Error reached at begin of loop" << std::endl;
          tag.error(rho_0 / norm_rhs);
          return result;
        }

        // work with the normalized residual; rho tracks the relative decrease
        res /= rho_0;
        //std::cout << "Normalized Residual: " << res << std::endl;
        
        // reset the factorization data of the previous restart cycle
        for (k=0; k<krylov_dim; ++k)
        {
          viennacl::traits::clear(R[k]);
          viennacl::traits::clear(U[k]);
          R[k].resize(krylov_dim); 
          viennacl::traits::resize(U[k], problem_size);
        }

        // Inner loop: build the Krylov space column by column.
        for (k = 0; k < krylov_dim; ++k)
        {
          tag.iters( tag.iters() + 1 ); //increase iteration counter

          //compute v_k = A * v_{k-1} via Householder matrices
          if (k == 0)
          {
            v_k_tilde = viennacl::linalg::prod(matrix, res);
            precond.apply(v_k_tilde);
          }
          else
          {
            // start from the (k-1)-th unit vector and undo the reflections
            viennacl::traits::clear(v_k_tilde);
            v_k_tilde[k-1] = gpu_scalar_1;
            //Householder rotations part 1
            for (int i = k-1; i > -1; --i)
              v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2);

            v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde);
            precond.apply(v_k_tilde_temp);
            v_k_tilde = v_k_tilde_temp;

            //Householder rotations part 2
            for (unsigned int i = 0; i < k; ++i)
              v_k_tilde -= U[i] * (viennacl::linalg::inner_prod(U[i], v_k_tilde) * gpu_scalar_2);
          }
          
          //std::cout << "v_k_tilde: " << v_k_tilde << std::endl;

          viennacl::traits::clear(U[k]);
          viennacl::traits::resize(U[k], problem_size);
          //copy first k entries from v_k_tilde to U[k]:
          gmres_copy_helper(v_k_tilde, U[k], k);
          
          // norm of the part of v_k_tilde to be annihilated by the reflector
          U[k][k] = std::sqrt( viennacl::linalg::inner_prod(v_k_tilde, v_k_tilde) - viennacl::linalg::inner_prod(U[k], U[k]) );

          // NOTE(review): unqualified fabs relies on the C global / ADL;
          // std::fabs (as used elsewhere in this function) would be safer.
          if (fabs(U[k][k]) < CPU_ScalarType(10 * std::numeric_limits<CPU_ScalarType>::epsilon()))
            break; //Note: Solution is essentially (up to round-off error) already in Krylov space. No need to proceed.
          
          //copy first k+1 entries from U[k] to R[k]
          gmres_copy_helper(U[k], R[k], k+1);
          
          // form and normalize the Householder vector
          U[k] -= v_k_tilde;
          //std::cout << "U[k] before normalization: " << U[k] << std::endl;
          U[k] *= gpu_scalar_minus_1 / viennacl::linalg::norm_2( U[k] );
          //std::cout << "Householder vector U[k]: " << U[k] << std::endl;
          
          //DEBUG: Make sure that P_k v_k_tilde equals (rho_{1,k}, ... , rho_{k,k}, 0, 0 )
#ifdef VIENNACL_GMRES_DEBUG
          std::cout << "P_k v_k_tilde: " << (v_k_tilde - 2.0 * U[k] * inner_prod(U[k], v_k_tilde)) << std::endl;
          std::cout << "R[k]: [" << R[k].size() << "](";
          for (size_t i=0; i<R[k].size(); ++i)
            std::cout << R[k][i] << ",";
          std::cout << ")" << std::endl;
#endif
          // apply the new reflector to the residual
          //std::cout << "P_k res: " << (res - 2.0 * U[k] * inner_prod(U[k], res)) << std::endl;
          res -= U[k] * (viennacl::linalg::inner_prod( U[k], res ) * gpu_scalar_2);
          //std::cout << "zeta_k: " << viennacl::linalg::inner_prod( U[k], res ) * gpu_scalar_2 << std::endl;
          //std::cout << "Updated res: " << res << std::endl;

#ifdef VIENNACL_GMRES_DEBUG
          VectorType v1(U[k].size()); v1.clear(); v1.resize(U[k].size());
          v1(0) = 1.0;
          v1 -= U[k] * (viennacl::linalg::inner_prod( U[k], v1 ) * gpu_scalar_2);
          std::cout << "v1: " << v1 << std::endl;
          boost::numeric::ublas::matrix<ScalarType> P = -2.0 * outer_prod(U[k], U[k]);
          P(0,0) += 1.0; P(1,1) += 1.0; P(2,2) += 1.0;
          std::cout << "P: " << P << std::endl;
#endif
          
          // clamp res[k] into [-rho, rho] so the acos below stays well-defined
          // (NOTE(review): res[k] reads a single entry of a possibly
          // device-resident vector — a blocking transfer; confirm acceptable)
          if (res[k] > rho) //machine precision reached
            res[k] = rho;

          if (res[k] < -1.0 * rho) //machine precision reached
            res[k] = -1.0 * rho;
          
          projection_rhs[k] = res[k];
          
          // update the residual-norm estimate (Givens-rotation style)
          rho *= std::sin( std::acos(projection_rhs[k] / rho) );
          
#ifdef VIENNACL_GMRES_DEBUG
          std::cout << "k-th component of r: " << res[k] << std::endl;
          std::cout << "New rho (norm of res): " << rho << std::endl;
#endif        

          // early exit: Krylov space already captures the solution well enough
          if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance())
          {
            //std::cout << "Krylov space big enough" << endl;
            tag.error( std::fabs(rho*rho_0 / norm_rhs) );
            ++k;
            break;
          }
          
          //std::cout << "Current residual: " << rho * rho_0 << std::endl;
          //std::cout << " - End of Krylov space setup - " << std::endl;
        } // for k
        
#ifdef VIENNACL_GMRES_DEBUG
        //inplace solution of the upper triangular matrix:
        std::cout << "Upper triangular system:" << std::endl;
        std::cout << "Size of Krylov space: " << k << std::endl;
        for (size_t i=0; i<k; ++i)
        {
          for (size_t j=0; j<k; ++j)
          {
            std::cout << R[j][i] << ", ";
          }
          std::cout << " | " << projection_rhs[i] << std::endl;
        }
#endif        
        
        // back-substitution on the (transposed) triangular factor, in place
        for (int i=k-1; i>-1; --i)
        {
          for (unsigned int j=i+1; j<k; ++j)
            //temp_rhs[i] -= R[i][j] * temp_rhs[j];   //if R is not transposed
            projection_rhs[i] -= R[j][i] * projection_rhs[j];     //R is transposed
            
          projection_rhs[i] /= R[i][i];
        }
        
#ifdef VIENNACL_GMRES_DEBUG
        std::cout << "Result of triangular solver: ";
        for (size_t i=0; i<k; ++i)
          std::cout << projection_rhs[i] << ", ";
        std::cout << std::endl;
#endif        
        // assemble the update direction from the triangular solve result ...
        res *= projection_rhs[0];
        
        if (k > 0)
        {
          for (unsigned int i = 0; i < k-1; ++i)
          {
            res[i] += projection_rhs[i+1];
          }
        }

        // ... and map it back through the Householder reflectors
        for (int i = k-1; i > -1; --i)
          res -= U[i] * (viennacl::linalg::inner_prod(U[i], res) * gpu_scalar_2);

        // undo the initial normalization and apply the correction
        res *= rho_0;
        result += res;

        if ( std::fabs(rho*rho_0 / norm_rhs) < tag.tolerance() )
        {
          //std::cout << "Allowed Error reached at end of loop" << std::endl;
          tag.error(std::fabs(rho*rho_0 / norm_rhs));
          return result;
        }

        //res = rhs;
        //res -= viennacl::linalg::prod(matrix, result);
        //std::cout << "norm_2(r)=" << norm_2(r) << std::endl;
        //std::cout << "std::abs(rho*rho_0)=" << std::abs(rho*rho_0) << std::endl;
        //std::cout << r << std::endl; 

        // NOTE(review): unlike the early-exit paths, this estimate is not
        // divided by norm_rhs (absolute rather than relative) — confirm intended.
        tag.error(std::fabs(rho*rho_0));
      }

      return result;
    }
Example #4
0
    // Preconditioned BiCGStab solver for matrix * x = rhs, with automatic
    // restarts when the recursion degenerates.
    //
    // matrix : system matrix (only matrix-vector products are needed)
    // rhs    : right-hand side b
    // tag    : bicgstab_tag providing max_iterations(), tolerance() and
    //          max_iterations_before_restart(); receives the iteration count
    //          and the final relative residual estimate
    // precond: preconditioner exposing apply(VectorType&)
    // returns: approximate solution (zero vector if ||rhs|| == 0)
    VectorType solve(const MatrixType & matrix, VectorType const & rhs, bicgstab_tag const & tag, PreconditionerType const & precond)
    {
      typedef typename viennacl::result_of::value_type<VectorType>::type        ScalarType;
      typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;

      // initial guess: zero vector (copy of rhs gives the right layout/size)
      VectorType result = rhs;
      viennacl::traits::clear(result);

      VectorType r = rhs;        // current residual
      VectorType r_hat = r;      // shadow residual r0*; fixed within a restart cycle
      VectorType Ap = rhs;       // preconditioned A * p
      VectorType As = rhs;       // preconditioned A * s
      VectorType s = rhs;        // intermediate residual

      VectorType p = r;          // search direction

      CPU_ScalarType rho      = viennacl::linalg::norm_2(r);  // <r, r_hat>; properly initialized on first restart
      CPU_ScalarType b_norm   = viennacl::linalg::norm_2(r);
      CPU_ScalarType beta;
      CPU_ScalarType alpha;
      CPU_ScalarType omega;
      CPU_ScalarType rho_next = 0;
      CPU_ScalarType r_norm   = b_norm;

      if (b_norm == 0) // zero right-hand side implies zero solution
        return result;

      bool needs_restart = true;      // forces a full initialization on iteration 0
      vcl_size_t restart_iter = 0;
      for (unsigned int i = 0; i < tag.max_iterations(); ++i)
      {
        if (needs_restart)
        {
          // (re)compute the true preconditioned residual and reset the recursion
          r = rhs;
          r -= viennacl::linalg::prod(matrix, result);
          precond.apply(r);
          p = r;
          r_hat = r;
          rho = viennacl::linalg::norm_2(r);
          rho *= rho;
          needs_restart = false;
          restart_iter = i;
        }

        tag.iters(i+1);
        Ap = viennacl::linalg::prod(matrix, p);
        precond.apply(Ap);
        alpha = rho / viennacl::linalg::inner_prod(Ap, r_hat);

        s = r - alpha*Ap;

        As = viennacl::linalg::prod(matrix, s);
        precond.apply(As);
        CPU_ScalarType As_norm = viennacl::linalg::norm_2(As);
        omega = viennacl::linalg::inner_prod(As, s) / (As_norm * As_norm);

        result += alpha * p + omega * s;
        r = s - omega * As;

        r_norm = viennacl::linalg::norm_2(r);
        if (r_norm / b_norm < tag.tolerance())
          break;

        rho_next = viennacl::linalg::inner_prod(r, r_hat);

        beta = rho_next / rho * alpha/omega;
        rho = rho_next;

        // search direction degenerate, or too long since the last restart:
        // recompute the residual from scratch next iteration
        if (rho == 0 || omega == 0 || i - restart_iter > tag.max_iterations_before_restart())
          needs_restart = true;

        // p = r + beta * (p - omega*Ap), evaluated without a temporary vector:
        p -= omega * Ap;
        p = r + beta * p;
      }

      // record the last relative residual estimate
      tag.error(r_norm / b_norm);

      return result;
    }
    }