예제 #1
0
/** @brief Copies 'bytes_to_copy' elements from 'src_buffer + src_offset' to 'dst_buffer + dst_offset', one element per loop iteration.
 *
 *  Both handles are expected to hold a non-NULL address; this is verified by assert() only.
 *  The copy loop is annotated for OpenMP when VIENNACL_WITH_OPENMP is defined.
 *
 *  @param src_buffer     Handle to the buffer that is read from (address obtained via get())
 *  @param dst_buffer     Handle to the buffer that is written to (address obtained via get())
 *  @param src_offset     Offset of the first byte to be read, relative to the address held by 'src_buffer' (in bytes)
 *  @param dst_offset     Offset of the first byte to be written, relative to the address held by 'dst_buffer' (in bytes)
 *  @param bytes_to_copy  Number of bytes to be copied
 */
inline void memory_copy(handle_type const & src_buffer,
                        handle_type & dst_buffer,
                        vcl_size_t src_offset,
                        vcl_size_t dst_offset,
                        vcl_size_t bytes_to_copy)
{
  assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
  assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));

  // The loop counter is signed; the unsigned count is converted once up front.
  long const byte_count = long(bytes_to_copy);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long pos = 0; pos < byte_count; ++pos)
  {
    vcl_size_t const k = vcl_size_t(pos);
    dst_buffer.get()[dst_offset + k] = src_buffer.get()[src_offset + k];
  }
}
예제 #2
0
파일: size.hpp 프로젝트: a9raag/mahout
/** @brief Returns the number of entries of the vector described by a matrix-diagonal expression.
 *
 * The expression carries a matrix (proxy.lhs()) and an integer offset k (proxy.rhs()).
 * A negative k shortens the row extent (size1 + k), a positive k shortens the column
 * extent (size2 - k); the result is the smaller of the two extents.
 *
 * If the offset lies outside the matrix (the smaller extent would be negative), 0 is
 * returned instead of converting a negative int to vcl_size_t, which would yield a huge value.
 *
 * @param proxy  Expression holding the matrix and the diagonal offset
 * @return       Length of the selected diagonal, or 0 if the offset is out of range
 */
vcl_size_t size(vector_expression<LHS, const int, op_matrix_diag> const & proxy)
{
  int k = proxy.rhs();
  int A_size1 = static_cast<int>(size1(proxy.lhs()));
  int A_size2 = static_cast<int>(size2(proxy.lhs()));

  int row_depth = std::min(A_size1, A_size1 + k);
  int col_depth = std::min(A_size2, A_size2 - k);

  // Clamp at zero: an out-of-range offset selects an empty diagonal.
  return vcl_size_t(std::max(0, std::min(row_depth, col_depth)));
}
예제 #3
0
    void copy_to_complex_array(std::complex<NumericT> * input_complex,
                               viennacl::vector<NumericT, AlignmentV> const & in, vcl_size_t size)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i2 = 0; i2 < long(size * 2); i2 += 2)
      { //change array to complex array
        vcl_size_t i = vcl_size_t(i2);
        input_complex[i / 2] = std::complex<NumericT>(in[i], in[i + 1]);
      }
    }
예제 #4
0
    /** @brief Sets the first 'size' entries of two arrays to zero.
     *
     * @param input1  First array to be cleared
     * @param input2  Second array to be cleared
     * @param size    Number of entries cleared in each array
     */
    void zero2(NumericT *input1, NumericT *input2, vcl_size_t size)
    {
      // Signed trip count for the (optionally OpenMP-parallel) loop.
      long const count = long(size);

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long pos = 0; pos < count; ++pos)
      {
        vcl_size_t const k = vcl_size_t(pos);
        input1[k] = 0;
        input2[k] = 0;
      }
    }
예제 #5
0
    void copy_to_vector(std::complex<NumericT> * input_complex, NumericT * in, vcl_size_t size)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i2 = 0; i2 < long(size); i2++)
      {
        vcl_size_t i = vcl_size_t(i2);
        in[i * 2]     = static_cast<NumericT>(std::real(input_complex[i]));
        in[i * 2 + 1] = static_cast<NumericT>(std::imag(input_complex[i]));
      }
    }
예제 #6
0
/** @brief Reads data from a buffer into the memory pointed to by 'ptr'.
 *
 * The source handle is expected to hold a non-NULL address; this is verified by assert() only.
 * The last (unnamed) bool parameter is ignored.
 *
 * @param src_buffer         Handle to the source buffer (address obtained via get())
 * @param src_offset         Offset of the first byte to be read, relative to the address held by 'src_buffer' (in bytes)
 * @param bytes_to_copy      Number of bytes to be read
 * @param ptr                Destination; the data read is stored starting at this address
 */
inline void memory_read(handle_type const & src_buffer,
                        vcl_size_t src_offset,
                        vcl_size_t bytes_to_copy,
                        void * ptr,
                        bool /*async*/)
{
  assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));

  // View the untyped destination as a char array once, outside the loop.
  char * const out = static_cast<char *>(ptr);
  long const byte_count = long(bytes_to_copy);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long pos = 0; pos < byte_count; ++pos)
  {
    out[pos] = src_buffer.get()[src_offset + vcl_size_t(pos)];
  }
}
예제 #7
0
/** @brief Writes data from the memory pointed to by 'ptr' into the buffer identified by 'dst_buffer'.
 *
 * The destination handle is expected to hold a non-NULL address; this is verified by assert() only.
 * The last (unnamed) bool parameter is ignored.
 *
 * @param dst_buffer    Handle to the destination buffer (address obtained via get())
 * @param dst_offset    Offset of the first byte to be written, relative to the address held by 'dst_buffer' (in bytes)
 * @param bytes_to_copy Number of bytes to be copied
 * @param ptr           Pointer to the first byte to be copied
 */
inline void memory_write(handle_type & dst_buffer,
                         vcl_size_t dst_offset,
                         vcl_size_t bytes_to_copy,
                         const void * ptr,
                         bool /*async*/)
{
  assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));

  // View the untyped source as a char array once, outside the loop.
  const char * const in = static_cast<const char *>(ptr);
  long const byte_count = long(bytes_to_copy);

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long pos = 0; pos < byte_count; ++pos)
  {
    dst_buffer.get()[dst_offset + vcl_size_t(pos)] = in[pos];
  }
}
예제 #8
0
    void copy_to_vector(std::complex<NumericT> * input_complex,
                        viennacl::vector_base<NumericT> & in, vcl_size_t size)
    {
      std::vector<NumericT> temp(2 * size);
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for if (size > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
#endif
      for (long i2 = 0; i2 < long(size); i2++)
      {
        vcl_size_t i = vcl_size_t(i2);
        temp[i * 2]     = static_cast<NumericT>(std::real(input_complex[i]));
        temp[i * 2 + 1] = static_cast<NumericT>(std::imag(input_complex[i]));
      }
      viennacl::copy(temp, in);
    }
예제 #9
0
파일: svd.hpp 프로젝트: 10imaging/viennacl
      /** @brief Iteratively updates the vectors 'q' and 'e' with plane rotations and applies the accumulated rotations to 'vcl_u' and 'vcl_v'.
       *
       * Both matrices are transposed on entry and transposed back before returning.
       * For every index k (from n-1 down to 0) at most detail::ITER_MAX sweeps are performed; a sweep ends
       * the processing of k as soon as the split index l found by the search equals k.
       *
       * NOTE(review): 'q' and 'e' look like the diagonal and off-diagonal of a bidiagonal matrix in an
       * SVD computation (shifted QR sweep, as the function name suggests) - confirm against the caller.
       *
       * @param vcl_u  Matrix updated through givens_prev() with the (cs2, ss2) rotation coefficients
       * @param vcl_v  Matrix updated through givens_prev() with the (cs1, ss1) rotation coefficients; change_signs() is applied to it at the end
       * @param q      Vector of size n, updated in place; q[k] is negated if it is negative once l == k
       * @param e      Vector indexed alongside 'q', updated in place; entries are compared against detail::EPS
       */
      void svd_qr_shift(MatrixType & vcl_u,
                        MatrixType & vcl_v,
                        CPU_VectorType & q,
                        CPU_VectorType & e)
      {
        typedef typename MatrixType::value_type                                   ScalarType;
        typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;

        vcl_size_t n = q.size();
        int m = static_cast<int>(vcl_u.size1());

        // Work on the transposed matrices; they are transposed back at the end of the function.
        detail::transpose(vcl_u);
        detail::transpose(vcl_v);

        // signs_v records a factor -1 for every k whose q[k] had to be negated.
        std::vector<CPU_ScalarType> signs_v(n, 1);
        // Host-side storage for the rotation coefficients: (cs1, ss1) are applied to vcl_v, (cs2, ss2) to vcl_u.
        std::vector<CPU_ScalarType> cs1(n), ss1(n), cs2(n), ss2(n);

        // Buffers in the context of vcl_u used to hand the coefficients to givens_prev() / change_signs().
        viennacl::vector<CPU_ScalarType> tmp1(n, viennacl::traits::context(vcl_u)), tmp2(n, viennacl::traits::context(vcl_u));

        bool goto_test_conv = false;

        for (int k = static_cast<int>(n) - 1; k >= 0; k--)
        {
          // std::cout << "K = " << k << std::endl;

          vcl_size_t iter = 0;
          for (iter = 0; iter < detail::ITER_MAX; iter++)
          {
            // test for split
            // Search downwards from k for the first l with |e[l]| <= EPS (sets goto_test_conv) or |q[l-1]| <= EPS.
            // NOTE(review): for l == 0 the index 'vcl_size_t(l) - 1' wraps around (assuming vcl_size_t is unsigned);
            // this is only harmless if |e[0]| <= EPS always holds so that the first test breaks beforehand - confirm.
            // If neither test ever fires, the loop ends with l == -1.
            int l;
            for (l = k; l >= 0; l--)
            {
              goto_test_conv = false;
              if (std::fabs(e[vcl_size_t(l)]) <= detail::EPS)
              {
                // set it
                goto_test_conv = true;
                break;
              }

              if (std::fabs(q[vcl_size_t(l) - 1]) <= detail::EPS)
              {
                // goto
                break;
              }
            }

            // Reached when the search stopped because of a small q[l-1]: rotate e[l..k] against q[l..k].
            // The coefficients are stored in cs1/ss1, but the corresponding update of u is commented out below.
            if (!goto_test_conv)
            {
              CPU_ScalarType c = 0.0;
              CPU_ScalarType s = 1.0;

              //int l1 = l - 1;
              //int l2 = k;

              for (int i = l; i <= k; i++)
              {
                CPU_ScalarType f = s * e[vcl_size_t(i)];
                e[vcl_size_t(i)] = c * e[vcl_size_t(i)];

                if (std::fabs(f) <= detail::EPS)
                {
                  //l2 = i - 1;
                  break;
                }

                CPU_ScalarType g = q[vcl_size_t(i)];
                CPU_ScalarType h = detail::pythag(f, g);
                q[vcl_size_t(i)] = h;
                c = g / h;
                s = -f / h;

                cs1[vcl_size_t(i)] = c;
                ss1[vcl_size_t(i)] = s;
              }

              // std::cout << "Hitted!" << l1 << " " << l2 << "\n";

              // for (int i = l; i <= l2; i++)
              // {
              //   for (int j = 0; j < m; j++)
              //   {
              //     CPU_ScalarType y = u(j, l1);
              //     CPU_ScalarType z = u(j, i);
              //     u(j, l1) = y * cs1[i] + z * ss1[i];
              //     u(j, i) = -y * ss1[i] + z * cs1[i];
              //   }
              // }
            }

            CPU_ScalarType z = q[vcl_size_t(k)];

            // l == k: processing of index k is finished; make q[k] non-negative and remember the sign flip.
            if (l == k)
            {
              if (z < 0)
              {
                q[vcl_size_t(k)] = -z;

                signs_v[vcl_size_t(k)] *= -1;
              }

              break;
            }

            // Last permitted iteration: stop without performing another sweep.
            if (iter >= detail::ITER_MAX - 1)
              break;

            // Compute the shift value f from the entries at positions l, k-1 and k.
            // NOTE(review): the divisions by (2 * h * y) and by x are not guarded against zero.
            CPU_ScalarType x = q[vcl_size_t(l)];
            CPU_ScalarType y = q[vcl_size_t(k) - 1];
            CPU_ScalarType g = e[vcl_size_t(k) - 1];
            CPU_ScalarType h = e[vcl_size_t(k)];
            CPU_ScalarType f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2 * h * y);

            g = detail::pythag<CPU_ScalarType>(f, 1);

            if (f < 0) {
              f = ((x - z) * (x + z) + h * (y / (f - g) - h)) / x;
            } else {
              f = ((x - z) * (x + z) + h * (y / (f + g) - h)) / x;
            }

            CPU_ScalarType c = 1;
            CPU_ScalarType s = 1;

            // Sweep over i = l+1 .. k: each step computes two rotations. The first (stored in cs1/ss1)
            // updates e[i-1], the second (stored in cs2/ss2) updates q[i-1].
            for (vcl_size_t i = static_cast<vcl_size_t>(l) + 1; i <= static_cast<vcl_size_t>(k); i++)
            {
              g = e[i];
              y = q[i];
              h = s * g;
              g = c * g;
              CPU_ScalarType z2 = detail::pythag(f, h);
              e[i - 1] = z2;
              c = f / z2;
              s = h / z2;
              f = x * c + g * s;
              g = -x * s + g * c;
              h = y * s;
              y = y * c;

              cs1[i] = c;
              ss1[i] = s;

              z2 = detail::pythag(f, h);
              q[i - 1] = z2;
              c = f / z2;
              s = h / z2;
              f = c * g + s * y;
              x = -s * g + c * y;

              cs2[i] = c;
              ss2[i] = s;
            }

            // Apply the first set of rotations to vcl_v.
            {
              viennacl::copy(cs1, tmp1);
              viennacl::copy(ss1, tmp2);

              givens_prev(vcl_v, tmp1, tmp2, static_cast<int>(n), l, k);
            }

            // Apply the second set of rotations to vcl_u.
            {
              viennacl::copy(cs2, tmp1);
              viennacl::copy(ss2, tmp2);

              givens_prev(vcl_u, tmp1, tmp2, m, l, k);
            }

            // Store the values left over from the sweep.
            e[vcl_size_t(l)] = 0.0;
            e[vcl_size_t(k)] = f;
            q[vcl_size_t(k)] = x;
          }

        }


        // Apply the recorded sign flips to vcl_v.
        viennacl::copy(signs_v, tmp1);
        change_signs(vcl_v, tmp1, static_cast<int>(n));

        // transpose singular matrices again
        detail::transpose(vcl_u);
        detail::transpose(vcl_v);
      }
예제 #10
0
  /** @brief Runs QL iterations with implicit shifts on the pair (d, e) in place; on return d holds the computed eigenvalues and e is zeroed.
   *
   * No transformation matrix is accumulated. The convergence threshold is the fixed relative tolerance 1e-6.
   *
   * NOTE(review): 'd' and 'e' are presumably the diagonal and sub-diagonal of a symmetric tridiagonal
   * matrix, with e[0] unused on entry (the first loop shifts e down by one) - confirm against the caller.
   *
   * @param n  Number of entries of 'd' and 'e' that are processed; for n == 0 the function returns immediately
   * @param d  On entry the diagonal values, on exit the eigenvalues
   * @param e  On entry the off-diagonal values (read from index 1 on); overwritten during the iteration
   */
  void tql1(vcl_size_t n,
            VectorType & d,
            VectorType & e)
  {
      // Nothing to do for an empty problem. Without this guard 'e[n - 1]' below would be
      // evaluated with a wrapped-around unsigned index.
      if (n == 0)
          return;

      // Shift the off-diagonal entries down by one position so that e[i] couples d[i] and d[i+1].
      for (vcl_size_t i = 1; i < n; i++)
          e[i - 1] = e[i];


      e[n - 1] = 0;

      SCALARTYPE f = 0.;     // accumulated shift, added back to d[l] once d[l] has converged
      SCALARTYPE tst1 = 0.;  // running maximum of |d[l]| + |e[l]|, scales the convergence test
      SCALARTYPE eps = static_cast<SCALARTYPE>(1e-6);


      for (vcl_size_t l = 0; l < n; l++)
      {
          // Find small subdiagonal element.
          tst1 = std::max<SCALARTYPE>(tst1, std::fabs(d[l]) + std::fabs(e[l]));
          vcl_size_t m = l;
          while (m < n)
          {
              if (std::fabs(e[m]) <= eps * tst1)
                  break;
              m++;
          }

          // If m == l, d[l] is an eigenvalue, otherwise, iterate.
          if (m > l)
          {
              vcl_size_t iter = 0;
              do
              {
                  iter = iter + 1;  // (Could check iteration count here.)

                  // Compute implicit shift
                  SCALARTYPE g = d[l];
                  SCALARTYPE p = (d[l + 1] - g) / (2 * e[l]);
                  SCALARTYPE r = viennacl::linalg::detail::pythag<SCALARTYPE>(p, 1);
                  if (p < 0)
                  {
                      r = -r;
                  }

                  d[l] = e[l] / (p + r);
                  d[l + 1] = e[l] * (p + r);
                  SCALARTYPE h = g - d[l];
                  // Apply the shift to the remaining diagonal entries.
                  for (vcl_size_t i = l + 2; i < n; i++)
                  {
                      d[i] -= h;
                  }

                  f = f + h;

                  // Implicit QL transformation.
                  p = d[m];
                  SCALARTYPE c = 1;
                  SCALARTYPE s = 0;
                  // Signed counter: the loop runs downwards and must be able to terminate for l == 0.
                  for (int i = int(m - 1); i >= int(l); i--)
                  {
                      g = c * e[vcl_size_t(i)];
                      h = c * p;
                      r = viennacl::linalg::detail::pythag<SCALARTYPE>(p, e[vcl_size_t(i)]);
                      e[vcl_size_t(i) + 1] = s * r;
                      s = e[vcl_size_t(i)] / r;
                      c = p / r;
                      p = c * d[vcl_size_t(i)] - s * g;
                      d[vcl_size_t(i) + 1] = h + s * (c * g + s * d[vcl_size_t(i)]);
                  }
                  e[l] = s * p;
                  d[l] = c * p;
              // Check for convergence.
              }
              while (std::fabs(e[l]) > eps * tst1);
          }
          // Undo the accumulated shifts for the converged entry.
          d[l] = d[l] + f;
          e[l] = 0;
      }
  }
예제 #11
0
/** @brief Runs QL iterations with implicit shifts on the pair (d, e) in place and applies the rotations of every sweep to Q via givens_next().
 *
 * On return d holds the computed eigenvalues and e is zeroed. The eigenvalues are not sorted
 * (the sorting code at the end is commented out). The convergence threshold is viennacl::linalg::detail::EPS.
 *
 * NOTE(review): 'd' and 'e' are presumably the diagonal and sub-diagonal of a symmetric tridiagonal
 * matrix, with e[0] unused on entry (the first loop shifts e down by one) - confirm against the caller.
 *
 * @param Q  Matrix updated with the rotations; its first dimension determines the problem size n.
 *           For n == 0 the function returns immediately.
 * @param d  On entry the diagonal values, on exit the eigenvalues
 * @param e  On entry the off-diagonal values (read from index 1 on); overwritten during the iteration
 */
void tql2(matrix_base<SCALARTYPE, F> & Q,
          VectorType & d,
          VectorType & e)
{
    vcl_size_t n = static_cast<vcl_size_t>(viennacl::traits::size1(Q));

    // Nothing to do for an empty problem. Without this guard 'e[n - 1]' below would be
    // evaluated with a wrapped-around unsigned index.
    if (n == 0)
        return;

    //boost::numeric::ublas::vector<SCALARTYPE> cs(n), ss(n);
    // Host-side rotation coefficients of one sweep and the buffers used to pass them to givens_next().
    std::vector<SCALARTYPE> cs(n), ss(n);
    viennacl::vector<SCALARTYPE> tmp1(n), tmp2(n);

    // Shift the off-diagonal entries down by one position so that e[i] couples d[i] and d[i+1].
    for (vcl_size_t i = 1; i < n; i++)
        e[i - 1] = e[i];

    e[n - 1] = 0;

    SCALARTYPE f = 0;     // accumulated shift, added back to d[l] once d[l] has converged
    SCALARTYPE tst1 = 0;  // running maximum of |d[l]| + |e[l]|, scales the convergence test
    SCALARTYPE eps = static_cast<SCALARTYPE>(viennacl::linalg::detail::EPS);

    for (vcl_size_t l = 0; l < n; l++)
    {
        // Find small subdiagonal element.
        tst1 = std::max<SCALARTYPE>(tst1, std::fabs(d[l]) + std::fabs(e[l]));
        vcl_size_t m = l;
        while (m < n)
        {
            if (std::fabs(e[m]) <= eps * tst1)
                break;
            m++;
        }

        // If m == l, d[l] is an eigenvalue, otherwise, iterate.
        if (m > l)
        {
            vcl_size_t iter = 0;
            do
            {
                iter = iter + 1;  // (Could check iteration count here.)

                // Compute implicit shift
                SCALARTYPE g = d[l];
                SCALARTYPE p = (d[l + 1] - g) / (2 * e[l]);
                SCALARTYPE r = viennacl::linalg::detail::pythag<SCALARTYPE>(p, 1);
                if (p < 0)
                {
                    r = -r;
                }

                d[l] = e[l] / (p + r);
                d[l + 1] = e[l] * (p + r);
                SCALARTYPE dl1 = d[l + 1];
                SCALARTYPE h = g - d[l];
                // Apply the shift to the remaining diagonal entries.
                for (vcl_size_t i = l + 2; i < n; i++)
                {
                    d[i] -= h;
                }

                f = f + h;

                // Implicit QL transformation.
                p = d[m];
                SCALARTYPE c = 1;
                SCALARTYPE c2 = c;
                SCALARTYPE c3 = c;
                SCALARTYPE el1 = e[l + 1];
                SCALARTYPE s = 0;
                SCALARTYPE s2 = 0;
                // Signed counter: the loop runs downwards and must be able to terminate for l == 0.
                for (int i = int(m - 1); i >= int(l); i--)
                {
                    // Keep the coefficients of the two previous rotations; they enter the update of p after the loop.
                    c3 = c2;
                    c2 = c;
                    s2 = s;
                    g = c * e[vcl_size_t(i)];
                    h = c * p;
                    r = viennacl::linalg::detail::pythag(p, e[vcl_size_t(i)]);
                    e[vcl_size_t(i) + 1] = s * r;
                    s = e[vcl_size_t(i)] / r;
                    c = p / r;
                    p = c * d[vcl_size_t(i)] - s * g;
                    d[vcl_size_t(i) + 1] = h + s * (c * g + s * d[vcl_size_t(i)]);


                    // Record the rotation so it can be applied to Q after the sweep.
                    cs[vcl_size_t(i)] = c;
                    ss[vcl_size_t(i)] = s;
                }


                p = -s * s2 * c3 * el1 * e[l] / dl1;
                e[l] = s * p;
                d[l] = c * p;

                // Apply the rotations of this sweep (indices l .. m) to Q.
                viennacl::copy(cs, tmp1);
                viennacl::copy(ss, tmp2);

                viennacl::linalg::givens_next(Q, tmp1, tmp2, int(l), int(m));

                // Check for convergence.
            }
            while (std::fabs(e[l]) > eps * tst1);
        }
        // Undo the accumulated shifts for the converged entry.
        d[l] = d[l] + f;
        e[l] = 0;
    }

    // Sort eigenvalues and corresponding vectors.
/*
       for (int i = 0; i < n-1; i++) {
          int k = i;
          SCALARTYPE p = d[i];
          for (int j = i+1; j < n; j++) {
             if (d[j] > p) {
                k = j;
                p = d[j];
             }
          }
          if (k != i) {
             d[k] = d[i];
             d[i] = p;
             for (int j = 0; j < n; j++) {
                p = Q(j, i);
                Q(j, i) = Q(j, k);
                Q(j, k) = p;
             }
          }
       }

*/

}
예제 #12
0
/** @brief Computes one value per entry of 'alphas' by interval bisection, driven by a sign count of the recurrence q_i = alphas[i] - x - betas[i]^2 / q_{i-1}.
 *
 * NOTE(review): this looks like the classical bisection method for the eigenvalues of a symmetric
 * tridiagonal matrix with diagonal 'alphas' and off-diagonal 'betas' (betas[0] unused in the squared
 * terms) - confirm against the caller.
 *
 * The search interval [xmin, xmax] is derived from alphas[i] -/+ (|betas[i]| + |betas[i+1]|).
 * Bisection for index k stops once the interval width drops below
 * 2 * rel_error * (|xu| + |x0|) + eps1, with rel_error = machine epsilon and eps1 = 1e-6.
 *
 * @param alphas  Diagonal values; entries 0 .. betas.size()-1 are read
 * @param betas   Off-diagonal values; its size determines the problem size
 * @return        Vector with betas.size() computed values (empty if betas is empty)
 */
std::vector<
        typename viennacl::result_of::cpu_value_type<typename VectorT::value_type>::type
        >
bisect(VectorT const & alphas, VectorT const & betas)
{
  typedef typename viennacl::result_of::value_type<VectorT>::type           NumericType;
  typedef typename viennacl::result_of::cpu_value_type<NumericType>::type   CPU_NumericType;

  vcl_size_t size = betas.size();
  std::vector<CPU_NumericType>  x_temp(size);

  // Empty input: return the empty result right away. Otherwise 'alphas[size - 1]' and the
  // loop bound 'size - 1' below would be evaluated with a wrapped-around unsigned value.
  if (size == 0)
    return x_temp;


  std::vector<CPU_NumericType> beta_bisect;   // squared off-diagonal entries, beta_bisect[0] = 0
  std::vector<CPU_NumericType> wu;            // per-index lower bounds found so far

  double rel_error = std::numeric_limits<CPU_NumericType>::epsilon();
  beta_bisect.push_back(0);

  for (vcl_size_t i = 1; i < size; i++)
    beta_bisect.push_back(betas[i] * betas[i]);

  // Determine an interval [xmin, xmax] enclosing all sought values.
  double xmin = alphas[size - 1] - std::fabs(betas[size - 1]);
  double xmax = alphas[size - 1] + std::fabs(betas[size - 1]);

  for (vcl_size_t i = 0; i < size - 1; i++)
  {
    double h = std::fabs(betas[i]) + std::fabs(betas[i + 1]);
    if (alphas[i] + h > xmax)
      xmax = alphas[i] + h;
    if (alphas[i] - h < xmin)
      xmin = alphas[i] - h;
  }


  double eps1 = 1e-6;
  /*double eps2 = (xmin + xmax > 0) ? (rel_error * xmax) : (-rel_error * xmin);
  if (eps1 <= 0)
    eps1 = eps2;
  else
    eps2 = 0.5 * eps1 + 7.0 * eps2; */

  double x0 = xmax;

  // Initially every upper bound is xmax and every lower bound is xmin.
  for (vcl_size_t i = 0; i < size; i++)
  {
    x_temp[i] = xmax;
    wu.push_back(xmin);
  }

  // Process the indices from the largest to the smallest.
  for (long k = static_cast<long>(size) - 1; k >= 0; --k)
  {
    // Start from the best lower bound recorded for any index i <= k: scan downwards from k and
    // take the first entry exceeding xmin. The same entry is tested and assigned.
    double xu = xmin;
    for (long i = k; i >= 0; --i)
    {
      if (xu < wu[vcl_size_t(i)])
      {
        xu = wu[vcl_size_t(i)];
        break;
      }
    }

    // The upper bound can only shrink from one k to the next.
    if (x0 > x_temp[vcl_size_t(k)])
      x0 = x_temp[vcl_size_t(k)];

    double x1 = (xu + x0) / 2.0;
    while (x0 - xu > 2.0 * rel_error * (std::fabs(xu) + std::fabs(x0)) + eps1)
    {
      // Count the negative terms of the recurrence evaluated at x1.
      vcl_size_t a = 0;
      double q = 1;
      for (vcl_size_t i = 0; i < size; i++)
      {
        if (q > 0 || q < 0)
          q = alphas[i] - x1 - beta_bisect[i] / q;
        else
          // q is exactly zero: avoid the division by substituting |betas[i]| / rel_error.
          q = alphas[i] - x1 - std::fabs(betas[i] / rel_error);

        if (q < 0)
          a++;
      }

      if (a <= static_cast<vcl_size_t>(k))
      {
        // x1 is a lower bound for index k; also record it as a bound for index a.
        xu = x1;
        if (a < 1)
          wu[0] = x1;
        else
        {
          wu[a] = x1;
          if (x_temp[a - 1] > x1)
              x_temp[a - 1] = x1;
        }
      }
      else
        x0 = x1;

      x1 = (xu + x0) / 2.0;
    }
    x_temp[vcl_size_t(k)] = x1;
  }
  return x_temp;
}