void copy(const CPU_MATRIX & cpu_matrix,
                  compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
 {
   // Copies a host sparse matrix into the GPU compressed matrix, padding each
   // row to a multiple of ALIGNMENT entries (padded slots keep default values).
   // An empty (0 x 0) source leaves gpu_matrix untouched.
   if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
   {
     gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size1()), static_cast<unsigned int>(cpu_matrix.size2()), false);
     
     //determine nonzeros (counting per-row alignment padding):
     long num_entries = 0;
     for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
     {
       unsigned int entries_per_row = 0;
       for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
       {
         ++entries_per_row;
       }
       num_entries += viennacl::tools::roundUpToNextMultiple<unsigned int>(entries_per_row, ALIGNMENT);
     }
     
     //set up matrix entries. Keep the column/element buffers at least one
     //element long so that &col_buffer[0] / &elements[0] below remain valid
     //even for an all-zero matrix (indexing an empty std::vector is UB).
     std::size_t buffer_len = (num_entries > 0) ? static_cast<std::size_t>(num_entries) : 1;
     std::vector<unsigned int> row_buffer(cpu_matrix.size1() + 1);
     std::vector<unsigned int> col_buffer(buffer_len);
     std::vector<SCALARTYPE> elements(buffer_len);
     
     unsigned int row_index = 0;
     unsigned int data_index = 0;
     
     for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
     {
       row_buffer[row_index] = data_index;   //CSR row start
       ++row_index;
       
       for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
       {
         col_buffer[data_index] = static_cast<unsigned int>(col_it.index2());
         elements[data_index] = *col_it;
         ++data_index;
       }
       data_index = viennacl::tools::roundUpToNextMultiple<unsigned int>(data_index, ALIGNMENT); //take care of alignment
     }
     row_buffer[row_index] = data_index;   //sentinel: one-past-the-end of last row
     
     gpu_matrix.set(&row_buffer[0], &col_buffer[0], &elements[0], static_cast<unsigned int>(cpu_matrix.size1()), num_entries);
   }
 }
    void inplace_solve(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & L, vector<SCALARTYPE, VEC_ALIGNMENT> & vec, viennacl::linalg::unit_lower_tag)
    {
      // In-place forward substitution: overwrites vec with the solution of
      // L * x = vec for a unit lower triangular sparse matrix L.
      viennacl::ocl::kernel & fwd_kernel =
          viennacl::ocl::get_kernel(viennacl::linalg::kernels::compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>::program_name(),
                                    "lu_forward");
      unsigned int wg_size = fwd_kernel.local_work_size();

      // Launch exactly one work-group (global size == local size).
      fwd_kernel.global_work_size(fwd_kernel.local_work_size());

      // Scratch space in local memory: one int per thread (+1) and one
      // SCALARTYPE per thread, as required by the lu_forward kernel.
      viennacl::ocl::enqueue(fwd_kernel(L.handle1(), L.handle2(), L,
                                        viennacl::ocl::local_mem(sizeof(int) * (wg_size + 1)),
                                        viennacl::ocl::local_mem(sizeof(SCALARTYPE) * wg_size),
                                        vec, L.size1()));
    }
// --- Example n. 3 (snippet separator; original marker text: "Esempio n. 3" / "0") ---
void ilu_transpose(compressed_matrix<NumericT> const & A,
                   compressed_matrix<NumericT>       & B)
{
  // Transposes A into B for the ILU routines. The actual work always happens
  // on the host: device-resident matrices are staged through main memory.
  viennacl::context source_ctx = viennacl::traits::context(A);
  viennacl::context host_ctx(viennacl::MAIN_MEMORY);
  // Suppress unused-variable warnings when both GPU backends are compiled out.
  static_cast<void>(source_ctx);
  static_cast<void>(host_ctx);

  // Host-side staging copy of A for the device code paths below.
  viennacl::compressed_matrix<NumericT> staging(0, 0, 0, host_ctx);
  static_cast<void>(staging);

  switch (viennacl::traits::handle(A).get_active_handle_id())
  {
  case viennacl::MAIN_MEMORY:
    // Already resident on the host: transpose directly.
    viennacl::linalg::host_based::ilu_transpose(A, B);
    break;
#ifdef VIENNACL_WITH_OPENCL
  case viennacl::OPENCL_MEMORY:
    // Stage A on the host, transpose there, then move B back to the device.
    staging = A;
    B.switch_memory_context(host_ctx);
    viennacl::linalg::host_based::ilu_transpose(staging, B);
    B.switch_memory_context(source_ctx);
    break;
#endif
#ifdef VIENNACL_WITH_CUDA
  case viennacl::CUDA_MEMORY:
    // Same staging strategy as the OpenCL path.
    staging = A;
    B.switch_memory_context(host_ctx);
    viennacl::linalg::host_based::ilu_transpose(staging, B);
    B.switch_memory_context(source_ctx);
    break;
#endif
  case viennacl::MEMORY_NOT_INITIALIZED:
    throw memory_exception("not initialised!");
  default:
    throw memory_exception("not implemented");
  }
}
 void row_info(compressed_matrix<ScalarType, MAT_ALIGNMENT> const & mat,
               vector_base<ScalarType> & vec,
               viennacl::linalg::detail::row_info_types info_selector)
 {
   ScalarType const * ptr1 = detail::cuda_arg<ScalarType>(mat.handle().cuda_handle());
   ScalarType       * ptr2 = detail::cuda_arg<ScalarType>(vec);
   // The kernel launch below is commented out, so the two pointers above are
   // currently unused; cast them to void to avoid compiler warnings.
   // NOTE(review): with the launch disabled this function extracts nothing
   // into vec — confirm whether the launch should be re-enabled.
   (void)ptr1;
   (void)ptr2;
   //csr_row_info_extractor_kernel<<<128, 128>>>(detail::cuda_arg<unsigned int>(mat.handle1().cuda_handle()),
   //                                            detail::cuda_arg<unsigned int>(mat.handle2().cuda_handle()),
   //                                            detail::cuda_arg<ScalarType>(mat.handle().cuda_handle()),
   //                                            detail::cuda_arg<ScalarType>(vec),
   //                                            static_cast<unsigned int>(mat.size1()),
   //                                           static_cast<unsigned int>(info_selector)
   //                                           );
   VIENNACL_CUDA_LAST_ERROR_CHECK("csr_row_info_extractor_kernel");
 }
 void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix,
                  CPU_MATRIX & cpu_matrix )
 {
   // Copies a GPU compressed matrix back into a host matrix. Alignment
   // padding entries (value 0) are skipped when filling cpu_matrix.
   if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 )
   {
     cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2());
     
     //get raw data from memory:
     std::vector<unsigned int> row_buffer(gpu_matrix.size1() + 1);
     std::vector<unsigned int> col_buffer(gpu_matrix.nnz());
     std::vector<SCALARTYPE> elements(gpu_matrix.nnz());
     
     cl_int err;
     err = clEnqueueReadBuffer(viennacl::ocl::device().queue().get(), gpu_matrix.handle1().get(), CL_TRUE, 0, sizeof(unsigned int)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL);
     CL_ERR_CHECK(err);
     //only read the column/value arrays if there is anything to read:
     //&col_buffer[0] on an empty vector would be undefined behavior.
     if (gpu_matrix.nnz() > 0)
     {
       err = clEnqueueReadBuffer(viennacl::ocl::device().queue().get(), gpu_matrix.handle2().get(), CL_TRUE, 0, sizeof(unsigned int)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL);
       CL_ERR_CHECK(err);
       err = clEnqueueReadBuffer(viennacl::ocl::device().queue().get(), gpu_matrix.handle().get(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL);
       CL_ERR_CHECK(err);
     }
     viennacl::ocl::finish();
     
     //fill the cpu_matrix:
     unsigned int data_index = 0;
     for (unsigned int row = 1; row <= gpu_matrix.size1(); ++row)
     {
       while (data_index < row_buffer[row])
       {
         //bug fix: a column index must be validated against the number of
         //columns (size2()), not the number of rows (size1()).
         if (col_buffer[data_index] >= gpu_matrix.size2())
         {
           std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl;
           return;
         }
         
         //skip alignment padding (stored as explicit zeros):
         if (elements[data_index] != static_cast<SCALARTYPE>(0.0))
           cpu_matrix(row-1, col_buffer[data_index]) = elements[data_index];
         ++data_index;
       }
     }
   }
 }