/** @brief Copies a sparse host matrix (uBLAS-style interface) into a
 *         ViennaCL compressed_matrix on the compute device.
 *
 * Works in two passes over cpu_matrix:
 *  - Pass 1 counts the entries of each row, rounding every per-row count up
 *    to the next multiple of ALIGNMENT, to obtain the total padded entry count.
 *  - Pass 2 fills CSR-style buffers (row offsets, column indices, values),
 *    aligning the running write position after each row, and hands them to
 *    gpu_matrix.set().
 *
 * NOTE(review): assumes CPU_MATRIX exposes Boost.uBLAS-style sparse
 * iteration (begin1()/end1(), row-wise iterator2, index2()) -- confirm with
 * the instantiations used by callers.
 *
 * @param cpu_matrix  source matrix on the host (read-only)
 * @param gpu_matrix  destination device matrix; resized to match the source
 */
void copy(const CPU_MATRIX & cpu_matrix,
                  compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
 {
   if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
   {
     // Resize the device matrix to the source dimensions; the trailing
     // 'false' presumably skips preserving/initializing old contents --
     // verify against compressed_matrix::resize().
     gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size1()), static_cast<unsigned int>(cpu_matrix.size2()), false);
     
     //determine nonzeros (each row padded up to a multiple of ALIGNMENT):
     long num_entries = 0;
     for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
     {
       unsigned int entries_per_row = 0;
       for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
       {
         ++entries_per_row;
       }
       num_entries += viennacl::tools::roundUpToNextMultiple<unsigned int>(entries_per_row, ALIGNMENT);
     }
     
     //std::cout << "CPU->GPU, Number of entries: " << num_entries << std::endl;
     
     //set up matrix entries (CSR layout: row offsets, column indices, values):
     std::vector<unsigned int> row_buffer(cpu_matrix.size1() + 1);
     std::vector<unsigned int> col_buffer(num_entries);
     std::vector<SCALARTYPE> elements(num_entries);
     
     unsigned int row_index = 0;   // next row whose start offset is recorded
     unsigned int data_index = 0;  // running write position in col_buffer/elements
     
     for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
           row_it != cpu_matrix.end1();
           ++row_it)
     {
       row_buffer[row_index] = data_index;
       ++row_index;
       
       for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
             col_it != row_it.end();
             ++col_it)
       {
         col_buffer[data_index] = static_cast<unsigned int>(col_it.index2());
         elements[data_index] = *col_it;
         ++data_index;
       }
       // Pad the row up to an ALIGNMENT boundary; skipped slots keep the
       // value-initialized contents of the vectors (index 0, value 0).
       data_index = viennacl::tools::roundUpToNextMultiple<unsigned int>(data_index, ALIGNMENT); //take care of alignment
     }
     // Sentinel entry: total (padded) number of entries.
     row_buffer[row_index] = data_index;
     
     /*gpu_matrix._row_buffer = viennacl::ocl::device().createMemory(CL_MEM_READ_WRITE, row_buffer);
     gpu_matrix._col_buffer = viennacl::ocl::device().createMemory(CL_MEM_READ_WRITE, col_buffer);
     gpu_matrix._elements = viennacl::ocl::device().createMemory(CL_MEM_READ_WRITE, elements);
     
     gpu_matrix._nonzeros = num_entries;*/
     gpu_matrix.set(&row_buffer[0], &col_buffer[0], &elements[0], static_cast<unsigned int>(cpu_matrix.size1()), num_entries);
   }
 }
/** @brief Returns the maximum relative difference between a host uBLAS
 *         sparse matrix and a ViennaCL (device) matrix.
 *
 * The device matrix is copied back to the host first; entries are then
 * compared in both directions, because the two sparsity patterns may differ
 * (an entry stored in only one matrix is compared against the 0 that
 * operator() yields for absent entries of the other).
 *
 * @param cpu_matrix  reference matrix on the host
 * @param gpu_matrix  matrix on the compute device
 * @return largest value of |a - b| / max(|a|, |b|) over all stored entries
 */
ScalarType diff(ublas::compressed_matrix<ScalarType> & cpu_matrix, VCL_MATRIX & gpu_matrix)
{
  typedef ublas::compressed_matrix<ScalarType>  CPU_MATRIX;
  CPU_MATRIX from_gpu;

  viennacl::backend::finish();  // make sure all device work has completed
  viennacl::copy(gpu_matrix, from_gpu);

  ScalarType error = 0;

  //step 1: compare all entries from cpu_matrix with gpu_matrix:
  for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
        row_it != cpu_matrix.end1();
        ++row_it)
  {
    for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
          col_it != row_it.end();
          ++col_it)
    {
      // Hoist the two element lookups: operator() on a compressed_matrix
      // searches the row on every call, and the original expression
      // re-evaluated each lookup up to six times per entry.
      ScalarType val_cpu = cpu_matrix(col_it.index1(), col_it.index2());
      ScalarType val_gpu = from_gpu(col_it.index1(), col_it.index2());
      ScalarType scale = std::max(std::fabs(val_cpu), std::fabs(val_gpu));

      if (scale > 0)
      {
        ScalarType current_error = std::fabs(val_cpu - val_gpu) / scale;
        if (current_error > error)
          error = current_error;
      }
    }
  }

  //step 2: compare all entries from gpu_matrix with cpu_matrix (sparsity pattern might differ):
  for (typename CPU_MATRIX::const_iterator1 row_it = from_gpu.begin1();
        row_it != from_gpu.end1();
        ++row_it)
  {
    for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
          col_it != row_it.end();
          ++col_it)
    {
      ScalarType val_cpu = cpu_matrix(col_it.index1(), col_it.index2());
      ScalarType val_gpu = from_gpu(col_it.index1(), col_it.index2());
      ScalarType scale = std::max(std::fabs(val_cpu), std::fabs(val_gpu));

      if (scale > 0)
      {
        ScalarType current_error = std::fabs(val_cpu - val_gpu) / scale;
        if (current_error > error)
          error = current_error;
      }
    }
  }

  return error;
}
// Beispiel #3  ("Example #3" -- separator left over from concatenating code
// samples; the stray "0" below was part of the same artifact, commented out
// so the file stays compilable)
// 0
/** @brief Returns the maximum relative difference between a host uBLAS
 *         sparse matrix and a ViennaCL (device) matrix.
 *
 * Entries are compared in both directions, because the two sparsity patterns
 * may differ (an entry stored in only one matrix is compared against the 0
 * that operator() yields for absent entries of the other).
 *
 * @param cpu_matrix  reference matrix on the host
 * @param gpu_matrix  matrix on the compute device
 * @return largest value of |a - b| / max(|a|, |b|) over all stored entries
 */
ScalarType diff(ublas::compressed_matrix<ScalarType> & cpu_matrix, VCL_MATRIX & gpu_matrix)
{
  typedef ublas::compressed_matrix<ScalarType>  CPU_MATRIX;
  CPU_MATRIX from_gpu;

  copy(gpu_matrix, from_gpu);

  ScalarType error = 0;

  //step 1: compare all entries from cpu_matrix with gpu_matrix:
  for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
        row_it != cpu_matrix.end1();
        ++row_it)
  {
    for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
          col_it != row_it.end();
          ++col_it)
    {
      // Hoist the two element lookups (operator() searches the row on every
      // call; the original re-evaluated each lookup up to six times), and use
      // std::fabs instead of the unqualified C-header fabs for consistency
      // with the sibling diff() implementation (keeps float in float).
      ScalarType val_cpu = cpu_matrix(col_it.index1(), col_it.index2());
      ScalarType val_gpu = from_gpu(col_it.index1(), col_it.index2());
      ScalarType scale = std::max(std::fabs(val_cpu), std::fabs(val_gpu));

      if (scale > 0)
      {
        ScalarType current_error = std::fabs(val_cpu - val_gpu) / scale;
        if (current_error > error)
          error = current_error;
      }
    }
  }

  //step 2: compare all entries from gpu_matrix with cpu_matrix (sparsity pattern might differ):
  for (typename CPU_MATRIX::const_iterator1 row_it = from_gpu.begin1();
        row_it != from_gpu.end1();
        ++row_it)
  {
    for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
          col_it != row_it.end();
          ++col_it)
    {
      ScalarType val_cpu = cpu_matrix(col_it.index1(), col_it.index2());
      ScalarType val_gpu = from_gpu(col_it.index1(), col_it.index2());
      ScalarType scale = std::max(std::fabs(val_cpu), std::fabs(val_gpu));

      if (scale > 0)
      {
        ScalarType current_error = std::fabs(val_cpu - val_gpu) / scale;
        if (current_error > error)
          error = current_error;
      }
    }
  }

  return error;
}