template<typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
void copy(const CPU_MATRIX & cpu_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
{
  if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
  {
    gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size1()),
                      static_cast<unsigned int>(cpu_matrix.size2()),
                      false);

    //determine number of nonzeros, padding each row to the next multiple of ALIGNMENT:
    long num_entries = 0;
    for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
         row_it != cpu_matrix.end1();
         ++row_it)
    {
      unsigned int entries_per_row = 0;
      for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
           col_it != row_it.end();
           ++col_it)
      {
        ++entries_per_row;
      }
      num_entries += viennacl::tools::roundUpToNextMultiple<unsigned int>(entries_per_row, ALIGNMENT);
    }

    //set up matrix entries in CSR format:
    std::vector<unsigned int> row_buffer(cpu_matrix.size1() + 1);
    std::vector<unsigned int> col_buffer(num_entries);
    std::vector<SCALARTYPE>   elements(num_entries);

    unsigned int row_index = 0;
    unsigned int data_index = 0;

    for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
         row_it != cpu_matrix.end1();
         ++row_it)
    {
      row_buffer[row_index] = data_index;
      ++row_index;

      for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
           col_it != row_it.end();
           ++col_it)
      {
        col_buffer[data_index] = static_cast<unsigned int>(col_it.index2());
        elements[data_index] = *col_it;
        ++data_index;
      }
      data_index = viennacl::tools::roundUpToNextMultiple<unsigned int>(data_index, ALIGNMENT); //take care of alignment
    }
    row_buffer[row_index] = data_index;

    gpu_matrix.set(&row_buffer[0],
                   &col_buffer[0],
                   &elements[0],
                   static_cast<unsigned int>(cpu_matrix.size1()),
                   static_cast<unsigned int>(num_entries));
  }
}
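// --- Usage sketch (not from the original source) ---
// A minimal, hypothetical example of driving the copy() above: build a small
// Boost.uBLAS compressed_matrix on the host and transfer it into a
// viennacl::compressed_matrix. The unqualified compressed_matrix parameter
// suggests this overload lives in namespace viennacl, so it is reached here
// through viennacl::copy. Sizes and values are illustrative only.
#include <boost/numeric/ublas/matrix_sparse.hpp>
#include "viennacl/compressed_matrix.hpp"

void copy_usage_sketch()
{
  namespace ublas = boost::numeric::ublas;

  ublas::compressed_matrix<float> host_matrix(3, 3);
  host_matrix(0, 0) =  4.0f;
  host_matrix(1, 1) =  2.0f;
  host_matrix(2, 0) = -1.0f;

  viennacl::compressed_matrix<float> device_matrix;
  viennacl::copy(host_matrix, device_matrix);  //invokes the CSR conversion shown above
}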
template<typename ScalarType, typename VCL_MATRIX>
ScalarType diff(ublas::compressed_matrix<ScalarType> & cpu_matrix, VCL_MATRIX & gpu_matrix)
{
  typedef ublas::compressed_matrix<ScalarType>  CPU_MATRIX;
  CPU_MATRIX from_gpu;

  viennacl::backend::finish();  //wait for all pending device operations
  viennacl::copy(gpu_matrix, from_gpu);

  ScalarType error = 0;

  //step 1: compare all entries from cpu_matrix with gpu_matrix:
  for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
       row_it != cpu_matrix.end1();
       ++row_it)
  {
    for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
         col_it != row_it.end();
         ++col_it)
    {
      //relative deviation of the two entries at this position:
      ScalarType val_cpu = cpu_matrix(col_it.index1(), col_it.index2());
      ScalarType val_gpu = from_gpu(col_it.index1(), col_it.index2());
      ScalarType current_error = 0;
      if (std::max(std::fabs(val_cpu), std::fabs(val_gpu)) > 0)
        current_error = std::fabs(val_cpu - val_gpu) / std::max(std::fabs(val_cpu), std::fabs(val_gpu));
      if (current_error > error)
        error = current_error;
    }
  }

  //step 2: compare all entries from gpu_matrix with cpu_matrix (sparsity pattern might differ):
  for (typename CPU_MATRIX::const_iterator1 row_it = from_gpu.begin1();
       row_it != from_gpu.end1();
       ++row_it)
  {
    for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
         col_it != row_it.end();
         ++col_it)
    {
      ScalarType val_cpu = cpu_matrix(col_it.index1(), col_it.index2());
      ScalarType val_gpu = from_gpu(col_it.index1(), col_it.index2());
      ScalarType current_error = 0;
      if (std::max(std::fabs(val_cpu), std::fabs(val_gpu)) > 0)
        current_error = std::fabs(val_cpu - val_gpu) / std::max(std::fabs(val_cpu), std::fabs(val_gpu));
      if (current_error > error)
        error = current_error;
    }
  }

  return error;
}
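// --- Usage sketch (not from the original source) ---
// A hypothetical test driver for diff(): round-trip a host matrix through the
// device and check that the maximum relative deviation stays below a chosen
// tolerance. The helper name 'check_roundtrip' and the value of 'epsilon' are
// assumptions for illustration.
#include <iostream>

template<typename ScalarType>
bool check_roundtrip(ublas::compressed_matrix<ScalarType> & host_matrix)
{
  viennacl::compressed_matrix<ScalarType> device_matrix;
  viennacl::copy(host_matrix, device_matrix);

  ScalarType epsilon   = static_cast<ScalarType>(1e-5);  //illustrative tolerance
  ScalarType deviation = diff(host_matrix, device_matrix);

  if (deviation > epsilon)
  {
    std::cout << "FAILED: relative deviation " << deviation
              << " exceeds " << epsilon << std::endl;
    return false;
  }
  return true;
}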