// Returns the largest entry-wise relative difference between cpu_matrix and
// the contents of gpu_matrix (copied back into a uBLAS matrix first).
//
// For every entry stored in either matrix the relative difference is
//   |cpu - gpu| / max(|cpu|, |gpu|)
// and counts as 0 when both values are zero. An entry stored in only one of
// the two matrices is compared against the implicit zero of the other one.
//
// cpu_matrix: reference matrix; it is only read (see the note on const
//             access below).
// gpu_matrix: matrix under test; read via viennacl::copy().
// Result:     maximum relative difference found, 0 if all entries agree.
//
// NOTE(review): comparisons involving NaN evaluate to false, so NaN entries
// never raise the reported error.
ScalarType diff(ublas::compressed_matrix<ScalarType> & cpu_matrix, VCL_MATRIX & gpu_matrix)
{
  typedef ublas::compressed_matrix<ScalarType> CPU_MATRIX;

  CPU_MATRIX from_gpu;

  // presumably waits for pending backend operations before reading back -- confirm
  viennacl::backend::finish();
  viennacl::copy(gpu_matrix, from_gpu);

  // All element lookups go through const references. The non-const
  // compressed_matrix::operator() inserts an explicit zero for an absent
  // (i,j) unless BOOST_UBLAS_STRICT_MATRIX_SPARSE is defined, which would
  // change the sparsity pattern of the caller's cpu_matrix. The const
  // overload returns zero for absent entries without inserting anything.
  const CPU_MATRIX & cpu_view = cpu_matrix;
  const CPU_MATRIX & gpu_view = from_gpu;

  ScalarType error = 0;

  // step 1: compare all entries from cpu_matrix with gpu_matrix:
  for (typename CPU_MATRIX::const_iterator1 row_it = cpu_view.begin1();
       row_it != cpu_view.end1();
       ++row_it)
  {
    for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
         col_it != row_it.end();
         ++col_it)
    {
      const ScalarType cpu_value = *col_it;  // value at the iterator, no lookup needed
      const ScalarType gpu_value = gpu_view(col_it.index1(), col_it.index2());
      const ScalarType max_abs   = std::max(std::fabs(cpu_value), std::fabs(gpu_value));

      if (max_abs > 0)  // both zero: relative error is taken as 0, avoids 0/0
      {
        const ScalarType current_error = std::fabs(cpu_value - gpu_value) / max_abs;
        if (current_error > error)
          error = current_error;
      }
    }
  }

  // step 2: compare all entries from gpu_matrix with cpu_matrix (sparsity pattern might differ):
  for (typename CPU_MATRIX::const_iterator1 row_it = gpu_view.begin1();
       row_it != gpu_view.end1();
       ++row_it)
  {
    for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
         col_it != row_it.end();
         ++col_it)
    {
      const ScalarType gpu_value = *col_it;  // value at the iterator, no lookup needed
      const ScalarType cpu_value = cpu_view(col_it.index1(), col_it.index2());
      const ScalarType max_abs   = std::max(std::fabs(cpu_value), std::fabs(gpu_value));

      if (max_abs > 0)  // both zero: relative error is taken as 0, avoids 0/0
      {
        const ScalarType current_error = std::fabs(cpu_value - gpu_value) / max_abs;
        if (current_error > error)
          error = current_error;
      }
    }
  }

  return error;
}
// Returns the largest entry-wise relative difference between cpu_matrix and
// the contents of gpu_matrix (copied back into a uBLAS matrix first).
//
// For every entry stored in either matrix the relative difference is
//   |cpu - gpu| / max(|cpu|, |gpu|)
// and counts as 0 when both values are zero. An entry stored in only one of
// the two matrices is compared against the implicit zero of the other one.
//
// cpu_matrix: reference matrix; it is only read (see the note on const
//             access below).
// gpu_matrix: matrix under test; read via an unqualified copy() call.
// Result:     maximum relative difference found, 0 if all entries agree.
//
// NOTE(review): comparisons involving NaN evaluate to false, so NaN entries
// never raise the reported error.
ScalarType diff(ublas::compressed_matrix<ScalarType> & cpu_matrix, VCL_MATRIX & gpu_matrix)
{
  typedef ublas::compressed_matrix<ScalarType> CPU_MATRIX;

  CPU_MATRIX from_gpu;

  // Unqualified on purpose: the overload is selected from the argument types.
  copy(gpu_matrix, from_gpu);

  // All element lookups go through const references. The non-const
  // compressed_matrix::operator() inserts an explicit zero for an absent
  // (i,j) unless BOOST_UBLAS_STRICT_MATRIX_SPARSE is defined, which would
  // change the sparsity pattern of the caller's cpu_matrix. The const
  // overload returns zero for absent entries without inserting anything.
  const CPU_MATRIX & cpu_view = cpu_matrix;
  const CPU_MATRIX & gpu_view = from_gpu;

  ScalarType error = 0;

  // step 1: compare all entries from cpu_matrix with gpu_matrix:
  for (typename CPU_MATRIX::const_iterator1 row_it = cpu_view.begin1();
       row_it != cpu_view.end1();
       ++row_it)
  {
    for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
         col_it != row_it.end();
         ++col_it)
    {
      const ScalarType cpu_value = *col_it;  // value at the iterator, no lookup needed
      const ScalarType gpu_value = gpu_view(col_it.index1(), col_it.index2());
      // std::fabs picks the overload matching ScalarType; a bare fabs may
      // resolve to the C double-only function.
      const ScalarType max_abs   = std::max(std::fabs(cpu_value), std::fabs(gpu_value));

      if (max_abs > 0)  // both zero: relative error is taken as 0, avoids 0/0
      {
        const ScalarType current_error = std::fabs(cpu_value - gpu_value) / max_abs;
        if (current_error > error)
          error = current_error;
      }
    }
  }

  // step 2: compare all entries from gpu_matrix with cpu_matrix (sparsity pattern might differ):
  for (typename CPU_MATRIX::const_iterator1 row_it = gpu_view.begin1();
       row_it != gpu_view.end1();
       ++row_it)
  {
    for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
         col_it != row_it.end();
         ++col_it)
    {
      const ScalarType gpu_value = *col_it;  // value at the iterator, no lookup needed
      const ScalarType cpu_value = cpu_view(col_it.index1(), col_it.index2());
      const ScalarType max_abs   = std::max(std::fabs(cpu_value), std::fabs(gpu_value));

      if (max_abs > 0)  // both zero: relative error is taken as 0, avoids 0/0
      {
        const ScalarType current_error = std::fabs(cpu_value - gpu_value) / max_abs;
        if (current_error > error)
          error = current_error;
      }
    }
  }

  return error;
}