/** Copies a strided sub-matrix (matrix_slice) of a row-major GPU matrix to a CPU matrix,
    reading one row of the slice (including its stride gaps) per memory transfer. */
template<typename CPUMatrixT, typename NumericT>
void copy(matrix_slice<matrix<NumericT, row_major, 1> > const & gpu_matrix_slice,
          CPUMatrixT & cpu_matrix)
{
  assert(    (cpu_matrix.size1() == gpu_matrix_slice.size1())
          && (cpu_matrix.size2() == gpu_matrix_slice.size2())
          && bool("Matrix size mismatch!"));

  if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
  {
    vcl_size_t num_entries = gpu_matrix_slice.size2() * gpu_matrix_slice.stride2(); //no. of entries per stride

    std::vector<NumericT> entries(num_entries);

    //copy each stride separately:
    for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
    {
      vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2()
                                + gpu_matrix_slice.start2();
      viennacl::backend::memory_read(gpu_matrix_slice.handle(),
                                     sizeof(NumericT) * start_offset,
                                     sizeof(NumericT) * num_entries,
                                     &(entries[0]));

      for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
        cpu_matrix(i, j) = entries[j * gpu_matrix_slice.stride2()];
    }
  }
}
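// A minimal usage sketch of the copy() overload above. It assumes the usual ViennaCL
// headers (viennacl/matrix.hpp, viennacl/matrix_proxy.hpp) and the documented
// viennacl::slice / matrix_slice API; HostMatrix is a hypothetical helper type, not part
// of ViennaCL, that merely provides the size1()/size2()/operator()(i,j) interface the
// copy() above relies on.

#include <cstddef>
#include <vector>

#include "viennacl/matrix.hpp"
#include "viennacl/matrix_proxy.hpp"

// Hypothetical host-side matrix exposing exactly the interface required by copy().
struct HostMatrix
{
  HostMatrix(std::size_t rows, std::size_t cols)
    : rows_(rows), cols_(cols), data_(rows * cols) {}

  std::size_t size1() const { return rows_; }
  std::size_t size2() const { return cols_; }

  float & operator()(std::size_t i, std::size_t j)       { return data_[i * cols_ + j]; }
  float   operator()(std::size_t i, std::size_t j) const { return data_[i * cols_ + j]; }

  std::size_t        rows_, cols_;
  std::vector<float> data_;
};

int main()
{
  viennacl::matrix<float> M(8, 8);        // row-major device matrix

  viennacl::slice row_slice(1, 2, 3);     // rows 1, 3, 5   (start, stride, size)
  viennacl::slice col_slice(0, 3, 2);     // cols 0, 3

  viennacl::matrix_slice<viennacl::matrix<float> > sub(M, row_slice, col_slice);

  HostMatrix host(sub.size1(), sub.size2());
  viennacl::copy(sub, host);              // strided, row-by-row device-to-host transfer

  return 0;
}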
/** Returns the stride (increment between selected columns, in elements of the underlying matrix)
    of a matrix slice along its second dimension. */
template<typename MatrixType>
typename result_of::size_type<MatrixType>::type
stride2(matrix_slice<MatrixType> const & s) { return s.stride2(); }
/** Constructs a matrix_slice from an existing matrix_slice: the start and stride of the new
    slice are composed with those of A in each dimension, so the result still refers to the
    memory of the underlying matrix. */
matrix_slice(matrix_slice<MatrixType> const & A,
             slice const & row_slice,
             slice const & col_slice)
  : base_type(const_cast<handle_type &>(A.handle()),
              row_slice.size(), row_slice.start() * A.stride1() + A.start1(), row_slice.stride() * A.stride1(), A.internal_size1(),
              col_slice.size(), col_slice.start() * A.stride2() + A.start2(), col_slice.stride() * A.stride2(), A.internal_size2(),
              A.row_major()) {}
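// A short sketch of slicing a slice via the constructor above. It assumes the companion
// (MatrixType, slice, slice) constructor for slicing the full matrix, as documented for
// viennacl::matrix_slice; the sizes and slice parameters are illustrative only.

#include "viennacl/matrix.hpp"
#include "viennacl/matrix_proxy.hpp"

int main()
{
  typedef viennacl::matrix<float>          MatrixT;
  typedef viennacl::matrix_slice<MatrixT>  SliceT;

  MatrixT M(16, 16);

  viennacl::slice rows_outer(0, 2, 8), cols_outer(0, 2, 8);   // every other row/column of M
  SliceT outer(M, rows_outer, cols_outer);

  viennacl::slice rows_inner(1, 2, 4), cols_inner(1, 2, 4);   // every other row/column of 'outer'
  SliceT inner(outer, rows_inner, cols_inner);                // constructor shown above

  // 'inner' addresses rows/columns 2, 6, 10, 14 of M:
  //   start  = 1 * 2 + 0 = 2
  //   stride = 2 * 2     = 4
  // matching the start/stride arithmetic performed in the initializer list above.

  return 0;
}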