void copy(matrix_slice<matrix<NumericT, column_major, 1> > const & gpu_matrix_slice, CPUMatrixT & cpu_matrix) { assert( (cpu_matrix.size1() == gpu_matrix_slice.size1()) && (cpu_matrix.size2() == gpu_matrix_slice.size2()) && bool("Matrix size mismatch!")); if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) ) { vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1(); //no. of entries per stride std::vector<NumericT> entries(num_entries); //copy each column stride separately: for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j) { vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1(); viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(NumericT)*start_offset, sizeof(NumericT)*num_entries, &(entries[0])); for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i) cpu_matrix(i,j) = entries[i * gpu_matrix_slice.stride1()]; } } }
/** @brief Creates a matrix_slice from an existing matrix_slice and two slices.
 *
 * In debug builds an assertion verifies that neither requested slice has more
 * entries than the corresponding dimension of A.
 *
 * @param A   The matrix_slice to take the sub-slice from.
 * @param r1  Slice whose size() is checked against A.size1().
 * @param r2  Slice whose size() is checked against A.size2().
 * @return    matrix_slice<MatrixType> constructed from (A, r1, r2).
 */
matrix_slice<MatrixType> project(matrix_slice<MatrixType> const & A,
                                 viennacl::slice const & r1,
                                 viennacl::slice const & r2)
{
  typedef matrix_slice<MatrixType>   SliceType;

  assert(   A.size1() >= r1.size()
         && A.size2() >= r2.size()
         && bool("Size of slice invalid!"));

  return SliceType(A, r1, r2);
}