void copy(const CPUMatrixT & cpu_matrix, matrix_slice<matrix<NumericT, column_major, 1> > & gpu_matrix_slice ) { assert( (cpu_matrix.size1() == gpu_matrix_slice.size1()) && (cpu_matrix.size2() == gpu_matrix_slice.size2()) && bool("Matrix size mismatch!")); if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size1() > 0) ) { vcl_size_t num_entries = gpu_matrix_slice.size1() * gpu_matrix_slice.stride1(); //no. of entries per stride std::vector<NumericT> entries(num_entries); //copy each column stride separately: for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j) { vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1(); viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(NumericT)*start_offset, sizeof(NumericT)*num_entries, &(entries[0])); for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i) entries[i * gpu_matrix_slice.stride1()] = cpu_matrix(i,j); viennacl::backend::memory_write(gpu_matrix_slice.handle(), sizeof(NumericT)*start_offset, sizeof(NumericT)*num_entries, &(entries[0])); } } }
/** Constructs a slice of an existing slice A.
 *
 * The new object refers to A's handle. Offsets and strides of row_slice and
 * col_slice are expressed relative to A, so they are composed with A's own
 * values: each new offset is A's offset plus the slice start scaled by A's
 * stride, and each new stride is the product of both strides. The internal
 * sizes and the row_major() flag are taken over from A unchanged.
 *
 * @param A          Slice that is sliced further
 * @param row_slice  Start, stride and size of the row selection within A
 * @param col_slice  Start, stride and size of the column selection within A
 */
matrix_slice(matrix_slice<MatrixType> const & A,
             slice const & row_slice,
             slice const & col_slice)
  : base_type(const_cast<handle_type &>(A.handle()),   // A is const here, base_type receives a non-const handle reference
              // row-related arguments
              row_slice.size(),
              A.start1() + row_slice.start() * A.stride1(),
              row_slice.stride() * A.stride1(),
              A.internal_size1(),
              // column-related arguments
              col_slice.size(),
              A.start2() + col_slice.start() * A.stride2(),
              col_slice.stride() * A.stride2(),
              A.internal_size2(),
              A.row_major())
{}