/* Fill every display row with a test pattern: the row number as two hex
 * digits, a space, then the alphabet starting at 'A'.  All stored bytes
 * carry odd parity via parity(). */
void test() {
    uint8_t line;
    uint8_t col;

    for (line = 0; line < NROWS; line++) {
        /* Columns 2-3: the row index as two hex digits; column 4: space. */
        row_buffer(line)[2] = parity(hex(line >> 4));
        row_buffer(line)[3] = parity(hex(line & 0xf));
        row_buffer(line)[4] = ' ';

        /* Columns 5-41: 'A', 'B', 'C', ... */
        for (col = 5; col < 42; col++)
            row_buffer(line)[col] = parity('A' - 5 + col);
    }
}
// Copies a host-side sparse matrix (any CPU_MATRIX exposing uBLAS-style
// iterator1/iterator2 row/column traversal) into a ViennaCL
// compressed_matrix (CSR layout) on the device.  Each row's entry count is
// rounded up to a multiple of ALIGNMENT; the padding slots keep the
// zero-initialized column index / value from the std::vector construction.
void copy(const CPU_MATRIX & cpu_matrix,
          compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix )
{
  // Empty matrices are left untouched.
  if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 )
  {
    // Resize the device matrix; 'false' = do not preserve old contents.
    gpu_matrix.resize(static_cast<unsigned int>(cpu_matrix.size1()),
                      static_cast<unsigned int>(cpu_matrix.size2()),
                      false);

    //determine nonzeros (first pass over the matrix):
    long num_entries = 0;
    for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
         row_it != cpu_matrix.end1();
         ++row_it)
    {
      unsigned int entries_per_row = 0;
      for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
           col_it != row_it.end();
           ++col_it)
      {
        ++entries_per_row;
      }
      // Per-row storage is padded up to the next multiple of ALIGNMENT.
      num_entries += viennacl::tools::roundUpToNextMultiple<unsigned int>(entries_per_row, ALIGNMENT);
    }
    //std::cout << "CPU->GPU, Number of entries: " << num_entries << std::endl;

    //set up matrix entries (second pass; vectors are zero-initialized, so
    //alignment padding entries read as column 0 / value 0):
    std::vector<unsigned int> row_buffer(cpu_matrix.size1() + 1);
    std::vector<unsigned int> col_buffer(num_entries);
    std::vector<SCALARTYPE> elements(num_entries);

    unsigned int row_index = 0;   // next slot in row_buffer
    unsigned int data_index = 0;  // next slot in col_buffer / elements

    for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1();
         row_it != cpu_matrix.end1();
         ++row_it)
    {
      // CSR: record where this row starts in the data arrays.
      row_buffer[row_index] = data_index;
      ++row_index;

      for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin();
           col_it != row_it.end();
           ++col_it)
      {
        col_buffer[data_index] = static_cast<unsigned int>(col_it.index2());
        elements[data_index] = *col_it;
        ++data_index;
      }
      data_index = viennacl::tools::roundUpToNextMultiple<unsigned int>(data_index, ALIGNMENT); //take care of alignment
    }
    // Sentinel entry: one past the end of the last row.
    row_buffer[row_index] = data_index;

    /*gpu_matrix._row_buffer = viennacl::ocl::device().createMemory(CL_MEM_READ_WRITE, row_buffer);
    gpu_matrix._col_buffer = viennacl::ocl::device().createMemory(CL_MEM_READ_WRITE, col_buffer);
    gpu_matrix._elements = viennacl::ocl::device().createMemory(CL_MEM_READ_WRITE, elements);
    gpu_matrix._nonzeros = num_entries;*/

    // Hand the finished CSR arrays to the device matrix.
    gpu_matrix.set(&row_buffer[0],
                   &col_buffer[0],
                   &elements[0],
                   static_cast<unsigned int>(cpu_matrix.size1()),
                   num_entries);
  }
}
void console_clear_buffer(uint8_t buffer) { uint8_t j; for(j=2; j<42; j++) row_buffer(row)[j] = ' '; // NOTE: parity(' ') == ' ' }
/* Write one character at the current cursor position, handling newline,
 * 40-column wrap, and scrolling of the NROWS-row ring of buffers. */
void console_putchar(char c) {
    if (c == 0)
        return;

    if (c == '\n') {
        /* Newline: move the cursor to the start of the next row. */
        column = 0;
        row++;
    } else {
        /* Store the character (with parity) and advance the cursor.
         * The +2 skips the two leading address bytes of the buffer. */
        row_buffer(row)[2 + column] = parity(c);
        column++;
    }

    /* Hard wrap at the 40-character line width. */
    if (column >= 40) {
        column = 0;
        row++;
    }

    /* Scroll when the cursor catches up with the first displayed row:
     * fold the cursor back into the ring, advance the display window,
     * blank the reclaimed row, and re-stamp every row's address. */
    if ((row > 0) && ((row % NROWS) == first_row)) {
        row %= NROWS;
        first_row++;
        first_row %= NROWS;
        console_clear_buffer(row);
        move_rows();
    }
}
/* Re-stamp the address bytes of every row buffer so that first_row is shown
 * as display row 3 and the remaining buffers follow in ring order. */
void move_rows() {
    uint8_t idx;

    for (idx = 0; idx < NROWS; idx++) {
        /* (idx + NROWS - first_row) % NROWS is idx's distance from
         * first_row around the ring, kept non-negative. */
        fill_mrag(1, 3 + ((idx + NROWS - first_row) % NROWS), row_buffer(idx));
    }
}
void copy(const compressed_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix, CPU_MATRIX & cpu_matrix ) { if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) { cpu_matrix.resize(gpu_matrix.size1(), gpu_matrix.size2()); //get raw data from memory: std::vector<unsigned int> row_buffer(gpu_matrix.size1() + 1); std::vector<unsigned int> col_buffer(gpu_matrix.nnz()); std::vector<SCALARTYPE> elements(gpu_matrix.nnz()); //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl; cl_int err; err = clEnqueueReadBuffer(viennacl::ocl::device().queue().get(), gpu_matrix.handle1().get(), CL_TRUE, 0, sizeof(unsigned int)*(gpu_matrix.size1() + 1), &(row_buffer[0]), 0, NULL, NULL); CL_ERR_CHECK(err); err = clEnqueueReadBuffer(viennacl::ocl::device().queue().get(), gpu_matrix.handle2().get(), CL_TRUE, 0, sizeof(unsigned int)*gpu_matrix.nnz(), &(col_buffer[0]), 0, NULL, NULL); CL_ERR_CHECK(err); err = clEnqueueReadBuffer(viennacl::ocl::device().queue().get(), gpu_matrix.handle().get(), CL_TRUE, 0, sizeof(SCALARTYPE)*gpu_matrix.nnz(), &(elements[0]), 0, NULL, NULL); CL_ERR_CHECK(err); viennacl::ocl::finish(); //fill the cpu_matrix: unsigned int data_index = 0; for (unsigned int row = 1; row <= gpu_matrix.size1(); ++row) { while (data_index < row_buffer[row]) { if (col_buffer[data_index] >= gpu_matrix.size1()) { std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl; return; } if (elements[data_index] != static_cast<SCALARTYPE>(0.0)) cpu_matrix(row-1, col_buffer[data_index]) = elements[data_index]; ++data_index; } } } }
// Transfers an assembled viennashe sparse matrix into a ViennaCL
// compressed_matrix by building the three CSR arrays (row pointers,
// column indices, values) on the host and handing them to vcl_matrix.set().
void copy(viennashe::math::sparse_matrix<NumericT> const & assembled_matrix,
          viennacl::compressed_matrix<NumericT> & vcl_matrix)
{
  typedef typename viennashe::math::sparse_matrix<NumericT>::const_iterator2   AlongRowIterator;
  typedef typename viennashe::math::sparse_matrix<NumericT>::row_type          RowType;

  std::size_t nonzeros = assembled_matrix.nnz();

  // Host-side staging buffers, typed to match the device index type:
  viennacl::backend::typesafe_host_array<unsigned int> row_buffer(vcl_matrix.handle1(), assembled_matrix.size1() + 1);
  viennacl::backend::typesafe_host_array<unsigned int> col_buffer(vcl_matrix.handle2(), nonzeros);
  std::vector<NumericT> elements(nonzeros);

  std::size_t entry = 0;  // running write position in col_buffer / elements
  for (std::size_t row = 0; row != assembled_matrix.size1(); ++row)
  {
    row_buffer.set(row, entry);  // CSR: first entry index of this row

    RowType const & current_row = assembled_matrix.row(row);
    for (AlongRowIterator it = current_row.begin(); it != current_row.end(); ++it)
    {
      col_buffer.set(entry, it->first);   // column index
      elements[entry] = it->second;       // value
      ++entry;
    }
  }
  row_buffer.set(assembled_matrix.size1(), entry);  // sentinel: total nnz

  vcl_matrix.set(row_buffer.get(), col_buffer.get(), &elements[0],
                 assembled_matrix.size1(), assembled_matrix.size2(), nonzeros);
}