// Copies this MIC-side HYB matrix (its ELL and COO parts) into the host
// matrix `dst`.
//
// `dst` must report the same matrix format as this object (asserted) and must
// be a HostMatrixHYB<ValueType>. Any other destination type is logged, both
// matrices print their info(), and FATAL_ERROR is invoked.
//
// If `dst` reports zero non-zeros it is allocated here with this matrix's
// ELL/COO sizes and dimensions. After that, nnz, row count and column count
// of source and destination are asserted to be equal.
void MICAcceleratorMatrixHYB<ValueType>::CopyToHost(HostMatrix<ValueType> *dst) const {

  // Only same-format copies are supported.
  assert(this->get_mat_format() == dst->get_mat_format());

  // MIC to CPU copy: the destination has to be a host HYB matrix.
  HostMatrixHYB<ValueType> *host_hyb = dynamic_cast<HostMatrixHYB<ValueType>*>(dst);

  if (host_hyb == NULL) {

    LOG_INFO("Error unsupported MIC matrix type");
    this->info();
    dst->info();
    FATAL_ERROR(__FILE__, __LINE__);

  } else {

    host_hyb->set_backend(this->local_backend_);

    // An empty destination gets storage matching this matrix.
    if (dst->get_nnz() == 0)
      host_hyb->AllocateHYB(this->get_ell_nnz(),
                            this->get_coo_nnz(),
                            this->get_ell_max_row(),
                            this->get_nrow(),
                            this->get_ncol());

    assert((this->get_nnz() == dst->get_nnz()) && (this->get_nrow() == dst->get_nrow()) && (this->get_ncol() == dst->get_ncol()) );

    // ELL part: values and column indices.
    if (this->get_ell_nnz() > 0) {
      copy_to_host(this->local_backend_.MIC_dev,
                   this->mat_.ELL.val, host_hyb->mat_.ELL.val, this->get_ell_nnz());
      copy_to_host(this->local_backend_.MIC_dev,
                   this->mat_.ELL.col, host_hyb->mat_.ELL.col, this->get_ell_nnz());
    }

    // COO part: row indices, column indices and values.
    if (this->get_coo_nnz() > 0) {
      copy_to_host(this->local_backend_.MIC_dev,
                   this->mat_.COO.row, host_hyb->mat_.COO.row, this->get_coo_nnz());
      copy_to_host(this->local_backend_.MIC_dev,
                   this->mat_.COO.col, host_hyb->mat_.COO.col, this->get_coo_nnz());
      copy_to_host(this->local_backend_.MIC_dev,
                   this->mat_.COO.val, host_hyb->mat_.COO.val, this->get_coo_nnz());
    }

  }

}
//! @brief Copy the ND-array content to a std::vector object. __host__ inline void copy_to_host(std::vector<T>& host_vector) const { if (host_vector.size() != size()) host_vector.resize(size()); copy_to_host(host_vector.data()); }
// Copies this MIC-side DIA matrix (values and diagonal offsets) into the host
// matrix `dst`.
//
// `dst` must report the same matrix format as this object (asserted) and must
// be a HostMatrixDIA<ValueType>. Any other destination type is logged, both
// matrices print their info(), and FATAL_ERROR is invoked.
//
// If `dst` reports zero non-zeros it is allocated here with this matrix's
// nnz, dimensions and diagonal count. After that, nnz, row count and column
// count of source and destination are asserted to be equal.
void MICAcceleratorMatrixDIA<ValueType>::CopyToHost(HostMatrix<ValueType> *dst) const {

  // Only same-format copies are supported.
  assert(this->get_mat_format() == dst->get_mat_format());

  // MIC to CPU copy: the destination has to be a host DIA matrix.
  HostMatrixDIA<ValueType> *host_dia = dynamic_cast<HostMatrixDIA<ValueType>*>(dst);

  if (host_dia == NULL) {

    LOG_INFO("Error unsupported MIC matrix type");
    this->info();
    dst->info();
    FATAL_ERROR(__FILE__, __LINE__);

  } else {

    host_dia->set_backend(this->local_backend_);

    // An empty destination gets storage matching this matrix.
    if (dst->get_nnz() == 0)
      host_dia->AllocateDIA(this->get_nnz(),
                            this->get_nrow(),
                            this->get_ncol(),
                            this->get_ndiag());

    assert((this->get_nnz() == dst->get_nnz()) && (this->get_nrow() == dst->get_nrow()) && (this->get_ncol() == dst->get_ncol()) );

    if (this->get_nnz() > 0) {

      // NOTE(review): these calls pass no MIC device id, whereas
      // MICAcceleratorMatrixHYB::CopyToHost passes local_backend_.MIC_dev as
      // the first argument -- confirm this overload is the intended one.
      copy_to_host(this->mat_.val, host_dia->mat_.val, this->get_nnz());
      copy_to_host(this->mat_.offset, host_dia->mat_.offset, this->mat_.num_diag);

      // TODO: an element-wise loop over val (get_nnz() entries) and offset
      // (num_diag entries) used to be kept here, commented out, as an
      // alternative to the two calls above.

    }

  }

}
// copy_to_host() overload for a std::vector<bool> destination.
//
// The device range [first, last) is first copied into a temporary
// std::vector<uint8_t> of the same length, and that buffer is then copied
// element by element into `result` with std::copy. (Presumably the detour
// exists because std::vector<bool> is bit-packed and cannot receive a raw
// block copy.)
//
// Returns the iterator produced by std::copy, i.e. one past the last element
// written to the destination.
inline std::vector<bool>::iterator
copy_to_host(DeviceIterator first,
             DeviceIterator last,
             std::vector<bool>::iterator result,
             command_queue &queue)
{
    // One byte per device element.
    std::vector<uint8_t> staging(std::distance(first, last));

    copy_to_host(first, last, staging.begin(), queue);

    std::vector<bool>::iterator output_end =
        std::copy(staging.begin(), staging.end(), result);
    return output_end;
}
// Device -> host copy dispatch. The overload is enabled only when
// InputIterator is a device iterator and OutputIterator is not.
//
// When the output iterator is contiguous the range is copied straight to it
// with copy_to_host(). Otherwise the values are copied into a temporary
// std::vector first and moved on to `result` with std::copy.
//
// Returns the iterator returned by whichever copy wrote to `result`.
inline OutputIterator
dispatch_copy(InputIterator first,
              InputIterator last,
              OutputIterator result,
              command_queue &queue,
              typename boost::enable_if_c<
                  is_device_iterator<InputIterator>::value &&
                  !is_device_iterator<OutputIterator>::value
              >::type* = 0)
{
    if(!is_contiguous_iterator<OutputIterator>::value){
        // Non-contiguous output: stage the values in a temporary
        // std::vector, then copy from there into the destination.
        typedef typename std::iterator_traits<InputIterator>::value_type value_type;

        std::vector<value_type> staging(iterator_range_size(first, last));
        copy_to_host(first, last, staging.begin(), queue);

        return std::copy(staging.begin(), staging.end(), result);
    }

    // Contiguous output: copy directly.
    return copy_to_host(first, last, result, queue);
}
int main(int argc, char** argv) { LOGI("\nvvvv vvvv vvvv"); int width = 128; int height = 128; int channels = 4; auto input = Halide::Buffer<int>::make_interleaved(width, height, channels); LOGI("Allocated memory for %dx%dx%d image", width, height, channels); input.for_each_element([&](int i, int j, int k) { input(i, j, k) = ((i + j) % 2) * 6; }); LOGI("Input :\n"); print(input); auto output = Halide::Buffer<int>::make_interleaved(width, height, channels); two_kernels_filter(input, output); LOGI("Filter is done."); output.device_sync(); LOGI("Sync is done"); output.copy_to_host(); LOGI("Output :\n"); print(output); int count_mismatches = 0; output.for_each_element([&](int i, int j, int k) { int32_t output_value = output(i, j, k); int32_t input_value = input(i, j, k); if (output_value != input_value) { if (count_mismatches < 100) { std::ostringstream str; str << "output and input results differ at " << "(" << i << ", " << j << ", " << k << "):" << output_value << " != " << input_value << "\n"; LOGI("%s", str.str().c_str()); } count_mismatches++; } }); LOGI(count_mismatches == 0 ? "Test passed.\n": "Test failed.\n"); halide_device_release(NULL, halide_openglcompute_device_interface()); LOGI("^^^^ ^^^^ ^^^^\n"); }