/** @brief Reads data from the buffer identified by src_buffer into host memory at ptr.
*
* @param src_buffer    Handle of the source buffer (RAM, OpenCL or CUDA, depending on the active handle id)
* @param src_offset    Offset (in bytes) into the source buffer at which reading starts
* @param bytes_to_read Number of bytes to transfer; a value of zero is a no-op
* @param ptr           Host pointer receiving the data
*/
inline void memory_read(mem_handle const & src_buffer, std::size_t src_offset, std::size_t bytes_to_read, void * ptr)
{
  //finish(); //Fixes some issues with AMD APP SDK. However, might sacrifice a few percents of performance in some cases.

  if (bytes_to_read == 0)
    return;

  // Dispatch on the memory domain currently backing the handle:
  switch (src_buffer.get_active_handle_id())
  {
  case MAIN_MEMORY:
    cpu_ram::memory_read(src_buffer.ram_handle(), src_offset, bytes_to_read, ptr);
    break;
#ifdef VIENNACL_WITH_OPENCL
  case OPENCL_MEMORY:
    opencl::memory_read(src_buffer.opencl_handle(), src_offset, bytes_to_read, ptr);
    break;
#endif
#ifdef VIENNACL_WITH_CUDA
  case CUDA_MEMORY:
    cuda::memory_read(src_buffer.cuda_handle(), src_offset, bytes_to_read, ptr);
    break;
#endif
  default:
    throw "unknown memory handle!";
  }
}
inline void memory_write(mem_handle & dst_buffer, std::size_t dst_offset, std::size_t bytes_to_write, const void * ptr) { if (bytes_to_write > 0) { switch(dst_buffer.get_active_handle_id()) { case MAIN_MEMORY: cpu_ram::memory_write(dst_buffer.ram_handle(), dst_offset, bytes_to_write, ptr); break; #ifdef VIENNACL_WITH_OPENCL case OPENCL_MEMORY: opencl::memory_write(dst_buffer.opencl_handle(), dst_offset, bytes_to_write, ptr); break; #endif #ifdef VIENNACL_WITH_CUDA case CUDA_MEMORY: cuda::memory_write(dst_buffer.cuda_handle(), dst_offset, bytes_to_write, ptr); break; #endif default: throw "unknown memory handle!"; } } }
inline void memory_create(mem_handle & handle, std::size_t size_in_bytes, const void * host_ptr = NULL) { if (size_in_bytes > 0) { if (handle.get_active_handle_id() == MEMORY_NOT_INITIALIZED) handle.switch_active_handle_id(default_memory_type()); switch(handle.get_active_handle_id()) { case MAIN_MEMORY: handle.ram_handle() = cpu_ram::memory_create(size_in_bytes, host_ptr); handle.raw_size(size_in_bytes); break; #ifdef VIENNACL_WITH_OPENCL case OPENCL_MEMORY: handle.opencl_handle() = opencl::memory_create(size_in_bytes, host_ptr); handle.raw_size(size_in_bytes); break; #endif #ifdef VIENNACL_WITH_CUDA case CUDA_MEMORY: handle.cuda_handle() = cuda::memory_create(size_in_bytes, host_ptr); handle.raw_size(size_in_bytes); break; #endif default: throw "unknown memory handle!"; } } }
inline void memory_copy(mem_handle const & src_buffer, mem_handle & dst_buffer, std::size_t src_offset, std::size_t dst_offset, std::size_t bytes_to_copy) { assert( src_buffer.get_active_handle_id() == dst_buffer.get_active_handle_id() && bool("memory_copy() must be called on buffers from the same domain") ); if (bytes_to_copy > 0) { switch(src_buffer.get_active_handle_id()) { case MAIN_MEMORY: cpu_ram::memory_copy(src_buffer.ram_handle(), dst_buffer.ram_handle(), src_offset, dst_offset, bytes_to_copy); break; #ifdef VIENNACL_WITH_OPENCL case OPENCL_MEMORY: opencl::memory_copy(src_buffer.opencl_handle(), dst_buffer.opencl_handle(), src_offset, dst_offset, bytes_to_copy); break; #endif #ifdef VIENNACL_WITH_CUDA case CUDA_MEMORY: cuda::memory_copy(src_buffer.cuda_handle(), dst_buffer.cuda_handle(), src_offset, dst_offset, bytes_to_copy); break; #endif default: throw "unknown memory handle!"; } } }
/** @brief Resize without initializing the new memory.
*
* Recomputes buffer_size_ from the requested element count (using the OpenCL
* target element size when the handle lives in OpenCL memory) and reallocates
* the internal byte buffer. The previous buffer contents are discarded.
*
* @param handle Handle whose memory domain determines the element size
* @param num    Number of elements the buffer must accommodate
*/
void raw_resize(mem_handle const & handle, vcl_size_t num)
{
  buffer_size_ = sizeof(cpu_type) * num;
  (void)handle; //silence unused variable warning if compiled without OpenCL support

#ifdef VIENNACL_WITH_OPENCL
  memory_types mem_type = handle.get_active_handle_id();
  if (mem_type == MEMORY_NOT_INITIALIZED)
    mem_type = default_memory_type();

  if (mem_type == OPENCL_MEMORY)
  {
    // OpenCL buffers may use a different element representation on the device:
    convert_to_opencl_ = true;
    buffer_size_ = sizeof(target_type) * num;
  }
#endif

  if (num > 0)
  {
    delete[] bytes_buffer_;   // delete[] on a null pointer is a no-op
    bytes_buffer_ = NULL;     // fix: avoid a dangling pointer (and a later double delete) if the allocation below throws std::bad_alloc
    bytes_buffer_ = new char[buffer_size_];
  }
}
/** @brief Resize without initializing the new memory.
*
* Recomputes buffer_size_ from the requested element count (using the OpenCL
* target element size when the handle lives in OpenCL memory) and reallocates
* the internal byte buffer. The previous buffer contents are discarded.
*
* @param handle Handle whose memory domain determines the element size
* @param num    Number of elements the buffer must accommodate
*/
void raw_resize(mem_handle const & handle, std::size_t num)
{
  buffer_size_ = sizeof(cpu_type) * num;
  (void)handle; //fix: silence unused variable warning if compiled without OpenCL support (consistent with the vcl_size_t overload)

#ifdef VIENNACL_WITH_OPENCL
  memory_types mem_type = handle.get_active_handle_id();
  if (mem_type == MEMORY_NOT_INITIALIZED)
    mem_type = default_memory_type();

  if (mem_type == OPENCL_MEMORY)
  {
    // OpenCL buffers may use a different element representation on the device:
    convert_to_opencl_ = true;
    buffer_size_ = sizeof(target_type) * num;
  }
#endif

  if (num > 0)
  {
    delete[] bytes_buffer_;   // delete[] on a null pointer is a no-op
    bytes_buffer_ = NULL;     // fix: avoid a dangling pointer (and a later double delete) if the allocation below throws std::bad_alloc
    bytes_buffer_ = new char[buffer_size_];
  }
}
/** @brief Copies the contents of handle_src into handle_dst across (possibly different) memory domains,
*          converting the per-element representation when the two domains use different element sizes.
*
* An uninitialized destination handle is first switched to the default memory domain.
* The destination buffer is reallocated (memory_create) whenever its raw size does not
* already match; otherwise the existing buffer is overwritten (memory_write).
*
* Assumes DataType, typesafe_host_array and detail::element_size<> are available from the
* enclosing scope (template context not visible here).
*/
void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst)
{
  // An uninitialized destination inherits the default memory domain:
  if (handle_dst.get_active_handle_id() == MEMORY_NOT_INITIALIZED)
    handle_dst.switch_active_handle_id(default_memory_type());

  // Element sizes may differ between domains (e.g. device index type vs. host index type):
  std::size_t element_size_src = detail::element_size<DataType>(handle_src.get_active_handle_id());
  std::size_t element_size_dst = detail::element_size<DataType>(handle_dst.get_active_handle_id());

  if (element_size_src != element_size_dst)
  {
    // Data needs to be converted element-by-element via host-side staging buffers.
    typesafe_host_array<DataType> buffer_src(handle_src);
    // Destination staging buffer sized by the number of source elements:
    typesafe_host_array<DataType> buffer_dst(handle_dst, handle_src.raw_size() / element_size_src);

    //
    // Step 1: Fill buffer_dst depending on where the data resides:
    //
    DataType const * src_data;
    switch (handle_src.get_active_handle_id())
    {
    case MAIN_MEMORY:
      // Source data is directly accessible in RAM; convert element-wise into buffer_dst:
      src_data = reinterpret_cast<DataType const *>(handle_src.ram_handle().get());
      for (std::size_t i=0; i<buffer_dst.size(); ++i)
        buffer_dst.set(i, src_data[i]);
      break;

#ifdef VIENNACL_WITH_OPENCL
    case OPENCL_MEMORY:
      // Read the device buffer into buffer_src first, then convert into buffer_dst:
      buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src);
      opencl::memory_read(handle_src.opencl_handle(), 0, buffer_src.raw_size(), buffer_src.get());
      for (std::size_t i=0; i<buffer_dst.size(); ++i)
        buffer_dst.set(i, buffer_src[i]);
      break;
#endif
#ifdef VIENNACL_WITH_CUDA
    case CUDA_MEMORY:
      // Same staging approach for CUDA memory:
      buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src);
      cuda::memory_read(handle_src.cuda_handle(), 0, buffer_src.raw_size(), buffer_src.get());
      for (std::size_t i=0; i<buffer_dst.size(); ++i)
        buffer_dst.set(i, buffer_src[i]);
      break;
#endif

    default:
      throw "unsupported memory domain";
    }

    //
    // Step 2: Write to destination (reuse the buffer if the size already matches, else reallocate):
    //
    if (handle_dst.raw_size() == buffer_dst.raw_size())
      viennacl::backend::memory_write(handle_dst, 0, buffer_dst.raw_size(), buffer_dst.get());
    else
      viennacl::backend::memory_create(handle_dst, buffer_dst.raw_size(), buffer_dst.get());
  }
  else
  {
    // No data conversion required — raw bytes can be moved between domains directly.
    typesafe_host_array<DataType> buffer(handle_src);   // host staging buffer, used for device-to-device transfers across APIs

    switch (handle_src.get_active_handle_id())
    {
    case MAIN_MEMORY:
      switch (handle_dst.get_active_handle_id())
      {
      case MAIN_MEMORY:
      case OPENCL_MEMORY:
      case CUDA_MEMORY:
        // Source bytes are host-accessible, so a single write/create suffices for any destination:
        if (handle_dst.raw_size() == handle_src.raw_size())
          viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), handle_src.ram_handle().get());
        else
          viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), handle_src.ram_handle().get());
        break;
      default:
        throw "unsupported destination memory domain";
      }
      break;

    case OPENCL_MEMORY:
      switch (handle_dst.get_active_handle_id())
      {
      case MAIN_MEMORY:
        // Read device buffer straight into (re)allocated host memory:
        if (handle_dst.raw_size() != handle_src.raw_size())
          viennacl::backend::memory_create(handle_dst, handle_src.raw_size());
        viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get());
        break;
      case OPENCL_MEMORY:
        // Same domain: device-to-device copy:
        if (handle_dst.raw_size() != handle_src.raw_size())
          viennacl::backend::memory_create(handle_dst, handle_src.raw_size());
        viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size());
        break;
      case CUDA_MEMORY:
        // Cross-API transfer (OpenCL -> CUDA) must bounce through the host staging buffer:
        if (handle_dst.raw_size() != handle_src.raw_size())
          viennacl::backend::memory_create(handle_dst, handle_src.raw_size());
        buffer.resize(handle_src, handle_src.raw_size() / element_size_src);
        viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get());
        viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get());
        break;
      default:
        throw "unsupported destination memory domain";
      }
      break;

    case CUDA_MEMORY:
      switch (handle_dst.get_active_handle_id())
      {
      case MAIN_MEMORY:
        // Read device buffer straight into (re)allocated host memory:
        if (handle_dst.raw_size() != handle_src.raw_size())
          viennacl::backend::memory_create(handle_dst, handle_src.raw_size());
        viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get());
        break;
      case OPENCL_MEMORY:
        // Cross-API transfer (CUDA -> OpenCL) must bounce through the host staging buffer:
        if (handle_dst.raw_size() != handle_src.raw_size())
          viennacl::backend::memory_create(handle_dst, handle_src.raw_size());
        buffer.resize(handle_src, handle_src.raw_size() / element_size_src);
        viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get());
        viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get());
        break;
      case CUDA_MEMORY:
        // Same domain: device-to-device copy:
        if (handle_dst.raw_size() != handle_src.raw_size())
          viennacl::backend::memory_create(handle_dst, handle_src.raw_size());
        viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size());
        break;
      default:
        throw "unsupported destination memory domain";
      }
      break;

    default:
      throw "unsupported source memory domain";
    }
  }
}
/** @brief Migrates the data behind the given handle to the memory domain new_mem_domain,
*          then marks that domain as the active one.
*
* No-op if the handle is already in the requested domain. An uninitialized handle only
* switches its active id (there is no data to move yet). Transfers between two device
* APIs (OpenCL <-> CUDA) are staged through a host-side std::vector.
*
* @throws const char* if the element sizes of the two domains differ, or if the
*                     requested transition is not supported in this build.
*
* Assumes DataType and detail::element_size<> come from the enclosing (template) scope.
*/
void switch_memory_domain(mem_handle & handle, viennacl::memory_types new_mem_domain)
{
  if (handle.get_active_handle_id() == new_mem_domain)
    return;

  // Nothing allocated yet: just record the new domain.
  if (handle.get_active_handle_id() == viennacl::MEMORY_NOT_INITIALIZED)
  {
    handle.switch_active_handle_id(new_mem_domain);
    return;
  }

  std::size_t size_dst = detail::element_size<DataType>(handle.get_active_handle_id());
  std::size_t size_src = detail::element_size<DataType>(new_mem_domain);

  if (size_dst != size_src)  // OpenCL data element size not the same as host data element size
  {
    throw "Heterogeneous data element sizes not yet supported!";
  }
  else //no data conversion required
  {
    if (handle.get_active_handle_id() == MAIN_MEMORY) //we can access the existing data directly
    {
      switch (new_mem_domain)
      {
#ifdef VIENNACL_WITH_OPENCL
      case OPENCL_MEMORY:
        handle.opencl_handle() = opencl::memory_create(handle.raw_size(), handle.ram_handle().get());
        break;
#endif
#ifdef VIENNACL_WITH_CUDA
      case CUDA_MEMORY:
        handle.cuda_handle() = cuda::memory_create(handle.raw_size(), handle.ram_handle().get());
        break;
#endif
      case MAIN_MEMORY:   // MAIN_MEMORY -> MAIN_MEMORY was already handled by the early return above
      default:
        throw "Invalid destination domain";
      }
    }
#ifdef VIENNACL_WITH_OPENCL
    else if (handle.get_active_handle_id() == OPENCL_MEMORY) // data can be dumped into destination directly
    {
      std::vector<DataType> buffer;   // host staging buffer, only used for the OpenCL -> CUDA path

      switch (new_mem_domain)
      {
      case MAIN_MEMORY:
        handle.ram_handle() = cpu_ram::memory_create(handle.raw_size());
        opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), handle.ram_handle().get());
        break;
#ifdef VIENNACL_WITH_CUDA
      case CUDA_MEMORY:
        // Cross-API transfer: OpenCL device -> host vector -> CUDA device.
        // NOTE(review): assumes raw_size() > 0 here, since &(buffer[0]) on an empty vector is undefined — confirm callers.
        buffer.resize(handle.raw_size() / sizeof(DataType));
        opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), &(buffer[0]));
        // NOTE(review): this uses a cuda::memory_create overload taking the handle as first argument,
        // unlike the assignment form used elsewhere in this function — presumably equivalent; verify.
        cuda::memory_create(handle.cuda_handle(), handle.raw_size(), &(buffer[0]));
        break;
#endif
      default:
        throw "Invalid destination domain";
      }
    }
#endif
#ifdef VIENNACL_WITH_CUDA
    else //CUDA_MEMORY
    {
      std::vector<DataType> buffer;   // host staging buffer, only used for the CUDA -> OpenCL path

      // write
      switch (new_mem_domain)
      {
      case MAIN_MEMORY:
        handle.ram_handle() =
        cpu_ram::memory_create(handle.raw_size());
        cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), handle.ram_handle().get());
        break;
#ifdef VIENNACL_WITH_OPENCL
      case OPENCL_MEMORY:
        // Cross-API transfer: CUDA device -> host vector -> OpenCL device.
        // NOTE(review): same empty-vector caveat as above — assumes raw_size() > 0.
        buffer.resize(handle.raw_size() / sizeof(DataType));
        cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), &(buffer[0]));
        handle.opencl_handle() = opencl::memory_create(handle.raw_size(), &(buffer[0]));
        break;
#endif
      default:
        // NOTE(review): message says "source" but this rejects an unsupported *destination* domain — string kept as-is.
        throw "Unsupported source memory domain";
      }
    }
#endif

    // everything succeeded so far, now switch to new domain:
    handle.switch_active_handle_id(new_mem_domain);
  } // no data conversion
}
// TODO: Refine this concept. Maybe move to constructor? inline void memory_shallow_copy(mem_handle const & src_buffer, mem_handle & dst_buffer) { assert( (dst_buffer.get_active_handle_id() == MEMORY_NOT_INITIALIZED) && bool("Shallow copy on already initialized memory not supported!")); switch(src_buffer.get_active_handle_id()) { case MAIN_MEMORY: dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); dst_buffer.ram_handle() = src_buffer.ram_handle(); break; #ifdef VIENNACL_WITH_OPENCL case OPENCL_MEMORY: dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); dst_buffer.opencl_handle() = src_buffer.opencl_handle(); break; #endif #ifdef VIENNACL_WITH_CUDA case CUDA_MEMORY: dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); dst_buffer.cuda_handle() = src_buffer.cuda_handle(); break; #endif default: throw "unknown memory handle!"; } }