// Enqueues a write of the host range [first, last) into the buffer that
// `result` refers to, starting at the element index reported by
// result.get_index(), and waiting on `events`.
//
// Returns `result` advanced by the number of elements written; an empty
// range enqueues nothing and returns `result` unchanged.
//
// NOTE(review): the host data is read starting at the address of *first,
// so this presumably expects the host range to be contiguous -- confirm
// against callers.
inline DeviceIterator copy_to_device(HostIterator first,
                                     HostIterator last,
                                     DeviceIterator result,
                                     command_queue &queue,
                                     const wait_list &events)
{
    typedef std::iterator_traits<DeviceIterator> device_traits;
    typedef typename device_traits::value_type value_type;
    typedef typename device_traits::difference_type difference_type;

    const size_t element_count = iterator_range_size(first, last);
    if(element_count == 0){
        // nothing to transfer
        return result;
    }

    // the write is expressed in bytes, so scale both the starting
    // element index and the element count by the element size
    const size_t first_element = result.get_index();
    const size_t byte_offset = first_element * sizeof(value_type);
    const size_t byte_count = element_count * sizeof(value_type);

    queue.enqueue_write_buffer(
        result.get_buffer(),
        byte_offset,
        byte_count,
        ::boost::addressof(*first),
        events
    );

    return result + static_cast<difference_type>(element_count);
}
// Enqueues a write of the single element `value` into `buffer` at element
// position `index` (byte offset index * sizeof(T)) on `queue`, and returns
// the event produced by the enqueue call.
//
// Asserts that `index` lies within the number of T-sized elements the
// buffer holds and that the buffer and the queue share the same context.
inline event write_single_value(const T &value,
                                const buffer &buffer,
                                size_t index,
                                command_queue &queue)
{
    const size_t element_size = sizeof(T);

    BOOST_ASSERT(index < buffer.size() / element_size);
    BOOST_ASSERT(buffer.get_context() == queue.get_context());

    const size_t byte_offset = index * element_size;

    return queue.enqueue_write_buffer(
        buffer,
        byte_offset,
        element_size,
        &value
    );
}
inline void dispatch_fill(BufferIterator first, size_t count, const T &value, command_queue &queue, typename boost::enable_if< is_valid_fill_buffer_iterator<BufferIterator> >::type* = 0) { typedef typename std::iterator_traits<BufferIterator>::value_type value_type; if(count == 0){ // nothing to do return; } // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer) if(!queue.check_device_version(1, 2)){ return fill_with_copy(first, count, value, queue); } value_type pattern = static_cast<value_type>(value); size_t offset = static_cast<size_t>(first.get_index()); if(count == 1){ // use clEnqueueWriteBuffer() directly when writing a single value // to the device buffer. this is potentially more efficient and also // works around a bug in the intel opencl driver. queue.enqueue_write_buffer( first.get_buffer(), offset * sizeof(value_type), sizeof(value_type), &pattern ); } else { queue.enqueue_fill_buffer( first.get_buffer(), &pattern, sizeof(value_type), offset * sizeof(value_type), count * sizeof(value_type) ); } }