/**
 * Asynchronous copy between host iterator ranges with matching value types.
 *
 * Selected via SFINAE for host random access iterators; issues a
 * host-to-host cudaMemcpyAsync on the given stream.
 */
template <typename InputIterator, typename OutputIterator>
inline typename std::enable_if<
    std::is_convertible<
        typename std::iterator_traits<InputIterator>::iterator_category
      , host_random_access_iterator_tag
    >::value
    && std::is_convertible<
        typename std::iterator_traits<OutputIterator>::iterator_category
      , host_random_access_iterator_tag
    >::value
    && std::is_same<
        typename std::iterator_traits<InputIterator>::value_type
      , typename std::iterator_traits<OutputIterator>::value_type
    >::value
  , OutputIterator>::type
copy(InputIterator first, InputIterator last, OutputIterator result, stream& stream)
{
    typename std::iterator_traits<InputIterator>::difference_type size = last - first;
    CUDA_CALL( cudaMemcpyAsync(
        &*result
      , &*first
      , size * sizeof(typename std::iterator_traits<InputIterator>::value_type)
      , cudaMemcpyHostToHost
      , stream.data()
    ) );
    return result + size;
}
/**
 * Asynchronous copy from device vector to device symbol array.
 */
template <typename T>
void copy(vector<T> const& src, symbol<T[]>& dst, stream& stream)
{
    assert(src.size() == dst.size());
    CUDA_CALL( cudaMemcpyToSymbolAsync(
        reinterpret_cast<char const*>(dst.data())
      , src.data()
      , src.size() * sizeof(T)
      , 0
      , cudaMemcpyDeviceToDevice
      , stream.data()
    ) );
}
/**
 * Asynchronous copy of the first `size` elements from device vector to host vector.
 */
template <typename T>
void copy(vector<T> const& src, host::vector<T>& dst, size_t size, stream& stream)
{
    assert(size <= src.capacity());
    assert(size <= dst.capacity());
    CUDA_CALL( cudaMemcpyAsync(
        dst.data()
      , src.data()
      , size * sizeof(T)
      , cudaMemcpyDeviceToHost
      , stream.data()
    ) );
}
/**
 * Asynchronous copy from device symbol array to host vector.
 */
template <typename T>
void copy(symbol<T[]> const& src, host::vector<T>& dst, stream& stream)
{
    assert(src.size() == dst.size());
    CUDA_CALL( cudaMemcpyFromSymbolAsync(
        dst.data()
      , reinterpret_cast<char const*>(src.data())
      , src.size() * sizeof(T)
      , 0
      , cudaMemcpyDeviceToHost
      , stream.data()
    ) );
}
/**
 * Asynchronous copy from device vector to host vector.
 */
template <typename T>
void copy(vector<T> const& src, host::vector<T>& dst, stream& stream)
{
    assert(src.size() == dst.size());
    CUDA_CALL( cudaMemcpyAsync(
        dst.data()
      , src.data()
      , src.size() * sizeof(T)
      , cudaMemcpyDeviceToHost
      , stream.data()
    ) );
}
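/*
 * Usage sketch (illustrative only, not part of this header): enqueue a
 * device-to-host copy on a dedicated stream. The qualified names
 * cuda::vector, cuda::host::vector and cuda::stream, as well as
 * stream.synchronize() wrapping cudaStreamSynchronize(), are assumptions
 * about the surrounding wrapper library and may differ in the actual code.
 *
 *   cuda::vector<float> g_data(1024);          // device memory
 *   cuda::host::vector<float> h_data(1024);    // page-locked host memory
 *   cuda::stream stream;                       // asynchronous CUDA stream
 *
 *   copy(g_data, h_data, stream);              // asynchronous device-to-host copy
 *   stream.synchronize();                      // wait until the copy has finished
 */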