/// Enqueue an asynchronous copy of the contents of `a` into `b` on `stream`.
///
/// The two unnamed tag parameters only serve to deduce HDI / HDO; the copy
/// direction passed to cudaMemcpyAsync is copy_<HDI,HDO>::mode().
///
/// The byte count is a.size() * sizeof(Out::value_type), i.e. it is computed
/// from the *destination* element type. No capacity check on `b` and no
/// stream synchronisation is performed here.
/// NOTE(review): presumably the caller guarantees `b` holds at least a.size()
/// elements and synchronises `stream` before reading `b` -- confirm.
inline void copy( In const& a, Out& b , HDI const& , HDO const& , cudaStream_t stream = 0)
{
  using T = typename Out::value_type;

  //TODO
  // Keep the line comment above on its own line: when it shares a physical
  // line with the call below, it comments out the call and the closing brace.
  CUDA_ERROR(cudaMemcpyAsync( (T*)b.data()
                            , a.data()
                            , a.size() * sizeof(T)
                            , copy_<HDI,HDO>::mode()
                            , stream
                            ));
}
/// Copy one block of `in` into the device buffer associated with `streamid`,
/// unless that block has already been marked as transferred.
///
/// @param in        source container; in.data() is the copy source
/// @param blockid   index into block_stream_htd used to skip repeated transfers
/// @param stream    stream on which the copy is enqueued and then waited for
/// @param streamid  selects the destination via buffers.get_device(streamid)
/// @param leftover  element count to copy when non-zero; when zero, a full
///                  `blocksize` elements are copied
///
/// The trailing unnamed nt2::pinned_ parameter is a tag only.
/// NOTE(review): presumably it selects the pinned-host-memory overload -- confirm.
inline void transfer_htd( In & in, int blockid, Stream & stream ,std::size_t streamid , std::size_t leftover , nt2::pinned_ & )
{
  // A non-zero leftover overrides the regular block size.
  std::size_t sizeb = (leftover != 0) ? leftover : blocksize;

  if( block_stream_htd[blockid] == false )
  {
    // The block is flagged before the copy is issued (same order as before).
    block_stream_htd[blockid] = true;

    CUDA_ERROR(cudaMemcpyAsync( buffers.get_device(streamid)
                              , in.data()
                              , sizeb * sizeof(T)
                              , cudaMemcpyHostToDevice
                              , stream
                              ));

    // Errors raised by the asynchronous copy are reported by the next
    // synchronising call, so its status is checked instead of being dropped.
    CUDA_ERROR(cudaStreamSynchronize(stream));
  }
}