/***************************************************************************//**
    @fn magma_copyvector_async( n, elemSize, dx_src, incx, dy_dst, incy, queue )

    Copy vector dx_src on GPU device to dy_dst on GPU device.
    Elements may be arbitrary size.
    Type-safe versions set elemSize appropriately.

    With CUDA unified addressing, dx and dy can be on different GPUs.

    This version is asynchronous: it may return before the transfer finishes.
    See magma_copyvector() for a synchronous version.

    @param[in]
    n           Number of elements in vector.

    @param[in]
    elemSize    Size of each element, e.g., sizeof(double).

    @param[in]
    dx_src      Source array of dimension (1 + (n-1)*incx), on GPU device.

    @param[in]
    incx        Increment between elements of dx_src. incx > 0.

    @param[out]
    dy_dst      Destination array of dimension (1 + (n-1)*incy), on GPU device.

    @param[in]
    incy        Increment between elements of dy_dst. incy > 0.

    @param[in]
    queue       Queue to execute in.

    @ingroup magma_copyvector
*******************************************************************************/
extern "C" void
magma_copyvector_async_internal(
    magma_int_t n, magma_int_t elemSize,
    magma_const_ptr dx_src, magma_int_t incx,
    magma_ptr       dy_dst, magma_int_t incy,
    magma_queue_t queue,
    const char* func, const char* file, int line )
{
    // for backwards compatibility, accepts NULL queue to mean NULL stream.
    cudaStream_t stream = NULL;
    if ( queue != NULL ) {
        stream = queue->cuda_stream();
    }
    else {
        fprintf( stderr, "Warning: %s got NULL queue\n", __func__ );
    }
    if ( incx == 1 && incy == 1 ) {
        cudaError_t status;
        // compute byte count in size_t to avoid 32-bit truncation on
        // copies of 2 GiB or more (cudaMemcpyAsync takes size_t).
        status = cudaMemcpyAsync(
            dy_dst,
            dx_src,
            size_t(n) * size_t(elemSize), cudaMemcpyDeviceToDevice, stream );
        check_xerror( status, func, file, line );
    }
    else {
        // strided copy: treat the vector as a 1 x n matrix with
        // leading dimensions incx / incy.
        magma_copymatrix_async_internal(
            1, n, elemSize, dx_src, incx, dy_dst, incy, queue,
            func, file, line );
    }
}
// Copy vector dx_src to dy_dst, both on GPU device, asynchronously on the
// given CUDA stream. Elements may be arbitrary size (elemSize bytes each).
// Contiguous vectors (incx == incy == 1) use a single cudaMemcpyAsync;
// strided vectors are delegated to the matrix copy as a 1 x n matrix with
// leading dimensions incx / incy.
void magma_copyvector_async_internal(
    magma_int_t n, magma_int_t elemSize,
    void const* dx_src, magma_int_t incx,
    void*       dy_dst, magma_int_t incy,
    cudaStream_t stream,
    const char* func, const char* file, int line )
{
    // guard clause: non-unit strides go through the 2D copy path.
    if ( incx != 1 || incy != 1 ) {
        magma_copymatrix_async_internal(
            1, n, elemSize, dx_src, incx, dy_dst, incy, stream,
            func, file, line );
        return;
    }
    // contiguous case: one flat device-to-device transfer.
    size_t nbytes = n*elemSize;
    cudaError_t status = cudaMemcpyAsync(
        dy_dst, dx_src, nbytes, cudaMemcpyDeviceToDevice, stream );
    check_xerror( status, func, file, line );
}