// Copy vector dx_src on GPU device to hy_dst on CPU host, asynchronously.
// Elements may be arbitrary size; elemSize gives the size of each element
// in bytes. This may return before the transfer finishes.
// NOTE(review): a truly asynchronous copy requires hy_dst to be pinned
// host memory — confirm callers.
//
// @param[in]  n        Number of elements in vector.
// @param[in]  elemSize Size of each element, e.g., sizeof(double).
// @param[in]  dx_src   Source array of dimension (1 + (n-1))*incx, on GPU device.
// @param[in]  incx     Increment between elements of dx_src. incx > 0.
// @param[out] hy_dst   Destination array of dimension (1 + (n-1))*incy, on CPU host.
// @param[in]  incy     Increment between elements of hy_dst. incy > 0.
// @param[in]  stream   CUDA stream to execute the transfer in.
void magma_getvector_async(
    magma_int_t n, size_t elemSize,
    void const* dx_src, magma_int_t incx,
    void*       hy_dst, magma_int_t incy,
    cudaStream_t stream )
{
    cublasStatus_t status;
    // cublasGetVectorAsync takes int parameters; cast explicitly in case
    // magma_int_t/size_t are 64-bit, matching the other wrappers in this file.
    status = cublasGetVectorAsync(
        int(n), int(elemSize),
        dx_src, int(incx),
        hy_dst, int(incy), stream );
    check_error( status );
}
// Copy vector dx_src on GPU device to hy_dst on CPU host, asynchronously,
// reporting errors with the caller's func/file/line (used by the
// magma_getvector_async macro wrapper).
//
// @param[in]  n        Number of elements in vector.
// @param[in]  elemSize Size of each element, e.g., sizeof(double).
// @param[in]  dx_src   Source array of dimension (1 + (n-1))*incx, on GPU device.
// @param[in]  incx     Increment between elements of dx_src. incx > 0.
// @param[out] hy_dst   Destination array of dimension (1 + (n-1))*incy, on CPU host.
// @param[in]  incy     Increment between elements of hy_dst. incy > 0.
// @param[in]  stream   CUDA stream to execute the transfer in.
// @param[in]  func, file, line  Call-site information forwarded to check_xerror.
void magma_getvector_async_internal(
    magma_int_t n, magma_int_t elemSize,
    void const* dx_src, magma_int_t incx,
    void*       hy_dst, magma_int_t incy,
    cudaStream_t stream,
    const char* func, const char* file, int line )
{
    cublasStatus_t status;
    // cublasGetVectorAsync takes int parameters; cast explicitly in case
    // magma_int_t is 64-bit, matching the other wrappers in this file.
    status = cublasGetVectorAsync(
        int(n), int(elemSize),
        dx_src, int(incx),
        hy_dst, int(incy), stream );
    check_xerror( status, func, file, line );
}
// Type-safe (float) version of magma_getvector_async_internal:
// copy float vector dx_src on GPU device to hy_dst on CPU host,
// asynchronously, reporting errors with the caller's func/file/line.
//
// @param[in]  n        Number of elements in vector.
// @param[in]  dx_src   Source array of dimension (1 + (n-1))*incx, on GPU device.
// @param[in]  incx     Increment between elements of dx_src. incx > 0.
// @param[out] hy_dst   Destination array of dimension (1 + (n-1))*incy, on CPU host.
// @param[in]  incy     Increment between elements of hy_dst. incy > 0.
// @param[in]  stream   CUDA stream to execute the transfer in.
// @param[in]  func, file, line  Call-site information forwarded to check_xerror.
void magma_sgetvector_async_internal(
    magma_int_t n,
    float const* dx_src, magma_int_t incx,
    float*       hy_dst, magma_int_t incy,
    cudaStream_t stream,
    const char* func, const char* file, int line )
{
    cublasStatus_t status;
    // cublasGetVectorAsync takes int parameters; cast explicitly in case
    // magma_int_t is 64-bit, matching the other wrappers in this file.
    status = cublasGetVectorAsync(
        int(n), int(sizeof(float)),
        dx_src, int(incx),
        hy_dst, int(incy), stream );
    check_xerror( status, func, file, line );
}
// -------------------- extern "C" void magma_zgetvector_async_internal( magma_int_t n, magmaDoubleComplex_const_ptr dx_src, magma_int_t incx, magmaDoubleComplex* hy_dst, magma_int_t incy, magma_queue_t queue, const char* func, const char* file, int line ) { cublasStatus_t status; status = cublasGetVectorAsync( n, sizeof(magmaDoubleComplex), dx_src, incx, hy_dst, incy, queue ); check_xerror( status, func, file, line ); }
/***************************************************************************//** @fn magma_getvector( n, elemSize, dx_src, incx, hy_dst, incy, queue ) Copy vector dx_src on GPU device to hy_dst on CPU host. Elements may be arbitrary size. Type-safe versions set elemSize appropriately. This version synchronizes the queue after the transfer. See magma_getvector_async() for an asynchronous version. @param[in] n Number of elements in vector. @param[in] elemSize Size of each element, e.g., sizeof(double). @param[in] dx_src Source array of dimension (1 + (n-1))*incx, on GPU device. @param[in] incx Increment between elements of hx_src. incx > 0. @param[out] hy_dst Destination array of dimension (1 + (n-1))*incy, on CPU host. @param[in] incy Increment between elements of dy_dst. incy > 0. @param[in] queue Queue to execute in. @ingroup magma_getvector *******************************************************************************/ extern "C" void magma_getvector_q_internal( magma_int_t n, magma_int_t elemSize, magma_const_ptr dx_src, magma_int_t incx, void* hy_dst, magma_int_t incy, magma_queue_t queue, const char* func, const char* file, int line ) { cublasStatus_t status; status = cublasGetVectorAsync( int(n), int(elemSize), dx_src, int(incx), hy_dst, int(incy), queue->cuda_stream() ); cudaStreamSynchronize( queue->cuda_stream() ); check_xerror( status, func, file, line ); }
/***************************************************************************//** @fn magma_getvector_async( n, elemSize, dx_src, incx, hy_dst, incy, queue ) Copy vector dx_src on GPU device to hy_dst on CPU host. Elements may be arbitrary size. Type-safe versions set elemSize appropriately. This version is asynchronous: it may return before the transfer finishes, if hy_dst is pinned CPU memory. See magma_getvector() for a synchronous version. @param[in] n Number of elements in vector. @param[in] elemSize Size of each element, e.g., sizeof(double). @param[in] dx_src Source array of dimension (1 + (n-1))*incx, on GPU device. @param[in] incx Increment between elements of hx_src. incx > 0. @param[out] hy_dst Destination array of dimension (1 + (n-1))*incy, on CPU host. @param[in] incy Increment between elements of dy_dst. incy > 0. @param[in] queue Queue to execute in. @ingroup magma_getvector *******************************************************************************/ extern "C" void magma_getvector_async_internal( magma_int_t n, magma_int_t elemSize, magma_const_ptr dx_src, magma_int_t incx, void* hy_dst, magma_int_t incy, magma_queue_t queue, const char* func, const char* file, int line ) { // for backwards compatability, accepts NULL queue to mean NULL stream. cudaStream_t stream = NULL; if ( queue != NULL ) { stream = queue->cuda_stream(); } else { fprintf( stderr, "Warning: %s got NULL queue\n", __func__ ); } cublasStatus_t status; status = cublasGetVectorAsync( int(n), int(elemSize), dx_src, int(incx), hy_dst, int(incy), stream ); check_xerror( status, func, file, line ); }