/*
 * CUDABLAS1::scal -- double-precision vector scaling through legacy cuBLAS.
 *
 * Issues cublasDscal( n, alpha, x_d, incx ); per the cuBLAS documentation this
 * multiplies n elements of x_d, taken with stride incx, by alpha in place.
 *
 * Parameters:
 *   n         element count forwarded to cublasDscal
 *   alpha     scale factor forwarded to cublasDscal
 *   x_d       vector to scale (presumably a device pointer, judging by the
 *             _d suffix -- TODO confirm against callers)
 *   incx      stride forwarded to cublasDscal
 *   syncToken optional; when non-null it is dynamic_cast to CUDAStreamSyncToken
 *             and the stream from getCUDAStream() is installed with
 *             cublasSetKernelStream(). When null the stream stays NULL.
 *
 * LAMA_CHECK_CUDA_ACCESS, LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR and
 * LAMA_CHECK_CUDA_ERROR are macros defined outside this view; their exact
 * behaviour cannot be confirmed from here.
 *
 * NOTE(review): in this single-line layout the `//` comment extends to the end
 * of the line, so the trailing `if ( !syncToken ) { cudaStreamSynchronize( 0 ); ... }`
 * text is part of the comment rather than code, and the closing brace of the
 * function body is not present in the visible text. This looks like the result
 * of collapsed line breaks -- confirm against the original formatting.
 */
void CUDABLAS1::scal( IndexType n, const double alpha, double* x_d, const IndexType incx, SyncToken* syncToken ) { LAMA_CHECK_CUDA_ACCESS cudaStream_t stream = NULL; if ( syncToken ) { CUDAStreamSyncToken* cudaStreamSyncToken = dynamic_cast<CUDAStreamSyncToken*>( syncToken ); LAMA_ASSERT_DEBUG( cudaStreamSyncToken, "no cuda stream sync token provided" ) stream = cudaStreamSyncToken->getCUDAStream(); } cublasSetKernelStream( stream ); LAMA_CHECK_CUBLAS_ERROR cublasDscal( n, alpha, x_d, incx ); // No error check here possible as kernel is started asynchronously if ( !syncToken ) { cudaStreamSynchronize( 0 ); LAMA_CHECK_CUDA_ERROR }
void trsm_magmatask( int m, int t, REAL *A, REAL *B) { REAL one = 1.0; cudaStream_t stream = nanos_get_kernel_execution_stream(); cublasSetKernelStream(stream); MAGMABLASTRSM('R', 'L', 'T', 'N', m, m, one, A, m, B, m ); }
void gemm_magmatask( int m, int t, REAL *A, REAL *B, REAL *C) { REAL mone = -1.0; REAL one = 1.0; cudaStream_t stream = nanos_get_kernel_execution_stream(); cublasSetKernelStream(stream); MAGMABLASGEMM('N', 'T', m, m, m, mone, A, m, B, m, one, C, m); }
void syrk_magmatask(int m, REAL *A, REAL *C) { REAL mone = -1.0; REAL one = 1.0; cudaStream_t stream = nanos_get_kernel_execution_stream(); cublasSetKernelStream(stream); MAGMABLASSYRK('L', 'N', m, m, mone, A, m, one, C, m ); }