Esempio n. 1
0
/**
 * Scales a double-precision vector through the legacy cuBLAS call
 * cublasDscal( n, alpha, x_d, incx ).
 *
 * When a sync token is supplied, the cuBLAS call is issued on the CUDA stream
 * owned by that token and this visible part of the function does not wait for
 * it. Without a token the call is issued on the NULL stream and the function
 * synchronizes on stream 0 before continuing.
 *
 * @param n         element count forwarded to cublasDscal
 * @param alpha     scaling factor forwarded to cublasDscal
 * @param x_d       vector forwarded to cublasDscal; presumably device memory
 *                  (suggested by the _d suffix) -- TODO confirm
 * @param incx      stride forwarded to cublasDscal
 * @param syncToken optional token; if non-NULL it is expected to be a
 *                  CUDAStreamSyncToken whose stream is used for the call
 */
void CUDABLAS1::scal( IndexType n, const double alpha, double* x_d, const IndexType incx, SyncToken* syncToken )
{
    LAMA_CHECK_CUDA_ACCESS

    // NULL selects the default stream unless a sync token provides another one.
    cudaStream_t stream = NULL;

    if ( syncToken )
    {
        // Only CUDA stream tokens carry a stream; any other token type makes the cast yield NULL.
        CUDAStreamSyncToken* cudaStreamSyncToken = dynamic_cast<CUDAStreamSyncToken*>( syncToken );
        // NOTE(review): if LAMA_ASSERT_DEBUG is compiled out in release builds, a failed
        // cast would be dereferenced on the next line -- confirm the macro's behaviour.
        LAMA_ASSERT_DEBUG( cudaStreamSyncToken, "no cuda stream sync token provided" )
        stream = cudaStreamSyncToken->getCUDAStream();
    }

    // Select the stream for the cuBLAS call issued below.
    cublasSetKernelStream( stream );
    LAMA_CHECK_CUBLAS_ERROR

    cublasDscal( n, alpha, x_d, incx );

    // No error check is possible here because the kernel is started asynchronously.

    // Without a sync token the caller cannot wait itself, so block until stream 0 has finished.
    if ( !syncToken )
    {
        cudaStreamSynchronize( 0 );
        LAMA_CHECK_CUDA_ERROR
    }
    // NOTE(review): the function body is cut off at this point in this excerpt.
Esempio n. 2
0
void trsm_magmatask( int m, int t, REAL *A, REAL *B) {
    	REAL one = 1.0;
 	cudaStream_t stream = nanos_get_kernel_execution_stream();
	cublasSetKernelStream(stream);

	MAGMABLASTRSM('R', 'L', 'T', 'N', m, m, one, A, m, B, m );
}
Esempio n. 3
0
void gemm_magmatask( int m, int t, REAL *A, REAL *B, REAL *C) {
	REAL mone = -1.0; 
	REAL one = 1.0;

	cudaStream_t stream = nanos_get_kernel_execution_stream();
	cublasSetKernelStream(stream);

 	MAGMABLASGEMM('N', 'T', m, m, m, mone, A, m, B, m, one, C, m);
}
Esempio n. 4
0
void syrk_magmatask(int m, REAL *A, REAL *C) {
	REAL mone = -1.0;
	REAL one = 1.0;

	cudaStream_t stream = nanos_get_kernel_execution_stream();
	cublasSetKernelStream(stream);

 	MAGMABLASSYRK('L', 'N', m, m, mone, A, m, one, C, m );
}