/*
 * Computes the Frobenius norm of a matrix.
 *
 * The MatN2(mA) elements returned by MatElems(mA) are handed to a BLAS nrm2
 * routine as a single vector with unit stride. The routine is chosen from
 * the element size (4 bytes -> single precision, 8 bytes -> double
 * precision) and from MatDev(mA) (true -> cuBLAS, false -> CBLAS).
 *
 * Returns the norm as a double. Returns -1.0, a value no norm can take, when
 * the element size is neither 4 nor 8 bytes or when the cuBLAS call does not
 * report CUBLAS_STATUS_SUCCESS; previously those paths returned an
 * indeterminate value.
 */
double nrm2(Mat mA) {
    const int n2 = MatN2(mA);
    const void* a = MatElems(mA);
    const bool dev = MatDev(mA);
    /* Failure sentinel; overwritten only by a successful computation. */
    double norm = -1.0;

    switch (MatElemSize(mA)) {
    case 4:
        if (dev) {
            float norm32 = 0.0f;
            /* Only trust the output slot when cuBLAS reports success. */
            if (cublasSnrm2(g_cublasHandle, n2, (const float*)a, 1, &norm32)
                    == CUBLAS_STATUS_SUCCESS) {
                norm = norm32;
            }
        } else {
            norm = cblas_snrm2(n2, (const float*)a, 1);
        }
        break;

    case 8:
        if (dev) {
            double norm64 = 0.0;
            if (cublasDnrm2(g_cublasHandle, n2, (const double*)a, 1, &norm64)
                    == CUBLAS_STATUS_SUCCESS) {
                norm = norm64;
            }
        } else {
            norm = cblas_dnrm2(n2, (const double*)a, 1);
        }
        break;

    default:
        /* Unsupported element size: keep the sentinel. */
        break;
    }

    return norm;
}
// Calls cublasSnrm2( n, x_d, incx ) and stores the result in `res`.
// If syncToken is non-null it is dynamic_cast to CUDAStreamSyncToken and its
// CUDA stream is passed to cublasSetKernelStream; otherwise the NULL stream is
// set and, per the visible text, cudaStreamSynchronize( 0 ) follows the call.
// NOTE(review): this definition is truncated in this view - no return
// statement or closing brace is visible - so the remainder is undocumented.
float CUDABLAS1::nrm2( IndexType n, const float* x_d, IndexType incx, SyncToken* syncToken ) { LAMA_CHECK_CUDA_ACCESS cudaStream_t stream = NULL; if ( syncToken ) { CUDAStreamSyncToken* cudaStreamSyncToken = dynamic_cast<CUDAStreamSyncToken*>( syncToken ); LAMA_ASSERT_DEBUG( cudaStreamSyncToken, "no cuda stream sync token provided" ) stream = cudaStreamSyncToken->getCUDAStream(); } cublasSetKernelStream( stream ); LAMA_CHECK_CUBLAS_ERROR float res = cublasSnrm2( n, x_d, incx ); // No error check here possible as kernel is started asynchronously if ( !syncToken ) { cudaStreamSynchronize( 0 ); LAMA_CHECK_CUDA_ERROR }
/*
 * Norm of the `size`-element float vector src1, evaluated as
 * sqrt(cuda_sdot(size, src1, src1)) rather than through cublasSnrm2.
 *
 * The `#if 1` switch keeps the direct cublasSnrm2 call available in the
 * disabled branch for comparison.
 */
static double cuda_norm(long size, const float* src1)
{
#if 1
	/*
	 * Workaround: cublasSnrm2 produces NaN in some situations,
	 *   e.g. nlinv -g -i8 utests/data/und2x2 o
	 *   git rev: ab28a9a953a80d243511640b23501f964a585349
	 *
	 * Debug aids kept from the original investigation:
	 *   printf("cublas: %f\n", cublasSnrm2(size, src1, 1));
	 *   printf("GPU norm (sdot: %f)\n", sqrt(cuda_sdot(size, src1, src1)));
	 */
	return sqrt(cuda_sdot(size, src1, src1));
#else
	/* Direct cuBLAS path, currently disabled. */
	return cublasSnrm2(size, src1, 1);
#endif
}
/**
 * Returns the 2-norm of a float Darray as computed by cublasSnrm2.
 *
 * ary.deviceSet() is invoked before ary.dev_data is read
 * (presumably to make the device-side copy current - confirm against Darray).
 * All ary.size() elements are processed with unit stride, and the cuBLAS
 * status is routed through CUBLAS_SAFE_CALL.
 */
float cunorm2 (const Darray<float>& ary)
{
    ary.deviceSet();

    // Output slot filled in by cuBLAS.
    float norm2;
    CUBLAS_SAFE_CALL(
        cublasSnrm2 (DeviceManager::handle,
                     ary.size(),
                     ary.dev_data,
                     1,
                     &norm2)
    );

    return norm2;
}
/*
 * OCaml stub for cublasSnrm2.
 *
 * n and incx are unboxed with Int_val; x is resolved to the device pointer
 * d_A by GET_VEC. The float returned by cublasSnrm2 is widened to double and
 * returned to OCaml as a value allocated by caml_copy_double.
 *
 * NOTE(review): dev_vec_array, dev_vec, gi, id and the `dev` argument are not
 * referenced directly in this body; they are presumably consumed by the
 * GET_VEC / CUBLAS_GET_CONTEXT / CUDA_RESTORE_CONTEXT macros - confirm before
 * removing or renaming any of them.
 */
CAMLprim value spoc_cublasSnrm2 (value n, value x, value incx, value dev){
	/* Register arguments and locals with the OCaml GC. */
	CAMLparam4(n,x,incx, dev);
	CAMLlocal4(dev_vec_array, dev_vec, res, gi);
	CUdeviceptr d_A;
	int id;
	float result;
	/* Fills d_A from the OCaml vector x (macro defined elsewhere). */
	GET_VEC(x, d_A);
	/* Presumably activates the CUDA/cuBLAS context for `dev` - TODO confirm. */
	CUBLAS_GET_CONTEXT;
	result = cublasSnrm2(Int_val(n), (float*)d_A, Int_val(incx));
	/* This cublasSnrm2 form returns the value, so the status is fetched separately. */
	CUBLAS_CHECK_CALL(cublasGetError());
	/* Box the result as an OCaml float (double precision). */
	res = caml_copy_double((double)result);
	CUDA_RESTORE_CONTEXT;
	CAMLreturn((res));
}
/*
 * Thin wrapper that forwards to cublasSnrm2 and returns its result.
 *
 *   n     element count passed to cublasSnrm2
 *   dx    vector pointer passed to cublasSnrm2
 *   incx  stride passed to cublasSnrm2
 */
float magma_snrm2(
    magma_int_t n,
    const float *dx, magma_int_t incx )
{
    const float norm = cublasSnrm2( n, dx, incx );
    return norm;
}
// // Overloaded function for dispatching to // * CUBLAS backend, and // * float value-type. // inline float nrm2( const int n, const float* x, const int incx ) { return cublasSnrm2( n, x, incx ); }