Beispiel #1
0
/*
 * Computes the Frobenius norm of a matrix.
 *
 * The MatN2(mA) elements returned by MatElems(mA) are treated as one flat
 * vector with stride 1 and passed to the BLAS nrm2 routine matching the
 * element size: 4 bytes -> single precision, 8 bytes -> double precision.
 * When MatDev(mA) is true the cuBLAS routine is used with g_cublasHandle,
 * otherwise the CBLAS routine is used.
 *
 * Returns the norm as a double. Returns 0.0 when the element size is not
 * supported, instead of an indeterminate value.
 *
 * NOTE(review): the cublasStatus_t returned by the cuBLAS calls is not
 * inspected; if a call fails before writing its result, the pre-initialised
 * 0.0 is returned. Confirm whether callers need an explicit error path.
 */
double nrm2(Mat mA) {
  const int n2 = MatN2(mA);
  const void* a = MatElems(mA);
  const bool dev = MatDev(mA);
  /* Start from a defined value so no path can return garbage. */
  double norm = 0.0;

  switch (MatElemSize(mA)) {
  case 4:
    if (dev) {
      /* cuBLAS writes a float result; widen it afterwards. */
      float norm32 = 0.0f;
      cublasSnrm2(g_cublasHandle, n2, (const float*)a, 1, &norm32);
      norm = norm32;
    } else {
      norm = cblas_snrm2(n2, (const float*)a, 1);
    }
    break;

  case 8:
    if (dev) {
      cublasDnrm2(g_cublasHandle, n2, (const double*)a, 1, &norm);
    } else {
      norm = cblas_dnrm2(n2, (const double*)a, 1);
    }
    break;

  default:
    /* Unsupported element size: keep the defined 0.0 result. */
    break;
  }

  return norm;
}
Beispiel #2
0
/**
 * Computes the 2-norm of a single-precision vector through the legacy
 * cuBLAS call cublasSnrm2( n, x_d, incx ).
 *
 * @param n          element count forwarded to cublasSnrm2
 * @param x_d        vector pointer forwarded to cublasSnrm2
 *                   (presumably device memory, judging by the _d suffix - confirm)
 * @param incx       stride forwarded to cublasSnrm2
 * @param syncToken  optional token; when non-NULL its CUDA stream is used for
 *                   the cuBLAS call, and no synchronization is done here
 *
 * NOTE(review): the definition is cut off in this excerpt - the return
 * statement and closing brace are not visible.
 */
float CUDABLAS1::nrm2( IndexType n, const float* x_d, IndexType incx, SyncToken* syncToken )
{
    LAMA_CHECK_CUDA_ACCESS

    // NULL selects the default stream unless a sync token supplies another one.
    cudaStream_t stream = NULL;

    if ( syncToken )
    {
        // The token must be a CUDAStreamSyncToken to carry a stream.
        // NOTE(review): if LAMA_ASSERT_DEBUG is compiled out in release builds,
        // a failed cast would be dereferenced on the next line - confirm.
        CUDAStreamSyncToken* cudaStreamSyncToken = dynamic_cast<CUDAStreamSyncToken*>( syncToken );
        LAMA_ASSERT_DEBUG( cudaStreamSyncToken, "no cuda stream sync token provided" )
        stream = cudaStreamSyncToken->getCUDAStream();
    }

    // Route the following cuBLAS call to the chosen stream.
    cublasSetKernelStream( stream );
    LAMA_CHECK_CUBLAS_ERROR

    float res = cublasSnrm2( n, x_d, incx );

    // No error check here possible as kernel is started asynchronously

    // Without a sync token the caller expects a finished result:
    // wait on stream 0 and then check for CUDA errors.
    if ( !syncToken )
    {
        cudaStreamSynchronize( 0 );
        LAMA_CHECK_CUDA_ERROR
    }
Beispiel #3
0
/*
 * Returns the 2-norm of the `size` floats at src1.
 *
 * The norm is formed as the square root of cuda_sdot(src1, src1) rather
 * than by calling cublasSnrm2 directly.
 */
static double cuda_norm(long size, const float* src1)
{
#if 0
	// Direct cuBLAS path, disabled: see the note in the active branch.
	return cublasSnrm2(size, src1, 1);
#else
	// cublasSnrm2 was observed to produce NaN in some situations,
	// e.g. nlinv -g -i8 utests/data/und2x2 o
	// (git rev: ab28a9a953a80d243511640b23501f964a585349),
	// so the norm is derived from the dot product of the vector with itself.
	return sqrt(cuda_sdot(size, src1, src1));
#endif
}
Beispiel #4
0
/*
 * Returns the single-precision 2-norm of all ary.size() elements of `ary`,
 * computed by cublasSnrm2 with stride 1 on ary.dev_data using
 * DeviceManager::handle.
 */
float cunorm2 (const Darray<float>& ary)
{
	// Called before dev_data is read; presumably makes the device copy
	// current - confirm against Darray.
	ary.deviceSet();

	float norm;
	CUBLAS_SAFE_CALL(cublasSnrm2(DeviceManager::handle, ary.size(), ary.dev_data, 1, &norm));
	return norm;
}
Beispiel #5
0
/*
 * OCaml stub for the legacy cuBLAS routine cublasSnrm2.
 *
 * n    : OCaml int, element count (converted with Int_val)
 * x    : OCaml value from which GET_VEC obtains the device pointer d_A
 * incx : OCaml int, stride (converted with Int_val)
 * dev  : OCaml value, not referenced directly here
 *        (presumably consumed by the context macros - confirm)
 *
 * Returns the norm as a boxed OCaml float (caml_copy_double).
 *
 * NOTE(review): the locals dev_vec_array, dev_vec, gi and id are not used
 * explicitly in this function; they are presumably referenced by the
 * GET_VEC / CUBLAS_GET_CONTEXT / CUDA_RESTORE_CONTEXT macros, so keep their
 * names and the statement order unchanged.
 */
CAMLprim value spoc_cublasSnrm2 (value n, value x, value incx, value dev){
	/* Register parameters and locals with the OCaml GC. */
	CAMLparam4(n,x,incx, dev);
	CAMLlocal4(dev_vec_array, dev_vec, res, gi);
	CUdeviceptr d_A;
	int id;
	float result;
	/* Resolve the device pointer for x into d_A. */
	GET_VEC(x, d_A);
	CUBLAS_GET_CONTEXT;
	result = cublasSnrm2(Int_val(n), (float*)d_A, Int_val(incx));
	/* The legacy API reports errors through cublasGetError(), not a return code. */
	CUBLAS_CHECK_CALL(cublasGetError());
	/* Widen to double and box it as an OCaml float. */
	res = caml_copy_double((double)result);
	CUDA_RESTORE_CONTEXT;
	CAMLreturn((res));
}
Beispiel #6
0
/*
 * Returns the single-precision 2-norm of the vector dx by forwarding
 * n, dx and incx unchanged to cublasSnrm2.
 */
float magma_snrm2(
    magma_int_t n,
    const float *dx,
    magma_int_t incx )
{
    const float norm = cublasSnrm2( n, dx, incx );
    return norm;
}
//
// nrm2 overload selected for
// * the CUBLAS backend, and
// * the float value-type.
//
// Forwards n, x and incx unchanged to cublasSnrm2 and returns its result.
//
inline float nrm2( const int n, const float* x, const int incx ) {
    const float result = cublasSnrm2( n, x, incx );
    return result;
}