/*
 * Computes the Frobenius norm of a matrix.
 *
 * The element buffer returned by MatElems(mA) is treated as a flat,
 * unit-stride vector of MatN2(mA) entries and handed to a BLAS nrm2 routine:
 *   - element size 4 -> single-precision routine (result widened to double)
 *   - element size 8 -> double-precision routine
 * When MatDev(mA) is true the cuBLAS routine is used with g_cublasHandle,
 * otherwise the CBLAS routine is used.
 *
 * Returns the norm. For any other element size no BLAS routine is called and
 * 0.0 is returned (previously the return value was uninitialized).
 *
 * NOTE(review): the cuBLAS status codes are not inspected here; on a failed
 * device call the result stays at its 0.0 initial value.
 */
double nrm2(Mat mA)
{
    const int n2 = MatN2(mA);
    const void* a = MatElems(mA);
    const bool dev = MatDev(mA);
    double norm = 0.0;

    switch (MatElemSize(mA)) {
    case 4:
        if (dev) {
            /* cuBLAS writes a float result; widen it afterwards. */
            float norm32 = 0.0f;
            cublasSnrm2(g_cublasHandle, n2, (const float*)a, 1, &norm32);
            norm = norm32;
        } else {
            norm = cblas_snrm2(n2, (const float*)a, 1);
        }
        break;
    case 8:
        if (dev) {
            cublasDnrm2(g_cublasHandle, n2, (const double*)a, 1, &norm);
        } else {
            norm = cblas_dnrm2(n2, (const double*)a, 1);
        }
        break;
    default:
        /* Unsupported element size: leave norm at its defined 0.0 value. */
        break;
    }

    return norm;
}
void gpu_cublas1(double *A, double *B, double *C, double *D, double *r, double *nrmC, int N, int N2) { #pragma acc data present(A, B, C, D) { #pragma acc host_data use_device(A, B, C, D) { cublasHandle_t handle; cublasCreate(&handle); const double alpha = 1.0; const double beta = 0.0; cublasDgemm(handle, CUBLAS_OP_T, CUBLAS_OP_T, N, N, N, &alpha, A, N, B, N, &beta, C, N); printf(" gpu gemm success \n"); cublasDdot(handle, N2, C, 1, B, 1, r); printf(" gpu dot success \n"); *r = -1.0 * *r; cublasDaxpy(handle, N2, r, B, 1, C, 1); printf(" gpu axpy success \n"); cublasDnrm2(handle, N2, C, 1, nrmC); printf(" gpu nrm2 success \n"); cublasDcopy(handle, N2, C, 1, D, 1); printf(" gpu copy success \n"); *nrmC = 1.0 / *nrmC; cublasDscal(handle, N2, nrmC, D, 1); printf(" gpu scal success \n"); cublasDestroy(handle); printf(" gpu destroy success \n"); } } }
/*
 * Error term evaluated with cuBLAS.
 *
 * Updates dedy in place to (dedy - output) over `count` elements and returns
 * one half of the Euclidean norm of the updated dedy.
 *
 * target must alias dedy (checked with assert); it is otherwise unused.
 *
 * NOTE(review): the calls take no handle, so this looks like the legacy
 * cuBLAS API; the pointers are presumably device pointers - confirm.
 * NOTE(review): the result is 0.5 * norm, not 0.5 * norm^2 - confirm that is
 * what callers expect.
 */
static double e_cuda(double* target, double* output, size_t count, double* dedy)
{
    /* this is to sort of keep a uniform API w. the matrix */
    assert(target == dedy);

    /* dedy <- dedy - output */
    cublasDaxpy(count, -1, output, 1, dedy, 1);

    const double residual_norm = cublasDnrm2(count, dedy, 1);
    return 0.5 * residual_norm;
}
/**
 * Euclidean norm of a double-precision array, computed by cuBLAS.
 *
 * Calls ary.deviceSet(), then runs cublasDnrm2 with the shared
 * DeviceManager::handle over ary.size() unit-stride elements starting at
 * ary.dev_data. The cuBLAS call is wrapped in CUBLAS_SAFE_CALL.
 *
 * NOTE(review): deviceSet() presumably makes the device copy current before
 * the call - confirm against Darray.
 *
 * @param ary  array whose norm is requested
 * @return     the value cuBLAS wrote to the result slot
 */
double cunorm2 (const Darray<double>& ary)
{
    ary.deviceSet();

    double result;
    CUBLAS_SAFE_CALL(
        cublasDnrm2(DeviceManager::handle, ary.size(), ary.dev_data, 1, &result)
    );

    return result;
}
void norm_gpu(double *x, double *norm, int N) { #pragma acc data present(x) { #pragma acc host_data use_device(x) { cublasHandle_t h; cublasCreate(&h); cublasDnrm2(h, N, x, 1, norm); cublasDestroy(h); } } }
/*
 * R entry point: Euclidean norm of a double vector via cuBLAS.
 *
 * rx    - vector argument; its length and data pointer are obtained with
 *         unpackVector (NOTE(review): presumably a device-backed vector
 *         wrapper - confirm against unpackVector).
 * rincx - stride between consecutive elements, coerced with asInteger.
 *
 * Returns a freshly allocated length-1 REALSXP holding the norm.
 * checkCublasError("d_nrm2") is invoked right after the cuBLAS call.
 */
SEXP d_nrm2(SEXP rx, SEXP rincx)
{
    int n;
    int stride = asInteger(rincx);
    double *x;

    unpackVector(rx, &n, &x);

    /* Keep the result protected until it is handed back to R. */
    SEXP out = PROTECT(allocVector(REALSXP, 1));

    REAL(out)[0] = cublasDnrm2(n, x, stride);
    checkCublasError("d_nrm2");

    UNPROTECT(1);
    return out;
}
double cublas_gemm_norm(const double *A, const double *B, double *C, int N) { double *norm; norm = (double *) malloc(1*sizeof(double)); #pragma acc data present(A, B, C) copyout(norm[0]) { #pragma acc host_data use_device(A, B, C) { cublasHandle_t h; cublasCreate(&h); const double alpha = 1.0; const double beta = 0.0; cublasDgemm(h, CUBLAS_OP_T, CUBLAS_OP_T, N, N, N, &alpha, A, N, B, N, &beta, C, N); cublasDnrm2(h, N*N, C, 1, norm); cublasDestroy(h); } } return *norm; }
/**
 * Euclidean norm of a double-precision Vector, delegated to cublasDnrm2.
 *
 * The element count comes from x.getSize() and the stride from x.inc();
 * x itself is passed as the data argument and relies on Vector's implicit
 * conversion to the pointer type cuBLAS expects.
 *
 * @param x  vector whose norm is requested
 * @return   the value returned by cublasDnrm2
 */
double nrm2(const Vector<double> &x)
{
    const int length = x.getSize();
    const int stride = x.inc();

    return cublasDnrm2(length, x, stride);
}
/**
 * Thin MAGMA-named wrapper that forwards straight to cublasDnrm2.
 *
 * @param n     number of elements
 * @param dx    vector data, passed through to cuBLAS unchanged
 * @param incx  stride between consecutive elements
 * @return      the value returned by cublasDnrm2
 */
double
magma_dnrm2(
    magma_int_t n,
    const double *dx, magma_int_t incx )
{
    const double norm = cublasDnrm2( n, dx, incx );
    return norm;
}
// // Overloaded function for dispatching to // * CUBLAS backend, and // * double value-type. // inline double nrm2( const int n, const double* x, const int incx ) { return cublasDnrm2( n, x, incx ); }