void magmaf_dpotri_gpu( magma_uplo_t *uplo, magma_int_t *n, devptr_t *dA, magma_int_t *ldda, magma_int_t *info ) { magma_dpotri_gpu( *uplo, *n, magma_ddevptr(dA), *ldda, info ); }
double *cholesky_gpu(double *ml, int m) { magma_int_t mm = m*m; magma_int_t info; double *a; double *d_a ; magma_err_t err; err = magma_dmalloc_cpu ( &a , mm ); err = magma_dmalloc ( &d_a , mm ); magma_dsetmatrix ( m, m, ml, m, d_a , m ); magma_dpotrf_gpu('L',m,d_a,m,&info); magma_dpotri_gpu('L',m,d_a,m,&info); magma_dgetmatrix ( m, m, d_a , m, a, m ); magma_free (d_a ); return a; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing dpotrf */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; double *h_A, *h_R; double *d_A; double c_neg_one = MAGMA_D_NEG_ONE; magma_int_t N, n2, lda, ldda, info; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; double work[1], error; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); opts.lapack |= opts.check; // check (-c) implies lapack (-l) double tol = opts.tolerance * lapackf77_dlamch("E"); printf(" N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R||_F / ||A||_F\n"); printf("=================================================================\n"); for( int i = 0; i < opts.ntest; ++i ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[i]; lda = N; n2 = lda*N; ldda = ((N+31)/32)*32; gflops = FLOPS_DPOTRI( N ) / 1e9; TESTING_MALLOC_CPU( h_A, double, n2 ); TESTING_MALLOC_PIN( h_R, double, n2 ); TESTING_MALLOC_DEV( d_A, double, ldda*N ); /* Initialize the matrix */ lapackf77_dlarnv( &ione, ISEED, &n2, h_A ); magma_dmake_hpd( N, h_A, lda ); lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ /* factorize matrix */ magma_dsetmatrix( N, N, h_A, lda, d_A, ldda ); magma_dpotrf_gpu( opts.uplo, N, d_A, ldda, &info ); // check for exact singularity //magma_dgetmatrix( N, N, d_A, ldda, h_R, lda ); //h_R[ 10 + 10*lda ] = MAGMA_D_MAKE( 0.0, 0.0 ); //magma_dsetmatrix( N, N, h_R, lda, d_A, ldda ); gpu_time = magma_wtime(); magma_dpotri_gpu( opts.uplo, N, d_A, ldda, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_dpotri_gpu returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { lapackf77_dpotrf( &opts.uplo, &N, h_A, &lda, &info ); cpu_time = magma_wtime(); lapackf77_dpotri( &opts.uplo, &N, h_A, &lda, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_dpotri returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Check the result compared to LAPACK =================================================================== */ magma_dgetmatrix( N, N, d_A, ldda, h_R, lda ); error = lapackf77_dlange("f", &N, &N, h_A, &lda, work); blasf77_daxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione); error = lapackf77_dlange("f", &N, &N, h_R, &lda, work) / error; printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e%s\n", (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "" : " failed") ); status |= ! (error < tol); } else { printf("%5d --- ( --- ) %7.2f (%7.2f) ---\n", (int) N, gpu_perf, gpu_time ); } TESTING_FREE_CPU( h_A ); TESTING_FREE_PIN( h_R ); TESTING_FREE_DEV( d_A ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
SEXP magma_dpoMatrix_solve(SEXP x) { #ifdef HIPLAR_WITH_MAGMA SEXP Chol = magma_dpoMatrix_chol(x); SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dpoMatrix"))); int *dims = INTEGER(GET_SLOT(x, Matrix_DimSym)), info; SET_SLOT(val, Matrix_factorSym, allocVector(VECSXP, 0)); slot_dup(val, Chol, Matrix_uploSym); slot_dup(val, Chol, Matrix_xSym); slot_dup(val, Chol, Matrix_DimSym); SET_SLOT(val, Matrix_DimNamesSym, duplicate(GET_SLOT(x, Matrix_DimNamesSym))); double *A = REAL(GET_SLOT(val, Matrix_xSym)); int N = *dims; int lda = N; const char *uplo = uplo_P(val); if(GPUFlag == 0) { F77_CALL(dpotri)(uplo_P(val), dims, A, dims, &info); } else if(GPUFlag == 1 && Interface == 0) { #ifdef HIPLAR_DBG R_ShowMessage("DBG: Solving using magma_dpotri"); #endif magma_dpotri(uplo[0], N, A, lda, &info); } else if(GPUFlag == 1 && Interface == 1){ double *d_A; cublasStatus retStatus; cublasAlloc( N * lda , sizeof(double), (void**)&d_A); #ifdef HIPLAR_DBG R_ShowMessage("DBG: Solving using magma_dpotri_gpu"); #endif /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Memory Allocation")); /********************************************/ cublasSetVector( N * lda, sizeof(double), A, 1, d_A, 1); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Data Transfer to Device")); /********************************************/ magma_dpotri_gpu(uplo[0], N, d_A, lda, &info); cublasGetVector(N * lda, sizeof(double), d_A, 1, val, 1); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Data Transfer from Device")); /********************************************/ cublasFree(d_A); } else error(_("MAGMA/LAPACK/Interface Flag not defined correctly")); if (info) { if(info > 0) error(_("the leading minor of order %d is not positive definite"), info); else /* should never happen! */ error(_("Lapack routine %s returned error code %d"), "dpotrf", info); } UNPROTECT(1); return val; #endif return R_NilValue; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing dpotrf */ int main( int argc, char** argv) { TESTING_CUDA_INIT(); magma_timestr_t start, end; double flops, gpu_perf, cpu_perf; double *h_A, *h_R; double *d_A; magma_int_t N = 0, n2, lda, ldda; magma_int_t size[10] = {1024,2048,3072,4032,5184,6016,7040,8064,9088,10112}; magma_int_t i, info; const char *uplo = MagmaUpperStr; double c_neg_one = MAGMA_D_NEG_ONE; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; double work[1], matnorm; if (argc != 1){ for(i = 1; i<argc; i++){ if (strcmp("-N", argv[i])==0) N = atoi(argv[++i]); } if (N>0) size[0] = size[9] = N; else exit(1); } else { printf("\nUsage: \n"); printf(" testing_dpotri_gpu -N %d\n\n", 1024); } /* Allocate host memory for the matrix */ n2 = size[9] * size[9]; ldda = ((size[9]+31)/32) * 32; TESTING_MALLOC( h_A, double, n2); TESTING_HOSTALLOC( h_R, double, n2); TESTING_DEVALLOC( d_A, double, ldda*size[9] ); printf(" N CPU GFlop/s GPU GFlop/s ||R||_F / ||A||_F\n"); printf("========================================================\n"); for(i=0; i<10; i++){ N = size[i]; lda = N; n2 = lda*N; flops = FLOPS_DPOTRI( (double)N ) / 1000000; ldda = ((N+31)/32)*32; /* Initialize the matrix */ lapackf77_dlarnv( &ione, ISEED, &n2, h_A ); /* Symmetrize and increase the diagonal */ { magma_int_t i, j; for(i=0; i<N; i++) { MAGMA_D_SET2REAL( h_A[i*lda+i], ( MAGMA_D_REAL(h_A[i*lda+i]) + 1.*N ) ); for(j=0; j<i; j++) h_A[i*lda+j] = (h_A[j*lda+i]); } } lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ //cublasSetMatrix( N, N, sizeof(double), h_A, lda, d_A, ldda); //magma_dpotrf_gpu(uplo[0], N, d_A, ldda, &info); /* factorize matrix */ magma_dsetmatrix( N, N, h_A, lda, d_A, ldda ); magma_dpotrf_gpu(uplo[0], N, d_A, ldda, &info); // check for exact singularity //magma_dgetmatrix( N, N, d_A, ldda, h_R, lda ); //h_R[ 10 + 10*lda ] = MAGMA_D_MAKE( 0.0, 0.0 ); //magma_dsetmatrix( N, N, h_R, lda, d_A, ldda ); start = get_current_time(); magma_dpotri_gpu(uplo[0], N, d_A, ldda, &info); end = get_current_time(); if (info != 0) printf("magma_dpotri_gpu returned error %d\n", (int) info); gpu_perf = flops / GetTimerValue(start, end); /* ===================================================================== Performs operation using LAPACK =================================================================== */ lapackf77_dpotrf(uplo, &N, h_A, &lda, &info); start = get_current_time(); lapackf77_dpotri(uplo, &N, h_A, &lda, &info); end = get_current_time(); if (info != 0) printf("lapackf77_dpotri returned error %d\n", (int) info); cpu_perf = flops / GetTimerValue(start, end); /* ===================================================================== Check the result compared to LAPACK =================================================================== */ magma_dgetmatrix( N, N, d_A, ldda, h_R, lda ); matnorm = lapackf77_dlange("f", &N, &N, h_A, &lda, work); blasf77_daxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione); printf("%5d %6.2f %6.2f %e\n", (int) size[i], cpu_perf, gpu_perf, lapackf77_dlange("f", &N, &N, h_R, &lda, work) / matnorm); if (argc != 1) break; } /* Memory clean up */ TESTING_FREE( h_A ); TESTING_HOSTFREE( h_R ); TESTING_DEVFREE( d_A ); /* Shutdown */ TESTING_CUDA_FINALIZE(); }