/// Runs magma_zpotrf in place on the storage of @p a0.
///
/// @param a0  Matrix container; its width is used as the order and its
///            leading_size() as the leading dimension.
/// @param a1  Forwarded unchanged as the first argument of magma_zpotrf
///            (presumably the uplo selector -- confirm against callers).
/// @return    The value magma_zpotrf writes through its last (info) argument.
BOOST_FORCEINLINE result_type operator()(A0& a0, A1 const a1) const
{
  result_type info;

  nt2_la_int order       = nt2::width(a0);
  nt2_la_int leading_dim = a0.leading_size();

  // The raw buffer is reinterpreted as cuDoubleComplex for the MAGMA call.
  magma_zpotrf( a1
              , order
              , (cuDoubleComplex*)a0.raw()
              , leading_dim
              , &info
              );

  return info;
}
/* //////////////////////////////////////////////////////////////////////////// -- Testing zpotrf */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; magmaDoubleComplex *h_A, *h_R; magma_int_t N, n2, lda, info; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; double work[1], error; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); opts.lapack |= opts.check; // check (-c) implies lapack (-l) double tol = opts.tolerance * lapackf77_dlamch("E"); printf("ngpu %d, uplo %c\n", (int) opts.ngpu, opts.uplo ); printf(" N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R_magma - R_lapack||_F / ||R_lapack||_F\n"); printf("========================================================\n"); for( int i = 0; i < opts.ntest; ++i ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[i]; lda = N; n2 = lda*N; gflops = FLOPS_ZPOTRF( N ) / 1e9; TESTING_MALLOC( h_A, magmaDoubleComplex, n2 ); TESTING_HOSTALLOC( h_R, magmaDoubleComplex, n2 ); /* Initialize the matrix */ lapackf77_zlarnv( &ione, ISEED, &n2, h_A ); magma_zmake_hpd( N, h_A, lda ); lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_zpotrf( opts.uplo, N, h_R, lda, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_zpotrf returned error %d: %s.\n", (int) info, magma_strerror( info )); if ( opts.lapack ) { /* ===================================================================== Performs operation using LAPACK =================================================================== */ cpu_time = magma_wtime(); lapackf77_zpotrf( &opts.uplo, &N, h_A, &lda, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / 
cpu_time; if (info != 0) printf("lapackf77_zpotrf returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Check the result compared to LAPACK =================================================================== */ error = lapackf77_zlange("f", &N, &N, h_A, &lda, work); blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione); error = lapackf77_zlange("f", &N, &N, h_R, &lda, work) / error; printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e%s\n", (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "" : " failed") ); status |= ! (error < tol); } else { printf("%5d --- ( --- ) %7.2f (%7.2f) --- \n", (int) N, gpu_perf, gpu_time ); } TESTING_FREE( h_A ); TESTING_HOSTFREE( h_R ); } } TESTING_FINALIZE(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing zpotri */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; magmaDoubleComplex *h_A, *h_R; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magma_int_t N, n2, lda, info; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; double Anorm, error, work[1]; magma_int_t status = 0; magma_opts opts; opts.parse_opts( argc, argv ); opts.lapack |= opts.check; // check (-c) implies lapack (-l) double tol = opts.tolerance * lapackf77_dlamch("E"); printf("%% uplo = %s\n", lapack_uplo_const(opts.uplo) ); printf("%% N CPU Gflop/s (sec) GPU Gflop/s (sec) ||R||_F / ||A||_F\n"); printf("%%================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = N; n2 = lda*N; gflops = FLOPS_ZPOTRI( N ) / 1e9; TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, n2 ); TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, n2 ); /* ==================================================================== Initialize the matrix =================================================================== */ lapackf77_zlarnv( &ione, ISEED, &n2, h_A ); magma_zmake_hpd( N, h_A, lda ); lapackf77_zlacpy( MagmaFullStr, &N, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ if ( opts.warmup ) { magma_zpotrf( opts.uplo, N, h_R, lda, &info ); magma_zpotri( opts.uplo, N, h_R, lda, &info ); lapackf77_zlacpy( MagmaFullStr, &N, &N, h_A, &lda, h_R, &lda ); } /* factorize matrix */ magma_zpotrf( opts.uplo, N, h_R, lda, &info ); // check for exact singularity //h_R[ 10 + 10*lda ] = MAGMA_Z_MAKE( 0.0, 0.0 ); gpu_time = magma_wtime(); magma_zpotri( opts.uplo, N, h_R, lda, &info ); gpu_time = magma_wtime() - 
gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) { printf("magma_zpotri returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { lapackf77_zpotrf( lapack_uplo_const(opts.uplo), &N, h_A, &lda, &info ); cpu_time = magma_wtime(); lapackf77_zpotri( lapack_uplo_const(opts.uplo), &N, h_A, &lda, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) { printf("lapackf77_zpotri returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== Check the result compared to LAPACK =================================================================== */ blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione); Anorm = lapackf77_zlange("f", &N, &N, h_A, &lda, work); error = lapackf77_zlange("f", &N, &N, h_R, &lda, work) / Anorm; printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed") ); status += ! (error < tol); } else { printf("%5d --- ( --- ) %7.2f (%7.2f) ---\n", (int) N, gpu_perf, gpu_time ); } TESTING_FREE_CPU( h_A ); TESTING_FREE_PIN( h_R ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } opts.cleanup(); TESTING_FINALIZE(); return status; }
void MAGMA_ZPOTRF( char *uplo, magma_int_t *n, double2 *A, magma_int_t *lda, magma_int_t *info) { magma_zpotrf( uplo[0], *n, A, *lda, info); }
void MAGMAF_ZPOTRF( char *uplo, magma_int_t *n, cuDoubleComplex *A, magma_int_t *lda, magma_int_t *info) { magma_zpotrf( uplo[0], *n, A, *lda, info); }
/* //////////////////////////////////////////////////////////////////////////// -- Testing zpotri */ int main( int argc, char** argv) { TESTING_CUDA_INIT(); magma_timestr_t start, end; double flops, gpu_perf, cpu_perf; cuDoubleComplex *h_A, *h_R; magma_int_t N=0, n2, lda; magma_int_t size[10] = {1024,2048,3072,4032,5184,6016,7040,8064,9088,10112}; magma_int_t i, info; const char *uplo = MagmaLowerStr; cuDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; double work[1], matnorm; if (argc != 1){ for(i = 1; i<argc; i++){ if (strcmp("-N", argv[i])==0) N = atoi(argv[++i]); } if (N>0) size[0] = size[9] = N; else exit(1); } else { printf("\nUsage: \n"); printf(" testing_zpotri -N %d\n\n", 1024); } /* Allocate host memory for the matrix */ n2 = size[9] * size[9]; TESTING_MALLOC( h_A, cuDoubleComplex, n2); TESTING_HOSTALLOC( h_R, cuDoubleComplex, n2); printf(" N CPU GFlop/s GPU GFlop/s ||R||_F / ||A||_F\n"); printf("========================================================\n"); for(i=0; i<10; i++){ N = size[i]; lda = N; n2 = lda*N; flops = FLOPS_ZPOTRI( (double)N ) / 1000000; /* ==================================================================== Initialize the matrix =================================================================== */ lapackf77_zlarnv( &ione, ISEED, &n2, h_A ); /* Symmetrize and increase the diagonal */ { magma_int_t i, j; for(i=0; i<N; i++) { MAGMA_Z_SET2REAL( h_A[i*lda+i], ( MAGMA_Z_REAL(h_A[i*lda+i]) + 1.*N ) ); for(j=0; j<i; j++) h_A[i*lda+j] = cuConj(h_A[j*lda+i]); } } lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ /* warm-up */ magma_zpotrf(uplo[0], N, h_R, lda, &info); magma_zpotri(uplo[0], N, h_R, lda, &info); lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); /* factorize matrix */ 
magma_zpotrf(uplo[0], N, h_R, lda, &info); // check for exact singularity //h_R[ 10 + 10*lda ] = MAGMA_Z_MAKE( 0.0, 0.0 ); start = get_current_time(); magma_zpotri(uplo[0], N, h_R, lda, &info); end = get_current_time(); if (info != 0) printf("magma_zpotri returned error %d\n", (int) info); gpu_perf = flops / GetTimerValue(start, end); /* ===================================================================== Performs operation using LAPACK =================================================================== */ lapackf77_zpotrf(uplo, &N, h_A, &lda, &info); start = get_current_time(); lapackf77_zpotri(uplo, &N, h_A, &lda, &info); end = get_current_time(); if (info != 0) printf("lapackf77_zpotri returned error %d\n", (int) info); cpu_perf = flops / GetTimerValue(start, end); /* ===================================================================== Check the result compared to LAPACK =================================================================== */ matnorm = lapackf77_zlange("f", &N, &N, h_A, &N, work); blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione); printf("%5d %6.2f %6.2f %e\n", (int) size[i], cpu_perf, gpu_perf, lapackf77_zlange("f", &N, &N, h_R, &N, work) / matnorm ); if (argc != 1) break; } /* Memory clean up */ TESTING_FREE( h_A ); TESTING_HOSTFREE( h_R ); TESTING_CUDA_FINALIZE(); }