/* //////////////////////////////////////////////////////////////////////////// -- Testing zhesv */ int main( int argc, char** argv) { TESTING_INIT(); magmaDoubleComplex *h_A, *h_B, *h_X, *work, temp; real_Double_t gflops, gpu_perf, gpu_time = 0.0, cpu_perf=0, cpu_time=0; double error, error_lapack = 0.0; magma_int_t *ipiv; magma_int_t N, n2, lda, ldb, sizeB, lwork, info; magma_int_t status = 0, ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_opts opts; opts.parse_opts( argc, argv ); double tol = opts.tolerance * lapackf77_dlamch("E"); printf("%% M N CPU Gflop/s (sec) GPU Gflop/s (sec) |Ax-b|/(N*|A|*|x|)\n"); printf("%%========================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; ldb = N; lda = N; n2 = lda*N; sizeB = ldb*opts.nrhs; gflops = ( FLOPS_ZPOTRF( N ) + FLOPS_ZPOTRS( N, opts.nrhs ) ) / 1e9; TESTING_MALLOC_CPU( ipiv, magma_int_t, N ); TESTING_MALLOC_PIN( h_A, magmaDoubleComplex, n2 ); TESTING_MALLOC_PIN( h_B, magmaDoubleComplex, sizeB ); TESTING_MALLOC_PIN( h_X, magmaDoubleComplex, sizeB ); /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { lwork = -1; lapackf77_zhesv(lapack_uplo_const(opts.uplo), &N, &opts.nrhs, h_A, &lda, ipiv, h_X, &ldb, &temp, &lwork, &info); lwork = (int)MAGMA_Z_REAL(temp); TESTING_MALLOC_CPU( work, magmaDoubleComplex, lwork ); init_matrix( N, N, h_A, lda ); lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B ); lapackf77_zlacpy( MagmaFullStr, &N, &opts.nrhs, h_B, &ldb, h_X, &ldb ); cpu_time = magma_wtime(); lapackf77_zhesv(lapack_uplo_const(opts.uplo), &N, &opts.nrhs, h_A, &lda, ipiv, h_X, &ldb, work, &lwork, &info); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) { printf("lapackf77_zhesv returned error %d: %s.\n", (int) info, magma_strerror( info )); } error_lapack = get_residual( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, h_B, ldb ); TESTING_FREE_CPU( work ); } /* ==================================================================== Performs operation using MAGMA =================================================================== */ init_matrix( N, N, h_A, lda ); lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B ); lapackf77_zlacpy( MagmaFullStr, &N, &opts.nrhs, h_B, &ldb, h_X, &ldb ); magma_setdevice(0); gpu_time = magma_wtime(); magma_zhesv( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, &info); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) { printf("magma_zhesv returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== Check the factorization =================================================================== */ if ( opts.lapack ) { printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)", (int) N, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time ); } else { printf("%5d %5d --- ( --- ) %7.2f (%7.2f)", (int) N, (int) N, gpu_perf, gpu_time ); } if ( opts.check == 0 ) { printf(" --- \n"); } else { error = get_residual( opts.uplo, N, opts.nrhs, h_A, lda, ipiv, h_X, ldb, h_B, ldb ); printf(" %8.2e %s", error, (error < tol ? "ok" : "failed")); if (opts.lapack) printf(" (lapack rel.res. = %8.2e)", error_lapack); printf("\n"); status += ! (error < tol); } TESTING_FREE_CPU( ipiv ); TESTING_FREE_PIN( h_X ); TESTING_FREE_PIN( h_B ); TESTING_FREE_PIN( h_A ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } opts.cleanup(); TESTING_FINALIZE(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing zposv_gpu */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, cpu_perf, cpu_time, gpu_perf, gpu_time; double error, Rnorm, Anorm, Xnorm, *work; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex *h_A, *h_B, *h_X; magmaDoubleComplex *d_A, *d_B; magma_int_t N, lda, ldb, ldda, lddb, info, sizeA, sizeB; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); double tol = opts.tolerance * lapackf77_dlamch("E"); printf("uplo = %s\n", lapack_uplo_const(opts.uplo) ); printf(" N NRHS CPU Gflop/s (sec) GPU GFlop/s (sec) ||B - AX|| / N*||A||*||X||\n"); printf("================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = ldb = N; ldda = ((N+31)/32)*32; lddb = ldda; gflops = ( FLOPS_ZPOTRF( N ) + FLOPS_ZPOTRS( N, opts.nrhs ) ) / 1e9; TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, lda*N ); TESTING_MALLOC_CPU( h_B, magmaDoubleComplex, ldb*opts.nrhs ); TESTING_MALLOC_CPU( h_X, magmaDoubleComplex, ldb*opts.nrhs ); TESTING_MALLOC_CPU( work, double, N ); TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*N ); TESTING_MALLOC_DEV( d_B, magmaDoubleComplex, lddb*opts.nrhs ); /* ==================================================================== Initialize the matrix =================================================================== */ sizeA = lda*N; sizeB = ldb*opts.nrhs; lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A ); lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B ); magma_zmake_hpd( N, h_A, lda ); magma_zsetmatrix( N, N, h_A, N, d_A, ldda ); magma_zsetmatrix( N, opts.nrhs, h_B, N, d_B, lddb ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_zposv_gpu( opts.uplo, N, opts.nrhs, d_A, ldda, d_B, lddb, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_zpotrf_gpu returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Residual =================================================================== */ magma_zgetmatrix( N, opts.nrhs, d_B, lddb, h_X, ldb ); Anorm = lapackf77_zlange("I", &N, &N, h_A, &lda, work); Xnorm = lapackf77_zlange("I", &N, &opts.nrhs, h_X, &ldb, work); blasf77_zgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &opts.nrhs, &N, &c_one, h_A, &lda, h_X, &ldb, &c_neg_one, h_B, &ldb ); Rnorm = lapackf77_zlange("I", &N, &opts.nrhs, h_B, &ldb, work); error = Rnorm/(N*Anorm*Xnorm); status += ! (error < tol); /* ==================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); lapackf77_zposv( lapack_uplo_const(opts.uplo), &N, &opts.nrhs, h_A, &lda, h_B, &ldb, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_zposv returned error %d: %s.\n", (int) info, magma_strerror( info )); printf( "%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) N, (int) opts.nrhs, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); } else { printf( "%5d %5d --- ( --- ) %7.2f (%7.2f) %8.2e %s\n", (int) N, (int) opts.nrhs, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_B ); TESTING_FREE_CPU( h_X ); TESTING_FREE_CPU( work ); TESTING_FREE_DEV( d_A ); TESTING_FREE_DEV( d_B ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing zposv_batched */ int main(int argc, char **argv) { TESTING_INIT(); real_Double_t gflops, cpu_perf, cpu_time, gpu_perf, gpu_time; double err = 0.0, Rnorm, Anorm, Xnorm, *work; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex *h_A, *h_B, *h_X; magmaDoubleComplex_ptr d_A, d_B; magma_int_t *dinfo_array; magma_int_t N, nrhs, lda, ldb, ldda, lddb, info, sizeA, sizeB; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_int_t batchCount = 1; magmaDoubleComplex **dA_array = NULL; magmaDoubleComplex **dB_array = NULL; magma_queue_t queue = magma_stream; magma_opts opts; parse_opts( argc, argv, &opts ); double tol = opts.tolerance * lapackf77_dlamch("E"); nrhs = opts.nrhs; batchCount = opts.batchcount ; printf("uplo = %s\n", lapack_uplo_const(opts.uplo) ); printf("BatchCount N NRHS CPU GFlop/s (sec) GPU GFlop/s (sec) ||B - AX|| / N*||A||*||X||\n"); printf("================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = N; ldb = lda; ldda = ((N+31)/32)*32; lddb = ldda; gflops = ( FLOPS_ZPOTRF( N) + FLOPS_ZPOTRS( N, nrhs ) ) / 1e9 * batchCount; sizeA = lda*N*batchCount; sizeB = ldb*nrhs*batchCount; TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, sizeA ); TESTING_MALLOC_CPU( h_B, magmaDoubleComplex, sizeB ); TESTING_MALLOC_CPU( h_X, magmaDoubleComplex, sizeB ); TESTING_MALLOC_CPU( work, double, N); TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*N*batchCount ); TESTING_MALLOC_DEV( d_B, magmaDoubleComplex, lddb*nrhs*batchCount ); TESTING_MALLOC_DEV( dinfo_array, magma_int_t, batchCount ); magma_malloc((void**)&dA_array, batchCount * sizeof(*dA_array)); magma_malloc((void**)&dB_array, batchCount * sizeof(*dB_array)); /* Initialize the matrices */ lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A ); lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B ); for(int i=0; i<batchCount; i++) { magma_zmake_hpd( N, h_A + i * lda * N, lda );// need modification } magma_zsetmatrix( N, N*batchCount, h_A, lda, d_A, ldda ); magma_zsetmatrix( N, nrhs*batchCount, h_B, ldb, d_B, lddb ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ zset_pointer(dA_array, d_A, ldda, 0, 0, ldda*N, batchCount, queue); zset_pointer(dB_array, d_B, lddb, 0, 0, lddb*nrhs, batchCount, queue); gpu_time = magma_wtime(); info = magma_zposv_batched(opts.uplo, N, nrhs, dA_array, ldda, dB_array, lddb, dinfo_array, batchCount, queue); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; // check correctness of results throught "dinfo_magma" and correctness of argument throught "info" magma_int_t *cpu_info = (magma_int_t*) malloc(batchCount*sizeof(magma_int_t)); magma_getvector( batchCount, sizeof(magma_int_t), dinfo_array, 1, cpu_info, 1); for(int i=0; i<batchCount; i++) { if(cpu_info[i] != 0 ){ printf("magma_zposv_batched matrix %d returned internal error %d\n",i, (int)cpu_info[i] ); } } if (info != 0) printf("magma_zposv_batched returned argument error %d: %s.\n", (int) info, magma_strerror( info )); //===================================================================== // Residual //===================================================================== magma_zgetmatrix( N, nrhs*batchCount, d_B, lddb, h_X, ldb ); for(magma_int_t s=0; s<batchCount; s++) { Anorm = lapackf77_zlange("I", &N, &N, h_A + s * lda * N, &lda, work); Xnorm = lapackf77_zlange("I", &N, &nrhs, h_X + s * ldb * nrhs, &ldb, work); blasf77_zgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &nrhs, &N, &c_one, h_A + s * lda * N, &lda, h_X + s * ldb * nrhs, &ldb, &c_neg_one, h_B + s * ldb * nrhs, &ldb); Rnorm = lapackf77_zlange("I", &N, &nrhs, h_B + s * ldb * nrhs, &ldb, work); double error = Rnorm/(N*Anorm*Xnorm); if ( isnan(error) || isinf(error) ) { err = error; break; } err = max(err, error); } status += ! (err < tol); /* ==================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); for(magma_int_t s=0; s<batchCount; s++) { lapackf77_zposv( lapack_uplo_const(opts.uplo), &N, &nrhs, h_A + s * lda * N, &lda, h_B + s * ldb * nrhs, &ldb, &info ); } cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_zposv returned err %d: %s.\n", (int) info, magma_strerror( info )); printf( "%10d %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int)batchCount, (int) N, (int) nrhs, cpu_perf, cpu_time, gpu_perf, gpu_time, err, (err < tol ? "ok" : "failed")); } else { printf( "%10d %5d %5d --- ( --- ) %7.2f (%7.2f) %8.2e %s\n", (int)batchCount, (int) N, (int) nrhs, gpu_perf, gpu_time, err, (err < tol ? "ok" : "failed")); } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_B ); TESTING_FREE_CPU( h_X ); TESTING_FREE_CPU( work ); TESTING_FREE_DEV( d_A ); TESTING_FREE_DEV( d_B ); TESTING_FREE_DEV( dinfo_array ); magma_free(dA_array); magma_free(dB_array); free(cpu_info); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
int main(int argc, char **argv) { TESTING_INIT(); real_Double_t gflopsF, gflopsS, gpu_perf, gpu_time /*cpu_perf, cpu_time*/; real_Double_t gpu_perfdf, gpu_perfds; real_Double_t gpu_perfsf, gpu_perfss; double error, Rnorm, Anorm; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex *h_A, *h_B, *h_X; magmaDoubleComplex *d_A, *d_B, *d_X, *d_workd; magmaFloatComplex *d_As, *d_Bs, *d_works; double *h_workd; magma_int_t lda, ldb, ldx; magma_int_t N, nrhs, posv_iter, info, size; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; printf("Epsilon(double): %8.6e\n" "Epsilon(single): %8.6e\n\n", lapackf77_dlamch("Epsilon"), lapackf77_slamch("Epsilon") ); magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); double tol = opts.tolerance * lapackf77_dlamch("E"); nrhs = opts.nrhs; printf("using: uplo = %s\n", lapack_uplo_const(opts.uplo)); printf(" N NRHS DP-Factor DP-Solve SP-Factor SP-Solve MP-Solve Iter |b-Ax|/|A|\n"); printf("=====================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; ldb = ldx = lda = N; gflopsF = FLOPS_ZPOTRF( N ) / 1e9; gflopsS = gflopsF + FLOPS_ZPOTRS( N, nrhs ) / 1e9; TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, lda*N ); TESTING_MALLOC_CPU( h_B, magmaDoubleComplex, ldb*nrhs ); TESTING_MALLOC_CPU( h_X, magmaDoubleComplex, ldx*nrhs ); TESTING_MALLOC_CPU( h_workd, double, N ); TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, lda*N ); TESTING_MALLOC_DEV( d_B, magmaDoubleComplex, ldb*nrhs ); TESTING_MALLOC_DEV( d_X, magmaDoubleComplex, ldx*nrhs ); TESTING_MALLOC_DEV( d_works, magmaFloatComplex, lda*(N+nrhs) ); TESTING_MALLOC_DEV( d_workd, magmaDoubleComplex, N*nrhs ); /* Initialize the matrix */ size = lda * N ; lapackf77_zlarnv( &ione, ISEED, &size, h_A ); magma_zmake_hpd( N, h_A, lda ); size = ldb * nrhs ; lapackf77_zlarnv( &ione, ISEED, &size, h_B ); magma_zsetmatrix( N, N, h_A, lda, d_A, lda ); magma_zsetmatrix( N, nrhs, h_B, ldb, d_B, ldb ); //===================================================================== // Mixed Precision Iterative Refinement - GPU //===================================================================== gpu_time = magma_wtime(); magma_zcposv_gpu(opts.uplo, N, nrhs, d_A, lda, d_B, ldb, d_X, ldx, d_workd, d_works, &posv_iter, &info); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflopsS / gpu_time; if (info != 0) printf("magma_zcposv returned error %d: %s.\n", (int) info, magma_strerror( info )); //===================================================================== // Error Computation //===================================================================== magma_zgetmatrix( N, nrhs, d_X, ldx, h_X, ldx ) ; Anorm = lapackf77_zlanhe( "I", lapack_uplo_const(opts.uplo), &N, h_A, &N, h_workd); blasf77_zhemm( "L", lapack_uplo_const(opts.uplo), &N, &nrhs, &c_one, h_A, &lda, h_X, &ldx, &c_neg_one, h_B, &ldb); Rnorm = lapackf77_zlange( "I", &N, &nrhs, h_B, &ldb, h_workd); error = Rnorm / Anorm; //===================================================================== // Double Precision Factor //===================================================================== magma_zsetmatrix( N, N, h_A, lda, d_A, lda ); gpu_time = magma_wtime(); magma_zpotrf_gpu(opts.uplo, N, d_A, lda, &info); gpu_time = magma_wtime() - gpu_time; gpu_perfdf = gflopsF / gpu_time; if (info != 0) printf("magma_zpotrf returned error %d: %s.\n", (int) info, magma_strerror( info )); //===================================================================== // Double Precision Solve //===================================================================== magma_zsetmatrix( N, N, h_A, lda, d_A, lda ); magma_zsetmatrix( N, nrhs, h_B, ldb, d_B, ldb ); gpu_time = magma_wtime(); magma_zpotrf_gpu(opts.uplo, N, d_A, lda, &info); magma_zpotrs_gpu(opts.uplo, N, nrhs, d_A, lda, d_B, ldb, &info); gpu_time = magma_wtime() - gpu_time; gpu_perfds = gflopsS / gpu_time; if (info != 0) printf("magma_zpotrs returned error %d: %s.\n", (int) info, magma_strerror( info )); //===================================================================== // Single Precision Factor //===================================================================== d_As = d_works; d_Bs = d_works + lda*N; magma_zsetmatrix( N, N, h_A, lda, d_A, lda ); magma_zsetmatrix( N, nrhs, h_B, ldb, d_B, ldb ); magmablas_zlag2c( N, N, d_A, lda, d_As, N, &info ); magmablas_zlag2c( N, nrhs, d_B, ldb, d_Bs, N, &info ); gpu_time = magma_wtime(); magma_cpotrf_gpu(opts.uplo, N, d_As, N, &info); gpu_time = magma_wtime() - gpu_time; gpu_perfsf = gflopsF / gpu_time; if (info != 0) printf("magma_cpotrf returned error %d: %s.\n", (int) info, magma_strerror( info )); //===================================================================== // Single Precision Solve //===================================================================== magmablas_zlag2c(N, N, d_A, lda, d_As, N, &info ); magmablas_zlag2c(N, nrhs, d_B, ldb, d_Bs, N, &info ); gpu_time = magma_wtime(); magma_cpotrf_gpu(opts.uplo, N, d_As, lda, &info); magma_cpotrs_gpu(opts.uplo, N, nrhs, d_As, N, d_Bs, N, &info); gpu_time = magma_wtime() - gpu_time; gpu_perfss = gflopsS / gpu_time; if (info != 0) printf("magma_cpotrs returned error %d: %s.\n", (int) info, magma_strerror( info )); printf("%5d %5d %7.2f %7.2f %7.2f %7.2f %7.2f %4d %8.2e %s\n", (int) N, (int) nrhs, gpu_perfdf, gpu_perfds, gpu_perfsf, gpu_perfss, gpu_perf, (int) posv_iter, error, (error < tol ? "ok" : "failed")); status += ! (error < tol); TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_B ); TESTING_FREE_CPU( h_X ); TESTING_FREE_CPU( h_workd ); TESTING_FREE_DEV( d_A ); TESTING_FREE_DEV( d_B ); TESTING_FREE_DEV( d_X ); TESTING_FREE_DEV( d_works ); TESTING_FREE_DEV( d_workd ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }