/* //////////////////////////////////////////////////////////////////////////// -- Testing cgesv */ int main(int argc, char **argv) { real_Double_t gflops, cpu_perf, cpu_time, gpu_perf, gpu_time; float error, Rnorm, Anorm, Xnorm, *work; magmaFloatComplex c_one = MAGMA_C_ONE; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magmaFloatComplex *h_A, *h_LU, *h_B, *h_X; magma_int_t *ipiv; magma_int_t N, nrhs, lda, ldb, info, sizeA, sizeB; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; /* Initialize */ magma_queue_t queue[2]; magma_device_t device[ MagmaMaxGPUs ]; int num = 0; magma_err_t err; magma_init(); magma_opts opts; parse_opts( argc, argv, &opts ); float tol = opts.tolerance * lapackf77_slamch("E"); nrhs = opts.nrhs; err = magma_get_devices( device, MagmaMaxGPUs, &num ); if ( err != 0 || num < 1 ) { fprintf( stderr, "magma_get_devices failed: %d\n", err ); exit(-1); } // Create two queues on device opts.device err = magma_queue_create( device[ opts.device ], &queue[0] ); if ( err != 0 ) { fprintf( stderr, "magma_queue_create failed: %d\n", err ); exit(-1); } err = magma_queue_create( device[ opts.device ], &queue[1] ); if ( err != 0 ) { fprintf( stderr, "magma_queue_create failed: %d\n", err ); exit(-1); } printf("ngpu %d\n", (int) opts.ngpu ); printf(" N NRHS CPU Gflop/s (sec) GPU GFlop/s (sec) ||B - AX|| / N*||A||*||X||\n"); printf("================================================================================\n"); for( int i = 0; i < opts.ntest; ++i ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[i]; lda = N; ldb = lda; gflops = ( FLOPS_CGETRF( N, N ) + FLOPS_CGETRS( N, nrhs ) ) / 1e9; TESTING_MALLOC_CPU( h_A, magmaFloatComplex, lda*N ); TESTING_MALLOC_CPU( h_LU, magmaFloatComplex, lda*N ); TESTING_MALLOC_CPU( h_B, magmaFloatComplex, ldb*nrhs ); TESTING_MALLOC_CPU( h_X, magmaFloatComplex, ldb*nrhs ); TESTING_MALLOC_CPU( work, float, N ); TESTING_MALLOC_CPU( ipiv, magma_int_t, N ); /* Initialize the matrices */ sizeA = lda*N; sizeB = ldb*nrhs; lapackf77_clarnv( &ione, ISEED, &sizeA, h_A ); lapackf77_clarnv( &ione, ISEED, &sizeB, h_B ); // copy A to LU and B to X; save A and B for residual lapackf77_clacpy( "F", &N, &N, h_A, &lda, h_LU, &lda ); lapackf77_clacpy( "F", &N, &nrhs, h_B, &ldb, h_X, &ldb ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_cgesv( N, nrhs, h_LU, lda, ipiv, h_X, ldb, &info, queue ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_cgesv returned error %d: %s.\n", (int) info, magma_strerror( info )); //===================================================================== // Residual //===================================================================== Anorm = lapackf77_clange("I", &N, &N, h_A, &lda, work); Xnorm = lapackf77_clange("I", &N, &nrhs, h_X, &ldb, work); blasf77_cgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &nrhs, &N, &c_one, h_A, &lda, h_X, &ldb, &c_neg_one, h_B, &ldb); Rnorm = lapackf77_clange("I", &N, &nrhs, h_B, &ldb, work); error = Rnorm/(N*Anorm*Xnorm); status |= ! (error < tol); /* ==================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); lapackf77_cgesv( &N, &nrhs, h_A, &lda, ipiv, h_B, &ldb, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_cgesv returned error %d: %s.\n", (int) info, magma_strerror( info )); printf( "%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e%s\n", (int) N, (int) nrhs, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "" : " failed")); } else { printf( "%5d %5d --- ( --- ) %7.2f (%7.2f) %8.2e%s\n", (int) N, (int) nrhs, gpu_perf, gpu_time, error, (error < tol ? "" : " failed")); } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_LU ); TESTING_FREE_CPU( h_B ); TESTING_FREE_CPU( h_X ); TESTING_FREE_CPU( work ); TESTING_FREE_CPU( ipiv ); } if ( opts.niter > 1 ) { printf( "\n" ); } } magma_queue_destroy( queue[0] ); magma_queue_destroy( queue[1] ); magma_finalize(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing chesv_nopiv_gpu */ int main(int argc, char **argv) { TESTING_INIT(); real_Double_t gflops, cpu_perf, cpu_time, gpu_perf, gpu_time; float error, Rnorm, Anorm, Xnorm, *work; magmaFloatComplex c_one = MAGMA_C_ONE; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magmaFloatComplex *h_A, *h_B, *h_X, temp, *hwork; magmaFloatComplex_ptr d_A, d_B; magma_int_t *ipiv; magma_int_t N, nrhs, lda, ldb, ldda, lddb, info, sizeA, sizeB, lwork; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); float tol = opts.tolerance * lapackf77_slamch("E"); nrhs = opts.nrhs; printf(" N NRHS CPU GFlop/s (sec) GPU GFlop/s (sec) ||B - AX|| / N*||A||*||X||\n"); printf("================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = N; ldb = lda; ldda = ((N+31)/32)*32; lddb = ldda; gflops = ( FLOPS_CGETRF( N, N ) + FLOPS_CGETRS( N, nrhs ) ) / 1e9; TESTING_MALLOC_CPU( h_A, magmaFloatComplex, lda*N ); TESTING_MALLOC_CPU( h_B, magmaFloatComplex, ldb*nrhs ); TESTING_MALLOC_CPU( h_X, magmaFloatComplex, ldb*nrhs ); TESTING_MALLOC_CPU( work, float, N ); TESTING_MALLOC_CPU( ipiv, magma_int_t, N ); TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N ); TESTING_MALLOC_DEV( d_B, magmaFloatComplex, lddb*nrhs ); /* Initialize the matrices */ sizeA = lda*N; sizeB = ldb*nrhs; lapackf77_clarnv( &ione, ISEED, &sizeA, h_A ); lapackf77_clarnv( &ione, ISEED, &sizeB, h_B ); bool nopiv = true; if ( nopiv ) { magma_cmake_hpd( N, h_A, lda ); // SPD / HPD does not require pivoting } else { magma_cmake_hermitian( N, h_A, lda ); // symmetric/Hermitian generally requires pivoting } magma_csetmatrix( N, N, h_A, lda, d_A, ldda ); magma_csetmatrix( N, nrhs, h_B, ldb, d_B, lddb ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_chesv_nopiv_gpu( opts.uplo, N, nrhs, d_A, ldda, d_B, lddb, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_cgesv_gpu returned error %d: %s.\n", (int) info, magma_strerror( info )); //===================================================================== // Residual //===================================================================== magma_cgetmatrix( N, nrhs, d_B, lddb, h_X, ldb ); Anorm = lapackf77_clange("I", &N, &N, h_A, &lda, work); Xnorm = lapackf77_clange("I", &N, &nrhs, h_X, &ldb, work); blasf77_cgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &nrhs, &N, &c_one, h_A, &lda, h_X, &ldb, &c_neg_one, h_B, &ldb); Rnorm = lapackf77_clange("I", &N, &nrhs, h_B, &ldb, work); error = Rnorm/(N*Anorm*Xnorm); status += ! (error < tol); /* ==================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { lwork = -1; lapackf77_chesv( lapack_uplo_const(opts.uplo), &N,&nrhs, h_A, &lda, ipiv, h_B, &ldb, &temp, &lwork, &info ); lwork = (magma_int_t) MAGMA_C_REAL( temp ); TESTING_MALLOC_PIN( hwork, magmaFloatComplex, lwork ); cpu_time = magma_wtime(); lapackf77_chesv( lapack_uplo_const(opts.uplo), &N, &nrhs, h_A, &lda, ipiv, h_B, &ldb, hwork, &lwork, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_chesv returned error %d: %s.\n", (int) info, magma_strerror( info )); printf( "%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) N, (int) nrhs, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); TESTING_FREE_CPU( hwork ); } else { printf( "%5d %5d --- ( --- ) %7.2f (%7.2f) %8.2e %s\n", (int) N, (int) nrhs, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_B ); TESTING_FREE_CPU( h_X ); TESTING_FREE_CPU( work ); TESTING_FREE_CPU( ipiv ); TESTING_FREE_DEV( d_A ); TESTING_FREE_DEV( d_B ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing cgesv_rbt */ int main(int argc, char **argv) { TESTING_INIT(); real_Double_t gflops, cpu_perf, cpu_time, gpu_perf, gpu_time; float error, Rnorm, Anorm, Xnorm, *work; magmaFloatComplex c_one = MAGMA_C_ONE; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magmaFloatComplex *h_A, *h_LU, *h_B, *h_X; magma_int_t *ipiv; magma_int_t N, nrhs, lda, ldb, info, sizeA, sizeB; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; opts.parse_opts( argc, argv ); float tol = opts.tolerance * lapackf77_slamch("E"); nrhs = opts.nrhs; printf("%% ngpu %d\n", (int) opts.ngpu ); printf("%% N NRHS CPU Gflop/s (sec) GPU Gflop/s (sec) ||B - AX|| / N*||A||*||X||\n"); printf("%%===============================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = N; ldb = lda; gflops = ( FLOPS_CGETRF( N, N ) + FLOPS_CGETRS( N, nrhs ) ) / 1e9; TESTING_MALLOC_CPU( h_A, magmaFloatComplex, lda*N ); TESTING_MALLOC_CPU( h_LU, magmaFloatComplex, lda*N ); TESTING_MALLOC_CPU( h_B, magmaFloatComplex, ldb*nrhs ); TESTING_MALLOC_CPU( h_X, magmaFloatComplex, ldb*nrhs ); TESTING_MALLOC_CPU( work, float, N ); TESTING_MALLOC_CPU( ipiv, magma_int_t, N ); /* Initialize the matrices */ sizeA = lda*N; sizeB = ldb*nrhs; lapackf77_clarnv( &ione, ISEED, &sizeA, h_A ); lapackf77_clarnv( &ione, ISEED, &sizeB, h_B ); // copy A to LU and B to X; save A and B for residual lapackf77_clacpy( "F", &N, &N, h_A, &lda, h_LU, &lda ); lapackf77_clacpy( "F", &N, &nrhs, h_B, &ldb, h_X, &ldb ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_cgesv_rbt(MagmaTrue, N, nrhs, h_LU, lda, h_X, ldb, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) { printf("magma_cgesv_rbt returned error %d: %s.\n", (int) info, magma_strerror( info )); } for (int i = 0; i < N; i++) ipiv[i] = i+1; //===================================================================== // Residual //===================================================================== Anorm = lapackf77_clange("I", &N, &N, h_A, &lda, work); Xnorm = lapackf77_clange("I", &N, &nrhs, h_X, &ldb, work); blasf77_cgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &nrhs, &N, &c_one, h_A, &lda, h_X, &ldb, &c_neg_one, h_B, &ldb); Rnorm = lapackf77_clange("I", &N, &nrhs, h_B, &ldb, work); error = Rnorm/(N*Anorm*Xnorm); status += ! (error < tol); /* ==================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); lapackf77_cgesv( &N, &nrhs, h_A, &lda, ipiv, h_B, &ldb, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) { printf("lapackf77_cgesv returned error %d: %s.\n", (int) info, magma_strerror( info )); } printf( "%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) N, (int) nrhs, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); } else { printf( "%5d %5d --- ( --- ) %7.2f (%7.2f) %8.2e %s\n", (int) N, (int) nrhs, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_LU ); TESTING_FREE_CPU( h_B ); TESTING_FREE_CPU( h_X ); TESTING_FREE_CPU( work ); TESTING_FREE_CPU( ipiv ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } opts.cleanup(); TESTING_FINALIZE(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing cgesv_gpu */ int main(int argc, char **argv) { TESTING_INIT(); real_Double_t gflops, cpu_perf, cpu_time, gpu_perf, gpu_time; float error, Rnorm, Anorm, Xnorm, *work; magmaFloatComplex c_one = MAGMA_C_ONE; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magmaFloatComplex *h_A, *h_B, *h_X; magmaFloatComplex *d_A, *d_B; magma_int_t *ipiv; magma_int_t N, nrhs, lda, ldb, ldda, lddb, info, sizeA, sizeB; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); float tol = opts.tolerance * lapackf77_slamch("E"); nrhs = opts.nrhs; printf(" N NRHS CPU GFlop/s (sec) GPU GFlop/s (sec) ||B - AX|| / N*||A||*||X||\n"); printf("================================================================================\n"); for( int i = 0; i < opts.ntest; ++i ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[i]; lda = N; ldb = lda; ldda = ((N+31)/32)*32; lddb = ldda; gflops = ( FLOPS_CGETRF( N, N ) + FLOPS_CGETRS( N, nrhs ) ) / 1e9; TESTING_MALLOC( h_A, magmaFloatComplex, lda*N ); TESTING_MALLOC( h_B, magmaFloatComplex, ldb*nrhs ); TESTING_MALLOC( h_X, magmaFloatComplex, ldb*nrhs ); TESTING_MALLOC( work, float, N ); TESTING_MALLOC( ipiv, magma_int_t, N ); TESTING_DEVALLOC( d_A, magmaFloatComplex, ldda*N ); TESTING_DEVALLOC( d_B, magmaFloatComplex, lddb*nrhs ); /* Initialize the matrices */ sizeA = lda*N; sizeB = ldb*nrhs; lapackf77_clarnv( &ione, ISEED, &sizeA, h_A ); lapackf77_clarnv( &ione, ISEED, &sizeB, h_B ); magma_csetmatrix( N, N, h_A, lda, d_A, ldda ); magma_csetmatrix( N, nrhs, h_B, ldb, d_B, lddb ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_cgesv_gpu( N, nrhs, d_A, ldda, ipiv, d_B, lddb, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_cgesv_gpu returned error %d: %s.\n", (int) info, magma_strerror( info )); //===================================================================== // Residual //===================================================================== magma_cgetmatrix( N, nrhs, d_B, lddb, h_X, ldb ); Anorm = lapackf77_clange("I", &N, &N, h_A, &lda, work); Xnorm = lapackf77_clange("I", &N, &nrhs, h_X, &ldb, work); blasf77_cgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &nrhs, &N, &c_one, h_A, &lda, h_X, &ldb, &c_neg_one, h_B, &ldb); Rnorm = lapackf77_clange("I", &N, &nrhs, h_B, &ldb, work); error = Rnorm/(N*Anorm*Xnorm); status |= ! (error < tol); /* ==================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); lapackf77_cgesv( &N, &nrhs, h_A, &lda, ipiv, h_B, &ldb, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_cgesv returned error %d: %s.\n", (int) info, magma_strerror( info )); printf( "%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e%s\n", (int) N, (int) nrhs, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "" : " failed")); } else { printf( "%5d %5d --- ( --- ) %7.2f (%7.2f) %8.2e%s\n", (int) N, (int) nrhs, gpu_perf, gpu_time, error, (error < tol ? "" : " failed")); } TESTING_FREE( h_A ); TESTING_FREE( h_B ); TESTING_FREE( h_X ); TESTING_FREE( work ); TESTING_FREE( ipiv ); TESTING_DEVFREE( d_A ); TESTING_DEVFREE( d_B ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }