/*
 * Pointer-argument wrapper around magma_dgesv.
 *
 * The scalar arguments (n, nrhs, lda, ldb) are received by address and
 * dereferenced before the call; the array arguments (A, ipiv, B) and the
 * info output pointer are forwarded untouched.
 *
 * NOTE(review): the "magmaf_" prefix suggests this is the Fortran-facing
 * entry point (Fortran passes scalars by reference) -- confirm.
 */
void magmaf_dgesv(
    magma_int_t *n,
    magma_int_t *nrhs,
    double      *A,
    magma_int_t *lda,
    magma_int_t *ipiv,
    double      *B,
    magma_int_t *ldb,
    magma_int_t *info )
{
    const magma_int_t order     = *n;
    const magma_int_t rhs_count = *nrhs;
    const magma_int_t lead_a    = *lda;
    const magma_int_t lead_b    = *ldb;

    magma_dgesv( order, rhs_count, A, lead_a, ipiv, B, lead_b, info );
}
/* //////////////////////////////////////////////////////////////////////////// -- Testing dgesv */ int main(int argc, char **argv) { TESTING_INIT(); real_Double_t gflops, cpu_perf, cpu_time, gpu_perf, gpu_time; double error, Rnorm, Anorm, Xnorm, *work; double c_one = MAGMA_D_ONE; double c_neg_one = MAGMA_D_NEG_ONE; double *h_A, *h_LU, *h_B, *h_X; magma_int_t *ipiv; magma_int_t N, nrhs, lda, ldb, info, sizeA, sizeB; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); double tol = opts.tolerance * lapackf77_dlamch("E"); nrhs = opts.nrhs; printf("ngpu %d\n", (int) opts.ngpu ); printf(" N NRHS CPU Gflop/s (sec) GPU GFlop/s (sec) ||B - AX|| / N*||A||*||X||\n"); printf("================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = N; ldb = lda; gflops = ( FLOPS_DGETRF( N, N ) + FLOPS_DGETRS( N, nrhs ) ) / 1e9; TESTING_MALLOC_CPU( h_A, double, lda*N ); TESTING_MALLOC_CPU( h_LU, double, lda*N ); TESTING_MALLOC_CPU( h_B, double, ldb*nrhs ); TESTING_MALLOC_CPU( h_X, double, ldb*nrhs ); TESTING_MALLOC_CPU( work, double, N ); TESTING_MALLOC_CPU( ipiv, magma_int_t, N ); /* Initialize the matrices */ sizeA = lda*N; sizeB = ldb*nrhs; lapackf77_dlarnv( &ione, ISEED, &sizeA, h_A ); lapackf77_dlarnv( &ione, ISEED, &sizeB, h_B ); // copy A to LU and B to X; save A and B for residual lapackf77_dlacpy( "F", &N, &N, h_A, &lda, h_LU, &lda ); lapackf77_dlacpy( "F", &N, &nrhs, h_B, &ldb, h_X, &ldb ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_dgesv( N, nrhs, h_LU, lda, ipiv, h_X, ldb, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_dgesv 
returned error %d: %s.\n", (int) info, magma_strerror( info )); //===================================================================== // Residual //===================================================================== Anorm = lapackf77_dlange("I", &N, &N, h_A, &lda, work); Xnorm = lapackf77_dlange("I", &N, &nrhs, h_X, &ldb, work); blasf77_dgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &nrhs, &N, &c_one, h_A, &lda, h_X, &ldb, &c_neg_one, h_B, &ldb); Rnorm = lapackf77_dlange("I", &N, &nrhs, h_B, &ldb, work); error = Rnorm/(N*Anorm*Xnorm); status += ! (error < tol); /* ==================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); lapackf77_dgesv( &N, &nrhs, h_A, &lda, ipiv, h_B, &ldb, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_dgesv returned error %d: %s.\n", (int) info, magma_strerror( info )); printf( "%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) N, (int) nrhs, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); } else { printf( "%5d %5d --- ( --- ) %7.2f (%7.2f) %8.2e %s\n", (int) N, (int) nrhs, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_LU ); TESTING_FREE_CPU( h_B ); TESTING_FREE_CPU( h_X ); TESTING_FREE_CPU( work ); TESTING_FREE_CPU( ipiv ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing dgesv */ int main(int argc , char **argv) { TESTING_CUDA_INIT(); real_Double_t gflops, gpu_perf, gpu_time; double Rnorm, Anorm, Xnorm, *work; double c_one = MAGMA_D_ONE; double c_neg_one = MAGMA_D_NEG_ONE; double *h_A, *h_LU, *h_B, *h_X; magma_int_t *ipiv; magma_int_t lda, ldb; magma_int_t i, info, szeA, szeB; magma_int_t ione = 1; magma_int_t N = 0; magma_int_t NRHS = 100; magma_int_t ISEED[4] = {0,0,0,1}; const int MAXTESTS = 10; magma_int_t size[MAXTESTS] = { 1024, 2048, 3072, 4032, 5184, 6016, 7040, 8064, 9088, 10112 }; // process command line arguments printf( "\nUsage: %s -N <matrix size> -R <right hand sides>\n", argv[0] ); printf( " -N can be repeated up to %d times\n\n", MAXTESTS ); int ntest = 0; for( int i = 1; i < argc; ++i ) { if ( strcmp("-N", argv[i]) == 0 && i+1 < argc ) { magma_assert( ntest < MAXTESTS, "error: -N repeated more than maximum %d tests\n", MAXTESTS ); size[ntest] = atoi( argv[++i] ); magma_assert( size[ntest] > 0, "error: -N %s is invalid; must be > 0.\n", argv[i] ); N = max( N, size[ntest] ); ntest++; } else if ( strcmp("-R", argv[i]) == 0 && i+1 < argc ) { NRHS = atoi( argv[++i] ); magma_assert( NRHS > 0, "error: -R %is is invalid; must be > 0.\n", argv[i] ); } else { printf( "invalid argument: %s\n", argv[i] ); exit(1); } } if ( ntest == 0 ) { ntest = MAXTESTS; N = size[ntest-1]; } // allocate maximum amount of memory required lda = ldb = N; TESTING_MALLOC( h_A, double, lda*N ); TESTING_MALLOC( h_LU, double, lda*N ); TESTING_MALLOC( h_B, double, ldb*NRHS ); TESTING_MALLOC( h_X, double, ldb*NRHS ); TESTING_MALLOC( work, double, N ); TESTING_MALLOC( ipiv, magma_int_t, N ); printf(" N NRHS GPU GFlop/s (sec) ||B - AX|| / ||A||*||X||\n"); printf("===========================================================\n"); for( i = 0; i < ntest; ++i ) { N = size[i]; lda = ldb = N; gflops = ( FLOPS_DGETRF( (double)N, (double)N ) + FLOPS_DGETRS( (double)N, 
(double)NRHS ) ) / 1e9; /* Initialize the matrices */ szeA = lda*N; szeB = ldb*NRHS; lapackf77_dlarnv( &ione, ISEED, &szeA, h_A ); lapackf77_dlarnv( &ione, ISEED, &szeB, h_B ); // copy A to LU and B to X; save A and B for residual lapackf77_dlacpy( "F", &N, &N, h_A, &lda, h_LU, &lda ); lapackf77_dlacpy( "F", &N, &NRHS, h_B, &ldb, h_X, &ldb ); //===================================================================== // Solve Ax = b through an LU factorization //===================================================================== gpu_time = magma_wtime(); magma_dgesv( N, NRHS, h_LU, lda, ipiv, h_X, ldb, &info ); gpu_time = magma_wtime() - gpu_time; if (info != 0) printf("magma_dgesv returned error %d.\n", (int) info); gpu_perf = gflops / gpu_time; //===================================================================== // Residual //===================================================================== Anorm = lapackf77_dlange("I", &N, &N, h_A, &lda, work); Xnorm = lapackf77_dlange("I", &N, &NRHS, h_X, &ldb, work); blasf77_dgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &NRHS, &N, &c_one, h_A, &lda, h_X, &ldb, &c_neg_one, h_B, &ldb); Rnorm = lapackf77_dlange("I", &N, &NRHS, h_B, &ldb, work); printf( "%5d %5d %7.2f (%7.2f) %8.2e\n", (int) N, (int) NRHS, gpu_perf, gpu_time, Rnorm/(Anorm*Xnorm) ); } /* Memory clean up */ TESTING_FREE( h_A ); TESTING_FREE( h_LU ); TESTING_FREE( h_B ); TESTING_FREE( h_X ); TESTING_FREE( work ); TESTING_FREE( ipiv ); /* Shutdown */ TESTING_CUDA_FINALIZE(); }