/* //////////////////////////////////////////////////////////////////////////// -- Testing cgelqf_gpu */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; float error, work[1]; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magmaFloatComplex *h_A, *h_R, *tau, *h_work, tmp[1]; magmaFloatComplex *d_A; magma_int_t M, N, n2, lda, lwork, info, min_mn, nb; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_opts opts; parse_opts( argc, argv, &opts ); printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) ||R||_F / ||A||_F\n"); printf("=======================================================================\n"); for( int i = 0; i < opts.ntest; ++i ) { for( int iter = 0; iter < opts.niter; ++iter ) { M = opts.msize[i]; N = opts.nsize[i]; min_mn = min(M, N); lda = M; n2 = lda*N; nb = magma_get_cgeqrf_nb(M); gflops = FLOPS_CGELQF( M, N ) / 1e9; // query for workspace size lwork = -1; lapackf77_cgelqf(&M, &N, h_A, &M, tau, tmp, &lwork, &info); lwork = (magma_int_t)MAGMA_C_REAL( tmp[0] ); lwork = max( lwork, M*nb ); TESTING_MALLOC( tau, magmaFloatComplex, min_mn ); TESTING_MALLOC( h_A, magmaFloatComplex, n2 ); TESTING_HOSTALLOC( h_R, magmaFloatComplex, n2 ); TESTING_DEVALLOC( d_A, magmaFloatComplex, lda*N ); TESTING_HOSTALLOC( h_work, magmaFloatComplex, lwork ); /* Initialize the matrix */ lapackf77_clarnv( &ione, ISEED, &n2, h_A ); lapackf77_clacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ magma_csetmatrix( M, N, h_R, lda, d_A, lda ); gpu_time = magma_wtime(); magma_cgelqf_gpu( M, N, d_A, lda, tau, h_work, lwork, &info); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_cgelqf_gpu returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Performs operation using LAPACK =================================================================== */ cpu_time = magma_wtime(); lapackf77_cgelqf(&M, &N, h_A, &lda, tau, h_work, &lwork, &info); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapack_cgelqf returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Check the result compared to LAPACK =================================================================== */ magma_cgetmatrix( M, N, d_A, lda, h_R, lda ); error = lapackf77_clange("f", &M, &N, h_A, &lda, work); blasf77_caxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione); error = lapackf77_clange("f", &M, &N, h_R, &lda, work) / error; printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e\n", (int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time, error ); TESTING_FREE( tau ); TESTING_FREE( h_A ); TESTING_HOSTFREE( h_R ); TESTING_HOSTFREE( h_work ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing cgelqf_gpu */ int main( int argc, char** argv) { TESTING_INIT(); const float d_neg_one = MAGMA_D_NEG_ONE; const float d_one = MAGMA_D_ONE; const magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; const magmaFloatComplex c_one = MAGMA_C_ONE; const magmaFloatComplex c_zero = MAGMA_C_ZERO; const magma_int_t ione = 1; real_Double_t gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0; float Anorm, error=0, error2=0; magmaFloatComplex *h_A, *h_R, *tau, *h_work, tmp[1]; magmaFloatComplex_ptr d_A; magma_int_t M, N, n2, lda, ldda, lwork, info, min_mn, nb; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; opts.parse_opts( argc, argv ); float tol = opts.tolerance * lapackf77_slamch("E"); printf("%% M N CPU Gflop/s (sec) GPU Gflop/s (sec) |L - A*Q^H| |I - Q*Q^H|\n"); printf("%%==============================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { M = opts.msize[itest]; N = opts.nsize[itest]; min_mn = min(M, N); lda = M; ldda = magma_roundup( M, opts.align ); // multiple of 32 by default n2 = lda*N; nb = magma_get_cgeqrf_nb( M, N ); gflops = FLOPS_CGELQF( M, N ) / 1e9; // query for workspace size lwork = -1; lapackf77_cgelqf(&M, &N, NULL, &M, NULL, tmp, &lwork, &info); lwork = (magma_int_t)MAGMA_C_REAL( tmp[0] ); lwork = max( lwork, M*nb ); TESTING_MALLOC_CPU( tau, magmaFloatComplex, min_mn ); TESTING_MALLOC_CPU( h_A, magmaFloatComplex, n2 ); TESTING_MALLOC_PIN( h_R, magmaFloatComplex, n2 ); TESTING_MALLOC_PIN( h_work, magmaFloatComplex, lwork ); TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N ); /* Initialize the matrix */ lapackf77_clarnv( &ione, ISEED, &n2, h_A ); lapackf77_clacpy( MagmaFullStr, &M, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ magma_csetmatrix( M, N, h_R, lda, d_A, ldda, opts.queue ); gpu_time = magma_wtime(); magma_cgelqf_gpu( M, N, d_A, ldda, tau, h_work, lwork, &info); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) { printf("magma_cgelqf_gpu returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== Check the result, following zlqt01 except using the reduced Q. This works for any M,N (square, tall, wide). =================================================================== */ if ( opts.check ) { magma_cgetmatrix( M, N, d_A, ldda, h_R, lda, opts.queue ); magma_int_t ldq = min_mn; magma_int_t ldl = M; magmaFloatComplex *Q, *L; float *work; TESTING_MALLOC_CPU( Q, magmaFloatComplex, ldq*N ); // K by N TESTING_MALLOC_CPU( L, magmaFloatComplex, ldl*min_mn ); // M by K TESTING_MALLOC_CPU( work, float, min_mn ); // generate K by N matrix Q, where K = min(M,N) lapackf77_clacpy( "Upper", &min_mn, &N, h_R, &lda, Q, &ldq ); lapackf77_cunglq( &min_mn, &N, &min_mn, Q, &ldq, tau, h_work, &lwork, &info ); assert( info == 0 ); // copy N by K matrix L lapackf77_claset( "Upper", &M, &min_mn, &c_zero, &c_zero, L, &ldl ); lapackf77_clacpy( "Lower", &M, &min_mn, h_R, &lda, L, &ldl ); // error = || L - A*Q^H || / (N * ||A||) blasf77_cgemm( "NoTrans", "Conj", &M, &min_mn, &N, &c_neg_one, h_A, &lda, Q, &ldq, &c_one, L, &ldl ); Anorm = lapackf77_clange( "1", &M, &N, h_A, &lda, work ); error = lapackf77_clange( "1", &M, &min_mn, L, &ldl, work ); if ( N > 0 && Anorm > 0 ) error /= (N*Anorm); // set L = I (K by K), then L = I - Q*Q^H // error = || I - Q*Q^H || / N lapackf77_claset( "Upper", &min_mn, &min_mn, &c_zero, &c_one, L, &ldl ); blasf77_cherk( "Upper", "NoTrans", &min_mn, &N, &d_neg_one, Q, &ldq, &d_one, L, &ldl ); error2 = safe_lapackf77_clanhe( "1", "Upper", &min_mn, L, &ldl, work ); if ( N > 0 ) error2 /= N; TESTING_FREE_CPU( Q ); Q = NULL; TESTING_FREE_CPU( L ); L = NULL; TESTING_FREE_CPU( work ); work = NULL; } /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); lapackf77_cgelqf( &M, &N, h_A, &lda, tau, h_work, &lwork, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) { printf("lapack_cgelqf returned error %d: %s.\n", (int) info, magma_strerror( info )); } } /* ===================================================================== Print performance and error. =================================================================== */ printf("%5d %5d ", (int) M, (int) N ); if ( opts.lapack ) { printf( "%7.2f (%7.2f)", cpu_perf, cpu_time ); } else { printf(" --- ( --- )" ); } printf( " %7.2f (%7.2f) ", gpu_perf, gpu_time ); if ( opts.check ) { bool okay = (error < tol && error2 < tol); printf( "error %.4g, error2 %.4g, tol %.4g, okay %d\n", error, error2, tol, okay ); status += ! okay; printf( "%11.2e %11.2e %s\n", error, error2, (okay ? "ok" : "failed") ); } else { printf( " ---\n" ); } TESTING_FREE_CPU( tau ); TESTING_FREE_CPU( h_A ); TESTING_FREE_PIN( h_R ); TESTING_FREE_PIN( h_work ); TESTING_FREE_DEV( d_A ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } opts.cleanup(); TESTING_FINALIZE(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing cgelqf */ int main( int argc, char** argv) { TESTING_CUDA_INIT(); magma_timestr_t start, end; float flops, gpu_perf, cpu_perf; float matnorm, work[1]; cuFloatComplex c_neg_one = MAGMA_C_NEG_ONE; cuFloatComplex *h_A, *h_R, *tau, *h_work, tmp[1]; /* Matrix size */ magma_int_t M = 0, N = 0, n2, lda, lwork; magma_int_t size[10] = {1024,2048,3072,4032,5184,6016,7040,8064,9088,10112}; magma_int_t i, info, min_mn, nb; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; if (argc != 1){ for(i = 1; i<argc; i++){ if (strcmp("-N", argv[i])==0) N = atoi(argv[++i]); else if (strcmp("-M", argv[i])==0) M = atoi(argv[++i]); } if ( M == 0 ) { M = N; } if ( N == 0 ) { N = M; } if (N>0 && M>0) printf(" testing_cgelqf -M %d -N %d\n\n", (int) M, (int) N); else { printf("\nUsage: \n"); printf(" testing_cgelqf -M %d -N %d\n\n", (int) M, (int) N); exit(1); } } else { printf("\nUsage: \n"); printf(" testing_cgelqf -M %d -N %d\n\n", 1024, 1024); M = N = size[9]; } n2 = M * N; min_mn = min(M, N); nb = magma_get_cgeqrf_nb(M); TESTING_MALLOC( tau, cuFloatComplex, min_mn ); TESTING_MALLOC( h_A, cuFloatComplex, n2 ); TESTING_HOSTALLOC( h_R, cuFloatComplex, n2 ); lwork = -1; lapackf77_cgelqf(&M, &N, h_A, &M, tau, tmp, &lwork, &info); lwork = (magma_int_t)MAGMA_C_REAL( tmp[0] ); lwork = max( lwork, M*nb ); TESTING_HOSTALLOC( h_work, cuFloatComplex, lwork ); printf(" M N CPU GFlop/s GPU GFlop/s ||R||_F / ||A||_F\n"); printf("==========================================================\n"); for(i=0; i<10; i++){ if (argc == 1){ M = N = size[i]; } min_mn= min(M, N); lda = M; n2 = lda*N; flops = FLOPS( (float)M, (float)N ) / 1000000; /* Initialize the matrix */ lapackf77_clarnv( &ione, ISEED, &n2, h_A ); lapackf77_clacpy( MagmaUpperLowerStr, &M, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ start = get_current_time(); magma_cgelqf( M, N, h_R, lda, tau, h_work, lwork, &info); end = get_current_time(); if (info < 0) printf("Argument %d of magma_cgelqf had an illegal value.\n", (int) -info); gpu_perf = flops / GetTimerValue(start, end); /* ===================================================================== Performs operation using LAPACK =================================================================== */ start = get_current_time(); lapackf77_cgelqf(&M, &N, h_A, &lda, tau, h_work, &lwork, &info); end = get_current_time(); if (info < 0) printf("Argument %d of lapack_cgelqf had an illegal value.\n", (int) -info); cpu_perf = flops / GetTimerValue(start, end); /* ===================================================================== Check the result compared to LAPACK =================================================================== */ matnorm = lapackf77_clange("f", &M, &N, h_A, &lda, work); blasf77_caxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione); printf("%5d %5d %6.2f %6.2f %e\n", (int) M, (int) N, cpu_perf, gpu_perf, lapackf77_clange("f", &M, &N, h_R, &lda, work) / matnorm); if (argc != 1) break; } /* Memory clean up */ TESTING_FREE( tau ); TESTING_FREE( h_A ); TESTING_HOSTFREE( h_R ); TESTING_HOSTFREE( h_work ); /* Shutdown */ TESTING_CUDA_FINALIZE(); }