/* //////////////////////////////////////////////////////////////////////////// -- Testing chegvd */ int main( int argc, char** argv) { TESTING_INIT_MGPU(); magmaFloatComplex *h_A, *h_Ainit, *h_B, *h_Binit, *h_work; #if defined(PRECISION_z) || defined(PRECISION_c) float *rwork; #endif float *w1, *w2, result; magma_int_t *iwork; real_Double_t mgpu_time, gpu_time, cpu_time; /* Matrix size */ magma_int_t N, n2, nb; magma_int_t info; magma_int_t ione = 1; magmaFloatComplex c_zero = MAGMA_C_ZERO; magmaFloatComplex c_one = MAGMA_C_ONE; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); float tol = opts.tolerance * lapackf77_slamch("E"); float tolulp = opts.tolerance * lapackf77_slamch("P"); if ( opts.check && opts.jobz == MagmaNoVec ) { fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" ); opts.jobz = MagmaVec; } printf("using: ngpu = %d, itype = %d, jobz = %s, uplo = %s, check = %d\n", (int) opts.ngpu, (int) opts.itype, lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo), (int) opts.check); printf(" N CPU Time (sec) GPU Time (sec) MGPU Time (sec)\n"); printf("=========================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { // TODO define lda N = opts.nsize[itest]; n2 = N*N; nb = magma_get_chetrd_nb(N); #if defined(PRECISION_z) || defined(PRECISION_c) magma_int_t lwork = max( N + N*nb, 2*N + N*N ); magma_int_t lrwork = 1 + 5*N +2*N*N; #else magma_int_t lwork = max( 2*N + N*nb, 1 + 6*N + 2*N*N ); #endif magma_int_t liwork = 3 + 5*N; TESTING_MALLOC_PIN( h_A, magmaFloatComplex, n2 ); TESTING_MALLOC_PIN( h_B, magmaFloatComplex, n2 ); TESTING_MALLOC_PIN( h_work, magmaFloatComplex, lwork ); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_MALLOC_PIN( rwork, float, lrwork ); #endif TESTING_MALLOC_CPU( w1, float, N ); TESTING_MALLOC_CPU( w2, float, N ); TESTING_MALLOC_CPU( iwork, magma_int_t, liwork ); /* Initialize the matrix */ lapackf77_clarnv( &ione, ISEED, &n2, h_A ); lapackf77_clarnv( &ione, ISEED, &n2, h_B ); magma_cmake_hpd( N, h_B, N ); magma_cmake_hermitian( N, h_A, N ); if ( opts.warmup || opts.check ) { TESTING_MALLOC_CPU( h_Ainit, magmaFloatComplex, n2 ); TESTING_MALLOC_CPU( h_Binit, magmaFloatComplex, n2 ); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_Ainit, &N ); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_Binit, &N ); } if (opts.warmup) { // ================================================================== // Warmup using MAGMA. // ================================================================== magma_chegvd_m( opts.ngpu, opts.itype, opts.jobz, opts.uplo, N, h_A, N, h_B, N, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_Ainit, &N, h_A, &N ); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_Binit, &N, h_B, &N ); } // =================================================================== // Performs operation using MAGMA // =================================================================== mgpu_time = magma_wtime(); magma_chegvd_m( opts.ngpu, opts.itype, opts.jobz, opts.uplo, N, h_A, N, h_B, N, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); mgpu_time = magma_wtime() - mgpu_time; if (info != 0) printf("magma_chegvd_m returned error %d: %s.\n", (int) info, magma_strerror( info )); if ( opts.check ) { /* ===================================================================== Check the results following the LAPACK's [zc]hegvd routine. A x = lambda B x is solved and the following 3 tests computed: (1) | A Z - B Z D | / ( |A||Z| N ) (itype = 1) | A B Z - Z D | / ( |A||Z| N ) (itype = 2) | B A Z - Z D | / ( |A||Z| N ) (itype = 3) =================================================================== */ #if defined(PRECISION_d) || defined(PRECISION_s) float *rwork = h_work + N*N; #endif result = 1.; result /= lapackf77_clanhe("1", lapack_uplo_const(opts.uplo), &N, h_Ainit, &N, rwork); result /= lapackf77_clange("1", &N, &N, h_A, &N, rwork); if (opts.itype == 1) { blasf77_chemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_one, h_Ainit, &N, h_A, &N, &c_zero, h_work, &N); for(int i=0; i<N; ++i) blasf77_csscal(&N, &w1[i], &h_A[i*N], &ione); blasf77_chemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_neg_one, h_Binit, &N, h_A, &N, &c_one, h_work, &N); result *= lapackf77_clange("1", &N, &N, h_work, &N, rwork)/N; } else if (opts.itype == 2) { blasf77_chemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_one, h_Binit, &N, h_A, &N, &c_zero, h_work, &N); for(int i=0; i<N; ++i) blasf77_csscal(&N, &w1[i], &h_A[i*N], &ione); blasf77_chemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_one, h_Ainit, &N, h_work, &N, &c_neg_one, h_A, &N); result *= lapackf77_clange("1", &N, &N, h_A, &N, rwork)/N; } else if (opts.itype == 3) { blasf77_chemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_one, h_Ainit, &N, h_A, &N, &c_zero, h_work, &N); for(int i=0; i<N; ++i) blasf77_csscal(&N, &w1[i], &h_A[i*N], &ione); blasf77_chemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_one, h_Binit, &N, h_work, &N, &c_neg_one, h_A, &N); result *= lapackf77_clange("1", &N, &N, h_A, &N, rwork)/N; } lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_Ainit, &N, h_A, &N ); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_Binit, &N, h_B, &N ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_chegvd(opts.itype, opts.jobz, opts.uplo, N, h_A, N, h_B, N, w2, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); gpu_time = magma_wtime() - gpu_time; if (info != 0) printf("magma_chegvd returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Performs operation using LAPACK =================================================================== */ cpu_time = magma_wtime(); lapackf77_chegvd(&opts.itype, lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo), &N, h_Ainit, &N, h_Binit, &N, w2, h_work, &lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, &lrwork, #endif iwork, &liwork, &info); cpu_time = magma_wtime() - cpu_time; if (info != 0) printf("lapackf77_chegvd returned error %d: %s.\n", (int) info, magma_strerror( info )); float temp1 = 0; float temp2 = 0; for(int j=0; j<N; j++) { temp1 = max(temp1, fabs(w1[j])); temp1 = max(temp1, fabs(w2[j])); temp2 = max(temp2, fabs(w1[j]-w2[j])); } float result2 = temp2 / (((float)N)*temp1); /* ===================================================================== Print execution time =================================================================== */ printf("%5d %7.2f %7.2f %7.2f\n", (int) N, cpu_time, gpu_time, mgpu_time); printf("Testing the eigenvalues and eigenvectors for correctness:\n"); if (opts.itype==1) { printf("(1) | A Z - B Z D | / (|A| |Z| N) = %8.2e %s\n", result, (result < tol ? "ok" : "failed") ); } else if (opts.itype==2) { printf("(1) | A B Z - Z D | / (|A| |Z| N) = %8.2e %s\n", result, (result < tol ? "ok" : "failed") ); } else if (opts.itype==3) { printf("(1) | B A Z - Z D | / (|A| |Z| N) = %8.2e %s\n", result, (result < tol ? "ok" : "failed") ); } printf( "(3) | D(MGPU)-D(LAPACK) |/ |D| = %8.2e %s\n\n", result2, (result2 < tolulp ? "ok" : "failed") ); status += ! (result < tol && result2 < tolulp); } else { printf("%5d --- --- %7.2f\n", (int) N, mgpu_time); } /* Memory clean up */ TESTING_FREE_PIN( h_A ); TESTING_FREE_PIN( h_B ); TESTING_FREE_PIN( h_work ); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_FREE_PIN( rwork ); #endif TESTING_FREE_CPU( w1 ); TESTING_FREE_CPU( w2 ); TESTING_FREE_CPU( iwork ); if ( opts.warmup || opts.check ) { TESTING_FREE_CPU( h_Ainit ); TESTING_FREE_CPU( h_Binit ); } fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } /* Shutdown */ TESTING_FINALIZE_MGPU(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing chegvd */ int main( int argc, char** argv) { TESTING_CUDA_INIT(); cuFloatComplex *h_A, *h_R, *h_B, *h_S, *h_work; float *rwork, *w1, *w2; magma_int_t *iwork; float gpu_time, cpu_time; magma_timestr_t start, end; /* Matrix size */ magma_int_t N=0, n2; magma_int_t size[4] = {1024,2048,4100,6001}; magma_int_t i, itype, info; magma_int_t ione = 1, izero = 0; magma_int_t five = 5; cuFloatComplex c_zero = MAGMA_C_ZERO; cuFloatComplex c_one = MAGMA_C_ONE; cuFloatComplex c_neg_one = MAGMA_C_NEG_ONE; float d_one = 1.; float d_neg_one = -1.; float d_ten = 10.; magma_int_t ISEED[4] = {0,0,0,1}; const char *uplo = MagmaLowerStr; const char *jobz = MagmaVectorsStr; itype = 1; magma_int_t checkres; float result[4]; int flagN = 0; if (argc != 1){ for(i = 1; i<argc; i++){ if (strcmp("-N", argv[i])==0){ N = atoi(argv[++i]); if (N>0){ printf(" testing_chegvd -N %d\n\n", (int) N); flagN=1; } else { printf("\nUsage: \n"); printf(" testing_chegvd -N %d\n\n", (int) N); exit(1); } } if (strcmp("-itype", argv[i])==0){ itype = atoi(argv[++i]); if (itype>0 && itype <= 3){ printf(" testing_chegvd -itype %d\n\n", (int) itype); } else { printf("\nUsage: \n"); printf(" testing_chegvd -itype %d\n\n", (int) itype); exit(1); } } if (strcmp("-L", argv[i])==0){ uplo = MagmaLowerStr; printf(" testing_chegvd -L"); } if (strcmp("-U", argv[i])==0){ uplo = MagmaUpperStr; printf(" testing_chegvd -U"); } } } else { printf("\nUsage: \n"); printf(" testing_chegvd -L/U -N %d -itype %d\n\n", 1024, 1); } if(!flagN) N = size[3]; checkres = getenv("MAGMA_TESTINGS_CHECK") != NULL; n2 = N * N; /* Allocate host memory for the matrix */ TESTING_MALLOC( h_A, cuFloatComplex, n2); TESTING_MALLOC( h_B, cuFloatComplex, n2); TESTING_MALLOC( w1, float , N); TESTING_MALLOC( w2, float , N); TESTING_HOSTALLOC(h_R, cuFloatComplex, n2); TESTING_HOSTALLOC(h_S, cuFloatComplex, n2); magma_int_t nb = magma_get_chetrd_nb(N); magma_int_t lwork = 2*N*nb + N*N; magma_int_t lrwork = 1 + 5*N +2*N*N; magma_int_t liwork = 3 + 5*N; TESTING_HOSTALLOC(h_work, cuFloatComplex, lwork); TESTING_MALLOC( rwork, float, lrwork); TESTING_MALLOC( iwork, magma_int_t, liwork); printf(" N CPU Time(s) GPU Time(s) \n"); printf("===================================\n"); for(i=0; i<4; i++){ if (!flagN){ N = size[i]; n2 = N*N; } /* Initialize the matrix */ lapackf77_clarnv( &ione, ISEED, &n2, h_A ); //lapackf77_clatms( &N, &N, "U", ISEED, "P", w1, &five, &d_ten, // &d_one, &N, &N, uplo, h_B, &N, h_work, &info); //lapackf77_claset( "A", &N, &N, &c_zero, &c_one, h_B, &N); lapackf77_clarnv( &ione, ISEED, &n2, h_B ); /* increase the diagonal */ { magma_int_t i, j; for(i=0; i<N; i++) { MAGMA_C_SET2REAL( h_B[i*N+i], MAGMA_C_REAL(h_B[i*N+i]) + 1.*N ); MAGMA_C_SET2REAL( h_A[i*N+i], MAGMA_C_REAL(h_A[i*N+i]) ); } } lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N ); magma_chegvd(itype, jobz[0], uplo[0], N, h_R, N, h_S, N, w1, h_work, lwork, rwork, lrwork, iwork, liwork, &info); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ start = get_current_time(); magma_chegvd(itype, jobz[0], uplo[0], N, h_R, N, h_S, N, w1, h_work, lwork, rwork, lrwork, iwork, liwork, &info); end = get_current_time(); gpu_time = GetTimerValue(start,end)/1000.; if ( checkres ) { /* ===================================================================== Check the results following the LAPACK's [zc]hegvd routine. A x = lambda B x is solved and the following 3 tests computed: (1) | A Z - B Z D | / ( |A||Z| N ) (itype = 1) | A B Z - Z D | / ( |A||Z| N ) (itype = 2) | B A Z - Z D | / ( |A||Z| N ) (itype = 3) (2) | I - V V' B | / ( N ) (itype = 1,2) | B - V V' | / ( |B| N ) (itype = 3) (3) | S(with V) - S(w/o V) | / | S | =================================================================== */ float temp1, temp2; cuFloatComplex *tau; if (itype == 1 || itype == 2){ lapackf77_claset( "A", &N, &N, &c_zero, &c_one, h_S, &N); blasf77_cgemm("N", "C", &N, &N, &N, &c_one, h_R, &N, h_R, &N, &c_zero, h_work, &N); blasf77_chemm("R", uplo, &N, &N, &c_neg_one, h_B, &N, h_work, &N, &c_one, h_S, &N); result[1]= lapackf77_clange("1", &N, &N, h_S, &N, rwork) / N; } else if (itype == 3){ lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N); blasf77_cherk(uplo, "N", &N, &N, &d_neg_one, h_R, &N, &d_one, h_S, &N); result[1]= lapackf77_clanhe("1",uplo, &N, h_S, &N, rwork) / N / lapackf77_clanhe("1",uplo, &N, h_B, &N, rwork); } result[0] = 1.; result[0] /= lapackf77_clanhe("1",uplo, &N, h_A, &N, rwork); result[0] /= lapackf77_clange("1",&N , &N, h_R, &N, rwork); if (itype == 1){ blasf77_chemm("L", uplo, &N, &N, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N); for(int i=0; i<N; ++i) blasf77_csscal(&N, &w1[i], &h_R[i*N], &ione); blasf77_chemm("L", uplo, &N, &N, &c_neg_one, h_B, &N, h_R, &N, &c_one, h_work, &N); result[0] *= lapackf77_clange("1", &N, &N, h_work, &N, rwork)/N; } else if (itype == 2){ blasf77_chemm("L", uplo, &N, &N, &c_one, h_B, &N, h_R, &N, &c_zero, h_work, &N); for(int i=0; i<N; ++i) blasf77_csscal(&N, &w1[i], &h_R[i*N], &ione); blasf77_chemm("L", uplo, &N, &N, &c_one, h_A, &N, h_work, &N, &c_neg_one, h_R, &N); result[0] *= lapackf77_clange("1", &N, &N, h_R, &N, rwork)/N; } else if (itype == 3){ blasf77_chemm("L", uplo, &N, &N, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N); for(int i=0; i<N; ++i) blasf77_csscal(&N, &w1[i], &h_R[i*N], &ione); blasf77_chemm("L", uplo, &N, &N, &c_one, h_B, &N, h_work, &N, &c_neg_one, h_R, &N); result[0] *= lapackf77_clange("1", &N, &N, h_R, &N, rwork)/N; } /* lapackf77_chet21(&ione, uplo, &N, &izero, h_A, &N, w1, w1, h_R, &N, h_R, &N, tau, h_work, rwork, &result[0]); */ lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N ); magma_chegvd(itype, 'N', uplo[0], N, h_R, N, h_S, N, w2, h_work, lwork, rwork, lrwork, iwork, liwork, &info); temp1 = temp2 = 0; for(int j=0; j<N; j++){ temp1 = max(temp1, absv(w1[j])); temp1 = max(temp1, absv(w2[j])); temp2 = max(temp2, absv(w1[j]-w2[j])); } result[2] = temp2 / temp1; } /* ===================================================================== Performs operation using LAPACK =================================================================== */ start = get_current_time(); lapackf77_chegvd(&itype, jobz, uplo, &N, h_A, &N, h_B, &N, w2, h_work, &lwork, rwork, &lrwork, iwork, &liwork, &info); end = get_current_time(); if (info < 0) printf("Argument %d of chegvd had an illegal value.\n", (int) -info); cpu_time = GetTimerValue(start,end)/1000.; /* ===================================================================== Print execution time =================================================================== */ printf("%5d %6.2f %6.2f\n", (int) N, cpu_time, gpu_time); if ( checkres ){ printf("Testing the eigenvalues and eigenvectors for correctness:\n"); if(itype==1) printf("(1) | A Z - B Z D | / (|A| |Z| N) = %e\n", result[0]); else if(itype==2) printf("(1) | A B Z - Z D | / (|A| |Z| N) = %e\n", result[0]); else if(itype==3) printf("(1) | B A Z - Z D | / (|A| |Z| N) = %e\n", result[0]); if(itype==1 || itype ==2) printf("(2) | I - Z Z' B | / N = %e\n", result[1]); else printf("(2) | B - Z Z' | / (|B| N) = %e\n", result[1]); printf("(3) | D(w/ Z)-D(w/o Z)|/ |D| = %e\n\n", result[2]); } if (flagN) break; } /* Memory clean up */ TESTING_FREE( h_A); TESTING_FREE( h_B); TESTING_FREE( w1); TESTING_FREE( w2); TESTING_FREE( rwork); TESTING_FREE( iwork); TESTING_HOSTFREE(h_work); TESTING_HOSTFREE( h_R); TESTING_HOSTFREE( h_S); /* Shutdown */ TESTING_CUDA_FINALIZE(); }