/* //////////////////////////////////////////////////////////////////////////// -- Testing chegvdx */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gpu_time; magmaFloatComplex *h_A, *h_R, *h_work; #if defined(PRECISION_z) || defined(PRECISION_c) float *rwork; magma_int_t lrwork; #endif /* Matrix size */ float *w1, *w2; magma_int_t *iwork; magma_int_t N, n2, info, lwork, liwork; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1};; magma_int_t info_ortho = 0; magma_int_t info_solution = 0; magma_int_t info_reduction = 0; magma_timestr_t start, end; magma_opts opts; parse_opts( argc, argv, &opts ); magma_int_t ngpu = opts.ngpu; char jobz = opts.jobz; magma_int_t checkres = opts.check; char range = 'A'; char uplo = opts.uplo; magma_int_t itype = opts.itype; float f = opts.fraction; if (f != 1) range='I'; if ( checkres && jobz == MagmaNoVec ) { fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" ); jobz = MagmaVec; } printf("using: itype = %d, jobz = %c, range = %c, uplo = %c, checkres = %d, fraction = %6.4f\n", (int) itype, jobz, range, uplo, (int) checkres, f); printf(" N M GPU Time(s) \n"); printf("==========================\n"); magma_int_t threads = magma_get_numthreads(); for( magma_int_t i = 0; i < opts.ntest; ++i ) { for( magma_int_t iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[i]; n2 = N*N; #if defined(PRECISION_z) || defined(PRECISION_c) lwork = magma_cbulge_get_lq2(N, threads) + 2*N + N*N; lrwork = 1 + 5*N +2*N*N; #else lwork = magma_cbulge_get_lq2(N, threads) + 1 + 6*N + 2*N*N; #endif liwork = 3 + 5*N; /* Allocate host memory for the matrix */ TESTING_MALLOC( h_A, magmaFloatComplex, n2); TESTING_MALLOC( w1, float , N); TESTING_MALLOC( w2, float , N); TESTING_HOSTALLOC(h_R, magmaFloatComplex, n2); TESTING_HOSTALLOC(h_work, magmaFloatComplex, lwork); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_HOSTALLOC( rwork, float, lrwork); #endif TESTING_MALLOC( iwork, magma_int_t, liwork); /* Initialize the matrix */ lapackf77_clarnv( &ione, ISEED, &n2, h_A ); /* Make diagonal real */ for(int i=0; i<N; i++) { MAGMA_C_SET2REAL( h_A[i*N+i], MAGMA_C_REAL(h_A[i*N+i]) ); } magma_int_t m1 = 0; float vl = 0; float vu = 0; magma_int_t il = 0; magma_int_t iu = 0; if (range == 'I'){ il = 1; iu = (int) (f*N); } if(opts.warmup){ // ================================================================== // Warmup using MAGMA // ================================================================== lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); if(ngpu==1){ printf("calling cheevdx_2stage 1 GPU\n"); magma_cheevdx_2stage(jobz, range, uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); }else{ printf("calling cheevdx_2stage_m %d GPU\n", (int) ngpu); magma_cheevdx_2stage_m(ngpu, jobz, range, uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } } // =================================================================== // Performs operation using MAGMA // =================================================================== lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); start = get_current_time(); if(ngpu==1){ printf("calling cheevdx_2stage 1 GPU\n"); magma_cheevdx_2stage(jobz, range, uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); }else{ printf("calling cheevdx_2stage_m %d GPU\n", (int) ngpu); magma_cheevdx_2stage_m(ngpu, jobz, range, uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } end = get_current_time(); gpu_time = GetTimerValue(start,end)/1000.; if ( checkres ) { float eps = lapackf77_slamch("E"); printf("\n"); printf("------ TESTS FOR MAGMA CHEEVD ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", (int) N, (int) N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is %8.2e\n",eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* Check the orthogonality, reduction and the eigen solutions */ if (jobz == MagmaVec) { info_ortho = check_orthogonality(N, N, h_R, N, eps); info_reduction = check_reduction(uplo, N, 1, h_A, w1, N, h_R, eps); } printf("------ CALLING LAPACK CHEEVD TO COMPUTE only eigenvalue and verify elementswise ------- \n"); lapackf77_cheevd("N", "L", &N, h_A, &N, w2, h_work, &lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, &lrwork, #endif iwork, &liwork, &info); info_solution = check_solution(N, w2, w1, eps); if ( (info_solution == 0) & (info_ortho == 0) & (info_reduction == 0) ) { printf("***************************************************\n"); printf(" ---- TESTING CHEEVD ...................... PASSED !\n"); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING CHEEVD ... FAILED !\n"); printf("************************************************\n"); } } /* ===================================================================== Print execution time =================================================================== */ printf("%5d %5d %6.2f\n", (int) N, (int) m1, gpu_time); TESTING_FREE( h_A); TESTING_FREE( w1); TESTING_FREE( w2); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_HOSTFREE( rwork); #endif TESTING_FREE( iwork); TESTING_HOSTFREE(h_work); TESTING_HOSTFREE( h_R); } if ( opts.niter > 1 ) { printf( "\n" ); } } /* Shutdown */ TESTING_FINALIZE(); return 0; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing dsygvdx */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gpu_time; double *h_A, *h_R, *h_work; #if defined(PRECISION_z) || defined(PRECISION_c) double *rwork; magma_int_t lrwork; #endif /* Matrix size */ double *w1, *w2; magma_int_t *iwork; magma_int_t N, n2, info, lwork, liwork; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1};; magma_int_t info_ortho = 0; magma_int_t info_solution = 0; magma_int_t info_reduction = 0; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); magma_range_t range = MagmaRangeAll; if (opts.fraction != 1) range = MagmaRangeI; if ( opts.check && opts.jobz == MagmaNoVec ) { fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" ); opts.jobz = MagmaVec; } printf("using: itype = %d, jobz = %s, range = %s, uplo = %s, check = %d, fraction = %6.4f\n", (int) opts.itype, lapack_vec_const(opts.jobz), lapack_range_const(range), lapack_uplo_const(opts.uplo), (int) opts.check, opts.fraction); printf(" N M GPU Time (sec) ||I-Q'Q||/. ||A-QDQ'||/. ||D-D_magma||/.\n"); printf("=======================================================================\n"); magma_int_t threads = magma_get_parallel_numthreads(); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; n2 = N*N; #if defined(PRECISION_z) || defined(PRECISION_c) lwork = magma_dbulge_get_lq2(N, threads) + 2*N + N*N; lrwork = 1 + 5*N +2*N*N; #else lwork = magma_dbulge_get_lq2(N, threads) + 1 + 6*N + 2*N*N; #endif liwork = 3 + 5*N; /* Allocate host memory for the matrix */ TESTING_MALLOC_CPU( h_A, double, n2 ); TESTING_MALLOC_CPU( w1, double, N ); TESTING_MALLOC_CPU( w2, double, N ); TESTING_MALLOC_CPU( iwork, magma_int_t, liwork ); TESTING_MALLOC_PIN( h_R, double, n2 ); TESTING_MALLOC_PIN( h_work, double, lwork ); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_MALLOC_PIN( rwork, double, lrwork ); #endif /* Initialize the matrix */ lapackf77_dlarnv( &ione, ISEED, &n2, h_A ); magma_dmake_symmetric( N, h_A, N ); magma_int_t m1 = 0; double vl = 0; double vu = 0; magma_int_t il = 0; magma_int_t iu = 0; if (range == MagmaRangeI) { il = 1; iu = (int) (opts.fraction*N); } if (opts.warmup) { // ================================================================== // Warmup using MAGMA // ================================================================== lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); if (opts.ngpu == 1) { //printf("calling dsyevdx_2stage 1 GPU\n"); magma_dsyevdx_2stage(opts.jobz, range, opts.uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } else { //printf("calling dsyevdx_2stage_m %d GPU\n", (int) opts.ngpu); magma_dsyevdx_2stage_m(opts.ngpu, opts.jobz, range, opts.uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } } // =================================================================== // Performs operation using MAGMA // =================================================================== lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); gpu_time = magma_wtime(); if (opts.ngpu == 1) { //printf("calling dsyevdx_2stage 1 GPU\n"); magma_dsyevdx_2stage(opts.jobz, range, opts.uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } else { //printf("calling dsyevdx_2stage_m %d GPU\n", (int) opts.ngpu); magma_dsyevdx_2stage_m(opts.ngpu, opts.jobz, range, opts.uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } gpu_time = magma_wtime() - gpu_time; printf("%5d %5d %7.2f ", (int) N, (int) m1, gpu_time ); if ( opts.check ) { double eps = lapackf77_dlamch("E"); //printf("\n"); //printf("------ TESTS FOR MAGMA DSYEVD ROUTINE ------- \n"); //printf(" Size of the Matrix %d by %d\n", (int) N, (int) N); //printf("\n"); //printf(" The matrix A is randomly generated for each test.\n"); //printf("============\n"); //printf(" The relative machine precision (eps) is %8.2e\n",eps); //printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* Check the orthogonality, reduction and the eigen solutions */ if (opts.jobz == MagmaVec) { info_ortho = check_orthogonality(N, N, h_R, N, eps); info_reduction = check_reduction(opts.uplo, N, 1, h_A, w1, N, h_R, eps); } //printf("------ CALLING LAPACK DSYEVD TO COMPUTE only eigenvalue and verify elementswise ------- \n"); lapackf77_dsyevd("N", "L", &N, h_A, &N, w2, h_work, &lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, &lrwork, #endif iwork, &liwork, &info); info_solution = check_solution(N, w2, w1, eps); if ( (info_solution == 0) && (info_ortho == 0) && (info_reduction == 0) ) { printf(" ok\n"); //printf("***************************************************\n"); //printf(" ---- TESTING DSYEVD ...................... PASSED !\n"); //printf("***************************************************\n"); } else { printf(" failed\n"); status += 1; //printf("************************************************\n"); //printf(" - TESTING DSYEVD ... FAILED !\n"); //printf("************************************************\n"); } } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( w1 ); TESTING_FREE_CPU( w2 ); TESTING_FREE_CPU( iwork ); TESTING_FREE_PIN( h_R ); TESTING_FREE_PIN( h_work ); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_FREE_PIN( rwork ); #endif fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } /* Shutdown */ TESTING_FINALIZE(); return status; }
int testing_dsygv(int argc, char **argv) { /* Check for number of arguments*/ if (argc != 3) { USAGE("HEGV", "N LDA LDB", " - N : size of the matrices A and B\n" " - LDA : leading dimension of the matrix A\n" " - LDB : leading dimension of the matrix B\n"); return -1; } double eps = LAPACKE_dlamch_work('e'); PLASMA_enum vec = PlasmaNoVec; int N = atoi(argv[0]); int LDA = atoi(argv[1]); int LDB = atoi(argv[2]); int LDQ = LDA; int LDAxN = LDA*N; int LDBxN = LDB*N; int LDQxN = LDQ*N; int info_ortho = 0; int info_solution = 0; int info_reduction = 0; int i, u; double *A1 = (double *)malloc(LDAxN*sizeof(double)); double *A2 = (double *)malloc(LDAxN*sizeof(double)); double *B1 = (double *)malloc(LDBxN*sizeof(double)); double *B2 = (double *)malloc(LDBxN*sizeof(double)); double *Q = (double *)malloc(LDQxN*sizeof(double)); double *Ainit = (double *)malloc(LDAxN*sizeof(double)); double *Binit = (double *)malloc(LDBxN*sizeof(double)); double *W1 = (double *)malloc(N*sizeof(double)); double *W2 = (double *)malloc(N*sizeof(double)); double *work = (double *)malloc(3*N* sizeof(double)); PLASMA_desc *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)||(!Q)||(!Ainit)||(!Binit)){ printf("Out of Memory \n "); return -2; } /* PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, 120); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, 20); */ PLASMA_Enable(PLASMA_WARNINGS); PLASMA_Enable(PLASMA_ERRORS); PLASMA_Alloc_Workspace_dsygv(N, N, &T); /*---------------------------------------------------------- * TESTING DSYGV */ /* Initialize A1 and Ainit */ PLASMA_dplgsy(0., N, A1, LDA, 5198); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A1, LDA, Ainit, LDA); /* Initialize B1 and Binit */ PLASMA_dplgsy((double)N, N, B1, LDB, 4321 ); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, B1, LDB, Binit, LDB); printf("\n"); printf("------ TESTS FOR PLASMA DSYGV ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /*---------------------------------------------------------- * TESTING DSYGV */ for (i=0; i<3; i++) { for (u=0; u<2; u++) { LAPACKE_dlaset_work(LAPACK_COL_MAJOR, 'A', LDA, N, 0., 1., Q, LDA); memcpy(A2, Ainit, LDAxN*sizeof(double)); memcpy(B2, Binit, LDBxN*sizeof(double)); PLASMA_dsygv(itype[i], vec, uplo[u], N, A2, LDA, B2, LDB, W2, T, Q, LDQ); /* Check the orthogonality, reduction and the eigen solutions */ if (vec == PlasmaVec) info_ortho = check_orthogonality(N, N, Q, LDA, eps); /* * WARNING: For now, Q is associated to Band tridiagonal reduction and * not to the final tridiagonal reduction, so we can not call the check */ if (0) info_reduction = check_reduction(itype[i], uplo[u], N, 1, A1, A2, LDA, B2, LDB, Q, eps); memcpy(A1, Ainit, LDAxN*sizeof(double)); memcpy(B1, Binit, LDBxN*sizeof(double)); LAPACKE_dsygv( LAPACK_COL_MAJOR, itype[i], lapack_const(vec), lapack_const(uplo[u]), N, A1, LDA, B1, LDB, W1); /*info_solution = check_solution(N, N, N, A1, LDA, B1, B2, LDB, eps);*/ info_solution = check_solution(N, W1, W2, eps); if ( (info_ortho == 0) & (info_reduction == 0) & (info_solution == 0)) { printf("***************************************************\n"); printf(" ---- TESTING DSYGV (%s, %s) ...................... PASSED !\n", itypestr[i], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING DSYGV (%s, %s) ... FAILED !\n", itypestr[i], uplostr[u]); printf("************************************************\n"); } } } PLASMA_Dealloc_Handle_Tile(&T); free(A1); free(A2); free(B1); free(B2); free(Q); free(Ainit); free(Binit); free(W1); free(W2); free(work); return 0; }