void magmaf_ssyevd( magma_vec_t *jobz, magma_uplo_t *uplo, magma_int_t *n, float *a, magma_int_t *lda, float *w, float *work, magma_int_t *lwork, magma_int_t *iwork, magma_int_t *liwork, magma_int_t *info, magma_queue_t *queue ) { magma_ssyevd( *jobz, *uplo, *n, a, *lda, w, work, *lwork, iwork, *liwork, info, *queue ); }
std::vector<EigenComponent> solver_magma(const Eigen::MatrixXf& A, unsigned int num_ev) { static MagmaSpectralSolver magma; magma_int_t N = A.rows(); std::cout << "MAGMA Solver N=" << N << std::endl; magma_timestr_t start, end; float gpu_time; start = get_current_time(); magma_int_t info; const float *h_A = A.data(); float *h_R, *h_work; float *w1; magma_int_t *iwork; const char *uplo = MagmaLowerStr; const char *jobz = MagmaVecStr; /* Query for workspace sizes */ float aux_work[1]; magma_int_t aux_iwork[1]; std::cout << "Querying workspace size" << std::endl; magma_ssyevd( jobz[0], uplo[0], N, h_R, N, w1, aux_work, -1, aux_iwork, -1, &info ); magma_int_t lwork = (magma_int_t) aux_work[0]; magma_int_t liwork = aux_iwork[0]; std::cout << lwork << " " << liwork << std::endl; std::cout << "Allocating" << std::endl; w1 = magma.malloc<float>(N ); h_R = magma.hostmalloc<float>(N*N); h_work = magma.hostmalloc<float>(lwork); iwork = magma.malloc<magma_int_t>(liwork); std::cout << "Copying" << std::endl; slacpy_( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); std::cout << "Solving" << std::endl; magma_ssyevd(jobz[0], uplo[0], N, h_R, N, w1, h_work, lwork, iwork, liwork, &info); std::cout << "Collecting" << std::endl; // save eigenvectors and eigenvalues std::vector<EigenComponent> solution(std::min<int>(N, num_ev)); for(unsigned int i=0; i<solution.size(); i++) { solution[i].eigenvalue = w1[i+1]; Eigen::VectorXf ev(N); for(unsigned int j=0; j<N; j++) { ev[j] = *(h_R + i*N + j); } solution[i].eigenvector = ev; } std::cout << "Freeing" << std::endl; magma.free(w1); magma.hostfree(h_R); magma.hostfree(h_work); magma.free(iwork); end = get_current_time(); gpu_time = GetTimerValue(start,end)/1000.; std::cout << "Time: " << gpu_time << std::endl; return solution; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing ssyevd */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gpu_time, cpu_time; float *h_A, *h_R, *h_work; float *w1, *w2; magma_int_t *iwork; magma_int_t N, n2, info, lwork, liwork, lda, aux_iwork[1]; magma_int_t izero = 0; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; float result[3], eps, aux_work[1]; eps = lapackf77_slamch( "E" ); magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); float tol = opts.tolerance * lapackf77_slamch("E"); float tolulp = opts.tolerance * lapackf77_slamch("P"); if ( opts.check && opts.jobz == MagmaNoVec ) { fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" ); opts.jobz = MagmaVec; } printf("using: jobz = %s, uplo = %s\n", lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo)); printf(" N CPU Time (sec) GPU Time (sec)\n"); printf("=======================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; n2 = N*N; lda = N; // query for workspace sizes magma_ssyevd( opts.jobz, opts.uplo, N, NULL, lda, NULL, aux_work, -1, aux_iwork, -1, &info ); lwork = (magma_int_t) aux_work[0]; liwork = aux_iwork[0]; /* Allocate host memory for the matrix */ TESTING_MALLOC_CPU( h_A, float, N*lda ); TESTING_MALLOC_CPU( w1, float, N ); TESTING_MALLOC_CPU( w2, float, N ); TESTING_MALLOC_CPU( iwork, magma_int_t, liwork ); TESTING_MALLOC_PIN( h_R, float, N*lda ); TESTING_MALLOC_PIN( h_work, float, lwork ); /* Initialize the matrix */ lapackf77_slarnv( &ione, ISEED, &n2, h_A ); lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); /* warm up run */ if ( opts.warmup ) { magma_ssyevd( opts.jobz, opts.uplo, N, h_R, lda, w1, h_work, lwork, iwork, liwork, &info ); if (info != 0) printf("magma_ssyevd returned error %d: %s.\n", (int) info, magma_strerror( info )); lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); } /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_ssyevd( opts.jobz, opts.uplo, N, h_R, lda, w1, h_work, lwork, iwork, liwork, &info ); gpu_time = magma_wtime() - gpu_time; if (info != 0) printf("magma_ssyevd returned error %d: %s.\n", (int) info, magma_strerror( info )); if ( opts.check ) { /* ===================================================================== Check the results following the LAPACK's [zcds]drvst routine. A is factored as A = U S U' and the following 3 tests computed: (1) | A - U S U' | / ( |A| N ) (2) | I - U'U | / ( N ) (3) | S(with U) - S(w/o U) | / | S | =================================================================== */ float temp1, temp2; // tau=NULL is unused since itype=1 lapackf77_ssyt21( &ione, lapack_uplo_const(opts.uplo), &N, &izero, h_A, &lda, w1, h_work, h_R, &lda, h_R, &lda, NULL, h_work, &result[0] ); lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); magma_ssyevd( MagmaNoVec, opts.uplo, N, h_R, lda, w2, h_work, lwork, iwork, liwork, &info ); if (info != 0) printf("magma_ssyevd returned error %d: %s.\n", (int) info, magma_strerror( info )); temp1 = temp2 = 0; for( int j=0; j<N; j++ ) { temp1 = max(temp1, fabsf(w1[j])); temp1 = max(temp1, fabsf(w2[j])); temp2 = max(temp2, fabsf(w1[j]-w2[j])); } result[2] = temp2 / (((float)N)*temp1); } /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); lapackf77_ssyevd( lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo), &N, h_A, &lda, w2, h_work, &lwork, iwork, &liwork, &info ); cpu_time = magma_wtime() - cpu_time; if (info != 0) printf("lapackf77_ssyevd returned error %d: %s.\n", (int) info, magma_strerror( info )); printf("%5d %7.2f %7.2f\n", (int) N, cpu_time, gpu_time); } else { printf("%5d --- %7.2f\n", (int) N, gpu_time); } /* ===================================================================== Print execution time =================================================================== */ if ( opts.check ) { printf("Testing the factorization A = U S U' for correctness:\n"); printf("(1) | A - U S U' | / (|A| N) = %8.2e %s\n", result[0]*eps, (result[0]*eps < tol ? "ok" : "failed") ); printf("(2) | I - U'U | / N = %8.2e %s\n", result[1]*eps, (result[1]*eps < tol ? "ok" : "failed") ); printf("(3) | S(w/ U) - S(w/o U) | / |S| = %8.2e %s\n\n", result[2] , (result[2] < tolulp ? "ok" : "failed") ); status += ! (result[0]*eps < tol && result[1]*eps < tol && result[2] < tolulp); } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( w1 ); TESTING_FREE_CPU( w2 ); TESTING_FREE_CPU( iwork ); TESTING_FREE_PIN( h_R ); TESTING_FREE_PIN( h_work ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }