/** @return true if either real(x) or imag(x) is INF. */
inline bool magma_c_isinf( magmaFloatComplex x )
{
#ifdef COMPLEX
    return isinf( MAGMA_C_REAL( x )) ||
           isinf( MAGMA_C_IMAG( x ));
#else
    return isinf( x );
#endif
}
void magma_cmake_spd( magma_int_t N, magmaFloatComplex* A, magma_int_t lda )
{
    magma_int_t i, j;
    for( i=0; i < N; ++i ) {
        A(i,i) = MAGMA_C_MAKE( MAGMA_C_REAL( A(i,i) ) + N, MAGMA_C_IMAG( A(i,i) ) );
        for( j=0; j < i; ++j ) {
            A(j,i) = A(i,j);
        }
    }
}
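/*
   Usage sketch (illustrative, not from the original file): how a test might
   prepare a positive-definite test matrix with magma_cmake_spd before a
   factorization test. The names h_A, n, lda, and ISEED are assumptions
   borrowed from the usual MAGMA testing setup.

       magma_int_t ione = 1, n2 = lda*n;
       lapackf77_clarnv( &ione, ISEED, &n2, h_A );  // fill with random entries
       magma_cmake_spd( n, h_A, lda );              // add n to diagonal, symmetrize
*/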
void magma_cprint(
    magma_int_t m, magma_int_t n,
    const magmaFloatComplex *A, magma_int_t lda )
{
    #define A(i,j) (A + (i) + (j)*lda)

    magma_int_t info = 0;
    if ( m < 0 )
        info = -1;
    else if ( n < 0 )
        info = -2;
    else if ( lda < max(1,m) )
        info = -4;

    if (info != 0) {
        magma_xerbla( __func__, -(info) );
        return;  //info;
    }

    magmaFloatComplex c_zero = MAGMA_C_ZERO;

    if ( m == 1 ) {
        printf( "[ " );
    }
    else {
        printf( "[\n" );
    }
    for( int i = 0; i < m; ++i ) {
        for( int j = 0; j < n; ++j ) {
            if ( MAGMA_C_EQUAL( *A(i,j), c_zero )) {
#ifdef COMPLEX
                printf( " 0. " );
#else
                printf( " 0. " );
#endif
            }
            else {
#ifdef COMPLEX
                printf( " %8.4f+%8.4fi", MAGMA_C_REAL( *A(i,j) ), MAGMA_C_IMAG( *A(i,j) ));
#else
                printf( " %8.4f", MAGMA_C_REAL( *A(i,j) ));
#endif
            }
        }
        if ( m > 1 ) {
            printf( "\n" );
        }
        else {
            printf( " " );
        }
    }
    printf( "];\n" );
}
void magma_cprint(
    magma_int_t m, magma_int_t n,
    const magmaFloatComplex *A, magma_int_t lda )
{
    if ( magma_is_devptr( A ) == 1 ) {
        fprintf( stderr, "ERROR: cprint called with device pointer.\n" );
        exit(1);
    }

    magmaFloatComplex c_zero = MAGMA_C_ZERO;

    if ( m == 1 ) {
        printf( "[ " );
    }
    else {
        printf( "[\n" );
    }
    for( int i = 0; i < m; ++i ) {
        for( int j = 0; j < n; ++j ) {
            if ( MAGMA_C_EQUAL( *A(i,j), c_zero )) {
                printf( " 0. " );
            }
            else {
#if defined(PRECISION_z) || defined(PRECISION_c)
                printf( " %8.4f+%8.4fi", MAGMA_C_REAL( *A(i,j) ), MAGMA_C_IMAG( *A(i,j) ));
#else
                printf( " %8.4f", MAGMA_C_REAL( *A(i,j) ));
#endif
            }
        }
        if ( m > 1 ) {
            printf( "\n" );
        }
        else {
            printf( " " );
        }
    }
    printf( "];\n" );
}
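/*
   Usage sketch (illustrative; h_A, m, n, lda are assumptions for the example):
   printing a small m-by-n host matrix in the MATLAB-style "[ ... ];" format
   produced above. A device matrix must be copied to the host first.

       magma_cprint( m, n, h_A, lda );
*/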
/***************************************************************************//**
    Purpose
    -------
    CGEEV computes for an N-by-N complex nonsymmetric matrix A, the
    eigenvalues and, optionally, the left and/or right eigenvectors.

    The right eigenvector v(j) of A satisfies
        A * v(j) = lambda(j) * v(j)
    where lambda(j) is its eigenvalue.

    The left eigenvector u(j) of A satisfies
        u(j)**H * A = lambda(j) * u(j)**H
    where u(j)**H denotes the conjugate transpose of u(j).

    The computed eigenvectors are normalized to have Euclidean norm
    equal to 1 and largest component real.

    Arguments
    ---------
    @param[in]
    jobvl   magma_vec_t
      -     = MagmaNoVec: left eigenvectors of A are not computed;
      -     = MagmaVec:   left eigenvectors of A are computed.

    @param[in]
    jobvr   magma_vec_t
      -     = MagmaNoVec: right eigenvectors of A are not computed;
      -     = MagmaVec:   right eigenvectors of A are computed.

    @param[in]
    n       INTEGER
            The order of the matrix A. N >= 0.

    @param[in,out]
    A       COMPLEX array, dimension (LDA,N)
            On entry, the N-by-N matrix A.
            On exit, A has been overwritten.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A. LDA >= max(1,N).

    @param[out]
    w       COMPLEX array, dimension (N)
            W contains the computed eigenvalues.

    @param[out]
    VL      COMPLEX array, dimension (LDVL,N)
            If JOBVL = MagmaVec, the left eigenvectors u(j) are stored one
            after another in the columns of VL, in the same order as their
            eigenvalues.
            If JOBVL = MagmaNoVec, VL is not referenced.
            u(j) = VL(:,j), the j-th column of VL.

    @param[in]
    ldvl    INTEGER
            The leading dimension of the array VL. LDVL >= 1; if
            JOBVL = MagmaVec, LDVL >= N.

    @param[out]
    VR      COMPLEX array, dimension (LDVR,N)
            If JOBVR = MagmaVec, the right eigenvectors v(j) are stored one
            after another in the columns of VR, in the same order as their
            eigenvalues.
            If JOBVR = MagmaNoVec, VR is not referenced.
            v(j) = VR(:,j), the j-th column of VR.

    @param[in]
    ldvr    INTEGER
            The leading dimension of the array VR. LDVR >= 1; if
            JOBVR = MagmaVec, LDVR >= N.

    @param[out]
    work    (workspace) COMPLEX array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The dimension of the array WORK. LWORK >= (1 + nb + nb*ngpu)*N.
            For optimal performance, LWORK >= (1 + 2*nb + nb*ngpu)*N.
    \n
            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    @param
    rwork   (workspace) REAL array, dimension (2*N)

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value.
      -     > 0:  if INFO = i, the QR algorithm failed to compute all the
                  eigenvalues, and no eigenvectors have been computed;
                  elements i+1:N of W contain eigenvalues which have converged.
    @ingroup magma_geev
*******************************************************************************/
extern "C" magma_int_t
magma_cgeev_m(
    magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n,
    magmaFloatComplex *A, magma_int_t lda,
    #ifdef COMPLEX
    magmaFloatComplex *w,
    #else
    float *wr, float *wi,
    #endif
    magmaFloatComplex *VL, magma_int_t ldvl,
    magmaFloatComplex *VR, magma_int_t ldvr,
    magmaFloatComplex *work, magma_int_t lwork,
    #ifdef COMPLEX
    float *rwork,
    #endif
    magma_int_t *info )
{
    #define VL(i,j) (VL + (i) + (j)*ldvl)
    #define VR(i,j) (VR + (i) + (j)*ldvr)

    const magma_int_t ione  = 1;
    const magma_int_t izero = 0;

    float d__1, d__2;
    magmaFloatComplex tmp;
    float scl;
    float dum[1], eps;
    float anrm, cscale, bignum, smlnum;
    magma_int_t i, k, ilo, ihi;
    magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, nb;
    magma_int_t scalea, minwrk, optwrk, irwork, lquery, wantvl, wantvr, select[1];

    magma_side_t side = MagmaRight;
    magma_int_t ngpu = magma_num_gpus();

    irwork = 0;
    *info = 0;
    lquery = (lwork == -1);
    wantvl = (jobvl == MagmaVec);
    wantvr = (jobvr == MagmaVec);
    if (! wantvl && jobvl != MagmaNoVec) {
        *info = -1;
    } else if (! wantvr && jobvr != MagmaNoVec) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -8;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -10;
    }

    /* Compute workspace */
    nb = magma_get_cgehrd_nb( n );
    if (*info == 0) {
        minwrk = (1 +   nb + nb*ngpu)*n;
        optwrk = (1 + 2*nb + nb*ngpu)*n;
        work[0] = magma_cmake_lwork( optwrk );

        if (lwork < minwrk && ! lquery) {
            *info = -12;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }

    #if defined(Version3)
    magmaFloatComplex *dT;
    if (MAGMA_SUCCESS != magma_cmalloc( &dT, nb*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    #endif
    #if defined(Version5)
    magmaFloatComplex *T;
    if (MAGMA_SUCCESS != magma_cmalloc_cpu( &T, nb*n )) {
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    #endif

    /* Get machine constants */
    eps    = lapackf77_slamch( "P" );
    smlnum = lapackf77_slamch( "S" );
    bignum = 1. / smlnum;
    lapackf77_slabad( &smlnum, &bignum );
    smlnum = magma_ssqrt( smlnum ) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_clange( "M", &n, &n, A, &lda, dum );
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    }
    else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_clascl( "G", &izero, &izero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
    }

    /* Balance the matrix
     * (CWorkspace: none)
     * (RWorkspace: need N)
     *  - this space is reserved until after gebak */
    ibal = 0;
    lapackf77_cgebal( "B", &n, A, &lda, &ilo, &ihi, &rwork[ibal], &ierr );

    /* Reduce to upper Hessenberg form
     * (CWorkspace: need 2*N, prefer N + N*NB + NB*NGPU)
     * (RWorkspace: N)
     *  - added NB*NGPU needed for multi-GPU magma_cgehrd_m
     *  - including N reserved for gebal/gebak, unused by cgehrd */
    itau = 0;
    iwrk = itau + n;
    liwrk = lwork - iwrk;

    #if defined(Version1)
        // Version 1 - LAPACK
        lapackf77_cgehrd( &n, &ilo, &ihi, A, &lda,
                          &work[itau], &work[iwrk], &liwrk, &ierr );
    #elif defined(Version2)
        // Version 2 - LAPACK consistent HRD
        magma_cgehrd2( n, ilo, ihi, A, lda,
                       &work[itau], &work[iwrk], liwrk, &ierr );
    #elif defined(Version3)
        // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored,
        magma_cgehrd( n, ilo, ihi, A, lda,
                      &work[itau], &work[iwrk], liwrk, dT, &ierr );
    #elif defined(Version5)
        // Version 4 - Multi-GPU, T on host
        magma_cgehrd_m( n, ilo, ihi, A, lda,
                        &work[itau], &work[iwrk], liwrk, T, &ierr );
    #endif

    if (wantvl) {
        /* Want left eigenvectors
         * Copy Householder vectors to VL */
        side = MagmaLeft;
        lapackf77_clacpy( MagmaLowerStr, &n, &n, A, &lda, VL, &ldvl );

        /* Generate unitary matrix in VL
         * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by cunghr */
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_cunghr( &n, &ilo, &ihi, VL, &ldvl, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_cunghr( n, ilo, ihi, VL, ldvl, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_cunghr_m( n, ilo, ihi, VL, ldvl, &work[itau], T, nb, &ierr );
        #endif

        /* Perform QR iteration, accumulating Schur vectors in VL
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by chseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_chseqr( "S", "V", &n, &ilo, &ihi, A, &lda, w,
                          VL, &ldvl, &work[iwrk], &liwrk, info );

        if (wantvr) {
            /* Want left and right eigenvectors
             * Copy Schur vectors to VR */
            side = MagmaBothSides;
            lapackf77_clacpy( "F", &n, &n, VL, &ldvl, VR, &ldvr );
        }
    }
    else if (wantvr) {
        /* Want right eigenvectors
         * Copy Householder vectors to VR */
        side = MagmaRight;
        lapackf77_clacpy( "L", &n, &n, A, &lda, VR, &ldvr );

        /* Generate unitary matrix in VR
         * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by cunghr */
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_cunghr( &n, &ilo, &ihi, VR, &ldvr, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_cunghr( n, ilo, ihi, VR, ldvr, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_cunghr_m( n, ilo, ihi, VR, ldvr, &work[itau], T, nb, &ierr );
        #endif

        /* Perform QR iteration, accumulating Schur vectors in VR
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by chseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_chseqr( "S", "V", &n, &ilo, &ihi, A, &lda, w,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
    }
    else {
        /* Compute eigenvalues only
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by chseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_chseqr( "E", "N", &n, &ilo, &ihi, A, &lda, w,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
    }

    /* If INFO > 0 from CHSEQR, then quit */
    if (*info > 0) {
        goto CLEANUP;
    }

    if (wantvl || wantvr) {
        /* Compute left and/or right eigenvectors
         * (CWorkspace: need 2*N)
         * (RWorkspace: need 2*N)
         *  - including N reserved for gebal/gebak, unused by ctrevc */
        irwork = ibal + n;
        #if TREVC_VERSION == 1
        lapackf77_ctrevc( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl, VR, &ldvr,
                          &n, &nout, &work[iwrk], &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 2
        liwrk = lwork - iwrk;
        lapackf77_ctrevc3( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl, VR, &ldvr,
                           &n, &nout, &work[iwrk], &liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 3
        magma_ctrevc3( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl, VR, ldvr,
                       n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 4
        magma_ctrevc3_mt( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl, VR, ldvr,
                          n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 5
        magma_ctrevc3_mt_gpu( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl, VR, ldvr,
                              n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #else
        #error Unknown TREVC_VERSION
        #endif
    }

    if (wantvl) {
        /* Undo balancing of left eigenvectors
         * (CWorkspace: none)
         * (RWorkspace: need N) */
        lapackf77_cgebak( "B", "L", &n, &ilo, &ihi, &rwork[ibal], &n, VL, &ldvl, &ierr );

        /* Normalize left eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            scl = 1. / magma_cblas_scnrm2( n, VL(0,i), 1 );
            blasf77_csscal( &n, &scl, VL(0,i), &ione );
            for (k = 0; k < n; ++k) {
                /* Computing 2nd power */
                d__1 = MAGMA_C_REAL( *VL(k,i) );
                d__2 = MAGMA_C_IMAG( *VL(k,i) );
                rwork[irwork + k] = d__1*d__1 + d__2*d__2;
            }
            k = blasf77_isamax( &n, &rwork[irwork], &ione ) - 1;  // subtract 1; k is 0-based
            tmp = MAGMA_C_CONJ( *VL(k,i) ) / magma_ssqrt( rwork[irwork + k] );
            blasf77_cscal( &n, &tmp, VL(0,i), &ione );
            *VL(k,i) = MAGMA_C_MAKE( MAGMA_C_REAL( *VL(k,i) ), 0 );
        }
    }

    if (wantvr) {
        /* Undo balancing of right eigenvectors
         * (CWorkspace: none)
         * (RWorkspace: need N) */
        lapackf77_cgebak( "B", "R", &n, &ilo, &ihi, &rwork[ibal], &n, VR, &ldvr, &ierr );

        /* Normalize right eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            scl = 1. / magma_cblas_scnrm2( n, VR(0,i), 1 );
            blasf77_csscal( &n, &scl, VR(0,i), &ione );
            for (k = 0; k < n; ++k) {
                /* Computing 2nd power */
                d__1 = MAGMA_C_REAL( *VR(k,i) );
                d__2 = MAGMA_C_IMAG( *VR(k,i) );
                rwork[irwork + k] = d__1*d__1 + d__2*d__2;
            }
            k = blasf77_isamax( &n, &rwork[irwork], &ione ) - 1;  // subtract 1; k is 0-based
            tmp = MAGMA_C_CONJ( *VR(k,i) ) / magma_ssqrt( rwork[irwork + k] );
            blasf77_cscal( &n, &tmp, VR(0,i), &ione );
            *VR(k,i) = MAGMA_C_MAKE( MAGMA_C_REAL( *VR(k,i) ), 0 );
        }
    }

CLEANUP:
    /* Undo scaling if necessary */
    if (scalea) {
        // converged eigenvalues, stored in WR[i+1:n] and WI[i+1:n] for i = INFO
        magma_int_t nval = n - (*info);
        magma_int_t ld   = max( nval, 1 );
        lapackf77_clascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, w + (*info), &ld, &ierr );
        if (*info > 0) {
            // first ilo columns were already upper triangular,
            // so the corresponding eigenvalues are also valid.
            nval = ilo - 1;
            lapackf77_clascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, w, &n, &ierr );
        }
    }

    #if defined(Version3)
    magma_free( dT );
    #endif
    #if defined(Version5)
    magma_free_cpu( T );
    #endif

    work[0] = magma_cmake_lwork( minwrk );  // TODO use optwrk as in dgeev

    return *info;
} /* magma_cgeev */
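/*
   Usage sketch (illustrative, not part of the library): calling magma_cgeev_m
   with the workspace query described above (lwork = -1), then with the real
   workspace. The host arrays hA, w, VR, rwork (of dimension 2*n) and the size
   n are assumptions for the example; error checking is omitted.

       magma_int_t info, lwork;
       magmaFloatComplex *work = NULL, query;
       // workspace query: the optimal lwork is returned in query
       magma_cgeev_m( MagmaNoVec, MagmaVec, n, hA, n, w,
                      NULL, 1, VR, n, &query, -1, rwork, &info );
       lwork = (magma_int_t) MAGMA_C_REAL( query );
       magma_cmalloc_cpu( &work, lwork );
       // actual computation: eigenvalues in w, right eigenvectors in VR
       magma_cgeev_m( MagmaNoVec, MagmaVec, n, hA, n, w,
                      NULL, 1, VR, n, work, lwork, rwork, &info );
       magma_free_cpu( work );
*/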
float magma_szlapy2( magmaFloatComplex x )
{
    return magma_slapy2( MAGMA_C_REAL(x), MAGMA_C_IMAG(x) );
}
// comparison operator for sorting
bool compare( magmaFloatComplex a, magmaFloatComplex b )
{
    return (MAGMA_C_REAL(a) < MAGMA_C_REAL(b)) ||
           (MAGMA_C_REAL(a) == MAGMA_C_REAL(b) && MAGMA_C_IMAG(a) < MAGMA_C_IMAG(b));
}
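/*
   Usage sketch (illustrative; w1, w2, and n are assumptions): sort two
   eigenvalue arrays consistently with the comparator above before comparing
   them element-wise, e.g. MAGMA results against a LAPACK reference.

       #include <algorithm>
       std::sort( w1, w1 + n, compare );
       std::sort( w2, w2 + n, compare );
*/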
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing cgetrf_batched
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gflops, magma_perf, magma_time, cublas_perf=0., cublas_time=0., cpu_perf=0, cpu_time=0;
    float           error;
    magma_int_t cublas_enable = 0;
    magmaFloatComplex *h_A, *h_R;
    magmaFloatComplex *dA_magma;
    magmaFloatComplex **dA_array = NULL;
    magma_int_t     **dipiv_array = NULL;
    magma_int_t     *ipiv, *cpu_info;
    magma_int_t     *dipiv_magma, *dinfo_magma;
    magma_int_t M, N, n2, lda, ldda, min_mn, info;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t batchCount;
    magma_int_t status = 0;

    magma_opts opts( MagmaOptsBatched );
    opts.parse_opts( argc, argv );
    //opts.lapack |= opts.check;

    batchCount = opts.batchcount;
    magma_int_t columns;
    float tol = opts.tolerance * lapackf77_slamch("E");

    printf("%% BatchCount M N CPU Gflop/s (ms) MAGMA Gflop/s (ms) CUBLAS Gflop/s (ms) ||PA-LU||/(||A||*N)\n");
    printf("%%==========================================================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            M = opts.msize[itest];
            N = opts.nsize[itest];
            min_mn = min(M, N);
            lda    = M;
            n2     = lda*N * batchCount;
            ldda   = magma_roundup( M, opts.align );  // multiple of 32 by default
            gflops = FLOPS_CGETRF( M, N ) / 1e9 * batchCount;

            TESTING_MALLOC_CPU( cpu_info, magma_int_t, batchCount );
            TESTING_MALLOC_CPU( ipiv, magma_int_t, min_mn * batchCount );
            TESTING_MALLOC_CPU( h_A,  magmaFloatComplex, n2 );
            TESTING_MALLOC_CPU( h_R,  magmaFloatComplex, n2 );

            TESTING_MALLOC_DEV( dA_magma,    magmaFloatComplex, ldda*N * batchCount );
            TESTING_MALLOC_DEV( dipiv_magma, magma_int_t, min_mn * batchCount );
            TESTING_MALLOC_DEV( dinfo_magma, magma_int_t, batchCount );

            TESTING_MALLOC_DEV( dA_array,    magmaFloatComplex*, batchCount );
            TESTING_MALLOC_DEV( dipiv_array, magma_int_t*,       batchCount );

            /* Initialize the matrix */
            lapackf77_clarnv( &ione, ISEED, &n2, h_A );
            // make A diagonally dominant, to not need pivoting
            for( int s=0; s < batchCount; ++s ) {
                for( int i=0; i < min_mn; ++i ) {
                    h_A[ i + i*lda + s*lda*N ] = MAGMA_C_MAKE(
                        MAGMA_C_REAL( h_A[ i + i*lda + s*lda*N ] ) + N,
                        MAGMA_C_IMAG( h_A[ i + i*lda + s*lda*N ] ));
                }
            }
            columns = N * batchCount;
            lapackf77_clacpy( MagmaFullStr, &M, &columns, h_A, &lda, h_R, &lda );
            magma_csetmatrix( M, columns, h_R, lda, dA_magma, ldda );

            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            magma_cset_pointer( dA_array, dA_magma, ldda, 0, 0, ldda*N, batchCount, opts.queue );
            magma_time = magma_sync_wtime( opts.queue );
            info = magma_cgetrf_nopiv_batched( M, N, dA_array, ldda, dinfo_magma, batchCount, opts.queue);
            magma_time = magma_sync_wtime( opts.queue ) - magma_time;
            magma_perf = gflops / magma_time;

            // check correctness of results through "dinfo_magma" and correctness of argument through "info"
            magma_getvector( batchCount, sizeof(magma_int_t), dinfo_magma, 1, cpu_info, 1);
            for (int i=0; i < batchCount; i++) {
                if (cpu_info[i] != 0 ) {
                    printf("magma_cgetrf_batched matrix %d returned internal error %d\n", i, (int)cpu_info[i] );
                }
            }
            if (info != 0) {
                printf("magma_cgetrf_batched returned argument error %d: %s.\n", (int) info, magma_strerror( info ));
            }

            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                for (int i=0; i < batchCount; i++) {
                    lapackf77_cgetrf(&M, &N, h_A + i*lda*N, &lda, ipiv + i * min_mn, &info);
                    assert( info == 0 );
                }
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0) {
                    printf("lapackf77_cgetrf returned error %d: %s.\n", (int) info, magma_strerror( info ));
                }
            }

            /* =====================================================================
               Check the factorization
               =================================================================== */
            if ( opts.lapack ) {
                printf("%10d %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %7.2f (%7.2f)",
                       (int) batchCount, (int) M, (int) N,
                       cpu_perf, cpu_time*1000., magma_perf, magma_time*1000.,
                       cublas_perf*cublas_enable, cublas_time*1000.*cublas_enable );
            }
            else {
                printf("%10d %5d %5d --- ( --- ) %7.2f (%7.2f) %7.2f (%7.2f)",
                       (int) batchCount, (int) M, (int) N,
                       magma_perf, magma_time*1000.,
                       cublas_perf*cublas_enable, cublas_time*1000.*cublas_enable );
            }

            if ( opts.check ) {
                // initialize ipiv to 1, 2, 3, ...
                for (int i=0; i < batchCount; i++) {
                    for (int k=0; k < min_mn; k++) {
                        ipiv[i*min_mn+k] = k+1;
                    }
                }

                magma_cgetmatrix( M, N*batchCount, dA_magma, ldda, h_A, lda );
                error = 0;
                for (int i=0; i < batchCount; i++) {
                    float err;
                    err = get_LU_error( M, N, h_R + i * lda*N, lda, h_A + i * lda*N, ipiv + i * min_mn);
                    if ( isnan(err) || isinf(err) ) {
                        error = err;
                        break;
                    }
                    error = max( err, error );
                }
                bool okay = (error < tol);
                status += ! okay;
                printf(" %8.2e %s\n", error, (okay ? "ok" : "failed") );
            }
            else {
                printf(" --- \n");
            }

            TESTING_FREE_CPU( cpu_info );
            TESTING_FREE_CPU( ipiv );
            TESTING_FREE_CPU( h_A );
            TESTING_FREE_CPU( h_R );

            TESTING_FREE_DEV( dA_magma );
            TESTING_FREE_DEV( dinfo_magma );
            TESTING_FREE_DEV( dipiv_magma );
            TESTING_FREE_DEV( dipiv_array );
            TESTING_FREE_DEV( dA_array );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    opts.cleanup();
    TESTING_FINALIZE();
    return status;
}