/** Purpose ------- ZPOTRI computes the inverse of a real symmetric positive definite matrix A using the Cholesky factorization A = U**T*U or A = L*L**T computed by ZPOTRF. Arguments --------- @param[in] uplo magma_uplo_t - = MagmaUpper: Upper triangle of A is stored; - = MagmaLower: Lower triangle of A is stored. @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in,out] A COMPLEX_16 array, dimension (LDA,N) On entry, the triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T, as computed by ZPOTRF. On exit, the upper or lower triangle of the (symmetric) inverse of A, overwriting the input factor U or L. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the (i,i) element of the factor U or L is zero, and the inverse could not be computed. @ingroup magma_zposv_comp ********************************************************************/ extern "C" magma_int_t magma_zpotri(magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *info) { /* Local variables */ *info = 0; if ((uplo != MagmaUpper) && (uplo != MagmaLower)) *info = -1; else if (n < 0) *info = -2; else if (lda < max(1,n)) *info = -4; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if ( n == 0 ) return *info; /* Invert the triangular Cholesky factor U or L */ magma_ztrtri( uplo, MagmaNonUnit, n, A, lda, info ); if ( *info == 0 ) { /* Form inv(U) * inv(U)**T or inv(L)**T * inv(L) */ magma_zlauum( uplo, n, A, lda, info ); } return *info; } /* magma_zpotri */
/* //////////////////////////////////////////////////////////////////////////// -- Testing ztrtri */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; magmaDoubleComplex *h_A, *h_R; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magma_int_t N, n2, lda, info; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; double Anorm, error, work[1]; magma_int_t status = 0; magma_opts opts; opts.parse_opts( argc, argv ); opts.lapack |= opts.check; // check (-c) implies lapack (-l) double tol = opts.tolerance * lapackf77_dlamch("E"); printf("%% uplo = %s\n", lapack_uplo_const(opts.uplo) ); printf("%% N CPU Gflop/s (sec) GPU Gflop/s (sec) ||R||_F / ||A||_F\n"); printf("%%================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = N; n2 = lda*N; gflops = FLOPS_ZTRTRI( N ) / 1e9; TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, n2 ); TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, n2 ); /* ==================================================================== Initialize the matrix =================================================================== */ lapackf77_zlarnv( &ione, ISEED, &n2, h_A ); magma_zmake_hpd( N, h_A, lda ); lapackf77_zlacpy( MagmaFullStr, &N, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ if ( opts.warmup ) { magma_zpotrf( opts.uplo, N, h_R, lda, &info ); magma_ztrtri( opts.uplo, opts.diag, N, h_R, lda, &info ); lapackf77_zlacpy( MagmaFullStr, &N, &N, h_A, &lda, h_R, &lda ); } /* factorize matrix */ magma_zpotrf( opts.uplo, N, h_R, lda, &info ); // check for exact singularity //h_R[ 10 + 10*lda ] = MAGMA_Z_ZERO; gpu_time = magma_wtime(); magma_ztrtri( opts.uplo, opts.diag, N, h_R, lda, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) { printf("magma_ztrtri returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { lapackf77_zpotrf( lapack_uplo_const(opts.uplo), &N, h_A, &lda, &info ); cpu_time = magma_wtime(); lapackf77_ztrtri( lapack_uplo_const(opts.uplo), lapack_diag_const(opts.diag), &N, h_A, &lda, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) { printf("lapackf77_ztrtri returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== Check the result compared to LAPACK =================================================================== */ blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione); Anorm = lapackf77_zlantr("f", lapack_uplo_const(opts.uplo), MagmaNonUnitStr, &N, &N, h_A, &lda, work); error = lapackf77_zlantr("f", lapack_uplo_const(opts.uplo), MagmaNonUnitStr, &N, &N, h_R, &lda, work) / Anorm; bool okay = (error < tol); status += ! okay; printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (okay ? "ok" : "failed") ); } else { printf("%5d --- ( --- ) %7.2f (%7.2f) ---\n", (int) N, gpu_perf, gpu_time ); } TESTING_FREE_CPU( h_A ); TESTING_FREE_PIN( h_R ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } opts.cleanup(); TESTING_FINALIZE(); return status; }