/* //////////////////////////////////////////////////////////////////////////// -- Auxiliary function: 'a' is pointer to the current panel holding the Householder vectors for the QR factorization of the panel. This routine puts ones on the diagonal and zeros in the upper triangular part of 'a'. The upper triangular values are stored in work. Then, the inverse is calculated in place in work, so as a final result, work holds the inverse of the upper triangular diagonal block. */ void zsplit_diag_block(magma_int_t ib, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *work) { magma_int_t i, j, info; magmaDoubleComplex *cola, *colw; magmaDoubleComplex c_zero = MAGMA_Z_ZERO; magmaDoubleComplex c_one = MAGMA_Z_ONE; for (i=0; i < ib; i++) { cola = a + i*lda; colw = work + i*ib; for (j=0; j < i; j++) { colw[j] = cola[j]; cola[j] = c_zero; } colw[i] = cola[i]; cola[i] = c_one; } lapackf77_ztrtri( MagmaUpperStr, MagmaNonUnitStr, &ib, work, &ib, &info); }
/** Purpose ------- ZTRTRI computes the inverse of a real upper or lower triangular matrix A. This is the Level 3 BLAS version of the algorithm. Arguments --------- @param[in] uplo magma_uplo_t - = MagmaUpper: A is upper triangular; - = MagmaLower: A is lower triangular. @param[in] diag magma_diag_t - = MagmaNonUnit: A is non-unit triangular; - = MagmaUnit: A is unit triangular. @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in,out] A COMPLEX_16 array, dimension (LDA,N) On entry, the triangular matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = MagmaUnit, the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, A(i,i) is exactly zero. The triangular matrix is singular and its inverse cannot be computed. @ingroup magma_zgesv_aux ********************************************************************/ extern "C" magma_int_t magma_ztrtri( magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *info) { #define A(i, j) ( A + (i) + (j)*lda ) #define dA(i, j) (dA + (i) + (j)*ldda) /* Local variables */ const char* uplo_ = lapack_uplo_const( uplo ); const char* diag_ = lapack_diag_const( diag ); magma_int_t ldda, nb, nn, j, jb; magmaDoubleComplex c_zero = MAGMA_Z_ZERO; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex *dA; int upper = (uplo == MagmaUpper); int nounit = (diag == MagmaNonUnit); *info = 0; if (! upper && uplo != MagmaLower) *info = -1; else if (! nounit && diag != MagmaUnit) *info = -2; else if (n < 0) *info = -3; else if (lda < max(1,n)) *info = -5; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return */ if ( n == 0 ) return *info; /* Check for singularity if non-unit */ if (nounit) { for (j=0; j < n; ++j) { if ( MAGMA_Z_EQUAL( *A(j,j), c_zero )) { *info = j+1; // Fortran index return *info; } } } /* Determine the block size for this environment */ nb = magma_get_zpotrf_nb(n); ldda = ((n+31)/32)*32; if (MAGMA_SUCCESS != magma_zmalloc( &dA, (n)*ldda )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } magma_queue_t stream[2]; magma_queue_create( &stream[0] ); magma_queue_create( &stream[1] ); if (nb <= 1 || nb >= n) lapackf77_ztrtri(uplo_, diag_, &n, A, &lda, info); else { if (upper) { /* Compute inverse of upper triangular matrix */ for (j=0; j < n; j += nb) { jb = min(nb, (n-j)); magma_zsetmatrix( jb, (n-j), A(j, j), lda, dA(j, j), ldda ); /* Compute rows 1:j-1 of current block column */ magma_ztrmm( MagmaLeft, MagmaUpper, MagmaNoTrans, MagmaNonUnit, j, jb, c_one, dA(0,0), ldda, dA(0, j),ldda); magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaNonUnit, j, jb, c_neg_one, dA(j,j), ldda, dA(0, j),ldda); magma_zgetmatrix_async( jb, jb, dA(j, j), ldda, A(j, j), lda, stream[1] ); magma_zgetmatrix_async( j, jb, dA(0, j), ldda, A(0, j), lda, stream[0] ); magma_queue_sync( stream[1] ); /* Compute inverse of current diagonal block */ lapackf77_ztrtri(MagmaUpperStr, diag_, &jb, A(j,j), &lda, info); magma_zsetmatrix( jb, jb, A(j, j), lda, dA(j, j), ldda ); } } else { /* Compute inverse of lower triangular matrix */ nn=((n-1)/nb)*nb+1; for (j=nn-1; j >= 0; j -= nb) { jb=min(nb,(n-j)); if ((j+jb) < n) { magma_zsetmatrix( (n-j), jb, A(j, j), lda, dA(j, j), ldda ); /* Compute rows j+jb:n of current block column */ magma_ztrmm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb, c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda ); magma_ztrsm( MagmaRight, MagmaLower, MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb, c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda ); magma_zgetmatrix_async( n-j-jb, jb, dA(j+jb, j), ldda, A(j+jb, j), lda, stream[1] ); magma_zgetmatrix_async( jb, jb, dA(j,j), ldda, A(j,j), lda, stream[0] ); magma_queue_sync( stream[0] ); } /* Compute inverse of current diagonal block */ lapackf77_ztrtri(MagmaLowerStr, diag_, &jb, A(j,j), &lda, info); magma_zsetmatrix( jb, jb, A(j, j), lda, dA(j, j), ldda ); } } } magma_queue_destroy( stream[0] ); magma_queue_destroy( stream[1] ); magma_free( dA ); return *info; }
extern "C" magma_int_t magma_ztrtri_gpu(magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaDoubleComplex_ptr dA, size_t dA_offset, magma_int_t ldda, magma_int_t *info) { /* -- clMAGMA (version 1.0.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2012 Purpose ======= ZTRTRI computes the inverse of a real upper or lower triangular matrix dA. This is the Level 3 BLAS version of the algorithm. Arguments ========= UPLO (input) CHARACTER*1 = 'U': A is upper triangular; = 'L': A is lower triangular. DIAG (input) CHARACTER*1 = 'N': A is non-unit triangular; = 'U': A is unit triangular. N (input) INTEGER The order of the matrix A. N >= 0. dA (input/output) DOUBLE PRECISION array ON THE GPU, dimension (LDDA,N) On entry, the triangular matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of the array dA contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of the array dA contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format. LDDA (input) INTEGER The leading dimension of the array dA. LDDA >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, dA(i,i) is exactly zero. The triangular matrix is singular and its inverse can not be computed. ===================================================================== */ /* Local variables */ magma_uplo_t uplo_ = uplo; magma_diag_t diag_ = diag; magma_int_t nb, nn, j, jb; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex *work; int upper = lapackf77_lsame(lapack_const(uplo_), lapack_const(MagmaUpper)); int nounit = lapackf77_lsame(lapack_const(diag_), lapack_const(MagmaNonUnit)); *info = 0; if ((! upper) && (! lapackf77_lsame(lapack_const(uplo_), lapack_const(MagmaLower)))) *info = -1; else if ((! nounit) && (! lapackf77_lsame(lapack_const(diag_), lapack_const(MagmaUnit)))) *info = -2; else if (n < 0) *info = -3; else if (ldda < max(1,n)) *info = -5; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } nb = magma_get_zpotrf_nb(n); /* Create Queues */ magma_queue_t queues[2]; magma_device_t device; int num = 0; magma_err_t err; err = magma_get_devices( &device, 1, &num ); if ( err != 0 || num < 1 ) { fprintf( stderr, "magma_get_devices failed: %d\n", err ); exit(-1); } err = magma_queue_create( device, &queues[0] ); if ( err != 0 ) { fprintf( stderr, "magma_queue_create 0 failed: %d\n", err ); exit(-1); } err = magma_queue_create( device, &queues[1] ); if ( err != 0 ) { fprintf( stderr, "magma_queue_create 1 failed: %d\n", err ); exit(-1); } if (MAGMA_SUCCESS != magma_malloc_host( (void**)&work, nb*nb*sizeof(magmaDoubleComplex) )) { *info = MAGMA_ERR_HOST_ALLOC; return *info; } if (nb <= 1 || nb >= n) { magma_zgetmatrix( n, n, dA, dA_offset, ldda, work, 0, n, queues[0] ); lapackf77_ztrtri(lapack_const(uplo_), lapack_const(diag_), &n, work, &n, info); magma_zsetmatrix( n, n, work, 0, n, dA, dA_offset, ldda, queues[0] ); } else { if (upper){ /* Compute inverse of upper triangular matrix */ for (j=0; j<n; j =j+ nb){ jb = min(nb, (n-j)); /* Compute rows 1:j-1 of current block column */ magma_ztrmm(MagmaLeft, MagmaUpper, MagmaNoTrans, MagmaNonUnit, j, jb, c_one, dA(0,0), ldda, dA(0, j), ldda, queues[0]); magma_ztrsm(MagmaRight, MagmaUpper, MagmaNoTrans, MagmaNonUnit, j, jb, c_neg_one, dA(j,j), ldda, dA(0, j), ldda, queues[0]); magma_zgetmatrix_async( jb, jb, dA(j, j), ldda, work, 0, jb, queues[1], NULL ); magma_queue_sync( queues[1] ); /* Compute inverse of current diagonal block */ lapackf77_ztrtri(MagmaUpperStr, lapack_const(diag_), &jb, work, &jb, info); /* magma_zsetmatrix_async( jb, jb, work, 0, jb, dA(j, j), ldda, queues[0], NULL ); */ magma_zsetmatrix( jb, jb, work, 0, jb, dA(j, j), ldda, queues[0]); } } else{ /* Compute inverse of lower triangular matrix */ nn=((n-1)/nb)*nb+1; for(j=nn-1; j>=0; j=j-nb){ jb=min(nb,(n-j)); if((j+jb) < n){ /* Compute rows j+jb:n of current block column */ magma_ztrmm(MagmaLeft, MagmaLower, MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb, c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda, queues[0]); magma_ztrsm(MagmaRight, MagmaLower, MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb, c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda, queues[0]); } magma_zgetmatrix_async( jb, jb, dA(j, j), ldda, work, 0, jb, queues[1], NULL ); magma_queue_sync( queues[1] ); /* Compute inverse of current diagonal block */ lapackf77_ztrtri(MagmaLowerStr, lapack_const(diag_), &jb, work, &jb, info); /* magma_zsetmatrix_async( jb, jb, work, 0, jb, dA(j, j), ldda, queues[0], NULL ); */ magma_zsetmatrix( jb, jb, work, 0, jb, dA(j, j), ldda, queues[0] ); } } } magma_free_host( work ); magma_queue_destroy(queues[0]); magma_queue_destroy(queues[1]); return *info; }
extern "C" magma_int_t magma_ztrtri(char uplo, char diag, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magma_int_t *info) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= ZTRTRI computes the inverse of a real upper or lower triangular matrix A. This is the Level 3 BLAS version of the algorithm. Arguments ========= UPLO (input) CHARACTER*1 = 'U': A is upper triangular; = 'L': A is lower triangular. DIAG (input) CHARACTER*1 = 'N': A is non-unit triangular; = 'U': A is unit triangular. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) COMPLEX_16 array, dimension (LDA,N) On entry, the triangular matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, A(i,i) is exactly zero. The triangular matrix is singular and its inverse cannot be computed. ===================================================================== */ #define A(i, j) ( A + (i) + (j)*lda ) #define dA(i, j) (dA + (i) + (j)*ldda) /* Local variables */ char uplo_[2] = {uplo, 0}; char diag_[2] = {diag, 0}; magma_int_t ldda, nb, nn, j, jb; magmaDoubleComplex c_zero = MAGMA_Z_ZERO; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex *dA; int upper = lapackf77_lsame(uplo_, "U"); int nounit = lapackf77_lsame(diag_, "N"); *info = 0; if ((! upper) && (! lapackf77_lsame(uplo_, "L"))) *info = -1; else if ((! nounit) && (! lapackf77_lsame(diag_, "U"))) *info = -2; else if (n < 0) *info = -3; else if (lda < max(1,n)) *info = -5; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return */ if ( n == 0 ) return *info; /* Check for singularity if non-unit */ if (nounit) { for ( j=0; j<n; ++j ) { if ( MAGMA_Z_EQUAL( *A(j,j), c_zero )) { *info = j+1; // Fortran index return *info; } } } /* Determine the block size for this environment */ nb = magma_get_zpotrf_nb(n); ldda = ((n+31)/32)*32; if (MAGMA_SUCCESS != magma_zmalloc( &dA, (n)*ldda )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } magma_queue_t stream[2]; magma_queue_create( &stream[0] ); magma_queue_create( &stream[1] ); if (nb <= 1 || nb >= n) lapackf77_ztrtri(uplo_, diag_, &n, A, &lda, info); else { if (upper) { /* Compute inverse of upper triangular matrix */ for (j=0; j<n; j=j+nb) { jb = min(nb, (n-j)); magma_zsetmatrix( jb, (n-j), A(j, j), lda, dA(j, j), ldda ); /* Compute rows 1:j-1 of current block column */ magma_ztrmm( MagmaLeft, MagmaUpper, MagmaNoTrans, MagmaNonUnit, j, jb, c_one, dA(0,0), ldda, dA(0, j),ldda); magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaNonUnit, j, jb, c_neg_one, dA(j,j), ldda, dA(0, j),ldda); magma_zgetmatrix_async( jb, jb, dA(j, j), ldda, A(j, j), lda, stream[1] ); magma_zgetmatrix_async( j, jb, dA(0, j), ldda, A(0, j), lda, stream[0] ); magma_queue_sync( stream[1] ); /* Compute inverse of current diagonal block */ lapackf77_ztrtri(MagmaUpperStr, diag_, &jb, A(j,j), &lda, info); magma_zsetmatrix( jb, jb, A(j, j), lda, dA(j, j), ldda ); } } else { /* Compute inverse of lower triangular matrix */ nn=((n-1)/nb)*nb+1; for(j=nn-1; j>=0; j=j-nb) { jb=min(nb,(n-j)); if((j+jb) < n) { magma_zsetmatrix( (n-j), jb, A(j, j), lda, dA(j, j), ldda ); /* Compute rows j+jb:n of current block column */ magma_ztrmm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb, c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda ); magma_ztrsm( MagmaRight, MagmaLower, MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb, c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda ); magma_zgetmatrix_async( n-j-jb, jb, dA(j+jb, j), ldda, A(j+jb, j), lda, stream[1] ); magma_zgetmatrix_async( jb, jb, dA(j,j), ldda, A(j,j), lda, stream[0] ); magma_queue_sync( stream[0] ); } /* Compute inverse of current diagonal block */ lapackf77_ztrtri(MagmaLowerStr, diag_, &jb, A(j,j), &lda, info); magma_zsetmatrix( jb, jb, A(j, j), lda, dA(j, j), ldda ); } } } magma_queue_destroy( stream[0] ); magma_queue_destroy( stream[1] ); magma_free( dA ); return *info; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing ztrtri */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; magmaDoubleComplex *h_A, *h_R; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magma_int_t N, n2, lda, info; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; double Anorm, error, work[1]; magma_int_t status = 0; magma_opts opts; opts.parse_opts( argc, argv ); opts.lapack |= opts.check; // check (-c) implies lapack (-l) double tol = opts.tolerance * lapackf77_dlamch("E"); printf("%% uplo = %s\n", lapack_uplo_const(opts.uplo) ); printf("%% N CPU Gflop/s (sec) GPU Gflop/s (sec) ||R||_F / ||A||_F\n"); printf("%%================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = N; n2 = lda*N; gflops = FLOPS_ZTRTRI( N ) / 1e9; TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, n2 ); TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, n2 ); /* ==================================================================== Initialize the matrix =================================================================== */ lapackf77_zlarnv( &ione, ISEED, &n2, h_A ); magma_zmake_hpd( N, h_A, lda ); lapackf77_zlacpy( MagmaFullStr, &N, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ if ( opts.warmup ) { magma_zpotrf( opts.uplo, N, h_R, lda, &info ); magma_ztrtri( opts.uplo, opts.diag, N, h_R, lda, &info ); lapackf77_zlacpy( MagmaFullStr, &N, &N, h_A, &lda, h_R, &lda ); } /* factorize matrix */ magma_zpotrf( opts.uplo, N, h_R, lda, &info ); // check for exact singularity //h_R[ 10 + 10*lda ] = MAGMA_Z_ZERO; gpu_time = magma_wtime(); magma_ztrtri( opts.uplo, opts.diag, N, h_R, lda, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) { printf("magma_ztrtri returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { lapackf77_zpotrf( lapack_uplo_const(opts.uplo), &N, h_A, &lda, &info ); cpu_time = magma_wtime(); lapackf77_ztrtri( lapack_uplo_const(opts.uplo), lapack_diag_const(opts.diag), &N, h_A, &lda, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) { printf("lapackf77_ztrtri returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ===================================================================== Check the result compared to LAPACK =================================================================== */ blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione); Anorm = lapackf77_zlantr("f", lapack_uplo_const(opts.uplo), MagmaNonUnitStr, &N, &N, h_A, &lda, work); error = lapackf77_zlantr("f", lapack_uplo_const(opts.uplo), MagmaNonUnitStr, &N, &N, h_R, &lda, work) / Anorm; bool okay = (error < tol); status += ! okay; printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (okay ? "ok" : "failed") ); } else { printf("%5d --- ( --- ) %7.2f (%7.2f) ---\n", (int) N, gpu_perf, gpu_time ); } TESTING_FREE_CPU( h_A ); TESTING_FREE_PIN( h_R ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } opts.cleanup(); TESTING_FINALIZE(); return status; }
extern "C" magma_int_t magma_ztrtri_gpu( magma_uplo_t uplo, magma_diag_t diag, magma_int_t n, magmaDoubleComplex_ptr dA, size_t dA_offset, magma_int_t ldda, magma_queue_t queues[2], magma_int_t *info) { /* -- clMAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date November 2014 Purpose ======= ZTRTRI computes the inverse of a real upper or lower triangular matrix dA. This is the Level 3 BLAS version of the algorithm. Arguments ========= UPLO (input) CHARACTER*1 = 'U': A is upper triangular; = 'L': A is lower triangular. DIAG (input) CHARACTER*1 = 'N': A is non-unit triangular; = 'U': A is unit triangular. N (input) INTEGER The order of the matrix A. N >= 0. dA (input/output) COMPLEX_16 array ON THE GPU, dimension (LDDA,N) On entry, the triangular matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of the array dA contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of the array dA contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format. LDDA (input) INTEGER The leading dimension of the array dA. LDDA >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, dA(i,i) is exactly zero. The triangular matrix is singular and its inverse cannot be computed. (Singularity check is currently disabled.) ===================================================================== */ /* Local variables */ magma_int_t nb, nn, j, jb; //magmaDoubleComplex c_zero = MAGMA_Z_ZERO; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex *work; int upper = (uplo == MagmaUpper); int nounit = (diag == MagmaNonUnit); *info = 0; if (! upper && uplo != MagmaLower) *info = -1; else if (! nounit && diag != MagmaUnit) *info = -2; else if (n < 0) *info = -3; else if (ldda < max(1,n)) *info = -5; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Check for singularity if non-unit */ /* cannot do here with matrix dA on GPU -- need kernel */ /* if (nounit) { for (j=0; j < n; ++j) { if ( MAGMA_Z_EQUAL( *dA(j,j), c_zero )) { *info = j+1; // Fortran index return *info; } } } */ /* Determine the block size for this environment */ nb = magma_get_zpotrf_nb(n); /* Create Queues */ //magma_queue_t queues[2]; //magma_device_t device[MagmaMaxGPUs]; //magma_int_t num = 0; //magma_int_t err; // //err = magma_getdevices( device, MagmaMaxGPUs, &num ); //if ( err != 0 || num < 1 ) { // fprintf( stderr, "magma_getdevices failed: %d\n", err ); // exit(-1); //} //err = magma_queue_create( device[0], &queues[0] ); //if ( err != 0 ) { // fprintf( stderr, "magma_queue_create 0 failed: %d\n", err ); // exit(-1); //} //err = magma_queue_create( device[0], &queues[1] ); //if ( err != 0 ) { // fprintf( stderr, "magma_queue_create 1 failed: %d\n", err ); // exit(-1); //} if (MAGMA_SUCCESS != magma_zmalloc_cpu( &work, nb*nb )) { *info = MAGMA_ERR_HOST_ALLOC; return *info; } if (nb <= 1 || nb >= n) { magma_zgetmatrix( n, n, dA, dA_offset, ldda, work, n, queues[0] ); lapackf77_ztrtri( lapack_const(uplo), lapack_const(diag), &n, work, &n, info ); magma_zsetmatrix( n, n, work, n, dA, dA_offset, ldda, queues[0] ); } else { if (upper) { /* Compute inverse of upper triangular matrix */ for (j=0; j < n; j += nb){ jb = min(nb, (n-j)); /* Compute rows 1:j-1 of current block column */ magma_ztrmm( MagmaLeft, MagmaUpper, MagmaNoTrans, MagmaNonUnit, j, jb, c_one, dA(0,0), ldda, dA(0, j), ldda, queues[0] ); magma_ztrsm( MagmaRight, MagmaUpper, MagmaNoTrans, MagmaNonUnit, j, jb, c_neg_one, dA(j,j), ldda, dA(0, j), ldda, queues[0] ); magma_zgetmatrix_async( jb, jb, dA(j, j), ldda, work, jb, queues[1], NULL ); magma_queue_sync( queues[1] ); /* Compute inverse of current diagonal block */ lapackf77_ztrtri( MagmaUpperStr, lapack_const(diag), &jb, work, &jb, info ); /* magma_zsetmatrix_async( jb, jb, work, 0, jb, dA(j, j), ldda, queues[0], NULL ); */ magma_zsetmatrix( jb, jb, work, jb, dA(j, j), ldda, queues[0] ); } } else { /* Compute inverse of lower triangular matrix */ nn = ((n-1)/nb)*nb+1; for(j=nn-1; j >= 0; j -= nb){ jb = min(nb,(n-j)); if((j+jb) < n){ /* Compute rows j+jb:n of current block column */ magma_ztrmm( MagmaLeft, MagmaLower, MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb, c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda, queues[0] ); magma_ztrsm( MagmaRight, MagmaLower, MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb, c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda, queues[0] ); } magma_zgetmatrix_async( jb, jb, dA(j, j), ldda, work, jb, queues[1], NULL ); magma_queue_sync( queues[1] ); /* Compute inverse of current diagonal block */ lapackf77_ztrtri( MagmaLowerStr, lapack_const(diag), &jb, work, &jb, info ); /* magma_zsetmatrix_async( jb, jb, work, 0, jb, dA(j, j), ldda, queues[0], NULL ); */ magma_zsetmatrix( jb, jb, work, jb, dA(j, j), ldda, queues[0] ); } } } //magma_queue_destroy( queues[0] ); //magma_queue_destroy( queues[1] ); magma_free_cpu( work ); return *info; }