예제 #1
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Auxiliary function: 'a' is pointer to the current panel holding the
      Householder vectors for the QR factorization of the panel. This routine
      puts ones on the diagonal and zeros in the upper triangular part of 'a'.
      The upper triangular values are stored in work.
      
      Then, the inverse is calculated in place in work, so as a final result,
      work holds the inverse of the upper triangular diagonal block.
*/
void zsplit_diag_block(magma_int_t ib, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *work)
{
    magma_int_t i, j, info;
    magmaDoubleComplex *cola, *colw;
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
    magmaDoubleComplex c_one  = MAGMA_Z_ONE;

    for (i=0; i < ib; i++) {
        cola = a    + i*lda;
        colw = work + i*ib;
        for (j=0; j < i; j++) {
            colw[j] = cola[j];
            cola[j] = c_zero;
        }
        colw[i] = cola[i];
        cola[i] = c_one;
    }
    lapackf77_ztrtri( MagmaUpperStr, MagmaNonUnitStr, &ib, work, &ib, &info);
}
예제 #2
0
파일: ztrtri.cpp 프로젝트: cjy7117/FT-MAGMA
/**
    Purpose
    -------
    ZTRTRI computes the inverse of a real upper or lower triangular
    matrix A.

    This is the Level 3 BLAS version of the algorithm.

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
      -     = MagmaUpper:  A is upper triangular;
      -     = MagmaLower:  A is lower triangular.

    @param[in]
    diag    magma_diag_t
      -     = MagmaNonUnit:  A is non-unit triangular;
      -     = MagmaUnit:     A is unit triangular.

    @param[in]
    n       INTEGER
            The order of the matrix A.  N >= 0.

    @param[in,out]
    A       COMPLEX_16 array, dimension (LDA,N)
            On entry, the triangular matrix A.  If UPLO = MagmaUpper, the
            leading N-by-N upper triangular part of the array A contains
            the upper triangular matrix, and the strictly lower
            triangular part of A is not referenced.  If UPLO = MagmaLower, the
            leading N-by-N lower triangular part of the array A contains
            the lower triangular matrix, and the strictly upper
            triangular part of A is not referenced.  If DIAG = MagmaUnit, the
            diagonal elements of A are also not referenced and are
            assumed to be 1.
            On exit, the (triangular) inverse of the original matrix, in
            the same storage format.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[out]
    info    INTEGER
      -     = 0: successful exit
      -     < 0: if INFO = -i, the i-th argument had an illegal value
      -     > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
                    matrix is singular and its inverse cannot be computed.

    @ingroup magma_zgesv_aux
    ********************************************************************/
extern "C" magma_int_t
magma_ztrtri(
    magma_uplo_t uplo, magma_diag_t diag, magma_int_t n,
    magmaDoubleComplex *A, magma_int_t lda,
    magma_int_t *info)
{
    #define  A(i, j) ( A + (i) + (j)*lda )
    #define dA(i, j) (dA + (i) + (j)*ldda)

    /* Local variables */
    const char* uplo_ = lapack_uplo_const( uplo );
    const char* diag_ = lapack_diag_const( diag );
    magma_int_t     ldda, nb, nn, j, jb;
    magmaDoubleComplex c_zero     = MAGMA_Z_ZERO;
    magmaDoubleComplex c_one      = MAGMA_Z_ONE;
    magmaDoubleComplex c_neg_one  = MAGMA_Z_NEG_ONE;
    magmaDoubleComplex *dA;

    int upper  = (uplo == MagmaUpper);
    int nounit = (diag == MagmaNonUnit);

    *info = 0;

    if (! upper && uplo != MagmaLower)
        *info = -1;
    else if (! nounit && diag != MagmaUnit)
        *info = -2;
    else if (n < 0)
        *info = -3;
    else if (lda < max(1,n))
        *info = -5;

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Quick return */
    if ( n == 0 )
        return *info;

    /* Check for singularity if non-unit */
    if (nounit) {
        for (j=0; j < n; ++j) {
            if ( MAGMA_Z_EQUAL( *A(j,j), c_zero )) {
                *info = j+1;  // Fortran index
                return *info;
            }
        }
    }

    /* Determine the block size for this environment */
    nb = magma_get_zpotrf_nb(n);

    ldda = ((n+31)/32)*32;
    if (MAGMA_SUCCESS != magma_zmalloc( &dA, (n)*ldda )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    magma_queue_t stream[2];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );

    if (nb <= 1 || nb >= n)
        lapackf77_ztrtri(uplo_, diag_, &n, A, &lda, info);
    else {
        if (upper) {
            /* Compute inverse of upper triangular matrix */
            for (j=0; j < n; j += nb) {
                jb = min(nb, (n-j));
                magma_zsetmatrix( jb, (n-j),
                                  A(j, j),  lda,
                                  dA(j, j), ldda );

                /* Compute rows 1:j-1 of current block column */
                magma_ztrmm( MagmaLeft, MagmaUpper,
                             MagmaNoTrans, MagmaNonUnit, j, jb,
                             c_one, dA(0,0), ldda, dA(0, j),ldda);

                magma_ztrsm( MagmaRight, MagmaUpper,
                             MagmaNoTrans, MagmaNonUnit, j, jb,
                             c_neg_one, dA(j,j), ldda, dA(0, j),ldda);

                magma_zgetmatrix_async( jb, jb,
                                        dA(j, j), ldda,
                                        A(j, j),  lda, stream[1] );

                magma_zgetmatrix_async( j, jb,
                                        dA(0, j), ldda,
                                        A(0, j),  lda, stream[0] );

                magma_queue_sync( stream[1] );

                /* Compute inverse of current diagonal block */
                lapackf77_ztrtri(MagmaUpperStr, diag_, &jb, A(j,j), &lda, info);

                magma_zsetmatrix( jb, jb,
                                  A(j, j),  lda,
                                  dA(j, j), ldda );
            }
        }
        else {
            /* Compute inverse of lower triangular matrix */
            nn=((n-1)/nb)*nb+1;

            for (j=nn-1; j >= 0; j -= nb) {
                jb=min(nb,(n-j));

                if ((j+jb) < n) {
                    magma_zsetmatrix( (n-j), jb,
                                      A(j, j),  lda,
                                      dA(j, j), ldda );

                    /* Compute rows j+jb:n of current block column */
                    magma_ztrmm( MagmaLeft, MagmaLower,
                                 MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
                                 c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda );

                    magma_ztrsm( MagmaRight, MagmaLower,
                                 MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
                                 c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda );

                    magma_zgetmatrix_async( n-j-jb, jb,
                                            dA(j+jb, j), ldda,
                                            A(j+jb, j),  lda, stream[1] );

                    magma_zgetmatrix_async( jb, jb,
                                            dA(j,j), ldda,
                                            A(j,j),  lda, stream[0] );

                    magma_queue_sync( stream[0] );
                }

                /* Compute inverse of current diagonal block */
                lapackf77_ztrtri(MagmaLowerStr, diag_, &jb, A(j,j), &lda, info);

                magma_zsetmatrix( jb, jb,
                                  A(j, j),  lda,
                                  dA(j, j), ldda );
            }
        }
    }

    magma_queue_destroy( stream[0] );
    magma_queue_destroy( stream[1] );
    magma_free( dA );

    return *info;
}
extern "C" magma_int_t
magma_ztrtri_gpu(magma_uplo_t uplo, magma_diag_t diag, magma_int_t n,
		magmaDoubleComplex_ptr dA, size_t dA_offset, magma_int_t ldda, magma_int_t *info)
{
	/*  -- clMAGMA (version 1.0.0) --
		Univ. of Tennessee, Knoxville
		Univ. of California, Berkeley
		Univ. of Colorado, Denver
		August 2012

		Purpose
		=======

		ZTRTRI computes the inverse of a real upper or lower triangular
		matrix dA.

		This is the Level 3 BLAS version of the algorithm.

		Arguments
		=========

		UPLO    (input) CHARACTER*1
		= 'U':  A is upper triangular;
		= 'L':  A is lower triangular.

		DIAG    (input) CHARACTER*1
		= 'N':  A is non-unit triangular;
		= 'U':  A is unit triangular.

		N       (input) INTEGER
		The order of the matrix A.  N >= 0.

		dA       (input/output) DOUBLE PRECISION array ON THE GPU, dimension (LDDA,N)
		On entry, the triangular matrix A.  If UPLO = 'U', the
		leading N-by-N upper triangular part of the array dA contains
		the upper triangular matrix, and the strictly lower
		triangular part of A is not referenced.  If UPLO = 'L', the
		leading N-by-N lower triangular part of the array dA contains
		the lower triangular matrix, and the strictly upper
		triangular part of A is not referenced.  If DIAG = 'U', the
		diagonal elements of A are also not referenced and are
		assumed to be 1.
		On exit, the (triangular) inverse of the original matrix, in
		the same storage format.

		LDDA     (input) INTEGER
		The leading dimension of the array dA.  LDDA >= max(1,N).
		INFO    (output) INTEGER
		= 0: successful exit
		< 0: if INFO = -i, the i-th argument had an illegal value
		> 0: if INFO = i, dA(i,i) is exactly zero.  The triangular
		matrix is singular and its inverse can not be computed.

		===================================================================== */

	/* Local variables */
	magma_uplo_t uplo_ = uplo;
	magma_diag_t diag_ = diag;
	magma_int_t         nb, nn, j, jb;
	magmaDoubleComplex     c_one      = MAGMA_Z_ONE;
	magmaDoubleComplex     c_neg_one  = MAGMA_Z_NEG_ONE;
	magmaDoubleComplex     *work;

	int upper  = lapackf77_lsame(lapack_const(uplo_), lapack_const(MagmaUpper));
	int nounit = lapackf77_lsame(lapack_const(diag_), lapack_const(MagmaNonUnit));

	*info = 0;

	if ((! upper) && (! lapackf77_lsame(lapack_const(uplo_), lapack_const(MagmaLower))))
		*info = -1;
	else if ((! nounit) && (! lapackf77_lsame(lapack_const(diag_), lapack_const(MagmaUnit))))
		*info = -2;
	else if (n < 0)
		*info = -3;
	else if (ldda < max(1,n))
		*info = -5;

	if (*info != 0) {
		magma_xerbla( __func__, -(*info) );
		return *info;
	}

	nb = magma_get_zpotrf_nb(n);

	/* Create Queues */
	magma_queue_t  queues[2];
	magma_device_t device;
	int num = 0;
	magma_err_t err;

	err = magma_get_devices( &device, 1, &num );
	if ( err != 0 || num < 1 ) {
		fprintf( stderr, "magma_get_devices failed: %d\n", err );
		exit(-1);
	}
	err = magma_queue_create( device, &queues[0] );
	if ( err != 0 ) {
		fprintf( stderr, "magma_queue_create 0 failed: %d\n", err );
		exit(-1);
	}	
	err = magma_queue_create( device, &queues[1] );
	if ( err != 0 ) {
		fprintf( stderr, "magma_queue_create 1 failed: %d\n", err );
		exit(-1);
	}	

	if (MAGMA_SUCCESS != magma_malloc_host( (void**)&work, nb*nb*sizeof(magmaDoubleComplex) )) {
		*info = MAGMA_ERR_HOST_ALLOC;
		return *info;
	}

	if (nb <= 1 || nb >= n)
	{
		magma_zgetmatrix( n, n, dA, dA_offset, ldda, work, 0, n, queues[0] );
		lapackf77_ztrtri(lapack_const(uplo_), lapack_const(diag_), &n, work, &n, info);
		magma_zsetmatrix( n, n, work, 0, n, dA, dA_offset, ldda, queues[0] );
	}
	else
	{
		if (upper){
			/* Compute inverse of upper triangular matrix */
			for (j=0; j<n; j =j+ nb){
				jb = min(nb, (n-j));

				/* Compute rows 1:j-1 of current block column */
				magma_ztrmm(MagmaLeft, MagmaUpper, 
						MagmaNoTrans, MagmaNonUnit, j, jb, 
						c_one, dA(0,0), ldda, dA(0, j), ldda, 
						queues[0]);

				magma_ztrsm(MagmaRight, MagmaUpper, 
						MagmaNoTrans, MagmaNonUnit, j, jb,
						c_neg_one, dA(j,j), ldda, dA(0, j), ldda, 
						queues[0]);
	
				magma_zgetmatrix_async( jb, jb,
						dA(j, j), ldda,
						work, 0, jb, queues[1], NULL );
				
				magma_queue_sync( queues[1] );

				/* Compute inverse of current diagonal block */
				lapackf77_ztrtri(MagmaUpperStr, lapack_const(diag_), &jb, work, &jb, info);
				/*
				magma_zsetmatrix_async( jb, jb, 
						work, 0, jb,
						dA(j, j), ldda, queues[0], NULL );
				*/
				magma_zsetmatrix( jb, jb, 
						work, 0, jb,
						dA(j, j), ldda, queues[0]);
			}
		}
		else{
			/* Compute inverse of lower triangular matrix */
			nn=((n-1)/nb)*nb+1;

			for(j=nn-1; j>=0; j=j-nb){
				jb=min(nb,(n-j));

				if((j+jb) < n){
					/* Compute rows j+jb:n of current block column */
					magma_ztrmm(MagmaLeft, MagmaLower, 
							MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
							c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda, 
							queues[0]);

					magma_ztrsm(MagmaRight, MagmaLower,
							MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
							c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda, 
							queues[0]);
				}
				magma_zgetmatrix_async( jb, jb, 
						dA(j, j), ldda,
						work, 0, jb, queues[1], NULL );
				
				magma_queue_sync( queues[1] );

				/* Compute inverse of current diagonal block */
				lapackf77_ztrtri(MagmaLowerStr, lapack_const(diag_), &jb, work, &jb, info);
				/*
				magma_zsetmatrix_async( jb, jb,
						work, 0, jb,
						dA(j, j), ldda, queues[0], NULL );
				*/
				magma_zsetmatrix( jb, jb,
						work, 0, jb,
						dA(j, j), ldda, queues[0] );
			}
		}
	}

	magma_free_host( work );
	magma_queue_destroy(queues[0]);
	magma_queue_destroy(queues[1]);
	return *info;
}
예제 #4
0
파일: ztrtri.cpp 프로젝트: soulsheng/magma
extern "C" magma_int_t
magma_ztrtri(char uplo, char diag, magma_int_t n,
              magmaDoubleComplex *A, magma_int_t lda, magma_int_t *info)
{
/*  -- MAGMA (version 1.4.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       August 2013

    Purpose
    =======
    ZTRTRI computes the inverse of a real upper or lower triangular
    matrix A.

    This is the Level 3 BLAS version of the algorithm.

    Arguments
    =========
    UPLO    (input) CHARACTER*1
            = 'U':  A is upper triangular;
            = 'L':  A is lower triangular.

    DIAG    (input) CHARACTER*1
            = 'N':  A is non-unit triangular;
            = 'U':  A is unit triangular.

    N       (input) INTEGER
            The order of the matrix A.  N >= 0.

    A       (input/output) COMPLEX_16 array, dimension (LDA,N)
            On entry, the triangular matrix A.  If UPLO = 'U', the
            leading N-by-N upper triangular part of the array A contains
            the upper triangular matrix, and the strictly lower
            triangular part of A is not referenced.  If UPLO = 'L', the
            leading N-by-N lower triangular part of the array A contains
            the lower triangular matrix, and the strictly upper
            triangular part of A is not referenced.  If DIAG = 'U', the
            diagonal elements of A are also not referenced and are
            assumed to be 1.
            On exit, the (triangular) inverse of the original matrix, in
            the same storage format.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: if INFO = -i, the i-th argument had an illegal value
            > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
                    matrix is singular and its inverse cannot be computed.

    ===================================================================== */

    #define  A(i, j) ( A + (i) + (j)*lda )
    #define dA(i, j) (dA + (i) + (j)*ldda)

    /* Local variables */
    char uplo_[2] = {uplo, 0};
    char diag_[2] = {diag, 0};
    magma_int_t     ldda, nb, nn, j, jb;
    magmaDoubleComplex c_zero     = MAGMA_Z_ZERO;
    magmaDoubleComplex c_one      = MAGMA_Z_ONE;
    magmaDoubleComplex c_neg_one  = MAGMA_Z_NEG_ONE;
    magmaDoubleComplex *dA;

    int upper  = lapackf77_lsame(uplo_, "U");
    int nounit = lapackf77_lsame(diag_, "N");

    *info = 0;

    if ((! upper) && (! lapackf77_lsame(uplo_, "L")))
        *info = -1;
    else if ((! nounit) && (! lapackf77_lsame(diag_, "U")))
        *info = -2;
    else if (n < 0)
        *info = -3;
    else if (lda < max(1,n))
        *info = -5;

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Quick return */
    if ( n == 0 )
        return *info;

    /* Check for singularity if non-unit */
    if (nounit) {
        for ( j=0; j<n; ++j ) {
            if ( MAGMA_Z_EQUAL( *A(j,j), c_zero )) {
                *info = j+1;  // Fortran index
                return *info;
            }
        }
    }

    /* Determine the block size for this environment */
    nb = magma_get_zpotrf_nb(n);

    ldda = ((n+31)/32)*32;
    if (MAGMA_SUCCESS != magma_zmalloc( &dA, (n)*ldda )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    magma_queue_t stream[2];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );

    if (nb <= 1 || nb >= n)
        lapackf77_ztrtri(uplo_, diag_, &n, A, &lda, info);
    else {
        if (upper) {
            /* Compute inverse of upper triangular matrix */
            for (j=0; j<n; j=j+nb) {
                jb = min(nb, (n-j));
                magma_zsetmatrix( jb, (n-j),
                                  A(j, j),  lda,
                                  dA(j, j), ldda );

                /* Compute rows 1:j-1 of current block column */
                magma_ztrmm( MagmaLeft, MagmaUpper,
                             MagmaNoTrans, MagmaNonUnit, j, jb,
                             c_one, dA(0,0), ldda, dA(0, j),ldda);

                magma_ztrsm( MagmaRight, MagmaUpper,
                             MagmaNoTrans, MagmaNonUnit, j, jb,
                             c_neg_one, dA(j,j), ldda, dA(0, j),ldda);

                magma_zgetmatrix_async( jb, jb,
                                        dA(j, j), ldda,
                                        A(j, j),  lda, stream[1] );

                magma_zgetmatrix_async( j, jb,
                                        dA(0, j), ldda,
                                        A(0, j),  lda, stream[0] );

                magma_queue_sync( stream[1] );

                /* Compute inverse of current diagonal block */
                lapackf77_ztrtri(MagmaUpperStr, diag_, &jb, A(j,j), &lda, info);

                magma_zsetmatrix( jb, jb,
                                  A(j, j),  lda,
                                  dA(j, j), ldda );
            }
        }
        else {
            /* Compute inverse of lower triangular matrix */
            nn=((n-1)/nb)*nb+1;

            for(j=nn-1; j>=0; j=j-nb) {
                jb=min(nb,(n-j));

                if((j+jb) < n) {
                    magma_zsetmatrix( (n-j), jb,
                                      A(j, j),  lda,
                                      dA(j, j), ldda );

                    /* Compute rows j+jb:n of current block column */
                    magma_ztrmm( MagmaLeft, MagmaLower,
                                 MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
                                 c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda );

                    magma_ztrsm( MagmaRight, MagmaLower,
                                 MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
                                 c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda );

                    magma_zgetmatrix_async( n-j-jb, jb,
                                            dA(j+jb, j), ldda,
                                            A(j+jb, j),  lda, stream[1] );

                    magma_zgetmatrix_async( jb, jb,
                                            dA(j,j), ldda,
                                            A(j,j),  lda, stream[0] );

                    magma_queue_sync( stream[0] );
                }

                /* Compute inverse of current diagonal block */
                lapackf77_ztrtri(MagmaLowerStr, diag_, &jb, A(j,j), &lda, info);

                magma_zsetmatrix( jb, jb,
                                  A(j, j),  lda,
                                  dA(j, j), ldda );
            }
        }
    }

    magma_queue_destroy( stream[0] );
    magma_queue_destroy( stream[1] );
    magma_free( dA );

    return *info;
}
예제 #5
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing ztrtri
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    magmaDoubleComplex *h_A, *h_R;
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    magma_int_t N, n2, lda, info;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    double      Anorm, error, work[1];
    magma_int_t status = 0;

    magma_opts opts;
    opts.parse_opts( argc, argv );
    opts.lapack |= opts.check;  // check (-c) implies lapack (-l)
    
    double tol = opts.tolerance * lapackf77_dlamch("E");
    
    printf("%% uplo = %s\n", lapack_uplo_const(opts.uplo) );
    printf("%%   N   CPU Gflop/s (sec)   GPU Gflop/s (sec)   ||R||_F / ||A||_F\n");
    printf("%%================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            lda    = N;
            n2     = lda*N;
            gflops = FLOPS_ZTRTRI( N ) / 1e9;
            
            TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, n2 );
            TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, n2 );
            
            /* ====================================================================
               Initialize the matrix
               =================================================================== */
            lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
            magma_zmake_hpd( N, h_A, lda );
            lapackf77_zlacpy( MagmaFullStr, &N, &N, h_A, &lda, h_R, &lda );

            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            if ( opts.warmup ) {
                magma_zpotrf( opts.uplo, N, h_R, lda, &info );
                magma_ztrtri( opts.uplo, opts.diag, N, h_R, lda, &info );
                lapackf77_zlacpy( MagmaFullStr, &N, &N, h_A, &lda, h_R, &lda );
            }
            
            /* factorize matrix */
            magma_zpotrf( opts.uplo, N, h_R, lda, &info );
            
            // check for exact singularity
            //h_R[ 10 + 10*lda ] = MAGMA_Z_ZERO;
            
            gpu_time = magma_wtime();
            magma_ztrtri( opts.uplo, opts.diag, N, h_R, lda, &info );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0) {
                printf("magma_ztrtri returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                lapackf77_zpotrf( lapack_uplo_const(opts.uplo), &N, h_A, &lda, &info );
                
                cpu_time = magma_wtime();
                lapackf77_ztrtri( lapack_uplo_const(opts.uplo), lapack_diag_const(opts.diag), &N, h_A, &lda, &info );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0) {
                    printf("lapackf77_ztrtri returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                }
                
                /* =====================================================================
                   Check the result compared to LAPACK
                   =================================================================== */
                blasf77_zaxpy(&n2, &c_neg_one, h_A, &ione, h_R, &ione);
                Anorm = lapackf77_zlantr("f", lapack_uplo_const(opts.uplo), MagmaNonUnitStr, &N, &N, h_A, &lda, work);
                error = lapackf77_zlantr("f", lapack_uplo_const(opts.uplo), MagmaNonUnitStr, &N, &N, h_R, &lda, work) / Anorm;
                bool okay = (error < tol);
                status += ! okay;
                printf("%5d   %7.2f (%7.2f)   %7.2f (%7.2f)   %8.2e   %s\n",
                       (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time,
                       error, (okay ? "ok" : "failed") );
            }
            else {
                printf("%5d     ---   (  ---  )   %7.2f (%7.2f)     ---\n",
                       (int) N, gpu_perf, gpu_time );
            }
            
            TESTING_FREE_CPU( h_A );
            TESTING_FREE_PIN( h_R );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    opts.cleanup();
    TESTING_FINALIZE();
    return status;
}
예제 #6
0
extern "C" magma_int_t
magma_ztrtri_gpu(
    magma_uplo_t uplo, magma_diag_t diag, magma_int_t n,
    magmaDoubleComplex_ptr dA, size_t dA_offset, magma_int_t ldda,
    magma_queue_t queues[2],
    magma_int_t *info)
{
/*  -- clMAGMA (version 1.3.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date November 2014

    Purpose
    =======
    ZTRTRI computes the inverse of a real upper or lower triangular
    matrix dA.

    This is the Level 3 BLAS version of the algorithm.

    Arguments
    =========
    UPLO    (input) CHARACTER*1
            = 'U':  A is upper triangular;
            = 'L':  A is lower triangular.

    DIAG    (input) CHARACTER*1
            = 'N':  A is non-unit triangular;
            = 'U':  A is unit triangular.

    N       (input) INTEGER
            The order of the matrix A.  N >= 0.

    dA      (input/output) COMPLEX_16 array ON THE GPU, dimension (LDDA,N)
            On entry, the triangular matrix A.  If UPLO = 'U', the
            leading N-by-N upper triangular part of the array dA contains
            the upper triangular matrix, and the strictly lower
            triangular part of A is not referenced.  If UPLO = 'L', the
            leading N-by-N lower triangular part of the array dA contains
            the lower triangular matrix, and the strictly upper
            triangular part of A is not referenced.  If DIAG = 'U', the
            diagonal elements of A are also not referenced and are
            assumed to be 1.
            On exit, the (triangular) inverse of the original matrix, in
            the same storage format.

    LDDA    (input) INTEGER
            The leading dimension of the array dA.  LDDA >= max(1,N).

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: if INFO = -i, the i-th argument had an illegal value
            > 0: if INFO = i, dA(i,i) is exactly zero.  The triangular
                    matrix is singular and its inverse cannot be computed.
                 (Singularity check is currently disabled.)

    ===================================================================== */

    /* Local variables */
    magma_int_t nb, nn, j, jb;
    //magmaDoubleComplex c_zero     = MAGMA_Z_ZERO;
    magmaDoubleComplex c_one      = MAGMA_Z_ONE;
    magmaDoubleComplex c_neg_one  = MAGMA_Z_NEG_ONE;
    magmaDoubleComplex *work;

    int upper  = (uplo == MagmaUpper);
    int nounit = (diag == MagmaNonUnit);

    *info = 0;

    if (! upper && uplo != MagmaLower)
        *info = -1;
    else if (! nounit && diag != MagmaUnit)
        *info = -2;
    else if (n < 0)
        *info = -3;
    else if (ldda < max(1,n))
        *info = -5;

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Check for singularity if non-unit */
    /* cannot do here with matrix dA on GPU -- need kernel */
    /*
    if (nounit) {
        for (j=0; j < n; ++j) {
            if ( MAGMA_Z_EQUAL( *dA(j,j), c_zero )) {
                *info = j+1;  // Fortran index
                return *info;
            }
        }
    }
    */

    /* Determine the block size for this environment */
    nb = magma_get_zpotrf_nb(n);

    /* Create Queues */
    //magma_queue_t  queues[2];
    //magma_device_t device[MagmaMaxGPUs];
    //magma_int_t num = 0;
    //magma_int_t err;
    //
    //err = magma_getdevices( device, MagmaMaxGPUs, &num );
    //if ( err != 0 || num < 1 ) {
    //    fprintf( stderr, "magma_getdevices failed: %d\n", err );
    //    exit(-1);
    //}
    //err = magma_queue_create( device[0], &queues[0] );
    //if ( err != 0 ) {
    //    fprintf( stderr, "magma_queue_create 0 failed: %d\n", err );
    //    exit(-1);
    //}
    //err = magma_queue_create( device[0], &queues[1] );
    //if ( err != 0 ) {
    //    fprintf( stderr, "magma_queue_create 1 failed: %d\n", err );
    //    exit(-1);
    //}

    if (MAGMA_SUCCESS != magma_zmalloc_cpu( &work, nb*nb )) {
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }

    if (nb <= 1 || nb >= n) {
        magma_zgetmatrix( n, n, dA, dA_offset, ldda, work, n, queues[0] );
        lapackf77_ztrtri( lapack_const(uplo), lapack_const(diag), &n, work, &n, info );
        magma_zsetmatrix( n, n, work, n, dA, dA_offset, ldda, queues[0] );
    }
    else {
        if (upper) {
            /* Compute inverse of upper triangular matrix */
            for (j=0; j < n; j += nb){
                jb = min(nb, (n-j));

                /* Compute rows 1:j-1 of current block column */
                magma_ztrmm( MagmaLeft, MagmaUpper,
                             MagmaNoTrans, MagmaNonUnit, j, jb,
                             c_one, dA(0,0), ldda, dA(0, j), ldda,
                             queues[0] );

                magma_ztrsm( MagmaRight, MagmaUpper,
                             MagmaNoTrans, MagmaNonUnit, j, jb,
                             c_neg_one, dA(j,j), ldda, dA(0, j), ldda,
                             queues[0] );

                magma_zgetmatrix_async( jb, jb,
                                        dA(j, j), ldda,
                                        work, jb, queues[1], NULL );

                magma_queue_sync( queues[1] );

                /* Compute inverse of current diagonal block */
                lapackf77_ztrtri( MagmaUpperStr, lapack_const(diag), &jb, work, &jb, info );
                /*
                magma_zsetmatrix_async( jb, jb,
                                        work, 0, jb,
                                        dA(j, j), ldda, queues[0], NULL );
                */
                magma_zsetmatrix( jb, jb,
                                  work, jb,
                                  dA(j, j), ldda, queues[0] );
            }
        }
        else {
            /* Compute inverse of lower triangular matrix */
            nn = ((n-1)/nb)*nb+1;

            for(j=nn-1; j >= 0; j -= nb){
                jb = min(nb,(n-j));

                if((j+jb) < n){
                    /* Compute rows j+jb:n of current block column */
                    magma_ztrmm( MagmaLeft, MagmaLower,
                                 MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
                                 c_one, dA(j+jb,j+jb), ldda, dA(j+jb, j), ldda,
                                 queues[0] );

                    magma_ztrsm( MagmaRight, MagmaLower,
                                 MagmaNoTrans, MagmaNonUnit, (n-j-jb), jb,
                                 c_neg_one, dA(j,j), ldda, dA(j+jb, j), ldda,
                                 queues[0] );
                }
                magma_zgetmatrix_async( jb, jb,
                                        dA(j, j), ldda,
                                        work, jb, queues[1], NULL );

                magma_queue_sync( queues[1] );

                /* Compute inverse of current diagonal block */
                lapackf77_ztrtri( MagmaLowerStr, lapack_const(diag), &jb, work, &jb, info );
                /*
                magma_zsetmatrix_async( jb, jb,
                        work, 0, jb,
                        dA(j, j), ldda, queues[0], NULL );
                */
                magma_zsetmatrix( jb, jb,
                                  work, jb,
                                  dA(j, j), ldda, queues[0] );
            }
        }
    }

    //magma_queue_destroy( queues[0] );
    //magma_queue_destroy( queues[1] );
    magma_free_cpu( work );

    return *info;
}