// On input, A and ipiv is LU factorization of A. On output, A is overwritten. // Requires m == n. // Uses init_matrix() to re-generate original A as needed. // Generates random RHS b and solves Ax=b. // Returns residual, |Ax - b| / (n |A| |x|). float get_residual( magma_opts &opts, magma_int_t m, magma_int_t n, magmaFloatComplex *A, magma_int_t lda, magma_int_t *ipiv ) { if ( m != n ) { printf( "\nERROR: residual check defined only for square matrices\n" ); return -1; } const magmaFloatComplex c_one = MAGMA_C_ONE; const magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; const magma_int_t ione = 1; // this seed should be DIFFERENT than used in init_matrix // (else x is column of A, so residual can be exactly zero) magma_int_t ISEED[4] = {0,0,0,2}; magma_int_t info = 0; magmaFloatComplex *x, *b; // initialize RHS TESTING_MALLOC_CPU( x, magmaFloatComplex, n ); TESTING_MALLOC_CPU( b, magmaFloatComplex, n ); lapackf77_clarnv( &ione, ISEED, &n, b ); blasf77_ccopy( &n, b, &ione, x, &ione ); // solve Ax = b lapackf77_cgetrs( "Notrans", &n, &ione, A, &lda, ipiv, x, &n, &info ); if (info != 0) { printf("lapackf77_cgetrs returned error %d: %s.\n", (int) info, magma_strerror( info )); } // reset to original A init_matrix( opts, m, n, A, lda ); // compute r = Ax - b, saved in b blasf77_cgemv( "Notrans", &m, &n, &c_one, A, &lda, x, &ione, &c_neg_one, b, &ione ); // compute residual |Ax - b| / (n*|A|*|x|) float norm_x, norm_A, norm_r, work[1]; norm_A = lapackf77_clange( "F", &m, &n, A, &lda, work ); norm_r = lapackf77_clange( "F", &n, &ione, b, &n, work ); norm_x = lapackf77_clange( "F", &n, &ione, x, &n, work ); //printf( "r=\n" ); magma_cprint( 1, n, b, 1 ); TESTING_FREE_CPU( x ); TESTING_FREE_CPU( b ); //printf( "r=%.2e, A=%.2e, x=%.2e, n=%d\n", norm_r, norm_A, norm_x, n ); return norm_r / (n * norm_A * norm_x); }
extern "C" magma_int_t magma_cgesv( magma_int_t n, magma_int_t nrhs, magmaFloatComplex *A, magma_int_t lda, magma_int_t *ipiv, magmaFloatComplex *B, magma_int_t ldb, magma_int_t *info) { /* -- MAGMA (version 1.4.1) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver December 2013 Purpose ======= Solves a system of linear equations A * X = B where A is a general N-by-N matrix and X and B are N-by-NRHS matrices. The LU decomposition with partial pivoting and row interchanges is used to factor A as A = P * L * U, where P is a permutation matrix, L is unit lower triangular, and U is upper triangular. The factored form of A is then used to solve the system of equations A * X = B. Arguments ========= N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. A (input/output) COMPLEX array, dimension (LDA,N). On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (output) INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). B (input/output) COMPLEX array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== */ magma_int_t num_gpus, ldda, lddb; *info = 0; if (n < 0) { *info = -1; } else if (nrhs < 0) { *info = -2; } else if (lda < max(1,n)) { *info = -4; } else if (ldb < max(1,n)) { *info = -7; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (n == 0 || nrhs == 0) { return *info; } /* If single-GPU and allocation suceeds, use GPU interface. */ num_gpus = magma_num_gpus(); magmaFloatComplex *dA, *dB; if ( num_gpus > 1 ) { goto CPU_INTERFACE; } ldda = ((n+31)/32)*32; lddb = ldda; if ( MAGMA_SUCCESS != magma_cmalloc( &dA, ldda*n )) { goto CPU_INTERFACE; } if ( MAGMA_SUCCESS != magma_cmalloc( &dB, lddb*nrhs )) { magma_free( dA ); goto CPU_INTERFACE; } magma_csetmatrix( n, n, A, lda, dA, ldda ); magma_cgetrf_gpu( n, n, dA, ldda, ipiv, info ); if ( *info == MAGMA_ERR_DEVICE_ALLOC ) { magma_free( dA ); magma_free( dB ); goto CPU_INTERFACE; } magma_cgetmatrix( n, n, dA, ldda, A, lda ); if ( *info == 0 ) { magma_csetmatrix( n, nrhs, B, ldb, dB, lddb ); magma_cgetrs_gpu( MagmaNoTrans, n, nrhs, dA, ldda, ipiv, dB, lddb, info ); magma_cgetmatrix( n, nrhs, dB, lddb, B, ldb ); } magma_free( dA ); magma_free( dB ); return *info; CPU_INTERFACE: /* If multi-GPU or allocation failed, use CPU interface and LAPACK. * Faster to use LAPACK for getrs than to copy A to GPU. */ magma_cgetrf( n, n, A, lda, ipiv, info ); if ( *info == 0 ) { lapackf77_cgetrs( MagmaNoTransStr, &n, &nrhs, A, &lda, ipiv, B, &ldb, info ); } return *info; }
/** Purpose ------- Solves a system of linear equations A * X = B where A is a general N-by-N matrix and X and B are N-by-NRHS matrices. The LU decomposition with partial pivoting and row interchanges is used to factor A as A = P * L * U, where P is a permutation matrix, L is unit lower triangular, and U is upper triangular. The factored form of A is then used to solve the system of equations A * X = B. Arguments --------- @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in] nrhs INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. @param[in,out] A COMPLEX array, dimension (LDA,N). On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[out] ipiv INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). @param[in,out] B COMPLEX array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. @param[in] ldb INTEGER The leading dimension of the array B. LDB >= max(1,N). @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value @ingroup magma_cgesv_driver ********************************************************************/ extern "C" magma_int_t magma_cgesv( magma_int_t n, magma_int_t nrhs, magmaFloatComplex *A, magma_int_t lda, magma_int_t *ipiv, magmaFloatComplex *B, magma_int_t ldb, magma_int_t *info) { magma_int_t ngpu, ldda, lddb; *info = 0; if (n < 0) { *info = -1; } else if (nrhs < 0) { *info = -2; } else if (lda < max(1,n)) { *info = -4; } else if (ldb < max(1,n)) { *info = -7; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (n == 0 || nrhs == 0) { return *info; } /* If single-GPU and allocation suceeds, use GPU interface. */ ngpu = magma_num_gpus(); magmaFloatComplex *dA, *dB; if ( ngpu > 1 ) { goto CPU_INTERFACE; } ldda = ((n+31)/32)*32; lddb = ldda; if ( MAGMA_SUCCESS != magma_cmalloc( &dA, ldda*n )) { goto CPU_INTERFACE; } if ( MAGMA_SUCCESS != magma_cmalloc( &dB, lddb*nrhs )) { magma_free( dA ); goto CPU_INTERFACE; } magma_csetmatrix( n, n, A, lda, dA, ldda ); magma_cgetrf_gpu( n, n, dA, ldda, ipiv, info ); if ( *info == MAGMA_ERR_DEVICE_ALLOC ) { magma_free( dA ); magma_free( dB ); goto CPU_INTERFACE; } magma_cgetmatrix( n, n, dA, ldda, A, lda ); if ( *info == 0 ) { magma_csetmatrix( n, nrhs, B, ldb, dB, lddb ); magma_cgetrs_gpu( MagmaNoTrans, n, nrhs, dA, ldda, ipiv, dB, lddb, info ); magma_cgetmatrix( n, nrhs, dB, lddb, B, ldb ); } magma_free( dA ); magma_free( dB ); return *info; CPU_INTERFACE: /* If multi-GPU or allocation failed, use CPU interface and LAPACK. * Faster to use LAPACK for getrs than to copy A to GPU. */ magma_cgetrf( n, n, A, lda, ipiv, info ); if ( *info == 0 ) { lapackf77_cgetrs( MagmaNoTransStr, &n, &nrhs, A, &lda, ipiv, B, &ldb, info ); } return *info; }