/** Purpose ------- DPOSV computes the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric positive definite matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as A = U**H * U, if UPLO = MagmaUpper, or A = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B. Arguments --------- @param[in] uplo magma_uplo_t - = MagmaUpper: Upper triangle of A is stored; - = MagmaLower: Lower triangle of A is stored. @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in] nrhs INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. @param[in,out] dA DOUBLE PRECISION array on the GPU, dimension (LDDA,N) On entry, the symmetric matrix dA. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of dA contains the upper triangular part of the matrix dA, and the strictly lower triangular part of dA is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of dA contains the lower triangular part of the matrix dA, and the strictly upper triangular part of dA is not referenced. \n On exit, if INFO = 0, the factor U or L from the Cholesky factorization dA = U**H*U or dA = L*L**H. @param[in] ldda INTEGER The leading dimension of the array A. LDDA >= max(1,N). @param[in,out] dB DOUBLE PRECISION array on the GPU, dimension (LDDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. @param[in] lddb INTEGER The leading dimension of the array B. LDDB >= max(1,N). @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value @ingroup magma_dposv_driver ********************************************************************/ extern "C" magma_int_t magma_dposv_gpu( magma_uplo_t uplo, magma_int_t n, magma_int_t nrhs, magmaDouble_ptr dA, magma_int_t ldda, magmaDouble_ptr dB, magma_int_t lddb, magma_int_t *info ) { *info = 0; if ( uplo != MagmaUpper && uplo != MagmaLower ) *info = -1; if ( n < 0 ) *info = -2; if ( nrhs < 0 ) *info = -3; if ( ldda < max(1, n) ) *info = -5; if ( lddb < max(1, n) ) *info = -7; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if ( (n == 0) || (nrhs == 0) ) { return *info; } magma_dpotrf_gpu( uplo, n, dA, ldda, info ); if ( *info == 0 ) { magma_dpotrs_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info ); } return *info; }
void magmaf_dpotrs_gpu( magma_uplo_t *uplo, magma_int_t *n, magma_int_t *nrhs, devptr_t *dA, magma_int_t *ldda, devptr_t *dB, magma_int_t *lddb, magma_int_t *info ) { magma_dpotrs_gpu( *uplo, *n, *nrhs, magma_ddevptr(dA), *ldda, magma_ddevptr(dB), *lddb, info ); }
extern "C" magma_int_t magma_dposv ( char uplo, magma_int_t n, magma_int_t nrhs, double *A, magma_int_t lda, double *B, magma_int_t ldb, magma_int_t *info ) { /* -- MAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver November 2012 Purpose ======= DPOSV computes the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric positive definite matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as A = U**T * U, if UPLO = 'U', or A = L * L**T, if UPLO = 'L', where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. A (input/output) DOUBLE_PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) DOUBLE_PRECISION array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== */ magma_int_t num_gpus, ldda, lddb; *info = 0 ; if( (uplo != 'U') && (uplo != 'u') && (uplo != 'L') && (uplo != 'l') ) *info = -1; if( n < 0 ) *info = -2; if( nrhs < 0) *info = -3; if ( lda < max(1, n) ) *info = -5; if ( ldb < max(1, n) ) *info = -7; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if ( (n==0) || (nrhs == 0) ) { return *info; } /* If single-GPU and allocation suceeds, use GPU interface. */ num_gpus = magma_num_gpus(); double *dA, *dB; if ( num_gpus > 1 ) { goto CPU_INTERFACE; } ldda = ((n+31)/32)*32; lddb = ldda; if ( MAGMA_SUCCESS != magma_dmalloc( &dA, ldda*n )) { goto CPU_INTERFACE; } if ( MAGMA_SUCCESS != magma_dmalloc( &dB, lddb*nrhs )) { magma_free( dA ); dA = NULL; goto CPU_INTERFACE; } assert( num_gpus == 1 && dA != NULL && dB != NULL ); magma_dsetmatrix( n, n, A, lda, dA, ldda ); magma_dpotrf_gpu( uplo, n, dA, ldda, info ); magma_dgetmatrix( n, n, dA, ldda, A, lda ); if ( *info == 0 ) { magma_dsetmatrix( n, nrhs, B, ldb, dB, lddb ); magma_dpotrs_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info ); magma_dgetmatrix( n, nrhs, dB, lddb, B, ldb ); } magma_free( dA ); magma_free( dB ); return *info; CPU_INTERFACE: /* If multi-GPU or allocation failed, use CPU interface and LAPACK. * Faster to use LAPACK for potrs than to copy A to GPU. */ magma_dpotrf( uplo, n, A, lda, info ); if ( *info == 0 ) { lapackf77_dpotrs( &uplo, &n, &nrhs, A, &lda, B, &ldb, info ); } return *info; }
SEXP magma_dpoMatrix_matrix_solve(SEXP a, SEXP b) { #ifdef HIPLAR_WITH_MAGMA SEXP Chol = magma_dpoMatrix_chol(a), val = PROTECT(duplicate(b)); int *adims = INTEGER(GET_SLOT(a, Matrix_DimSym)), *bdims = INTEGER(getAttrib(b, R_DimSymbol)), info; if (!(isReal(b) && isMatrix(b))) error(_("Argument b must be a numeric matrix")); if (*adims != *bdims || bdims[1] < 1 || *adims < 1) error(_("Dimensions of system to be solved are inconsistent")); double *A = REAL(GET_SLOT(Chol, Matrix_xSym)); double *B = REAL(val); //const char *uplo = uplo_P(Chol); //int N = bdims[1]; //There is only a GPU interface for this call //so it will be the default setting if the GPU is on if(GPUFlag == 1) { #ifdef HIPLAR_DBG R_ShowMessage("DBG: Solving system of Ax = b, A = dpo, b = dge, using dpotrs_gpu;"); #endif double *d_A, *d_B; const char *uplo = uplo_P(Chol); magma_int_t NRHS = bdims[1]; magma_int_t lda = adims[1]; magma_int_t ldb = bdims[0]; magma_int_t N = adims[0]; cublasStatus retStatus; cublasAlloc(N * lda, sizeof(double), (void**)&d_A); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Memory Allocation")); /********************************************/ cublasAlloc(N * NRHS, sizeof(double), (void**)&d_B); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Memory Allocation")); /********************************************/ cublasSetVector( N * lda , sizeof(double), A, 1, d_A, 1); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Data Transfer to Device")); /********************************************/ cublasSetVector( ldb * NRHS, sizeof(double), B, 1, d_B, 1 ); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Data Transfer to Device")); /********************************************/ magma_dpotrs_gpu(uplo[0], N, NRHS , d_A, lda, d_B, ldb, &info); cublasGetVector( ldb * NRHS, sizeof(double), d_B, 1, B, 1); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Data Transfer from Device")); /********************************************/ cublasFree(d_A); cublasFree(d_B); } else { F77_CALL(dpotrs)(uplo_P(Chol), adims, bdims + 1, REAL(GET_SLOT(Chol, Matrix_xSym)), adims, REAL(val), bdims, &info); } // Error checking of MAGMA/LAPACK calls if (info) { if(info > 0) error(_("the leading minor of order %d is not positive definite"), info); else /* should never happen! */ error(_("Lapack routine %s returned error code %d"), "dpotrf", info); } UNPROTECT(1); return val; #endif return R_NilValue; }
SEXP magma_dpoMatrix_dgeMatrix_solve(SEXP a, SEXP b) { #ifdef HIPLAR_WITH_MAGMA SEXP Chol = magma_dpoMatrix_chol(a), val = PROTECT(NEW_OBJECT(MAKE_CLASS("dgeMatrix"))); int *adims = INTEGER(GET_SLOT(a, Matrix_DimSym)), *bdims = INTEGER(GET_SLOT(b, Matrix_DimSym)), info; /* Checking Matrix Dimensions */ if (adims[1] != bdims[0]) error(_("Dimensions of system to be solved are inconsistent")); if (adims[0] < 1 || bdims[1] < 1) error(_("Cannot solve() for matrices with zero extents")); /* ****************************************** */ SET_SLOT(val, Matrix_factorSym, allocVector(VECSXP, 0)); slot_dup(val, b, Matrix_DimSym); slot_dup(val, b, Matrix_xSym); double *A = REAL(GET_SLOT(Chol, Matrix_xSym)); double *B = REAL(GET_SLOT(val, Matrix_xSym)); if(GPUFlag == 1) { #ifdef HIPLAR_DBG R_ShowMessage("DBG: Solving system of Ax = b, A = dpo, b = dge, using dpotrs_gpu;"); #endif double *d_A, *d_B; const char *uplo = uplo_P(Chol); magma_int_t NRHS = bdims[1]; magma_int_t lda = adims[1]; magma_int_t ldb = bdims[0]; magma_int_t N = adims[0]; cublasStatus retStatus; /*if(uplo == "U") uplo = MagmaUpperStr; else if(uplo == "L") uplo = MagmaLowerStr; else uplo = MagmaUpperStr; */ cublasAlloc(N * lda, sizeof(double), (void**)&d_A); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Memory Allocation")); /********************************************/ cublasAlloc(N * NRHS, sizeof(double), (void**)&d_B); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Memory Allocation")); /********************************************/ cublasSetVector( N * lda , sizeof(double), A, 1, d_A, 1); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Data Transfer to Device")); /********************************************/ cublasSetVector( ldb * NRHS, sizeof(double), B, 1, d_B, 1 ); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Data Transfer to Device")); /********************************************/ magma_dpotrs_gpu(uplo[0], N, NRHS , d_A, lda, d_B, ldb, &info); cublasGetVector( ldb * NRHS, sizeof(double), d_B, 1, B, 1); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Data Transfer from Device")); /********************************************/ cublasFree(d_A); cublasFree(d_B); } else { #ifdef HIPLAR_DBG R_ShowMessage("DBG: Solving system of Ax = b, A = dpo, b = dge, using dpotrs;"); #endif F77_CALL(dpotrs)(uplo_P(Chol), adims, bdims + 1, A , adims, B , bdims, &info); } if (info) { if(info > 0) error(_("the leading minor of order %d is not positive definite"), info); else /* should never happen! */ error(_("Lapack routine %s returned error code %d"), "dpotrf", info); } UNPROTECT(1); return val; #endif return R_NilValue; }
/** Purpose ------- DSPOSV computes the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric positive definite matrix and X and B are N-by-NRHS matrices. DSPOSV first attempts to factorize the matrix in real SINGLE PRECISION and use this factorization within an iterative refinement procedure to produce a solution with real DOUBLE PRECISION norm-wise backward error quality (see below). If the approach fails the method switches to a real DOUBLE PRECISION factorization and solve. The iterative refinement is not going to be a winning strategy if the ratio real SINGLE PRECISION performance over real DOUBLE PRECISION performance is too small. A reasonable strategy should take the number of right-hand sides and the size of the matrix into account. This might be done with a call to ILAENV in the future. Up to now, we always try iterative refinement. The iterative refinement process is stopped if ITER > ITERMAX or for all the RHS we have: RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX where o ITER is the number of the current iteration in the iterative refinement process o RNRM is the infinity-norm of the residual o XNRM is the infinity-norm of the solution o ANRM is the infinity-operator-norm of the matrix A o EPS is the machine epsilon returned by DLAMCH('Epsilon') The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 respectively. Arguments --------- @param[in] uplo magma_uplo_t - = MagmaUpper: Upper triangle of A is stored; - = MagmaLower: Lower triangle of A is stored. @param[in] n INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. @param[in] nrhs INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. @param[in,out] dA DOUBLE PRECISION array on the GPU, dimension (LDDA,N) On entry, the symmetric matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if iterative refinement has been successfully used (INFO.EQ.0 and ITER.GE.0, see description below), then A is unchanged, if double factorization has been used (INFO.EQ.0 and ITER.LT.0, see description below), then the array dA contains the factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T. @param[in] ldda INTEGER The leading dimension of the array dA. LDDA >= max(1,N). @param[in] dB DOUBLE PRECISION array on the GPU, dimension (LDDB,NRHS) The N-by-NRHS right hand side matrix B. @param[in] lddb INTEGER The leading dimension of the array dB. LDDB >= max(1,N). @param[out] dX DOUBLE PRECISION array on the GPU, dimension (LDDX,NRHS) If INFO = 0, the N-by-NRHS solution matrix X. @param[in] lddx INTEGER The leading dimension of the array dX. LDDX >= max(1,N). @param dworkd (workspace) DOUBLE PRECISION array on the GPU, dimension (N*NRHS) This array is used to hold the residual vectors. @param dworks (workspace) SINGLE PRECISION array on the GPU, dimension (N*(N+NRHS)) This array is used to store the real single precision matrix and the right-hand sides or solutions in single precision. @param[out] iter INTEGER - < 0: iterative refinement has failed, double precision factorization has been performed + -1 : the routine fell back to full precision for implementation- or machine-specific reasons + -2 : narrowing the precision induced an overflow, the routine fell back to full precision + -3 : failure of SPOTRF + -31: stop the iterative refinement after the 30th iteration - > 0: iterative refinement has been successfully used. Returns the number of iterations @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the leading minor of order i of (DOUBLE PRECISION) A is not positive definite, so the factorization could not be completed, and the solution has not been computed. @ingroup magma_dposv_driver ********************************************************************/ extern "C" magma_int_t magma_dsposv_gpu( magma_uplo_t uplo, magma_int_t n, magma_int_t nrhs, magmaDouble_ptr dA, magma_int_t ldda, magmaDouble_ptr dB, magma_int_t lddb, magmaDouble_ptr dX, magma_int_t lddx, magmaDouble_ptr dworkd, magmaFloat_ptr dworks, magma_int_t *iter, magma_int_t *info) { #define dB(i,j) (dB + (i) + (j)*lddb) #define dX(i,j) (dX + (i) + (j)*lddx) #define dR(i,j) (dR + (i) + (j)*lddr) #define dSX(i,j) (dSX + (i) + (j)*lddsx) // Constants const double BWDMAX = 1.0; const magma_int_t ITERMAX = 30; const double c_neg_one = MAGMA_D_NEG_ONE; const double c_one = MAGMA_D_ONE; const magma_int_t ione = 1; // Local variables magmaDouble_ptr dR; magmaFloat_ptr dSA, dSX; double Xnrmv, Rnrmv; double Anrm, Xnrm, Rnrm, cte, eps; magma_int_t i, j, iiter, lddsa, lddsx, lddr; /* Check arguments */ *iter = 0; *info = 0; if ( n < 0 ) *info = -1; else if ( nrhs < 0 ) *info = -2; else if ( ldda < max(1,n)) *info = -4; else if ( lddb < max(1,n)) *info = -7; else if ( lddx < max(1,n)) *info = -9; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } if ( n == 0 || nrhs == 0 ) return *info; lddsa = n; lddsx = n; lddr = n; dSA = dworks; dSX = dSA + lddsa*n; dR = dworkd; magma_queue_t queue; magma_device_t cdev; magma_getdevice( &cdev ); magma_queue_create( cdev, &queue ); eps = lapackf77_dlamch("Epsilon"); Anrm = magmablas_dlansy( MagmaInfNorm, uplo, n, dA, ldda, (double*)dworkd, n*nrhs, queue ); cte = Anrm * eps * magma_dsqrt( n ) * BWDMAX; /* * Convert to single precision */ magmablas_dlag2s( n, nrhs, dB, lddb, dSX, lddsx, queue, info ); if (*info != 0) { *iter = -2; goto fallback; } magmablas_dlat2s( uplo, n, dA, ldda, dSA, lddsa, queue, info ); if (*info != 0) { *iter = -2; goto fallback; } // factor dSA in single precision magma_spotrf_gpu( uplo, n, dSA, lddsa, info ); if (*info != 0) { *iter = -3; goto fallback; } // solve dSA*dSX = dB in single precision magma_spotrs_gpu( uplo, n, nrhs, dSA, lddsa, dSX, lddsx, info ); // residual dR = dB - dA*dX in double precision magmablas_slag2d( n, nrhs, dSX, lddsx, dX, lddx, queue, info ); magmablas_dlacpy( MagmaFull, n, nrhs, dB, lddb, dR, lddr, queue ); if ( nrhs == 1 ) { magma_dsymv( uplo, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1, queue ); } else { magma_dsymm( MagmaLeft, uplo, n, nrhs, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr, queue ); } // TODO: use MAGMA_D_ABS( dX(i,j) ) instead of dlange? for( j=0; j < nrhs; j++ ) { i = magma_idamax( n, dX(0,j), 1, queue ) - 1; magma_dgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1, queue ); Xnrm = lapackf77_dlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_idamax( n, dR(0,j), 1, queue ) - 1; magma_dgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1, queue ); Rnrm = lapackf77_dlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto refinement; } } *iter = 0; goto cleanup; //return *info; refinement: for( iiter=1; iiter < ITERMAX; ) { *info = 0; // convert residual dR to single precision dSX magmablas_dlag2s( n, nrhs, dR, lddr, dSX, lddsx, queue, info ); if (*info != 0) { *iter = -2; goto fallback; } // solve dSA*dSX = R in single precision magma_spotrs_gpu( uplo, n, nrhs, dSA, lddsa, dSX, lddsx, info ); // Add correction and setup residual // dX += dSX [including conversion] --and-- // dR = dB for( j=0; j < nrhs; j++ ) { magmablas_dsaxpycp( n, dSX(0,j), dX(0,j), dB(0,j), dR(0,j), queue ); } // residual dR = dB - dA*dX in double precision if ( nrhs == 1 ) { magma_dsymv( uplo, n, c_neg_one, dA, ldda, dX, 1, c_one, dR, 1, queue ); } else { magma_dsymm( MagmaLeft, uplo, n, nrhs, c_neg_one, dA, ldda, dX, lddx, c_one, dR, lddr, queue ); } // TODO: use MAGMA_D_ABS( dX(i,j) ) instead of dlange? /* Check whether the nrhs normwise backward errors satisfy the * stopping criterion. If yes, set ITER=IITER > 0 and return. */ for( j=0; j < nrhs; j++ ) { i = magma_idamax( n, dX(0,j), 1, queue ) - 1; magma_dgetmatrix( 1, 1, dX(i,j), 1, &Xnrmv, 1, queue ); Xnrm = lapackf77_dlange( "F", &ione, &ione, &Xnrmv, &ione, NULL ); i = magma_idamax( n, dR(0,j), 1, queue ) - 1; magma_dgetmatrix( 1, 1, dR(i,j), 1, &Rnrmv, 1, queue ); Rnrm = lapackf77_dlange( "F", &ione, &ione, &Rnrmv, &ione, NULL ); if ( Rnrm > Xnrm*cte ) { goto L20; } } /* If we are here, the nrhs normwise backward errors satisfy * the stopping criterion, we are good to exit. */ *iter = iiter; goto cleanup; //return *info; L20: iiter++; } /* If we are at this place of the code, this is because we have * performed ITER=ITERMAX iterations and never satisified the * stopping criterion. Set up the ITER flag accordingly and follow * up on double precision routine. */ *iter = -ITERMAX - 1; fallback: /* Single-precision iterative refinement failed to converge to a * satisfactory solution, so we resort to double precision. */ magma_dpotrf_gpu( uplo, n, dA, ldda, info ); if (*info == 0) { magmablas_dlacpy( MagmaFull, n, nrhs, dB, lddb, dX, lddx, queue ); magma_dpotrs_gpu( uplo, n, nrhs, dA, ldda, dX, lddx, info ); } cleanup: magma_queue_destroy( queue ); return *info; }
/** Purpose ------- DPOSV computes the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric positive definite matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as A = U**H * U, if UPLO = MagmaUpper, or A = L * L**H, if UPLO = MagmaLower, where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B. Arguments --------- @param[in] uplo magma_uplo_t - = MagmaUpper: Upper triangle of A is stored; - = MagmaLower: Lower triangle of A is stored. @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in] nrhs INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. @param[in,out] A DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. \n On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[in,out] B DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. @param[in] ldb INTEGER The leading dimension of the array B. LDB >= max(1,N). @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value @ingroup magma_dposv_driver ********************************************************************/ extern "C" magma_int_t magma_dposv( magma_uplo_t uplo, magma_int_t n, magma_int_t nrhs, double *A, magma_int_t lda, double *B, magma_int_t ldb, magma_int_t *info ) { #ifdef HAVE_clBLAS #define dA(i_, j_) dA, ((i_) + (j_)*ldda) #define dB(i_, j_) dB, ((i_) + (j_)*lddb) #else #define dA(i_, j_) (dA + (i_) + (j_)*ldda) #define dB(i_, j_) (dB + (i_) + (j_)*lddb) #endif magma_int_t ngpu, ldda, lddb; magma_queue_t queue = NULL; magma_device_t cdev; *info = 0; if ( uplo != MagmaUpper && uplo != MagmaLower ) *info = -1; if ( n < 0 ) *info = -2; if ( nrhs < 0) *info = -3; if ( lda < max(1, n) ) *info = -5; if ( ldb < max(1, n) ) *info = -7; if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (n == 0 || nrhs == 0) { return *info; } /* If single-GPU and allocation suceeds, use GPU interface. */ ngpu = magma_num_gpus(); magmaDouble_ptr dA, dB; if ( ngpu > 1 ) { goto CPU_INTERFACE; } ldda = magma_roundup( n, 32 ); lddb = ldda; if ( MAGMA_SUCCESS != magma_dmalloc( &dA, ldda*n )) { goto CPU_INTERFACE; } if ( MAGMA_SUCCESS != magma_dmalloc( &dB, lddb*nrhs )) { magma_free( dA ); goto CPU_INTERFACE; } magma_getdevice( &cdev ); magma_queue_create( cdev, &queue ); magma_dsetmatrix( n, n, A, lda, dA(0,0), ldda, queue ); magma_dpotrf_gpu( uplo, n, dA(0,0), ldda, info ); if ( *info == MAGMA_ERR_DEVICE_ALLOC ) { magma_queue_destroy( queue ); magma_free( dA ); magma_free( dB ); goto CPU_INTERFACE; } magma_dgetmatrix( n, n, dA(0,0), ldda, A, lda, queue ); if ( *info == 0 ) { magma_dsetmatrix( n, nrhs, B, ldb, dB(0,0), lddb, queue ); magma_dpotrs_gpu( uplo, n, nrhs, dA(0,0), ldda, dB(0,0), lddb, info ); magma_dgetmatrix( n, nrhs, dB(0,0), lddb, B, ldb, queue ); } magma_queue_destroy( queue ); magma_free( dA ); magma_free( dB ); return *info; CPU_INTERFACE: /* If multi-GPU or allocation failed, use CPU interface and LAPACK. * Faster to use LAPACK for potrs than to copy A to GPU. */ magma_dpotrf( uplo, n, A, lda, info ); if ( *info == 0 ) { lapackf77_dpotrs( lapack_uplo_const(uplo), &n, &nrhs, A, &lda, B, &ldb, info ); } return *info; }