/***************************************************************************//** * Parallel forward substitution for tile LU - dynamic scheduling **/ void plasma_pztrsmpl_quark(PLASMA_desc A, PLASMA_desc B, PLASMA_desc L, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; int k, m, n; int ldak, ldam, ldbk, ldbm; int tempkm, tempnn, tempkmin, tempmm, tempkn; int ib; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); ib = PLASMA_IB; for (k = 0; k < min(A.mt, A.nt); k++) { tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; tempkmin = k == min(A.mt, A.nt)-1 ? min(A.m, A.n)-k*A.mb : A.mb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); for (n = 0; n < B.nt; n++) { tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb; QUARK_CORE_zgessm( plasma->quark, &task_flags, tempkm, tempnn, tempkmin, ib, L.nb, IPIV(k, k), A(k, k), ldak, B(k, n), ldbk); } for (m = k+1; m < A.mt; m++) { tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; ldam = BLKLDD(A, m); ldbm = BLKLDD(B, m); for (n = 0; n < B.nt; n++) { tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb; QUARK_CORE_zssssm( plasma->quark, &task_flags, A.nb, tempnn, tempmm, tempnn, tempkn, ib, L.nb, B(k, n), ldbk, B(m, n), ldbm, L(m, k), L.mb, A(m, k), ldam, IPIV(m, k)); } } } }
/***************************************************************************//** * Parallel tile row interchanges - dynamic scheduling **/ void plasma_pclaswp_quark(PLASMA_desc B, int *IPIV, int inc, PLASMA_sequence *sequence, PLASMA_request *request) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; int m, n; int tempi, tempm, tempmm, tempnn; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); if ( inc > 0 ) { for (m = 0; m < B.mt; m++) { tempi = m * B.mb; tempm = B.m - tempi; tempmm = m == B.mt-1 ? tempm : B.mb; for (n = 0; n < B.nt; n++) { tempnn = n == B.nt-1 ? B.n - n * B.nb : B.nb; QUARK_CORE_claswp_ontile( plasma->quark, &task_flags, plasma_desc_submatrix(B, tempi, n*B.nb, tempm, tempnn), B(m, n), 1, tempmm, IPIV(m), inc, B(B.mt-1, n) ); } } } else { for (m = B.mt-1; m > -1; m--) { tempi = m * B.mb; tempm = B.m - tempi; tempmm = m == B.mt-1 ? tempm : B.mb; for (n = 0; n < B.nt; n++) { tempnn = n == B.nt-1 ? B.n - n * B.nb : B.nb; QUARK_CORE_claswp_ontile( plasma->quark, &task_flags, plasma_desc_submatrix(B, tempi, n*B.nb, tempm, tempnn), B(m, n), 1, tempmm, IPIV(m), inc, B(0, n) ); } } } }
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) { Ppattern_ = Ppattern; const RCP<const Map> uniqueMap = Ppattern_->getDomainMap(); const RCP<const Map> nonUniqueMap = Ppattern_->getColMap(); RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap); const size_t NSDim = Bc.getNumVectors(); X_ = MultiVectorFactory::Build(nonUniqueMap, NSDim); X_->doImport(Bc, *importer, Xpetra::INSERT); size_t numRows = Ppattern_->getNodeNumRows(); XXtInv_.resize(numRows); Teuchos::SerialDenseVector<LO,SC> BcRow(NSDim, false); for (size_t i = 0; i < numRows; i++) { Teuchos::ArrayView<const LO> indices; Ppattern_->getLocalRowView(i, indices); size_t nnz = indices.size(); Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false); for (size_t j = 0; j < nnz; j++) { for (size_t k = 0; k < NSDim; k++) BcRow[k] = X_->getData(k)[indices[j]]; Teuchos::setCol(BcRow, (LO)j, locX); } XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false); Teuchos::BLAS<LO,SC> blas; blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz, Teuchos::ScalarTraits<SC>::one(), locX.values(), locX.stride(), locX.values(), locX.stride(), Teuchos::ScalarTraits<SC>::zero(), XXtInv_[i].values(), XXtInv_[i].stride()); Teuchos::LAPACK<LO,SC> lapack; LO info, lwork = 3*NSDim; ArrayRCP<LO> IPIV(NSDim); ArrayRCP<SC> WORK(lwork); lapack.GETRF(NSDim, NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), &info); lapack.GETRI(NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), WORK.get(), lwork, &info); } }
/* Subroutine */ int dgtrfs_(char *trans, integer *n, integer *nrhs, doublereal *dl, doublereal *d, doublereal *du, doublereal *dlf, doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal * ferr, doublereal *berr, doublereal *work, integer *iwork, integer * info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DGTRFS improves the computed solution to a system of linear equations when the coefficient matrix is tridiagonal, and provides error bounds and backward error estimates for the solution. Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. DL (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) subdiagonal elements of A. D (input) DOUBLE PRECISION array, dimension (N) The diagonal elements of A. DU (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) superdiagonal elements of A. DLF (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) multipliers that define the matrix L from the LU factorization of A as computed by DGTTRF. DF (input) DOUBLE PRECISION array, dimension (N) The n diagonal elements of the upper triangular matrix U from the LU factorization of A. DUF (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) elements of the first superdiagonal of U. DU2 (input) DOUBLE PRECISION array, dimension (N-2) The (n-2) elements of the second superdiagonal of U. IPIV (input) INTEGER array, dimension (N) The pivot indices; for 1 <= i <= n, row i of the matrix was interchanged with row IPIV(i). IPIV(i) will always be either i or i+1; IPIV(i) = i indicates a row interchange was not required. B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by DGTTRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) DOUBLE PRECISION array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b18 = -1.; static doublereal c_b19 = 1.; /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4; /* Local variables */ static integer kase; static doublereal safe1, safe2; static integer i, j; static doublereal s; extern logical lsame_(char *, char *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer count; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer nz; extern /* Subroutine */ int dlagtm_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static logical notran; static char transn[1]; extern /* Subroutine */ int dgttrs_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static char transt[1]; static doublereal lstres, eps; #define DL(I) dl[(I)-1] #define D(I) d[(I)-1] #define DU(I) du[(I)-1] #define DLF(I) dlf[(I)-1] #define DF(I) df[(I)-1] #define DUF(I) duf[(I)-1] #define DU2(I) du2[(I)-1] #define IPIV(I) ipiv[(I)-1] #define FERR(I) ferr[(I)-1] #define BERR(I) berr[(I)-1] #define WORK(I) work[(I)-1] #define IWORK(I) iwork[(I)-1] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)] *info = 0; notran = lsame_(trans, "N"); if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -13; } else if (*ldx < max(1,*n)) { *info = -15; } if (*info != 0) { i__1 = -(*info); xerbla_("DGTRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { FERR(j) = 0.; BERR(j) = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transn = 'N'; *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transn = 'T'; *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = 4; eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ dcopy_(n, &B(1,j), &c__1, &WORK(*n + 1), &c__1); dlagtm_(trans, n, &c__1, &c_b18, &DL(1), &D(1), &DU(1), &X(1,j), ldx, &c_b19, &WORK(*n + 1), n); /* Compute abs(op(A))*abs(x) + abs(b) for use in the backward error bound. */ if (notran) { if (*n == 1) { WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1) * X(1,j), abs(d__2)); } else { WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1) * X(1,j), abs(d__2)) + (d__3 = DU(1) * X(2,j), abs(d__3)); i__2 = *n - 1; for (i = 2; i <= *n-1; ++i) { WORK(i) = (d__1 = B(i,j), abs(d__1)) + (d__2 = DL(i - 1) * X(i-1,j), abs(d__2)) + ( d__3 = D(i) * X(i,j), abs(d__3)) + ( d__4 = DU(i) * X(i+1,j), abs(d__4)); /* L30: */ } WORK(*n) = (d__1 = B(*n,j), abs(d__1)) + (d__2 = DL(*n - 1) * X(*n-1,j), abs(d__2)) + ( d__3 = D(*n) * X(*n,j), abs(d__3)); } } else { if (*n == 1) { WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1) * X(1,j), abs(d__2)); } else { WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1) * X(1,j), abs(d__2)) + (d__3 = DL(1) * X(2,j), abs(d__3)); i__2 = *n - 1; for (i = 2; i <= *n-1; ++i) { WORK(i) = (d__1 = B(i,j), abs(d__1)) + (d__2 = DU(i - 1) * X(i-1,j), abs(d__2)) + ( d__3 = D(i) * X(i,j), abs(d__3)) + ( d__4 = DL(i) * X(i+1,j), abs(d__4)); /* L40: */ } WORK(*n) = (d__1 = B(*n,j), abs(d__1)) + (d__2 = DU(*n - 1) * X(*n-1,j), abs(d__2)) + ( d__3 = D(*n) * X(*n,j), abs(d__3)); } } /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matr ix or vector Z. If the i-th component of the denominator is le ss than SAFE2, then SAFE1 is added to the i-th components of th e numerator and denominator before dividing. */ s = 0.; i__2 = *n; for (i = 1; i <= *n; ++i) { if (WORK(i) > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = WORK(*n + i), abs(d__1)) / WORK(i); s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = WORK(*n + i), abs(d__1)) + safe1) / (WORK(i) + safe1); s = max(d__2,d__3); } /* L50: */ } BERR(j) = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, a nd 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (BERR(j) > eps && BERR(j) * 2. <= lstres && count <= 5) { /* Update solution and try again. */ dgttrs_(trans, n, &c__1, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV( 1), &WORK(*n + 1), n, info); daxpy_(n, &c_b19, &WORK(*n + 1), &c__1, &X(1,j), &c__1) ; lstres = BERR(j); ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X ) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix o r vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B )) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use DLACON to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i = 1; i <= *n; ++i) { if (WORK(i) > safe2) { WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK( i); } else { WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK( i) + safe1; } /* L60: */ } kase = 0; L70: dlacon_(n, &WORK((*n << 1) + 1), &WORK(*n + 1), &IWORK(1), &FERR(j), & kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T). */ dgttrs_(transt, n, &c__1, &DLF(1), &DF(1), &DUF(1), &DU2(1), & IPIV(1), &WORK(*n + 1), n, info); i__2 = *n; for (i = 1; i <= *n; ++i) { WORK(*n + i) = WORK(i) * WORK(*n + i); /* L80: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i = 1; i <= *n; ++i) { WORK(*n + i) = WORK(i) * WORK(*n + i); /* L90: */ } dgttrs_(transn, n, &c__1, &DLF(1), &DF(1), &DUF(1), &DU2(1), & IPIV(1), &WORK(*n + 1), n, info); } goto L70; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i = 1; i <= *n; ++i) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = X(i,j), abs(d__1)); lstres = max(d__2,d__3); /* L100: */ } if (lstres != 0.) { FERR(j) /= lstres; } /* L110: */ } return 0; /* End of DGTRFS */ } /* dgtrfs_ */
/* Subroutine */ int cgtcon_(char *norm, integer *n, complex *dl, complex *d, complex *du, complex *du2, integer *ipiv, real *anorm, real *rcond, complex *work, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CGTCON estimates the reciprocal of the condition number of a complex tridiagonal matrix A using the LU factorization as computed by CGTTRF. An estimate is obtained for norm(inv(A)), and the reciprocal of the condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). Arguments ========= NORM (input) CHARACTER*1 Specifies whether the 1-norm condition number or the infinity-norm condition number is required: = '1' or 'O': 1-norm; = 'I': Infinity-norm. N (input) INTEGER The order of the matrix A. N >= 0. DL (input) COMPLEX array, dimension (N-1) The (n-1) multipliers that define the matrix L from the LU factorization of A as computed by CGTTRF. D (input) COMPLEX array, dimension (N) The n diagonal elements of the upper triangular matrix U from the LU factorization of A. DU (input) COMPLEX array, dimension (N-1) The (n-1) elements of the first superdiagonal of U. DU2 (input) COMPLEX array, dimension (N-2) The (n-2) elements of the second superdiagonal of U. IPIV (input) INTEGER array, dimension (N) The pivot indices; for 1 <= i <= n, row i of the matrix was interchanged with row IPIV(i). IPIV(i) will always be either i or i+1; IPIV(i) = i indicates a row interchange was not required. ANORM (input) REAL If NORM = '1' or 'O', the 1-norm of the original matrix A. If NORM = 'I', the infinity-norm of the original matrix A. RCOND (output) REAL The reciprocal of the condition number of the matrix A, computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an estimate of the 1-norm of inv(A) computed in this routine. WORK (workspace) COMPLEX array, dimension (2*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input arguments. Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer i__1, i__2; /* Local variables */ static integer kase, kase1, i; extern logical lsame_(char *, char *); extern /* Subroutine */ int clacon_(integer *, complex *, complex *, real *, integer *), xerbla_(char *, integer *); static real ainvnm; static logical onenrm; extern /* Subroutine */ int cgttrs_(char *, integer *, integer *, complex *, complex *, complex *, complex *, integer *, complex *, integer *, integer *); #define WORK(I) work[(I)-1] #define IPIV(I) ipiv[(I)-1] #define DU2(I) du2[(I)-1] #define DU(I) du[(I)-1] #define D(I) d[(I)-1] #define DL(I) dl[(I)-1] *info = 0; onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O"); if (! onenrm && ! lsame_(norm, "I")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*anorm < 0.f) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("CGTCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.f; if (*n == 0) { *rcond = 1.f; return 0; } else if (*anorm == 0.f) { return 0; } /* Check that D(1:N) is non-zero. */ i__1 = *n; for (i = 1; i <= *n; ++i) { i__2 = i; if (D(i).r == 0.f && D(i).i == 0.f) { return 0; } /* L10: */ } ainvnm = 0.f; if (onenrm) { kase1 = 1; } else { kase1 = 2; } kase = 0; L20: clacon_(n, &WORK(*n + 1), &WORK(1), &ainvnm, &kase); if (kase != 0) { if (kase == kase1) { /* Multiply by inv(U)*inv(L). */ cgttrs_("No transpose", n, &c__1, &DL(1), &D(1), &DU(1), &DU2(1), &IPIV(1), &WORK(1), n, info); } else { /* Multiply by inv(L')*inv(U'). */ cgttrs_("Conjugate transpose", n, &c__1, &DL(1), &D(1), &DU(1), & DU2(1), &IPIV(1), &WORK(1), n, info); } goto L20; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.f) { *rcond = 1.f / ainvnm / *anorm; } return 0; /* End of CGTCON */ } /* cgtcon_ */
/* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda, integer *ipiv, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1992 Purpose ======= SGETF2 computes an LU factorization of a general m-by-n matrix A using partial pivoting with row interchanges. The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n). This is the right-looking Level 2 BLAS version of the algorithm. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) REAL array, dimension (LDA,N) On entry, the m by n matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). IPIV (output) INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -k, the k-th argument had an illegal value > 0: if INFO = k, U(k,k) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static real c_b6 = -1.f; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; real r__1; /* Local variables */ extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, integer *, real *, integer *, real *, integer *); static integer j; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *); static integer jp; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer isamax_(integer *, real *, integer *); #define IPIV(I) ipiv[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("SGETF2", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } i__1 = min(*m,*n); for (j = 1; j <= min(*m,*n); ++j) { /* Find pivot and test for singularity. */ i__2 = *m - j + 1; jp = j - 1 + isamax_(&i__2, &A(j,j), &c__1); IPIV(j) = jp; if (A(jp,j) != 0.f) { /* Apply the interchange to columns 1:N. */ if (jp != j) { sswap_(n, &A(j,1), lda, &A(jp,1), lda); } /* Compute elements J+1:M of J-th column. */ if (j < *m) { i__2 = *m - j; r__1 = 1.f / A(j,j); sscal_(&i__2, &r__1, &A(j+1,j), &c__1); } } else if (*info == 0) { *info = j; } if (j < min(*m,*n)) { /* Update trailing submatrix. */ i__2 = *m - j; i__3 = *n - j; sger_(&i__2, &i__3, &c_b6, &A(j+1,j), &c__1, &A(j,j+1), lda, &A(j+1,j+1), lda); } /* L10: */ } return 0; /* End of SGETF2 */ } /* sgetf2_ */
/* Subroutine */ int dsyrfs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DSYRFS improves the computed solution to a system of linear equations when the coefficient matrix is symmetric indefinite, and provides error bounds and backward error estimates for the solution. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. A (input) DOUBLE PRECISION array, dimension (LDA,N) The symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). AF (input) DOUBLE PRECISION array, dimension (LDAF,N) The factored form of the matrix A. AF contains the block diagonal matrix D and the multipliers used to obtain the factor U or L from the factorization A = U*D*U**T or A = L*D*L**T as computed by DSYTRF. LDAF (input) INTEGER The leading dimension of the array AF. LDAF >= max(1,N). IPIV (input) INTEGER array, dimension (N) Details of the interchanges and the block structure of D as determined by DSYTRF. B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by DSYTRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) DOUBLE PRECISION array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = -1.; static doublereal c_b14 = 1.; /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ static integer kase; static doublereal safe1, safe2; static integer i, j, k; static doublereal s; extern logical lsame_(char *, char *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer count; static logical upper; extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern doublereal dlamch_(char *); extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static doublereal xk; static integer nz; static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal lstres; extern /* Subroutine */ int dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); static doublereal eps; #define IPIV(I) ipiv[(I)-1] #define FERR(I) ferr[(I)-1] #define BERR(I) berr[(I)-1] #define WORK(I) work[(I)-1] #define IWORK(I) iwork[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define AF(I,J) af[(I)-1 + ((J)-1)* ( *ldaf)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)] *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldaf < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldx < max(1,*n)) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("DSYRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { FERR(j) = 0.; BERR(j) = 0.; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - A * X */ dcopy_(n, &B(1,j), &c__1, &WORK(*n + 1), &c__1); dsymv_(uplo, n, &c_b12, &A(1,1), lda, &X(1,j), &c__1, &c_b14, &WORK(*n + 1), &c__1); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matr ix or vector Z. If the i-th component of the denominator is le ss than SAFE2, then SAFE1 is added to the i-th components of th e numerator and denominator before dividing. */ i__2 = *n; for (i = 1; i <= *n; ++i) { WORK(i) = (d__1 = B(i,j), abs(d__1)); /* L30: */ } /* Compute abs(A)*abs(X) + abs(B). */ if (upper) { i__2 = *n; for (k = 1; k <= *n; ++k) { s = 0.; xk = (d__1 = X(k,j), abs(d__1)); i__3 = k - 1; for (i = 1; i <= k-1; ++i) { WORK(i) += (d__1 = A(i,k), abs(d__1)) * xk; s += (d__1 = A(i,k), abs(d__1)) * (d__2 = X(i,j), abs(d__2)); /* L40: */ } WORK(k) = WORK(k) + (d__1 = A(k,k), abs(d__1)) * xk + s; /* L50: */ } } else { i__2 = *n; for (k = 1; k <= *n; ++k) { s = 0.; xk = (d__1 = X(k,j), abs(d__1)); WORK(k) += (d__1 = A(k,k), abs(d__1)) * xk; i__3 = *n; for (i = k + 1; i <= *n; ++i) { WORK(i) += (d__1 = A(i,k), abs(d__1)) * xk; s += (d__1 = A(i,k), abs(d__1)) * (d__2 = X(i,j), abs(d__2)); /* L60: */ } WORK(k) += s; /* L70: */ } } s = 0.; i__2 = *n; for (i = 1; i <= *n; ++i) { if (WORK(i) > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = WORK(*n + i), abs(d__1)) / WORK(i); s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = WORK(*n + i), abs(d__1)) + safe1) / (WORK(i) + safe1); s = max(d__2,d__3); } /* L80: */ } BERR(j) = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, a nd 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (BERR(j) > eps && BERR(j) * 2. <= lstres && count <= 5) { /* Update solution and try again. */ dsytrs_(uplo, n, &c__1, &AF(1,1), ldaf, &IPIV(1), &WORK(*n + 1), n, info); daxpy_(n, &c_b14, &WORK(*n + 1), &c__1, &X(1,j), &c__1) ; lstres = BERR(j); ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(A))* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(A) is the inverse of A abs(Z) is the componentwise absolute value of the matrix o r vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(A)*abs(X) + abs(B) is less than SAFE2. Use DLACON to estimate the infinity-norm of the matrix inv(A) * diag(W), where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ i__2 = *n; for (i = 1; i <= *n; ++i) { if (WORK(i) > safe2) { WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK( i); } else { WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK( i) + safe1; } /* L90: */ } kase = 0; L100: dlacon_(n, &WORK((*n << 1) + 1), &WORK(*n + 1), &IWORK(1), &FERR(j), & kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(A'). */ dsytrs_(uplo, n, &c__1, &AF(1,1), ldaf, &IPIV(1), &WORK( *n + 1), n, info); i__2 = *n; for (i = 1; i <= *n; ++i) { WORK(*n + i) = WORK(i) * WORK(*n + i); /* L110: */ } } else if (kase == 2) { /* Multiply by inv(A)*diag(W). */ i__2 = *n; for (i = 1; i <= *n; ++i) { WORK(*n + i) = WORK(i) * WORK(*n + i); /* L120: */ } dsytrs_(uplo, n, &c__1, &AF(1,1), ldaf, &IPIV(1), &WORK( *n + 1), n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i = 1; i <= *n; ++i) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = X(i,j), abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { FERR(j) /= lstres; } /* L140: */ } return 0; /* End of DSYRFS */ } /* dsyrfs_ */
/* Subroutine */ int cgbsv_(integer *n, integer *kl, integer *ku, integer * nrhs, complex *ab, integer *ldab, integer *ipiv, complex *b, integer * ldb, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= CGBSV computes the solution to a complex system of linear equations A * X = B, where A is a band matrix of order N with KL subdiagonals and KU superdiagonals, and X and B are N-by-NRHS matrices. The LU decomposition with partial pivoting and row interchanges is used to factor A as A = L * U, where L is a product of permutation and unit lower triangular matrices with KL subdiagonals, and U is upper triangular with KL+KU superdiagonals. The factored form of A is then used to solve the system of equations A * X = B. Arguments ========= N (input) INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. AB (input/output) COMPLEX array, dimension (LDAB,N) On entry, the matrix A in band storage, in rows KL+1 to 2*KL+KU+1; rows 1 to KL of the array need not be set. The j-th column of A is stored in the j-th column of the array AB as follows: AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) On exit, details of the factorization: U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. See below for further details. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= 2*KL+KU+1. IPIV (output) INTEGER array, dimension (N) The pivot indices that define the permutation matrix P; row i of the matrix was interchanged with row IPIV(i). B (input/output) COMPLEX array, dimension (LDB,NRHS) On entry, the N-by-NRHS right hand side matrix B. On exit, if INFO = 0, the N-by-NRHS solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and the solution has not been computed. Further Details =============== The band storage scheme is illustrated by the following example, when M = N = 6, KL = 2, KU = 1: On entry: On exit: * * * + + + * * * u14 u25 u36 * * + + + + * * u13 u24 u35 u46 * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * a31 a42 a53 a64 * * m31 m42 m53 m64 * * VISArray elements marked * are not used by the routine; elements marked + need not be set on entry, but are required by the routine to store elements of U because of fill-in resulting from the row interchanges. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, i__1; /* Local variables */ extern /* Subroutine */ int cgbtrf_(integer *, integer *, integer *, integer *, complex *, integer *, integer *, integer *), xerbla_( char *, integer *), cgbtrs_(char *, integer *, integer *, integer *, integer *, complex *, integer *, integer *, complex *, integer *, integer *); #define IPIV(I) ipiv[(I)-1] #define AB(I,J) ab[(I)-1 + ((J)-1)* ( *ldab)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] *info = 0; if (*n < 0) { *info = -1; } else if (*kl < 0) { *info = -2; } else if (*ku < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldab < (*kl << 1) + *ku + 1) { *info = -6; } else if (*ldb < max(*n,1)) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("CGBSV ", &i__1); return 0; } /* Compute the LU factorization of the band matrix A. */ cgbtrf_(n, n, kl, ku, &AB(1,1), ldab, &IPIV(1), info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ cgbtrs_("No transpose", n, kl, ku, nrhs, &AB(1,1), ldab, &IPIV( 1), &B(1,1), ldb, info); } return 0; /* End of CGBSV */ } /* cgbsv_ */
/* Subroutine */ int dsytrs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * ldb, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DSYTRS solves a system of linear equations A*X = B with a real symmetric matrix A using the factorization A = U*D*U**T or A = L*D*L**T computed by DSYTRF. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the details of the factorization are stored as an upper or lower triangular matrix. = 'U': Upper triangular, form is A = U*D*U**T; = 'L': Lower triangular, form is A = L*D*L**T. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. A (input) DOUBLE PRECISION array, dimension (LDA,N) The block diagonal matrix D and the multipliers used to obtain the factor U or L as computed by DSYTRF. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (input) INTEGER array, dimension (N) Details of the interchanges and the block structure of D as determined by DSYTRF. B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Parameter adjustments Function Body */ /* Table of constant values */ static doublereal c_b7 = -1.; static integer c__1 = 1; static doublereal c_b19 = 1.; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; doublereal d__1; /* Local variables */ extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal akm1k; static integer j, k; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); static doublereal denom; extern /* Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer *); static logical upper; static doublereal ak, bk; static integer kp; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal akm1, bkm1; #define IPIV(I) ipiv[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("DSYTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (upper) { /* Solve A*X = B, where A = U*D*U'. First solve U*D*X = B, overwriting B with X. K is the main loop index, decreasing from N to 1 in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = *n; L10: /* If K < 1, exit from loop. */ if (k < 1) { goto L30; } if (IPIV(k) > 0) { /* 1 x 1 diagonal block Interchange rows K and IPIV(K). */ kp = IPIV(k); if (kp != k) { dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb); } /* Multiply by inv(U(K)), where U(K) is the transformati on stored in column K of A. */ i__1 = k - 1; dger_(&i__1, nrhs, &c_b7, &A(1,k), &c__1, &B(k,1), ldb, &B(1,1), ldb); /* Multiply by the inverse of the diagonal block. */ d__1 = 1. / A(k,k); dscal_(nrhs, &d__1, &B(k,1), ldb); --k; } else { /* 2 x 2 diagonal block Interchange rows K-1 and -IPIV(K). */ kp = -IPIV(k); if (kp != k - 1) { dswap_(nrhs, &B(k-1,1), ldb, &B(kp,1), ldb); } /* Multiply by inv(U(K)), where U(K) is the transformati on stored in columns K-1 and K of A. */ i__1 = k - 2; dger_(&i__1, nrhs, &c_b7, &A(1,k), &c__1, &B(k,1), ldb, &B(1,1), ldb); i__1 = k - 2; dger_(&i__1, nrhs, &c_b7, &A(1,k-1), &c__1, &B(k-1,1), ldb, &B(1,1), ldb); /* Multiply by the inverse of the diagonal block. */ akm1k = A(k-1,k); akm1 = A(k-1,k-1) / akm1k; ak = A(k,k) / akm1k; denom = akm1 * ak - 1.; i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { bkm1 = B(k-1,j) / akm1k; bk = B(k,j) / akm1k; B(k-1,j) = (ak * bkm1 - bk) / denom; B(k,j) = (akm1 * bk - bkm1) / denom; /* L20: */ } k += -2; } goto L10; L30: /* Next solve U'*X = B, overwriting B with X. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = 1; L40: /* If K > N, exit from loop. */ if (k > *n) { goto L50; } if (IPIV(k) > 0) { /* 1 x 1 diagonal block Multiply by inv(U'(K)), where U(K) is the transformat ion stored in column K of A. */ i__1 = k - 1; dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(1,1), ldb, &A(1,k), &c__1, &c_b19, &B(k,1), ldb); /* Interchange rows K and IPIV(K). */ kp = IPIV(k); if (kp != k) { dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb); } ++k; } else { /* 2 x 2 diagonal block Multiply by inv(U'(K+1)), where U(K+1) is the transfo rmation stored in columns K and K+1 of A. */ i__1 = k - 1; dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(1,1), ldb, &A(1,k), &c__1, &c_b19, &B(k,1), ldb); i__1 = k - 1; dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(1,1), ldb, &A(1,k+1), &c__1, &c_b19, &B(k+1,1), ldb); /* Interchange rows K and -IPIV(K). */ kp = -IPIV(k); if (kp != k) { dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb); } k += 2; } goto L40; L50: ; } else { /* Solve A*X = B, where A = L*D*L'. First solve L*D*X = B, overwriting B with X. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = 1; L60: /* If K > N, exit from loop. */ if (k > *n) { goto L80; } if (IPIV(k) > 0) { /* 1 x 1 diagonal block Interchange rows K and IPIV(K). */ kp = IPIV(k); if (kp != k) { dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb); } /* Multiply by inv(L(K)), where L(K) is the transformati on stored in column K of A. */ if (k < *n) { i__1 = *n - k; dger_(&i__1, nrhs, &c_b7, &A(k+1,k), &c__1, &B(k,1), ldb, &B(k+1,1), ldb); } /* Multiply by the inverse of the diagonal block. */ d__1 = 1. / A(k,k); dscal_(nrhs, &d__1, &B(k,1), ldb); ++k; } else { /* 2 x 2 diagonal block Interchange rows K+1 and -IPIV(K). */ kp = -IPIV(k); if (kp != k + 1) { dswap_(nrhs, &B(k+1,1), ldb, &B(kp,1), ldb); } /* Multiply by inv(L(K)), where L(K) is the transformati on stored in columns K and K+1 of A. */ if (k < *n - 1) { i__1 = *n - k - 1; dger_(&i__1, nrhs, &c_b7, &A(k+2,k), &c__1, &B(k,1), ldb, &B(k+2,1), ldb); i__1 = *n - k - 1; dger_(&i__1, nrhs, &c_b7, &A(k+2,k+1), &c__1, &B(k+1,1), ldb, &B(k+2,1), ldb); } /* Multiply by the inverse of the diagonal block. */ akm1k = A(k+1,k); akm1 = A(k,k) / akm1k; ak = A(k+1,k+1) / akm1k; denom = akm1 * ak - 1.; i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { bkm1 = B(k,j) / akm1k; bk = B(k+1,j) / akm1k; B(k,j) = (ak * bkm1 - bk) / denom; B(k+1,j) = (akm1 * bk - bkm1) / denom; /* L70: */ } k += 2; } goto L60; L80: /* Next solve L'*X = B, overwriting B with X. K is the main loop index, decreasing from N to 1 in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = *n; L90: /* If K < 1, exit from loop. */ if (k < 1) { goto L100; } if (IPIV(k) > 0) { /* 1 x 1 diagonal block Multiply by inv(L'(K)), where L(K) is the transformat ion stored in column K of A. */ if (k < *n) { i__1 = *n - k; dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(k+1,1), ldb, &A(k+1,k), &c__1, &c_b19, &B(k,1), ldb); } /* Interchange rows K and IPIV(K). */ kp = IPIV(k); if (kp != k) { dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb); } --k; } else { /* 2 x 2 diagonal block Multiply by inv(L'(K-1)), where L(K-1) is the transfo rmation stored in columns K-1 and K of A. */ if (k < *n) { i__1 = *n - k; dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(k+1,1), ldb, &A(k+1,k), &c__1, &c_b19, &B(k,1), ldb); i__1 = *n - k; dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(k+1,1), ldb, &A(k+1,k-1), &c__1, &c_b19, &B(k-1,1), ldb); } /* Interchange rows K and -IPIV(K). */ kp = -IPIV(k); if (kp != k) { dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb); } k += -2; } goto L90; L100: ; } return 0; /* End of DSYTRS */ } /* dsytrs_ */
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) { const size_t NSDim = Bc.getNumVectors(); Ppattern_ = Ppattern; size_t numRows = Ppattern_->getNodeNumRows(); XXtInv_.resize(numRows); RCP<const Import> importer = Ppattern_->getImporter(); X_ = MultiVectorFactory::Build(Ppattern_->getColMap(), NSDim); if (!importer.is_null()) X_->doImport(Bc, *importer, Xpetra::INSERT); else *X_ = Bc; std::vector<const SC*> Xval(NSDim); for (size_t j = 0; j < NSDim; j++) Xval[j] = X_->getData(j).get(); SC zero = Teuchos::ScalarTraits<SC>::zero(); SC one = Teuchos::ScalarTraits<SC>::one(); Teuchos::BLAS <LO,SC> blas; Teuchos::LAPACK<LO,SC> lapack; LO lwork = 3*NSDim; ArrayRCP<LO> IPIV(NSDim); ArrayRCP<SC> WORK(lwork); for (size_t i = 0; i < numRows; i++) { Teuchos::ArrayView<const LO> indices; Ppattern_->getLocalRowView(i, indices); size_t nnz = indices.size(); XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false/*zeroOut*/); Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i]; if (NSDim == 1) { SC d = zero; for (size_t j = 0; j < nnz; j++) d += Xval[0][indices[j]] * Xval[0][indices[j]]; XXtInv(0,0) = one/d; } else { Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false/*zeroOut*/); for (size_t j = 0; j < nnz; j++) for (size_t k = 0; k < NSDim; k++) locX(k,j) = Xval[k][indices[j]]; // XXtInv_ = (locX*locX^T)^{-1} blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz, one, locX.values(), locX.stride(), locX.values(), locX.stride(), zero, XXtInv.values(), XXtInv.stride()); LO info; // Compute LU factorization using partial pivoting with row exchanges lapack.GETRF(NSDim, NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), &info); // Use the computed factorization to compute the inverse lapack.GETRI(NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), WORK.get(), lwork, &info); } } }
/* Subroutine */ int ssytrf_(char *uplo, integer *n, real *a, integer *lda, integer *ipiv, real *work, integer *lwork, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SSYTRF computes the factorization of a real symmetric matrix A using the Bunch-Kaufman diagonal pivoting method. The form of the factorization is A = U*D*U**T or A = L*D*L**T where U (or L) is a product of permutation and unit upper (lower) triangular matrices, and D is symmetric and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. This is the blocked version of the algorithm, calling Level 3 BLAS. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the block diagonal matrix D and the multipliers used to obtain the factor U or L (see below for further details). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (output) INTEGER array, dimension (N) Details of the interchanges and the block structure of D. If IPIV(k) > 0, then rows and columns k and IPIV(k) were interchanged and D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The length of WORK. LWORK >=1. For best performance LWORK >= N*NB, where NB is the block size returned by ILAENV. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, D(i,i) is exactly zero. The factorization has been completed, but the block diagonal matrix D is exactly singular, and division by zero will occur if it is used to solve a system of equations. Further Details =============== If UPLO = 'U', then A = U*D*U', where U = P(n)*U(n)* ... *P(k)U(k)* ..., i.e., U is a product of terms P(k)*U(k), where k decreases from n to 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as defined by IPIV(k), and U(k) is a unit upper triangular matrix, such that if the diagonal block D(k) is of order s (s = 1 or 2), then ( I v 0 ) k-s U(k) = ( 0 I 0 ) s ( 0 0 I ) n-k k-s s n-k If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), and A(k,k), and v overwrites A(1:k-2,k-1:k). If UPLO = 'L', then A = L*D*L', where L = P(1)*L(1)* ... *P(k)*L(k)* ..., i.e., L is a product of terms P(k)*L(k), where k increases from 1 to n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as defined by IPIV(k), and L(k) is a unit lower triangular matrix, such that if the diagonal block D(k) is of order s (s = 1 or 2), then ( I 0 0 ) k-1 L(k) = ( 0 I 0 ) s ( 0 v I ) n-k-s+1 k-1 s n-k-s+1 If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer j, k; extern logical lsame_(char *, char *); static integer nbmin, iinfo; static logical upper; static integer kb, nb; extern /* Subroutine */ int ssytf2_(char *, integer *, real *, integer *, integer *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int slasyf_(char *, integer *, integer *, integer *, real *, integer *, integer *, real *, integer *, integer *); static integer ldwork, iws; #define IPIV(I) ipiv[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*lwork < 1) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("SSYTRF", &i__1); return 0; } /* Determine the block size */ nb = ilaenv_(&c__1, "SSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, 6L, 1L); nbmin = 2; ldwork = *n; if (nb > 1 && nb < *n) { iws = ldwork * nb; if (*lwork < iws) { /* Computing MAX */ i__1 = *lwork / ldwork; nb = max(i__1,1); /* Computing MAX */ i__1 = 2, i__2 = ilaenv_(&c__2, "SSYTRF", uplo, n, &c_n1, &c_n1, & c_n1, 6L, 1L); nbmin = max(i__1,i__2); } } else { iws = 1; } if (nb < nbmin) { nb = *n; } if (upper) { /* Factorize A as U*D*U' using the upper triangle of A K is the main loop index, decreasing from N to 1 in steps of KB, where KB is the number of columns factorized by SLASYF; KB is either NB or NB-1, or K for the last block */ k = *n; L10: /* If K < 1, exit from loop */ if (k < 1) { goto L40; } if (k > nb) { /* Factorize columns k-kb+1:k of A and use blocked code to update columns 1:k-kb */ slasyf_(uplo, &k, &nb, &kb, &A(1,1), lda, &IPIV(1), &WORK(1), &ldwork, &iinfo); } else { /* Use unblocked code to factorize columns 1:k of A */ ssytf2_(uplo, &k, &A(1,1), lda, &IPIV(1), &iinfo); kb = k; } /* Set INFO on the first occurrence of a zero pivot */ if (*info == 0 && iinfo > 0) { *info = iinfo; } /* Decrease K and return to the start of the main loop */ k -= kb; goto L10; } else { /* Factorize A as L*D*L' using the lower triangle of A K is the main loop index, increasing from 1 to N in steps of KB, where KB is the number of columns factorized by SLASYF; KB is either NB or NB-1, or N-K+1 for the last block */ k = 1; L20: /* If K > N, exit from loop */ if (k > *n) { goto L40; } if (k <= *n - nb) { /* Factorize columns k:k+kb-1 of A and use blocked code to update columns k+kb:n */ i__1 = *n - k + 1; slasyf_(uplo, &i__1, &nb, &kb, &A(k,k), lda, &IPIV(k), &WORK(1), &ldwork, &iinfo); } else { /* Use unblocked code to factorize columns k:n of A */ i__1 = *n - k + 1; ssytf2_(uplo, &i__1, &A(k,k), lda, &IPIV(k), &iinfo); kb = *n - k + 1; } /* Set INFO on the first occurrence of a zero pivot */ if (*info == 0 && iinfo > 0) { *info = iinfo + k - 1; } /* Adjust IPIV */ i__1 = k + kb - 1; for (j = k; j <= k+kb-1; ++j) { if (IPIV(j) > 0) { IPIV(j) = IPIV(j) + k - 1; } else { IPIV(j) = IPIV(j) - k + 1; } /* L30: */ } /* Increase K and return to the start of the main loop */ k += kb; goto L20; } L40: WORK(1) = (real) iws; return 0; /* End of SSYTRF */ } /* ssytrf_ */
/***************************************************************************//** * Parallel tile LU factorization - dynamic scheduling - Right looking **/ void plasma_pdgetrf_rectil_quark(PLASMA_desc A, int *IPIV) { int k, m, n; int tempk, tempm, tempkm, tempkn, tempmm, tempnn; int ldak, ldam; double zone = (double)1.0; double mzone = (double)-1.0; void * fakedep; /* How many threads per panel? Probably needs to be adjusted during factorization. */ CORE_dgetrf_rectil_init(); for (k = 0; k < min(A.mt, A.nt); k++) { tempk = k * A.mb; tempm = A.m - tempk; tempkm = k == A.mt-1 ? tempm : A.mb; tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; ldak = BLKLDD(A, k); double *dA = A(k, k); int *dB = IPIV(k); PLASMA_desc pDesc = plasma_desc_submatrix(A, tempk, k*A.nb, tempm, tempkn); hclib_pragma_marker("omp", "task depend(inout:dA[0:A.mb*A.nb]) depend(out:dB[0:pDesc.n])", "pragma59_omp_task"); { int info[3]; info[1] = 0; info[2] = 1; CORE_dgetrf_rectil( pDesc, dB, info ); } /* * Update the trailing submatrix */ fakedep = (void *)(intptr_t)(k+1); for (n = k+1; n < A.nt; n++) { /* * Apply row interchange after the panel (work on the panel) */ tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb; PLASMA_desc descA = plasma_desc_submatrix(A, tempk, n*A.nb, tempm, tempnn); double *dA = A(k, n); double *dB = A(k, k); int *dipiv = IPIV(k); hclib_pragma_marker("omp", "task depend(inout:dA[0:1]) depend(in:dB[0:ldak], dipiv[0:tempkm])", "pragma82_omp_task"); CORE_dswptr_ontile(descA, 1, tempkm, dipiv, 1, dB, ldak); m = k+1; if ( m < A.mt ) { tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; ldam = BLKLDD(A, m); double *dA = A(m , k); double *dB = A(k , n); double *dC = A(m , n); hclib_pragma_marker("omp", "task depend(in:dA[0:A.mb*A.mb], dB[0:A.mb*A.mb]) depend(inout:dC[0:A.mb*A.mb])", "pragma93_omp_task"); cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_TRANSPOSE)PlasmaNoTrans, tempmm, tempnn, A.nb, mzone, dA, ldam, dB, ldak, zone, dC, ldam); for (m = k+2; m < A.mt; m++) { tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; ldam = BLKLDD(A, m); double *dA = A(m , k); double *dB = A(k , n); double *dC = A(m , n); double *fake1 = A(k+1, n); double *fake2 = (double *)fakedep; hclib_pragma_marker("omp", "task depend(in:dA[0:A.mb*A.mb], dB[0:A.mb*A.mb], fake2[0:1]) depend(inout:dC[0:A.mb*A.mb], fake1[0:A.mb*A.nb])", "pragma110_omp_task"); cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_TRANSPOSE)PlasmaNoTrans, tempmm, tempnn, A.nb, mzone, dA, ldam, dB, ldak, zone, dC, ldam); } } } } for (k = 0; k < min(A.mt, A.nt); k++) { int mintmp; tempk = k * A.mb; tempm = A.m - tempk; tempkm = k == A.mt-1 ? tempm : A.mb; tempkn = k == A.nt-1 ? A.n - k * A.nb : A.nb; mintmp = min(tempkm, tempkn); ldak = BLKLDD(A, k); /* * Apply row interchange behind the panel (work on the panel) */ fakedep = (void*)(intptr_t)k; for (n = 0; n < k; n++) { tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb; double *Aij = A(k, n); double *prevSwap = A(k-1, n); int *dipiv = IPIV(k); PLASMA_desc descA = plasma_desc_submatrix(A, tempk, n*A.nb, tempm, tempnn); hclib_pragma_marker("omp", "task depend(inout:Aij[0:1],fakedep) depend(in:dipiv[0:mintmp], prevSwap[0:A.lm*A.nb])", "pragma142_omp_task"); CORE_dlaswp_ontile(descA, 1, mintmp, dipiv, 1); } } }
/* Subroutine */ int dlasyf_(char *uplo, integer *n, integer *nb, integer *kb, doublereal *a, integer *lda, integer *ipiv, doublereal *w, integer * ldw, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DLASYF computes a partial factorization of a real symmetric matrix A using the Bunch-Kaufman diagonal pivoting method. The partial factorization has the form: A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: ( 0 U22 ) ( 0 D ) ( U12' U22' ) A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L' ( L21 I ) ( 0 A22 ) ( 0 I ) where the order of D is at most NB. The actual order is returned in the argument KB, and is either NB or NB-1, or N if N <= NB. DLASYF is an auxiliary routine called by DSYTRF. It uses blocked code (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrix A is stored: = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The order of the matrix A. N >= 0. NB (input) INTEGER The maximum number of columns of the matrix A that should be factored. NB should be at least 2 to allow for 2-by-2 pivot blocks. KB (output) INTEGER The number of columns of A that were actually factored. KB is either NB-1 or NB, or N if N <= NB. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading n-by-n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading n-by-n lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, A contains details of the partial factorization. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (output) INTEGER array, dimension (N) Details of the interchanges and the block structure of D. If UPLO = 'U', only the last KB elements of IPIV are set; if UPLO = 'L', only the first KB elements are set. If IPIV(k) > 0, then rows and columns k and IPIV(k) were interchanged and D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. W (workspace) DOUBLE PRECISION array, dimension (LDW,NB) LDW (input) INTEGER The leading dimension of the array W. LDW >= max(1,N). INFO (output) INTEGER = 0: successful exit > 0: if INFO = k, D(k,k) is exactly zero. The factorization has been completed, but the block diagonal matrix D is exactly singular. ===================================================================== Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b8 = -1.; static doublereal c_b9 = 1.; /* System generated locals */ integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer imax, jmax, j, k; static doublereal t, alpha; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *), dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer *); static integer kstep; static doublereal r1, d11, d21, d22; static integer jb, jj, kk, jp, kp; static doublereal absakk; static integer kw; extern integer idamax_(integer *, doublereal *, integer *); static doublereal colmax, rowmax; static integer kkw; #define IPIV(I) ipiv[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define W(I,J) w[(I)-1 + ((J)-1)* ( *ldw)] *info = 0; /* Initialize ALPHA for use in choosing pivot block size. */ alpha = (sqrt(17.) + 1.) / 8.; if (lsame_(uplo, "U")) { /* Factorize the trailing columns of A using the upper triangle of A and working backwards, and compute the matrix W = U12*D for use in updating A11 K is the main loop index, decreasing from N in steps of 1 or 2 KW is the column of W which corresponds to column K of A */ k = *n; L10: kw = *nb + k - *n; /* Exit from loop */ if (k <= *n - *nb + 1 && *nb < *n || k < 1) { goto L30; } /* Copy column K of A to column KW of W and update it */ dcopy_(&k, &A(1,k), &c__1, &W(1,kw), &c__1); if (k < *n) { i__1 = *n - k; dgemv_("No transpose", &k, &i__1, &c_b8, &A(1,k+1), lda, &W(k,kw+1), ldw, &c_b9, &W(1,kw), &c__1); } kstep = 1; /* Determine rows and columns to be interchanged and whether a 1-by-1 or 2-by-2 pivot block will be used */ absakk = (d__1 = W(k,kw), abs(d__1)); /* IMAX is the row-index of the largest off-diagonal element in column K, and COLMAX is its absolute value */ if (k > 1) { i__1 = k - 1; imax = idamax_(&i__1, &W(1,kw), &c__1); colmax = (d__1 = W(imax,kw), abs(d__1)); } else { colmax = 0.; } if (max(absakk,colmax) == 0.) { /* Column K is zero: set INFO and continue */ if (*info == 0) { *info = k; } kp = k; } else { if (absakk >= alpha * colmax) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else { /* Copy column IMAX to column KW-1 of W and updat e it */ dcopy_(&imax, &A(1,imax), &c__1, &W(1,kw-1), &c__1); i__1 = k - imax; dcopy_(&i__1, &A(imax,imax+1), lda, &W(imax+1,kw-1), &c__1); if (k < *n) { i__1 = *n - k; dgemv_("No transpose", &k, &i__1, &c_b8, &A(1,k+1), lda, &W(imax,kw+1), ldw, &c_b9, &W(1,kw-1), &c__1) ; } /* JMAX is the column-index of the largest off-di agonal element in row IMAX, and ROWMAX is its absolut e value */ i__1 = k - imax; jmax = imax + idamax_(&i__1, &W(imax+1,kw-1), &c__1); rowmax = (d__1 = W(jmax,kw-1), abs(d__1)); if (imax > 1) { i__1 = imax - 1; jmax = idamax_(&i__1, &W(1,kw-1), &c__1); /* Computing MAX */ d__2 = rowmax, d__3 = (d__1 = W(jmax,kw-1), abs(d__1)); rowmax = max(d__2,d__3); } if (absakk >= alpha * colmax * (colmax / rowmax)) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else if ((d__1 = W(imax,kw-1), abs(d__1)) >= alpha * rowmax) { /* interchange rows and columns K and IMAX , use 1-by-1 pivot block */ kp = imax; /* copy column KW-1 of W to column KW */ dcopy_(&k, &W(1,kw-1), &c__1, &W(1,kw), &c__1); } else { /* interchange rows and columns K-1 and IM AX, use 2-by-2 pivot block */ kp = imax; kstep = 2; } } kk = k - kstep + 1; kkw = *nb + kk - *n; /* Updated column KP is already stored in column KKW of W */ if (kp != kk) { /* Copy non-updated column KK to column KP */ A(kp,k) = A(kk,k); i__1 = k - 1 - kp; dcopy_(&i__1, &A(kp+1,kk), &c__1, &A(kp,kp+1), lda); dcopy_(&kp, &A(1,kk), &c__1, &A(1,kp), & c__1); /* Interchange rows KK and KP in last KK columns of A and W */ i__1 = *n - kk + 1; dswap_(&i__1, &A(kk,kk), lda, &A(kp,kk), lda); i__1 = *n - kk + 1; dswap_(&i__1, &W(kk,kkw), ldw, &W(kp,kkw), ldw); } if (kstep == 1) { /* 1-by-1 pivot block D(k): column KW of W now ho lds W(k) = U(k)*D(k) where U(k) is the k-th column of U Store U(k) in column k of A */ dcopy_(&k, &W(1,kw), &c__1, &A(1,k), & c__1); r1 = 1. / A(k,k); i__1 = k - 1; dscal_(&i__1, &r1, &A(1,k), &c__1); } else { /* 2-by-2 pivot block D(k): columns KW and KW-1 o f W now hold ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) where U(k) and U(k-1) are the k-th and (k-1)-t h columns of U */ if (k > 2) { /* Store U(k) and U(k-1) in columns k and k-1 of A */ d21 = W(k-1,kw); d11 = W(k,kw) / d21; d22 = W(k-1,kw-1) / d21; t = 1. / (d11 * d22 - 1.); d21 = t / d21; i__1 = k - 2; for (j = 1; j <= k-2; ++j) { A(j,k-1) = d21 * (d11 * W(j,kw-1) - W(j,kw)); A(j,k) = d21 * (d22 * W(j,kw) - W(j,kw-1)); /* L20: */ } } /* Copy D(k) to A */ A(k-1,k-1) = W(k-1,kw-1); A(k-1,k) = W(k-1,kw); A(k,k) = W(k,kw); } } /* Store details of the interchanges in IPIV */ if (kstep == 1) { IPIV(k) = kp; } else { IPIV(k) = -kp; IPIV(k - 1) = -kp; } /* Decrease K and return to the start of the main loop */ k -= kstep; goto L10; L30: /* Update the upper triangle of A11 (= A(1:k,1:k)) as A11 := A11 - U12*D*U12' = A11 - U12*W' computing blocks of NB columns at a time */ i__1 = -(*nb); for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { /* Computing MIN */ i__2 = *nb, i__3 = k - j + 1; jb = min(i__2,i__3); /* Update the upper triangle of the diagonal block */ i__2 = j + jb - 1; for (jj = j; jj <= j+jb-1; ++jj) { i__3 = jj - j + 1; i__4 = *n - k; dgemv_("No transpose", &i__3, &i__4, &c_b8, &A(j,k+1), lda, &W(jj,kw+1), ldw, &c_b9, &A(j,jj), &c__1); /* L40: */ } /* Update the rectangular superdiagonal block */ i__2 = j - 1; i__3 = *n - k; dgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &c_b8, &A(1,k+1), lda, &W(j,kw+1), ldw, &c_b9, &A(1,j), lda); /* L50: */ } /* Put U12 in standard form by partially undoing the interchang es in columns k+1:n */ j = k + 1; L60: jj = j; jp = IPIV(j); if (jp < 0) { jp = -jp; ++j; } ++j; if (jp != jj && j <= *n) { i__1 = *n - j + 1; dswap_(&i__1, &A(jp,j), lda, &A(jj,j), lda); } if (j <= *n) { goto L60; } /* Set KB to the number of columns factorized */ *kb = *n - k; } else { /* Factorize the leading columns of A using the lower triangle of A and working forwards, and compute the matrix W = L21*D for use in updating A22 K is the main loop index, increasing from 1 in steps of 1 or 2 */ k = 1; L70: /* Exit from loop */ if (k >= *nb && *nb < *n || k > *n) { goto L90; } /* Copy column K of A to column K of W and update it */ i__1 = *n - k + 1; dcopy_(&i__1, &A(k,k), &c__1, &W(k,k), &c__1); i__1 = *n - k + 1; i__2 = k - 1; dgemv_("No transpose", &i__1, &i__2, &c_b8, &A(k,1), lda, &W(k,1), ldw, &c_b9, &W(k,k), &c__1); kstep = 1; /* Determine rows and columns to be interchanged and whether a 1-by-1 or 2-by-2 pivot block will be used */ absakk = (d__1 = W(k,k), abs(d__1)); /* IMAX is the row-index of the largest off-diagonal element in column K, and COLMAX is its absolute value */ if (k < *n) { i__1 = *n - k; imax = k + idamax_(&i__1, &W(k+1,k), &c__1); colmax = (d__1 = W(imax,k), abs(d__1)); } else { colmax = 0.; } if (max(absakk,colmax) == 0.) { /* Column K is zero: set INFO and continue */ if (*info == 0) { *info = k; } kp = k; } else { if (absakk >= alpha * colmax) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else { /* Copy column IMAX to column K+1 of W and update it */ i__1 = imax - k; dcopy_(&i__1, &A(imax,k), lda, &W(k,k+1), &c__1); i__1 = *n - imax + 1; dcopy_(&i__1, &A(imax,imax), &c__1, &W(imax,k+1), &c__1); i__1 = *n - k + 1; i__2 = k - 1; dgemv_("No transpose", &i__1, &i__2, &c_b8, &A(k,1), lda, &W(imax,1), ldw, &c_b9, &W(k,k+1), &c__1); /* JMAX is the column-index of the largest off-di agonal element in row IMAX, and ROWMAX is its absolut e value */ i__1 = imax - k; jmax = k - 1 + idamax_(&i__1, &W(k,k+1), &c__1) ; rowmax = (d__1 = W(jmax,k+1), abs(d__1)); if (imax < *n) { i__1 = *n - imax; jmax = imax + idamax_(&i__1, &W(imax+1,k+1), &c__1); /* Computing MAX */ d__2 = rowmax, d__3 = (d__1 = W(jmax,k+1), abs(d__1)); rowmax = max(d__2,d__3); } if (absakk >= alpha * colmax * (colmax / rowmax)) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else if ((d__1 = W(imax,k+1), abs(d__1)) >= alpha * rowmax) { /* interchange rows and columns K and IMAX , use 1-by-1 pivot block */ kp = imax; /* copy column K+1 of W to column K */ i__1 = *n - k + 1; dcopy_(&i__1, &W(k,k+1), &c__1, &W(k,k), &c__1); } else { /* interchange rows and columns K+1 and IM AX, use 2-by-2 pivot block */ kp = imax; kstep = 2; } } kk = k + kstep - 1; /* Updated column KP is already stored in column KK of W */ if (kp != kk) { /* Copy non-updated column KK to column KP */ A(kp,k) = A(kk,k); i__1 = kp - k - 1; dcopy_(&i__1, &A(k+1,kk), &c__1, &A(kp,k+1), lda); i__1 = *n - kp + 1; dcopy_(&i__1, &A(kp,kk), &c__1, &A(kp,kp), &c__1); /* Interchange rows KK and KP in first KK columns of A and W */ dswap_(&kk, &A(kk,1), lda, &A(kp,1), lda); dswap_(&kk, &W(kk,1), ldw, &W(kp,1), ldw); } if (kstep == 1) { /* 1-by-1 pivot block D(k): column k of W now hol ds W(k) = L(k)*D(k) where L(k) is the k-th column of L Store L(k) in column k of A */ i__1 = *n - k + 1; dcopy_(&i__1, &W(k,k), &c__1, &A(k,k), & c__1); if (k < *n) { r1 = 1. / A(k,k); i__1 = *n - k; dscal_(&i__1, &r1, &A(k+1,k), &c__1); } } else { /* 2-by-2 pivot block D(k): columns k and k+1 of W now hold ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) where L(k) and L(k+1) are the k-th and (k+1)-t h columns of L */ if (k < *n - 1) { /* Store L(k) and L(k+1) in columns k and k+1 of A */ d21 = W(k+1,k); d11 = W(k+1,k+1) / d21; d22 = W(k,k) / d21; t = 1. / (d11 * d22 - 1.); d21 = t / d21; i__1 = *n; for (j = k + 2; j <= *n; ++j) { A(j,k) = d21 * (d11 * W(j,k) - W(j,k+1)); A(j,k+1) = d21 * (d22 * W(j,k+1) - W(j,k)); /* L80: */ } } /* Copy D(k) to A */ A(k,k) = W(k,k); A(k+1,k) = W(k+1,k); A(k+1,k+1) = W(k+1,k+1); } } /* Store details of the interchanges in IPIV */ if (kstep == 1) { IPIV(k) = kp; } else { IPIV(k) = -kp; IPIV(k + 1) = -kp; } /* Increase K and return to the start of the main loop */ k += kstep; goto L70; L90: /* Update the lower triangle of A22 (= A(k:n,k:n)) as A22 := A22 - L21*D*L21' = A22 - L21*W' computing blocks of NB columns at a time */ i__1 = *n; i__2 = *nb; for (j = k; *nb < 0 ? j >= *n : j <= *n; j += *nb) { /* Computing MIN */ i__3 = *nb, i__4 = *n - j + 1; jb = min(i__3,i__4); /* Update the lower triangle of the diagonal block */ i__3 = j + jb - 1; for (jj = j; jj <= j+jb-1; ++jj) { i__4 = j + jb - jj; i__5 = k - 1; dgemv_("No transpose", &i__4, &i__5, &c_b8, &A(jj,1), lda, &W(jj,1), ldw, &c_b9, &A(jj,jj) , &c__1); /* L100: */ } /* Update the rectangular subdiagonal block */ if (j + jb <= *n) { i__3 = *n - j - jb + 1; i__4 = k - 1; dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &c_b8, &A(j+jb,1), lda, &W(j,1), ldw, &c_b9, &A(j+jb,j), lda); } /* L110: */ } /* Put L21 in standard form by partially undoing the interchang es in columns 1:k-1 */ j = k - 1; L120: jj = j; jp = IPIV(j); if (jp < 0) { jp = -jp; --j; } --j; if (jp != jj && j >= 1) { dswap_(&j, &A(jp,1), lda, &A(jj,1), lda); } if (j >= 1) { goto L120; } /* Set KB to the number of columns factorized */ *kb = k - 1; } return 0; /* End of DLASYF */ } /* dlasyf_ */
/* Subroutine */ int dgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, doublereal *b, integer *ldb, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DGBTRS solves a system of linear equations A * X = B or A' * X = B with a general band matrix A using the LU factorization computed by DGBTRF. Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations. = 'N': A * X = B (No transpose) = 'T': A'* X = B (Transpose) = 'C': A'* X = B (Conjugate transpose = Transpose) N (input) INTEGER The order of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. AB (input) DOUBLE PRECISION array, dimension (LDAB,N) Details of the LU factorization of the band matrix A, as computed by DGBTRF. U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= 2*KL+KU+1. IPIV (input) INTEGER array, dimension (N) The pivot indices; for 1 <= i <= N, row i of the matrix was interchanged with row IPIV(i). B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static doublereal c_b7 = -1.; static integer c__1 = 1; static doublereal c_b23 = 1.; /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); static integer i, j, l; extern logical lsame_(char *, char *); extern /* Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer *), dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static logical lnoti; static integer kd, lm; extern /* Subroutine */ int xerbla_(char *, integer *); static logical notran; #define IPIV(I) ipiv[(I)-1] #define AB(I,J) ab[(I)-1 + ((J)-1)* ( *ldab)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] *info = 0; notran = lsame_(trans, "N"); if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*ldab < (*kl << 1) + *ku + 1) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -10; } if (*info != 0) { i__1 = -(*info); xerbla_("DGBTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } kd = *ku + *kl + 1; lnoti = *kl > 0; if (notran) { /* Solve A*X = B. Solve L*X = B, overwriting B with X. L is represented as a product of permutations and unit lower triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), where each transformation L(i) is a rank-one modification of the identity matrix. */ if (lnoti) { i__1 = *n - 1; for (j = 1; j <= *n-1; ++j) { /* Computing MIN */ i__2 = *kl, i__3 = *n - j; lm = min(i__2,i__3); l = IPIV(j); if (l != j) { dswap_(nrhs, &B(l,1), ldb, &B(j,1), ldb); } dger_(&lm, nrhs, &c_b7, &AB(kd+1,j), &c__1, &B(j,1), ldb, &B(j+1,1), ldb); /* L10: */ } } i__1 = *nrhs; for (i = 1; i <= *nrhs; ++i) { /* Solve U*X = B, overwriting B with X. */ i__2 = *kl + *ku; dtbsv_("Upper", "No transpose", "Non-unit", n, &i__2, &AB(1,1), ldab, &B(1,i), &c__1); /* L20: */ } } else { /* Solve A'*X = B. */ i__1 = *nrhs; for (i = 1; i <= *nrhs; ++i) { /* Solve U'*X = B, overwriting B with X. */ i__2 = *kl + *ku; dtbsv_("Upper", "Transpose", "Non-unit", n, &i__2, &AB(1,1), ldab, &B(1,i), &c__1); /* L30: */ } /* Solve L'*X = B, overwriting B with X. */ if (lnoti) { for (j = *n - 1; j >= 1; --j) { /* Computing MIN */ i__1 = *kl, i__2 = *n - j; lm = min(i__1,i__2); dgemv_("Transpose", &lm, nrhs, &c_b7, &B(j+1,1), ldb, &AB(kd+1,j), &c__1, &c_b23, &B(j,1), ldb); l = IPIV(j); if (l != j) { dswap_(nrhs, &B(l,1), ldb, &B(j,1), ldb); } /* L40: */ } } } return 0; /* End of DGBTRS */ } /* dgbtrs_ */
/* Subroutine */ int chetri_(char *uplo, integer *n, complex *a, integer *lda, integer *ipiv, complex *work, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CHETRI computes the inverse of a complex Hermitian indefinite matrix A using the factorization A = U*D*U**H or A = L*D*L**H computed by CHETRF. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the details of the factorization are stored as an upper or lower triangular matrix. = 'U': Upper triangular, form is A = U*D*U**H; = 'L': Lower triangular, form is A = L*D*L**H. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the block diagonal matrix D and the multipliers used to obtain the factor U or L as computed by CHETRF. On exit, if INFO = 0, the (Hermitian) inverse of the original matrix. If UPLO = 'U', the upper triangular part of the inverse is formed and the part of A below the diagonal is not referenced; if UPLO = 'L' the lower triangular part of the inverse is formed and the part of A above the diagonal is not referenced. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (input) INTEGER array, dimension (N) Details of the interchanges and the block structure of D as determined by CHETRF. WORK (workspace) COMPLEX array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its inverse could not be computed. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static complex c_b2 = {0.f,0.f}; static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; complex q__1, q__2; /* Builtin functions */ double c_abs(complex *); void r_cnjg(complex *, complex *); /* Local variables */ static complex temp, akkp1; static real d; static integer j, k; static real t; extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer *, complex *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex * , integer *, complex *, integer *, complex *, complex *, integer * ), ccopy_(integer *, complex *, integer *, complex *, integer *), cswap_(integer *, complex *, integer *, complex *, integer *); static integer kstep; static logical upper; static real ak; static integer kp; extern /* Subroutine */ int xerbla_(char *, integer *); static real akp1; #define IPIV(I) ipiv[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("CHETRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check that the diagonal matrix D is nonsingular. */ if (upper) { /* Upper triangular storage: examine D from bottom to top */ for (*info = *n; *info >= 1; --(*info)) { i__1 = *info + *info * a_dim1; if (IPIV(*info) > 0 && (A(*info,*info).r == 0.f && A(*info,*info).i == 0.f)) { return 0; } /* L10: */ } } else { /* Lower triangular storage: examine D from top to bottom. */ i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { i__2 = *info + *info * a_dim1; if (IPIV(*info) > 0 && (A(*info,*info).r == 0.f && A(*info,*info).i == 0.f)) { return 0; } /* L20: */ } } *info = 0; if (upper) { /* Compute inv(A) from the factorization A = U*D*U'. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = 1; L30: /* If K > N, exit from loop. */ if (k > *n) { goto L50; } if (IPIV(k) > 0) { /* 1 x 1 diagonal block Invert the diagonal block. */ i__1 = k + k * a_dim1; i__2 = k + k * a_dim1; d__1 = 1.f / A(k,k).r; A(k,k).r = d__1, A(k,k).i = 0.f; /* Compute column K of the inverse. */ if (k > 1) { i__1 = k - 1; ccopy_(&i__1, &A(1,k), &c__1, &WORK(1), &c__1); i__1 = k - 1; q__1.r = -1.f, q__1.i = 0.f; chemv_(uplo, &i__1, &q__1, &A(1,1), lda, &WORK(1), &c__1, &c_b2, &A(1,k), &c__1); i__1 = k + k * a_dim1; i__2 = k + k * a_dim1; i__3 = k - 1; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(1,k), & c__1); d__1 = q__2.r; q__1.r = A(k,k).r - d__1, q__1.i = A(k,k).i; A(k,k).r = q__1.r, A(k,k).i = q__1.i; } kstep = 1; } else { /* 2 x 2 diagonal block Invert the diagonal block. */ t = c_abs(&A(k,k+1)); i__1 = k + k * a_dim1; ak = A(k,k).r / t; i__1 = k + 1 + (k + 1) * a_dim1; akp1 = A(k+1,k+1).r / t; i__1 = k + (k + 1) * a_dim1; q__1.r = A(k,k+1).r / t, q__1.i = A(k,k+1).i / t; akkp1.r = q__1.r, akkp1.i = q__1.i; d = t * (ak * akp1 - 1.f); i__1 = k + k * a_dim1; d__1 = akp1 / d; A(k,k).r = d__1, A(k,k).i = 0.f; i__1 = k + 1 + (k + 1) * a_dim1; d__1 = ak / d; A(k+1,k+1).r = d__1, A(k+1,k+1).i = 0.f; i__1 = k + (k + 1) * a_dim1; q__2.r = -(doublereal)akkp1.r, q__2.i = -(doublereal)akkp1.i; q__1.r = q__2.r / d, q__1.i = q__2.i / d; A(k,k+1).r = q__1.r, A(k,k+1).i = q__1.i; /* Compute columns K and K+1 of the inverse. */ if (k > 1) { i__1 = k - 1; ccopy_(&i__1, &A(1,k), &c__1, &WORK(1), &c__1); i__1 = k - 1; q__1.r = -1.f, q__1.i = 0.f; chemv_(uplo, &i__1, &q__1, &A(1,1), lda, &WORK(1), &c__1, &c_b2, &A(1,k), &c__1); i__1 = k + k * a_dim1; i__2 = k + k * a_dim1; i__3 = k - 1; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(1,k), & c__1); d__1 = q__2.r; q__1.r = A(k,k).r - d__1, q__1.i = A(k,k).i; A(k,k).r = q__1.r, A(k,k).i = q__1.i; i__1 = k + (k + 1) * a_dim1; i__2 = k + (k + 1) * a_dim1; i__3 = k - 1; cdotc_(&q__2, &i__3, &A(1,k), &c__1, &A(1,k+1), &c__1); q__1.r = A(k,k+1).r - q__2.r, q__1.i = A(k,k+1).i - q__2.i; A(k,k+1).r = q__1.r, A(k,k+1).i = q__1.i; i__1 = k - 1; ccopy_(&i__1, &A(1,k+1), &c__1, &WORK(1), & c__1); i__1 = k - 1; q__1.r = -1.f, q__1.i = 0.f; chemv_(uplo, &i__1, &q__1, &A(1,1), lda, &WORK(1), &c__1, &c_b2, &A(1,k+1), &c__1); i__1 = k + 1 + (k + 1) * a_dim1; i__2 = k + 1 + (k + 1) * a_dim1; i__3 = k - 1; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(1,k+1) , &c__1); d__1 = q__2.r; q__1.r = A(k+1,k+1).r - d__1, q__1.i = A(k+1,k+1).i; A(k+1,k+1).r = q__1.r, A(k+1,k+1).i = q__1.i; } kstep = 2; } kp = (i__1 = IPIV(k), abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the leading submatrix A(1:k+1,1:k+1) */ i__1 = kp - 1; cswap_(&i__1, &A(1,k), &c__1, &A(1,kp), & c__1); i__1 = k - 1; for (j = kp + 1; j <= k-1; ++j) { r_cnjg(&q__1, &A(j,k)); temp.r = q__1.r, temp.i = q__1.i; i__2 = j + k * a_dim1; r_cnjg(&q__1, &A(kp,j)); A(j,k).r = q__1.r, A(j,k).i = q__1.i; i__2 = kp + j * a_dim1; A(kp,j).r = temp.r, A(kp,j).i = temp.i; /* L40: */ } i__1 = kp + k * a_dim1; r_cnjg(&q__1, &A(kp,k)); A(kp,k).r = q__1.r, A(kp,k).i = q__1.i; i__1 = k + k * a_dim1; temp.r = A(k,k).r, temp.i = A(k,k).i; i__1 = k + k * a_dim1; i__2 = kp + kp * a_dim1; A(k,k).r = A(kp,kp).r, A(k,k).i = A(kp,kp).i; i__1 = kp + kp * a_dim1; A(kp,kp).r = temp.r, A(kp,kp).i = temp.i; if (kstep == 2) { i__1 = k + (k + 1) * a_dim1; temp.r = A(k,k+1).r, temp.i = A(k,k+1).i; i__1 = k + (k + 1) * a_dim1; i__2 = kp + (k + 1) * a_dim1; A(k,k+1).r = A(kp,k+1).r, A(k,k+1).i = A(kp,k+1).i; i__1 = kp + (k + 1) * a_dim1; A(kp,k+1).r = temp.r, A(kp,k+1).i = temp.i; } } k += kstep; goto L30; L50: ; } else { /* Compute inv(A) from the factorization A = L*D*L'. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = *n; L60: /* If K < 1, exit from loop. */ if (k < 1) { goto L80; } if (IPIV(k) > 0) { /* 1 x 1 diagonal block Invert the diagonal block. */ i__1 = k + k * a_dim1; i__2 = k + k * a_dim1; d__1 = 1.f / A(k,k).r; A(k,k).r = d__1, A(k,k).i = 0.f; /* Compute column K of the inverse. */ if (k < *n) { i__1 = *n - k; ccopy_(&i__1, &A(k+1,k), &c__1, &WORK(1), &c__1); i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; chemv_(uplo, &i__1, &q__1, &A(k+1,k+1), lda, &WORK(1), &c__1, &c_b2, &A(k+1,k), &c__1); i__1 = k + k * a_dim1; i__2 = k + k * a_dim1; i__3 = *n - k; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(k+1,k), &c__1); d__1 = q__2.r; q__1.r = A(k,k).r - d__1, q__1.i = A(k,k).i; A(k,k).r = q__1.r, A(k,k).i = q__1.i; } kstep = 1; } else { /* 2 x 2 diagonal block Invert the diagonal block. */ t = c_abs(&A(k,k-1)); i__1 = k - 1 + (k - 1) * a_dim1; ak = A(k-1,k-1).r / t; i__1 = k + k * a_dim1; akp1 = A(k,k).r / t; i__1 = k + (k - 1) * a_dim1; q__1.r = A(k,k-1).r / t, q__1.i = A(k,k-1).i / t; akkp1.r = q__1.r, akkp1.i = q__1.i; d = t * (ak * akp1 - 1.f); i__1 = k - 1 + (k - 1) * a_dim1; d__1 = akp1 / d; A(k-1,k-1).r = d__1, A(k-1,k-1).i = 0.f; i__1 = k + k * a_dim1; d__1 = ak / d; A(k,k).r = d__1, A(k,k).i = 0.f; i__1 = k + (k - 1) * a_dim1; q__2.r = -(doublereal)akkp1.r, q__2.i = -(doublereal)akkp1.i; q__1.r = q__2.r / d, q__1.i = q__2.i / d; A(k,k-1).r = q__1.r, A(k,k-1).i = q__1.i; /* Compute columns K-1 and K of the inverse. */ if (k < *n) { i__1 = *n - k; ccopy_(&i__1, &A(k+1,k), &c__1, &WORK(1), &c__1); i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; chemv_(uplo, &i__1, &q__1, &A(k+1,k+1), lda, &WORK(1), &c__1, &c_b2, &A(k+1,k), &c__1); i__1 = k + k * a_dim1; i__2 = k + k * a_dim1; i__3 = *n - k; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(k+1,k), &c__1); d__1 = q__2.r; q__1.r = A(k,k).r - d__1, q__1.i = A(k,k).i; A(k,k).r = q__1.r, A(k,k).i = q__1.i; i__1 = k + (k - 1) * a_dim1; i__2 = k + (k - 1) * a_dim1; i__3 = *n - k; cdotc_(&q__2, &i__3, &A(k+1,k), &c__1, &A(k+1,k-1), &c__1); q__1.r = A(k,k-1).r - q__2.r, q__1.i = A(k,k-1).i - q__2.i; A(k,k-1).r = q__1.r, A(k,k-1).i = q__1.i; i__1 = *n - k; ccopy_(&i__1, &A(k+1,k-1), &c__1, &WORK(1), & c__1); i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; chemv_(uplo, &i__1, &q__1, &A(k+1,k+1), lda, &WORK(1), &c__1, &c_b2, &A(k+1,k-1), &c__1); i__1 = k - 1 + (k - 1) * a_dim1; i__2 = k - 1 + (k - 1) * a_dim1; i__3 = *n - k; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(k+1,k-1), &c__1); d__1 = q__2.r; q__1.r = A(k-1,k-1).r - d__1, q__1.i = A(k-1,k-1).i; A(k-1,k-1).r = q__1.r, A(k-1,k-1).i = q__1.i; } kstep = 2; } kp = (i__1 = IPIV(k), abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the trailing submatrix A(k-1:n,k-1:n) */ if (kp < *n) { i__1 = *n - kp; cswap_(&i__1, &A(kp+1,k), &c__1, &A(kp+1,kp), &c__1); } i__1 = kp - 1; for (j = k + 1; j <= kp-1; ++j) { r_cnjg(&q__1, &A(j,k)); temp.r = q__1.r, temp.i = q__1.i; i__2 = j + k * a_dim1; r_cnjg(&q__1, &A(kp,j)); A(j,k).r = q__1.r, A(j,k).i = q__1.i; i__2 = kp + j * a_dim1; A(kp,j).r = temp.r, A(kp,j).i = temp.i; /* L70: */ } i__1 = kp + k * a_dim1; r_cnjg(&q__1, &A(kp,k)); A(kp,k).r = q__1.r, A(kp,k).i = q__1.i; i__1 = k + k * a_dim1; temp.r = A(k,k).r, temp.i = A(k,k).i; i__1 = k + k * a_dim1; i__2 = kp + kp * a_dim1; A(k,k).r = A(kp,kp).r, A(k,k).i = A(kp,kp).i; i__1 = kp + kp * a_dim1; A(kp,kp).r = temp.r, A(kp,kp).i = temp.i; if (kstep == 2) { i__1 = k + (k - 1) * a_dim1; temp.r = A(k,k-1).r, temp.i = A(k,k-1).i; i__1 = k + (k - 1) * a_dim1; i__2 = kp + (k - 1) * a_dim1; A(k,k-1).r = A(kp,k-1).r, A(k,k-1).i = A(kp,k-1).i; i__1 = kp + (k - 1) * a_dim1; A(kp,k-1).r = temp.r, A(kp,k-1).i = temp.i; } } k -= kstep; goto L60; L80: ; } return 0; /* End of CHETRI */ } /* chetri_ */
/* Subroutine */ int chptri_(char *uplo, integer *n, complex *ap, integer * ipiv, complex *work, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CHPTRI computes the inverse of a complex Hermitian indefinite matrix A in packed storage using the factorization A = U*D*U**H or A = L*D*L**H computed by CHPTRF. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the details of the factorization are stored as an upper or lower triangular matrix. = 'U': Upper triangular, form is A = U*D*U**H; = 'L': Lower triangular, form is A = L*D*L**H. N (input) INTEGER The order of the matrix A. N >= 0. AP (input/output) COMPLEX array, dimension (N*(N+1)/2) On entry, the block diagonal matrix D and the multipliers used to obtain the factor U or L as computed by CHPTRF, stored as a packed triangular matrix. On exit, if INFO = 0, the (Hermitian) inverse of the original matrix, stored as a packed triangular matrix. The j-th column of inv(A) is stored in the array AP as follows: if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. IPIV (input) INTEGER array, dimension (N) Details of the interchanges and the block structure of D as determined by CHPTRF. WORK (workspace) COMPLEX array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its inverse could not be computed. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static complex c_b2 = {0.f,0.f}; static integer c__1 = 1; /* System generated locals */ integer i__1, i__2, i__3; doublereal d__1; complex q__1, q__2; /* Builtin functions */ double c_abs(complex *); void r_cnjg(complex *, complex *); /* Local variables */ static complex temp, akkp1; static real d; static integer j, k; static real t; extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer *, complex *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *), chpmv_(char *, integer *, complex *, complex *, complex *, integer *, complex *, complex *, integer *), cswap_(integer *, complex *, integer *, complex *, integer *); static integer kstep; static logical upper; static real ak; static integer kc, kp, kx; extern /* Subroutine */ int xerbla_(char *, integer *); static integer kcnext, kpc, npp; static real akp1; #define WORK(I) work[(I)-1] #define IPIV(I) ipiv[(I)-1] #define AP(I) ap[(I)-1] *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); xerbla_("CHPTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check that the diagonal matrix D is nonsingular. */ if (upper) { /* Upper triangular storage: examine D from bottom to top */ kp = *n * (*n + 1) / 2; for (*info = *n; *info >= 1; --(*info)) { i__1 = kp; if (IPIV(*info) > 0 && (AP(kp).r == 0.f && AP(kp).i == 0.f)) { return 0; } kp -= *info; /* L10: */ } } else { /* Lower triangular storage: examine D from top to bottom. */ kp = 1; i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { i__2 = kp; if (IPIV(*info) > 0 && (AP(kp).r == 0.f && AP(kp).i == 0.f)) { return 0; } kp = kp + *n - *info + 1; /* L20: */ } } *info = 0; if (upper) { /* Compute inv(A) from the factorization A = U*D*U'. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = 1; kc = 1; L30: /* If K > N, exit from loop. */ if (k > *n) { goto L50; } kcnext = kc + k; if (IPIV(k) > 0) { /* 1 x 1 diagonal block Invert the diagonal block. */ i__1 = kc + k - 1; i__2 = kc + k - 1; d__1 = 1.f / AP(kc+k-1).r; AP(kc+k-1).r = d__1, AP(kc+k-1).i = 0.f; /* Compute column K of the inverse. */ if (k > 1) { i__1 = k - 1; ccopy_(&i__1, &AP(kc), &c__1, &WORK(1), &c__1); i__1 = k - 1; q__1.r = -1.f, q__1.i = 0.f; chpmv_(uplo, &i__1, &q__1, &AP(1), &WORK(1), &c__1, &c_b2, & AP(kc), &c__1); i__1 = kc + k - 1; i__2 = kc + k - 1; i__3 = k - 1; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kc), &c__1); d__1 = q__2.r; q__1.r = AP(kc+k-1).r - d__1, q__1.i = AP(kc+k-1).i; AP(kc+k-1).r = q__1.r, AP(kc+k-1).i = q__1.i; } kstep = 1; } else { /* 2 x 2 diagonal block Invert the diagonal block. */ t = c_abs(&AP(kcnext + k - 1)); i__1 = kc + k - 1; ak = AP(kc+k-1).r / t; i__1 = kcnext + k; akp1 = AP(kcnext+k).r / t; i__1 = kcnext + k - 1; q__1.r = AP(kcnext+k-1).r / t, q__1.i = AP(kcnext+k-1).i / t; akkp1.r = q__1.r, akkp1.i = q__1.i; d = t * (ak * akp1 - 1.f); i__1 = kc + k - 1; d__1 = akp1 / d; AP(kc+k-1).r = d__1, AP(kc+k-1).i = 0.f; i__1 = kcnext + k; d__1 = ak / d; AP(kcnext+k).r = d__1, AP(kcnext+k).i = 0.f; i__1 = kcnext + k - 1; q__2.r = -(doublereal)akkp1.r, q__2.i = -(doublereal)akkp1.i; q__1.r = q__2.r / d, q__1.i = q__2.i / d; AP(kcnext+k-1).r = q__1.r, AP(kcnext+k-1).i = q__1.i; /* Compute columns K and K+1 of the inverse. */ if (k > 1) { i__1 = k - 1; ccopy_(&i__1, &AP(kc), &c__1, &WORK(1), &c__1); i__1 = k - 1; q__1.r = -1.f, q__1.i = 0.f; chpmv_(uplo, &i__1, &q__1, &AP(1), &WORK(1), &c__1, &c_b2, & AP(kc), &c__1); i__1 = kc + k - 1; i__2 = kc + k - 1; i__3 = k - 1; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kc), &c__1); d__1 = q__2.r; q__1.r = AP(kc+k-1).r - d__1, q__1.i = AP(kc+k-1).i; AP(kc+k-1).r = q__1.r, AP(kc+k-1).i = q__1.i; i__1 = kcnext + k - 1; i__2 = kcnext + k - 1; i__3 = k - 1; cdotc_(&q__2, &i__3, &AP(kc), &c__1, &AP(kcnext), &c__1); q__1.r = AP(kcnext+k-1).r - q__2.r, q__1.i = AP(kcnext+k-1).i - q__2.i; AP(kcnext+k-1).r = q__1.r, AP(kcnext+k-1).i = q__1.i; i__1 = k - 1; ccopy_(&i__1, &AP(kcnext), &c__1, &WORK(1), &c__1); i__1 = k - 1; q__1.r = -1.f, q__1.i = 0.f; chpmv_(uplo, &i__1, &q__1, &AP(1), &WORK(1), &c__1, &c_b2, & AP(kcnext), &c__1); i__1 = kcnext + k; i__2 = kcnext + k; i__3 = k - 1; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kcnext), &c__1); d__1 = q__2.r; q__1.r = AP(kcnext+k).r - d__1, q__1.i = AP(kcnext+k).i; AP(kcnext+k).r = q__1.r, AP(kcnext+k).i = q__1.i; } kstep = 2; kcnext = kcnext + k + 1; } kp = (i__1 = IPIV(k), abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the leading submatrix A(1:k+1,1:k+1) */ kpc = (kp - 1) * kp / 2 + 1; i__1 = kp - 1; cswap_(&i__1, &AP(kc), &c__1, &AP(kpc), &c__1); kx = kpc + kp - 1; i__1 = k - 1; for (j = kp + 1; j <= k-1; ++j) { kx = kx + j - 1; r_cnjg(&q__1, &AP(kc + j - 1)); temp.r = q__1.r, temp.i = q__1.i; i__2 = kc + j - 1; r_cnjg(&q__1, &AP(kx)); AP(kc+j-1).r = q__1.r, AP(kc+j-1).i = q__1.i; i__2 = kx; AP(kx).r = temp.r, AP(kx).i = temp.i; /* L40: */ } i__1 = kc + kp - 1; r_cnjg(&q__1, &AP(kc + kp - 1)); AP(kc+kp-1).r = q__1.r, AP(kc+kp-1).i = q__1.i; i__1 = kc + k - 1; temp.r = AP(kc+k-1).r, temp.i = AP(kc+k-1).i; i__1 = kc + k - 1; i__2 = kpc + kp - 1; AP(kc+k-1).r = AP(kpc+kp-1).r, AP(kc+k-1).i = AP(kpc+kp-1).i; i__1 = kpc + kp - 1; AP(kpc+kp-1).r = temp.r, AP(kpc+kp-1).i = temp.i; if (kstep == 2) { i__1 = kc + k + k - 1; temp.r = AP(kc+k+k-1).r, temp.i = AP(kc+k+k-1).i; i__1 = kc + k + k - 1; i__2 = kc + k + kp - 1; AP(kc+k+k-1).r = AP(kc+k+kp-1).r, AP(kc+k+k-1).i = AP(kc+k+kp-1).i; i__1 = kc + k + kp - 1; AP(kc+k+kp-1).r = temp.r, AP(kc+k+kp-1).i = temp.i; } } k += kstep; kc = kcnext; goto L30; L50: ; } else { /* Compute inv(A) from the factorization A = L*D*L'. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ npp = *n * (*n + 1) / 2; k = *n; kc = npp; L60: /* If K < 1, exit from loop. */ if (k < 1) { goto L80; } kcnext = kc - (*n - k + 2); if (IPIV(k) > 0) { /* 1 x 1 diagonal block Invert the diagonal block. */ i__1 = kc; i__2 = kc; d__1 = 1.f / AP(kc).r; AP(kc).r = d__1, AP(kc).i = 0.f; /* Compute column K of the inverse. */ if (k < *n) { i__1 = *n - k; ccopy_(&i__1, &AP(kc + 1), &c__1, &WORK(1), &c__1); i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; chpmv_(uplo, &i__1, &q__1, &AP(kc + *n - k + 1), &WORK(1), & c__1, &c_b2, &AP(kc + 1), &c__1); i__1 = kc; i__2 = kc; i__3 = *n - k; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kc + 1), &c__1); d__1 = q__2.r; q__1.r = AP(kc).r - d__1, q__1.i = AP(kc).i; AP(kc).r = q__1.r, AP(kc).i = q__1.i; } kstep = 1; } else { /* 2 x 2 diagonal block Invert the diagonal block. */ t = c_abs(&AP(kcnext + 1)); i__1 = kcnext; ak = AP(kcnext).r / t; i__1 = kc; akp1 = AP(kc).r / t; i__1 = kcnext + 1; q__1.r = AP(kcnext+1).r / t, q__1.i = AP(kcnext+1).i / t; akkp1.r = q__1.r, akkp1.i = q__1.i; d = t * (ak * akp1 - 1.f); i__1 = kcnext; d__1 = akp1 / d; AP(kcnext).r = d__1, AP(kcnext).i = 0.f; i__1 = kc; d__1 = ak / d; AP(kc).r = d__1, AP(kc).i = 0.f; i__1 = kcnext + 1; q__2.r = -(doublereal)akkp1.r, q__2.i = -(doublereal)akkp1.i; q__1.r = q__2.r / d, q__1.i = q__2.i / d; AP(kcnext+1).r = q__1.r, AP(kcnext+1).i = q__1.i; /* Compute columns K-1 and K of the inverse. */ if (k < *n) { i__1 = *n - k; ccopy_(&i__1, &AP(kc + 1), &c__1, &WORK(1), &c__1); i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; chpmv_(uplo, &i__1, &q__1, &AP(kc + (*n - k + 1)), &WORK(1), & c__1, &c_b2, &AP(kc + 1), &c__1); i__1 = kc; i__2 = kc; i__3 = *n - k; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kc + 1), &c__1); d__1 = q__2.r; q__1.r = AP(kc).r - d__1, q__1.i = AP(kc).i; AP(kc).r = q__1.r, AP(kc).i = q__1.i; i__1 = kcnext + 1; i__2 = kcnext + 1; i__3 = *n - k; cdotc_(&q__2, &i__3, &AP(kc + 1), &c__1, &AP(kcnext + 2), & c__1); q__1.r = AP(kcnext+1).r - q__2.r, q__1.i = AP(kcnext+1).i - q__2.i; AP(kcnext+1).r = q__1.r, AP(kcnext+1).i = q__1.i; i__1 = *n - k; ccopy_(&i__1, &AP(kcnext + 2), &c__1, &WORK(1), &c__1); i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; chpmv_(uplo, &i__1, &q__1, &AP(kc + (*n - k + 1)), &WORK(1), & c__1, &c_b2, &AP(kcnext + 2), &c__1); i__1 = kcnext; i__2 = kcnext; i__3 = *n - k; cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kcnext + 2), &c__1); d__1 = q__2.r; q__1.r = AP(kcnext).r - d__1, q__1.i = AP(kcnext).i; AP(kcnext).r = q__1.r, AP(kcnext).i = q__1.i; } kstep = 2; kcnext -= *n - k + 3; } kp = (i__1 = IPIV(k), abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the trailing submatrix A(k-1:n,k-1:n) */ kpc = npp - (*n - kp + 1) * (*n - kp + 2) / 2 + 1; if (kp < *n) { i__1 = *n - kp; cswap_(&i__1, &AP(kc + kp - k + 1), &c__1, &AP(kpc + 1), & c__1); } kx = kc + kp - k; i__1 = kp - 1; for (j = k + 1; j <= kp-1; ++j) { kx = kx + *n - j + 1; r_cnjg(&q__1, &AP(kc + j - k)); temp.r = q__1.r, temp.i = q__1.i; i__2 = kc + j - k; r_cnjg(&q__1, &AP(kx)); AP(kc+j-k).r = q__1.r, AP(kc+j-k).i = q__1.i; i__2 = kx; AP(kx).r = temp.r, AP(kx).i = temp.i; /* L70: */ } i__1 = kc + kp - k; r_cnjg(&q__1, &AP(kc + kp - k)); AP(kc+kp-k).r = q__1.r, AP(kc+kp-k).i = q__1.i; i__1 = kc; temp.r = AP(kc).r, temp.i = AP(kc).i; i__1 = kc; i__2 = kpc; AP(kc).r = AP(kpc).r, AP(kc).i = AP(kpc).i; i__1 = kpc; AP(kpc).r = temp.r, AP(kpc).i = temp.i; if (kstep == 2) { i__1 = kc - *n + k - 1; temp.r = AP(kc-*n+k-1).r, temp.i = AP(kc-*n+k-1).i; i__1 = kc - *n + k - 1; i__2 = kc - *n + kp - 1; AP(kc-*n+k-1).r = AP(kc-*n+kp-1).r, AP(kc-*n+k-1).i = AP(kc-*n+kp-1).i; i__1 = kc - *n + kp - 1; AP(kc-*n+kp-1).r = temp.r, AP(kc-*n+kp-1).i = temp.i; } } k -= kstep; kc = kcnext; goto L60; L80: ; } return 0; /* End of CHPTRI */ } /* chptri_ */
void setup (int N, const Parameter ¶m, Array<double, 1> &WR, Array<double,2> &ev, Array<double,2> &evInv) { int Nm1 = N; int i; Array<double, 1> x; Array<double, 2> D; Array<double, 1> r; Array<double, 2> Dsec; Array<double, 1> XX; Array<double, 1> YY; Array<double, 2> A(N,N); Array<double, 2> B(N,N); Array<int, 1> IPIV(Nm1); char BALANC[1]; char JOBVL[1]; char JOBVR[1]; char SENSE[1]; int LDA; int LDVL; int LDVR; int NRHS; int LDB; int INFO; //resize output arrays WR.resize(N); ev.resize(N, N); evInv.resize(N, N); // parameters for DGEEVX Array<double, 1> WI(Nm1); // WR(Nm1), // The real and imaginary part of the eig.values Array<double, 2> VL(N, N); Array<double, 2> VR(Nm1,Nm1); //VR(Nm1,Nm1); // The left and rigth eigenvectors int ILO, IHI; // Info on the balanced output matrix Array<double, 1> SCALE(Nm1); // Scaling factors applied for balancing double ABNRM; // 1-Norm of the balanced matrix Array<double, 1> RCONDE(Nm1); // the reciprocal cond. numb of the respective eig.val Array<double, 1> RCONDV(Nm1); // the reciprocal cond. numb of the respective eig.vec int LWORK = (N+1)*(N+7); // Depending on SENSE Array<double, 1> WORK(LWORK); Array<int, 1> IWORK(2*(N+1)-2); // Compute the Chebyshev differensiation matrix and D*D // cheb(N, x, D); cheb(N, x, D); Dsec.resize(D.shape()); MatrixMatrixMultiply(D, D, Dsec); // Compute the 1. and 2. derivatives of the transformations XYmat(N, param, XX, YY, r); // Set up the full timepropagation matrix A // dy/dt = - i A y Range range(1, N); //Dsec and D have range 0, N+1. //We don't want the edge points in A A = XX(tensor::i) * Dsec(range, range) + YY(tensor::i) * D(range, range); //Transpose A for (int i=0; i<A.extent(0); i++) { for (int j=0; j<i; j++) { double t = A(i,j); A(i,j) = A(j, i); A(j,i) = t; } } // Add radialpart of non-time dependent potential here /* 2D radial for (int i=0; i<A.extent(0); i++) { A(i, i) += 0.25 / (r(i)*r(i)); } */ // Compute eigen decomposition BALANC[0] ='B'; JOBVL[0] ='V'; JOBVR[0] ='V'; SENSE[0] ='B'; LDA = Nm1; LDVL = Nm1; LDVR = Nm1; FORTRAN_NAME(dgeevx)(BALANC, JOBVL, JOBVR, SENSE, &Nm1, A.data(), &LDA, WR.data(), WI.data(), VL.data(), &LDVL, VR.data(), &LDVR, &ILO, &IHI, SCALE.data(), &ABNRM, RCONDE.data(), RCONDV.data(), WORK.data(), &LWORK, IWORK.data(), &INFO); // Compute the inverse of the eigen vector matrix NRHS = Nm1; evInv = VR ;// VL; LDB = LDA; B = 0.0; for (i=0; i<Nm1; i++) B(i,i) = 1.0; FORTRAN_NAME(dgesv)(&Nm1, &NRHS, evInv.data(), &LDA, IPIV.data(), B.data(), &LDB, &INFO); ev = VR(tensor::j, tensor::i); //Transpose evInv = B(tensor::j, tensor::i); //Transpose //cout << "Eigenvectors (right): " << ev << endl; //cout << "Eigenvectors (inv): " << evInv << endl; //printf(" Done inverse, INFO = %d \n", INFO); } // done
/* Subroutine */ int ssptri_(char *uplo, integer *n, real *ap, integer *ipiv, real *work, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= SSPTRI computes the inverse of a real symmetric indefinite matrix A in packed storage using the factorization A = U*D*U**T or A = L*D*L**T computed by SSPTRF. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the details of the factorization are stored as an upper or lower triangular matrix. = 'U': Upper triangular, form is A = U*D*U**T; = 'L': Lower triangular, form is A = L*D*L**T. N (input) INTEGER The order of the matrix A. N >= 0. AP (input/output) REAL array, dimension (N*(N+1)/2) On entry, the block diagonal matrix D and the multipliers used to obtain the factor U or L as computed by SSPTRF, stored as a packed triangular matrix. On exit, if INFO = 0, the (symmetric) inverse of the original matrix, stored as a packed triangular matrix. The j-th column of inv(A) is stored in the array AP as follows: if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. IPIV (input) INTEGER array, dimension (N) Details of the interchanges and the block structure of D as determined by SSPTRF. WORK (workspace) REAL array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its inverse could not be computed. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static real c_b11 = -1.f; static real c_b13 = 0.f; /* System generated locals */ integer i__1; real r__1; /* Local variables */ static real temp; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static real akkp1, d; static integer j, k; static real t; extern logical lsame_(char *, char *); static integer kstep; static logical upper; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer * ), sspmv_(char *, integer *, real *, real *, real *, integer *, real *, real *, integer *); static real ak; static integer kc, kp, kx; extern /* Subroutine */ int xerbla_(char *, integer *); static integer kcnext, kpc, npp; static real akp1; #define WORK(I) work[(I)-1] #define IPIV(I) ipiv[(I)-1] #define AP(I) ap[(I)-1] *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } if (*info != 0) { i__1 = -(*info); xerbla_("SSPTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check that the diagonal matrix D is nonsingular. */ if (upper) { /* Upper triangular storage: examine D from bottom to top */ kp = *n * (*n + 1) / 2; for (*info = *n; *info >= 1; --(*info)) { if (IPIV(*info) > 0 && AP(kp) == 0.f) { return 0; } kp -= *info; /* L10: */ } } else { /* Lower triangular storage: examine D from top to bottom. */ kp = 1; i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (IPIV(*info) > 0 && AP(kp) == 0.f) { return 0; } kp = kp + *n - *info + 1; /* L20: */ } } *info = 0; if (upper) { /* Compute inv(A) from the factorization A = U*D*U'. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = 1; kc = 1; L30: /* If K > N, exit from loop. */ if (k > *n) { goto L50; } kcnext = kc + k; if (IPIV(k) > 0) { /* 1 x 1 diagonal block Invert the diagonal block. */ AP(kc + k - 1) = 1.f / AP(kc + k - 1); /* Compute column K of the inverse. */ if (k > 1) { i__1 = k - 1; scopy_(&i__1, &AP(kc), &c__1, &WORK(1), &c__1); i__1 = k - 1; sspmv_(uplo, &i__1, &c_b11, &AP(1), &WORK(1), &c__1, &c_b13, & AP(kc), &c__1); i__1 = k - 1; AP(kc + k - 1) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kc), & c__1); } kstep = 1; } else { /* 2 x 2 diagonal block Invert the diagonal block. */ t = (r__1 = AP(kcnext + k - 1), dabs(r__1)); ak = AP(kc + k - 1) / t; akp1 = AP(kcnext + k) / t; akkp1 = AP(kcnext + k - 1) / t; d = t * (ak * akp1 - 1.f); AP(kc + k - 1) = akp1 / d; AP(kcnext + k) = ak / d; AP(kcnext + k - 1) = -(doublereal)akkp1 / d; /* Compute columns K and K+1 of the inverse. */ if (k > 1) { i__1 = k - 1; scopy_(&i__1, &AP(kc), &c__1, &WORK(1), &c__1); i__1 = k - 1; sspmv_(uplo, &i__1, &c_b11, &AP(1), &WORK(1), &c__1, &c_b13, & AP(kc), &c__1); i__1 = k - 1; AP(kc + k - 1) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kc), & c__1); i__1 = k - 1; AP(kcnext + k - 1) -= sdot_(&i__1, &AP(kc), &c__1, &AP(kcnext) , &c__1); i__1 = k - 1; scopy_(&i__1, &AP(kcnext), &c__1, &WORK(1), &c__1); i__1 = k - 1; sspmv_(uplo, &i__1, &c_b11, &AP(1), &WORK(1), &c__1, &c_b13, & AP(kcnext), &c__1); i__1 = k - 1; AP(kcnext + k) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kcnext), & c__1); } kstep = 2; kcnext = kcnext + k + 1; } kp = (i__1 = IPIV(k), abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the leading submatrix A(1:k+1,1:k+1) */ kpc = (kp - 1) * kp / 2 + 1; i__1 = kp - 1; sswap_(&i__1, &AP(kc), &c__1, &AP(kpc), &c__1); kx = kpc + kp - 1; i__1 = k - 1; for (j = kp + 1; j <= k-1; ++j) { kx = kx + j - 1; temp = AP(kc + j - 1); AP(kc + j - 1) = AP(kx); AP(kx) = temp; /* L40: */ } temp = AP(kc + k - 1); AP(kc + k - 1) = AP(kpc + kp - 1); AP(kpc + kp - 1) = temp; if (kstep == 2) { temp = AP(kc + k + k - 1); AP(kc + k + k - 1) = AP(kc + k + kp - 1); AP(kc + k + kp - 1) = temp; } } k += kstep; kc = kcnext; goto L30; L50: ; } else { /* Compute inv(A) from the factorization A = L*D*L'. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ npp = *n * (*n + 1) / 2; k = *n; kc = npp; L60: /* If K < 1, exit from loop. */ if (k < 1) { goto L80; } kcnext = kc - (*n - k + 2); if (IPIV(k) > 0) { /* 1 x 1 diagonal block Invert the diagonal block. */ AP(kc) = 1.f / AP(kc); /* Compute column K of the inverse. */ if (k < *n) { i__1 = *n - k; scopy_(&i__1, &AP(kc + 1), &c__1, &WORK(1), &c__1); i__1 = *n - k; sspmv_(uplo, &i__1, &c_b11, &AP(kc + *n - k + 1), &WORK(1), & c__1, &c_b13, &AP(kc + 1), &c__1); i__1 = *n - k; AP(kc) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kc + 1), &c__1); } kstep = 1; } else { /* 2 x 2 diagonal block Invert the diagonal block. */ t = (r__1 = AP(kcnext + 1), dabs(r__1)); ak = AP(kcnext) / t; akp1 = AP(kc) / t; akkp1 = AP(kcnext + 1) / t; d = t * (ak * akp1 - 1.f); AP(kcnext) = akp1 / d; AP(kc) = ak / d; AP(kcnext + 1) = -(doublereal)akkp1 / d; /* Compute columns K-1 and K of the inverse. */ if (k < *n) { i__1 = *n - k; scopy_(&i__1, &AP(kc + 1), &c__1, &WORK(1), &c__1); i__1 = *n - k; sspmv_(uplo, &i__1, &c_b11, &AP(kc + (*n - k + 1)), &WORK(1), &c__1, &c_b13, &AP(kc + 1), &c__1); i__1 = *n - k; AP(kc) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kc + 1), &c__1); i__1 = *n - k; AP(kcnext + 1) -= sdot_(&i__1, &AP(kc + 1), &c__1, &AP(kcnext + 2), &c__1); i__1 = *n - k; scopy_(&i__1, &AP(kcnext + 2), &c__1, &WORK(1), &c__1); i__1 = *n - k; sspmv_(uplo, &i__1, &c_b11, &AP(kc + (*n - k + 1)), &WORK(1), &c__1, &c_b13, &AP(kcnext + 2), &c__1); i__1 = *n - k; AP(kcnext) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kcnext + 2), & c__1); } kstep = 2; kcnext -= *n - k + 3; } kp = (i__1 = IPIV(k), abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the trailing submatrix A(k-1:n,k-1:n) */ kpc = npp - (*n - kp + 1) * (*n - kp + 2) / 2 + 1; if (kp < *n) { i__1 = *n - kp; sswap_(&i__1, &AP(kc + kp - k + 1), &c__1, &AP(kpc + 1), & c__1); } kx = kc + kp - k; i__1 = kp - 1; for (j = k + 1; j <= kp-1; ++j) { kx = kx + *n - j + 1; temp = AP(kc + j - k); AP(kc + j - k) = AP(kx); AP(kx) = temp; /* L70: */ } temp = AP(kc); AP(kc) = AP(kpc); AP(kpc) = temp; if (kstep == 2) { temp = AP(kc - *n + k - 1); AP(kc - *n + k - 1) = AP(kc - *n + kp - 1); AP(kc - *n + kp - 1) = temp; } } k -= kstep; kc = kcnext; goto L60; L80: ; } return 0; /* End of SSPTRI */ } /* ssptri_ */
/* Subroutine */ int dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DGBTRF computes an LU factorization of a real m-by-n band matrix A using partial pivoting with row interchanges. This is the blocked version of the algorithm, calling Level 3 BLAS. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) On entry, the matrix A in band storage, in rows KL+1 to 2*KL+KU+1; rows 1 to KL of the array need not be set. The j-th column of A is stored in the j-th column of the array AB as follows: AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) On exit, details of the factorization: U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. See below for further details. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= 2*KL+KU+1. IPIV (output) INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = +i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. Further Details =============== The band storage scheme is illustrated by the following example, when M = N = 6, KL = 2, KU = 1: On entry: On exit: * * * + + + * * * u14 u25 u36 * * + + + + * * u13 u24 u35 u46 * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * a31 a42 a53 a64 * * m31 m42 m53 m64 * * VISArray elements marked * are not used by the routine; elements marked + need not be set on entry, but are required by the routine to store elements of U because of fill-in resulting from the row interchanges. ===================================================================== KV is the number of superdiagonals in the factor U, allowing for fill-in Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static integer c__65 = 65; static doublereal c_b18 = -1.; static doublereal c_b31 = 1.; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1; /* Local variables */ extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal temp; static integer i, j; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *), dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dcopy_( integer *, doublereal *, integer *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer * ); static doublereal work13[4160] /* was [65][64] */, work31[4160] /* was [65][64] */; extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer i2, i3, j2, j3, k2; extern /* Subroutine */ int dgbtf2_(integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *); static integer jb, nb, ii, jj, jm, ip, jp, km, ju, kv; extern integer idamax_(integer *, doublereal *, integer *); static integer nw; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); #define WORK13(I) work13[(I)] #define WAS(I) was[(I)] #define IPIV(I) ipiv[(I)-1] #define AB(I,J) ab[(I)-1 + ((J)-1)* ( *ldab)] kv = *ku + *kl; /* Test the input parameters. */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < *kl + kv + 1) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("DGBTRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment */ nb = ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku, 6L, 1L); /* The block size must not exceed the limit set by the size of the local arrays WORK13 and WORK31. */ nb = min(nb,64); if (nb <= 1 || nb > *kl) { /* Use unblocked code */ dgbtf2_(m, n, kl, ku, &AB(1,1), ldab, &IPIV(1), info); } else { /* Use blocked code Zero the superdiagonal elements of the work array WORK13 */ i__1 = nb; for (j = 1; j <= nb; ++j) { i__2 = j - 1; for (i = 1; i <= j-1; ++i) { WORK13(i + j * 65 - 66) = 0.; /* L10: */ } /* L20: */ } /* Zero the subdiagonal elements of the work array WORK31 */ i__1 = nb; for (j = 1; j <= nb; ++j) { i__2 = nb; for (i = j + 1; i <= nb; ++i) { work31[i + j * 65 - 66] = 0.; /* L30: */ } /* L40: */ } /* Gaussian elimination with partial pivoting Set fill-in elements in columns KU+2 to KV to zero */ i__1 = min(kv,*n); for (j = *ku + 2; j <= min(kv,*n); ++j) { i__2 = *kl; for (i = kv - j + 2; i <= *kl; ++i) { AB(i,j) = 0.; /* L50: */ } /* L60: */ } /* JU is the index of the last column affected by the current stage of the factorization */ ju = 1; i__1 = min(*m,*n); i__2 = nb; for (j = 1; nb < 0 ? j >= min(*m,*n) : j <= min(*m,*n); j += nb) { /* Computing MIN */ i__3 = nb, i__4 = min(*m,*n) - j + 1; jb = min(i__3,i__4); /* The active part of the matrix is partitioned A11 A12 A13 A21 A22 A23 A31 A32 A33 Here A11, A21 and A31 denote the current block of JB columns which is about to be factorized. The number of rows i n the partitioning are JB, I2, I3 respectively, and the num bers of columns are JB, J2, J3. The superdiagonal elements of A13 and the subdiagonal elements of A31 lie outside the b and. Computing MIN */ i__3 = *kl - jb, i__4 = *m - j - jb + 1; i2 = min(i__3,i__4); /* Computing MIN */ i__3 = jb, i__4 = *m - j - *kl + 1; i3 = min(i__3,i__4); /* J2 and J3 are computed after JU has been updated. Factorize the current block of JB columns */ i__3 = j + jb - 1; for (jj = j; jj <= j+jb-1; ++jj) { /* Set fill-in elements in column JJ+KV to zero */ if (jj + kv <= *n) { i__4 = *kl; for (i = 1; i <= *kl; ++i) { AB(i,jj+kv) = 0.; /* L70: */ } } /* Find pivot and test for singularity. KM is the number of subdiagonal elements in the current column. Computing MIN */ i__4 = *kl, i__5 = *m - jj; km = min(i__4,i__5); i__4 = km + 1; jp = idamax_(&i__4, &AB(kv+1,jj), &c__1); IPIV(jj) = jp + jj - j; if (AB(kv+jp,jj) != 0.) { /* Computing MAX Computing MIN */ i__6 = jj + *ku + jp - 1; i__4 = ju, i__5 = min(i__6,*n); ju = max(i__4,i__5); if (jp != 1) { /* Apply interchange to columns J t o J+JB-1 */ if (jp + jj - 1 < j + *kl) { i__4 = *ldab - 1; i__5 = *ldab - 1; dswap_(&jb, &AB(kv+1+jj-j,j), & i__4, &AB(kv+jp+jj-j,j), &i__5); } else { /* The interchange affects c olumns J to JJ-1 of A31 which are stored in the w ork array WORK31 */ i__4 = jj - j; i__5 = *ldab - 1; dswap_(&i__4, &AB(kv+1+jj-j,j), &i__5, &work31[jp + jj - j - *kl - 1], & c__65); i__4 = j + jb - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; dswap_(&i__4, &AB(kv+1,jj), &i__5, & AB(kv+jp,jj), &i__6); } } /* Compute multipliers */ d__1 = 1. / AB(kv+1,jj); dscal_(&km, &d__1, &AB(kv+2,jj), &c__1); /* Update trailing submatrix within the ba nd and within the current block. JM is the index of t he last column which needs to be updated. Computing MIN */ i__4 = ju, i__5 = j + jb - 1; jm = min(i__4,i__5); if (jm > jj) { i__4 = jm - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; dger_(&km, &i__4, &c_b18, &AB(kv+2,jj), &c__1, &AB(kv,jj+1), &i__5, & AB(kv+1,jj+1), &i__6); } } else { /* If pivot is zero, set INFO to the index of the pivot unless a zero pivot has already been fo und. */ if (*info == 0) { *info = jj; } } /* Copy current column of A31 into the work array WORK31 Computing MIN */ i__4 = jj - j + 1; nw = min(i__4,i3); if (nw > 0) { dcopy_(&nw, &AB(kv+*kl+1-jj+j,jj), & c__1, &work31[(jj - j + 1) * 65 - 65], &c__1); } /* L80: */ } if (j + jb <= *n) { /* Apply the row interchanges to the other blocks . Computing MIN */ i__3 = ju - j + 1; j2 = min(i__3,kv) - jb; /* Computing MAX */ i__3 = 0, i__4 = ju - j - kv + 1; j3 = max(i__3,i__4); /* Use DLASWP to apply the row interchanges to A1 2, A22, and A32. */ i__3 = *ldab - 1; dlaswp_(&j2, &AB(kv+1-jb,j+jb), &i__3, & c__1, &jb, &IPIV(j), &c__1); /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i = j; i <= j+jb-1; ++i) { IPIV(i) = IPIV(i) + j - 1; /* L90: */ } /* Apply the row interchanges to A13, A23, and A3 3 columnwise. */ k2 = j - 1 + jb + j2; i__3 = j3; for (i = 1; i <= j3; ++i) { jj = k2 + i; i__4 = j + jb - 1; for (ii = j + i - 1; ii <= j+jb-1; ++ii) { ip = IPIV(ii); if (ip != ii) { temp = AB(kv+1+ii-jj,jj); AB(kv+1+ii-jj,jj) = AB(kv+1+ip-jj,jj); AB(kv+1+ip-jj,jj) = temp; } /* L100: */ } /* L110: */ } /* Update the relevant part of the trailing subma trix */ if (j2 > 0) { /* Update A12 */ i__3 = *ldab - 1; i__4 = *ldab - 1; dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, &c_b31, &AB(kv+1,j), &i__3, &AB(kv+1-jb,j+jb), &i__4); if (i2 > 0) { /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; i__5 = *ldab - 1; dgemm_("No transpose", "No transpose", &i2, &j2, &jb, &c_b18, &AB(kv+1+jb,j), &i__3, &AB(kv+1-jb,j+jb), &i__4, &c_b31, &AB(kv+1,j+jb), & i__5); } if (i3 > 0) { /* Update A32 */ i__3 = *ldab - 1; i__4 = *ldab - 1; dgemm_("No transpose", "No transpose", &i3, &j2, &jb, &c_b18, work31, &c__65, &AB(kv+1-jb,j+jb), &i__3, &c_b31, &AB(kv+*kl+1-jb,j+jb), &i__4); } } if (j3 > 0) { /* Copy the lower triangle of A13 into the work array WORK13 */ i__3 = j3; for (jj = 1; jj <= j3; ++jj) { i__4 = jb; for (ii = jj; ii <= jb; ++ii) { WORK13(ii + jj * 65 - 66) = AB(ii-jj+1,jj+j+kv-1); /* L120: */ } /* L130: */ } /* Update A13 in the work array */ i__3 = *ldab - 1; dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, &c_b31, &AB(kv+1,j), &i__3, work13, &c__65); if (i2 > 0) { /* Update A23 */ i__3 = *ldab - 1; i__4 = *ldab - 1; dgemm_("No transpose", "No transpose", &i2, &j3, &jb, &c_b18, &AB(kv+1+jb,j), &i__3, work13, &c__65, &c_b31, &AB(jb+1,j+kv), &i__4); } if (i3 > 0) { /* Update A33 */ i__3 = *ldab - 1; dgemm_("No transpose", "No transpose", &i3, &j3, &jb, &c_b18, work31, &c__65, work13, &c__65, & c_b31, &AB(*kl+1,j+kv), & i__3); } /* Copy the lower triangle of A13 back int o place */ i__3 = j3; for (jj = 1; jj <= j3; ++jj) { i__4 = jb; for (ii = jj; ii <= jb; ++ii) { AB(ii-jj+1,jj+j+kv-1) = WORK13(ii + jj * 65 - 66); /* L140: */ } /* L150: */ } } } else { /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i = j; i <= j+jb-1; ++i) { IPIV(i) = IPIV(i) + j - 1; /* L160: */ } } /* Partially undo the interchanges in the current block to restore the upper triangular form of A31 and copy the upper triangle of A31 back into place */ i__3 = j; for (jj = j + jb - 1; jj >= j; --jj) { jp = IPIV(jj) - jj + 1; if (jp != 1) { /* Apply interchange to columns J to JJ-1 */ if (jp + jj - 1 < j + *kl) { /* The interchange does not affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; i__6 = *ldab - 1; dswap_(&i__4, &AB(kv+1+jj-j,j), & i__5, &AB(kv+jp+jj-j,j), & i__6); } else { /* The interchange does affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; dswap_(&i__4, &AB(kv+1+jj-j,j), & i__5, &work31[jp + jj - j - *kl - 1], &c__65); } } /* Copy the current column of A31 back into place Computing MIN */ i__4 = i3, i__5 = jj - j + 1; nw = min(i__4,i__5); if (nw > 0) { dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &AB(kv+*kl+1-jj+j,jj), &c__1); } /* L170: */ } /* L180: */ } } return 0; /* End of DGBTRF */ } /* dgbtrf_ */
/* Subroutine */ int dsysv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, doublereal *work, integer *lwork, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DSYSV computes the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric matrix and X and B are N-by-NRHS matrices. The diagonal pivoting method is used to factor A as A = U * D * U**T, if UPLO = 'U', or A = L * D * L**T, if UPLO = 'L', where U (or L) is a product of permutation and unit upper (lower) triangular matrices, and D is symmetric and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then used to solve the system of equations A * X = B. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the block diagonal matrix D and the multipliers used to obtain the factor U or L from the factorization A = U*D*U**T or A = L*D*L**T as computed by DSYTRF. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (output) INTEGER array, dimension (N) Details of the interchanges and the block structure of D, as determined by DSYTRF. If IPIV(k) > 0, then rows and columns k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the N-by-NRHS right hand side matrix B. On exit, if INFO = 0, the N-by-NRHS solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The length of WORK. LWORK >= 1, and for best performance LWORK >= N*NB, where NB is the optimal blocksize for DSYTRF. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, D(i,i) is exactly zero. The factorization has been completed, but the block diagonal matrix D is exactly singular, so the solution could not be computed. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical lsame_(char *, char *); extern /* Subroutine */ int xerbla_(char *, integer *), dsytrf_( char *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); #define IPIV(I) ipiv[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] *info = 0; if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*lwork < 1) { *info = -10; } if (*info != 0) { i__1 = -(*info); xerbla_("DSYSV ", &i__1); return 0; } /* Compute the factorization A = U*D*U' or A = L*D*L'. */ dsytrf_(uplo, n, &A(1,1), lda, &IPIV(1), &WORK(1), lwork, info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ dsytrs_(uplo, n, nrhs, &A(1,1), lda, &IPIV(1), &B(1,1), ldb, info); } return 0; /* End of DSYSV */ } /* dsysv_ */
/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DGESV computes the solution to a real system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. The LU decomposition with partial pivoting and row interchanges is used to factor A as A = P * L * U, where P is a permutation matrix, L is unit lower triangular, and U is upper triangular. The factored form of A is then used to solve the system of equations A * X = B. Arguments ========= N (input) INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the N-by-N coefficient matrix A. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (output) INTEGER array, dimension (N) The pivot indices that define the permutation matrix P; row i of the matrix was interchanged with row IPIV(i). B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, if INFO = 0, the N-by-NRHS solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, integer *, integer *, integer *), xerbla_(char *, integer *), dgetrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); #define IPIV(I) ipiv[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] *info = 0; if (*n < 0) { *info = -1; } else if (*nrhs < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("DGESV ", &i__1); return 0; } /* Compute the LU factorization of A. */ dgetrf_(n, n, &A(1,1), lda, &IPIV(1), info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ dgetrs_("No transpose", n, nrhs, &A(1,1), lda, &IPIV(1), &B(1,1), ldb, info); } return 0; /* End of DGESV */ } /* dgesv_ */
/***************************************************************************//** * Parallel tile LU factorization - static scheduling **/ void plasma_pzgetrf_incpiv(plasma_context_t *plasma) { PLASMA_desc A; PLASMA_desc L; int *IPIV; PLASMA_sequence *sequence; PLASMA_request *request; int k, m, n; int next_k; int next_m; int next_n; int ldak, ldam; int info; int tempkn, tempkm, tempmm, tempnn; int ib = PLASMA_IB; PLASMA_Complex64_t *work; plasma_unpack_args_5(A, L, IPIV, sequence, request); if (sequence->status != PLASMA_SUCCESS) return; work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*L.nb, L.dtyp); ss_init(A.mt, A.nt, -1); k = 0; n = PLASMA_RANK; while (n >= A.nt) { k++; n = n-A.nt+k; } m = k; while (k < min(A.mt, A.nt) && n < A.nt && !ss_aborted()) { next_n = n; next_m = m; next_k = k; next_m++; if (next_m == A.mt) { next_n += PLASMA_SIZE; while (next_n >= A.nt && next_k < min(A.mt, A.nt)) { next_k++; next_n = next_n-A.nt+next_k; } next_m = next_k; } tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb; ldak = BLKLDD(A, k); ldam = BLKLDD(A, m); if (n == k) { if (m == k) { ss_cond_wait(k, k, k-1); CORE_zgetrf_incpiv( tempkm, tempkn, ib, A(k, k), ldak, IPIV(k, k), &info); if (info != 0 && m == A.mt-1) { plasma_request_fail(sequence, request, info + A.nb*k); ss_abort(); } ss_cond_set(k, k, k); } else { ss_cond_wait(m, k, k-1); CORE_ztstrf( tempmm, tempkn, ib, A.nb, A(k, k), ldak, A(m, k), ldam, L(m, k), L.mb, IPIV(m, k), work, L.nb, &info); if (info != 0 && m == A.mt-1) { plasma_request_fail(sequence, request, info + A.nb*k); ss_abort(); } ss_cond_set(m, k, k); } } else { if (m == k) { ss_cond_wait(k, k, k); ss_cond_wait(k, n, k-1); CORE_zgessm( tempkm, tempnn, tempkm, ib, IPIV(k, k), A(k, k), ldak, A(k, n), ldak); } else { ss_cond_wait(m, k, k); ss_cond_wait(m, n, k-1); CORE_zssssm( A.nb, tempnn, tempmm, tempnn, A.nb, ib, A(k, n), ldak, A(m, n), ldam, L(m, k), L.mb, A(m, k), ldam, IPIV(m, k)); ss_cond_set(m, n, k); } } n = next_n; m = next_m; k = next_k; } plasma_private_free(plasma, work); ss_finalize(); }
/* Subroutine */ int zsysvx_(char *fact, char *uplo, integer *n, integer * nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer * ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZSYSVX uses the diagonal pivoting factorization to compute the solution to a complex system of linear equations A * X = B, where A is an N-by-N symmetric matrix and X and B are N-by-NRHS matrices. Error bounds on the solution and a condition estimate are also provided. Description =========== The following steps are performed: 1. If FACT = 'N', the diagonal pivoting method is used to factor A. The form of the factorization is A = U * D * U**T, if UPLO = 'U', or A = L * D * L**T, if UPLO = 'L', where U (or L) is a product of permutation and unit upper (lower) triangular matrices, and D is symmetric and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. 2. The factored form of A is used to estimate the condition number of the matrix A. If the reciprocal of the condition number is less than machine precision, steps 3 and 4 are skipped. 3. The system of equations is solved for X using the factored form of A. 4. Iterative refinement is applied to improve the computed solution matrix and calculate error bounds and backward error estimates for it. Arguments ========= FACT (input) CHARACTER*1 Specifies whether or not the factored form of A has been supplied on entry. = 'F': On entry, AF and IPIV contain the factored form of A. A, AF and IPIV will not be modified. = 'N': The matrix A will be copied to AF and factored. UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. A (input) COMPLEX*16 array, dimension (LDA,N) The symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). AF (input or output) COMPLEX*16 array, dimension (LDAF,N) If FACT = 'F', then AF is an input argument and on entry contains the block diagonal matrix D and the multipliers used to obtain the factor U or L from the factorization A = U*D*U**T or A = L*D*L**T as computed by ZSYTRF. If FACT = 'N', then AF is an output argument and on exit returns the block diagonal matrix D and the multipliers used to obtain the factor U or L from the factorization A = U*D*U**T or A = L*D*L**T. LDAF (input) INTEGER The leading dimension of the array AF. LDAF >= max(1,N). IPIV (input or output) INTEGER array, dimension (N) If FACT = 'F', then IPIV is an input argument and on entry contains details of the interchanges and the block structure of D, as determined by ZSYTRF. If IPIV(k) > 0, then rows and columns k and IPIV(k) were interchanged and D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. If FACT = 'N', then IPIV is an output argument and on exit contains details of the interchanges and the block structure of D, as determined by ZSYTRF. B (input) COMPLEX*16 array, dimension (LDB,NRHS) The N-by-NRHS right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (output) COMPLEX*16 array, dimension (LDX,NRHS) If INFO = 0, the N-by-NRHS solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). RCOND (output) DOUBLE PRECISION The estimate of the reciprocal condition number of the matrix A. If RCOND is less than the machine precision (in particular, if RCOND = 0), the matrix is singular to working precision. This condition is indicated by a return code of INFO > 0, and the solution and error bounds are not computed. FERR (output) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The length of WORK. LWORK >= 2*N, and for best performance LWORK >= N*NB, where NB is the optimal blocksize for ZSYTRF. RWORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, and i is <= N: D(i,i) is exactly zero. The factorization has been completed, but the block diagonal matrix D is exactly singular, so the solution and error bounds could not be computed. = N+1: the block diagonal matrix D is nonsingular, but RCOND is less than machine precision. The factorization has been completed, but the matrix is singular to working precision, so the solution and error bounds have not been computed. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, i__1; /* Local variables */ extern logical lsame_(char *, char *); static doublereal anorm; extern doublereal dlamch_(char *); static logical nofact; extern /* Subroutine */ int xerbla_(char *, integer *), zlacpy_( char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); extern doublereal zlansy_(char *, char *, integer *, doublecomplex *, integer *, doublereal *); extern /* Subroutine */ int zsycon_(char *, integer *, doublecomplex *, integer *, integer *, doublereal *, doublereal *, doublecomplex *, integer *), zsyrfs_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublereal *, doublecomplex *, doublereal *, integer *), zsytrf_(char *, integer *, doublecomplex *, integer *, integer *, doublecomplex *, integer *, integer *), zsytrs_(char *, integer *, integer *, doublecomplex *, integer *, integer *, doublecomplex *, integer *, integer *); #define IPIV(I) ipiv[(I)-1] #define FERR(I) ferr[(I)-1] #define BERR(I) berr[(I)-1] #define WORK(I) work[(I)-1] #define RWORK(I) rwork[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define AF(I,J) af[(I)-1 + ((J)-1)* ( *ldaf)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)] *info = 0; nofact = lsame_(fact, "N"); if (! nofact && ! lsame_(fact, "F")) { *info = -1; } else if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -11; } else if (*ldx < max(1,*n)) { *info = -13; } else if (*lwork < *n << 1) { *info = -18; } if (*info != 0) { i__1 = -(*info); xerbla_("ZSYSVX", &i__1); return 0; } if (nofact) { /* Compute the factorization A = U*D*U' or A = L*D*L'. */ zlacpy_(uplo, n, n, &A(1,1), lda, &AF(1,1), ldaf); zsytrf_(uplo, n, &AF(1,1), ldaf, &IPIV(1), &WORK(1), lwork, info); /* Return if INFO is non-zero. */ if (*info != 0) { if (*info > 0) { *rcond = 0.; } return 0; } } /* Compute the norm of the matrix A. */ anorm = zlansy_("I", uplo, n, &A(1,1), lda, &RWORK(1)); /* Compute the reciprocal of the condition number of A. */ zsycon_(uplo, n, &AF(1,1), ldaf, &IPIV(1), &anorm, rcond, &WORK(1), info); /* Return if the matrix is singular to working precision. */ if (*rcond < dlamch_("Epsilon")) { *info = *n + 1; return 0; } /* Compute the solution vectors X. */ zlacpy_("Full", n, nrhs, &B(1,1), ldb, &X(1,1), ldx); zsytrs_(uplo, n, nrhs, &AF(1,1), ldaf, &IPIV(1), &X(1,1), ldx, info); /* Use iterative refinement to improve the computed solutions and compute error bounds and backward error estimates for them. */ zsyrfs_(uplo, n, nrhs, &A(1,1), lda, &AF(1,1), ldaf, &IPIV(1), &B(1,1), ldb, &X(1,1), ldx, &FERR(1), &BERR(1), &WORK(1) , &RWORK(1), info); return 0; /* End of ZSYSVX */ } /* zsysvx_ */
/* Subroutine */ int sgtsvx_(char *fact, char *trans, integer *n, integer * nrhs, real *dl, real *d, real *du, real *dlf, real *df, real *duf, real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer * ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SGTSVX uses the LU factorization to compute the solution to a real system of linear equations A * X = B or A**T * X = B, where A is a tridiagonal matrix of order N and X and B are N-by-NRHS matrices. Error bounds on the solution and a condition estimate are also provided. Description =========== The following steps are performed: 1. If FACT = 'N', the LU decomposition is used to factor the matrix A as A = L * U, where L is a product of permutation and unit lower bidiagonal matrices and U is upper triangular with nonzeros in only the main diagonal and first two superdiagonals. 2. The factored form of A is used to estimate the condition number of the matrix A. If the reciprocal of the condition number is less than machine precision, steps 3 and 4 are skipped. 3. The system of equations is solved for X using the factored form of A. 4. Iterative refinement is applied to improve the computed solution matrix and calculate error bounds and backward error estimates for it. Arguments ========= FACT (input) CHARACTER*1 Specifies whether or not the factored form of A has been supplied on entry. = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not be modified. = 'N': The matrix will be copied to DLF, DF, and DUF and factored. TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. DL (input) REAL array, dimension (N-1) The (n-1) subdiagonal elements of A. D (input) REAL array, dimension (N) The n diagonal elements of A. DU (input) REAL array, dimension (N-1) The (n-1) superdiagonal elements of A. DLF (input or output) REAL array, dimension (N-1) If FACT = 'F', then DLF is an input argument and on entry contains the (n-1) multipliers that define the matrix L from the LU factorization of A as computed by SGTTRF. If FACT = 'N', then DLF is an output argument and on exit contains the (n-1) multipliers that define the matrix L from the LU factorization of A. DF (input or output) REAL array, dimension (N) If FACT = 'F', then DF is an input argument and on entry contains the n diagonal elements of the upper triangular matrix U from the LU factorization of A. If FACT = 'N', then DF is an output argument and on exit contains the n diagonal elements of the upper triangular matrix U from the LU factorization of A. DUF (input or output) REAL array, dimension (N-1) If FACT = 'F', then DUF is an input argument and on entry contains the (n-1) elements of the first superdiagonal of U. If FACT = 'N', then DUF is an output argument and on exit contains the (n-1) elements of the first superdiagonal of U. DU2 (input or output) REAL array, dimension (N-2) If FACT = 'F', then DU2 is an input argument and on entry contains the (n-2) elements of the second superdiagonal of U. If FACT = 'N', then DU2 is an output argument and on exit contains the (n-2) elements of the second superdiagonal of U. IPIV (input or output) INTEGER array, dimension (N) If FACT = 'F', then IPIV is an input argument and on entry contains the pivot indices from the LU factorization of A as computed by SGTTRF. If FACT = 'N', then IPIV is an output argument and on exit contains the pivot indices from the LU factorization of A; row i of the matrix was interchanged with row IPIV(i). IPIV(i) will always be either i or i+1; IPIV(i) = i indicates a row interchange was not required. B (input) REAL array, dimension (LDB,NRHS) The N-by-NRHS right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (output) REAL array, dimension (LDX,NRHS) If INFO = 0, the N-by-NRHS solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). RCOND (output) REAL The estimate of the reciprocal condition number of the matrix A. If RCOND is less than the machine precision (in particular, if RCOND = 0), the matrix is singular to working precision. This condition is indicated by a return code of INFO > 0, and the solution and error bounds are not computed. FERR (output) REAL array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) REAL array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) REAL array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, and i is <= N: U(i,i) is exactly zero. The factorization has not been completed unless i = N, but the factor U is exactly singular, so the solution and error bounds could not be computed. = N+1: RCOND is less than machine precision. The factorization has been completed, but the matrix is singular to working precision, and the solution and error bounds have not been computed. ===================================================================== Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1; /* Local variables */ static char norm[1]; extern logical lsame_(char *, char *); static real anorm; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); extern doublereal slamch_(char *); static logical nofact; extern /* Subroutine */ int xerbla_(char *, integer *); extern doublereal slangt_(char *, integer *, real *, real *, real *); extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), sgtcon_(char *, integer *, real *, real *, real *, real *, integer *, real *, real *, real *, integer *, integer *); static logical notran; extern /* Subroutine */ int sgtrfs_(char *, integer *, integer *, real *, real *, real *, real *, real *, real *, real *, integer *, real *, integer *, real *, integer *, real *, real *, real *, integer *, integer *), sgttrf_(integer *, real *, real *, real *, real *, integer *, integer *), sgttrs_(char *, integer *, integer *, real *, real *, real *, real *, integer *, real *, integer *, integer *); #define DL(I) dl[(I)-1] #define D(I) d[(I)-1] #define DU(I) du[(I)-1] #define DLF(I) dlf[(I)-1] #define DF(I) df[(I)-1] #define DUF(I) duf[(I)-1] #define DU2(I) du2[(I)-1] #define IPIV(I) ipiv[(I)-1] #define FERR(I) ferr[(I)-1] #define BERR(I) berr[(I)-1] #define WORK(I) work[(I)-1] #define IWORK(I) iwork[(I)-1] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)] *info = 0; nofact = lsame_(fact, "N"); notran = lsame_(trans, "N"); if (! nofact && ! lsame_(fact, "F")) { *info = -1; } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldb < max(1,*n)) { *info = -14; } else if (*ldx < max(1,*n)) { *info = -16; } if (*info != 0) { i__1 = -(*info); xerbla_("SGTSVX", &i__1); return 0; } if (nofact) { /* Compute the LU factorization of A. */ scopy_(n, &D(1), &c__1, &DF(1), &c__1); if (*n > 1) { i__1 = *n - 1; scopy_(&i__1, &DL(1), &c__1, &DLF(1), &c__1); i__1 = *n - 1; scopy_(&i__1, &DU(1), &c__1, &DUF(1), &c__1); } sgttrf_(n, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV(1), info); /* Return if INFO is non-zero. */ if (*info != 0) { if (*info > 0) { *rcond = 0.f; } return 0; } } /* Compute the norm of the matrix A. */ if (notran) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = slangt_(norm, n, &DL(1), &D(1), &DU(1)); /* Compute the reciprocal of the condition number of A. */ sgtcon_(norm, n, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV(1), &anorm, rcond, &WORK(1), &IWORK(1), info); /* Return if the matrix is singular to working precision. */ if (*rcond < slamch_("Epsilon")) { *info = *n + 1; return 0; } /* Compute the solution vectors X. */ slacpy_("Full", n, nrhs, &B(1,1), ldb, &X(1,1), ldx); sgttrs_(trans, n, nrhs, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV(1), &X(1,1), ldx, info); /* Use iterative refinement to improve the computed solutions and compute error bounds and backward error estimates for them. */ sgtrfs_(trans, n, nrhs, &DL(1), &D(1), &DU(1), &DLF(1), &DF(1), &DUF(1), & DU2(1), &IPIV(1), &B(1,1), ldb, &X(1,1), ldx, &FERR(1), &BERR(1), &WORK(1), &IWORK(1), info); return 0; /* End of SGTSVX */ } /* sgtsvx_ */
/***************************************************************************//** * Parallel forward substitution for tile LU - static scheduling **/ void plasma_pztrsmpl(plasma_context_t *plasma) { PLASMA_desc A; PLASMA_desc B; PLASMA_desc L; int *IPIV; PLASMA_sequence *sequence; PLASMA_request *request; int k, m, n; int next_k; int next_m; int next_n; int ldak, ldbk, ldam, ldbm; int tempkm, tempnn, tempkmin, tempmm, tempkn; int ib; plasma_unpack_args_6(A, B, L, IPIV, sequence, request); if (sequence->status != PLASMA_SUCCESS) return; ss_init(B.mt, B.nt, -1); ib = PLASMA_IB; k = 0; n = PLASMA_RANK; while (n >= B.nt) { k++; n = n-B.nt; } m = k; while (k < min(A.mt, A.nt) && n < B.nt) { next_n = n; next_m = m; next_k = k; next_m++; if (next_m == A.mt) { next_n += PLASMA_SIZE; while (next_n >= B.nt && next_k < min(A.mt, A.nt)) { next_k++; next_n = next_n-B.nt; } next_m = next_k; } tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; tempkmin = k == min(A.mt, A.nt)-1 ? min(A.m, A.n)-k*A.mb : A.mb; tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb; tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; ldak = BLKLDD(A, k); ldbk = BLKLDD(B, k); ldam = BLKLDD(A, m); ldbm = BLKLDD(B, m); if (m == k) { ss_cond_wait(k, n, k-1); CORE_zgessm( tempkm, tempnn, tempkmin, ib, IPIV(k, k), A(k, k), ldak, B(k, n), ldbk); ss_cond_set(k, n, k); } else { ss_cond_wait(m, n, k-1); CORE_zssssm( A.nb, tempnn, tempmm, tempnn, tempkn, ib, B(k, n), ldbk, B(m, n), ldbm, L(m, k), L.mb, A(m, k), ldam, IPIV(m, k)); ss_cond_set(m, n, k); } n = next_n; m = next_m; k = next_k; } ss_finalize(); }
/***************************************************************************//** * Parallel tile LU factorization - dynamic scheduling **/ void plasma_psgetrf_incpiv_quark(PLASMA_desc A, PLASMA_desc L, int *IPIV, PLASMA_sequence *sequence, PLASMA_request *request) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; int k, m, n; int ldak, ldam; int tempkm, tempkn, tempmm, tempnn; int ib; plasma = plasma_context_self(); if (sequence->status != PLASMA_SUCCESS) return; QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); ib = PLASMA_IB; for (k = 0; k < min(A.mt, A.nt); k++) { tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb; tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb; ldak = BLKLDD(A, k); QUARK_CORE_sgetrf_incpiv( plasma->quark, &task_flags, tempkm, tempkn, ib, L.nb, A(k, k), ldak, IPIV(k, k), sequence, request, k == A.mt-1, A.nb*k); for (n = k+1; n < A.nt; n++) { tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb; QUARK_CORE_sgessm( plasma->quark, &task_flags, tempkm, tempnn, tempkm, ib, L.nb, IPIV(k, k), A(k, k), ldak, A(k, n), ldak); } for (m = k+1; m < A.mt; m++) { tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb; ldam = BLKLDD(A, m); QUARK_CORE_ststrf( plasma->quark, &task_flags, tempmm, tempkn, ib, L.nb, A(k, k), ldak, A(m, k), ldam, L(m, k), L.mb, IPIV(m, k), sequence, request, m == A.mt-1, A.nb*k); for (n = k+1; n < A.nt; n++) { tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb; QUARK_CORE_sssssm( plasma->quark, &task_flags, A.nb, tempnn, tempmm, tempnn, A.nb, ib, L.nb, A(k, n), ldak, A(m, n), ldam, L(m, k), L.mb, A(m, k), ldam, IPIV(m, k)); } } } }
/* Subroutine */ int cspsv_(char *uplo, integer *n, integer *nrhs, complex * ap, integer *ipiv, complex *b, integer *ldb, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= CSPSV computes the solution to a complex system of linear equations A * X = B, where A is an N-by-N symmetric matrix stored in packed format and X and B are N-by-NRHS matrices. The diagonal pivoting method is used to factor A as A = U * D * U**T, if UPLO = 'U', or A = L * D * L**T, if UPLO = 'L', where U (or L) is a product of permutation and unit upper (lower) triangular matrices, D is symmetric and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then used to solve the system of equations A * X = B. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. AP (input/output) COMPLEX array, dimension (N*(N+1)/2) On entry, the upper or lower triangle of the symmetric matrix A, packed columnwise in a linear array. The j-th column of A is stored in the array AP as follows: if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. See below for further details. On exit, the block diagonal matrix D and the multipliers used to obtain the factor U or L from the factorization A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as a packed triangular matrix in the same storage format as A. IPIV (output) INTEGER array, dimension (N) Details of the interchanges and the block structure of D, as determined by CSPTRF. If IPIV(k) > 0, then rows and columns k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. B (input/output) COMPLEX array, dimension (LDB,NRHS) On entry, the N-by-NRHS right hand side matrix B. On exit, if INFO = 0, the N-by-NRHS solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, D(i,i) is exactly zero. The factorization has been completed, but the block diagonal matrix D is exactly singular, so the solution could not be computed. Further Details =============== The packed storage scheme is illustrated by the following example when N = 4, UPLO = 'U': Two-dimensional storage of the symmetric matrix A: a11 a12 a13 a14 a22 a23 a24 a33 a34 (aij = aji) a44 Packed storage of the upper triangle of A: AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* System generated locals */ integer b_dim1, b_offset, i__1; /* Local variables */ extern logical lsame_(char *, char *); extern /* Subroutine */ int xerbla_(char *, integer *), csptrf_( char *, integer *, complex *, integer *, integer *), csptrs_(char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *); #define AP(I) ap[(I)-1] #define IPIV(I) ipiv[(I)-1] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] *info = 0; if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("CSPSV ", &i__1); return 0; } /* Compute the factorization A = U*D*U' or A = L*D*L'. */ csptrf_(uplo, n, &AP(1), &IPIV(1), info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. */ csptrs_(uplo, n, nrhs, &AP(1), &IPIV(1), &B(1,1), ldb, info); } return 0; /* End of CSPSV */ } /* cspsv_ */