bool CLinkMatrix::applyColumnPivot(CMatrix< C_FLOAT64 > & matrix, const C_INT & incr) const { if (matrix.numCols() < mRowPivots.size()) { return false; } C_INT N = matrix.numRows(); C_INT LDA = matrix.numCols(); C_INT K1 = 1; C_INT K2 = mRowPivots.size(); dlaswp_(&N, matrix.array(), &LDA, &K1, &K2, const_cast< C_INT * >(mSwapVector.array()), const_cast< C_INT * >(&incr)); return true; }
/* Subroutine */ int dgesc2_(integer *n, doublereal *a, integer *lda, doublereal *rhs, integer *ipiv, integer *jpiv, doublereal *scale) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1, d__2; /* Local variables */ integer i__, j; doublereal eps, temp; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *), dlabad_(doublereal *, doublereal *); extern doublereal dlamch_(char *); extern integer idamax_(integer *, doublereal *, integer *); doublereal bignum; extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); doublereal smlnum; /* -- LAPACK auxiliary routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGESC2 solves a system of linear equations */ /* A * X = scale* RHS */ /* with a general N-by-N matrix A using the LU factorization with */ /* complete pivoting computed by DGETC2. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the LU part of the factorization of the n-by-n */ /* matrix A computed by DGETC2: A = P * L * U * Q */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1, N). */ /* RHS (input/output) DOUBLE PRECISION array, dimension (N). */ /* On entry, the right hand side vector b. */ /* On exit, the solution vector X. */ /* IPIV (input) INTEGER array, dimension (N). */ /* The pivot indices; for 1 <= i <= N, row i of the */ /* matrix has been interchanged with row IPIV(i). */ /* JPIV (input) INTEGER array, dimension (N). */ /* The pivot indices; for 1 <= j <= N, column j of the */ /* matrix has been interchanged with column JPIV(j). */ /* SCALE (output) DOUBLE PRECISION */ /* On exit, SCALE contains the scale factor. SCALE is chosen */ /* 0 <= SCALE <= 1 to prevent owerflow in the solution. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Set constant to control owerflow */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --rhs; --ipiv; --jpiv; /* Function Body */ eps = dlamch_("P"); smlnum = dlamch_("S") / eps; bignum = 1. / smlnum; dlabad_(&smlnum, &bignum); /* Apply permutations IPIV to RHS */ i__1 = *n - 1; dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &ipiv[1], &c__1); /* Solve for L part */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { rhs[j] -= a[j + i__ * a_dim1] * rhs[i__]; /* L10: */ } /* L20: */ } /* Solve for U part */ *scale = 1.; /* Check for scaling */ i__ = idamax_(n, &rhs[1], &c__1); if (smlnum * 2. * (d__1 = rhs[i__], abs(d__1)) > (d__2 = a[*n + *n * a_dim1], abs(d__2))) { temp = .5 / (d__1 = rhs[i__], abs(d__1)); dscal_(n, &temp, &rhs[1], &c__1); *scale *= temp; } for (i__ = *n; i__ >= 1; --i__) { temp = 1. / a[i__ + i__ * a_dim1]; rhs[i__] *= temp; i__1 = *n; for (j = i__ + 1; j <= i__1; ++j) { rhs[i__] -= rhs[j] * (a[i__ + j * a_dim1] * temp); /* L30: */ } /* L40: */ } /* Apply permutations JPIV to the solution (RHS) */ i__1 = *n - 1; dlaswp_(&c__1, &rhs[1], lda, &c__1, &i__1, &jpiv[1], &c_n1); return 0; /* End of DGESC2 */ } /* dgesc2_ */
/* Subroutine */ int dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1; /* Local variables */ integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km, ju, kv, nw; extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); doublereal temp; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *), dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dcopy_( integer *, doublereal *, integer *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer * ); doublereal work13[4160] /* was [65][64] */, work31[4160] /* was [65][64] */; extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dgbtf2_( integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *); extern integer idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBTRF computes an LU factorization of a real m-by-n band matrix A */ /* using partial pivoting with row interchanges. */ /* This is the blocked version of the algorithm, calling Level 3 BLAS. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows KL+1 to */ /* 2*KL+KU+1; rows 1 to KL of the array need not be set. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */ /* On exit, details of the factorization: U is stored as an */ /* upper triangular band matrix with KL+KU superdiagonals in */ /* rows 1 to KL+KU+1, and the multipliers used during the */ /* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ /* See below for further details. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* Further Details */ /* =============== */ /* The band storage scheme is illustrated by the following example, when */ /* M = N = 6, KL = 2, KU = 1: */ /* On entry: On exit: */ /* * * * + + + * * * u14 u25 u36 */ /* * * + + + + * * u13 u24 u35 u46 */ /* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ /* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ /* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ /* a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ /* Array elements marked * are not used by the routine; elements marked */ /* + need not be set on entry, but are required by the routine to store */ /* elements of U because of fill-in resulting from the row interchanges. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* KV is the number of superdiagonals in the factor U, allowing for */ /* fill-in */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --ipiv; /* Function Body */ kv = *ku + *kl; /* Test the input parameters. */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < *kl + kv + 1) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("DGBTRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment */ nb = ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku); /* The block size must not exceed the limit set by the size of the */ /* local arrays WORK13 and WORK31. */ nb = min(nb,64); if (nb <= 1 || nb > *kl) { /* Use unblocked code */ dgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info); } else { /* Use blocked code */ /* Zero the superdiagonal elements of the work array WORK13 */ i__1 = nb; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work13[i__ + j * 65 - 66] = 0.; /* L10: */ } /* L20: */ } /* Zero the subdiagonal elements of the work array WORK31 */ i__1 = nb; for (j = 1; j <= i__1; ++j) { i__2 = nb; for (i__ = j + 1; i__ <= i__2; ++i__) { work31[i__ + j * 65 - 66] = 0.; /* L30: */ } /* L40: */ } /* Gaussian elimination with partial pivoting */ /* Set fill-in elements in columns KU+2 to KV to zero */ i__1 = min(kv,*n); for (j = *ku + 2; j <= i__1; ++j) { i__2 = *kl; for (i__ = kv - j + 2; i__ <= i__2; ++i__) { ab[i__ + j * ab_dim1] = 0.; /* L50: */ } /* L60: */ } /* JU is the index of the last column affected by the current */ /* stage of the factorization */ ju = 1; i__1 = min(*m,*n); i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = nb, i__4 = min(*m,*n) - j + 1; jb = min(i__3,i__4); /* The active part of the matrix is partitioned */ /* A11 A12 A13 */ /* A21 A22 A23 */ /* A31 A32 A33 */ /* Here A11, A21 and A31 denote the current block of JB columns */ /* which is about to be factorized. The number of rows in the */ /* partitioning are JB, I2, I3 respectively, and the numbers */ /* of columns are JB, J2, J3. The superdiagonal elements of A13 */ /* and the subdiagonal elements of A31 lie outside the band. */ /* Computing MIN */ i__3 = *kl - jb, i__4 = *m - j - jb + 1; i2 = min(i__3,i__4); /* Computing MIN */ i__3 = jb, i__4 = *m - j - *kl + 1; i3 = min(i__3,i__4); /* J2 and J3 are computed after JU has been updated. */ /* Factorize the current block of JB columns */ i__3 = j + jb - 1; for (jj = j; jj <= i__3; ++jj) { /* Set fill-in elements in column JJ+KV to zero */ if (jj + kv <= *n) { i__4 = *kl; for (i__ = 1; i__ <= i__4; ++i__) { ab[i__ + (jj + kv) * ab_dim1] = 0.; /* L70: */ } } /* Find pivot and test for singularity. KM is the number of */ /* subdiagonal elements in the current column. */ /* Computing MIN */ i__4 = *kl, i__5 = *m - jj; km = min(i__4,i__5); i__4 = km + 1; jp = idamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1); ipiv[jj] = jp + jj - j; if (ab[kv + jp + jj * ab_dim1] != 0.) { /* Computing MAX */ /* Computing MIN */ i__6 = jj + *ku + jp - 1; i__4 = ju, i__5 = min(i__6,*n); ju = max(i__4,i__5); if (jp != 1) { /* Apply interchange to columns J to J+JB-1 */ if (jp + jj - 1 < j + *kl) { i__4 = *ldab - 1; i__5 = *ldab - 1; dswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], & i__4, &ab[kv + jp + jj - j + j * ab_dim1], &i__5); } else { /* The interchange affects columns J to JJ-1 of A31 */ /* which are stored in the work array WORK31 */ i__4 = jj - j; i__5 = *ldab - 1; dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &i__5, &work31[jp + jj - j - *kl - 1], & c__65); i__4 = j + jb - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; dswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, & ab[kv + jp + jj * ab_dim1], &i__6); } } /* Compute multipliers */ d__1 = 1. / ab[kv + 1 + jj * ab_dim1]; dscal_(&km, &d__1, &ab[kv + 2 + jj * ab_dim1], &c__1); /* Update trailing submatrix within the band and within */ /* the current block. JM is the index of the last column */ /* which needs to be updated. */ /* Computing MIN */ i__4 = ju, i__5 = j + jb - 1; jm = min(i__4,i__5); if (jm > jj) { i__4 = jm - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; dger_(&km, &i__4, &c_b18, &ab[kv + 2 + jj * ab_dim1], &c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, & ab[kv + 1 + (jj + 1) * ab_dim1], &i__6); } } else { /* If pivot is zero, set INFO to the index of the pivot */ /* unless a zero pivot has already been found. */ if (*info == 0) { *info = jj; } } /* Copy current column of A31 into the work array WORK31 */ /* Computing MIN */ i__4 = jj - j + 1; nw = min(i__4,i3); if (nw > 0) { dcopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], & c__1, &work31[(jj - j + 1) * 65 - 65], &c__1); } /* L80: */ } if (j + jb <= *n) { /* Apply the row interchanges to the other blocks. */ /* Computing MIN */ i__3 = ju - j + 1; j2 = min(i__3,kv) - jb; /* Computing MAX */ i__3 = 0, i__4 = ju - j - kv + 1; j3 = max(i__3,i__4); /* Use DLASWP to apply the row interchanges to A12, A22, and */ /* A32. */ i__3 = *ldab - 1; dlaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, & c__1, &jb, &ipiv[j], &c__1); /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = ipiv[i__] + j - 1; /* L90: */ } /* Apply the row interchanges to A13, A23, and A33 */ /* columnwise. */ k2 = j - 1 + jb + j2; i__3 = j3; for (i__ = 1; i__ <= i__3; ++i__) { jj = k2 + i__; i__4 = j + jb - 1; for (ii = j + i__ - 1; ii <= i__4; ++ii) { ip = ipiv[ii]; if (ip != ii) { temp = ab[kv + 1 + ii - jj + jj * ab_dim1]; ab[kv + 1 + ii - jj + jj * ab_dim1] = ab[kv + 1 + ip - jj + jj * ab_dim1]; ab[kv + 1 + ip - jj + jj * ab_dim1] = temp; } /* L100: */ } /* L110: */ } /* Update the relevant part of the trailing submatrix */ if (j2 > 0) { /* Update A12 */ i__3 = *ldab - 1; i__4 = *ldab - 1; dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4); if (i2 > 0) { /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; i__5 = *ldab - 1; dgemm_("No transpose", "No transpose", &i2, &j2, &jb, &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4, &c_b31, &ab[kv + 1 + (j + jb) * ab_dim1], & i__5); } if (i3 > 0) { /* Update A32 */ i__3 = *ldab - 1; i__4 = *ldab - 1; dgemm_("No transpose", "No transpose", &i3, &j2, &jb, &c_b18, work31, &c__65, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &c_b31, &ab[kv + *kl + 1 - jb + (j + jb) * ab_dim1], &i__4); } } if (j3 > 0) { /* Copy the lower triangle of A13 into the work array */ /* WORK13 */ i__3 = j3; for (jj = 1; jj <= i__3; ++jj) { i__4 = jb; for (ii = jj; ii <= i__4; ++ii) { work13[ii + jj * 65 - 66] = ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1]; /* L120: */ } /* L130: */ } /* Update A13 in the work array */ i__3 = *ldab - 1; dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, work13, &c__65); if (i2 > 0) { /* Update A23 */ i__3 = *ldab - 1; i__4 = *ldab - 1; dgemm_("No transpose", "No transpose", &i2, &j3, &jb, &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, work13, &c__65, &c_b31, &ab[jb + 1 + (j + kv) * ab_dim1], &i__4); } if (i3 > 0) { /* Update A33 */ i__3 = *ldab - 1; dgemm_("No transpose", "No transpose", &i3, &j3, &jb, &c_b18, work31, &c__65, work13, &c__65, & c_b31, &ab[*kl + 1 + (j + kv) * ab_dim1], & i__3); } /* Copy the lower triangle of A13 back into place */ i__3 = j3; for (jj = 1; jj <= i__3; ++jj) { i__4 = jb; for (ii = jj; ii <= i__4; ++ii) { ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1] = work13[ii + jj * 65 - 66]; /* L140: */ } /* L150: */ } } } else { /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = ipiv[i__] + j - 1; /* L160: */ } } /* Partially undo the interchanges in the current block to */ /* restore the upper triangular form of A31 and copy the upper */ /* triangle of A31 back into place */ i__3 = j; for (jj = j + jb - 1; jj >= i__3; --jj) { jp = ipiv[jj] - jj + 1; if (jp != 1) { /* Apply interchange to columns J to JJ-1 */ if (jp + jj - 1 < j + *kl) { /* The interchange does not affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; i__6 = *ldab - 1; dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & i__5, &ab[kv + jp + jj - j + j * ab_dim1], & i__6); } else { /* The interchange does affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; dswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & i__5, &work31[jp + jj - j - *kl - 1], &c__65); } } /* Copy the current column of A31 back into place */ /* Computing MIN */ i__4 = i3, i__5 = jj - j + 1; nw = min(i__4,i__5); if (nw > 0) { dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[ kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1); } /* L170: */ } /* L180: */ } } return 0; /* End of DGBTRF */ } /* dgbtrf_ */
/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ integer i__, j, jb, nb; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer iinfo; extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dgetf2_( integer *, integer *, doublereal *, integer *, integer *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* March 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGETRF computes an LU factorization of a general M-by-N matrix A */ /* using partial pivoting with row interchanges. */ /* The factorization has the form */ /* A = P * L * U */ /* where P is a permutation matrix, L is lower triangular with unit */ /* diagonal elements (lower trapezoidal if m > n), and U is upper */ /* triangular (upper trapezoidal if m < n). */ /* This is the Crout Level 3 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix to be factored. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("DGETRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment. */ nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1); if (nb <= 1 || nb >= min(*m,*n)) { /* Use unblocked code. */ dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); } else { /* Use blocked code. */ i__1 = min(*m,*n); i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = min(*m,*n) - j + 1; jb = min(i__3,nb); /* Update current block. */ i__3 = *m - j + 1; i__4 = j - 1; dgemm_("No transpose", "No transpose", &i__3, &jb, &i__4, &c_b11, &a[j + a_dim1], lda, &a[j * a_dim1 + 1], lda, &c_b12, &a[ j + j * a_dim1], lda); /* Factor diagonal and subdiagonal blocks and test for exact */ /* singularity. */ i__3 = *m - j + 1; dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); /* Adjust INFO and the pivot indices. */ if (*info == 0 && iinfo > 0) { *info = iinfo + j - 1; } /* Computing MIN */ i__4 = *m, i__5 = j + jb - 1; i__3 = min(i__4,i__5); for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = j - 1 + ipiv[i__]; /* L10: */ } /* Apply interchanges to column 1:J-1 */ i__3 = j - 1; i__4 = j + jb - 1; dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); if (j + jb <= *n) { /* Apply interchanges to column J+JB:N */ i__3 = *n - j - jb + 1; i__4 = j + jb - 1; dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & ipiv[1], &c__1); i__3 = *n - j - jb + 1; i__4 = j - 1; dgemm_("No transpose", "No transpose", &jb, &i__3, &i__4, & c_b11, &a[j + a_dim1], lda, &a[(j + jb) * a_dim1 + 1], lda, &c_b12, &a[j + (j + jb) * a_dim1], lda); /* Compute block row of U. */ i__3 = *n - j - jb + 1; dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & c_b12, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda); } /* L20: */ } } return 0; /* End of DGETRF */ } /* dgetrf_ */
/* Subroutine */ int dlatdf_(integer *ijob, integer *n, doublereal *z__, integer *ldz, doublereal *rhs, doublereal *rdsum, doublereal *rdscal, integer *ipiv, integer *jpiv) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DLATDF uses the LU factorization of the n-by-n matrix Z computed by DGETC2 and computes a contribution to the reciprocal Dif-estimate by solving Z * x = b for x, and choosing the r.h.s. b such that the norm of x is as large as possible. On entry RHS = b holds the contribution from earlier solved sub-systems, and on return RHS = x. The factorization of Z returned by DGETC2 has the form Z = P*L*U*Q, where P and Q are permutation matrices. L is lower triangular with unit diagonal elements and U is upper triangular. Arguments ========= IJOB (input) INTEGER IJOB = 2: First compute an approximative null-vector e of Z using DGECON, e is normalized and solve for Zx = +-e - f with the sign giving the greater value of 2-norm(x). About 5 times as expensive as Default. IJOB .ne. 2: Local look ahead strategy where all entries of the r.h.s. b is choosen as either +1 or -1 (Default). N (input) INTEGER The number of columns of the matrix Z. Z (input) DOUBLE PRECISION array, dimension (LDZ, N) On entry, the LU part of the factorization of the n-by-n matrix Z computed by DGETC2: Z = P * L * U * Q LDZ (input) INTEGER The leading dimension of the array Z. LDA >= max(1, N). RHS (input/output) DOUBLE PRECISION array, dimension N. On entry, RHS contains contributions from other subsystems. On exit, RHS contains the solution of the subsystem with entries acoording to the value of IJOB (see above). RDSUM (input/output) DOUBLE PRECISION On entry, the sum of squares of computed contributions to the Dif-estimate under computation by DTGSYL, where the scaling factor RDSCAL (see below) has been factored out. On exit, the corresponding sum of squares updated with the contributions from the current sub-system. If TRANS = 'T' RDSUM is not touched. NOTE: RDSUM only makes sense when DTGSY2 is called by STGSYL. RDSCAL (input/output) DOUBLE PRECISION On entry, scaling factor used to prevent overflow in RDSUM. On exit, RDSCAL is updated w.r.t. the current contributions in RDSUM. If TRANS = 'T', RDSCAL is not touched. NOTE: RDSCAL only makes sense when DTGSY2 is called by DTGSYL. IPIV (input) INTEGER array, dimension (N). The pivot indices; for 1 <= i <= N, row i of the matrix has been interchanged with row IPIV(i). JPIV (input) INTEGER array, dimension (N). The pivot indices; for 1 <= j <= N, column j of the matrix has been interchanged with column JPIV(j). Further Details =============== Based on contributions by Bo Kagstrom and Peter Poromaa, Department of Computing Science, Umea University, S-901 87 Umea, Sweden. This routine is a further developed implementation of algorithm BSOLVE in [1] using complete pivoting in the LU factorization. [1] Bo Kagstrom and Lars Westin, Generalized Schur Methods with Condition Estimators for Solving the Generalized Sylvester Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. [2] Peter Poromaa, On Efficient and Robust Estimators for the Separation between two Regular Matrix Pairs with Applications in Condition Estimation. Report IMINF-95.05, Departement of Computing Science, Umea University, S-901 87 Umea, Sweden, 1995. ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b23 = 1.; static doublereal c_b37 = -1.; /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); static integer info; static doublereal temp, work[32]; static integer i__, j, k; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern doublereal dasum_(integer *, doublereal *, integer *); static doublereal pmone; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static doublereal sminu; static integer iwork[8]; static doublereal splus; extern /* Subroutine */ int dgesc2_(integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *); static doublereal bm, bp; extern /* Subroutine */ int dgecon_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); static doublereal xm[8], xp[8]; extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *), dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --rhs; --ipiv; --jpiv; /* Function Body */ if (*ijob != 2) { /* Apply permutations IPIV to RHS */ i__1 = *n - 1; dlaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &ipiv[1], &c__1); /* Solve for L-part choosing RHS either to +1 or -1. */ pmone = -1.; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { bp = rhs[j] + 1.; bm = rhs[j] - 1.; splus = 1.; /* Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and SMIN computed more efficiently than in BSOLVE [1]. */ i__2 = *n - j; splus += ddot_(&i__2, &z___ref(j + 1, j), &c__1, &z___ref(j + 1, j), &c__1); i__2 = *n - j; sminu = ddot_(&i__2, &z___ref(j + 1, j), &c__1, &rhs[j + 1], & c__1); splus *= rhs[j]; if (splus > sminu) { rhs[j] = bp; } else if (sminu > splus) { rhs[j] = bm; } else { /* In this case the updating sums are equal and we can choose RHS(J) +1 or -1. The first time this happens we choose -1, thereafter +1. This is a simple way to get good estimates of matrices like Byers well-known example (see [1]). (Not done in BSOLVE.) */ rhs[j] += pmone; pmone = 1.; } /* Compute the remaining r.h.s. */ temp = -rhs[j]; i__2 = *n - j; daxpy_(&i__2, &temp, &z___ref(j + 1, j), &c__1, &rhs[j + 1], & c__1); /* L10: */ } /* Solve for U-part, look-ahead for RHS(N) = +-1. This is not done in BSOLVE and will hopefully give us a better estimate because any ill-conditioning of the original matrix is transfered to U and not to L. U(N, N) is an approximation to sigma_min(LU). */ i__1 = *n - 1; dcopy_(&i__1, &rhs[1], &c__1, xp, &c__1); xp[*n - 1] = rhs[*n] + 1.; rhs[*n] += -1.; splus = 0.; sminu = 0.; for (i__ = *n; i__ >= 1; --i__) { temp = 1. / z___ref(i__, i__); xp[i__ - 1] *= temp; rhs[i__] *= temp; i__1 = *n; for (k = i__ + 1; k <= i__1; ++k) { xp[i__ - 1] -= xp[k - 1] * (z___ref(i__, k) * temp); rhs[i__] -= rhs[k] * (z___ref(i__, k) * temp); /* L20: */ } splus += (d__1 = xp[i__ - 1], abs(d__1)); sminu += (d__1 = rhs[i__], abs(d__1)); /* L30: */ } if (splus > sminu) { dcopy_(n, xp, &c__1, &rhs[1], &c__1); } /* Apply the permutations JPIV to the computed solution (RHS) */ i__1 = *n - 1; dlaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &jpiv[1], &c_n1); /* Compute the sum of squares */ dlassq_(n, &rhs[1], &c__1, rdscal, rdsum); } else { /* IJOB = 2, Compute approximate nullvector XM of Z */ dgecon_("I", n, &z__[z_offset], ldz, &c_b23, &temp, work, iwork, & info); dcopy_(n, &work[*n], &c__1, xm, &c__1); /* Compute RHS */ i__1 = *n - 1; dlaswp_(&c__1, xm, ldz, &c__1, &i__1, &ipiv[1], &c_n1); temp = 1. / sqrt(ddot_(n, xm, &c__1, xm, &c__1)); dscal_(n, &temp, xm, &c__1); dcopy_(n, xm, &c__1, xp, &c__1); daxpy_(n, &c_b23, &rhs[1], &c__1, xp, &c__1); daxpy_(n, &c_b37, xm, &c__1, &rhs[1], &c__1); dgesc2_(n, &z__[z_offset], ldz, &rhs[1], &ipiv[1], &jpiv[1], &temp); dgesc2_(n, &z__[z_offset], ldz, xp, &ipiv[1], &jpiv[1], &temp); if (dasum_(n, xp, &c__1) > dasum_(n, &rhs[1], &c__1)) { dcopy_(n, xp, &c__1, &rhs[1], &c__1); } /* Compute the sum of squares */ dlassq_(n, &rhs[1], &c__1, rdscal, rdsum); } return 0; /* End of DLATDF */ } /* dlatdf_ */
/* Subroutine */ int dget01_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *afac, integer *ldafac, integer *ipiv, doublereal * rwork, doublereal *resid) { /* System generated locals */ integer a_dim1, a_offset, afac_dim1, afac_offset, i__1, i__2; /* Local variables */ extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); static integer i__, j, k; static doublereal t; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *), dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static doublereal anorm; extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); static doublereal eps; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define afac_ref(a_1,a_2) afac[(a_2)*afac_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DGET01 reconstructs a matrix A from its L*U factorization and computes the residual norm(L*U - A) / ( N * norm(A) * EPS ), where EPS is the machine epsilon. Arguments ========== M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input) DOUBLE PRECISION array, dimension (LDA,N) The original M x N matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). AFAC (input/output) DOUBLE PRECISION array, dimension (LDAFAC,N) The factored form of the matrix A. AFAC contains the factors L and U from the L*U factorization as computed by DGETRF. Overwritten with the reconstructed matrix, and then with the difference L*U - A. LDAFAC (input) INTEGER The leading dimension of the array AFAC. LDAFAC >= max(1,M). IPIV (input) INTEGER array, dimension (N) The pivot indices from DGETRF. RWORK (workspace) DOUBLE PRECISION array, dimension (M) RESID (output) DOUBLE PRECISION norm(L*U - A) / ( N * norm(A) * EPS ) ===================================================================== Quick exit if M = 0 or N = 0. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; afac_dim1 = *ldafac; afac_offset = 1 + afac_dim1 * 1; afac -= afac_offset; --ipiv; --rwork; /* Function Body */ if (*m <= 0 || *n <= 0) { *resid = 0.; return 0; } /* Determine EPS and the norm of A. */ eps = dlamch_("Epsilon"); anorm = dlange_("1", m, n, &a[a_offset], lda, &rwork[1]); /* Compute the product L*U and overwrite AFAC with the result. A column at a time of the product is obtained, starting with column N. */ for (k = *n; k >= 1; --k) { if (k > *m) { dtrmv_("Lower", "No transpose", "Unit", m, &afac[afac_offset], ldafac, &afac_ref(1, k), &c__1); } else { /* Compute elements (K+1:M,K) */ t = afac_ref(k, k); if (k + 1 <= *m) { i__1 = *m - k; dscal_(&i__1, &t, &afac_ref(k + 1, k), &c__1); i__1 = *m - k; i__2 = k - 1; dgemv_("No transpose", &i__1, &i__2, &c_b11, &afac_ref(k + 1, 1), ldafac, &afac_ref(1, k), &c__1, &c_b11, &afac_ref( k + 1, k), &c__1); } /* Compute the (K,K) element */ i__1 = k - 1; afac_ref(k, k) = t + ddot_(&i__1, &afac_ref(k, 1), ldafac, & afac_ref(1, k), &c__1); /* Compute elements (1:K-1,K) */ i__1 = k - 1; dtrmv_("Lower", "No transpose", "Unit", &i__1, &afac[afac_offset], ldafac, &afac_ref(1, k), &c__1); } /* L10: */ } i__1 = min(*m,*n); dlaswp_(n, &afac[afac_offset], ldafac, &c__1, &i__1, &ipiv[1], &c_n1); /* Compute the difference L*U - A and store in AFAC. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { afac_ref(i__, j) = afac_ref(i__, j) - a_ref(i__, j); /* L20: */ } /* L30: */ } /* Compute norm( L*U - A ) / ( N * norm(A) * EPS ) */ *resid = dlange_("1", m, n, &afac[afac_offset], ldafac, &rwork[1]); if (anorm <= 0.) { if (*resid != 0.) { *resid = 1. / eps; } } else { *resid = *resid / (doublereal) (*n) / anorm / eps; } return 0; /* End of DGET01 */ } /* dget01_ */
/* Subroutine */ int dget01_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *afac, integer *ldafac, integer *ipiv, doublereal * rwork, doublereal *resid) { /* System generated locals */ integer a_dim1, a_offset, afac_dim1, afac_offset, i__1, i__2; /* Local variables */ integer i__, j, k; doublereal t, eps; extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *), dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal anorm; extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK test routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGET01 reconstructs a matrix A from its L*U factorization and */ /* computes the residual */ /* norm(L*U - A) / ( N * norm(A) * EPS ), */ /* where EPS is the machine epsilon. */ /* Arguments */ /* ========== */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The original M x N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* AFAC (input/output) DOUBLE PRECISION array, dimension (LDAFAC,N) */ /* The factored form of the matrix A. AFAC contains the factors */ /* L and U from the L*U factorization as computed by DGETRF. */ /* Overwritten with the reconstructed matrix, and then with the */ /* difference L*U - A. */ /* LDAFAC (input) INTEGER */ /* The leading dimension of the array AFAC. LDAFAC >= max(1,M). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from DGETRF. */ /* RWORK (workspace) DOUBLE PRECISION array, dimension (M) */ /* RESID (output) DOUBLE PRECISION */ /* norm(L*U - A) / ( N * norm(A) * EPS ) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick exit if M = 0 or N = 0. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; afac_dim1 = *ldafac; afac_offset = 1 + afac_dim1; afac -= afac_offset; --ipiv; --rwork; /* Function Body */ if (*m <= 0 || *n <= 0) { *resid = 0.; return 0; } /* Determine EPS and the norm of A. */ eps = dlamch_("Epsilon"); anorm = dlange_("1", m, n, &a[a_offset], lda, &rwork[1]); /* Compute the product L*U and overwrite AFAC with the result. */ /* A column at a time of the product is obtained, starting with */ /* column N. */ for (k = *n; k >= 1; --k) { if (k > *m) { dtrmv_("Lower", "No transpose", "Unit", m, &afac[afac_offset], ldafac, &afac[k * afac_dim1 + 1], &c__1); } else { /* Compute elements (K+1:M,K) */ t = afac[k + k * afac_dim1]; if (k + 1 <= *m) { i__1 = *m - k; dscal_(&i__1, &t, &afac[k + 1 + k * afac_dim1], &c__1); i__1 = *m - k; i__2 = k - 1; dgemv_("No transpose", &i__1, &i__2, &c_b11, &afac[k + 1 + afac_dim1], ldafac, &afac[k * afac_dim1 + 1], &c__1, & c_b11, &afac[k + 1 + k * afac_dim1], &c__1); } /* Compute the (K,K) element */ i__1 = k - 1; afac[k + k * afac_dim1] = t + ddot_(&i__1, &afac[k + afac_dim1], ldafac, &afac[k * afac_dim1 + 1], &c__1); /* Compute elements (1:K-1,K) */ i__1 = k - 1; dtrmv_("Lower", "No transpose", "Unit", &i__1, &afac[afac_offset], ldafac, &afac[k * afac_dim1 + 1], &c__1); } /* L10: */ } i__1 = min(*m,*n); dlaswp_(n, &afac[afac_offset], ldafac, &c__1, &i__1, &ipiv[1], &c_n1); /* Compute the difference L*U - A and store in AFAC. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { afac[i__ + j * afac_dim1] -= a[i__ + j * a_dim1]; /* L20: */ } /* L30: */ } /* Compute norm( L*U - A ) / ( N * norm(A) * EPS ) */ *resid = dlange_("1", m, n, &afac[afac_offset], ldafac, &rwork[1]); if (anorm <= 0.) { if (*resid != 0.) { *resid = 1. / eps; } } else { *resid = *resid / (doublereal) (*n) / anorm / eps; } return 0; /* End of DGET01 */ } /* dget01_ */
/* Subroutine */ int dgbtrf_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DGBTRF computes an LU factorization of a real m-by-n band matrix A using partial pivoting with row interchanges. This is the blocked version of the algorithm, calling Level 3 BLAS. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) On entry, the matrix A in band storage, in rows KL+1 to 2*KL+KU+1; rows 1 to KL of the array need not be set. The j-th column of A is stored in the j-th column of the array AB as follows: AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) On exit, details of the factorization: U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. See below for further details. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= 2*KL+KU+1. IPIV (output) INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = +i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. Further Details =============== The band storage scheme is illustrated by the following example, when M = N = 6, KL = 2, KU = 1: On entry: On exit: * * * + + + * * * u14 u25 u36 * * + + + + * * u13 u24 u35 u46 * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * a31 a42 a53 a64 * * m31 m42 m53 m64 * * VISArray elements marked * are not used by the routine; elements marked + need not be set on entry, but are required by the routine to store elements of U because of fill-in resulting from the row interchanges. ===================================================================== KV is the number of superdiagonals in the factor U, allowing for fill-in Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static integer c__65 = 65; static doublereal c_b18 = -1.; static doublereal c_b31 = 1.; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1; /* Local variables */ extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal temp; static integer i, j; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *), dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dcopy_( integer *, doublereal *, integer *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer * ); static doublereal work13[4160] /* was [65][64] */, work31[4160] /* was [65][64] */; extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer i2, i3, j2, j3, k2; extern /* Subroutine */ int dgbtf2_(integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *); static integer jb, nb, ii, jj, jm, ip, jp, km, ju, kv; extern integer idamax_(integer *, doublereal *, integer *); static integer nw; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); #define WORK13(I) work13[(I)] #define WAS(I) was[(I)] #define IPIV(I) ipiv[(I)-1] #define AB(I,J) ab[(I)-1 + ((J)-1)* ( *ldab)] kv = *ku + *kl; /* Test the input parameters. */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < *kl + kv + 1) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("DGBTRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment */ nb = ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku, 6L, 1L); /* The block size must not exceed the limit set by the size of the local arrays WORK13 and WORK31. */ nb = min(nb,64); if (nb <= 1 || nb > *kl) { /* Use unblocked code */ dgbtf2_(m, n, kl, ku, &AB(1,1), ldab, &IPIV(1), info); } else { /* Use blocked code Zero the superdiagonal elements of the work array WORK13 */ i__1 = nb; for (j = 1; j <= nb; ++j) { i__2 = j - 1; for (i = 1; i <= j-1; ++i) { WORK13(i + j * 65 - 66) = 0.; /* L10: */ } /* L20: */ } /* Zero the subdiagonal elements of the work array WORK31 */ i__1 = nb; for (j = 1; j <= nb; ++j) { i__2 = nb; for (i = j + 1; i <= nb; ++i) { work31[i + j * 65 - 66] = 0.; /* L30: */ } /* L40: */ } /* Gaussian elimination with partial pivoting Set fill-in elements in columns KU+2 to KV to zero */ i__1 = min(kv,*n); for (j = *ku + 2; j <= min(kv,*n); ++j) { i__2 = *kl; for (i = kv - j + 2; i <= *kl; ++i) { AB(i,j) = 0.; /* L50: */ } /* L60: */ } /* JU is the index of the last column affected by the current stage of the factorization */ ju = 1; i__1 = min(*m,*n); i__2 = nb; for (j = 1; nb < 0 ? j >= min(*m,*n) : j <= min(*m,*n); j += nb) { /* Computing MIN */ i__3 = nb, i__4 = min(*m,*n) - j + 1; jb = min(i__3,i__4); /* The active part of the matrix is partitioned A11 A12 A13 A21 A22 A23 A31 A32 A33 Here A11, A21 and A31 denote the current block of JB columns which is about to be factorized. The number of rows i n the partitioning are JB, I2, I3 respectively, and the num bers of columns are JB, J2, J3. The superdiagonal elements of A13 and the subdiagonal elements of A31 lie outside the b and. Computing MIN */ i__3 = *kl - jb, i__4 = *m - j - jb + 1; i2 = min(i__3,i__4); /* Computing MIN */ i__3 = jb, i__4 = *m - j - *kl + 1; i3 = min(i__3,i__4); /* J2 and J3 are computed after JU has been updated. Factorize the current block of JB columns */ i__3 = j + jb - 1; for (jj = j; jj <= j+jb-1; ++jj) { /* Set fill-in elements in column JJ+KV to zero */ if (jj + kv <= *n) { i__4 = *kl; for (i = 1; i <= *kl; ++i) { AB(i,jj+kv) = 0.; /* L70: */ } } /* Find pivot and test for singularity. KM is the number of subdiagonal elements in the current column. Computing MIN */ i__4 = *kl, i__5 = *m - jj; km = min(i__4,i__5); i__4 = km + 1; jp = idamax_(&i__4, &AB(kv+1,jj), &c__1); IPIV(jj) = jp + jj - j; if (AB(kv+jp,jj) != 0.) { /* Computing MAX Computing MIN */ i__6 = jj + *ku + jp - 1; i__4 = ju, i__5 = min(i__6,*n); ju = max(i__4,i__5); if (jp != 1) { /* Apply interchange to columns J t o J+JB-1 */ if (jp + jj - 1 < j + *kl) { i__4 = *ldab - 1; i__5 = *ldab - 1; dswap_(&jb, &AB(kv+1+jj-j,j), & i__4, &AB(kv+jp+jj-j,j), &i__5); } else { /* The interchange affects c olumns J to JJ-1 of A31 which are stored in the w ork array WORK31 */ i__4 = jj - j; i__5 = *ldab - 1; dswap_(&i__4, &AB(kv+1+jj-j,j), &i__5, &work31[jp + jj - j - *kl - 1], & c__65); i__4 = j + jb - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; dswap_(&i__4, &AB(kv+1,jj), &i__5, & AB(kv+jp,jj), &i__6); } } /* Compute multipliers */ d__1 = 1. / AB(kv+1,jj); dscal_(&km, &d__1, &AB(kv+2,jj), &c__1); /* Update trailing submatrix within the ba nd and within the current block. JM is the index of t he last column which needs to be updated. Computing MIN */ i__4 = ju, i__5 = j + jb - 1; jm = min(i__4,i__5); if (jm > jj) { i__4 = jm - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; dger_(&km, &i__4, &c_b18, &AB(kv+2,jj), &c__1, &AB(kv,jj+1), &i__5, & AB(kv+1,jj+1), &i__6); } } else { /* If pivot is zero, set INFO to the index of the pivot unless a zero pivot has already been fo und. */ if (*info == 0) { *info = jj; } } /* Copy current column of A31 into the work array WORK31 Computing MIN */ i__4 = jj - j + 1; nw = min(i__4,i3); if (nw > 0) { dcopy_(&nw, &AB(kv+*kl+1-jj+j,jj), & c__1, &work31[(jj - j + 1) * 65 - 65], &c__1); } /* L80: */ } if (j + jb <= *n) { /* Apply the row interchanges to the other blocks . Computing MIN */ i__3 = ju - j + 1; j2 = min(i__3,kv) - jb; /* Computing MAX */ i__3 = 0, i__4 = ju - j - kv + 1; j3 = max(i__3,i__4); /* Use DLASWP to apply the row interchanges to A1 2, A22, and A32. */ i__3 = *ldab - 1; dlaswp_(&j2, &AB(kv+1-jb,j+jb), &i__3, & c__1, &jb, &IPIV(j), &c__1); /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i = j; i <= j+jb-1; ++i) { IPIV(i) = IPIV(i) + j - 1; /* L90: */ } /* Apply the row interchanges to A13, A23, and A3 3 columnwise. */ k2 = j - 1 + jb + j2; i__3 = j3; for (i = 1; i <= j3; ++i) { jj = k2 + i; i__4 = j + jb - 1; for (ii = j + i - 1; ii <= j+jb-1; ++ii) { ip = IPIV(ii); if (ip != ii) { temp = AB(kv+1+ii-jj,jj); AB(kv+1+ii-jj,jj) = AB(kv+1+ip-jj,jj); AB(kv+1+ip-jj,jj) = temp; } /* L100: */ } /* L110: */ } /* Update the relevant part of the trailing subma trix */ if (j2 > 0) { /* Update A12 */ i__3 = *ldab - 1; i__4 = *ldab - 1; dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, &c_b31, &AB(kv+1,j), &i__3, &AB(kv+1-jb,j+jb), &i__4); if (i2 > 0) { /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; i__5 = *ldab - 1; dgemm_("No transpose", "No transpose", &i2, &j2, &jb, &c_b18, &AB(kv+1+jb,j), &i__3, &AB(kv+1-jb,j+jb), &i__4, &c_b31, &AB(kv+1,j+jb), & i__5); } if (i3 > 0) { /* Update A32 */ i__3 = *ldab - 1; i__4 = *ldab - 1; dgemm_("No transpose", "No transpose", &i3, &j2, &jb, &c_b18, work31, &c__65, &AB(kv+1-jb,j+jb), &i__3, &c_b31, &AB(kv+*kl+1-jb,j+jb), &i__4); } } if (j3 > 0) { /* Copy the lower triangle of A13 into the work array WORK13 */ i__3 = j3; for (jj = 1; jj <= j3; ++jj) { i__4 = jb; for (ii = jj; ii <= jb; ++ii) { WORK13(ii + jj * 65 - 66) = AB(ii-jj+1,jj+j+kv-1); /* L120: */ } /* L130: */ } /* Update A13 in the work array */ i__3 = *ldab - 1; dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, &c_b31, &AB(kv+1,j), &i__3, work13, &c__65); if (i2 > 0) { /* Update A23 */ i__3 = *ldab - 1; i__4 = *ldab - 1; dgemm_("No transpose", "No transpose", &i2, &j3, &jb, &c_b18, &AB(kv+1+jb,j), &i__3, work13, &c__65, &c_b31, &AB(jb+1,j+kv), &i__4); } if (i3 > 0) { /* Update A33 */ i__3 = *ldab - 1; dgemm_("No transpose", "No transpose", &i3, &j3, &jb, &c_b18, work31, &c__65, work13, &c__65, & c_b31, &AB(*kl+1,j+kv), & i__3); } /* Copy the lower triangle of A13 back int o place */ i__3 = j3; for (jj = 1; jj <= j3; ++jj) { i__4 = jb; for (ii = jj; ii <= jb; ++ii) { AB(ii-jj+1,jj+j+kv-1) = WORK13(ii + jj * 65 - 66); /* L140: */ } /* L150: */ } } } else { /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i = j; i <= j+jb-1; ++i) { IPIV(i) = IPIV(i) + j - 1; /* L160: */ } } /* Partially undo the interchanges in the current block to restore the upper triangular form of A31 and copy the upper triangle of A31 back into place */ i__3 = j; for (jj = j + jb - 1; jj >= j; --jj) { jp = IPIV(jj) - jj + 1; if (jp != 1) { /* Apply interchange to columns J to JJ-1 */ if (jp + jj - 1 < j + *kl) { /* The interchange does not affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; i__6 = *ldab - 1; dswap_(&i__4, &AB(kv+1+jj-j,j), & i__5, &AB(kv+jp+jj-j,j), & i__6); } else { /* The interchange does affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; dswap_(&i__4, &AB(kv+1+jj-j,j), & i__5, &work31[jp + jj - j - *kl - 1], &c__65); } } /* Copy the current column of A31 back into place Computing MIN */ i__4 = i3, i__5 = jj - j + 1; nw = min(i__4,i__5); if (nw > 0) { dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &AB(kv+*kl+1-jj+j,jj), &c__1); } /* L170: */ } /* L180: */ } } return 0; /* End of DGBTRF */ } /* dgbtrf_ */
/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * ldb, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DGETRS solves a system of linear equations A * X = B or A' * X = B with a general N-by-N matrix A using the LU factorization computed by DGETRF. Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A'* X = B (Transpose) = 'C': A'* X = B (Conjugate transpose = Transpose) N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. A (input) DOUBLE PRECISION array, dimension (LDA,N) The factors L and U from the factorization A = P*L*U as computed by DGETRF. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (input) INTEGER array, dimension (N) The pivot indices from DGETRF; for 1<=i<=N, row i of the matrix was interchanged with row IPIV(i). B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = 1.; static integer c_n1 = -1; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical lsame_(char *, char *); extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), xerbla_( char *, integer *), dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); static logical notran; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ *info = 0; notran = lsame_(trans, "N"); if (! notran && ! lsame_(trans, "T") && ! lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("DGETRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (notran) { /* Solve A * X = B. Apply row interchanges to the right hand sides. */ dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); /* Solve L*X = B, overwriting B with X. */ dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b12, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve U*X = B, overwriting B with X. */ dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b12, & a[a_offset], lda, &b[b_offset], ldb); } else { /* Solve A' * X = B. Solve U'*X = B, overwriting B with X. */ dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b12, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve L'*X = B, overwriting B with X. */ dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b12, &a[ a_offset], lda, &b[b_offset], ldb); /* Apply row interchanges to the solution vectors. */ dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); } return 0; /* End of DGETRS */ } /* dgetrs_ */
/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ integer i__, j, ipivstart, jpivstart, jp; doublereal tmp; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *), dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); integer kcols; doublereal sfmin; integer nstep; extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); integer kahead; extern doublereal dlamch_(char *); extern integer idamax_(integer *, doublereal *, integer *); extern logical disnan_(doublereal *); extern /* Subroutine */ int xerbla_(char *, integer *); integer npived; extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); integer kstart, ntopiv; /* -- LAPACK routine (version 3.X) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* May 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGETRF computes an LU factorization of a general M-by-N matrix A */ /* using partial pivoting with row interchanges. */ /* The factorization has the form */ /* A = P * L * U */ /* where P is a permutation matrix, L is lower triangular with unit */ /* diagonal elements (lower trapezoidal if m > n), and U is upper */ /* triangular (upper trapezoidal if m < n). */ /* This code implements an iterative version of Sivan Toledo's recursive */ /* LU algorithm[1]. For square matrices, this iterative versions should */ /* be within a factor of two of the optimum number of memory transfers. */ /* The pattern is as follows, with the large blocks of U being updated */ /* in one call to DTRSM, and the dotted lines denoting sections that */ /* have had all pending permutations applied: */ /* 1 2 3 4 5 6 7 8 */ /* +-+-+---+-------+------ */ /* | |1| | | */ /* |.+-+ 2 | | */ /* | | | | | */ /* |.|.+-+-+ 4 | */ /* | | | |1| | */ /* | | |.+-+ | */ /* | | | | | | */ /* |.|.|.|.+-+-+---+ 8 */ /* | | | | | |1| | */ /* | | | | |.+-+ 2 | */ /* | | | | | | | | */ /* | | | | |.|.+-+-+ */ /* | | | | | | | |1| */ /* | | | | | | |.+-+ */ /* | | | | | | | | | */ /* |.|.|.|.|.|.|.|.+----- */ /* | | | | | | | | | */ /* The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in */ /* the binary expansion of the current column. Each Schur update is */ /* applied as soon as the necessary portion of U is available. */ /* [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with */ /* Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), */ /* 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) */ /* On entry, the M-by-N matrix to be factored. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("DGETRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Compute machine safe minimum */ sfmin = dlamch_("S"); nstep = min(*m,*n); i__1 = nstep; for (j = 1; j <= i__1; ++j) { kahead = j & -j; kstart = j + 1 - kahead; /* Computing MIN */ i__2 = kahead, i__3 = *m - j; kcols = min(i__2,i__3); /* Find pivot. */ i__2 = *m - j + 1; jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); ipiv[j] = jp; /* Permute just this column. */ if (jp != j) { tmp = a[j + j * a_dim1]; a[j + j * a_dim1] = a[jp + j * a_dim1]; a[jp + j * a_dim1] = tmp; } /* Apply pending permutations to L */ ntopiv = 1; ipivstart = j; jpivstart = j - ntopiv; while(ntopiv < kahead) { dlaswp_(&ntopiv, &a[jpivstart * a_dim1 + 1], lda, &ipivstart, &j, &ipiv[1], &c__1); ipivstart -= ntopiv; ntopiv <<= 1; jpivstart -= ntopiv; } /* Permute U block to match L */ dlaswp_(&kcols, &a[(j + 1) * a_dim1 + 1], lda, &kstart, &j, &ipiv[1], &c__1); /* Factor the current column */ if (a[j + j * a_dim1] != 0. && ! disnan_(&a[j + j * a_dim1])) { if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) { i__2 = *m - j; d__1 = 1. / a[j + j * a_dim1]; dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); } else { i__2 = *m - j; for (i__ = 1; i__ <= i__2; ++i__) { a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; } } } else if (a[j + j * a_dim1] == 0. && *info == 0) { *info = j; } /* Solve for U block. */ dtrsm_("Left", "Lower", "No transpose", "Unit", &kahead, &kcols, & c_b12, &a[kstart + kstart * a_dim1], lda, &a[kstart + (j + 1) * a_dim1], lda); /* Schur complement. */ i__2 = *m - j; dgemm_("No transpose", "No transpose", &i__2, &kcols, &kahead, &c_b15, &a[j + 1 + kstart * a_dim1], lda, &a[kstart + (j + 1) * a_dim1], lda, &c_b12, &a[j + 1 + (j + 1) * a_dim1], lda); } /* Handle pivot permutations on the way out of the recursion */ npived = nstep & -nstep; j = nstep - npived; while(j > 0) { ntopiv = j & -j; i__1 = j + 1; dlaswp_(&ntopiv, &a[(j - ntopiv + 1) * a_dim1 + 1], lda, &i__1, & nstep, &ipiv[1], &c__1); j -= ntopiv; } /* If short and wide, handle the rest of the columns. */ if (*m < *n) { i__1 = *n - *m; dlaswp_(&i__1, &a[(*m + kcols + 1) * a_dim1 + 1], lda, &c__1, m, & ipiv[1], &c__1); i__1 = *n - *m; dtrsm_("Left", "Lower", "No transpose", "Unit", m, &i__1, &c_b12, &a[ a_offset], lda, &a[(*m + kcols + 1) * a_dim1 + 1], lda); } return 0; /* End of DGETRF */ } /* dgetrf_ */
/*< >*/ /* Subroutine */ int dlatdf_(integer *ijob, integer *n, doublereal *z__, integer *ldz, doublereal *rhs, doublereal *rdsum, doublereal *rdscal, integer *ipiv, integer *jpiv) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, j, k; doublereal bm, bp, xm[8], xp[8]; extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer info; doublereal temp, work[32]; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern doublereal dasum_(integer *, doublereal *, integer *); doublereal pmone; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); doublereal sminu; integer iwork[8]; doublereal splus; extern /* Subroutine */ int dgesc2_(integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *), dgecon_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, ftnlen), dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *), dlaswp_( integer *, doublereal *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK auxiliary routine (version 3.0) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* June 30, 1999 */ /* .. Scalar Arguments .. */ /*< INTEGER IJOB, LDZ, N >*/ /*< DOUBLE PRECISION RDSCAL, RDSUM >*/ /* .. */ /* .. Array Arguments .. */ /*< INTEGER IPIV( * ), JPIV( * ) >*/ /*< DOUBLE PRECISION RHS( * ), Z( LDZ, * ) >*/ /* .. */ /* Purpose */ /* ======= */ /* DLATDF uses the LU factorization of the n-by-n matrix Z computed by */ /* DGETC2 and computes a contribution to the reciprocal Dif-estimate */ /* by solving Z * x = b for x, and choosing the r.h.s. b such that */ /* the norm of x is as large as possible. On entry RHS = b holds the */ /* contribution from earlier solved sub-systems, and on return RHS = x. */ /* The factorization of Z returned by DGETC2 has the form Z = P*L*U*Q, */ /* where P and Q are permutation matrices. L is lower triangular with */ /* unit diagonal elements and U is upper triangular. */ /* Arguments */ /* ========= */ /* IJOB (input) INTEGER */ /* IJOB = 2: First compute an approximative null-vector e */ /* of Z using DGECON, e is normalized and solve for */ /* Zx = +-e - f with the sign giving the greater value */ /* of 2-norm(x). About 5 times as expensive as Default. */ /* IJOB .ne. 2: Local look ahead strategy where all entries of */ /* the r.h.s. b is choosen as either +1 or -1 (Default). */ /* N (input) INTEGER */ /* The number of columns of the matrix Z. */ /* Z (input) DOUBLE PRECISION array, dimension (LDZ, N) */ /* On entry, the LU part of the factorization of the n-by-n */ /* matrix Z computed by DGETC2: Z = P * L * U * Q */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDA >= max(1, N). */ /* RHS (input/output) DOUBLE PRECISION array, dimension N. */ /* On entry, RHS contains contributions from other subsystems. */ /* On exit, RHS contains the solution of the subsystem with */ /* entries acoording to the value of IJOB (see above). */ /* RDSUM (input/output) DOUBLE PRECISION */ /* On entry, the sum of squares of computed contributions to */ /* the Dif-estimate under computation by DTGSYL, where the */ /* scaling factor RDSCAL (see below) has been factored out. */ /* On exit, the corresponding sum of squares updated with the */ /* contributions from the current sub-system. */ /* If TRANS = 'T' RDSUM is not touched. */ /* NOTE: RDSUM only makes sense when DTGSY2 is called by STGSYL. */ /* RDSCAL (input/output) DOUBLE PRECISION */ /* On entry, scaling factor used to prevent overflow in RDSUM. */ /* On exit, RDSCAL is updated w.r.t. the current contributions */ /* in RDSUM. */ /* If TRANS = 'T', RDSCAL is not touched. */ /* NOTE: RDSCAL only makes sense when DTGSY2 is called by */ /* DTGSYL. */ /* IPIV (input) INTEGER array, dimension (N). */ /* The pivot indices; for 1 <= i <= N, row i of the */ /* matrix has been interchanged with row IPIV(i). */ /* JPIV (input) INTEGER array, dimension (N). */ /* The pivot indices; for 1 <= j <= N, column j of the */ /* matrix has been interchanged with column JPIV(j). */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* This routine is a further developed implementation of algorithm */ /* BSOLVE in [1] using complete pivoting in the LU factorization. */ /* [1] Bo Kagstrom and Lars Westin, */ /* Generalized Schur Methods with Condition Estimators for */ /* Solving the Generalized Sylvester Equation, IEEE Transactions */ /* on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. */ /* [2] Peter Poromaa, */ /* On Efficient and Robust Estimators for the Separation */ /* between two Regular Matrix Pairs with Applications in */ /* Condition Estimation. Report IMINF-95.05, Departement of */ /* Computing Science, Umea University, S-901 87 Umea, Sweden, 1995. */ /* ===================================================================== */ /* .. Parameters .. */ /*< INTEGER MAXDIM >*/ /*< PARAMETER ( MAXDIM = 8 ) >*/ /*< DOUBLE PRECISION ZERO, ONE >*/ /*< PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) >*/ /* .. */ /* .. Local Scalars .. */ /*< INTEGER I, INFO, J, K >*/ /*< DOUBLE PRECISION BM, BP, PMONE, SMINU, SPLUS, TEMP >*/ /* .. */ /* .. Local Arrays .. */ /*< INTEGER IWORK( MAXDIM ) >*/ /*< DOUBLE PRECISION WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM ) >*/ /* .. */ /* .. External Subroutines .. */ /*< >*/ /* .. */ /* .. External Functions .. */ /*< DOUBLE PRECISION DASUM, DDOT >*/ /*< EXTERNAL DASUM, DDOT >*/ /* .. */ /* .. Intrinsic Functions .. */ /*< INTRINSIC ABS, SQRT >*/ /* .. */ /* .. Executable Statements .. */ /*< IF( IJOB.NE.2 ) THEN >*/ /* Parameter adjustments */ z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --rhs; --ipiv; --jpiv; /* Function Body */ if (*ijob != 2) { /* Apply permutations IPIV to RHS */ /*< CALL DLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 ) >*/ i__1 = *n - 1; dlaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &ipiv[1], &c__1); /* Solve for L-part choosing RHS either to +1 or -1. */ /*< PMONE = -ONE >*/ pmone = -1.; /*< DO 10 J = 1, N - 1 >*/ i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { /*< BP = RHS( J ) + ONE >*/ bp = rhs[j] + 1.; /*< BM = RHS( J ) - ONE >*/ bm = rhs[j] - 1.; /*< SPLUS = ONE >*/ splus = 1.; /* Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and */ /* SMIN computed more efficiently than in BSOLVE [1]. */ /*< SPLUS = SPLUS + DDOT( N-J, Z( J+1, J ), 1, Z( J+1, J ), 1 ) >*/ i__2 = *n - j; splus += ddot_(&i__2, &z__[j + 1 + j * z_dim1], &c__1, &z__[j + 1 + j * z_dim1], &c__1); /*< SMINU = DDOT( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) >*/ i__2 = *n - j; sminu = ddot_(&i__2, &z__[j + 1 + j * z_dim1], &c__1, &rhs[j + 1], &c__1); /*< SPLUS = SPLUS*RHS( J ) >*/ splus *= rhs[j]; /*< IF( SPLUS.GT.SMINU ) THEN >*/ if (splus > sminu) { /*< RHS( J ) = BP >*/ rhs[j] = bp; /*< ELSE IF( SMINU.GT.SPLUS ) THEN >*/ } else if (sminu > splus) { /*< RHS( J ) = BM >*/ rhs[j] = bm; /*< ELSE >*/ } else { /* In this case the updating sums are equal and we can */ /* choose RHS(J) +1 or -1. The first time this happens */ /* we choose -1, thereafter +1. This is a simple way to */ /* get good estimates of matrices like Byers well-known */ /* example (see [1]). (Not done in BSOLVE.) */ /*< RHS( J ) = RHS( J ) + PMONE >*/ rhs[j] += pmone; /*< PMONE = ONE >*/ pmone = 1.; /*< END IF >*/ } /* Compute the remaining r.h.s. */ /*< TEMP = -RHS( J ) >*/ temp = -rhs[j]; /*< CALL DAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 ) >*/ i__2 = *n - j; daxpy_(&i__2, &temp, &z__[j + 1 + j * z_dim1], &c__1, &rhs[j + 1], &c__1); /*< 10 CONTINUE >*/ /* L10: */ } /* Solve for U-part, look-ahead for RHS(N) = +-1. This is not done */ /* in BSOLVE and will hopefully give us a better estimate because */ /* any ill-conditioning of the original matrix is transfered to U */ /* and not to L. U(N, N) is an approximation to sigma_min(LU). */ /*< CALL DCOPY( N-1, RHS, 1, XP, 1 ) >*/ i__1 = *n - 1; dcopy_(&i__1, &rhs[1], &c__1, xp, &c__1); /*< XP( N ) = RHS( N ) + ONE >*/ xp[*n - 1] = rhs[*n] + 1.; /*< RHS( N ) = RHS( N ) - ONE >*/ rhs[*n] += -1.; /*< SPLUS = ZERO >*/ splus = 0.; /*< SMINU = ZERO >*/ sminu = 0.; /*< DO 30 I = N, 1, -1 >*/ for (i__ = *n; i__ >= 1; --i__) { /*< TEMP = ONE / Z( I, I ) >*/ temp = 1. / z__[i__ + i__ * z_dim1]; /*< XP( I ) = XP( I )*TEMP >*/ xp[i__ - 1] *= temp; /*< RHS( I ) = RHS( I )*TEMP >*/ rhs[i__] *= temp; /*< DO 20 K = I + 1, N >*/ i__1 = *n; for (k = i__ + 1; k <= i__1; ++k) { /*< XP( I ) = XP( I ) - XP( K )*( Z( I, K )*TEMP ) >*/ xp[i__ - 1] -= xp[k - 1] * (z__[i__ + k * z_dim1] * temp); /*< RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP ) >*/ rhs[i__] -= rhs[k] * (z__[i__ + k * z_dim1] * temp); /*< 20 CONTINUE >*/ /* L20: */ } /*< SPLUS = SPLUS + ABS( XP( I ) ) >*/ splus += (d__1 = xp[i__ - 1], abs(d__1)); /*< SMINU = SMINU + ABS( RHS( I ) ) >*/ sminu += (d__1 = rhs[i__], abs(d__1)); /*< 30 CONTINUE >*/ /* L30: */ } /*< >*/ if (splus > sminu) { dcopy_(n, xp, &c__1, &rhs[1], &c__1); } /* Apply the permutations JPIV to the computed solution (RHS) */ /*< CALL DLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 ) >*/ i__1 = *n - 1; dlaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &jpiv[1], &c_n1); /* Compute the sum of squares */ /*< CALL DLASSQ( N, RHS, 1, RDSCAL, RDSUM ) >*/ dlassq_(n, &rhs[1], &c__1, rdscal, rdsum); /*< ELSE >*/ } else { /* IJOB = 2, Compute approximate nullvector XM of Z */ /*< CALL DGECON( 'I', N, Z, LDZ, ONE, TEMP, WORK, IWORK, INFO ) >*/ dgecon_("I", n, &z__[z_offset], ldz, &c_b23, &temp, work, iwork, & info, (ftnlen)1); /*< CALL DCOPY( N, WORK( N+1 ), 1, XM, 1 ) >*/ dcopy_(n, &work[*n], &c__1, xm, &c__1); /* Compute RHS */ /*< CALL DLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 ) >*/ i__1 = *n - 1; dlaswp_(&c__1, xm, ldz, &c__1, &i__1, &ipiv[1], &c_n1); /*< TEMP = ONE / SQRT( DDOT( N, XM, 1, XM, 1 ) ) >*/ temp = 1. / sqrt(ddot_(n, xm, &c__1, xm, &c__1)); /*< CALL DSCAL( N, TEMP, XM, 1 ) >*/ dscal_(n, &temp, xm, &c__1); /*< CALL DCOPY( N, XM, 1, XP, 1 ) >*/ dcopy_(n, xm, &c__1, xp, &c__1); /*< CALL DAXPY( N, ONE, RHS, 1, XP, 1 ) >*/ daxpy_(n, &c_b23, &rhs[1], &c__1, xp, &c__1); /*< CALL DAXPY( N, -ONE, XM, 1, RHS, 1 ) >*/ daxpy_(n, &c_b37, xm, &c__1, &rhs[1], &c__1); /*< CALL DGESC2( N, Z, LDZ, RHS, IPIV, JPIV, TEMP ) >*/ dgesc2_(n, &z__[z_offset], ldz, &rhs[1], &ipiv[1], &jpiv[1], &temp); /*< CALL DGESC2( N, Z, LDZ, XP, IPIV, JPIV, TEMP ) >*/ dgesc2_(n, &z__[z_offset], ldz, xp, &ipiv[1], &jpiv[1], &temp); /*< >*/ if (dasum_(n, xp, &c__1) > dasum_(n, &rhs[1], &c__1)) { dcopy_(n, xp, &c__1, &rhs[1], &c__1); } /* Compute the sum of squares */ /*< CALL DLASSQ( N, RHS, 1, RDSCAL, RDSUM ) >*/ dlassq_(n, &rhs[1], &c__1, rdscal, rdsum); /*< END IF >*/ } /*< RETURN >*/ return 0; /* End of DLATDF */ /*< END >*/ } /* dlatdf_ */