/* Subroutine */ int dgbtf2_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DGBTF2 computes an LU factorization of a real m-by-n band matrix A using partial pivoting with row interchanges. This is the unblocked version of the algorithm, calling Level 2 BLAS. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) On entry, the matrix A in band storage, in rows KL+1 to 2*KL+KU+1; rows 1 to KL of the array need not be set. The j-th column of A is stored in the j-th column of the array AB as follows: AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) On exit, details of the factorization: U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. See below for further details. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= 2*KL+KU+1. IPIV (output) INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = +i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. Further Details =============== The band storage scheme is illustrated by the following example, when M = N = 6, KL = 2, KU = 1: On entry: On exit: * * * + + + * * * u14 u25 u36 * * + + + + * * u13 u24 u35 u46 * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * a31 a42 a53 a64 * * m31 m42 m53 m64 * * Array elements marked * are not used by the routine; elements marked + need not be set on entry, but are required by the routine to store elements of U, because of fill-in resulting from the row interchanges. ===================================================================== KV is the number of superdiagonals in the factor U, allowing for fill-in. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b9 = -1.; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; doublereal d__1; /* Local variables */ extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); static integer i__, j; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer *); static integer km, jp, ju, kv; extern integer idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int xerbla_(char *, integer *); #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --ipiv; /* Function Body */ kv = *ku + *kl; /* Test the input parameters. */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < *kl + kv + 1) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("DGBTF2", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Gaussian elimination with partial pivoting Set fill-in elements in columns KU+2 to KV to zero. */ i__1 = min(kv,*n); for (j = *ku + 2; j <= i__1; ++j) { i__2 = *kl; for (i__ = kv - j + 2; i__ <= i__2; ++i__) { ab_ref(i__, j) = 0.; /* L10: */ } /* L20: */ } /* JU is the index of the last column affected by the current stage of the factorization. */ ju = 1; i__1 = min(*m,*n); for (j = 1; j <= i__1; ++j) { /* Set fill-in elements in column J+KV to zero. */ if (j + kv <= *n) { i__2 = *kl; for (i__ = 1; i__ <= i__2; ++i__) { ab_ref(i__, j + kv) = 0.; /* L30: */ } } /* Find pivot and test for singularity. KM is the number of subdiagonal elements in the current column. Computing MIN */ i__2 = *kl, i__3 = *m - j; km = min(i__2,i__3); i__2 = km + 1; jp = idamax_(&i__2, &ab_ref(kv + 1, j), &c__1); ipiv[j] = jp + j - 1; if (ab_ref(kv + jp, j) != 0.) { /* Computing MAX Computing MIN */ i__4 = j + *ku + jp - 1; i__2 = ju, i__3 = min(i__4,*n); ju = max(i__2,i__3); /* Apply interchange to columns J to JU. */ if (jp != 1) { i__2 = ju - j + 1; i__3 = *ldab - 1; i__4 = *ldab - 1; dswap_(&i__2, &ab_ref(kv + jp, j), &i__3, &ab_ref(kv + 1, j), &i__4); } if (km > 0) { /* Compute multipliers. */ d__1 = 1. / ab_ref(kv + 1, j); dscal_(&km, &d__1, &ab_ref(kv + 2, j), &c__1); /* Update trailing submatrix within the band. */ if (ju > j) { i__2 = ju - j; i__3 = *ldab - 1; i__4 = *ldab - 1; dger_(&km, &i__2, &c_b9, &ab_ref(kv + 2, j), &c__1, & ab_ref(kv, j + 1), &i__3, &ab_ref(kv + 1, j + 1), &i__4); } } } else { /* If pivot is zero, set INFO to the index of the pivot unless a zero pivot has already been found. */ if (*info == 0) { *info = j; } } /* L40: */ } return 0; /* End of DGBTF2 */ } /* dgbtf2_ */
/* Subroutine */ int dlaqgb_(integer *m, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, doublereal *r__, doublereal *c__, doublereal *rowcnd, doublereal *colcnd, doublereal *amax, char *equed) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DLAQGB equilibrates a general M by N band matrix A with KL subdiagonals and KU superdiagonals using the row and scaling factors in the vectors R and C. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) On entry, the matrix A in band storage, in rows 1 to KL+KU+1. The j-th column of A is stored in the j-th column of the array AB as follows: AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) On exit, the equilibrated matrix, in the same storage format as A. See EQUED for the form of the equilibrated matrix. LDAB (input) INTEGER The leading dimension of the array AB. LDA >= KL+KU+1. R (output) DOUBLE PRECISION array, dimension (M) The row scale factors for A. C (output) DOUBLE PRECISION array, dimension (N) The column scale factors for A. ROWCND (output) DOUBLE PRECISION Ratio of the smallest R(i) to the largest R(i). COLCND (output) DOUBLE PRECISION Ratio of the smallest C(i) to the largest C(i). AMAX (input) DOUBLE PRECISION Absolute value of largest matrix entry. EQUED (output) CHARACTER*1 Specifies the form of equilibration that was done. = 'N': No equilibration = 'R': Row equilibration, i.e., A has been premultiplied by diag(R). = 'C': Column equilibration, i.e., A has been postmultiplied by diag(C). = 'B': Both row and column equilibration, i.e., A has been replaced by diag(R) * A * diag(C). Internal Parameters =================== THRESH is a threshold value used to decide if row or column scaling should be done based on the ratio of the row or column scaling factors. If ROWCND < THRESH, row scaling is done, and if COLCND < THRESH, column scaling is done. LARGE and SMALL are threshold values used to decide if row scaling should be done based on the absolute size of the largest matrix element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. ===================================================================== Quick return if possible Parameter adjustments */ /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; /* Local variables */ static integer i__, j; static doublereal large, small, cj; extern doublereal dlamch_(char *); #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --r__; --c__; /* Function Body */ if (*m <= 0 || *n <= 0) { *(unsigned char *)equed = 'N'; return 0; } /* Initialize LARGE and SMALL. */ small = dlamch_("Safe minimum") / dlamch_("Precision"); large = 1. / small; if (*rowcnd >= .1 && *amax >= small && *amax <= large) { /* No row scaling */ if (*colcnd >= .1) { /* No column scaling */ *(unsigned char *)equed = 'N'; } else { /* Column scaling */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = c__[j]; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__4 = min(i__5,i__6); for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { ab_ref(*ku + 1 + i__ - j, j) = cj * ab_ref(*ku + 1 + i__ - j, j); /* L10: */ } /* L20: */ } *(unsigned char *)equed = 'C'; } } else if (*colcnd >= .1) { /* Row scaling, no column scaling */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__4 = 1, i__2 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__3 = min(i__5,i__6); for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { ab_ref(*ku + 1 + i__ - j, j) = r__[i__] * ab_ref(*ku + 1 + i__ - j, j); /* L30: */ } /* L40: */ } *(unsigned char *)equed = 'R'; } else { /* Row and column scaling */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = c__[j]; /* Computing MAX */ i__3 = 1, i__4 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__2 = min(i__5,i__6); for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) { ab_ref(*ku + 1 + i__ - j, j) = cj * r__[i__] * ab_ref(*ku + 1 + i__ - j, j); /* L50: */ } /* L60: */ } *(unsigned char *)equed = 'B'; } return 0; /* End of DLAQGB */ } /* dlaqgb_ */
/* Subroutine */ int zpbtrf_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZPBTRF computes the Cholesky factorization of a complex Hermitian positive definite band matrix A. The factorization has the form A = U**H * U, if UPLO = 'U', or A = L * L**H, if UPLO = 'L', where U is an upper triangular matrix and L is lower triangular. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of superdiagonals of the matrix A if UPLO = 'U', or the number of subdiagonals if UPLO = 'L'. KD >= 0. AB (input/output) COMPLEX*16 array, dimension (LDAB,N) On entry, the upper or lower triangle of the Hermitian band matrix A, stored in the first KD+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). On exit, if INFO = 0, the triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H of the band matrix A, in the same storage format as A. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the leading minor of order i is not positive definite, and the factorization could not be completed. Further Details =============== The band storage scheme is illustrated by the following example, when N = 6, KD = 2, and UPLO = 'U': On entry: On exit: * * a13 a24 a35 a46 * * u13 u24 u35 u46 * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 Similarly, if UPLO = 'L' the format of A is as follows: On entry: On exit: a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * a31 a42 a53 a64 * * l31 l42 l53 l64 * * Array elements marked * are not used by the routine. Contributed by Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static doublecomplex c_b1 = {1.,0.}; static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b21 = -1.; static doublereal c_b22 = 1.; static integer c__33 = 33; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublecomplex z__1; /* Local variables */ static doublecomplex work[1056] /* was [33][32] */; static integer i__, j; extern logical lsame_(char *, char *); extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *); static integer i2, i3; extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *), zpbtf2_(char *, integer *, integer *, doublecomplex *, integer *, integer *); static integer ib, nb, ii, jj; extern /* Subroutine */ int zpotf2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); #define work_subscr(a_1,a_2) (a_2)*33 + a_1 - 34 #define work_ref(a_1,a_2) work[work_subscr(a_1,a_2)] #define ab_subscr(a_1,a_2) (a_2)*ab_dim1 + a_1 #define ab_ref(a_1,a_2) ab[ab_subscr(a_1,a_2)] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; /* Function Body */ *info = 0; if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*ldab < *kd + 1) { *info = -5; } if (*info != 0) { i__1 = -(*info); xerbla_("ZPBTRF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment */ nb = ilaenv_(&c__1, "ZPBTRF", uplo, n, kd, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); /* The block size must not exceed the semi-bandwidth KD, and must not exceed the limit set by the size of the local array WORK. */ nb = min(nb,32); if (nb <= 1 || nb > *kd) { /* Use unblocked code */ zpbtf2_(uplo, n, kd, &ab[ab_offset], ldab, info); } else { /* Use blocked code */ if (lsame_(uplo, "U")) { /* Compute the Cholesky factorization of a Hermitian band matrix, given the upper triangle of the matrix in band storage. Zero the upper triangle of the work array. */ i__1 = nb; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = work_subscr(i__, j); work[i__3].r = 0., work[i__3].i = 0.; /* L10: */ } /* L20: */ } /* Process the band matrix one diagonal block at a time. */ i__1 = *n; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = nb, i__4 = *n - i__ + 1; ib = min(i__3,i__4); /* Factorize the diagonal block */ i__3 = *ldab - 1; zpotf2_(uplo, &ib, &ab_ref(*kd + 1, i__), &i__3, &ii); if (ii != 0) { *info = i__ + ii - 1; goto L150; } if (i__ + ib <= *n) { /* Update the relevant part of the trailing submatrix. If A11 denotes the diagonal block which has just been factorized, then we need to update the remaining blocks in the diagram: A11 A12 A13 A22 A23 A33 The numbers of rows and columns in the partitioning are IB, I2, I3 respectively. The blocks A12, A22 and A23 are empty if IB = KD. The upper triangle of A13 lies outside the band. Computing MIN */ i__3 = *kd - ib, i__4 = *n - i__ - ib + 1; i2 = min(i__3,i__4); /* Computing MIN */ i__3 = ib, i__4 = *n - i__ - *kd + 1; i3 = min(i__3,i__4); if (i2 > 0) { /* Update A12 */ i__3 = *ldab - 1; i__4 = *ldab - 1; ztrsm_("Left", "Upper", "Conjugate transpose", "Non-" "unit", &ib, &i2, &c_b1, &ab_ref(*kd + 1, i__), &i__3, &ab_ref(*kd + 1 - ib, i__ + ib), & i__4); /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; zherk_("Upper", "Conjugate transpose", &i2, &ib, & c_b21, &ab_ref(*kd + 1 - ib, i__ + ib), &i__3, &c_b22, &ab_ref(*kd + 1, i__ + ib), &i__4); } if (i3 > 0) { /* Copy the lower triangle of A13 into the work array. */ i__3 = i3; for (jj = 1; jj <= i__3; ++jj) { i__4 = ib; for (ii = jj; ii <= i__4; ++ii) { i__5 = work_subscr(ii, jj); i__6 = ab_subscr(ii - jj + 1, jj + i__ + *kd - 1); work[i__5].r = ab[i__6].r, work[i__5].i = ab[ i__6].i; /* L30: */ } /* L40: */ } /* Update A13 (in the work array). */ i__3 = *ldab - 1; ztrsm_("Left", "Upper", "Conjugate transpose", "Non-" "unit", &ib, &i3, &c_b1, &ab_ref(*kd + 1, i__), &i__3, work, &c__33); /* Update A23 */ if (i2 > 0) { z__1.r = -1., z__1.i = 0.; i__3 = *ldab - 1; i__4 = *ldab - 1; zgemm_("Conjugate transpose", "No transpose", &i2, &i3, &ib, &z__1, &ab_ref(*kd + 1 - ib, i__ + ib), &i__3, work, &c__33, &c_b1, & ab_ref(ib + 1, i__ + *kd), &i__4); } /* Update A33 */ i__3 = *ldab - 1; zherk_("Upper", "Conjugate transpose", &i3, &ib, & c_b21, work, &c__33, &c_b22, &ab_ref(*kd + 1, i__ + *kd), &i__3); /* Copy the lower triangle of A13 back into place. */ i__3 = i3; for (jj = 1; jj <= i__3; ++jj) { i__4 = ib; for (ii = jj; ii <= i__4; ++ii) { i__5 = ab_subscr(ii - jj + 1, jj + i__ + *kd - 1); i__6 = work_subscr(ii, jj); ab[i__5].r = work[i__6].r, ab[i__5].i = work[ i__6].i; /* L50: */ } /* L60: */ } } } /* L70: */ } } else { /* Compute the Cholesky factorization of a Hermitian band matrix, given the lower triangle of the matrix in band storage. Zero the lower triangle of the work array. */ i__2 = nb; for (j = 1; j <= i__2; ++j) { i__1 = nb; for (i__ = j + 1; i__ <= i__1; ++i__) { i__3 = work_subscr(i__, j); work[i__3].r = 0., work[i__3].i = 0.; /* L80: */ } /* L90: */ } /* Process the band matrix one diagonal block at a time. */ i__2 = *n; i__1 = nb; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* Computing MIN */ i__3 = nb, i__4 = *n - i__ + 1; ib = min(i__3,i__4); /* Factorize the diagonal block */ i__3 = *ldab - 1; zpotf2_(uplo, &ib, &ab_ref(1, i__), &i__3, &ii); if (ii != 0) { *info = i__ + ii - 1; goto L150; } if (i__ + ib <= *n) { /* Update the relevant part of the trailing submatrix. If A11 denotes the diagonal block which has just been factorized, then we need to update the remaining blocks in the diagram: A11 A21 A22 A31 A32 A33 The numbers of rows and columns in the partitioning are IB, I2, I3 respectively. The blocks A21, A22 and A32 are empty if IB = KD. The lower triangle of A31 lies outside the band. Computing MIN */ i__3 = *kd - ib, i__4 = *n - i__ - ib + 1; i2 = min(i__3,i__4); /* Computing MIN */ i__3 = ib, i__4 = *n - i__ - *kd + 1; i3 = min(i__3,i__4); if (i2 > 0) { /* Update A21 */ i__3 = *ldab - 1; i__4 = *ldab - 1; ztrsm_("Right", "Lower", "Conjugate transpose", "Non" "-unit", &i2, &ib, &c_b1, &ab_ref(1, i__), & i__3, &ab_ref(ib + 1, i__), &i__4); /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; zherk_("Lower", "No transpose", &i2, &ib, &c_b21, & ab_ref(ib + 1, i__), &i__3, &c_b22, &ab_ref(1, i__ + ib), &i__4); } if (i3 > 0) { /* Copy the upper triangle of A31 into the work array. */ i__3 = ib; for (jj = 1; jj <= i__3; ++jj) { i__4 = min(jj,i3); for (ii = 1; ii <= i__4; ++ii) { i__5 = work_subscr(ii, jj); i__6 = ab_subscr(*kd + 1 - jj + ii, jj + i__ - 1); work[i__5].r = ab[i__6].r, work[i__5].i = ab[ i__6].i; /* L100: */ } /* L110: */ } /* Update A31 (in the work array). */ i__3 = *ldab - 1; ztrsm_("Right", "Lower", "Conjugate transpose", "Non" "-unit", &i3, &ib, &c_b1, &ab_ref(1, i__), & i__3, work, &c__33); /* Update A32 */ if (i2 > 0) { z__1.r = -1., z__1.i = 0.; i__3 = *ldab - 1; i__4 = *ldab - 1; zgemm_("No transpose", "Conjugate transpose", &i3, &i2, &ib, &z__1, work, &c__33, &ab_ref( ib + 1, i__), &i__3, &c_b1, &ab_ref(*kd + 1 - ib, i__ + ib), &i__4); } /* Update A33 */ i__3 = *ldab - 1; zherk_("Lower", "No transpose", &i3, &ib, &c_b21, work, &c__33, &c_b22, &ab_ref(1, i__ + *kd), & i__3); /* Copy the upper triangle of A31 back into place. */ i__3 = ib; for (jj = 1; jj <= i__3; ++jj) { i__4 = min(jj,i3); for (ii = 1; ii <= i__4; ++ii) { i__5 = ab_subscr(*kd + 1 - jj + ii, jj + i__ - 1); i__6 = work_subscr(ii, jj); ab[i__5].r = work[i__6].r, ab[i__5].i = work[ i__6].i; /* L120: */ } /* L130: */ } } } /* L140: */ } } } return 0; L150: return 0; /* End of ZPBTRF */ } /* zpbtrf_ */
/* Subroutine */ int cgbt05_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, complex *ab, integer *ldab, complex *b, integer * ldb, complex *x, integer *ldx, complex *xact, integer *ldxact, real * ferr, real *berr, real *reslts) { /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, x_dim1, x_offset, xact_dim1, xact_offset, i__1, i__2, i__3, i__4, i__5; real r__1, r__2, r__3, r__4; complex q__1, q__2; /* Builtin functions */ double r_imag(complex *); /* Local variables */ static real diff, axbi; static integer imax; static real unfl, ovfl; static integer i__, j, k; extern logical lsame_(char *, char *); static real xnorm; extern integer icamax_(integer *, complex *, integer *); extern doublereal slamch_(char *); static integer nz; static real errbnd; static logical notran; static real eps, tmp; #define xact_subscr(a_1,a_2) (a_2)*xact_dim1 + a_1 #define xact_ref(a_1,a_2) xact[xact_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define x_subscr(a_1,a_2) (a_2)*x_dim1 + a_1 #define x_ref(a_1,a_2) x[x_subscr(a_1,a_2)] #define ab_subscr(a_1,a_2) (a_2)*ab_dim1 + a_1 #define ab_ref(a_1,a_2) ab[ab_subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= CGBT05 tests the error bounds from iterative refinement for the computed solution to a system of equations op(A)*X = B, where A is a general band matrix of order n with kl subdiagonals and ku superdiagonals and op(A) = A or A**T, depending on TRANS. RESLTS(1) = test of the error bound = norm(X - XACT) / ( norm(X) * FERR ) A large value is returned if this ratio is not less than one. RESLTS(2) = residual from the iterative refinement routine = the maximum of BERR / ( NZ*EPS + (*) ), where (*) = NZ*UNFL / (min_i (abs(op(A))*abs(X) +abs(b))_i ) and NZ = max. number of nonzeros in any row of A, plus 1 Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations. = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) N (input) INTEGER The number of rows of the matrices X, B, and XACT, and the order of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. NRHS (input) INTEGER The number of columns of the matrices X, B, and XACT. NRHS >= 0. AB (input) COMPLEX array, dimension (LDAB,N) The original band matrix A, stored in rows 1 to KL+KU+1. The j-th column of A is stored in the j-th column of the array AB as follows: AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KL+KU+1. B (input) COMPLEX array, dimension (LDB,NRHS) The right hand side vectors for the system of linear equations. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input) COMPLEX array, dimension (LDX,NRHS) The computed solution vectors. Each vector is stored as a column of the matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). XACT (input) COMPLEX array, dimension (LDX,NRHS) The exact solution vectors. Each vector is stored as a column of the matrix XACT. LDXACT (input) INTEGER The leading dimension of the array XACT. LDXACT >= max(1,N). FERR (input) REAL array, dimension (NRHS) The estimated forward error bounds for each solution vector X. If XTRUE is the true solution, FERR bounds the magnitude of the largest entry in (X - XTRUE) divided by the magnitude of the largest entry in X. BERR (input) REAL array, dimension (NRHS) The componentwise relative backward error of each solution vector (i.e., the smallest relative change in any entry of A or B that makes X an exact solution). RESLTS (output) REAL array, dimension (2) The maximum over the NRHS solution vectors of the ratios: RESLTS(1) = norm(X - XACT) / ( norm(X) * FERR ) RESLTS(2) = BERR / ( NZ*EPS + (*) ) ===================================================================== Quick exit if N = 0 or NRHS = 0. Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; xact_dim1 = *ldxact; xact_offset = 1 + xact_dim1 * 1; xact -= xact_offset; --ferr; --berr; --reslts; /* Function Body */ if (*n <= 0 || *nrhs <= 0) { reslts[1] = 0.f; reslts[2] = 0.f; return 0; } eps = slamch_("Epsilon"); unfl = slamch_("Safe minimum"); ovfl = 1.f / unfl; notran = lsame_(trans, "N"); /* Computing MIN */ i__1 = *kl + *ku + 2, i__2 = *n + 1; nz = min(i__1,i__2); /* Test 1: Compute the maximum of norm(X - XACT) / ( norm(X) * FERR ) over all the vectors X and XACT using the infinity-norm. */ errbnd = 0.f; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { imax = icamax_(n, &x_ref(1, j), &c__1); /* Computing MAX */ i__2 = x_subscr(imax, j); r__3 = (r__1 = x[i__2].r, dabs(r__1)) + (r__2 = r_imag(&x_ref(imax, j) ), dabs(r__2)); xnorm = dmax(r__3,unfl); diff = 0.f; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = x_subscr(i__, j); i__4 = xact_subscr(i__, j); q__2.r = x[i__3].r - xact[i__4].r, q__2.i = x[i__3].i - xact[i__4] .i; q__1.r = q__2.r, q__1.i = q__2.i; /* Computing MAX */ r__3 = diff, r__4 = (r__1 = q__1.r, dabs(r__1)) + (r__2 = r_imag(& q__1), dabs(r__2)); diff = dmax(r__3,r__4); /* L10: */ } if (xnorm > 1.f) { goto L20; } else if (diff <= ovfl * xnorm) { goto L20; } else { errbnd = 1.f / eps; goto L30; } L20: if (diff / xnorm <= ferr[j]) { /* Computing MAX */ r__1 = errbnd, r__2 = diff / xnorm / ferr[j]; errbnd = dmax(r__1,r__2); } else { errbnd = 1.f / eps; } L30: ; } reslts[1] = errbnd; /* Test 2: Compute the maximum of BERR / ( NZ*EPS + (*) ), where (*) = NZ*UNFL / (min_i (abs(op(A))*abs(X) +abs(b))_i ) */ i__1 = *nrhs; for (k = 1; k <= i__1; ++k) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = b_subscr(i__, k); tmp = (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(&b_ref(i__, k)), dabs(r__2)); if (notran) { /* Computing MAX */ i__3 = i__ - *kl; /* Computing MIN */ i__5 = i__ + *ku; i__4 = min(i__5,*n); for (j = max(i__3,1); j <= i__4; ++j) { i__3 = ab_subscr(*ku + 1 + i__ - j, j); i__5 = x_subscr(j, k); tmp += ((r__1 = ab[i__3].r, dabs(r__1)) + (r__2 = r_imag(& ab_ref(*ku + 1 + i__ - j, j)), dabs(r__2))) * (( r__3 = x[i__5].r, dabs(r__3)) + (r__4 = r_imag(& x_ref(j, k)), dabs(r__4))); /* L40: */ } } else { /* Computing MAX */ i__4 = i__ - *ku; /* Computing MIN */ i__5 = i__ + *kl; i__3 = min(i__5,*n); for (j = max(i__4,1); j <= i__3; ++j) { i__4 = ab_subscr(*ku + 1 + j - i__, i__); i__5 = x_subscr(j, k); tmp += ((r__1 = ab[i__4].r, dabs(r__1)) + (r__2 = r_imag(& ab_ref(*ku + 1 + j - i__, i__)), dabs(r__2))) * (( r__3 = x[i__5].r, dabs(r__3)) + (r__4 = r_imag(& x_ref(j, k)), dabs(r__4))); /* L50: */ } } if (i__ == 1) { axbi = tmp; } else { axbi = dmin(axbi,tmp); } /* L60: */ } /* Computing MAX */ r__1 = axbi, r__2 = nz * unfl; tmp = berr[k] / (nz * eps + nz * unfl / dmax(r__1,r__2)); if (k == 1) { reslts[2] = tmp; } else { reslts[2] = dmax(reslts[2],tmp); } /* L70: */ } return 0; /* End of CGBT05 */ } /* cgbt05_ */
/* Subroutine */ int dgbsvx_(char *fact, char *trans, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, char *equed, doublereal *r__, doublereal *c__, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DGBSVX uses the LU factorization to compute the solution to a real system of linear equations A * X = B, A**T * X = B, or A**H * X = B, where A is a band matrix of order N with KL subdiagonals and KU superdiagonals, and X and B are N-by-NRHS matrices. Error bounds on the solution and a condition estimate are also provided. Description =========== The following steps are performed by this subroutine: 1. If FACT = 'E', real scaling factors are computed to equilibrate the system: TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B Whether or not the system will be equilibrated depends on the scaling of the matrix A, but if equilibration is used, A is overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') or diag(C)*B (if TRANS = 'T' or 'C'). 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the matrix A (after equilibration if FACT = 'E') as A = L * U, where L is a product of permutation and unit lower triangular matrices with KL subdiagonals, and U is upper triangular with KL+KU superdiagonals. 3. If some U(i,i)=0, so that U is exactly singular, then the routine returns with INFO = i. Otherwise, the factored form of A is used to estimate the condition number of the matrix A. If the reciprocal of the condition number is less than machine precision, INFO = N+1 is returned as a warning, but the routine still goes on to solve for X and compute error bounds as described below. 4. The system of equations is solved for X using the factored form of A. 5. Iterative refinement is applied to improve the computed solution matrix and calculate error bounds and backward error estimates for it. 6. If equilibration was used, the matrix X is premultiplied by diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so that it solves the original system before equilibration. Arguments ========= FACT (input) CHARACTER*1 Specifies whether or not the factored form of the matrix A is supplied on entry, and if not, whether the matrix A should be equilibrated before it is factored. = 'F': On entry, AFB and IPIV contain the factored form of A. If EQUED is not 'N', the matrix A has been equilibrated with scaling factors given by R and C. AB, AFB, and IPIV are not modified. = 'N': The matrix A will be copied to AFB and factored. = 'E': The matrix A will be equilibrated if necessary, then copied to AFB and factored. TRANS (input) CHARACTER*1 Specifies the form of the system of equations. = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Transpose) N (input) INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) On entry, the matrix A in band storage, in rows 1 to KL+KU+1. The j-th column of A is stored in the j-th column of the array AB as follows: AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) If FACT = 'F' and EQUED is not 'N', then A must have been equilibrated by the scaling factors in R and/or C. AB is not modified if FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. On exit, if EQUED .ne. 'N', A is scaled as follows: EQUED = 'R': A := diag(R) * A EQUED = 'C': A := A * diag(C) EQUED = 'B': A := diag(R) * A * diag(C). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KL+KU+1. AFB (input or output) DOUBLE PRECISION array, dimension (LDAFB,N) If FACT = 'F', then AFB is an input argument and on entry contains details of the LU factorization of the band matrix A, as computed by DGBTRF. U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is the factored form of the equilibrated matrix A. If FACT = 'N', then AFB is an output argument and on exit returns details of the LU factorization of A. If FACT = 'E', then AFB is an output argument and on exit returns details of the LU factorization of the equilibrated matrix A (see the description of AB for the form of the equilibrated matrix). LDAFB (input) INTEGER The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. IPIV (input or output) INTEGER array, dimension (N) If FACT = 'F', then IPIV is an input argument and on entry contains the pivot indices from the factorization A = L*U as computed by DGBTRF; row i of the matrix was interchanged with row IPIV(i). If FACT = 'N', then IPIV is an output argument and on exit contains the pivot indices from the factorization A = L*U of the original matrix A. If FACT = 'E', then IPIV is an output argument and on exit contains the pivot indices from the factorization A = L*U of the equilibrated matrix A. EQUED (input or output) CHARACTER*1 Specifies the form of equilibration that was done. = 'N': No equilibration (always true if FACT = 'N'). = 'R': Row equilibration, i.e., A has been premultiplied by diag(R). = 'C': Column equilibration, i.e., A has been postmultiplied by diag(C). = 'B': Both row and column equilibration, i.e., A has been replaced by diag(R) * A * diag(C). EQUED is an input argument if FACT = 'F'; otherwise, it is an output argument. R (input or output) DOUBLE PRECISION array, dimension (N) The row scale factors for A. If EQUED = 'R' or 'B', A is multiplied on the left by diag(R); if EQUED = 'N' or 'C', R is not accessed. R is an input argument if FACT = 'F'; otherwise, R is an output argument. If FACT = 'F' and EQUED = 'R' or 'B', each element of R must be positive. C (input or output) DOUBLE PRECISION array, dimension (N) The column scale factors for A. If EQUED = 'C' or 'B', A is multiplied on the right by diag(C); if EQUED = 'N' or 'R', C is not accessed. C is an input argument if FACT = 'F'; otherwise, C is an output argument. If FACT = 'F' and EQUED = 'C' or 'B', each element of C must be positive. B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, if EQUED = 'N', B is not modified; if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by diag(R)*B; if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is overwritten by diag(C)*B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (output) DOUBLE PRECISION array, dimension (LDX,NRHS) If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to the original system of equations. Note that A and B are modified on exit if EQUED .ne. 'N', and the solution to the equilibrated system is inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). RCOND (output) DOUBLE PRECISION The estimate of the reciprocal condition number of the matrix A after equilibration (if done). If RCOND is less than the machine precision (in particular, if RCOND = 0), the matrix is singular to working precision. This condition is indicated by a return code of INFO > 0. FERR (output) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace/output) DOUBLE PRECISION array, dimension (3*N) On exit, WORK(1) contains the reciprocal pivot growth factor norm(A)/norm(U). The "max absolute element" norm is used. If WORK(1) is much less than 1, then the stability of the LU factorization of the (equilibrated) matrix A could be poor. This also means that the solution X, condition estimator RCOND, and forward error bound FERR could be unreliable. If factorization fails with 0<INFO<=N, then WORK(1) contains the reciprocal pivot growth factor for the leading INFO columns of A. IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, and i is <= N: U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution and error bounds could not be computed. RCOND = 0 is returned. = N+1: U is nonsingular, but RCOND is less than machine precision, meaning that the matrix is singular to working precision. Nevertheless, the solution and error bounds are computed because there are a number of situations where the computed solution can be more accurate than the value of RCOND would suggest. ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3; /* Local variables */ static doublereal amax; static char norm[1]; static integer i__, j; extern logical lsame_(char *, char *); static doublereal rcmin, rcmax, anorm; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static logical equil; static integer j1, j2; extern doublereal dlamch_(char *), dlangb_(char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dlaqgb_(integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, char *), dgbcon_(char *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); static doublereal colcnd; extern doublereal dlantb_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dgbequ_(integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), dgbrfs_( char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), dgbtrf_(integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *); static logical nofact; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); static doublereal bignum; extern /* Subroutine */ int dgbtrs_(char *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); static integer infequ; static logical colequ; static doublereal rowcnd; static logical notran; static doublereal smlnum; static logical rowequ; static doublereal rpvgrw; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] #define afb_ref(a_1,a_2) afb[(a_2)*afb_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1 * 1; afb -= afb_offset; --ipiv; --r__; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; nofact = lsame_(fact, "N"); equil = lsame_(fact, "E"); notran = lsame_(trans, "N"); if (nofact || equil) { *(unsigned char *)equed = 'N'; rowequ = FALSE_; colequ = FALSE_; } else { rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); smlnum = dlamch_("Safe minimum"); bignum = 1. / smlnum; } /* Test the input parameters. */ if (! nofact && ! equil && ! lsame_(fact, "F")) { *info = -1; } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*kl < 0) { *info = -4; } else if (*ku < 0) { *info = -5; } else if (*nrhs < 0) { *info = -6; } else if (*ldab < *kl + *ku + 1) { *info = -8; } else if (*ldafb < (*kl << 1) + *ku + 1) { *info = -10; } else if (lsame_(fact, "F") && ! (rowequ || colequ || lsame_(equed, "N"))) { *info = -12; } else { if (rowequ) { rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = r__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = r__[j]; rcmax = max(d__1,d__2); /* L10: */ } if (rcmin <= 0.) { *info = -13; } else if (*n > 0) { rowcnd = max(rcmin,smlnum) / min(rcmax,bignum); } else { rowcnd = 1.; } } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ d__1 = rcmin, d__2 = c__[j]; rcmin = min(d__1,d__2); /* Computing MAX */ d__1 = rcmax, d__2 = c__[j]; rcmax = max(d__1,d__2); /* L20: */ } if (rcmin <= 0.) { *info = -14; } else if (*n > 0) { colcnd = max(rcmin,smlnum) / min(rcmax,bignum); } else { colcnd = 1.; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -16; } else if (*ldx < max(1,*n)) { *info = -18; } } } if (*info != 0) { i__1 = -(*info); xerbla_("DGBSVX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ dgbequ_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], &rowcnd, &colcnd, &amax, &infequ); if (infequ == 0) { /* Equilibrate the matrix. */ dlaqgb_(n, n, kl, ku, &ab[ab_offset], ldab, &r__[1], &c__[1], & rowcnd, &colcnd, &amax, equed); rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); } } /* Scale the right hand side. */ if (notran) { if (rowequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = r__[i__] * b_ref(i__, j); /* L30: */ } /* L40: */ } } } else if (colequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = c__[i__] * b_ref(i__, j); /* L50: */ } /* L60: */ } } if (nofact || equil) { /* Compute the LU factorization of the band matrix A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = j - *ku; j1 = max(i__2,1); /* Computing MIN */ i__2 = j + *kl; j2 = min(i__2,*n); i__2 = j2 - j1 + 1; dcopy_(&i__2, &ab_ref(*ku + 1 - j + j1, j), &c__1, &afb_ref(*kl + *ku + 1 - j + j1, j), &c__1); /* L70: */ } dgbtrf_(n, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], info); /* Return if INFO is non-zero. */ if (*info != 0) { if (*info > 0) { /* Compute the reciprocal pivot growth factor of the leading rank-deficient INFO columns of A. */ anorm = 0.; i__1 = *info; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = *ku + 2 - j; /* Computing MIN */ i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1; i__3 = min(i__4,i__5); for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = anorm, d__3 = (d__1 = ab_ref(i__, j), abs(d__1) ); anorm = max(d__2,d__3); /* L80: */ } /* L90: */ } /* Computing MAX */ i__1 = 1, i__3 = *kl + *ku + 2 - *info; /* Computing MIN */ i__4 = *info - 1, i__5 = *kl + *ku; i__2 = min(i__4,i__5); rpvgrw = dlantb_("M", "U", "N", info, &i__2, &afb_ref(max( i__1,i__3), 1), ldafb, &work[1]); if (rpvgrw == 0.) { rpvgrw = 1.; } else { rpvgrw = anorm / rpvgrw; } work[1] = rpvgrw; *rcond = 0.; } return 0; } } /* Compute the norm of the matrix A and the reciprocal pivot growth factor RPVGRW. */ if (notran) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = dlangb_(norm, n, kl, ku, &ab[ab_offset], ldab, &work[1]); i__1 = *kl + *ku; rpvgrw = dlantb_("M", "U", "N", n, &i__1, &afb[afb_offset], ldafb, &work[ 1]); if (rpvgrw == 0.) { rpvgrw = 1.; } else { rpvgrw = dlangb_("M", n, kl, ku, &ab[ab_offset], ldab, &work[1]) / rpvgrw; } /* Compute the reciprocal of the condition number of A. */ dgbcon_(norm, n, kl, ku, &afb[afb_offset], ldafb, &ipiv[1], &anorm, rcond, &work[1], &iwork[1], info); /* Set INFO = N+1 if the matrix is singular to working precision. */ if (*rcond < dlamch_("Epsilon")) { *info = *n + 1; } /* Compute the solution matrix X. */ dlacpy_("Full", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); dgbtrs_(trans, n, kl, ku, nrhs, &afb[afb_offset], ldafb, &ipiv[1], &x[ x_offset], ldx, info); /* Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. */ dgbrfs_(trans, n, kl, ku, nrhs, &ab[ab_offset], ldab, &afb[afb_offset], ldafb, &ipiv[1], &b[b_offset], ldb, &x[x_offset], ldx, &ferr[1], & berr[1], &work[1], &iwork[1], info); /* Transform the solution matrix X to a solution of the original system. */ if (notran) { if (colequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x_ref(i__, j) = c__[i__] * x_ref(i__, j); /* L100: */ } /* L110: */ } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] /= colcnd; /* L120: */ } } } else if (rowequ) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x_ref(i__, j) = r__[i__] * x_ref(i__, j); /* L130: */ } /* L140: */ } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] /= rowcnd; /* L150: */ } } work[1] = rpvgrw; return 0; /* End of DGBSVX */ } /* dgbsvx_ */
/* Subroutine */ int stbt03_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, real *ab, integer *ldab, real *scale, real *cnorm, real *tscal, real *x, integer *ldx, real *b, integer * ldb, real *work, real *resid) { /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, x_dim1, x_offset, i__1; real r__1, r__2, r__3; /* Local variables */ static integer j; extern logical lsame_(char *, char *); extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static real xscal; extern /* Subroutine */ int stbmv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *); static real tnorm, xnorm; extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, real *, integer *), slabad_(real *, real *); static integer ix; extern doublereal slamch_(char *); static real bignum; extern integer isamax_(integer *, real *, integer *); static real smlnum, eps, err; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= STBT03 computes the residual for the solution to a scaled triangular system of equations A*x = s*b or A'*x = s*b when A is a triangular band matrix. Here A' is the transpose of A, s is a scalar, and x and b are N by NRHS matrices. The test ratio is the maximum over the number of right hand sides of norm(s*b - op(A)*x) / ( norm(op(A)) * norm(x) * EPS ), where op(A) denotes A or A' and EPS is the machine epsilon. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the matrix A is upper or lower triangular. = 'U': Upper triangular = 'L': Lower triangular TRANS (input) CHARACTER*1 Specifies the operation applied to A. = 'N': A *x = b (No transpose) = 'T': A'*x = b (Transpose) = 'C': A'*x = b (Conjugate transpose = Transpose) DIAG (input) CHARACTER*1 Specifies whether or not the matrix A is unit triangular. = 'N': Non-unit triangular = 'U': Unit triangular N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of superdiagonals or subdiagonals of the triangular band matrix A. KD >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices X and B. NRHS >= 0. AB (input) REAL array, dimension (LDAB,N) The upper or lower triangular band matrix A, stored in the first kd+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. SCALE (input) REAL The scaling factor s used in solving the triangular system. CNORM (input) REAL array, dimension (N) The 1-norms of the columns of A, not counting the diagonal. TSCAL (input) REAL The scaling factor used in computing the 1-norms in CNORM. CNORM actually contains the column norms of TSCAL*A. X (input) REAL array, dimension (LDX,NRHS) The computed solution vectors for the system of linear equations. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). B (input) REAL array, dimension (LDB,NRHS) The right hand side vectors for the system of linear equations. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). WORK (workspace) REAL array, dimension (N) RESID (output) REAL The maximum over the number of right hand sides of norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ). ===================================================================== Quick exit if N = 0 Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --cnorm; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --work; /* Function Body */ if (*n <= 0 || *nrhs <= 0) { *resid = 0.f; return 0; } eps = slamch_("Epsilon"); smlnum = slamch_("Safe minimum"); bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); /* Compute the norm of the triangular matrix A using the column norms already computed by SLATBS. */ tnorm = 0.f; if (lsame_(diag, "N")) { if (lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ r__2 = tnorm, r__3 = *tscal * (r__1 = ab_ref(*kd + 1, j), dabs(r__1)) + cnorm[j]; tnorm = dmax(r__2,r__3); /* L10: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ r__2 = tnorm, r__3 = *tscal * (r__1 = ab_ref(1, j), dabs(r__1) ) + cnorm[j]; tnorm = dmax(r__2,r__3); /* L20: */ } } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ r__1 = tnorm, r__2 = *tscal + cnorm[j]; tnorm = dmax(r__1,r__2); /* L30: */ } } /* Compute the maximum over the number of right hand sides of norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ). */ *resid = 0.f; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { scopy_(n, &x_ref(1, j), &c__1, &work[1], &c__1); ix = isamax_(n, &work[1], &c__1); /* Computing MAX */ r__2 = 1.f, r__3 = (r__1 = x_ref(ix, j), dabs(r__1)); xnorm = dmax(r__2,r__3); xscal = 1.f / xnorm / (real) (*kd + 1); sscal_(n, &xscal, &work[1], &c__1); stbmv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &work[1], & c__1); r__1 = -(*scale) * xscal; saxpy_(n, &r__1, &b_ref(1, j), &c__1, &work[1], &c__1); ix = isamax_(n, &work[1], &c__1); err = *tscal * (r__1 = work[ix], dabs(r__1)); ix = isamax_(n, &x_ref(1, j), &c__1); xnorm = (r__1 = x_ref(ix, j), dabs(r__1)); if (err * smlnum <= xnorm) { if (xnorm > 0.f) { err /= xnorm; } } else { if (err > 0.f) { err = 1.f / eps; } } if (err * smlnum <= tnorm) { if (tnorm > 0.f) { err /= tnorm; } } else { if (err > 0.f) { err = 1.f / eps; } } *resid = dmax(*resid,err); /* L40: */ } return 0; /* End of STBT03 */ } /* stbt03_ */
/* Subroutine */ int slaqsb_(char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *s, real *scond, real *amax, char *equed) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= SLAQSB equilibrates a symmetric band matrix A using the scaling factors in the vector S. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrix A is stored. = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of super-diagonals of the matrix A if UPLO = 'U', or the number of sub-diagonals if UPLO = 'L'. KD >= 0. AB (input/output) REAL array, dimension (LDAB,N) On entry, the upper or lower triangle of the symmetric band matrix A, stored in the first KD+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). On exit, if INFO = 0, the triangular factor U or L from the Cholesky factorization A = U'*U or A = L*L' of the band matrix A, in the same storage format as A. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. S (output) REAL array, dimension (N) The scale factors for A. SCOND (input) REAL Ratio of the smallest S(i) to the largest S(i). AMAX (input) REAL Absolute value of largest matrix entry. EQUED (output) CHARACTER*1 Specifies whether or not equilibration was done. = 'N': No equilibration. = 'Y': Equilibration was done, i.e., A has been replaced by diag(S) * A * diag(S). Internal Parameters =================== THRESH is a threshold value used to decide if scaling should be done based on the ratio of the scaling factors. If SCOND < THRESH, scaling is done. LARGE and SMALL are threshold values used to decide if scaling should be done based on the absolute size of the largest matrix element. If AMAX > LARGE or AMAX < SMALL, scaling is done. ===================================================================== Quick return if possible Parameter adjustments */ /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i__, j; static real large; extern logical lsame_(char *, char *); static real small, cj; extern doublereal slamch_(char *); #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --s; /* Function Body */ if (*n <= 0) { *(unsigned char *)equed = 'N'; return 0; } /* Initialize LARGE and SMALL. */ small = slamch_("Safe minimum") / slamch_("Precision"); large = 1.f / small; if (*scond >= .1f && *amax >= small && *amax <= large) { /* No equilibration */ *(unsigned char *)equed = 'N'; } else { /* Replace A by diag(S) * A * diag(S). */ if (lsame_(uplo, "U")) { /* Upper triangle of A is stored in band format. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = s[j]; /* Computing MAX */ i__2 = 1, i__3 = j - *kd; i__4 = j; for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { ab_ref(*kd + 1 + i__ - j, j) = cj * s[i__] * ab_ref(*kd + 1 + i__ - j, j); /* L10: */ } /* L20: */ } } else { /* Lower triangle of A is stored. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { cj = s[j]; /* Computing MIN */ i__2 = *n, i__3 = j + *kd; i__4 = min(i__2,i__3); for (i__ = j; i__ <= i__4; ++i__) { ab_ref(i__ + 1 - j, j) = cj * s[i__] * ab_ref(i__ + 1 - j, j); /* L30: */ } /* L40: */ } } *(unsigned char *)equed = 'Y'; } return 0; /* End of SLAQSB */ } /* slaqsb_ */
/* Subroutine */ int stbt05_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, real *ab, integer *ldab, real *b, integer *ldb, real *x, integer *ldx, real *xact, integer *ldxact, real *ferr, real *berr, real *reslts) { /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, x_dim1, x_offset, xact_dim1, xact_offset, i__1, i__2, i__3, i__4; real r__1, r__2, r__3; /* Local variables */ static real diff, axbi; static integer imax; static real unfl, ovfl; static logical unit; static integer i__, j, k; extern logical lsame_(char *, char *); static logical upper; static real xnorm; extern doublereal slamch_(char *); static integer nz; static real errbnd; extern integer isamax_(integer *, real *, integer *); static logical notran; static integer ifu; static real eps, tmp; #define xact_ref(a_1,a_2) xact[(a_2)*xact_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= STBT05 tests the error bounds from iterative refinement for the computed solution to a system of equations A*X = B, where A is a triangular band matrix. RESLTS(1) = test of the error bound = norm(X - XACT) / ( norm(X) * FERR ) A large value is returned if this ratio is not less than one. RESLTS(2) = residual from the iterative refinement routine = the maximum of BERR / ( NZ*EPS + (*) ), where (*) = NZ*UNFL / (min_i (abs(A)*abs(X) +abs(b))_i ) and NZ = max. number of nonzeros in any row of A, plus 1 Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the matrix A is upper or lower triangular. = 'U': Upper triangular = 'L': Lower triangular TRANS (input) CHARACTER*1 Specifies the form of the system of equations. = 'N': A * X = B (No transpose) = 'T': A'* X = B (Transpose) = 'C': A'* X = B (Conjugate transpose = Transpose) DIAG (input) CHARACTER*1 Specifies whether or not the matrix A is unit triangular. = 'N': Non-unit triangular = 'U': Unit triangular N (input) INTEGER The number of rows of the matrices X, B, and XACT, and the order of the matrix A. N >= 0. KD (input) INTEGER The number of super-diagonals of the matrix A if UPLO = 'U', or the number of sub-diagonals if UPLO = 'L'. KD >= 0. NRHS (input) INTEGER The number of columns of the matrices X, B, and XACT. NRHS >= 0. AB (input) REAL array, dimension (LDAB,N) The upper or lower triangular band matrix A, stored in the first kd+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). If DIAG = 'U', the diagonal elements of A are not referenced and are assumed to be 1. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. B (input) REAL array, dimension (LDB,NRHS) The right hand side vectors for the system of linear equations. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input) REAL array, dimension (LDX,NRHS) The computed solution vectors. Each vector is stored as a column of the matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). XACT (input) REAL array, dimension (LDX,NRHS) The exact solution vectors. Each vector is stored as a column of the matrix XACT. LDXACT (input) INTEGER The leading dimension of the array XACT. LDXACT >= max(1,N). FERR (input) REAL array, dimension (NRHS) The estimated forward error bounds for each solution vector X. If XTRUE is the true solution, FERR bounds the magnitude of the largest entry in (X - XTRUE) divided by the magnitude of the largest entry in X. BERR (input) REAL array, dimension (NRHS) The componentwise relative backward error of each solution vector (i.e., the smallest relative change in any entry of A or B that makes X an exact solution). RESLTS (output) REAL array, dimension (2) The maximum over the NRHS solution vectors of the ratios: RESLTS(1) = norm(X - XACT) / ( norm(X) * FERR ) RESLTS(2) = BERR / ( NZ*EPS + (*) ) ===================================================================== Quick exit if N = 0 or NRHS = 0. Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; xact_dim1 = *ldxact; xact_offset = 1 + xact_dim1 * 1; xact -= xact_offset; --ferr; --berr; --reslts; /* Function Body */ if (*n <= 0 || *nrhs <= 0) { reslts[1] = 0.f; reslts[2] = 0.f; return 0; } eps = slamch_("Epsilon"); unfl = slamch_("Safe minimum"); ovfl = 1.f / unfl; upper = lsame_(uplo, "U"); notran = lsame_(trans, "N"); unit = lsame_(diag, "U"); /* Computing MIN */ i__1 = *kd, i__2 = *n - 1; nz = min(i__1,i__2) + 1; /* Test 1: Compute the maximum of norm(X - XACT) / ( norm(X) * FERR ) over all the vectors X and XACT using the infinity-norm. */ errbnd = 0.f; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { imax = isamax_(n, &x_ref(1, j), &c__1); /* Computing MAX */ r__2 = (r__1 = x_ref(imax, j), dabs(r__1)); xnorm = dmax(r__2,unfl); diff = 0.f; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ r__2 = diff, r__3 = (r__1 = x_ref(i__, j) - xact_ref(i__, j), dabs(r__1)); diff = dmax(r__2,r__3); /* L10: */ } if (xnorm > 1.f) { goto L20; } else if (diff <= ovfl * xnorm) { goto L20; } else { errbnd = 1.f / eps; goto L30; } L20: if (diff / xnorm <= ferr[j]) { /* Computing MAX */ r__1 = errbnd, r__2 = diff / xnorm / ferr[j]; errbnd = dmax(r__1,r__2); } else { errbnd = 1.f / eps; } L30: ; } reslts[1] = errbnd; /* Test 2: Compute the maximum of BERR / ( NZ*EPS + (*) ), where (*) = NZ*UNFL / (min_i (abs(A)*abs(X) +abs(b))_i ) */ ifu = 0; if (unit) { ifu = 1; } i__1 = *nrhs; for (k = 1; k <= i__1; ++k) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { tmp = (r__1 = b_ref(i__, k), dabs(r__1)); if (upper) { if (! notran) { /* Computing MAX */ i__3 = i__ - *kd; i__4 = i__ - ifu; for (j = max(i__3,1); j <= i__4; ++j) { tmp += (r__1 = ab_ref(*kd + 1 - i__ + j, i__), dabs( r__1)) * (r__2 = x_ref(j, k), dabs(r__2)); /* L40: */ } if (unit) { tmp += (r__1 = x_ref(i__, k), dabs(r__1)); } } else { if (unit) { tmp += (r__1 = x_ref(i__, k), dabs(r__1)); } /* Computing MIN */ i__3 = i__ + *kd; i__4 = min(i__3,*n); for (j = i__ + ifu; j <= i__4; ++j) { tmp += (r__1 = ab_ref(*kd + 1 + i__ - j, j), dabs( r__1)) * (r__2 = x_ref(j, k), dabs(r__2)); /* L50: */ } } } else { if (notran) { /* Computing MAX */ i__4 = i__ - *kd; i__3 = i__ - ifu; for (j = max(i__4,1); j <= i__3; ++j) { tmp += (r__1 = ab_ref(i__ + 1 - j, j), dabs(r__1)) * ( r__2 = x_ref(j, k), dabs(r__2)); /* L60: */ } if (unit) { tmp += (r__1 = x_ref(i__, k), dabs(r__1)); } } else { if (unit) { tmp += (r__1 = x_ref(i__, k), dabs(r__1)); } /* Computing MIN */ i__4 = i__ + *kd; i__3 = min(i__4,*n); for (j = i__ + ifu; j <= i__3; ++j) { tmp += (r__1 = ab_ref(j + 1 - i__, i__), dabs(r__1)) * (r__2 = x_ref(j, k), dabs(r__2)); /* L70: */ } } } if (i__ == 1) { axbi = tmp; } else { axbi = dmin(axbi,tmp); } /* L80: */ } /* Computing MAX */ r__1 = axbi, r__2 = nz * unfl; tmp = berr[k] / (nz * eps + nz * unfl / dmax(r__1,r__2)); if (k == 1) { reslts[2] = tmp; } else { reslts[2] = dmax(reslts[2],tmp); } /* L90: */ } return 0; /* End of STBT05 */ } /* stbt05_ */
/* Subroutine */ int sgbtrf_(integer *m, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, integer *ipiv, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= SGBTRF computes an LU factorization of a real m-by-n band matrix A using partial pivoting with row interchanges. This is the blocked version of the algorithm, calling Level 3 BLAS. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. AB (input/output) REAL array, dimension (LDAB,N) On entry, the matrix A in band storage, in rows KL+1 to 2*KL+KU+1; rows 1 to KL of the array need not be set. The j-th column of A is stored in the j-th column of the array AB as follows: AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) On exit, details of the factorization: U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. See below for further details. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= 2*KL+KU+1. IPIV (output) INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = +i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. Further Details =============== The band storage scheme is illustrated by the following example, when M = N = 6, KL = 2, KU = 1: On entry: On exit: * * * + + + * * * u14 u25 u36 * * + + + + * * u13 u24 u35 u46 * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * a31 a42 a53 a64 * * m31 m42 m53 m64 * * Array elements marked * are not used by the routine; elements marked + need not be set on entry, but are required by the routine to store elements of U because of fill-in resulting from the row interchanges. ===================================================================== KV is the number of superdiagonals in the factor U, allowing for fill-in Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c__65 = 65; static real c_b18 = -1.f; static real c_b31 = 1.f; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; real r__1; /* Local variables */ extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, integer *, real *, integer *, real *, integer *); static real temp; static integer i__, j; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static real work13[4160] /* was [65][64] */, work31[4160] /* was [65][64] */; static integer i2, i3, j2, j3, k2; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer * ), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *), sgbtf2_(integer *, integer *, integer *, integer *, real *, integer *, integer *, integer *); static integer jb, nb, ii, jj, jm, ip, jp, km, ju, kv, nw; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen), isamax_(integer *, real *, integer *); extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer *, integer *, integer *, integer *); #define work13_ref(a_1,a_2) work13[(a_2)*65 + a_1 - 66] #define work31_ref(a_1,a_2) work31[(a_2)*65 + a_1 - 66] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --ipiv; /* Function Body */ kv = *ku + *kl; /* Test the input parameters. */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < *kl + kv + 1) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("SGBTRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment */ nb = ilaenv_(&c__1, "SGBTRF", " ", m, n, kl, ku, (ftnlen)6, (ftnlen)1); /* The block size must not exceed the limit set by the size of the local arrays WORK13 and WORK31. */ nb = min(nb,64); if (nb <= 1 || nb > *kl) { /* Use unblocked code */ sgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info); } else { /* Use blocked code Zero the superdiagonal elements of the work array WORK13 */ i__1 = nb; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work13_ref(i__, j) = 0.f; /* L10: */ } /* L20: */ } /* Zero the subdiagonal elements of the work array WORK31 */ i__1 = nb; for (j = 1; j <= i__1; ++j) { i__2 = nb; for (i__ = j + 1; i__ <= i__2; ++i__) { work31_ref(i__, j) = 0.f; /* L30: */ } /* L40: */ } /* Gaussian elimination with partial pivoting Set fill-in elements in columns KU+2 to KV to zero */ i__1 = min(kv,*n); for (j = *ku + 2; j <= i__1; ++j) { i__2 = *kl; for (i__ = kv - j + 2; i__ <= i__2; ++i__) { ab_ref(i__, j) = 0.f; /* L50: */ } /* L60: */ } /* JU is the index of the last column affected by the current stage of the factorization */ ju = 1; i__1 = min(*m,*n); i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = nb, i__4 = min(*m,*n) - j + 1; jb = min(i__3,i__4); /* The active part of the matrix is partitioned A11 A12 A13 A21 A22 A23 A31 A32 A33 Here A11, A21 and A31 denote the current block of JB columns which is about to be factorized. The number of rows in the partitioning are JB, I2, I3 respectively, and the numbers of columns are JB, J2, J3. The superdiagonal elements of A13 and the subdiagonal elements of A31 lie outside the band. Computing MIN */ i__3 = *kl - jb, i__4 = *m - j - jb + 1; i2 = min(i__3,i__4); /* Computing MIN */ i__3 = jb, i__4 = *m - j - *kl + 1; i3 = min(i__3,i__4); /* J2 and J3 are computed after JU has been updated. Factorize the current block of JB columns */ i__3 = j + jb - 1; for (jj = j; jj <= i__3; ++jj) { /* Set fill-in elements in column JJ+KV to zero */ if (jj + kv <= *n) { i__4 = *kl; for (i__ = 1; i__ <= i__4; ++i__) { ab_ref(i__, jj + kv) = 0.f; /* L70: */ } } /* Find pivot and test for singularity. KM is the number of subdiagonal elements in the current column. Computing MIN */ i__4 = *kl, i__5 = *m - jj; km = min(i__4,i__5); i__4 = km + 1; jp = isamax_(&i__4, &ab_ref(kv + 1, jj), &c__1); ipiv[jj] = jp + jj - j; if (ab_ref(kv + jp, jj) != 0.f) { /* Computing MAX Computing MIN */ i__6 = jj + *ku + jp - 1; i__4 = ju, i__5 = min(i__6,*n); ju = max(i__4,i__5); if (jp != 1) { /* Apply interchange to columns J to J+JB-1 */ if (jp + jj - 1 < j + *kl) { i__4 = *ldab - 1; i__5 = *ldab - 1; sswap_(&jb, &ab_ref(kv + 1 + jj - j, j), &i__4, & ab_ref(kv + jp + jj - j, j), &i__5); } else { /* The interchange affects columns J to JJ-1 of A31 which are stored in the work array WORK31 */ i__4 = jj - j; i__5 = *ldab - 1; sswap_(&i__4, &ab_ref(kv + 1 + jj - j, j), &i__5, &work31_ref(jp + jj - j - *kl, 1), &c__65) ; i__4 = j + jb - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; sswap_(&i__4, &ab_ref(kv + 1, jj), &i__5, &ab_ref( kv + jp, jj), &i__6); } } /* Compute multipliers */ r__1 = 1.f / ab_ref(kv + 1, jj); sscal_(&km, &r__1, &ab_ref(kv + 2, jj), &c__1); /* Update trailing submatrix within the band and within the current block. JM is the index of the last column which needs to be updated. Computing MIN */ i__4 = ju, i__5 = j + jb - 1; jm = min(i__4,i__5); if (jm > jj) { i__4 = jm - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; sger_(&km, &i__4, &c_b18, &ab_ref(kv + 2, jj), &c__1, &ab_ref(kv, jj + 1), &i__5, &ab_ref(kv + 1, jj + 1), &i__6); } } else { /* If pivot is zero, set INFO to the index of the pivot unless a zero pivot has already been found. */ if (*info == 0) { *info = jj; } } /* Copy current column of A31 into the work array WORK31 Computing MIN */ i__4 = jj - j + 1; nw = min(i__4,i3); if (nw > 0) { scopy_(&nw, &ab_ref(kv + *kl + 1 - jj + j, jj), &c__1, & work31_ref(1, jj - j + 1), &c__1); } /* L80: */ } if (j + jb <= *n) { /* Apply the row interchanges to the other blocks. Computing MIN */ i__3 = ju - j + 1; j2 = min(i__3,kv) - jb; /* Computing MAX */ i__3 = 0, i__4 = ju - j - kv + 1; j3 = max(i__3,i__4); /* Use SLASWP to apply the row interchanges to A12, A22, and A32. */ i__3 = *ldab - 1; slaswp_(&j2, &ab_ref(kv + 1 - jb, j + jb), &i__3, &c__1, &jb, &ipiv[j], &c__1); /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = ipiv[i__] + j - 1; /* L90: */ } /* Apply the row interchanges to A13, A23, and A33 columnwise. */ k2 = j - 1 + jb + j2; i__3 = j3; for (i__ = 1; i__ <= i__3; ++i__) { jj = k2 + i__; i__4 = j + jb - 1; for (ii = j + i__ - 1; ii <= i__4; ++ii) { ip = ipiv[ii]; if (ip != ii) { temp = ab_ref(kv + 1 + ii - jj, jj); ab_ref(kv + 1 + ii - jj, jj) = ab_ref(kv + 1 + ip - jj, jj); ab_ref(kv + 1 + ip - jj, jj) = temp; } /* L100: */ } /* L110: */ } /* Update the relevant part of the trailing submatrix */ if (j2 > 0) { /* Update A12 */ i__3 = *ldab - 1; i__4 = *ldab - 1; strsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, &c_b31, &ab_ref(kv + 1, j), &i__3, &ab_ref(kv + 1 - jb, j + jb), &i__4); if (i2 > 0) { /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; i__5 = *ldab - 1; sgemm_("No transpose", "No transpose", &i2, &j2, &jb, &c_b18, &ab_ref(kv + 1 + jb, j), &i__3, & ab_ref(kv + 1 - jb, j + jb), &i__4, &c_b31, & ab_ref(kv + 1, j + jb), &i__5); } if (i3 > 0) { /* Update A32 */ i__3 = *ldab - 1; i__4 = *ldab - 1; sgemm_("No transpose", "No transpose", &i3, &j2, &jb, &c_b18, work31, &c__65, &ab_ref(kv + 1 - jb, j + jb), &i__3, &c_b31, &ab_ref(kv + *kl + 1 - jb, j + jb), &i__4); } } if (j3 > 0) { /* Copy the lower triangle of A13 into the work array WORK13 */ i__3 = j3; for (jj = 1; jj <= i__3; ++jj) { i__4 = jb; for (ii = jj; ii <= i__4; ++ii) { work13_ref(ii, jj) = ab_ref(ii - jj + 1, jj + j + kv - 1); /* L120: */ } /* L130: */ } /* Update A13 in the work array */ i__3 = *ldab - 1; strsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, &c_b31, &ab_ref(kv + 1, j), &i__3, work13, &c__65); if (i2 > 0) { /* Update A23 */ i__3 = *ldab - 1; i__4 = *ldab - 1; sgemm_("No transpose", "No transpose", &i2, &j3, &jb, &c_b18, &ab_ref(kv + 1 + jb, j), &i__3, work13, &c__65, &c_b31, &ab_ref(jb + 1, j + kv), &i__4); } if (i3 > 0) { /* Update A33 */ i__3 = *ldab - 1; sgemm_("No transpose", "No transpose", &i3, &j3, &jb, &c_b18, work31, &c__65, work13, &c__65, & c_b31, &ab_ref(*kl + 1, j + kv), &i__3); } /* Copy the lower triangle of A13 back into place */ i__3 = j3; for (jj = 1; jj <= i__3; ++jj) { i__4 = jb; for (ii = jj; ii <= i__4; ++ii) { ab_ref(ii - jj + 1, jj + j + kv - 1) = work13_ref( ii, jj); /* L140: */ } /* L150: */ } } } else { /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = ipiv[i__] + j - 1; /* L160: */ } } /* Partially undo the interchanges in the current block to restore the upper triangular form of A31 and copy the upper triangle of A31 back into place */ i__3 = j; for (jj = j + jb - 1; jj >= i__3; --jj) { jp = ipiv[jj] - jj + 1; if (jp != 1) { /* Apply interchange to columns J to JJ-1 */ if (jp + jj - 1 < j + *kl) { /* The interchange does not affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; i__6 = *ldab - 1; sswap_(&i__4, &ab_ref(kv + 1 + jj - j, j), &i__5, & ab_ref(kv + jp + jj - j, j), &i__6); } else { /* The interchange does affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; sswap_(&i__4, &ab_ref(kv + 1 + jj - j, j), &i__5, & work31_ref(jp + jj - j - *kl, 1), &c__65); } } /* Copy the current column of A31 back into place Computing MIN */ i__4 = i3, i__5 = jj - j + 1; nw = min(i__4,i__5); if (nw > 0) { scopy_(&nw, &work31_ref(1, jj - j + 1), &c__1, &ab_ref(kv + *kl + 1 - jj + j, jj), &c__1); } /* L170: */ } /* L180: */ } } return 0; /* End of SGBTRF */ } /* sgbtrf_ */
/* Subroutine */ int spbrfs_(char *uplo, integer *n, integer *kd, integer * nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real * work, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SPBRFS improves the computed solution to a system of linear equations when the coefficient matrix is symmetric positive definite and banded, and provides error bounds and backward error estimates for the solution. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of superdiagonals of the matrix A if UPLO = 'U', or the number of subdiagonals if UPLO = 'L'. KD >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. AB (input) REAL array, dimension (LDAB,N) The upper or lower triangle of the symmetric band matrix A, stored in the first KD+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. AFB (input) REAL array, dimension (LDAFB,N) The triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T of the band matrix A as computed by SPBTRF, in the same storage format as A (see AB). LDAFB (input) INTEGER The leading dimension of the array AFB. LDAFB >= KD+1. B (input) REAL array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) REAL array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by SPBTRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) REAL array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) REAL array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) REAL array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static real c_b12 = -1.f; static real c_b14 = 1.f; /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5; real r__1, r__2, r__3; /* Local variables */ static integer kase; static real safe1, safe2; static integer i__, j, k, l; static real s; extern logical lsame_(char *, char *); static integer count; extern /* Subroutine */ int ssbmv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static logical upper; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), saxpy_(integer *, real *, real *, integer *, real *, integer *); static real xk; extern doublereal slamch_(char *); static integer nz; static real safmin; extern /* Subroutine */ int xerbla_(char *, integer *), slacon_( integer *, real *, real *, integer *, real *, integer *); static real lstres; extern /* Subroutine */ int spbtrs_(char *, integer *, integer *, integer *, real *, integer *, real *, integer *, integer *); static real eps; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1 * 1; afb -= afb_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*ldab < *kd + 1) { *info = -6; } else if (*ldafb < *kd + 1) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldx < max(1,*n)) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("SPBRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.f; berr[j] = 0.f; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 Computing MIN */ i__1 = *n + 1, i__2 = (*kd << 1) + 2; nz = min(i__1,i__2); eps = slamch_("Epsilon"); safmin = slamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.f; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - A * X */ scopy_(n, &b_ref(1, j), &c__1, &work[*n + 1], &c__1); ssbmv_(uplo, n, kd, &c_b12, &ab[ab_offset], ldab, &x_ref(1, j), &c__1, &c_b14, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th components of the numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (r__1 = b_ref(i__, j), dabs(r__1)); /* L30: */ } /* Compute abs(A)*abs(X) + abs(B). */ if (upper) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.f; xk = (r__1 = x_ref(k, j), dabs(r__1)); l = *kd + 1 - k; /* Computing MAX */ i__3 = 1, i__4 = k - *kd; i__5 = k - 1; for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { work[i__] += (r__1 = ab_ref(l + i__, k), dabs(r__1)) * xk; s += (r__1 = ab_ref(l + i__, k), dabs(r__1)) * (r__2 = x_ref(i__, j), dabs(r__2)); /* L40: */ } work[k] = work[k] + (r__1 = ab_ref(*kd + 1, k), dabs(r__1)) * xk + s; /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.f; xk = (r__1 = x_ref(k, j), dabs(r__1)); work[k] += (r__1 = ab_ref(1, k), dabs(r__1)) * xk; l = 1 - k; /* Computing MIN */ i__3 = *n, i__4 = k + *kd; i__5 = min(i__3,i__4); for (i__ = k + 1; i__ <= i__5; ++i__) { work[i__] += (r__1 = ab_ref(l + i__, k), dabs(r__1)) * xk; s += (r__1 = ab_ref(l + i__, k), dabs(r__1)) * (r__2 = x_ref(i__, j), dabs(r__2)); /* L60: */ } work[k] += s; /* L70: */ } } s = 0.f; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ r__2 = s, r__3 = (r__1 = work[*n + i__], dabs(r__1)) / work[ i__]; s = dmax(r__2,r__3); } else { /* Computing MAX */ r__2 = s, r__3 = ((r__1 = work[*n + i__], dabs(r__1)) + safe1) / (work[i__] + safe1); s = dmax(r__2,r__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) { /* Update solution and try again. */ spbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + 1] , n, info); saxpy_(n, &c_b14, &work[*n + 1], &c__1, &x_ref(1, j), &c__1); lstres = berr[j]; ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(A))* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(A) is the inverse of A abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(A)*abs(X) + abs(B) is less than SAFE2. Use SLACON to estimate the infinity-norm of the matrix inv(A) * diag(W), where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (r__1 = work[*n + i__], dabs(r__1)) + nz * eps * work[i__]; } else { work[i__] = (r__1 = work[*n + i__], dabs(r__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: slacon_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(A'). */ spbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] *= work[i__]; /* L110: */ } } else if (kase == 2) { /* Multiply by inv(A)*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] *= work[i__]; /* L120: */ } spbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.f; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ r__2 = lstres, r__3 = (r__1 = x_ref(i__, j), dabs(r__1)); lstres = dmax(r__2,r__3); /* L130: */ } if (lstres != 0.f) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of SPBRFS */ } /* spbrfs_ */
/* Subroutine */ int zlattb_(integer *imat, char *uplo, char *trans, char * diag, integer *iseed, integer *n, integer *kd, doublecomplex *ab, integer *ldab, doublecomplex *b, doublecomplex *work, doublereal * rwork, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2; doublecomplex z__1, z__2; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); double sqrt(doublereal); void z_div(doublecomplex *, doublecomplex *, doublecomplex *); double pow_dd(doublereal *, doublereal *), z_abs(doublecomplex *); /* Local variables */ static doublereal sfac; static integer ioff, mode, lenj; static char path[3], dist[1]; static doublereal unfl, rexp; static char type__[1]; static doublereal texp; static doublecomplex star1, plus1, plus2; static integer i__, j; static doublereal bscal; extern logical lsame_(char *, char *); static doublereal tscal, anorm, bnorm, tleft; static logical upper; static doublereal tnorm; extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), zlatb4_(char *, integer *, integer *, integer *, char *, integer *, integer *, doublereal *, integer *, doublereal *, char *), dlabad_(doublereal *, doublereal *); static integer kl; extern doublereal dlamch_(char *); static integer ku, iy; extern doublereal dlarnd_(integer *, integer *); static char packit[1]; extern /* Subroutine */ int zdscal_(integer *, doublereal *, doublecomplex *, integer *); static doublereal bignum, cndnum; extern /* Subroutine */ int dlarnv_(integer *, integer *, integer *, doublereal *); extern integer izamax_(integer *, doublecomplex *, integer *); extern /* Double Complex */ VOID zlarnd_(doublecomplex *, integer *, integer *); static integer jcount; extern /* Subroutine */ int zlatms_(integer *, integer *, char *, integer *, char *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, char *, doublecomplex *, integer *, doublecomplex *, integer *); static doublereal smlnum; extern /* Subroutine */ int zlarnv_(integer *, integer *, integer *, doublecomplex *); static doublereal ulp; #define ab_subscr(a_1,a_2) (a_2)*ab_dim1 + a_1 #define ab_ref(a_1,a_2) ab[ab_subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZLATTB generates a triangular test matrix in 2-dimensional storage. IMAT and UPLO uniquely specify the properties of the test matrix, which is returned in the array A. Arguments ========= IMAT (input) INTEGER An integer key describing which matrix to generate for this path. UPLO (input) CHARACTER*1 Specifies whether the matrix A will be upper or lower triangular. = 'U': Upper triangular = 'L': Lower triangular TRANS (input) CHARACTER*1 Specifies whether the matrix or its transpose will be used. = 'N': No transpose = 'T': Transpose = 'C': Conjugate transpose (= transpose) DIAG (output) CHARACTER*1 Specifies whether or not the matrix A is unit triangular. = 'N': Non-unit triangular = 'U': Unit triangular ISEED (input/output) INTEGER array, dimension (4) The seed vector for the random number generator (used in ZLATMS). Modified on exit. N (input) INTEGER The order of the matrix to be generated. KD (input) INTEGER The number of superdiagonals or subdiagonals of the banded triangular matrix A. KD >= 0. AB (output) COMPLEX*16 array, dimension (LDAB,N) The upper or lower triangular banded matrix A, stored in the first KD+1 rows of AB. Let j be a column of A, 1<=j<=n. If UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j. If UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. B (workspace) COMPLEX*16 array, dimension (N) WORK (workspace) COMPLEX*16 array, dimension (2*N) RWORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Parameter adjustments */ --iseed; ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --b; --work; --rwork; /* Function Body */ s_copy(path, "Zomplex precision", (ftnlen)1, (ftnlen)17); s_copy(path + 1, "TB", (ftnlen)2, (ftnlen)2); unfl = dlamch_("Safe minimum"); ulp = dlamch_("Epsilon") * dlamch_("Base"); smlnum = unfl; bignum = (1. - ulp) / smlnum; dlabad_(&smlnum, &bignum); if (*imat >= 6 && *imat <= 9 || *imat == 17) { *(unsigned char *)diag = 'U'; } else { *(unsigned char *)diag = 'N'; } *info = 0; /* Quick return if N.LE.0. */ if (*n <= 0) { return 0; } /* Call ZLATB4 to set parameters for CLATMS. */ upper = lsame_(uplo, "U"); if (upper) { zlatb4_(path, imat, n, n, type__, &kl, &ku, &anorm, &mode, &cndnum, dist); ku = *kd; /* Computing MAX */ i__1 = 0, i__2 = *kd - *n + 1; ioff = max(i__1,i__2) + 1; kl = 0; *(unsigned char *)packit = 'Q'; } else { i__1 = -(*imat); zlatb4_(path, &i__1, n, n, type__, &kl, &ku, &anorm, &mode, &cndnum, dist); kl = *kd; ioff = 1; ku = 0; *(unsigned char *)packit = 'B'; } /* IMAT <= 5: Non-unit triangular matrix */ if (*imat <= 5) { zlatms_(n, n, dist, &iseed[1], type__, &rwork[1], &mode, &cndnum, & anorm, &kl, &ku, packit, &ab_ref(ioff, 1), ldab, &work[1], info); /* IMAT > 5: Unit triangular matrix The diagonal is deliberately set to something other than 1. IMAT = 6: Matrix is the identity */ } else if (*imat == 6) { if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = 1, i__3 = *kd + 2 - j; i__4 = *kd; for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { i__2 = ab_subscr(i__, j); ab[i__2].r = 0., ab[i__2].i = 0.; /* L10: */ } i__4 = ab_subscr(*kd + 1, j); ab[i__4].r = (doublereal) j, ab[i__4].i = 0.; /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__4 = ab_subscr(1, j); ab[i__4].r = (doublereal) j, ab[i__4].i = 0.; /* Computing MIN */ i__2 = *kd + 1, i__3 = *n - j + 1; i__4 = min(i__2,i__3); for (i__ = 2; i__ <= i__4; ++i__) { i__2 = ab_subscr(i__, j); ab[i__2].r = 0., ab[i__2].i = 0.; /* L30: */ } /* L40: */ } } /* IMAT > 6: Non-trivial unit triangular matrix A unit triangular matrix T with condition CNDNUM is formed. In this version, T only has bandwidth 2, the rest of it is zero. */ } else if (*imat <= 9) { tnorm = sqrt(cndnum); /* Initialize AB to zero. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__4 = 1, i__2 = *kd + 2 - j; i__3 = *kd; for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { i__4 = ab_subscr(i__, j); ab[i__4].r = 0., ab[i__4].i = 0.; /* L50: */ } i__3 = ab_subscr(*kd + 1, j); d__1 = (doublereal) j; ab[i__3].r = d__1, ab[i__3].i = 0.; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__4 = *kd + 1, i__2 = *n - j + 1; i__3 = min(i__4,i__2); for (i__ = 2; i__ <= i__3; ++i__) { i__4 = ab_subscr(i__, j); ab[i__4].r = 0., ab[i__4].i = 0.; /* L70: */ } i__3 = ab_subscr(1, j); d__1 = (doublereal) j; ab[i__3].r = d__1, ab[i__3].i = 0.; /* L80: */ } } /* Special case: T is tridiagonal. Set every other offdiagonal so that the matrix has norm TNORM+1. */ if (*kd == 1) { if (upper) { i__1 = ab_subscr(1, 2); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = tnorm * z__2.r, z__1.i = tnorm * z__2.i; ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; lenj = (*n - 3) / 2; zlarnv_(&c__2, &iseed[1], &lenj, &work[1]); i__1 = lenj; for (j = 1; j <= i__1; ++j) { i__3 = ab_subscr(1, j + 1 << 1); i__4 = j; z__1.r = tnorm * work[i__4].r, z__1.i = tnorm * work[i__4] .i; ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; /* L90: */ } } else { i__1 = ab_subscr(2, 1); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = tnorm * z__2.r, z__1.i = tnorm * z__2.i; ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; lenj = (*n - 3) / 2; zlarnv_(&c__2, &iseed[1], &lenj, &work[1]); i__1 = lenj; for (j = 1; j <= i__1; ++j) { i__3 = ab_subscr(2, (j << 1) + 1); i__4 = j; z__1.r = tnorm * work[i__4].r, z__1.i = tnorm * work[i__4] .i; ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; /* L100: */ } } } else if (*kd > 1) { /* Form a unit triangular matrix T with condition CNDNUM. T is given by | 1 + * | | 1 + | T = | 1 + * | | 1 + | | 1 + * | | 1 + | | . . . | Each element marked with a '*' is formed by taking the product of the adjacent elements marked with '+'. The '*'s can be chosen freely, and the '+'s are chosen so that the inverse of T will have elements of the same magnitude as T. The two offdiagonals of T are stored in WORK. */ zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = tnorm * z__2.r, z__1.i = tnorm * z__2.i; star1.r = z__1.r, star1.i = z__1.i; sfac = sqrt(tnorm); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = sfac * z__2.r, z__1.i = sfac * z__2.i; plus1.r = z__1.r, plus1.i = z__1.i; i__1 = *n; for (j = 1; j <= i__1; j += 2) { z_div(&z__1, &star1, &plus1); plus2.r = z__1.r, plus2.i = z__1.i; i__3 = j; work[i__3].r = plus1.r, work[i__3].i = plus1.i; i__3 = *n + j; work[i__3].r = star1.r, work[i__3].i = star1.i; if (j + 1 <= *n) { i__3 = j + 1; work[i__3].r = plus2.r, work[i__3].i = plus2.i; i__3 = *n + j + 1; work[i__3].r = 0., work[i__3].i = 0.; z_div(&z__1, &star1, &plus2); plus1.r = z__1.r, plus1.i = z__1.i; /* Generate a new *-value with norm between sqrt(TNORM) and TNORM. */ rexp = dlarnd_(&c__2, &iseed[1]); if (rexp < 0.) { d__2 = 1. - rexp; d__1 = -pow_dd(&sfac, &d__2); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = d__1 * z__2.r, z__1.i = d__1 * z__2.i; star1.r = z__1.r, star1.i = z__1.i; } else { d__2 = rexp + 1.; d__1 = pow_dd(&sfac, &d__2); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = d__1 * z__2.r, z__1.i = d__1 * z__2.i; star1.r = z__1.r, star1.i = z__1.i; } } /* L110: */ } /* Copy the tridiagonal T to AB. */ if (upper) { i__1 = *n - 1; zcopy_(&i__1, &work[1], &c__1, &ab_ref(*kd, 2), ldab); i__1 = *n - 2; zcopy_(&i__1, &work[*n + 1], &c__1, &ab_ref(*kd - 1, 3), ldab) ; } else { i__1 = *n - 1; zcopy_(&i__1, &work[1], &c__1, &ab_ref(2, 1), ldab); i__1 = *n - 2; zcopy_(&i__1, &work[*n + 1], &c__1, &ab_ref(3, 1), ldab); } } /* IMAT > 9: Pathological test cases. These triangular matrices are badly scaled or badly conditioned, so when used in solving a triangular system they may cause overflow in the solution vector. */ } else if (*imat == 10) { /* Type 10: Generate a triangular matrix with elements between -1 and 1. Give the diagonal norm 2 to make it well-conditioned. Make the right hand side large so that it requires scaling. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = j - 1; lenj = min(i__3,*kd); zlarnv_(&c__4, &iseed[1], &lenj, &ab_ref(*kd + 1 - lenj, j)); i__3 = ab_subscr(*kd + 1, j); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = z__2.r * 2., z__1.i = z__2.i * 2.; ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; /* L120: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n - j; lenj = min(i__3,*kd); if (lenj > 0) { zlarnv_(&c__4, &iseed[1], &lenj, &ab_ref(2, j)); } i__3 = ab_subscr(1, j); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = z__2.r * 2., z__1.i = z__2.i * 2.; ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; /* L130: */ } } /* Set the right hand side so that the largest value is BIGNUM. */ zlarnv_(&c__2, &iseed[1], n, &b[1]); iy = izamax_(n, &b[1], &c__1); bnorm = z_abs(&b[iy]); bscal = bignum / max(1.,bnorm); zdscal_(n, &bscal, &b[1], &c__1); } else if (*imat == 11) { /* Type 11: Make the first diagonal element in the solve small to cause immediate overflow when dividing by T(j,j). In type 11, the offdiagonal elements are small (CNORM(j) < 1). */ zlarnv_(&c__2, &iseed[1], n, &b[1]); tscal = 1. / (doublereal) (*kd + 1); if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = j - 1; lenj = min(i__3,*kd); if (lenj > 0) { zlarnv_(&c__4, &iseed[1], &lenj, &ab_ref(*kd + 2 - lenj, j)); zdscal_(&lenj, &tscal, &ab_ref(*kd + 2 - lenj, j), &c__1); } i__3 = ab_subscr(*kd + 1, j); zlarnd_(&z__1, &c__5, &iseed[1]); ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; /* L140: */ } i__1 = ab_subscr(*kd + 1, *n); i__3 = ab_subscr(*kd + 1, *n); z__1.r = smlnum * ab[i__3].r, z__1.i = smlnum * ab[i__3].i; ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n - j; lenj = min(i__3,*kd); if (lenj > 0) { zlarnv_(&c__4, &iseed[1], &lenj, &ab_ref(2, j)); zdscal_(&lenj, &tscal, &ab_ref(2, j), &c__1); } i__3 = ab_subscr(1, j); zlarnd_(&z__1, &c__5, &iseed[1]); ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; /* L150: */ } i__1 = ab_subscr(1, 1); i__3 = ab_subscr(1, 1); z__1.r = smlnum * ab[i__3].r, z__1.i = smlnum * ab[i__3].i; ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; } } else if (*imat == 12) { /* Type 12: Make the first diagonal element in the solve small to cause immediate overflow when dividing by T(j,j). In type 12, the offdiagonal elements are O(1) (CNORM(j) > 1). */ zlarnv_(&c__2, &iseed[1], n, &b[1]); if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = j - 1; lenj = min(i__3,*kd); if (lenj > 0) { zlarnv_(&c__4, &iseed[1], &lenj, &ab_ref(*kd + 2 - lenj, j)); } i__3 = ab_subscr(*kd + 1, j); zlarnd_(&z__1, &c__5, &iseed[1]); ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; /* L160: */ } i__1 = ab_subscr(*kd + 1, *n); i__3 = ab_subscr(*kd + 1, *n); z__1.r = smlnum * ab[i__3].r, z__1.i = smlnum * ab[i__3].i; ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n - j; lenj = min(i__3,*kd); if (lenj > 0) { zlarnv_(&c__4, &iseed[1], &lenj, &ab_ref(2, j)); } i__3 = ab_subscr(1, j); zlarnd_(&z__1, &c__5, &iseed[1]); ab[i__3].r = z__1.r, ab[i__3].i = z__1.i; /* L170: */ } i__1 = ab_subscr(1, 1); i__3 = ab_subscr(1, 1); z__1.r = smlnum * ab[i__3].r, z__1.i = smlnum * ab[i__3].i; ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; } } else if (*imat == 13) { /* Type 13: T is diagonal with small numbers on the diagonal to make the growth factor underflow, but a small right hand side chosen so that the solution does not overflow. */ if (upper) { jcount = 1; for (j = *n; j >= 1; --j) { /* Computing MAX */ i__1 = 1, i__3 = *kd + 1 - (j - 1); i__4 = *kd; for (i__ = max(i__1,i__3); i__ <= i__4; ++i__) { i__1 = ab_subscr(i__, j); ab[i__1].r = 0., ab[i__1].i = 0.; /* L180: */ } if (jcount <= 2) { i__4 = ab_subscr(*kd + 1, j); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = smlnum * z__2.r, z__1.i = smlnum * z__2.i; ab[i__4].r = z__1.r, ab[i__4].i = z__1.i; } else { i__4 = ab_subscr(*kd + 1, j); zlarnd_(&z__1, &c__5, &iseed[1]); ab[i__4].r = z__1.r, ab[i__4].i = z__1.i; } ++jcount; if (jcount > 4) { jcount = 1; } /* L190: */ } } else { jcount = 1; i__4 = *n; for (j = 1; j <= i__4; ++j) { /* Computing MIN */ i__3 = *n - j + 1, i__2 = *kd + 1; i__1 = min(i__3,i__2); for (i__ = 2; i__ <= i__1; ++i__) { i__3 = ab_subscr(i__, j); ab[i__3].r = 0., ab[i__3].i = 0.; /* L200: */ } if (jcount <= 2) { i__1 = ab_subscr(1, j); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = smlnum * z__2.r, z__1.i = smlnum * z__2.i; ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; } else { i__1 = ab_subscr(1, j); zlarnd_(&z__1, &c__5, &iseed[1]); ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; } ++jcount; if (jcount > 4) { jcount = 1; } /* L210: */ } } /* Set the right hand side alternately zero and small. */ if (upper) { b[1].r = 0., b[1].i = 0.; for (i__ = *n; i__ >= 2; i__ += -2) { i__4 = i__; b[i__4].r = 0., b[i__4].i = 0.; i__4 = i__ - 1; zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = smlnum * z__2.r, z__1.i = smlnum * z__2.i; b[i__4].r = z__1.r, b[i__4].i = z__1.i; /* L220: */ } } else { i__4 = *n; b[i__4].r = 0., b[i__4].i = 0.; i__4 = *n - 1; for (i__ = 1; i__ <= i__4; i__ += 2) { i__1 = i__; b[i__1].r = 0., b[i__1].i = 0.; i__1 = i__ + 1; zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = smlnum * z__2.r, z__1.i = smlnum * z__2.i; b[i__1].r = z__1.r, b[i__1].i = z__1.i; /* L230: */ } } } else if (*imat == 14) { /* Type 14: Make the diagonal elements small to cause gradual overflow when dividing by T(j,j). To control the amount of scaling needed, the matrix is bidiagonal. */ texp = 1. / (doublereal) (*kd + 1); tscal = pow_dd(&smlnum, &texp); zlarnv_(&c__4, &iseed[1], n, &b[1]); if (upper) { i__4 = *n; for (j = 1; j <= i__4; ++j) { /* Computing MAX */ i__1 = 1, i__3 = *kd + 2 - j; i__2 = *kd; for (i__ = max(i__1,i__3); i__ <= i__2; ++i__) { i__1 = ab_subscr(i__, j); ab[i__1].r = 0., ab[i__1].i = 0.; /* L240: */ } if (j > 1 && *kd > 0) { i__2 = ab_subscr(*kd, j); ab[i__2].r = -1., ab[i__2].i = -1.; } i__2 = ab_subscr(*kd + 1, j); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; /* L250: */ } i__4 = *n; b[i__4].r = 1., b[i__4].i = 1.; } else { i__4 = *n; for (j = 1; j <= i__4; ++j) { /* Computing MIN */ i__1 = *n - j + 1, i__3 = *kd + 1; i__2 = min(i__1,i__3); for (i__ = 3; i__ <= i__2; ++i__) { i__1 = ab_subscr(i__, j); ab[i__1].r = 0., ab[i__1].i = 0.; /* L260: */ } if (j < *n && *kd > 0) { i__2 = ab_subscr(2, j); ab[i__2].r = -1., ab[i__2].i = -1.; } i__2 = ab_subscr(1, j); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i; ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; /* L270: */ } b[1].r = 1., b[1].i = 1.; } } else if (*imat == 15) { /* Type 15: One zero diagonal element. */ iy = *n / 2 + 1; if (upper) { i__4 = *n; for (j = 1; j <= i__4; ++j) { /* Computing MIN */ i__2 = j, i__1 = *kd + 1; lenj = min(i__2,i__1); zlarnv_(&c__4, &iseed[1], &lenj, &ab_ref(*kd + 2 - lenj, j)); if (j != iy) { i__2 = ab_subscr(*kd + 1, j); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = z__2.r * 2., z__1.i = z__2.i * 2.; ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; } else { i__2 = ab_subscr(*kd + 1, j); ab[i__2].r = 0., ab[i__2].i = 0.; } /* L280: */ } } else { i__4 = *n; for (j = 1; j <= i__4; ++j) { /* Computing MIN */ i__2 = *n - j + 1, i__1 = *kd + 1; lenj = min(i__2,i__1); zlarnv_(&c__4, &iseed[1], &lenj, &ab_ref(1, j)); if (j != iy) { i__2 = ab_subscr(1, j); zlarnd_(&z__2, &c__5, &iseed[1]); z__1.r = z__2.r * 2., z__1.i = z__2.i * 2.; ab[i__2].r = z__1.r, ab[i__2].i = z__1.i; } else { i__2 = ab_subscr(1, j); ab[i__2].r = 0., ab[i__2].i = 0.; } /* L290: */ } } zlarnv_(&c__2, &iseed[1], n, &b[1]); zdscal_(n, &c_b91, &b[1], &c__1); } else if (*imat == 16) { /* Type 16: Make the offdiagonal elements large to cause overflow when adding a column of T. In the non-transposed case, the matrix is constructed to cause overflow when adding a column in every other step. */ tscal = unfl / ulp; tscal = (1. - ulp) / tscal; i__4 = *n; for (j = 1; j <= i__4; ++j) { i__2 = *kd + 1; for (i__ = 1; i__ <= i__2; ++i__) { i__1 = ab_subscr(i__, j); ab[i__1].r = 0., ab[i__1].i = 0.; /* L300: */ } /* L310: */ } texp = 1.; if (*kd > 0) { if (upper) { i__4 = -(*kd); for (j = *n; i__4 < 0 ? j >= 1 : j <= 1; j += i__4) { /* Computing MAX */ i__1 = 1, i__3 = j - *kd + 1; i__2 = max(i__1,i__3); for (i__ = j; i__ >= i__2; i__ += -2) { i__1 = ab_subscr(j - i__ + 1, i__); d__1 = -tscal / (doublereal) (*kd + 2); ab[i__1].r = d__1, ab[i__1].i = 0.; i__1 = ab_subscr(*kd + 1, i__); ab[i__1].r = 1., ab[i__1].i = 0.; i__1 = i__; d__1 = texp * (1. - ulp); b[i__1].r = d__1, b[i__1].i = 0.; /* Computing MAX */ i__1 = 1, i__3 = j - *kd + 1; if (i__ > max(i__1,i__3)) { i__1 = ab_subscr(j - i__ + 2, i__ - 1); d__1 = -(tscal / (doublereal) (*kd + 2)) / ( doublereal) (*kd + 3); ab[i__1].r = d__1, ab[i__1].i = 0.; i__1 = ab_subscr(*kd + 1, i__ - 1); ab[i__1].r = 1., ab[i__1].i = 0.; i__1 = i__ - 1; d__1 = texp * (doublereal) ((*kd + 1) * (*kd + 1) + *kd); b[i__1].r = d__1, b[i__1].i = 0.; } texp *= 2.; /* L320: */ } /* Computing MAX */ i__1 = 1, i__3 = j - *kd + 1; i__2 = max(i__1,i__3); d__1 = (doublereal) (*kd + 2) / (doublereal) (*kd + 3) * tscal; b[i__2].r = d__1, b[i__2].i = 0.; /* L330: */ } } else { i__4 = *n; i__2 = *kd; for (j = 1; i__2 < 0 ? j >= i__4 : j <= i__4; j += i__2) { texp = 1.; /* Computing MIN */ i__1 = *kd + 1, i__3 = *n - j + 1; lenj = min(i__1,i__3); /* Computing MIN */ i__3 = *n, i__5 = j + *kd - 1; i__1 = min(i__3,i__5); for (i__ = j; i__ <= i__1; i__ += 2) { i__3 = ab_subscr(lenj - (i__ - j), j); d__1 = -tscal / (doublereal) (*kd + 2); ab[i__3].r = d__1, ab[i__3].i = 0.; i__3 = ab_subscr(1, j); ab[i__3].r = 1., ab[i__3].i = 0.; i__3 = j; d__1 = texp * (1. - ulp); b[i__3].r = d__1, b[i__3].i = 0.; /* Computing MIN */ i__3 = *n, i__5 = j + *kd - 1; if (i__ < min(i__3,i__5)) { i__3 = ab_subscr(lenj - (i__ - j + 1), i__ + 1); d__1 = -(tscal / (doublereal) (*kd + 2)) / ( doublereal) (*kd + 3); ab[i__3].r = d__1, ab[i__3].i = 0.; i__3 = ab_subscr(1, i__ + 1); ab[i__3].r = 1., ab[i__3].i = 0.; i__3 = i__ + 1; d__1 = texp * (doublereal) ((*kd + 1) * (*kd + 1) + *kd); b[i__3].r = d__1, b[i__3].i = 0.; } texp *= 2.; /* L340: */ } /* Computing MIN */ i__3 = *n, i__5 = j + *kd - 1; i__1 = min(i__3,i__5); d__1 = (doublereal) (*kd + 2) / (doublereal) (*kd + 3) * tscal; b[i__1].r = d__1, b[i__1].i = 0.; /* L350: */ } } } } else if (*imat == 17) { /* Type 17: Generate a unit triangular matrix with elements between -1 and 1, and make the right hand side large so that it requires scaling. */ if (upper) { i__2 = *n; for (j = 1; j <= i__2; ++j) { /* Computing MIN */ i__4 = j - 1; lenj = min(i__4,*kd); zlarnv_(&c__4, &iseed[1], &lenj, &ab_ref(*kd + 1 - lenj, j)); i__4 = ab_subscr(*kd + 1, j); d__1 = (doublereal) j; ab[i__4].r = d__1, ab[i__4].i = 0.; /* L360: */ } } else { i__2 = *n; for (j = 1; j <= i__2; ++j) { /* Computing MIN */ i__4 = *n - j; lenj = min(i__4,*kd); if (lenj > 0) { zlarnv_(&c__4, &iseed[1], &lenj, &ab_ref(2, j)); } i__4 = ab_subscr(1, j); d__1 = (doublereal) j; ab[i__4].r = d__1, ab[i__4].i = 0.; /* L370: */ } } /* Set the right hand side so that the largest value is BIGNUM. */ zlarnv_(&c__2, &iseed[1], n, &b[1]); iy = izamax_(n, &b[1], &c__1); bnorm = z_abs(&b[iy]); bscal = bignum / max(1.,bnorm); zdscal_(n, &bscal, &b[1], &c__1); } else if (*imat == 18) { /* Type 18: Generate a triangular matrix with elements between BIGNUM/(KD+1) and BIGNUM so that at least one of the column norms will exceed BIGNUM. 1/3/91: ZLATBS no longer can handle this case */ tleft = bignum / (doublereal) (*kd + 1); tscal = bignum * ((doublereal) (*kd + 1) / (doublereal) (*kd + 2)); if (upper) { i__2 = *n; for (j = 1; j <= i__2; ++j) { /* Computing MIN */ i__4 = j, i__1 = *kd + 1; lenj = min(i__4,i__1); zlarnv_(&c__5, &iseed[1], &lenj, &ab_ref(*kd + 2 - lenj, j)); dlarnv_(&c__1, &iseed[1], &lenj, &rwork[*kd + 2 - lenj]); i__4 = *kd + 1; for (i__ = *kd + 2 - lenj; i__ <= i__4; ++i__) { i__1 = ab_subscr(i__, j); i__3 = ab_subscr(i__, j); d__1 = tleft + rwork[i__] * tscal; z__1.r = d__1 * ab[i__3].r, z__1.i = d__1 * ab[i__3].i; ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; /* L380: */ } /* L390: */ } } else { i__2 = *n; for (j = 1; j <= i__2; ++j) { /* Computing MIN */ i__4 = *n - j + 1, i__1 = *kd + 1; lenj = min(i__4,i__1); zlarnv_(&c__5, &iseed[1], &lenj, &ab_ref(1, j)); dlarnv_(&c__1, &iseed[1], &lenj, &rwork[1]); i__4 = lenj; for (i__ = 1; i__ <= i__4; ++i__) { i__1 = ab_subscr(i__, j); i__3 = ab_subscr(i__, j); d__1 = tleft + rwork[i__] * tscal; z__1.r = d__1 * ab[i__3].r, z__1.i = d__1 * ab[i__3].i; ab[i__1].r = z__1.r, ab[i__1].i = z__1.i; /* L400: */ } /* L410: */ } } zlarnv_(&c__2, &iseed[1], n, &b[1]); zdscal_(n, &c_b91, &b[1], &c__1); } /* Flip the matrix if the transpose will be used. */ if (! lsame_(trans, "N")) { if (upper) { i__2 = *n / 2; for (j = 1; j <= i__2; ++j) { /* Computing MIN */ i__4 = *n - (j << 1) + 1, i__1 = *kd + 1; lenj = min(i__4,i__1); i__4 = *ldab - 1; zswap_(&lenj, &ab_ref(*kd + 1, j), &i__4, &ab_ref(*kd + 2 - lenj, *n - j + 1), &c_n1); /* L420: */ } } else { i__2 = *n / 2; for (j = 1; j <= i__2; ++j) { /* Computing MIN */ i__4 = *n - (j << 1) + 1, i__1 = *kd + 1; lenj = min(i__4,i__1); i__4 = -(*ldab) + 1; zswap_(&lenj, &ab_ref(1, j), &c__1, &ab_ref(lenj, *n - j + 2 - lenj), &i__4); /* L430: */ } } } return 0; /* End of ZLATTB */ } /* zlattb_ */
doublereal dlansb_(char *norm, char *uplo, integer *n, integer *k, doublereal *ab, integer *ldab, doublereal *work) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 Purpose ======= DLANSB returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of an n by n symmetric band matrix A, with k super-diagonals. Description =========== DLANSB returns the value DLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' ( ( norm1(A), NORM = '1', 'O' or 'o' ( ( normI(A), NORM = 'I' or 'i' ( ( normF(A), NORM = 'F', 'f', 'E' or 'e' where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a matrix norm. Arguments ========= NORM (input) CHARACTER*1 Specifies the value to be returned in DLANSB as described above. UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the band matrix A is supplied. = 'U': Upper triangular part is supplied = 'L': Lower triangular part is supplied N (input) INTEGER The order of the matrix A. N >= 0. When N = 0, DLANSB is set to zero. K (input) INTEGER The number of super-diagonals or sub-diagonals of the band matrix A. K >= 0. AB (input) DOUBLE PRECISION array, dimension (LDAB,N) The upper or lower triangle of the symmetric band matrix A, stored in the first K+1 rows of AB. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= K+1. WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, WORK is not referenced. ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static doublereal absa; static integer i__, j, l; static doublereal scale; extern logical lsame_(char *, char *); static doublereal value; extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, doublereal *, doublereal *); static doublereal sum; #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --work; /* Function Body */ if (*n == 0) { value = 0.; } else if (lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ value = 0.; if (lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = *k + 2 - j; i__3 = *k + 1; for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ab_ref(i__, j), abs(d__1)); value = max(d__2,d__3); /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *n + 1 - j, i__4 = *k + 1; i__3 = min(i__2,i__4); for (i__ = 1; i__ <= i__3; ++i__) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = ab_ref(i__, j), abs(d__1)); value = max(d__2,d__3); /* L30: */ } /* L40: */ } } } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find normI(A) ( = norm1(A), since A is symmetric). */ value = 0.; if (lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = 0.; l = *k + 1 - j; /* Computing MAX */ i__3 = 1, i__2 = j - *k; i__4 = j - 1; for (i__ = max(i__3,i__2); i__ <= i__4; ++i__) { absa = (d__1 = ab_ref(l + i__, j), abs(d__1)); sum += absa; work[i__] += absa; /* L50: */ } work[j] = sum + (d__1 = ab_ref(*k + 1, j), abs(d__1)); /* L60: */ } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L70: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L80: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = work[j] + (d__1 = ab_ref(1, j), abs(d__1)); l = 1 - j; /* Computing MIN */ i__3 = *n, i__2 = j + *k; i__4 = min(i__3,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { absa = (d__1 = ab_ref(l + i__, j), abs(d__1)); sum += absa; work[i__] += absa; /* L90: */ } value = max(value,sum); /* L100: */ } } } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; if (*k > 0) { if (lsame_(uplo, "U")) { i__1 = *n; for (j = 2; j <= i__1; ++j) { /* Computing MAX */ i__4 = *k + 2 - j; /* Computing MIN */ i__2 = j - 1; i__3 = min(i__2,*k); dlassq_(&i__3, &ab_ref(max(i__4,1), j), &c__1, &scale, & sum); /* L110: */ } l = *k + 1; } else { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n - j; i__4 = min(i__3,*k); dlassq_(&i__4, &ab_ref(2, j), &c__1, &scale, &sum); /* L120: */ } l = 1; } sum *= 2; } else { l = 1; } dlassq_(n, &ab_ref(l, 1), ldab, &scale, &sum); value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANSB */ } /* dlansb_ */
/* Subroutine */ int dsbevd_(char *jobz, char *uplo, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DSBEVD computes all the eigenvalues and, optionally, eigenvectors of a real symmetric band matrix A. If eigenvectors are desired, it uses a divide and conquer algorithm. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of superdiagonals of the matrix A if UPLO = 'U', or the number of subdiagonals if UPLO = 'L'. KD >= 0. AB (input/output) DOUBLE PRECISION array, dimension (LDAB, N) On entry, the upper or lower triangle of the symmetric band matrix A, stored in the first KD+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). On exit, AB is overwritten by values generated during the reduction to tridiagonal form. If UPLO = 'U', the first superdiagonal and the diagonal of the tridiagonal matrix T are returned in rows KD and KD+1 of AB, and if UPLO = 'L', the diagonal and first subdiagonal of T are returned in the first two rows of AB. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD + 1. W (output) DOUBLE PRECISION array, dimension (N) If INFO = 0, the eigenvalues in ascending order. Z (output) DOUBLE PRECISION array, dimension (LDZ, N) If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal eigenvectors of the matrix A, with the i-th column of Z holding the eigenvector associated with W(i). If JOBZ = 'N', then Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. IF N <= 1, LWORK must be at least 1. If JOBZ = 'N' and N > 2, LWORK must be at least 2*N. If JOBZ = 'V' and N > 2, LWORK must be at least ( 1 + 5*N + 2*N**2 ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace/output) INTEGER array, dimension (LIWORK) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER The dimension of the array LIWORK. If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, returns this value as the first entry of the IWORK array, and no error message related to LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static doublereal c_b11 = 1.; static doublereal c_b18 = 0.; static integer c__1 = 1; /* System generated locals */ integer ab_dim1, ab_offset, z_dim1, z_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer inde; static doublereal anrm, rmin, rmax; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *), dgemm_(char *, char *, integer *, integer *, integer * , doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static doublereal sigma; extern logical lsame_(char *, char *); static integer iinfo, lwmin; static logical lower, wantz; static integer indwk2, llwrk2; extern doublereal dlamch_(char *); static integer iscale; extern /* Subroutine */ int dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); extern doublereal dlansb_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dstedc_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *), dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum; extern /* Subroutine */ int dsbtrd_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), dsterf_( integer *, doublereal *, doublereal *, integer *); static integer indwrk, liwmin; static doublereal smlnum; static logical lquery; static doublereal eps; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --iwork; /* Function Body */ wantz = lsame_(jobz, "V"); lower = lsame_(uplo, "L"); lquery = *lwork == -1 || *liwork == -1; *info = 0; if (*n <= 1) { liwmin = 1; lwmin = 1; } else { if (wantz) { liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; lwmin = *n * 5 + 1 + (i__1 * i__1 << 1); } else { liwmin = 1; lwmin = *n << 1; } } if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (lower || lsame_(uplo, "U"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*kd < 0) { *info = -4; } else if (*ldab < *kd + 1) { *info = -6; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -9; } else if (*lwork < lwmin && ! lquery) { *info = -11; } else if (*liwork < liwmin && ! lquery) { *info = -13; } if (*info == 0) { work[1] = (doublereal) lwmin; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); xerbla_("DSBEVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { w[1] = ab_ref(1, 1); if (wantz) { z___ref(1, 1) = 1.; } return 0; } /* Get machine constants. */ safmin = dlamch_("Safe minimum"); eps = dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = dlansb_("M", uplo, n, kd, &ab[ab_offset], ldab, &work[1]); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { if (lower) { dlascl_("B", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, info); } else { dlascl_("Q", kd, kd, &c_b11, &sigma, n, n, &ab[ab_offset], ldab, info); } } /* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. */ inde = 1; indwrk = inde + *n; indwk2 = indwrk + *n * *n; llwrk2 = *lwork - indwk2 + 1; dsbtrd_(jobz, uplo, n, kd, &ab[ab_offset], ldab, &w[1], &work[inde], &z__[ z_offset], ldz, &work[indwrk], &iinfo); /* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. */ if (! wantz) { dsterf_(n, &w[1], &work[inde], info); } else { dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & llwrk2, &iwork[1], liwork, info); dgemm_("N", "N", n, n, n, &c_b11, &z__[z_offset], ldz, &work[indwrk], n, &c_b18, &work[indwk2], n); dlacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { d__1 = 1. / sigma; dscal_(n, &d__1, &w[1], &c__1); } work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DSBEVD */ } /* dsbevd_ */
/* "LAPACK" function for factorization of band matrices */ void mydgbtrf(int *m, int *n, int *kl, int *ku, double *ab, int *ldab, int *ipiv, int *info) { /* System generated locals */ int i__1, i__2, i__3, i__4; double d__1, d__2; /* Local variables */ int i__, i, j, k; int km, jp, ju, kv; #define ab_ref(a_1,a_2) ab[(a_2)**ldab + a_1] /* Function Body */ kv = *ku + *kl; *info = 0; /* Gaussian eliMINation with partial pivoting Set fill-in elements in columns KU+2 to KV to zero. */ i__1 = MIN(kv,*n); for (j = *ku + 1; j < i__1; ++j) { i__2 = *kl; for (i__ = kv - j; i__ < i__2; ++i__) { ab_ref(i__, j) = 0.; } } /* JU is the index of the last column affected by the current stage of the factorization. */ ju = 0; i__1 = MIN(*m,*n); for (j = 0; j < i__1; ++j) { /* Set fill-in elements in column J+KV to zero. */ if (j + kv < *n) { i__2 = *kl; for (i__ = 0; i__ < i__2; ++i__) { ab_ref(i__,kv) = 0.; } } /* Find pivot and test for singularity. KM is the number of subdiagonal elements in the current column. Computing MIN */ i__2 = *kl, i__3 = *m - j - 1; km = MIN(i__2,i__3); d__2 = -1.; jp = -1; for (i = 0; i <= km; i++) { d__1 = ab[kv + i]; d__1 *= d__1; if (d__1 > d__2) { d__2 = d__1; jp = i; } } *ipiv = jp + 1 + j; if (ab[kv + jp] != 0.) { /* Computing MAX Computing MIN */ i__2 = *n - 1; i__3 = j + *ku + jp; if (i__3 < i__2) i__2 = i__3; if (i__2 > ju) ju = i__2; /* Apply interchange to columns J to JU. */ if (jp != 0) { i__2 = ju - j; i__3 = *ldab - 1; i__4 = *ldab - 1; for (i = 0; i <= i__2; i++) { d__1 = ab[kv + jp + i * i__3]; ab[kv + jp + i * i__3] = ab[kv + i * i__4]; ab[kv + i * i__4] = d__1; } } if (km > 0) { /* Compute multipliers. */ d__1 = 1. / ab[kv]; i__2 = km + kv; for (i = 1 + kv; i <= i__2; i++) ab[i] *= d__1; /* Update trailing submatrix within the band. */ if (ju > j) { i__2 = ju - j; i__3 = km + kv; i__4 = 1 + kv; for (k = 1; k <= i__2; k++) { d__1 = ab_ref(kv-k, k); for (i = i__4; i <= i__3; i++) ab_ref(i-k, k) -= ab[i] * d__1; } } } } else { /* If pivot is zero, set INFO to the index of the pivot unless a zero pivot has already been found. */ if (*info == 0) { *info = j + 1; } } ab += *ldab; ipiv++; } /* return 0; */ }
/* Subroutine */ int dpbstf_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DPBSTF computes a split Cholesky factorization of a real symmetric positive definite band matrix A. This routine is designed to be used in conjunction with DSBGST. The factorization has the form A = S**T*S where S is a band matrix of the same bandwidth as A and the following structure: S = ( U ) ( M L ) where U is upper triangular of order m = (n+kd)/2, and L is lower triangular of order n-m. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of superdiagonals of the matrix A if UPLO = 'U', or the number of subdiagonals if UPLO = 'L'. KD >= 0. AB (input/output) DOUBLE PRECISION array, dimension (LDAB,N) On entry, the upper or lower triangle of the symmetric band matrix A, stored in the first kd+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). On exit, if INFO = 0, the factor S from the split Cholesky factorization A = S**T*S. See Further Details. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the factorization could not be completed, because the updated element a(i,i) was negative; the matrix A is not positive definite. Further Details =============== The band storage scheme is illustrated by the following example, when N = 7, KD = 2: S = ( s11 s12 s13 ) ( s22 s23 s24 ) ( s33 s34 ) ( s44 ) ( s53 s54 s55 ) ( s64 s65 s66 ) ( s75 s76 s77 ) If UPLO = 'U', the array AB holds: on entry: on exit: * * a13 a24 a35 a46 a57 * * s13 s24 s53 s64 s75 * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54 s65 s76 a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 If UPLO = 'L', the array AB holds: on entry: on exit: a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 a21 a32 a43 a54 a65 a76 * s12 s23 s34 s54 s65 s76 * a31 a42 a53 a64 a64 * * s13 s24 s53 s64 s75 * * Array elements marked * are not used by the routine. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b9 = -1.; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ extern /* Subroutine */ int dsyr_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer j, m; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); static logical upper; static integer km; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal ajj; static integer kld; #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*ldab < *kd + 1) { *info = -5; } if (*info != 0) { i__1 = -(*info); xerbla_("DPBSTF", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Computing MAX */ i__1 = 1, i__2 = *ldab - 1; kld = max(i__1,i__2); /* Set the splitting point m. */ m = (*n + *kd) / 2; if (upper) { /* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). */ i__1 = m + 1; for (j = *n; j >= i__1; --j) { /* Compute s(j,j) and test for non-positive-definiteness. */ ajj = ab_ref(*kd + 1, j); if (ajj <= 0.) { goto L50; } ajj = sqrt(ajj); ab_ref(*kd + 1, j) = ajj; /* Computing MIN */ i__2 = j - 1; km = min(i__2,*kd); /* Compute elements j-km:j-1 of the j-th column and update the the leading submatrix within the band. */ d__1 = 1. / ajj; dscal_(&km, &d__1, &ab_ref(*kd + 1 - km, j), &c__1); dsyr_("Upper", &km, &c_b9, &ab_ref(*kd + 1 - km, j), &c__1, & ab_ref(*kd + 1, j - km), &kld); /* L10: */ } /* Factorize the updated submatrix A(1:m,1:m) as U**T*U. */ i__1 = m; for (j = 1; j <= i__1; ++j) { /* Compute s(j,j) and test for non-positive-definiteness. */ ajj = ab_ref(*kd + 1, j); if (ajj <= 0.) { goto L50; } ajj = sqrt(ajj); ab_ref(*kd + 1, j) = ajj; /* Computing MIN */ i__2 = *kd, i__3 = m - j; km = min(i__2,i__3); /* Compute elements j+1:j+km of the j-th row and update the trailing submatrix within the band. */ if (km > 0) { d__1 = 1. / ajj; dscal_(&km, &d__1, &ab_ref(*kd, j + 1), &kld); dsyr_("Upper", &km, &c_b9, &ab_ref(*kd, j + 1), &kld, &ab_ref( *kd + 1, j + 1), &kld); } /* L20: */ } } else { /* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). */ i__1 = m + 1; for (j = *n; j >= i__1; --j) { /* Compute s(j,j) and test for non-positive-definiteness. */ ajj = ab_ref(1, j); if (ajj <= 0.) { goto L50; } ajj = sqrt(ajj); ab_ref(1, j) = ajj; /* Computing MIN */ i__2 = j - 1; km = min(i__2,*kd); /* Compute elements j-km:j-1 of the j-th row and update the trailing submatrix within the band. */ d__1 = 1. / ajj; dscal_(&km, &d__1, &ab_ref(km + 1, j - km), &kld); dsyr_("Lower", &km, &c_b9, &ab_ref(km + 1, j - km), &kld, &ab_ref( 1, j - km), &kld); /* L30: */ } /* Factorize the updated submatrix A(1:m,1:m) as U**T*U. */ i__1 = m; for (j = 1; j <= i__1; ++j) { /* Compute s(j,j) and test for non-positive-definiteness. */ ajj = ab_ref(1, j); if (ajj <= 0.) { goto L50; } ajj = sqrt(ajj); ab_ref(1, j) = ajj; /* Computing MIN */ i__2 = *kd, i__3 = m - j; km = min(i__2,i__3); /* Compute elements j+1:j+km of the j-th column and update the trailing submatrix within the band. */ if (km > 0) { d__1 = 1. / ajj; dscal_(&km, &d__1, &ab_ref(2, j), &c__1); dsyr_("Lower", &km, &c_b9, &ab_ref(2, j), &c__1, &ab_ref(1, j + 1), &kld); } /* L40: */ } } return 0; L50: *info = j; return 0; /* End of DPBSTF */ } /* dpbstf_ */
/* Subroutine */ int dtbrfs_(char *uplo, char *trans, char *diag, integer *n, integer *kd, integer *nrhs, doublereal *ab, integer *ldab, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DTBRFS provides error bounds and backward error estimates for the solution to a system of linear equations with a triangular band coefficient matrix. The solution matrix X must be computed by DTBTRS or some other means before entering this routine. DTBRFS does not do iterative refinement because doing so cannot improve the backward error. Arguments ========= UPLO (input) CHARACTER*1 = 'U': A is upper triangular; = 'L': A is lower triangular. TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) DIAG (input) CHARACTER*1 = 'N': A is non-unit triangular; = 'U': A is unit triangular. N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of superdiagonals or subdiagonals of the triangular band matrix A. KD >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. AB (input) DOUBLE PRECISION array, dimension (LDAB,N) The upper or lower triangular band matrix A, stored in the first kd+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). If DIAG = 'U', the diagonal elements of A are not referenced and are assumed to be 1. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) The solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) DOUBLE PRECISION array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b19 = -1.; /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3; /* Local variables */ static integer kase; static doublereal safe1, safe2; static integer i__, j, k; static doublereal s; extern logical lsame_(char *, char *); extern /* Subroutine */ int dtbmv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * , doublereal *, integer *), dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal * , doublereal *, integer *, doublereal *, integer *); static logical upper; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static doublereal xk; static integer nz; static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static logical notran; static char transt[1]; static logical nounit; static doublereal lstres, eps; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); notran = lsame_(trans, "N"); nounit = lsame_(diag, "N"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*kd < 0) { *info = -5; } else if (*nrhs < 0) { *info = -6; } else if (*ldab < *kd + 1) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldx < max(1,*n)) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("DTBRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *kd + 2; eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* Compute residual R = B - op(A) * X, where op(A) = A or A', depending on TRANS. */ dcopy_(n, &x_ref(1, j), &c__1, &work[*n + 1], &c__1); dtbmv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &work[*n + 1], &c__1); daxpy_(n, &c_b19, &b_ref(1, j), &c__1, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th components of the numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b_ref(i__, j), abs(d__1)); /* L20: */ } if (notran) { /* Compute abs(A)*abs(X) + abs(B). */ if (upper) { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x_ref(k, j), abs(d__1)); /* Computing MAX */ i__3 = 1, i__4 = k - *kd; i__5 = k; for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { work[i__] += (d__1 = ab_ref(*kd + 1 + i__ - k, k), abs(d__1)) * xk; /* L30: */ } /* L40: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x_ref(k, j), abs(d__1)); /* Computing MAX */ i__5 = 1, i__3 = k - *kd; i__4 = k - 1; for (i__ = max(i__5,i__3); i__ <= i__4; ++i__) { work[i__] += (d__1 = ab_ref(*kd + 1 + i__ - k, k), abs(d__1)) * xk; /* L50: */ } work[k] += xk; /* L60: */ } } } else { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x_ref(k, j), abs(d__1)); /* Computing MIN */ i__5 = *n, i__3 = k + *kd; i__4 = min(i__5,i__3); for (i__ = k; i__ <= i__4; ++i__) { work[i__] += (d__1 = ab_ref(i__ + 1 - k, k), abs( d__1)) * xk; /* L70: */ } /* L80: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { xk = (d__1 = x_ref(k, j), abs(d__1)); /* Computing MIN */ i__5 = *n, i__3 = k + *kd; i__4 = min(i__5,i__3); for (i__ = k + 1; i__ <= i__4; ++i__) { work[i__] += (d__1 = ab_ref(i__ + 1 - k, k), abs( d__1)) * xk; /* L90: */ } work[k] += xk; /* L100: */ } } } } else { /* Compute abs(A')*abs(X) + abs(B). */ if (upper) { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; /* Computing MAX */ i__4 = 1, i__5 = k - *kd; i__3 = k; for (i__ = max(i__4,i__5); i__ <= i__3; ++i__) { s += (d__1 = ab_ref(*kd + 1 + i__ - k, k), abs( d__1)) * (d__2 = x_ref(i__, j), abs(d__2)) ; /* L110: */ } work[k] += s; /* L120: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = (d__1 = x_ref(k, j), abs(d__1)); /* Computing MAX */ i__3 = 1, i__4 = k - *kd; i__5 = k - 1; for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { s += (d__1 = ab_ref(*kd + 1 + i__ - k, k), abs( d__1)) * (d__2 = x_ref(i__, j), abs(d__2)) ; /* L130: */ } work[k] += s; /* L140: */ } } } else { if (nounit) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; /* Computing MIN */ i__3 = *n, i__4 = k + *kd; i__5 = min(i__3,i__4); for (i__ = k; i__ <= i__5; ++i__) { s += (d__1 = ab_ref(i__ + 1 - k, k), abs(d__1)) * (d__2 = x_ref(i__, j), abs(d__2)); /* L150: */ } work[k] += s; /* L160: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = (d__1 = x_ref(k, j), abs(d__1)); /* Computing MIN */ i__3 = *n, i__4 = k + *kd; i__5 = min(i__3,i__4); for (i__ = k + 1; i__ <= i__5; ++i__) { s += (d__1 = ab_ref(i__ + 1 - k, k), abs(d__1)) * (d__2 = x_ref(i__, j), abs(d__2)); /* L170: */ } work[k] += s; /* L180: */ } } } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L190: */ } berr[j] = s; /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use DLACON to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L200: */ } kase = 0; L210: dlacon_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)'). */ dtbsv_(uplo, transt, diag, n, kd, &ab[ab_offset], ldab, &work[ *n + 1], &c__1); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L220: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L230: */ } dtbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &work[* n + 1], &c__1); } goto L210; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x_ref(i__, j), abs(d__1)); lstres = max(d__2,d__3); /* L240: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L250: */ } return 0; /* End of DTBRFS */ } /* dtbrfs_ */
/* Subroutine */ int dgbcon_(char *norm, integer *n, integer *kl, integer *ku, doublereal *ab, integer *ldab, integer *ipiv, doublereal *anorm, doublereal *rcond, doublereal *work, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DGBCON estimates the reciprocal of the condition number of a real general band matrix A, in either the 1-norm or the infinity-norm, using the LU factorization computed by DGBTRF. An estimate is obtained for norm(inv(A)), and the reciprocal of the condition number is computed as RCOND = 1 / ( norm(A) * norm(inv(A)) ). Arguments ========= NORM (input) CHARACTER*1 Specifies whether the 1-norm condition number or the infinity-norm condition number is required: = '1' or 'O': 1-norm; = 'I': Infinity-norm. N (input) INTEGER The order of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. AB (input) DOUBLE PRECISION array, dimension (LDAB,N) Details of the LU factorization of the band matrix A, as computed by DGBTRF. U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= 2*KL+KU+1. IPIV (input) INTEGER array, dimension (N) The pivot indices; for 1 <= i <= N, row i of the matrix was interchanged with row IPIV(i). ANORM (input) DOUBLE PRECISION If NORM = '1' or 'O', the 1-norm of the original matrix A. If NORM = 'I', the infinity-norm of the original matrix A. RCOND (output) DOUBLE PRECISION The reciprocal of the condition number of the matrix A, computed as RCOND = 1/(norm(A) * norm(inv(A))). WORK (workspace) DOUBLE PRECISION array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ static integer kase; extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); static integer kase1, j; static doublereal t, scale; extern logical lsame_(char *, char *); extern /* Subroutine */ int drscl_(integer *, doublereal *, doublereal *, integer *); static logical lnoti; extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer kd; extern doublereal dlamch_(char *); static integer lm, jp, ix; extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); extern integer idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int dlatbs_(char *, char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); static doublereal ainvnm; static logical onenrm; static char normin[1]; static doublereal smlnum; #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --ipiv; --work; --iwork; /* Function Body */ *info = 0; onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O"); if (! onenrm && ! lsame_(norm, "I")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < (*kl << 1) + *ku + 1) { *info = -6; } else if (*anorm < 0.) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("DGBCON", &i__1); return 0; } /* Quick return if possible */ *rcond = 0.; if (*n == 0) { *rcond = 1.; return 0; } else if (*anorm == 0.) { return 0; } smlnum = dlamch_("Safe minimum"); /* Estimate the norm of inv(A). */ ainvnm = 0.; *(unsigned char *)normin = 'N'; if (onenrm) { kase1 = 1; } else { kase1 = 2; } kd = *kl + *ku + 1; lnoti = *kl > 0; kase = 0; L10: dlacon_(n, &work[*n + 1], &work[1], &iwork[1], &ainvnm, &kase); if (kase != 0) { if (kase == kase1) { /* Multiply by inv(L). */ if (lnoti) { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *kl, i__3 = *n - j; lm = min(i__2,i__3); jp = ipiv[j]; t = work[jp]; if (jp != j) { work[jp] = work[j]; work[j] = t; } d__1 = -t; daxpy_(&lm, &d__1, &ab_ref(kd + 1, j), &c__1, &work[j + 1] , &c__1); /* L20: */ } } /* Multiply by inv(U). */ i__1 = *kl + *ku; dlatbs_("Upper", "No transpose", "Non-unit", normin, n, &i__1, & ab[ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 1], info); } else { /* Multiply by inv(U'). */ i__1 = *kl + *ku; dlatbs_("Upper", "Transpose", "Non-unit", normin, n, &i__1, &ab[ ab_offset], ldab, &work[1], &scale, &work[(*n << 1) + 1], info); /* Multiply by inv(L'). */ if (lnoti) { for (j = *n - 1; j >= 1; --j) { /* Computing MIN */ i__1 = *kl, i__2 = *n - j; lm = min(i__1,i__2); work[j] -= ddot_(&lm, &ab_ref(kd + 1, j), &c__1, &work[j + 1], &c__1); jp = ipiv[j]; if (jp != j) { t = work[jp]; work[jp] = work[j]; work[j] = t; } /* L30: */ } } } /* Divide X by 1/SCALE if doing so will not cause overflow. */ *(unsigned char *)normin = 'Y'; if (scale != 1.) { ix = idamax_(n, &work[1], &c__1); if (scale < (d__1 = work[ix], abs(d__1)) * smlnum || scale == 0.) { goto L40; } drscl_(n, &scale, &work[1], &c__1); } goto L10; } /* Compute the estimate of the reciprocal condition number. */ if (ainvnm != 0.) { *rcond = 1. / ainvnm / *anorm; } L40: return 0; /* End of DGBCON */ } /* dgbcon_ */
/* Subroutine */ int sgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, real *ab, integer *ldab, real *d__, real * e, real *q, integer *ldq, real *pt, integer *ldpt, real *c__, integer *ldc, real *work, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SGBBRD reduces a real general m-by-n band matrix A to upper bidiagonal form B by an orthogonal transformation: Q' * A * P = B. The routine computes B, and optionally forms Q or P', or computes Q'*C for a given matrix C. Arguments ========= VECT (input) CHARACTER*1 Specifies whether or not the matrices Q and P' are to be formed. = 'N': do not form Q or P'; = 'Q': form Q only; = 'P': form P' only; = 'B': form both. M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. NCC (input) INTEGER The number of columns of the matrix C. NCC >= 0. KL (input) INTEGER The number of subdiagonals of the matrix A. KL >= 0. KU (input) INTEGER The number of superdiagonals of the matrix A. KU >= 0. AB (input/output) REAL array, dimension (LDAB,N) On entry, the m-by-n band matrix A, stored in rows 1 to KL+KU+1. The j-th column of A is stored in the j-th column of the array AB as follows: AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). On exit, A is overwritten by values generated during the reduction. LDAB (input) INTEGER The leading dimension of the array A. LDAB >= KL+KU+1. D (output) REAL array, dimension (min(M,N)) The diagonal elements of the bidiagonal matrix B. E (output) REAL array, dimension (min(M,N)-1) The superdiagonal elements of the bidiagonal matrix B. Q (output) REAL array, dimension (LDQ,M) If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. If VECT = 'N' or 'P', the array Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. PT (output) REAL array, dimension (LDPT,N) If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. If VECT = 'N' or 'Q', the array PT is not referenced. LDPT (input) INTEGER The leading dimension of the array PT. LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. C (input/output) REAL array, dimension (LDC,NCC) On entry, an m-by-ncc matrix C. On exit, C is overwritten by Q'*C. C is not referenced if NCC = 0. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. WORK (workspace) REAL array, dimension (2*max(M,N)) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. ===================================================================== Test the input parameters Parameter adjustments */ /* Table of constant values */ static real c_b8 = 0.f; static real c_b9 = 1.f; static integer c__1 = 1; /* System generated locals */ integer ab_dim1, ab_offset, c_dim1, c_offset, pt_dim1, pt_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; /* Local variables */ static integer inca; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static integer i__, j, l; extern logical lsame_(char *, char *); static logical wantb, wantc; static integer minmn; static logical wantq; static integer j1, j2, kb; static real ra, rb, rc; static integer kk, ml, mn, nr, mu; static real rs; extern /* Subroutine */ int xerbla_(char *, integer *), slaset_( char *, integer *, integer *, real *, real *, real *, integer *), slartg_(real *, real *, real *, real *, real *); static integer kb1; extern /* Subroutine */ int slargv_(integer *, real *, integer *, real *, integer *, real *, integer *); static integer ml0; extern /* Subroutine */ int slartv_(integer *, real *, integer *, real *, integer *, real *, real *, integer *); static logical wantpt; static integer mu0, klm, kun, nrt, klu1; #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] #define pt_ref(a_1,a_2) pt[(a_2)*pt_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --d__; --e; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; pt_dim1 = *ldpt; pt_offset = 1 + pt_dim1 * 1; pt -= pt_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; --work; /* Function Body */ wantb = lsame_(vect, "B"); wantq = lsame_(vect, "Q") || wantb; wantpt = lsame_(vect, "P") || wantb; wantc = *ncc > 0; klu1 = *kl + *ku + 1; *info = 0; if (! wantq && ! wantpt && ! lsame_(vect, "N")) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ncc < 0) { *info = -4; } else if (*kl < 0) { *info = -5; } else if (*ku < 0) { *info = -6; } else if (*ldab < klu1) { *info = -8; } else if (*ldq < 1 || wantq && *ldq < max(1,*m)) { *info = -12; } else if (*ldpt < 1 || wantpt && *ldpt < max(1,*n)) { *info = -14; } else if (*ldc < 1 || wantc && *ldc < max(1,*m)) { *info = -16; } if (*info != 0) { i__1 = -(*info); xerbla_("SGBBRD", &i__1); return 0; } /* Initialize Q and P' to the unit matrix, if needed */ if (wantq) { slaset_("Full", m, m, &c_b8, &c_b9, &q[q_offset], ldq); } if (wantpt) { slaset_("Full", n, n, &c_b8, &c_b9, &pt[pt_offset], ldpt); } /* Quick return if possible. */ if (*m == 0 || *n == 0) { return 0; } minmn = min(*m,*n); if (*kl + *ku > 1) { /* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce first to lower bidiagonal form and then transform to upper bidiagonal */ if (*ku > 0) { ml0 = 1; mu0 = 2; } else { ml0 = 2; mu0 = 1; } /* Wherever possible, plane rotations are generated and applied in vector operations of length NR over the index set J1:J2:KLU1. The sines of the plane rotations are stored in WORK(1:max(m,n)) and the cosines in WORK(max(m,n)+1:2*max(m,n)). */ mn = max(*m,*n); /* Computing MIN */ i__1 = *m - 1; klm = min(i__1,*kl); /* Computing MIN */ i__1 = *n - 1; kun = min(i__1,*ku); kb = klm + kun; kb1 = kb + 1; inca = kb1 * *ldab; nr = 0; j1 = klm + 2; j2 = 1 - kun; i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { /* Reduce i-th column and i-th row of matrix to bidiagonal form */ ml = klm + 1; mu = kun + 1; i__2 = kb; for (kk = 1; kk <= i__2; ++kk) { j1 += kb; j2 += kb; /* generate plane rotations to annihilate nonzero elements which have been created below the band */ if (nr > 0) { slargv_(&nr, &ab_ref(klu1, j1 - klm - 1), &inca, &work[j1] , &kb1, &work[mn + j1], &kb1); } /* apply plane rotations from the left */ i__3 = kb; for (l = 1; l <= i__3; ++l) { if (j2 - klm + l - 1 > *n) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { slartv_(&nrt, &ab_ref(klu1 - l, j1 - klm + l - 1), & inca, &ab_ref(klu1 - l + 1, j1 - klm + l - 1), &inca, &work[mn + j1], &work[j1], &kb1); } /* L10: */ } if (ml > ml0) { if (ml <= *m - i__ + 1) { /* generate plane rotation to annihilate a(i+ml-1,i) within the band, and apply rotation from the left */ slartg_(&ab_ref(*ku + ml - 1, i__), &ab_ref(*ku + ml, i__), &work[mn + i__ + ml - 1], &work[i__ + ml - 1], &ra); ab_ref(*ku + ml - 1, i__) = ra; if (i__ < *n) { /* Computing MIN */ i__4 = *ku + ml - 2, i__5 = *n - i__; i__3 = min(i__4,i__5); i__6 = *ldab - 1; i__7 = *ldab - 1; srot_(&i__3, &ab_ref(*ku + ml - 2, i__ + 1), & i__6, &ab_ref(*ku + ml - 1, i__ + 1), & i__7, &work[mn + i__ + ml - 1], &work[i__ + ml - 1]); } } ++nr; j1 -= kb1; } if (wantq) { /* accumulate product of plane rotations in Q */ i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { srot_(m, &q_ref(1, j - 1), &c__1, &q_ref(1, j), &c__1, &work[mn + j], &work[j]); /* L20: */ } } if (wantc) { /* apply plane rotations to C */ i__4 = j2; i__3 = kb1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { srot_(ncc, &c___ref(j - 1, 1), ldc, &c___ref(j, 1), ldc, &work[mn + j], &work[j]); /* L30: */ } } if (j2 + kun > *n) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 -= kb1; } i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* create nonzero element a(j-1,j+ku) above the band and store it in WORK(n+1:2*n) */ work[j + kun] = work[j] * ab_ref(1, j + kun); ab_ref(1, j + kun) = work[mn + j] * ab_ref(1, j + kun); /* L40: */ } /* generate plane rotations to annihilate nonzero elements which have been generated above the band */ if (nr > 0) { slargv_(&nr, &ab_ref(1, j1 + kun - 1), &inca, &work[j1 + kun], &kb1, &work[mn + j1 + kun], &kb1); } /* apply plane rotations from the right */ i__4 = kb; for (l = 1; l <= i__4; ++l) { if (j2 + l - 1 > *m) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { slartv_(&nrt, &ab_ref(l + 1, j1 + kun - 1), &inca, & ab_ref(l, j1 + kun), &inca, &work[mn + j1 + kun], &work[j1 + kun], &kb1); } /* L50: */ } if (ml == ml0 && mu > mu0) { if (mu <= *n - i__ + 1) { /* generate plane rotation to annihilate a(i,i+mu-1) within the band, and apply rotation from the right */ slartg_(&ab_ref(*ku - mu + 3, i__ + mu - 2), &ab_ref(* ku - mu + 2, i__ + mu - 1), &work[mn + i__ + mu - 1], &work[i__ + mu - 1], &ra); ab_ref(*ku - mu + 3, i__ + mu - 2) = ra; /* Computing MIN */ i__3 = *kl + mu - 2, i__5 = *m - i__; i__4 = min(i__3,i__5); srot_(&i__4, &ab_ref(*ku - mu + 4, i__ + mu - 2), & c__1, &ab_ref(*ku - mu + 3, i__ + mu - 1), & c__1, &work[mn + i__ + mu - 1], &work[i__ + mu - 1]); } ++nr; j1 -= kb1; } if (wantpt) { /* accumulate product of plane rotations in P' */ i__4 = j2; i__3 = kb1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { srot_(n, &pt_ref(j + kun - 1, 1), ldpt, &pt_ref(j + kun, 1), ldpt, &work[mn + j + kun], &work[j + kun]); /* L60: */ } } if (j2 + kb > *m) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 -= kb1; } i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* create nonzero element a(j+kl+ku,j+ku-1) below the band and store it in WORK(1:n) */ work[j + kb] = work[j + kun] * ab_ref(klu1, j + kun); ab_ref(klu1, j + kun) = work[mn + j + kun] * ab_ref(klu1, j + kun); /* L70: */ } if (ml > ml0) { --ml; } else { --mu; } /* L80: */ } /* L90: */ } } if (*ku == 0 && *kl > 0) { /* A has been reduced to lower bidiagonal form Transform lower bidiagonal form to upper bidiagonal by applying plane rotations from the left, storing diagonal elements in D and off-diagonal elements in E Computing MIN */ i__2 = *m - 1; i__1 = min(i__2,*n); for (i__ = 1; i__ <= i__1; ++i__) { slartg_(&ab_ref(1, i__), &ab_ref(2, i__), &rc, &rs, &ra); d__[i__] = ra; if (i__ < *n) { e[i__] = rs * ab_ref(1, i__ + 1); ab_ref(1, i__ + 1) = rc * ab_ref(1, i__ + 1); } if (wantq) { srot_(m, &q_ref(1, i__), &c__1, &q_ref(1, i__ + 1), &c__1, & rc, &rs); } if (wantc) { srot_(ncc, &c___ref(i__, 1), ldc, &c___ref(i__ + 1, 1), ldc, & rc, &rs); } /* L100: */ } if (*m <= *n) { d__[*m] = ab_ref(1, *m); } } else if (*ku > 0) { /* A has been reduced to upper bidiagonal form */ if (*m < *n) { /* Annihilate a(m,m+1) by applying plane rotations from the right, storing diagonal elements in D and off-diagonal elements in E */ rb = ab_ref(*ku, *m + 1); for (i__ = *m; i__ >= 1; --i__) { slartg_(&ab_ref(*ku + 1, i__), &rb, &rc, &rs, &ra); d__[i__] = ra; if (i__ > 1) { rb = -rs * ab_ref(*ku, i__); e[i__ - 1] = rc * ab_ref(*ku, i__); } if (wantpt) { srot_(n, &pt_ref(i__, 1), ldpt, &pt_ref(*m + 1, 1), ldpt, &rc, &rs); } /* L110: */ } } else { /* Copy off-diagonal elements to E and diagonal elements to D */ i__1 = minmn - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = ab_ref(*ku, i__ + 1); /* L120: */ } i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab_ref(*ku + 1, i__); /* L130: */ } } } else { /* A is diagonal. Set elements of E to zero and copy diagonal elements to D. */ i__1 = minmn - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = 0.f; /* L140: */ } i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab_ref(1, i__); /* L150: */ } } return 0; /* End of SGBBRD */ } /* sgbbrd_ */
/* Subroutine */ int dgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DGBRFS improves the computed solution to a system of linear equations when the coefficient matrix is banded, and provides error bounds and backward error estimates for the solution. Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) N (input) INTEGER The order of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. AB (input) DOUBLE PRECISION array, dimension (LDAB,N) The original band matrix A, stored in rows 1 to KL+KU+1. The j-th column of A is stored in the j-th column of the array AB as follows: AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KL+KU+1. AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) Details of the LU factorization of the band matrix A, as computed by DGBTRF. U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. LDAFB (input) INTEGER The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. IPIV (input) INTEGER array, dimension (N) The pivot indices from DGBTRF; for 1<=i<=N, row i of the matrix was interchanged with row IPIV(i). B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by DGBTRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) DOUBLE PRECISION array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b15 = -1.; static doublereal c_b17 = 1.; /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublereal d__1, d__2, d__3; /* Local variables */ static integer kase; static doublereal safe1, safe2; static integer i__, j, k; static doublereal s; extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer count, kk; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static doublereal xk; static integer nz; static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *), dgbtrs_( char *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); static logical notran; static char transt[1]; static doublereal lstres, eps; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1 * 1; afb -= afb_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; notran = lsame_(trans, "N"); if (! notran && ! lsame_(trans, "T") && ! lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*ldab < *kl + *ku + 1) { *info = -7; } else if (*ldafb < (*kl << 1) + *ku + 1) { *info = -9; } else if (*ldb < max(1,*n)) { *info = -12; } else if (*ldx < max(1,*n)) { *info = -14; } if (*info != 0) { i__1 = -(*info); xerbla_("DGBRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 Computing MIN */ i__1 = *kl + *ku + 2, i__2 = *n + 1; nz = min(i__1,i__2); eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ dcopy_(n, &b_ref(1, j), &c__1, &work[*n + 1], &c__1); dgbmv_(trans, n, n, kl, ku, &c_b15, &ab[ab_offset], ldab, &x_ref(1, j) , &c__1, &c_b17, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th components of the numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b_ref(i__, j), abs(d__1)); /* L30: */ } /* Compute abs(op(A))*abs(X) + abs(B). */ if (notran) { i__2 = *n; for (k = 1; k <= i__2; ++k) { kk = *ku + 1 - k; xk = (d__1 = x_ref(k, j), abs(d__1)); /* Computing MAX */ i__3 = 1, i__4 = k - *ku; /* Computing MIN */ i__6 = *n, i__7 = k + *kl; i__5 = min(i__6,i__7); for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { work[i__] += (d__1 = ab_ref(kk + i__, k), abs(d__1)) * xk; /* L40: */ } /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; kk = *ku + 1 - k; /* Computing MAX */ i__5 = 1, i__3 = k - *ku; /* Computing MIN */ i__6 = *n, i__7 = k + *kl; i__4 = min(i__6,i__7); for (i__ = max(i__5,i__3); i__ <= i__4; ++i__) { s += (d__1 = ab_ref(kk + i__, k), abs(d__1)) * (d__2 = x_ref(i__, j), abs(d__2)); /* L60: */ } work[k] += s; /* L70: */ } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1] , &work[*n + 1], n, info); daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x_ref(1, j), &c__1); lstres = berr[j]; ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use DLACON to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: dlacon_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T). */ dgbtrs_(transt, n, kl, ku, &c__1, &afb[afb_offset], ldafb, & ipiv[1], &work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] *= work[i__]; /* L110: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] *= work[i__]; /* L120: */ } dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, & ipiv[1], &work[*n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x_ref(i__, j), abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of DGBRFS */ } /* dgbrfs_ */
/* Subroutine */ int sgbtrs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, real *ab, integer *ldab, integer *ipiv, real *b, integer *ldb, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= SGBTRS solves a system of linear equations A * X = B or A' * X = B with a general band matrix A using the LU factorization computed by SGBTRF. Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations. = 'N': A * X = B (No transpose) = 'T': A'* X = B (Transpose) = 'C': A'* X = B (Conjugate transpose = Transpose) N (input) INTEGER The order of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. AB (input) REAL array, dimension (LDAB,N) Details of the LU factorization of the band matrix A, as computed by SGBTRF. U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= 2*KL+KU+1. IPIV (input) INTEGER array, dimension (N) The pivot indices; for 1 <= i <= N, row i of the matrix was interchanged with row IPIV(i). B (input/output) REAL array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b7 = -1.f; static integer c__1 = 1; static real c_b23 = 1.f; /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, integer *, real *, integer *, real *, integer *); static integer i__, j, l; extern logical lsame_(char *, char *); extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static logical lnoti; extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, integer *), stbsv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *); static integer kd, lm; extern /* Subroutine */ int xerbla_(char *, integer *); static logical notran; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ *info = 0; notran = lsame_(trans, "N"); if (! notran && ! lsame_(trans, "T") && ! lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*ldab < (*kl << 1) + *ku + 1) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -10; } if (*info != 0) { i__1 = -(*info); xerbla_("SGBTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } kd = *ku + *kl + 1; lnoti = *kl > 0; if (notran) { /* Solve A*X = B. Solve L*X = B, overwriting B with X. L is represented as a product of permutations and unit lower triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), where each transformation L(i) is a rank-one modification of the identity matrix. */ if (lnoti) { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *kl, i__3 = *n - j; lm = min(i__2,i__3); l = ipiv[j]; if (l != j) { sswap_(nrhs, &b_ref(l, 1), ldb, &b_ref(j, 1), ldb); } sger_(&lm, nrhs, &c_b7, &ab_ref(kd + 1, j), &c__1, &b_ref(j, 1), ldb, &b_ref(j + 1, 1), ldb); /* L10: */ } } i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { /* Solve U*X = B, overwriting B with X. */ i__2 = *kl + *ku; stbsv_("Upper", "No transpose", "Non-unit", n, &i__2, &ab[ ab_offset], ldab, &b_ref(1, i__), &c__1); /* L20: */ } } else { /* Solve A'*X = B. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { /* Solve U'*X = B, overwriting B with X. */ i__2 = *kl + *ku; stbsv_("Upper", "Transpose", "Non-unit", n, &i__2, &ab[ab_offset], ldab, &b_ref(1, i__), &c__1); /* L30: */ } /* Solve L'*X = B, overwriting B with X. */ if (lnoti) { for (j = *n - 1; j >= 1; --j) { /* Computing MIN */ i__1 = *kl, i__2 = *n - j; lm = min(i__1,i__2); sgemv_("Transpose", &lm, nrhs, &c_b7, &b_ref(j + 1, 1), ldb, & ab_ref(kd + 1, j), &c__1, &c_b23, &b_ref(j, 1), ldb); l = ipiv[j]; if (l != j) { sswap_(nrhs, &b_ref(l, 1), ldb, &b_ref(j, 1), ldb); } /* L40: */ } } } return 0; /* End of SGBTRS */ } /* sgbtrs_ */
/* Subroutine */ int schkbb_(integer *nsizes, integer *mval, integer *nval, integer *nwdths, integer *kk, integer *ntypes, logical *dotype, integer *nrhs, integer *iseed, real *thresh, integer *nounit, real *a, integer *lda, real *ab, integer *ldab, real *bd, real *be, real *q, integer *ldq, real *p, integer *ldp, real *c__, integer *ldc, real * cc, real *work, integer *lwork, real *result, integer *info) { /* Initialized data */ static integer ktype[15] = { 1,2,4,4,4,4,4,6,6,6,6,6,9,9,9 }; static integer kmagn[15] = { 1,1,1,1,1,2,3,1,1,1,2,3,1,2,3 }; static integer kmode[15] = { 0,0,4,3,1,4,4,4,3,1,4,4,0,0,0 }; /* Format strings */ static char fmt_9999[] = "(\002 SCHKBB: \002,a,\002 returned INFO=\002,i" "5,\002.\002,/9x,\002M=\002,i5,\002 N=\002,i5,\002 K=\002,i5,\002" ", JTYPE=\002,i5,\002, ISEED=(\002,3(i5,\002,\002),i5,\002)\002)"; static char fmt_9998[] = "(\002 M =\002,i4,\002 N=\002,i4,\002, K=\002,i" "3,\002, seed=\002,4(i4,\002,\002),\002 type \002,i2,\002, test" "(\002,i2,\002)=\002,g10.3)"; /* System generated locals */ integer a_dim1, a_offset, ab_dim1, ab_offset, c_dim1, c_offset, cc_dim1, cc_offset, p_dim1, p_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; /* Builtin functions */ double sqrt(doublereal); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ static real cond; static integer jcol, kmax, mmax, nmax; static real unfl, ovfl; static integer i__, j, k, m, n; static logical badmm, badnn; static integer imode; extern /* Subroutine */ int sbdt01_(integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, integer * , real *, real *), sbdt02_(integer *, integer *, real *, integer * , real *, integer *, real *, integer *, real *, real *); static integer iinfo; static real anorm; static integer mnmin, mnmax, nmats, jsize; extern /* Subroutine */ int sort01_(char *, integer *, integer *, real *, integer *, real *, integer *, real *); static integer nerrs, itype, jtype, ntest; extern /* Subroutine */ int slahd2_(integer *, char *); static logical badnnb; static integer kl, jr, ku; extern /* Subroutine */ int sgbbrd_(char *, integer *, integer *, integer *, integer *, integer *, real *, integer *, real *, real *, real * , integer *, real *, integer *, real *, integer *, real *, integer *); extern doublereal slamch_(char *); static integer idumma[1]; extern /* Subroutine */ int xerbla_(char *, integer *); static integer ioldsd[4]; static real amninv; static integer jwidth; extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *), slatmr_( integer *, integer *, char *, integer *, char *, real *, integer * , real *, real *, char *, char *, real *, integer *, real *, real *, integer *, real *, char *, integer *, integer *, integer *, real *, real *, char *, real *, integer *, integer *, integer *), slatms_(integer * , integer *, char *, integer *, char *, real *, integer *, real *, real *, integer *, integer *, char *, real *, integer *, real *, integer *), slasum_(char *, integer *, integer *, integer *); static real rtunfl, rtovfl, ulpinv; static integer mtypes, ntestt; static real ulp; /* Fortran I/O blocks */ static cilist io___41 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9998, 0 }; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] /* -- LAPACK test routine (release 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= SCHKBB tests the reduction of a general real rectangular band matrix to bidiagonal form. SGBBRD factors a general band matrix A as Q B P* , where * means transpose, B is upper bidiagonal, and Q and P are orthogonal; SGBBRD can also overwrite a given matrix C with Q* C . For each pair of matrix dimensions (M,N) and each selected matrix type, an M by N matrix A and an M by NRHS matrix C are generated. The problem dimensions are as follows A: M x N Q: M x M P: N x N B: min(M,N) x min(M,N) C: M x NRHS For each generated matrix, 4 tests are performed: (1) | A - Q B PT | / ( |A| max(M,N) ulp ), PT = P' (2) | I - Q' Q | / ( M ulp ) (3) | I - PT PT' | / ( N ulp ) (4) | Y - Q' C | / ( |Y| max(M,NRHS) ulp ), where Y = Q' C. The "types" are specified by a logical array DOTYPE( 1:NTYPES ); if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. Currently, the list of possible types is: The possible matrix types are (1) The zero matrix. (2) The identity matrix. (3) A diagonal matrix with evenly spaced entries 1, ..., ULP and random signs. (ULP = (first number larger than 1) - 1 ) (4) A diagonal matrix with geometrically spaced entries 1, ..., ULP and random signs. (5) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP and random signs. (6) Same as (3), but multiplied by SQRT( overflow threshold ) (7) Same as (3), but multiplied by SQRT( underflow threshold ) (8) A matrix of the form U D V, where U and V are orthogonal and D has evenly spaced entries 1, ..., ULP with random signs on the diagonal. (9) A matrix of the form U D V, where U and V are orthogonal and D has geometrically spaced entries 1, ..., ULP with random signs on the diagonal. (10) A matrix of the form U D V, where U and V are orthogonal and D has "clustered" entries 1, ULP,..., ULP with random signs on the diagonal. (11) Same as (8), but multiplied by SQRT( overflow threshold ) (12) Same as (8), but multiplied by SQRT( underflow threshold ) (13) Rectangular matrix with random entries chosen from (-1,1). (14) Same as (13), but multiplied by SQRT( overflow threshold ) (15) Same as (13), but multiplied by SQRT( underflow threshold ) Arguments ========= NSIZES (input) INTEGER The number of values of M and N contained in the vectors MVAL and NVAL. The matrix sizes are used in pairs (M,N). If NSIZES is zero, SCHKBB does nothing. NSIZES must be at least zero. MVAL (input) INTEGER array, dimension (NSIZES) The values of the matrix row dimension M. NVAL (input) INTEGER array, dimension (NSIZES) The values of the matrix column dimension N. NWDTHS (input) INTEGER The number of bandwidths to use. If it is zero, SCHKBB does nothing. It must be at least zero. KK (input) INTEGER array, dimension (NWDTHS) An array containing the bandwidths to be used for the band matrices. The values must be at least zero. NTYPES (input) INTEGER The number of elements in DOTYPE. If it is zero, SCHKBB does nothing. It must be at least zero. If it is MAXTYP+1 and NSIZES is 1, then an additional type, MAXTYP+1 is defined, which is to use whatever matrix is in A. This is only useful if DOTYPE(1:MAXTYP) is .FALSE. and DOTYPE(MAXTYP+1) is .TRUE. . DOTYPE (input) LOGICAL array, dimension (NTYPES) If DOTYPE(j) is .TRUE., then for each size in NN a matrix of that size and of type j will be generated. If NTYPES is smaller than the maximum number of types defined (PARAMETER MAXTYP), then types NTYPES+1 through MAXTYP will not be generated. If NTYPES is larger than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) will be ignored. NRHS (input) INTEGER The number of columns in the "right-hand side" matrix C. If NRHS = 0, then the operations on the right-hand side will not be tested. NRHS must be at least 0. ISEED (input/output) INTEGER array, dimension (4) On entry ISEED specifies the seed of the random number generator. The array elements should be between 0 and 4095; if not they will be reduced mod 4096. Also, ISEED(4) must be odd. The random number generator uses a linear congruential sequence limited to small integers, and so should produce machine independent random numbers. The values of ISEED are changed on exit, and can be used in the next call to SCHKBB to continue the same random number sequence. THRESH (input) REAL A test will count as "failed" if the "error", computed as described above, exceeds THRESH. Note that the error is scaled to be O(1), so THRESH should be a reasonably small multiple of 1, e.g., 10 or 100. In particular, it should not depend on the precision (single vs. double) or the size of the matrix. It must be at least zero. NOUNIT (input) INTEGER The FORTRAN unit number for printing out error messages (e.g., if a routine returns IINFO not equal to 0.) A (input/workspace) REAL array, dimension (LDA, max(NN)) Used to hold the matrix A. LDA (input) INTEGER The leading dimension of A. It must be at least 1 and at least max( NN ). AB (workspace) REAL array, dimension (LDAB, max(NN)) Used to hold A in band storage format. LDAB (input) INTEGER The leading dimension of AB. It must be at least 2 (not 1!) and at least max( KK )+1. BD (workspace) REAL array, dimension (max(NN)) Used to hold the diagonal of the bidiagonal matrix computed by SGBBRD. BE (workspace) REAL array, dimension (max(NN)) Used to hold the off-diagonal of the bidiagonal matrix computed by SGBBRD. Q (workspace) REAL array, dimension (LDQ, max(NN)) Used to hold the orthogonal matrix Q computed by SGBBRD. LDQ (input) INTEGER The leading dimension of Q. It must be at least 1 and at least max( NN ). P (workspace) REAL array, dimension (LDP, max(NN)) Used to hold the orthogonal matrix P computed by SGBBRD. LDP (input) INTEGER The leading dimension of P. It must be at least 1 and at least max( NN ). C (workspace) REAL array, dimension (LDC, max(NN)) Used to hold the matrix C updated by SGBBRD. LDC (input) INTEGER The leading dimension of U. It must be at least 1 and at least max( NN ). CC (workspace) REAL array, dimension (LDC, max(NN)) Used to hold a copy of the matrix C. WORK (workspace) REAL array, dimension (LWORK) LWORK (input) INTEGER The number of entries in WORK. This must be at least max( LDA+1, max(NN)+1 )*max(NN). RESULT (output) REAL array, dimension (4) The values computed by the tests described above. The values are currently limited to 1/ulp, to avoid overflow. INFO (output) INTEGER If 0, then everything ran OK. ----------------------------------------------------------------------- Some Local Variables and Parameters: ---- ----- --------- --- ---------- ZERO, ONE Real 0 and 1. MAXTYP The number of types defined. NTEST The number of tests performed, or which can be performed so far, for the current matrix. NTESTT The total number of tests performed so far. NMAX Largest value in NN. NMATS The number of matrices generated so far. NERRS The number of tests which have exceeded THRESH so far. COND, IMODE Values to be passed to the matrix generators. ANORM Norm of A; passed to matrix generators. OVFL, UNFL Overflow and underflow thresholds. ULP, ULPINV Finest relative precision and its inverse. RTOVFL, RTUNFL Square roots of the previous 2 values. The following four arrays decode JTYPE: KTYPE(j) The general type (1-10) for type "j". KMODE(j) The MODE value to be passed to the matrix generator for type "j". KMAGN(j) The order of magnitude ( O(1), O(overflow^(1/2) ), O(underflow^(1/2) ) ===================================================================== Parameter adjustments */ --mval; --nval; --kk; --dotype; --iseed; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --bd; --be; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; p_dim1 = *ldp; p_offset = 1 + p_dim1 * 1; p -= p_offset; cc_dim1 = *ldc; cc_offset = 1 + cc_dim1 * 1; cc -= cc_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; --work; --result; /* Function Body Check for errors */ ntestt = 0; *info = 0; /* Important constants */ badmm = FALSE_; badnn = FALSE_; mmax = 1; nmax = 1; mnmax = 1; i__1 = *nsizes; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = mmax, i__3 = mval[j]; mmax = max(i__2,i__3); if (mval[j] < 0) { badmm = TRUE_; } /* Computing MAX */ i__2 = nmax, i__3 = nval[j]; nmax = max(i__2,i__3); if (nval[j] < 0) { badnn = TRUE_; } /* Computing MAX Computing MIN */ i__4 = mval[j], i__5 = nval[j]; i__2 = mnmax, i__3 = min(i__4,i__5); mnmax = max(i__2,i__3); /* L10: */ } badnnb = FALSE_; kmax = 0; i__1 = *nwdths; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = kmax, i__3 = kk[j]; kmax = max(i__2,i__3); if (kk[j] < 0) { badnnb = TRUE_; } /* L20: */ } /* Check for errors */ if (*nsizes < 0) { *info = -1; } else if (badmm) { *info = -2; } else if (badnn) { *info = -3; } else if (*nwdths < 0) { *info = -4; } else if (badnnb) { *info = -5; } else if (*ntypes < 0) { *info = -6; } else if (*nrhs < 0) { *info = -8; } else if (*lda < nmax) { *info = -13; } else if (*ldab < (kmax << 1) + 1) { *info = -15; } else if (*ldq < nmax) { *info = -19; } else if (*ldp < nmax) { *info = -21; } else if (*ldc < nmax) { *info = -23; } else if ((max(*lda,nmax) + 1) * nmax > *lwork) { *info = -26; } if (*info != 0) { i__1 = -(*info); xerbla_("SCHKBB", &i__1); return 0; } /* Quick return if possible */ if (*nsizes == 0 || *ntypes == 0 || *nwdths == 0) { return 0; } /* More Important constants */ unfl = slamch_("Safe minimum"); ovfl = 1.f / unfl; ulp = slamch_("Epsilon") * slamch_("Base"); ulpinv = 1.f / ulp; rtunfl = sqrt(unfl); rtovfl = sqrt(ovfl); /* Loop over sizes, widths, types */ nerrs = 0; nmats = 0; i__1 = *nsizes; for (jsize = 1; jsize <= i__1; ++jsize) { m = mval[jsize]; n = nval[jsize]; mnmin = min(m,n); /* Computing MAX */ i__2 = max(1,m); amninv = 1.f / (real) max(i__2,n); i__2 = *nwdths; for (jwidth = 1; jwidth <= i__2; ++jwidth) { k = kk[jwidth]; if (k >= m && k >= n) { goto L150; } /* Computing MAX Computing MIN */ i__5 = m - 1; i__3 = 0, i__4 = min(i__5,k); kl = max(i__3,i__4); /* Computing MAX Computing MIN */ i__5 = n - 1; i__3 = 0, i__4 = min(i__5,k); ku = max(i__3,i__4); if (*nsizes != 1) { mtypes = min(15,*ntypes); } else { mtypes = min(16,*ntypes); } i__3 = mtypes; for (jtype = 1; jtype <= i__3; ++jtype) { if (! dotype[jtype]) { goto L140; } ++nmats; ntest = 0; for (j = 1; j <= 4; ++j) { ioldsd[j - 1] = iseed[j]; /* L30: */ } /* Compute "A". Control parameters: KMAGN KMODE KTYPE =1 O(1) clustered 1 zero =2 large clustered 2 identity =3 small exponential (none) =4 arithmetic diagonal, (w/ singular values) =5 random log (none) =6 random nonhermitian, w/ singular values =7 (none) =8 (none) =9 random nonhermitian */ if (mtypes > 15) { goto L90; } itype = ktype[jtype - 1]; imode = kmode[jtype - 1]; /* Compute norm */ switch (kmagn[jtype - 1]) { case 1: goto L40; case 2: goto L50; case 3: goto L60; } L40: anorm = 1.f; goto L70; L50: anorm = rtovfl * ulp * amninv; goto L70; L60: anorm = rtunfl * max(m,n) * ulpinv; goto L70; L70: slaset_("Full", lda, &n, &c_b18, &c_b18, &a[a_offset], lda); slaset_("Full", ldab, &n, &c_b18, &c_b18, &ab[ab_offset], ldab); iinfo = 0; cond = ulpinv; /* Special Matrices -- Identity & Jordan block Zero */ if (itype == 1) { iinfo = 0; } else if (itype == 2) { /* Identity */ i__4 = n; for (jcol = 1; jcol <= i__4; ++jcol) { a_ref(jcol, jcol) = anorm; /* L80: */ } } else if (itype == 4) { /* Diagonal Matrix, singular values specified */ slatms_(&m, &n, "S", &iseed[1], "N", &work[1], &imode, & cond, &anorm, &c__0, &c__0, "N", &a[a_offset], lda, &work[m + 1], &iinfo); } else if (itype == 6) { /* Nonhermitian, singular values specified */ slatms_(&m, &n, "S", &iseed[1], "N", &work[1], &imode, & cond, &anorm, &kl, &ku, "N", &a[a_offset], lda, & work[m + 1], &iinfo); } else if (itype == 9) { /* Nonhermitian, random entries */ slatmr_(&m, &n, "S", &iseed[1], "N", &work[1], &c__6, & c_b35, &c_b35, "T", "N", &work[n + 1], &c__1, & c_b35, &work[(n << 1) + 1], &c__1, &c_b35, "N", idumma, &kl, &ku, &c_b18, &anorm, "N", &a[ a_offset], lda, idumma, &iinfo); } else { iinfo = 1; } /* Generate Right-Hand Side */ slatmr_(&m, nrhs, "S", &iseed[1], "N", &work[1], &c__6, & c_b35, &c_b35, "T", "N", &work[m + 1], &c__1, &c_b35, &work[(m << 1) + 1], &c__1, &c_b35, "N", idumma, &m, nrhs, &c_b18, &c_b35, "NO", &c__[c_offset], ldc, idumma, &iinfo); if (iinfo != 0) { io___41.ciunit = *nounit; s_wsfe(&io___41); do_fio(&c__1, "Generator", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)) ; e_wsfe(); *info = abs(iinfo); return 0; } L90: /* Copy A to band storage. */ i__4 = n; for (j = 1; j <= i__4; ++j) { /* Computing MAX */ i__5 = 1, i__6 = j - ku; /* Computing MIN */ i__8 = m, i__9 = j + kl; i__7 = min(i__8,i__9); for (i__ = max(i__5,i__6); i__ <= i__7; ++i__) { ab_ref(ku + 1 + i__ - j, j) = a_ref(i__, j); /* L100: */ } /* L110: */ } /* Copy C */ slacpy_("Full", &m, nrhs, &c__[c_offset], ldc, &cc[cc_offset], ldc); /* Call SGBBRD to compute B, Q and P, and to update C. */ sgbbrd_("B", &m, &n, nrhs, &kl, &ku, &ab[ab_offset], ldab, & bd[1], &be[1], &q[q_offset], ldq, &p[p_offset], ldp, & cc[cc_offset], ldc, &work[1], &iinfo); if (iinfo != 0) { io___43.ciunit = *nounit; s_wsfe(&io___43); do_fio(&c__1, "SGBBRD", (ftnlen)6); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)) ; e_wsfe(); *info = abs(iinfo); if (iinfo < 0) { return 0; } else { result[1] = ulpinv; goto L120; } } /* Test 1: Check the decomposition A := Q * B * P' 2: Check the orthogonality of Q 3: Check the orthogonality of P 4: Check the computation of Q' * C */ sbdt01_(&m, &n, &c_n1, &a[a_offset], lda, &q[q_offset], ldq, & bd[1], &be[1], &p[p_offset], ldp, &work[1], &result[1] ); sort01_("Columns", &m, &m, &q[q_offset], ldq, &work[1], lwork, &result[2]); sort01_("Rows", &n, &n, &p[p_offset], ldp, &work[1], lwork, & result[3]); sbdt02_(&m, nrhs, &c__[c_offset], ldc, &cc[cc_offset], ldc, & q[q_offset], ldq, &work[1], &result[4]); /* End of Loop -- Check for RESULT(j) > THRESH */ ntest = 4; L120: ntestt += ntest; /* Print out tests which fail. */ i__4 = ntest; for (jr = 1; jr <= i__4; ++jr) { if (result[jr] >= *thresh) { if (nerrs == 0) { slahd2_(nounit, "SBB"); } ++nerrs; io___45.ciunit = *nounit; s_wsfe(&io___45); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof( real)); e_wsfe(); } /* L130: */ } L140: ; } L150: ; } /* L160: */ } /* Summary */ slasum_("SBB", nounit, &nerrs, &ntestt); return 0; /* End of SCHKBB */ } /* schkbb_ */
/* Subroutine */ int zpbtf2_(char *uplo, integer *n, integer *kd, doublecomplex *ab, integer *ldab, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= ZPBTF2 computes the Cholesky factorization of a complex Hermitian positive definite band matrix A. The factorization has the form A = U' * U , if UPLO = 'U', or A = L * L', if UPLO = 'L', where U is an upper triangular matrix, U' is the conjugate transpose of U, and L is lower triangular. This is the unblocked version of the algorithm, calling Level 2 BLAS. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the Hermitian matrix A is stored: = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of super-diagonals of the matrix A if UPLO = 'U', or the number of sub-diagonals if UPLO = 'L'. KD >= 0. AB (input/output) COMPLEX*16 array, dimension (LDAB,N) On entry, the upper or lower triangle of the Hermitian band matrix A, stored in the first KD+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). On exit, if INFO = 0, the triangular factor U or L from the Cholesky factorization A = U'*U or A = L*L' of the band matrix A, in the same storage format as A. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -k, the k-th argument had an illegal value > 0: if INFO = k, the leading minor of order k is not positive definite, and the factorization could not be completed. Further Details =============== The band storage scheme is illustrated by the following example, when N = 6, KD = 2, and UPLO = 'U': On entry: On exit: * * a13 a24 a35 a46 * * u13 u24 u35 u46 * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 Similarly, if UPLO = 'L' the format of A is as follows: On entry: On exit: a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * a31 a42 a53 a64 * * l31 l42 l53 l64 * * Array elements marked * are not used by the routine. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static doublereal c_b8 = -1.; static integer c__1 = 1; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ extern /* Subroutine */ int zher_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, integer *); static integer j; extern logical lsame_(char *, char *); static logical upper; static integer kn; extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( integer *, doublereal *, doublecomplex *, integer *), zlacgv_( integer *, doublecomplex *, integer *); static doublereal ajj; static integer kld; #define ab_subscr(a_1,a_2) (a_2)*ab_dim1 + a_1 #define ab_ref(a_1,a_2) ab[ab_subscr(a_1,a_2)] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*ldab < *kd + 1) { *info = -5; } if (*info != 0) { i__1 = -(*info); xerbla_("ZPBTF2", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Computing MAX */ i__1 = 1, i__2 = *ldab - 1; kld = max(i__1,i__2); if (upper) { /* Compute the Cholesky factorization A = U'*U. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Compute U(J,J) and test for non-positive-definiteness. */ i__2 = ab_subscr(*kd + 1, j); ajj = ab[i__2].r; if (ajj <= 0.) { i__2 = ab_subscr(*kd + 1, j); ab[i__2].r = ajj, ab[i__2].i = 0.; goto L30; } ajj = sqrt(ajj); i__2 = ab_subscr(*kd + 1, j); ab[i__2].r = ajj, ab[i__2].i = 0.; /* Compute elements J+1:J+KN of row J and update the trailing submatrix within the band. Computing MIN */ i__2 = *kd, i__3 = *n - j; kn = min(i__2,i__3); if (kn > 0) { d__1 = 1. / ajj; zdscal_(&kn, &d__1, &ab_ref(*kd, j + 1), &kld); zlacgv_(&kn, &ab_ref(*kd, j + 1), &kld); zher_("Upper", &kn, &c_b8, &ab_ref(*kd, j + 1), &kld, &ab_ref( *kd + 1, j + 1), &kld); zlacgv_(&kn, &ab_ref(*kd, j + 1), &kld); } /* L10: */ } } else { /* Compute the Cholesky factorization A = L*L'. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Compute L(J,J) and test for non-positive-definiteness. */ i__2 = ab_subscr(1, j); ajj = ab[i__2].r; if (ajj <= 0.) { i__2 = ab_subscr(1, j); ab[i__2].r = ajj, ab[i__2].i = 0.; goto L30; } ajj = sqrt(ajj); i__2 = ab_subscr(1, j); ab[i__2].r = ajj, ab[i__2].i = 0.; /* Compute elements J+1:J+KN of column J and update the trailing submatrix within the band. Computing MIN */ i__2 = *kd, i__3 = *n - j; kn = min(i__2,i__3); if (kn > 0) { d__1 = 1. / ajj; zdscal_(&kn, &d__1, &ab_ref(2, j), &c__1); zher_("Lower", &kn, &c_b8, &ab_ref(2, j), &c__1, &ab_ref(1, j + 1), &kld); } /* L20: */ } } return 0; L30: *info = j; return 0; /* End of ZPBTF2 */ } /* zpbtf2_ */
/* Subroutine */ int dpbequ_(char *uplo, integer *n, integer *kd, doublereal * ab, integer *ldab, doublereal *s, doublereal *scond, doublereal *amax, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DPBEQU computes row and column scalings intended to equilibrate a symmetric positive definite band matrix A and reduce its condition number (with respect to the two-norm). S contains the scale factors, S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This choice of S puts the condition number of B within a factor N of the smallest possible condition number over all possible diagonal scalings. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangular of A is stored; = 'L': Lower triangular of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of superdiagonals of the matrix A if UPLO = 'U', or the number of subdiagonals if UPLO = 'L'. KD >= 0. AB (input) DOUBLE PRECISION array, dimension (LDAB,N) The upper or lower triangle of the symmetric band matrix A, stored in the first KD+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). LDAB (input) INTEGER The leading dimension of the array A. LDAB >= KD+1. S (output) DOUBLE PRECISION array, dimension (N) If INFO = 0, S contains the scale factors for A. SCOND (output) DOUBLE PRECISION If INFO = 0, S contains the ratio of the smallest S(i) to the largest S(i). If SCOND >= 0.1 and AMAX is neither too large nor too small, it is not worth scaling by S. AMAX (output) DOUBLE PRECISION Absolute value of largest matrix element. If AMAX is very close to overflow or very close to underflow, the matrix should be scaled. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, the i-th diagonal element is nonpositive. ===================================================================== Test the input parameters. Parameter adjustments */ /* System generated locals */ integer ab_dim1, ab_offset, i__1; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static doublereal smin; static integer i__, j; extern logical lsame_(char *, char *); static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --s; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kd < 0) { *info = -3; } else if (*ldab < *kd + 1) { *info = -5; } if (*info != 0) { i__1 = -(*info); xerbla_("DPBEQU", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *scond = 1.; *amax = 0.; return 0; } if (upper) { j = *kd + 1; } else { j = 1; } /* Initialize SMIN and AMAX. */ s[1] = ab_ref(j, 1); smin = s[1]; *amax = s[1]; /* Find the minimum and maximum diagonal elements. */ i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { s[i__] = ab_ref(j, i__); /* Computing MIN */ d__1 = smin, d__2 = s[i__]; smin = min(d__1,d__2); /* Computing MAX */ d__1 = *amax, d__2 = s[i__]; *amax = max(d__1,d__2); /* L10: */ } if (smin <= 0.) { /* Find the first non-positive diagonal element and return. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (s[i__] <= 0.) { *info = i__; return 0; } /* L20: */ } } else { /* Set the scale factors to the reciprocals of the diagonal elements. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { s[i__] = 1. / sqrt(s[i__]); /* L30: */ } /* Compute SCOND = min(S(I)) / max(S(I)) */ *scond = sqrt(smin) / sqrt(*amax); } return 0; /* End of DPBEQU */ } /* dpbequ_ */
/* Subroutine */ int slaebz_(integer *ijob, integer *nitmax, integer *n, integer *mmax, integer *minp, integer *nbmin, real *abstol, real * reltol, real *pivmin, real *d__, real *e, real *e2, integer *nval, real *ab, real *c__, integer *mout, integer *nab, real *work, integer *iwork, integer *info) { /* System generated locals */ integer nab_dim1, nab_offset, ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; real r__1, r__2, r__3, r__4; /* Local variables */ static integer itmp1, itmp2, j, kfnew, klnew, kf, ji, kl, jp, jit; static real tmp1, tmp2; #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] #define nab_ref(a_1,a_2) nab[(a_2)*nab_dim1 + a_1] /* -- LAPACK auxiliary routine (instrum. to count ops. version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Common block to return operation count and iteration count ITCNT and OPS are only incremented (not initialized) ----------------------------------------------------------------------- Purpose ======= SLAEBZ contains the iteration loops which compute and use the function N(w), which is the count of eigenvalues of a symmetric tridiagonal matrix T less than or equal to its argument w. It performs a choice of two types of loops: IJOB=1, followed by IJOB=2: It takes as input a list of intervals and returns a list of sufficiently small intervals whose union contains the same eigenvalues as the union of the original intervals. The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. The output interval (AB(j,1),AB(j,2)] will contain eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. IJOB=3: It performs a binary search in each input interval (AB(j,1),AB(j,2)] for a point w(j) such that N(w(j))=NVAL(j), and uses C(j) as the starting point of the search. If such a w(j) is found, then on output AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output (AB(j,1),AB(j,2)] will be a small interval containing the point where N(w) jumps through NVAL(j), unless that point lies outside the initial interval. Note that the intervals are in all cases half-open intervals, i.e., of the form (a,b] , which includes b but not a . To avoid underflow, the matrix should be scaled so that its largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value. To assure the most accurate computation of small eigenvalues, the matrix should be scaled to be not much smaller than that, either. See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal Matrix", Report CS41, Computer Science Dept., Stanford University, July 21, 1966 Note: the arguments are, in general, *not* checked for unreasonable values. Arguments ========= IJOB (input) INTEGER Specifies what is to be done: = 1: Compute NAB for the initial intervals. = 2: Perform bisection iteration to find eigenvalues of T. = 3: Perform bisection iteration to invert N(w), i.e., to find a point which has a specified number of eigenvalues of T to its left. Other values will cause SLAEBZ to return with INFO=-1. NITMAX (input) INTEGER The maximum number of "levels" of bisection to be performed, i.e., an interval of width W will not be made smaller than 2^(-NITMAX) * W. If not all intervals have converged after NITMAX iterations, then INFO is set to the number of non-converged intervals. N (input) INTEGER The dimension n of the tridiagonal matrix T. It must be at least 1. MMAX (input) INTEGER The maximum number of intervals. If more than MMAX intervals are generated, then SLAEBZ will quit with INFO=MMAX+1. MINP (input) INTEGER The initial number of intervals. It may not be greater than MMAX. NBMIN (input) INTEGER The smallest number of intervals that should be processed using a vector loop. If zero, then only the scalar loop will be used. ABSTOL (input) REAL The minimum (absolute) width of an interval. When an interval is narrower than ABSTOL, or than RELTOL times the larger (in magnitude) endpoint, then it is considered to be sufficiently small, i.e., converged. This must be at least zero. RELTOL (input) REAL The minimum relative width of an interval. When an interval is narrower than ABSTOL, or than RELTOL times the larger (in magnitude) endpoint, then it is considered to be sufficiently small, i.e., converged. Note: this should always be at least radix*machine epsilon. PIVMIN (input) REAL The minimum absolute value of a "pivot" in the Sturm sequence loop. This *must* be at least max |e(j)**2| * safe_min and at least safe_min, where safe_min is at least the smallest number that can divide one without overflow. D (input) REAL array, dimension (N) The diagonal elements of the tridiagonal matrix T. E (input) REAL array, dimension (N) The offdiagonal elements of the tridiagonal matrix T in positions 1 through N-1. E(N) is arbitrary. E2 (input) REAL array, dimension (N) The squares of the offdiagonal elements of the tridiagonal matrix T. E2(N) is ignored. NVAL (input/output) INTEGER array, dimension (MINP) If IJOB=1 or 2, not referenced. If IJOB=3, the desired values of N(w). The elements of NVAL will be reordered to correspond with the intervals in AB. Thus, NVAL(j) on output will not, in general be the same as NVAL(j) on input, but it will correspond with the interval (AB(j,1),AB(j,2)] on output. AB (input/output) REAL array, dimension (MMAX,2) The endpoints of the intervals. AB(j,1) is a(j), the left endpoint of the j-th interval, and AB(j,2) is b(j), the right endpoint of the j-th interval. The input intervals will, in general, be modified, split, and reordered by the calculation. C (input/output) REAL array, dimension (MMAX) If IJOB=1, ignored. If IJOB=2, workspace. If IJOB=3, then on input C(j) should be initialized to the first search point in the binary search. MOUT (output) INTEGER If IJOB=1, the number of eigenvalues in the intervals. If IJOB=2 or 3, the number of intervals output. If IJOB=3, MOUT will equal MINP. NAB (input/output) INTEGER array, dimension (MMAX,2) If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). If IJOB=2, then on input, NAB(i,j) should be set. It must satisfy the condition: N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), which means that in interval i only eigenvalues NAB(i,1)+1,...,NAB(i,2) will be considered. Usually, NAB(i,j)=N(AB(i,j)), from a previous call to SLAEBZ with IJOB=1. On output, NAB(i,j) will contain max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of the input interval that the output interval (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the the input values of NAB(k,1) and NAB(k,2). If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), unless N(w) > NVAL(i) for all search points w , in which case NAB(i,1) will not be modified, i.e., the output value will be the same as the input value (modulo reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) for all search points w , in which case NAB(i,2) will not be modified. Normally, NAB should be set to some distinctive value(s) before SLAEBZ is called. WORK (workspace) REAL array, dimension (MMAX) Workspace. IWORK (workspace) INTEGER array, dimension (MMAX) Workspace. INFO (output) INTEGER = 0: All intervals converged. = 1--MMAX: The last INFO intervals did not converge. = MMAX+1: More than MMAX intervals were generated. Further Details =============== This routine is intended to be called only by other LAPACK routines, thus the interface is less user-friendly. It is intended for two purposes: (a) finding eigenvalues. In this case, SLAEBZ should have one or more initial intervals set up in AB, and SLAEBZ should be called with IJOB=1. This sets up NAB, and also counts the eigenvalues. Intervals with no eigenvalues would usually be thrown out at this point. Also, if not all the eigenvalues in an interval i are desired, NAB(i,1) can be increased or NAB(i,2) decreased. For example, set NAB(i,1)=NAB(i,2)-1 to get the largest eigenvalue. SLAEBZ is then called with IJOB=2 and MMAX no smaller than the value of MOUT returned by the call with IJOB=1. After this (IJOB=2) call, eigenvalues NAB(i,1)+1 through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the tolerance specified by ABSTOL and RELTOL. (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). In this case, start with a Gershgorin interval (a,b). Set up AB to contain 2 search intervals, both initially (a,b). One NVAL element should contain f-1 and the other should contain l , while C should contain a and b, resp. NAB(i,1) should be -1 and NAB(i,2) should be N+1, to flag an error if the desired interval does not lie in (a,b). SLAEBZ is then called with IJOB=3. On exit, if w(f-1) < w(f), then one of the intervals -- j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r >= 0, then the interval will have N(AB(j,1))=NAB(j,1)=f-k and N(AB(j,2))=NAB(j,2)=f+r. The cases w(l) < w(l+1) and w(l-r)=...=w(l+k) are handled similarly. ===================================================================== Check for Errors Parameter adjustments */ nab_dim1 = *mmax; nab_offset = 1 + nab_dim1 * 1; nab -= nab_offset; ab_dim1 = *mmax; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --d__; --e; --e2; --nval; --c__; --work; --iwork; /* Function Body */ *info = 0; if (*ijob < 1 || *ijob > 3) { *info = -1; return 0; } /* Initialize NAB */ if (*ijob == 1) { /* Compute the number of eigenvalues in the initial intervals. */ *mout = 0; /* DIR$ NOVECTOR */ i__1 = *minp; for (ji = 1; ji <= i__1; ++ji) { for (jp = 1; jp <= 2; ++jp) { tmp1 = d__[1] - ab_ref(ji, jp); if (dabs(tmp1) < *pivmin) { tmp1 = -(*pivmin); } nab_ref(ji, jp) = 0; if (tmp1 <= 0.f) { nab_ref(ji, jp) = 1; } i__2 = *n; for (j = 2; j <= i__2; ++j) { tmp1 = d__[j] - e2[j - 1] / tmp1 - ab_ref(ji, jp); if (dabs(tmp1) < *pivmin) { tmp1 = -(*pivmin); } if (tmp1 <= 0.f) { nab_ref(ji, jp) = nab_ref(ji, jp) + 1; } /* L10: */ } /* L20: */ } *mout = *mout + nab_ref(ji, 2) - nab_ref(ji, 1); /* L30: */ } /* Increment opcount for determining the number of eigenvalues in the initial intervals. */ latime_1.ops += (*minp << 1) * (*n - 1) * 3; return 0; } /* Initialize for loop KF and KL have the following meaning: Intervals 1,...,KF-1 have converged. Intervals KF,...,KL still need to be refined. */ kf = 1; kl = *minp; /* If IJOB=2, initialize C. If IJOB=3, use the user-supplied starting point. */ if (*ijob == 2) { i__1 = *minp; for (ji = 1; ji <= i__1; ++ji) { c__[ji] = (ab_ref(ji, 1) + ab_ref(ji, 2)) * .5f; /* L40: */ } /* Increment opcount for initializing C. */ latime_1.ops += *minp << 1; } /* Iteration loop */ i__1 = *nitmax; for (jit = 1; jit <= i__1; ++jit) { /* Loop over intervals */ if (kl - kf + 1 >= *nbmin && *nbmin > 0) { /* Begin of Parallel Version of the loop */ i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { /* Compute N(c), the number of eigenvalues less than c */ work[ji] = d__[1] - c__[ji]; iwork[ji] = 0; if (work[ji] <= *pivmin) { iwork[ji] = 1; /* Computing MIN */ r__1 = work[ji], r__2 = -(*pivmin); work[ji] = dmin(r__1,r__2); } i__3 = *n; for (j = 2; j <= i__3; ++j) { work[ji] = d__[j] - e2[j - 1] / work[ji] - c__[ji]; if (work[ji] <= *pivmin) { ++iwork[ji]; /* Computing MIN */ r__1 = work[ji], r__2 = -(*pivmin); work[ji] = dmin(r__1,r__2); } /* L50: */ } /* L60: */ } /* Increment iteration counter. */ latime_1.itcnt = latime_1.itcnt + kl - kf + 1; /* Increment opcount for evaluating Sturm sequences on each interval. */ latime_1.ops += (kl - kf + 1) * (*n - 1) * 3; if (*ijob <= 2) { /* IJOB=2: Choose all intervals containing eigenvalues. */ klnew = kl; i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { /* Insure that N(w) is monotone Computing MIN Computing MAX */ i__5 = nab_ref(ji, 1), i__6 = iwork[ji]; i__3 = nab_ref(ji, 2), i__4 = max(i__5,i__6); iwork[ji] = min(i__3,i__4); /* Update the Queue -- add intervals if both halves contain eigenvalues. */ if (iwork[ji] == nab_ref(ji, 2)) { /* No eigenvalue in the upper interval: just use the lower interval. */ ab_ref(ji, 2) = c__[ji]; } else if (iwork[ji] == nab_ref(ji, 1)) { /* No eigenvalue in the lower interval: just use the upper interval. */ ab_ref(ji, 1) = c__[ji]; } else { ++klnew; if (klnew <= *mmax) { /* Eigenvalue in both intervals -- add upper to queue. */ ab_ref(klnew, 2) = ab_ref(ji, 2); nab_ref(klnew, 2) = nab_ref(ji, 2); ab_ref(klnew, 1) = c__[ji]; nab_ref(klnew, 1) = iwork[ji]; ab_ref(ji, 2) = c__[ji]; nab_ref(ji, 2) = iwork[ji]; } else { *info = *mmax + 1; } } /* L70: */ } if (*info != 0) { return 0; } kl = klnew; } else { /* IJOB=3: Binary search. Keep only the interval containing w s.t. N(w) = NVAL */ i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { if (iwork[ji] <= nval[ji]) { ab_ref(ji, 1) = c__[ji]; nab_ref(ji, 1) = iwork[ji]; } if (iwork[ji] >= nval[ji]) { ab_ref(ji, 2) = c__[ji]; nab_ref(ji, 2) = iwork[ji]; } /* L80: */ } } } else { /* End of Parallel Version of the loop Begin of Serial Version of the loop */ klnew = kl; i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { /* Compute N(w), the number of eigenvalues less than w */ tmp1 = c__[ji]; tmp2 = d__[1] - tmp1; itmp1 = 0; if (tmp2 <= *pivmin) { itmp1 = 1; /* Computing MIN */ r__1 = tmp2, r__2 = -(*pivmin); tmp2 = dmin(r__1,r__2); } /* A series of compiler directives to defeat vectorization for the next loop $PL$ CMCHAR=' ' DIR$ NEXTSCALAR $DIR SCALAR DIR$ NEXT SCALAR VD$L NOVECTOR DEC$ NOVECTOR VD$ NOVECTOR VDIR NOVECTOR VOCL LOOP,SCALAR IBM PREFER SCALAR $PL$ CMCHAR='*' */ i__3 = *n; for (j = 2; j <= i__3; ++j) { tmp2 = d__[j] - e2[j - 1] / tmp2 - tmp1; if (tmp2 <= *pivmin) { ++itmp1; /* Computing MIN */ r__1 = tmp2, r__2 = -(*pivmin); tmp2 = dmin(r__1,r__2); } /* L90: */ } if (*ijob <= 2) { /* IJOB=2: Choose all intervals containing eigenvalues. Insure that N(w) is monotone Computing MIN Computing MAX */ i__5 = nab_ref(ji, 1); i__3 = nab_ref(ji, 2), i__4 = max(i__5,itmp1); itmp1 = min(i__3,i__4); /* Update the Queue -- add intervals if both halves contain eigenvalues. */ if (itmp1 == nab_ref(ji, 2)) { /* No eigenvalue in the upper interval: just use the lower interval. */ ab_ref(ji, 2) = tmp1; } else if (itmp1 == nab_ref(ji, 1)) { /* No eigenvalue in the lower interval: just use the upper interval. */ ab_ref(ji, 1) = tmp1; } else if (klnew < *mmax) { /* Eigenvalue in both intervals -- add upper to queue. */ ++klnew; ab_ref(klnew, 2) = ab_ref(ji, 2); nab_ref(klnew, 2) = nab_ref(ji, 2); ab_ref(klnew, 1) = tmp1; nab_ref(klnew, 1) = itmp1; ab_ref(ji, 2) = tmp1; nab_ref(ji, 2) = itmp1; } else { *info = *mmax + 1; return 0; } } else { /* IJOB=3: Binary search. Keep only the interval containing w s.t. N(w) = NVAL */ if (itmp1 <= nval[ji]) { ab_ref(ji, 1) = tmp1; nab_ref(ji, 1) = itmp1; } if (itmp1 >= nval[ji]) { ab_ref(ji, 2) = tmp1; nab_ref(ji, 2) = itmp1; } } /* L100: */ } /* Increment iteration counter. */ latime_1.itcnt = latime_1.itcnt + kl - kf + 1; /* Increment opcount for evaluating Sturm sequences on each interval. */ latime_1.ops += (kl - kf + 1) * (*n - 1) * 3; kl = klnew; /* End of Serial Version of the loop */ } /* Check for convergence */ kfnew = kf; i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { tmp1 = (r__1 = ab_ref(ji, 2) - ab_ref(ji, 1), dabs(r__1)); /* Computing MAX */ r__3 = (r__1 = ab_ref(ji, 2), dabs(r__1)), r__4 = (r__2 = ab_ref( ji, 1), dabs(r__2)); tmp2 = dmax(r__3,r__4); /* Computing MAX */ r__1 = max(*abstol,*pivmin), r__2 = *reltol * tmp2; if (tmp1 < dmax(r__1,r__2) || nab_ref(ji, 1) >= nab_ref(ji, 2)) { /* Converged -- Swap with position KFNEW, then increment KFNEW */ if (ji > kfnew) { tmp1 = ab_ref(ji, 1); tmp2 = ab_ref(ji, 2); itmp1 = nab_ref(ji, 1); itmp2 = nab_ref(ji, 2); ab_ref(ji, 1) = ab_ref(kfnew, 1); ab_ref(ji, 2) = ab_ref(kfnew, 2); nab_ref(ji, 1) = nab_ref(kfnew, 1); nab_ref(ji, 2) = nab_ref(kfnew, 2); ab_ref(kfnew, 1) = tmp1; ab_ref(kfnew, 2) = tmp2; nab_ref(kfnew, 1) = itmp1; nab_ref(kfnew, 2) = itmp2; if (*ijob == 3) { itmp1 = nval[ji]; nval[ji] = nval[kfnew]; nval[kfnew] = itmp1; } } ++kfnew; } /* L110: */ } kf = kfnew; /* Choose Midpoints */ i__2 = kl; for (ji = kf; ji <= i__2; ++ji) { c__[ji] = (ab_ref(ji, 1) + ab_ref(ji, 2)) * .5f; /* L120: */ } /* Increment opcount for convergence check and choosing midpoints. */ latime_1.ops += kl - kf + 1 << 2; /* If no more intervals to refine, quit. */ if (kf > kl) { goto L140; } /* L130: */ } /* Converged */ L140: /* Computing MAX */ i__1 = kl + 1 - kf; *info = max(i__1,0); *mout = kl; return 0; /* End of SLAEBZ */ } /* slaebz_ */
doublereal zlangb_(char *norm, integer *n, integer *kl, integer *ku, doublecomplex *ab, integer *ldab, doublereal *work) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 Purpose ======= ZLANGB returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of an n by n band matrix A, with kl sub-diagonals and ku super-diagonals. Description =========== ZLANGB returns the value ZLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' ( ( norm1(A), NORM = '1', 'O' or 'o' ( ( normI(A), NORM = 'I' or 'i' ( ( normF(A), NORM = 'F', 'f', 'E' or 'e' where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a matrix norm. Arguments ========= NORM (input) CHARACTER*1 Specifies the value to be returned in ZLANGB as described above. N (input) INTEGER The order of the matrix A. N >= 0. When N = 0, ZLANGB is set to zero. KL (input) INTEGER The number of sub-diagonals of the matrix A. KL >= 0. KU (input) INTEGER The number of super-diagonals of the matrix A. KU >= 0. AB (input) COMPLEX*16 array, dimension (LDAB,N) The band matrix A, stored in rows 1 to KL+KU+1. The j-th column of A is stored in the j-th column of the array AB as follows: AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KL+KU+1. WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), where LWORK >= N when NORM = 'I'; otherwise, WORK is not referenced. ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal ret_val, d__1, d__2; /* Builtin functions */ double z_abs(doublecomplex *), sqrt(doublereal); /* Local variables */ static integer i__, j, k, l; static doublereal scale; extern logical lsame_(char *, char *); static doublereal value; extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, doublereal *, doublereal *); static doublereal sum; #define ab_subscr(a_1,a_2) (a_2)*ab_dim1 + a_1 #define ab_ref(a_1,a_2) ab[ab_subscr(a_1,a_2)] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --work; /* Function Body */ if (*n == 0) { value = 0.; } else if (lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = *ku + 2 - j; /* Computing MIN */ i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1; i__3 = min(i__4,i__5); for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__1 = value, d__2 = z_abs(&ab_ref(i__, j)); value = max(d__1,d__2); /* L10: */ } /* L20: */ } } else if (lsame_(norm, "O") || *(unsigned char *) norm == '1') { /* Find norm1(A). */ value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { sum = 0.; /* Computing MAX */ i__3 = *ku + 2 - j; /* Computing MIN */ i__4 = *n + *ku + 1 - j, i__5 = *kl + *ku + 1; i__2 = min(i__4,i__5); for (i__ = max(i__3,1); i__ <= i__2; ++i__) { sum += z_abs(&ab_ref(i__, j)); /* L30: */ } value = max(value,sum); /* L40: */ } } else if (lsame_(norm, "I")) { /* Find normI(A). */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L50: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { k = *ku + 1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *n, i__6 = j + *kl; i__4 = min(i__5,i__6); for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { work[i__] += z_abs(&ab_ref(k + i__, j)); /* L60: */ } /* L70: */ } value = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L80: */ } } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__4 = 1, i__2 = j - *ku; l = max(i__4,i__2); k = *ku + 1 - j + l; /* Computing MIN */ i__2 = *n, i__3 = j + *kl; i__4 = min(i__2,i__3) - l + 1; zlassq_(&i__4, &ab_ref(k, j), &c__1, &scale, &sum); /* L90: */ } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of ZLANGB */ } /* zlangb_ */
doublereal zlantb_(char *norm, char *uplo, char *diag, integer *n, integer *k, doublecomplex *ab, integer *ldab, doublereal *work) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 Purpose ======= ZLANTB returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of an n by n triangular band matrix A, with ( k + 1 ) diagonals. Description =========== ZLANTB returns the value ZLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' ( ( norm1(A), NORM = '1', 'O' or 'o' ( ( normI(A), NORM = 'I' or 'i' ( ( normF(A), NORM = 'F', 'f', 'E' or 'e' where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a matrix norm. Arguments ========= NORM (input) CHARACTER*1 Specifies the value to be returned in ZLANTB as described above. UPLO (input) CHARACTER*1 Specifies whether the matrix A is upper or lower triangular. = 'U': Upper triangular = 'L': Lower triangular DIAG (input) CHARACTER*1 Specifies whether or not the matrix A is unit triangular. = 'N': Non-unit triangular = 'U': Unit triangular N (input) INTEGER The order of the matrix A. N >= 0. When N = 0, ZLANTB is set to zero. K (input) INTEGER The number of super-diagonals of the matrix A if UPLO = 'U', or the number of sub-diagonals of the matrix A if UPLO = 'L'. K >= 0. AB (input) COMPLEX*16 array, dimension (LDAB,N) The upper or lower triangular band matrix A, stored in the first k+1 rows of AB. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). Note that when DIAG = 'U', the elements of the array AB corresponding to the diagonal elements of the matrix A are not referenced, but are assumed to be one. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= K+1. WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), where LWORK >= N when NORM = 'I'; otherwise, WORK is not referenced. ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5; doublereal ret_val, d__1, d__2; /* Builtin functions */ double z_abs(doublecomplex *), sqrt(doublereal); /* Local variables */ static integer i__, j, l; static doublereal scale; static logical udiag; extern logical lsame_(char *, char *); static doublereal value; extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, doublereal *, doublereal *); static doublereal sum; #define ab_subscr(a_1,a_2) (a_2)*ab_dim1 + a_1 #define ab_ref(a_1,a_2) ab[ab_subscr(a_1,a_2)] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --work; /* Function Body */ if (*n == 0) { value = 0.; } else if (lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ if (lsame_(diag, "U")) { value = 1.; if (lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = *k + 2 - j; i__3 = *k; for (i__ = max(i__2,1); i__ <= i__3; ++i__) { /* Computing MAX */ d__1 = value, d__2 = z_abs(&ab_ref(i__, j)); value = max(d__1,d__2); /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *n + 1 - j, i__4 = *k + 1; i__3 = min(i__2,i__4); for (i__ = 2; i__ <= i__3; ++i__) { /* Computing MAX */ d__1 = value, d__2 = z_abs(&ab_ref(i__, j)); value = max(d__1,d__2); /* L30: */ } /* L40: */ } } } else { value = 0.; if (lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__3 = *k + 2 - j; i__2 = *k + 1; for (i__ = max(i__3,1); i__ <= i__2; ++i__) { /* Computing MAX */ d__1 = value, d__2 = z_abs(&ab_ref(i__, j)); value = max(d__1,d__2); /* L50: */ } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n + 1 - j, i__4 = *k + 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__1 = value, d__2 = z_abs(&ab_ref(i__, j)); value = max(d__1,d__2); /* L70: */ } /* L80: */ } } } } else if (lsame_(norm, "O") || *(unsigned char *) norm == '1') { /* Find norm1(A). */ value = 0.; udiag = lsame_(diag, "U"); if (lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (udiag) { sum = 1.; /* Computing MAX */ i__2 = *k + 2 - j; i__3 = *k; for (i__ = max(i__2,1); i__ <= i__3; ++i__) { sum += z_abs(&ab_ref(i__, j)); /* L90: */ } } else { sum = 0.; /* Computing MAX */ i__3 = *k + 2 - j; i__2 = *k + 1; for (i__ = max(i__3,1); i__ <= i__2; ++i__) { sum += z_abs(&ab_ref(i__, j)); /* L100: */ } } value = max(value,sum); /* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (udiag) { sum = 1.; /* Computing MIN */ i__3 = *n + 1 - j, i__4 = *k + 1; i__2 = min(i__3,i__4); for (i__ = 2; i__ <= i__2; ++i__) { sum += z_abs(&ab_ref(i__, j)); /* L120: */ } } else { sum = 0.; /* Computing MIN */ i__3 = *n + 1 - j, i__4 = *k + 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { sum += z_abs(&ab_ref(i__, j)); /* L130: */ } } value = max(value,sum); /* L140: */ } } } else if (lsame_(norm, "I")) { /* Find normI(A). */ value = 0.; if (lsame_(uplo, "U")) { if (lsame_(diag, "U")) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 1.; /* L150: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { l = *k + 1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { work[i__] += z_abs(&ab_ref(l + i__, j)); /* L160: */ } /* L170: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L180: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { l = *k + 1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j; for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { work[i__] += z_abs(&ab_ref(l + i__, j)); /* L190: */ } /* L200: */ } } } else { if (lsame_(diag, "U")) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 1.; /* L210: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { work[i__] += z_abs(&ab_ref(l + i__, j)); /* L220: */ } /* L230: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.; /* L240: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = min(i__4,i__2); for (i__ = j; i__ <= i__3; ++i__) { work[i__] += z_abs(&ab_ref(l + i__, j)); /* L250: */ } /* L260: */ } } } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__1 = value, d__2 = work[i__]; value = max(d__1,d__2); /* L270: */ } } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). */ if (lsame_(uplo, "U")) { if (lsame_(diag, "U")) { scale = 1.; sum = (doublereal) (*n); if (*k > 0) { i__1 = *n; for (j = 2; j <= i__1; ++j) { /* Computing MAX */ i__3 = *k + 2 - j; /* Computing MIN */ i__2 = j - 1; i__4 = min(i__2,*k); zlassq_(&i__4, &ab_ref(max(i__3,1), j), &c__1, &scale, &sum); /* L280: */ } } } else { scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__3 = *k + 2 - j; /* Computing MIN */ i__2 = j, i__5 = *k + 1; i__4 = min(i__2,i__5); zlassq_(&i__4, &ab_ref(max(i__3,1), j), &c__1, &scale, & sum); /* L290: */ } } } else { if (lsame_(diag, "U")) { scale = 1.; sum = (doublereal) (*n); if (*k > 0) { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__4 = *n - j; i__3 = min(i__4,*k); zlassq_(&i__3, &ab_ref(2, j), &c__1, &scale, &sum); /* L300: */ } } } else { scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__4 = *n - j + 1, i__2 = *k + 1; i__3 = min(i__4,i__2); zlassq_(&i__3, &ab_ref(1, j), &c__1, &scale, &sum); /* L310: */ } } } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of ZLANTB */ } /* zlantb_ */
/* Subroutine */ int dlatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, doublereal *ab, integer *ldab, doublereal *x, doublereal *scale, doublereal *cnorm, integer *info) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1992 Purpose ======= DLATBS solves one of the triangular systems A *x = s*b or A'*x = s*b with scaling to prevent overflow, where A is an upper or lower triangular band matrix. Here A' denotes the transpose of A, x and b are n-element vectors, and s is a scaling factor, usually less than or equal to 1, chosen so that the components of x will be less than the overflow threshold. If the unscaled problem will not cause overflow, the Level 2 BLAS routine DTBSV is called. If the matrix A is singular (A(j,j) = 0 for some j), then s is set to 0 and a non-trivial solution to A*x = 0 is returned. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the matrix A is upper or lower triangular. = 'U': Upper triangular = 'L': Lower triangular TRANS (input) CHARACTER*1 Specifies the operation applied to A. = 'N': Solve A * x = s*b (No transpose) = 'T': Solve A'* x = s*b (Transpose) = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) DIAG (input) CHARACTER*1 Specifies whether or not the matrix A is unit triangular. = 'N': Non-unit triangular = 'U': Unit triangular NORMIN (input) CHARACTER*1 Specifies whether CNORM has been set or not. = 'Y': CNORM contains the column norms on entry = 'N': CNORM is not set on entry. On exit, the norms will be computed and stored in CNORM. N (input) INTEGER The order of the matrix A. N >= 0. KD (input) INTEGER The number of subdiagonals or superdiagonals in the triangular matrix A. KD >= 0. AB (input) DOUBLE PRECISION array, dimension (LDAB,N) The upper or lower triangular band matrix A, stored in the first KD+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KD+1. X (input/output) DOUBLE PRECISION array, dimension (N) On entry, the right hand side b of the triangular system. On exit, X is overwritten by the solution vector x. SCALE (output) DOUBLE PRECISION The scaling factor s for the triangular system A * x = s*b or A'* x = s*b. If SCALE = 0, the matrix A is singular or badly scaled, and the vector x is an exact or approximate solution to A*x = 0. CNORM (input or output) DOUBLE PRECISION array, dimension (N) If NORMIN = 'Y', CNORM is an input argument and CNORM(j) contains the norm of the off-diagonal part of the j-th column of A. If TRANS = 'N', CNORM(j) must be greater than or equal to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) must be greater than or equal to the 1-norm. If NORMIN = 'N', CNORM is an output argument and CNORM(j) returns the 1-norm of the offdiagonal part of the j-th column of A. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -k, the k-th argument had an illegal value Further Details ======= ======= A rough bound on x is computed; if that is less than overflow, DTBSV is called, otherwise, specific code is used which checks for possible overflow or divide-by-zero at every operation. A columnwise scheme is used for solving A*x = b. The basic algorithm if A is lower triangular is x[1:n] := b[1:n] for j = 1, ..., n x(j) := x(j) / A(j,j) x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] end Define bounds on the components of x after j iterations of the loop: M(j) = bound on x[1:j] G(j) = bound on x[j+1:n] Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. Then for iteration j+1 we have M(j+1) <= G(j) / | A(j+1,j+1) | G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) where CNORM(j+1) is greater than or equal to the infinity-norm of column j+1 of A, not counting the diagonal. Hence G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) 1<=i<=j and |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) 1<=i< j Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTBSV if the reciprocal of the largest M(j), j=1,..,n, is larger than max(underflow, 1/overflow). The bound on x(j) is also used to determine when a step in the columnwise method can be performed without fear of overflow. If the computed bound is greater than a large constant, x is scaled to prevent overflow, but if the bound overflows, x is set to 0, x(j) to 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. Similarly, a row-wise scheme is used to solve A'*x = b. The basic algorithm for A upper triangular is for j = 1, ..., n x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) end We simultaneously compute two bounds G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j M(j) = bound on x(i), 1<=i<=j The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. Then the bound on x(j) is M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) 1<=i<=j and we can safely call DTBSV if 1/M(n) and 1/G(n) are both greater than max(underflow, 1/overflow). ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b36 = .5; /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3; /* Local variables */ static integer jinc, jlen; extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); static doublereal xbnd; static integer imax; static doublereal tmax, tjjs, xmax, grow, sumj; static integer i__, j; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static integer maind; extern logical lsame_(char *, char *); static doublereal tscal, uscal; extern doublereal dasum_(integer *, doublereal *, integer *); static integer jlast; extern /* Subroutine */ int dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static logical upper; extern doublereal dlamch_(char *); static doublereal xj; extern integer idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum; static logical notran; static integer jfirst; static doublereal smlnum; static logical nounit; static doublereal rec, tjj; #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --x; --cnorm; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); notran = lsame_(trans, "N"); nounit = lsame_(diag, "N"); /* Test the input parameters. */ if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! lsame_(diag, "U")) { *info = -3; } else if (! lsame_(normin, "Y") && ! lsame_(normin, "N")) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*kd < 0) { *info = -6; } else if (*ldab < *kd + 1) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("DLATBS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine machine dependent parameters to control overflow. */ smlnum = dlamch_("Safe minimum") / dlamch_("Precision"); bignum = 1. / smlnum; *scale = 1.; if (lsame_(normin, "N")) { /* Compute the 1-norm of each column, not including the diagonal. */ if (upper) { /* A is upper triangular. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *kd, i__3 = j - 1; jlen = min(i__2,i__3); cnorm[j] = dasum_(&jlen, &ab_ref(*kd + 1 - jlen, j), &c__1); /* L10: */ } } else { /* A is lower triangular. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *kd, i__3 = *n - j; jlen = min(i__2,i__3); if (jlen > 0) { cnorm[j] = dasum_(&jlen, &ab_ref(2, j), &c__1); } else { cnorm[j] = 0.; } /* L20: */ } } } /* Scale the column norms by TSCAL if the maximum element in CNORM is greater than BIGNUM. */ imax = idamax_(n, &cnorm[1], &c__1); tmax = cnorm[imax]; if (tmax <= bignum) { tscal = 1.; } else { tscal = 1. / (smlnum * tmax); dscal_(n, &tscal, &cnorm[1], &c__1); } /* Compute a bound on the computed solution vector to see if the Level 2 BLAS routine DTBSV can be used. */ j = idamax_(n, &x[1], &c__1); xmax = (d__1 = x[j], abs(d__1)); xbnd = xmax; if (notran) { /* Compute the growth in A * x = b. */ if (upper) { jfirst = *n; jlast = 1; jinc = -1; maind = *kd + 1; } else { jfirst = 1; jlast = *n; jinc = 1; maind = 1; } if (tscal != 1.) { grow = 0.; goto L50; } if (nounit) { /* A is non-unit triangular. Compute GROW = 1/G(j) and XBND = 1/M(j). Initially, G(0) = max{x(i), i=1,...,n}. */ grow = 1. / max(xbnd,smlnum); xbnd = grow; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L50; } /* M(j) = G(j-1) / abs(A(j,j)) */ tjj = (d__1 = ab_ref(maind, j), abs(d__1)); /* Computing MIN */ d__1 = xbnd, d__2 = min(1.,tjj) * grow; xbnd = min(d__1,d__2); if (tjj + cnorm[j] >= smlnum) { /* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ grow *= tjj / (tjj + cnorm[j]); } else { /* G(j) could overflow, set GROW to 0. */ grow = 0.; } /* L30: */ } grow = xbnd; } else { /* A is unit triangular. Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. Computing MIN */ d__1 = 1., d__2 = 1. / max(xbnd,smlnum); grow = min(d__1,d__2); i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L50; } /* G(j) = G(j-1)*( 1 + CNORM(j) ) */ grow *= 1. / (cnorm[j] + 1.); /* L40: */ } } L50: ; } else { /* Compute the growth in A' * x = b. */ if (upper) { jfirst = 1; jlast = *n; jinc = 1; maind = *kd + 1; } else { jfirst = *n; jlast = 1; jinc = -1; maind = 1; } if (tscal != 1.) { grow = 0.; goto L80; } if (nounit) { /* A is non-unit triangular. Compute GROW = 1/G(j) and XBND = 1/M(j). Initially, M(0) = max{x(i), i=1,...,n}. */ grow = 1. / max(xbnd,smlnum); xbnd = grow; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L80; } /* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ xj = cnorm[j] + 1.; /* Computing MIN */ d__1 = grow, d__2 = xbnd / xj; grow = min(d__1,d__2); /* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ tjj = (d__1 = ab_ref(maind, j), abs(d__1)); if (xj > tjj) { xbnd *= tjj / xj; } /* L60: */ } grow = min(grow,xbnd); } else { /* A is unit triangular. Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. Computing MIN */ d__1 = 1., d__2 = 1. / max(xbnd,smlnum); grow = min(d__1,d__2); i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L80; } /* G(j) = ( 1 + CNORM(j) )*G(j-1) */ xj = cnorm[j] + 1.; grow /= xj; /* L70: */ } } L80: ; } if (grow * tscal > smlnum) { /* Use the Level 2 BLAS solve if the reciprocal of the bound on elements of X is not too small. */ dtbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &x[1], &c__1); } else { /* Use a Level 1 BLAS solve, scaling intermediate results. */ if (xmax > bignum) { /* Scale X so that its components are less than or equal to BIGNUM in absolute value. */ *scale = bignum / xmax; dscal_(n, scale, &x[1], &c__1); xmax = bignum; } if (notran) { /* Solve A * x = b */ i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ xj = (d__1 = x[j], abs(d__1)); if (nounit) { tjjs = ab_ref(maind, j) * tscal; } else { tjjs = tscal; if (tscal == 1.) { goto L100; } } tjj = abs(tjjs); if (tjj > smlnum) { /* abs(A(j,j)) > SMLNUM: */ if (tjj < 1.) { if (xj > tjj * bignum) { /* Scale x by 1/b(j). */ rec = 1. / xj; dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j] /= tjjs; xj = (d__1 = x[j], abs(d__1)); } else if (tjj > 0.) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if (xj > tjj * bignum) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM to avoid overflow when dividing by A(j,j). */ rec = tjj * bignum / xj; if (cnorm[j] > 1.) { /* Scale by 1/CNORM(j) to avoid overflow when multiplying x(j) times column j. */ rec /= cnorm[j]; } dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } x[j] /= tjjs; xj = (d__1 = x[j], abs(d__1)); } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and scale = 0, and compute a solution to A*x = 0. */ i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__] = 0.; /* L90: */ } x[j] = 1.; xj = 1.; *scale = 0.; xmax = 0.; } L100: /* Scale x if necessary to avoid overflow when adding a multiple of column j of A. */ if (xj > 1.) { rec = 1. / xj; if (cnorm[j] > (bignum - xmax) * rec) { /* Scale x by 1/(2*abs(x(j))). */ rec *= .5; dscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } else if (xj * cnorm[j] > bignum - xmax) { /* Scale x by 1/2. */ dscal_(n, &c_b36, &x[1], &c__1); *scale *= .5; } if (upper) { if (j > 1) { /* Compute the update x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - x(j)* A(max(1,j-kd):j-1,j) Computing MIN */ i__3 = *kd, i__4 = j - 1; jlen = min(i__3,i__4); d__1 = -x[j] * tscal; daxpy_(&jlen, &d__1, &ab_ref(*kd + 1 - jlen, j), & c__1, &x[j - jlen], &c__1); i__3 = j - 1; i__ = idamax_(&i__3, &x[1], &c__1); xmax = (d__1 = x[i__], abs(d__1)); } } else if (j < *n) { /* Compute the update x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - x(j) * A(j+1:min(j+kd,n),j) Computing MIN */ i__3 = *kd, i__4 = *n - j; jlen = min(i__3,i__4); if (jlen > 0) { d__1 = -x[j] * tscal; daxpy_(&jlen, &d__1, &ab_ref(2, j), &c__1, &x[j + 1], &c__1); } i__3 = *n - j; i__ = j + idamax_(&i__3, &x[j + 1], &c__1); xmax = (d__1 = x[i__], abs(d__1)); } /* L110: */ } } else { /* Solve A' * x = b */ i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Compute x(j) = b(j) - sum A(k,j)*x(k). k<>j */ xj = (d__1 = x[j], abs(d__1)); uscal = tscal; rec = 1. / max(xmax,1.); if (cnorm[j] > (bignum - xj) * rec) { /* If x(j) could overflow, scale x by 1/(2*XMAX). */ rec *= .5; if (nounit) { tjjs = ab_ref(maind, j) * tscal; } else { tjjs = tscal; } tjj = abs(tjjs); if (tjj > 1.) { /* Divide by A(j,j) when scaling x if A(j,j) > 1. Computing MIN */ d__1 = 1., d__2 = rec * tjj; rec = min(d__1,d__2); uscal /= tjjs; } if (rec < 1.) { dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } sumj = 0.; if (uscal == 1.) { /* If the scaling needed for A in the dot product is 1, call DDOT to perform the dot product. */ if (upper) { /* Computing MIN */ i__3 = *kd, i__4 = j - 1; jlen = min(i__3,i__4); sumj = ddot_(&jlen, &ab_ref(*kd + 1 - jlen, j), &c__1, &x[j - jlen], &c__1); } else { /* Computing MIN */ i__3 = *kd, i__4 = *n - j; jlen = min(i__3,i__4); if (jlen > 0) { sumj = ddot_(&jlen, &ab_ref(2, j), &c__1, &x[j + 1], &c__1); } } } else { /* Otherwise, use in-line code for the dot product. */ if (upper) { /* Computing MIN */ i__3 = *kd, i__4 = j - 1; jlen = min(i__3,i__4); i__3 = jlen; for (i__ = 1; i__ <= i__3; ++i__) { sumj += ab_ref(*kd + i__ - jlen, j) * uscal * x[j - jlen - 1 + i__]; /* L120: */ } } else { /* Computing MIN */ i__3 = *kd, i__4 = *n - j; jlen = min(i__3,i__4); i__3 = jlen; for (i__ = 1; i__ <= i__3; ++i__) { sumj += ab_ref(i__ + 1, j) * uscal * x[j + i__]; /* L130: */ } } } if (uscal == tscal) { /* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) was not used to scale the dotproduct. */ x[j] -= sumj; xj = (d__1 = x[j], abs(d__1)); if (nounit) { /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ tjjs = ab_ref(maind, j) * tscal; } else { tjjs = tscal; if (tscal == 1.) { goto L150; } } tjj = abs(tjjs); if (tjj > smlnum) { /* abs(A(j,j)) > SMLNUM: */ if (tjj < 1.) { if (xj > tjj * bignum) { /* Scale X by 1/abs(x(j)). */ rec = 1. / xj; dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j] /= tjjs; } else if (tjj > 0.) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if (xj > tjj * bignum) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ rec = tjj * bignum / xj; dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } x[j] /= tjjs; } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and scale = 0, and compute a solution to A'*x = 0. */ i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__] = 0.; /* L140: */ } x[j] = 1.; *scale = 0.; xmax = 0.; } L150: ; } else { /* Compute x(j) := x(j) / A(j,j) - sumj if the dot product has already been divided by 1/A(j,j). */ x[j] = x[j] / tjjs - sumj; } /* Computing MAX */ d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1)); xmax = max(d__2,d__3); /* L160: */ } } *scale /= tscal; } /* Scale the column norms by 1/TSCAL for return. */ if (tscal != 1.) { d__1 = 1. / tscal; dscal_(n, &d__1, &cnorm[1], &c__1); } return 0; /* End of DLATBS */ } /* dlatbs_ */