/* Subroutine */ int sgbt05_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, real *ab, integer *ldab, real *b, integer *ldb, real *x, integer *ldx, real *xact, integer *ldxact, real *ferr, real * berr, real *reslts) { /* System generated locals */ integer ab_dim1, ab_offset, b_dim1, b_offset, x_dim1, x_offset, xact_dim1, xact_offset, i__1, i__2, i__3, i__4, i__5; real r__1, r__2, r__3; /* Local variables */ static real diff, axbi; static integer imax; static real unfl, ovfl; static integer i__, j, k; extern logical lsame_(char *, char *); static real xnorm; extern doublereal slamch_(char *); static integer nz; static real errbnd; extern integer isamax_(integer *, real *, integer *); static logical notran; static real eps, tmp; #define xact_ref(a_1,a_2) xact[(a_2)*xact_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= SGBT05 tests the error bounds from iterative refinement for the computed solution to a system of equations op(A)*X = B, where A is a general band matrix of order n with kl subdiagonals and ku superdiagonals and op(A) = A or A**T, depending on TRANS. RESLTS(1) = test of the error bound = norm(X - XACT) / ( norm(X) * FERR ) A large value is returned if this ratio is not less than one. RESLTS(2) = residual from the iterative refinement routine = the maximum of BERR / ( NZ*EPS + (*) ), where (*) = NZ*UNFL / (min_i (abs(op(A))*abs(X) +abs(b))_i ) and NZ = max. number of nonzeros in any row of A, plus 1 Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations. = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) N (input) INTEGER The number of rows of the matrices X, B, and XACT, and the order of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. NRHS (input) INTEGER The number of columns of the matrices X, B, and XACT. NRHS >= 0. AB (input) REAL array, dimension (LDAB,N) The original band matrix A, stored in rows 1 to KL+KU+1. The j-th column of A is stored in the j-th column of the array AB as follows: AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KL+KU+1. B (input) REAL array, dimension (LDB,NRHS) The right hand side vectors for the system of linear equations. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input) REAL array, dimension (LDX,NRHS) The computed solution vectors. Each vector is stored as a column of the matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). XACT (input) REAL array, dimension (LDX,NRHS) The exact solution vectors. Each vector is stored as a column of the matrix XACT. LDXACT (input) INTEGER The leading dimension of the array XACT. LDXACT >= max(1,N). FERR (input) REAL array, dimension (NRHS) The estimated forward error bounds for each solution vector X. If XTRUE is the true solution, FERR bounds the magnitude of the largest entry in (X - XTRUE) divided by the magnitude of the largest entry in X. BERR (input) REAL array, dimension (NRHS) The componentwise relative backward error of each solution vector (i.e., the smallest relative change in any entry of A or B that makes X an exact solution). RESLTS (output) REAL array, dimension (2) The maximum over the NRHS solution vectors of the ratios: RESLTS(1) = norm(X - XACT) / ( norm(X) * FERR ) RESLTS(2) = BERR / ( NZ*EPS + (*) ) ===================================================================== Quick exit if N = 0 or NRHS = 0. Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; xact_dim1 = *ldxact; xact_offset = 1 + xact_dim1 * 1; xact -= xact_offset; --ferr; --berr; --reslts; /* Function Body */ if (*n <= 0 || *nrhs <= 0) { reslts[1] = 0.f; reslts[2] = 0.f; return 0; } eps = slamch_("Epsilon"); unfl = slamch_("Safe minimum"); ovfl = 1.f / unfl; notran = lsame_(trans, "N"); /* Computing MIN */ i__1 = *kl + *ku + 2, i__2 = *n + 1; nz = min(i__1,i__2); /* Test 1: Compute the maximum of norm(X - XACT) / ( norm(X) * FERR ) over all the vectors X and XACT using the infinity-norm. */ errbnd = 0.f; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { imax = isamax_(n, &x_ref(1, j), &c__1); /* Computing MAX */ r__2 = (r__1 = x_ref(imax, j), dabs(r__1)); xnorm = dmax(r__2,unfl); diff = 0.f; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ r__2 = diff, r__3 = (r__1 = x_ref(i__, j) - xact_ref(i__, j), dabs(r__1)); diff = dmax(r__2,r__3); /* L10: */ } if (xnorm > 1.f) { goto L20; } else if (diff <= ovfl * xnorm) { goto L20; } else { errbnd = 1.f / eps; goto L30; } L20: if (diff / xnorm <= ferr[j]) { /* Computing MAX */ r__1 = errbnd, r__2 = diff / xnorm / ferr[j]; errbnd = dmax(r__1,r__2); } else { errbnd = 1.f / eps; } L30: ; } reslts[1] = errbnd; /* Test 2: Compute the maximum of BERR / ( NZ*EPS + (*) ), where (*) = NZ*UNFL / (min_i (abs(op(A))*abs(X) +abs(b))_i ) */ i__1 = *nrhs; for (k = 1; k <= i__1; ++k) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { tmp = (r__1 = b_ref(i__, k), dabs(r__1)); if (notran) { /* Computing MAX */ i__3 = i__ - *kl; /* Computing MIN */ i__5 = i__ + *ku; i__4 = min(i__5,*n); for (j = max(i__3,1); j <= i__4; ++j) { tmp += (r__1 = ab_ref(*ku + 1 + i__ - j, j), dabs(r__1)) * (r__2 = x_ref(j, k), dabs(r__2)); /* L40: */ } } else { /* Computing MAX */ i__4 = i__ - *ku; /* Computing MIN */ i__5 = i__ + *kl; i__3 = min(i__5,*n); for (j = max(i__4,1); j <= i__3; ++j) { tmp += (r__1 = ab_ref(*ku + 1 + j - i__, i__), dabs(r__1)) * (r__2 = x_ref(j, k), dabs(r__2)); /* L50: */ } } if (i__ == 1) { axbi = tmp; } else { axbi = dmin(axbi,tmp); } /* L60: */ } /* Computing MAX */ r__1 = axbi, r__2 = nz * unfl; tmp = berr[k] / (nz * eps + nz * unfl / dmax(r__1,r__2)); if (k == 1) { reslts[2] = tmp; } else { reslts[2] = dmax(reslts[2],tmp); } /* L70: */ } return 0; /* End of SGBT05 */ } /* sgbt05_ */
/* Subroutine */ int dsptrs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, integer *ipiv, doublereal *b, integer *ldb, integer * info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DSPTRS solves a system of linear equations A*X = B with a real symmetric matrix A stored in packed format using the factorization A = U*D*U**T or A = L*D*L**T computed by DSPTRF. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the details of the factorization are stored as an upper or lower triangular matrix. = 'U': Upper triangular, form is A = U*D*U**T; = 'L': Lower triangular, form is A = L*D*L**T. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) The block diagonal matrix D and the multipliers used to obtain the factor U or L as computed by DSPTRF, stored as a packed triangular matrix. IPIV (input) INTEGER array, dimension (N) Details of the interchanges and the block structure of D as determined by DSPTRF. B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Parameter adjustments */ /* Table of constant values */ static doublereal c_b7 = -1.; static integer c__1 = 1; static doublereal c_b19 = 1.; /* System generated locals */ integer b_dim1, b_offset, i__1; doublereal d__1; /* Local variables */ extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal akm1k; static integer j, k; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); static doublereal denom; extern /* Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer *); static logical upper; static doublereal ak, bk; static integer kc, kp; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal akm1, bkm1; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] --ap; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("DSPTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (upper) { /* Solve A*X = B, where A = U*D*U'. First solve U*D*X = B, overwriting B with X. K is the main loop index, decreasing from N to 1 in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = *n; kc = *n * (*n + 1) / 2 + 1; L10: /* If K < 1, exit from loop. */ if (k < 1) { goto L30; } kc -= k; if (ipiv[k] > 0) { /* 1 x 1 diagonal block Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { dswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } /* Multiply by inv(U(K)), where U(K) is the transformation stored in column K of A. */ i__1 = k - 1; dger_(&i__1, nrhs, &c_b7, &ap[kc], &c__1, &b_ref(k, 1), ldb, & b_ref(1, 1), ldb); /* Multiply by the inverse of the diagonal block. */ d__1 = 1. / ap[kc + k - 1]; dscal_(nrhs, &d__1, &b_ref(k, 1), ldb); --k; } else { /* 2 x 2 diagonal block Interchange rows K-1 and -IPIV(K). */ kp = -ipiv[k]; if (kp != k - 1) { dswap_(nrhs, &b_ref(k - 1, 1), ldb, &b_ref(kp, 1), ldb); } /* Multiply by inv(U(K)), where U(K) is the transformation stored in columns K-1 and K of A. */ i__1 = k - 2; dger_(&i__1, nrhs, &c_b7, &ap[kc], &c__1, &b_ref(k, 1), ldb, & b_ref(1, 1), ldb); i__1 = k - 2; dger_(&i__1, nrhs, &c_b7, &ap[kc - (k - 1)], &c__1, &b_ref(k - 1, 1), ldb, &b_ref(1, 1), ldb); /* Multiply by the inverse of the diagonal block. */ akm1k = ap[kc + k - 2]; akm1 = ap[kc - 1] / akm1k; ak = ap[kc + k - 1] / akm1k; denom = akm1 * ak - 1.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { bkm1 = b_ref(k - 1, j) / akm1k; bk = b_ref(k, j) / akm1k; b_ref(k - 1, j) = (ak * bkm1 - bk) / denom; b_ref(k, j) = (akm1 * bk - bkm1) / denom; /* L20: */ } kc = kc - k + 1; k += -2; } goto L10; L30: /* Next solve U'*X = B, overwriting B with X. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = 1; kc = 1; L40: /* If K > N, exit from loop. */ if (k > *n) { goto L50; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block Multiply by inv(U'(K)), where U(K) is the transformation stored in column K of A. */ i__1 = k - 1; dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &ap[kc] , &c__1, &c_b19, &b_ref(k, 1), ldb); /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { dswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } kc += k; ++k; } else { /* 2 x 2 diagonal block Multiply by inv(U'(K+1)), where U(K+1) is the transformation stored in columns K and K+1 of A. */ i__1 = k - 1; dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &ap[kc] , &c__1, &c_b19, &b_ref(k, 1), ldb); i__1 = k - 1; dgemv_("Transpose", &i__1, nrhs, &c_b7, &b[b_offset], ldb, &ap[kc + k], &c__1, &c_b19, &b_ref(k + 1, 1), ldb); /* Interchange rows K and -IPIV(K). */ kp = -ipiv[k]; if (kp != k) { dswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } kc = kc + (k << 1) + 1; k += 2; } goto L40; L50: ; } else { /* Solve A*X = B, where A = L*D*L'. First solve L*D*X = B, overwriting B with X. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = 1; kc = 1; L60: /* If K > N, exit from loop. */ if (k > *n) { goto L80; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { dswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } /* Multiply by inv(L(K)), where L(K) is the transformation stored in column K of A. */ if (k < *n) { i__1 = *n - k; dger_(&i__1, nrhs, &c_b7, &ap[kc + 1], &c__1, &b_ref(k, 1), ldb, &b_ref(k + 1, 1), ldb); } /* Multiply by the inverse of the diagonal block. */ d__1 = 1. / ap[kc]; dscal_(nrhs, &d__1, &b_ref(k, 1), ldb); kc = kc + *n - k + 1; ++k; } else { /* 2 x 2 diagonal block Interchange rows K+1 and -IPIV(K). */ kp = -ipiv[k]; if (kp != k + 1) { dswap_(nrhs, &b_ref(k + 1, 1), ldb, &b_ref(kp, 1), ldb); } /* Multiply by inv(L(K)), where L(K) is the transformation stored in columns K and K+1 of A. */ if (k < *n - 1) { i__1 = *n - k - 1; dger_(&i__1, nrhs, &c_b7, &ap[kc + 2], &c__1, &b_ref(k, 1), ldb, &b_ref(k + 2, 1), ldb); i__1 = *n - k - 1; dger_(&i__1, nrhs, &c_b7, &ap[kc + *n - k + 2], &c__1, &b_ref( k + 1, 1), ldb, &b_ref(k + 2, 1), ldb); } /* Multiply by the inverse of the diagonal block. */ akm1k = ap[kc + 1]; akm1 = ap[kc] / akm1k; ak = ap[kc + *n - k + 1] / akm1k; denom = akm1 * ak - 1.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { bkm1 = b_ref(k, j) / akm1k; bk = b_ref(k + 1, j) / akm1k; b_ref(k, j) = (ak * bkm1 - bk) / denom; b_ref(k + 1, j) = (akm1 * bk - bkm1) / denom; /* L70: */ } kc = kc + (*n - k << 1) + 1; k += 2; } goto L60; L80: /* Next solve L'*X = B, overwriting B with X. K is the main loop index, decreasing from N to 1 in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = *n; kc = *n * (*n + 1) / 2 + 1; L90: /* If K < 1, exit from loop. */ if (k < 1) { goto L100; } kc -= *n - k + 1; if (ipiv[k] > 0) { /* 1 x 1 diagonal block Multiply by inv(L'(K)), where L(K) is the transformation stored in column K of A. */ if (k < *n) { i__1 = *n - k; dgemv_("Transpose", &i__1, nrhs, &c_b7, &b_ref(k + 1, 1), ldb, &ap[kc + 1], &c__1, &c_b19, &b_ref(k, 1), ldb); } /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { dswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } --k; } else { /* 2 x 2 diagonal block Multiply by inv(L'(K-1)), where L(K-1) is the transformation stored in columns K-1 and K of A. */ if (k < *n) { i__1 = *n - k; dgemv_("Transpose", &i__1, nrhs, &c_b7, &b_ref(k + 1, 1), ldb, &ap[kc + 1], &c__1, &c_b19, &b_ref(k, 1), ldb); i__1 = *n - k; dgemv_("Transpose", &i__1, nrhs, &c_b7, &b_ref(k + 1, 1), ldb, &ap[kc - (*n - k)], &c__1, &c_b19, &b_ref(k - 1, 1), ldb); } /* Interchange rows K and -IPIV(K). */ kp = -ipiv[k]; if (kp != k) { dswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } kc -= *n - k + 2; k += -2; } goto L90; L100: ; } return 0; /* End of DSPTRS */ } /* dsptrs_ */
/* Subroutine */ int sggglm_(integer *n, integer *m, integer *p, real *a, integer *lda, real *b, integer *ldb, real *d__, real *x, real *y, real *work, integer *lwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= SGGGLM solves a general Gauss-Markov linear model (GLM) problem: minimize || y ||_2 subject to d = A*x + B*y x where A is an N-by-M matrix, B is an N-by-P matrix, and d is a given N-vector. It is assumed that M <= N <= M+P, and rank(A) = M and rank( A B ) = N. Under these assumptions, the constrained equation is always consistent, and there is a unique solution x and a minimal 2-norm solution y, which is obtained using a generalized QR factorization of A and B. In particular, if matrix B is square nonsingular, then the problem GLM is equivalent to the following weighted linear least squares problem minimize || inv(B)*(d-A*x) ||_2 x where inv(B) denotes the inverse of B. Arguments ========= N (input) INTEGER The number of rows of the matrices A and B. N >= 0. M (input) INTEGER The number of columns of the matrix A. 0 <= M <= N. P (input) INTEGER The number of columns of the matrix B. P >= N-M. A (input/output) REAL array, dimension (LDA,M) On entry, the N-by-M matrix A. On exit, A is destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) REAL array, dimension (LDB,P) On entry, the N-by-P matrix B. On exit, B is destroyed. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). D (input/output) REAL array, dimension (N) On entry, D is the left hand side of the GLM equation. On exit, D is destroyed. X (output) REAL array, dimension (M) Y (output) REAL array, dimension (P) On exit, X and Y are the solutions of the GLM problem. WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,N+M+P). For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, where NB is an upper bound for the optimal blocksizes for SGEQRF, SGERQF, SORMQR and SORMRQ. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. =================================================================== Test the input parameters Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static real c_b32 = -1.f; static real c_b34 = 1.f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer lopt, i__; extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), strsv_(char *, char *, char *, integer *, real *, integer *, real *, integer *); static integer nb, np; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int sggqrf_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, real *, integer * , integer *); static integer nb1, nb2, nb3, nb4, lwkopt; static logical lquery; extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *), sormrq_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real * , integer *, real *, integer *, integer *); #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --d__; --x; --y; --work; /* Function Body */ *info = 0; np = min(*n,*p); nb1 = ilaenv_(&c__1, "SGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); nb2 = ilaenv_(&c__1, "SGERQF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); nb3 = ilaenv_(&c__1, "SORMQR", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1); nb4 = ilaenv_(&c__1, "SORMRQ", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = max(nb1,nb2), i__1 = max(i__1,nb3); nb = max(i__1,nb4); lwkopt = *m + np + max(*n,*p) * nb; work[1] = (real) lwkopt; lquery = *lwork == -1; if (*n < 0) { *info = -1; } else if (*m < 0 || *m > *n) { *info = -2; } else if (*p < 0 || *p < *n - *m) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = *n + *m + *p; if (*lwork < max(i__1,i__2) && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); xerbla_("SGGGLM", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Compute the GQR factorization of matrices A and B: Q'*A = ( R11 ) M, Q'*B*Z' = ( T11 T12 ) M ( 0 ) N-M ( 0 T22 ) N-M M M+P-N N-M where R11 and T22 are upper triangular, and Q and Z are orthogonal. */ i__1 = *lwork - *m - np; sggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m + 1], &work[*m + np + 1], &i__1, info); lopt = work[*m + np + 1]; /* Update left-hand-side vector d = Q'*d = ( d1 ) M ( d2 ) N-M */ i__1 = max(1,*n); i__2 = *lwork - *m - np; sormqr_("Left", "Transpose", n, &c__1, m, &a[a_offset], lda, &work[1], & d__[1], &i__1, &work[*m + np + 1], &i__2, info); /* Computing MAX */ i__1 = lopt, i__2 = (integer) work[*m + np + 1]; lopt = max(i__1,i__2); /* Solve T22*y2 = d2 for y2 */ i__1 = *n - *m; strsv_("Upper", "No transpose", "Non unit", &i__1, &b_ref(*m + 1, *m + *p - *n + 1), ldb, &d__[*m + 1], &c__1); i__1 = *n - *m; scopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1); /* Set y1 = 0 */ i__1 = *m + *p - *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.f; /* L10: */ } /* Update d1 = d1 - T12*y2 */ i__1 = *n - *m; sgemv_("No transpose", m, &i__1, &c_b32, &b_ref(1, *m + *p - *n + 1), ldb, &y[*m + *p - *n + 1], &c__1, &c_b34, &d__[1], &c__1); /* Solve triangular system: R11*x = d1 */ strsv_("Upper", "No Transpose", "Non unit", m, &a[a_offset], lda, &d__[1], &c__1); /* Copy D to X */ scopy_(m, &d__[1], &c__1, &x[1], &c__1); /* Backward transformation y = Z'*y Computing MAX */ i__1 = 1, i__2 = *n - *p + 1; i__3 = max(1,*p); i__4 = *lwork - *m - np; sormrq_("Left", "Transpose", p, &c__1, &np, &b_ref(max(i__1,i__2), 1), ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &i__4, info); /* Computing MAX */ i__1 = lopt, i__2 = (integer) work[*m + np + 1]; work[1] = (real) (*m + np + max(i__1,i__2)); return 0; /* End of SGGGLM */ } /* sggglm_ */
/* Subroutine */ int stptrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *ap, real *b, integer *ldb, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= STPTRS solves a triangular system of the form A * X = B or A**T * X = B, where A is a triangular matrix of order N stored in packed format, and B is an N-by-NRHS matrix. A check is made to verify that A is nonsingular. Arguments ========= UPLO (input) CHARACTER*1 = 'U': A is upper triangular; = 'L': A is lower triangular. TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) DIAG (input) CHARACTER*1 = 'N': A is non-unit triangular; = 'U': A is unit triangular. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. AP (input) REAL array, dimension (N*(N+1)/2) The upper or lower triangular matrix A, packed columnwise in a linear array. The j-th column of A is stored in the array AP as follows: if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. B (input/output) REAL array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, if INFO = 0, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the i-th diagonal element of A is zero, indicating that the matrix is singular and the solutions X have not been computed. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer b_dim1, b_offset, i__1; /* Local variables */ static integer j; extern logical lsame_(char *, char *); static logical upper; extern /* Subroutine */ int stpsv_(char *, char *, char *, integer *, real *, real *, integer *); static integer jc; extern /* Subroutine */ int xerbla_(char *, integer *); static logical nounit; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] --ap; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); nounit = lsame_(diag, "N"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("STPTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check for singularity. */ if (nounit) { if (upper) { jc = 1; i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (ap[jc + *info - 1] == 0.f) { return 0; } jc += *info; /* L10: */ } } else { jc = 1; i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (ap[jc] == 0.f) { return 0; } jc = jc + *n - *info + 1; /* L20: */ } } } *info = 0; /* Solve A * x = b or A' * x = b. */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { stpsv_(uplo, trans, diag, n, &ap[1], &b_ref(1, j), &c__1); /* L30: */ } return 0; /* End of STPTRS */ } /* stptrs_ */
/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * work, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, i__1, i__2; real r__1; /* Local variables */ static real temp; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); extern doublereal snrm2_(integer *, real *, integer *); static integer i__, j, m, n; static real diflj, difrj, dsigj; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), scopy_( integer *, real *, integer *, real *, integer *); extern doublereal slamc3_(real *, real *), sopbl2_(char *, integer *, integer *, integer *, integer *); static real dj; extern /* Subroutine */ int xerbla_(char *, integer *); static real dsigjp; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *); static integer nlp1; #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_ref(a_1,a_2) bx[(a_2)*bx_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] /* -- LAPACK routine (instrumented to count ops, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University December 22, 1999 Purpose ======= SLALS0 applies back the multiplying factors of either the left or the right singular vector matrix of a diagonal matrix appended by a row to the right hand side matrix B in solving the least squares problem using the divide-and-conquer SVD approach. For the left singular vector matrix, three types of orthogonal matrices are involved: (1L) Givens rotations: the number of such rotations is GIVPTR; the pairs of columns/rows they were applied to are stored in GIVCOL; and the C- and S-values of these rotations are stored in GIVNUM. (2L) Permutation. The (NL+1)-st row of B is to be moved to the first row, and for J=2:N, PERM(J)-th row of B is to be moved to the J-th row. (3L) The left singular vector matrix of the remaining matrix. For the right singular vector matrix, four types of orthogonal matrices are involved: (1R) The right singular vector matrix of the remaining matrix. (2R) If SQRE = 1, one extra Givens rotation to generate the right null space. (3R) The inverse transformation of (2L). (4R) The inverse transformation of (1L). Arguments ========= ICOMPQ (input) INTEGER Specifies whether singular vectors are to be computed in factored form: = 0: Left singular vector matrix. = 1: Right singular vector matrix. NL (input) INTEGER The row dimension of the upper block. NL >= 1. NR (input) INTEGER The row dimension of the lower block. NR >= 1. SQRE (input) INTEGER = 0: the lower block is an NR-by-NR square matrix. = 1: the lower block is an NR-by-(NR+1) rectangular matrix. The bidiagonal matrix has row dimension N = NL + NR + 1, and column dimension M = N + SQRE. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input/output) REAL array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B. LDB must be at least max(1,MAX( M, N ) ). BX (workspace) REAL array, dimension ( LDBX, NRHS ) LDBX (input) INTEGER The leading dimension of BX. PERM (input) INTEGER array, dimension ( N ) The permutations (from deflation and sorting) applied to the two blocks. GIVPTR (input) INTEGER The number of Givens rotations which took place in this subproblem. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) Each pair of numbers indicates a pair of rows/columns involved in a Givens rotation. LDGCOL (input) INTEGER The leading dimension of GIVCOL, must be at least N. GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) Each number indicates the C or S value used in the corresponding Givens rotation. LDGNUM (input) INTEGER The leading dimension of arrays DIFR, POLES and GIVNUM, must be at least K. POLES (input) REAL array, dimension ( LDGNUM, 2 ) On entry, POLES(1:K, 1) contains the new singular values obtained from solving the secular equation, and POLES(1:K, 2) is an array containing the poles in the secular equation. DIFL (input) REAL array, dimension ( K ). On entry, DIFL(I) is the distance between I-th updated (undeflated) singular value and the I-th (undeflated) old singular value. DIFR (input) REAL array, dimension ( LDGNUM, 2 ). On entry, DIFR(I, 1) contains the distances between I-th updated (undeflated) singular value and the I+1-th (undeflated) old singular value. And DIFR(I, 2) is the normalizing factor for the I-th right singular vector. Z (input) REAL array, dimension ( K ) Contain the components of the deflation-adjusted updating row vector. K (input) INTEGER Contains the dimension of the non-deflated matrix, This is the order of the related secular equation. 1 <= K <=N. C (input) REAL C contains garbage if SQRE =0 and the C-value of a Givens rotation related to the right null space if SQRE = 1. S (input) REAL S contains garbage if SQRE =0 and the S-value of a Givens rotation related to the right null space if SQRE = 1. WORK (workspace) REAL array, dimension ( K ) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. ===================================================================== Test the input parameters. Parameter adjustments */ b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; --perm; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; difr_dim1 = *ldgnum; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; poles_dim1 = *ldgnum; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; givnum_dim1 = *ldgnum; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; --difl; --z__; --work; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } n = *nl + *nr + 1; if (*nrhs < 1) { *info = -5; } else if (*ldb < n) { *info = -7; } else if (*ldbx < n) { *info = -9; } else if (*givptr < 0) { *info = -11; } else if (*ldgcol < n) { *info = -13; } else if (*ldgnum < n) { *info = -15; } else if (*k < 1) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("SLALS0", &i__1); return 0; } m = n + *sqre; nlp1 = *nl + 1; if (*icompq == 0) { /* Apply back orthogonal transformations from the left. Step (1L): apply back the Givens rotations performed. */ latime_1.ops += (real) (*nrhs * 6 * *givptr); i__1 = *givptr; for (i__ = 1; i__ <= i__1; ++i__) { srot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(givcol_ref( i__, 1), 1), ldb, &givnum_ref(i__, 2), &givnum_ref(i__, 1) ); /* L10: */ } /* Step (2L): permute rows of B. */ scopy_(nrhs, &b_ref(nlp1, 1), ldb, &bx_ref(1, 1), ldbx); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { scopy_(nrhs, &b_ref(perm[i__], 1), ldb, &bx_ref(i__, 1), ldbx); /* L20: */ } /* Step (3L): apply the inverse of the left singular vector matrix to BX. */ if (*k == 1) { scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); if (z__[1] < 0.f) { latime_1.ops += (real) (*nrhs); sscal_(nrhs, &c_b5, &b[b_offset], ldb); } } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = poles_ref(j, 1); dsigj = -poles_ref(j, 2); if (j < *k) { difrj = -difr_ref(j, 1); dsigjp = -poles_ref(j + 1, 2); } if (z__[j] == 0.f || poles_ref(j, 2) == 0.f) { work[j] = 0.f; } else { latime_1.ops += 4.f; work[j] = -poles_ref(j, 2) * z__[j] / diflj / (poles_ref( j, 2) + dj); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) { work[i__] = 0.f; } else { latime_1.ops += 6.f; work[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(& poles_ref(i__, 2), &dsigj) - diflj) / ( poles_ref(i__, 2) + dj); } /* L30: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) { work[i__] = 0.f; } else { latime_1.ops += 6.f; work[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(& poles_ref(i__, 2), &dsigjp) + difrj) / ( poles_ref(i__, 2) + dj); } /* L40: */ } work[1] = -1.f; latime_1.ops = latime_1.ops + (*k << 1) + *nrhs + sopbl2_( "SGEMV ", k, nrhs, &c__0, &c__0); temp = snrm2_(k, &work[1], &c__1); sgemv_("T", k, nrhs, &c_b14, &bx[bx_offset], ldbx, &work[1], & c__1, &c_b16, &b_ref(j, 1), ldb); slascl_("G", &c__0, &c__0, &temp, &c_b14, &c__1, nrhs, &b_ref( j, 1), ldb, info); /* L50: */ } } /* Move the deflated rows of BX to B also. */ if (*k < max(m,n)) { i__1 = n - *k; slacpy_("A", &i__1, nrhs, &bx_ref(*k + 1, 1), ldbx, &b_ref(*k + 1, 1), ldb); } } else { /* Apply back the right orthogonal transformations. Step (1R): apply back the new right singular vector matrix to B. */ if (*k == 1) { scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { dsigj = poles_ref(j, 2); if (z__[j] == 0.f) { work[j] = 0.f; } else { latime_1.ops += 4.f; work[j] = -z__[j] / difl[j] / (dsigj + poles_ref(j, 1)) / difr_ref(j, 2); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { work[i__] = 0.f; } else { latime_1.ops += 6.f; r__1 = -poles_ref(i__ + 1, 2); work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr_ref(i__, 1)) / (dsigj + poles_ref(i__, 1) ) / difr_ref(i__, 2); } /* L60: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { work[i__] = 0.f; } else { latime_1.ops += 6.f; r__1 = -poles_ref(i__, 2); work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[ i__]) / (dsigj + poles_ref(i__, 1)) / difr_ref(i__, 2); } /* L70: */ } latime_1.ops += sopbl2_("SGEMV ", k, nrhs, &c__0, &c__0); sgemv_("T", k, nrhs, &c_b14, &b[b_offset], ldb, &work[1], & c__1, &c_b16, &bx_ref(j, 1), ldbx); /* L80: */ } } /* Step (2R): if SQRE = 1, apply back the rotation that is related to the right null space of the subproblem. */ if (*sqre == 1) { latime_1.ops += (real) (*nrhs * 6); scopy_(nrhs, &b_ref(m, 1), ldb, &bx_ref(m, 1), ldbx); srot_(nrhs, &bx_ref(1, 1), ldbx, &bx_ref(m, 1), ldbx, c__, s); } if (*k < max(m,n)) { i__1 = n - *k; slacpy_("A", &i__1, nrhs, &b_ref(*k + 1, 1), ldb, &bx_ref(*k + 1, 1), ldbx); } /* Step (3R): permute rows of B. */ scopy_(nrhs, &bx_ref(1, 1), ldbx, &b_ref(nlp1, 1), ldb); if (*sqre == 1) { scopy_(nrhs, &bx_ref(m, 1), ldbx, &b_ref(m, 1), ldb); } i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { scopy_(nrhs, &bx_ref(i__, 1), ldbx, &b_ref(perm[i__], 1), ldb); /* L90: */ } /* Step (4R): apply back the Givens rotations performed. */ latime_1.ops += (real) (*nrhs * 6 * *givptr); for (i__ = *givptr; i__ >= 1; --i__) { r__1 = -givnum_ref(i__, 1); srot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(givcol_ref( i__, 1), 1), ldb, &givnum_ref(i__, 2), &r__1); /* L100: */ } } return 0; /* End of SLALS0 */ } /* slals0_ */
/* Subroutine */ int cgelsd_(integer *m, integer *n, integer *nrhs, complex * a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, integer *rank, complex *work, integer *lwork, real *rwork, integer * iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; real r__1; complex q__1; /* Local variables */ static real anrm, bnrm; static integer itau, iascl, ibscl; static real sfmin; static integer minmn, maxmn, itaup, itauq, mnthr, nwork, ie, il; extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *, integer *, real *, real *, complex *, complex *, complex *, integer *, integer *), slabad_(real *, real *); extern doublereal clange_(char *, integer *, integer *, complex *, integer *, real *); static integer mm; extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, integer *, complex *, complex *, integer *, integer *), clalsd_( char *, integer *, integer *, integer *, real *, real *, complex * , integer *, real *, integer *, complex *, real *, integer *, integer *), clascl_(char *, integer *, integer *, real *, real *, integer *, integer *, complex *, integer *, integer *), cgeqrf_(integer *, integer *, complex *, integer *, complex *, complex *, integer *, integer *); extern doublereal slamch_(char *); extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static real bignum; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), cunmbr_(char *, char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *, integer *), slaset_( char *, integer *, integer *, real *, real *, real *, integer *), cunmlq_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *, integer *); static integer ldwork; extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *, integer *); static integer minwrk, maxwrk; static real smlnum; static logical lquery; static integer nrwork, smlsiz; static real eps; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= CGELSD computes the minimum-norm solution to a real linear least squares problem: minimize 2-norm(| b - A*x |) using the singular value decomposition (SVD) of A. A is an M-by-N matrix which may be rank-deficient. Several right hand side vectors b and solution vectors x can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X. The problem is solved in three steps: (1) Reduce the coefficient matrix A to bidiagonal form with Householder tranformations, reducing the original problem into a "bidiagonal least squares problem" (BLS) (2) Solve the BLS using a divide and conquer approach. (3) Apply back all the Householder tranformations to solve the original least squares problem. The effective rank of A is determined by treating as zero those singular values which are less than RCOND times the largest singular value. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, A has been destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input/output) COMPLEX array, dimension (LDB,NRHS) On entry, the M-by-NRHS right hand side matrix B. On exit, B is overwritten by the N-by-NRHS solution matrix X. If m >= n and RANK = n, the residual sum-of-squares for the solution in the i-th column is given by the sum of squares of elements n+1:m in that column. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,M,N). S (output) REAL array, dimension (min(M,N)) The singular values of A in decreasing order. The condition number of A in the 2-norm = S(1)/S(min(m,n)). RCOND (input) REAL RCOND is used to determine the effective rank of A. Singular values S(i) <= RCOND*S(1) are treated as zero. If RCOND < 0, machine precision is used instead. RANK (output) INTEGER The effective rank of A, i.e., the number of singular values which are greater than RCOND*S(1). WORK (workspace/output) COMPLEX array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK must be at least 1. The exact minimum amount of workspace needed depends on M, N and NRHS. As long as LWORK is at least 2 * N + N * NRHS if M is greater than or equal to N or 2 * M + M * NRHS if M is less than N, the code will execute correctly. For good performance, LWORK should generally be larger. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. RWORK (workspace) REAL array, dimension at least 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2 if M is greater than or equal to N or 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2 if M is less than N, the code will execute correctly. SMLSIZ is returned by ILAENV and is equal to the maximum size of the subproblems at the bottom of the computation tree (usually about 25), and NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) IWORK (workspace) INTEGER array, dimension (LIWORK) LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, where MINMN = MIN( M,N ). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: the algorithm for computing the SVD failed to converge; if INFO = i, i off-diagonal elements of an intermediate bidiagonal form did not converge to zero. Further Details =============== Based on contributions by Ming Gu and Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA Osni Marques, LBNL/NERSC, USA ===================================================================== Test the input arguments. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --s; --work; --rwork; --iwork; /* Function Body */ *info = 0; minmn = min(*m,*n); maxmn = max(*m,*n); mnthr = ilaenv_(&c__6, "CGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( ftnlen)1); lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,maxmn)) { *info = -7; } smlsiz = ilaenv_(&c__9, "CGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( ftnlen)6, (ftnlen)1); /* Compute workspace. (Note: Comments in the code beginning "Workspace:" describe the minimal amount of workspace needed at that point in the code, as well as the preferred amount for good performance. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV.) */ minwrk = 1; if (*info == 0) { maxwrk = 0; mm = *m; if (*m >= *n && *m >= mnthr) { /* Path 1a - overdetermined, with many more rows than columns. */ mm = *n; /* Computing MAX */ i__1 = maxwrk, i__2 = *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *nrhs * ilaenv_(&c__1, "CUNMQR", "LC", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); maxwrk = max(i__1,i__2); } if (*m >= *n) { /* Path 1 - overdetermined or exactly determined. Computing MAX */ i__1 = maxwrk, i__2 = (*n << 1) + (mm + *n) * ilaenv_(&c__1, "CGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1) ; maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = (*n << 1) + *nrhs * ilaenv_(&c__1, "CUNMBR", "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "CUN" "MBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = (*n << 1) + *n * *nrhs; maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = (*n << 1) + mm, i__2 = (*n << 1) + *n * *nrhs; minwrk = max(i__1,i__2); } if (*n > *m) { if (*n >= mnthr) { /* Path 2a - underdetermined, with many more columns than rows. */ maxwrk = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& c__1, "CUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * ilaenv_(&c__1, "CUNMLQ", "LC", n, nrhs, m, &c_n1, ( ftnlen)6, (ftnlen)2); maxwrk = max(i__1,i__2); if (*nrhs > 1) { /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; maxwrk = max(i__1,i__2); } else { /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 1); maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *m * *nrhs; maxwrk = max(i__1,i__2); } else { /* Path 2 - underdetermined. */ maxwrk = (*m << 1) + (*n + *m) * ilaenv_(&c__1, "CGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = maxwrk, i__2 = (*m << 1) + *nrhs * ilaenv_(&c__1, "CUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "CUNMBR" , "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = (*m << 1) + *m * *nrhs; maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = (*m << 1) + *n, i__2 = (*m << 1) + *m * *nrhs; minwrk = max(i__1,i__2); } minwrk = min(minwrk,maxwrk); r__1 = (real) maxwrk; q__1.r = r__1, q__1.i = 0.f; work[1].r = q__1.r, work[1].i = q__1.i; if (*lwork < minwrk && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); xerbla_("CGELSD", &i__1); return 0; } else if (lquery) { goto L10; } /* Quick return if possible. */ if (*m == 0 || *n == 0) { *rank = 0; return 0; } /* Get machine parameters. */ eps = slamch_("P"); sfmin = slamch_("S"); smlnum = sfmin / eps; bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); /* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ anrm = clange_("M", m, n, &a[a_offset], lda, &rwork[1]); iascl = 0; if (anrm > 0.f && anrm < smlnum) { /* Scale matrix norm up to SMLNUM */ clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM. */ clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.f) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); claset_("F", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb); slaset_("F", &minmn, &c__1, &c_b81, &c_b81, &s[1], &c__1); *rank = 0; goto L10; } /* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ bnrm = clange_("M", m, nrhs, &b[b_offset], ldb, &rwork[1]); ibscl = 0; if (bnrm > 0.f && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM. */ clascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM. */ clascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info); ibscl = 2; } /* If M < N make sure B(M+1:N,:) = 0 */ if (*m < *n) { i__1 = *n - *m; claset_("F", &i__1, nrhs, &c_b1, &c_b1, &b_ref(*m + 1, 1), ldb); } /* Overdetermined case. */ if (*m >= *n) { /* Path 1 - overdetermined or exactly determined. */ mm = *m; if (*m >= mnthr) { /* Path 1a - overdetermined, with many more rows than columns */ mm = *n; itau = 1; nwork = itau + *n; /* Compute A=Q*R. (RWorkspace: need N) (CWorkspace: need N, prefer N*NB) */ i__1 = *lwork - nwork + 1; cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info); /* Multiply B by transpose(Q). (RWorkspace: need N) (CWorkspace: need NRHS, prefer NRHS*NB) */ i__1 = *lwork - nwork + 1; cunmqr_("L", "C", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[nwork], &i__1, info); /* Zero out below R. */ if (*n > 1) { i__1 = *n - 1; i__2 = *n - 1; claset_("L", &i__1, &i__2, &c_b1, &c_b1, &a_ref(2, 1), lda); } } itauq = 1; itaup = itauq + *n; nwork = itaup + *n; ie = 1; nrwork = ie + *n; /* Bidiagonalize R in A. (RWorkspace: need N) (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) */ i__1 = *lwork - nwork + 1; cgebrd_(&mm, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], & work[itaup], &work[nwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors of R. (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) */ i__1 = *lwork - nwork + 1; cunmbr_("Q", "L", "C", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ clalsd_("U", &smlsiz, n, nrhs, &s[1], &rwork[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], info); if (*info != 0) { goto L10; } /* Multiply B by right bidiagonalizing vectors of R. */ i__1 = *lwork - nwork + 1; cunmbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & b[b_offset], ldb, &work[nwork], &i__1, info); } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( i__1,*nrhs), i__2 = *n - *m * 3; if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { /* Path 2a - underdetermined, with many more columns than rows and sufficient workspace for an efficient algorithm. */ ldwork = *m; /* Computing MAX Computing MAX */ i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + *m + *m * *nrhs; if (*lwork >= max(i__1,i__2)) { ldwork = *lda; } itau = 1; nwork = *m + 1; /* Compute A=L*Q. (CWorkspace: need 2*M, prefer M+M*NB) */ i__1 = *lwork - nwork + 1; cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info); il = nwork; /* Copy L to WORK(IL), zeroing out above its diagonal. */ clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); i__1 = *m - 1; i__2 = *m - 1; claset_("U", &i__1, &i__2, &c_b1, &c_b1, &work[il + ldwork], & ldwork); itauq = il + ldwork * *m; itaup = itauq + *m; nwork = itaup + *m; ie = 1; nrwork = ie + *m; /* Bidiagonalize L in WORK(IL). (RWorkspace: need M) (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) */ i__1 = *lwork - nwork + 1; cgebrd_(m, m, &work[il], &ldwork, &s[1], &rwork[ie], &work[itauq], &work[itaup], &work[nwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors of L. (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ i__1 = *lwork - nwork + 1; cunmbr_("Q", "L", "C", m, nrhs, m, &work[il], &ldwork, &work[ itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ clalsd_("U", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], info); if (*info != 0) { goto L10; } /* Multiply B by right bidiagonalizing vectors of L. */ i__1 = *lwork - nwork + 1; cunmbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Zero out below first M rows of B. */ i__1 = *n - *m; claset_("F", &i__1, nrhs, &c_b1, &c_b1, &b_ref(*m + 1, 1), ldb); nwork = itau + *m; /* Multiply transpose(Q) by B. (CWorkspace: need NRHS, prefer NRHS*NB) */ i__1 = *lwork - nwork + 1; cunmlq_("L", "C", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[nwork], &i__1, info); } else { /* Path 2 - remaining underdetermined cases. */ itauq = 1; itaup = itauq + *m; nwork = itaup + *m; ie = 1; nrwork = ie + *m; /* Bidiagonalize A. (RWorkspace: need M) (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) */ i__1 = *lwork - nwork + 1; cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], &work[itaup], &work[nwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors. (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) */ i__1 = *lwork - nwork + 1; cunmbr_("Q", "L", "C", m, nrhs, n, &a[a_offset], lda, &work[itauq] , &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ clalsd_("L", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], info); if (*info != 0) { goto L10; } /* Multiply B by right bidiagonalizing vectors of A. */ i__1 = *lwork - nwork + 1; cunmbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] , &b[b_offset], ldb, &work[nwork], &i__1, info); } } /* Undo scaling. */ if (iascl == 1) { clascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info); slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } else if (iascl == 2) { clascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info); slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } if (ibscl == 1) { clascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } else if (ibscl == 2) { clascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } L10: r__1 = (real) maxwrk; q__1.r = r__1, q__1.i = 0.f; work[1].r = q__1.r, work[1].i = q__1.i; return 0; /* End of CGELSD */ } /* cgelsd_ */
/* Subroutine */ HYPRE_Int dlacpy_(char *uplo, integer *m, integer *n, doublereal * a, integer *lda, doublereal *b, integer *ldb) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DLACPY copies all or part of a two-dimensional matrix A to another matrix B. Arguments ========= UPLO (input) CHARACTER*1 Specifies the part of the matrix A to be copied to B. = 'U': Upper triangular part = 'L': Lower triangular part Otherwise: All of the matrix A M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input) DOUBLE PRECISION array, dimension (LDA,N) The m by n matrix A. If UPLO = 'U', only the upper triangle or trapezoid is accessed; if UPLO = 'L', only the lower triangle or trapezoid is accessed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (output) DOUBLE PRECISION array, dimension (LDB,N) On exit, B = A in the locations specified by UPLO. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,M). ===================================================================== Parameter adjustments */ /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; /* Local variables */ static integer i__, j; extern logical lsame_(char *, char *); #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ if (lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = min(j,*m); for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = a_ref(i__, j); /* L10: */ } /* L20: */ } } else if (lsame_(uplo, "L")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j; i__ <= i__2; ++i__) { b_ref(i__, j) = a_ref(i__, j); /* L30: */ } /* L40: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = a_ref(i__, j); /* L50: */ } /* L60: */ } } return 0; /* End of DLACPY */ } /* dlacpy_ */
/* Subroutine */ int sgegv_(char *jobvl, char *jobvr, integer *n, real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *vl, integer *ldvl, real *vr, integer *ldvr, real *work, integer *lwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= This routine is deprecated and has been replaced by routine SGGEV. SGEGV computes for a pair of n-by-n real nonsymmetric matrices A and B, the generalized eigenvalues (alphar +/- alphai*i, beta), and optionally, the left and/or right generalized eigenvectors (VL and VR). A generalized eigenvalue for a pair of matrices (A,B) is, roughly speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0, and even for both being zero. A good beginning reference is the book, "Matrix Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) A right generalized eigenvector corresponding to a generalized eigenvalue w for a pair of matrices (A,B) is a vector r such that (A - w B) r = 0 . A left generalized eigenvector is a vector l such that l**H * (A - w B) = 0, where l**H is the conjugate-transpose of l. Note: this routine performs "full balancing" on A and B -- see "Further Details", below. Arguments ========= JOBVL (input) CHARACTER*1 = 'N': do not compute the left generalized eigenvectors; = 'V': compute the left generalized eigenvectors. JOBVR (input) CHARACTER*1 = 'N': do not compute the right generalized eigenvectors; = 'V': compute the right generalized eigenvectors. N (input) INTEGER The order of the matrices A, B, VL, and VR. N >= 0. A (input/output) REAL array, dimension (LDA, N) On entry, the first of the pair of matrices whose generalized eigenvalues and (optionally) generalized eigenvectors are to be computed. On exit, the contents will have been destroyed. (For a description of the contents of A on exit, see "Further Details", below.) LDA (input) INTEGER The leading dimension of A. LDA >= max(1,N). B (input/output) REAL array, dimension (LDB, N) On entry, the second of the pair of matrices whose generalized eigenvalues and (optionally) generalized eigenvectors are to be computed. On exit, the contents will have been destroyed. (For a description of the contents of B on exit, see "Further Details", below.) LDB (input) INTEGER The leading dimension of B. LDB >= max(1,N). ALPHAR (output) REAL array, dimension (N) ALPHAI (output) REAL array, dimension (N) BETA (output) REAL array, dimension (N) On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will be the generalized eigenvalues. If ALPHAI(j) is zero, then the j-th eigenvalue is real; if positive, then the j-th and (j+1)-st eigenvalues are a complex conjugate pair, with ALPHAI(j+1) negative. Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) may easily over- or underflow, and BETA(j) may even be zero. Thus, the user should avoid naively computing the ratio alpha/beta. However, ALPHAR and ALPHAI will be always less than and usually comparable with norm(A) in magnitude, and BETA always less than and usually comparable with norm(B). VL (output) REAL array, dimension (LDVL,N) If JOBVL = 'V', the left generalized eigenvectors. (See "Purpose", above.) Real eigenvectors take one column, complex take two columns, the first for the real part and the second for the imaginary part. Complex eigenvectors correspond to an eigenvalue with positive imaginary part. Each eigenvector will be scaled so the largest component will have abs(real part) + abs(imag. part) = 1, *except* that for eigenvalues with alpha=beta=0, a zero vector will be returned as the corresponding eigenvector. Not referenced if JOBVL = 'N'. LDVL (input) INTEGER The leading dimension of the matrix VL. LDVL >= 1, and if JOBVL = 'V', LDVL >= N. VR (output) REAL array, dimension (LDVR,N) If JOBVR = 'V', the right generalized eigenvectors. (See "Purpose", above.) Real eigenvectors take one column, complex take two columns, the first for the real part and the second for the imaginary part. Complex eigenvectors correspond to an eigenvalue with positive imaginary part. Each eigenvector will be scaled so the largest component will have abs(real part) + abs(imag. part) = 1, *except* that for eigenvalues with alpha=beta=0, a zero vector will be returned as the corresponding eigenvector. Not referenced if JOBVR = 'N'. LDVR (input) INTEGER The leading dimension of the matrix VR. LDVR >= 1, and if JOBVR = 'V', LDVR >= N. WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,8*N). For good performance, LWORK must generally be larger. To compute the optimal value of LWORK, call ILAENV to get blocksizes (for SGEQRF, SORMQR, and SORGQR.) Then compute: NB -- MAX of the blocksizes for SGEQRF, SORMQR, and SORGQR; The optimal LWORK is: 2*N + MAX( 6*N, N*(NB+1) ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. = 1,...,N: The QZ iteration failed. No eigenvectors have been calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) should be correct for j=INFO+1,...,N. > N: errors that usually indicate LAPACK problems: =N+1: error return from SGGBAL =N+2: error return from SGEQRF =N+3: error return from SORMQR =N+4: error return from SORGQR =N+5: error return from SGGHRD =N+6: error return from SHGEQZ (other than failed iteration) =N+7: error return from STGEVC =N+8: error return from SGGBAK (computing VL) =N+9: error return from SGGBAK (computing VR) =N+10: error return from SLASCL (various calls) Further Details =============== Balancing --------- This driver calls SGGBAL to both permute and scale rows and columns of A and B. The permutations PL and PR are chosen so that PL*A*PR and PL*B*R will be upper triangular except for the diagonal blocks A(i:j,i:j) and B(i:j,i:j), with i and j as close together as possible. The diagonal scaling matrices DL and DR are chosen so that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to one (except for the elements that start out zero.) After the eigenvalues and eigenvectors of the balanced matrices have been computed, SGGBAK transforms the eigenvectors back to what they would have been (in perfect arithmetic) if they had not been balanced. Contents of A and B on Exit -------- -- - --- - -- ---- If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or both), then on exit the arrays A and B will contain the real Schur form[*] of the "balanced" versions of A and B. If no eigenvectors are computed, then only the diagonal blocks will be correct. [*] See SHGEQZ, SGEGS, or read the book "Matrix Computations", by Golub & van Loan, pub. by Johns Hopkins U. Press. ===================================================================== Decode the input arguments Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static real c_b27 = 1.f; static real c_b38 = 0.f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2; real r__1, r__2, r__3, r__4; /* Local variables */ static real absb, anrm, bnrm; static integer itau; static real temp; static logical ilvl, ilvr; static integer lopt; static real anrm1, anrm2, bnrm1, bnrm2, absai, scale, absar, sbeta; extern logical lsame_(char *, char *); static integer ileft, iinfo, icols, iwork, irows, jc, nb, in, jr; static real salfai; extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, integer * ), sggbal_(char *, integer *, real *, integer *, real *, integer *, integer *, integer *, real *, real *, real *, integer *); static real salfar; extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); static real safmin; extern /* Subroutine */ int sgghrd_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, integer * , real *, integer *, integer *); static real safmax; static char chtemp[1]; static logical ldumma[1]; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ijobvl, iright; static logical ilimit; extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer *, real *, real *, integer *, integer *); static integer ijobvr; extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *), stgevc_( char *, char *, logical *, integer *, real *, integer *, real *, integer *, real *, integer *, real *, integer *, integer *, integer *, real *, integer *); static real onepls; static integer lwkmin, nb1, nb2, nb3; extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real * , real *, real *, real *, integer *, real *, integer *, real *, integer *, integer *), sorgqr_(integer *, integer *, integer *, real *, integer *, real *, real *, integer * , integer *); static integer lwkopt; static logical lquery; extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *); static integer ihi, ilo; static real eps; static logical ilv; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define vl_ref(a_1,a_2) vl[(a_2)*vl_dim1 + a_1] #define vr_ref(a_1,a_2) vr[(a_2)*vr_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --alphar; --alphai; --beta; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1 * 1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1 * 1; vr -= vr_offset; --work; /* Function Body */ if (lsame_(jobvl, "N")) { ijobvl = 1; ilvl = FALSE_; } else if (lsame_(jobvl, "V")) { ijobvl = 2; ilvl = TRUE_; } else { ijobvl = -1; ilvl = FALSE_; } if (lsame_(jobvr, "N")) { ijobvr = 1; ilvr = FALSE_; } else if (lsame_(jobvr, "V")) { ijobvr = 2; ilvr = TRUE_; } else { ijobvr = -1; ilvr = FALSE_; } ilv = ilvl || ilvr; /* Test the input arguments Computing MAX */ i__1 = *n << 3; lwkmin = max(i__1,1); lwkopt = lwkmin; work[1] = (real) lwkopt; lquery = *lwork == -1; *info = 0; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldvl < 1 || ilvl && *ldvl < *n) { *info = -12; } else if (*ldvr < 1 || ilvr && *ldvr < *n) { *info = -14; } else if (*lwork < lwkmin && ! lquery) { *info = -16; } if (*info == 0) { nb1 = ilaenv_(&c__1, "SGEQRF", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); nb2 = ilaenv_(&c__1, "SORMQR", " ", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)1); nb3 = ilaenv_(&c__1, "SORGQR", " ", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)1); /* Computing MAX */ i__1 = max(nb1,nb2); nb = max(i__1,nb3); /* Computing MAX */ i__1 = *n * 6, i__2 = *n * (nb + 1); lopt = (*n << 1) + max(i__1,i__2); work[1] = (real) lopt; } if (*info != 0) { i__1 = -(*info); xerbla_("SGEGV ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = slamch_("E") * slamch_("B"); safmin = slamch_("S"); safmin += safmin; safmax = 1.f / safmin; onepls = eps * 4 + 1.f; /* Scale A */ anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]); anrm1 = anrm; anrm2 = 1.f; if (anrm < 1.f) { if (safmax * anrm < 1.f) { anrm1 = safmin; anrm2 = safmax * anrm; } } if (anrm > 0.f) { slascl_("G", &c_n1, &c_n1, &anrm, &c_b27, n, n, &a[a_offset], lda, & iinfo); if (iinfo != 0) { *info = *n + 10; return 0; } } /* Scale B */ bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]); bnrm1 = bnrm; bnrm2 = 1.f; if (bnrm < 1.f) { if (safmax * bnrm < 1.f) { bnrm1 = safmin; bnrm2 = safmax * bnrm; } } if (bnrm > 0.f) { slascl_("G", &c_n1, &c_n1, &bnrm, &c_b27, n, n, &b[b_offset], ldb, & iinfo); if (iinfo != 0) { *info = *n + 10; return 0; } } /* Permute the matrix to make it more nearly triangular Workspace layout: (8*N words -- "work" requires 6*N words) left_permutation, right_permutation, work... */ ileft = 1; iright = *n + 1; iwork = iright + *n; sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ ileft], &work[iright], &work[iwork], &iinfo); if (iinfo != 0) { *info = *n + 1; goto L120; } /* Reduce B to triangular form, and initialize VL and/or VR Workspace layout: ("work..." must have at least N words) left_permutation, right_permutation, tau, work... */ irows = ihi + 1 - ilo; if (ilv) { icols = *n + 1 - ilo; } else { icols = irows; } itau = iwork; iwork = itau + irows; i__1 = *lwork + 1 - iwork; sgeqrf_(&irows, &icols, &b_ref(ilo, ilo), ldb, &work[itau], &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 2; goto L120; } i__1 = *lwork + 1 - iwork; sormqr_("L", "T", &irows, &icols, &irows, &b_ref(ilo, ilo), ldb, &work[ itau], &a_ref(ilo, ilo), lda, &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 3; goto L120; } if (ilvl) { slaset_("Full", n, n, &c_b38, &c_b27, &vl[vl_offset], ldvl) ; i__1 = irows - 1; i__2 = irows - 1; slacpy_("L", &i__1, &i__2, &b_ref(ilo + 1, ilo), ldb, &vl_ref(ilo + 1, ilo), ldvl); i__1 = *lwork + 1 - iwork; sorgqr_(&irows, &irows, &irows, &vl_ref(ilo, ilo), ldvl, &work[itau], &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 4; goto L120; } } if (ilvr) { slaset_("Full", n, n, &c_b38, &c_b27, &vr[vr_offset], ldvr) ; } /* Reduce to generalized Hessenberg form */ if (ilv) { /* Eigenvectors requested -- work on whole matrix. */ sgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &iinfo); } else { sgghrd_("N", "N", &irows, &c__1, &irows, &a_ref(ilo, ilo), lda, & b_ref(ilo, ilo), ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &iinfo); } if (iinfo != 0) { *info = *n + 5; goto L120; } /* Perform QZ algorithm Workspace layout: ("work..." must have at least 1 word) left_permutation, right_permutation, work... */ iwork = itau; if (ilv) { *(unsigned char *)chtemp = 'S'; } else { *(unsigned char *)chtemp = 'E'; } i__1 = *lwork + 1 - iwork; shgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { if (iinfo > 0 && iinfo <= *n) { *info = iinfo; } else if (iinfo > *n && iinfo <= *n << 1) { *info = iinfo - *n; } else { *info = *n + 6; } goto L120; } if (ilv) { /* Compute Eigenvectors (STGEVC requires 6*N words of workspace) */ if (ilvl) { if (ilvr) { *(unsigned char *)chtemp = 'B'; } else { *(unsigned char *)chtemp = 'L'; } } else { *(unsigned char *)chtemp = 'R'; } stgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ iwork], &iinfo); if (iinfo != 0) { *info = *n + 7; goto L120; } /* Undo balancing on VL and VR, rescale */ if (ilvl) { sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, & vl[vl_offset], ldvl, &iinfo); if (iinfo != 0) { *info = *n + 8; goto L120; } i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { if (alphai[jc] < 0.f) { goto L50; } temp = 0.f; if (alphai[jc] == 0.f) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ r__2 = temp, r__3 = (r__1 = vl_ref(jr, jc), dabs(r__1) ); temp = dmax(r__2,r__3); /* L10: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ r__3 = temp, r__4 = (r__1 = vl_ref(jr, jc), dabs(r__1) ) + (r__2 = vl_ref(jr, jc + 1), dabs(r__2)); temp = dmax(r__3,r__4); /* L20: */ } } if (temp < safmin) { goto L50; } temp = 1.f / temp; if (alphai[jc] == 0.f) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vl_ref(jr, jc) = vl_ref(jr, jc) * temp; /* L30: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vl_ref(jr, jc) = vl_ref(jr, jc) * temp; vl_ref(jr, jc + 1) = vl_ref(jr, jc + 1) * temp; /* L40: */ } } L50: ; } } if (ilvr) { sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, & vr[vr_offset], ldvr, &iinfo); if (iinfo != 0) { *info = *n + 9; goto L120; } i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { if (alphai[jc] < 0.f) { goto L100; } temp = 0.f; if (alphai[jc] == 0.f) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ r__2 = temp, r__3 = (r__1 = vr_ref(jr, jc), dabs(r__1) ); temp = dmax(r__2,r__3); /* L60: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ r__3 = temp, r__4 = (r__1 = vr_ref(jr, jc), dabs(r__1) ) + (r__2 = vr_ref(jr, jc + 1), dabs(r__2)); temp = dmax(r__3,r__4); /* L70: */ } } if (temp < safmin) { goto L100; } temp = 1.f / temp; if (alphai[jc] == 0.f) { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vr_ref(jr, jc) = vr_ref(jr, jc) * temp; /* L80: */ } } else { i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { vr_ref(jr, jc) = vr_ref(jr, jc) * temp; vr_ref(jr, jc + 1) = vr_ref(jr, jc + 1) * temp; /* L90: */ } } L100: ; } } /* End of eigenvector calculation */ } /* Undo scaling in alpha, beta Note: this does not give the alpha and beta for the unscaled problem. Un-scaling is limited to avoid underflow in alpha and beta if they are significant. */ i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { absar = (r__1 = alphar[jc], dabs(r__1)); absai = (r__1 = alphai[jc], dabs(r__1)); absb = (r__1 = beta[jc], dabs(r__1)); salfar = anrm * alphar[jc]; salfai = anrm * alphai[jc]; sbeta = bnrm * beta[jc]; ilimit = FALSE_; scale = 1.f; /* Check for significant underflow in ALPHAI Computing MAX */ r__1 = safmin, r__2 = eps * absar, r__1 = max(r__1,r__2), r__2 = eps * absb; if (dabs(salfai) < safmin && absai >= dmax(r__1,r__2)) { ilimit = TRUE_; /* Computing MAX */ r__1 = onepls * safmin, r__2 = anrm2 * absai; scale = onepls * safmin / anrm1 / dmax(r__1,r__2); } else if (salfai == 0.f) { /* If insignificant underflow in ALPHAI, then make the conjugate eigenvalue real. */ if (alphai[jc] < 0.f && jc > 1) { alphai[jc - 1] = 0.f; } else if (alphai[jc] > 0.f && jc < *n) { alphai[jc + 1] = 0.f; } } /* Check for significant underflow in ALPHAR Computing MAX */ r__1 = safmin, r__2 = eps * absai, r__1 = max(r__1,r__2), r__2 = eps * absb; if (dabs(salfar) < safmin && absar >= dmax(r__1,r__2)) { ilimit = TRUE_; /* Computing MAX Computing MAX */ r__3 = onepls * safmin, r__4 = anrm2 * absar; r__1 = scale, r__2 = onepls * safmin / anrm1 / dmax(r__3,r__4); scale = dmax(r__1,r__2); } /* Check for significant underflow in BETA Computing MAX */ r__1 = safmin, r__2 = eps * absar, r__1 = max(r__1,r__2), r__2 = eps * absai; if (dabs(sbeta) < safmin && absb >= dmax(r__1,r__2)) { ilimit = TRUE_; /* Computing MAX Computing MAX */ r__3 = onepls * safmin, r__4 = bnrm2 * absb; r__1 = scale, r__2 = onepls * safmin / bnrm1 / dmax(r__3,r__4); scale = dmax(r__1,r__2); } /* Check for possible overflow when limiting scaling */ if (ilimit) { /* Computing MAX */ r__1 = dabs(salfar), r__2 = dabs(salfai), r__1 = max(r__1,r__2), r__2 = dabs(sbeta); temp = scale * safmin * dmax(r__1,r__2); if (temp > 1.f) { scale /= temp; } if (scale < 1.f) { ilimit = FALSE_; } } /* Recompute un-scaled ALPHAR, ALPHAI, BETA if necessary. */ if (ilimit) { salfar = scale * alphar[jc] * anrm; salfai = scale * alphai[jc] * anrm; sbeta = scale * beta[jc] * bnrm; } alphar[jc] = salfar; alphai[jc] = salfai; beta[jc] = sbeta; /* L110: */ } L120: work[1] = (real) lwkopt; return 0; /* End of SGEGV */ } /* sgegv_ */
/* Subroutine */ int zggev_(char *jobvl, char *jobvr, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex *beta, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work, integer *lwork, doublereal *rwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= ZGGEV computes for a pair of N-by-N complex nonsymmetric matrices (A,B), the generalized eigenvalues, and optionally, the left and/or right generalized eigenvectors. A generalized eigenvalue for a pair of matrices (A,B) is a scalar lambda or a ratio alpha/beta = lambda, such that A - lambda*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0, and even for both being zero. The right generalized eigenvector v(j) corresponding to the generalized eigenvalue lambda(j) of (A,B) satisfies A * v(j) = lambda(j) * B * v(j). The left generalized eigenvector u(j) corresponding to the generalized eigenvalues lambda(j) of (A,B) satisfies u(j)**H * A = lambda(j) * u(j)**H * B where u(j)**H is the conjugate-transpose of u(j). Arguments ========= JOBVL (input) CHARACTER*1 = 'N': do not compute the left generalized eigenvectors; = 'V': compute the left generalized eigenvectors. JOBVR (input) CHARACTER*1 = 'N': do not compute the right generalized eigenvectors; = 'V': compute the right generalized eigenvectors. N (input) INTEGER The order of the matrices A, B, VL, and VR. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA, N) On entry, the matrix A in the pair (A,B). On exit, A has been overwritten. LDA (input) INTEGER The leading dimension of A. LDA >= max(1,N). B (input/output) COMPLEX*16 array, dimension (LDB, N) On entry, the matrix B in the pair (A,B). On exit, B has been overwritten. LDB (input) INTEGER The leading dimension of B. LDB >= max(1,N). ALPHA (output) COMPLEX*16 array, dimension (N) BETA (output) COMPLEX*16 array, dimension (N) On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized eigenvalues. Note: the quotients ALPHA(j)/BETA(j) may easily over- or underflow, and BETA(j) may even be zero. Thus, the user should avoid naively computing the ratio alpha/beta. However, ALPHA will be always less than and usually comparable with norm(A) in magnitude, and BETA always less than and usually comparable with norm(B). VL (output) COMPLEX*16 array, dimension (LDVL,N) If JOBVL = 'V', the left generalized eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. Each eigenvector will be scaled so the largest component will have abs(real part) + abs(imag. part) = 1. Not referenced if JOBVL = 'N'. LDVL (input) INTEGER The leading dimension of the matrix VL. LDVL >= 1, and if JOBVL = 'V', LDVL >= N. VR (output) COMPLEX*16 array, dimension (LDVR,N) If JOBVR = 'V', the right generalized eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. Each eigenvector will be scaled so the largest component will have abs(real part) + abs(imag. part) = 1. Not referenced if JOBVR = 'N'. LDVR (input) INTEGER The leading dimension of the matrix VR. LDVR >= 1, and if JOBVR = 'V', LDVR >= N. WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,2*N). For good performance, LWORK must generally be larger. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. RWORK (workspace/output) DOUBLE PRECISION array, dimension (8*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. =1,...,N: The QZ iteration failed. No eigenvectors have been calculated, but ALPHA(j) and BETA(j) should be correct for j=INFO+1,...,N. > N: =N+1: other then QZ iteration failed in DHGEQZ, =N+2: error return from DTGEVC. ===================================================================== Decode the input arguments Parameter adjustments */ /* Table of constant values */ static doublecomplex c_b1 = {0.,0.}; static doublecomplex c_b2 = {1.,0.}; static integer c__1 = 1; static integer c__0 = 0; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3, d__4; doublecomplex z__1; /* Builtin functions */ double sqrt(doublereal), d_imag(doublecomplex *); /* Local variables */ static doublereal anrm, bnrm; static integer ierr, itau; static doublereal temp; static logical ilvl, ilvr; static integer iwrk; extern logical lsame_(char *, char *); static integer ileft, icols, irwrk, irows; extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); static integer jc, in; extern doublereal dlamch_(char *); static integer jr; extern /* Subroutine */ int zggbak_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublecomplex *, integer *, integer *), zggbal_(char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer * , integer *, doublereal *, doublereal *, doublereal *, integer *); static logical ilascl, ilbscl; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static logical ldumma[1]; static char chtemp[1]; static doublereal bignum; extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, integer *, doublereal *); static integer ijobvl, iright; extern /* Subroutine */ int zgghrd_(char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer * ), zlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublecomplex *, integer *, integer *); static integer ijobvr; extern /* Subroutine */ int zgeqrf_(integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer * ); static doublereal anrmto; static integer lwkmin; static doublereal bnrmto; extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), zlaset_(char *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *), ztgevc_( char *, char *, logical *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, doublecomplex *, doublereal *, integer *), zhgeqz_(char *, char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, integer *); static doublereal smlnum; static integer lwkopt; static logical lquery; extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, integer *); static integer ihi, ilo; static doublereal eps; static logical ilv; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1 #define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)] #define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1 #define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --alpha; --beta; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1 * 1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1 * 1; vr -= vr_offset; --work; --rwork; /* Function Body */ if (lsame_(jobvl, "N")) { ijobvl = 1; ilvl = FALSE_; } else if (lsame_(jobvl, "V")) { ijobvl = 2; ilvl = TRUE_; } else { ijobvl = -1; ilvl = FALSE_; } if (lsame_(jobvr, "N")) { ijobvr = 1; ilvr = FALSE_; } else if (lsame_(jobvr, "V")) { ijobvr = 2; ilvr = TRUE_; } else { ijobvr = -1; ilvr = FALSE_; } ilv = ilvl || ilvr; /* Test the input arguments */ *info = 0; lquery = *lwork == -1; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldvl < 1 || ilvl && *ldvl < *n) { *info = -11; } else if (*ldvr < 1 || ilvr && *ldvr < *n) { *info = -13; } /* Compute workspace (Note: Comments in the code beginning "Workspace:" describe the minimal amount of workspace needed at that point in the code, as well as the preferred amount for good performance. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV. The workspace is computed assuming ILO = 1 and IHI = N, the worst case.) */ lwkmin = 1; if (*info == 0 && (*lwork >= 1 || lquery)) { lwkopt = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", n, &c__1, n, &c__0, ( ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = 1, i__2 = *n << 1; lwkmin = max(i__1,i__2); work[1].r = (doublereal) lwkopt, work[1].i = 0.; } if (*lwork < lwkmin && ! lquery) { *info = -15; } if (*info != 0) { i__1 = -(*info); xerbla_("ZGGEV ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ work[1].r = (doublereal) lwkopt, work[1].i = 0.; if (*n == 0) { return 0; } /* Get machine constants */ eps = dlamch_("E") * dlamch_("B"); smlnum = dlamch_("S"); bignum = 1. / smlnum; dlabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = zlange_("M", n, n, &a[a_offset], lda, &rwork[1]); ilascl = FALSE_; if (anrm > 0. && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { zlascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & ierr); } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = zlange_("M", n, n, &b[b_offset], ldb, &rwork[1]); ilbscl = FALSE_; if (bnrm > 0. && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { zlascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & ierr); } /* Permute the matrices A, B to isolate eigenvalues if possible (Real Workspace: need 6*N) */ ileft = 1; iright = *n + 1; irwrk = iright + *n; zggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ ileft], &rwork[iright], &rwork[irwrk], &ierr); /* Reduce B to triangular form (QR decomposition of B) (Complex Workspace: need N, prefer N*NB) */ irows = ihi + 1 - ilo; if (ilv) { icols = *n + 1 - ilo; } else { icols = irows; } itau = 1; iwrk = itau + irows; i__1 = *lwork + 1 - iwrk; zgeqrf_(&irows, &icols, &b_ref(ilo, ilo), ldb, &work[itau], &work[iwrk], & i__1, &ierr); /* Apply the orthogonal transformation to matrix A (Complex Workspace: need N, prefer N*NB) */ i__1 = *lwork + 1 - iwrk; zunmqr_("L", "C", &irows, &icols, &irows, &b_ref(ilo, ilo), ldb, &work[ itau], &a_ref(ilo, ilo), lda, &work[iwrk], &i__1, &ierr); /* Initialize VL (Complex Workspace: need N, prefer N*NB) */ if (ilvl) { zlaset_("Full", n, n, &c_b1, &c_b2, &vl[vl_offset], ldvl); i__1 = irows - 1; i__2 = irows - 1; zlacpy_("L", &i__1, &i__2, &b_ref(ilo + 1, ilo), ldb, &vl_ref(ilo + 1, ilo), ldvl); i__1 = *lwork + 1 - iwrk; zungqr_(&irows, &irows, &irows, &vl_ref(ilo, ilo), ldvl, &work[itau], &work[iwrk], &i__1, &ierr); } /* Initialize VR */ if (ilvr) { zlaset_("Full", n, n, &c_b1, &c_b2, &vr[vr_offset], ldvr); } /* Reduce to generalized Hessenberg form */ if (ilv) { /* Eigenvectors requested -- work on whole matrix. */ zgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); } else { zgghrd_("N", "N", &irows, &c__1, &irows, &a_ref(ilo, ilo), lda, & b_ref(ilo, ilo), ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); } /* Perform QZ algorithm (Compute eigenvalues, and optionally, the Schur form and Schur vectors) (Complex Workspace: need N) (Real Workspace: need N) */ iwrk = itau; if (ilv) { *(unsigned char *)chtemp = 'S'; } else { *(unsigned char *)chtemp = 'E'; } i__1 = *lwork + 1 - iwrk; zhgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, &vr[ vr_offset], ldvr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); if (ierr != 0) { if (ierr > 0 && ierr <= *n) { *info = ierr; } else if (ierr > *n && ierr <= *n << 1) { *info = ierr - *n; } else { *info = *n + 1; } goto L70; } /* Compute Eigenvectors (Real Workspace: need 2*N) (Complex Workspace: need 2*N) */ if (ilv) { if (ilvl) { if (ilvr) { *(unsigned char *)chtemp = 'B'; } else { *(unsigned char *)chtemp = 'L'; } } else { *(unsigned char *)chtemp = 'R'; } ztgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ iwrk], &rwork[irwrk], &ierr); if (ierr != 0) { *info = *n + 2; goto L70; } /* Undo balancing on VL and VR and normalization (Workspace: none needed) */ if (ilvl) { zggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, &vl[vl_offset], ldvl, &ierr); i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { temp = 0.; i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ i__3 = vl_subscr(jr, jc); d__3 = temp, d__4 = (d__1 = vl[i__3].r, abs(d__1)) + ( d__2 = d_imag(&vl_ref(jr, jc)), abs(d__2)); temp = max(d__3,d__4); /* L10: */ } if (temp < smlnum) { goto L30; } temp = 1. / temp; i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { i__3 = vl_subscr(jr, jc); i__4 = vl_subscr(jr, jc); z__1.r = temp * vl[i__4].r, z__1.i = temp * vl[i__4].i; vl[i__3].r = z__1.r, vl[i__3].i = z__1.i; /* L20: */ } L30: ; } } if (ilvr) { zggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, &vr[vr_offset], ldvr, &ierr); i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { temp = 0.; i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ i__3 = vr_subscr(jr, jc); d__3 = temp, d__4 = (d__1 = vr[i__3].r, abs(d__1)) + ( d__2 = d_imag(&vr_ref(jr, jc)), abs(d__2)); temp = max(d__3,d__4); /* L40: */ } if (temp < smlnum) { goto L60; } temp = 1. / temp; i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { i__3 = vr_subscr(jr, jc); i__4 = vr_subscr(jr, jc); z__1.r = temp * vr[i__4].r, z__1.i = temp * vr[i__4].i; vr[i__3].r = z__1.r, vr[i__3].i = z__1.i; /* L50: */ } L60: ; } } } /* Undo scaling if necessary */ if (ilascl) { zlascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & ierr); } if (ilbscl) { zlascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & ierr); } L70: work[1].r = (doublereal) lwkopt, work[1].i = 0.; return 0; /* End of ZGGEV */ } /* zggev_ */
/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * ldb, real *beta, real *c__, integer *ldc) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3; /* Local variables */ static integer info; static logical nota, notb; static real temp; static integer i__, j, l, ncola; extern logical lsame_(char *, char *); static integer nrowa, nrowb; extern /* Subroutine */ int xerbla_(char *, integer *); #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] /* Purpose ======= SGEMM performs one of the matrix-matrix operations C := alpha*op( A )*op( B ) + beta*C, where op( X ) is one of op( X ) = X or op( X ) = X', alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. Parameters ========== TRANSA - CHARACTER*1. On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows: TRANSA = 'N' or 'n', op( A ) = A. TRANSA = 'T' or 't', op( A ) = A'. TRANSA = 'C' or 'c', op( A ) = A'. Unchanged on exit. TRANSB - CHARACTER*1. On entry, TRANSB specifies the form of op( B ) to be used in the matrix multiplication as follows: TRANSB = 'N' or 'n', op( B ) = B. TRANSB = 'T' or 't', op( B ) = B'. TRANSB = 'C' or 'c', op( B ) = B'. Unchanged on exit. M - INTEGER. On entry, M specifies the number of rows of the matrix op( A ) and of the matrix C. M must be at least zero. Unchanged on exit. N - INTEGER. On entry, N specifies the number of columns of the matrix op( B ) and the number of columns of the matrix C. N must be at least zero. Unchanged on exit. K - INTEGER. On entry, K specifies the number of columns of the matrix op( A ) and the number of rows of the matrix op( B ). K must be at least zero. Unchanged on exit. ALPHA - REAL . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. A - REAL array of DIMENSION ( LDA, ka ), where ka is k when TRANSA = 'N' or 'n', and is m otherwise. Before entry with TRANSA = 'N' or 'n', the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. Unchanged on exit. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When TRANSA = 'N' or 'n' then LDA must be at least max( 1, m ), otherwise LDA must be at least max( 1, k ). Unchanged on exit. B - REAL array of DIMENSION ( LDB, kb ), where kb is n when TRANSB = 'N' or 'n', and is k otherwise. Before entry with TRANSB = 'N' or 'n', the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. Unchanged on exit. LDB - INTEGER. On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When TRANSB = 'N' or 'n' then LDB must be at least max( 1, k ), otherwise LDB must be at least max( 1, n ). Unchanged on exit. BETA - REAL . On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. Unchanged on exit. C - REAL array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). LDC - INTEGER. On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max( 1, m ). Unchanged on exit. Level 3 Blas routine. -- Written on 8-February-1989. Jack Dongarra, Argonne National Laboratory. Iain Duff, AERE Harwell. Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. Set NOTA and NOTB as true if A and B respectively are not transposed and set NROWA, NCOLA and NROWB as the number of rows and columns of A and the number of rows of B respectively. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; /* Function Body */ nota = lsame_(transa, "N"); notb = lsame_(transb, "N"); if (nota) { nrowa = *m; ncola = *k; } else { nrowa = *k; ncola = *m; } if (notb) { nrowb = *k; } else { nrowb = *n; } /* Test the input parameters. */ info = 0; if (! nota && ! lsame_(transa, "C") && ! lsame_( transa, "T")) { info = 1; } else if (! notb && ! lsame_(transb, "C") && ! lsame_(transb, "T")) { info = 2; } else if (*m < 0) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < max(1,nrowa)) { info = 8; } else if (*ldb < max(1,nrowb)) { info = 10; } else if (*ldc < max(1,*m)) { info = 13; } if (info != 0) { xerbla_("SGEMM ", &info); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { return 0; } /* And if alpha.eq.zero. */ if (*alpha == 0.f) { if (*beta == 0.f) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c___ref(i__, j) = 0.f; /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c___ref(i__, j) = *beta * c___ref(i__, j); /* L30: */ } /* L40: */ } } return 0; } /* Start the operations. */ if (notb) { if (nota) { /* Form C := alpha*A*B + beta*C. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*beta == 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c___ref(i__, j) = 0.f; /* L50: */ } } else if (*beta != 1.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c___ref(i__, j) = *beta * c___ref(i__, j); /* L60: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { if (b_ref(l, j) != 0.f) { temp = *alpha * b_ref(l, j); i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { c___ref(i__, j) = c___ref(i__, j) + temp * a_ref( i__, l); /* L70: */ } } /* L80: */ } /* L90: */ } } else { /* Form C := alpha*A'*B + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = 0.f; i__3 = *k; for (l = 1; l <= i__3; ++l) { temp += a_ref(l, i__) * b_ref(l, j); /* L100: */ } if (*beta == 0.f) { c___ref(i__, j) = *alpha * temp; } else { c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__, j); } /* L110: */ } /* L120: */ } } } else { if (nota) { /* Form C := alpha*A*B' + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*beta == 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c___ref(i__, j) = 0.f; /* L130: */ } } else if (*beta != 1.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { c___ref(i__, j) = *beta * c___ref(i__, j); /* L140: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { if (b_ref(j, l) != 0.f) { temp = *alpha * b_ref(j, l); i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { c___ref(i__, j) = c___ref(i__, j) + temp * a_ref( i__, l); /* L150: */ } } /* L160: */ } /* L170: */ } } else { /* Form C := alpha*A'*B' + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = 0.f; i__3 = *k; for (l = 1; l <= i__3; ++l) { temp += a_ref(l, i__) * b_ref(j, l); /* L180: */ } if (*beta == 0.f) { c___ref(i__, j) = *alpha * temp; } else { c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__, j); } /* L190: */ } /* L200: */ } } } return 0; /* End of SGEMM . */ } /* sgemm_ */
/* Subroutine */ int sgelss_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * rank, real *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; real r__1; /* Local variables */ static real anrm, bnrm; static integer itau; static real vdum[1]; static integer i__, iascl, ibscl, chunk; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static real sfmin; static integer minmn, maxmn; extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static integer itaup, itauq; extern /* Subroutine */ int srscl_(integer *, real *, real *, integer *); static integer mnthr, iwork; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); static integer bl, ie, il; extern /* Subroutine */ int slabad_(real *, real *); static integer mm, bdspac; extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer *, real *, real *, real *, real *, real *, integer *, integer *); extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static real bignum; extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer *, real *, real *, integer *, integer *), slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), sgeqrf_(integer *, integer *, real *, integer *, real *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *), sbdsqr_(char *, integer *, integer *, integer *, integer *, real *, real *, real *, integer *, real *, integer *, real *, integer *, real *, integer *), sorgbr_( char *, integer *, integer *, integer *, real *, integer *, real * , real *, integer *, integer *); static integer ldwork; extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer * , real *, integer *, integer *); static integer minwrk, maxwrk; static real smlnum; extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *); static logical lquery; extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *); static real eps, thr; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= SGELSS computes the minimum norm solution to a real linear least squares problem: Minimize 2-norm(| b - A*x |). using the singular value decomposition (SVD) of A. A is an M-by-N matrix which may be rank-deficient. Several right hand side vectors b and solution vectors x can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X. The effective rank of A is determined by treating as zero those singular values which are less than RCOND times the largest singular value. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. A (input/output) REAL array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the first min(m,n) rows of A are overwritten with its right singular vectors, stored rowwise. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input/output) REAL array, dimension (LDB,NRHS) On entry, the M-by-NRHS right hand side matrix B. On exit, B is overwritten by the N-by-NRHS solution matrix X. If m >= n and RANK = n, the residual sum-of-squares for the solution in the i-th column is given by the sum of squares of elements n+1:m in that column. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,max(M,N)). S (output) REAL array, dimension (min(M,N)) The singular values of A in decreasing order. The condition number of A in the 2-norm = S(1)/S(min(m,n)). RCOND (input) REAL RCOND is used to determine the effective rank of A. Singular values S(i) <= RCOND*S(1) are treated as zero. If RCOND < 0, machine precision is used instead. RANK (output) INTEGER The effective rank of A, i.e., the number of singular values which are greater than RCOND*S(1). WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= 1, and also: LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) For good performance, LWORK should generally be larger. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: the algorithm for computing the SVD failed to converge; if INFO = i, i off-diagonal elements of an intermediate bidiagonal form did not converge to zero. ===================================================================== Test the input arguments Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --s; --work; /* Function Body */ *info = 0; minmn = min(*m,*n); maxmn = max(*m,*n); mnthr = ilaenv_(&c__6, "SGELSS", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( ftnlen)1); lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,maxmn)) { *info = -7; } /* Compute workspace (Note: Comments in the code beginning "Workspace:" describe the minimal amount of workspace needed at that point in the code, as well as the preferred amount for good performance. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV.) */ minwrk = 1; if (*info == 0 && (*lwork >= 1 || lquery)) { maxwrk = 0; mm = *m; if (*m >= *n && *m >= mnthr) { /* Path 1a - overdetermined, with many more rows than columns */ mm = *n; /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "SORMQR", "LT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); maxwrk = max(i__1,i__2); } if (*m >= *n) { /* Path 1 - overdetermined or exactly determined Compute workspace needed for SBDSQR Computing MAX */ i__1 = 1, i__2 = *n * 5; bdspac = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "SGEBRD" , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "SORMBR", "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "SORGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); maxwrk = max(maxwrk,bdspac); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * *nrhs; maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2); minwrk = max(i__1,bdspac); maxwrk = max(minwrk,maxwrk); } if (*n > *m) { /* Compute workspace needed for SBDSQR Computing MAX */ i__1 = 1, i__2 = *m * 5; bdspac = max(i__1,i__2); /* Computing MAX */ i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = max(i__1,i__2); minwrk = max(i__1,bdspac); if (*n >= mnthr) { /* Path 2a - underdetermined, with many more columns than rows */ maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * ilaenv_(&c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& c__1, "SORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * ilaenv_(&c__1, "SORGBR", "P", m, m, m, &c_n1, (ftnlen) 6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + *m + bdspac; maxwrk = max(i__1,i__2); if (*nrhs > 1) { /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; maxwrk = max(i__1,i__2); } else { /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 1); maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "SORMLQ", "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); maxwrk = max(i__1,i__2); } else { /* Path 2 - underdetermined */ maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "SORMBR" , "QLT", m, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); maxwrk = max(maxwrk,bdspac); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * *nrhs; maxwrk = max(i__1,i__2); } } maxwrk = max(minwrk,maxwrk); work[1] = (real) maxwrk; } minwrk = max(minwrk,1); if (*lwork < minwrk && ! lquery) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("SGELSS", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { *rank = 0; return 0; } /* Get machine parameters */ eps = slamch_("P"); sfmin = slamch_("S"); smlnum = sfmin / eps; bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]); iascl = 0; if (anrm > 0.f && anrm < smlnum) { /* Scale matrix norm up to SMLNUM */ slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM */ slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.f) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); slaset_("F", &i__1, nrhs, &c_b74, &c_b74, &b[b_offset], ldb); slaset_("F", &minmn, &c__1, &c_b74, &c_b74, &s[1], &c__1); *rank = 0; goto L70; } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); ibscl = 0; if (bnrm > 0.f && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM */ slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM */ slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info); ibscl = 2; } /* Overdetermined case */ if (*m >= *n) { /* Path 1 - overdetermined or exactly determined */ mm = *m; if (*m >= mnthr) { /* Path 1a - overdetermined, with many more rows than columns */ mm = *n; itau = 1; iwork = itau + *n; /* Compute A=Q*R (Workspace: need 2*N, prefer N+N*NB) */ i__1 = *lwork - iwork + 1; sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1, info); /* Multiply B by transpose(Q) (Workspace: need N+NRHS, prefer N+NRHS*NB) */ i__1 = *lwork - iwork + 1; sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[iwork], &i__1, info); /* Zero out below R */ if (*n > 1) { i__1 = *n - 1; i__2 = *n - 1; slaset_("L", &i__1, &i__2, &c_b74, &c_b74, &a_ref(2, 1), lda); } } ie = 1; itauq = ie + *n; itaup = itauq + *n; iwork = itaup + *n; /* Bidiagonalize R in A (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ i__1 = *lwork - iwork + 1; sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[iwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors of R (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ i__1 = *lwork - iwork + 1; sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb, &work[iwork], &i__1, info); /* Generate right bidiagonalizing vectors of R in A (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */ i__1 = *lwork - iwork + 1; sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], & i__1, info); iwork = ie + *n; /* Perform bidiagonal QR iteration multiply B by transpose of left singular vectors compute right singular vectors in A (Workspace: need BDSPAC) */ sbdsqr_("U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, vdum, &c__1, &b[b_offset], ldb, &work[iwork], info) ; if (*info != 0) { goto L70; } /* Multiply B by reciprocals of singular values Computing MAX */ r__1 = *rcond * s[1]; thr = dmax(r__1,sfmin); if (*rcond < 0.f) { /* Computing MAX */ r__1 = eps * s[1]; thr = dmax(r__1,sfmin); } *rank = 0; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (s[i__] > thr) { srscl_(nrhs, &s[i__], &b_ref(i__, 1), ldb); ++(*rank); } else { slaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b_ref(i__, 1), ldb); } /* L10: */ } /* Multiply B by right singular vectors (Workspace: need N, prefer N*NRHS) */ if (*lwork >= *ldb * *nrhs && *nrhs > 1) { sgemm_("T", "N", n, nrhs, n, &c_b108, &a[a_offset], lda, &b[ b_offset], ldb, &c_b74, &work[1], ldb); slacpy_("G", n, nrhs, &work[1], ldb, &b[b_offset], ldb) ; } else if (*nrhs > 1) { chunk = *lwork / *n; i__1 = *nrhs; i__2 = chunk; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = *nrhs - i__ + 1; bl = min(i__3,chunk); sgemm_("T", "N", n, &bl, n, &c_b108, &a[a_offset], lda, & b_ref(1, i__), ldb, &c_b74, &work[1], n); slacpy_("G", n, &bl, &work[1], n, &b_ref(1, i__), ldb); /* L20: */ } } else { sgemv_("T", n, n, &c_b108, &a[a_offset], lda, &b[b_offset], &c__1, &c_b74, &work[1], &c__1); scopy_(n, &work[1], &c__1, &b[b_offset], &c__1); } } else /* if(complicated condition) */ { /* Computing MAX */ i__2 = *m, i__1 = (*m << 1) - 4, i__2 = max(i__2,i__1), i__2 = max( i__2,*nrhs), i__1 = *n - *m * 3; if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__2,i__1)) { /* Path 2a - underdetermined, with many more columns than rows and sufficient workspace for an efficient algorithm */ ldwork = *m; /* Computing MAX Computing MAX */ i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; i__2 = (*m << 2) + *m * *lda + max(i__3,i__4), i__1 = *m * *lda + *m + *m * *nrhs; if (*lwork >= max(i__2,i__1)) { ldwork = *lda; } itau = 1; iwork = *m + 1; /* Compute A=L*Q (Workspace: need 2*M, prefer M+M*NB) */ i__2 = *lwork - iwork + 1; sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2, info); il = iwork; /* Copy L to WORK(IL), zeroing out above it */ slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); i__2 = *m - 1; i__1 = *m - 1; slaset_("U", &i__2, &i__1, &c_b74, &c_b74, &work[il + ldwork], & ldwork); ie = il + ldwork * *m; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize L in WORK(IL) (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ i__2 = *lwork - iwork + 1; sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup], &work[iwork], &i__2, info); /* Multiply B by transpose of left bidiagonalizing vectors of L (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ i__2 = *lwork - iwork + 1; sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ itauq], &b[b_offset], ldb, &work[iwork], &i__2, info); /* Generate right bidiagonalizing vectors of R in WORK(IL) (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) */ i__2 = *lwork - iwork + 1; sorgbr_("P", m, m, m, &work[il], &ldwork, &work[itaup], &work[ iwork], &i__2, info); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing right singular vectors of L in WORK(IL) and multiplying B by transpose of left singular vectors (Workspace: need M*M+M+BDSPAC) */ sbdsqr_("U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], & ldwork, &a[a_offset], lda, &b[b_offset], ldb, &work[iwork] , info); if (*info != 0) { goto L70; } /* Multiply B by reciprocals of singular values Computing MAX */ r__1 = *rcond * s[1]; thr = dmax(r__1,sfmin); if (*rcond < 0.f) { /* Computing MAX */ r__1 = eps * s[1]; thr = dmax(r__1,sfmin); } *rank = 0; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (s[i__] > thr) { srscl_(nrhs, &s[i__], &b_ref(i__, 1), ldb); ++(*rank); } else { slaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b_ref(i__, 1), ldb); } /* L30: */ } iwork = ie; /* Multiply B by right singular vectors of L in WORK(IL) (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) */ if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) { sgemm_("T", "N", m, nrhs, m, &c_b108, &work[il], &ldwork, &b[ b_offset], ldb, &c_b74, &work[iwork], ldb); slacpy_("G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb); } else if (*nrhs > 1) { chunk = (*lwork - iwork + 1) / *m; i__2 = *nrhs; i__1 = chunk; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* Computing MIN */ i__3 = *nrhs - i__ + 1; bl = min(i__3,chunk); sgemm_("T", "N", m, &bl, m, &c_b108, &work[il], &ldwork, & b_ref(1, i__), ldb, &c_b74, &work[iwork], n); slacpy_("G", m, &bl, &work[iwork], n, &b_ref(1, i__), ldb); /* L40: */ } } else { sgemv_("T", m, m, &c_b108, &work[il], &ldwork, &b_ref(1, 1), & c__1, &c_b74, &work[iwork], &c__1); scopy_(m, &work[iwork], &c__1, &b_ref(1, 1), &c__1); } /* Zero out below first M rows of B */ i__1 = *n - *m; slaset_("F", &i__1, nrhs, &c_b74, &c_b74, &b_ref(*m + 1, 1), ldb); iwork = itau + *m; /* Multiply transpose(Q) by B (Workspace: need M+NRHS, prefer M+NRHS*NB) */ i__1 = *lwork - iwork + 1; sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[iwork], &i__1, info); } else { /* Path 2 - remaining underdetermined cases */ ie = 1; itauq = ie + *m; itaup = itauq + *m; iwork = itaup + *m; /* Bidiagonalize A (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ i__1 = *lwork - iwork + 1; sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[iwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ i__1 = *lwork - iwork + 1; sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] , &b[b_offset], ldb, &work[iwork], &i__1, info); /* Generate right bidiagonalizing vectors in A (Workspace: need 4*M, prefer 3*M+M*NB) */ i__1 = *lwork - iwork + 1; sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ iwork], &i__1, info); iwork = ie + *m; /* Perform bidiagonal QR iteration, computing right singular vectors of A in A and multiplying B by transpose of left singular vectors (Workspace: need BDSPAC) */ sbdsqr_("L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, vdum, &c__1, &b[b_offset], ldb, &work[iwork], info); if (*info != 0) { goto L70; } /* Multiply B by reciprocals of singular values Computing MAX */ r__1 = *rcond * s[1]; thr = dmax(r__1,sfmin); if (*rcond < 0.f) { /* Computing MAX */ r__1 = eps * s[1]; thr = dmax(r__1,sfmin); } *rank = 0; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (s[i__] > thr) { srscl_(nrhs, &s[i__], &b_ref(i__, 1), ldb); ++(*rank); } else { slaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b_ref(i__, 1), ldb); } /* L50: */ } /* Multiply B by right singular vectors of A (Workspace: need N, prefer N*NRHS) */ if (*lwork >= *ldb * *nrhs && *nrhs > 1) { sgemm_("T", "N", n, nrhs, m, &c_b108, &a[a_offset], lda, &b[ b_offset], ldb, &c_b74, &work[1], ldb); slacpy_("F", n, nrhs, &work[1], ldb, &b[b_offset], ldb); } else if (*nrhs > 1) { chunk = *lwork / *n; i__1 = *nrhs; i__2 = chunk; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = *nrhs - i__ + 1; bl = min(i__3,chunk); sgemm_("T", "N", n, &bl, m, &c_b108, &a[a_offset], lda, & b_ref(1, i__), ldb, &c_b74, &work[1], n); slacpy_("F", n, &bl, &work[1], n, &b_ref(1, i__), ldb); /* L60: */ } } else { sgemv_("T", m, n, &c_b108, &a[a_offset], lda, &b[b_offset], & c__1, &c_b74, &work[1], &c__1); scopy_(n, &work[1], &c__1, &b[b_offset], &c__1); } } } /* Undo scaling */ if (iascl == 1) { slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info); slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } else if (iascl == 2) { slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info); slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } if (ibscl == 1) { slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } else if (ibscl == 2) { slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } L70: work[1] = (real) maxwrk; return 0; /* End of SGELSS */ } /* sgelss_ */
/* Subroutine */ int dptrfs_(integer *n, integer *nrhs, doublereal *d__, doublereal *e, doublereal *df, doublereal *ef, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DPTRFS improves the computed solution to a system of linear equations when the coefficient matrix is symmetric positive definite and tridiagonal, and provides error bounds and backward error estimates for the solution. Arguments ========= N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. D (input) DOUBLE PRECISION array, dimension (N) The n diagonal elements of the tridiagonal matrix A. E (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) subdiagonal elements of the tridiagonal matrix A. DF (input) DOUBLE PRECISION array, dimension (N) The n diagonal elements of the diagonal matrix D from the factorization computed by DPTTRF. EF (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) subdiagonal elements of the unit bidiagonal factor L from the factorization computed by DPTTRF. B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by DPTTRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) DOUBLE PRECISION array, dimension (NRHS) The forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) DOUBLE PRECISION array, dimension (2*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b11 = 1.; /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; doublereal d__1, d__2, d__3; /* Local variables */ static doublereal safe1, safe2; static integer i__, j; static doublereal s; extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer count; static doublereal bi; extern doublereal dlamch_(char *); static doublereal cx, dx, ex; static integer ix; extern integer idamax_(integer *, doublereal *, integer *); static integer nz; static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal lstres; extern /* Subroutine */ int dpttrs_(integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); static doublereal eps; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] --d__; --e; --df; --ef; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; --ferr; --berr; --work; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*nrhs < 0) { *info = -2; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*ldx < max(1,*n)) { *info = -10; } if (*info != 0) { i__1 = -(*info); xerbla_("DPTRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = 4; eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - A * X. Also compute abs(A)*abs(x) + abs(b) for use in the backward error bound. */ if (*n == 1) { bi = b_ref(1, j); dx = d__[1] * x_ref(1, j); work[*n + 1] = bi - dx; work[1] = abs(bi) + abs(dx); } else { bi = b_ref(1, j); dx = d__[1] * x_ref(1, j); ex = e[1] * x_ref(2, j); work[*n + 1] = bi - dx - ex; work[1] = abs(bi) + abs(dx) + abs(ex); i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { bi = b_ref(i__, j); cx = e[i__ - 1] * x_ref(i__ - 1, j); dx = d__[i__] * x_ref(i__, j); ex = e[i__] * x_ref(i__ + 1, j); work[*n + i__] = bi - cx - dx - ex; work[i__] = abs(bi) + abs(cx) + abs(dx) + abs(ex); /* L30: */ } bi = b_ref(*n, j); cx = e[*n - 1] * x_ref(*n - 1, j); dx = d__[*n] * x_ref(*n, j); work[*n + *n] = bi - cx - dx; work[*n] = abs(bi) + abs(cx) + abs(dx); } /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th components of the numerator and denominator before dividing. */ s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L40: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ dpttrs_(n, &c__1, &df[1], &ef[1], &work[*n + 1], n, info); daxpy_(n, &c_b11, &work[*n + 1], &c__1, &x_ref(1, j), &c__1); lstres = berr[j]; ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(A))* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(A) is the inverse of A abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(A)*abs(X) + abs(B) is less than SAFE2. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L50: */ } ix = idamax_(n, &work[1], &c__1); ferr[j] = work[ix]; /* Estimate the norm of inv(A). Solve M(A) * x = e, where M(A) = (m(i,j)) is given by m(i,j) = abs(A(i,j)), i = j, m(i,j) = -abs(A(i,j)), i .ne. j, and e = [ 1, 1, ..., 1 ]'. Note M(A) = M(L)*D*M(L)'. Solve M(L) * x = e. */ work[1] = 1.; i__2 = *n; for (i__ = 2; i__ <= i__2; ++i__) { work[i__] = work[i__ - 1] * (d__1 = ef[i__ - 1], abs(d__1)) + 1.; /* L60: */ } /* Solve D * M(L)' * x = b. */ work[*n] /= df[*n]; for (i__ = *n - 1; i__ >= 1; --i__) { work[i__] = work[i__] / df[i__] + work[i__ + 1] * (d__1 = ef[i__], abs(d__1)); /* L70: */ } /* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. */ ix = idamax_(n, &work[1], &c__1); ferr[j] *= (d__1 = work[ix], abs(d__1)); /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x_ref(i__, j), abs(d__1)); lstres = max(d__2,d__3); /* L80: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L90: */ } return 0; /* End of DPTRFS */ } /* dptrfs_ */
/* Subroutine */ int dchkgk_(integer *nin, integer *nout) { /* Format strings */ static char fmt_9999[] = "(1x,\002.. test output of DGGBAK .. \002)"; static char fmt_9998[] = "(\002 value of largest test error " " =\002,d12.3)"; static char fmt_9997[] = "(\002 example number where DGGBAL info is not " "0 =\002,i4)"; static char fmt_9996[] = "(\002 example number where DGGBAK(L) info is n" "ot 0 =\002,i4)"; static char fmt_9995[] = "(\002 example number where DGGBAK(R) info is n" "ot 0 =\002,i4)"; static char fmt_9994[] = "(\002 example number having largest error " " =\002,i4)"; static char fmt_9993[] = "(\002 number of examples where info is not 0 " " =\002,i4)"; static char fmt_9992[] = "(\002 total number of examples tested " " =\002,i4)"; /* System generated locals */ integer i__1, i__2; doublereal d__1, d__2, d__3; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ static integer info, lmax[4]; static doublereal rmax, vmax, work[2500] /* was [50][50] */, a[2500] /* was [50][50] */, b[2500] /* was [50][50] */, e[2500] /* was [50][50] */, f[2500] /* was [50][50] */; static integer i__, j, m, n; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer ninfo; static doublereal anorm, bnorm, af[2500] /* was [50][50] */, bf[2500] /* was [50][50] */; extern /* Subroutine */ int dggbak_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), dggbal_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); static doublereal vl[2500] /* was [50][50] */, lscale[50], vr[2500] /* was [50][50] */, rscale[50]; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static integer ihi, ilo; static doublereal eps, vlf[2500] /* was [50][50] */; static integer knt; static doublereal vrf[2500] /* was [50][50] */; /* Fortran I/O blocks */ static cilist io___6 = { 0, 0, 0, 0, 0 }; static cilist io___10 = { 0, 0, 0, 0, 0 }; static cilist io___13 = { 0, 0, 0, 0, 0 }; static cilist io___15 = { 0, 0, 0, 0, 0 }; static cilist io___17 = { 0, 0, 0, 0, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___35 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9992, 0 }; #define a_ref(a_1,a_2) a[(a_2)*50 + a_1 - 51] #define b_ref(a_1,a_2) b[(a_2)*50 + a_1 - 51] #define e_ref(a_1,a_2) e[(a_2)*50 + a_1 - 51] #define f_ref(a_1,a_2) f[(a_2)*50 + a_1 - 51] #define vl_ref(a_1,a_2) vl[(a_2)*50 + a_1 - 51] #define vr_ref(a_1,a_2) vr[(a_2)*50 + a_1 - 51] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DCHKGK tests DGGBAK, a routine for backward balancing of a matrix pair (A, B). Arguments ========= NIN (input) INTEGER The logical unit number for input. NIN > 0. NOUT (input) INTEGER The logical unit number for output. NOUT > 0. ===================================================================== Initialization */ lmax[0] = 0; lmax[1] = 0; lmax[2] = 0; lmax[3] = 0; ninfo = 0; knt = 0; rmax = 0.; eps = dlamch_("Precision"); L10: io___6.ciunit = *nin; s_rsle(&io___6); do_lio(&c__3, &c__1, (char *)&n, (ftnlen)sizeof(integer)); do_lio(&c__3, &c__1, (char *)&m, (ftnlen)sizeof(integer)); e_rsle(); if (n == 0) { goto L100; } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { io___10.ciunit = *nin; s_rsle(&io___10); i__2 = n; for (j = 1; j <= i__2; ++j) { do_lio(&c__5, &c__1, (char *)&a_ref(i__, j), (ftnlen)sizeof( doublereal)); } e_rsle(); /* L20: */ } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { io___13.ciunit = *nin; s_rsle(&io___13); i__2 = n; for (j = 1; j <= i__2; ++j) { do_lio(&c__5, &c__1, (char *)&b_ref(i__, j), (ftnlen)sizeof( doublereal)); } e_rsle(); /* L30: */ } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { io___15.ciunit = *nin; s_rsle(&io___15); i__2 = m; for (j = 1; j <= i__2; ++j) { do_lio(&c__5, &c__1, (char *)&vl_ref(i__, j), (ftnlen)sizeof( doublereal)); } e_rsle(); /* L40: */ } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { io___17.ciunit = *nin; s_rsle(&io___17); i__2 = m; for (j = 1; j <= i__2; ++j) { do_lio(&c__5, &c__1, (char *)&vr_ref(i__, j), (ftnlen)sizeof( doublereal)); } e_rsle(); /* L50: */ } ++knt; anorm = dlange_("M", &n, &n, a, &c__50, work); bnorm = dlange_("M", &n, &n, b, &c__50, work); dlacpy_("FULL", &n, &n, a, &c__50, af, &c__50); dlacpy_("FULL", &n, &n, b, &c__50, bf, &c__50); dggbal_("B", &n, a, &c__50, b, &c__50, &ilo, &ihi, lscale, rscale, work, & info); if (info != 0) { ++ninfo; lmax[0] = knt; } dlacpy_("FULL", &n, &m, vl, &c__50, vlf, &c__50); dlacpy_("FULL", &n, &m, vr, &c__50, vrf, &c__50); dggbak_("B", "L", &n, &ilo, &ihi, lscale, rscale, &m, vl, &c__50, &info); if (info != 0) { ++ninfo; lmax[1] = knt; } dggbak_("B", "R", &n, &ilo, &ihi, lscale, rscale, &m, vr, &c__50, &info); if (info != 0) { ++ninfo; lmax[2] = knt; } /* Test of DGGBAK Check tilde(VL)'*A*tilde(VR) - VL'*tilde(A)*VR where tilde(A) denotes the transformed matrix. */ dgemm_("N", "N", &n, &m, &n, &c_b52, af, &c__50, vr, &c__50, &c_b55, work, &c__50); dgemm_("T", "N", &m, &m, &n, &c_b52, vl, &c__50, work, &c__50, &c_b55, e, &c__50); dgemm_("N", "N", &n, &m, &n, &c_b52, a, &c__50, vrf, &c__50, &c_b55, work, &c__50); dgemm_("T", "N", &m, &m, &n, &c_b52, vlf, &c__50, work, &c__50, &c_b55, f, &c__50); vmax = 0.; i__1 = m; for (j = 1; j <= i__1; ++j) { i__2 = m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = vmax, d__3 = (d__1 = e_ref(i__, j) - f_ref(i__, j), abs( d__1)); vmax = max(d__2,d__3); /* L60: */ } /* L70: */ } vmax /= eps * max(anorm,bnorm); if (vmax > rmax) { lmax[3] = knt; rmax = vmax; } /* Check tilde(VL)'*B*tilde(VR) - VL'*tilde(B)*VR */ dgemm_("N", "N", &n, &m, &n, &c_b52, bf, &c__50, vr, &c__50, &c_b55, work, &c__50); dgemm_("T", "N", &m, &m, &n, &c_b52, vl, &c__50, work, &c__50, &c_b55, e, &c__50); dgemm_("N", "N", &n, &m, &n, &c_b52, b, &c__50, vrf, &c__50, &c_b55, work, &c__50); dgemm_("T", "N", &m, &m, &n, &c_b52, vlf, &c__50, work, &c__50, &c_b55, f, &c__50); vmax = 0.; i__1 = m; for (j = 1; j <= i__1; ++j) { i__2 = m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = vmax, d__3 = (d__1 = e_ref(i__, j) - f_ref(i__, j), abs( d__1)); vmax = max(d__2,d__3); /* L80: */ } /* L90: */ } vmax /= eps * max(anorm,bnorm); if (vmax > rmax) { lmax[3] = knt; rmax = vmax; } goto L10; L100: io___34.ciunit = *nout; s_wsfe(&io___34); e_wsfe(); io___35.ciunit = *nout; s_wsfe(&io___35); do_fio(&c__1, (char *)&rmax, (ftnlen)sizeof(doublereal)); e_wsfe(); io___36.ciunit = *nout; s_wsfe(&io___36); do_fio(&c__1, (char *)&lmax[0], (ftnlen)sizeof(integer)); e_wsfe(); io___37.ciunit = *nout; s_wsfe(&io___37); do_fio(&c__1, (char *)&lmax[1], (ftnlen)sizeof(integer)); e_wsfe(); io___38.ciunit = *nout; s_wsfe(&io___38); do_fio(&c__1, (char *)&lmax[2], (ftnlen)sizeof(integer)); e_wsfe(); io___39.ciunit = *nout; s_wsfe(&io___39); do_fio(&c__1, (char *)&lmax[3], (ftnlen)sizeof(integer)); e_wsfe(); io___40.ciunit = *nout; s_wsfe(&io___40); do_fio(&c__1, (char *)&ninfo, (ftnlen)sizeof(integer)); e_wsfe(); io___41.ciunit = *nout; s_wsfe(&io___41); do_fio(&c__1, (char *)&knt, (ftnlen)sizeof(integer)); e_wsfe(); return 0; /* End of DCHKGK */ } /* dchkgk_ */
/* Subroutine */ int sggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *sdim, real *alphar, real *alphai, real *beta, real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real *rconde, real *rcondv, real *work, integer *lwork, integer *iwork, integer * liwork, logical *bwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= SGGESX computes for a pair of N-by-N real nonsymmetric matrices (A,B), the generalized eigenvalues, the real Schur form (S,T), and, optionally, the left and/or right matrices of Schur vectors (VSL and VSR). This gives the generalized Schur factorization (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) Optionally, it also orders the eigenvalues so that a selected cluster of eigenvalues appears in the leading diagonal blocks of the upper quasi-triangular matrix S and the upper triangular matrix T; computes a reciprocal condition number for the average of the selected eigenvalues (RCONDE); and computes a reciprocal condition number for the right and left deflating subspaces corresponding to the selected eigenvalues (RCONDV). The leading columns of VSL and VSR then form an orthonormal basis for the corresponding left and right eigenspaces (deflating subspaces). A generalized eigenvalue for a pair of matrices (A,B) is a scalar w or a ratio alpha/beta = w, such that A - w*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0 or for both being zero. A pair of matrices (S,T) is in generalized real Schur form if T is upper triangular with non-negative diagonal and S is block upper triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond to real generalized eigenvalues, while 2-by-2 blocks of S will be "standardized" by making the corresponding elements of T have the form: [ a 0 ] [ 0 b ] and the pair of corresponding 2-by-2 blocks in S and T will have a complex conjugate pair of generalized eigenvalues. Arguments ========= JOBVSL (input) CHARACTER*1 = 'N': do not compute the left Schur vectors; = 'V': compute the left Schur vectors. JOBVSR (input) CHARACTER*1 = 'N': do not compute the right Schur vectors; = 'V': compute the right Schur vectors. SORT (input) CHARACTER*1 Specifies whether or not to order the eigenvalues on the diagonal of the generalized Schur form. = 'N': Eigenvalues are not ordered; = 'S': Eigenvalues are ordered (see SELCTG). SELCTG (input) LOGICAL FUNCTION of three REAL arguments SELCTG must be declared EXTERNAL in the calling subroutine. If SORT = 'N', SELCTG is not referenced. If SORT = 'S', SELCTG is used to select eigenvalues to sort to the top left of the Schur form. An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either one of a complex conjugate pair of eigenvalues is selected, then both complex eigenvalues are selected. Note that a selected complex eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering, since ordering may change the value of complex eigenvalues (especially if the eigenvalue is ill-conditioned), in this case INFO is set to N+3. SENSE (input) CHARACTER Determines which reciprocal condition numbers are computed. = 'N' : None are computed; = 'E' : Computed for average of selected eigenvalues only; = 'V' : Computed for selected deflating subspaces only; = 'B' : Computed for both. If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. N (input) INTEGER The order of the matrices A, B, VSL, and VSR. N >= 0. A (input/output) REAL array, dimension (LDA, N) On entry, the first of the pair of matrices. On exit, A has been overwritten by its generalized Schur form S. LDA (input) INTEGER The leading dimension of A. LDA >= max(1,N). B (input/output) REAL array, dimension (LDB, N) On entry, the second of the pair of matrices. On exit, B has been overwritten by its generalized Schur form T. LDB (input) INTEGER The leading dimension of B. LDB >= max(1,N). SDIM (output) INTEGER If SORT = 'N', SDIM = 0. If SORT = 'S', SDIM = number of eigenvalues (after sorting) for which SELCTG is true. (Complex conjugate pairs for which SELCTG is true for either eigenvalue count as 2.) ALPHAR (output) REAL array, dimension (N) ALPHAI (output) REAL array, dimension (N) BETA (output) REAL array, dimension (N) On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i and BETA(j),j=1,...,N are the diagonals of the complex Schur form (S,T) that would result if the 2-by-2 diagonal blocks of the real Schur form of (A,B) were further reduced to triangular form using 2-by-2 complex unitary transformations. If ALPHAI(j) is zero, then the j-th eigenvalue is real; if positive, then the j-th and (j+1)-st eigenvalues are a complex conjugate pair, with ALPHAI(j+1) negative. Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) may easily over- or underflow, and BETA(j) may even be zero. Thus, the user should avoid naively computing the ratio. However, ALPHAR and ALPHAI will be always less than and usually comparable with norm(A) in magnitude, and BETA always less than and usually comparable with norm(B). VSL (output) REAL array, dimension (LDVSL,N) If JOBVSL = 'V', VSL will contain the left Schur vectors. Not referenced if JOBVSL = 'N'. LDVSL (input) INTEGER The leading dimension of the matrix VSL. LDVSL >=1, and if JOBVSL = 'V', LDVSL >= N. VSR (output) REAL array, dimension (LDVSR,N) If JOBVSR = 'V', VSR will contain the right Schur vectors. Not referenced if JOBVSR = 'N'. LDVSR (input) INTEGER The leading dimension of the matrix VSR. LDVSR >= 1, and if JOBVSR = 'V', LDVSR >= N. RCONDE (output) REAL array, dimension ( 2 ) If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the reciprocal condition numbers for the average of the selected eigenvalues. Not referenced if SENSE = 'N' or 'V'. RCONDV (output) REAL array, dimension ( 2 ) If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the reciprocal condition numbers for the selected deflating subspaces. Not referenced if SENSE = 'N' or 'E'. WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= 8*(N+1)+16. If SENSE = 'E', 'V', or 'B', LWORK >= MAX( 8*(N+1)+16, 2*SDIM*(N-SDIM) ). IWORK (workspace) INTEGER array, dimension (LIWORK) Not referenced if SENSE = 'N'. LIWORK (input) INTEGER The dimension of the array WORK. LIWORK >= N+6. BWORK (workspace) LOGICAL array, dimension (N) Not referenced if SORT = 'N'. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. = 1,...,N: The QZ iteration failed. (A,B) are not in Schur form, but ALPHAR(j), ALPHAI(j), and BETA(j) should be correct for j=INFO+1,...,N. > N: =N+1: other than QZ iteration failed in SHGEQZ =N+2: after reordering, roundoff changed values of some complex eigenvalues so that leading eigenvalues in the Generalized Schur form no longer satisfy SELCTG=.TRUE. This could also be caused due to scaling. =N+3: reordering failed in STGSEN. Further details =============== An approximate (asymptotic) bound on the average absolute error of the selected eigenvalues is EPS * norm((A, B)) / RCONDE( 1 ). An approximate (asymptotic) bound on the maximum angular error in the computed deflating subspaces is EPS * norm((A, B)) / RCONDV( 2 ). See LAPACK User's Guide, section 4.11 for more information. ===================================================================== Decode the input arguments Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c__0 = 0; static integer c_n1 = -1; static real c_b37 = 0.f; static real c_b38 = 1.f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, vsr_dim1, vsr_offset, i__1, i__2; real r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer ijob; static real anrm, bnrm; static integer ierr, itau, iwrk, i__; extern logical lsame_(char *, char *); static integer ileft, icols; static logical cursl, ilvsl, ilvsr; static integer irows; static logical lst2sl; extern /* Subroutine */ int slabad_(real *, real *); static integer ip; static real pl; extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, integer * ), sggbal_(char *, integer *, real *, integer *, real *, integer *, integer *, integer *, real *, real *, real *, integer *); static real pr; static logical ilascl, ilbscl; extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); static real safmin; extern /* Subroutine */ int sgghrd_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, integer * , real *, integer *, integer *); static real safmax; extern /* Subroutine */ int xerbla_(char *, integer *); static real bignum; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ijobvl, iright; extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer *, real *, real *, integer *, integer *); static integer ijobvr; extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *); static logical wantsb, wantse, lastsl; static integer liwmin; static real anrmto, bnrmto; static integer minwrk, maxwrk; static logical wantsn; static real smlnum; extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real * , real *, real *, real *, integer *, real *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *), sorgqr_(integer *, integer *, integer *, real *, integer *, real * , real *, integer *, integer *), stgsen_(integer *, logical *, logical *, logical *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, integer *, real *, integer *, integer *, real *, real *, real *, real *, integer *, integer *, integer *, integer *); static logical wantst, wantsv; extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *); static real dif[2]; static integer ihi, ilo; static real eps; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define vsl_ref(a_1,a_2) vsl[(a_2)*vsl_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --alphar; --alphai; --beta; vsl_dim1 = *ldvsl; vsl_offset = 1 + vsl_dim1 * 1; vsl -= vsl_offset; vsr_dim1 = *ldvsr; vsr_offset = 1 + vsr_dim1 * 1; vsr -= vsr_offset; --rconde; --rcondv; --work; --iwork; --bwork; /* Function Body */ if (lsame_(jobvsl, "N")) { ijobvl = 1; ilvsl = FALSE_; } else if (lsame_(jobvsl, "V")) { ijobvl = 2; ilvsl = TRUE_; } else { ijobvl = -1; ilvsl = FALSE_; } if (lsame_(jobvsr, "N")) { ijobvr = 1; ilvsr = FALSE_; } else if (lsame_(jobvsr, "V")) { ijobvr = 2; ilvsr = TRUE_; } else { ijobvr = -1; ilvsr = FALSE_; } wantst = lsame_(sort, "S"); wantsn = lsame_(sense, "N"); wantse = lsame_(sense, "E"); wantsv = lsame_(sense, "V"); wantsb = lsame_(sense, "B"); if (wantsn) { ijob = 0; iwork[1] = 1; } else if (wantse) { ijob = 1; } else if (wantsv) { ijob = 2; } else if (wantsb) { ijob = 4; } /* Test the input arguments */ *info = 0; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (! wantst && ! lsame_(sort, "N")) { *info = -3; } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! wantsn) { *info = -5; } else if (*n < 0) { *info = -6; } else if (*lda < max(1,*n)) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { *info = -16; } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { *info = -18; } /* Compute workspace (Note: Comments in the code beginning "Workspace:" describe the minimal amount of workspace needed at that point in the code, as well as the preferred amount for good performance. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV.) */ minwrk = 1; if (*info == 0 && *lwork >= 1) { minwrk = (*n + 1 << 3) + 16; maxwrk = (*n + 1) * 7 + *n * ilaenv_(&c__1, "SGEQRF", " ", n, &c__1, n, &c__0, (ftnlen)6, (ftnlen)1) + 16; if (ilvsl) { /* Computing MAX */ i__1 = maxwrk, i__2 = (*n + 1 << 3) + *n * ilaenv_(&c__1, "SORGQR" , " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1) + 16; maxwrk = max(i__1,i__2); } work[1] = (real) maxwrk; } if (! wantsn) { liwmin = 1; } else { liwmin = *n + 6; } iwork[1] = liwmin; if (*info == 0 && *lwork < minwrk) { *info = -22; } else if (*info == 0 && ijob >= 1) { if (*liwork < liwmin) { *info = -24; } } if (*info != 0) { i__1 = -(*info); xerbla_("SGGESX", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *sdim = 0; return 0; } /* Get machine constants */ eps = slamch_("P"); safmin = slamch_("S"); safmax = 1.f / safmin; slabad_(&safmin, &safmax); smlnum = sqrt(safmin) / eps; bignum = 1.f / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]); ilascl = FALSE_; if (anrm > 0.f && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { slascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & ierr); } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]); ilbscl = FALSE_; if (bnrm > 0.f && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { slascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & ierr); } /* Permute the matrix to make it more nearly triangular (Workspace: need 6*N + 2*N for permutation parameters) */ ileft = 1; iright = *n + 1; iwrk = iright + *n; sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ ileft], &work[iright], &work[iwrk], &ierr); /* Reduce B to triangular form (QR decomposition of B) (Workspace: need N, prefer N*NB) */ irows = ihi + 1 - ilo; icols = *n + 1 - ilo; itau = iwrk; iwrk = itau + irows; i__1 = *lwork + 1 - iwrk; sgeqrf_(&irows, &icols, &b_ref(ilo, ilo), ldb, &work[itau], &work[iwrk], & i__1, &ierr); /* Apply the orthogonal transformation to matrix A (Workspace: need N, prefer N*NB) */ i__1 = *lwork + 1 - iwrk; sormqr_("L", "T", &irows, &icols, &irows, &b_ref(ilo, ilo), ldb, &work[ itau], &a_ref(ilo, ilo), lda, &work[iwrk], &i__1, &ierr); /* Initialize VSL (Workspace: need N, prefer N*NB) */ if (ilvsl) { slaset_("Full", n, n, &c_b37, &c_b38, &vsl[vsl_offset], ldvsl); i__1 = irows - 1; i__2 = irows - 1; slacpy_("L", &i__1, &i__2, &b_ref(ilo + 1, ilo), ldb, &vsl_ref(ilo + 1, ilo), ldvsl); i__1 = *lwork + 1 - iwrk; sorgqr_(&irows, &irows, &irows, &vsl_ref(ilo, ilo), ldvsl, &work[itau] , &work[iwrk], &i__1, &ierr); } /* Initialize VSR */ if (ilvsr) { slaset_("Full", n, n, &c_b37, &c_b38, &vsr[vsr_offset], ldvsr); } /* Reduce to generalized Hessenberg form (Workspace: none needed) */ sgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); *sdim = 0; /* Perform QZ algorithm, computing Schur vectors if desired (Workspace: need N) */ iwrk = itau; i__1 = *lwork + 1 - iwrk; shgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &ierr); if (ierr != 0) { if (ierr > 0 && ierr <= *n) { *info = ierr; } else if (ierr > *n && ierr <= *n << 1) { *info = ierr - *n; } else { *info = *n + 1; } goto L50; } /* Sort eigenvalues ALPHA/BETA and compute the reciprocal of condition number(s) (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) ) otherwise, need 8*(N+1) ) */ if (wantst) { /* Undo scaling on eigenvalues before SELCTGing */ if (ilascl) { slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, &ierr); slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, &ierr); } if (ilbscl) { slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &ierr); } /* Select eigenvalues */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { bwork[i__] = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); /* L10: */ } /* Reorder eigenvalues, transform Generalized Schur vectors, and compute reciprocal condition numbers */ i__1 = *lwork - iwrk + 1; stgsen_(&ijob, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[ vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pl, &pr, dif, &work[iwrk], &i__1, &iwork[1], liwork, &ierr); if (ijob >= 1) { /* Computing MAX */ i__1 = maxwrk, i__2 = (*sdim << 1) * (*n - *sdim); maxwrk = max(i__1,i__2); } if (ierr == -22) { /* not enough real workspace */ *info = -22; } else { rconde[1] = pl; rconde[2] = pr; rcondv[1] = dif[0]; rcondv[2] = dif[1]; if (ierr == 1) { *info = *n + 3; } } } /* Apply permutation to VSL and VSR (Workspace: none needed) */ if (ilvsl) { sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ vsl_offset], ldvsl, &ierr); } if (ilvsr) { sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ vsr_offset], ldvsr, &ierr); } /* Check if unscaling would cause over/underflow, if so, rescale (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) */ if (ilascl) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (alphai[i__] != 0.f) { if (alphar[i__] / safmax > anrmto / anrm || safmin / alphar[ i__] > anrm / anrmto) { work[1] = (r__1 = a_ref(i__, i__) / alphar[i__], dabs( r__1)); beta[i__] *= work[1]; alphar[i__] *= work[1]; alphai[i__] *= work[1]; } else if (alphai[i__] / safmax > anrmto / anrm || safmin / alphai[i__] > anrm / anrmto) { work[1] = (r__1 = a_ref(i__, i__ + 1) / alphai[i__], dabs( r__1)); beta[i__] *= work[1]; alphar[i__] *= work[1]; alphai[i__] *= work[1]; } } /* L20: */ } } if (ilbscl) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (alphai[i__] != 0.f) { if (beta[i__] / safmax > bnrmto / bnrm || safmin / beta[i__] > bnrm / bnrmto) { work[1] = (r__1 = b_ref(i__, i__) / beta[i__], dabs(r__1)) ; beta[i__] *= work[1]; alphar[i__] *= work[1]; alphai[i__] *= work[1]; } } /* L25: */ } } /* Undo scaling */ if (ilascl) { slascl_("H", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & ierr); slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphar[1], n, & ierr); slascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alphai[1], n, & ierr); } if (ilbscl) { slascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & ierr); slascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & ierr); } /* L30: */ if (wantst) { /* Check if reordering is correct */ lastsl = TRUE_; lst2sl = TRUE_; *sdim = 0; ip = 0; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { cursl = (*selctg)(&alphar[i__], &alphai[i__], &beta[i__]); if (alphai[i__] == 0.f) { if (cursl) { ++(*sdim); } ip = 0; if (cursl && ! lastsl) { *info = *n + 2; } } else { if (ip == 1) { /* Last eigenvalue of conjugate pair */ cursl = cursl || lastsl; lastsl = cursl; if (cursl) { *sdim += 2; } ip = -1; if (cursl && ! lst2sl) { *info = *n + 2; } } else { /* First eigenvalue of conjugate pair */ ip = 1; } } lst2sl = lastsl; lastsl = cursl; /* L40: */ } } L50: work[1] = (real) maxwrk; iwork[1] = liwmin; return 0; /* End of SGGESX */ } /* sggesx_ */
/* Subroutine */ int dbdt02_(integer *m, integer *n, doublereal *b, integer * ldb, doublereal *c__, integer *ldc, doublereal *u, integer *ldu, doublereal *work, doublereal *resid) { /* System generated locals */ integer b_dim1, b_offset, c_dim1, c_offset, u_dim1, u_offset, i__1; doublereal d__1, d__2; /* Local variables */ static integer j; extern /* Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern doublereal dasum_(integer *, doublereal *, integer *); static doublereal bnorm; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); static doublereal realmn, eps; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DBDT02 tests the change of basis C = U' * B by computing the residual RESID = norm( B - U * C ) / ( max(m,n) * norm(B) * EPS ), where B and C are M by N matrices, U is an M by M orthogonal matrix, and EPS is the machine precision. Arguments ========= M (input) INTEGER The number of rows of the matrices B and C and the order of the matrix Q. N (input) INTEGER The number of columns of the matrices B and C. B (input) DOUBLE PRECISION array, dimension (LDB,N) The m by n matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,M). C (input) DOUBLE PRECISION array, dimension (LDC,N) The m by n matrix C, assumed to contain U' * B. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). U (input) DOUBLE PRECISION array, dimension (LDU,M) The m by m orthogonal matrix U. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,M). WORK (workspace) DOUBLE PRECISION array, dimension (M) RESID (output) DOUBLE PRECISION RESID = norm( B - U * C ) / ( max(m,n) * norm(B) * EPS ), ====================================================================== Quick return if possible Parameter adjustments */ b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --work; /* Function Body */ *resid = 0.; if (*m <= 0 || *n <= 0) { return 0; } realmn = (doublereal) max(*m,*n); eps = dlamch_("Precision"); /* Compute norm( B - U * C ) */ i__1 = *n; for (j = 1; j <= i__1; ++j) { dcopy_(m, &b_ref(1, j), &c__1, &work[1], &c__1); dgemv_("No transpose", m, m, &c_b7, &u[u_offset], ldu, &c___ref(1, j), &c__1, &c_b9, &work[1], &c__1); /* Computing MAX */ d__1 = *resid, d__2 = dasum_(m, &work[1], &c__1); *resid = max(d__1,d__2); /* L10: */ } /* Compute norm of B. */ bnorm = dlange_("1", m, n, &b[b_offset], ldb, &work[1]); if (bnorm <= 0.) { if (*resid != 0.) { *resid = 1. / eps; } } else { if (bnorm >= *resid) { *resid = *resid / bnorm / (realmn * eps); } else { if (bnorm < 1.) { /* Computing MIN */ d__1 = *resid, d__2 = realmn * bnorm; *resid = min(d__1,d__2) / bnorm / (realmn * eps); } else { /* Computing MIN */ d__1 = *resid / bnorm; *resid = min(d__1,realmn) / (realmn * eps); } } } return 0; /* End of DBDT02 */ } /* dbdt02_ */
/* Subroutine */ int dsprfs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *afp, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DSPRFS improves the computed solution to a system of linear equations when the coefficient matrix is symmetric indefinite and packed, and provides error bounds and backward error estimates for the solution. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) The upper or lower triangle of the symmetric matrix A, packed columnwise in a linear array. The j-th column of A is stored in the array AP as follows: if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. AFP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) The factored form of the matrix A. AFP contains the block diagonal matrix D and the multipliers used to obtain the factor U or L from the factorization A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as a packed triangular matrix. IPIV (input) INTEGER array, dimension (N) Details of the interchanges and the block structure of D as determined by DSPTRF. B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by DSPTRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) DOUBLE PRECISION array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = -1.; static doublereal c_b14 = 1.; /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ static integer kase; static doublereal safe1, safe2; static integer i__, j, k; static doublereal s; extern logical lsame_(char *, char *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer count; extern /* Subroutine */ int dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); static logical upper; static integer ik, kk; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static doublereal xk; static integer nz; static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal lstres; extern /* Subroutine */ int dsptrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); static doublereal eps; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] --ap; --afp; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*ldx < max(1,*n)) { *info = -10; } if (*info != 0) { i__1 = -(*info); xerbla_("DSPRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - A * X */ dcopy_(n, &b_ref(1, j), &c__1, &work[*n + 1], &c__1); dspmv_(uplo, n, &c_b12, &ap[1], &x_ref(1, j), &c__1, &c_b14, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th components of the numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b_ref(i__, j), abs(d__1)); /* L30: */ } /* Compute abs(A)*abs(X) + abs(B). */ kk = 1; if (upper) { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x_ref(k, j), abs(d__1)); ik = kk; i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = ap[ik], abs(d__1)) * xk; s += (d__1 = ap[ik], abs(d__1)) * (d__2 = x_ref(i__, j), abs(d__2)); ++ik; /* L40: */ } work[k] = work[k] + (d__1 = ap[kk + k - 1], abs(d__1)) * xk + s; kk += k; /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; xk = (d__1 = x_ref(k, j), abs(d__1)); work[k] += (d__1 = ap[kk], abs(d__1)) * xk; ik = kk + 1; i__3 = *n; for (i__ = k + 1; i__ <= i__3; ++i__) { work[i__] += (d__1 = ap[ik], abs(d__1)) * xk; s += (d__1 = ap[ik], abs(d__1)) * (d__2 = x_ref(i__, j), abs(d__2)); ++ik; /* L60: */ } work[k] += s; kk += *n - k + 1; /* L70: */ } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ dsptrs_(uplo, n, &c__1, &afp[1], &ipiv[1], &work[*n + 1], n, info); daxpy_(n, &c_b14, &work[*n + 1], &c__1, &x_ref(1, j), &c__1); lstres = berr[j]; ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(A))* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(A) is the inverse of A abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(A)*abs(X) + abs(B) is less than SAFE2. Use DLACON to estimate the infinity-norm of the matrix inv(A) * diag(W), where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: dlacon_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(A'). */ dsptrs_(uplo, n, &c__1, &afp[1], &ipiv[1], &work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L110: */ } } else if (kase == 2) { /* Multiply by inv(A)*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = work[i__] * work[*n + i__]; /* L120: */ } dsptrs_(uplo, n, &c__1, &afp[1], &ipiv[1], &work[*n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x_ref(i__, j), abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of DSPRFS */ } /* dsprfs_ */
/* Subroutine */ int dpptrs_(char *uplo, integer *n, integer *nrhs, doublereal *ap, doublereal *b, integer *ldb, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DPPTRS solves a system of linear equations A*X = B with a symmetric positive definite matrix A in packed storage using the Cholesky factorization A = U**T*U or A = L*L**T computed by DPPTRF. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. AP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) The triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T, packed columnwise in a linear array. The j-th column of U or L is stored in the array AP as follows: if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer b_dim1, b_offset, i__1; /* Local variables */ static integer i__; extern logical lsame_(char *, char *); static logical upper; extern /* Subroutine */ int dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] --ap; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("DPPTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (upper) { /* Solve A*X = B where A = U'*U. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { /* Solve U'*X = B, overwriting B with X. */ dtpsv_("Upper", "Transpose", "Non-unit", n, &ap[1], &b_ref(1, i__) , &c__1); /* Solve U*X = B, overwriting B with X. */ dtpsv_("Upper", "No transpose", "Non-unit", n, &ap[1], &b_ref(1, i__), &c__1); /* L10: */ } } else { /* Solve A*X = B where A = L*L'. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { /* Solve L*Y = B, overwriting B with X. */ dtpsv_("Lower", "No transpose", "Non-unit", n, &ap[1], &b_ref(1, i__), &c__1); /* Solve L'*X = Y, overwriting B with X. */ dtpsv_("Lower", "Transpose", "Non-unit", n, &ap[1], &b_ref(1, i__) , &c__1); /* L20: */ } } return 0; /* End of DPPTRS */ } /* dpptrs_ */
/* Subroutine */ int dpot02_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *x, integer *ldx, doublereal * b, integer *ldb, doublereal *rwork, doublereal *resid) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1; doublereal d__1, d__2; /* Local variables */ static integer j; extern doublereal dasum_(integer *, doublereal *, integer *); static doublereal anorm, bnorm; extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static doublereal xnorm; extern doublereal dlamch_(char *), dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); static doublereal eps; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DPOT02 computes the residual for the solution of a symmetric system of linear equations A*x = b: RESID = norm(B - A*X) / ( norm(A) * norm(X) * EPS ), where EPS is the machine epsilon. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrix A is stored: = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The number of rows and columns of the matrix A. N >= 0. NRHS (input) INTEGER The number of columns of B, the matrix of right hand sides. NRHS >= 0. A (input) DOUBLE PRECISION array, dimension (LDA,N) The original symmetric matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N) X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) The computed solution vectors for the system of linear equations. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side vectors for the system of linear equations. On exit, B is overwritten with the difference B - A*X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). RWORK (workspace) DOUBLE PRECISION array, dimension (N) RESID (output) DOUBLE PRECISION The maximum over the number of right hand sides of norm(B - A*X) / ( norm(A) * norm(X) * EPS ). ===================================================================== Quick exit if N = 0 or NRHS = 0. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --rwork; /* Function Body */ if (*n <= 0 || *nrhs <= 0) { *resid = 0.; return 0; } /* Exit with RESID = 1/EPS if ANORM = 0. */ eps = dlamch_("Epsilon"); anorm = dlansy_("1", uplo, n, &a[a_offset], lda, &rwork[1]); if (anorm <= 0.) { *resid = 1. / eps; return 0; } /* Compute B - A*X */ dsymm_("Left", uplo, n, nrhs, &c_b5, &a[a_offset], lda, &x[x_offset], ldx, &c_b6, &b[b_offset], ldb); /* Compute the maximum over the number of right hand sides of norm( B - A*X ) / ( norm(A) * norm(X) * EPS ) . */ *resid = 0.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { bnorm = dasum_(n, &b_ref(1, j), &c__1); xnorm = dasum_(n, &x_ref(1, j), &c__1); if (xnorm <= 0.) { *resid = 1. / eps; } else { /* Computing MAX */ d__1 = *resid, d__2 = bnorm / anorm / xnorm / eps; *resid = max(d__1,d__2); } /* L10: */ } return 0; /* End of DPOT02 */ } /* dpot02_ */
/* Subroutine */ int cgerfs_(char *trans, integer *n, integer *nrhs, complex * a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex * b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CGERFS improves the computed solution to a system of linear equations and provides error bounds and backward error estimates for the solution. Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose) N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. A (input) COMPLEX array, dimension (LDA,N) The original N-by-N matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). AF (input) COMPLEX array, dimension (LDAF,N) The factors L and U from the factorization A = P*L*U as computed by CGETRF. LDAF (input) INTEGER The leading dimension of the array AF. LDAF >= max(1,N). IPIV (input) INTEGER array, dimension (N) The pivot indices from CGETRF; for 1<=i<=N, row i of the matrix was interchanged with row IPIV(i). B (input) COMPLEX array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) COMPLEX array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by CGETRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) REAL array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) REAL array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) COMPLEX array, dimension (2*N) RWORK (workspace) REAL array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static complex c_b1 = {1.f,0.f}; static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5; real r__1, r__2, r__3, r__4; complex q__1; /* Builtin functions */ double r_imag(complex *); /* Local variables */ static integer kase; static real safe1, safe2; static integer i__, j, k; static real s; extern logical lsame_(char *, char *); extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *), ccopy_(integer *, complex *, integer *, complex *, integer *), caxpy_(integer *, complex *, complex *, integer *, complex *, integer *); static integer count; extern /* Subroutine */ int clacon_(integer *, complex *, complex *, real *, integer *); static real xk; extern doublereal slamch_(char *); static integer nz; static real safmin; extern /* Subroutine */ int xerbla_(char *, integer *), cgetrs_( char *, integer *, integer *, complex *, integer *, integer *, complex *, integer *, integer *); static logical notran; static char transn[1], transt[1]; static real lstres, eps; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define x_subscr(a_1,a_2) (a_2)*x_dim1 + a_1 #define x_ref(a_1,a_2) x[x_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; af_dim1 = *ldaf; af_offset = 1 + af_dim1 * 1; af -= af_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; --ferr; --berr; --work; --rwork; /* Function Body */ *info = 0; notran = lsame_(trans, "N"); if (! notran && ! lsame_(trans, "T") && ! lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldaf < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldx < max(1,*n)) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("CGERFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.f; berr[j] = 0.f; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transn = 'N'; *(unsigned char *)transt = 'C'; } else { *(unsigned char *)transn = 'C'; *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = slamch_("Epsilon"); safmin = slamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.f; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ ccopy_(n, &b_ref(1, j), &c__1, &work[1], &c__1); q__1.r = -1.f, q__1.i = 0.f; cgemv_(trans, n, n, &q__1, &a[a_offset], lda, &x_ref(1, j), &c__1, & c_b1, &work[1], &c__1); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th components of the numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = b_subscr(i__, j); rwork[i__] = (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(& b_ref(i__, j)), dabs(r__2)); /* L30: */ } /* Compute abs(op(A))*abs(X) + abs(B). */ if (notran) { i__2 = *n; for (k = 1; k <= i__2; ++k) { i__3 = x_subscr(k, j); xk = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x_ref(k, j)), dabs(r__2)); i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = a_subscr(i__, k); rwork[i__] += ((r__1 = a[i__4].r, dabs(r__1)) + (r__2 = r_imag(&a_ref(i__, k)), dabs(r__2))) * xk; /* L40: */ } /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.f; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = a_subscr(i__, k); i__5 = x_subscr(i__, j); s += ((r__1 = a[i__4].r, dabs(r__1)) + (r__2 = r_imag(& a_ref(i__, k)), dabs(r__2))) * ((r__3 = x[i__5].r, dabs(r__3)) + (r__4 = r_imag(&x_ref(i__, j)), dabs(r__4))); /* L60: */ } rwork[k] += s; /* L70: */ } } s = 0.f; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (rwork[i__] > safe2) { /* Computing MAX */ i__3 = i__; r__3 = s, r__4 = ((r__1 = work[i__3].r, dabs(r__1)) + (r__2 = r_imag(&work[i__]), dabs(r__2))) / rwork[i__]; s = dmax(r__3,r__4); } else { /* Computing MAX */ i__3 = i__; r__3 = s, r__4 = ((r__1 = work[i__3].r, dabs(r__1)) + (r__2 = r_imag(&work[i__]), dabs(r__2)) + safe1) / (rwork[i__] + safe1); s = dmax(r__3,r__4); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) { /* Update solution and try again. */ cgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], n, info); caxpy_(n, &c_b1, &work[1], &c__1, &x_ref(1, j), &c__1); lstres = berr[j]; ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use CLACON to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (rwork[i__] > safe2) { i__3 = i__; rwork[i__] = (r__1 = work[i__3].r, dabs(r__1)) + (r__2 = r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[ i__]; } else { i__3 = i__; rwork[i__] = (r__1 = work[i__3].r, dabs(r__1)) + (r__2 = r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[ i__] + safe1; } /* L90: */ } kase = 0; L100: clacon_(n, &work[*n + 1], &work[1], &ferr[j], &kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**H). */ cgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & work[1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = i__; q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] * work[i__5].i; work[i__3].r = q__1.r, work[i__3].i = q__1.i; /* L110: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = i__; q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] * work[i__5].i; work[i__3].r = q__1.r, work[i__3].i = q__1.i; /* L120: */ } cgetrs_(transn, n, &c__1, &af[af_offset], ldaf, &ipiv[1], & work[1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.f; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = x_subscr(i__, j); r__3 = lstres, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x_ref(i__, j)), dabs(r__2)); lstres = dmax(r__3,r__4); /* L130: */ } if (lstres != 0.f) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of CGERFS */ } /* cgerfs_ */
/* Subroutine */ int chptrs_(char *uplo, integer *n, integer *nrhs, complex * ap, integer *ipiv, complex *b, integer *ldb, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CHPTRS solves a system of linear equations A*X = B with a complex Hermitian matrix A stored in packed format using the factorization A = U*D*U**H or A = L*D*L**H computed by CHPTRF. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the details of the factorization are stored as an upper or lower triangular matrix. = 'U': Upper triangular, form is A = U*D*U**H; = 'L': Lower triangular, form is A = L*D*L**H. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. AP (input) COMPLEX array, dimension (N*(N+1)/2) The block diagonal matrix D and the multipliers used to obtain the factor U or L as computed by CHPTRF, stored as a packed triangular matrix. IPIV (input) INTEGER array, dimension (N) Details of the interchanges and the block structure of D as determined by CHPTRF. B (input/output) COMPLEX array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Parameter adjustments */ /* Table of constant values */ static complex c_b1 = {1.f,0.f}; static integer c__1 = 1; /* System generated locals */ integer b_dim1, b_offset, i__1, i__2; complex q__1, q__2, q__3; /* Builtin functions */ void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *); /* Local variables */ static complex akm1k; static integer j, k; static real s; extern logical lsame_(char *, char *); static complex denom; extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *), cgeru_(integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *), cswap_(integer *, complex *, integer *, complex *, integer *); static logical upper; static complex ak, bk; static integer kc, kp; extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), csscal_(integer *, real *, complex *, integer *), xerbla_(char *, integer *); static complex akm1, bkm1; #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] --ap; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("CHPTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (upper) { /* Solve A*X = B, where A = U*D*U'. First solve U*D*X = B, overwriting B with X. K is the main loop index, decreasing from N to 1 in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = *n; kc = *n * (*n + 1) / 2 + 1; L10: /* If K < 1, exit from loop. */ if (k < 1) { goto L30; } kc -= k; if (ipiv[k] > 0) { /* 1 x 1 diagonal block Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } /* Multiply by inv(U(K)), where U(K) is the transformation stored in column K of A. */ i__1 = k - 1; q__1.r = -1.f, q__1.i = 0.f; cgeru_(&i__1, nrhs, &q__1, &ap[kc], &c__1, &b_ref(k, 1), ldb, & b_ref(1, 1), ldb); /* Multiply by the inverse of the diagonal block. */ i__1 = kc + k - 1; s = 1.f / ap[i__1].r; csscal_(nrhs, &s, &b_ref(k, 1), ldb); --k; } else { /* 2 x 2 diagonal block Interchange rows K-1 and -IPIV(K). */ kp = -ipiv[k]; if (kp != k - 1) { cswap_(nrhs, &b_ref(k - 1, 1), ldb, &b_ref(kp, 1), ldb); } /* Multiply by inv(U(K)), where U(K) is the transformation stored in columns K-1 and K of A. */ i__1 = k - 2; q__1.r = -1.f, q__1.i = 0.f; cgeru_(&i__1, nrhs, &q__1, &ap[kc], &c__1, &b_ref(k, 1), ldb, & b_ref(1, 1), ldb); i__1 = k - 2; q__1.r = -1.f, q__1.i = 0.f; cgeru_(&i__1, nrhs, &q__1, &ap[kc - (k - 1)], &c__1, &b_ref(k - 1, 1), ldb, &b_ref(1, 1), ldb); /* Multiply by the inverse of the diagonal block. */ i__1 = kc + k - 2; akm1k.r = ap[i__1].r, akm1k.i = ap[i__1].i; c_div(&q__1, &ap[kc - 1], &akm1k); akm1.r = q__1.r, akm1.i = q__1.i; r_cnjg(&q__2, &akm1k); c_div(&q__1, &ap[kc + k - 1], &q__2); ak.r = q__1.r, ak.i = q__1.i; q__2.r = akm1.r * ak.r - akm1.i * ak.i, q__2.i = akm1.r * ak.i + akm1.i * ak.r; q__1.r = q__2.r - 1.f, q__1.i = q__2.i + 0.f; denom.r = q__1.r, denom.i = q__1.i; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { c_div(&q__1, &b_ref(k - 1, j), &akm1k); bkm1.r = q__1.r, bkm1.i = q__1.i; r_cnjg(&q__2, &akm1k); c_div(&q__1, &b_ref(k, j), &q__2); bk.r = q__1.r, bk.i = q__1.i; i__2 = b_subscr(k - 1, j); q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * bkm1.i + ak.i * bkm1.r; q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i; c_div(&q__1, &q__2, &denom); b[i__2].r = q__1.r, b[i__2].i = q__1.i; i__2 = b_subscr(k, j); q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * bk.i + akm1.i * bk.r; q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i; c_div(&q__1, &q__2, &denom); b[i__2].r = q__1.r, b[i__2].i = q__1.i; /* L20: */ } kc = kc - k + 1; k += -2; } goto L10; L30: /* Next solve U'*X = B, overwriting B with X. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = 1; kc = 1; L40: /* If K > N, exit from loop. */ if (k > *n) { goto L50; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block Multiply by inv(U'(K)), where U(K) is the transformation stored in column K of A. */ if (k > 1) { clacgv_(nrhs, &b_ref(k, 1), ldb); i__1 = k - 1; q__1.r = -1.f, q__1.i = 0.f; cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b[b_offset] , ldb, &ap[kc], &c__1, &c_b1, &b_ref(k, 1), ldb); clacgv_(nrhs, &b_ref(k, 1), ldb); } /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } kc += k; ++k; } else { /* 2 x 2 diagonal block Multiply by inv(U'(K+1)), where U(K+1) is the transformation stored in columns K and K+1 of A. */ if (k > 1) { clacgv_(nrhs, &b_ref(k, 1), ldb); i__1 = k - 1; q__1.r = -1.f, q__1.i = 0.f; cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b[b_offset] , ldb, &ap[kc], &c__1, &c_b1, &b_ref(k, 1), ldb); clacgv_(nrhs, &b_ref(k, 1), ldb); clacgv_(nrhs, &b_ref(k + 1, 1), ldb); i__1 = k - 1; q__1.r = -1.f, q__1.i = 0.f; cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b[b_offset] , ldb, &ap[kc + k], &c__1, &c_b1, &b_ref(k + 1, 1), ldb); clacgv_(nrhs, &b_ref(k + 1, 1), ldb); } /* Interchange rows K and -IPIV(K). */ kp = -ipiv[k]; if (kp != k) { cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } kc = kc + (k << 1) + 1; k += 2; } goto L40; L50: ; } else { /* Solve A*X = B, where A = L*D*L'. First solve L*D*X = B, overwriting B with X. K is the main loop index, increasing from 1 to N in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = 1; kc = 1; L60: /* If K > N, exit from loop. */ if (k > *n) { goto L80; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } /* Multiply by inv(L(K)), where L(K) is the transformation stored in column K of A. */ if (k < *n) { i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; cgeru_(&i__1, nrhs, &q__1, &ap[kc + 1], &c__1, &b_ref(k, 1), ldb, &b_ref(k + 1, 1), ldb); } /* Multiply by the inverse of the diagonal block. */ i__1 = kc; s = 1.f / ap[i__1].r; csscal_(nrhs, &s, &b_ref(k, 1), ldb); kc = kc + *n - k + 1; ++k; } else { /* 2 x 2 diagonal block Interchange rows K+1 and -IPIV(K). */ kp = -ipiv[k]; if (kp != k + 1) { cswap_(nrhs, &b_ref(k + 1, 1), ldb, &b_ref(kp, 1), ldb); } /* Multiply by inv(L(K)), where L(K) is the transformation stored in columns K and K+1 of A. */ if (k < *n - 1) { i__1 = *n - k - 1; q__1.r = -1.f, q__1.i = 0.f; cgeru_(&i__1, nrhs, &q__1, &ap[kc + 2], &c__1, &b_ref(k, 1), ldb, &b_ref(k + 2, 1), ldb); i__1 = *n - k - 1; q__1.r = -1.f, q__1.i = 0.f; cgeru_(&i__1, nrhs, &q__1, &ap[kc + *n - k + 2], &c__1, & b_ref(k + 1, 1), ldb, &b_ref(k + 2, 1), ldb); } /* Multiply by the inverse of the diagonal block. */ i__1 = kc + 1; akm1k.r = ap[i__1].r, akm1k.i = ap[i__1].i; r_cnjg(&q__2, &akm1k); c_div(&q__1, &ap[kc], &q__2); akm1.r = q__1.r, akm1.i = q__1.i; c_div(&q__1, &ap[kc + *n - k + 1], &akm1k); ak.r = q__1.r, ak.i = q__1.i; q__2.r = akm1.r * ak.r - akm1.i * ak.i, q__2.i = akm1.r * ak.i + akm1.i * ak.r; q__1.r = q__2.r - 1.f, q__1.i = q__2.i + 0.f; denom.r = q__1.r, denom.i = q__1.i; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { r_cnjg(&q__2, &akm1k); c_div(&q__1, &b_ref(k, j), &q__2); bkm1.r = q__1.r, bkm1.i = q__1.i; c_div(&q__1, &b_ref(k + 1, j), &akm1k); bk.r = q__1.r, bk.i = q__1.i; i__2 = b_subscr(k, j); q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * bkm1.i + ak.i * bkm1.r; q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i; c_div(&q__1, &q__2, &denom); b[i__2].r = q__1.r, b[i__2].i = q__1.i; i__2 = b_subscr(k + 1, j); q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * bk.i + akm1.i * bk.r; q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i; c_div(&q__1, &q__2, &denom); b[i__2].r = q__1.r, b[i__2].i = q__1.i; /* L70: */ } kc = kc + (*n - k << 1) + 1; k += 2; } goto L60; L80: /* Next solve L'*X = B, overwriting B with X. K is the main loop index, decreasing from N to 1 in steps of 1 or 2, depending on the size of the diagonal blocks. */ k = *n; kc = *n * (*n + 1) / 2 + 1; L90: /* If K < 1, exit from loop. */ if (k < 1) { goto L100; } kc -= *n - k + 1; if (ipiv[k] > 0) { /* 1 x 1 diagonal block Multiply by inv(L'(K)), where L(K) is the transformation stored in column K of A. */ if (k < *n) { clacgv_(nrhs, &b_ref(k, 1), ldb); i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b_ref(k + 1, 1), ldb, &ap[kc + 1], &c__1, &c_b1, &b_ref(k, 1), ldb); clacgv_(nrhs, &b_ref(k, 1), ldb); } /* Interchange rows K and IPIV(K). */ kp = ipiv[k]; if (kp != k) { cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } --k; } else { /* 2 x 2 diagonal block Multiply by inv(L'(K-1)), where L(K-1) is the transformation stored in columns K-1 and K of A. */ if (k < *n) { clacgv_(nrhs, &b_ref(k, 1), ldb); i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b_ref(k + 1, 1), ldb, &ap[kc + 1], &c__1, &c_b1, &b_ref(k, 1), ldb); clacgv_(nrhs, &b_ref(k, 1), ldb); clacgv_(nrhs, &b_ref(k - 1, 1), ldb); i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b_ref(k + 1, 1), ldb, &ap[kc - (*n - k)], &c__1, &c_b1, &b_ref( k - 1, 1), ldb); clacgv_(nrhs, &b_ref(k - 1, 1), ldb); } /* Interchange rows K and -IPIV(K). */ kp = -ipiv[k]; if (kp != k) { cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb); } kc -= *n - k + 2; k += -2; } goto L90; L100: ; } return 0; /* End of CHPTRS */ } /* chptrs_ */
/* Subroutine */ int dlagtm_(char *trans, integer *n, integer *nrhs, doublereal *alpha, doublereal *dl, doublereal *d__, doublereal *du, doublereal *x, integer *ldx, doublereal *beta, doublereal *b, integer *ldb) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 Purpose ======= DLAGTM performs a matrix-vector product of the form B := alpha * A * X + beta * B where A is a tridiagonal matrix of order N, B and X are N by NRHS matrices, and alpha and beta are real scalars, each of which may be 0., 1., or -1. Arguments ========= TRANS (input) CHARACTER Specifies the operation applied to A. = 'N': No transpose, B := alpha * A * X + beta * B = 'T': Transpose, B := alpha * A'* X + beta * B = 'C': Conjugate transpose = Transpose N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices X and B. ALPHA (input) DOUBLE PRECISION The scalar alpha. ALPHA must be 0., 1., or -1.; otherwise, it is assumed to be 0. DL (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) sub-diagonal elements of T. D (input) DOUBLE PRECISION array, dimension (N) The diagonal elements of T. DU (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) super-diagonal elements of T. X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) The N by NRHS matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(N,1). BETA (input) DOUBLE PRECISION The scalar beta. BETA must be 0., 1., or -1.; otherwise, it is assumed to be 1. B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the N by NRHS matrix B. On exit, B is overwritten by the matrix expression B := alpha * A * X + beta * B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(N,1). ===================================================================== Parameter adjustments */ /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; /* Local variables */ static integer i__, j; extern logical lsame_(char *, char *); #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] --dl; --d__; --du; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ if (*n == 0) { return 0; } /* Multiply B by BETA if BETA.NE.1. */ if (*beta == 0.) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = 0.; /* L10: */ } /* L20: */ } } else if (*beta == -1.) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = -b_ref(i__, j); /* L30: */ } /* L40: */ } } if (*alpha == 1.) { if (lsame_(trans, "N")) { /* Compute B := B + A*X */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (*n == 1) { b_ref(1, j) = b_ref(1, j) + d__[1] * x_ref(1, j); } else { b_ref(1, j) = b_ref(1, j) + d__[1] * x_ref(1, j) + du[1] * x_ref(2, j); b_ref(*n, j) = b_ref(*n, j) + dl[*n - 1] * x_ref(*n - 1, j) + d__[*n] * x_ref(*n, j); i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { b_ref(i__, j) = b_ref(i__, j) + dl[i__ - 1] * x_ref( i__ - 1, j) + d__[i__] * x_ref(i__, j) + du[ i__] * x_ref(i__ + 1, j); /* L50: */ } } /* L60: */ } } else { /* Compute B := B + A'*X */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (*n == 1) { b_ref(1, j) = b_ref(1, j) + d__[1] * x_ref(1, j); } else { b_ref(1, j) = b_ref(1, j) + d__[1] * x_ref(1, j) + dl[1] * x_ref(2, j); b_ref(*n, j) = b_ref(*n, j) + du[*n - 1] * x_ref(*n - 1, j) + d__[*n] * x_ref(*n, j); i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { b_ref(i__, j) = b_ref(i__, j) + du[i__ - 1] * x_ref( i__ - 1, j) + d__[i__] * x_ref(i__, j) + dl[ i__] * x_ref(i__ + 1, j); /* L70: */ } } /* L80: */ } } } else if (*alpha == -1.) { if (lsame_(trans, "N")) { /* Compute B := B - A*X */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (*n == 1) { b_ref(1, j) = b_ref(1, j) - d__[1] * x_ref(1, j); } else { b_ref(1, j) = b_ref(1, j) - d__[1] * x_ref(1, j) - du[1] * x_ref(2, j); b_ref(*n, j) = b_ref(*n, j) - dl[*n - 1] * x_ref(*n - 1, j) - d__[*n] * x_ref(*n, j); i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { b_ref(i__, j) = b_ref(i__, j) - dl[i__ - 1] * x_ref( i__ - 1, j) - d__[i__] * x_ref(i__, j) - du[ i__] * x_ref(i__ + 1, j); /* L90: */ } } /* L100: */ } } else { /* Compute B := B - A'*X */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { if (*n == 1) { b_ref(1, j) = b_ref(1, j) - d__[1] * x_ref(1, j); } else { b_ref(1, j) = b_ref(1, j) - d__[1] * x_ref(1, j) - dl[1] * x_ref(2, j); b_ref(*n, j) = b_ref(*n, j) - du[*n - 1] * x_ref(*n - 1, j) - d__[*n] * x_ref(*n, j); i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { b_ref(i__, j) = b_ref(i__, j) - du[i__ - 1] * x_ref( i__ - 1, j) - d__[i__] * x_ref(i__, j) - dl[ i__] * x_ref(i__ + 1, j); /* L110: */ } } /* L120: */ } } } return 0; /* End of DLAGTM */ } /* dlagtm_ */
/* Subroutine */ int zgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *q, integer *ldq, doublecomplex *z__, integer *ldz, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZGGHRD reduces a pair of complex matrices (A,B) to generalized upper Hessenberg form using unitary transformations, where A is a general matrix and B is upper triangular: Q' * A * Z = H and Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular, and Q and Z are unitary, and ' means conjugate transpose. The unitary matrices Q and Z are determined as products of Givens rotations. They may either be formed explicitly, or they may be postmultiplied into input matrices Q1 and Z1, so that Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)' Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)' Arguments ========= COMPQ (input) CHARACTER*1 = 'N': do not compute Q; = 'I': Q is initialized to the unit matrix, and the unitary matrix Q is returned; = 'V': Q must contain a unitary matrix Q1 on entry, and the product Q1*Q is returned. COMPZ (input) CHARACTER*1 = 'N': do not compute Q; = 'I': Q is initialized to the unit matrix, and the unitary matrix Q is returned; = 'V': Q must contain a unitary matrix Q1 on entry, and the product Q1*Q is returned. N (input) INTEGER The order of the matrices A and B. N >= 0. ILO (input) INTEGER IHI (input) INTEGER It is assumed that A is already upper triangular in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally set by a previous call to ZGGBAL; otherwise they should be set to 1 and N respectively. 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. A (input/output) COMPLEX*16 array, dimension (LDA, N) On entry, the N-by-N general matrix to be reduced. On exit, the upper triangle and the first subdiagonal of A are overwritten with the upper Hessenberg matrix H, and the rest is set to zero. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) COMPLEX*16 array, dimension (LDB, N) On entry, the N-by-N upper triangular matrix B. On exit, the upper triangular matrix T = Q' B Z. The elements below the diagonal are set to zero. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). Q (input/output) COMPLEX*16 array, dimension (LDQ, N) If COMPQ='N': Q is not referenced. If COMPQ='I': on entry, Q need not be set, and on exit it contains the unitary matrix Q, where Q' is the product of the Givens transformations which are applied to A and B on the left. If COMPQ='V': on entry, Q must contain a unitary matrix Q1, and on exit this is overwritten by Q1*Q. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. Z (input/output) COMPLEX*16 array, dimension (LDZ, N) If COMPZ='N': Z is not referenced. If COMPZ='I': on entry, Z need not be set, and on exit it contains the unitary matrix Z, which is the product of the Givens transformations which are applied to A and B on the right. If COMPZ='V': on entry, Z must contain a unitary matrix Z1, and on exit this is overwritten by Z1*Z. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== This routine reduces A to Hessenberg and B to triangular form by an unblocked reduction, as described in _Matrix_Computations_, by Golub and van Loan (Johns Hopkins Press). ===================================================================== Decode COMPQ Parameter adjustments */ /* Table of constant values */ static doublecomplex c_b1 = {1.,0.}; static doublecomplex c_b2 = {0.,0.}; static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2, i__3; doublecomplex z__1; /* Builtin functions */ void d_cnjg(doublecomplex *, doublecomplex *); /* Local variables */ static integer jcol, jrow; extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublecomplex *); static doublereal c__; static doublecomplex s; extern logical lsame_(char *, char *); static doublecomplex ctemp; extern /* Subroutine */ int xerbla_(char *, integer *); static integer icompq, icompz; extern /* Subroutine */ int zlaset_(char *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *), zlartg_(doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, doublecomplex *); static logical ilq, ilz; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1 #define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)] #define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1 #define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; /* Function Body */ if (lsame_(compq, "N")) { ilq = FALSE_; icompq = 1; } else if (lsame_(compq, "V")) { ilq = TRUE_; icompq = 2; } else if (lsame_(compq, "I")) { ilq = TRUE_; icompq = 3; } else { icompq = 0; } /* Decode COMPZ */ if (lsame_(compz, "N")) { ilz = FALSE_; icompz = 1; } else if (lsame_(compz, "V")) { ilz = TRUE_; icompz = 2; } else if (lsame_(compz, "I")) { ilz = TRUE_; icompz = 3; } else { icompz = 0; } /* Test the input parameters. */ *info = 0; if (icompq <= 0) { *info = -1; } else if (icompz <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ilo < 1) { *info = -4; } else if (*ihi > *n || *ihi < *ilo - 1) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (ilq && *ldq < *n || *ldq < 1) { *info = -11; } else if (ilz && *ldz < *n || *ldz < 1) { *info = -13; } if (*info != 0) { i__1 = -(*info); xerbla_("ZGGHRD", &i__1); return 0; } /* Initialize Q and Z if desired. */ if (icompq == 3) { zlaset_("Full", n, n, &c_b2, &c_b1, &q[q_offset], ldq); } if (icompz == 3) { zlaset_("Full", n, n, &c_b2, &c_b1, &z__[z_offset], ldz); } /* Quick return if possible */ if (*n <= 1) { return 0; } /* Zero out lower triangle of B */ i__1 = *n - 1; for (jcol = 1; jcol <= i__1; ++jcol) { i__2 = *n; for (jrow = jcol + 1; jrow <= i__2; ++jrow) { i__3 = b_subscr(jrow, jcol); b[i__3].r = 0., b[i__3].i = 0.; /* L10: */ } /* L20: */ } /* Reduce A and B */ i__1 = *ihi - 2; for (jcol = *ilo; jcol <= i__1; ++jcol) { i__2 = jcol + 2; for (jrow = *ihi; jrow >= i__2; --jrow) { /* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */ i__3 = a_subscr(jrow - 1, jcol); ctemp.r = a[i__3].r, ctemp.i = a[i__3].i; zlartg_(&ctemp, &a_ref(jrow, jcol), &c__, &s, &a_ref(jrow - 1, jcol)); i__3 = a_subscr(jrow, jcol); a[i__3].r = 0., a[i__3].i = 0.; i__3 = *n - jcol; zrot_(&i__3, &a_ref(jrow - 1, jcol + 1), lda, &a_ref(jrow, jcol + 1), lda, &c__, &s); i__3 = *n + 2 - jrow; zrot_(&i__3, &b_ref(jrow - 1, jrow - 1), ldb, &b_ref(jrow, jrow - 1), ldb, &c__, &s); if (ilq) { d_cnjg(&z__1, &s); zrot_(n, &q_ref(1, jrow - 1), &c__1, &q_ref(1, jrow), &c__1, & c__, &z__1); } /* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */ i__3 = b_subscr(jrow, jrow); ctemp.r = b[i__3].r, ctemp.i = b[i__3].i; zlartg_(&ctemp, &b_ref(jrow, jrow - 1), &c__, &s, &b_ref(jrow, jrow)); i__3 = b_subscr(jrow, jrow - 1); b[i__3].r = 0., b[i__3].i = 0.; zrot_(ihi, &a_ref(1, jrow), &c__1, &a_ref(1, jrow - 1), &c__1, & c__, &s); i__3 = jrow - 1; zrot_(&i__3, &b_ref(1, jrow), &c__1, &b_ref(1, jrow - 1), &c__1, & c__, &s); if (ilz) { zrot_(n, &z___ref(1, jrow), &c__1, &z___ref(1, jrow - 1), & c__1, &c__, &s); } /* L30: */ } /* L40: */ } return 0; /* End of ZGGHRD */ } /* zgghrd_ */
/* Subroutine */ int ctgsna_(char *job, char *howmny, logical *select, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *vl, integer *ldvl, complex *vr, integer *ldvr, real *s, real *dif, integer *mm, integer *m, complex *work, integer *lwork, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CTGSNA estimates reciprocal condition numbers for specified eigenvalues and/or eigenvectors of a matrix pair (A, B). (A, B) must be in generalized Schur canonical form, that is, A and B are both upper triangular. Arguments ========= JOB (input) CHARACTER*1 Specifies whether condition numbers are required for eigenvalues (S) or eigenvectors (DIF): = 'E': for eigenvalues only (S); = 'V': for eigenvectors only (DIF); = 'B': for both eigenvalues and eigenvectors (S and DIF). HOWMNY (input) CHARACTER*1 = 'A': compute condition numbers for all eigenpairs; = 'S': compute condition numbers for selected eigenpairs specified by the array SELECT. SELECT (input) LOGICAL array, dimension (N) If HOWMNY = 'S', SELECT specifies the eigenpairs for which condition numbers are required. To select condition numbers for the corresponding j-th eigenvalue and/or eigenvector, SELECT(j) must be set to .TRUE.. If HOWMNY = 'A', SELECT is not referenced. N (input) INTEGER The order of the square matrix pair (A, B). N >= 0. A (input) COMPLEX array, dimension (LDA,N) The upper triangular matrix A in the pair (A,B). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input) COMPLEX array, dimension (LDB,N) The upper triangular matrix B in the pair (A, B). LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). VL (input) COMPLEX array, dimension (LDVL,M) IF JOB = 'E' or 'B', VL must contain left eigenvectors of (A, B), corresponding to the eigenpairs specified by HOWMNY and SELECT. The eigenvectors must be stored in consecutive columns of VL, as returned by CTGEVC. If JOB = 'V', VL is not referenced. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; and If JOB = 'E' or 'B', LDVL >= N. VR (input) COMPLEX array, dimension (LDVR,M) IF JOB = 'E' or 'B', VR must contain right eigenvectors of (A, B), corresponding to the eigenpairs specified by HOWMNY and SELECT. The eigenvectors must be stored in consecutive columns of VR, as returned by CTGEVC. If JOB = 'V', VR is not referenced. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; If JOB = 'E' or 'B', LDVR >= N. S (output) REAL array, dimension (MM) If JOB = 'E' or 'B', the reciprocal condition numbers of the selected eigenvalues, stored in consecutive elements of the array. If JOB = 'V', S is not referenced. DIF (output) REAL array, dimension (MM) If JOB = 'V' or 'B', the estimated reciprocal condition numbers of the selected eigenvectors, stored in consecutive elements of the array. If the eigenvalues cannot be reordered to compute DIF(j), DIF(j) is set to 0; this can only occur when the true value would be very small anyway. For each eigenvalue/vector specified by SELECT, DIF stores a Frobenius norm-based estimate of Difl. If JOB = 'E', DIF is not referenced. MM (input) INTEGER The number of elements in the arrays S and DIF. MM >= M. M (output) INTEGER The number of elements of the arrays S and DIF used to store the specified condition numbers; for each selected eigenvalue one element is used. If HOWMNY = 'A', M is set to N. WORK (workspace/output) COMPLEX array, dimension (LWORK) If JOB = 'E', WORK is not referenced. Otherwise, on exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= 1. If JOB = 'V' or 'B', LWORK >= 2*N*N. IWORK (workspace) INTEGER array, dimension (N+2) If JOB = 'E', IWORK is not referenced. INFO (output) INTEGER = 0: Successful exit < 0: If INFO = -i, the i-th argument had an illegal value Further Details =============== The reciprocal of the condition number of the i-th generalized eigenvalue w = (a, b) is defined as S(I) = (|v'Au|**2 + |v'Bu|**2)**(1/2) / (norm(u)*norm(v)) where u and v are the right and left eigenvectors of (A, B) corresponding to w; |z| denotes the absolute value of the complex number, and norm(u) denotes the 2-norm of the vector u. The pair (a, b) corresponds to an eigenvalue w = a/b (= v'Au/v'Bu) of the matrix pair (A, B). If both a and b equal zero, then (A,B) is singular and S(I) = -1 is returned. An approximate error bound on the chordal distance between the i-th computed generalized eigenvalue w and the corresponding exact eigenvalue lambda is chord(w, lambda) <= EPS * norm(A, B) / S(I), where EPS is the machine precision. The reciprocal of the condition number of the right eigenvector u and left eigenvector v corresponding to the generalized eigenvalue w is defined as follows. Suppose (A, B) = ( a * ) ( b * ) 1 ( 0 A22 ),( 0 B22 ) n-1 1 n-1 1 n-1 Then the reciprocal condition number DIF(I) is Difl[(a, b), (A22, B22)] = sigma-min( Zl ) where sigma-min(Zl) denotes the smallest singular value of Zl = [ kron(a, In-1) -kron(1, A22) ] [ kron(b, In-1) -kron(1, B22) ]. Here In-1 is the identity matrix of size n-1 and X' is the conjugate transpose of X. kron(X, Y) is the Kronecker product between the matrices X and Y. We approximate the smallest singular value of Zl with an upper bound. This is done by CLATDF. An approximate error bound for a computed eigenvector VL(i) or VR(i) is given by EPS * norm(A, B) / DIF(i). See ref. [2-3] for more details and further references. Based on contributions by Bo Kagstrom and Peter Poromaa, Department of Computing Science, Umea University, S-901 87 Umea, Sweden. References ========== [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the Generalized Real Schur Form of a Regular Matrix Pair (A, B), in M.S. Moonen et al (eds), Linear Algebra for Large Scale and Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified Eigenvalues of a Regular Matrix Pair (A, B) and Condition Estimation: Theory, Algorithms and Software, Report UMINF - 94.04, Department of Computing Science, Umea University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. To appear in Numerical Algorithms, 1996. [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software for Solving the Generalized Sylvester Equation and Estimating the Separation between Regular Matrix Pairs, Report UMINF - 93.23, Department of Computing Science, Umea University, S-901 87 Umea, Sweden, December 1993, Revised April 1994, Also as LAPACK Working Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996. ===================================================================== Decode and test the input parameters Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static complex c_b19 = {1.f,0.f}; static complex c_b20 = {0.f,0.f}; static logical c_false = FALSE_; static integer c__3 = 3; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2; real r__1, r__2; complex q__1; /* Builtin functions */ double c_abs(complex *); /* Local variables */ static real cond; static integer ierr, ifst; static real lnrm; static complex yhax, yhbx; static integer ilst; static real rnrm; static integer i__, k; static real scale; extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer *, complex *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *); static integer lwmin; static logical wants; static integer llwrk, n1, n2; static complex dummy[1]; extern doublereal scnrm2_(integer *, complex *, integer *), slapy2_(real * , real *); static complex dummy1[1]; extern /* Subroutine */ int slabad_(real *, real *); static integer ks; extern doublereal slamch_(char *); extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), ctgexc_(logical *, logical *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, integer *), xerbla_(char *, integer *); static real bignum; static logical wantbh, wantdf, somcon; extern /* Subroutine */ int ctgsyl_(char *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, real *, real *, complex *, integer *, integer *, integer *); static real smlnum; static logical lquery; static real eps; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1 #define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)] #define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1 #define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)] --select; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1 * 1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1 * 1; vr -= vr_offset; --s; --dif; --work; --iwork; /* Function Body */ wantbh = lsame_(job, "B"); wants = lsame_(job, "E") || wantbh; wantdf = lsame_(job, "V") || wantbh; somcon = lsame_(howmny, "S"); *info = 0; lquery = *lwork == -1; if (lsame_(job, "V") || lsame_(job, "B")) { /* Computing MAX */ i__1 = 1, i__2 = (*n << 1) * *n; lwmin = max(i__1,i__2); } else { lwmin = 1; } if (! wants && ! wantdf) { *info = -1; } else if (! lsame_(howmny, "A") && ! somcon) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } else if (wants && *ldvl < *n) { *info = -10; } else if (wants && *ldvr < *n) { *info = -12; } else { /* Set M to the number of eigenpairs for which condition numbers are required, and test MM. */ if (somcon) { *m = 0; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (select[k]) { ++(*m); } /* L10: */ } } else { *m = *n; } if (*mm < *m) { *info = -15; } else if (*lwork < lwmin && ! lquery) { *info = -18; } } if (*info == 0) { work[1].r = (real) lwmin, work[1].i = 0.f; } if (*info != 0) { i__1 = -(*info); xerbla_("CTGSNA", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = slamch_("P"); smlnum = slamch_("S") / eps; bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); llwrk = *lwork - (*n << 1) * *n; ks = 0; i__1 = *n; for (k = 1; k <= i__1; ++k) { /* Determine whether condition numbers are required for the k-th eigenpair. */ if (somcon) { if (! select[k]) { goto L20; } } ++ks; if (wants) { /* Compute the reciprocal condition number of the k-th eigenvalue. */ rnrm = scnrm2_(n, &vr_ref(1, ks), &c__1); lnrm = scnrm2_(n, &vl_ref(1, ks), &c__1); cgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr_ref(1, ks), & c__1, &c_b20, &work[1], &c__1); cdotc_(&q__1, n, &work[1], &c__1, &vl_ref(1, ks), &c__1); yhax.r = q__1.r, yhax.i = q__1.i; cgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr_ref(1, ks), & c__1, &c_b20, &work[1], &c__1); cdotc_(&q__1, n, &work[1], &c__1, &vl_ref(1, ks), &c__1); yhbx.r = q__1.r, yhbx.i = q__1.i; r__1 = c_abs(&yhax); r__2 = c_abs(&yhbx); cond = slapy2_(&r__1, &r__2); if (cond == 0.f) { s[ks] = -1.f; } else { s[ks] = cond / (rnrm * lnrm); } } if (wantdf) { if (*n == 1) { r__1 = c_abs(&a_ref(1, 1)); r__2 = c_abs(&b_ref(1, 1)); dif[ks] = slapy2_(&r__1, &r__2); goto L20; } /* Estimate the reciprocal condition number of the k-th eigenvectors. Copy the matrix (A, B) to the array WORK and move the (k,k)th pair to the (1,1) position. */ clacpy_("Full", n, n, &a[a_offset], lda, &work[1], n); clacpy_("Full", n, n, &b[b_offset], ldb, &work[*n * *n + 1], n); ifst = k; ilst = 1; ctgexc_(&c_false, &c_false, n, &work[1], n, &work[*n * *n + 1], n, dummy, &c__1, dummy1, &c__1, &ifst, &ilst, &ierr); if (ierr > 0) { /* Ill-conditioned problem - swap rejected. */ dif[ks] = 0.f; } else { /* Reordering successful, solve generalized Sylvester equation for R and L, A22 * R - L * A11 = A12 B22 * R - L * B11 = B12, and compute estimate of Difl[(A11,B11), (A22, B22)]. */ n1 = 1; n2 = *n - n1; i__ = *n * *n + 1; ctgsyl_("N", &c__3, &n2, &n1, &work[*n * n1 + n1 + 1], n, & work[1], n, &work[n1 + 1], n, &work[*n * n1 + n1 + i__], n, &work[i__], n, &work[n1 + i__], n, &scale, & dif[ks], &work[(*n * *n << 1) + 1], &llwrk, &iwork[1], &ierr); } } L20: ; } work[1].r = (real) lwmin, work[1].i = 0.f; return 0; /* End of CTGSNA */ } /* ctgsna_ */
/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * poles, integer *givptr, integer *givcol, integer *ldgcol, integer * perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ static integer nlvl, sqre, i__, j; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer inode, ndiml, ndimr; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer i1; extern /* Subroutine */ int dlals0_(integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal dopbl3_(char *, integer *, integer *, integer *) ; static integer ic, lf, nd, ll, nl, nr; extern /* Subroutine */ int dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), xerbla_(char *, integer *); static integer im1, nlf, nrf, lvl, ndb1, nlp1, lvl2, nrp1; #define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1] #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_ref(a_1,a_2) bx[(a_2)*bx_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] /* -- LAPACK routine (instrumented to count ops, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DLALSA is an itermediate step in solving the least squares problem by computing the SVD of the coefficient matrix in compact form (The singular vectors are computed as products of simple orthorgonal matrices.). If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector matrix of an upper bidiagonal matrix to the right hand side; and if ICOMPQ = 1, DLALSA applies the right singular vector matrix to the right hand side. The singular vector matrices were generated in compact form by DLALSA. Arguments ========= ICOMPQ (input) INTEGER Specifies whether the left or the right singular vector matrix is involved. = 0: Left singular vector matrix = 1: Right singular vector matrix SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The row and column dimensions of the upper bidiagonal matrix. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input) DOUBLE PRECISION array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B in the calling subprogram. LDB must be at least max(1,MAX( M, N ) ). BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) On exit, the result of applying the left or right singular vector matrix to B. LDBX (input) INTEGER The leading dimension of BX. U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). On entry, U contains the left singular vector matrices of all subproblems at the bottom level. LDU (input) INTEGER, LDU = > N. The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). On entry, VT' contains the right singular vector matrices of all subproblems at the bottom level. K (input) INTEGER array, dimension ( N ). DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(*, 2 * I) record the normalizing factors of the right singular vectors matrices of subproblems on I-th level. Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). On entry, Z(1, I) contains the components of the deflation- adjusted updating row vector for subproblems on the I-th level. POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old singular values involved in the secular equations on the I-th level. GIVPTR (input) INTEGER array, dimension ( N ). On entry, GIVPTR( I ) records the number of Givens rotations performed on the I-th problem on the computation tree. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the locations of Givens rotations performed on the I-th level on the computation tree. LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). On entry, PERM(*, I) records permutations done on the I-th level of the computation tree. GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- values of Givens rotations performed on the I-th level on the computation tree. C (input) DOUBLE PRECISION array, dimension ( N ). On entry, if the I-th subproblem is not square, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. S (input) DOUBLE PRECISION array, dimension ( N ). On entry, if the I-th subproblem is not square, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. WORK (workspace) DOUBLE PRECISION array. The dimension must be at least N. IWORK (workspace) INTEGER array. The dimension must be at least 3 * N INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. ===================================================================== Test the input parameters. Parameter adjustments */ b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1 * 1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1 * 1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; --c__; --s; --work; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < *smlsiz) { *info = -3; } else if (*nrhs < 1) { *info = -4; } else if (*ldb < *n) { *info = -6; } else if (*ldbx < *n) { *info = -8; } else if (*ldu < *n) { *info = -10; } else if (*ldgcol < *n) { *info = -19; } if (*info != 0) { i__1 = -(*info); xerbla_("DLALSA", &i__1); return 0; } /* Book-keeping and setting up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* The following code applies back the left singular vector factors. For applying back the right singular vector factors, go to 50. */ if (*icompq == 1) { goto L50; } /* The nodes on the bottom level of the tree were solved by DLASDQ. The corresponding left and right singular vector matrices are in explicit form. First apply back the left singular vector matrices. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; latime_1.ops += dopbl3_("DGEMM ", &nl, nrhs, &nl); latime_1.ops += dopbl3_("DGEMM ", &nr, nrhs, &nr); dgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &b_ref( nlf, 1), ldb, &c_b10, &bx_ref(nlf, 1), ldbx); dgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &b_ref( nrf, 1), ldb, &c_b10, &bx_ref(nrf, 1), ldbx); /* L10: */ } /* Next copy the rows of B that correspond to unchanged rows in the bidiagonal matrix to BX. */ i__1 = nd; for (i__ = 1; i__ <= i__1; ++i__) { ic = iwork[inode + i__ - 1]; dcopy_(nrhs, &b_ref(ic, 1), ldb, &bx_ref(ic, 1), ldbx); /* L20: */ } /* Finally go through the left singular vector matrices of all the other subproblems bottom-up on the tree. */ j = pow_ii(&c__2, &nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* find the first node LF and last node LL on the current level LVL */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; --j; dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx_ref(nlf, 1), ldbx, & b_ref(nlf, 1), ldb, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &work[1], info); /* L30: */ } /* L40: */ } goto L90; /* ICOMPQ = 1: applying back the right singular vector factors. */ L50: /* First now go through the right singular vector matrices of all the tree nodes top-down. */ j = 0; i__1 = nlvl; for (lvl = 1; lvl <= i__1; ++lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__2 = lvl - 1; lf = pow_ii(&c__2, &i__2); ll = (lf << 1) - 1; } i__2 = lf; for (i__ = ll; i__ >= i__2; --i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqre = 0; } else { sqre = 1; } ++j; dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b_ref(nlf, 1), ldb, & bx_ref(nlf, 1), ldbx, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &work[1], info); /* L60: */ } /* L70: */ } /* The nodes on the bottom level of the tree were solved by DLASDQ. The corresponding right singular vector matrices are in explicit form. Apply them back. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlp1 = nl + 1; if (i__ == nd) { nrp1 = nr; } else { nrp1 = nr + 1; } nlf = ic - nl; nrf = ic + 1; latime_1.ops += dopbl3_("DGEMM ", &nlp1, nrhs, &nlp1); latime_1.ops += dopbl3_("DGEMM ", &nrp1, nrhs, &nrp1); dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & b_ref(nlf, 1), ldb, &c_b10, &bx_ref(nlf, 1), ldbx); dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & b_ref(nrf, 1), ldb, &c_b10, &bx_ref(nrf, 1), ldbx); /* L80: */ } L90: return 0; /* End of DLALSA */ } /* dlalsa_ */
/* Subroutine */ int ctgevc_(char *side, char *howmny, logical *select, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, real *rwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CTGEVC computes some or all of the right and/or left generalized eigenvectors of a pair of complex upper triangular matrices (A,B). The right generalized eigenvector x and the left generalized eigenvector y of (A,B) corresponding to a generalized eigenvalue w are defined by: (A - wB) * x = 0 and y**H * (A - wB) = 0 where y**H denotes the conjugate tranpose of y. If an eigenvalue w is determined by zero diagonal elements of both A and B, a unit vector is returned as the corresponding eigenvector. If all eigenvectors are requested, the routine may either return the matrices X and/or Y of right or left eigenvectors of (A,B), or the products Z*X and/or Q*Y, where Z and Q are input unitary matrices. If (A,B) was obtained from the generalized Schur factorization of an original pair of matrices (A0,B0) = (Q*A*Z**H,Q*B*Z**H), then Z*X and Q*Y are the matrices of right or left eigenvectors of A. Arguments ========= SIDE (input) CHARACTER*1 = 'R': compute right eigenvectors only; = 'L': compute left eigenvectors only; = 'B': compute both right and left eigenvectors. HOWMNY (input) CHARACTER*1 = 'A': compute all right and/or left eigenvectors; = 'B': compute all right and/or left eigenvectors, and backtransform them using the input matrices supplied in VR and/or VL; = 'S': compute selected right and/or left eigenvectors, specified by the logical array SELECT. SELECT (input) LOGICAL array, dimension (N) If HOWMNY='S', SELECT specifies the eigenvectors to be computed. If HOWMNY='A' or 'B', SELECT is not referenced. To select the eigenvector corresponding to the j-th eigenvalue, SELECT(j) must be set to .TRUE.. N (input) INTEGER The order of the matrices A and B. N >= 0. A (input) COMPLEX array, dimension (LDA,N) The upper triangular matrix A. LDA (input) INTEGER The leading dimension of array A. LDA >= max(1,N). B (input) COMPLEX array, dimension (LDB,N) The upper triangular matrix B. B must have real diagonal elements. LDB (input) INTEGER The leading dimension of array B. LDB >= max(1,N). VL (input/output) COMPLEX array, dimension (LDVL,MM) On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must contain an N-by-N matrix Q (usually the unitary matrix Q of left Schur vectors returned by CHGEQZ). On exit, if SIDE = 'L' or 'B', VL contains: if HOWMNY = 'A', the matrix Y of left eigenvectors of (A,B); if HOWMNY = 'B', the matrix Q*Y; if HOWMNY = 'S', the left eigenvectors of (A,B) specified by SELECT, stored consecutively in the columns of VL, in the same order as their eigenvalues. If SIDE = 'R', VL is not referenced. LDVL (input) INTEGER The leading dimension of array VL. LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. VR (input/output) COMPLEX array, dimension (LDVR,MM) On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must contain an N-by-N matrix Q (usually the unitary matrix Z of right Schur vectors returned by CHGEQZ). On exit, if SIDE = 'R' or 'B', VR contains: if HOWMNY = 'A', the matrix X of right eigenvectors of (A,B); if HOWMNY = 'B', the matrix Z*X; if HOWMNY = 'S', the right eigenvectors of (A,B) specified by SELECT, stored consecutively in the columns of VR, in the same order as their eigenvalues. If SIDE = 'L', VR is not referenced. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. MM (input) INTEGER The number of columns in the arrays VL and/or VR. MM >= M. M (output) INTEGER The number of columns in the arrays VL and/or VR actually used to store the eigenvectors. If HOWMNY = 'A' or 'B', M is set to N. Each selected eigenvector occupies one column. WORK (workspace) COMPLEX array, dimension (2*N) RWORK (workspace) REAL array, dimension (2*N) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. ===================================================================== Decode and Test the input parameters Parameter adjustments */ /* Table of constant values */ static complex c_b1 = {0.f,0.f}; static complex c_b2 = {1.f,0.f}; static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3, i__4, i__5; real r__1, r__2, r__3, r__4, r__5, r__6; complex q__1, q__2, q__3, q__4; /* Builtin functions */ double r_imag(complex *); void r_cnjg(complex *, complex *); /* Local variables */ static integer ibeg, ieig, iend; static real dmin__; static integer isrc; static real temp; static complex suma, sumb; static real xmax; static complex d__; static integer i__, j; static real scale; static logical ilall; static integer iside; static real sbeta; extern logical lsame_(char *, char *); extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *); static real small; static logical compl; static real anorm, bnorm; static logical compr; static complex ca, cb; static logical ilbbad; static real acoefa; static integer je; static real bcoefa, acoeff; static complex bcoeff; static logical ilback; static integer im; extern /* Subroutine */ int slabad_(real *, real *); static real ascale, bscale; static integer jr; extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); extern doublereal slamch_(char *); static complex salpha; static real safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static real bignum; static logical ilcomp; static integer ihwmny; static real big; static logical lsa, lsb; static real ulp; static complex sum; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1 #define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)] #define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1 #define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)] --select; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1 * 1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1 * 1; vr -= vr_offset; --work; --rwork; /* Function Body */ if (lsame_(howmny, "A")) { ihwmny = 1; ilall = TRUE_; ilback = FALSE_; } else if (lsame_(howmny, "S")) { ihwmny = 2; ilall = FALSE_; ilback = FALSE_; } else if (lsame_(howmny, "B") || lsame_(howmny, "T")) { ihwmny = 3; ilall = TRUE_; ilback = TRUE_; } else { ihwmny = -1; } if (lsame_(side, "R")) { iside = 1; compl = FALSE_; compr = TRUE_; } else if (lsame_(side, "L")) { iside = 2; compl = TRUE_; compr = FALSE_; } else if (lsame_(side, "B")) { iside = 3; compl = TRUE_; compr = TRUE_; } else { iside = -1; } *info = 0; if (iside < 0) { *info = -1; } else if (ihwmny < 0) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("CTGEVC", &i__1); return 0; } /* Count the number of eigenvectors */ if (! ilall) { im = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (select[j]) { ++im; } /* L10: */ } } else { im = *n; } /* Check diagonal of B */ ilbbad = FALSE_; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (r_imag(&b_ref(j, j)) != 0.f) { ilbbad = TRUE_; } /* L20: */ } if (ilbbad) { *info = -7; } else if (compl && *ldvl < *n || *ldvl < 1) { *info = -10; } else if (compr && *ldvr < *n || *ldvr < 1) { *info = -12; } else if (*mm < im) { *info = -13; } if (*info != 0) { i__1 = -(*info); xerbla_("CTGEVC", &i__1); return 0; } /* Quick return if possible */ *m = im; if (*n == 0) { return 0; } /* Machine Constants */ safmin = slamch_("Safe minimum"); big = 1.f / safmin; slabad_(&safmin, &big); ulp = slamch_("Epsilon") * slamch_("Base"); small = safmin * *n / ulp; big = 1.f / small; bignum = 1.f / (safmin * *n); /* Compute the 1-norm of each column of the strictly upper triangular part of A and B to check for possible overflow in the triangular solver. */ i__1 = a_subscr(1, 1); anorm = (r__1 = a[i__1].r, dabs(r__1)) + (r__2 = r_imag(&a_ref(1, 1)), dabs(r__2)); i__1 = b_subscr(1, 1); bnorm = (r__1 = b[i__1].r, dabs(r__1)) + (r__2 = r_imag(&b_ref(1, 1)), dabs(r__2)); rwork[1] = 0.f; rwork[*n + 1] = 0.f; i__1 = *n; for (j = 2; j <= i__1; ++j) { rwork[j] = 0.f; rwork[*n + j] = 0.f; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = a_subscr(i__, j); rwork[j] += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(& a_ref(i__, j)), dabs(r__2)); i__3 = b_subscr(i__, j); rwork[*n + j] += (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(& b_ref(i__, j)), dabs(r__2)); /* L30: */ } /* Computing MAX */ i__2 = a_subscr(j, j); r__3 = anorm, r__4 = rwork[j] + ((r__1 = a[i__2].r, dabs(r__1)) + ( r__2 = r_imag(&a_ref(j, j)), dabs(r__2))); anorm = dmax(r__3,r__4); /* Computing MAX */ i__2 = b_subscr(j, j); r__3 = bnorm, r__4 = rwork[*n + j] + ((r__1 = b[i__2].r, dabs(r__1)) + (r__2 = r_imag(&b_ref(j, j)), dabs(r__2))); bnorm = dmax(r__3,r__4); /* L40: */ } ascale = 1.f / dmax(anorm,safmin); bscale = 1.f / dmax(bnorm,safmin); /* Left eigenvectors */ if (compl) { ieig = 0; /* Main loop over eigenvalues */ i__1 = *n; for (je = 1; je <= i__1; ++je) { if (ilall) { ilcomp = TRUE_; } else { ilcomp = select[je]; } if (ilcomp) { ++ieig; i__2 = a_subscr(je, je); i__3 = b_subscr(je, je); if ((r__2 = a[i__2].r, dabs(r__2)) + (r__3 = r_imag(&a_ref(je, je)), dabs(r__3)) <= safmin && (r__1 = b[i__3].r, dabs(r__1)) <= safmin) { /* Singular matrix pencil -- return unit eigenvector */ i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { i__3 = vl_subscr(jr, ieig); vl[i__3].r = 0.f, vl[i__3].i = 0.f; /* L50: */ } i__2 = vl_subscr(ieig, ieig); vl[i__2].r = 1.f, vl[i__2].i = 0.f; goto L140; } /* Non-singular eigenvalue: Compute coefficients a and b in H y ( a A - b B ) = 0 Computing MAX */ i__2 = a_subscr(je, je); i__3 = b_subscr(je, je); r__4 = ((r__2 = a[i__2].r, dabs(r__2)) + (r__3 = r_imag(& a_ref(je, je)), dabs(r__3))) * ascale, r__5 = (r__1 = b[i__3].r, dabs(r__1)) * bscale, r__4 = max(r__4,r__5) ; temp = 1.f / dmax(r__4,safmin); i__2 = a_subscr(je, je); q__2.r = temp * a[i__2].r, q__2.i = temp * a[i__2].i; q__1.r = ascale * q__2.r, q__1.i = ascale * q__2.i; salpha.r = q__1.r, salpha.i = q__1.i; i__2 = b_subscr(je, je); sbeta = temp * b[i__2].r * bscale; acoeff = sbeta * ascale; q__1.r = bscale * salpha.r, q__1.i = bscale * salpha.i; bcoeff.r = q__1.r, bcoeff.i = q__1.i; /* Scale to avoid underflow */ lsa = dabs(sbeta) >= safmin && dabs(acoeff) < small; lsb = (r__1 = salpha.r, dabs(r__1)) + (r__2 = r_imag(&salpha), dabs(r__2)) >= safmin && (r__3 = bcoeff.r, dabs(r__3) ) + (r__4 = r_imag(&bcoeff), dabs(r__4)) < small; scale = 1.f; if (lsa) { scale = small / dabs(sbeta) * dmin(anorm,big); } if (lsb) { /* Computing MAX */ r__3 = scale, r__4 = small / ((r__1 = salpha.r, dabs(r__1) ) + (r__2 = r_imag(&salpha), dabs(r__2))) * dmin( bnorm,big); scale = dmax(r__3,r__4); } if (lsa || lsb) { /* Computing MIN Computing MAX */ r__5 = 1.f, r__6 = dabs(acoeff), r__5 = max(r__5,r__6), r__6 = (r__1 = bcoeff.r, dabs(r__1)) + (r__2 = r_imag(&bcoeff), dabs(r__2)); r__3 = scale, r__4 = 1.f / (safmin * dmax(r__5,r__6)); scale = dmin(r__3,r__4); if (lsa) { acoeff = ascale * (scale * sbeta); } else { acoeff = scale * acoeff; } if (lsb) { q__2.r = scale * salpha.r, q__2.i = scale * salpha.i; q__1.r = bscale * q__2.r, q__1.i = bscale * q__2.i; bcoeff.r = q__1.r, bcoeff.i = q__1.i; } else { q__1.r = scale * bcoeff.r, q__1.i = scale * bcoeff.i; bcoeff.r = q__1.r, bcoeff.i = q__1.i; } } acoefa = dabs(acoeff); bcoefa = (r__1 = bcoeff.r, dabs(r__1)) + (r__2 = r_imag(& bcoeff), dabs(r__2)); xmax = 1.f; i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { i__3 = jr; work[i__3].r = 0.f, work[i__3].i = 0.f; /* L60: */ } i__2 = je; work[i__2].r = 1.f, work[i__2].i = 0.f; /* Computing MAX */ r__1 = ulp * acoefa * anorm, r__2 = ulp * bcoefa * bnorm, r__1 = max(r__1,r__2); dmin__ = dmax(r__1,safmin); /* H Triangular solve of (a A - b B) y = 0 H (rowwise in (a A - b B) , or columnwise in a A - b B) */ i__2 = *n; for (j = je + 1; j <= i__2; ++j) { /* Compute j-1 SUM = sum conjg( a*A(k,j) - b*B(k,j) )*x(k) k=je (Scale if necessary) */ temp = 1.f / xmax; if (acoefa * rwork[j] + bcoefa * rwork[*n + j] > bignum * temp) { i__3 = j - 1; for (jr = je; jr <= i__3; ++jr) { i__4 = jr; i__5 = jr; q__1.r = temp * work[i__5].r, q__1.i = temp * work[i__5].i; work[i__4].r = q__1.r, work[i__4].i = q__1.i; /* L70: */ } xmax = 1.f; } suma.r = 0.f, suma.i = 0.f; sumb.r = 0.f, sumb.i = 0.f; i__3 = j - 1; for (jr = je; jr <= i__3; ++jr) { r_cnjg(&q__3, &a_ref(jr, j)); i__4 = jr; q__2.r = q__3.r * work[i__4].r - q__3.i * work[i__4] .i, q__2.i = q__3.r * work[i__4].i + q__3.i * work[i__4].r; q__1.r = suma.r + q__2.r, q__1.i = suma.i + q__2.i; suma.r = q__1.r, suma.i = q__1.i; r_cnjg(&q__3, &b_ref(jr, j)); i__4 = jr; q__2.r = q__3.r * work[i__4].r - q__3.i * work[i__4] .i, q__2.i = q__3.r * work[i__4].i + q__3.i * work[i__4].r; q__1.r = sumb.r + q__2.r, q__1.i = sumb.i + q__2.i; sumb.r = q__1.r, sumb.i = q__1.i; /* L80: */ } q__2.r = acoeff * suma.r, q__2.i = acoeff * suma.i; r_cnjg(&q__4, &bcoeff); q__3.r = q__4.r * sumb.r - q__4.i * sumb.i, q__3.i = q__4.r * sumb.i + q__4.i * sumb.r; q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; sum.r = q__1.r, sum.i = q__1.i; /* Form x(j) = - SUM / conjg( a*A(j,j) - b*B(j,j) ) with scaling and perturbation of the denominator */ i__3 = a_subscr(j, j); q__3.r = acoeff * a[i__3].r, q__3.i = acoeff * a[i__3].i; i__4 = b_subscr(j, j); q__4.r = bcoeff.r * b[i__4].r - bcoeff.i * b[i__4].i, q__4.i = bcoeff.r * b[i__4].i + bcoeff.i * b[i__4] .r; q__2.r = q__3.r - q__4.r, q__2.i = q__3.i - q__4.i; r_cnjg(&q__1, &q__2); d__.r = q__1.r, d__.i = q__1.i; if ((r__1 = d__.r, dabs(r__1)) + (r__2 = r_imag(&d__), dabs(r__2)) <= dmin__) { q__1.r = dmin__, q__1.i = 0.f; d__.r = q__1.r, d__.i = q__1.i; } if ((r__1 = d__.r, dabs(r__1)) + (r__2 = r_imag(&d__), dabs(r__2)) < 1.f) { if ((r__1 = sum.r, dabs(r__1)) + (r__2 = r_imag(&sum), dabs(r__2)) >= bignum * ((r__3 = d__.r, dabs( r__3)) + (r__4 = r_imag(&d__), dabs(r__4)))) { temp = 1.f / ((r__1 = sum.r, dabs(r__1)) + (r__2 = r_imag(&sum), dabs(r__2))); i__3 = j - 1; for (jr = je; jr <= i__3; ++jr) { i__4 = jr; i__5 = jr; q__1.r = temp * work[i__5].r, q__1.i = temp * work[i__5].i; work[i__4].r = q__1.r, work[i__4].i = q__1.i; /* L90: */ } xmax = temp * xmax; q__1.r = temp * sum.r, q__1.i = temp * sum.i; sum.r = q__1.r, sum.i = q__1.i; } } i__3 = j; q__2.r = -sum.r, q__2.i = -sum.i; cladiv_(&q__1, &q__2, &d__); work[i__3].r = q__1.r, work[i__3].i = q__1.i; /* Computing MAX */ i__3 = j; r__3 = xmax, r__4 = (r__1 = work[i__3].r, dabs(r__1)) + ( r__2 = r_imag(&work[j]), dabs(r__2)); xmax = dmax(r__3,r__4); /* L100: */ } /* Back transform eigenvector if HOWMNY='B'. */ if (ilback) { i__2 = *n + 1 - je; cgemv_("N", n, &i__2, &c_b2, &vl_ref(1, je), ldvl, &work[ je], &c__1, &c_b1, &work[*n + 1], &c__1); isrc = 2; ibeg = 1; } else { isrc = 1; ibeg = je; } /* Copy and scale eigenvector into column of VL */ xmax = 0.f; i__2 = *n; for (jr = ibeg; jr <= i__2; ++jr) { /* Computing MAX */ i__3 = (isrc - 1) * *n + jr; r__3 = xmax, r__4 = (r__1 = work[i__3].r, dabs(r__1)) + ( r__2 = r_imag(&work[(isrc - 1) * *n + jr]), dabs( r__2)); xmax = dmax(r__3,r__4); /* L110: */ } if (xmax > safmin) { temp = 1.f / xmax; i__2 = *n; for (jr = ibeg; jr <= i__2; ++jr) { i__3 = vl_subscr(jr, ieig); i__4 = (isrc - 1) * *n + jr; q__1.r = temp * work[i__4].r, q__1.i = temp * work[ i__4].i; vl[i__3].r = q__1.r, vl[i__3].i = q__1.i; /* L120: */ } } else { ibeg = *n + 1; } i__2 = ibeg - 1; for (jr = 1; jr <= i__2; ++jr) { i__3 = vl_subscr(jr, ieig); vl[i__3].r = 0.f, vl[i__3].i = 0.f; /* L130: */ } } L140: ; } } /* Right eigenvectors */ if (compr) { ieig = im + 1; /* Main loop over eigenvalues */ for (je = *n; je >= 1; --je) { if (ilall) { ilcomp = TRUE_; } else { ilcomp = select[je]; } if (ilcomp) { --ieig; i__1 = a_subscr(je, je); i__2 = b_subscr(je, je); if ((r__2 = a[i__1].r, dabs(r__2)) + (r__3 = r_imag(&a_ref(je, je)), dabs(r__3)) <= safmin && (r__1 = b[i__2].r, dabs(r__1)) <= safmin) { /* Singular matrix pencil -- return unit eigenvector */ i__1 = *n; for (jr = 1; jr <= i__1; ++jr) { i__2 = vr_subscr(jr, ieig); vr[i__2].r = 0.f, vr[i__2].i = 0.f; /* L150: */ } i__1 = vr_subscr(ieig, ieig); vr[i__1].r = 1.f, vr[i__1].i = 0.f; goto L250; } /* Non-singular eigenvalue: Compute coefficients a and b in ( a A - b B ) x = 0 Computing MAX */ i__1 = a_subscr(je, je); i__2 = b_subscr(je, je); r__4 = ((r__2 = a[i__1].r, dabs(r__2)) + (r__3 = r_imag(& a_ref(je, je)), dabs(r__3))) * ascale, r__5 = (r__1 = b[i__2].r, dabs(r__1)) * bscale, r__4 = max(r__4,r__5) ; temp = 1.f / dmax(r__4,safmin); i__1 = a_subscr(je, je); q__2.r = temp * a[i__1].r, q__2.i = temp * a[i__1].i; q__1.r = ascale * q__2.r, q__1.i = ascale * q__2.i; salpha.r = q__1.r, salpha.i = q__1.i; i__1 = b_subscr(je, je); sbeta = temp * b[i__1].r * bscale; acoeff = sbeta * ascale; q__1.r = bscale * salpha.r, q__1.i = bscale * salpha.i; bcoeff.r = q__1.r, bcoeff.i = q__1.i; /* Scale to avoid underflow */ lsa = dabs(sbeta) >= safmin && dabs(acoeff) < small; lsb = (r__1 = salpha.r, dabs(r__1)) + (r__2 = r_imag(&salpha), dabs(r__2)) >= safmin && (r__3 = bcoeff.r, dabs(r__3) ) + (r__4 = r_imag(&bcoeff), dabs(r__4)) < small; scale = 1.f; if (lsa) { scale = small / dabs(sbeta) * dmin(anorm,big); } if (lsb) { /* Computing MAX */ r__3 = scale, r__4 = small / ((r__1 = salpha.r, dabs(r__1) ) + (r__2 = r_imag(&salpha), dabs(r__2))) * dmin( bnorm,big); scale = dmax(r__3,r__4); } if (lsa || lsb) { /* Computing MIN Computing MAX */ r__5 = 1.f, r__6 = dabs(acoeff), r__5 = max(r__5,r__6), r__6 = (r__1 = bcoeff.r, dabs(r__1)) + (r__2 = r_imag(&bcoeff), dabs(r__2)); r__3 = scale, r__4 = 1.f / (safmin * dmax(r__5,r__6)); scale = dmin(r__3,r__4); if (lsa) { acoeff = ascale * (scale * sbeta); } else { acoeff = scale * acoeff; } if (lsb) { q__2.r = scale * salpha.r, q__2.i = scale * salpha.i; q__1.r = bscale * q__2.r, q__1.i = bscale * q__2.i; bcoeff.r = q__1.r, bcoeff.i = q__1.i; } else { q__1.r = scale * bcoeff.r, q__1.i = scale * bcoeff.i; bcoeff.r = q__1.r, bcoeff.i = q__1.i; } } acoefa = dabs(acoeff); bcoefa = (r__1 = bcoeff.r, dabs(r__1)) + (r__2 = r_imag(& bcoeff), dabs(r__2)); xmax = 1.f; i__1 = *n; for (jr = 1; jr <= i__1; ++jr) { i__2 = jr; work[i__2].r = 0.f, work[i__2].i = 0.f; /* L160: */ } i__1 = je; work[i__1].r = 1.f, work[i__1].i = 0.f; /* Computing MAX */ r__1 = ulp * acoefa * anorm, r__2 = ulp * bcoefa * bnorm, r__1 = max(r__1,r__2); dmin__ = dmax(r__1,safmin); /* Triangular solve of (a A - b B) x = 0 (columnwise) WORK(1:j-1) contains sums w, WORK(j+1:JE) contains x */ i__1 = je - 1; for (jr = 1; jr <= i__1; ++jr) { i__2 = jr; i__3 = a_subscr(jr, je); q__2.r = acoeff * a[i__3].r, q__2.i = acoeff * a[i__3].i; i__4 = b_subscr(jr, je); q__3.r = bcoeff.r * b[i__4].r - bcoeff.i * b[i__4].i, q__3.i = bcoeff.r * b[i__4].i + bcoeff.i * b[i__4] .r; q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; work[i__2].r = q__1.r, work[i__2].i = q__1.i; /* L170: */ } i__1 = je; work[i__1].r = 1.f, work[i__1].i = 0.f; for (j = je - 1; j >= 1; --j) { /* Form x(j) := - w(j) / d with scaling and perturbation of the denominator */ i__1 = a_subscr(j, j); q__2.r = acoeff * a[i__1].r, q__2.i = acoeff * a[i__1].i; i__2 = b_subscr(j, j); q__3.r = bcoeff.r * b[i__2].r - bcoeff.i * b[i__2].i, q__3.i = bcoeff.r * b[i__2].i + bcoeff.i * b[i__2] .r; q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; d__.r = q__1.r, d__.i = q__1.i; if ((r__1 = d__.r, dabs(r__1)) + (r__2 = r_imag(&d__), dabs(r__2)) <= dmin__) { q__1.r = dmin__, q__1.i = 0.f; d__.r = q__1.r, d__.i = q__1.i; } if ((r__1 = d__.r, dabs(r__1)) + (r__2 = r_imag(&d__), dabs(r__2)) < 1.f) { i__1 = j; if ((r__1 = work[i__1].r, dabs(r__1)) + (r__2 = r_imag(&work[j]), dabs(r__2)) >= bignum * (( r__3 = d__.r, dabs(r__3)) + (r__4 = r_imag(& d__), dabs(r__4)))) { i__1 = j; temp = 1.f / ((r__1 = work[i__1].r, dabs(r__1)) + (r__2 = r_imag(&work[j]), dabs(r__2))); i__1 = je; for (jr = 1; jr <= i__1; ++jr) { i__2 = jr; i__3 = jr; q__1.r = temp * work[i__3].r, q__1.i = temp * work[i__3].i; work[i__2].r = q__1.r, work[i__2].i = q__1.i; /* L180: */ } } } i__1 = j; i__2 = j; q__2.r = -work[i__2].r, q__2.i = -work[i__2].i; cladiv_(&q__1, &q__2, &d__); work[i__1].r = q__1.r, work[i__1].i = q__1.i; if (j > 1) { /* w = w + x(j)*(a A(*,j) - b B(*,j) ) with scaling */ i__1 = j; if ((r__1 = work[i__1].r, dabs(r__1)) + (r__2 = r_imag(&work[j]), dabs(r__2)) > 1.f) { i__1 = j; temp = 1.f / ((r__1 = work[i__1].r, dabs(r__1)) + (r__2 = r_imag(&work[j]), dabs(r__2))); if (acoefa * rwork[j] + bcoefa * rwork[*n + j] >= bignum * temp) { i__1 = je; for (jr = 1; jr <= i__1; ++jr) { i__2 = jr; i__3 = jr; q__1.r = temp * work[i__3].r, q__1.i = temp * work[i__3].i; work[i__2].r = q__1.r, work[i__2].i = q__1.i; /* L190: */ } } } i__1 = j; q__1.r = acoeff * work[i__1].r, q__1.i = acoeff * work[i__1].i; ca.r = q__1.r, ca.i = q__1.i; i__1 = j; q__1.r = bcoeff.r * work[i__1].r - bcoeff.i * work[ i__1].i, q__1.i = bcoeff.r * work[i__1].i + bcoeff.i * work[i__1].r; cb.r = q__1.r, cb.i = q__1.i; i__1 = j - 1; for (jr = 1; jr <= i__1; ++jr) { i__2 = jr; i__3 = jr; i__4 = a_subscr(jr, j); q__3.r = ca.r * a[i__4].r - ca.i * a[i__4].i, q__3.i = ca.r * a[i__4].i + ca.i * a[i__4] .r; q__2.r = work[i__3].r + q__3.r, q__2.i = work[ i__3].i + q__3.i; i__5 = b_subscr(jr, j); q__4.r = cb.r * b[i__5].r - cb.i * b[i__5].i, q__4.i = cb.r * b[i__5].i + cb.i * b[i__5] .r; q__1.r = q__2.r - q__4.r, q__1.i = q__2.i - q__4.i; work[i__2].r = q__1.r, work[i__2].i = q__1.i; /* L200: */ } } /* L210: */ } /* Back transform eigenvector if HOWMNY='B'. */ if (ilback) { cgemv_("N", n, &je, &c_b2, &vr[vr_offset], ldvr, &work[1], &c__1, &c_b1, &work[*n + 1], &c__1); isrc = 2; iend = *n; } else { isrc = 1; iend = je; } /* Copy and scale eigenvector into column of VR */ xmax = 0.f; i__1 = iend; for (jr = 1; jr <= i__1; ++jr) { /* Computing MAX */ i__2 = (isrc - 1) * *n + jr; r__3 = xmax, r__4 = (r__1 = work[i__2].r, dabs(r__1)) + ( r__2 = r_imag(&work[(isrc - 1) * *n + jr]), dabs( r__2)); xmax = dmax(r__3,r__4); /* L220: */ } if (xmax > safmin) { temp = 1.f / xmax; i__1 = iend; for (jr = 1; jr <= i__1; ++jr) { i__2 = vr_subscr(jr, ieig); i__3 = (isrc - 1) * *n + jr; q__1.r = temp * work[i__3].r, q__1.i = temp * work[ i__3].i; vr[i__2].r = q__1.r, vr[i__2].i = q__1.i; /* L230: */ } } else { iend = 0; } i__1 = *n; for (jr = iend + 1; jr <= i__1; ++jr) { i__2 = vr_subscr(jr, ieig); vr[i__2].r = 0.f, vr[i__2].i = 0.f; /* L240: */ } } L250: ; } } return 0; /* End of CTGEVC */ } /* ctgevc_ */
/* Subroutine */ int sgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real *z__, integer *ldz, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2, i__3; /* Local variables */ static integer jcol; static real temp; static integer jrow; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static real c__, s; extern logical lsame_(char *, char *); extern /* Subroutine */ int xerbla_(char *, integer *); static integer icompq; extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, real *, real *, integer *), slartg_(real *, real *, real * , real *, real *); static integer icompz; static logical ilq, ilz; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] /* -- LAPACK routine (instrumented to count operations, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 ---------------------- Begin Timing Code ------------------------- Common block to return operation count and iteration count ITCNT is initialized to 0, OPS is only incremented OPST is used to accumulate small contributions to OPS to avoid roundoff error ----------------------- End Timing Code -------------------------- Purpose ======= SGGHRD reduces a pair of real matrices (A,B) to generalized upper Hessenberg form using orthogonal transformations, where A is a general matrix and B is upper triangular: Q' * A * Z = H and Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular, and Q and Z are orthogonal, and ' means transpose. The orthogonal matrices Q and Z are determined as products of Givens rotations. They may either be formed explicitly, or they may be postmultiplied into input matrices Q1 and Z1, so that Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)' Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)' Arguments ========= COMPQ (input) CHARACTER*1 = 'N': do not compute Q; = 'I': Q is initialized to the unit matrix, and the orthogonal matrix Q is returned; = 'V': Q must contain an orthogonal matrix Q1 on entry, and the product Q1*Q is returned. COMPZ (input) CHARACTER*1 = 'N': do not compute Z; = 'I': Z is initialized to the unit matrix, and the orthogonal matrix Z is returned; = 'V': Z must contain an orthogonal matrix Z1 on entry, and the product Z1*Z is returned. N (input) INTEGER The order of the matrices A and B. N >= 0. ILO (input) INTEGER IHI (input) INTEGER It is assumed that A is already upper triangular in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally set by a previous call to SGGBAL; otherwise they should be set to 1 and N respectively. 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. A (input/output) REAL array, dimension (LDA, N) On entry, the N-by-N general matrix to be reduced. On exit, the upper triangle and the first subdiagonal of A are overwritten with the upper Hessenberg matrix H, and the rest is set to zero. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) REAL array, dimension (LDB, N) On entry, the N-by-N upper triangular matrix B. On exit, the upper triangular matrix T = Q' B Z. The elements below the diagonal are set to zero. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). Q (input/output) REAL array, dimension (LDQ, N) If COMPQ='N': Q is not referenced. If COMPQ='I': on entry, Q need not be set, and on exit it contains the orthogonal matrix Q, where Q' is the product of the Givens transformations which are applied to A and B on the left. If COMPQ='V': on entry, Q must contain an orthogonal matrix Q1, and on exit this is overwritten by Q1*Q. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. Z (input/output) REAL array, dimension (LDZ, N) If COMPZ='N': Z is not referenced. If COMPZ='I': on entry, Z need not be set, and on exit it contains the orthogonal matrix Z, which is the product of the Givens transformations which are applied to A and B on the right. If COMPZ='V': on entry, Z must contain an orthogonal matrix Z1, and on exit this is overwritten by Z1*Z. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== This routine reduces A to Hessenberg and B to triangular form by an unblocked reduction, as described in _Matrix_Computations_, by Golub and Van Loan (Johns Hopkins Press.) ===================================================================== Decode COMPQ Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; /* Function Body */ if (lsame_(compq, "N")) { ilq = FALSE_; icompq = 1; } else if (lsame_(compq, "V")) { ilq = TRUE_; icompq = 2; } else if (lsame_(compq, "I")) { ilq = TRUE_; icompq = 3; } else { icompq = 0; } /* Decode COMPZ */ if (lsame_(compz, "N")) { ilz = FALSE_; icompz = 1; } else if (lsame_(compz, "V")) { ilz = TRUE_; icompz = 2; } else if (lsame_(compz, "I")) { ilz = TRUE_; icompz = 3; } else { icompz = 0; } /* Test the input parameters. */ *info = 0; if (icompq <= 0) { *info = -1; } else if (icompz <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ilo < 1) { *info = -4; } else if (*ihi > *n || *ihi < *ilo - 1) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (ilq && *ldq < *n || *ldq < 1) { *info = -11; } else if (ilz && *ldz < *n || *ldz < 1) { *info = -13; } if (*info != 0) { i__1 = -(*info); xerbla_("SGGHRD", &i__1); return 0; } /* Initialize Q and Z if desired. */ if (icompq == 3) { slaset_("Full", n, n, &c_b10, &c_b11, &q[q_offset], ldq); } if (icompz == 3) { slaset_("Full", n, n, &c_b10, &c_b11, &z__[z_offset], ldz); } /* Quick return if possible */ if (*n <= 1) { return 0; } /* Zero out lower triangle of B */ i__1 = *n - 1; for (jcol = 1; jcol <= i__1; ++jcol) { i__2 = *n; for (jrow = jcol + 1; jrow <= i__2; ++jrow) { b_ref(jrow, jcol) = 0.f; /* L10: */ } /* L20: */ } /* Reduce A and B */ i__1 = *ihi - 2; for (jcol = *ilo; jcol <= i__1; ++jcol) { i__2 = jcol + 2; for (jrow = *ihi; jrow >= i__2; --jrow) { /* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */ temp = a_ref(jrow - 1, jcol); slartg_(&temp, &a_ref(jrow, jcol), &c__, &s, &a_ref(jrow - 1, jcol)); a_ref(jrow, jcol) = 0.f; i__3 = *n - jcol; srot_(&i__3, &a_ref(jrow - 1, jcol + 1), lda, &a_ref(jrow, jcol + 1), lda, &c__, &s); i__3 = *n + 2 - jrow; srot_(&i__3, &b_ref(jrow - 1, jrow - 1), ldb, &b_ref(jrow, jrow - 1), ldb, &c__, &s); if (ilq) { srot_(n, &q_ref(1, jrow - 1), &c__1, &q_ref(1, jrow), &c__1, & c__, &s); } /* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */ temp = b_ref(jrow, jrow); slartg_(&temp, &b_ref(jrow, jrow - 1), &c__, &s, &b_ref(jrow, jrow)); b_ref(jrow, jrow - 1) = 0.f; srot_(ihi, &a_ref(1, jrow), &c__1, &a_ref(1, jrow - 1), &c__1, & c__, &s); i__3 = jrow - 1; srot_(&i__3, &b_ref(1, jrow), &c__1, &b_ref(1, jrow - 1), &c__1, & c__, &s); if (ilz) { srot_(n, &z___ref(1, jrow), &c__1, &z___ref(1, jrow - 1), & c__1, &c__, &s); } /* L30: */ } /* L40: */ } /* ---------------------- Begin Timing Code ------------------------- Operation count: factor * number of calls to SLARTG TEMP *7 * total number of rows/cols rotated in A and B TEMP*[6n + 2(ihi-ilo) + 5]/6 *6 * rows rotated in Q TEMP*n/2 *6 * rows rotated in Z TEMP*n/2 *6 */ temp = (real) (*ihi - *ilo) * (real) (*ihi - *ilo - 1); jrow = *n * 6 + (*ihi - *ilo << 1) + 12; if (ilq) { jrow += *n * 3; } if (ilz) { jrow += *n * 3; } latime_1.ops += (real) jrow * temp; latime_1.itcnt = 0.f; /* ----------------------- End Timing Code -------------------------- */ return 0; /* End of SGGHRD */ } /* sgghrd_ */
/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, integer * lda, doublereal *b, integer *ldb) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ static integer info; static doublereal temp; static integer i__, j, k; static logical lside; extern logical lsame_(char *, char *); static integer nrowa; static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); static logical nounit; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] /* Purpose ======= DTRMM performs one of the matrix-matrix operations B := alpha*op( A )*B, or B := alpha*B*op( A ), where alpha is a scalar, B is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A or op( A ) = A'. Parameters ========== SIDE - CHARACTER*1. On entry, SIDE specifies whether op( A ) multiplies B from the left or right as follows: SIDE = 'L' or 'l' B := alpha*op( A )*B. SIDE = 'R' or 'r' B := alpha*B*op( A ). Unchanged on exit. UPLO - CHARACTER*1. On entry, UPLO specifies whether the matrix A is an upper or lower triangular matrix as follows: UPLO = 'U' or 'u' A is an upper triangular matrix. UPLO = 'L' or 'l' A is a lower triangular matrix. Unchanged on exit. TRANSA - CHARACTER*1. On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows: TRANSA = 'N' or 'n' op( A ) = A. TRANSA = 'T' or 't' op( A ) = A'. TRANSA = 'C' or 'c' op( A ) = A'. Unchanged on exit. DIAG - CHARACTER*1. On entry, DIAG specifies whether or not A is unit triangular as follows: DIAG = 'U' or 'u' A is assumed to be unit triangular. DIAG = 'N' or 'n' A is not assumed to be unit triangular. Unchanged on exit. M - INTEGER. On entry, M specifies the number of rows of B. M must be at least zero. Unchanged on exit. N - INTEGER. On entry, N specifies the number of columns of B. N must be at least zero. Unchanged on exit. ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. Unchanged on exit. A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. Before entry with UPLO = 'U' or 'u', the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = 'L' or 'l', the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when DIAG = 'U' or 'u', the diagonal elements of A are not referenced either, but are assumed to be unity. Unchanged on exit. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When SIDE = 'L' or 'l' then LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' then LDA must be at least max( 1, n ). Unchanged on exit. B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). Before entry, the leading m by n part of the array B must contain the matrix B, and on exit is overwritten by the transformed matrix. LDB - INTEGER. On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. LDB must be at least max( 1, m ). Unchanged on exit. Level 3 Blas routine. -- Written on 8-February-1989. Jack Dongarra, Argonne National Laboratory. Iain Duff, AERE Harwell. Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. Test the input parameters. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ lside = lsame_(side, "L"); if (lside) { nrowa = *m; } else { nrowa = *n; } nounit = lsame_(diag, "N"); upper = lsame_(uplo, "U"); info = 0; if (! lside && ! lsame_(side, "R")) { info = 1; } else if (! upper && ! lsame_(uplo, "L")) { info = 2; } else if (! lsame_(transa, "N") && ! lsame_(transa, "T") && ! lsame_(transa, "C")) { info = 3; } else if (! lsame_(diag, "U") && ! lsame_(diag, "N")) { info = 4; } else if (*m < 0) { info = 5; } else if (*n < 0) { info = 6; } else if (*lda < max(1,nrowa)) { info = 9; } else if (*ldb < max(1,*m)) { info = 11; } if (info != 0) { xerbla_("DTRMM ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } /* And when alpha.eq.zero. */ if (*alpha == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = 0.; /* L10: */ } /* L20: */ } return 0; } /* Start the operations. */ if (lside) { if (lsame_(transa, "N")) { /* Form B := alpha*A*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (k = 1; k <= i__2; ++k) { if (b_ref(k, j) != 0.) { temp = *alpha * b_ref(k, j); i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * a_ref( i__, k); /* L30: */ } if (nounit) { temp *= a_ref(k, k); } b_ref(k, j) = temp; } /* L40: */ } /* L50: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (k = *m; k >= 1; --k) { if (b_ref(k, j) != 0.) { temp = *alpha * b_ref(k, j); b_ref(k, j) = temp; if (nounit) { b_ref(k, j) = b_ref(k, j) * a_ref(k, k); } i__2 = *m; for (i__ = k + 1; i__ <= i__2; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * a_ref( i__, k); /* L60: */ } } /* L70: */ } /* L80: */ } } } else { /* Form B := alpha*A'*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { temp = b_ref(i__, j); if (nounit) { temp *= a_ref(i__, i__); } i__2 = i__ - 1; for (k = 1; k <= i__2; ++k) { temp += a_ref(k, i__) * b_ref(k, j); /* L90: */ } b_ref(i__, j) = *alpha * temp; /* L100: */ } /* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = b_ref(i__, j); if (nounit) { temp *= a_ref(i__, i__); } i__3 = *m; for (k = i__ + 1; k <= i__3; ++k) { temp += a_ref(k, i__) * b_ref(k, j); /* L120: */ } b_ref(i__, j) = *alpha * temp; /* L130: */ } /* L140: */ } } } } else { if (lsame_(transa, "N")) { /* Form B := alpha*B*A. */ if (upper) { for (j = *n; j >= 1; --j) { temp = *alpha; if (nounit) { temp *= a_ref(j, j); } i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b_ref(i__, j) = temp * b_ref(i__, j); /* L150: */ } i__1 = j - 1; for (k = 1; k <= i__1; ++k) { if (a_ref(k, j) != 0.) { temp = *alpha * a_ref(k, j); i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( i__, k); /* L160: */ } } /* L170: */ } /* L180: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = *alpha; if (nounit) { temp *= a_ref(j, j); } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = temp * b_ref(i__, j); /* L190: */ } i__2 = *n; for (k = j + 1; k <= i__2; ++k) { if (a_ref(k, j) != 0.) { temp = *alpha * a_ref(k, j); i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( i__, k); /* L200: */ } } /* L210: */ } /* L220: */ } } } else { /* Form B := alpha*B*A'. */ if (upper) { i__1 = *n; for (k = 1; k <= i__1; ++k) { i__2 = k - 1; for (j = 1; j <= i__2; ++j) { if (a_ref(j, k) != 0.) { temp = *alpha * a_ref(j, k); i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( i__, k); /* L230: */ } } /* L240: */ } temp = *alpha; if (nounit) { temp *= a_ref(k, k); } if (temp != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, k) = temp * b_ref(i__, k); /* L250: */ } } /* L260: */ } } else { for (k = *n; k >= 1; --k) { i__1 = *n; for (j = k + 1; j <= i__1; ++j) { if (a_ref(j, k) != 0.) { temp = *alpha * a_ref(j, k); i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( i__, k); /* L270: */ } } /* L280: */ } temp = *alpha; if (nounit) { temp *= a_ref(k, k); } if (temp != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b_ref(i__, k) = temp * b_ref(i__, k); /* L290: */ } } /* L300: */ } } } } return 0; /* End of DTRMM . */ } /* dtrmm_ */
/* Subroutine */ int cget10_(integer *m, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *work, real *rwork, real *result) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; real r__1, r__2; /* Local variables */ static real unfl; static integer j; static real anorm; extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *), caxpy_(integer *, complex *, complex *, integer *, complex *, integer *); static real wnorm; extern doublereal clange_(char *, integer *, integer *, complex *, integer *, real *), slamch_(char *), scasum_( integer *, complex *, integer *); static real eps; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= CGET10 compares two matrices A and B and computes the ratio RESULT = norm( A - B ) / ( norm(A) * M * EPS ) Arguments ========= M (input) INTEGER The number of rows of the matrices A and B. N (input) INTEGER The number of columns of the matrices A and B. A (input) COMPLEX array, dimension (LDA,N) The m by n matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input) COMPLEX array, dimension (LDB,N) The m by n matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,M). WORK (workspace) COMPLEX array, dimension (M) RWORK (workspace) COMPLEX array, dimension (M) RESULT (output) REAL RESULT = norm( A - B ) / ( norm(A) * M * EPS ) ===================================================================== Quick return if possible Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --work; --rwork; /* Function Body */ if (*m <= 0 || *n <= 0) { *result = 0.f; return 0; } unfl = slamch_("Safe minimum"); eps = slamch_("Precision"); wnorm = 0.f; i__1 = *n; for (j = 1; j <= i__1; ++j) { ccopy_(m, &a_ref(1, j), &c__1, &work[1], &c__1); caxpy_(m, &c_b9, &b_ref(1, j), &c__1, &work[1], &c__1); /* Computing MAX */ r__1 = wnorm, r__2 = scasum_(n, &work[1], &c__1); wnorm = dmax(r__1,r__2); /* L10: */ } /* Computing MAX */ r__1 = clange_("1", m, n, &a[a_offset], lda, &rwork[1]); anorm = dmax(r__1,unfl); if (anorm > wnorm) { *result = wnorm / anorm / (*m * eps); } else { if (anorm < 1.f) { /* Computing MIN */ r__1 = wnorm, r__2 = *m * anorm; *result = dmin(r__1,r__2) / anorm / (*m * eps); } else { /* Computing MIN */ r__1 = wnorm / anorm, r__2 = (real) (*m); *result = dmin(r__1,r__2) / (*m * eps); } } return 0; /* End of CGET10 */ } /* cget10_ */
/* Subroutine */ int ztrt05_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublecomplex *xact, integer *ldxact, doublereal *ferr, doublereal *berr, doublereal * reslts) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, xact_dim1, xact_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3, d__4; doublecomplex z__1, z__2; /* Builtin functions */ double d_imag(doublecomplex *); /* Local variables */ static doublereal diff, axbi; static integer imax; static doublereal unfl, ovfl; static logical unit; static integer i__, j, k; extern logical lsame_(char *, char *); static logical upper; static doublereal xnorm; extern doublereal dlamch_(char *); static doublereal errbnd; extern integer izamax_(integer *, doublecomplex *, integer *); static logical notran; static integer ifu; static doublereal eps, tmp; #define xact_subscr(a_1,a_2) (a_2)*xact_dim1 + a_1 #define xact_ref(a_1,a_2) xact[xact_subscr(a_1,a_2)] #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define x_subscr(a_1,a_2) (a_2)*x_dim1 + a_1 #define x_ref(a_1,a_2) x[x_subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= ZTRT05 tests the error bounds from iterative refinement for the computed solution to a system of equations A*X = B, where A is a triangular n by n matrix. RESLTS(1) = test of the error bound = norm(X - XACT) / ( norm(X) * FERR ) A large value is returned if this ratio is not less than one. RESLTS(2) = residual from the iterative refinement routine = the maximum of BERR / ( (n+1)*EPS + (*) ), where (*) = (n+1)*UNFL / (min_i (abs(A)*abs(X) +abs(b))_i ) Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the matrix A is upper or lower triangular. = 'U': Upper triangular = 'L': Lower triangular TRANS (input) CHARACTER*1 Specifies the form of the system of equations. = 'N': A * X = B (No transpose) = 'T': A'* X = B (Transpose) = 'C': A'* X = B (Conjugate transpose = Transpose) DIAG (input) CHARACTER*1 Specifies whether or not the matrix A is unit triangular. = 'N': Non-unit triangular = 'U': Unit triangular N (input) INTEGER The number of rows of the matrices X, B, and XACT, and the order of the matrix A. N >= 0. NRHS (input) INTEGER The number of columns of the matrices X, B, and XACT. NRHS >= 0. A (input) COMPLEX*16 array, dimension (LDA,N) The triangular matrix A. If UPLO = 'U', the leading n by n upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading n by n lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input) COMPLEX*16 array, dimension (LDB,NRHS) The right hand side vectors for the system of linear equations. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input) COMPLEX*16 array, dimension (LDX,NRHS) The computed solution vectors. Each vector is stored as a column of the matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). XACT (input) COMPLEX*16 array, dimension (LDX,NRHS) The exact solution vectors. Each vector is stored as a column of the matrix XACT. LDXACT (input) INTEGER The leading dimension of the array XACT. LDXACT >= max(1,N). FERR (input) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bounds for each solution vector X. If XTRUE is the true solution, FERR bounds the magnitude of the largest entry in (X - XTRUE) divided by the magnitude of the largest entry in X. BERR (input) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector (i.e., the smallest relative change in any entry of A or B that makes X an exact solution). RESLTS (output) DOUBLE PRECISION array, dimension (2) The maximum over the NRHS solution vectors of the ratios: RESLTS(1) = norm(X - XACT) / ( norm(X) * FERR ) RESLTS(2) = BERR / ( (n+1)*EPS + (*) ) ===================================================================== Quick exit if N = 0 or NRHS = 0. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; xact_dim1 = *ldxact; xact_offset = 1 + xact_dim1 * 1; xact -= xact_offset; --ferr; --berr; --reslts; /* Function Body */ if (*n <= 0 || *nrhs <= 0) { reslts[1] = 0.; reslts[2] = 0.; return 0; } eps = dlamch_("Epsilon"); unfl = dlamch_("Safe minimum"); ovfl = 1. / unfl; upper = lsame_(uplo, "U"); notran = lsame_(trans, "N"); unit = lsame_(diag, "U"); /* Test 1: Compute the maximum of norm(X - XACT) / ( norm(X) * FERR ) over all the vectors X and XACT using the infinity-norm. */ errbnd = 0.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { imax = izamax_(n, &x_ref(1, j), &c__1); /* Computing MAX */ i__2 = x_subscr(imax, j); d__3 = (d__1 = x[i__2].r, abs(d__1)) + (d__2 = d_imag(&x_ref(imax, j)) , abs(d__2)); xnorm = max(d__3,unfl); diff = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = x_subscr(i__, j); i__4 = xact_subscr(i__, j); z__2.r = x[i__3].r - xact[i__4].r, z__2.i = x[i__3].i - xact[i__4] .i; z__1.r = z__2.r, z__1.i = z__2.i; /* Computing MAX */ d__3 = diff, d__4 = (d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(& z__1), abs(d__2)); diff = max(d__3,d__4); /* L10: */ } if (xnorm > 1.) { goto L20; } else if (diff <= ovfl * xnorm) { goto L20; } else { errbnd = 1. / eps; goto L30; } L20: if (diff / xnorm <= ferr[j]) { /* Computing MAX */ d__1 = errbnd, d__2 = diff / xnorm / ferr[j]; errbnd = max(d__1,d__2); } else { errbnd = 1. / eps; } L30: ; } reslts[1] = errbnd; /* Test 2: Compute the maximum of BERR / ( (n+1)*EPS + (*) ), where (*) = (n+1)*UNFL / (min_i (abs(A)*abs(X) +abs(b))_i ) */ ifu = 0; if (unit) { ifu = 1; } i__1 = *nrhs; for (k = 1; k <= i__1; ++k) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = b_subscr(i__, k); tmp = (d__1 = b[i__3].r, abs(d__1)) + (d__2 = d_imag(&b_ref(i__, k)), abs(d__2)); if (upper) { if (! notran) { i__3 = i__ - ifu; for (j = 1; j <= i__3; ++j) { i__4 = a_subscr(j, i__); i__5 = x_subscr(j, k); tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a_ref(j, i__)), abs(d__2))) * ((d__3 = x[i__5].r, abs(d__3)) + (d__4 = d_imag(& x_ref(j, k)), abs(d__4))); /* L40: */ } if (unit) { i__3 = x_subscr(i__, k); tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( &x_ref(i__, k)), abs(d__2)); } } else { if (unit) { i__3 = x_subscr(i__, k); tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( &x_ref(i__, k)), abs(d__2)); } i__3 = *n; for (j = i__ + ifu; j <= i__3; ++j) { i__4 = a_subscr(i__, j); i__5 = x_subscr(j, k); tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a_ref(i__, j)), abs(d__2))) * ((d__3 = x[i__5].r, abs(d__3)) + (d__4 = d_imag(& x_ref(j, k)), abs(d__4))); /* L50: */ } } } else { if (notran) { i__3 = i__ - ifu; for (j = 1; j <= i__3; ++j) { i__4 = a_subscr(i__, j); i__5 = x_subscr(j, k); tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a_ref(i__, j)), abs(d__2))) * ((d__3 = x[i__5].r, abs(d__3)) + (d__4 = d_imag(& x_ref(j, k)), abs(d__4))); /* L60: */ } if (unit) { i__3 = x_subscr(i__, k); tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( &x_ref(i__, k)), abs(d__2)); } } else { if (unit) { i__3 = x_subscr(i__, k); tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( &x_ref(i__, k)), abs(d__2)); } i__3 = *n; for (j = i__ + ifu; j <= i__3; ++j) { i__4 = a_subscr(j, i__); i__5 = x_subscr(j, k); tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a_ref(j, i__)), abs(d__2))) * ((d__3 = x[i__5].r, abs(d__3)) + (d__4 = d_imag(& x_ref(j, k)), abs(d__4))); /* L70: */ } } } if (i__ == 1) { axbi = tmp; } else { axbi = min(axbi,tmp); } /* L80: */ } /* Computing MAX */ d__1 = axbi, d__2 = (*n + 1) * unfl; tmp = berr[k] / ((*n + 1) * eps + (*n + 1) * unfl / max(d__1,d__2)); if (k == 1) { reslts[2] = tmp; } else { reslts[2] = max(reslts[2],tmp); } /* L90: */ } return 0; /* End of ZTRT05 */ } /* ztrt05_ */
/* Subroutine */ int cgtrfs_(char *trans, integer *n, integer *nrhs, complex * dl, complex *d__, complex *du, complex *dlf, complex *df, complex * duf, complex *du2, integer *ipiv, complex *b, integer *ldb, complex * x, integer *ldx, real *ferr, real *berr, complex *work, real *rwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CGTRFS improves the computed solution to a system of linear equations when the coefficient matrix is tridiagonal, and provides error bounds and backward error estimates for the solution. Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose) N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. DL (input) COMPLEX array, dimension (N-1) The (n-1) subdiagonal elements of A. D (input) COMPLEX array, dimension (N) The diagonal elements of A. DU (input) COMPLEX array, dimension (N-1) The (n-1) superdiagonal elements of A. DLF (input) COMPLEX array, dimension (N-1) The (n-1) multipliers that define the matrix L from the LU factorization of A as computed by CGTTRF. DF (input) COMPLEX array, dimension (N) The n diagonal elements of the upper triangular matrix U from the LU factorization of A. DUF (input) COMPLEX array, dimension (N-1) The (n-1) elements of the first superdiagonal of U. DU2 (input) COMPLEX array, dimension (N-2) The (n-2) elements of the second superdiagonal of U. IPIV (input) INTEGER array, dimension (N) The pivot indices; for 1 <= i <= n, row i of the matrix was interchanged with row IPIV(i). IPIV(i) will always be either i or i+1; IPIV(i) = i indicates a row interchange was not required. B (input) COMPLEX array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) COMPLEX array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by CGTTRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) REAL array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) REAL array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) COMPLEX array, dimension (2*N) RWORK (workspace) REAL array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static real c_b18 = -1.f; static real c_b19 = 1.f; static complex c_b26 = {1.f,0.f}; /* System generated locals */ integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8, r__9, r__10, r__11, r__12, r__13, r__14; complex q__1; /* Builtin functions */ double r_imag(complex *); /* Local variables */ static integer kase; static real safe1, safe2; static integer i__, j; static real s; extern logical lsame_(char *, char *); extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *), caxpy_(integer *, complex *, complex *, integer *, complex *, integer *); static integer count; extern /* Subroutine */ int clacon_(integer *, complex *, complex *, real *, integer *), clagtm_(char *, integer *, integer *, real *, complex *, complex *, complex *, complex *, integer *, real *, complex *, integer *); static integer nz; extern doublereal slamch_(char *); static real safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static logical notran; static char transn[1]; extern /* Subroutine */ int cgttrs_(char *, integer *, integer *, complex *, complex *, complex *, complex *, integer *, complex *, integer *, integer *); static char transt[1]; static real lstres, eps; #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define x_subscr(a_1,a_2) (a_2)*x_dim1 + a_1 #define x_ref(a_1,a_2) x[x_subscr(a_1,a_2)] --dl; --d__; --du; --dlf; --df; --duf; --du2; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; --ferr; --berr; --work; --rwork; /* Function Body */ *info = 0; notran = lsame_(trans, "N"); if (! notran && ! lsame_(trans, "T") && ! lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*ldb < max(1,*n)) { *info = -13; } else if (*ldx < max(1,*n)) { *info = -15; } if (*info != 0) { i__1 = -(*info); xerbla_("CGTRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.f; berr[j] = 0.f; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transn = 'N'; *(unsigned char *)transt = 'C'; } else { *(unsigned char *)transn = 'C'; *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = 4; eps = slamch_("Epsilon"); safmin = slamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.f; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ ccopy_(n, &b_ref(1, j), &c__1, &work[1], &c__1); clagtm_(trans, n, &c__1, &c_b18, &dl[1], &d__[1], &du[1], &x_ref(1, j) , ldx, &c_b19, &work[1], n); /* Compute abs(op(A))*abs(x) + abs(b) for use in the backward error bound. */ if (notran) { if (*n == 1) { i__2 = b_subscr(1, j); i__3 = x_subscr(1, j); rwork[1] = (r__1 = b[i__2].r, dabs(r__1)) + (r__2 = r_imag(& b_ref(1, j)), dabs(r__2)) + ((r__3 = d__[1].r, dabs( r__3)) + (r__4 = r_imag(&d__[1]), dabs(r__4))) * (( r__5 = x[i__3].r, dabs(r__5)) + (r__6 = r_imag(&x_ref( 1, j)), dabs(r__6))); } else { i__2 = b_subscr(1, j); i__3 = x_subscr(1, j); i__4 = x_subscr(2, j); rwork[1] = (r__1 = b[i__2].r, dabs(r__1)) + (r__2 = r_imag(& b_ref(1, j)), dabs(r__2)) + ((r__3 = d__[1].r, dabs( r__3)) + (r__4 = r_imag(&d__[1]), dabs(r__4))) * (( r__5 = x[i__3].r, dabs(r__5)) + (r__6 = r_imag(&x_ref( 1, j)), dabs(r__6))) + ((r__7 = du[1].r, dabs(r__7)) + (r__8 = r_imag(&du[1]), dabs(r__8))) * ((r__9 = x[ i__4].r, dabs(r__9)) + (r__10 = r_imag(&x_ref(2, j)), dabs(r__10))); i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { i__3 = b_subscr(i__, j); i__4 = i__ - 1; i__5 = x_subscr(i__ - 1, j); i__6 = i__; i__7 = x_subscr(i__, j); i__8 = i__; i__9 = x_subscr(i__ + 1, j); rwork[i__] = (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(&b_ref(i__, j)), dabs(r__2)) + ((r__3 = dl[ i__4].r, dabs(r__3)) + (r__4 = r_imag(&dl[i__ - 1] ), dabs(r__4))) * ((r__5 = x[i__5].r, dabs(r__5)) + (r__6 = r_imag(&x_ref(i__ - 1, j)), dabs(r__6))) + ((r__7 = d__[i__6].r, dabs(r__7)) + (r__8 = r_imag(&d__[i__]), dabs(r__8))) * ((r__9 = x[i__7] .r, dabs(r__9)) + (r__10 = r_imag(&x_ref(i__, j)), dabs(r__10))) + ((r__11 = du[i__8].r, dabs(r__11) ) + (r__12 = r_imag(&du[i__]), dabs(r__12))) * (( r__13 = x[i__9].r, dabs(r__13)) + (r__14 = r_imag( &x_ref(i__ + 1, j)), dabs(r__14))); /* L30: */ } i__2 = b_subscr(*n, j); i__3 = *n - 1; i__4 = x_subscr(*n - 1, j); i__5 = *n; i__6 = x_subscr(*n, j); rwork[*n] = (r__1 = b[i__2].r, dabs(r__1)) + (r__2 = r_imag(& b_ref(*n, j)), dabs(r__2)) + ((r__3 = dl[i__3].r, dabs(r__3)) + (r__4 = r_imag(&dl[*n - 1]), dabs(r__4)) ) * ((r__5 = x[i__4].r, dabs(r__5)) + (r__6 = r_imag(& x_ref(*n - 1, j)), dabs(r__6))) + ((r__7 = d__[i__5] .r, dabs(r__7)) + (r__8 = r_imag(&d__[*n]), dabs(r__8) )) * ((r__9 = x[i__6].r, dabs(r__9)) + (r__10 = r_imag(&x_ref(*n, j)), dabs(r__10))); } } else { if (*n == 1) { i__2 = b_subscr(1, j); i__3 = x_subscr(1, j); rwork[1] = (r__1 = b[i__2].r, dabs(r__1)) + (r__2 = r_imag(& b_ref(1, j)), dabs(r__2)) + ((r__3 = d__[1].r, dabs( r__3)) + (r__4 = r_imag(&d__[1]), dabs(r__4))) * (( r__5 = x[i__3].r, dabs(r__5)) + (r__6 = r_imag(&x_ref( 1, j)), dabs(r__6))); } else { i__2 = b_subscr(1, j); i__3 = x_subscr(1, j); i__4 = x_subscr(2, j); rwork[1] = (r__1 = b[i__2].r, dabs(r__1)) + (r__2 = r_imag(& b_ref(1, j)), dabs(r__2)) + ((r__3 = d__[1].r, dabs( r__3)) + (r__4 = r_imag(&d__[1]), dabs(r__4))) * (( r__5 = x[i__3].r, dabs(r__5)) + (r__6 = r_imag(&x_ref( 1, j)), dabs(r__6))) + ((r__7 = dl[1].r, dabs(r__7)) + (r__8 = r_imag(&dl[1]), dabs(r__8))) * ((r__9 = x[ i__4].r, dabs(r__9)) + (r__10 = r_imag(&x_ref(2, j)), dabs(r__10))); i__2 = *n - 1; for (i__ = 2; i__ <= i__2; ++i__) { i__3 = b_subscr(i__, j); i__4 = i__ - 1; i__5 = x_subscr(i__ - 1, j); i__6 = i__; i__7 = x_subscr(i__, j); i__8 = i__; i__9 = x_subscr(i__ + 1, j); rwork[i__] = (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(&b_ref(i__, j)), dabs(r__2)) + ((r__3 = du[ i__4].r, dabs(r__3)) + (r__4 = r_imag(&du[i__ - 1] ), dabs(r__4))) * ((r__5 = x[i__5].r, dabs(r__5)) + (r__6 = r_imag(&x_ref(i__ - 1, j)), dabs(r__6))) + ((r__7 = d__[i__6].r, dabs(r__7)) + (r__8 = r_imag(&d__[i__]), dabs(r__8))) * ((r__9 = x[i__7] .r, dabs(r__9)) + (r__10 = r_imag(&x_ref(i__, j)), dabs(r__10))) + ((r__11 = dl[i__8].r, dabs(r__11) ) + (r__12 = r_imag(&dl[i__]), dabs(r__12))) * (( r__13 = x[i__9].r, dabs(r__13)) + (r__14 = r_imag( &x_ref(i__ + 1, j)), dabs(r__14))); /* L40: */ } i__2 = b_subscr(*n, j); i__3 = *n - 1; i__4 = x_subscr(*n - 1, j); i__5 = *n; i__6 = x_subscr(*n, j); rwork[*n] = (r__1 = b[i__2].r, dabs(r__1)) + (r__2 = r_imag(& b_ref(*n, j)), dabs(r__2)) + ((r__3 = du[i__3].r, dabs(r__3)) + (r__4 = r_imag(&du[*n - 1]), dabs(r__4)) ) * ((r__5 = x[i__4].r, dabs(r__5)) + (r__6 = r_imag(& x_ref(*n - 1, j)), dabs(r__6))) + ((r__7 = d__[i__5] .r, dabs(r__7)) + (r__8 = r_imag(&d__[*n]), dabs(r__8) )) * ((r__9 = x[i__6].r, dabs(r__9)) + (r__10 = r_imag(&x_ref(*n, j)), dabs(r__10))); } } /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th components of the numerator and denominator before dividing. */ s = 0.f; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (rwork[i__] > safe2) { /* Computing MAX */ i__3 = i__; r__3 = s, r__4 = ((r__1 = work[i__3].r, dabs(r__1)) + (r__2 = r_imag(&work[i__]), dabs(r__2))) / rwork[i__]; s = dmax(r__3,r__4); } else { /* Computing MAX */ i__3 = i__; r__3 = s, r__4 = ((r__1 = work[i__3].r, dabs(r__1)) + (r__2 = r_imag(&work[i__]), dabs(r__2)) + safe1) / (rwork[i__] + safe1); s = dmax(r__3,r__4); } /* L50: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) { /* Update solution and try again. */ cgttrs_(trans, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], &ipiv[ 1], &work[1], n, info); caxpy_(n, &c_b26, &work[1], &c__1, &x_ref(1, j), &c__1); lstres = berr[j]; ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use CLACON to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (rwork[i__] > safe2) { i__3 = i__; rwork[i__] = (r__1 = work[i__3].r, dabs(r__1)) + (r__2 = r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[ i__]; } else { i__3 = i__; rwork[i__] = (r__1 = work[i__3].r, dabs(r__1)) + (r__2 = r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[ i__] + safe1; } /* L60: */ } kase = 0; L70: clacon_(n, &work[*n + 1], &work[1], &ferr[j], &kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**H). */ cgttrs_(transt, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & ipiv[1], &work[1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = i__; q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] * work[i__5].i; work[i__3].r = q__1.r, work[i__3].i = q__1.i; /* L80: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = i__; q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] * work[i__5].i; work[i__3].r = q__1.r, work[i__3].i = q__1.i; /* L90: */ } cgttrs_(transn, n, &c__1, &dlf[1], &df[1], &duf[1], &du2[1], & ipiv[1], &work[1], n, info); } goto L70; } /* Normalize error. */ lstres = 0.f; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = x_subscr(i__, j); r__3 = lstres, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x_ref(i__, j)), dabs(r__2)); lstres = dmax(r__3,r__4); /* L100: */ } if (lstres != 0.f) { ferr[j] /= lstres; } /* L110: */ } return 0; /* End of CGTRFS */ } /* cgtrfs_ */