/* Subroutine */ int sstevd_(char *jobz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= SSTEVD computes all eigenvalues and, optionally, eigenvectors of a real symmetric tridiagonal matrix. If eigenvectors are desired, it uses a divide and conquer algorithm. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. N (input) INTEGER The order of the matrix. N >= 0. D (input/output) REAL array, dimension (N) On entry, the n diagonal elements of the tridiagonal matrix A. On exit, if INFO = 0, the eigenvalues in ascending order. E (input/output) REAL array, dimension (N) On entry, the (n-1) subdiagonal elements of the tridiagonal matrix A, stored in elements 1 to N-1 of E; E(N) need not be set, but is used by the routine. On exit, the contents of E are destroyed. Z (output) REAL array, dimension (LDZ, N) If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal eigenvectors of the matrix A, with the i-th column of Z holding the eigenvector associated with D(i). If JOBZ = 'N', then Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If JOBZ = 'N' or N <= 1 then LWORK must be at least 1. If JOBZ = 'V' and N > 1 then LWORK must be at least ( 1 + 4*N + N**2 ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace/output) INTEGER array, dimension (LIWORK) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER The dimension of the array IWORK. If JOBZ = 'N' or N <= 1 then LIWORK must be at least 1. If JOBZ = 'V' and N > 1 then LIWORK must be at least 3+5*N. If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, returns this value as the first entry of the IWORK array, and no error message related to LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the algorithm failed to converge; i off-diagonal elements of E did not converge to zero. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer z_dim1, z_offset, i__1; real r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static real rmin, rmax, tnrm, sigma; extern logical lsame_(char *, char *); extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static integer lwmin; static logical wantz; static integer iscale; extern doublereal slamch_(char *); static real safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static real bignum; extern /* Subroutine */ int sstedc_(char *, integer *, real *, real *, real *, integer *, real *, integer *, integer *, integer *, integer *); static integer liwmin; extern doublereal slanst_(char *, integer *, real *, real *); extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); static real smlnum; static logical lquery; static real eps; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] --d__; --e; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --iwork; /* Function Body */ wantz = lsame_(jobz, "V"); lquery = *lwork == -1 || *liwork == -1; *info = 0; liwmin = 1; lwmin = 1; if (*n > 1 && wantz) { /* Computing 2nd power */ i__1 = *n; lwmin = (*n << 2) + 1 + i__1 * i__1; liwmin = *n * 5 + 3; } if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -6; } else if (*lwork < lwmin && ! lquery) { *info = -8; } else if (*liwork < liwmin && ! lquery) { *info = -10; } if (*info == 0) { work[1] = (real) lwmin; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); xerbla_("SSTEVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (wantz) { z___ref(1, 1) = 1.f; } return 0; } /* Get machine constants. */ safmin = slamch_("Safe minimum"); eps = slamch_("Precision"); smlnum = safmin / eps; bignum = 1.f / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ iscale = 0; tnrm = slanst_("M", n, &d__[1], &e[1]); if (tnrm > 0.f && tnrm < rmin) { iscale = 1; sigma = rmin / tnrm; } else if (tnrm > rmax) { iscale = 1; sigma = rmax / tnrm; } if (iscale == 1) { sscal_(n, &sigma, &d__[1], &c__1); i__1 = *n - 1; sscal_(&i__1, &sigma, &e[1], &c__1); } /* For eigenvalues only, call SSTERF. For eigenvalues and eigenvectors, call SSTEDC. */ if (! wantz) { ssterf_(n, &d__[1], &e[1], info); } else { sstedc_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], lwork, &iwork[1], liwork, info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { r__1 = 1.f / sigma; sscal_(n, &r__1, &d__[1], &c__1); } work[1] = (real) lwmin; iwork[1] = liwmin; return 0; /* End of SSTEVD */ } /* sstevd_ */
/* Subroutine */ int clalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real *z__, real *poles, integer *givptr, integer *givcol, integer * ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CLALSA is an itermediate step in solving the least squares problem by computing the SVD of the coefficient matrix in compact form (The singular vectors are computed as products of simple orthorgonal matrices.). If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector matrix of an upper bidiagonal matrix to the right hand side; and if ICOMPQ = 1, CLALSA applies the right singular vector matrix to the right hand side. The singular vector matrices were generated in compact form by CLALSA. Arguments ========= ICOMPQ (input) INTEGER Specifies whether the left or the right singular vector matrix is involved. = 0: Left singular vector matrix = 1: Right singular vector matrix SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The row and column dimensions of the upper bidiagonal matrix. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input) COMPLEX array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B in the calling subprogram. LDB must be at least max(1,MAX( M, N ) ). BX (output) COMPLEX array, dimension ( LDBX, NRHS ) On exit, the result of applying the left or right singular vector matrix to B. LDBX (input) INTEGER The leading dimension of BX. U (input) REAL array, dimension ( LDU, SMLSIZ ). On entry, U contains the left singular vector matrices of all subproblems at the bottom level. LDU (input) INTEGER, LDU = > N. The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). On entry, VT' contains the right singular vector matrices of all subproblems at the bottom level. K (input) INTEGER array, dimension ( N ). DIFL (input) REAL array, dimension ( LDU, NLVL ). where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(*, 2 * I) record the normalizing factors of the right singular vectors matrices of subproblems on I-th level. Z (input) REAL array, dimension ( LDU, NLVL ). On entry, Z(1, I) contains the components of the deflation- adjusted updating row vector for subproblems on the I-th level. POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old singular values involved in the secular equations on the I-th level. GIVPTR (input) INTEGER array, dimension ( N ). On entry, GIVPTR( I ) records the number of Givens rotations performed on the I-th problem on the computation tree. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the locations of Givens rotations performed on the I-th level on the computation tree. LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). On entry, PERM(*, I) records permutations done on the I-th level of the computation tree. GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- values of Givens rotations performed on the I-th level on the computation tree. C (input) REAL array, dimension ( N ). On entry, if the I-th subproblem is not square, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. S (input) REAL array, dimension ( N ). On entry, if the I-th subproblem is not square, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. RWORK (workspace) REAL array, dimension at least max ( N, (SMLSZ+1)*NRHS*3 ). IWORK (workspace) INTEGER array. The dimension must be at least 3 * N INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== Based on contributions by Ming Gu and Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA Osni Marques, LBNL/NERSC, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b9 = 1.f; static real c_b10 = 0.f; static integer c__2 = 2; /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5, i__6; complex q__1; /* Builtin functions */ double r_imag(complex *); integer pow_ii(integer *, integer *); /* Local variables */ static integer jcol, nlvl, sqre, jrow, i__, j, jimag, jreal, inode, ndiml; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static integer ndimr; extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *); static integer i1; extern /* Subroutine */ int clals0_(integer *, integer *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, integer *, integer *, real *, integer *, real *, real *, real *, real *, integer *, real *, real *, real *, integer *); static integer ic, lf, nd, ll, nl, nr; extern /* Subroutine */ int xerbla_(char *, integer *), slasdt_( integer *, integer *, integer *, integer *, integer *, integer *, integer *); static integer im1, nlf, nrf, lvl, ndb1, nlp1, lvl2, nrp1; #define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1] #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_subscr(a_1,a_2) (a_2)*bx_dim1 + a_1 #define bx_ref(a_1,a_2) bx[bx_subscr(a_1,a_2)] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1 * 1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1 * 1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; --c__; --s; --rwork; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < *smlsiz) { *info = -3; } else if (*nrhs < 1) { *info = -4; } else if (*ldb < *n) { *info = -6; } else if (*ldbx < *n) { *info = -8; } else if (*ldu < *n) { *info = -10; } else if (*ldgcol < *n) { *info = -19; } if (*info != 0) { i__1 = -(*info); xerbla_("CLALSA", &i__1); return 0; } /* Book-keeping and setting up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* The following code applies back the left singular vector factors. For applying back the right singular vector factors, go to 170. */ if (*icompq == 1) { goto L170; } /* The nodes on the bottom level of the tree were solved by SLASDQ. The corresponding left and right singular vector matrices are in explicit form. First apply back the left singular vector matrices. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */ j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L10: */ } /* L20: */ } sgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &rwork[( nl * *nrhs << 1) + 1], &nl, &c_b10, &rwork[1], &nl); j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L30: */ } /* L40: */ } sgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &rwork[( nl * *nrhs << 1) + 1], &nl, &c_b10, &rwork[nl * *nrhs + 1], & nl); jreal = 0; jimag = nl * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L50: */ } /* L60: */ } /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */ j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L70: */ } /* L80: */ } sgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &rwork[( nr * *nrhs << 1) + 1], &nr, &c_b10, &rwork[1], &nr); j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L90: */ } /* L100: */ } sgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &rwork[( nr * *nrhs << 1) + 1], &nr, &c_b10, &rwork[nr * *nrhs + 1], & nr); jreal = 0; jimag = nr * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L110: */ } /* L120: */ } /* L130: */ } /* Next copy the rows of B that correspond to unchanged rows in the bidiagonal matrix to BX. */ i__1 = nd; for (i__ = 1; i__ <= i__1; ++i__) { ic = iwork[inode + i__ - 1]; ccopy_(nrhs, &b_ref(ic, 1), ldb, &bx_ref(ic, 1), ldbx); /* L140: */ } /* Finally go through the left singular vector matrices of all the other subproblems bottom-up on the tree. */ j = pow_ii(&c__2, &nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* find the first node LF and last node LL on the current level LVL */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; --j; clals0_(icompq, &nl, &nr, &sqre, nrhs, &bx_ref(nlf, 1), ldbx, & b_ref(nlf, 1), ldb, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &rwork[1], info); /* L150: */ } /* L160: */ } goto L330; /* ICOMPQ = 1: applying back the right singular vector factors. */ L170: /* First now go through the right singular vector matrices of all the tree nodes top-down. */ j = 0; i__1 = nlvl; for (lvl = 1; lvl <= i__1; ++lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__2 = lvl - 1; lf = pow_ii(&c__2, &i__2); ll = (lf << 1) - 1; } i__2 = lf; for (i__ = ll; i__ >= i__2; --i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqre = 0; } else { sqre = 1; } ++j; clals0_(icompq, &nl, &nr, &sqre, nrhs, &b_ref(nlf, 1), ldb, & bx_ref(nlf, 1), ldbx, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &rwork[1], info); /* L180: */ } /* L190: */ } /* The nodes on the bottom level of the tree were solved by SLASDQ. The corresponding right singular vector matrices are in explicit form. Apply them back. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlp1 = nl + 1; if (i__ == nd) { nrp1 = nr; } else { nrp1 = nr + 1; } nlf = ic - nl; nrf = ic + 1; /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */ j = nlp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L200: */ } /* L210: */ } sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b10, &rwork[1], & nlp1); j = nlp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L220: */ } /* L230: */ } sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b10, &rwork[nlp1 * * nrhs + 1], &nlp1); jreal = 0; jimag = nlp1 * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L240: */ } /* L250: */ } /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */ j = nrp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L260: */ } /* L270: */ } sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b10, &rwork[1], & nrp1); j = nrp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L280: */ } /* L290: */ } sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b10, &rwork[nrp1 * * nrhs + 1], &nrp1); jreal = 0; jimag = nrp1 * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L300: */ } /* L310: */ } /* L320: */ } L330: return 0; /* End of CLALSA */ } /* clalsa_ */
/* Subroutine */ int cdrgev_(integer *nsizes, integer *nn, integer *ntypes, logical *dotype, integer *iseed, real *thresh, integer *nounit, complex *a, integer *lda, complex *b, complex *s, complex *t, complex *q, integer *ldq, complex *z__, complex *qe, integer *ldqe, complex * alpha, complex *beta, complex *alpha1, complex *beta1, complex *work, integer *lwork, real *rwork, real *result, integer *info) { /* Initialized data */ static integer kclass[26] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2, 2,2,2,3 }; static integer kbmagn[26] = { 1,1,1,1,1,1,1,1,3,2,3,2,2,3,1,1,1,1,1,1,1,3, 2,3,2,1 }; static integer ktrian[26] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, 1,1,1,1 }; static logical lasign[26] = { FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_, TRUE_,FALSE_,TRUE_,TRUE_,FALSE_,FALSE_,TRUE_,TRUE_,TRUE_,FALSE_, TRUE_,FALSE_,FALSE_,FALSE_,TRUE_,TRUE_,TRUE_,TRUE_,TRUE_,FALSE_ }; static logical lbsign[26] = { FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_, FALSE_,TRUE_,FALSE_,FALSE_,TRUE_,TRUE_,FALSE_,FALSE_,TRUE_,FALSE_, TRUE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_, FALSE_ }; static integer kz1[6] = { 0,1,2,1,3,3 }; static integer kz2[6] = { 0,0,1,2,1,1 }; static integer kadd[6] = { 0,0,0,0,3,2 }; static integer katype[26] = { 0,1,0,1,2,3,4,1,4,4,1,1,4,4,4,2,4,5,8,7,9,4, 4,4,4,0 }; static integer kbtype[26] = { 0,0,1,1,2,-3,1,4,1,1,4,4,1,1,-4,2,-4,8,8,8, 8,8,8,8,8,0 }; static integer kazero[26] = { 1,1,1,1,1,1,2,1,2,2,1,1,2,2,3,1,3,5,5,5,5,3, 3,3,3,1 }; static integer kbzero[26] = { 1,1,1,1,1,1,1,2,1,1,2,2,1,1,4,1,4,6,6,6,6,4, 4,4,4,1 }; static integer kamagn[26] = { 1,1,1,1,1,1,1,1,2,3,2,3,2,3,1,1,1,1,1,1,1,2, 3,3,2,1 }; /* Format strings */ static char fmt_9999[] = "(\002 CDRGEV: \002,a,\002 returned INFO=\002,i" "6,\002.\002,/3x,\002N=\002,i6,\002, JTYPE=\002,i6,\002, ISEED=" "(\002,3(i5,\002,\002),i5,\002)\002)"; static char fmt_9998[] = "(\002 CDRGEV: \002,a,\002 Eigenvectors from" " \002,a,\002 incorrectly \002,\002normalized.\002,/\002 Bits of " "error=\002,0p,g10.3,\002,\002,3x,\002N=\002,i4,\002, JTYPE=\002," "i3,\002, ISEED=(\002,3(i4,\002,\002),i5,\002)\002)"; static char fmt_9997[] = "(/1x,a3,\002 -- Complex Generalized eigenvalue" " problem \002,\002driver\002)"; static char fmt_9996[] = "(\002 Matrix types (see CDRGEV for details):" " \002)"; static char fmt_9995[] = "(\002 Special Matrices:\002,23x,\002(J'=transp" "osed Jordan block)\002,/\002 1=(0,0) 2=(I,0) 3=(0,I) 4=(I,I" ") 5=(J',J') \002,\0026=(diag(J',I), diag(I,J'))\002,/\002 Diag" "onal Matrices: ( \002,\002D=diag(0,1,2,...) )\002,/\002 7=(D," "I) 9=(large*D, small*I\002,\002) 11=(large*I, small*D) 13=(l" "arge*D, large*I)\002,/\002 8=(I,D) 10=(small*D, large*I) 12=" "(small*I, large*D) \002,\002 14=(small*D, small*I)\002,/\002 15" "=(D, reversed D)\002)"; static char fmt_9994[] = "(\002 Matrices Rotated by Random \002,a,\002 M" "atrices U, V:\002,/\002 16=Transposed Jordan Blocks " " 19=geometric \002,\002alpha, beta=0,1\002,/\002 17=arithm. alp" "ha&beta \002,\002 20=arithmetic alpha, beta=0," "1\002,/\002 18=clustered \002,\002alpha, beta=0,1 21" "=random alpha, beta=0,1\002,/\002 Large & Small Matrices:\002," "/\002 22=(large, small) \002,\00223=(small,large) 24=(smal" "l,small) 25=(large,large)\002,/\002 26=random O(1) matrices" ".\002)"; static char fmt_9993[] = "(/\002 Tests performed: \002,/\002 1 = max " "| ( b A - a B )'*l | / const.,\002,/\002 2 = | |VR(i)| - 1 | / u" "lp,\002,/\002 3 = max | ( b A - a B )*r | / const.\002,/\002 4 =" " | |VL(i)| - 1 | / ulp,\002,/\002 5 = 0 if W same no matter if r" " or l computed,\002,/\002 6 = 0 if l same no matter if l compute" "d,\002,/\002 7 = 0 if r same no matter if r computed,\002,/1x)"; static char fmt_9992[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2" ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i2,\002 is\002" ",0p,f8.2)"; static char fmt_9991[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2" ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i2,\002 is\002" ",1p,e10.3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, qe_dim1, qe_offset, s_dim1, s_offset, t_dim1, t_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; real r__1, r__2; complex q__1, q__2, q__3; /* Builtin functions */ double r_sign(real *, real *), c_abs(complex *); void r_cnjg(complex *, complex *); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ static integer iadd, ierr, nmax, i__, j, n; static logical badnn; extern /* Subroutine */ int cget52_(logical *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, real *, real *), cggev_(char *, char *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, integer *, complex *, integer *, complex *, integer *, real *, integer *); static real rmagn[4]; static complex ctemp; static integer nmats, jsize, nerrs, jtype, n1; extern /* Subroutine */ int clatm4_(integer *, integer *, integer *, integer *, logical *, real *, real *, real *, integer *, integer * , complex *, integer *), cunm2r_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *); static integer jc, nb, in; extern /* Subroutine */ int slabad_(real *, real *); static integer jr; extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, integer *, complex *); extern /* Complex */ VOID clarnd_(complex *, integer *, integer *); extern doublereal slamch_(char *); extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *); static real safmin, safmax; static integer ioldsd[4]; extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int alasvm_(char *, integer *, integer *, integer *, integer *), xerbla_(char *, integer *); static integer minwrk, maxwrk; static real ulpinv; static integer mtypes, ntestt; static real ulp; /* Fortran I/O blocks */ static cilist io___40 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___42 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___44 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___46 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___47 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___54 = { 0, 0, 0, fmt_9991, 0 }; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1 #define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)] #define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1 #define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)] #define qe_subscr(a_1,a_2) (a_2)*qe_dim1 + a_1 #define qe_ref(a_1,a_2) qe[qe_subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CDRGEV checks the nonsymmetric generalized eigenvalue problem driver routine CGGEV. CGGEV computes for a pair of n-by-n nonsymmetric matrices (A,B) the generalized eigenvalues and, optionally, the left and right eigenvectors. A generalized eigenvalue for a pair of matrices (A,B) is a scalar w or a ratio alpha/beta = w, such that A - w*B is singular. It is usually represented as the pair (alpha,beta), as there is reasonalbe interpretation for beta=0, and even for both being zero. A right generalized eigenvector corresponding to a generalized eigenvalue w for a pair of matrices (A,B) is a vector r such that (A - wB) * r = 0. A left generalized eigenvector is a vector l such that l**H * (A - wB) = 0, where l**H is the conjugate-transpose of l. When CDRGEV is called, a number of matrix "sizes" ("n's") and a number of matrix "types" are specified. For each size ("n") and each type of matrix, a pair of matrices (A, B) will be generated and used for testing. For each matrix pair, the following tests will be performed and compared with the threshhold THRESH. Results from CGGEV: (1) max over all left eigenvalue/-vector pairs (alpha/beta,l) of | VL**H * (beta A - alpha B) |/( ulp max(|beta A|, |alpha B|) ) where VL**H is the conjugate-transpose of VL. (2) | |VL(i)| - 1 | / ulp and whether largest component real VL(i) denotes the i-th column of VL. (3) max over all left eigenvalue/-vector pairs (alpha/beta,r) of | (beta A - alpha B) * VR | / ( ulp max(|beta A|, |alpha B|) ) (4) | |VR(i)| - 1 | / ulp and whether largest component real VR(i) denotes the i-th column of VR. (5) W(full) = W(partial) W(full) denotes the eigenvalues computed when both l and r are also computed, and W(partial) denotes the eigenvalues computed when only W, only W and r, or only W and l are computed. (6) VL(full) = VL(partial) VL(full) denotes the left eigenvectors computed when both l and r are computed, and VL(partial) denotes the result when only l is computed. (7) VR(full) = VR(partial) VR(full) denotes the right eigenvectors computed when both l and r are also computed, and VR(partial) denotes the result when only l is computed. Test Matrices ---- -------- The sizes of the test matrices are specified by an array NN(1:NSIZES); the value of each element NN(j) specifies one size. The "types" are specified by a logical array DOTYPE( 1:NTYPES ); if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. Currently, the list of possible types is: (1) ( 0, 0 ) (a pair of zero matrices) (2) ( I, 0 ) (an identity and a zero matrix) (3) ( 0, I ) (an identity and a zero matrix) (4) ( I, I ) (a pair of identity matrices) t t (5) ( J , J ) (a pair of transposed Jordan blocks) t ( I 0 ) (6) ( X, Y ) where X = ( J 0 ) and Y = ( t ) ( 0 I ) ( 0 J ) and I is a k x k identity and J a (k+1)x(k+1) Jordan block; k=(N-1)/2 (7) ( D, I ) where D is diag( 0, 1,..., N-1 ) (a diagonal matrix with those diagonal entries.) (8) ( I, D ) (9) ( big*D, small*I ) where "big" is near overflow and small=1/big (10) ( small*D, big*I ) (11) ( big*I, small*D ) (12) ( small*I, big*D ) (13) ( big*D, big*I ) (14) ( small*D, small*I ) (15) ( D1, D2 ) where D1 is diag( 0, 0, 1, ..., N-3, 0 ) and D2 is diag( 0, N-3, N-4,..., 1, 0, 0 ) t t (16) Q ( J , J ) Z where Q and Z are random orthogonal matrices. (17) Q ( T1, T2 ) Z where T1 and T2 are upper triangular matrices with random O(1) entries above the diagonal and diagonal entries diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) and diag(T2) = ( 0, N-3, N-4,..., 1, 0, 0 ) (18) Q ( T1, T2 ) Z diag(T1) = ( 0, 0, 1, 1, s, ..., s, 0 ) diag(T2) = ( 0, 1, 0, 1,..., 1, 0 ) s = machine precision. (19) Q ( T1, T2 ) Z diag(T1)=( 0,0,1,1, 1-d, ..., 1-(N-5)*d=s, 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0 ) N-5 (20) Q ( T1, T2 ) Z diag(T1)=( 0, 0, 1, 1, a, ..., a =s, 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 ) (21) Q ( T1, T2 ) Z diag(T1)=( 0, 0, 1, r1, r2, ..., r(N-4), 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 ) where r1,..., r(N-4) are random. (22) Q ( big*T1, small*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (23) Q ( small*T1, big*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (24) Q ( small*T1, small*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (25) Q ( big*T1, big*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (26) Q ( T1, T2 ) Z where T1 and T2 are random upper-triangular matrices. Arguments ========= NSIZES (input) INTEGER The number of sizes of matrices to use. If it is zero, CDRGES does nothing. NSIZES >= 0. NN (input) INTEGER array, dimension (NSIZES) An array containing the sizes to be used for the matrices. Zero values will be skipped. NN >= 0. NTYPES (input) INTEGER The number of elements in DOTYPE. If it is zero, CDRGEV does nothing. It must be at least zero. If it is MAXTYP+1 and NSIZES is 1, then an additional type, MAXTYP+1 is defined, which is to use whatever matrix is in A. This is only useful if DOTYPE(1:MAXTYP) is .FALSE. and DOTYPE(MAXTYP+1) is .TRUE. . DOTYPE (input) LOGICAL array, dimension (NTYPES) If DOTYPE(j) is .TRUE., then for each size in NN a matrix of that size and of type j will be generated. If NTYPES is smaller than the maximum number of types defined (PARAMETER MAXTYP), then types NTYPES+1 through MAXTYP will not be generated. If NTYPES is larger than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) will be ignored. ISEED (input/output) INTEGER array, dimension (4) On entry ISEED specifies the seed of the random number generator. The array elements should be between 0 and 4095; if not they will be reduced mod 4096. Also, ISEED(4) must be odd. The random number generator uses a linear congruential sequence limited to small integers, and so should produce machine independent random numbers. The values of ISEED are changed on exit, and can be used in the next call to CDRGES to continue the same random number sequence. THRESH (input) REAL A test will count as "failed" if the "error", computed as described above, exceeds THRESH. Note that the error is scaled to be O(1), so THRESH should be a reasonably small multiple of 1, e.g., 10 or 100. In particular, it should not depend on the precision (single vs. double) or the size of the matrix. It must be at least zero. NOUNIT (input) INTEGER The FORTRAN unit number for printing out error messages (e.g., if a routine returns IERR not equal to 0.) A (input/workspace) COMPLEX array, dimension(LDA, max(NN)) Used to hold the original A matrix. Used as input only if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and DOTYPE(MAXTYP+1)=.TRUE. LDA (input) INTEGER The leading dimension of A, B, S, and T. It must be at least 1 and at least max( NN ). B (input/workspace) COMPLEX array, dimension(LDA, max(NN)) Used to hold the original B matrix. Used as input only if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and DOTYPE(MAXTYP+1)=.TRUE. S (workspace) COMPLEX array, dimension (LDA, max(NN)) The Schur form matrix computed from A by CGGEV. On exit, S contains the Schur form matrix corresponding to the matrix in A. T (workspace) COMPLEX array, dimension (LDA, max(NN)) The upper triangular matrix computed from B by CGGEV. Q (workspace) COMPLEX array, dimension (LDQ, max(NN)) The (left) eigenvectors matrix computed by CGGEV. LDQ (input) INTEGER The leading dimension of Q and Z. It must be at least 1 and at least max( NN ). Z (workspace) COMPLEX array, dimension( LDQ, max(NN) ) The (right) orthogonal matrix computed by CGGEV. QE (workspace) COMPLEX array, dimension( LDQ, max(NN) ) QE holds the computed right or left eigenvectors. LDQE (input) INTEGER The leading dimension of QE. LDQE >= max(1,max(NN)). ALPHA (workspace) COMPLEX array, dimension (max(NN)) BETA (workspace) COMPLEX array, dimension (max(NN)) The generalized eigenvalues of (A,B) computed by CGGEV. ( ALPHAR(k)+ALPHAI(k)*i ) / BETA(k) is the k-th generalized eigenvalue of A and B. ALPHA1 (workspace) COMPLEX array, dimension (max(NN)) BETA1 (workspace) COMPLEX array, dimension (max(NN)) Like ALPHAR, ALPHAI, BETA, these arrays contain the eigenvalues of A and B, but those computed when CGGEV only computes a partial eigendecomposition, i.e. not the eigenvalues and left and right eigenvectors. WORK (workspace) COMPLEX array, dimension (LWORK) LWORK (input) INTEGER The number of entries in WORK. LWORK >= N*(N+1) RWORK (workspace) REAL array, dimension (8*N) Real workspace. RESULT (output) REAL array, dimension (2) The values computed by the tests described above. The values are currently limited to 1/ulp, to avoid overflow. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: A routine returned an error code. INFO is the absolute value of the INFO value returned. ===================================================================== Parameter adjustments */ --nn; --dotype; --iseed; t_dim1 = *lda; t_offset = 1 + t_dim1 * 1; t -= t_offset; s_dim1 = *lda; s_offset = 1 + s_dim1 * 1; s -= s_offset; b_dim1 = *lda; b_offset = 1 + b_dim1 * 1; b -= b_offset; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; z_dim1 = *ldq; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; qe_dim1 = *ldqe; qe_offset = 1 + qe_dim1 * 1; qe -= qe_offset; --alpha; --beta; --alpha1; --beta1; --work; --rwork; --result; /* Function Body Check for errors */ *info = 0; badnn = FALSE_; nmax = 1; i__1 = *nsizes; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = nmax, i__3 = nn[j]; nmax = max(i__2,i__3); if (nn[j] < 0) { badnn = TRUE_; } /* L10: */ } if (*nsizes < 0) { *info = -1; } else if (badnn) { *info = -2; } else if (*ntypes < 0) { *info = -3; } else if (*thresh < 0.f) { *info = -6; } else if (*lda <= 1 || *lda < nmax) { *info = -9; } else if (*ldq <= 1 || *ldq < nmax) { *info = -14; } else if (*ldqe <= 1 || *ldqe < nmax) { *info = -17; } /* Compute workspace (Note: Comments in the code beginning "Workspace:" describe the minimal amount of workspace needed at that point in the code, as well as the preferred amount for good performance. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV. */ minwrk = 1; if (*info == 0 && *lwork >= 1) { minwrk = nmax * (nmax + 1); /* Computing MAX */ i__1 = 1, i__2 = ilaenv_(&c__1, "CGEQRF", " ", &nmax, &nmax, &c_n1, & c_n1, (ftnlen)6, (ftnlen)1), i__1 = max(i__1,i__2), i__2 = ilaenv_(&c__1, "CUNMQR", "LC", &nmax, &nmax, &nmax, &c_n1, ( ftnlen)6, (ftnlen)2), i__1 = max(i__1,i__2), i__2 = ilaenv_(& c__1, "CUNGQR", " ", &nmax, &nmax, &nmax, &c_n1, (ftnlen)6, ( ftnlen)1); nb = max(i__1,i__2); /* Computing MAX */ i__1 = nmax << 1, i__2 = nmax * (nb + 1), i__1 = max(i__1,i__2), i__2 = nmax * (nmax + 1); maxwrk = max(i__1,i__2); work[1].r = (real) maxwrk, work[1].i = 0.f; } if (*lwork < minwrk) { *info = -23; } if (*info != 0) { i__1 = -(*info); xerbla_("CDRGEV", &i__1); return 0; } /* Quick return if possible */ if (*nsizes == 0 || *ntypes == 0) { return 0; } ulp = slamch_("Precision"); safmin = slamch_("Safe minimum"); safmin /= ulp; safmax = 1.f / safmin; slabad_(&safmin, &safmax); ulpinv = 1.f / ulp; /* The values RMAGN(2:3) depend on N, see below. */ rmagn[0] = 0.f; rmagn[1] = 1.f; /* Loop over sizes, types */ ntestt = 0; nerrs = 0; nmats = 0; i__1 = *nsizes; for (jsize = 1; jsize <= i__1; ++jsize) { n = nn[jsize]; n1 = max(1,n); rmagn[2] = safmax * ulp / (real) n1; rmagn[3] = safmin * ulpinv * n1; if (*nsizes != 1) { mtypes = min(26,*ntypes); } else { mtypes = min(27,*ntypes); } i__2 = mtypes; for (jtype = 1; jtype <= i__2; ++jtype) { if (! dotype[jtype]) { goto L210; } ++nmats; /* Save ISEED in case of an error. */ for (j = 1; j <= 4; ++j) { ioldsd[j - 1] = iseed[j]; /* L20: */ } /* Generate test matrices A and B Description of control parameters: KCLASS: =1 means w/o rotation, =2 means w/ rotation, =3 means random. KATYPE: the "type" to be passed to CLATM4 for computing A. KAZERO: the pattern of zeros on the diagonal for A: =1: ( xxx ), =2: (0, xxx ) =3: ( 0, 0, xxx, 0 ), =4: ( 0, xxx, 0, 0 ), =5: ( 0, 0, 1, xxx, 0 ), =6: ( 0, 1, 0, xxx, 0 ). (xxx means a string of non-zero entries.) KAMAGN: the magnitude of the matrix: =0: zero, =1: O(1), =2: large, =3: small. LASIGN: .TRUE. if the diagonal elements of A are to be multiplied by a random magnitude 1 number. KBTYPE, KBZERO, KBMAGN, LBSIGN: the same, but for B. KTRIAN: =0: don't fill in the upper triangle, =1: do. KZ1, KZ2, KADD: used to implement KAZERO and KBZERO. RMAGN: used to implement KAMAGN and KBMAGN. */ if (mtypes > 26) { goto L100; } ierr = 0; if (kclass[jtype - 1] < 3) { /* Generate A (w/o rotation) */ if ((i__3 = katype[jtype - 1], abs(i__3)) == 3) { in = ((n - 1) / 2 << 1) + 1; if (in != n) { claset_("Full", &n, &n, &c_b1, &c_b1, &a[a_offset], lda); } } else { in = n; } clatm4_(&katype[jtype - 1], &in, &kz1[kazero[jtype - 1] - 1], &kz2[kazero[jtype - 1] - 1], &lasign[jtype - 1], & rmagn[kamagn[jtype - 1]], &ulp, &rmagn[ktrian[jtype - 1] * kamagn[jtype - 1]], &c__2, &iseed[1], &a[ a_offset], lda); iadd = kadd[kazero[jtype - 1] - 1]; if (iadd > 0 && iadd <= n) { i__3 = a_subscr(iadd, iadd); i__4 = kamagn[jtype - 1]; a[i__3].r = rmagn[i__4], a[i__3].i = 0.f; } /* Generate B (w/o rotation) */ if ((i__3 = kbtype[jtype - 1], abs(i__3)) == 3) { in = ((n - 1) / 2 << 1) + 1; if (in != n) { claset_("Full", &n, &n, &c_b1, &c_b1, &b[b_offset], lda); } } else { in = n; } clatm4_(&kbtype[jtype - 1], &in, &kz1[kbzero[jtype - 1] - 1], &kz2[kbzero[jtype - 1] - 1], &lbsign[jtype - 1], & rmagn[kbmagn[jtype - 1]], &c_b28, &rmagn[ktrian[jtype - 1] * kbmagn[jtype - 1]], &c__2, &iseed[1], &b[ b_offset], lda); iadd = kadd[kbzero[jtype - 1] - 1]; if (iadd != 0 && iadd <= n) { i__3 = b_subscr(iadd, iadd); i__4 = kbmagn[jtype - 1]; b[i__3].r = rmagn[i__4], b[i__3].i = 0.f; } if (kclass[jtype - 1] == 2 && n > 0) { /* Include rotations Generate Q, Z as Householder transformations times a diagonal matrix. */ i__3 = n - 1; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = jc; jr <= i__4; ++jr) { i__5 = q_subscr(jr, jc); clarnd_(&q__1, &c__3, &iseed[1]); q[i__5].r = q__1.r, q[i__5].i = q__1.i; i__5 = z___subscr(jr, jc); clarnd_(&q__1, &c__3, &iseed[1]); z__[i__5].r = q__1.r, z__[i__5].i = q__1.i; /* L30: */ } i__4 = n + 1 - jc; clarfg_(&i__4, &q_ref(jc, jc), &q_ref(jc + 1, jc), & c__1, &work[jc]); i__4 = (n << 1) + jc; i__5 = q_subscr(jc, jc); r__2 = q[i__5].r; r__1 = r_sign(&c_b28, &r__2); work[i__4].r = r__1, work[i__4].i = 0.f; i__4 = q_subscr(jc, jc); q[i__4].r = 1.f, q[i__4].i = 0.f; i__4 = n + 1 - jc; clarfg_(&i__4, &z___ref(jc, jc), &z___ref(jc + 1, jc), &c__1, &work[n + jc]); i__4 = n * 3 + jc; i__5 = z___subscr(jc, jc); r__2 = z__[i__5].r; r__1 = r_sign(&c_b28, &r__2); work[i__4].r = r__1, work[i__4].i = 0.f; i__4 = z___subscr(jc, jc); z__[i__4].r = 1.f, z__[i__4].i = 0.f; /* L40: */ } clarnd_(&q__1, &c__3, &iseed[1]); ctemp.r = q__1.r, ctemp.i = q__1.i; i__3 = q_subscr(n, n); q[i__3].r = 1.f, q[i__3].i = 0.f; i__3 = n; work[i__3].r = 0.f, work[i__3].i = 0.f; i__3 = n * 3; r__1 = c_abs(&ctemp); q__1.r = ctemp.r / r__1, q__1.i = ctemp.i / r__1; work[i__3].r = q__1.r, work[i__3].i = q__1.i; clarnd_(&q__1, &c__3, &iseed[1]); ctemp.r = q__1.r, ctemp.i = q__1.i; i__3 = z___subscr(n, n); z__[i__3].r = 1.f, z__[i__3].i = 0.f; i__3 = n << 1; work[i__3].r = 0.f, work[i__3].i = 0.f; i__3 = n << 2; r__1 = c_abs(&ctemp); q__1.r = ctemp.r / r__1, q__1.i = ctemp.i / r__1; work[i__3].r = q__1.r, work[i__3].i = q__1.i; /* Apply the diagonal matrices */ i__3 = n; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = 1; jr <= i__4; ++jr) { i__5 = a_subscr(jr, jc); i__6 = (n << 1) + jr; r_cnjg(&q__3, &work[n * 3 + jc]); q__2.r = work[i__6].r * q__3.r - work[i__6].i * q__3.i, q__2.i = work[i__6].r * q__3.i + work[i__6].i * q__3.r; i__7 = a_subscr(jr, jc); q__1.r = q__2.r * a[i__7].r - q__2.i * a[i__7].i, q__1.i = q__2.r * a[i__7].i + q__2.i * a[ i__7].r; a[i__5].r = q__1.r, a[i__5].i = q__1.i; i__5 = b_subscr(jr, jc); i__6 = (n << 1) + jr; r_cnjg(&q__3, &work[n * 3 + jc]); q__2.r = work[i__6].r * q__3.r - work[i__6].i * q__3.i, q__2.i = work[i__6].r * q__3.i + work[i__6].i * q__3.r; i__7 = b_subscr(jr, jc); q__1.r = q__2.r * b[i__7].r - q__2.i * b[i__7].i, q__1.i = q__2.r * b[i__7].i + q__2.i * b[ i__7].r; b[i__5].r = q__1.r, b[i__5].i = q__1.i; /* L50: */ } /* L60: */ } i__3 = n - 1; cunm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[ 1], &a[a_offset], lda, &work[(n << 1) + 1], &ierr); if (ierr != 0) { goto L90; } i__3 = n - 1; cunm2r_("R", "C", &n, &n, &i__3, &z__[z_offset], ldq, & work[n + 1], &a[a_offset], lda, &work[(n << 1) + 1], &ierr); if (ierr != 0) { goto L90; } i__3 = n - 1; cunm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[ 1], &b[b_offset], lda, &work[(n << 1) + 1], &ierr); if (ierr != 0) { goto L90; } i__3 = n - 1; cunm2r_("R", "C", &n, &n, &i__3, &z__[z_offset], ldq, & work[n + 1], &b[b_offset], lda, &work[(n << 1) + 1], &ierr); if (ierr != 0) { goto L90; } } } else { /* Random matrices */ i__3 = n; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = 1; jr <= i__4; ++jr) { i__5 = a_subscr(jr, jc); i__6 = kamagn[jtype - 1]; clarnd_(&q__2, &c__4, &iseed[1]); q__1.r = rmagn[i__6] * q__2.r, q__1.i = rmagn[i__6] * q__2.i; a[i__5].r = q__1.r, a[i__5].i = q__1.i; i__5 = b_subscr(jr, jc); i__6 = kbmagn[jtype - 1]; clarnd_(&q__2, &c__4, &iseed[1]); q__1.r = rmagn[i__6] * q__2.r, q__1.i = rmagn[i__6] * q__2.i; b[i__5].r = q__1.r, b[i__5].i = q__1.i; /* L70: */ } /* L80: */ } } L90: if (ierr != 0) { io___40.ciunit = *nounit; s_wsfe(&io___40); do_fio(&c__1, "Generator", (ftnlen)9); do_fio(&c__1, (char *)&ierr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(ierr); return 0; } L100: for (i__ = 1; i__ <= 7; ++i__) { result[i__] = -1.f; /* L110: */ } /* Call CGGEV to compute eigenvalues and eigenvectors. */ clacpy_(" ", &n, &n, &a[a_offset], lda, &s[s_offset], lda); clacpy_(" ", &n, &n, &b[b_offset], lda, &t[t_offset], lda); cggev_("V", "V", &n, &s[s_offset], lda, &t[t_offset], lda, &alpha[ 1], &beta[1], &q[q_offset], ldq, &z__[z_offset], ldq, & work[1], lwork, &rwork[1], &ierr); if (ierr != 0 && ierr != n + 1) { result[1] = ulpinv; io___42.ciunit = *nounit; s_wsfe(&io___42); do_fio(&c__1, "CGGEV1", (ftnlen)6); do_fio(&c__1, (char *)&ierr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(ierr); goto L190; } /* Do the tests (1) and (2) */ cget52_(&c_true, &n, &a[a_offset], lda, &b[b_offset], lda, &q[ q_offset], ldq, &alpha[1], &beta[1], &work[1], &rwork[1], &result[1]); if (result[2] > *thresh) { io___43.ciunit = *nounit; s_wsfe(&io___43); do_fio(&c__1, "Left", (ftnlen)4); do_fio(&c__1, "CGGEV1", (ftnlen)6); do_fio(&c__1, (char *)&result[2], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); } /* Do the tests (3) and (4) */ cget52_(&c_false, &n, &a[a_offset], lda, &b[b_offset], lda, &z__[ z_offset], ldq, &alpha[1], &beta[1], &work[1], &rwork[1], &result[3]); if (result[4] > *thresh) { io___44.ciunit = *nounit; s_wsfe(&io___44); do_fio(&c__1, "Right", (ftnlen)5); do_fio(&c__1, "CGGEV1", (ftnlen)6); do_fio(&c__1, (char *)&result[4], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); } /* Do test (5) */ clacpy_(" ", &n, &n, &a[a_offset], lda, &s[s_offset], lda); clacpy_(" ", &n, &n, &b[b_offset], lda, &t[t_offset], lda); cggev_("N", "N", &n, &s[s_offset], lda, &t[t_offset], lda, & alpha1[1], &beta1[1], &q[q_offset], ldq, &z__[z_offset], ldq, &work[1], lwork, &rwork[1], &ierr); if (ierr != 0 && ierr != n + 1) { result[1] = ulpinv; io___45.ciunit = *nounit; s_wsfe(&io___45); do_fio(&c__1, "CGGEV2", (ftnlen)6); do_fio(&c__1, (char *)&ierr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(ierr); goto L190; } i__3 = n; for (j = 1; j <= i__3; ++j) { i__4 = j; i__5 = j; i__6 = j; i__7 = j; if (alpha[i__4].r != alpha1[i__5].r || alpha[i__4].i != alpha1[i__5].i || (beta[i__6].r != beta1[i__7].r || beta[i__6].i != beta1[i__7].i)) { result[5] = ulpinv; } /* L120: */ } /* Do test (6): Compute eigenvalues and left eigenvectors, and test them */ clacpy_(" ", &n, &n, &a[a_offset], lda, &s[s_offset], lda); clacpy_(" ", &n, &n, &b[b_offset], lda, &t[t_offset], lda); cggev_("V", "N", &n, &s[s_offset], lda, &t[t_offset], lda, & alpha1[1], &beta1[1], &qe[qe_offset], ldqe, &z__[z_offset] , ldq, &work[1], lwork, &rwork[1], &ierr); if (ierr != 0 && ierr != n + 1) { result[1] = ulpinv; io___46.ciunit = *nounit; s_wsfe(&io___46); do_fio(&c__1, "CGGEV3", (ftnlen)6); do_fio(&c__1, (char *)&ierr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(ierr); goto L190; } i__3 = n; for (j = 1; j <= i__3; ++j) { i__4 = j; i__5 = j; i__6 = j; i__7 = j; if (alpha[i__4].r != alpha1[i__5].r || alpha[i__4].i != alpha1[i__5].i || (beta[i__6].r != beta1[i__7].r || beta[i__6].i != beta1[i__7].i)) { result[6] = ulpinv; } /* L130: */ } i__3 = n; for (j = 1; j <= i__3; ++j) { i__4 = n; for (jc = 1; jc <= i__4; ++jc) { i__5 = q_subscr(j, jc); i__6 = qe_subscr(j, jc); if (q[i__5].r != qe[i__6].r || q[i__5].i != qe[i__6].i) { result[6] = ulpinv; } /* L140: */ } /* L150: */ } /* Do test (7): Compute eigenvalues and right eigenvectors, and test them */ clacpy_(" ", &n, &n, &a[a_offset], lda, &s[s_offset], lda); clacpy_(" ", &n, &n, &b[b_offset], lda, &t[t_offset], lda); cggev_("N", "V", &n, &s[s_offset], lda, &t[t_offset], lda, & alpha1[1], &beta1[1], &q[q_offset], ldq, &qe[qe_offset], ldqe, &work[1], lwork, &rwork[1], &ierr); if (ierr != 0 && ierr != n + 1) { result[1] = ulpinv; io___47.ciunit = *nounit; s_wsfe(&io___47); do_fio(&c__1, "CGGEV4", (ftnlen)6); do_fio(&c__1, (char *)&ierr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(ierr); goto L190; } i__3 = n; for (j = 1; j <= i__3; ++j) { i__4 = j; i__5 = j; i__6 = j; i__7 = j; if (alpha[i__4].r != alpha1[i__5].r || alpha[i__4].i != alpha1[i__5].i || (beta[i__6].r != beta1[i__7].r || beta[i__6].i != beta1[i__7].i)) { result[7] = ulpinv; } /* L160: */ } i__3 = n; for (j = 1; j <= i__3; ++j) { i__4 = n; for (jc = 1; jc <= i__4; ++jc) { i__5 = z___subscr(j, jc); i__6 = qe_subscr(j, jc); if (z__[i__5].r != qe[i__6].r || z__[i__5].i != qe[i__6] .i) { result[7] = ulpinv; } /* L170: */ } /* L180: */ } /* End of Loop -- Check for RESULT(j) > THRESH */ L190: ntestt += 7; /* Print out tests which fail. */ for (jr = 1; jr <= 9; ++jr) { if (result[jr] >= *thresh) { /* If this is the first test to fail, print a header to the data file. */ if (nerrs == 0) { io___48.ciunit = *nounit; s_wsfe(&io___48); do_fio(&c__1, "CGV", (ftnlen)3); e_wsfe(); /* Matrix types */ io___49.ciunit = *nounit; s_wsfe(&io___49); e_wsfe(); io___50.ciunit = *nounit; s_wsfe(&io___50); e_wsfe(); io___51.ciunit = *nounit; s_wsfe(&io___51); do_fio(&c__1, "Orthogonal", (ftnlen)10); e_wsfe(); /* Tests performed */ io___52.ciunit = *nounit; s_wsfe(&io___52); e_wsfe(); } ++nerrs; if (result[jr] < 1e4f) { io___53.ciunit = *nounit; s_wsfe(&io___53); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)) ; do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof( real)); e_wsfe(); } else { io___54.ciunit = *nounit; s_wsfe(&io___54); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)) ; do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof( real)); e_wsfe(); } } /* L200: */ } L210: ; } /* L220: */ } /* Summary */ alasvm_("CGV", nounit, &nerrs, &ntestt, &c__0); work[1].r = (real) maxwrk, work[1].i = 0.f; return 0; /* End of CDRGEV */ } /* cdrgev_ */
/* Subroutine */ int sgqrts_(integer *n, integer *m, integer *p, real *a, real *af, real *q, real *r__, integer *lda, real *taua, real *b, real *bf, real *z__, real *t, real *bwk, integer *ldb, real *taub, real * work, integer *lwork, real *rwork, real *result) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, r_dim1, r_offset, q_dim1, q_offset, b_dim1, b_offset, bf_dim1, bf_offset, t_dim1, t_offset, z_dim1, z_offset, bwk_dim1, bwk_offset, i__1, i__2; real r__1; /* Local variables */ static integer info; static real unfl, resid; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static real anorm, bnorm; extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer *); extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); extern /* Subroutine */ int sggqrf_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, real *, integer * , integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); extern doublereal slansy_(char *, char *, integer *, real *, integer *, real *); extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *, integer *), sorgrq_( integer *, integer *, integer *, real *, integer *, real *, real * , integer *, integer *); static real ulp; #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define af_ref(a_1,a_2) af[(a_2)*af_dim1 + a_1] #define bf_ref(a_1,a_2) bf[(a_2)*bf_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SGQRTS tests SGGQRF, which computes the GQR factorization of an N-by-M matrix A and a N-by-P matrix B: A = Q*R and B = Q*T*Z. Arguments ========= N (input) INTEGER The number of rows of the matrices A and B. N >= 0. M (input) INTEGER The number of columns of the matrix A. M >= 0. P (input) INTEGER The number of columns of the matrix B. P >= 0. A (input) REAL array, dimension (LDA,M) The N-by-M matrix A. AF (output) REAL array, dimension (LDA,N) Details of the GQR factorization of A and B, as returned by SGGQRF, see SGGQRF for further details. Q (output) REAL array, dimension (LDA,N) The M-by-M orthogonal matrix Q. R (workspace) REAL array, dimension (LDA,MAX(M,N)) LDA (input) INTEGER The leading dimension of the arrays A, AF, R and Q. LDA >= max(M,N). TAUA (output) REAL array, dimension (min(M,N)) The scalar factors of the elementary reflectors, as returned by SGGQRF. B (input) REAL array, dimension (LDB,P) On entry, the N-by-P matrix A. BF (output) REAL array, dimension (LDB,N) Details of the GQR factorization of A and B, as returned by SGGQRF, see SGGQRF for further details. Z (output) REAL array, dimension (LDB,P) The P-by-P orthogonal matrix Z. T (workspace) REAL array, dimension (LDB,max(P,N)) BWK (workspace) REAL array, dimension (LDB,N) LDB (input) INTEGER The leading dimension of the arrays B, BF, Z and T. LDB >= max(P,N). TAUB (output) REAL array, dimension (min(P,N)) The scalar factors of the elementary reflectors, as returned by SGGRQF. WORK (workspace) REAL array, dimension (LWORK) LWORK (input) INTEGER The dimension of the array WORK, LWORK >= max(N,M,P)**2. RWORK (workspace) REAL array, dimension (max(N,M,P)) RESULT (output) REAL array, dimension (4) The test ratios: RESULT(1) = norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP) RESULT(2) = norm( T*Z - Q'*B ) / (MAX(P,N)*norm(B)*ULP) RESULT(3) = norm( I - Q'*Q ) / ( M*ULP ) RESULT(4) = norm( I - Z'*Z ) / ( P*ULP ) ===================================================================== Parameter adjustments */ r_dim1 = *lda; r_offset = 1 + r_dim1 * 1; r__ -= r_offset; q_dim1 = *lda; q_offset = 1 + q_dim1 * 1; q -= q_offset; af_dim1 = *lda; af_offset = 1 + af_dim1 * 1; af -= af_offset; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --taua; bwk_dim1 = *ldb; bwk_offset = 1 + bwk_dim1 * 1; bwk -= bwk_offset; t_dim1 = *ldb; t_offset = 1 + t_dim1 * 1; t -= t_offset; z_dim1 = *ldb; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; bf_dim1 = *ldb; bf_offset = 1 + bf_dim1 * 1; bf -= bf_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --taub; --work; --rwork; --result; /* Function Body */ ulp = slamch_("Precision"); unfl = slamch_("Safe minimum"); /* Copy the matrix A to the array AF. */ slacpy_("Full", n, m, &a[a_offset], lda, &af[af_offset], lda); slacpy_("Full", n, p, &b[b_offset], ldb, &bf[bf_offset], ldb); /* Computing MAX */ r__1 = slange_("1", n, m, &a[a_offset], lda, &rwork[1]); anorm = dmax(r__1,unfl); /* Computing MAX */ r__1 = slange_("1", n, p, &b[b_offset], ldb, &rwork[1]); bnorm = dmax(r__1,unfl); /* Factorize the matrices A and B in the arrays AF and BF. */ sggqrf_(n, m, p, &af[af_offset], lda, &taua[1], &bf[bf_offset], ldb, & taub[1], &work[1], lwork, &info); /* Generate the N-by-N matrix Q */ slaset_("Full", n, n, &c_b9, &c_b9, &q[q_offset], lda); i__1 = *n - 1; slacpy_("Lower", &i__1, m, &af_ref(2, 1), lda, &q_ref(2, 1), lda); i__1 = min(*n,*m); sorgqr_(n, n, &i__1, &q[q_offset], lda, &taua[1], &work[1], lwork, &info); /* Generate the P-by-P matrix Z */ slaset_("Full", p, p, &c_b9, &c_b9, &z__[z_offset], ldb); if (*n <= *p) { if (*n > 0 && *n < *p) { i__1 = *p - *n; slacpy_("Full", n, &i__1, &bf[bf_offset], ldb, &z___ref(*p - *n + 1, 1), ldb); } if (*n > 1) { i__1 = *n - 1; i__2 = *n - 1; slacpy_("Lower", &i__1, &i__2, &bf_ref(2, *p - *n + 1), ldb, & z___ref(*p - *n + 2, *p - *n + 1), ldb); } } else { if (*p > 1) { i__1 = *p - 1; i__2 = *p - 1; slacpy_("Lower", &i__1, &i__2, &bf_ref(*n - *p + 2, 1), ldb, & z___ref(2, 1), ldb); } } i__1 = min(*n,*p); sorgrq_(p, p, &i__1, &z__[z_offset], ldb, &taub[1], &work[1], lwork, & info); /* Copy R */ slaset_("Full", n, m, &c_b19, &c_b19, &r__[r_offset], lda); slacpy_("Upper", n, m, &af[af_offset], lda, &r__[r_offset], lda); /* Copy T */ slaset_("Full", n, p, &c_b19, &c_b19, &t[t_offset], ldb); if (*n <= *p) { slacpy_("Upper", n, n, &bf_ref(1, *p - *n + 1), ldb, &t_ref(1, *p - * n + 1), ldb); } else { i__1 = *n - *p; slacpy_("Full", &i__1, p, &bf[bf_offset], ldb, &t[t_offset], ldb); slacpy_("Upper", p, p, &bf_ref(*n - *p + 1, 1), ldb, &t_ref(*n - *p + 1, 1), ldb); } /* Compute R - Q'*A */ sgemm_("Transpose", "No transpose", n, m, n, &c_b30, &q[q_offset], lda, & a[a_offset], lda, &c_b31, &r__[r_offset], lda); /* Compute norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP ) . */ resid = slange_("1", n, m, &r__[r_offset], lda, &rwork[1]); if (anorm > 0.f) { /* Computing MAX */ i__1 = max(1,*m); result[1] = resid / (real) max(i__1,*n) / anorm / ulp; } else { result[1] = 0.f; } /* Compute T*Z - Q'*B */ sgemm_("No Transpose", "No transpose", n, p, p, &c_b31, &t[t_offset], ldb, &z__[z_offset], ldb, &c_b19, &bwk[bwk_offset], ldb); sgemm_("Transpose", "No transpose", n, p, n, &c_b30, &q[q_offset], lda, & b[b_offset], ldb, &c_b31, &bwk[bwk_offset], ldb); /* Compute norm( T*Z - Q'*B ) / ( MAX(P,N)*norm(A)*ULP ) . */ resid = slange_("1", n, p, &bwk[bwk_offset], ldb, &rwork[1]); if (bnorm > 0.f) { /* Computing MAX */ i__1 = max(1,*p); result[2] = resid / (real) max(i__1,*n) / bnorm / ulp; } else { result[2] = 0.f; } /* Compute I - Q'*Q */ slaset_("Full", n, n, &c_b19, &c_b31, &r__[r_offset], lda); ssyrk_("Upper", "Transpose", n, n, &c_b30, &q[q_offset], lda, &c_b31, & r__[r_offset], lda); /* Compute norm( I - Q'*Q ) / ( N * ULP ) . */ resid = slansy_("1", "Upper", n, &r__[r_offset], lda, &rwork[1]); result[3] = resid / (real) max(1,*n) / ulp; /* Compute I - Z'*Z */ slaset_("Full", p, p, &c_b19, &c_b31, &t[t_offset], ldb); ssyrk_("Upper", "Transpose", p, p, &c_b30, &z__[z_offset], ldb, &c_b31, & t[t_offset], ldb); /* Compute norm( I - Z'*Z ) / ( P*ULP ) . */ resid = slansy_("1", "Upper", p, &t[t_offset], ldb, &rwork[1]); result[4] = resid / (real) max(1,*p) / ulp; return 0; /* End of SGQRTS */ } /* sgqrts_ */
/* Subroutine */ int slatdf_(integer *ijob, integer *n, real *z__, integer * ldz, real *rhs, real *rdsum, real *rdscal, integer *ipiv, integer * jpiv) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= SLATDF uses the LU factorization of the n-by-n matrix Z computed by SGETC2 and computes a contribution to the reciprocal Dif-estimate by solving Z * x = b for x, and choosing the r.h.s. b such that the norm of x is as large as possible. On entry RHS = b holds the contribution from earlier solved sub-systems, and on return RHS = x. The factorization of Z returned by SGETC2 has the form Z = P*L*U*Q, where P and Q are permutation matrices. L is lower triangular with unit diagonal elements and U is upper triangular. Arguments ========= IJOB (input) INTEGER IJOB = 2: First compute an approximative null-vector e of Z using SGECON, e is normalized and solve for Zx = +-e - f with the sign giving the greater value of 2-norm(x). About 5 times as expensive as Default. IJOB .ne. 2: Local look ahead strategy where all entries of the r.h.s. b is choosen as either +1 or -1 (Default). N (input) INTEGER The number of columns of the matrix Z. Z (input) REAL array, dimension (LDZ, N) On entry, the LU part of the factorization of the n-by-n matrix Z computed by SGETC2: Z = P * L * U * Q LDZ (input) INTEGER The leading dimension of the array Z. LDA >= max(1, N). RHS (input/output) REAL array, dimension N. On entry, RHS contains contributions from other subsystems. On exit, RHS contains the solution of the subsystem with entries acoording to the value of IJOB (see above). RDSUM (input/output) REAL On entry, the sum of squares of computed contributions to the Dif-estimate under computation by STGSYL, where the scaling factor RDSCAL (see below) has been factored out. On exit, the corresponding sum of squares updated with the contributions from the current sub-system. If TRANS = 'T' RDSUM is not touched. NOTE: RDSUM only makes sense when STGSY2 is called by STGSYL. RDSCAL (input/output) REAL On entry, scaling factor used to prevent overflow in RDSUM. On exit, RDSCAL is updated w.r.t. the current contributions in RDSUM. If TRANS = 'T', RDSCAL is not touched. NOTE: RDSCAL only makes sense when STGSY2 is called by STGSYL. IPIV (input) INTEGER array, dimension (N). The pivot indices; for 1 <= i <= N, row i of the matrix has been interchanged with row IPIV(i). JPIV (input) INTEGER array, dimension (N). The pivot indices; for 1 <= j <= N, column j of the matrix has been interchanged with column JPIV(j). Further Details =============== Based on contributions by Bo Kagstrom and Peter Poromaa, Department of Computing Science, Umea University, S-901 87 Umea, Sweden. This routine is a further developed implementation of algorithm BSOLVE in [1] using complete pivoting in the LU factorization. [1] Bo Kagstrom and Lars Westin, Generalized Schur Methods with Condition Estimators for Solving the Generalized Sylvester Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. [2] Peter Poromaa, On Efficient and Robust Estimators for the Separation between two Regular Matrix Pairs with Applications in Condition Estimation. Report IMINF-95.05, Departement of Computing Science, Umea University, S-901 87 Umea, Sweden, 1995. ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static real c_b23 = 1.f; static real c_b37 = -1.f; /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; real r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer info; static real temp; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static real work[32]; static integer i__, j, k; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static real pmone; extern doublereal sasum_(integer *, real *, integer *); static real sminu; static integer iwork[8]; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), saxpy_(integer *, real *, real *, integer *, real *, integer *); static real splus; extern /* Subroutine */ int sgesc2_(integer *, real *, integer *, real *, integer *, integer *, real *); static real bm, bp, xm[8], xp[8]; extern /* Subroutine */ int sgecon_(char *, integer *, real *, integer *, real *, real *, real *, integer *, integer *), slassq_( integer *, real *, integer *, real *, real *), slaswp_(integer *, real *, integer *, integer *, integer *, integer *, integer *); #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --rhs; --ipiv; --jpiv; /* Function Body */ if (*ijob != 2) { /* Apply permutations IPIV to RHS */ i__1 = *n - 1; slaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &ipiv[1], &c__1); /* Solve for L-part choosing RHS either to +1 or -1. */ pmone = -1.f; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { bp = rhs[j] + 1.f; bm = rhs[j] - 1.f; splus = 1.f; /* Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and SMIN computed more efficiently than in BSOLVE [1]. */ i__2 = *n - j; splus += sdot_(&i__2, &z___ref(j + 1, j), &c__1, &z___ref(j + 1, j), &c__1); i__2 = *n - j; sminu = sdot_(&i__2, &z___ref(j + 1, j), &c__1, &rhs[j + 1], & c__1); splus *= rhs[j]; if (splus > sminu) { rhs[j] = bp; } else if (sminu > splus) { rhs[j] = bm; } else { /* In this case the updating sums are equal and we can choose RHS(J) +1 or -1. The first time this happens we choose -1, thereafter +1. This is a simple way to get good estimates of matrices like Byers well-known example (see [1]). (Not done in BSOLVE.) */ rhs[j] += pmone; pmone = 1.f; } /* Compute the remaining r.h.s. */ temp = -rhs[j]; i__2 = *n - j; saxpy_(&i__2, &temp, &z___ref(j + 1, j), &c__1, &rhs[j + 1], & c__1); /* L10: */ } /* Solve for U-part, look-ahead for RHS(N) = +-1. This is not done in BSOLVE and will hopefully give us a better estimate because any ill-conditioning of the original matrix is transfered to U and not to L. U(N, N) is an approximation to sigma_min(LU). */ i__1 = *n - 1; scopy_(&i__1, &rhs[1], &c__1, xp, &c__1); xp[*n - 1] = rhs[*n] + 1.f; rhs[*n] += -1.f; splus = 0.f; sminu = 0.f; for (i__ = *n; i__ >= 1; --i__) { temp = 1.f / z___ref(i__, i__); xp[i__ - 1] *= temp; rhs[i__] *= temp; i__1 = *n; for (k = i__ + 1; k <= i__1; ++k) { xp[i__ - 1] -= xp[k - 1] * (z___ref(i__, k) * temp); rhs[i__] -= rhs[k] * (z___ref(i__, k) * temp); /* L20: */ } splus += (r__1 = xp[i__ - 1], dabs(r__1)); sminu += (r__1 = rhs[i__], dabs(r__1)); /* L30: */ } if (splus > sminu) { scopy_(n, xp, &c__1, &rhs[1], &c__1); } /* Apply the permutations JPIV to the computed solution (RHS) */ i__1 = *n - 1; slaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &jpiv[1], &c_n1); /* Compute the sum of squares */ slassq_(n, &rhs[1], &c__1, rdscal, rdsum); } else { /* IJOB = 2, Compute approximate nullvector XM of Z */ sgecon_("I", n, &z__[z_offset], ldz, &c_b23, &temp, work, iwork, & info); scopy_(n, &work[*n], &c__1, xm, &c__1); /* Compute RHS */ i__1 = *n - 1; slaswp_(&c__1, xm, ldz, &c__1, &i__1, &ipiv[1], &c_n1); temp = 1.f / sqrt(sdot_(n, xm, &c__1, xm, &c__1)); sscal_(n, &temp, xm, &c__1); scopy_(n, xm, &c__1, xp, &c__1); saxpy_(n, &c_b23, &rhs[1], &c__1, xp, &c__1); saxpy_(n, &c_b37, xm, &c__1, &rhs[1], &c__1); sgesc2_(n, &z__[z_offset], ldz, &rhs[1], &ipiv[1], &jpiv[1], &temp); sgesc2_(n, &z__[z_offset], ldz, xp, &ipiv[1], &jpiv[1], &temp); if (sasum_(n, xp, &c__1) > sasum_(n, &rhs[1], &c__1)) { scopy_(n, xp, &c__1, &rhs[1], &c__1); } /* Compute the sum of squares */ slassq_(n, &rhs[1], &c__1, rdscal, rdsum); } return 0; /* End of SLATDF */ } /* slatdf_ */
/* Subroutine */ int dspgv_(integer *itype, char *jobz, char *uplo, integer * n, doublereal *ap, doublereal *bp, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DSPGV computes all the eigenvalues and, optionally, the eigenvectors of a real generalized symmetric-definite eigenproblem, of the form A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be symmetric, stored in packed format, and B is also positive definite. Arguments ========= ITYPE (input) INTEGER Specifies the problem type to be solved: = 1: A*x = (lambda)*B*x = 2: A*B*x = (lambda)*x = 3: B*A*x = (lambda)*x JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. UPLO (input) CHARACTER*1 = 'U': Upper triangles of A and B are stored; = 'L': Lower triangles of A and B are stored. N (input) INTEGER The order of the matrices A and B. N >= 0. AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) On entry, the upper or lower triangle of the symmetric matrix A, packed columnwise in a linear array. The j-th column of A is stored in the array AP as follows: if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. On exit, the contents of AP are destroyed. BP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) On entry, the upper or lower triangle of the symmetric matrix B, packed columnwise in a linear array. The j-th column of B is stored in the array BP as follows: if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. On exit, the triangular factor U or L from the Cholesky factorization B = U**T*U or B = L*L**T, in the same storage format as B. W (output) DOUBLE PRECISION array, dimension (N) If INFO = 0, the eigenvalues in ascending order. Z (output) DOUBLE PRECISION array, dimension (LDZ, N) If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of eigenvectors. The eigenvectors are normalized as follows: if ITYPE = 1 or 2, Z**T*B*Z = I; if ITYPE = 3, Z**T*inv(B)*Z = I. If JOBZ = 'N', then Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). WORK (workspace) DOUBLE PRECISION array, dimension (3*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: DPPTRF or DSPEV returned an error code: <= N: if INFO = i, DSPEV failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero. > N: if INFO = n + i, for 1 <= i <= n, then the leading minor of order i of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer z_dim1, z_offset, i__1; /* Local variables */ static integer neig, j; extern logical lsame_(char *, char *); extern /* Subroutine */ int dspev_(char *, char *, integer *, doublereal * , doublereal *, doublereal *, integer *, doublereal *, integer *); static char trans[1]; static logical upper; extern /* Subroutine */ int dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *), dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *); static logical wantz; extern /* Subroutine */ int xerbla_(char *, integer *), dpptrf_( char *, integer *, doublereal *, integer *), dspgst_( integer *, char *, integer *, doublereal *, doublereal *, integer *); #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] --ap; --bp; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; /* Function Body */ wantz = lsame_(jobz, "V"); upper = lsame_(uplo, "U"); *info = 0; if (*itype < 0 || *itype > 3) { *info = -1; } else if (! (wantz || lsame_(jobz, "N"))) { *info = -2; } else if (! (upper || lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("DSPGV ", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a Cholesky factorization of B. */ dpptrf_(uplo, n, &bp[1], info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ dspgst_(itype, uplo, n, &ap[1], &bp[1], info); dspev_(jobz, uplo, n, &ap[1], &w[1], &z__[z_offset], ldz, &work[1], info); if (wantz) { /* Backtransform eigenvectors to the original problem. */ neig = *n; if (*info > 0) { neig = *info - 1; } if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } i__1 = neig; for (j = 1; j <= i__1; ++j) { dtpsv_(uplo, trans, "Non-unit", n, &bp[1], &z___ref(1, j), & c__1); /* L10: */ } } else if (*itype == 3) { /* For B*A*x=(lambda)*x; backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } i__1 = neig; for (j = 1; j <= i__1; ++j) { dtpmv_(uplo, trans, "Non-unit", n, &bp[1], &z___ref(1, j), & c__1); /* L20: */ } } } return 0; /* End of DSPGV */ } /* dspgv_ */
/* Subroutine */ int cgrqts_(integer *m, integer *p, integer *n, complex *a, complex *af, complex *q, complex *r__, integer *lda, complex *taua, complex *b, complex *bf, complex *z__, complex *t, complex *bwk, integer *ldb, complex *taub, complex *work, integer *lwork, real * rwork, real *result) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, r_dim1, r_offset, q_dim1, q_offset, b_dim1, b_offset, bf_dim1, bf_offset, t_dim1, t_offset, z_dim1, z_offset, bwk_dim1, bwk_offset, i__1, i__2; real r__1; complex q__1; /* Local variables */ static integer info; static real unfl; extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *), cherk_(char *, char *, integer *, integer *, real *, complex *, integer *, real * , complex *, integer *); static real resid, anorm, bnorm; extern doublereal clange_(char *, integer *, integer *, complex *, integer *, real *), clanhe_(char *, char *, integer *, complex *, integer *, real *), slamch_(char *); extern /* Subroutine */ int cggrqf_(integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *), cungqr_(integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, integer *), cungrq_(integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, integer *); static real ulp; #define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1 #define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)] #define r___subscr(a_1,a_2) (a_2)*r_dim1 + a_1 #define r___ref(a_1,a_2) r__[r___subscr(a_1,a_2)] #define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1 #define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)] #define af_subscr(a_1,a_2) (a_2)*af_dim1 + a_1 #define af_ref(a_1,a_2) af[af_subscr(a_1,a_2)] #define bf_subscr(a_1,a_2) (a_2)*bf_dim1 + a_1 #define bf_ref(a_1,a_2) bf[bf_subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CGRQTS tests CGGRQF, which computes the GRQ factorization of an M-by-N matrix A and a P-by-N matrix B: A = R*Q and B = Z*T*Q. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. P (input) INTEGER The number of rows of the matrix B. P >= 0. N (input) INTEGER The number of columns of the matrices A and B. N >= 0. A (input) COMPLEX array, dimension (LDA,N) The M-by-N matrix A. AF (output) COMPLEX array, dimension (LDA,N) Details of the GRQ factorization of A and B, as returned by CGGRQF, see CGGRQF for further details. Q (output) COMPLEX array, dimension (LDA,N) The N-by-N unitary matrix Q. R (workspace) COMPLEX array, dimension (LDA,MAX(M,N)) LDA (input) INTEGER The leading dimension of the arrays A, AF, R and Q. LDA >= max(M,N). TAUA (output) COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors, as returned by SGGQRC. B (input) COMPLEX array, dimension (LDB,N) On entry, the P-by-N matrix A. BF (output) COMPLEX array, dimension (LDB,N) Details of the GQR factorization of A and B, as returned by CGGRQF, see CGGRQF for further details. Z (output) REAL array, dimension (LDB,P) The P-by-P unitary matrix Z. T (workspace) COMPLEX array, dimension (LDB,max(P,N)) BWK (workspace) COMPLEX array, dimension (LDB,N) LDB (input) INTEGER The leading dimension of the arrays B, BF, Z and T. LDB >= max(P,N). TAUB (output) COMPLEX array, dimension (min(P,N)) The scalar factors of the elementary reflectors, as returned by SGGRQF. WORK (workspace) COMPLEX array, dimension (LWORK) LWORK (input) INTEGER The dimension of the array WORK, LWORK >= max(M,P,N)**2. RWORK (workspace) REAL array, dimension (M) RESULT (output) REAL array, dimension (4) The test ratios: RESULT(1) = norm( R - A*Q' ) / ( MAX(M,N)*norm(A)*ULP) RESULT(2) = norm( T*Q - Z'*B ) / (MAX(P,N)*norm(B)*ULP) RESULT(3) = norm( I - Q'*Q ) / ( N*ULP ) RESULT(4) = norm( I - Z'*Z ) / ( P*ULP ) ===================================================================== Parameter adjustments */ r_dim1 = *lda; r_offset = 1 + r_dim1 * 1; r__ -= r_offset; q_dim1 = *lda; q_offset = 1 + q_dim1 * 1; q -= q_offset; af_dim1 = *lda; af_offset = 1 + af_dim1 * 1; af -= af_offset; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --taua; bwk_dim1 = *ldb; bwk_offset = 1 + bwk_dim1 * 1; bwk -= bwk_offset; t_dim1 = *ldb; t_offset = 1 + t_dim1 * 1; t -= t_offset; z_dim1 = *ldb; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; bf_dim1 = *ldb; bf_offset = 1 + bf_dim1 * 1; bf -= bf_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --taub; --work; --rwork; --result; /* Function Body */ ulp = slamch_("Precision"); unfl = slamch_("Safe minimum"); /* Copy the matrix A to the array AF. */ clacpy_("Full", m, n, &a[a_offset], lda, &af[af_offset], lda); clacpy_("Full", p, n, &b[b_offset], ldb, &bf[bf_offset], ldb); /* Computing MAX */ r__1 = clange_("1", m, n, &a[a_offset], lda, &rwork[1]); anorm = dmax(r__1,unfl); /* Computing MAX */ r__1 = clange_("1", p, n, &b[b_offset], ldb, &rwork[1]); bnorm = dmax(r__1,unfl); /* Factorize the matrices A and B in the arrays AF and BF. */ cggrqf_(m, p, n, &af[af_offset], lda, &taua[1], &bf[bf_offset], ldb, & taub[1], &work[1], lwork, &info); /* Generate the N-by-N matrix Q */ claset_("Full", n, n, &c_b3, &c_b3, &q[q_offset], lda); if (*m <= *n) { if (*m > 0 && *m < *n) { i__1 = *n - *m; clacpy_("Full", m, &i__1, &af[af_offset], lda, &q_ref(*n - *m + 1, 1), lda); } if (*m > 1) { i__1 = *m - 1; i__2 = *m - 1; clacpy_("Lower", &i__1, &i__2, &af_ref(2, *n - *m + 1), lda, & q_ref(*n - *m + 2, *n - *m + 1), lda); } } else { if (*n > 1) { i__1 = *n - 1; i__2 = *n - 1; clacpy_("Lower", &i__1, &i__2, &af_ref(*m - *n + 2, 1), lda, & q_ref(2, 1), lda); } } i__1 = min(*m,*n); cungrq_(n, n, &i__1, &q[q_offset], lda, &taua[1], &work[1], lwork, &info); /* Generate the P-by-P matrix Z */ claset_("Full", p, p, &c_b3, &c_b3, &z__[z_offset], ldb); if (*p > 1) { i__1 = *p - 1; clacpy_("Lower", &i__1, n, &bf_ref(2, 1), ldb, &z___ref(2, 1), ldb); } i__1 = min(*p,*n); cungqr_(p, p, &i__1, &z__[z_offset], ldb, &taub[1], &work[1], lwork, & info); /* Copy R */ claset_("Full", m, n, &c_b1, &c_b1, &r__[r_offset], lda); if (*m <= *n) { clacpy_("Upper", m, m, &af_ref(1, *n - *m + 1), lda, &r___ref(1, *n - *m + 1), lda); } else { i__1 = *m - *n; clacpy_("Full", &i__1, n, &af[af_offset], lda, &r__[r_offset], lda); clacpy_("Upper", n, n, &af_ref(*m - *n + 1, 1), lda, &r___ref(*m - *n + 1, 1), lda); } /* Copy T */ claset_("Full", p, n, &c_b1, &c_b1, &t[t_offset], ldb); clacpy_("Upper", p, n, &bf[bf_offset], ldb, &t[t_offset], ldb); /* Compute R - A*Q' */ q__1.r = -1.f, q__1.i = 0.f; cgemm_("No transpose", "Conjugate transpose", m, n, n, &q__1, &a[a_offset] , lda, &q[q_offset], lda, &c_b2, &r__[r_offset], lda); /* Compute norm( R - A*Q' ) / ( MAX(M,N)*norm(A)*ULP ) . */ resid = clange_("1", m, n, &r__[r_offset], lda, &rwork[1]); if (anorm > 0.f) { /* Computing MAX */ i__1 = max(1,*m); result[1] = resid / (real) max(i__1,*n) / anorm / ulp; } else { result[1] = 0.f; } /* Compute T*Q - Z'*B */ cgemm_("Conjugate transpose", "No transpose", p, n, p, &c_b2, &z__[ z_offset], ldb, &b[b_offset], ldb, &c_b1, &bwk[bwk_offset], ldb); q__1.r = -1.f, q__1.i = 0.f; cgemm_("No transpose", "No transpose", p, n, n, &c_b2, &t[t_offset], ldb, &q[q_offset], lda, &q__1, &bwk[bwk_offset], ldb); /* Compute norm( T*Q - Z'*B ) / ( MAX(P,N)*norm(A)*ULP ) . */ resid = clange_("1", p, n, &bwk[bwk_offset], ldb, &rwork[1]); if (bnorm > 0.f) { /* Computing MAX */ i__1 = max(1,*p); result[2] = resid / (real) max(i__1,*m) / bnorm / ulp; } else { result[2] = 0.f; } /* Compute I - Q*Q' */ claset_("Full", n, n, &c_b1, &c_b2, &r__[r_offset], lda); cherk_("Upper", "No Transpose", n, n, &c_b34, &q[q_offset], lda, &c_b35, & r__[r_offset], lda); /* Compute norm( I - Q'*Q ) / ( N * ULP ) . */ resid = clanhe_("1", "Upper", n, &r__[r_offset], lda, &rwork[1]); result[3] = resid / (real) max(1,*n) / ulp; /* Compute I - Z'*Z */ claset_("Full", p, p, &c_b1, &c_b2, &t[t_offset], ldb); cherk_("Upper", "Conjugate transpose", p, p, &c_b34, &z__[z_offset], ldb, &c_b35, &t[t_offset], ldb); /* Compute norm( I - Z'*Z ) / ( P*ULP ) . */ resid = clanhe_("1", "Upper", p, &t[t_offset], ldb, &rwork[1]); result[4] = resid / (real) max(1,*p) / ulp; return 0; /* End of CGRQTS */ } /* cgrqts_ */
/* Subroutine */ int dsyevx_(char *jobz, char *range, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *ifail, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DSYEVX computes selected eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. Eigenvalues and eigenvectors can be selected by specifying either a range of values or a range of indices for the desired eigenvalues. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. RANGE (input) CHARACTER*1 = 'A': all eigenvalues will be found. = 'V': all eigenvalues in the half-open interval (VL,VU] will be found. = 'I': the IL-th through IU-th eigenvalues will be found. UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA, N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, the lower triangle (if UPLO='L') or the upper triangle (if UPLO='U') of A, including the diagonal, is destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). VL (input) DOUBLE PRECISION VU (input) DOUBLE PRECISION If RANGE='V', the lower and upper bounds of the interval to be searched for eigenvalues. VL < VU. Not referenced if RANGE = 'A' or 'I'. IL (input) INTEGER IU (input) INTEGER If RANGE='I', the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. Not referenced if RANGE = 'A' or 'V'. ABSTOL (input) DOUBLE PRECISION The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to ABSTOL + EPS * max( |a|,|b| ) , where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*|T| will be used in its place, where |T| is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form. Eigenvalues will be computed most accurately when ABSTOL is set to twice the underflow threshold 2*DLAMCH('S'), not zero. If this routine returns with INFO>0, indicating that some eigenvectors did not converge, try setting ABSTOL to 2*DLAMCH('S'). See "Computing Small Singular Values of Bidiagonal Matrices with Guaranteed High Relative Accuracy," by Demmel and Kahan, LAPACK Working Note #3. M (output) INTEGER The total number of eigenvalues found. 0 <= M <= N. If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. W (output) DOUBLE PRECISION array, dimension (N) On normal exit, the first M elements contain the selected eigenvalues in ascending order. Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) If JOBZ = 'V', then if INFO = 0, the first M columns of Z contain the orthonormal eigenvectors of the matrix A corresponding to the selected eigenvalues, with the i-th column of Z holding the eigenvector associated with W(i). If an eigenvector fails to converge, then that column of Z contains the latest approximation to the eigenvector, and the index of the eigenvector is returned in IFAIL. If JOBZ = 'N', then Z is not referenced. Note: the user must ensure that at least max(1,M) columns are supplied in the array Z; if RANGE = 'V', the exact value of M is not known in advance and an upper bound must be used. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The length of the array WORK. LWORK >= max(1,8*N). For optimal efficiency, LWORK >= (NB+3)*N, where NB is the max of the blocksize for DSYTRD and DORMTR returned by ILAENV. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace) INTEGER array, dimension (5*N) IFAIL (output) INTEGER array, dimension (N) If JOBZ = 'V', then if INFO = 0, the first M elements of IFAIL are zero. If INFO > 0, then IFAIL contains the indices of the eigenvectors that failed to converge. If JOBZ = 'N', then IFAIL is not referenced. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, then i eigenvectors failed to converge. Their indices are stored in array IFAIL. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer indd, inde; static doublereal anrm; static integer imax; static doublereal rmin, rmax; static integer lopt, itmp1, i__, j, indee; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static doublereal sigma; extern logical lsame_(char *, char *); static integer iinfo; static char order[1]; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer *); static logical lower, wantz; static integer nb, jj; extern doublereal dlamch_(char *); static logical alleig, indeig; static integer iscale, indibl; static logical valeig; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal safmin; extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal abstll, bignum; static integer indtau, indisp; extern /* Subroutine */ int dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *), dsterf_(integer *, doublereal *, doublereal *, integer *); static integer indiwo, indwkn; extern doublereal dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); static integer indwrk; extern /* Subroutine */ int dorgtr_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), dormtr_(char *, char *, char *, integer *, integer *, doublereal * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static integer llwrkn, llwork, nsplit; static doublereal smlnum; extern /* Subroutine */ int dsytrd_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static integer lwkopt; static logical lquery; static doublereal eps, vll, vuu, tmp1; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ lower = lsame_(uplo, "L"); wantz = lsame_(jobz, "V"); alleig = lsame_(range, "A"); valeig = lsame_(range, "V"); indeig = lsame_(range, "I"); lquery = *lwork == -1; *info = 0; if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (lower || lsame_(uplo, "U"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -8; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -9; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -10; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -15; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = *n << 3; if (*lwork < max(i__1,i__2) && ! lquery) { *info = -17; } } } if (*info == 0) { nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = nb, i__2 = ilaenv_(&c__1, "DORMTR", uplo, n, &c_n1, &c_n1, & c_n1, (ftnlen)6, (ftnlen)1); nb = max(i__1,i__2); lwkopt = (nb + 3) * *n; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); xerbla_("DSYEVX", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { work[1] = 1.; return 0; } if (*n == 1) { work[1] = 7.; if (alleig || indeig) { *m = 1; w[1] = a_ref(1, 1); } else { if (*vl < a_ref(1, 1) && *vu >= a_ref(1, 1)) { *m = 1; w[1] = a_ref(1, 1); } } if (wantz) { z___ref(1, 1) = 1.; } return 0; } /* Get machine constants. */ safmin = dlamch_("Safe minimum"); eps = dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); rmax = min(d__1,d__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; abstll = *abstol; vll = *vl; vuu = *vu; anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { if (lower) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n - j + 1; dscal_(&i__2, &sigma, &a_ref(j, j), &c__1); /* L10: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { dscal_(&j, &sigma, &a_ref(1, j), &c__1); /* L20: */ } } if (*abstol > 0.) { abstll = *abstol * sigma; } if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ indtau = 1; inde = indtau + *n; indd = inde + *n; indwrk = indd + *n; llwork = *lwork - indwrk + 1; dsytrd_(uplo, n, &a[a_offset], lda, &work[indd], &work[inde], &work[ indtau], &work[indwrk], &llwork, &iinfo); lopt = (integer) (*n * 3 + work[indwrk]); /* If all eigenvalues are desired and ABSTOL is less than or equal to zero, then call DSTERF or DORGTR and SSTEQR. If this fails for some eigenvalue, then try DSTEBZ. */ if ((alleig || indeig && *il == 1 && *iu == *n) && *abstol <= 0.) { dcopy_(n, &work[indd], &c__1, &w[1], &c__1); indee = indwrk + (*n << 1); if (! wantz) { i__1 = *n - 1; dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); dsterf_(n, &w[1], &work[indee], info); } else { dlacpy_("A", n, n, &a[a_offset], lda, &z__[z_offset], ldz); dorgtr_(uplo, n, &z__[z_offset], ldz, &work[indtau], &work[indwrk] , &llwork, &iinfo); i__1 = *n - 1; dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[ indwrk], info); if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L30: */ } } } if (*info == 0) { *m = *n; goto L40; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indibl = 1; indisp = indibl + *n; indiwo = indisp + *n; dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ indwrk], &iwork[indiwo], info); if (wantz) { dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], & ifail[1], info); /* Apply orthogonal matrix used in reduction to tridiagonal form to eigenvectors returned by DSTEIN. */ indwkn = inde; llwrkn = *lwork - indwkn + 1; dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ L40: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } d__1 = 1. / sigma; dscal_(&imax, &d__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L50: */ } if (i__ != 0) { itmp1 = iwork[indibl + i__ - 1]; w[i__] = w[j]; iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; w[j] = tmp1; iwork[indibl + j - 1] = itmp1; dswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, j), &c__1); if (*info != 0) { itmp1 = ifail[i__]; ifail[i__] = ifail[j]; ifail[j] = itmp1; } } /* L60: */ } } /* Set WORK(1) to optimal workspace size. */ work[1] = (doublereal) lwkopt; return 0; /* End of DSYEVX */ } /* dsyevx_ */
/* Subroutine */ int dlarrv_(integer *n, doublereal *d__, doublereal *l, integer *isplit, integer *m, doublereal *w, integer *iblock, doublereal *gersch, doublereal *tol, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer iend, jblk; extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); static integer iter, temp[1], ktot; extern doublereal dnrm2_(integer *, doublereal *, integer *); static integer itmp1, itmp2, i__, j, k, p, q; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static integer indld; static doublereal sigma; static integer ndone, iinfo, iindr; static doublereal resid; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer nclus; extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer iindc1, iindc2; extern /* Subroutine */ int dlar1v_(integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *); static doublereal lambda; static integer im, in; extern doublereal dlamch_(char *); static integer ibegin, indgap, indlld; extern /* Subroutine */ int dlarrb_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static doublereal mingma; static integer oldien, oldncl; static doublereal relgap; extern /* Subroutine */ int dlarrf_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *), dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); static integer oldcls, ndepth, inderr, iindwk; extern /* Subroutine */ int dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *); static logical mgscls; static integer lsbdpt, newcls, oldfst; static doublereal minrgp; static integer indwrk, oldlst; static doublereal reltol; static integer maxitr, newfrs, newftt; static doublereal mgstol; static integer nsplit; static doublereal nrminv, rqcorr; static integer newlst, newsiz; static doublereal gap, eps, ztz, tmp1; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] /* -- LAPACK auxiliary routine (instru to count ops, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Common block to return operation count Purpose ======= DLARRV computes the eigenvectors of the tridiagonal matrix T = L D L^T given L, D and the eigenvalues of L D L^T. The input eigenvalues should have high relative accuracy with respect to the entries of L and D. The desired accuracy of the output can be specified by the input parameter TOL. Arguments ========= N (input) INTEGER The order of the matrix. N >= 0. D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the n diagonal elements of the diagonal matrix D. On exit, D may be overwritten. L (input/output) DOUBLE PRECISION array, dimension (N-1) On entry, the (n-1) subdiagonal elements of the unit bidiagonal matrix L in elements 1 to N-1 of L. L(N) need not be set. On exit, L is overwritten. ISPLIT (input) INTEGER array, dimension (N) The splitting points, at which T breaks up into submatrices. The first submatrix consists of rows/columns 1 to ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 through ISPLIT( 2 ), etc. TOL (input) DOUBLE PRECISION The absolute error tolerance for the eigenvalues/eigenvectors. Errors in the input eigenvalues must be bounded by TOL. The eigenvectors output have residual norms bounded by TOL, and the dot products between different eigenvectors are bounded by TOL. TOL must be at least N*EPS*|T|, where EPS is the machine precision and |T| is the 1-norm of the tridiagonal matrix. M (input) INTEGER The total number of eigenvalues found. 0 <= M <= N. If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. W (input) DOUBLE PRECISION array, dimension (N) The first M elements of W contain the eigenvalues for which eigenvectors are to be computed. The eigenvalues should be grouped by split-off block and ordered from smallest to largest within the block ( The output array W from DLARRE is expected here ). Errors in W must be bounded by TOL (see above). IBLOCK (input) INTEGER array, dimension (N) The submatrix indices associated with the corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to the first submatrix from the top, =2 if W(i) belongs to the second submatrix, etc. Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) If JOBZ = 'V', then if INFO = 0, the first M columns of Z contain the orthonormal eigenvectors of the matrix T corresponding to the selected eigenvalues, with the i-th column of Z holding the eigenvector associated with W(i). If JOBZ = 'N', then Z is not referenced. Note: the user must ensure that at least max(1,M) columns are supplied in the array Z; if RANGE = 'V', the exact value of M is not known in advance and an upper bound must be used. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) The support of the eigenvectors in Z, i.e., the indices indicating the nonzero elements in Z. The i-th eigenvector is nonzero only in elements ISUPPZ( 2*i-1 ) through ISUPPZ( 2*i ). WORK (workspace) DOUBLE PRECISION array, dimension (13*N) IWORK (workspace) INTEGER array, dimension (6*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = 1, internal error in DLARRB if INFO = 2, internal error in DSTEIN Further Details =============== Based on contributions by Inderjit Dhillon, IBM Almaden, USA Osni Marques, LBNL/NERSC, USA Ken Stanley, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --l; --isplit; --w; --iblock; --gersch; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --isuppz; --work; --iwork; /* Function Body */ inderr = *n + 1; indld = *n << 1; indlld = *n * 3; indgap = *n << 2; indwrk = *n * 5 + 1; iindr = *n; iindc1 = *n << 1; iindc2 = *n * 3; iindwk = (*n << 2) + 1; eps = dlamch_("Precision"); i__1 = *n << 1; for (i__ = 1; i__ <= i__1; ++i__) { iwork[i__] = 0; /* L10: */ } latime_1.ops += (doublereal) (*m + 1); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { work[inderr + i__ - 1] = eps * (d__1 = w[i__], abs(d__1)); /* L20: */ } dlaset_("Full", n, n, &c_b6, &c_b6, &z__[z_offset], ldz); mgstol = eps * 5.; nsplit = iblock[*m]; ibegin = 1; i__1 = nsplit; for (jblk = 1; jblk <= i__1; ++jblk) { iend = isplit[jblk]; /* Find the eigenvectors of the submatrix indexed IBEGIN through IEND. */ if (ibegin == iend) { z___ref(ibegin, ibegin) = 1.; isuppz[(ibegin << 1) - 1] = ibegin; isuppz[ibegin * 2] = ibegin; ibegin = iend + 1; goto L170; } oldien = ibegin - 1; in = iend - oldien; latime_1.ops += 1.; /* Computing MIN */ d__1 = .01, d__2 = 1. / (doublereal) in; reltol = min(d__1,d__2); im = in; dcopy_(&im, &w[ibegin], &c__1, &work[1], &c__1); latime_1.ops += (doublereal) (in - 1); i__2 = in - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[indgap + i__] = work[i__ + 1] - work[i__]; /* L30: */ } /* Computing MAX */ d__2 = (d__1 = work[in], abs(d__1)); work[indgap + in] = max(d__2,eps); ndone = 0; ndepth = 0; lsbdpt = 1; nclus = 1; iwork[iindc1 + 1] = 1; iwork[iindc1 + 2] = in; /* While( NDONE.LT.IM ) do */ L40: if (ndone < im) { oldncl = nclus; nclus = 0; lsbdpt = 1 - lsbdpt; i__2 = oldncl; for (i__ = 1; i__ <= i__2; ++i__) { if (lsbdpt == 0) { oldcls = iindc1; newcls = iindc2; } else { oldcls = iindc2; newcls = iindc1; } /* If NDEPTH > 1, retrieve the relatively robust representation (RRR) and perform limited bisection (if necessary) to get approximate eigenvalues. */ j = oldcls + (i__ << 1); oldfst = iwork[j - 1]; oldlst = iwork[j]; if (ndepth > 0) { j = oldien + oldfst; dcopy_(&in, &z___ref(ibegin, j), &c__1, &d__[ibegin], & c__1); dcopy_(&in, &z___ref(ibegin, j + 1), &c__1, &l[ibegin], & c__1); sigma = l[iend]; } k = ibegin; latime_1.ops += (doublereal) (in - 1 << 1); i__3 = in - 1; for (j = 1; j <= i__3; ++j) { work[indld + j] = d__[k] * l[k]; work[indlld + j] = work[indld + j] * l[k]; ++k; /* L50: */ } if (ndepth > 0) { dlarrb_(&in, &d__[ibegin], &l[ibegin], &work[indld + 1], & work[indlld + 1], &oldfst, &oldlst, &sigma, & reltol, &work[1], &work[indgap + 1], &work[inderr] , &work[indwrk], &iwork[iindwk], &iinfo); if (iinfo != 0) { *info = 1; return 0; } } /* Classify eigenvalues of the current representation (RRR) as (i) isolated, (ii) loosely clustered or (iii) tightly clustered */ newfrs = oldfst; i__3 = oldlst; for (j = oldfst; j <= i__3; ++j) { latime_1.ops += 1.; if (j == oldlst || work[indgap + j] >= reltol * (d__1 = work[j], abs(d__1))) { newlst = j; } else { /* continue (to the next loop) */ latime_1.ops += 1.; relgap = work[indgap + j] / (d__1 = work[j], abs(d__1) ); if (j == newfrs) { minrgp = relgap; } else { minrgp = min(minrgp,relgap); } goto L140; } newsiz = newlst - newfrs + 1; maxitr = 10; newftt = oldien + newfrs; if (newsiz > 1) { mgscls = newsiz <= 20 && minrgp >= mgstol; if (! mgscls) { dlarrf_(&in, &d__[ibegin], &l[ibegin], &work[ indld + 1], &work[indlld + 1], &newfrs, & newlst, &work[1], &z___ref(ibegin, newftt) , &z___ref(ibegin, newftt + 1), &work[ indwrk], &iwork[iindwk], info); if (*info == 0) { ++nclus; k = newcls + (nclus << 1); iwork[k - 1] = newfrs; iwork[k] = newlst; } else { *info = 0; if (minrgp >= mgstol) { mgscls = TRUE_; } else { /* Call DSTEIN to process this tight cluster. This happens only if MINRGP <= MGSTOL and DLARRF returns INFO = 1. The latter means that a new RRR to "break" the cluster could not be found. */ work[indwrk] = d__[ibegin]; latime_1.ops += (doublereal) (in - 1); i__4 = in - 1; for (k = 1; k <= i__4; ++k) { work[indwrk + k] = d__[ibegin + k] + work[indlld + k]; /* L60: */ } i__4 = newsiz; for (k = 1; k <= i__4; ++k) { iwork[iindwk + k - 1] = 1; /* L70: */ } i__4 = newlst; for (k = newfrs; k <= i__4; ++k) { isuppz[(ibegin + k << 1) - 3] = 1; isuppz[(ibegin + k << 1) - 2] = in; /* L80: */ } temp[0] = in; dstein_(&in, &work[indwrk], &work[indld + 1], &newsiz, &work[newfrs], & iwork[iindwk], temp, &z___ref( ibegin, newftt), ldz, &work[ indwrk + in], &iwork[iindwk + in], &iwork[iindwk + (in << 1)], & iinfo); if (iinfo != 0) { *info = 2; return 0; } ndone += newsiz; } } } } else { mgscls = FALSE_; } if (newsiz == 1 || mgscls) { ktot = newftt; i__4 = newlst; for (k = newfrs; k <= i__4; ++k) { iter = 0; L90: lambda = work[k]; dlar1v_(&in, &c__1, &in, &lambda, &d__[ibegin], & l[ibegin], &work[indld + 1], &work[indlld + 1], &gersch[(oldien << 1) + 1], & z___ref(ibegin, ktot), &ztz, &mingma, & iwork[iindr + ktot], &isuppz[(ktot << 1) - 1], &work[indwrk]); latime_1.ops += 4.; tmp1 = 1. / ztz; nrminv = sqrt(tmp1); resid = abs(mingma) * nrminv; rqcorr = mingma * tmp1; if (k == in) { gap = work[indgap + k - 1]; } else if (k == 1) { gap = work[indgap + k]; } else { /* Computing MIN */ d__1 = work[indgap + k - 1], d__2 = work[ indgap + k]; gap = min(d__1,d__2); } ++iter; latime_1.ops += 3.; if (resid > *tol * gap && abs(rqcorr) > eps * 4. * abs(lambda)) { latime_1.ops += 1.; work[k] = lambda + rqcorr; if (iter < maxitr) { goto L90; } } iwork[ktot] = 1; if (newsiz == 1) { ++ndone; } latime_1.ops += (doublereal) in; dscal_(&in, &nrminv, &z___ref(ibegin, ktot), & c__1); ++ktot; /* L100: */ } if (newsiz > 1) { itmp1 = isuppz[(newftt << 1) - 1]; itmp2 = isuppz[newftt * 2]; ktot = oldien + newlst; i__4 = ktot; for (p = newftt + 1; p <= i__4; ++p) { i__5 = p - 1; for (q = newftt; q <= i__5; ++q) { latime_1.ops += (doublereal) (in << 2); tmp1 = -ddot_(&in, &z___ref(ibegin, p), & c__1, &z___ref(ibegin, q), &c__1); daxpy_(&in, &tmp1, &z___ref(ibegin, q), & c__1, &z___ref(ibegin, p), &c__1); /* L110: */ } latime_1.ops += (doublereal) (in * 3 + 1); tmp1 = 1. / dnrm2_(&in, &z___ref(ibegin, p), & c__1); dscal_(&in, &tmp1, &z___ref(ibegin, p), &c__1) ; /* Computing MIN */ i__5 = itmp1, i__6 = isuppz[(p << 1) - 1]; itmp1 = min(i__5,i__6); /* Computing MAX */ i__5 = itmp2, i__6 = isuppz[p * 2]; itmp2 = max(i__5,i__6); /* L120: */ } i__4 = ktot; for (p = newftt; p <= i__4; ++p) { isuppz[(p << 1) - 1] = itmp1; isuppz[p * 2] = itmp2; /* L130: */ } ndone += newsiz; } } newfrs = j + 1; L140: ; } /* L150: */ } ++ndepth; goto L40; } j = ibegin << 1; i__2 = iend; for (i__ = ibegin; i__ <= i__2; ++i__) { isuppz[j - 1] += oldien; isuppz[j] += oldien; j += 2; /* L160: */ } ibegin = iend + 1; L170: ; } return 0; /* End of DLARRV */ } /* dlarrv_ */
/* Subroutine */ int sstedc_(char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= SSTEDC computes all eigenvalues and, optionally, eigenvectors of a symmetric tridiagonal matrix using the divide and conquer method. The eigenvectors of a full or band real symmetric matrix can also be found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to tridiagonal form. This code makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. See SLAED3 for details. Arguments ========= COMPZ (input) CHARACTER*1 = 'N': Compute eigenvalues only. = 'I': Compute eigenvectors of tridiagonal matrix also. = 'V': Compute eigenvectors of original dense symmetric matrix also. On entry, Z contains the orthogonal matrix used to reduce the original matrix to tridiagonal form. N (input) INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. D (input/output) REAL array, dimension (N) On entry, the diagonal elements of the tridiagonal matrix. On exit, if INFO = 0, the eigenvalues in ascending order. E (input/output) REAL array, dimension (N-1) On entry, the subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. Z (input/output) REAL array, dimension (LDZ,N) On entry, if COMPZ = 'V', then Z contains the orthogonal matrix used in the reduction to tridiagonal form. On exit, if INFO = 0, then if COMPZ = 'V', Z contains the orthonormal eigenvectors of the original symmetric matrix, and if COMPZ = 'I', Z contains the orthonormal eigenvectors of the symmetric tridiagonal matrix. If COMPZ = 'N', then Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1. If eigenvectors are desired, then LDZ >= max(1,N). WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. If COMPZ = 'V' and N > 1 then LWORK must be at least ( 1 + 3*N + 2*N*lg N + 3*N**2 ), where lg( N ) = smallest integer k such that 2**k >= N. If COMPZ = 'I' and N > 1 then LWORK must be at least ( 1 + 4*N + N**2 ). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace/output) INTEGER array, dimension (LIWORK) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER The dimension of the array IWORK. If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. If COMPZ = 'V' and N > 1 then LIWORK must be at least ( 6 + 6*N + 5*N*lg N ). If COMPZ = 'I' and N > 1 then LIWORK must be at least ( 3 + 5*N ). If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, returns this value as the first entry of the IWORK array, and no error message related to LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: The algorithm failed to compute an eigenvalue while working on the submatrix lying in rows and columns INFO/(N+1) through mod(INFO,N+1). Further Details =============== Based on contributions by Jeff Rutter, Computer Science Division, University of California at Berkeley, USA Modified by Francoise Tisseur, University of Tennessee. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__2 = 2; static integer c__9 = 9; static integer c__0 = 0; static real c_b18 = 0.f; static real c_b19 = 1.f; static integer c__1 = 1; /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double log(doublereal); integer pow_ii(integer *, integer *); double sqrt(doublereal); /* Local variables */ static real tiny; static integer i__, j, k, m; static real p; extern logical lsame_(char *, char *); extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static integer lwmin, start; extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, integer *), slaed0_(integer *, integer *, integer *, real *, real *, real *, integer *, real *, integer *, real *, integer *, integer *); static integer ii; extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); static integer liwmin, icompz; static real orgnrm; extern doublereal slanst_(char *, integer *, real *, real *); extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *), slasrt_(char *, integer *, real *, integer *); static logical lquery; static integer smlsiz; extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, real *, integer *, real *, integer *); static integer storez, strtrw, end, lgn; static real eps; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] --d__; --e; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --iwork; /* Function Body */ *info = 0; lquery = *lwork == -1 || *liwork == -1; if (lsame_(compz, "N")) { icompz = 0; } else if (lsame_(compz, "V")) { icompz = 1; } else if (lsame_(compz, "I")) { icompz = 2; } else { icompz = -1; } if (*n <= 1 || icompz <= 0) { liwmin = 1; lwmin = 1; } else { lgn = (integer) (log((real) (*n)) / log(2.f)); if (pow_ii(&c__2, &lgn) < *n) { ++lgn; } if (pow_ii(&c__2, &lgn) < *n) { ++lgn; } if (icompz == 1) { /* Computing 2nd power */ i__1 = *n; lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; liwmin = *n * 6 + 6 + *n * 5 * lgn; } else if (icompz == 2) { /* Computing 2nd power */ i__1 = *n; lwmin = (*n << 2) + 1 + i__1 * i__1; liwmin = *n * 5 + 3; } } if (icompz < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { *info = -6; } else if (*lwork < lwmin && ! lquery) { *info = -8; } else if (*liwork < liwmin && ! lquery) { *info = -10; } if (*info == 0) { work[1] = (real) lwmin; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); xerbla_("SSTEDC", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (icompz != 0) { z___ref(1, 1) = 1.f; } return 0; } smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( ftnlen)6, (ftnlen)1); /* If the following conditional clause is removed, then the routine will use the Divide and Conquer routine to compute only the eigenvalues, which requires (3N + 3N**2) real workspace and (2 + 5N + 2N lg(N)) integer workspace. Since on many architectures SSTERF is much faster than any other algorithm for finding eigenvalues only, it is used here as the default. If COMPZ = 'N', use SSTERF to compute the eigenvalues. */ if (icompz == 0) { ssterf_(n, &d__[1], &e[1], info); return 0; } /* If N is smaller than the minimum divide size (SMLSIZ+1), then solve the problem with another solver. */ if (*n <= smlsiz) { if (icompz == 0) { ssterf_(n, &d__[1], &e[1], info); return 0; } else if (icompz == 2) { ssteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); return 0; } else { ssteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); return 0; } } /* If COMPZ = 'V', the Z matrix must be stored elsewhere for later use. */ if (icompz == 1) { storez = *n * *n + 1; } else { storez = 1; } if (icompz == 2) { slaset_("Full", n, n, &c_b18, &c_b19, &z__[z_offset], ldz); } /* Scale. */ orgnrm = slanst_("M", n, &d__[1], &e[1]); if (orgnrm == 0.f) { return 0; } eps = slamch_("Epsilon"); start = 1; /* while ( START <= N ) */ L10: if (start <= *n) { /* Let END be the position of the next subdiagonal entry such that E( END ) <= TINY or END = N if no such subdiagonal exists. The matrix identified by the elements between START and END constitutes an independent sub-problem. */ end = start; L20: if (end < *n) { tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = d__[end + 1], dabs(r__2))); if ((r__1 = e[end], dabs(r__1)) > tiny) { ++end; goto L20; } } /* (Sub) Problem determined. Compute its size and solve it. */ m = end - start + 1; if (m == 1) { start = end + 1; goto L10; } if (m > smlsiz) { *info = smlsiz; /* Scale. */ orgnrm = slanst_("M", &m, &d__[start], &e[start]); slascl_("G", &c__0, &c__0, &orgnrm, &c_b19, &m, &c__1, &d__[start] , &m, info); i__1 = m - 1; i__2 = m - 1; slascl_("G", &c__0, &c__0, &orgnrm, &c_b19, &i__1, &c__1, &e[ start], &i__2, info); if (icompz == 1) { strtrw = 1; } else { strtrw = start; } slaed0_(&icompz, n, &m, &d__[start], &e[start], &z___ref(strtrw, start), ldz, &work[1], n, &work[storez], &iwork[1], info); if (*info != 0) { *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m + 1) + start - 1; return 0; } /* Scale back. */ slascl_("G", &c__0, &c__0, &c_b19, &orgnrm, &m, &c__1, &d__[start] , &m, info); } else { if (icompz == 1) { /* Since QR won't update a Z matrix which is larger than the length of D, we must solve the sub-problem in a workspace and then multiply back into Z. */ ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ m * m + 1], info); slacpy_("A", n, &m, &z___ref(1, start), ldz, &work[storez], n); sgemm_("N", "N", n, &m, &m, &c_b19, &work[storez], ldz, &work[ 1], &m, &c_b18, &z___ref(1, start), ldz); } else if (icompz == 2) { ssteqr_("I", &m, &d__[start], &e[start], &z___ref(start, start), ldz, &work[1], info); } else { ssterf_(&m, &d__[start], &e[start], info); } if (*info != 0) { *info = start * (*n + 1) + end; return 0; } } start = end + 1; goto L10; } /* endwhile If the problem split any number of times, then the eigenvalues will not be properly ordered. Here we permute the eigenvalues (and the associated eigenvectors) into ascending order. */ if (m != *n) { if (icompz == 0) { /* Use Quick Sort */ slasrt_("I", n, &d__[1], info); } else { /* Use Selection Sort to minimize swaps of eigenvectors */ i__1 = *n; for (ii = 2; ii <= i__1; ++ii) { i__ = ii - 1; k = i__; p = d__[i__]; i__2 = *n; for (j = ii; j <= i__2; ++j) { if (d__[j] < p) { k = j; p = d__[j]; } /* L30: */ } if (k != i__) { d__[k] = d__[i__]; d__[i__] = p; sswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, k), &c__1); } /* L40: */ } } } work[1] = (real) lwmin; iwork[1] = liwmin; return 0; /* End of SSTEDC */ } /* sstedc_ */
/* Subroutine */ int cstegr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, complex *z__, integer *ldz, integer *isuppz, real *work, integer *lwork, integer *iwork, integer *liwork, integer * info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer iend; static real rmin, rmax; static integer itmp; static real tnrm; static integer i__, j; static real scale; extern logical lsame_(char *, char *); static integer iinfo; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), cswap_(integer *, complex *, integer *, complex *, integer *); static integer lwmin; static logical wantz; static integer jj; static logical alleig, indeig; static integer ibegin, iindbl; static logical valeig; extern doublereal slamch_(char *); extern /* Subroutine */ int claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *); static real safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static real bignum; static integer iindwk, indgrs, indwof; extern /* Subroutine */ int clarrv_(integer *, real *, real *, integer *, integer *, real *, integer *, real *, real *, complex *, integer * , integer *, real *, integer *, integer *), slarre_(integer *, real *, real *, real *, integer *, integer *, integer *, real *, real *, real *, real *, integer *); static real thresh; static integer iinspl, indwrk, liwmin; extern doublereal slanst_(char *, integer *, real *, real *); static integer nsplit; static real smlnum; static logical lquery; static real eps, tol, tmp; #define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1 #define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)] /* -- LAPACK computational routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= CSTEGR computes selected eigenvalues and, optionally, eigenvectors of a real symmetric tridiagonal matrix T. Eigenvalues and eigenvectors can be selected by specifying either a range of values or a range of indices for the desired eigenvalues. The eigenvalues are computed by the dqds algorithm, while orthogonal eigenvectors are computed from various ``good'' L D L^T representations (also known as Relatively Robust Representations). Gram-Schmidt orthogonalization is avoided as far as possible. More specifically, the various steps of the algorithm are as follows. For the i-th unreduced block of T, (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T is a relatively robust representation, (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high relative accuracy by the dqds algorithm, (c) If there is a cluster of close eigenvalues, "choose" sigma_i close to the cluster, and go to step (a), (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, compute the corresponding eigenvector by forming a rank-revealing twisted factorization. The desired accuracy of the output can be specified by the input parameter ABSTOL. For more details, see "A new O(n^2) algorithm for the symmetric tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, Computer Science Division Technical Report No. UCB/CSD-97-971, UC Berkeley, May 1997. Note 1 : Currently CSTEGR is only set up to find ALL the n eigenvalues and eigenvectors of T in O(n^2) time Note 2 : Currently the routine CSTEIN is called when an appropriate sigma_i cannot be chosen in step (c) above. CSTEIN invokes modified Gram-Schmidt when eigenvalues are close. Note 3 : CSTEGR works only on machines which follow ieee-754 floating-point standard in their handling of infinities and NaNs. Normal execution of CSTEGR may create NaNs and infinities and hence may abort due to a floating point exception in environments which do not conform to the ieee standard. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. RANGE (input) CHARACTER*1 = 'A': all eigenvalues will be found. = 'V': all eigenvalues in the half-open interval (VL,VU] will be found. = 'I': the IL-th through IU-th eigenvalues will be found. ********* Only RANGE = 'A' is currently supported ********************* N (input) INTEGER The order of the matrix. N >= 0. D (input/output) REAL array, dimension (N) On entry, the n diagonal elements of the tridiagonal matrix T. On exit, D is overwritten. E (input/output) REAL array, dimension (N) On entry, the (n-1) subdiagonal elements of the tridiagonal matrix T in elements 1 to N-1 of E; E(N) need not be set. On exit, E is overwritten. VL (input) REAL VU (input) REAL If RANGE='V', the lower and upper bounds of the interval to be searched for eigenvalues. VL < VU. Not referenced if RANGE = 'A' or 'I'. IL (input) INTEGER IU (input) INTEGER If RANGE='I', the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. Not referenced if RANGE = 'A' or 'V'. ABSTOL (input) REAL The absolute error tolerance for the eigenvalues/eigenvectors. IF JOBZ = 'V', the eigenvalues and eigenvectors output have residual norms bounded by ABSTOL, and the dot products between different eigenvectors are bounded by ABSTOL. If ABSTOL is less than N*EPS*|T|, then N*EPS*|T| will be used in its place, where EPS is the machine precision and |T| is the 1-norm of the tridiagonal matrix. The eigenvalues are computed to an accuracy of EPS*|T| irrespective of ABSTOL. If high relative accuracy is important, set ABSTOL to DLAMCH( 'Safe minimum' ). See Barlow and Demmel "Computing Accurate Eigensystems of Scaled Diagonally Dominant Matrices", LAPACK Working Note #7 for a discussion of which matrices define their eigenvalues to high relative accuracy. M (output) INTEGER The total number of eigenvalues found. 0 <= M <= N. If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. W (output) REAL array, dimension (N) The first M elements contain the selected eigenvalues in ascending order. Z (output) COMPLEX array, dimension (LDZ, max(1,M) ) If JOBZ = 'V', then if INFO = 0, the first M columns of Z contain the orthonormal eigenvectors of the matrix T corresponding to the selected eigenvalues, with the i-th column of Z holding the eigenvector associated with W(i). If JOBZ = 'N', then Z is not referenced. Note: the user must ensure that at least max(1,M) columns are supplied in the array Z; if RANGE = 'V', the exact value of M is not known in advance and an upper bound must be used. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). ISUPPZ (output) INTEGER ARRAY, dimension ( 2*max(1,M) ) The support of the eigenvectors in Z, i.e., the indices indicating the nonzero elements in Z. The i-th eigenvector is nonzero only in elements ISUPPZ( 2*i-1 ) through ISUPPZ( 2*i ). WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal (and minimal) LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,18*N) If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace/output) INTEGER array, dimension (LIWORK) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER The dimension of the array IWORK. LIWORK >= max(1,10*N) If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, returns this value as the first entry of the IWORK array, and no error message related to LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = 1, internal error in SLARRE, if INFO = 2, internal error in CLARRV. Further Details =============== Based on contributions by Inderjit Dhillon, IBM Almaden, USA Osni Marques, LBNL/NERSC, USA Ken Stanley, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --isuppz; --work; --iwork; /* Function Body */ wantz = lsame_(jobz, "V"); alleig = lsame_(range, "A"); valeig = lsame_(range, "V"); indeig = lsame_(range, "I"); lquery = *lwork == -1 || *liwork == -1; lwmin = *n * 18; liwmin = *n * 10; *info = 0; if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; /* The following two lines need to be removed once the RANGE = 'V' and RANGE = 'I' options are provided. */ } else if (valeig || indeig) { *info = -2; } else if (*n < 0) { *info = -3; } else if (valeig && *n > 0 && *vu <= *vl) { *info = -7; } else if (indeig && *il < 1) { *info = -8; /* The following change should be made in DSTEVX also, otherwise IL can be specified as N+1 and IU as N. ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN */ } else if (indeig && (*iu < *il || *iu > *n)) { *info = -9; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -14; } else if (*lwork < lwmin && ! lquery) { *info = -17; } else if (*liwork < liwmin && ! lquery) { *info = -19; } if (*info == 0) { work[1] = (real) lwmin; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); xerbla_("CSTEGR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } if (*n == 1) { if (alleig || indeig) { *m = 1; w[1] = d__[1]; } else { if (*vl < d__[1] && *vu >= d__[1]) { *m = 1; w[1] = d__[1]; } } if (wantz) { i__1 = z___subscr(1, 1); z__[i__1].r = 1.f, z__[i__1].i = 0.f; } return 0; } /* Get machine constants. */ safmin = slamch_("Safe minimum"); eps = slamch_("Precision"); smlnum = safmin / eps; bignum = 1.f / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ r__1 = sqrt(bignum), r__2 = 1.f / sqrt(sqrt(safmin)); rmax = dmin(r__1,r__2); /* Scale matrix to allowable range, if necessary. */ scale = 1.f; tnrm = slanst_("M", n, &d__[1], &e[1]); if (tnrm > 0.f && tnrm < rmin) { scale = rmin / tnrm; } else if (tnrm > rmax) { scale = rmax / tnrm; } if (scale != 1.f) { sscal_(n, &scale, &d__[1], &c__1); i__1 = *n - 1; sscal_(&i__1, &scale, &e[1], &c__1); tnrm *= scale; } indgrs = 1; indwof = (*n << 1) + 1; indwrk = *n * 3 + 1; iinspl = 1; iindbl = *n + 1; iindwk = (*n << 1) + 1; claset_("Full", n, n, &c_b1, &c_b1, &z__[z_offset], ldz); /* Compute the desired eigenvalues of the tridiagonal after splitting into smaller subblocks if the corresponding of-diagonal elements are small */ thresh = eps * tnrm; slarre_(n, &d__[1], &e[1], &thresh, &nsplit, &iwork[iinspl], m, &w[1], & work[indwof], &work[indgrs], &work[indwrk], &iinfo); if (iinfo != 0) { *info = 1; return 0; } if (wantz) { /* Compute the desired eigenvectors corresponding to the computed eigenvalues Computing MAX */ r__1 = *abstol, r__2 = (real) (*n) * thresh; tol = dmax(r__1,r__2); ibegin = 1; i__1 = nsplit; for (i__ = 1; i__ <= i__1; ++i__) { iend = iwork[iinspl + i__ - 1]; i__2 = iend; for (j = ibegin; j <= i__2; ++j) { iwork[iindbl + j - 1] = i__; /* L10: */ } ibegin = iend + 1; /* L20: */ } clarrv_(n, &d__[1], &e[1], &iwork[iinspl], m, &w[1], &iwork[iindbl], & work[indgrs], &tol, &z__[z_offset], ldz, &isuppz[1], &work[ indwrk], &iwork[iindwk], &iinfo); if (iinfo != 0) { *info = 2; return 0; } } ibegin = 1; i__1 = nsplit; for (i__ = 1; i__ <= i__1; ++i__) { iend = iwork[iinspl + i__ - 1]; i__2 = iend; for (j = ibegin; j <= i__2; ++j) { w[j] += work[indwof + i__ - 1]; /* L30: */ } ibegin = iend + 1; /* L40: */ } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (scale != 1.f) { r__1 = 1.f / scale; sscal_(m, &r__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with eigenvectors. */ if (nsplit > 1) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp) { i__ = jj; tmp = w[jj]; } /* L50: */ } if (i__ != 0) { w[i__] = w[j]; w[j] = tmp; if (wantz) { cswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, j), &c__1); itmp = isuppz[(i__ << 1) - 1]; isuppz[(i__ << 1) - 1] = isuppz[(j << 1) - 1]; isuppz[(j << 1) - 1] = itmp; itmp = isuppz[i__ * 2]; isuppz[i__ * 2] = isuppz[j * 2]; isuppz[j * 2] = itmp; } } /* L60: */ } } work[1] = (real) lwmin; iwork[1] = liwmin; return 0; /* End of CSTEGR */ } /* cstegr_ */
/* Subroutine */ int dhseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H and, optionally, the matrices T and Z from the Schur decomposition H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur form), and Z is the orthogonal matrix of Schur vectors. Optionally Z may be postmultiplied into an input orthogonal matrix Q, so that this routine can give the Schur factorization of a matrix A which has been reduced to the Hessenberg form H by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. Arguments ========= JOB (input) CHARACTER*1 = 'E': compute eigenvalues only; = 'S': compute eigenvalues and the Schur form T. COMPZ (input) CHARACTER*1 = 'N': no Schur vectors are computed; = 'I': Z is initialized to the unit matrix and the matrix Z of Schur vectors of H is returned; = 'V': Z must contain an orthogonal matrix Q on entry, and the product Q*Z is returned. N (input) INTEGER The order of the matrix H. N >= 0. ILO (input) INTEGER IHI (input) INTEGER It is assumed that H is already upper triangular in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally set by a previous call to DGEBAL, and then passed to SGEHRD when the matrix output by DGEBAL is reduced to Hessenberg form. Otherwise ILO and IHI should be set to 1 and N respectively. 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. H (input/output) DOUBLE PRECISION array, dimension (LDH,N) On entry, the upper Hessenberg matrix H. On exit, if JOB = 'S', H contains the upper quasi-triangular matrix T from the Schur decomposition (the Schur form); 2-by-2 diagonal blocks (corresponding to complex conjugate pairs of eigenvalues) are returned in standard form, with H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', the contents of H are unspecified on exit. LDH (input) INTEGER The leading dimension of the array H. LDH >= max(1,N). WR (output) DOUBLE PRECISION array, dimension (N) WI (output) DOUBLE PRECISION array, dimension (N) The real and imaginary parts, respectively, of the computed eigenvalues. If two eigenvalues are computed as a complex conjugate pair, they are stored in consecutive elements of WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the same order as on the diagonal of the Schur form returned in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) If COMPZ = 'N': Z is not referenced. If COMPZ = 'I': on entry, Z need not be set, and on exit, Z contains the orthogonal matrix Z of the Schur vectors of H. If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, which is assumed to be equal to the unit matrix except for the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. Normally Q is the orthogonal matrix generated by DORGHR after the call to DGEHRD which formed the Hessenberg matrix H. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,N). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, DHSEQR failed to compute all of the eigenvalues in a total of 30*(IHI-ILO+1) iterations; elements 1:ilo-1 and i+1:n of WR and WI contain those eigenvalues which have been successfully computed. ===================================================================== Decode and test the input parameters Parameter adjustments */ /* Table of constant values */ static doublereal c_b9 = 0.; static doublereal c_b10 = 1.; static integer c__4 = 4; static integer c_n1 = -1; static integer c__2 = 2; static integer c__8 = 8; static integer c__15 = 15; static logical c_false = FALSE_; static integer c__1 = 1; /* System generated locals */ address a__1[2]; integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, i__5; doublereal d__1, d__2; char ch__1[2]; /* Builtin functions Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ static integer maxb; static doublereal absw; static integer ierr; static doublereal unfl, temp, ovfl; static integer i__, j, k, l; static doublereal s[225] /* was [15][15] */, v[16]; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer itemp; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer i1, i2; static logical initz, wantt, wantz; extern doublereal dlapy2_(doublereal *, doublereal *); extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); static integer ii, nh; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); static integer nr, ns; extern integer idamax_(integer *, doublereal *, integer *); static integer nv; extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); static doublereal vv[16]; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), dlarfx_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); static doublereal smlnum; static logical lquery; static integer itn; static doublereal tau; static integer its; static doublereal ulp, tst1; #define h___ref(a_1,a_2) h__[(a_2)*h_dim1 + a_1] #define s_ref(a_1,a_2) s[(a_2)*15 + a_1 - 16] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] h_dim1 = *ldh; h_offset = 1 + h_dim1 * 1; h__ -= h_offset; --wr; --wi; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; /* Function Body */ wantt = lsame_(job, "S"); initz = lsame_(compz, "I"); wantz = initz || lsame_(compz, "V"); *info = 0; work[1] = (doublereal) max(1,*n); lquery = *lwork == -1; if (! lsame_(job, "E") && ! wantt) { *info = -1; } else if (! lsame_(compz, "N") && ! wantz) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -4; } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -5; } else if (*ldh < max(1,*n)) { *info = -7; } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { *info = -11; } else if (*lwork < max(1,*n) && ! lquery) { *info = -13; } if (*info != 0) { i__1 = -(*info); xerbla_("DHSEQR", &i__1); return 0; } else if (lquery) { return 0; } /* Initialize Z, if necessary */ if (initz) { dlaset_("Full", n, n, &c_b9, &c_b10, &z__[z_offset], ldz); } /* Store the eigenvalues isolated by DGEBAL. */ i__1 = *ilo - 1; for (i__ = 1; i__ <= i__1; ++i__) { wr[i__] = h___ref(i__, i__); wi[i__] = 0.; /* L10: */ } i__1 = *n; for (i__ = *ihi + 1; i__ <= i__1; ++i__) { wr[i__] = h___ref(i__, i__); wi[i__] = 0.; /* L20: */ } /* Quick return if possible. */ if (*n == 0) { return 0; } if (*ilo == *ihi) { wr[*ilo] = h___ref(*ilo, *ilo); wi[*ilo] = 0.; return 0; } /* Set rows and columns ILO to IHI to zero below the first subdiagonal. */ i__1 = *ihi - 2; for (j = *ilo; j <= i__1; ++j) { i__2 = *n; for (i__ = j + 2; i__ <= i__2; ++i__) { h___ref(i__, j) = 0.; /* L30: */ } /* L40: */ } nh = *ihi - *ilo + 1; /* Determine the order of the multi-shift QR algorithm to be used. Writing concatenation */ i__3[0] = 1, a__1[0] = job; i__3[1] = 1, a__1[1] = compz; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); ns = ilaenv_(&c__4, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( ftnlen)2); /* Writing concatenation */ i__3[0] = 1, a__1[0] = job; i__3[1] = 1, a__1[1] = compz; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); maxb = ilaenv_(&c__8, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( ftnlen)2); if (ns <= 2 || ns > nh || maxb >= nh) { /* Use the standard double-shift algorithm */ dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ 1], ilo, ihi, &z__[z_offset], ldz, info); return 0; } maxb = max(3,maxb); /* Computing MIN */ i__1 = min(ns,maxb); ns = min(i__1,15); /* Now 2 < NS <= MAXB < NH. Set machine-dependent constants for the stopping criterion. If norm(H) <= sqrt(OVFL), overflow should not occur. */ unfl = dlamch_("Safe minimum"); ovfl = 1. / unfl; dlabad_(&unfl, &ovfl); ulp = dlamch_("Precision"); smlnum = unfl * (nh / ulp); /* I1 and I2 are the indices of the first row and last column of H to which transformations must be applied. If eigenvalues only are being computed, I1 and I2 are set inside the main loop. */ if (wantt) { i1 = 1; i2 = *n; } /* ITN is the total number of multiple-shift QR iterations allowed. */ itn = nh * 30; /* The main loop begins here. I is the loop index and decreases from IHI to ILO in steps of at most MAXB. Each iteration of the loop works with the active submatrix in rows and columns L to I. Eigenvalues I+1 to IHI have already converged. Either L = ILO or H(L,L-1) is negligible so that the matrix splits. */ i__ = *ihi; L50: l = *ilo; if (i__ < *ilo) { goto L170; } /* Perform multiple-shift QR iterations on rows and columns ILO to I until a submatrix of order at most MAXB splits off at the bottom because a subdiagonal element has become negligible. */ i__1 = itn; for (its = 0; its <= i__1; ++its) { /* Look for a single small subdiagonal element. */ i__2 = l + 1; for (k = i__; k >= i__2; --k) { tst1 = (d__1 = h___ref(k - 1, k - 1), abs(d__1)) + (d__2 = h___ref(k, k), abs(d__2)); if (tst1 == 0.) { i__4 = i__ - l + 1; tst1 = dlanhs_("1", &i__4, &h___ref(l, l), ldh, &work[1]); } /* Computing MAX */ d__2 = ulp * tst1; if ((d__1 = h___ref(k, k - 1), abs(d__1)) <= max(d__2,smlnum)) { goto L70; } /* L60: */ } L70: l = k; if (l > *ilo) { /* H(L,L-1) is negligible. */ h___ref(l, l - 1) = 0.; } /* Exit from loop if a submatrix of order <= MAXB has split off. */ if (l >= i__ - maxb + 1) { goto L160; } /* Now the active submatrix is in rows and columns L to I. If eigenvalues only are being computed, only the active submatrix need be transformed. */ if (! wantt) { i1 = l; i2 = i__; } if (its == 20 || its == 30) { /* Exceptional shifts. */ i__2 = i__; for (ii = i__ - ns + 1; ii <= i__2; ++ii) { wr[ii] = ((d__1 = h___ref(ii, ii - 1), abs(d__1)) + (d__2 = h___ref(ii, ii), abs(d__2))) * 1.5; wi[ii] = 0.; /* L80: */ } } else { /* Use eigenvalues of trailing submatrix of order NS as shifts. */ dlacpy_("Full", &ns, &ns, &h___ref(i__ - ns + 1, i__ - ns + 1), ldh, s, &c__15); dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); if (ierr > 0) { /* If DLAHQR failed to compute all NS eigenvalues, use the unconverged diagonal elements as the remaining shifts. */ i__2 = ierr; for (ii = 1; ii <= i__2; ++ii) { wr[i__ - ns + ii] = s_ref(ii, ii); wi[i__ - ns + ii] = 0.; /* L90: */ } } } /* Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) where G is the Hessenberg submatrix H(L:I,L:I) and w is the vector of shifts (stored in WR and WI). The result is stored in the local array V. */ v[0] = 1.; i__2 = ns + 1; for (ii = 2; ii <= i__2; ++ii) { v[ii - 1] = 0.; /* L100: */ } nv = 1; i__2 = i__; for (j = i__ - ns + 1; j <= i__2; ++j) { if (wi[j] >= 0.) { if (wi[j] == 0.) { /* real shift */ i__4 = nv + 1; dcopy_(&i__4, v, &c__1, vv, &c__1); i__4 = nv + 1; d__1 = -wr[j]; dgemv_("No transpose", &i__4, &nv, &c_b10, &h___ref(l, l), ldh, vv, &c__1, &d__1, v, &c__1); ++nv; } else if (wi[j] > 0.) { /* complex conjugate pair of shifts */ i__4 = nv + 1; dcopy_(&i__4, v, &c__1, vv, &c__1); i__4 = nv + 1; d__1 = wr[j] * -2.; dgemv_("No transpose", &i__4, &nv, &c_b10, &h___ref(l, l), ldh, v, &c__1, &d__1, vv, &c__1); i__4 = nv + 1; itemp = idamax_(&i__4, vv, &c__1); /* Computing MAX */ d__2 = (d__1 = vv[itemp - 1], abs(d__1)); temp = 1. / max(d__2,smlnum); i__4 = nv + 1; dscal_(&i__4, &temp, vv, &c__1); absw = dlapy2_(&wr[j], &wi[j]); temp = temp * absw * absw; i__4 = nv + 2; i__5 = nv + 1; dgemv_("No transpose", &i__4, &i__5, &c_b10, &h___ref(l, l), ldh, vv, &c__1, &temp, v, &c__1); nv += 2; } /* Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, reset it to the unit vector. */ itemp = idamax_(&nv, v, &c__1); temp = (d__1 = v[itemp - 1], abs(d__1)); if (temp == 0.) { v[0] = 1.; i__4 = nv; for (ii = 2; ii <= i__4; ++ii) { v[ii - 1] = 0.; /* L110: */ } } else { temp = max(temp,smlnum); d__1 = 1. / temp; dscal_(&nv, &d__1, v, &c__1); } } /* L120: */ } /* Multiple-shift QR step */ i__2 = i__ - 1; for (k = l; k <= i__2; ++k) { /* The first iteration of this loop determines a reflection G from the vector V and applies it from left and right to H, thus creating a nonzero bulge below the subdiagonal. Each subsequent iteration determines a reflection G to restore the Hessenberg form in the (K-1)th column, and thus chases the bulge one step toward the bottom of the active submatrix. NR is the order of G. Computing MIN */ i__4 = ns + 1, i__5 = i__ - k + 1; nr = min(i__4,i__5); if (k > l) { dcopy_(&nr, &h___ref(k, k - 1), &c__1, v, &c__1); } dlarfg_(&nr, v, &v[1], &c__1, &tau); if (k > l) { h___ref(k, k - 1) = v[0]; i__4 = i__; for (ii = k + 1; ii <= i__4; ++ii) { h___ref(ii, k - 1) = 0.; /* L130: */ } } v[0] = 1.; /* Apply G from the left to transform the rows of the matrix in columns K to I2. */ i__4 = i2 - k + 1; dlarfx_("Left", &nr, &i__4, v, &tau, &h___ref(k, k), ldh, &work[1] ); /* Apply G from the right to transform the columns of the matrix in rows I1 to min(K+NR,I). Computing MIN */ i__5 = k + nr; i__4 = min(i__5,i__) - i1 + 1; dlarfx_("Right", &i__4, &nr, v, &tau, &h___ref(i1, k), ldh, &work[ 1]); if (wantz) { /* Accumulate transformations in the matrix Z */ dlarfx_("Right", &nh, &nr, v, &tau, &z___ref(*ilo, k), ldz, & work[1]); } /* L140: */ } /* L150: */ } /* Failure to converge in remaining number of iterations */ *info = i__; return 0; L160: /* A submatrix of order <= MAXB in rows and columns L to I has split off. Use the double-shift QR algorithm to handle it. */ dlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, info); if (*info > 0) { return 0; } /* Decrement number of remaining iterations, and return to start of the main loop with a new value of I. */ itn -= its; i__ = l - 1; goto L50; L170: work[1] = (doublereal) max(1,*n); return 0; /* End of DHSEQR */ } /* dhseqr_ */
/* Subroutine */ int cdrvgg_(integer *nsizes, integer *nn, integer *ntypes, logical *dotype, integer *iseed, real *thresh, real *thrshn, integer * nounit, complex *a, integer *lda, complex *b, complex *s, complex *t, complex *s2, complex *t2, complex *q, integer *ldq, complex *z__, complex *alpha1, complex *beta1, complex *alpha2, complex *beta2, complex *vl, complex *vr, complex *work, integer *lwork, real *rwork, real *result, integer *info) { /* Initialized data */ static integer kclass[26] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2, 2,2,2,3 }; static integer kbmagn[26] = { 1,1,1,1,1,1,1,1,3,2,3,2,2,3,1,1,1,1,1,1,1,3, 2,3,2,1 }; static integer ktrian[26] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, 1,1,1,1 }; static logical lasign[26] = { FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_, TRUE_,FALSE_,TRUE_,TRUE_,FALSE_,FALSE_,TRUE_,TRUE_,TRUE_,FALSE_, TRUE_,FALSE_,FALSE_,FALSE_,TRUE_,TRUE_,TRUE_,TRUE_,TRUE_,FALSE_ }; static logical lbsign[26] = { FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_, FALSE_,TRUE_,FALSE_,FALSE_,TRUE_,TRUE_,FALSE_,FALSE_,TRUE_,FALSE_, TRUE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_, FALSE_ }; static integer kz1[6] = { 0,1,2,1,3,3 }; static integer kz2[6] = { 0,0,1,2,1,1 }; static integer kadd[6] = { 0,0,0,0,3,2 }; static integer katype[26] = { 0,1,0,1,2,3,4,1,4,4,1,1,4,4,4,2,4,5,8,7,9,4, 4,4,4,0 }; static integer kbtype[26] = { 0,0,1,1,2,-3,1,4,1,1,4,4,1,1,-4,2,-4,8,8,8, 8,8,8,8,8,0 }; static integer kazero[26] = { 1,1,1,1,1,1,2,1,2,2,1,1,2,2,3,1,3,5,5,5,5,3, 3,3,3,1 }; static integer kbzero[26] = { 1,1,1,1,1,1,1,2,1,1,2,2,1,1,4,1,4,6,6,6,6,4, 4,4,4,1 }; static integer kamagn[26] = { 1,1,1,1,1,1,1,1,2,3,2,3,2,3,1,1,1,1,1,1,1,2, 3,3,2,1 }; /* Format strings */ static char fmt_9999[] = "(\002 CDRVGG: \002,a,\002 returned INFO=\002,i" "6,\002.\002,/9x,\002N=\002,i6,\002, JTYPE=\002,i6,\002, ISEED=" "(\002,3(i5,\002,\002),i5,\002)\002)"; static char fmt_9998[] = "(\002 CDRVGG: \002,a,\002 Eigenvectors from" " \002,a,\002 incorrectly \002,\002normalized.\002,/\002 Bits of " "error=\002,0p,g10.3,\002,\002,9x,\002N=\002,i6,\002, JTYPE=\002," "i6,\002, ISEED=(\002,3(i5,\002,\002),i5,\002)\002)"; static char fmt_9997[] = "(/1x,a3,\002 -- Complex Generalized eigenvalue" " problem driver\002)"; static char fmt_9996[] = "(\002 Matrix types (see CDRVGG for details):" " \002)"; static char fmt_9995[] = "(\002 Special Matrices:\002,23x,\002(J'=transp" "osed Jordan block)\002,/\002 1=(0,0) 2=(I,0) 3=(0,I) 4=(I,I" ") 5=(J',J') \002,\0026=(diag(J',I), diag(I,J'))\002,/\002 Diag" "onal Matrices: ( \002,\002D=diag(0,1,2,...) )\002,/\002 7=(D," "I) 9=(large*D, small*I\002,\002) 11=(large*I, small*D) 13=(l" "arge*D, large*I)\002,/\002 8=(I,D) 10=(small*D, large*I) 12=" "(small*I, large*D) \002,\002 14=(small*D, small*I)\002,/\002 15" "=(D, reversed D)\002)"; static char fmt_9994[] = "(\002 Matrices Rotated by Random \002,a,\002 M" "atrices U, V:\002,/\002 16=Transposed Jordan Blocks " " 19=geometric \002,\002alpha, beta=0,1\002,/\002 17=arithm. alp" "ha&beta \002,\002 20=arithmetic alpha, beta=0," "1\002,/\002 18=clustered \002,\002alpha, beta=0,1 21" "=random alpha, beta=0,1\002,/\002 Large & Small Matrices:\002," "/\002 22=(large, small) \002,\00223=(small,large) 24=(smal" "l,small) 25=(large,large)\002,/\002 26=random O(1) matrices" ".\002)"; static char fmt_9993[] = "(/\002 Tests performed: (S is Schur, T is tri" "angular, \002,\002Q and Z are \002,a,\002,\002,/20x,\002l and r " "are the appropriate left and right\002,/19x,\002eigenvectors, re" "sp., a is alpha, b is beta, and\002,/19x,a,\002 means \002,a," "\002.)\002,/\002 1 = | A - Q S Z\002,a,\002 | / ( |A| n ulp ) " " 2 = | B - Q T Z\002,a,\002 | / ( |B| n ulp )\002,/\002 3 = | " "I - QQ\002,a,\002 | / ( n ulp ) 4 = | I - ZZ\002,a" ",\002 | / ( n ulp )\002,/\002 5 = difference between (alpha,beta" ") and diagonals of\002,\002 (S,T)\002,/\002 6 = max | ( b A - a " "B )\002,a,\002 l | / const. 7 = max | ( b A - a B ) r | / cons" "t.\002,/1x)"; static char fmt_9992[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2" ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i3,\002 is\002" ",0p,f8.2)"; static char fmt_9991[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2" ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i3,\002 is\002" ",1p,e10.3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, s_dim1, s_offset, s2_dim1, s2_offset, t_dim1, t_offset, t2_dim1, t2_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11; real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8, r__9, r__10, r__11, r__12, r__13, r__14, r__15, r__16; complex q__1, q__2, q__3, q__4; /* Builtin functions */ double r_sign(real *, real *), c_abs(complex *); void r_cnjg(complex *, complex *); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); double r_imag(complex *); /* Local variables */ static integer iadd, nmax; static real temp1, temp2; static integer j, n; static logical badnn; extern /* Subroutine */ int cgegs_(char *, char *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, integer *, complex *, integer *, complex *, integer *, real *, integer *), cgegv_(char *, char *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, integer *, complex *, integer *, complex *, integer *, real *, integer *), cget51_(integer *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, real *, real *), cget52_(logical *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, real *, real *); static real dumma[4]; static integer iinfo; static real rmagn[4]; static complex ctemp; static integer nmats, jsize, nerrs, i1, jtype, ntest, n1; extern /* Subroutine */ int clatm4_(integer *, integer *, integer *, integer *, logical *, real *, real *, real *, integer *, integer * , complex *, integer *), cunm2r_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *); static integer jc, nb; extern /* Subroutine */ int slabad_(real *, real *); static integer in, jr; extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, integer *, complex *); static integer ns; extern /* Complex */ VOID clarnd_(complex *, integer *, integer *); extern doublereal slamch_(char *); extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *); static real safmin, safmax; static integer ioldsd[4]; extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int alasvm_(char *, integer *, integer *, integer *, integer *), claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); static real ulpinv; static integer lwkopt, mtypes, ntestt, nbz; static real ulp; /* Fortran I/O blocks */ static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___47 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___54 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___56 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9991, 0 }; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1 #define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)] #define s_subscr(a_1,a_2) (a_2)*s_dim1 + a_1 #define s_ref(a_1,a_2) s[s_subscr(a_1,a_2)] #define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1 #define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)] #define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1 #define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CDRVGG checks the nonsymmetric generalized eigenvalue driver routines. T T T CGEGS factors A and B as Q S Z and Q T Z , where means transpose, T is upper triangular, S is in generalized Schur form (upper triangular), and Q and Z are unitary. It also computes the generalized eigenvalues (alpha(1),beta(1)), ..., (alpha(n),beta(n)), where alpha(j)=S(j,j) and beta(j)=T(j,j) -- thus, w(j) = alpha(j)/beta(j) is a root of the generalized eigenvalue problem det( A - w(j) B ) = 0 and m(j) = beta(j)/alpha(j) is a root of the essentially equivalent problem det( m(j) A - B ) = 0 CGEGV computes the generalized eigenvalues (alpha(1),beta(1)), ..., (alpha(n),beta(n)), the matrix L whose columns contain the generalized left eigenvectors l, and the matrix R whose columns contain the generalized right eigenvectors r for the pair (A,B). When CDRVGG is called, a number of matrix "sizes" ("n's") and a number of matrix "types" are specified. For each size ("n") and each type of matrix, one matrix will be generated and used to test the nonsymmetric eigenroutines. For each matrix, 7 tests will be performed and compared with the threshhold THRESH: Results from CGEGS: H (1) | A - Q S Z | / ( |A| n ulp ) H (2) | B - Q T Z | / ( |B| n ulp ) H (3) | I - QQ | / ( n ulp ) H (4) | I - ZZ | / ( n ulp ) (5) maximum over j of D(j) where: |alpha(j) - S(j,j)| |beta(j) - T(j,j)| D(j) = ------------------------ + ----------------------- max(|alpha(j)|,|S(j,j)|) max(|beta(j)|,|T(j,j)|) Results from CGEGV: (6) max over all left eigenvalue/-vector pairs (beta/alpha,l) of | l**H * (beta A - alpha B) | / ( ulp max( |beta A|, |alpha B| ) ) where l**H is the conjugate tranpose of l. (7) max over all right eigenvalue/-vector pairs (beta/alpha,r) of | (beta A - alpha B) r | / ( ulp max( |beta A|, |alpha B| ) ) Test Matrices ---- -------- The sizes of the test matrices are specified by an array NN(1:NSIZES); the value of each element NN(j) specifies one size. The "types" are specified by a logical array DOTYPE( 1:NTYPES ); if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. Currently, the list of possible types is: (1) ( 0, 0 ) (a pair of zero matrices) (2) ( I, 0 ) (an identity and a zero matrix) (3) ( 0, I ) (an identity and a zero matrix) (4) ( I, I ) (a pair of identity matrices) t t (5) ( J , J ) (a pair of transposed Jordan blocks) t ( I 0 ) (6) ( X, Y ) where X = ( J 0 ) and Y = ( t ) ( 0 I ) ( 0 J ) and I is a k x k identity and J a (k+1)x(k+1) Jordan block; k=(N-1)/2 (7) ( D, I ) where D is diag( 0, 1,..., N-1 ) (a diagonal matrix with those diagonal entries.) (8) ( I, D ) (9) ( big*D, small*I ) where "big" is near overflow and small=1/big (10) ( small*D, big*I ) (11) ( big*I, small*D ) (12) ( small*I, big*D ) (13) ( big*D, big*I ) (14) ( small*D, small*I ) (15) ( D1, D2 ) where D1 is diag( 0, 0, 1, ..., N-3, 0 ) and D2 is diag( 0, N-3, N-4,..., 1, 0, 0 ) t t (16) Q ( J , J ) Z where Q and Z are random unitary matrices. (17) Q ( T1, T2 ) Z where T1 and T2 are upper triangular matrices with random O(1) entries above the diagonal and diagonal entries diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) and diag(T2) = ( 0, N-3, N-4,..., 1, 0, 0 ) (18) Q ( T1, T2 ) Z diag(T1) = ( 0, 0, 1, 1, s, ..., s, 0 ) diag(T2) = ( 0, 1, 0, 1,..., 1, 0 ) s = machine precision. (19) Q ( T1, T2 ) Z diag(T1)=( 0,0,1,1, 1-d, ..., 1-(N-5)*d=s, 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0 ) N-5 (20) Q ( T1, T2 ) Z diag(T1)=( 0, 0, 1, 1, a, ..., a =s, 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 ) (21) Q ( T1, T2 ) Z diag(T1)=( 0, 0, 1, r1, r2, ..., r(N-4), 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 ) where r1,..., r(N-4) are random. (22) Q ( big*T1, small*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (23) Q ( small*T1, big*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (24) Q ( small*T1, small*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (25) Q ( big*T1, big*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (26) Q ( T1, T2 ) Z where T1 and T2 are random upper-triangular matrices. Arguments ========= NSIZES (input) INTEGER The number of sizes of matrices to use. If it is zero, CDRVGG does nothing. It must be at least zero. NN (input) INTEGER array, dimension (NSIZES) An array containing the sizes to be used for the matrices. Zero values will be skipped. The values must be at least zero. NTYPES (input) INTEGER The number of elements in DOTYPE. If it is zero, CDRVGG does nothing. It must be at least zero. If it is MAXTYP+1 and NSIZES is 1, then an additional type, MAXTYP+1 is defined, which is to use whatever matrix is in A. This is only useful if DOTYPE(1:MAXTYP) is .FALSE. and DOTYPE(MAXTYP+1) is .TRUE. . DOTYPE (input) LOGICAL array, dimension (NTYPES) If DOTYPE(j) is .TRUE., then for each size in NN a matrix of that size and of type j will be generated. If NTYPES is smaller than the maximum number of types defined (PARAMETER MAXTYP), then types NTYPES+1 through MAXTYP will not be generated. If NTYPES is larger than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) will be ignored. ISEED (input/output) INTEGER array, dimension (4) On entry ISEED specifies the seed of the random number generator. The array elements should be between 0 and 4095; if not they will be reduced mod 4096. Also, ISEED(4) must be odd. The random number generator uses a linear congruential sequence limited to small integers, and so should produce machine independent random numbers. The values of ISEED are changed on exit, and can be used in the next call to CDRVGG to continue the same random number sequence. THRESH (input) REAL A test will count as "failed" if the "error", computed as described above, exceeds THRESH. Note that the error is scaled to be O(1), so THRESH should be a reasonably small multiple of 1, e.g., 10 or 100. In particular, it should not depend on the precision (single vs. double) or the size of the matrix. It must be at least zero. THRSHN (input) REAL Threshhold for reporting eigenvector normalization error. If the normalization of any eigenvector differs from 1 by more than THRSHN*ulp, then a special error message will be printed. (This is handled separately from the other tests, since only a compiler or programming error should cause an error message, at least if THRSHN is at least 5--10.) NOUNIT (input) INTEGER The FORTRAN unit number for printing out error messages (e.g., if a routine returns IINFO not equal to 0.) A (input/workspace) COMPLEX array, dimension (LDA, max(NN)) Used to hold the original A matrix. Used as input only if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and DOTYPE(MAXTYP+1)=.TRUE. LDA (input) INTEGER The leading dimension of A, B, S, T, S2, and T2. It must be at least 1 and at least max( NN ). B (input/workspace) COMPLEX array, dimension (LDA, max(NN)) Used to hold the original B matrix. Used as input only if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and DOTYPE(MAXTYP+1)=.TRUE. S (workspace) COMPLEX array, dimension (LDA, max(NN)) The upper triangular matrix computed from A by CGEGS. T (workspace) COMPLEX array, dimension (LDA, max(NN)) The upper triangular matrix computed from B by CGEGS. S2 (workspace) COMPLEX array, dimension (LDA, max(NN)) The matrix computed from A by CGEGV. This will be the Schur (upper triangular) form of some matrix related to A, but will not, in general, be the same as S. T2 (workspace) COMPLEX array, dimension (LDA, max(NN)) The matrix computed from B by CGEGV. This will be the Schur form of some matrix related to B, but will not, in general, be the same as T. Q (workspace) COMPLEX array, dimension (LDQ, max(NN)) The (left) unitary matrix computed by CGEGS. LDQ (input) INTEGER The leading dimension of Q, Z, VL, and VR. It must be at least 1 and at least max( NN ). Z (workspace) COMPLEX array, dimension (LDQ, max(NN)) The (right) unitary matrix computed by CGEGS. ALPHA1 (workspace) COMPLEX array, dimension (max(NN)) BETA1 (workspace) COMPLEX array, dimension (max(NN)) The generalized eigenvalues of (A,B) computed by CGEGS. ALPHA1(k) / BETA1(k) is the k-th generalized eigenvalue of the matrices in A and B. ALPHA2 (workspace) COMPLEX array, dimension (max(NN)) BETA2 (workspace) COMPLEX array, dimension (max(NN)) The generalized eigenvalues of (A,B) computed by CGEGV. ALPHA2(k) / BETA2(k) is the k-th generalized eigenvalue of the matrices in A and B. VL (workspace) COMPLEX array, dimension (LDQ, max(NN)) The (lower triangular) left eigenvector matrix for the matrices in A and B. VR (workspace) COMPLEX array, dimension (LDQ, max(NN)) The (upper triangular) right eigenvector matrix for the matrices in A and B. WORK (workspace) COMPLEX array, dimension (LWORK) LWORK (input) INTEGER The number of entries in WORK. This must be at least MAX( 2*N, N*(NB+1), (k+1)*(2*k+N+1) ), where "k" is the sum of the blocksize and number-of-shifts for CHGEQZ, and NB is the greatest of the blocksizes for CGEQRF, CUNMQR, and CUNGQR. (The blocksizes and the number-of-shifts are retrieved through calls to ILAENV.) RWORK (workspace) REAL array, dimension (8*N) RESULT (output) REAL array, dimension (7) The values computed by the tests described above. The values are currently limited to 1/ulp, to avoid overflow. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: A routine returned an error code. INFO is the absolute value of the INFO value returned. ===================================================================== Parameter adjustments */ --nn; --dotype; --iseed; t2_dim1 = *lda; t2_offset = 1 + t2_dim1 * 1; t2 -= t2_offset; s2_dim1 = *lda; s2_offset = 1 + s2_dim1 * 1; s2 -= s2_offset; t_dim1 = *lda; t_offset = 1 + t_dim1 * 1; t -= t_offset; s_dim1 = *lda; s_offset = 1 + s_dim1 * 1; s -= s_offset; b_dim1 = *lda; b_offset = 1 + b_dim1 * 1; b -= b_offset; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; vr_dim1 = *ldq; vr_offset = 1 + vr_dim1 * 1; vr -= vr_offset; vl_dim1 = *ldq; vl_offset = 1 + vl_dim1 * 1; vl -= vl_offset; z_dim1 = *ldq; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --alpha1; --beta1; --alpha2; --beta2; --work; --rwork; --result; /* Function Body Check for errors */ *info = 0; badnn = FALSE_; nmax = 1; i__1 = *nsizes; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = nmax, i__3 = nn[j]; nmax = max(i__2,i__3); if (nn[j] < 0) { badnn = TRUE_; } /* L10: */ } /* Maximum blocksize and shift -- we assume that blocksize and number of shifts are monotone increasing functions of N. Computing MAX */ i__1 = 1, i__2 = ilaenv_(&c__1, "CGEQRF", " ", &nmax, &nmax, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1), i__1 = max(i__1,i__2), i__2 = ilaenv_(& c__1, "CUNMQR", "LC", &nmax, &nmax, &nmax, &c_n1, (ftnlen)6, ( ftnlen)2), i__1 = max(i__1,i__2), i__2 = ilaenv_(&c__1, "CUNGQR", " ", &nmax, &nmax, &nmax, &c_n1, (ftnlen)6, (ftnlen)1); nb = max(i__1,i__2); nbz = ilaenv_(&c__1, "CHGEQZ", "SII", &nmax, &c__1, &nmax, &c__0, (ftnlen) 6, (ftnlen)3); ns = ilaenv_(&c__4, "CHGEQZ", "SII", &nmax, &c__1, &nmax, &c__0, (ftnlen) 6, (ftnlen)3); i1 = nbz + ns; /* Computing MAX */ i__1 = nmax << 1, i__2 = nmax * (nb + 1), i__1 = max(i__1,i__2), i__2 = (( i1 << 1) + nmax + 1) * (i1 + 1); lwkopt = max(i__1,i__2); /* Check for errors */ if (*nsizes < 0) { *info = -1; } else if (badnn) { *info = -2; } else if (*ntypes < 0) { *info = -3; } else if (*thresh < 0.f) { *info = -6; } else if (*lda <= 1 || *lda < nmax) { *info = -10; } else if (*ldq <= 1 || *ldq < nmax) { *info = -19; } else if (lwkopt > *lwork) { *info = -30; } if (*info != 0) { i__1 = -(*info); xerbla_("CDRVGG", &i__1); return 0; } /* Quick return if possible */ if (*nsizes == 0 || *ntypes == 0) { return 0; } ulp = slamch_("Precision"); safmin = slamch_("Safe minimum"); safmin /= ulp; safmax = 1.f / safmin; slabad_(&safmin, &safmax); ulpinv = 1.f / ulp; /* The values RMAGN(2:3) depend on N, see below. */ rmagn[0] = 0.f; rmagn[1] = 1.f; /* Loop over sizes, types */ ntestt = 0; nerrs = 0; nmats = 0; i__1 = *nsizes; for (jsize = 1; jsize <= i__1; ++jsize) { n = nn[jsize]; n1 = max(1,n); rmagn[2] = safmax * ulp / (real) n1; rmagn[3] = safmin * ulpinv * n1; if (*nsizes != 1) { mtypes = min(26,*ntypes); } else { mtypes = min(27,*ntypes); } i__2 = mtypes; for (jtype = 1; jtype <= i__2; ++jtype) { if (! dotype[jtype]) { goto L150; } ++nmats; ntest = 0; /* Save ISEED in case of an error. */ for (j = 1; j <= 4; ++j) { ioldsd[j - 1] = iseed[j]; /* L20: */ } /* Initialize RESULT */ for (j = 1; j <= 7; ++j) { result[j] = 0.f; /* L30: */ } /* Compute A and B Description of control parameters: KCLASS: =1 means w/o rotation, =2 means w/ rotation, =3 means random. KATYPE: the "type" to be passed to CLATM4 for computing A. KAZERO: the pattern of zeros on the diagonal for A: =1: ( xxx ), =2: (0, xxx ) =3: ( 0, 0, xxx, 0 ), =4: ( 0, xxx, 0, 0 ), =5: ( 0, 0, 1, xxx, 0 ), =6: ( 0, 1, 0, xxx, 0 ). (xxx means a string of non-zero entries.) KAMAGN: the magnitude of the matrix: =0: zero, =1: O(1), =2: large, =3: small. LASIGN: .TRUE. if the diagonal elements of A are to be multiplied by a random magnitude 1 number. KBTYPE, KBZERO, KBMAGN, IBSIGN: the same, but for B. KTRIAN: =0: don't fill in the upper triangle, =1: do. KZ1, KZ2, KADD: used to implement KAZERO and KBZERO. RMAGN: used to implement KAMAGN and KBMAGN. */ if (mtypes > 26) { goto L110; } iinfo = 0; if (kclass[jtype - 1] < 3) { /* Generate A (w/o rotation) */ if ((i__3 = katype[jtype - 1], abs(i__3)) == 3) { in = ((n - 1) / 2 << 1) + 1; if (in != n) { claset_("Full", &n, &n, &c_b1, &c_b1, &a[a_offset], lda); } } else { in = n; } clatm4_(&katype[jtype - 1], &in, &kz1[kazero[jtype - 1] - 1], &kz2[kazero[jtype - 1] - 1], &lasign[jtype - 1], & rmagn[kamagn[jtype - 1]], &ulp, &rmagn[ktrian[jtype - 1] * kamagn[jtype - 1]], &c__2, &iseed[1], &a[ a_offset], lda); iadd = kadd[kazero[jtype - 1] - 1]; if (iadd > 0 && iadd <= n) { i__3 = a_subscr(iadd, iadd); i__4 = kamagn[jtype - 1]; a[i__3].r = rmagn[i__4], a[i__3].i = 0.f; } /* Generate B (w/o rotation) */ if ((i__3 = kbtype[jtype - 1], abs(i__3)) == 3) { in = ((n - 1) / 2 << 1) + 1; if (in != n) { claset_("Full", &n, &n, &c_b1, &c_b1, &b[b_offset], lda); } } else { in = n; } clatm4_(&kbtype[jtype - 1], &in, &kz1[kbzero[jtype - 1] - 1], &kz2[kbzero[jtype - 1] - 1], &lbsign[jtype - 1], & rmagn[kbmagn[jtype - 1]], &c_b39, &rmagn[ktrian[jtype - 1] * kbmagn[jtype - 1]], &c__2, &iseed[1], &b[ b_offset], lda); iadd = kadd[kbzero[jtype - 1] - 1]; if (iadd != 0 && iadd <= n) { i__3 = b_subscr(iadd, iadd); i__4 = kbmagn[jtype - 1]; b[i__3].r = rmagn[i__4], b[i__3].i = 0.f; } if (kclass[jtype - 1] == 2 && n > 0) { /* Include rotations Generate Q, Z as Householder transformations times a diagonal matrix. */ i__3 = n - 1; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = jc; jr <= i__4; ++jr) { i__5 = q_subscr(jr, jc); clarnd_(&q__1, &c__3, &iseed[1]); q[i__5].r = q__1.r, q[i__5].i = q__1.i; i__5 = z___subscr(jr, jc); clarnd_(&q__1, &c__3, &iseed[1]); z__[i__5].r = q__1.r, z__[i__5].i = q__1.i; /* L40: */ } i__4 = n + 1 - jc; clarfg_(&i__4, &q_ref(jc, jc), &q_ref(jc + 1, jc), & c__1, &work[jc]); i__4 = (n << 1) + jc; i__5 = q_subscr(jc, jc); r__2 = q[i__5].r; r__1 = r_sign(&c_b39, &r__2); work[i__4].r = r__1, work[i__4].i = 0.f; i__4 = q_subscr(jc, jc); q[i__4].r = 1.f, q[i__4].i = 0.f; i__4 = n + 1 - jc; clarfg_(&i__4, &z___ref(jc, jc), &z___ref(jc + 1, jc), &c__1, &work[n + jc]); i__4 = n * 3 + jc; i__5 = z___subscr(jc, jc); r__2 = z__[i__5].r; r__1 = r_sign(&c_b39, &r__2); work[i__4].r = r__1, work[i__4].i = 0.f; i__4 = z___subscr(jc, jc); z__[i__4].r = 1.f, z__[i__4].i = 0.f; /* L50: */ } clarnd_(&q__1, &c__3, &iseed[1]); ctemp.r = q__1.r, ctemp.i = q__1.i; i__3 = q_subscr(n, n); q[i__3].r = 1.f, q[i__3].i = 0.f; i__3 = n; work[i__3].r = 0.f, work[i__3].i = 0.f; i__3 = n * 3; r__1 = c_abs(&ctemp); q__1.r = ctemp.r / r__1, q__1.i = ctemp.i / r__1; work[i__3].r = q__1.r, work[i__3].i = q__1.i; clarnd_(&q__1, &c__3, &iseed[1]); ctemp.r = q__1.r, ctemp.i = q__1.i; i__3 = z___subscr(n, n); z__[i__3].r = 1.f, z__[i__3].i = 0.f; i__3 = n << 1; work[i__3].r = 0.f, work[i__3].i = 0.f; i__3 = n << 2; r__1 = c_abs(&ctemp); q__1.r = ctemp.r / r__1, q__1.i = ctemp.i / r__1; work[i__3].r = q__1.r, work[i__3].i = q__1.i; /* Apply the diagonal matrices */ i__3 = n; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = 1; jr <= i__4; ++jr) { i__5 = a_subscr(jr, jc); i__6 = (n << 1) + jr; r_cnjg(&q__3, &work[n * 3 + jc]); q__2.r = work[i__6].r * q__3.r - work[i__6].i * q__3.i, q__2.i = work[i__6].r * q__3.i + work[i__6].i * q__3.r; i__7 = a_subscr(jr, jc); q__1.r = q__2.r * a[i__7].r - q__2.i * a[i__7].i, q__1.i = q__2.r * a[i__7].i + q__2.i * a[ i__7].r; a[i__5].r = q__1.r, a[i__5].i = q__1.i; i__5 = b_subscr(jr, jc); i__6 = (n << 1) + jr; r_cnjg(&q__3, &work[n * 3 + jc]); q__2.r = work[i__6].r * q__3.r - work[i__6].i * q__3.i, q__2.i = work[i__6].r * q__3.i + work[i__6].i * q__3.r; i__7 = b_subscr(jr, jc); q__1.r = q__2.r * b[i__7].r - q__2.i * b[i__7].i, q__1.i = q__2.r * b[i__7].i + q__2.i * b[ i__7].r; b[i__5].r = q__1.r, b[i__5].i = q__1.i; /* L60: */ } /* L70: */ } i__3 = n - 1; cunm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[ 1], &a[a_offset], lda, &work[(n << 1) + 1], & iinfo); if (iinfo != 0) { goto L100; } i__3 = n - 1; cunm2r_("R", "C", &n, &n, &i__3, &z__[z_offset], ldq, & work[n + 1], &a[a_offset], lda, &work[(n << 1) + 1], &iinfo); if (iinfo != 0) { goto L100; } i__3 = n - 1; cunm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[ 1], &b[b_offset], lda, &work[(n << 1) + 1], & iinfo); if (iinfo != 0) { goto L100; } i__3 = n - 1; cunm2r_("R", "C", &n, &n, &i__3, &z__[z_offset], ldq, & work[n + 1], &b[b_offset], lda, &work[(n << 1) + 1], &iinfo); if (iinfo != 0) { goto L100; } } } else { /* Random matrices */ i__3 = n; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = 1; jr <= i__4; ++jr) { i__5 = a_subscr(jr, jc); i__6 = kamagn[jtype - 1]; clarnd_(&q__2, &c__4, &iseed[1]); q__1.r = rmagn[i__6] * q__2.r, q__1.i = rmagn[i__6] * q__2.i; a[i__5].r = q__1.r, a[i__5].i = q__1.i; i__5 = b_subscr(jr, jc); i__6 = kbmagn[jtype - 1]; clarnd_(&q__2, &c__4, &iseed[1]); q__1.r = rmagn[i__6] * q__2.r, q__1.i = rmagn[i__6] * q__2.i; b[i__5].r = q__1.r, b[i__5].i = q__1.i; /* L80: */ } /* L90: */ } } L100: if (iinfo != 0) { io___43.ciunit = *nounit; s_wsfe(&io___43); do_fio(&c__1, "Generator", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); return 0; } L110: /* Call CGEGS to compute H, T, Q, Z, alpha, and beta. */ clacpy_(" ", &n, &n, &a[a_offset], lda, &s[s_offset], lda); clacpy_(" ", &n, &n, &b[b_offset], lda, &t[t_offset], lda); ntest = 1; result[1] = ulpinv; cgegs_("V", "V", &n, &s[s_offset], lda, &t[t_offset], lda, & alpha1[1], &beta1[1], &q[q_offset], ldq, &z__[z_offset], ldq, &work[1], lwork, &rwork[1], &iinfo); if (iinfo != 0) { io___44.ciunit = *nounit; s_wsfe(&io___44); do_fio(&c__1, "CGEGS", (ftnlen)5); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L130; } ntest = 4; /* Do tests 1--4 */ cget51_(&c__1, &n, &a[a_offset], lda, &s[s_offset], lda, &q[ q_offset], ldq, &z__[z_offset], ldq, &work[1], &rwork[1], &result[1]); cget51_(&c__1, &n, &b[b_offset], lda, &t[t_offset], lda, &q[ q_offset], ldq, &z__[z_offset], ldq, &work[1], &rwork[1], &result[2]); cget51_(&c__3, &n, &b[b_offset], lda, &t[t_offset], lda, &q[ q_offset], ldq, &q[q_offset], ldq, &work[1], &rwork[1], & result[3]); cget51_(&c__3, &n, &b[b_offset], lda, &t[t_offset], lda, &z__[ z_offset], ldq, &z__[z_offset], ldq, &work[1], &rwork[1], &result[4]); /* Do test 5: compare eigenvalues with diagonals. */ temp1 = 0.f; i__3 = n; for (j = 1; j <= i__3; ++j) { i__4 = j; i__5 = s_subscr(j, j); q__2.r = alpha1[i__4].r - s[i__5].r, q__2.i = alpha1[i__4].i - s[i__5].i; q__1.r = q__2.r, q__1.i = q__2.i; i__6 = j; i__7 = t_subscr(j, j); q__4.r = beta1[i__6].r - t[i__7].r, q__4.i = beta1[i__6].i - t[i__7].i; q__3.r = q__4.r, q__3.i = q__4.i; /* Computing MAX */ i__8 = j; i__9 = s_subscr(j, j); r__13 = safmin, r__14 = (r__1 = alpha1[i__8].r, dabs(r__1)) + (r__2 = r_imag(&alpha1[j]), dabs(r__2)), r__13 = max( r__13,r__14), r__14 = (r__3 = s[i__9].r, dabs(r__3)) + (r__4 = r_imag(&s_ref(j, j)), dabs(r__4)); /* Computing MAX */ i__10 = j; i__11 = t_subscr(j, j); r__15 = safmin, r__16 = (r__5 = beta1[i__10].r, dabs(r__5)) + (r__6 = r_imag(&beta1[j]), dabs(r__6)), r__15 = max( r__15,r__16), r__16 = (r__7 = t[i__11].r, dabs(r__7)) + (r__8 = r_imag(&t_ref(j, j)), dabs(r__8)); temp2 = (((r__9 = q__1.r, dabs(r__9)) + (r__10 = r_imag(&q__1) , dabs(r__10))) / dmax(r__13,r__14) + ((r__11 = q__3.r, dabs(r__11)) + (r__12 = r_imag(&q__3), dabs( r__12))) / dmax(r__15,r__16)) / ulp; temp1 = dmax(temp1,temp2); /* L120: */ } result[5] = temp1; /* Call CGEGV to compute S2, T2, VL, and VR, do tests. Eigenvalues and Eigenvectors */ clacpy_(" ", &n, &n, &a[a_offset], lda, &s2[s2_offset], lda); clacpy_(" ", &n, &n, &b[b_offset], lda, &t2[t2_offset], lda); ntest = 6; result[6] = ulpinv; cgegv_("V", "V", &n, &s2[s2_offset], lda, &t2[t2_offset], lda, & alpha2[1], &beta2[1], &vl[vl_offset], ldq, &vr[vr_offset], ldq, &work[1], lwork, &rwork[1], &iinfo); if (iinfo != 0) { io___47.ciunit = *nounit; s_wsfe(&io___47); do_fio(&c__1, "CGEGV", (ftnlen)5); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L130; } ntest = 7; /* Do Tests 6 and 7 */ cget52_(&c_true, &n, &a[a_offset], lda, &b[b_offset], lda, &vl[ vl_offset], ldq, &alpha2[1], &beta2[1], &work[1], &rwork[ 1], dumma); result[6] = dumma[0]; if (dumma[1] > *thrshn) { io___49.ciunit = *nounit; s_wsfe(&io___49); do_fio(&c__1, "Left", (ftnlen)4); do_fio(&c__1, "CGEGV", (ftnlen)5); do_fio(&c__1, (char *)&dumma[1], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); } cget52_(&c_false, &n, &a[a_offset], lda, &b[b_offset], lda, &vr[ vr_offset], ldq, &alpha2[1], &beta2[1], &work[1], &rwork[ 1], dumma); result[7] = dumma[0]; if (dumma[1] > *thresh) { io___50.ciunit = *nounit; s_wsfe(&io___50); do_fio(&c__1, "Right", (ftnlen)5); do_fio(&c__1, "CGEGV", (ftnlen)5); do_fio(&c__1, (char *)&dumma[1], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); } /* End of Loop -- Check for RESULT(j) > THRESH */ L130: ntestt += ntest; /* Print out tests which fail. */ i__3 = ntest; for (jr = 1; jr <= i__3; ++jr) { if (result[jr] >= *thresh) { /* If this is the first test to fail, print a header to the data file. */ if (nerrs == 0) { io___51.ciunit = *nounit; s_wsfe(&io___51); do_fio(&c__1, "CGG", (ftnlen)3); e_wsfe(); /* Matrix types */ io___52.ciunit = *nounit; s_wsfe(&io___52); e_wsfe(); io___53.ciunit = *nounit; s_wsfe(&io___53); e_wsfe(); io___54.ciunit = *nounit; s_wsfe(&io___54); do_fio(&c__1, "Unitary", (ftnlen)7); e_wsfe(); /* Tests performed */ io___55.ciunit = *nounit; s_wsfe(&io___55); do_fio(&c__1, "unitary", (ftnlen)7); do_fio(&c__1, "*", (ftnlen)1); do_fio(&c__1, "conjugate transpose", (ftnlen)19); for (j = 1; j <= 5; ++j) { do_fio(&c__1, "*", (ftnlen)1); } e_wsfe(); } ++nerrs; if (result[jr] < 1e4f) { io___56.ciunit = *nounit; s_wsfe(&io___56); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)) ; do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof( real)); e_wsfe(); } else { io___57.ciunit = *nounit; s_wsfe(&io___57); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)) ; do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof( real)); e_wsfe(); } } /* L140: */ } L150: ; } /* L160: */ } /* Summary */ alasvm_("CGG", nounit, &nerrs, &ntestt, &c__0); return 0; /* End of CDRVGG */ } /* cdrvgg_ */
/* Subroutine */ int sspgvd_(integer *itype, char *jobz, char *uplo, integer * n, real *ap, real *bp, real *w, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= SSPGVD computes all the eigenvalues, and optionally, the eigenvectors of a real generalized symmetric-definite eigenproblem, of the form A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be symmetric, stored in packed format, and B is also positive definite. If eigenvectors are desired, it uses a divide and conquer algorithm. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= ITYPE (input) INTEGER Specifies the problem type to be solved: = 1: A*x = (lambda)*B*x = 2: A*B*x = (lambda)*x = 3: B*A*x = (lambda)*x JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. UPLO (input) CHARACTER*1 = 'U': Upper triangles of A and B are stored; = 'L': Lower triangles of A and B are stored. N (input) INTEGER The order of the matrices A and B. N >= 0. AP (input/output) REAL array, dimension (N*(N+1)/2) On entry, the upper or lower triangle of the symmetric matrix A, packed columnwise in a linear array. The j-th column of A is stored in the array AP as follows: if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. On exit, the contents of AP are destroyed. BP (input/output) REAL array, dimension (N*(N+1)/2) On entry, the upper or lower triangle of the symmetric matrix B, packed columnwise in a linear array. The j-th column of B is stored in the array BP as follows: if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. On exit, the triangular factor U or L from the Cholesky factorization B = U**T*U or B = L*L**T, in the same storage format as B. W (output) REAL array, dimension (N) If INFO = 0, the eigenvalues in ascending order. Z (output) REAL array, dimension (LDZ, N) If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of eigenvectors. The eigenvectors are normalized as follows: if ITYPE = 1 or 2, Z**T*B*Z = I; if ITYPE = 3, Z**T*inv(B)*Z = I. If JOBZ = 'N', then Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If N <= 1, LWORK >= 1. If JOBZ = 'N' and N > 1, LWORK >= 2*N. If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace/output) INTEGER array, dimension (LIWORK) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER The dimension of the array IWORK. If JOBZ = 'N' or N <= 1, LIWORK >= 1. If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, returns this value as the first entry of the IWORK array, and no error message related to LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: SPPTRF or SSPEVD returned an error code: <= N: if INFO = i, SSPEVD failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero; > N: if INFO = N + i, for 1 <= i <= N, then the leading minor of order i of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed. Further Details =============== Based on contributions by Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__2 = 2; static integer c__1 = 1; /* System generated locals */ integer z_dim1, z_offset, i__1; real r__1, r__2; /* Builtin functions */ double log(doublereal); integer pow_ii(integer *, integer *); /* Local variables */ static integer neig, j; extern logical lsame_(char *, char *); static integer lwmin; static char trans[1]; static logical upper, wantz; extern /* Subroutine */ int stpmv_(char *, char *, char *, integer *, real *, real *, integer *), stpsv_(char *, char *, char *, integer *, real *, real *, integer *), xerbla_(char *, integer *); static integer liwmin; extern /* Subroutine */ int sspevd_(char *, char *, integer *, real *, real *, real *, integer *, real *, integer *, integer *, integer * , integer *), spptrf_(char *, integer *, real *, integer *); static logical lquery; extern /* Subroutine */ int sspgst_(integer *, char *, integer *, real *, real *, integer *); static integer lgn; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] --ap; --bp; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --iwork; /* Function Body */ wantz = lsame_(jobz, "V"); upper = lsame_(uplo, "U"); lquery = *lwork == -1 || *liwork == -1; *info = 0; if (*n <= 1) { lgn = 0; liwmin = 1; lwmin = 1; } else { lgn = (integer) (log((real) (*n)) / log(2.f)); if (pow_ii(&c__2, &lgn) < *n) { ++lgn; } if (pow_ii(&c__2, &lgn) < *n) { ++lgn; } if (wantz) { liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; lwmin = *n * 5 + 1 + (*n << 1) * lgn + (i__1 * i__1 << 1); } else { liwmin = 1; lwmin = *n << 1; } } if (*itype < 0 || *itype > 3) { *info = -1; } else if (! (wantz || lsame_(jobz, "N"))) { *info = -2; } else if (! (upper || lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*ldz < max(1,*n)) { *info = -9; } else if (*lwork < lwmin && ! lquery) { *info = -11; } else if (*liwork < liwmin && ! lquery) { *info = -13; } if (*info == 0) { work[1] = (real) lwmin; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); xerbla_("SSPGVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a Cholesky factorization of BP. */ spptrf_(uplo, n, &bp[1], info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ sspgst_(itype, uplo, n, &ap[1], &bp[1], info); sspevd_(jobz, uplo, n, &ap[1], &w[1], &z__[z_offset], ldz, &work[1], lwork, &iwork[1], liwork, info); /* Computing MAX */ r__1 = (real) lwmin; lwmin = dmax(r__1,work[1]); /* Computing MAX */ r__1 = (real) liwmin, r__2 = (real) iwork[1]; liwmin = dmax(r__1,r__2); if (wantz) { /* Backtransform eigenvectors to the original problem. */ neig = *n; if (*info > 0) { neig = *info - 1; } if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } i__1 = neig; for (j = 1; j <= i__1; ++j) { stpsv_(uplo, trans, "Non-unit", n, &bp[1], &z___ref(1, j), & c__1); /* L10: */ } } else if (*itype == 3) { /* For B*A*x=(lambda)*x; backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } i__1 = neig; for (j = 1; j <= i__1; ++j) { stpmv_(uplo, trans, "Non-unit", n, &bp[1], &z___ref(1, j), & c__1); /* L20: */ } } } work[1] = (real) lwmin; iwork[1] = liwmin; return 0; /* End of SSPGVD */ } /* sspgvd_ */
/* Subroutine */ int stgex2_(logical *wantq, logical *wantz, integer *n, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, integer *lwork, integer *info) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair (A, B) by an orthogonal equivalence transformation. (A, B) must be in generalized real Schur canonical form (as returned by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper triangular. Optionally, the matrices Q and Z of generalized Schur vectors are updated. Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' Arguments ========= WANTQ (input) LOGICAL .TRUE. : update the left transformation matrix Q; .FALSE.: do not update Q. WANTZ (input) LOGICAL .TRUE. : update the right transformation matrix Z; .FALSE.: do not update Z. N (input) INTEGER The order of the matrices A and B. N >= 0. A (input/output) REAL arrays, dimensions (LDA,N) On entry, the matrix A in the pair (A, B). On exit, the updated matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) REAL arrays, dimensions (LDB,N) On entry, the matrix B in the pair (A, B). On exit, the updated matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). Q (input/output) REAL array, dimension (LDZ,N) On entry, if WANTQ = .TRUE., the orthogonal matrix Q. On exit, the updated matrix Q. Not referenced if WANTQ = .FALSE.. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= 1. If WANTQ = .TRUE., LDQ >= N. Z (input/output) REAL array, dimension (LDZ,N) On entry, if WANTZ =.TRUE., the orthogonal matrix Z. On exit, the updated matrix Z. Not referenced if WANTZ = .FALSE.. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1. If WANTZ = .TRUE., LDZ >= N. J1 (input) INTEGER The index to the first block (A11, B11). 1 <= J1 <= N. N1 (input) INTEGER The order of the first block (A11, B11). N1 = 0, 1 or 2. N2 (input) INTEGER The order of the second block (A22, B22). N2 = 0, 1 or 2. WORK (workspace) REAL array, dimension (LWORK). LWORK (input) INTEGER The dimension of the array WORK. LWORK >= MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 ) INFO (output) INTEGER =0: Successful exit >0: If INFO = 1, the transformed matrix (A, B) would be too far from generalized Schur form; the blocks are not swapped and (A, B) and (Q, Z) are unchanged. The problem of swapping is too ill-conditioned. <0: If INFO = -16: LWORK is too small. Appropriate value for LWORK is returned in WORK(1). Further Details =============== Based on contributions by Bo Kagstrom and Peter Poromaa, Department of Computing Science, Umea University, S-901 87 Umea, Sweden. In the current code both weak and strong stability tests are performed. The user can omit the strong stability test by changing the internal logical parameter WANDS to .FALSE.. See ref. [2] for details. [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the Generalized Real Schur Form of a Regular Matrix Pair (A, B), in M.S. Moonen et al (eds), Linear Algebra for Large Scale and Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified Eigenvalues of a Regular Matrix Pair (A, B) and Condition Estimation: Theory, Algorithms and Software, Report UMINF - 94.04, Department of Computing Science, Umea University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. To appear in Numerical Algorithms, 1996. ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__16 = 16; static real c_b3 = 0.f; static integer c__0 = 0; static integer c__1 = 1; static integer c__4 = 4; static integer c__2 = 2; static real c_b38 = 1.f; static real c_b44 = -1.f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static logical weak; static real ddum; static integer idum; static real taul[4], dsum, taur[4], scpy[16] /* was [4][4] */, tcpy[16] /* was [4][4] */; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static real f, g; static integer i__, m; static real s[16] /* was [4][4] */, t[16] /* was [4][4] */, scale, bqra21, brqa21; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static real licop[16] /* was [4][4] */; static integer linfo; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static real ircop[16] /* was [4][4] */, dnorm; static integer iwork[4]; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), slagv2_(real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *), sgeqr2_(integer * , integer *, real *, integer *, real *, real *, integer *), sgerq2_(integer *, integer *, real *, integer *, real *, real *, integer *); static real be[2], ai[2]; extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *), sorgr2_(integer *, integer *, integer *, real *, integer *, real *, real *, integer * ); static real ar[2], sa, sb, li[16] /* was [4][4] */; extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *), sormr2_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer * , real *, integer *); static real dscale, ir[16] /* was [4][4] */; extern /* Subroutine */ int stgsy2_(char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, integer *, real * , integer *, real *, integer *, real *, integer *, real *, real *, real *, integer *, integer *, integer *); static real ss; extern doublereal slamch_(char *); static real ws; extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slartg_(real *, real *, real *, real *, real *); static real thresh; extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, real *); static real smlnum; static logical strong; static real eps; #define scpy_ref(a_1,a_2) scpy[(a_2)*4 + a_1 - 5] #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define s_ref(a_1,a_2) s[(a_2)*4 + a_1 - 5] #define t_ref(a_1,a_2) t[(a_2)*4 + a_1 - 5] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define li_ref(a_1,a_2) li[(a_2)*4 + a_1 - 5] #define ir_ref(a_1,a_2) ir[(a_2)*4 + a_1 - 5] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n <= 1 || *n1 <= 0 || *n2 <= 0) { return 0; } if (*n1 > *n || *j1 + *n1 > *n) { return 0; } m = *n1 + *n2; /* Computing MAX */ i__1 = *n * m, i__2 = m * m << 1; if (*lwork < max(i__1,i__2)) { *info = -16; /* Computing MAX */ i__1 = *n * m, i__2 = m * m << 1; work[1] = (real) max(i__1,i__2); return 0; } weak = FALSE_; strong = FALSE_; /* Make a local copy of selected block */ scopy_(&c__16, &c_b3, &c__0, li, &c__1); scopy_(&c__16, &c_b3, &c__0, ir, &c__1); slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, s, &c__4); slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, t, &c__4); /* Compute threshold for testing acceptance of swapping. */ eps = slamch_("P"); smlnum = slamch_("S") / eps; dscale = 0.f; dsum = 1.f; slacpy_("Full", &m, &m, s, &c__4, &work[1], &m); i__1 = m * m; slassq_(&i__1, &work[1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, t, &c__4, &work[1], &m); i__1 = m * m; slassq_(&i__1, &work[1], &c__1, &dscale, &dsum); dnorm = dscale * sqrt(dsum); /* Computing MAX */ r__1 = eps * 10.f * dnorm; thresh = dmax(r__1,smlnum); if (m == 2) { /* CASE 1: Swap 1-by-1 and 1-by-1 blocks. Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks using Givens rotations and perform the swap tentatively. */ f = s_ref(2, 2) * t_ref(1, 1) - t_ref(2, 2) * s_ref(1, 1); g = s_ref(2, 2) * t_ref(1, 2) - t_ref(2, 2) * s_ref(1, 2); sb = (r__1 = t_ref(2, 2), dabs(r__1)); sa = (r__1 = s_ref(2, 2), dabs(r__1)); slartg_(&f, &g, &ir_ref(1, 2), &ir_ref(1, 1), &ddum); ir_ref(2, 1) = -ir_ref(1, 2); ir_ref(2, 2) = ir_ref(1, 1); srot_(&c__2, &s_ref(1, 1), &c__1, &s_ref(1, 2), &c__1, &ir_ref(1, 1), &ir_ref(2, 1)); srot_(&c__2, &t_ref(1, 1), &c__1, &t_ref(1, 2), &c__1, &ir_ref(1, 1), &ir_ref(2, 1)); if (sa >= sb) { slartg_(&s_ref(1, 1), &s_ref(2, 1), &li_ref(1, 1), &li_ref(2, 1), &ddum); } else { slartg_(&t_ref(1, 1), &t_ref(2, 1), &li_ref(1, 1), &li_ref(2, 1), &ddum); } srot_(&c__2, &s_ref(1, 1), &c__4, &s_ref(2, 1), &c__4, &li_ref(1, 1), &li_ref(2, 1)); srot_(&c__2, &t_ref(1, 1), &c__4, &t_ref(2, 1), &c__4, &li_ref(1, 1), &li_ref(2, 1)); li_ref(2, 2) = li_ref(1, 1); li_ref(1, 2) = -li_ref(2, 1); /* Weak stability test: |S21| + |T21| <= O(EPS * F-norm((S, T))) */ ws = (r__1 = s_ref(2, 1), dabs(r__1)) + (r__2 = t_ref(2, 1), dabs( r__2)); weak = ws <= thresh; if (! weak) { goto L70; } if (TRUE_) { /* Strong stability test: F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) */ slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); dscale = 0.f; dsum = 1.f; i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); ss = dscale * sqrt(dsum); strong = ss <= thresh; if (! strong) { goto L70; } } /* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */ i__1 = *j1 + 1; srot_(&i__1, &a_ref(1, *j1), &c__1, &a_ref(1, *j1 + 1), &c__1, & ir_ref(1, 1), &ir_ref(2, 1)); i__1 = *j1 + 1; srot_(&i__1, &b_ref(1, *j1), &c__1, &b_ref(1, *j1 + 1), &c__1, & ir_ref(1, 1), &ir_ref(2, 1)); i__1 = *n - *j1 + 1; srot_(&i__1, &a_ref(*j1, *j1), lda, &a_ref(*j1 + 1, *j1), lda, & li_ref(1, 1), &li_ref(2, 1)); i__1 = *n - *j1 + 1; srot_(&i__1, &b_ref(*j1, *j1), ldb, &b_ref(*j1 + 1, *j1), ldb, & li_ref(1, 1), &li_ref(2, 1)); /* Set N1-by-N2 (2,1) - blocks to ZERO. */ a_ref(*j1 + 1, *j1) = 0.f; b_ref(*j1 + 1, *j1) = 0.f; /* Accumulate transformations into Q and Z if requested. */ if (*wantz) { srot_(n, &z___ref(1, *j1), &c__1, &z___ref(1, *j1 + 1), &c__1, & ir_ref(1, 1), &ir_ref(2, 1)); } if (*wantq) { srot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, *j1 + 1), &c__1, & li_ref(1, 1), &li_ref(2, 1)); } /* Exit with INFO = 0 if swap was successfully performed. */ return 0; } else { /* CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 and 2-by-2 blocks. Solve the generalized Sylvester equation S11 * R - L * S22 = SCALE * S12 T11 * R - L * T22 = SCALE * T12 for R and L. Solutions in LI and IR. */ slacpy_("Full", n1, n2, &t_ref(1, *n1 + 1), &c__4, li, &c__4); slacpy_("Full", n1, n2, &s_ref(1, *n1 + 1), &c__4, &ir_ref(*n2 + 1, * n1 + 1), &c__4); stgsy2_("N", &c__0, n1, n2, s, &c__4, &s_ref(*n1 + 1, *n1 + 1), &c__4, &ir_ref(*n2 + 1, *n1 + 1), &c__4, t, &c__4, &t_ref(*n1 + 1, * n1 + 1), &c__4, li, &c__4, &scale, &dsum, &dscale, iwork, & idum, &linfo); /* Compute orthogonal matrix QL: QL' * LI = [ TL ] [ 0 ] where LI = [ -L ] [ SCALE * identity(N2) ] */ i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { sscal_(n1, &c_b44, &li_ref(1, i__), &c__1); li_ref(*n1 + i__, i__) = scale; /* L10: */ } sgeqr2_(&m, n2, li, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } sorg2r_(&m, &m, n2, li, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } /* Compute orthogonal matrix RQ: IR * RQ' = [ 0 TR], where IR = [ SCALE * identity(N1), R ] */ i__1 = *n1; for (i__ = 1; i__ <= i__1; ++i__) { ir_ref(*n2 + i__, i__) = scale; /* L20: */ } sgerq2_(n1, &m, &ir_ref(*n2 + 1, 1), &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } sorgr2_(&m, &m, n1, ir, &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } /* Perform the swapping tentatively: */ sgemm_("T", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b38, &work[1], &m, ir, &c__4, &c_b3, s, &c__4); sgemm_("T", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b38, &work[1], &m, ir, &c__4, &c_b3, t, &c__4); slacpy_("F", &m, &m, s, &c__4, scpy, &c__4); slacpy_("F", &m, &m, t, &c__4, tcpy, &c__4); slacpy_("F", &m, &m, ir, &c__4, ircop, &c__4); slacpy_("F", &m, &m, li, &c__4, licop, &c__4); /* Triangularize the B-part by an RQ factorization. Apply transformation (from left) to A-part, giving S. */ sgerq2_(&m, &m, t, &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } sormr2_("R", "T", &m, &m, &m, t, &c__4, taur, s, &c__4, &work[1], & linfo); if (linfo != 0) { goto L70; } sormr2_("L", "N", &m, &m, &m, t, &c__4, taur, ir, &c__4, &work[1], & linfo); if (linfo != 0) { goto L70; } /* Compute F-norm(S21) in BRQA21. (T21 is 0.) */ dscale = 0.f; dsum = 1.f; i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { slassq_(n1, &s_ref(*n2 + 1, i__), &c__1, &dscale, &dsum); /* L30: */ } brqa21 = dscale * sqrt(dsum); /* Triangularize the B-part by a QR factorization. Apply transformation (from right) to A-part, giving S. */ sgeqr2_(&m, &m, tcpy, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } sorm2r_("L", "T", &m, &m, &m, tcpy, &c__4, taul, scpy, &c__4, &work[1] , info); sorm2r_("R", "N", &m, &m, &m, tcpy, &c__4, taul, licop, &c__4, &work[ 1], info); if (linfo != 0) { goto L70; } /* Compute F-norm(S21) in BQRA21. (T21 is 0.) */ dscale = 0.f; dsum = 1.f; i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { slassq_(n1, &scpy_ref(*n2 + 1, i__), &c__1, &dscale, &dsum); /* L40: */ } bqra21 = dscale * sqrt(dsum); /* Decide which method to use. Weak stability test: F-norm(S21) <= O(EPS * F-norm((S, T))) */ if (bqra21 <= brqa21 && bqra21 <= thresh) { slacpy_("F", &m, &m, scpy, &c__4, s, &c__4); slacpy_("F", &m, &m, tcpy, &c__4, t, &c__4); slacpy_("F", &m, &m, ircop, &c__4, ir, &c__4); slacpy_("F", &m, &m, licop, &c__4, li, &c__4); } else if (brqa21 >= thresh) { goto L70; } /* Set lower triangle of B-part to zero */ i__1 = m; for (i__ = 2; i__ <= i__1; ++i__) { i__2 = m - i__ + 1; scopy_(&i__2, &c_b3, &c__0, &t_ref(i__, i__ - 1), &c__1); /* L50: */ } if (TRUE_) { /* Strong stability test: F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) */ slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, & work[1], &m); sgemm_("N", "N", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); dscale = 0.f; dsum = 1.f; i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, & work[1], &m); sgemm_("N", "N", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); ss = dscale * sqrt(dsum); strong = ss <= thresh; if (! strong) { goto L70; } } /* If the swap is accepted ("weakly" and "strongly"), apply the transformations and set N1-by-N2 (2,1)-block to zero. */ i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { scopy_(n1, &c_b3, &c__0, &s_ref(*n2 + 1, i__), &c__1); /* L60: */ } /* copy back M-by-M diagonal block starting at index J1 of (A, B) */ slacpy_("F", &m, &m, s, &c__4, &a_ref(*j1, *j1), lda); slacpy_("F", &m, &m, t, &c__4, &b_ref(*j1, *j1), ldb); scopy_(&c__16, &c_b3, &c__0, t, &c__1); /* Standardize existing 2-by-2 blocks. */ i__1 = m * m; scopy_(&i__1, &c_b3, &c__0, &work[1], &c__1); work[1] = 1.f; t_ref(1, 1) = 1.f; idum = *lwork - m * m - 2; if (*n2 > 1) { slagv2_(&a_ref(*j1, *j1), lda, &b_ref(*j1, *j1), ldb, ar, ai, be, &work[1], &work[2], &t_ref(1, 1), &t_ref(2, 1)); work[m + 1] = -work[2]; work[m + 2] = work[1]; t_ref(*n2, *n2) = t_ref(1, 1); t_ref(1, 2) = -t_ref(2, 1); } work[m * m] = 1.f; t_ref(m, m) = 1.f; if (*n1 > 1) { slagv2_(&a_ref(*j1 + *n2, *j1 + *n2), lda, &b_ref(*j1 + *n2, *j1 + *n2), ldb, taur, taul, &work[m * m + 1], &work[*n2 * m + *n2 + 1], &work[*n2 * m + *n2 + 2], &t_ref(*n2 + 1, *n2 + 1), &t_ref(m, m - 1)); work[m * m] = work[*n2 * m + *n2 + 1]; work[m * m - 1] = -work[*n2 * m + *n2 + 2]; t_ref(m, m) = t_ref(*n2 + 1, *n2 + 1); t_ref(m - 1, m) = -t_ref(m, m - 1); } sgemm_("T", "N", n2, n1, n2, &c_b38, &work[1], &m, &a_ref(*j1, *j1 + * n2), lda, &c_b3, &work[m * m + 1], n2); slacpy_("Full", n2, n1, &work[m * m + 1], n2, &a_ref(*j1, *j1 + *n2), lda); sgemm_("T", "N", n2, n1, n2, &c_b38, &work[1], &m, &b_ref(*j1, *j1 + * n2), ldb, &c_b3, &work[m * m + 1], n2); slacpy_("Full", n2, n1, &work[m * m + 1], n2, &b_ref(*j1, *j1 + *n2), ldb); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, &work[1], &m, &c_b3, & work[m * m + 1], &m); slacpy_("Full", &m, &m, &work[m * m + 1], &m, li, &c__4); sgemm_("N", "N", n2, n1, n1, &c_b38, &a_ref(*j1, *j1 + *n2), lda, & t_ref(*n2 + 1, *n2 + 1), &c__4, &c_b3, &work[1], n2); slacpy_("Full", n2, n1, &work[1], n2, &a_ref(*j1, *j1 + *n2), lda); sgemm_("N", "N", n2, n1, n1, &c_b38, &b_ref(*j1, *j1 + *n2), lda, & t_ref(*n2 + 1, *n2 + 1), &c__4, &c_b3, &work[1], n2); slacpy_("Full", n2, n1, &work[1], n2, &b_ref(*j1, *j1 + *n2), ldb); sgemm_("T", "N", &m, &m, &m, &c_b38, ir, &c__4, t, &c__4, &c_b3, & work[1], &m); slacpy_("Full", &m, &m, &work[1], &m, ir, &c__4); /* Accumulate transformations into Q and Z if requested. */ if (*wantq) { sgemm_("N", "N", n, &m, &m, &c_b38, &q_ref(1, *j1), ldq, li, & c__4, &c_b3, &work[1], n); slacpy_("Full", n, &m, &work[1], n, &q_ref(1, *j1), ldq); } if (*wantz) { sgemm_("N", "N", n, &m, &m, &c_b38, &z___ref(1, *j1), ldz, ir, & c__4, &c_b3, &work[1], n); slacpy_("Full", n, &m, &work[1], n, &z___ref(1, *j1), ldz); } /* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */ i__ = *j1 + m; if (i__ <= *n) { i__1 = *n - i__ + 1; sgemm_("T", "N", &m, &i__1, &m, &c_b38, li, &c__4, &a_ref(*j1, i__), lda, &c_b3, &work[1], &m); i__1 = *n - i__ + 1; slacpy_("Full", &m, &i__1, &work[1], &m, &a_ref(*j1, i__), lda); i__1 = *n - i__ + 1; sgemm_("T", "N", &m, &i__1, &m, &c_b38, li, &c__4, &b_ref(*j1, i__), lda, &c_b3, &work[1], &m); i__1 = *n - i__ + 1; slacpy_("Full", &m, &i__1, &work[1], &m, &b_ref(*j1, i__), lda); } i__ = *j1 - 1; if (i__ > 0) { sgemm_("N", "N", &i__, &m, &m, &c_b38, &a_ref(1, *j1), lda, ir, & c__4, &c_b3, &work[1], &i__); slacpy_("Full", &i__, &m, &work[1], &i__, &a_ref(1, *j1), lda); sgemm_("N", "N", &i__, &m, &m, &c_b38, &b_ref(1, *j1), ldb, ir, & c__4, &c_b3, &work[1], &i__); slacpy_("Full", &i__, &m, &work[1], &i__, &b_ref(1, *j1), ldb); } /* Exit with INFO = 0 if swap was successfully performed. */ return 0; } /* Exit with INFO = 1 if swap was rejected. */ L70: *info = 1; return 0; /* End of STGEX2 */ } /* stgex2_ */
/* Subroutine */ int sstevr_(char *jobz, char *range, integer *n, real *d__, real *e, real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, integer *isuppz, real * work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 20, 2000 Purpose ======= SSTEVR computes selected eigenvalues and, optionally, eigenvectors of a real symmetric tridiagonal matrix T. Eigenvalues and eigenvectors can be selected by specifying either a range of values or a range of indices for the desired eigenvalues. Whenever possible, SSTEVR calls SSTEGR to compute the eigenspectrum using Relatively Robust Representations. SSTEGR computes eigenvalues by the dqds algorithm, while orthogonal eigenvectors are computed from various "good" L D L^T representations (also known as Relatively Robust Representations). Gram-Schmidt orthogonalization is avoided as far as possible. More specifically, the various steps of the algorithm are as follows. For the i-th unreduced block of T, (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T is a relatively robust representation, (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high relative accuracy by the dqds algorithm, (c) If there is a cluster of close eigenvalues, "choose" sigma_i close to the cluster, and go to step (a), (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, compute the corresponding eigenvector by forming a rank-revealing twisted factorization. The desired accuracy of the output can be specified by the input parameter ABSTOL. For more details, see "A new O(n^2) algorithm for the symmetric tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, Computer Science Division Technical Report No. UCB//CSD-97-971, UC Berkeley, May 1997. Note 1 : SSTEVR calls SSTEGR when the full spectrum is requested on machines which conform to the ieee-754 floating point standard. SSTEVR calls SSTEBZ and SSTEIN on non-ieee machines and when partial spectrum requests are made. Normal execution of SSTEGR may create NaNs and infinities and hence may abort due to a floating point exception in environments which do not handle NaNs and infinities in the ieee standard default manner. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. RANGE (input) CHARACTER*1 = 'A': all eigenvalues will be found. = 'V': all eigenvalues in the half-open interval (VL,VU] will be found. = 'I': the IL-th through IU-th eigenvalues will be found. ********* For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and ********* SSTEIN are called N (input) INTEGER The order of the matrix. N >= 0. D (input/output) REAL array, dimension (N) On entry, the n diagonal elements of the tridiagonal matrix A. On exit, D may be multiplied by a constant factor chosen to avoid over/underflow in computing the eigenvalues. E (input/output) REAL array, dimension (N) On entry, the (n-1) subdiagonal elements of the tridiagonal matrix A in elements 1 to N-1 of E; E(N) need not be set. On exit, E may be multiplied by a constant factor chosen to avoid over/underflow in computing the eigenvalues. VL (input) REAL VU (input) REAL If RANGE='V', the lower and upper bounds of the interval to be searched for eigenvalues. VL < VU. Not referenced if RANGE = 'A' or 'I'. IL (input) INTEGER IU (input) INTEGER If RANGE='I', the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. Not referenced if RANGE = 'A' or 'V'. ABSTOL (input) REAL The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to ABSTOL + EPS * max( |a|,|b| ) , where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*|T| will be used in its place, where |T| is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form. See "Computing Small Singular Values of Bidiagonal Matrices with Guaranteed High Relative Accuracy," by Demmel and Kahan, LAPACK Working Note #3. If high relative accuracy is important, set ABSTOL to SLAMCH( 'Safe minimum' ). Doing so will guarantee that eigenvalues are computed to high relative accuracy when possible in future releases. The current code does not make any guarantees about high relative accuracy, but future releases will. See J. Barlow and J. Demmel, "Computing Accurate Eigensystems of Scaled Diagonally Dominant Matrices", LAPACK Working Note #7, for a discussion of which matrices define their eigenvalues to high relative accuracy. M (output) INTEGER The total number of eigenvalues found. 0 <= M <= N. If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. W (output) REAL array, dimension (N) The first M elements contain the selected eigenvalues in ascending order. Z (output) REAL array, dimension (LDZ, max(1,M) ) If JOBZ = 'V', then if INFO = 0, the first M columns of Z contain the orthonormal eigenvectors of the matrix A corresponding to the selected eigenvalues, with the i-th column of Z holding the eigenvector associated with W(i). Note: the user must ensure that at least max(1,M) columns are supplied in the array Z; if RANGE = 'V', the exact value of M is not known in advance and an upper bound must be used. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) The support of the eigenvectors in Z, i.e., the indices indicating the nonzero elements in Z. The i-th eigenvector is nonzero only in elements ISUPPZ( 2*i-1 ) through ISUPPZ( 2*i ). ********* Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal (and minimal) LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= 20*N. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace/output) INTEGER array, dimension (LIWORK) On exit, if INFO = 0, IWORK(1) returns the optimal (and minimal) LIWORK. LIWORK (input) INTEGER The dimension of the array IWORK. LIWORK >= 10*N. If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, returns this value as the first entry of the IWORK array, and no error message related to LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: Internal error Further Details =============== Based on contributions by Inderjit Dhillon, IBM Almaden, USA Osni Marques, LBNL/NERSC, USA Ken Stanley, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__10 = 10; static integer c__1 = 1; static integer c__2 = 2; static integer c__3 = 3; static integer c__4 = 4; /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer imax; static real rmin, rmax, tnrm; static integer itmp1, i__, j; static real sigma; extern logical lsame_(char *, char *); extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static char order[1]; static integer lwmin; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer * ); static logical wantz; static integer jj; static logical alleig, indeig; static integer iscale, ieeeok, indibl, indifl; static logical valeig; extern doublereal slamch_(char *); static real safmin; extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); static real bignum; static integer indisp, indiwo, liwmin; extern doublereal slanst_(char *, integer *, real *, real *); extern /* Subroutine */ int sstein_(integer *, real *, real *, integer *, real *, integer *, integer *, real *, integer *, real *, integer * , integer *, integer *), ssterf_(integer *, real *, real *, integer *), sstegr_(char *, char *, integer *, real *, real *, real *, real *, integer *, integer *, real *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *, integer *, integer *); static integer nsplit; extern /* Subroutine */ int sstebz_(char *, char *, integer *, real *, real *, integer *, integer *, real *, real *, real *, integer *, integer *, real *, integer *, integer *, real *, integer *, integer *); static real smlnum; static logical lquery; static real eps, vll, vuu, tmp1; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] --d__; --e; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --isuppz; --work; --iwork; /* Function Body */ ieeeok = ilaenv_(&c__10, "SSTEVR", "N", &c__1, &c__2, &c__3, &c__4, ( ftnlen)6, (ftnlen)1); wantz = lsame_(jobz, "V"); alleig = lsame_(range, "A"); valeig = lsame_(range, "V"); indeig = lsame_(range, "I"); lquery = *lwork == -1 || *liwork == -1; lwmin = *n * 20; liwmin = *n * 10; *info = 0; if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (*n < 0) { *info = -3; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -7; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -8; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -9; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -14; } else if (*lwork < lwmin && ! lquery) { *info = -17; } else if (*liwork < liwmin && ! lquery) { *info = -19; } } if (*info == 0) { work[1] = (real) lwmin; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); xerbla_("SSTEVR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } if (*n == 1) { if (alleig || indeig) { *m = 1; w[1] = d__[1]; } else { if (*vl < d__[1] && *vu >= d__[1]) { *m = 1; w[1] = d__[1]; } } if (wantz) { z___ref(1, 1) = 1.f; } return 0; } /* Get machine constants. */ safmin = slamch_("Safe minimum"); eps = slamch_("Precision"); smlnum = safmin / eps; bignum = 1.f / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ r__1 = sqrt(bignum), r__2 = 1.f / sqrt(sqrt(safmin)); rmax = dmin(r__1,r__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; vll = *vl; vuu = *vu; tnrm = slanst_("M", n, &d__[1], &e[1]); if (tnrm > 0.f && tnrm < rmin) { iscale = 1; sigma = rmin / tnrm; } else if (tnrm > rmax) { iscale = 1; sigma = rmax / tnrm; } if (iscale == 1) { sscal_(n, &sigma, &d__[1], &c__1); i__1 = *n - 1; sscal_(&i__1, &sigma, &e[1], &c__1); if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* If all eigenvalues are desired, then call SSTERF or SSTEGR. If this fails for some eigenvalue, then try SSTEBZ. */ if ((alleig || indeig && *il == 1 && *iu == *n) && ieeeok == 1) { i__1 = *n - 1; scopy_(&i__1, &e[1], &c__1, &work[1], &c__1); if (! wantz) { scopy_(n, &d__[1], &c__1, &w[1], &c__1); ssterf_(n, &w[1], &work[1], info); } else { scopy_(n, &d__[1], &c__1, &work[*n + 1], &c__1); i__1 = *lwork - (*n << 1); sstegr_(jobz, "A", n, &work[*n + 1], &work[1], vl, vu, il, iu, abstol, m, &w[1], &z__[z_offset], ldz, &isuppz[1], &work[( *n << 1) + 1], &i__1, &iwork[1], liwork, info); } if (*info == 0) { *m = *n; goto L10; } *info = 0; } /* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indibl = 1; indisp = indibl + *n; indifl = indisp + *n; indiwo = indifl + *n; sstebz_(range, order, n, &vll, &vuu, il, iu, abstol, &d__[1], &e[1], m, & nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[1], &iwork[ indiwo], info); if (wantz) { sstein_(n, &d__[1], &e[1], m, &w[1], &iwork[indibl], &iwork[indisp], & z__[z_offset], ldz, &work[1], &iwork[indiwo], &iwork[indifl], info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ L10: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } r__1 = 1.f / sigma; sscal_(&imax, &r__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L20: */ } if (i__ != 0) { itmp1 = iwork[i__]; w[i__] = w[j]; iwork[i__] = iwork[j]; w[j] = tmp1; iwork[j] = itmp1; sswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, j), &c__1); } /* L30: */ } } /* Causes problems with tests 19 & 20: IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 */ work[1] = (real) lwmin; iwork[1] = liwmin; return 0; /* End of SSTEVR */ } /* sstevr_ */
/* Subroutine */ int dspevd_(char *jobz, char *uplo, integer *n, doublereal * ap, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DSPEVD computes all the eigenvalues and, optionally, eigenvectors of a real symmetric matrix A in packed storage. If eigenvectors are desired, it uses a divide and conquer algorithm. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) On entry, the upper or lower triangle of the symmetric matrix A, packed columnwise in a linear array. The j-th column of A is stored in the array AP as follows: if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. On exit, AP is overwritten by values generated during the reduction to tridiagonal form. If UPLO = 'U', the diagonal and first superdiagonal of the tridiagonal matrix T overwrite the corresponding elements of A, and if UPLO = 'L', the diagonal and first subdiagonal of T overwrite the corresponding elements of A. W (output) DOUBLE PRECISION array, dimension (N) If INFO = 0, the eigenvalues in ascending order. Z (output) DOUBLE PRECISION array, dimension (LDZ, N) If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal eigenvectors of the matrix A, with the i-th column of Z holding the eigenvector associated with W(i). If JOBZ = 'N', then Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If N <= 1, LWORK must be at least 1. If JOBZ = 'N' and N > 1, LWORK must be at least 2*N. If JOBZ = 'V' and N > 1, LWORK must be at least 1 + 6*N + N**2. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace/output) INTEGER array, dimension (LIWORK) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER The dimension of the array IWORK. If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, returns this value as the first entry of the IWORK array, and no error message related to LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer z_dim1, z_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer inde; static doublereal anrm, rmin, rmax; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static doublereal sigma; extern logical lsame_(char *, char *); static integer iinfo, lwmin; static logical wantz; extern doublereal dlamch_(char *); static integer iscale; extern /* Subroutine */ int dstedc_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *); static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum; extern doublereal dlansp_(char *, char *, integer *, doublereal *, doublereal *); static integer indtau; extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, integer *); static integer indwrk, liwmin; extern /* Subroutine */ int dsptrd_(char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), dopmtr_(char *, char *, char *, integer *, integer *, doublereal * , doublereal *, doublereal *, integer *, doublereal *, integer *); static integer llwork; static doublereal smlnum; static logical lquery; static doublereal eps; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] --ap; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --iwork; /* Function Body */ wantz = lsame_(jobz, "V"); lquery = *lwork == -1 || *liwork == -1; *info = 0; if (*n <= 1) { liwmin = 1; lwmin = 1; } else { if (wantz) { liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; lwmin = *n * 6 + 1 + i__1 * i__1; } else { liwmin = 1; lwmin = *n << 1; } } if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (lsame_(uplo, "U") || lsame_(uplo, "L"))) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -7; } else if (*lwork < lwmin && ! lquery) { *info = -9; } else if (*liwork < liwmin && ! lquery) { *info = -11; } if (*info == 0) { work[1] = (doublereal) lwmin; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); xerbla_("DSPEVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { w[1] = ap[1]; if (wantz) { z___ref(1, 1) = 1.; } return 0; } /* Get machine constants. */ safmin = dlamch_("Safe minimum"); eps = dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = dlansp_("M", uplo, n, &ap[1], &work[1]); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { i__1 = *n * (*n + 1) / 2; dscal_(&i__1, &sigma, &ap[1], &c__1); } /* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. */ inde = 1; indtau = inde + *n; dsptrd_(uplo, n, &ap[1], &w[1], &work[inde], &work[indtau], &iinfo); /* For eigenvalues only, call DSTERF. For eigenvectors, first call DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the tridiagonal matrix, then call DOPMTR to multiply it by the Householder transformations represented in AP. */ if (! wantz) { dsterf_(n, &w[1], &work[inde], info); } else { indwrk = indtau + *n; llwork = *lwork - indwrk + 1; dstedc_("I", n, &w[1], &work[inde], &z__[z_offset], ldz, &work[indwrk] , &llwork, &iwork[1], liwork, info); dopmtr_("L", uplo, "N", n, n, &ap[1], &work[indtau], &z__[z_offset], ldz, &work[indwrk], &iinfo); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { d__1 = 1. / sigma; dscal_(n, &d__1, &w[1], &c__1); } work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; /* End of DSPEVD */ } /* dspevd_ */
/* Subroutine */ int dlahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *info) { /* System generated locals */ integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ static doublereal h43h34, disc, unfl, ovfl; extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); static doublereal work[1], opst; static integer i__, j, k, l, m; static doublereal s, v[3]; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer i1, i2; static doublereal t1, t2, t3, v1, v2, v3; extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), dlabad_( doublereal *, doublereal *); static doublereal h00, h10, h11, h12, h21, h22, h33, h44; static integer nh; static doublereal cs; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); static integer nr; static doublereal sn; static integer nz; extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, doublereal *); static doublereal smlnum, ave, h33s, h44s; static integer itn, its; static doublereal ulp, sum, tst1; #define h___ref(a_1,a_2) h__[(a_2)*h_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] /* -- LAPACK auxiliary routine (instrum. to count ops. version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Common block to return operation count. Purpose ======= DLAHQR is an auxiliary routine called by DHSEQR to update the eigenvalues and Schur decomposition already computed by DHSEQR, by dealing with the Hessenberg submatrix in rows and columns ILO to IHI. Arguments ========= WANTT (input) LOGICAL = .TRUE. : the full Schur form T is required; = .FALSE.: only eigenvalues are required. WANTZ (input) LOGICAL = .TRUE. : the matrix of Schur vectors Z is required; = .FALSE.: Schur vectors are not required. N (input) INTEGER The order of the matrix H. N >= 0. ILO (input) INTEGER IHI (input) INTEGER It is assumed that H is already upper quasi-triangular in rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). DLAHQR works primarily with the Hessenberg submatrix in rows and columns ILO to IHI, but applies transformations to all of H if WANTT is .TRUE.. 1 <= ILO <= max(1,IHI); IHI <= N. H (input/output) DOUBLE PRECISION array, dimension (LDH,N) On entry, the upper Hessenberg matrix H. On exit, if WANTT is .TRUE., H is upper quasi-triangular in rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in standard form. If WANTT is .FALSE., the contents of H are unspecified on exit. LDH (input) INTEGER The leading dimension of the array H. LDH >= max(1,N). WR (output) DOUBLE PRECISION array, dimension (N) WI (output) DOUBLE PRECISION array, dimension (N) The real and imaginary parts, respectively, of the computed eigenvalues ILO to IHI are stored in the corresponding elements of WR and WI. If two eigenvalues are computed as a complex conjugate pair, they are stored in consecutive elements of WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the eigenvalues are stored in the same order as on the diagonal of the Schur form returned in H, with WR(i) = H(i,i), and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). ILOZ (input) INTEGER IHIZ (input) INTEGER Specify the rows of Z to which transformations must be applied if WANTZ is .TRUE.. 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) If WANTZ is .TRUE., on entry Z must contain the current matrix Z of transformations accumulated by DHSEQR, and on exit Z has been updated; transformations are applied only to the submatrix Z(ILOZ:IHIZ,ILO:IHI). If WANTZ is .FALSE., Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N). INFO (output) INTEGER = 0: successful exit > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) iterations; if INFO = i, elements i+1:ihi of WR and WI contain those eigenvalues which have been successfully computed. Further Details =============== 2-96 Based on modifications by David Day, Sandia National Laboratory, USA ===================================================================== Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1 * 1; h__ -= h_offset; --wr; --wi; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; /* Function Body */ *info = 0; /* ** Initialize */ opst = 0.; /* ** Quick return if possible */ if (*n == 0) { return 0; } if (*ilo == *ihi) { wr[*ilo] = h___ref(*ilo, *ilo); wi[*ilo] = 0.; return 0; } nh = *ihi - *ilo + 1; nz = *ihiz - *iloz + 1; /* Set machine-dependent constants for the stopping criterion. If norm(H) <= sqrt(OVFL), overflow should not occur. */ unfl = dlamch_("Safe minimum"); ovfl = 1. / unfl; dlabad_(&unfl, &ovfl); ulp = dlamch_("Precision"); smlnum = unfl * (nh / ulp); /* I1 and I2 are the indices of the first row and last column of H to which transformations must be applied. If eigenvalues only are being computed, I1 and I2 are set inside the main loop. */ if (*wantt) { i1 = 1; i2 = *n; } /* ITN is the total number of QR iterations allowed. */ itn = nh * 30; /* The main loop begins here. I is the loop index and decreases from IHI to ILO in steps of 1 or 2. Each iteration of the loop works with the active submatrix in rows and columns L to I. Eigenvalues I+1 to IHI have already converged. Either L = ILO or H(L,L-1) is negligible so that the matrix splits. */ i__ = *ihi; L10: l = *ilo; if (i__ < *ilo) { goto L150; } /* Perform QR iterations on rows and columns ILO to I until a submatrix of order 1 or 2 splits off at the bottom because a subdiagonal element has become negligible. */ i__1 = itn; for (its = 0; its <= i__1; ++its) { /* Look for a single small subdiagonal element. */ i__2 = l + 1; for (k = i__; k >= i__2; --k) { tst1 = (d__1 = h___ref(k - 1, k - 1), abs(d__1)) + (d__2 = h___ref(k, k), abs(d__2)); if (tst1 == 0.) { i__3 = i__ - l + 1; tst1 = dlanhs_("1", &i__3, &h___ref(l, l), ldh, work); /* ** Increment op count */ latime_1.ops += (i__ - l + 1) * (i__ - l + 2) / 2; /* ** */ } /* Computing MAX */ d__2 = ulp * tst1; if ((d__1 = h___ref(k, k - 1), abs(d__1)) <= max(d__2,smlnum)) { goto L30; } /* L20: */ } L30: l = k; /* ** Increment op count */ opst += (i__ - l + 1) * 3; /* ** */ if (l > *ilo) { /* H(L,L-1) is negligible */ h___ref(l, l - 1) = 0.; } /* Exit from loop if a submatrix of order 1 or 2 has split off. */ if (l >= i__ - 1) { goto L140; } /* Now the active submatrix is in rows and columns L to I. If eigenvalues only are being computed, only the active submatrix need be transformed. */ if (! (*wantt)) { i1 = l; i2 = i__; } if (its == 10 || its == 20) { /* Exceptional shift. */ s = (d__1 = h___ref(i__, i__ - 1), abs(d__1)) + (d__2 = h___ref( i__ - 1, i__ - 2), abs(d__2)); h44 = s * .75 + h___ref(i__, i__); h33 = h44; h43h34 = s * -.4375 * s; /* ** Increment op count */ opst += 5; /* ** */ } else { /* Prepare to use Francis' double shift (i.e. 2nd degree generalized Rayleigh quotient) */ h44 = h___ref(i__, i__); h33 = h___ref(i__ - 1, i__ - 1); h43h34 = h___ref(i__, i__ - 1) * h___ref(i__ - 1, i__); s = h___ref(i__ - 1, i__ - 2) * h___ref(i__ - 1, i__ - 2); disc = (h33 - h44) * .5; disc = disc * disc + h43h34; /* ** Increment op count */ opst += 6; /* ** */ if (disc > 0.) { /* Real roots: use Wilkinson's shift twice */ disc = sqrt(disc); ave = (h33 + h44) * .5; /* ** Increment op count */ opst += 2; /* ** */ if (abs(h33) - abs(h44) > 0.) { h33 = h33 * h44 - h43h34; h44 = h33 / (d_sign(&disc, &ave) + ave); /* ** Increment op count */ opst += 4; /* ** */ } else { h44 = d_sign(&disc, &ave) + ave; /* ** Increment op count */ opst += 1; /* ** */ } h33 = h44; h43h34 = 0.; } } /* Look for two consecutive small subdiagonal elements. */ i__2 = l; for (m = i__ - 2; m >= i__2; --m) { /* Determine the effect of starting the double-shift QR iteration at row M, and see if this would make H(M,M-1) negligible. */ h11 = h___ref(m, m); h22 = h___ref(m + 1, m + 1); h21 = h___ref(m + 1, m); h12 = h___ref(m, m + 1); h44s = h44 - h11; h33s = h33 - h11; v1 = (h33s * h44s - h43h34) / h21 + h12; v2 = h22 - h11 - h33s - h44s; v3 = h___ref(m + 2, m + 1); s = abs(v1) + abs(v2) + abs(v3); v1 /= s; v2 /= s; v3 /= s; v[0] = v1; v[1] = v2; v[2] = v3; if (m == l) { goto L50; } h00 = h___ref(m - 1, m - 1); h10 = h___ref(m, m - 1); tst1 = abs(v1) * (abs(h00) + abs(h11) + abs(h22)); if (abs(h10) * (abs(v2) + abs(v3)) <= ulp * tst1) { goto L50; } /* L40: */ } L50: /* ** Increment op count */ opst += (i__ - m - 1) * 20; /* ** Double-shift QR step */ i__2 = i__ - 1; for (k = m; k <= i__2; ++k) { /* The first iteration of this loop determines a reflection G from the vector V and applies it from left and right to H, thus creating a nonzero bulge below the subdiagonal. Each subsequent iteration determines a reflection G to restore the Hessenberg form in the (K-1)th column, and thus chases the bulge one step toward the bottom of the active submatrix. NR is the order of G. Computing MIN */ i__3 = 3, i__4 = i__ - k + 1; nr = min(i__3,i__4); if (k > m) { dcopy_(&nr, &h___ref(k, k - 1), &c__1, v, &c__1); } dlarfg_(&nr, v, &v[1], &c__1, &t1); /* ** Increment op count */ opst = opst + nr * 3 + 9; /* ** */ if (k > m) { h___ref(k, k - 1) = v[0]; h___ref(k + 1, k - 1) = 0.; if (k < i__ - 1) { h___ref(k + 2, k - 1) = 0.; } } else if (m > l) { h___ref(k, k - 1) = -h___ref(k, k - 1); } v2 = v[1]; t2 = t1 * v2; if (nr == 3) { v3 = v[2]; t3 = t1 * v3; /* Apply G from the left to transform the rows of the matrix in columns K to I2. */ i__3 = i2; for (j = k; j <= i__3; ++j) { sum = h___ref(k, j) + v2 * h___ref(k + 1, j) + v3 * h___ref(k + 2, j); h___ref(k, j) = h___ref(k, j) - sum * t1; h___ref(k + 1, j) = h___ref(k + 1, j) - sum * t2; h___ref(k + 2, j) = h___ref(k + 2, j) - sum * t3; /* L60: */ } /* Apply G from the right to transform the columns of the matrix in rows I1 to min(K+3,I). Computing MIN */ i__4 = k + 3; i__3 = min(i__4,i__); for (j = i1; j <= i__3; ++j) { sum = h___ref(j, k) + v2 * h___ref(j, k + 1) + v3 * h___ref(j, k + 2); h___ref(j, k) = h___ref(j, k) - sum * t1; h___ref(j, k + 1) = h___ref(j, k + 1) - sum * t2; h___ref(j, k + 2) = h___ref(j, k + 2) - sum * t3; /* L70: */ } /* ** Increment op count Computing MIN */ i__3 = 3, i__4 = i__ - k; latime_1.ops += (i2 - i1 + 2 + min(i__3,i__4)) * 10; /* ** */ if (*wantz) { /* Accumulate transformations in the matrix Z */ i__3 = *ihiz; for (j = *iloz; j <= i__3; ++j) { sum = z___ref(j, k) + v2 * z___ref(j, k + 1) + v3 * z___ref(j, k + 2); z___ref(j, k) = z___ref(j, k) - sum * t1; z___ref(j, k + 1) = z___ref(j, k + 1) - sum * t2; z___ref(j, k + 2) = z___ref(j, k + 2) - sum * t3; /* L80: */ } /* ** Increment op count */ latime_1.ops += nz * 10; /* ** */ } } else if (nr == 2) { /* Apply G from the left to transform the rows of the matrix in columns K to I2. */ i__3 = i2; for (j = k; j <= i__3; ++j) { sum = h___ref(k, j) + v2 * h___ref(k + 1, j); h___ref(k, j) = h___ref(k, j) - sum * t1; h___ref(k + 1, j) = h___ref(k + 1, j) - sum * t2; /* L90: */ } /* Apply G from the right to transform the columns of the matrix in rows I1 to min(K+3,I). */ i__3 = i__; for (j = i1; j <= i__3; ++j) { sum = h___ref(j, k) + v2 * h___ref(j, k + 1); h___ref(j, k) = h___ref(j, k) - sum * t1; h___ref(j, k + 1) = h___ref(j, k + 1) - sum * t2; /* L100: */ } /* ** Increment op count */ latime_1.ops += (i2 - i1 + 3) * 6; /* ** */ if (*wantz) { /* Accumulate transformations in the matrix Z */ i__3 = *ihiz; for (j = *iloz; j <= i__3; ++j) { sum = z___ref(j, k) + v2 * z___ref(j, k + 1); z___ref(j, k) = z___ref(j, k) - sum * t1; z___ref(j, k + 1) = z___ref(j, k + 1) - sum * t2; /* L110: */ } /* ** Increment op count */ latime_1.ops += nz * 6; /* ** */ } } /* L120: */ } /* L130: */ } /* Failure to converge in remaining number of iterations */ *info = i__; return 0; L140: if (l == i__) { /* H(I,I-1) is negligible: one eigenvalue has converged. */ wr[i__] = h___ref(i__, i__); wi[i__] = 0.; } else if (l == i__ - 1) { /* H(I-1,I-2) is negligible: a pair of eigenvalues have converged. Transform the 2-by-2 submatrix to standard Schur form, and compute and store the eigenvalues. */ dlanv2_(&h___ref(i__ - 1, i__ - 1), &h___ref(i__ - 1, i__), &h___ref( i__, i__ - 1), &h___ref(i__, i__), &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, &sn); if (*wantt) { /* Apply the transformation to the rest of H. */ if (i2 > i__) { i__1 = i2 - i__; drot_(&i__1, &h___ref(i__ - 1, i__ + 1), ldh, &h___ref(i__, i__ + 1), ldh, &cs, &sn); } i__1 = i__ - i1 - 1; drot_(&i__1, &h___ref(i1, i__ - 1), &c__1, &h___ref(i1, i__), & c__1, &cs, &sn); /* ** Increment op count */ latime_1.ops += (i2 - i1 - 1) * 6; /* ** */ } if (*wantz) { /* Apply the transformation to Z. */ drot_(&nz, &z___ref(*iloz, i__ - 1), &c__1, &z___ref(*iloz, i__), &c__1, &cs, &sn); /* ** Increment op count */ latime_1.ops += nz * 6; /* ** */ } } /* Decrement number of remaining iterations, and return to start of the main loop with new value of I. */ itn -= its; i__ = l - 1; goto L10; L150: /* ** Compute final op count */ latime_1.ops += opst; /* ** */ return 0; /* End of DLAHQR */ } /* dlahqr_ */
/* Subroutine */ int chseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, integer *ldz, complex *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2], i__5, i__6; real r__1, r__2, r__3, r__4; complex q__1; char ch__1[2]; /* Builtin functions */ double r_imag(complex *); void r_cnjg(complex *, complex *); /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ static integer maxb, ierr; static real unfl; static complex temp; static real ovfl, opst; static integer i__, j, k, l; static complex s[225] /* was [15][15] */; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, integer *); static complex v[16]; extern logical lsame_(char *, char *); extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *), ccopy_(integer *, complex *, integer *, complex *, integer *); static integer itemp; static real rtemp; static integer i1, i2; static logical initz, wantt, wantz; static real rwork[1]; extern doublereal slapy2_(real *, real *); static integer ii, nh; extern /* Subroutine */ int slabad_(real *, real *), clarfg_(integer *, complex *, complex *, integer *, complex *); static integer nr, ns; extern integer icamax_(integer *, complex *, integer *); static integer nv; extern doublereal slamch_(char *), clanhs_(char *, integer *, complex *, integer *, real *); extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer *), clahqr_(logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *); static complex vv[16]; extern /* Subroutine */ int claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int clarfx_(char *, integer *, integer *, complex *, complex *, complex *, integer *, complex *), xerbla_( char *, integer *); static real smlnum; static logical lquery; static integer itn; static complex tau; static integer its; static real ulp, tst1; #define h___subscr(a_1,a_2) (a_2)*h_dim1 + a_1 #define h___ref(a_1,a_2) h__[h___subscr(a_1,a_2)] #define s_subscr(a_1,a_2) (a_2)*15 + a_1 - 16 #define s_ref(a_1,a_2) s[s_subscr(a_1,a_2)] #define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1 #define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)] /* -- LAPACK routine (instrumented to count operations, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Common block to return operation count. Purpose ======= CHSEQR computes the eigenvalues of a complex upper Hessenberg matrix H, and, optionally, the matrices T and Z from the Schur decomposition H = Z T Z**H, where T is an upper triangular matrix (the Schur form), and Z is the unitary matrix of Schur vectors. Optionally Z may be postmultiplied into an input unitary matrix Q, so that this routine can give the Schur factorization of a matrix A which has been reduced to the Hessenberg form H by the unitary matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. Arguments ========= JOB (input) CHARACTER*1 = 'E': compute eigenvalues only; = 'S': compute eigenvalues and the Schur form T. COMPZ (input) CHARACTER*1 = 'N': no Schur vectors are computed; = 'I': Z is initialized to the unit matrix and the matrix Z of Schur vectors of H is returned; = 'V': Z must contain an unitary matrix Q on entry, and the product Q*Z is returned. N (input) INTEGER The order of the matrix H. N >= 0. ILO (input) INTEGER IHI (input) INTEGER It is assumed that H is already upper triangular in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally set by a previous call to CGEBAL, and then passed to CGEHRD when the matrix output by CGEBAL is reduced to Hessenberg form. Otherwise ILO and IHI should be set to 1 and N respectively. 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. H (input/output) COMPLEX array, dimension (LDH,N) On entry, the upper Hessenberg matrix H. On exit, if JOB = 'S', H contains the upper triangular matrix T from the Schur decomposition (the Schur form). If JOB = 'E', the contents of H are unspecified on exit. LDH (input) INTEGER The leading dimension of the array H. LDH >= max(1,N). W (output) COMPLEX array, dimension (N) The computed eigenvalues. If JOB = 'S', the eigenvalues are stored in the same order as on the diagonal of the Schur form returned in H, with W(i) = H(i,i). Z (input/output) COMPLEX array, dimension (LDZ,N) If COMPZ = 'N': Z is not referenced. If COMPZ = 'I': on entry, Z need not be set, and on exit, Z contains the unitary matrix Z of the Schur vectors of H. If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, which is assumed to be equal to the unit matrix except for the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. Normally Q is the unitary matrix generated by CUNGHR after the call to CGEHRD which formed the Hessenberg matrix H. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. WORK (workspace/output) COMPLEX array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,N). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, CHSEQR failed to compute all the eigenvalues in a total of 30*(IHI-ILO+1) iterations; elements 1:ilo-1 and i+1:n of W contain those eigenvalues which have been successfully computed. ===================================================================== Decode and test the input parameters Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1 * 1; h__ -= h_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; /* Function Body */ wantt = lsame_(job, "S"); initz = lsame_(compz, "I"); wantz = initz || lsame_(compz, "V"); *info = 0; i__1 = max(1,*n); work[1].r = (real) i__1, work[1].i = 0.f; lquery = *lwork == -1; if (! lsame_(job, "E") && ! wantt) { *info = -1; } else if (! lsame_(compz, "N") && ! wantz) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -4; } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -5; } else if (*ldh < max(1,*n)) { *info = -7; } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { *info = -10; } else if (*lwork < max(1,*n) && ! lquery) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("CHSEQR", &i__1); return 0; } else if (lquery) { return 0; } /* ** Initialize */ opst = 0.f; /* ** Initialize Z, if necessary */ if (initz) { claset_("Full", n, n, &c_b1, &c_b2, &z__[z_offset], ldz); } /* Store the eigenvalues isolated by CGEBAL. */ i__1 = *ilo - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = h___subscr(i__, i__); w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; /* L10: */ } i__1 = *n; for (i__ = *ihi + 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = h___subscr(i__, i__); w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; /* L20: */ } /* Quick return if possible. */ if (*n == 0) { return 0; } if (*ilo == *ihi) { i__1 = *ilo; i__2 = h___subscr(*ilo, *ilo); w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; return 0; } /* Set rows and columns ILO to IHI to zero below the first subdiagonal. */ i__1 = *ihi - 2; for (j = *ilo; j <= i__1; ++j) { i__2 = *n; for (i__ = j + 2; i__ <= i__2; ++i__) { i__3 = h___subscr(i__, j); h__[i__3].r = 0.f, h__[i__3].i = 0.f; /* L30: */ } /* L40: */ } nh = *ihi - *ilo + 1; /* I1 and I2 are the indices of the first row and last column of H to which transformations must be applied. If eigenvalues only are being computed, I1 and I2 are re-set inside the main loop. */ if (wantt) { i1 = 1; i2 = *n; } else { i1 = *ilo; i2 = *ihi; } /* Ensure that the subdiagonal elements are real. */ i__1 = *ihi; for (i__ = *ilo + 1; i__ <= i__1; ++i__) { i__2 = h___subscr(i__, i__ - 1); temp.r = h__[i__2].r, temp.i = h__[i__2].i; if (r_imag(&temp) != 0.f) { r__1 = temp.r; r__2 = r_imag(&temp); rtemp = slapy2_(&r__1, &r__2); i__2 = h___subscr(i__, i__ - 1); h__[i__2].r = rtemp, h__[i__2].i = 0.f; q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; temp.r = q__1.r, temp.i = q__1.i; if (i2 > i__) { i__2 = i2 - i__; r_cnjg(&q__1, &temp); cscal_(&i__2, &q__1, &h___ref(i__, i__ + 1), ldh); } i__2 = i__ - i1; cscal_(&i__2, &temp, &h___ref(i1, i__), &c__1); if (i__ < *ihi) { i__2 = h___subscr(i__ + 1, i__); i__3 = h___subscr(i__ + 1, i__); q__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, q__1.i = temp.r * h__[i__3].i + temp.i * h__[i__3].r; h__[i__2].r = q__1.r, h__[i__2].i = q__1.i; } /* ** Increment op count */ opst += (i2 - i1 + 2) * 6; /* ** */ if (wantz) { cscal_(&nh, &temp, &z___ref(*ilo, i__), &c__1); /* ** Increment op count */ opst += nh * 6; /* ** */ } } /* L50: */ } /* Determine the order of the multi-shift QR algorithm to be used. Writing concatenation */ i__4[0] = 1, a__1[0] = job; i__4[1] = 1, a__1[1] = compz; s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); ns = ilaenv_(&c__4, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( ftnlen)2); /* Writing concatenation */ i__4[0] = 1, a__1[0] = job; i__4[1] = 1, a__1[1] = compz; s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); maxb = ilaenv_(&c__8, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( ftnlen)2); if (ns <= 1 || ns > nh || maxb >= nh) { /* Use the standard double-shift algorithm */ clahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, ihi, &z__[z_offset], ldz, info); return 0; } maxb = max(2,maxb); /* Computing MIN */ i__1 = min(ns,maxb); ns = min(i__1,15); /* Now 1 < NS <= MAXB < NH. Set machine-dependent constants for the stopping criterion. If norm(H) <= sqrt(OVFL), overflow should not occur. */ unfl = slamch_("Safe minimum"); ovfl = 1.f / unfl; slabad_(&unfl, &ovfl); ulp = slamch_("Precision"); smlnum = unfl * (nh / ulp); /* ITN is the total number of multiple-shift QR iterations allowed. */ itn = nh * 30; /* The main loop begins here. I is the loop index and decreases from IHI to ILO in steps of at most MAXB. Each iteration of the loop works with the active submatrix in rows and columns L to I. Eigenvalues I+1 to IHI have already converged. Either L = ILO, or H(L,L-1) is negligible so that the matrix splits. */ i__ = *ihi; L60: if (i__ < *ilo) { goto L180; } /* Perform multiple-shift QR iterations on rows and columns ILO to I until a submatrix of order at most MAXB splits off at the bottom because a subdiagonal element has become negligible. */ l = *ilo; i__1 = itn; for (its = 0; its <= i__1; ++its) { /* Look for a single small subdiagonal element. */ i__2 = l + 1; for (k = i__; k >= i__2; --k) { i__3 = h___subscr(k - 1, k - 1); i__5 = h___subscr(k, k); tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h___ref( k - 1, k - 1)), dabs(r__2)) + ((r__3 = h__[i__5].r, dabs( r__3)) + (r__4 = r_imag(&h___ref(k, k)), dabs(r__4))); if (tst1 == 0.f) { i__3 = i__ - l + 1; tst1 = clanhs_("1", &i__3, &h___ref(l, l), ldh, rwork); /* ** Increment op count */ latime_1.ops += (i__ - l + 1) * 5 * (i__ - l) / 2; /* ** */ } i__3 = h___subscr(k, k - 1); /* Computing MAX */ r__2 = ulp * tst1; if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { goto L80; } /* L70: */ } L80: l = k; /* ** Increment op count */ opst += (i__ - l + 1) * 5; /* ** */ if (l > *ilo) { /* H(L,L-1) is negligible. */ i__2 = h___subscr(l, l - 1); h__[i__2].r = 0.f, h__[i__2].i = 0.f; } /* Exit from loop if a submatrix of order <= MAXB has split off. */ if (l >= i__ - maxb + 1) { goto L170; } /* Now the active submatrix is in rows and columns L to I. If eigenvalues only are being computed, only the active submatrix need be transformed. */ if (! wantt) { i1 = l; i2 = i__; } if (its == 20 || its == 30) { /* Exceptional shifts. */ i__2 = i__; for (ii = i__ - ns + 1; ii <= i__2; ++ii) { i__3 = ii; i__5 = h___subscr(ii, ii - 1); i__6 = h___subscr(ii, ii); r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = h__[i__6] .r, dabs(r__2))) * 1.5f; w[i__3].r = r__3, w[i__3].i = 0.f; /* L90: */ } /* ** Increment op count */ opst += ns << 1; /* ** */ } else { /* Use eigenvalues of trailing submatrix of order NS as shifts. */ clacpy_("Full", &ns, &ns, &h___ref(i__ - ns + 1, i__ - ns + 1), ldh, s, &c__15); clahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ - ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); if (ierr > 0) { /* If CLAHQR failed to compute all NS eigenvalues, use the unconverged diagonal elements as the remaining shifts. */ i__2 = ierr; for (ii = 1; ii <= i__2; ++ii) { i__3 = i__ - ns + ii; i__5 = s_subscr(ii, ii); w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i; /* L100: */ } } } /* Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) where G is the Hessenberg submatrix H(L:I,L:I) and w is the vector of shifts (stored in W). The result is stored in the local array V. */ v[0].r = 1.f, v[0].i = 0.f; i__2 = ns + 1; for (ii = 2; ii <= i__2; ++ii) { i__3 = ii - 1; v[i__3].r = 0.f, v[i__3].i = 0.f; /* L110: */ } nv = 1; i__2 = i__; for (j = i__ - ns + 1; j <= i__2; ++j) { i__3 = nv + 1; ccopy_(&i__3, v, &c__1, vv, &c__1); i__3 = nv + 1; i__5 = j; q__1.r = -w[i__5].r, q__1.i = -w[i__5].i; cgemv_("No transpose", &i__3, &nv, &c_b2, &h___ref(l, l), ldh, vv, &c__1, &q__1, v, &c__1); ++nv; /* ** Increment op count */ opst = opst + (nv << 3) * (*n + 1) + (nv + 1) * 6; /* ** Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, reset it to the unit vector. */ itemp = icamax_(&nv, v, &c__1); /* ** Increment op count */ opst += nv << 1; /* ** */ i__3 = itemp - 1; rtemp = (r__1 = v[i__3].r, dabs(r__1)) + (r__2 = r_imag(&v[itemp - 1]), dabs(r__2)); if (rtemp == 0.f) { v[0].r = 1.f, v[0].i = 0.f; i__3 = nv; for (ii = 2; ii <= i__3; ++ii) { i__5 = ii - 1; v[i__5].r = 0.f, v[i__5].i = 0.f; /* L120: */ } } else { rtemp = dmax(rtemp,smlnum); r__1 = 1.f / rtemp; csscal_(&nv, &r__1, v, &c__1); /* ** Increment op count */ opst += nv << 1; /* ** */ } /* L130: */ } /* Multiple-shift QR step */ i__2 = i__ - 1; for (k = l; k <= i__2; ++k) { /* The first iteration of this loop determines a reflection G from the vector V and applies it from left and right to H, thus creating a nonzero bulge below the subdiagonal. Each subsequent iteration determines a reflection G to restore the Hessenberg form in the (K-1)th column, and thus chases the bulge one step toward the bottom of the active submatrix. NR is the order of G. Computing MIN */ i__3 = ns + 1, i__5 = i__ - k + 1; nr = min(i__3,i__5); if (k > l) { ccopy_(&nr, &h___ref(k, k - 1), &c__1, v, &c__1); } clarfg_(&nr, v, &v[1], &c__1, &tau); /* ** Increment op count */ opst = opst + nr * 10 + 12; /* ** */ if (k > l) { i__3 = h___subscr(k, k - 1); h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; i__3 = i__; for (ii = k + 1; ii <= i__3; ++ii) { i__5 = h___subscr(ii, k - 1); h__[i__5].r = 0.f, h__[i__5].i = 0.f; /* L140: */ } } v[0].r = 1.f, v[0].i = 0.f; /* Apply G' from the left to transform the rows of the matrix in columns K to I2. */ i__3 = i2 - k + 1; r_cnjg(&q__1, &tau); clarfx_("Left", &nr, &i__3, v, &q__1, &h___ref(k, k), ldh, &work[ 1]); /* Apply G from the right to transform the columns of the matrix in rows I1 to min(K+NR,I). Computing MIN */ i__5 = k + nr; i__3 = min(i__5,i__) - i1 + 1; clarfx_("Right", &i__3, &nr, v, &tau, &h___ref(i1, k), ldh, &work[ 1]); /* ** Increment op count Computing MIN */ i__3 = nr, i__5 = i__ - k; latime_1.ops += ((nr << 2) - 2 << 2) * (i2 - i1 + 2 + min(i__3, i__5)); /* ** */ if (wantz) { /* Accumulate transformations in the matrix Z */ clarfx_("Right", &nh, &nr, v, &tau, &z___ref(*ilo, k), ldz, & work[1]); /* ** Increment op count */ latime_1.ops += ((nr << 2) - 2 << 2) * nh; /* ** */ } /* L150: */ } /* Ensure that H(I,I-1) is real. */ i__2 = h___subscr(i__, i__ - 1); temp.r = h__[i__2].r, temp.i = h__[i__2].i; if (r_imag(&temp) != 0.f) { r__1 = temp.r; r__2 = r_imag(&temp); rtemp = slapy2_(&r__1, &r__2); i__2 = h___subscr(i__, i__ - 1); h__[i__2].r = rtemp, h__[i__2].i = 0.f; q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; temp.r = q__1.r, temp.i = q__1.i; if (i2 > i__) { i__2 = i2 - i__; r_cnjg(&q__1, &temp); cscal_(&i__2, &q__1, &h___ref(i__, i__ + 1), ldh); } i__2 = i__ - i1; cscal_(&i__2, &temp, &h___ref(i1, i__), &c__1); /* ** Increment op count */ opst += (i2 - i1 + 1) * 6; /* ** */ if (wantz) { cscal_(&nh, &temp, &z___ref(*ilo, i__), &c__1); /* ** Increment op count */ opst += nh * 6; /* ** */ } } /* L160: */ } /* Failure to converge in remaining number of iterations */ *info = i__; return 0; L170: /* A submatrix of order <= MAXB in rows and columns L to I has split off. Use the double-shift QR algorithm to handle it. */ clahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi, &z__[z_offset], ldz, info); if (*info > 0) { return 0; } /* Decrement number of remaining iterations, and return to start of the main loop with a new value of I. */ itn -= its; i__ = l - 1; goto L60; L180: /* ** Compute final op count */ latime_1.ops += opst; /* ** */ i__1 = max(1,*n); work[1].r = (real) i__1, work[1].i = 0.f; return 0; /* End of CHSEQR */ } /* chseqr_ */
/* Subroutine */ int dstev_(char *jobz, integer *n, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz, doublereal *work, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DSTEV computes all eigenvalues and, optionally, eigenvectors of a real symmetric tridiagonal matrix A. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. N (input) INTEGER The order of the matrix. N >= 0. D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the n diagonal elements of the tridiagonal matrix A. On exit, if INFO = 0, the eigenvalues in ascending order. E (input/output) DOUBLE PRECISION array, dimension (N) On entry, the (n-1) subdiagonal elements of the tridiagonal matrix A, stored in elements 1 to N-1 of E; E(N) need not be set, but is used by the routine. On exit, the contents of E are destroyed. Z (output) DOUBLE PRECISION array, dimension (LDZ, N) If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal eigenvectors of the matrix A, with the i-th column of Z holding the eigenvector associated with D(i). If JOBZ = 'N', then Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) If JOBZ = 'N', WORK is not referenced. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the algorithm failed to converge; i off-diagonal elements of E did not converge to zero. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer z_dim1, z_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer imax; static doublereal rmin, rmax, tnrm; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static doublereal sigma; extern logical lsame_(char *, char *); static logical wantz; extern doublereal dlamch_(char *); static integer iscale; static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum; extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, integer *), dsteqr_(char *, integer *, doublereal *, doublereal * , doublereal *, integer *, doublereal *, integer *); static doublereal smlnum, eps; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] --d__; --e; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; /* Function Body */ wantz = lsame_(jobz, "V"); *info = 0; if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("DSTEV ", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (wantz) { z___ref(1, 1) = 1.; } return 0; } /* Get machine constants. */ safmin = dlamch_("Safe minimum"); eps = dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ iscale = 0; tnrm = dlanst_("M", n, &d__[1], &e[1]); if (tnrm > 0. && tnrm < rmin) { iscale = 1; sigma = rmin / tnrm; } else if (tnrm > rmax) { iscale = 1; sigma = rmax / tnrm; } if (iscale == 1) { dscal_(n, &sigma, &d__[1], &c__1); i__1 = *n - 1; dscal_(&i__1, &sigma, &e[1], &c__1); } /* For eigenvalues only, call DSTERF. For eigenvalues and eigenvectors, call DSTEQR. */ if (! wantz) { dsterf_(n, &d__[1], &e[1], info); } else { dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { if (*info == 0) { imax = *n; } else { imax = *info - 1; } d__1 = 1. / sigma; dscal_(&imax, &d__1, &d__[1], &c__1); } return 0; /* End of DSTEV */ } /* dstev_ */
/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ static doublereal beta; static integer idxq, nlvl, i__, j, m; static doublereal alpha; static integer inode, ndiml, ndimr, idxqi, itemp; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer sqrei, i1; extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); static integer ic, nwork1, lf, nd, nwork2, ll, nl, vf, nr, vl; extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), dlaset_( char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); static integer im1, smlszp, ncc, nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; #define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1] #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= Using a divide and conquer approach, DLASDA computes the singular value decomposition (SVD) of a real upper bidiagonal N-by-M matrix B with diagonal D and offdiagonal E, where M = N + SQRE. The algorithm computes the singular values in the SVD B = U * S * VT. The orthogonal matrices U and VT are optionally computed in compact form. A related subroutine, DLASD0, computes the singular values and the singular vectors in explicit form. Arguments ========= ICOMPQ (input) INTEGER Specifies whether singular vectors are to be computed in compact form, as follows = 0: Compute singular values only. = 1: Compute singular vectors of upper bidiagonal matrix in compact form. SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The row dimension of the upper bidiagonal matrix. This is also the dimension of the main diagonal array D. SQRE (input) INTEGER Specifies the column dimension of the bidiagonal matrix. = 0: The bidiagonal matrix has column dimension M = N; = 1: The bidiagonal matrix has column dimension M = N + 1. D (input/output) DOUBLE PRECISION array, dimension ( N ) On entry D contains the main diagonal of the bidiagonal matrix. On exit D, if INFO = 0, contains its singular values. E (input) DOUBLE PRECISION array, dimension ( M-1 ) Contains the subdiagonal entries of the bidiagonal matrix. On exit, E has been destroyed. U (output) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left singular vector matrices of all subproblems at the bottom level. LDU (input) INTEGER, LDU = > N. The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. VT (output) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right singular vector matrices of all subproblems at the bottom level. K (output) INTEGER array, dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th secular equation on the computation tree. DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), where NLVL = floor(log_2 (N/SMLSIZ))). DIFR (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(1:N, 2 * I ) contains the normalizing factors for the right singular vector matrix. See DLASD8 for details. Z (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. The first K elements of Z(1, I) contain the components of the deflation-adjusted updating row vector for subproblems on the I-th level. POLES (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and POLES(1, 2*I) contain the new and old singular values involved in the secular equations on the I-th level. GIVPTR (output) INTEGER array, dimension ( N ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records the number of Givens rotations performed on the I-th problem on the computation tree. GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations of Givens rotations performed on the I-th level on the computation tree. LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. PERM (output) INTEGER array, dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records permutations done on the I-th level of the computation tree. GIVNUM (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- values of Givens rotations performed on the I-th level on the computation tree. C (output) DOUBLE PRECISION array, dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 and the I-th subproblem is not square, on exit, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. S (output) DOUBLE PRECISION array, dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 and the I-th subproblem is not square, on exit, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. WORK (workspace) DOUBLE PRECISION array, dimension (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). IWORK (workspace) INTEGER array. Dimension must be at least (7 * N). INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, an singular value did not converge Further Details =============== Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1 * 1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1 * 1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; --c__; --s; --work; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } else if (*ldu < *n + *sqre) { *info = -8; } else if (*ldgcol < *n) { *info = -17; } if (*info != 0) { i__1 = -(*info); xerbla_("DLASDA", &i__1); return 0; } m = *n + *sqre; /* If the input matrix is too small, call DLASDQ to find the SVD. */ if (*n <= *smlsiz) { if (*icompq == 0) { dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & work[1], info); } else { dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); } return 0; } /* Book-keeping and set up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; idxq = ndimr + *n; iwk = idxq + *n; ncc = 0; nru = 0; smlszp = *smlsiz + 1; vf = 1; vl = vf + m; nwork1 = vl + m; nwork2 = nwork1 + smlszp * smlszp; dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* for the nodes on bottom level of the tree, solve their subproblems by DLASDQ. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nlp1 = nl + 1; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; idxqi = idxq + nlf - 2; vfi = vf + nlf - 1; vli = vl + nlf - 1; sqrei = 1; if (*icompq == 0) { dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &work[nwork1], &smlszp); dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], &nl, &work[nwork2], info); itemp = nwork1 + nl * smlszp; dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); } else { dlaset_("A", &nl, &nl, &c_b11, &c_b12, &u_ref(nlf, 1), ldu); dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &vt_ref(nlf, 1), ldu); dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & vt_ref(nlf, 1), ldu, &u_ref(nlf, 1), ldu, &u_ref(nlf, 1), ldu, &work[nwork1], info); dcopy_(&nlp1, &vt_ref(nlf, 1), &c__1, &work[vfi], &c__1); dcopy_(&nlp1, &vt_ref(nlf, nlp1), &c__1, &work[vli], &c__1); } if (*info != 0) { return 0; } i__2 = nl; for (j = 1; j <= i__2; ++j) { iwork[idxqi + j] = j; /* L10: */ } if (i__ == nd && *sqre == 0) { sqrei = 0; } else { sqrei = 1; } idxqi += nlp1; vfi += nlp1; vli += nlp1; nrp1 = nr + sqrei; if (*icompq == 0) { dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &work[nwork1], &smlszp); dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], &nr, &work[nwork2], info); itemp = nwork1 + (nrp1 - 1) * smlszp; dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); } else { dlaset_("A", &nr, &nr, &c_b11, &c_b12, &u_ref(nrf, 1), ldu); dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &vt_ref(nrf, 1), ldu); dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & vt_ref(nrf, 1), ldu, &u_ref(nrf, 1), ldu, &u_ref(nrf, 1), ldu, &work[nwork1], info); dcopy_(&nrp1, &vt_ref(nrf, 1), &c__1, &work[vfi], &c__1); dcopy_(&nrp1, &vt_ref(nrf, nrp1), &c__1, &work[vli], &c__1); } if (*info != 0) { return 0; } i__2 = nr; for (j = 1; j <= i__2; ++j) { iwork[idxqi + j] = j; /* L20: */ } /* L30: */ } /* Now conquer each subproblem bottom-up. */ j = pow_ii(&c__2, &nlvl); for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqrei = *sqre; } else { sqrei = 1; } vfi = vf + nlf - 1; vli = vl + nlf - 1; idxqi = idxq + nlf - 1; alpha = d__[ic]; beta = e[ic]; if (*icompq == 0) { dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm[ perm_offset], &givptr[1], &givcol[givcol_offset], ldgcol, &givnum[givnum_offset], ldu, &poles[ poles_offset], &difl[difl_offset], &difr[difr_offset], &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], &iwork[iwk], info); } else { --j; dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm_ref( nlf, lvl), &givptr[j], &givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), & difl_ref(nlf, lvl), &difr_ref(nlf, lvl2), &z___ref( nlf, lvl), &k[j], &c__[j], &s[j], &work[nwork1], & iwork[iwk], info); } if (*info != 0) { return 0; } /* L40: */ } /* L50: */ } return 0; /* End of DLASDA */ } /* dlasda_ */
/* Subroutine */ int chpevx_(char *jobz, char *range, char *uplo, integer *n, complex *ap, real *vl, real *vu, integer *il, integer *iu, real * abstol, integer *m, real *w, complex *z__, integer *ldz, complex * work, real *rwork, integer *iwork, integer *ifail, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CHPEVX computes selected eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A in packed storage. Eigenvalues/vectors can be selected by specifying either a range of values or a range of indices for the desired eigenvalues. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. RANGE (input) CHARACTER*1 = 'A': all eigenvalues will be found; = 'V': all eigenvalues in the half-open interval (VL,VU] will be found; = 'I': the IL-th through IU-th eigenvalues will be found. UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. AP (input/output) COMPLEX array, dimension (N*(N+1)/2) On entry, the upper or lower triangle of the Hermitian matrix A, packed columnwise in a linear array. The j-th column of A is stored in the array AP as follows: if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. On exit, AP is overwritten by values generated during the reduction to tridiagonal form. If UPLO = 'U', the diagonal and first superdiagonal of the tridiagonal matrix T overwrite the corresponding elements of A, and if UPLO = 'L', the diagonal and first subdiagonal of T overwrite the corresponding elements of A. VL (input) REAL VU (input) REAL If RANGE='V', the lower and upper bounds of the interval to be searched for eigenvalues. VL < VU. Not referenced if RANGE = 'A' or 'I'. IL (input) INTEGER IU (input) INTEGER If RANGE='I', the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. Not referenced if RANGE = 'A' or 'V'. ABSTOL (input) REAL The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to ABSTOL + EPS * max( |a|,|b| ) , where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*|T| will be used in its place, where |T| is the 1-norm of the tridiagonal matrix obtained by reducing AP to tridiagonal form. Eigenvalues will be computed most accurately when ABSTOL is set to twice the underflow threshold 2*SLAMCH('S'), not zero. If this routine returns with INFO>0, indicating that some eigenvectors did not converge, try setting ABSTOL to 2*SLAMCH('S'). See "Computing Small Singular Values of Bidiagonal Matrices with Guaranteed High Relative Accuracy," by Demmel and Kahan, LAPACK Working Note #3. M (output) INTEGER The total number of eigenvalues found. 0 <= M <= N. If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. W (output) REAL array, dimension (N) If INFO = 0, the selected eigenvalues in ascending order. Z (output) COMPLEX array, dimension (LDZ, max(1,M)) If JOBZ = 'V', then if INFO = 0, the first M columns of Z contain the orthonormal eigenvectors of the matrix A corresponding to the selected eigenvalues, with the i-th column of Z holding the eigenvector associated with W(i). If an eigenvector fails to converge, then that column of Z contains the latest approximation to the eigenvector, and the index of the eigenvector is returned in IFAIL. If JOBZ = 'N', then Z is not referenced. Note: the user must ensure that at least max(1,M) columns are supplied in the array Z; if RANGE = 'V', the exact value of M is not known in advance and an upper bound must be used. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). WORK (workspace) COMPLEX array, dimension (2*N) RWORK (workspace) REAL array, dimension (7*N) IWORK (workspace) INTEGER array, dimension (5*N) IFAIL (output) INTEGER array, dimension (N) If JOBZ = 'V', then if INFO = 0, the first M elements of IFAIL are zero. If INFO > 0, then IFAIL contains the indices of the eigenvectors that failed to converge. If JOBZ = 'N', then IFAIL is not referenced. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, then i eigenvectors failed to converge. Their indices are stored in array IFAIL. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer indd, inde; static real anrm; static integer imax; static real rmin, rmax; static integer itmp1, i__, j, indee; static real sigma; extern logical lsame_(char *, char *); static integer iinfo; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static char order[1]; extern /* Subroutine */ int cswap_(integer *, complex *, integer *, complex *, integer *), scopy_(integer *, real *, integer *, real * , integer *); static logical wantz; static integer jj; static logical alleig, indeig; static integer iscale, indibl; extern doublereal clanhp_(char *, char *, integer *, complex *, real *); static logical valeig; extern doublereal slamch_(char *); extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer *); static real safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static real abstll, bignum; static integer indiwk, indisp, indtau; extern /* Subroutine */ int chptrd_(char *, integer *, complex *, real *, real *, complex *, integer *), cstein_(integer *, real *, real *, integer *, real *, integer *, integer *, complex *, integer *, real *, integer *, integer *, integer *); static integer indrwk, indwrk; extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, complex *, integer *, real *, integer *), cupgtr_(char *, integer *, complex *, complex *, complex *, integer *, complex *, integer *), ssterf_(integer *, real *, real *, integer *); static integer nsplit; extern /* Subroutine */ int cupmtr_(char *, char *, char *, integer *, integer *, complex *, complex *, complex *, integer *, complex *, integer *); static real smlnum; extern /* Subroutine */ int sstebz_(char *, char *, integer *, real *, real *, integer *, integer *, real *, real *, real *, integer *, integer *, real *, integer *, integer *, real *, integer *, integer *); static real eps, vll, vuu, tmp1; #define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1 #define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)] --ap; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --rwork; --iwork; --ifail; /* Function Body */ wantz = lsame_(jobz, "V"); alleig = lsame_(range, "A"); valeig = lsame_(range, "V"); indeig = lsame_(range, "I"); *info = 0; if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (lsame_(uplo, "L") || lsame_(uplo, "U"))) { *info = -3; } else if (*n < 0) { *info = -4; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -7; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -8; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -9; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -14; } } if (*info != 0) { i__1 = -(*info); xerbla_("CHPEVX", &i__1); return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } if (*n == 1) { if (alleig || indeig) { *m = 1; w[1] = ap[1].r; } else { if (*vl < ap[1].r && *vu >= ap[1].r) { *m = 1; w[1] = ap[1].r; } } if (wantz) { i__1 = z___subscr(1, 1); z__[i__1].r = 1.f, z__[i__1].i = 0.f; } return 0; } /* Get machine constants. */ safmin = slamch_("Safe minimum"); eps = slamch_("Precision"); smlnum = safmin / eps; bignum = 1.f / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ r__1 = sqrt(bignum), r__2 = 1.f / sqrt(sqrt(safmin)); rmax = dmin(r__1,r__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; abstll = *abstol; if (valeig) { vll = *vl; vuu = *vu; } else { vll = 0.f; vuu = 0.f; } anrm = clanhp_("M", uplo, n, &ap[1], &rwork[1]); if (anrm > 0.f && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { i__1 = *n * (*n + 1) / 2; csscal_(&i__1, &sigma, &ap[1], &c__1); if (*abstol > 0.f) { abstll = *abstol * sigma; } if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* Call CHPTRD to reduce Hermitian packed matrix to tridiagonal form. */ indd = 1; inde = indd + *n; indrwk = inde + *n; indtau = 1; indwrk = indtau + *n; chptrd_(uplo, n, &ap[1], &rwork[indd], &rwork[inde], &work[indtau], & iinfo); /* If all eigenvalues are desired and ABSTOL is less than or equal to zero, then call SSTERF or CUPGTR and CSTEQR. If this fails for some eigenvalue, then try SSTEBZ. */ if ((alleig || indeig && *il == 1 && *iu == *n) && *abstol <= 0.f) { scopy_(n, &rwork[indd], &c__1, &w[1], &c__1); indee = indrwk + (*n << 1); if (! wantz) { i__1 = *n - 1; scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); ssterf_(n, &w[1], &rwork[indee], info); } else { cupgtr_(uplo, n, &ap[1], &work[indtau], &z__[z_offset], ldz, & work[indwrk], &iinfo); i__1 = *n - 1; scopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); csteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & rwork[indrwk], info); if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L10: */ } } } if (*info == 0) { *m = *n; goto L20; } *info = 0; } /* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indibl = 1; indisp = indibl + *n; indiwk = indisp + *n; sstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &rwork[indd], & rwork[inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], & rwork[indrwk], &iwork[indiwk], info); if (wantz) { cstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ indiwk], &ifail[1], info); /* Apply unitary matrix used in reduction to tridiagonal form to eigenvectors returned by CSTEIN. */ indwrk = indtau + *n; cupmtr_("L", uplo, "N", n, m, &ap[1], &work[indtau], &z__[z_offset], ldz, &work[indwrk], info); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ L20: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } r__1 = 1.f / sigma; sscal_(&imax, &r__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L30: */ } if (i__ != 0) { itmp1 = iwork[indibl + i__ - 1]; w[i__] = w[j]; iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; w[j] = tmp1; iwork[indibl + j - 1] = itmp1; cswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, j), &c__1); if (*info != 0) { itmp1 = ifail[i__]; ifail[i__] = ifail[j]; ifail[j] = itmp1; } } /* L40: */ } } return 0; /* End of CHPEVX */ } /* chpevx_ */
/* Subroutine */ int zhgeqz_(char *job, char *compq, char *compz, integer *n, integer *ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *alpha, doublecomplex * beta, doublecomplex *q, integer *ldq, doublecomplex *z__, integer * ldz, doublecomplex *work, integer *lwork, doublereal *rwork, integer * info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1, d__2, d__3, d__4, d__5, d__6; doublecomplex z__1, z__2, z__3, z__4, z__5, z__6; /* Builtin functions */ double z_abs(doublecomplex *); void d_cnjg(doublecomplex *, doublecomplex *); double d_imag(doublecomplex *); void z_div(doublecomplex *, doublecomplex *, doublecomplex *), pow_zi( doublecomplex *, doublecomplex *, integer *), z_sqrt( doublecomplex *, doublecomplex *); /* Local variables */ static doublereal absb, atol, btol, temp, opst; extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublecomplex *); static doublereal temp2, c__; static integer j; static doublecomplex s, t; extern logical lsame_(char *, char *); static doublecomplex ctemp; static integer iiter, ilast, jiter; static doublereal anorm; static integer maxit; static doublereal bnorm; static doublecomplex shift; extern /* Subroutine */ int zscal_(integer *, doublecomplex *, doublecomplex *, integer *); static doublereal tempr; static doublecomplex ctemp2, ctemp3; static logical ilazr2; static integer jc, in; static doublereal ascale, bscale; static doublecomplex u12; extern doublereal dlamch_(char *); static integer jr, nq; static doublecomplex signbc; static integer nz; static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static doublecomplex eshift; static logical ilschr; static integer icompq, ilastm; static doublecomplex rtdisc; static integer ischur; extern doublereal zlanhs_(char *, integer *, doublecomplex *, integer *, doublereal *); static logical ilazro; static integer icompz, ifirst; extern /* Subroutine */ int zlartg_(doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, doublecomplex *); static integer ifrstm; extern /* Subroutine */ int zlaset_(char *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *); static integer istart; static logical lquery; static doublecomplex ad11, ad12, ad21, ad22; static integer jch; static logical ilq, ilz; static doublereal ulp; static doublecomplex abi22; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1 #define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)] #define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1 #define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)] /* -- LAPACK routine (instrumented to count operations, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 ----------------------- Begin Timing Code ------------------------ Common block to return operation count and iteration count ITCNT is initialized to 0, OPS is only incremented OPST is used to accumulate small contributions to OPS to avoid roundoff error ------------------------ End Timing Code ------------------------- Purpose ======= ZHGEQZ implements a single-shift version of the QZ method for finding the generalized eigenvalues w(i)=ALPHA(i)/BETA(i) of the equation det( A - w(i) B ) = 0 If JOB='S', then the pair (A,B) is simultaneously reduced to Schur form (i.e., A and B are both upper triangular) by applying one unitary tranformation (usually called Q) on the left and another (usually called Z) on the right. The diagonal elements of A are then ALPHA(1),...,ALPHA(N), and of B are BETA(1),...,BETA(N). If JOB='S' and COMPQ and COMPZ are 'V' or 'I', then the unitary transformations used to reduce (A,B) are accumulated into the arrays Q and Z s.t.: Q(in) A(in) Z(in)* = Q(out) A(out) Z(out)* Q(in) B(in) Z(in)* = Q(out) B(out) Z(out)* Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), pp. 241--256. Arguments ========= JOB (input) CHARACTER*1 = 'E': compute only ALPHA and BETA. A and B will not necessarily be put into generalized Schur form. = 'S': put A and B into generalized Schur form, as well as computing ALPHA and BETA. COMPQ (input) CHARACTER*1 = 'N': do not modify Q. = 'V': multiply the array Q on the right by the conjugate transpose of the unitary tranformation that is applied to the left side of A and B to reduce them to Schur form. = 'I': like COMPQ='V', except that Q will be initialized to the identity first. COMPZ (input) CHARACTER*1 = 'N': do not modify Z. = 'V': multiply the array Z on the right by the unitary tranformation that is applied to the right side of A and B to reduce them to Schur form. = 'I': like COMPZ='V', except that Z will be initialized to the identity first. N (input) INTEGER The order of the matrices A, B, Q, and Z. N >= 0. ILO (input) INTEGER IHI (input) INTEGER It is assumed that A is already upper triangular in rows and columns 1:ILO-1 and IHI+1:N. 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. A (input/output) COMPLEX*16 array, dimension (LDA, N) On entry, the N-by-N upper Hessenberg matrix A. Elements below the subdiagonal must be zero. If JOB='S', then on exit A and B will have been simultaneously reduced to upper triangular form. If JOB='E', then on exit A will have been destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max( 1, N ). B (input/output) COMPLEX*16 array, dimension (LDB, N) On entry, the N-by-N upper triangular matrix B. Elements below the diagonal must be zero. If JOB='S', then on exit A and B will have been simultaneously reduced to upper triangular form. If JOB='E', then on exit B will have been destroyed. LDB (input) INTEGER The leading dimension of the array B. LDB >= max( 1, N ). ALPHA (output) COMPLEX*16 array, dimension (N) The diagonal elements of A when the pair (A,B) has been reduced to Schur form. ALPHA(i)/BETA(i) i=1,...,N are the generalized eigenvalues. BETA (output) COMPLEX*16 array, dimension (N) The diagonal elements of B when the pair (A,B) has been reduced to Schur form. ALPHA(i)/BETA(i) i=1,...,N are the generalized eigenvalues. A and B are normalized so that BETA(1),...,BETA(N) are non-negative real numbers. Q (input/output) COMPLEX*16 array, dimension (LDQ, N) If COMPQ='N', then Q will not be referenced. If COMPQ='V' or 'I', then the conjugate transpose of the unitary transformations which are applied to A and B on the left will be applied to the array Q on the right. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= 1. If COMPQ='V' or 'I', then LDQ >= N. Z (input/output) COMPLEX*16 array, dimension (LDZ, N) If COMPZ='N', then Z will not be referenced. If COMPZ='V' or 'I', then the unitary transformations which are applied to A and B on the right will be applied to the array Z on the right. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1. If COMPZ='V' or 'I', then LDZ >= N. WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,N). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. RWORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value = 1,...,N: the QZ iteration did not converge. (A,B) is not in Schur form, but ALPHA(i) and BETA(i), i=INFO+1,...,N should be correct. = N+1,...,2*N: the shift calculation failed. (A,B) is not in Schur form, but ALPHA(i) and BETA(i), i=INFO-N+1,...,N should be correct. > 2*N: various "impossible" errors. Further Details =============== We assume that complex ABS works as long as its value is less than overflow. ===================================================================== ----------------------- Begin Timing Code ------------------------ Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --alpha; --beta; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --rwork; /* Function Body */ latime_1.itcnt = 0.; /* ------------------------ End Timing Code ------------------------- Decode JOB, COMPQ, COMPZ */ if (lsame_(job, "E")) { ilschr = FALSE_; ischur = 1; } else if (lsame_(job, "S")) { ilschr = TRUE_; ischur = 2; } else { ischur = 0; } if (lsame_(compq, "N")) { ilq = FALSE_; icompq = 1; nq = 0; } else if (lsame_(compq, "V")) { ilq = TRUE_; icompq = 2; nq = *n; } else if (lsame_(compq, "I")) { ilq = TRUE_; icompq = 3; nq = *n; } else { icompq = 0; } if (lsame_(compz, "N")) { ilz = FALSE_; icompz = 1; nz = 0; } else if (lsame_(compz, "V")) { ilz = TRUE_; icompz = 2; nz = *n; } else if (lsame_(compz, "I")) { ilz = TRUE_; icompz = 3; nz = *n; } else { icompz = 0; } /* Check Argument Values */ *info = 0; i__1 = max(1,*n); work[1].r = (doublereal) i__1, work[1].i = 0.; lquery = *lwork == -1; if (ischur == 0) { *info = -1; } else if (icompq == 0) { *info = -2; } else if (icompz == 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*ilo < 1) { *info = -5; } else if (*ihi > *n || *ihi < *ilo - 1) { *info = -6; } else if (*lda < *n) { *info = -8; } else if (*ldb < *n) { *info = -10; } else if (*ldq < 1 || ilq && *ldq < *n) { *info = -14; } else if (*ldz < 1 || ilz && *ldz < *n) { *info = -16; } else if (*lwork < max(1,*n) && ! lquery) { *info = -18; } if (*info != 0) { i__1 = -(*info); xerbla_("ZHGEQZ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible WORK( 1 ) = CMPLX( 1 ) */ if (*n <= 0) { work[1].r = 1., work[1].i = 0.; return 0; } /* Initialize Q and Z */ if (icompq == 3) { zlaset_("Full", n, n, &c_b1, &c_b2, &q[q_offset], ldq); } if (icompz == 3) { zlaset_("Full", n, n, &c_b1, &c_b2, &z__[z_offset], ldz); } /* Machine Constants */ in = *ihi + 1 - *ilo; safmin = dlamch_("S"); ulp = dlamch_("E") * dlamch_("B"); anorm = zlanhs_("F", &in, &a_ref(*ilo, *ilo), lda, &rwork[1]); bnorm = zlanhs_("F", &in, &b_ref(*ilo, *ilo), ldb, &rwork[1]); /* Computing MAX */ d__1 = safmin, d__2 = ulp * anorm; atol = max(d__1,d__2); /* Computing MAX */ d__1 = safmin, d__2 = ulp * bnorm; btol = max(d__1,d__2); ascale = 1. / max(safmin,anorm); bscale = 1. / max(safmin,bnorm); /* ---------------------- Begin Timing Code ------------------------- Count ops for norms, etc. */ opst = 0.; /* Computing 2nd power */ i__1 = *n; latime_1.ops += (doublereal) ((i__1 * i__1 << 2) + *n * 12 - 5); /* ----------------------- End Timing Code -------------------------- Set Eigenvalues IHI+1:N */ i__1 = *n; for (j = *ihi + 1; j <= i__1; ++j) { absb = z_abs(&b_ref(j, j)); if (absb > safmin) { i__2 = b_subscr(j, j); z__2.r = b[i__2].r / absb, z__2.i = b[i__2].i / absb; d_cnjg(&z__1, &z__2); signbc.r = z__1.r, signbc.i = z__1.i; i__2 = b_subscr(j, j); b[i__2].r = absb, b[i__2].i = 0.; if (ilschr) { i__2 = j - 1; zscal_(&i__2, &signbc, &b_ref(1, j), &c__1); zscal_(&j, &signbc, &a_ref(1, j), &c__1); /* ----------------- Begin Timing Code --------------------- */ opst += (doublereal) ((j - 1) * 12); /* ------------------ End Timing Code ---------------------- */ } else { i__2 = a_subscr(j, j); i__3 = a_subscr(j, j); z__1.r = a[i__3].r * signbc.r - a[i__3].i * signbc.i, z__1.i = a[i__3].r * signbc.i + a[i__3].i * signbc.r; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (ilz) { zscal_(n, &signbc, &z___ref(1, j), &c__1); } /* ------------------- Begin Timing Code ---------------------- */ opst += (doublereal) (nz * 6 + 13); /* -------------------- End Timing Code ----------------------- */ } else { i__2 = b_subscr(j, j); b[i__2].r = 0., b[i__2].i = 0.; } i__2 = j; i__3 = a_subscr(j, j); alpha[i__2].r = a[i__3].r, alpha[i__2].i = a[i__3].i; i__2 = j; i__3 = b_subscr(j, j); beta[i__2].r = b[i__3].r, beta[i__2].i = b[i__3].i; /* L10: */ } /* If IHI < ILO, skip QZ steps */ if (*ihi < *ilo) { goto L190; } /* MAIN QZ ITERATION LOOP Initialize dynamic indices Eigenvalues ILAST+1:N have been found. Column operations modify rows IFRSTM:whatever Row operations modify columns whatever:ILASTM If only eigenvalues are being computed, then IFRSTM is the row of the last splitting row above row ILAST; this is always at least ILO. IITER counts iterations since the last eigenvalue was found, to tell when to use an extraordinary shift. MAXIT is the maximum number of QZ sweeps allowed. */ ilast = *ihi; if (ilschr) { ifrstm = 1; ilastm = *n; } else { ifrstm = *ilo; ilastm = *ihi; } iiter = 0; eshift.r = 0., eshift.i = 0.; maxit = (*ihi - *ilo + 1) * 30; i__1 = maxit; for (jiter = 1; jiter <= i__1; ++jiter) { /* Check for too many iterations. */ if (jiter > maxit) { goto L180; } /* Split the matrix if possible. Two tests: 1: A(j,j-1)=0 or j=ILO 2: B(j,j)=0 Special case: j=ILAST */ if (ilast == *ilo) { goto L60; } else { i__2 = a_subscr(ilast, ilast - 1); if ((d__1 = a[i__2].r, abs(d__1)) + (d__2 = d_imag(&a_ref(ilast, ilast - 1)), abs(d__2)) <= atol) { i__2 = a_subscr(ilast, ilast - 1); a[i__2].r = 0., a[i__2].i = 0.; goto L60; } } if (z_abs(&b_ref(ilast, ilast)) <= btol) { i__2 = b_subscr(ilast, ilast); b[i__2].r = 0., b[i__2].i = 0.; goto L50; } /* General case: j<ILAST */ i__2 = *ilo; for (j = ilast - 1; j >= i__2; --j) { /* Test 1: for A(j,j-1)=0 or j=ILO */ if (j == *ilo) { ilazro = TRUE_; } else { i__3 = a_subscr(j, j - 1); if ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a_ref(j, j - 1)), abs(d__2)) <= atol) { i__3 = a_subscr(j, j - 1); a[i__3].r = 0., a[i__3].i = 0.; ilazro = TRUE_; } else { ilazro = FALSE_; } } /* Test 2: for B(j,j)=0 */ if (z_abs(&b_ref(j, j)) < btol) { i__3 = b_subscr(j, j); b[i__3].r = 0., b[i__3].i = 0.; /* Test 1a: Check for 2 consecutive small subdiagonals in A */ ilazr2 = FALSE_; if (! ilazro) { i__3 = a_subscr(j, j - 1); i__4 = a_subscr(j + 1, j); i__5 = a_subscr(j, j); if (((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(& a_ref(j, j - 1)), abs(d__2))) * (ascale * ((d__3 = a[i__4].r, abs(d__3)) + (d__4 = d_imag(&a_ref(j + 1, j)), abs(d__4)))) <= ((d__5 = a[i__5].r, abs( d__5)) + (d__6 = d_imag(&a_ref(j, j)), abs(d__6))) * (ascale * atol)) { ilazr2 = TRUE_; } } /* If both tests pass (1 & 2), i.e., the leading diagonal element of B in the block is zero, split a 1x1 block off at the top. (I.e., at the J-th row/column) The leading diagonal element of the remainder can also be zero, so this may have to be done repeatedly. */ if (ilazro || ilazr2) { i__3 = ilast - 1; for (jch = j; jch <= i__3; ++jch) { i__4 = a_subscr(jch, jch); ctemp.r = a[i__4].r, ctemp.i = a[i__4].i; zlartg_(&ctemp, &a_ref(jch + 1, jch), &c__, &s, & a_ref(jch, jch)); i__4 = a_subscr(jch + 1, jch); a[i__4].r = 0., a[i__4].i = 0.; i__4 = ilastm - jch; zrot_(&i__4, &a_ref(jch, jch + 1), lda, &a_ref(jch + 1, jch + 1), lda, &c__, &s); i__4 = ilastm - jch; zrot_(&i__4, &b_ref(jch, jch + 1), ldb, &b_ref(jch + 1, jch + 1), ldb, &c__, &s); if (ilq) { d_cnjg(&z__1, &s); zrot_(n, &q_ref(1, jch), &c__1, &q_ref(1, jch + 1) , &c__1, &c__, &z__1); } if (ilazr2) { i__4 = a_subscr(jch, jch - 1); i__5 = a_subscr(jch, jch - 1); z__1.r = c__ * a[i__5].r, z__1.i = c__ * a[i__5] .i; a[i__4].r = z__1.r, a[i__4].i = z__1.i; } ilazr2 = FALSE_; /* --------------- Begin Timing Code ----------------- */ opst += (doublereal) ((ilastm - jch) * 40 + 32 + nq * 20); /* ---------------- End Timing Code ------------------ */ i__4 = b_subscr(jch + 1, jch + 1); if ((d__1 = b[i__4].r, abs(d__1)) + (d__2 = d_imag(& b_ref(jch + 1, jch + 1)), abs(d__2)) >= btol) { if (jch + 1 >= ilast) { goto L60; } else { ifirst = jch + 1; goto L70; } } i__4 = b_subscr(jch + 1, jch + 1); b[i__4].r = 0., b[i__4].i = 0.; /* L20: */ } goto L50; } else { /* Only test 2 passed -- chase the zero to B(ILAST,ILAST) Then process as in the case B(ILAST,ILAST)=0 */ i__3 = ilast - 1; for (jch = j; jch <= i__3; ++jch) { i__4 = b_subscr(jch, jch + 1); ctemp.r = b[i__4].r, ctemp.i = b[i__4].i; zlartg_(&ctemp, &b_ref(jch + 1, jch + 1), &c__, &s, & b_ref(jch, jch + 1)); i__4 = b_subscr(jch + 1, jch + 1); b[i__4].r = 0., b[i__4].i = 0.; if (jch < ilastm - 1) { i__4 = ilastm - jch - 1; zrot_(&i__4, &b_ref(jch, jch + 2), ldb, &b_ref( jch + 1, jch + 2), ldb, &c__, &s); } i__4 = ilastm - jch + 2; zrot_(&i__4, &a_ref(jch, jch - 1), lda, &a_ref(jch + 1, jch - 1), lda, &c__, &s); if (ilq) { d_cnjg(&z__1, &s); zrot_(n, &q_ref(1, jch), &c__1, &q_ref(1, jch + 1) , &c__1, &c__, &z__1); } i__4 = a_subscr(jch + 1, jch); ctemp.r = a[i__4].r, ctemp.i = a[i__4].i; zlartg_(&ctemp, &a_ref(jch + 1, jch - 1), &c__, &s, & a_ref(jch + 1, jch)); i__4 = a_subscr(jch + 1, jch - 1); a[i__4].r = 0., a[i__4].i = 0.; i__4 = jch + 1 - ifrstm; zrot_(&i__4, &a_ref(ifrstm, jch), &c__1, &a_ref( ifrstm, jch - 1), &c__1, &c__, &s); i__4 = jch - ifrstm; zrot_(&i__4, &b_ref(ifrstm, jch), &c__1, &b_ref( ifrstm, jch - 1), &c__1, &c__, &s); if (ilz) { zrot_(n, &z___ref(1, jch), &c__1, &z___ref(1, jch - 1), &c__1, &c__, &s); } /* L30: */ } /* ---------------- Begin Timing Code ------------------- */ opst += (doublereal) ((ilastm + 1 - ifrstm) * 40 + 64 + ( nq + nz) * 20) * (doublereal) (ilast - j); /* ----------------- End Timing Code -------------------- */ goto L50; } } else if (ilazro) { /* Only test 1 passed -- work on J:ILAST */ ifirst = j; goto L70; } /* Neither test passed -- try next J L40: */ } /* (Drop-through is "impossible") */ *info = (*n << 1) + 1; goto L210; /* B(ILAST,ILAST)=0 -- clear A(ILAST,ILAST-1) to split off a 1x1 block. */ L50: i__2 = a_subscr(ilast, ilast); ctemp.r = a[i__2].r, ctemp.i = a[i__2].i; zlartg_(&ctemp, &a_ref(ilast, ilast - 1), &c__, &s, &a_ref(ilast, ilast)); i__2 = a_subscr(ilast, ilast - 1); a[i__2].r = 0., a[i__2].i = 0.; i__2 = ilast - ifrstm; zrot_(&i__2, &a_ref(ifrstm, ilast), &c__1, &a_ref(ifrstm, ilast - 1), &c__1, &c__, &s); i__2 = ilast - ifrstm; zrot_(&i__2, &b_ref(ifrstm, ilast), &c__1, &b_ref(ifrstm, ilast - 1), &c__1, &c__, &s); if (ilz) { zrot_(n, &z___ref(1, ilast), &c__1, &z___ref(1, ilast - 1), &c__1, &c__, &s); } /* --------------------- Begin Timing Code ----------------------- */ opst += (doublereal) ((ilast - ifrstm) * 40 + 32 + nz * 20); /* ---------------------- End Timing Code ------------------------ A(ILAST,ILAST-1)=0 -- Standardize B, set ALPHA and BETA */ L60: absb = z_abs(&b_ref(ilast, ilast)); if (absb > safmin) { i__2 = b_subscr(ilast, ilast); z__2.r = b[i__2].r / absb, z__2.i = b[i__2].i / absb; d_cnjg(&z__1, &z__2); signbc.r = z__1.r, signbc.i = z__1.i; i__2 = b_subscr(ilast, ilast); b[i__2].r = absb, b[i__2].i = 0.; if (ilschr) { i__2 = ilast - ifrstm; zscal_(&i__2, &signbc, &b_ref(ifrstm, ilast), &c__1); i__2 = ilast + 1 - ifrstm; zscal_(&i__2, &signbc, &a_ref(ifrstm, ilast), &c__1); /* ----------------- Begin Timing Code --------------------- */ opst += (doublereal) ((ilast - ifrstm) * 12); /* ------------------ End Timing Code ---------------------- */ } else { i__2 = a_subscr(ilast, ilast); i__3 = a_subscr(ilast, ilast); z__1.r = a[i__3].r * signbc.r - a[i__3].i * signbc.i, z__1.i = a[i__3].r * signbc.i + a[i__3].i * signbc.r; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (ilz) { zscal_(n, &signbc, &z___ref(1, ilast), &c__1); } /* ------------------- Begin Timing Code ---------------------- */ opst += (doublereal) (nz * 6 + 13); /* -------------------- End Timing Code ----------------------- */ } else { i__2 = b_subscr(ilast, ilast); b[i__2].r = 0., b[i__2].i = 0.; } i__2 = ilast; i__3 = a_subscr(ilast, ilast); alpha[i__2].r = a[i__3].r, alpha[i__2].i = a[i__3].i; i__2 = ilast; i__3 = b_subscr(ilast, ilast); beta[i__2].r = b[i__3].r, beta[i__2].i = b[i__3].i; /* Go to next block -- exit if finished. */ --ilast; if (ilast < *ilo) { goto L190; } /* Reset counters */ iiter = 0; eshift.r = 0., eshift.i = 0.; if (! ilschr) { ilastm = ilast; if (ifrstm > ilast) { ifrstm = *ilo; } } goto L160; /* QZ step This iteration only involves rows/columns IFIRST:ILAST. We assume IFIRST < ILAST, and that the diagonal of B is non-zero. */ L70: ++iiter; if (! ilschr) { ifrstm = ifirst; } /* Compute the Shift. At this point, IFIRST < ILAST, and the diagonal elements of B(IFIRST:ILAST,IFIRST,ILAST) are larger than BTOL (in magnitude) */ if (iiter / 10 * 10 != iiter) { /* The Wilkinson shift (AEP p.512), i.e., the eigenvalue of the bottom-right 2x2 block of A inv(B) which is nearest to the bottom-right element. We factor B as U*D, where U has unit diagonals, and compute (A*inv(D))*inv(U). */ i__2 = b_subscr(ilast - 1, ilast); z__2.r = bscale * b[i__2].r, z__2.i = bscale * b[i__2].i; i__3 = b_subscr(ilast, ilast); z__3.r = bscale * b[i__3].r, z__3.i = bscale * b[i__3].i; z_div(&z__1, &z__2, &z__3); u12.r = z__1.r, u12.i = z__1.i; i__2 = a_subscr(ilast - 1, ilast - 1); z__2.r = ascale * a[i__2].r, z__2.i = ascale * a[i__2].i; i__3 = b_subscr(ilast - 1, ilast - 1); z__3.r = bscale * b[i__3].r, z__3.i = bscale * b[i__3].i; z_div(&z__1, &z__2, &z__3); ad11.r = z__1.r, ad11.i = z__1.i; i__2 = a_subscr(ilast, ilast - 1); z__2.r = ascale * a[i__2].r, z__2.i = ascale * a[i__2].i; i__3 = b_subscr(ilast - 1, ilast - 1); z__3.r = bscale * b[i__3].r, z__3.i = bscale * b[i__3].i; z_div(&z__1, &z__2, &z__3); ad21.r = z__1.r, ad21.i = z__1.i; i__2 = a_subscr(ilast - 1, ilast); z__2.r = ascale * a[i__2].r, z__2.i = ascale * a[i__2].i; i__3 = b_subscr(ilast, ilast); z__3.r = bscale * b[i__3].r, z__3.i = bscale * b[i__3].i; z_div(&z__1, &z__2, &z__3); ad12.r = z__1.r, ad12.i = z__1.i; i__2 = a_subscr(ilast, ilast); z__2.r = ascale * a[i__2].r, z__2.i = ascale * a[i__2].i; i__3 = b_subscr(ilast, ilast); z__3.r = bscale * b[i__3].r, z__3.i = bscale * b[i__3].i; z_div(&z__1, &z__2, &z__3); ad22.r = z__1.r, ad22.i = z__1.i; z__2.r = u12.r * ad21.r - u12.i * ad21.i, z__2.i = u12.r * ad21.i + u12.i * ad21.r; z__1.r = ad22.r - z__2.r, z__1.i = ad22.i - z__2.i; abi22.r = z__1.r, abi22.i = z__1.i; z__2.r = ad11.r + abi22.r, z__2.i = ad11.i + abi22.i; z__1.r = z__2.r * .5, z__1.i = z__2.i * .5; t.r = z__1.r, t.i = z__1.i; pow_zi(&z__4, &t, &c__2); z__5.r = ad12.r * ad21.r - ad12.i * ad21.i, z__5.i = ad12.r * ad21.i + ad12.i * ad21.r; z__3.r = z__4.r + z__5.r, z__3.i = z__4.i + z__5.i; z__6.r = ad11.r * ad22.r - ad11.i * ad22.i, z__6.i = ad11.r * ad22.i + ad11.i * ad22.r; z__2.r = z__3.r - z__6.r, z__2.i = z__3.i - z__6.i; z_sqrt(&z__1, &z__2); rtdisc.r = z__1.r, rtdisc.i = z__1.i; z__1.r = t.r - abi22.r, z__1.i = t.i - abi22.i; z__2.r = t.r - abi22.r, z__2.i = t.i - abi22.i; temp = z__1.r * rtdisc.r + d_imag(&z__2) * d_imag(&rtdisc); if (temp <= 0.) { z__1.r = t.r + rtdisc.r, z__1.i = t.i + rtdisc.i; shift.r = z__1.r, shift.i = z__1.i; } else { z__1.r = t.r - rtdisc.r, z__1.i = t.i - rtdisc.i; shift.r = z__1.r, shift.i = z__1.i; } /* ------------------- Begin Timing Code ---------------------- */ opst += 116.; /* -------------------- End Timing Code ----------------------- */ } else { /* Exceptional shift. Chosen for no particularly good reason. */ i__2 = a_subscr(ilast - 1, ilast); z__4.r = ascale * a[i__2].r, z__4.i = ascale * a[i__2].i; i__3 = b_subscr(ilast - 1, ilast - 1); z__5.r = bscale * b[i__3].r, z__5.i = bscale * b[i__3].i; z_div(&z__3, &z__4, &z__5); d_cnjg(&z__2, &z__3); z__1.r = eshift.r + z__2.r, z__1.i = eshift.i + z__2.i; eshift.r = z__1.r, eshift.i = z__1.i; shift.r = eshift.r, shift.i = eshift.i; /* ------------------- Begin Timing Code ---------------------- */ opst += 15.; /* -------------------- End Timing Code ----------------------- */ } /* Now check for two consecutive small subdiagonals. */ i__2 = ifirst + 1; for (j = ilast - 1; j >= i__2; --j) { istart = j; i__3 = a_subscr(j, j); z__2.r = ascale * a[i__3].r, z__2.i = ascale * a[i__3].i; i__4 = b_subscr(j, j); z__4.r = bscale * b[i__4].r, z__4.i = bscale * b[i__4].i; z__3.r = shift.r * z__4.r - shift.i * z__4.i, z__3.i = shift.r * z__4.i + shift.i * z__4.r; z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; ctemp.r = z__1.r, ctemp.i = z__1.i; temp = (d__1 = ctemp.r, abs(d__1)) + (d__2 = d_imag(&ctemp), abs( d__2)); i__3 = a_subscr(j + 1, j); temp2 = ascale * ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(& a_ref(j + 1, j)), abs(d__2))); tempr = max(temp,temp2); if (tempr < 1. && tempr != 0.) { temp /= tempr; temp2 /= tempr; } i__3 = a_subscr(j, j - 1); if (((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a_ref(j, j - 1)), abs(d__2))) * temp2 <= temp * atol) { goto L90; } /* L80: */ } istart = ifirst; i__2 = a_subscr(ifirst, ifirst); z__2.r = ascale * a[i__2].r, z__2.i = ascale * a[i__2].i; i__3 = b_subscr(ifirst, ifirst); z__4.r = bscale * b[i__3].r, z__4.i = bscale * b[i__3].i; z__3.r = shift.r * z__4.r - shift.i * z__4.i, z__3.i = shift.r * z__4.i + shift.i * z__4.r; z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; ctemp.r = z__1.r, ctemp.i = z__1.i; /* --------------------- Begin Timing Code ----------------------- */ opst += -6.; /* ---------------------- End Timing Code ------------------------ */ L90: /* Do an implicit-shift QZ sweep. Initial Q */ i__2 = a_subscr(istart + 1, istart); z__1.r = ascale * a[i__2].r, z__1.i = ascale * a[i__2].i; ctemp2.r = z__1.r, ctemp2.i = z__1.i; /* --------------------- Begin Timing Code ----------------------- */ opst += (doublereal) ((ilast - istart) * 18 + 2); /* ---------------------- End Timing Code ------------------------ */ zlartg_(&ctemp, &ctemp2, &c__, &s, &ctemp3); /* Sweep */ i__2 = ilast - 1; for (j = istart; j <= i__2; ++j) { if (j > istart) { i__3 = a_subscr(j, j - 1); ctemp.r = a[i__3].r, ctemp.i = a[i__3].i; zlartg_(&ctemp, &a_ref(j + 1, j - 1), &c__, &s, &a_ref(j, j - 1)); i__3 = a_subscr(j + 1, j - 1); a[i__3].r = 0., a[i__3].i = 0.; } i__3 = ilastm; for (jc = j; jc <= i__3; ++jc) { i__4 = a_subscr(j, jc); z__2.r = c__ * a[i__4].r, z__2.i = c__ * a[i__4].i; i__5 = a_subscr(j + 1, jc); z__3.r = s.r * a[i__5].r - s.i * a[i__5].i, z__3.i = s.r * a[ i__5].i + s.i * a[i__5].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; ctemp.r = z__1.r, ctemp.i = z__1.i; i__4 = a_subscr(j + 1, jc); d_cnjg(&z__4, &s); z__3.r = -z__4.r, z__3.i = -z__4.i; i__5 = a_subscr(j, jc); z__2.r = z__3.r * a[i__5].r - z__3.i * a[i__5].i, z__2.i = z__3.r * a[i__5].i + z__3.i * a[i__5].r; i__6 = a_subscr(j + 1, jc); z__5.r = c__ * a[i__6].r, z__5.i = c__ * a[i__6].i; z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; a[i__4].r = z__1.r, a[i__4].i = z__1.i; i__4 = a_subscr(j, jc); a[i__4].r = ctemp.r, a[i__4].i = ctemp.i; i__4 = b_subscr(j, jc); z__2.r = c__ * b[i__4].r, z__2.i = c__ * b[i__4].i; i__5 = b_subscr(j + 1, jc); z__3.r = s.r * b[i__5].r - s.i * b[i__5].i, z__3.i = s.r * b[ i__5].i + s.i * b[i__5].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; ctemp2.r = z__1.r, ctemp2.i = z__1.i; i__4 = b_subscr(j + 1, jc); d_cnjg(&z__4, &s); z__3.r = -z__4.r, z__3.i = -z__4.i; i__5 = b_subscr(j, jc); z__2.r = z__3.r * b[i__5].r - z__3.i * b[i__5].i, z__2.i = z__3.r * b[i__5].i + z__3.i * b[i__5].r; i__6 = b_subscr(j + 1, jc); z__5.r = c__ * b[i__6].r, z__5.i = c__ * b[i__6].i; z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; b[i__4].r = z__1.r, b[i__4].i = z__1.i; i__4 = b_subscr(j, jc); b[i__4].r = ctemp2.r, b[i__4].i = ctemp2.i; /* L100: */ } if (ilq) { i__3 = *n; for (jr = 1; jr <= i__3; ++jr) { i__4 = q_subscr(jr, j); z__2.r = c__ * q[i__4].r, z__2.i = c__ * q[i__4].i; d_cnjg(&z__4, &s); i__5 = q_subscr(jr, j + 1); z__3.r = z__4.r * q[i__5].r - z__4.i * q[i__5].i, z__3.i = z__4.r * q[i__5].i + z__4.i * q[i__5].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; ctemp.r = z__1.r, ctemp.i = z__1.i; i__4 = q_subscr(jr, j + 1); z__3.r = -s.r, z__3.i = -s.i; i__5 = q_subscr(jr, j); z__2.r = z__3.r * q[i__5].r - z__3.i * q[i__5].i, z__2.i = z__3.r * q[i__5].i + z__3.i * q[i__5].r; i__6 = q_subscr(jr, j + 1); z__4.r = c__ * q[i__6].r, z__4.i = c__ * q[i__6].i; z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; q[i__4].r = z__1.r, q[i__4].i = z__1.i; i__4 = q_subscr(jr, j); q[i__4].r = ctemp.r, q[i__4].i = ctemp.i; /* L110: */ } } i__3 = b_subscr(j + 1, j + 1); ctemp.r = b[i__3].r, ctemp.i = b[i__3].i; zlartg_(&ctemp, &b_ref(j + 1, j), &c__, &s, &b_ref(j + 1, j + 1)); i__3 = b_subscr(j + 1, j); b[i__3].r = 0., b[i__3].i = 0.; /* Computing MIN */ i__4 = j + 2; i__3 = min(i__4,ilast); for (jr = ifrstm; jr <= i__3; ++jr) { i__4 = a_subscr(jr, j + 1); z__2.r = c__ * a[i__4].r, z__2.i = c__ * a[i__4].i; i__5 = a_subscr(jr, j); z__3.r = s.r * a[i__5].r - s.i * a[i__5].i, z__3.i = s.r * a[ i__5].i + s.i * a[i__5].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; ctemp.r = z__1.r, ctemp.i = z__1.i; i__4 = a_subscr(jr, j); d_cnjg(&z__4, &s); z__3.r = -z__4.r, z__3.i = -z__4.i; i__5 = a_subscr(jr, j + 1); z__2.r = z__3.r * a[i__5].r - z__3.i * a[i__5].i, z__2.i = z__3.r * a[i__5].i + z__3.i * a[i__5].r; i__6 = a_subscr(jr, j); z__5.r = c__ * a[i__6].r, z__5.i = c__ * a[i__6].i; z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; a[i__4].r = z__1.r, a[i__4].i = z__1.i; i__4 = a_subscr(jr, j + 1); a[i__4].r = ctemp.r, a[i__4].i = ctemp.i; /* L120: */ } i__3 = j; for (jr = ifrstm; jr <= i__3; ++jr) { i__4 = b_subscr(jr, j + 1); z__2.r = c__ * b[i__4].r, z__2.i = c__ * b[i__4].i; i__5 = b_subscr(jr, j); z__3.r = s.r * b[i__5].r - s.i * b[i__5].i, z__3.i = s.r * b[ i__5].i + s.i * b[i__5].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; ctemp.r = z__1.r, ctemp.i = z__1.i; i__4 = b_subscr(jr, j); d_cnjg(&z__4, &s); z__3.r = -z__4.r, z__3.i = -z__4.i; i__5 = b_subscr(jr, j + 1); z__2.r = z__3.r * b[i__5].r - z__3.i * b[i__5].i, z__2.i = z__3.r * b[i__5].i + z__3.i * b[i__5].r; i__6 = b_subscr(jr, j); z__5.r = c__ * b[i__6].r, z__5.i = c__ * b[i__6].i; z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; b[i__4].r = z__1.r, b[i__4].i = z__1.i; i__4 = b_subscr(jr, j + 1); b[i__4].r = ctemp.r, b[i__4].i = ctemp.i; /* L130: */ } if (ilz) { i__3 = *n; for (jr = 1; jr <= i__3; ++jr) { i__4 = z___subscr(jr, j + 1); z__2.r = c__ * z__[i__4].r, z__2.i = c__ * z__[i__4].i; i__5 = z___subscr(jr, j); z__3.r = s.r * z__[i__5].r - s.i * z__[i__5].i, z__3.i = s.r * z__[i__5].i + s.i * z__[i__5].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; ctemp.r = z__1.r, ctemp.i = z__1.i; i__4 = z___subscr(jr, j); d_cnjg(&z__4, &s); z__3.r = -z__4.r, z__3.i = -z__4.i; i__5 = z___subscr(jr, j + 1); z__2.r = z__3.r * z__[i__5].r - z__3.i * z__[i__5].i, z__2.i = z__3.r * z__[i__5].i + z__3.i * z__[i__5] .r; i__6 = z___subscr(jr, j); z__5.r = c__ * z__[i__6].r, z__5.i = c__ * z__[i__6].i; z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; i__4 = z___subscr(jr, j + 1); z__[i__4].r = ctemp.r, z__[i__4].i = ctemp.i; /* L140: */ } } /* L150: */ } /* --------------------- Begin Timing Code ----------------------- */ opst += (doublereal) (ilast - istart) * (doublereal) ((ilastm - ifrstm) * 40 + 184 + (nq + nz) * 20) - 20; /* ---------------------- End Timing Code ------------------------ */ L160: /* --------------------- Begin Timing Code ----------------------- End of iteration -- add in "small" contributions. */ latime_1.ops += opst; opst = 0.; /* ---------------------- End Timing Code ------------------------ L170: */ } /* Drop-through = non-convergence */ L180: *info = ilast; /* ---------------------- Begin Timing Code ------------------------- */ latime_1.ops += opst; opst = 0.; /* ----------------------- End Timing Code -------------------------- */ goto L210; /* Successful completion of all QZ steps */ L190: /* Set Eigenvalues 1:ILO-1 */ i__1 = *ilo - 1; for (j = 1; j <= i__1; ++j) { absb = z_abs(&b_ref(j, j)); if (absb > safmin) { i__2 = b_subscr(j, j); z__2.r = b[i__2].r / absb, z__2.i = b[i__2].i / absb; d_cnjg(&z__1, &z__2); signbc.r = z__1.r, signbc.i = z__1.i; i__2 = b_subscr(j, j); b[i__2].r = absb, b[i__2].i = 0.; if (ilschr) { i__2 = j - 1; zscal_(&i__2, &signbc, &b_ref(1, j), &c__1); zscal_(&j, &signbc, &a_ref(1, j), &c__1); /* ----------------- Begin Timing Code --------------------- */ opst += (doublereal) ((j - 1) * 12); /* ------------------ End Timing Code ---------------------- */ } else { i__2 = a_subscr(j, j); i__3 = a_subscr(j, j); z__1.r = a[i__3].r * signbc.r - a[i__3].i * signbc.i, z__1.i = a[i__3].r * signbc.i + a[i__3].i * signbc.r; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (ilz) { zscal_(n, &signbc, &z___ref(1, j), &c__1); } /* ------------------- Begin Timing Code ---------------------- */ opst += (doublereal) (nz * 6 + 13); /* -------------------- End Timing Code ----------------------- */ } else { i__2 = b_subscr(j, j); b[i__2].r = 0., b[i__2].i = 0.; } i__2 = j; i__3 = a_subscr(j, j); alpha[i__2].r = a[i__3].r, alpha[i__2].i = a[i__3].i; i__2 = j; i__3 = b_subscr(j, j); beta[i__2].r = b[i__3].r, beta[i__2].i = b[i__3].i; /* L200: */ } /* Normal Termination */ *info = 0; /* Exit (other than argument error) -- return optimal workspace size */ L210: /* ---------------------- Begin Timing Code ------------------------- */ latime_1.ops += opst; opst = 0.; latime_1.itcnt = (doublereal) jiter; /* ----------------------- End Timing Code -------------------------- */ z__1.r = (doublereal) (*n), z__1.i = 0.; work[1].r = z__1.r, work[1].i = z__1.i; return 0; /* End of ZHGEQZ */ } /* zhgeqz_ */
/* Subroutine */ int dsyevr_(char *jobz, char *range, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, integer *isuppz, doublereal *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 20, 2000 Purpose ======= DSYEVR computes selected eigenvalues and, optionally, eigenvectors of a real symmetric matrix T. Eigenvalues and eigenvectors can be selected by specifying either a range of values or a range of indices for the desired eigenvalues. Whenever possible, DSYEVR calls DSTEGR to compute the eigenspectrum using Relatively Robust Representations. DSTEGR computes eigenvalues by the dqds algorithm, while orthogonal eigenvectors are computed from various "good" L D L^T representations (also known as Relatively Robust Representations). Gram-Schmidt orthogonalization is avoided as far as possible. More specifically, the various steps of the algorithm are as follows. For the i-th unreduced block of T, (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T is a relatively robust representation, (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high relative accuracy by the dqds algorithm, (c) If there is a cluster of close eigenvalues, "choose" sigma_i close to the cluster, and go to step (a), (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, compute the corresponding eigenvector by forming a rank-revealing twisted factorization. The desired accuracy of the output can be specified by the input parameter ABSTOL. For more details, see "A new O(n^2) algorithm for the symmetric tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, Computer Science Division Technical Report No. UCB//CSD-97-971, UC Berkeley, May 1997. Note 1 : DSYEVR calls DSTEGR when the full spectrum is requested on machines which conform to the ieee-754 floating point standard. DSYEVR calls DSTEBZ and SSTEIN on non-ieee machines and when partial spectrum requests are made. Normal execution of DSTEGR may create NaNs and infinities and hence may abort due to a floating point exception in environments which do not handle NaNs and infinities in the ieee standard default manner. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. RANGE (input) CHARACTER*1 = 'A': all eigenvalues will be found. = 'V': all eigenvalues in the half-open interval (VL,VU] will be found. = 'I': the IL-th through IU-th eigenvalues will be found. ********* For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and ********* DSTEIN are called UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA, N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, the lower triangle (if UPLO='L') or the upper triangle (if UPLO='U') of A, including the diagonal, is destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). VL (input) DOUBLE PRECISION VU (input) DOUBLE PRECISION If RANGE='V', the lower and upper bounds of the interval to be searched for eigenvalues. VL < VU. Not referenced if RANGE = 'A' or 'I'. IL (input) INTEGER IU (input) INTEGER If RANGE='I', the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. Not referenced if RANGE = 'A' or 'V'. ABSTOL (input) DOUBLE PRECISION The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to ABSTOL + EPS * max( |a|,|b| ) , where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*|T| will be used in its place, where |T| is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form. See "Computing Small Singular Values of Bidiagonal Matrices with Guaranteed High Relative Accuracy," by Demmel and Kahan, LAPACK Working Note #3. If high relative accuracy is important, set ABSTOL to DLAMCH( 'Safe minimum' ). Doing so will guarantee that eigenvalues are computed to high relative accuracy when possible in future releases. The current code does not make any guarantees about high relative accuracy, but furutre releases will. See J. Barlow and J. Demmel, "Computing Accurate Eigensystems of Scaled Diagonally Dominant Matrices", LAPACK Working Note #7, for a discussion of which matrices define their eigenvalues to high relative accuracy. M (output) INTEGER The total number of eigenvalues found. 0 <= M <= N. If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. W (output) DOUBLE PRECISION array, dimension (N) The first M elements contain the selected eigenvalues in ascending order. Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) If JOBZ = 'V', then if INFO = 0, the first M columns of Z contain the orthonormal eigenvectors of the matrix A corresponding to the selected eigenvalues, with the i-th column of Z holding the eigenvector associated with W(i). If JOBZ = 'N', then Z is not referenced. Note: the user must ensure that at least max(1,M) columns are supplied in the array Z; if RANGE = 'V', the exact value of M is not known in advance and an upper bound must be used. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) The support of the eigenvectors in Z, i.e., the indices indicating the nonzero elements in Z. The i-th eigenvector is nonzero only in elements ISUPPZ( 2*i-1 ) through ISUPPZ( 2*i ). ********* Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,26*N). For optimal efficiency, LWORK >= (NB+6)*N, where NB is the max of the blocksize for DSYTRD and DORMTR returned by ILAENV. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace/output) INTEGER array, dimension (LIWORK) On exit, if INFO = 0, IWORK(1) returns the optimal LWORK. LIWORK (input) INTEGER The dimension of the array IWORK. LIWORK >= max(1,10*N). If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, returns this value as the first entry of the IWORK array, and no error message related to LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: Internal error Further Details =============== Based on contributions by Inderjit Dhillon, IBM Almaden, USA Osni Marques, LBNL/NERSC, USA Ken Stanley, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__10 = 10; static integer c__1 = 1; static integer c__2 = 2; static integer c__3 = 3; static integer c__4 = 4; static integer c_n1 = -1; /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer indd, inde; static doublereal anrm; static integer imax; static doublereal rmin, rmax; static integer itmp1, i__, j, inddd, indee; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static doublereal sigma; extern logical lsame_(char *, char *); static integer iinfo; static char order[1]; static integer indwk; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer *); static integer lwmin; static logical lower, wantz; static integer nb, jj; extern doublereal dlamch_(char *); static logical alleig, indeig; static integer iscale, ieeeok, indibl, indifl; static logical valeig; static doublereal safmin; extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal abstll, bignum; static integer indtau, indisp; extern /* Subroutine */ int dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *), dstegr_(char *, char *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *); static integer indiwo, indwkn; extern doublereal dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), dsterf_(integer *, doublereal *, doublereal *, integer *); static integer liwmin; extern /* Subroutine */ int dormtr_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static integer llwrkn, llwork, nsplit; static doublereal smlnum; extern /* Subroutine */ int dsytrd_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static integer lwkopt; static logical lquery; static doublereal eps, vll, vuu, tmp1; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --isuppz; --work; --iwork; /* Function Body */ ieeeok = ilaenv_(&c__10, "DSYEVR", "N", &c__1, &c__2, &c__3, &c__4, ( ftnlen)6, (ftnlen)1); lower = lsame_(uplo, "L"); wantz = lsame_(jobz, "V"); alleig = lsame_(range, "A"); valeig = lsame_(range, "V"); indeig = lsame_(range, "I"); lquery = *lwork == -1 || *liwork == -1; /* Computing MAX */ i__1 = 1, i__2 = *n * 26; lwmin = max(i__1,i__2); /* Computing MAX */ i__1 = 1, i__2 = *n * 10; liwmin = max(i__1,i__2); *info = 0; if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (lower || lsame_(uplo, "U"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -8; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -9; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -10; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -15; } else if (*lwork < lwmin && ! lquery) { *info = -18; } else if (*liwork < liwmin && ! lquery) { *info = -20; } } if (*info == 0) { nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = nb, i__2 = ilaenv_(&c__1, "ZUNMTR", uplo, n, &c_n1, &c_n1, & c_n1, (ftnlen)6, (ftnlen)1); nb = max(i__1,i__2); /* Computing MAX */ i__1 = (nb + 1) * *n; lwkopt = max(i__1,lwmin); work[1] = (doublereal) lwkopt; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); xerbla_("DSYEVR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { work[1] = 1.; return 0; } if (*n == 1) { work[1] = 7.; if (alleig || indeig) { *m = 1; w[1] = a_ref(1, 1); } else { if (*vl < a_ref(1, 1) && *vu >= a_ref(1, 1)) { *m = 1; w[1] = a_ref(1, 1); } } if (wantz) { z___ref(1, 1) = 1.; } return 0; } /* Get machine constants. */ safmin = dlamch_("Safe minimum"); eps = dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); rmax = min(d__1,d__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; abstll = *abstol; vll = *vl; vuu = *vu; anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { if (lower) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n - j + 1; dscal_(&i__2, &sigma, &a_ref(j, j), &c__1); /* L10: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { dscal_(&j, &sigma, &a_ref(1, j), &c__1); /* L20: */ } } if (*abstol > 0.) { abstll = *abstol * sigma; } if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ indtau = 1; inde = indtau + *n; indd = inde + *n; indee = indd + *n; inddd = indee + *n; indifl = inddd + *n; indwk = indifl + *n; llwork = *lwork - indwk + 1; dsytrd_(uplo, n, &a[a_offset], lda, &work[indd], &work[inde], &work[ indtau], &work[indwk], &llwork, &iinfo); /* If all eigenvalues are desired then call DSTERF or SSTEGR and DORMTR. */ if ((alleig || indeig && *il == 1 && *iu == *n) && ieeeok == 1) { if (! wantz) { dcopy_(n, &work[indd], &c__1, &w[1], &c__1); i__1 = *n - 1; dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); dsterf_(n, &w[1], &work[indee], info); } else { i__1 = *n - 1; dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); dcopy_(n, &work[indd], &c__1, &work[inddd], &c__1); dstegr_(jobz, "A", n, &work[inddd], &work[indee], vl, vu, il, iu, abstol, m, &w[1], &z__[z_offset], ldz, &isuppz[1], &work[ indwk], lwork, &iwork[1], liwork, info); /* Apply orthogonal matrix used in reduction to tridiagonal form to eigenvectors returned by DSTEIN. */ if (wantz && *info == 0) { indwkn = inde; llwrkn = *lwork - indwkn + 1; dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau] , &z__[z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); } } if (*info == 0) { *m = *n; goto L30; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. Also call DSTEBZ and SSTEIN if SSTEGR fails. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indifl = 1; indibl = indifl + *n; indisp = indibl + *n; indiwo = indisp + *n; dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ indwk], &iwork[indiwo], info); if (wantz) { dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ indisp], &z__[z_offset], ldz, &work[indwk], &iwork[indiwo], & iwork[indifl], info); /* Apply orthogonal matrix used in reduction to tridiagonal form to eigenvectors returned by DSTEIN. */ indwkn = inde; llwrkn = *lwork - indwkn + 1; dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ L30: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } d__1 = 1. / sigma; dscal_(&imax, &d__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L40: */ } if (i__ != 0) { itmp1 = iwork[indibl + i__ - 1]; w[i__] = w[j]; iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; w[j] = tmp1; iwork[indibl + j - 1] = itmp1; dswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, j), &c__1); } /* L50: */ } } /* Set WORK(1) to optimal workspace size. */ work[1] = (doublereal) lwkopt; iwork[1] = liwmin; return 0; /* End of DSYEVR */ } /* dsyevr_ */
/* Subroutine */ int dstein_(integer * n, doublereal * d__, doublereal * e, integer * m, doublereal * w, integer * iblock, integer * isplit, doublereal * z__, integer * ldz, doublereal * work, integer * iwork, integer * ifail, integer * info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3, d__4, d__5; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer jblk, nblk; extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); static integer jmax; extern doublereal dnrm2_(integer *, doublereal *, integer *); static integer i__, j; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static integer iseed[4], gpind, iinfo; extern doublereal dasum_(integer *, doublereal *, integer *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer b1; extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer j1; static doublereal ortol; static integer indrv1, indrv2, indrv3, indrv4, indrv5, bn; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlagtf_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static doublereal xj; extern integer idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int xerbla_(char *, integer *), dlagts_( integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer nrmchk; extern /* Subroutine */ int dlarnv_(integer *, integer *, integer *, doublereal *); static integer blksiz; static doublereal onenrm, dtpcrt, pertol, scl, eps, sep, nrm, tol; static integer its; static doublereal xjm, ztr, eps1; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] /* -- LAPACK routine (instrumented to count operations, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Common block to return operation count and iteration count ITCNT is initialized to 0, OPS is only incremented Purpose ======= DSTEIN computes the eigenvectors of a real symmetric tridiagonal matrix T corresponding to specified eigenvalues, using inverse iteration. The maximum number of iterations allowed for each eigenvector is specified by an internal parameter MAXITS (currently set to 5). Arguments ========= N (input) INTEGER The order of the matrix. N >= 0. D (input) DOUBLE PRECISION array, dimension (N) The n diagonal elements of the tridiagonal matrix T. E (input) DOUBLE PRECISION array, dimension (N) The (n-1) subdiagonal elements of the tridiagonal matrix T, in elements 1 to N-1. E(N) need not be set. M (input) INTEGER The number of eigenvectors to be found. 0 <= M <= N. W (input) DOUBLE PRECISION array, dimension (N) The first M elements of W contain the eigenvalues for which eigenvectors are to be computed. The eigenvalues should be grouped by split-off block and ordered from smallest to largest within the block. ( The output array W from DSTEBZ with ORDER = 'B' is expected here. ) IBLOCK (input) INTEGER array, dimension (N) The submatrix indices associated with the corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to the first submatrix from the top, =2 if W(i) belongs to the second submatrix, etc. ( The output array IBLOCK from DSTEBZ is expected here. ) ISPLIT (input) INTEGER array, dimension (N) The splitting points, at which T breaks up into submatrices. The first submatrix consists of rows/columns 1 to ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 through ISPLIT( 2 ), etc. ( The output array ISPLIT from DSTEBZ is expected here. ) Z (output) DOUBLE PRECISION array, dimension (LDZ, M) The computed eigenvectors. The eigenvector associated with the eigenvalue W(i) is stored in the i-th column of Z. Any vector which fails to converge is set to its current iterate after MAXITS iterations. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N). WORK (workspace) DOUBLE PRECISION array, dimension (5*N) IWORK (workspace) INTEGER array, dimension (N) IFAIL (output) INTEGER array, dimension (M) On normal exit, all elements of IFAIL are zero. If one or more eigenvectors fail to converge after MAXITS iterations, then their indices are stored in array IFAIL. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, then i eigenvectors failed to converge in MAXITS iterations. Their indices are stored in array IFAIL. Internal Parameters =================== MAXITS INTEGER, default = 5 The maximum number of iterations performed. EXTRA INTEGER, default = 2 The number of iterations performed after norm growth criterion is satisfied, should be at least 1. ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; --w; --iblock; --isplit; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ *info = 0; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L10: */ } if (*n < 0) { *info = -1; } else if (*m < 0 || *m > *n) { *info = -4; } else if (*ldz < max(1, *n)) { *info = -9; } else { i__1 = *m; for (j = 2; j <= i__1; ++j) { if (iblock[j] < iblock[j - 1]) { *info = -6; goto L30; } if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) { *info = -5; goto L30; } /* L20: */ } L30: ; } if (*info != 0) { i__1 = -(*info); xerbla_("DSTEIN", &i__1); return 0; } /* Initialize iteration count. */ latime_1.itcnt = 0.; /* Quick return if possible */ if (*n == 0 || *m == 0) { return 0; } else if (*n == 1) { z___ref(1, 1) = 1.; return 0; } /* Get machine constants. */ eps = dlamch_("Precision"); /* Initialize seed for random number generator DLARNV. */ for (i__ = 1; i__ <= 4; ++i__) { iseed[i__ - 1] = 1; /* L40: */ } /* Initialize pointers. */ indrv1 = 0; indrv2 = indrv1 + *n; indrv3 = indrv2 + *n; indrv4 = indrv3 + *n; indrv5 = indrv4 + *n; /* Compute eigenvectors of matrix blocks. */ j1 = 1; i__1 = iblock[*m]; for (nblk = 1; nblk <= i__1; ++nblk) { /* Find starting and ending indices of block nblk. */ if (nblk == 1) { b1 = 1; } else { b1 = isplit[nblk - 1] + 1; } bn = isplit[nblk]; blksiz = bn - b1 + 1; if (blksiz == 1) { goto L60; } gpind = b1; /* Compute reorthogonalization criterion and stopping criterion. */ onenrm = (d__1 = d__[b1], abs(d__1)) + (d__2 = e[b1], abs(d__2)); /* Computing MAX */ d__3 = onenrm, d__4 = (d__1 = d__[bn], abs(d__1)) + (d__2 = e[bn - 1], abs(d__2)); onenrm = max(d__3, d__4); i__2 = bn - 1; for (i__ = b1 + 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__4 = onenrm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[i__ - 1], abs (d__2)) + (d__3 = e[i__], abs(d__3)); onenrm = max(d__4, d__5); /* L50: */ } ortol = onenrm * .001; dtpcrt = sqrt(.1 / blksiz); /* Increment opcount for computing criteria. */ latime_1.ops = latime_1.ops + ((bn - b1) << 1) + 3; /* Loop through eigenvalues of block nblk. */ L60: jblk = 0; i__2 = *m; for (j = j1; j <= i__2; ++j) { if (iblock[j] != nblk) { j1 = j; goto L160; } ++jblk; xj = w[j]; /* Skip all the work if the block size is one. */ if (blksiz == 1) { work[indrv1 + 1] = 1.; goto L120; } /* If eigenvalues j and j-1 are too close, add a relatively small perturbation. */ if (jblk > 1) { eps1 = (d__1 = eps * xj, abs(d__1)); pertol = eps1 * 10.; sep = xj - xjm; if (sep < pertol) { xj = xjm + pertol; } } its = 0; nrmchk = 0; /* Get random starting vector. */ dlarnv_(&c__2, iseed, &blksiz, &work[indrv1 + 1]); /* Increment opcount for getting random starting vector. ( DLARND(2,.) requires 9 flops. ) */ latime_1.ops += blksiz * 9; /* Copy the matrix T so it won't be destroyed in factorization. */ dcopy_(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1); i__3 = blksiz - 1; dcopy_(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1); i__3 = blksiz - 1; dcopy_(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1); /* Compute LU factors with partial pivoting ( PT = LU ) */ tol = 0.; dlagtf_(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo); /* Increment opcount for computing LU factors. ( DLAGTF(BLKSIZ,...) requires about 8*BLKSIZ flops. ) */ latime_1.ops += blksiz << 3; /* Update iteration count. */ L70: ++its; if (its > 5) { goto L100; } /* Normalize and scale the righthand side vector Pb. Computing MAX */ d__2 = eps, d__3 = (d__1 = work[indrv4 + blksiz], abs(d__1)); scl = blksiz * onenrm * max(d__2, d__3) / dasum_(&blksiz, &work [indrv1 + 1], &c__1); dscal_(&blksiz, &scl, &work[indrv1 + 1], &c__1); /* Solve the system LU = Pb. */ dlagts_(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], &work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[indrv1 + 1], &tol, &iinfo); /* Increment opcount for scaling and solving linear system. ( DLAGTS(-1,BLKSIZ,...) requires about 8*BLKSIZ flops. ) */ latime_1.ops = latime_1.ops + 3 + blksiz * 10; /* Reorthogonalize by modified Gram-Schmidt if eigenvalues are close enough. */ if (jblk == 1) { goto L90; } if ((d__1 = xj - xjm, abs(d__1)) > ortol) { gpind = j; } if (gpind != j) { i__3 = j - 1; for (i__ = gpind; i__ <= i__3; ++i__) { ztr = -ddot_(&blksiz, &work[indrv1 + 1], &c__1, &z___ref(b1, i__), &c__1); daxpy_(&blksiz, &ztr, &z___ref(b1, i__), &c__1, &work[indrv1 + 1], &c__1); /* L80: */ } /* Increment opcount for reorthogonalizing. */ latime_1.ops += (j - gpind) * blksiz << 2; } /* Check the infinity norm of the iterate. */ L90: jmax = idamax_(&blksiz, &work[indrv1 + 1], &c__1); nrm = (d__1 = work[indrv1 + jmax], abs(d__1)); /* Continue for additional iterations after norm reaches stopping criterion. */ if (nrm < dtpcrt) { goto L70; } ++nrmchk; if (nrmchk < 3) { goto L70; } goto L110; /* If stopping criterion was not satisfied, update info and store eigenvector number in array ifail. */ L100: ++(*info); ifail[*info] = j; /* Accept iterate as jth eigenvector. */ L110: scl = 1. / dnrm2_(&blksiz, &work[indrv1 + 1], &c__1); jmax = idamax_(&blksiz, &work[indrv1 + 1], &c__1); if (work[indrv1 + jmax] < 0.) { scl = -scl; } dscal_(&blksiz, &scl, &work[indrv1 + 1], &c__1); /* Increment opcount for scaling. */ latime_1.ops += blksiz * 3; L120: i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { z___ref(i__, j) = 0.; /* L130: */ } i__3 = blksiz; for (i__ = 1; i__ <= i__3; ++i__) { z___ref(b1 + i__ - 1, j) = work[indrv1 + i__]; /* L140: */ } /* Save the shift to check eigenvalue spacing at next iteration. */ xjm = xj; /* L150: */ } L160: ; } return 0; /* End of DSTEIN */ } /* dstein_ */
/* Subroutine */ int dlakf2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *b, doublereal *d__, doublereal *e, doublereal *z__, integer *ldz) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, d_dim1, d_offset, e_dim1, e_offset, z_dim1, z_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, j, l, ik, jk, mn; extern /* Subroutine */ int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); static integer mn2; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define d___ref(a_1,a_2) d__[(a_2)*d_dim1 + a_1] #define e_ref(a_1,a_2) e[(a_2)*e_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= Form the 2*M*N by 2*M*N matrix Z = [ kron(In, A) -kron(B', Im) ] [ kron(In, D) -kron(E', Im) ], where In is the identity matrix of size n and X' is the transpose of X. kron(X, Y) is the Kronecker product between the matrices X and Y. Arguments ========= M (input) INTEGER Size of matrix, must be >= 1. N (input) INTEGER Size of matrix, must be >= 1. A (input) DOUBLE PRECISION, dimension ( LDA, M ) The matrix A in the output matrix Z. LDA (input) INTEGER The leading dimension of A, B, D, and E. ( LDA >= M+N ) B (input) DOUBLE PRECISION, dimension ( LDA, N ) D (input) DOUBLE PRECISION, dimension ( LDA, M ) E (input) DOUBLE PRECISION, dimension ( LDA, N ) The matrices used in forming the output matrix Z. Z (output) DOUBLE PRECISION, dimension ( LDZ, 2*M*N ) The resultant Kronecker M*N*2 by M*N*2 matrix (see above.) LDZ (input) INTEGER The leading dimension of Z. ( LDZ >= 2*M*N ) ==================================================================== Initialize Z Parameter adjustments */ e_dim1 = *lda; e_offset = 1 + e_dim1 * 1; e -= e_offset; d_dim1 = *lda; d_offset = 1 + d_dim1 * 1; d__ -= d_offset; b_dim1 = *lda; b_offset = 1 + b_dim1 * 1; b -= b_offset; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; /* Function Body */ mn = *m * *n; mn2 = mn << 1; dlaset_("Full", &mn2, &mn2, &c_b3, &c_b3, &z__[z_offset], ldz); ik = 1; i__1 = *n; for (l = 1; l <= i__1; ++l) { /* form kron(In, A) */ i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = *m; for (j = 1; j <= i__3; ++j) { z___ref(ik + i__ - 1, ik + j - 1) = a_ref(i__, j); /* L10: */ } /* L20: */ } /* form kron(In, D) */ i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = *m; for (j = 1; j <= i__3; ++j) { z___ref(ik + mn + i__ - 1, ik + j - 1) = d___ref(i__, j); /* L30: */ } /* L40: */ } ik += *m; /* L50: */ } ik = 1; i__1 = *n; for (l = 1; l <= i__1; ++l) { jk = mn + 1; i__2 = *n; for (j = 1; j <= i__2; ++j) { /* form -kron(B', Im) */ i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { z___ref(ik + i__ - 1, jk + i__ - 1) = -b_ref(j, l); /* L60: */ } /* form -kron(E', Im) */ i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { z___ref(ik + mn + i__ - 1, jk + i__ - 1) = -e_ref(j, l); /* L70: */ } jk += *m; /* L80: */ } ik += *m; /* L90: */ } return 0; /* End of DLAKF2 */ } /* dlakf2_ */
/* Subroutine */ int ssgt01_(integer *itype, char *uplo, integer *n, integer * m, real *a, integer *lda, real *b, integer *ldb, real *z__, integer * ldz, real *d__, real *work, real *result) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1; /* Local variables */ static integer i__; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static real anorm; extern /* Subroutine */ int ssymm_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *), slansy_(char *, char *, integer *, real *, integer *, real *); static real ulp; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 modified August 1997, a new parameter M is added to the calling sequence. Purpose ======= SSGT01 checks a decomposition of the form A Z = B Z D or A B Z = Z D or B A Z = Z D where A is a symmetric matrix, B is symmetric positive definite, Z is orthogonal, and D is diagonal. One of the following test ratios is computed: ITYPE = 1: RESULT(1) = | A Z - B Z D | / ( |A| |Z| n ulp ) ITYPE = 2: RESULT(1) = | A B Z - Z D | / ( |A| |Z| n ulp ) ITYPE = 3: RESULT(1) = | B A Z - Z D | / ( |A| |Z| n ulp ) Arguments ========= ITYPE (input) INTEGER The form of the symmetric generalized eigenproblem. = 1: A*z = (lambda)*B*z = 2: A*B*z = (lambda)*z = 3: B*A*z = (lambda)*z UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrices A and B is stored. = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The order of the matrix A. N >= 0. M (input) INTEGER The number of eigenvalues found. 0 <= M <= N. A (input) REAL array, dimension (LDA, N) The original symmetric matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input) REAL array, dimension (LDB, N) The original symmetric positive definite matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). Z (input) REAL array, dimension (LDZ, M) The computed eigenvectors of the generalized eigenproblem. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N). D (input) REAL array, dimension (M) The computed eigenvalues of the generalized eigenproblem. WORK (workspace) REAL array, dimension (N*N) RESULT (output) REAL array, dimension (1) The test ratio as described above. ===================================================================== Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --d__; --work; --result; /* Function Body */ result[1] = 0.f; if (*n <= 0) { return 0; } ulp = slamch_("Epsilon"); /* Compute product of 1-norms of A and Z. */ anorm = slansy_("1", uplo, n, &a[a_offset], lda, &work[1]) * slange_("1", n, m, &z__[z_offset], ldz, &work[1]); if (anorm == 0.f) { anorm = 1.f; } if (*itype == 1) { /* Norm of AZ - BZD */ ssymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &z__[z_offset], ldz, &c_b7, &work[1], n); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { sscal_(n, &d__[i__], &z___ref(1, i__), &c__1); /* L10: */ } ssymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &z__[z_offset], ldz, &c_b12, &work[1], n); result[1] = slange_("1", n, m, &work[1], n, &work[1]) / anorm / (*n * ulp); } else if (*itype == 2) { /* Norm of ABZ - ZD */ ssymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &z__[z_offset], ldz, &c_b7, &work[1], n); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { sscal_(n, &d__[i__], &z___ref(1, i__), &c__1); /* L20: */ } ssymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &work[1], n, & c_b12, &z__[z_offset], ldz); result[1] = slange_("1", n, m, &z__[z_offset], ldz, &work[1]) / anorm / (*n * ulp); } else if (*itype == 3) { /* Norm of BAZ - ZD */ ssymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &z__[z_offset], ldz, &c_b7, &work[1], n); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { sscal_(n, &d__[i__], &z___ref(1, i__), &c__1); /* L30: */ } ssymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &work[1], n, & c_b12, &z__[z_offset], ldz); result[1] = slange_("1", n, m, &z__[z_offset], ldz, &work[1]) / anorm / (*n * ulp); } return 0; /* End of SSGT01 */ } /* ssgt01_ */
/* Subroutine */ int dgghrd_(char *compq, char *compz, integer *n, integer * ilo, integer *ihi, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *q, integer *ldq, doublereal *z__, integer * ldz, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DGGHRD reduces a pair of real matrices (A,B) to generalized upper Hessenberg form using orthogonal transformations, where A is a general matrix and B is upper triangular: Q' * A * Z = H and Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular, and Q and Z are orthogonal, and ' means transpose. The orthogonal matrices Q and Z are determined as products of Givens rotations. They may either be formed explicitly, or they may be postmultiplied into input matrices Q1 and Z1, so that Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)' Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)' Arguments ========= COMPQ (input) CHARACTER*1 = 'N': do not compute Q; = 'I': Q is initialized to the unit matrix, and the orthogonal matrix Q is returned; = 'V': Q must contain an orthogonal matrix Q1 on entry, and the product Q1*Q is returned. COMPZ (input) CHARACTER*1 = 'N': do not compute Z; = 'I': Z is initialized to the unit matrix, and the orthogonal matrix Z is returned; = 'V': Z must contain an orthogonal matrix Z1 on entry, and the product Z1*Z is returned. N (input) INTEGER The order of the matrices A and B. N >= 0. ILO (input) INTEGER IHI (input) INTEGER It is assumed that A is already upper triangular in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally set by a previous call to DGGBAL; otherwise they should be set to 1 and N respectively. 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. A (input/output) DOUBLE PRECISION array, dimension (LDA, N) On entry, the N-by-N general matrix to be reduced. On exit, the upper triangle and the first subdiagonal of A are overwritten with the upper Hessenberg matrix H, and the rest is set to zero. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) DOUBLE PRECISION array, dimension (LDB, N) On entry, the N-by-N upper triangular matrix B. On exit, the upper triangular matrix T = Q' B Z. The elements below the diagonal are set to zero. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) If COMPQ='N': Q is not referenced. If COMPQ='I': on entry, Q need not be set, and on exit it contains the orthogonal matrix Q, where Q' is the product of the Givens transformations which are applied to A and B on the left. If COMPQ='V': on entry, Q must contain an orthogonal matrix Q1, and on exit this is overwritten by Q1*Q. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) If COMPZ='N': Z is not referenced. If COMPZ='I': on entry, Z need not be set, and on exit it contains the orthogonal matrix Z, which is the product of the Givens transformations which are applied to A and B on the right. If COMPZ='V': on entry, Z must contain an orthogonal matrix Z1, and on exit this is overwritten by Z1*Z. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== This routine reduces A to Hessenberg and B to triangular form by an unblocked reduction, as described in _Matrix_Computations_, by Golub and Van Loan (Johns Hopkins Press.) ===================================================================== Decode COMPQ Parameter adjustments */ /* Table of constant values */ static doublereal c_b10 = 0.; static doublereal c_b11 = 1.; static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2, i__3; /* Local variables */ static integer jcol; static doublereal temp; extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); static integer jrow; static doublereal c__, s; extern logical lsame_(char *, char *); extern /* Subroutine */ int dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), xerbla_(char *, integer *); static integer icompq, icompz; static logical ilq, ilz; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; /* Function Body */ if (lsame_(compq, "N")) { ilq = FALSE_; icompq = 1; } else if (lsame_(compq, "V")) { ilq = TRUE_; icompq = 2; } else if (lsame_(compq, "I")) { ilq = TRUE_; icompq = 3; } else { icompq = 0; } /* Decode COMPZ */ if (lsame_(compz, "N")) { ilz = FALSE_; icompz = 1; } else if (lsame_(compz, "V")) { ilz = TRUE_; icompz = 2; } else if (lsame_(compz, "I")) { ilz = TRUE_; icompz = 3; } else { icompz = 0; } /* Test the input parameters. */ *info = 0; if (icompq <= 0) { *info = -1; } else if (icompz <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ilo < 1) { *info = -4; } else if (*ihi > *n || *ihi < *ilo - 1) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (ilq && *ldq < *n || *ldq < 1) { *info = -11; } else if (ilz && *ldz < *n || *ldz < 1) { *info = -13; } if (*info != 0) { i__1 = -(*info); xerbla_("DGGHRD", &i__1); return 0; } /* Initialize Q and Z if desired. */ if (icompq == 3) { dlaset_("Full", n, n, &c_b10, &c_b11, &q[q_offset], ldq); } if (icompz == 3) { dlaset_("Full", n, n, &c_b10, &c_b11, &z__[z_offset], ldz); } /* Quick return if possible */ if (*n <= 1) { return 0; } /* Zero out lower triangle of B */ i__1 = *n - 1; for (jcol = 1; jcol <= i__1; ++jcol) { i__2 = *n; for (jrow = jcol + 1; jrow <= i__2; ++jrow) { b_ref(jrow, jcol) = 0.; /* L10: */ } /* L20: */ } /* Reduce A and B */ i__1 = *ihi - 2; for (jcol = *ilo; jcol <= i__1; ++jcol) { i__2 = jcol + 2; for (jrow = *ihi; jrow >= i__2; --jrow) { /* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) */ temp = a_ref(jrow - 1, jcol); dlartg_(&temp, &a_ref(jrow, jcol), &c__, &s, &a_ref(jrow - 1, jcol)); a_ref(jrow, jcol) = 0.; i__3 = *n - jcol; drot_(&i__3, &a_ref(jrow - 1, jcol + 1), lda, &a_ref(jrow, jcol + 1), lda, &c__, &s); i__3 = *n + 2 - jrow; drot_(&i__3, &b_ref(jrow - 1, jrow - 1), ldb, &b_ref(jrow, jrow - 1), ldb, &c__, &s); if (ilq) { drot_(n, &q_ref(1, jrow - 1), &c__1, &q_ref(1, jrow), &c__1, & c__, &s); } /* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) */ temp = b_ref(jrow, jrow); dlartg_(&temp, &b_ref(jrow, jrow - 1), &c__, &s, &b_ref(jrow, jrow)); b_ref(jrow, jrow - 1) = 0.; drot_(ihi, &a_ref(1, jrow), &c__1, &a_ref(1, jrow - 1), &c__1, & c__, &s); i__3 = jrow - 1; drot_(&i__3, &b_ref(1, jrow), &c__1, &b_ref(1, jrow - 1), &c__1, & c__, &s); if (ilz) { drot_(n, &z___ref(1, jrow), &c__1, &z___ref(1, jrow - 1), & c__1, &c__, &s); } /* L30: */ } /* L40: */ } return 0; /* End of DGGHRD */ } /* dgghrd_ */
/* Subroutine */ int spteqr_(char *compz, integer *n, real *d__, real *e, real *z__, integer *ldz, real *work, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static real c__[1] /* was [1][1] */; static integer i__; extern logical lsame_(char *, char *); static real vt[1] /* was [1][1] */; extern /* Subroutine */ int xerbla_(char *, integer *), slaset_( char *, integer *, integer *, real *, real *, real *, integer *), sbdsqr_(char *, integer *, integer *, integer *, integer *, real *, real *, real *, integer *, real *, integer *, real *, integer *, real *, integer *); static integer icompz; extern /* Subroutine */ int spttrf_(integer *, real *, real *, integer *); static integer nru; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] /* -- LAPACK routine (instrumented to count operations, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Common block to return operation count and iteration count ITCNT is initialized to 0, OPS is only incremented Purpose ======= SPTEQR computes all eigenvalues and, optionally, eigenvectors of a symmetric positive definite tridiagonal matrix by first factoring the matrix using SPTTRF, and then calling SBDSQR to compute the singular values of the bidiagonal factor. This routine computes the eigenvalues of the positive definite tridiagonal matrix to high relative accuracy. This means that if the eigenvalues range over many orders of magnitude in size, then the small eigenvalues and corresponding eigenvectors will be computed more accurately than, for example, with the standard QR method. The eigenvectors of a full or band symmetric positive definite matrix can also be found if SSYTRD, SSPTRD, or SSBTRD has been used to reduce this matrix to tridiagonal form. (The reduction to tridiagonal form, however, may preclude the possibility of obtaining high relative accuracy in the small eigenvalues of the original matrix, if these eigenvalues range over many orders of magnitude.) Arguments ========= COMPZ (input) CHARACTER*1 = 'N': Compute eigenvalues only. = 'V': Compute eigenvectors of original symmetric matrix also. Array Z contains the orthogonal matrix used to reduce the original matrix to tridiagonal form. = 'I': Compute eigenvectors of tridiagonal matrix also. N (input) INTEGER The order of the matrix. N >= 0. D (input/output) REAL array, dimension (N) On entry, the n diagonal elements of the tridiagonal matrix. On normal exit, D contains the eigenvalues, in descending order. E (input/output) REAL array, dimension (N-1) On entry, the (n-1) subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. Z (input/output) REAL array, dimension (LDZ, N) On entry, if COMPZ = 'V', the orthogonal matrix used in the reduction to tridiagonal form. On exit, if COMPZ = 'V', the orthonormal eigenvectors of the original symmetric matrix; if COMPZ = 'I', the orthonormal eigenvectors of the tridiagonal matrix. If INFO > 0 on exit, Z contains the eigenvectors associated with only the stored eigenvalues. If COMPZ = 'N', then Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if COMPZ = 'V' or 'I', LDZ >= max(1,N). WORK (workspace) REAL array, dimension (4*N) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, and i is: <= N the Cholesky factorization of the matrix could not be performed because the i-th principal minor was not positive definite. > N the SVD algorithm failed to converge; if INFO = N+i, i off-diagonal elements of the bidiagonal factor did not converge to zero. ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; /* Function Body */ *info = 0; if (lsame_(compz, "N")) { icompz = 0; } else if (lsame_(compz, "V")) { icompz = 1; } else if (lsame_(compz, "I")) { icompz = 2; } else { icompz = -1; } if (icompz < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("SPTEQR", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (icompz > 0) { z___ref(1, 1) = 1.f; } return 0; } if (icompz == 2) { slaset_("Full", n, n, &c_b7, &c_b8, &z__[z_offset], ldz); } /* Call SPTTRF to factor the matrix. */ latime_1.ops = latime_1.ops + *n * 5 - 4; spttrf_(n, &d__[1], &e[1], info); if (*info != 0) { return 0; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = sqrt(d__[i__]); /* L10: */ } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] *= d__[i__]; /* L20: */ } /* Call SBDSQR to compute the singular values/vectors of the bidiagonal factor. */ if (icompz > 0) { nru = *n; } else { nru = 0; } sbdsqr_("Lower", n, &c__0, &nru, &c__0, &d__[1], &e[1], vt, &c__1, &z__[ z_offset], ldz, c__, &c__1, &work[1], info); /* Square the singular values. */ if (*info == 0) { latime_1.ops += *n; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] *= d__[i__]; /* L30: */ } } else { *info = *n + *info; } return 0; /* End of SPTEQR */ } /* spteqr_ */
/* Subroutine */ int zhbgvx_(char *jobz, char *range, char *uplo, integer *n, integer *ka, integer *kb, doublecomplex *ab, integer *ldab, doublecomplex *bb, integer *ldbb, doublecomplex *q, integer *ldq, doublereal *vl, doublereal *vu, integer *il, integer *iu, doublereal * abstol, integer *m, doublereal *w, doublecomplex *z__, integer *ldz, doublecomplex *work, doublereal *rwork, integer *iwork, integer * ifail, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= ZHBGVX computes all the eigenvalues, and optionally, the eigenvectors of a complex generalized Hermitian-definite banded eigenproblem, of the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian and banded, and B is also positive definite. Eigenvalues and eigenvectors can be selected by specifying either all eigenvalues, a range of values or a range of indices for the desired eigenvalues. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. RANGE (input) CHARACTER*1 = 'A': all eigenvalues will be found; = 'V': all eigenvalues in the half-open interval (VL,VU] will be found; = 'I': the IL-th through IU-th eigenvalues will be found. UPLO (input) CHARACTER*1 = 'U': Upper triangles of A and B are stored; = 'L': Lower triangles of A and B are stored. N (input) INTEGER The order of the matrices A and B. N >= 0. KA (input) INTEGER The number of superdiagonals of the matrix A if UPLO = 'U', or the number of subdiagonals if UPLO = 'L'. KA >= 0. KB (input) INTEGER The number of superdiagonals of the matrix B if UPLO = 'U', or the number of subdiagonals if UPLO = 'L'. KB >= 0. AB (input/output) COMPLEX*16 array, dimension (LDAB, N) On entry, the upper or lower triangle of the Hermitian band matrix A, stored in the first ka+1 rows of the array. The j-th column of A is stored in the j-th column of the array AB as follows: if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). On exit, the contents of AB are destroyed. LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KA+1. BB (input/output) COMPLEX*16 array, dimension (LDBB, N) On entry, the upper or lower triangle of the Hermitian band matrix B, stored in the first kb+1 rows of the array. The j-th column of B is stored in the j-th column of the array BB as follows: if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). On exit, the factor S from the split Cholesky factorization B = S**H*S, as returned by ZPBSTF. LDBB (input) INTEGER The leading dimension of the array BB. LDBB >= KB+1. Q (output) COMPLEX*16 array, dimension (LDQ, N) If JOBZ = 'V', the n-by-n matrix used in the reduction of A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, and consequently C to tridiagonal form. If JOBZ = 'N', the array Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. If JOBZ = 'N', LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). VL (input) DOUBLE PRECISION VU (input) DOUBLE PRECISION If RANGE='V', the lower and upper bounds of the interval to be searched for eigenvalues. VL < VU. Not referenced if RANGE = 'A' or 'I'. IL (input) INTEGER IU (input) INTEGER If RANGE='I', the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. Not referenced if RANGE = 'A' or 'V'. ABSTOL (input) DOUBLE PRECISION The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to ABSTOL + EPS * max( |a|,|b| ) , where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*|T| will be used in its place, where |T| is the 1-norm of the tridiagonal matrix obtained by reducing AP to tridiagonal form. Eigenvalues will be computed most accurately when ABSTOL is set to twice the underflow threshold 2*DLAMCH('S'), not zero. If this routine returns with INFO>0, indicating that some eigenvectors did not converge, try setting ABSTOL to 2*DLAMCH('S'). M (output) INTEGER The total number of eigenvalues found. 0 <= M <= N. If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. W (output) DOUBLE PRECISION array, dimension (N) If INFO = 0, the eigenvalues in ascending order. Z (output) COMPLEX*16 array, dimension (LDZ, N) If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of eigenvectors, with the i-th column of Z holding the eigenvector associated with W(i). The eigenvectors are normalized so that Z**H*B*Z = I. If JOBZ = 'N', then Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= N. WORK (workspace) COMPLEX*16 array, dimension (N) RWORK (workspace) DOUBLE PRECISION array, dimension (7*N) IWORK (workspace) INTEGER array, dimension (5*N) IFAIL (output) INTEGER array, dimension (N) If JOBZ = 'V', then if INFO = 0, the first M elements of IFAIL are zero. If INFO > 0, then IFAIL contains the indices of the eigenvectors that failed to converge. If JOBZ = 'N', then IFAIL is not referenced. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, and i is: <= N: then i eigenvectors failed to converge. Their indices are stored in array IFAIL. > N: if INFO = N + i, for 1 <= i <= N, then ZPBSTF returned INFO = i: B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed. Further Details =============== Based on contributions by Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static doublecomplex c_b1 = {0.,0.}; static doublecomplex c_b2 = {1.,0.}; static integer c__1 = 1; /* System generated locals */ integer ab_dim1, ab_offset, bb_dim1, bb_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2; /* Local variables */ static integer indd, inde; static char vect[1]; static integer itmp1, i__, j, indee; extern logical lsame_(char *, char *); static integer iinfo; static char order[1]; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *); static logical upper, wantz; extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); static integer jj; static logical alleig, indeig; static integer indibl; static logical valeig; extern /* Subroutine */ int xerbla_(char *, integer *); static integer indiwk, indisp; extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, integer *), dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), zhbtrd_(char *, char *, integer *, integer *, doublecomplex *, integer *, doublereal *, doublereal *, doublecomplex *, integer *, doublecomplex *, integer *); static integer indrwk, indwrk; extern /* Subroutine */ int zhbgst_(char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublereal *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static integer nsplit; extern /* Subroutine */ int zpbstf_(char *, integer *, integer *, doublecomplex *, integer *, integer *), zstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublecomplex *, integer *, doublereal *, integer *, integer *, integer *), zsteqr_(char *, integer *, doublereal *, doublereal *, doublecomplex *, integer *, doublereal *, integer *); static doublereal tmp1; #define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1 #define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; bb_dim1 = *ldbb; bb_offset = 1 + bb_dim1 * 1; bb -= bb_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --rwork; --iwork; --ifail; /* Function Body */ wantz = lsame_(jobz, "V"); upper = lsame_(uplo, "U"); alleig = lsame_(range, "A"); valeig = lsame_(range, "V"); indeig = lsame_(range, "I"); *info = 0; if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (upper || lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*ka < 0) { *info = -5; } else if (*kb < 0 || *kb > *ka) { *info = -6; } else if (*ldab < *ka + 1) { *info = -8; } else if (*ldbb < *kb + 1) { *info = -10; } else if (valeig && *n > 0 && *vu <= *vl) { *info = -12; } else if (indeig && *il < 1) { *info = -13; } else if (indeig && (*iu < min(*n,*il) || *iu > *n)) { *info = -14; } else if (*ldz < 1 || wantz && *ldz < *n) { *info = -19; } if (*info != 0) { i__1 = -(*info); xerbla_("ZHBGVX", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a split Cholesky factorization of B. */ zpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem. */ zhbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, &q[q_offset], ldq, &work[1], &rwork[1], &iinfo); /* Solve the standard eigenvalue problem. Reduce Hermitian band matrix to tridiagonal form. */ indd = 1; inde = indd + *n; indrwk = inde + *n; indwrk = 1; if (wantz) { *(unsigned char *)vect = 'U'; } else { *(unsigned char *)vect = 'N'; } zhbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &rwork[indd], &rwork[ inde], &q[q_offset], ldq, &work[indwrk], &iinfo); /* If all eigenvalues are desired and ABSTOL is less than or equal to zero, then call DSTERF or ZSTEQR. If this fails for some eigenvalue, then try DSTEBZ. */ if ((alleig || indeig && *il == 1 && *iu == *n) && *abstol <= 0.) { dcopy_(n, &rwork[indd], &c__1, &w[1], &c__1); indee = indrwk + (*n << 1); i__1 = *n - 1; dcopy_(&i__1, &rwork[inde], &c__1, &rwork[indee], &c__1); if (! wantz) { dsterf_(n, &w[1], &rwork[indee], info); } else { zlacpy_("A", n, n, &q[q_offset], ldq, &z__[z_offset], ldz); zsteqr_(jobz, n, &w[1], &rwork[indee], &z__[z_offset], ldz, & rwork[indrwk], info); if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L10: */ } } } if (*info == 0) { *m = *n; goto L30; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, call ZSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indibl = 1; indisp = indibl + *n; indiwk = indisp + *n; dstebz_(range, order, n, vl, vu, il, iu, abstol, &rwork[indd], &rwork[ inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &rwork[ indrwk], &iwork[indiwk], info); if (wantz) { zstein_(n, &rwork[indd], &rwork[inde], m, &w[1], &iwork[indibl], & iwork[indisp], &z__[z_offset], ldz, &rwork[indrwk], &iwork[ indiwk], &ifail[1], info); /* Apply unitary matrix used in reduction to tridiagonal form to eigenvectors returned by ZSTEIN. */ i__1 = *m; for (j = 1; j <= i__1; ++j) { zcopy_(n, &z___ref(1, j), &c__1, &work[1], &c__1); zgemv_("N", n, n, &c_b2, &q[q_offset], ldq, &work[1], &c__1, & c_b1, &z___ref(1, j), &c__1); /* L20: */ } } L30: /* If eigenvalues are not in order, then sort them, along with eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L40: */ } if (i__ != 0) { itmp1 = iwork[indibl + i__ - 1]; w[i__] = w[j]; iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; w[j] = tmp1; iwork[indibl + j - 1] = itmp1; zswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, j), &c__1); if (*info != 0) { itmp1 = ifail[i__]; ifail[i__] = ifail[j]; ifail[j] = itmp1; } } /* L50: */ } } return 0; /* End of ZHBGVX */ } /* zhbgvx_ */