/* Subroutine */ int clalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real *z__, real *poles, integer *givptr, integer *givcol, integer * ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CLALSA is an itermediate step in solving the least squares problem by computing the SVD of the coefficient matrix in compact form (The singular vectors are computed as products of simple orthorgonal matrices.). If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector matrix of an upper bidiagonal matrix to the right hand side; and if ICOMPQ = 1, CLALSA applies the right singular vector matrix to the right hand side. The singular vector matrices were generated in compact form by CLALSA. Arguments ========= ICOMPQ (input) INTEGER Specifies whether the left or the right singular vector matrix is involved. = 0: Left singular vector matrix = 1: Right singular vector matrix SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The row and column dimensions of the upper bidiagonal matrix. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input) COMPLEX array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B in the calling subprogram. LDB must be at least max(1,MAX( M, N ) ). BX (output) COMPLEX array, dimension ( LDBX, NRHS ) On exit, the result of applying the left or right singular vector matrix to B. LDBX (input) INTEGER The leading dimension of BX. U (input) REAL array, dimension ( LDU, SMLSIZ ). On entry, U contains the left singular vector matrices of all subproblems at the bottom level. LDU (input) INTEGER, LDU = > N. The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). On entry, VT' contains the right singular vector matrices of all subproblems at the bottom level. K (input) INTEGER array, dimension ( N ). DIFL (input) REAL array, dimension ( LDU, NLVL ). where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(*, 2 * I) record the normalizing factors of the right singular vectors matrices of subproblems on I-th level. Z (input) REAL array, dimension ( LDU, NLVL ). On entry, Z(1, I) contains the components of the deflation- adjusted updating row vector for subproblems on the I-th level. POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old singular values involved in the secular equations on the I-th level. GIVPTR (input) INTEGER array, dimension ( N ). On entry, GIVPTR( I ) records the number of Givens rotations performed on the I-th problem on the computation tree. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the locations of Givens rotations performed on the I-th level on the computation tree. LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). On entry, PERM(*, I) records permutations done on the I-th level of the computation tree. GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- values of Givens rotations performed on the I-th level on the computation tree. C (input) REAL array, dimension ( N ). On entry, if the I-th subproblem is not square, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. S (input) REAL array, dimension ( N ). On entry, if the I-th subproblem is not square, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. RWORK (workspace) REAL array, dimension at least max ( N, (SMLSZ+1)*NRHS*3 ). IWORK (workspace) INTEGER array. The dimension must be at least 3 * N INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== Based on contributions by Ming Gu and Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA Osni Marques, LBNL/NERSC, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b9 = 1.f; static real c_b10 = 0.f; static integer c__2 = 2; /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5, i__6; complex q__1; /* Builtin functions */ double r_imag(complex *); integer pow_ii(integer *, integer *); /* Local variables */ static integer jcol, nlvl, sqre, jrow, i__, j, jimag, jreal, inode, ndiml; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static integer ndimr; extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *); static integer i1; extern /* Subroutine */ int clals0_(integer *, integer *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, integer *, integer *, real *, integer *, real *, real *, real *, real *, integer *, real *, real *, real *, integer *); static integer ic, lf, nd, ll, nl, nr; extern /* Subroutine */ int xerbla_(char *, integer *), slasdt_( integer *, integer *, integer *, integer *, integer *, integer *, integer *); static integer im1, nlf, nrf, lvl, ndb1, nlp1, lvl2, nrp1; #define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1] #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_subscr(a_1,a_2) (a_2)*bx_dim1 + a_1 #define bx_ref(a_1,a_2) bx[bx_subscr(a_1,a_2)] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1 * 1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1 * 1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; --c__; --s; --rwork; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < *smlsiz) { *info = -3; } else if (*nrhs < 1) { *info = -4; } else if (*ldb < *n) { *info = -6; } else if (*ldbx < *n) { *info = -8; } else if (*ldu < *n) { *info = -10; } else if (*ldgcol < *n) { *info = -19; } if (*info != 0) { i__1 = -(*info); xerbla_("CLALSA", &i__1); return 0; } /* Book-keeping and setting up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* The following code applies back the left singular vector factors. For applying back the right singular vector factors, go to 170. */ if (*icompq == 1) { goto L170; } /* The nodes on the bottom level of the tree were solved by SLASDQ. The corresponding left and right singular vector matrices are in explicit form. First apply back the left singular vector matrices. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */ j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L10: */ } /* L20: */ } sgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &rwork[( nl * *nrhs << 1) + 1], &nl, &c_b10, &rwork[1], &nl); j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L30: */ } /* L40: */ } sgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &rwork[( nl * *nrhs << 1) + 1], &nl, &c_b10, &rwork[nl * *nrhs + 1], & nl); jreal = 0; jimag = nl * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L50: */ } /* L60: */ } /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */ j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L70: */ } /* L80: */ } sgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &rwork[( nr * *nrhs << 1) + 1], &nr, &c_b10, &rwork[1], &nr); j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L90: */ } /* L100: */ } sgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &rwork[( nr * *nrhs << 1) + 1], &nr, &c_b10, &rwork[nr * *nrhs + 1], & nr); jreal = 0; jimag = nr * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L110: */ } /* L120: */ } /* L130: */ } /* Next copy the rows of B that correspond to unchanged rows in the bidiagonal matrix to BX. */ i__1 = nd; for (i__ = 1; i__ <= i__1; ++i__) { ic = iwork[inode + i__ - 1]; ccopy_(nrhs, &b_ref(ic, 1), ldb, &bx_ref(ic, 1), ldbx); /* L140: */ } /* Finally go through the left singular vector matrices of all the other subproblems bottom-up on the tree. */ j = pow_ii(&c__2, &nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* find the first node LF and last node LL on the current level LVL */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; --j; clals0_(icompq, &nl, &nr, &sqre, nrhs, &bx_ref(nlf, 1), ldbx, & b_ref(nlf, 1), ldb, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &rwork[1], info); /* L150: */ } /* L160: */ } goto L330; /* ICOMPQ = 1: applying back the right singular vector factors. */ L170: /* First now go through the right singular vector matrices of all the tree nodes top-down. */ j = 0; i__1 = nlvl; for (lvl = 1; lvl <= i__1; ++lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__2 = lvl - 1; lf = pow_ii(&c__2, &i__2); ll = (lf << 1) - 1; } i__2 = lf; for (i__ = ll; i__ >= i__2; --i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqre = 0; } else { sqre = 1; } ++j; clals0_(icompq, &nl, &nr, &sqre, nrhs, &b_ref(nlf, 1), ldb, & bx_ref(nlf, 1), ldbx, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &rwork[1], info); /* L180: */ } /* L190: */ } /* The nodes on the bottom level of the tree were solved by SLASDQ. The corresponding right singular vector matrices are in explicit form. Apply them back. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlp1 = nl + 1; if (i__ == nd) { nrp1 = nr; } else { nrp1 = nr + 1; } nlf = ic - nl; nrf = ic + 1; /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */ j = nlp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L200: */ } /* L210: */ } sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b10, &rwork[1], & nlp1); j = nlp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L220: */ } /* L230: */ } sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b10, &rwork[nlp1 * * nrhs + 1], &nlp1); jreal = 0; jimag = nlp1 * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L240: */ } /* L250: */ } /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */ j = nrp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L260: */ } /* L270: */ } sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b10, &rwork[1], & nrp1); j = nrp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L280: */ } /* L290: */ } sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b10, &rwork[nrp1 * * nrhs + 1], &nrp1); jreal = 0; jimag = nrp1 * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L300: */ } /* L310: */ } /* L320: */ } L330: return 0; /* End of CLALSA */ } /* clalsa_ */
/* Subroutine */ int dchkhs_(integer *nsizes, integer *nn, integer *ntypes, logical *dotype, integer *iseed, doublereal *thresh, integer *nounit, doublereal *a, integer *lda, doublereal *h__, doublereal *t1, doublereal *t2, doublereal *u, integer *ldu, doublereal *z__, doublereal *uz, doublereal *wr1, doublereal *wi1, doublereal *wr3, doublereal *wi3, doublereal *evectl, doublereal *evectr, doublereal * evecty, doublereal *evectx, doublereal *uu, doublereal *tau, doublereal *work, integer *nwork, integer *iwork, logical *select, doublereal *result, integer *info) { /* Initialized data */ static integer ktype[21] = { 1,2,3,4,4,4,4,4,6,6,6,6,6,6,6,6,6,6,9,9,9 }; static integer kmagn[21] = { 1,1,1,1,1,1,2,3,1,1,1,1,1,1,1,1,2,3,1,2,3 }; static integer kmode[21] = { 0,0,0,4,3,1,4,4,4,3,1,5,4,3,1,5,5,5,4,3,1 }; static integer kconds[21] = { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,2,2,0,0,0 }; /* Format strings */ static char fmt_9999[] = "(\002 DCHKHS: \002,a,\002 returned INFO=\002,i" "6,\002.\002,/9x,\002N=\002,i6,\002, JTYPE=\002,i6,\002, ISEED=" "(\002,3(i5,\002,\002),i5,\002)\002)"; static char fmt_9998[] = "(\002 DCHKHS: \002,a,\002 Eigenvectors from" " \002,a,\002 incorrectly \002,\002normalized.\002,/\002 Bits of " "error=\002,0p,g10.3,\002,\002,9x,\002N=\002,i6,\002, JTYPE=\002," "i6,\002, ISEED=(\002,3(i5,\002,\002),i5,\002)\002)"; static char fmt_9997[] = "(\002 DCHKHS: Selected \002,a,\002 Eigenvector" "s from \002,a,\002 do not match other eigenvectors \002,9x,\002N=" "\002,i6,\002, JTYPE=\002,i6,\002, ISEED=(\002,3(i5,\002,\002),i5," "\002)\002)"; /* System generated locals */ integer a_dim1, a_offset, evectl_dim1, evectl_offset, evectr_dim1, evectr_offset, evectx_dim1, evectx_offset, evecty_dim1, evecty_offset, h_dim1, h_offset, t1_dim1, t1_offset, t2_dim1, t2_offset, u_dim1, u_offset, uu_dim1, uu_offset, uz_dim1, uz_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3, d__4, d__5, d__6; /* Builtin functions */ double sqrt(doublereal); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ static doublereal cond; static integer jcol, nmax; static doublereal unfl, ovfl, temp1, temp2; static integer i__, j, k, n; static logical badnn; extern /* Subroutine */ int dget10_(integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), dget22_(char *, char *, char *, integer *, doublereal *, integer * , doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *), dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static logical match; static integer imode; static doublereal dumma[6]; static integer iinfo, nselc; static doublereal conds; extern /* Subroutine */ int dhst01_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *); static doublereal aninv, anorm; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer nmats, nselr, jsize, nerrs, itype, jtype, ntest, n1; static doublereal rtulp; extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); static integer jj, in; extern doublereal dlamch_(char *); extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); static char adumma[1*1]; extern /* Subroutine */ int dlatme_(integer *, char *, integer *, doublereal *, integer *, doublereal *, doublereal *, char *, char *, char *, char *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dhsein_(char *, char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *); static integer idumma[1]; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static integer ioldsd[4]; extern /* Subroutine */ int dlafts_(char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), dlasum_(char *, integer *, integer *, integer *), dhseqr_( char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), dlatmr_( integer *, integer *, char *, integer *, char *, doublereal *, integer *, doublereal *, doublereal *, char *, char *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, char *, integer *, integer *, integer *, doublereal *, doublereal *, char *, doublereal *, integer *, integer *, integer *), dlatms_( integer *, integer *, char *, integer *, char *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, char *, doublereal *, integer *, doublereal *, integer *), dorghr_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), dormhr_(char *, char *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), xerbla_(char *, integer *); static doublereal rtunfl, rtovfl, rtulpi, ulpinv; static integer mtypes, ntestt, ihi, ilo; static doublereal ulp; /* Fortran I/O blocks */ static cilist io___36 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___42 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___56 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___58 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___59 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___60 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___61 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___62 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___63 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___64 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___66 = { 0, 0, 0, fmt_9999, 0 }; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define h___ref(a_1,a_2) h__[(a_2)*h_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define uu_ref(a_1,a_2) uu[(a_2)*uu_dim1 + a_1] #define evectl_ref(a_1,a_2) evectl[(a_2)*evectl_dim1 + a_1] #define evectr_ref(a_1,a_2) evectr[(a_2)*evectr_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= DCHKHS checks the nonsymmetric eigenvalue problem routines. DGEHRD factors A as U H U' , where ' means transpose, H is hessenberg, and U is an orthogonal matrix. DORGHR generates the orthogonal matrix U. DORMHR multiplies a matrix by the orthogonal matrix U. DHSEQR factors H as Z T Z' , where Z is orthogonal and T is "quasi-triangular", and the eigenvalue vector W. DTREVC computes the left and right eigenvector matrices L and R for T. DHSEIN computes the left and right eigenvector matrices Y and X for H, using inverse iteration. When DCHKHS is called, a number of matrix "sizes" ("n's") and a number of matrix "types" are specified. For each size ("n") and each type of matrix, one matrix will be generated and used to test the nonsymmetric eigenroutines. For each matrix, 14 tests will be performed: (1) | A - U H U**T | / ( |A| n ulp ) (2) | I - UU**T | / ( n ulp ) (3) | H - Z T Z**T | / ( |H| n ulp ) (4) | I - ZZ**T | / ( n ulp ) (5) | A - UZ H (UZ)**T | / ( |A| n ulp ) (6) | I - UZ (UZ)**T | / ( n ulp ) (7) | T(Z computed) - T(Z not computed) | / ( |T| ulp ) (8) | W(Z computed) - W(Z not computed) | / ( |W| ulp ) (9) | TR - RW | / ( |T| |R| ulp ) (10) | L**H T - W**H L | / ( |T| |L| ulp ) (11) | HX - XW | / ( |H| |X| ulp ) (12) | Y**H H - W**H Y | / ( |H| |Y| ulp ) (13) | AX - XW | / ( |A| |X| ulp ) (14) | Y**H A - W**H Y | / ( |A| |Y| ulp ) The "sizes" are specified by an array NN(1:NSIZES); the value of each element NN(j) specifies one size. The "types" are specified by a logical array DOTYPE( 1:NTYPES ); if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. Currently, the list of possible types is: (1) The zero matrix. (2) The identity matrix. (3) A (transposed) Jordan block, with 1's on the diagonal. (4) A diagonal matrix with evenly spaced entries 1, ..., ULP and random signs. (ULP = (first number larger than 1) - 1 ) (5) A diagonal matrix with geometrically spaced entries 1, ..., ULP and random signs. (6) A diagonal matrix with "clustered" entries 1, ULP, ..., ULP and random signs. (7) Same as (4), but multiplied by SQRT( overflow threshold ) (8) Same as (4), but multiplied by SQRT( underflow threshold ) (9) A matrix of the form U' T U, where U is orthogonal and T has evenly spaced entries 1, ..., ULP with random signs on the diagonal and random O(1) entries in the upper triangle. (10) A matrix of the form U' T U, where U is orthogonal and T has geometrically spaced entries 1, ..., ULP with random signs on the diagonal and random O(1) entries in the upper triangle. (11) A matrix of the form U' T U, where U is orthogonal and T has "clustered" entries 1, ULP,..., ULP with random signs on the diagonal and random O(1) entries in the upper triangle. (12) A matrix of the form U' T U, where U is orthogonal and T has real or complex conjugate paired eigenvalues randomly chosen from ( ULP, 1 ) and random O(1) entries in the upper triangle. (13) A matrix of the form X' T X, where X has condition SQRT( ULP ) and T has evenly spaced entries 1, ..., ULP with random signs on the diagonal and random O(1) entries in the upper triangle. (14) A matrix of the form X' T X, where X has condition SQRT( ULP ) and T has geometrically spaced entries 1, ..., ULP with random signs on the diagonal and random O(1) entries in the upper triangle. (15) A matrix of the form X' T X, where X has condition SQRT( ULP ) and T has "clustered" entries 1, ULP,..., ULP with random signs on the diagonal and random O(1) entries in the upper triangle. (16) A matrix of the form X' T X, where X has condition SQRT( ULP ) and T has real or complex conjugate paired eigenvalues randomly chosen from ( ULP, 1 ) and random O(1) entries in the upper triangle. (17) Same as (16), but multiplied by SQRT( overflow threshold ) (18) Same as (16), but multiplied by SQRT( underflow threshold ) (19) Nonsymmetric matrix with random entries chosen from (-1,1). (20) Same as (19), but multiplied by SQRT( overflow threshold ) (21) Same as (19), but multiplied by SQRT( underflow threshold ) Arguments ========== NSIZES - INTEGER The number of sizes of matrices to use. If it is zero, DCHKHS does nothing. It must be at least zero. Not modified. NN - INTEGER array, dimension (NSIZES) An array containing the sizes to be used for the matrices. Zero values will be skipped. The values must be at least zero. Not modified. NTYPES - INTEGER The number of elements in DOTYPE. If it is zero, DCHKHS does nothing. It must be at least zero. If it is MAXTYP+1 and NSIZES is 1, then an additional type, MAXTYP+1 is defined, which is to use whatever matrix is in A. This is only useful if DOTYPE(1:MAXTYP) is .FALSE. and DOTYPE(MAXTYP+1) is .TRUE. . Not modified. DOTYPE - LOGICAL array, dimension (NTYPES) If DOTYPE(j) is .TRUE., then for each size in NN a matrix of that size and of type j will be generated. If NTYPES is smaller than the maximum number of types defined (PARAMETER MAXTYP), then types NTYPES+1 through MAXTYP will not be generated. If NTYPES is larger than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) will be ignored. Not modified. ISEED - INTEGER array, dimension (4) On entry ISEED specifies the seed of the random number generator. The array elements should be between 0 and 4095; if not they will be reduced mod 4096. Also, ISEED(4) must be odd. The random number generator uses a linear congruential sequence limited to small integers, and so should produce machine independent random numbers. The values of ISEED are changed on exit, and can be used in the next call to DCHKHS to continue the same random number sequence. Modified. THRESH - DOUBLE PRECISION A test will count as "failed" if the "error", computed as described above, exceeds THRESH. Note that the error is scaled to be O(1), so THRESH should be a reasonably small multiple of 1, e.g., 10 or 100. In particular, it should not depend on the precision (single vs. double) or the size of the matrix. It must be at least zero. Not modified. NOUNIT - INTEGER The FORTRAN unit number for printing out error messages (e.g., if a routine returns IINFO not equal to 0.) Not modified. A - DOUBLE PRECISION array, dimension (LDA,max(NN)) Used to hold the matrix whose eigenvalues are to be computed. On exit, A contains the last matrix actually used. Modified. LDA - INTEGER The leading dimension of A, H, T1 and T2. It must be at least 1 and at least max( NN ). Not modified. H - DOUBLE PRECISION array, dimension (LDA,max(NN)) The upper hessenberg matrix computed by DGEHRD. On exit, H contains the Hessenberg form of the matrix in A. Modified. T1 - DOUBLE PRECISION array, dimension (LDA,max(NN)) The Schur (="quasi-triangular") matrix computed by DHSEQR if Z is computed. On exit, T1 contains the Schur form of the matrix in A. Modified. T2 - DOUBLE PRECISION array, dimension (LDA,max(NN)) The Schur matrix computed by DHSEQR when Z is not computed. This should be identical to T1. Modified. LDU - INTEGER The leading dimension of U, Z, UZ and UU. It must be at least 1 and at least max( NN ). Not modified. U - DOUBLE PRECISION array, dimension (LDU,max(NN)) The orthogonal matrix computed by DGEHRD. Modified. Z - DOUBLE PRECISION array, dimension (LDU,max(NN)) The orthogonal matrix computed by DHSEQR. Modified. UZ - DOUBLE PRECISION array, dimension (LDU,max(NN)) The product of U times Z. Modified. WR1 - DOUBLE PRECISION array, dimension (max(NN)) WI1 - DOUBLE PRECISION array, dimension (max(NN)) The real and imaginary parts of the eigenvalues of A, as computed when Z is computed. On exit, WR1 + WI1*i are the eigenvalues of the matrix in A. Modified. WR3 - DOUBLE PRECISION array, dimension (max(NN)) WI3 - DOUBLE PRECISION array, dimension (max(NN)) Like WR1, WI1, these arrays contain the eigenvalues of A, but those computed when DHSEQR only computes the eigenvalues, i.e., not the Schur vectors and no more of the Schur form than is necessary for computing the eigenvalues. Modified. EVECTL - DOUBLE PRECISION array, dimension (LDU,max(NN)) The (upper triangular) left eigenvector matrix for the matrix in T1. For complex conjugate pairs, the real part is stored in one row and the imaginary part in the next. Modified. EVEZTR - DOUBLE PRECISION array, dimension (LDU,max(NN)) The (upper triangular) right eigenvector matrix for the matrix in T1. For complex conjugate pairs, the real part is stored in one column and the imaginary part in the next. Modified. EVECTY - DOUBLE PRECISION array, dimension (LDU,max(NN)) The left eigenvector matrix for the matrix in H. For complex conjugate pairs, the real part is stored in one row and the imaginary part in the next. Modified. EVECTX - DOUBLE PRECISION array, dimension (LDU,max(NN)) The right eigenvector matrix for the matrix in H. For complex conjugate pairs, the real part is stored in one column and the imaginary part in the next. Modified. UU - DOUBLE PRECISION array, dimension (LDU,max(NN)) Details of the orthogonal matrix computed by DGEHRD. Modified. TAU - DOUBLE PRECISION array, dimension(max(NN)) Further details of the orthogonal matrix computed by DGEHRD. Modified. WORK - DOUBLE PRECISION array, dimension (NWORK) Workspace. Modified. NWORK - INTEGER The number of entries in WORK. NWORK >= 4*NN(j)*NN(j) + 2. IWORK - INTEGER array, dimension (max(NN)) Workspace. Modified. SELECT - LOGICAL array, dimension (max(NN)) Workspace. Modified. RESULT - DOUBLE PRECISION array, dimension (14) The values computed by the fourteen tests described above. The values are currently limited to 1/ulp, to avoid overflow. Modified. INFO - INTEGER If 0, then everything ran OK. -1: NSIZES < 0 -2: Some NN(j) < 0 -3: NTYPES < 0 -6: THRESH < 0 -9: LDA < 1 or LDA < NMAX, where NMAX is max( NN(j) ). -14: LDU < 1 or LDU < NMAX. -28: NWORK too small. If DLATMR, SLATMS, or SLATME returns an error code, the absolute value of it is returned. If 1, then DHSEQR could not find all the shifts. If 2, then the EISPACK code (for small blocks) failed. If >2, then 30*N iterations were not enough to find an eigenvalue or to decompose the problem. Modified. ----------------------------------------------------------------------- Some Local Variables and Parameters: ---- ----- --------- --- ---------- ZERO, ONE Real 0 and 1. MAXTYP The number of types defined. MTEST The number of tests defined: care must be taken that (1) the size of RESULT, (2) the number of tests actually performed, and (3) MTEST agree. NTEST The number of tests performed on this matrix so far. This should be less than MTEST, and equal to it by the last test. It will be less if any of the routines being tested indicates that it could not compute the matrices that would be tested. NMAX Largest value in NN. NMATS The number of matrices generated so far. NERRS The number of tests which have exceeded THRESH so far (computed by DLAFTS). COND, CONDS, IMODE Values to be passed to the matrix generators. ANORM Norm of A; passed to matrix generators. OVFL, UNFL Overflow and underflow thresholds. ULP, ULPINV Finest relative precision and its inverse. RTOVFL, RTUNFL, RTULP, RTULPI Square roots of the previous 4 values. The following four arrays decode JTYPE: KTYPE(j) The general type (1-10) for type "j". KMODE(j) The MODE value to be passed to the matrix generator for type "j". KMAGN(j) The order of magnitude ( O(1), O(overflow^(1/2) ), O(underflow^(1/2) ) KCONDS(j) Selects whether CONDS is to be 1 or 1/sqrt(ulp). (0 means irrelevant.) ===================================================================== Parameter adjustments */ --nn; --dotype; --iseed; t2_dim1 = *lda; t2_offset = 1 + t2_dim1 * 1; t2 -= t2_offset; t1_dim1 = *lda; t1_offset = 1 + t1_dim1 * 1; t1 -= t1_offset; h_dim1 = *lda; h_offset = 1 + h_dim1 * 1; h__ -= h_offset; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; uu_dim1 = *ldu; uu_offset = 1 + uu_dim1 * 1; uu -= uu_offset; evectx_dim1 = *ldu; evectx_offset = 1 + evectx_dim1 * 1; evectx -= evectx_offset; evecty_dim1 = *ldu; evecty_offset = 1 + evecty_dim1 * 1; evecty -= evecty_offset; evectr_dim1 = *ldu; evectr_offset = 1 + evectr_dim1 * 1; evectr -= evectr_offset; evectl_dim1 = *ldu; evectl_offset = 1 + evectl_dim1 * 1; evectl -= evectl_offset; uz_dim1 = *ldu; uz_offset = 1 + uz_dim1 * 1; uz -= uz_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --wr1; --wi1; --wr3; --wi3; --tau; --work; --iwork; --select; --result; /* Function Body Check for errors */ ntestt = 0; *info = 0; badnn = FALSE_; nmax = 0; i__1 = *nsizes; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = nmax, i__3 = nn[j]; nmax = max(i__2,i__3); if (nn[j] < 0) { badnn = TRUE_; } /* L10: */ } /* Check for errors */ if (*nsizes < 0) { *info = -1; } else if (badnn) { *info = -2; } else if (*ntypes < 0) { *info = -3; } else if (*thresh < 0.) { *info = -6; } else if (*lda <= 1 || *lda < nmax) { *info = -9; } else if (*ldu <= 1 || *ldu < nmax) { *info = -14; } else if ((nmax << 2) * nmax + 2 > *nwork) { *info = -28; } if (*info != 0) { i__1 = -(*info); xerbla_("DCHKHS", &i__1); return 0; } /* Quick return if possible */ if (*nsizes == 0 || *ntypes == 0) { return 0; } /* More important constants */ unfl = dlamch_("Safe minimum"); ovfl = dlamch_("Overflow"); dlabad_(&unfl, &ovfl); ulp = dlamch_("Epsilon") * dlamch_("Base"); ulpinv = 1. / ulp; rtunfl = sqrt(unfl); rtovfl = sqrt(ovfl); rtulp = sqrt(ulp); rtulpi = 1. / rtulp; /* Loop over sizes, types */ nerrs = 0; nmats = 0; i__1 = *nsizes; for (jsize = 1; jsize <= i__1; ++jsize) { n = nn[jsize]; if (n == 0) { goto L270; } n1 = max(1,n); aninv = 1. / (doublereal) n1; if (*nsizes != 1) { mtypes = min(21,*ntypes); } else { mtypes = min(22,*ntypes); } i__2 = mtypes; for (jtype = 1; jtype <= i__2; ++jtype) { if (! dotype[jtype]) { goto L260; } ++nmats; ntest = 0; /* Save ISEED in case of an error. */ for (j = 1; j <= 4; ++j) { ioldsd[j - 1] = iseed[j]; /* L20: */ } /* Initialize RESULT */ for (j = 1; j <= 14; ++j) { result[j] = 0.; /* L30: */ } /* Compute "A" Control parameters: KMAGN KCONDS KMODE KTYPE =1 O(1) 1 clustered 1 zero =2 large large clustered 2 identity =3 small exponential Jordan =4 arithmetic diagonal, (w/ eigenvalues) =5 random log symmetric, w/ eigenvalues =6 random general, w/ eigenvalues =7 random diagonal =8 random symmetric =9 random general =10 random triangular */ if (mtypes > 21) { goto L100; } itype = ktype[jtype - 1]; imode = kmode[jtype - 1]; /* Compute norm */ switch (kmagn[jtype - 1]) { case 1: goto L40; case 2: goto L50; case 3: goto L60; } L40: anorm = 1.; goto L70; L50: anorm = rtovfl * ulp * aninv; goto L70; L60: anorm = rtunfl * n * ulpinv; goto L70; L70: dlaset_("Full", lda, &n, &c_b18, &c_b18, &a[a_offset], lda); iinfo = 0; cond = ulpinv; /* Special Matrices */ if (itype == 1) { /* Zero */ iinfo = 0; } else if (itype == 2) { /* Identity */ i__3 = n; for (jcol = 1; jcol <= i__3; ++jcol) { a_ref(jcol, jcol) = anorm; /* L80: */ } } else if (itype == 3) { /* Jordan Block */ i__3 = n; for (jcol = 1; jcol <= i__3; ++jcol) { a_ref(jcol, jcol) = anorm; if (jcol > 1) { a_ref(jcol, jcol - 1) = 1.; } /* L90: */ } } else if (itype == 4) { /* Diagonal Matrix, [Eigen]values Specified */ dlatms_(&n, &n, "S", &iseed[1], "S", &work[1], &imode, &cond, &anorm, &c__0, &c__0, "N", &a[a_offset], lda, &work[n + 1], &iinfo); } else if (itype == 5) { /* Symmetric, eigenvalues specified */ dlatms_(&n, &n, "S", &iseed[1], "S", &work[1], &imode, &cond, &anorm, &n, &n, "N", &a[a_offset], lda, &work[n + 1], &iinfo); } else if (itype == 6) { /* General, eigenvalues specified */ if (kconds[jtype - 1] == 1) { conds = 1.; } else if (kconds[jtype - 1] == 2) { conds = rtulpi; } else { conds = 0.; } *(unsigned char *)&adumma[0] = ' '; dlatme_(&n, "S", &iseed[1], &work[1], &imode, &cond, &c_b32, adumma, "T", "T", "T", &work[n + 1], &c__4, &conds, & n, &n, &anorm, &a[a_offset], lda, &work[(n << 1) + 1], &iinfo); } else if (itype == 7) { /* Diagonal, random eigenvalues */ dlatmr_(&n, &n, "S", &iseed[1], "S", &work[1], &c__6, &c_b32, &c_b32, "T", "N", &work[n + 1], &c__1, &c_b32, &work[( n << 1) + 1], &c__1, &c_b32, "N", idumma, &c__0, & c__0, &c_b18, &anorm, "NO", &a[a_offset], lda, &iwork[ 1], &iinfo); } else if (itype == 8) { /* Symmetric, random eigenvalues */ dlatmr_(&n, &n, "S", &iseed[1], "S", &work[1], &c__6, &c_b32, &c_b32, "T", "N", &work[n + 1], &c__1, &c_b32, &work[( n << 1) + 1], &c__1, &c_b32, "N", idumma, &n, &n, & c_b18, &anorm, "NO", &a[a_offset], lda, &iwork[1], & iinfo); } else if (itype == 9) { /* General, random eigenvalues */ dlatmr_(&n, &n, "S", &iseed[1], "N", &work[1], &c__6, &c_b32, &c_b32, "T", "N", &work[n + 1], &c__1, &c_b32, &work[( n << 1) + 1], &c__1, &c_b32, "N", idumma, &n, &n, & c_b18, &anorm, "NO", &a[a_offset], lda, &iwork[1], & iinfo); } else if (itype == 10) { /* Triangular, random eigenvalues */ dlatmr_(&n, &n, "S", &iseed[1], "N", &work[1], &c__6, &c_b32, &c_b32, "T", "N", &work[n + 1], &c__1, &c_b32, &work[( n << 1) + 1], &c__1, &c_b32, "N", idumma, &n, &c__0, & c_b18, &anorm, "NO", &a[a_offset], lda, &iwork[1], & iinfo); } else { iinfo = 1; } if (iinfo != 0) { io___36.ciunit = *nounit; s_wsfe(&io___36); do_fio(&c__1, "Generator", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); return 0; } L100: /* Call DGEHRD to compute H and U, do tests. */ dlacpy_(" ", &n, &n, &a[a_offset], lda, &h__[h_offset], lda); ntest = 1; ilo = 1; ihi = n; i__3 = *nwork - n; dgehrd_(&n, &ilo, &ihi, &h__[h_offset], lda, &work[1], &work[n + 1], &i__3, &iinfo); if (iinfo != 0) { result[1] = ulpinv; io___39.ciunit = *nounit; s_wsfe(&io___39); do_fio(&c__1, "DGEHRD", (ftnlen)6); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L250; } i__3 = n - 1; for (j = 1; j <= i__3; ++j) { uu_ref(j + 1, j) = 0.; i__4 = n; for (i__ = j + 2; i__ <= i__4; ++i__) { u_ref(i__, j) = h___ref(i__, j); uu_ref(i__, j) = h___ref(i__, j); h___ref(i__, j) = 0.; /* L110: */ } /* L120: */ } dcopy_(&n, &work[1], &c__1, &tau[1], &c__1); i__3 = *nwork - n; dorghr_(&n, &ilo, &ihi, &u[u_offset], ldu, &work[1], &work[n + 1], &i__3, &iinfo); ntest = 2; dhst01_(&n, &ilo, &ihi, &a[a_offset], lda, &h__[h_offset], lda, & u[u_offset], ldu, &work[1], nwork, &result[1]); /* Call DHSEQR to compute T1, T2 and Z, do tests. Eigenvalues only (WR3,WI3) */ dlacpy_(" ", &n, &n, &h__[h_offset], lda, &t2[t2_offset], lda); ntest = 3; result[3] = ulpinv; dhseqr_("E", "N", &n, &ilo, &ihi, &t2[t2_offset], lda, &wr3[1], & wi3[1], &uz[uz_offset], ldu, &work[1], nwork, &iinfo); if (iinfo != 0) { io___41.ciunit = *nounit; s_wsfe(&io___41); do_fio(&c__1, "DHSEQR(E)", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); if (iinfo <= n + 2) { *info = abs(iinfo); goto L250; } } /* Eigenvalues (WR1,WI1) and Full Schur Form (T2) */ dlacpy_(" ", &n, &n, &h__[h_offset], lda, &t2[t2_offset], lda); dhseqr_("S", "N", &n, &ilo, &ihi, &t2[t2_offset], lda, &wr1[1], & wi1[1], &uz[uz_offset], ldu, &work[1], nwork, &iinfo); if (iinfo != 0 && iinfo <= n + 2) { io___42.ciunit = *nounit; s_wsfe(&io___42); do_fio(&c__1, "DHSEQR(S)", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L250; } /* Eigenvalues (WR1,WI1), Schur Form (T1), and Schur vectors (UZ) */ dlacpy_(" ", &n, &n, &h__[h_offset], lda, &t1[t1_offset], lda); dlacpy_(" ", &n, &n, &u[u_offset], ldu, &uz[uz_offset], lda); dhseqr_("S", "V", &n, &ilo, &ihi, &t1[t1_offset], lda, &wr1[1], & wi1[1], &uz[uz_offset], ldu, &work[1], nwork, &iinfo); if (iinfo != 0 && iinfo <= n + 2) { io___43.ciunit = *nounit; s_wsfe(&io___43); do_fio(&c__1, "DHSEQR(V)", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L250; } /* Compute Z = U' UZ */ dgemm_("T", "N", &n, &n, &n, &c_b32, &u[u_offset], ldu, &uz[ uz_offset], ldu, &c_b18, &z__[z_offset], ldu); ntest = 8; /* Do Tests 3: | H - Z T Z' | / ( |H| n ulp ) and 4: | I - Z Z' | / ( n ulp ) */ dhst01_(&n, &ilo, &ihi, &h__[h_offset], lda, &t1[t1_offset], lda, &z__[z_offset], ldu, &work[1], nwork, &result[3]); /* Do Tests 5: | A - UZ T (UZ)' | / ( |A| n ulp ) and 6: | I - UZ (UZ)' | / ( n ulp ) */ dhst01_(&n, &ilo, &ihi, &a[a_offset], lda, &t1[t1_offset], lda, & uz[uz_offset], ldu, &work[1], nwork, &result[5]); /* Do Test 7: | T2 - T1 | / ( |T| n ulp ) */ dget10_(&n, &n, &t2[t2_offset], lda, &t1[t1_offset], lda, &work[1] , &result[7]); /* Do Test 8: | W3 - W1 | / ( max(|W1|,|W3|) ulp ) */ temp1 = 0.; temp2 = 0.; i__3 = n; for (j = 1; j <= i__3; ++j) { /* Computing MAX */ d__5 = temp1, d__6 = (d__1 = wr1[j], abs(d__1)) + (d__2 = wi1[ j], abs(d__2)), d__5 = max(d__5,d__6), d__6 = (d__3 = wr3[j], abs(d__3)) + (d__4 = wi3[j], abs(d__4)); temp1 = max(d__5,d__6); /* Computing MAX */ d__3 = temp2, d__4 = (d__1 = wr1[j] - wr3[j], abs(d__1)) + ( d__2 = wr1[j] - wr3[j], abs(d__2)); temp2 = max(d__3,d__4); /* L130: */ } /* Computing MAX */ d__1 = unfl, d__2 = ulp * max(temp1,temp2); result[8] = temp2 / max(d__1,d__2); /* Compute the Left and Right Eigenvectors of T Compute the Right eigenvector Matrix: */ ntest = 9; result[9] = ulpinv; /* Select last max(N/4,1) real, max(N/4,1) complex eigenvectors */ nselc = 0; nselr = 0; j = n; L140: if (wi1[j] == 0.) { /* Computing MAX */ i__3 = n / 4; if (nselr < max(i__3,1)) { ++nselr; select[j] = TRUE_; } else { select[j] = FALSE_; } --j; } else { /* Computing MAX */ i__3 = n / 4; if (nselc < max(i__3,1)) { ++nselc; select[j] = TRUE_; select[j - 1] = FALSE_; } else { select[j] = FALSE_; select[j - 1] = FALSE_; } j += -2; } if (j > 0) { goto L140; } dtrevc_("Right", "All", &select[1], &n, &t1[t1_offset], lda, dumma, ldu, &evectr[evectr_offset], ldu, &n, &in, &work[1] , &iinfo); if (iinfo != 0) { io___50.ciunit = *nounit; s_wsfe(&io___50); do_fio(&c__1, "DTREVC(R,A)", (ftnlen)11); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L250; } /* Test 9: | TR - RW | / ( |T| |R| ulp ) */ dget22_("N", "N", "N", &n, &t1[t1_offset], lda, &evectr[ evectr_offset], ldu, &wr1[1], &wi1[1], &work[1], dumma); result[9] = dumma[0]; if (dumma[1] > *thresh) { io___51.ciunit = *nounit; s_wsfe(&io___51); do_fio(&c__1, "Right", (ftnlen)5); do_fio(&c__1, "DTREVC", (ftnlen)6); do_fio(&c__1, (char *)&dumma[1], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); } /* Compute selected right eigenvectors and confirm that they agree with previous right eigenvectors */ dtrevc_("Right", "Some", &select[1], &n, &t1[t1_offset], lda, dumma, ldu, &evectl[evectl_offset], ldu, &n, &in, &work[1] , &iinfo); if (iinfo != 0) { io___52.ciunit = *nounit; s_wsfe(&io___52); do_fio(&c__1, "DTREVC(R,S)", (ftnlen)11); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L250; } k = 1; match = TRUE_; i__3 = n; for (j = 1; j <= i__3; ++j) { if (select[j] && wi1[j] == 0.) { i__4 = n; for (jj = 1; jj <= i__4; ++jj) { if (evectr_ref(jj, j) != evectl_ref(jj, k)) { match = FALSE_; goto L180; } /* L150: */ } ++k; } else if (select[j] && wi1[j] != 0.) { i__4 = n; for (jj = 1; jj <= i__4; ++jj) { if (evectr_ref(jj, j) != evectl_ref(jj, k) || evectr_ref(jj, j + 1) != evectl_ref(jj, k + 1) ) { match = FALSE_; goto L180; } /* L160: */ } k += 2; } /* L170: */ } L180: if (! match) { io___56.ciunit = *nounit; s_wsfe(&io___56); do_fio(&c__1, "Right", (ftnlen)5); do_fio(&c__1, "DTREVC", (ftnlen)6); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); } /* Compute the Left eigenvector Matrix: */ ntest = 10; result[10] = ulpinv; dtrevc_("Left", "All", &select[1], &n, &t1[t1_offset], lda, & evectl[evectl_offset], ldu, dumma, ldu, &n, &in, &work[1], &iinfo); if (iinfo != 0) { io___57.ciunit = *nounit; s_wsfe(&io___57); do_fio(&c__1, "DTREVC(L,A)", (ftnlen)11); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L250; } /* Test 10: | LT - WL | / ( |T| |L| ulp ) */ dget22_("Trans", "N", "Conj", &n, &t1[t1_offset], lda, &evectl[ evectl_offset], ldu, &wr1[1], &wi1[1], &work[1], &dumma[2] ); result[10] = dumma[2]; if (dumma[3] > *thresh) { io___58.ciunit = *nounit; s_wsfe(&io___58); do_fio(&c__1, "Left", (ftnlen)4); do_fio(&c__1, "DTREVC", (ftnlen)6); do_fio(&c__1, (char *)&dumma[3], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); } /* Compute selected left eigenvectors and confirm that they agree with previous left eigenvectors */ dtrevc_("Left", "Some", &select[1], &n, &t1[t1_offset], lda, & evectr[evectr_offset], ldu, dumma, ldu, &n, &in, &work[1], &iinfo); if (iinfo != 0) { io___59.ciunit = *nounit; s_wsfe(&io___59); do_fio(&c__1, "DTREVC(L,S)", (ftnlen)11); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L250; } k = 1; match = TRUE_; i__3 = n; for (j = 1; j <= i__3; ++j) { if (select[j] && wi1[j] == 0.) { i__4 = n; for (jj = 1; jj <= i__4; ++jj) { if (evectl_ref(jj, j) != evectr_ref(jj, k)) { match = FALSE_; goto L220; } /* L190: */ } ++k; } else if (select[j] && wi1[j] != 0.) { i__4 = n; for (jj = 1; jj <= i__4; ++jj) { if (evectl_ref(jj, j) != evectr_ref(jj, k) || evectl_ref(jj, j + 1) != evectr_ref(jj, k + 1) ) { match = FALSE_; goto L220; } /* L200: */ } k += 2; } /* L210: */ } L220: if (! match) { io___60.ciunit = *nounit; s_wsfe(&io___60); do_fio(&c__1, "Left", (ftnlen)4); do_fio(&c__1, "DTREVC", (ftnlen)6); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); } /* Call DHSEIN for Right eigenvectors of H, do test 11 */ ntest = 11; result[11] = ulpinv; i__3 = n; for (j = 1; j <= i__3; ++j) { select[j] = TRUE_; /* L230: */ } dhsein_("Right", "Qr", "Ninitv", &select[1], &n, &h__[h_offset], lda, &wr3[1], &wi3[1], dumma, ldu, &evectx[evectx_offset], ldu, &n1, &in, &work[1], &iwork[1], &iwork[1], &iinfo); if (iinfo != 0) { io___61.ciunit = *nounit; s_wsfe(&io___61); do_fio(&c__1, "DHSEIN(R)", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); if (iinfo < 0) { goto L250; } } else { /* Test 11: | HX - XW | / ( |H| |X| ulp ) (from inverse iteration) */ dget22_("N", "N", "N", &n, &h__[h_offset], lda, &evectx[ evectx_offset], ldu, &wr3[1], &wi3[1], &work[1], dumma); if (dumma[0] < ulpinv) { result[11] = dumma[0] * aninv; } if (dumma[1] > *thresh) { io___62.ciunit = *nounit; s_wsfe(&io___62); do_fio(&c__1, "Right", (ftnlen)5); do_fio(&c__1, "DHSEIN", (ftnlen)6); do_fio(&c__1, (char *)&dumma[1], (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)) ; e_wsfe(); } } /* Call DHSEIN for Left eigenvectors of H, do test 12 */ ntest = 12; result[12] = ulpinv; i__3 = n; for (j = 1; j <= i__3; ++j) { select[j] = TRUE_; /* L240: */ } dhsein_("Left", "Qr", "Ninitv", &select[1], &n, &h__[h_offset], lda, &wr3[1], &wi3[1], &evecty[evecty_offset], ldu, dumma, ldu, &n1, &in, &work[1], &iwork[1], &iwork[1], &iinfo); if (iinfo != 0) { io___63.ciunit = *nounit; s_wsfe(&io___63); do_fio(&c__1, "DHSEIN(L)", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); if (iinfo < 0) { goto L250; } } else { /* Test 12: | YH - WY | / ( |H| |Y| ulp ) (from inverse iteration) */ dget22_("C", "N", "C", &n, &h__[h_offset], lda, &evecty[ evecty_offset], ldu, &wr3[1], &wi3[1], &work[1], & dumma[2]); if (dumma[2] < ulpinv) { result[12] = dumma[2] * aninv; } if (dumma[3] > *thresh) { io___64.ciunit = *nounit; s_wsfe(&io___64); do_fio(&c__1, "Left", (ftnlen)4); do_fio(&c__1, "DHSEIN", (ftnlen)6); do_fio(&c__1, (char *)&dumma[3], (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)) ; e_wsfe(); } } /* Call DORMHR for Right eigenvectors of A, do test 13 */ ntest = 13; result[13] = ulpinv; dormhr_("Left", "No transpose", &n, &n, &ilo, &ihi, &uu[uu_offset] , ldu, &tau[1], &evectx[evectx_offset], ldu, &work[1], nwork, &iinfo); if (iinfo != 0) { io___65.ciunit = *nounit; s_wsfe(&io___65); do_fio(&c__1, "DORMHR(R)", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); if (iinfo < 0) { goto L250; } } else { /* Test 13: | AX - XW | / ( |A| |X| ulp ) (from inverse iteration) */ dget22_("N", "N", "N", &n, &a[a_offset], lda, &evectx[ evectx_offset], ldu, &wr3[1], &wi3[1], &work[1], dumma); if (dumma[0] < ulpinv) { result[13] = dumma[0] * aninv; } } /* Call DORMHR for Left eigenvectors of A, do test 14 */ ntest = 14; result[14] = ulpinv; dormhr_("Left", "No transpose", &n, &n, &ilo, &ihi, &uu[uu_offset] , ldu, &tau[1], &evecty[evecty_offset], ldu, &work[1], nwork, &iinfo); if (iinfo != 0) { io___66.ciunit = *nounit; s_wsfe(&io___66); do_fio(&c__1, "DORMHR(L)", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); if (iinfo < 0) { goto L250; } } else { /* Test 14: | YA - WY | / ( |A| |Y| ulp ) (from inverse iteration) */ dget22_("C", "N", "C", &n, &a[a_offset], lda, &evecty[ evecty_offset], ldu, &wr3[1], &wi3[1], &work[1], & dumma[2]); if (dumma[2] < ulpinv) { result[14] = dumma[2] * aninv; } } /* End of Loop -- Check for RESULT(j) > THRESH */ L250: ntestt += ntest; dlafts_("DHS", &n, &n, &jtype, &ntest, &result[1], ioldsd, thresh, nounit, &nerrs); L260: ; } L270: ; } /* Summary */ dlasum_("DHS", nounit, &nerrs, &ntestt); return 0; /* End of DCHKHS */ } /* dchkhs_ */
/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer * ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; /* Local variables */ static integer isub; static doublereal smin; static integer sqre1, i__, j; static doublereal r__; extern logical lsame_(char *, char *); extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer * , doublereal *, integer *); static integer iuplo; static doublereal cs, sn; extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), xerbla_(char *, integer *), dbdsqr_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); static logical rotate; static integer np1; #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= DLASDQ computes the singular value decomposition (SVD) of a real (upper or lower) bidiagonal matrix with diagonal D and offdiagonal E, accumulating the transformations if desired. Letting B denote the input bidiagonal matrix, the algorithm computes orthogonal matrices Q and P such that B = Q * S * P' (P' denotes the transpose of P). The singular values S are overwritten on D. The input matrix U is changed to U * Q if desired. The input matrix VT is changed to P' * VT if desired. The input matrix C is changed to Q' * C if desired. See "Computing Small Singular Values of Bidiagonal Matrices With Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, LAPACK Working Note #3, for a detailed description of the algorithm. Arguments ========= UPLO (input) CHARACTER*1 On entry, UPLO specifies whether the input bidiagonal matrix is upper or lower bidiagonal, and wether it is square are not. UPLO = 'U' or 'u' B is upper bidiagonal. UPLO = 'L' or 'l' B is lower bidiagonal. SQRE (input) INTEGER = 0: then the input matrix is N-by-N. = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and (N+1)-by-N if UPLU = 'L'. The bidiagonal matrix has N = NL + NR + 1 rows and M = N + SQRE >= N columns. N (input) INTEGER On entry, N specifies the number of rows and columns in the matrix. N must be at least 0. NCVT (input) INTEGER On entry, NCVT specifies the number of columns of the matrix VT. NCVT must be at least 0. NRU (input) INTEGER On entry, NRU specifies the number of rows of the matrix U. NRU must be at least 0. NCC (input) INTEGER On entry, NCC specifies the number of columns of the matrix C. NCC must be at least 0. D (input/output) DOUBLE PRECISION array, dimension (N) On entry, D contains the diagonal entries of the bidiagonal matrix whose SVD is desired. On normal exit, D contains the singular values in ascending order. E (input/output) DOUBLE PRECISION array. dimension is (N-1) if SQRE = 0 and N if SQRE = 1. On entry, the entries of E contain the offdiagonal entries of the bidiagonal matrix whose SVD is desired. On normal exit, E will contain 0. If the algorithm does not converge, D and E will contain the diagonal and superdiagonal entries of a bidiagonal matrix orthogonally equivalent to the one given as input. VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) On entry, contains a matrix which on exit has been premultiplied by P', dimension N-by-NCVT if SQRE = 0 and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). LDVT (input) INTEGER On entry, LDVT specifies the leading dimension of VT as declared in the calling (sub) program. LDVT must be at least 1. If NCVT is nonzero LDVT must also be at least N. U (input/output) DOUBLE PRECISION array, dimension (LDU, N) On entry, contains a matrix which on exit has been postmultiplied by Q, dimension NRU-by-N if SQRE = 0 and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). LDU (input) INTEGER On entry, LDU specifies the leading dimension of U as declared in the calling (sub) program. LDU must be at least max( 1, NRU ) . C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) On entry, contains an N-by-NCC matrix which on exit has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). LDC (input) INTEGER On entry, LDC specifies the leading dimension of C as declared in the calling (sub) program. LDC must be at least 1. If NCC is nonzero, LDC must also be at least N. WORK (workspace) DOUBLE PRECISION array, dimension (4*N) Workspace. Only referenced if one of NCVT, NRU, or NCC is nonzero, and if N is at least 2. INFO (output) INTEGER On exit, a value of 0 indicates a successful exit. If INFO < 0, argument number -INFO is illegal. If INFO > 0, the algorithm did not converge, and INFO specifies how many superdiagonals did not converge. Further Details =============== Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; --work; /* Function Body */ *info = 0; iuplo = 0; if (lsame_(uplo, "U")) { iuplo = 1; } if (lsame_(uplo, "L")) { iuplo = 2; } if (iuplo == 0) { *info = -1; } else if (*sqre < 0 || *sqre > 1) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ncvt < 0) { *info = -4; } else if (*nru < 0) { *info = -5; } else if (*ncc < 0) { *info = -6; } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { *info = -10; } else if (*ldu < max(1,*nru)) { *info = -12; } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { *info = -14; } if (*info != 0) { i__1 = -(*info); xerbla_("DLASDQ", &i__1); return 0; } if (*n == 0) { return 0; } /* ROTATE is true if any singular vectors desired, false otherwise */ rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; np1 = *n + 1; sqre1 = *sqre; /* If matrix non-square upper bidiagonal, rotate to be lower bidiagonal. The rotations are on the right. */ if (iuplo == 1 && sqre1 == 1) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (rotate) { work[i__] = cs; work[*n + i__] = sn; } /* L10: */ } dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); d__[*n] = r__; e[*n] = 0.; if (rotate) { work[*n] = cs; work[*n + *n] = sn; } iuplo = 2; sqre1 = 0; /* Update singular vectors if desired. */ if (*ncvt > 0) { dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ vt_offset], ldvt); } } /* If matrix lower bidiagonal, rotate to be upper bidiagonal by applying Givens rotations on the left. */ if (iuplo == 2) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (rotate) { work[i__] = cs; work[*n + i__] = sn; } /* L20: */ } /* If matrix (N+1)-by-N lower bidiagonal, one additional rotation is needed. */ if (sqre1 == 1) { dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); d__[*n] = r__; if (rotate) { work[*n] = cs; work[*n + *n] = sn; } } /* Update singular vectors if desired. */ if (*nru > 0) { if (sqre1 == 0) { dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ u_offset], ldu); } else { dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ u_offset], ldu); } } if (*ncc > 0) { if (sqre1 == 0) { dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ c_offset], ldc); } else { dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ c_offset], ldc); } } } /* Call DBDSQR to compute the SVD of the reduced real N-by-N upper bidiagonal matrix. */ dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ u_offset], ldu, &c__[c_offset], ldc, &work[1], info); /* Sort the singular values into ascending order (insertion sort on singular values, but only one transposition per singular vector) */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Scan for smallest D(I). */ isub = i__; smin = d__[i__]; i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { if (d__[j] < smin) { isub = j; smin = d__[j]; } /* L30: */ } if (isub != i__) { /* Swap singular values and vectors. */ d__[isub] = d__[i__]; d__[i__] = smin; if (*ncvt > 0) { dswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(i__, 1), ldvt); } if (*nru > 0) { dswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, i__), &c__1); } if (*ncc > 0) { dswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(i__, 1), ldc); } } /* L40: */ } return 0; /* End of DLASDQ */ } /* dlasdq_ */
/* Subroutine */ int cggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, complex *a, integer *lda, complex *b, integer *ldb, real *tola, real *tolb, integer *k, integer *l, complex *u, integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, integer *iwork, real *rwork, complex *tau, complex *work, integer * info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CGGSVP computes unitary matrices U, V and Q such that N-K-L K L U'*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; L ( 0 0 A23 ) M-K-L ( 0 0 0 ) N-K-L K L = K ( 0 A12 A13 ) if M-K-L < 0; M-K ( 0 0 A23 ) N-K-L K L V'*B*Q = L ( 0 0 B13 ) P-L ( 0 0 0 ) where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective numerical rank of the (M+P)-by-N matrix (A',B')'. Z' denotes the conjugate transpose of Z. This decomposition is the preprocessing step for computing the Generalized Singular Value Decomposition (GSVD), see subroutine CGGSVD. Arguments ========= JOBU (input) CHARACTER*1 = 'U': Unitary matrix U is computed; = 'N': U is not computed. JOBV (input) CHARACTER*1 = 'V': Unitary matrix V is computed; = 'N': V is not computed. JOBQ (input) CHARACTER*1 = 'Q': Unitary matrix Q is computed; = 'N': Q is not computed. M (input) INTEGER The number of rows of the matrix A. M >= 0. P (input) INTEGER The number of rows of the matrix B. P >= 0. N (input) INTEGER The number of columns of the matrices A and B. N >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, A contains the triangular (or trapezoidal) matrix described in the Purpose section. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input/output) COMPLEX array, dimension (LDB,N) On entry, the P-by-N matrix B. On exit, B contains the triangular matrix described in the Purpose section. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,P). TOLA (input) REAL TOLB (input) REAL TOLA and TOLB are the thresholds to determine the effective numerical rank of matrix B and a subblock of A. Generally, they are set to TOLA = MAX(M,N)*norm(A)*MACHEPS, TOLB = MAX(P,N)*norm(B)*MACHEPS. The size of TOLA and TOLB may affect the size of backward errors of the decomposition. K (output) INTEGER L (output) INTEGER On exit, K and L specify the dimension of the subblocks described in Purpose section. K + L = effective numerical rank of (A',B')'. U (output) COMPLEX array, dimension (LDU,M) If JOBU = 'U', U contains the unitary matrix U. If JOBU = 'N', U is not referenced. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,M) if JOBU = 'U'; LDU >= 1 otherwise. V (output) COMPLEX array, dimension (LDV,M) If JOBV = 'V', V contains the unitary matrix V. If JOBV = 'N', V is not referenced. LDV (input) INTEGER The leading dimension of the array V. LDV >= max(1,P) if JOBV = 'V'; LDV >= 1 otherwise. Q (output) COMPLEX array, dimension (LDQ,N) If JOBQ = 'Q', Q contains the unitary matrix Q. If JOBQ = 'N', Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,N) if JOBQ = 'Q'; LDQ >= 1 otherwise. IWORK (workspace) INTEGER array, dimension (N) RWORK (workspace) REAL array, dimension (2*N) TAU (workspace) COMPLEX array, dimension (N) WORK (workspace) COMPLEX array, dimension (max(3*N,M,P)) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== The subroutine uses LAPACK subroutine CGEQPF for the QR factorization with column pivoting to detect the effective numerical rank of the a matrix. It may be replaced by a better rank determination strategy. ===================================================================== Test the input parameters Parameter adjustments */ /* Table of constant values */ static complex c_b1 = {0.f,0.f}; static complex c_b2 = {1.f,0.f}; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, i__3; real r__1, r__2; /* Builtin functions */ double r_imag(complex *); /* Local variables */ static integer i__, j; extern logical lsame_(char *, char *); static logical wantq, wantu, wantv; extern /* Subroutine */ int cgeqr2_(integer *, integer *, complex *, integer *, complex *, complex *, integer *), cgerq2_(integer *, integer *, complex *, integer *, complex *, complex *, integer *), cung2r_(integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *), cunm2r_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *), cunmr2_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *), cgeqpf_(integer *, integer *, complex *, integer *, integer *, complex *, complex *, real *, integer *), clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *), clapmt_(logical *, integer *, integer *, complex *, integer *, integer *); static logical forwrd; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define u_subscr(a_1,a_2) (a_2)*u_dim1 + a_1 #define u_ref(a_1,a_2) u[u_subscr(a_1,a_2)] #define v_subscr(a_1,a_2) (a_2)*v_dim1 + a_1 #define v_ref(a_1,a_2) v[v_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1 * 1; v -= v_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --iwork; --rwork; --tau; --work; /* Function Body */ wantu = lsame_(jobu, "U"); wantv = lsame_(jobv, "V"); wantq = lsame_(jobq, "Q"); forwrd = TRUE_; *info = 0; if (! (wantu || lsame_(jobu, "N"))) { *info = -1; } else if (! (wantv || lsame_(jobv, "N"))) { *info = -2; } else if (! (wantq || lsame_(jobq, "N"))) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*p < 0) { *info = -5; } else if (*n < 0) { *info = -6; } else if (*lda < max(1,*m)) { *info = -8; } else if (*ldb < max(1,*p)) { *info = -10; } else if (*ldu < 1 || wantu && *ldu < *m) { *info = -16; } else if (*ldv < 1 || wantv && *ldv < *p) { *info = -18; } else if (*ldq < 1 || wantq && *ldq < *n) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("CGGSVP", &i__1); return 0; } /* QR with column pivoting of B: B*P = V*( S11 S12 ) ( 0 0 ) */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { iwork[i__] = 0; /* L10: */ } cgeqpf_(p, n, &b[b_offset], ldb, &iwork[1], &tau[1], &work[1], &rwork[1], info); /* Update A := A*P */ clapmt_(&forwrd, m, n, &a[a_offset], lda, &iwork[1]); /* Determine the effective rank of matrix B. */ *l = 0; i__1 = min(*p,*n); for (i__ = 1; i__ <= i__1; ++i__) { i__2 = b_subscr(i__, i__); if ((r__1 = b[i__2].r, dabs(r__1)) + (r__2 = r_imag(&b_ref(i__, i__)), dabs(r__2)) > *tolb) { ++(*l); } /* L20: */ } if (wantv) { /* Copy the details of V, and form V. */ claset_("Full", p, p, &c_b1, &c_b1, &v[v_offset], ldv); if (*p > 1) { i__1 = *p - 1; clacpy_("Lower", &i__1, n, &b_ref(2, 1), ldb, &v_ref(2, 1), ldv); } i__1 = min(*p,*n); cung2r_(p, p, &i__1, &v[v_offset], ldv, &tau[1], &work[1], info); } /* Clean up B */ i__1 = *l - 1; for (j = 1; j <= i__1; ++j) { i__2 = *l; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = b_subscr(i__, j); b[i__3].r = 0.f, b[i__3].i = 0.f; /* L30: */ } /* L40: */ } if (*p > *l) { i__1 = *p - *l; claset_("Full", &i__1, n, &c_b1, &c_b1, &b_ref(*l + 1, 1), ldb); } if (wantq) { /* Set Q = I and Update Q := Q*P */ claset_("Full", n, n, &c_b1, &c_b2, &q[q_offset], ldq); clapmt_(&forwrd, n, n, &q[q_offset], ldq, &iwork[1]); } if (*p >= *l && *n != *l) { /* RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z */ cgerq2_(l, n, &b[b_offset], ldb, &tau[1], &work[1], info); /* Update A := A*Z' */ cunmr2_("Right", "Conjugate transpose", m, n, l, &b[b_offset], ldb, & tau[1], &a[a_offset], lda, &work[1], info); if (wantq) { /* Update Q := Q*Z' */ cunmr2_("Right", "Conjugate transpose", n, n, l, &b[b_offset], ldb, &tau[1], &q[q_offset], ldq, &work[1], info); } /* Clean up B */ i__1 = *n - *l; claset_("Full", l, &i__1, &c_b1, &c_b1, &b[b_offset], ldb); i__1 = *n; for (j = *n - *l + 1; j <= i__1; ++j) { i__2 = *l; for (i__ = j - *n + *l + 1; i__ <= i__2; ++i__) { i__3 = b_subscr(i__, j); b[i__3].r = 0.f, b[i__3].i = 0.f; /* L50: */ } /* L60: */ } } /* Let N-L L A = ( A11 A12 ) M, then the following does the complete QR decomposition of A11: A11 = U*( 0 T12 )*P1' ( 0 0 ) */ i__1 = *n - *l; for (i__ = 1; i__ <= i__1; ++i__) { iwork[i__] = 0; /* L70: */ } i__1 = *n - *l; cgeqpf_(m, &i__1, &a[a_offset], lda, &iwork[1], &tau[1], &work[1], &rwork[ 1], info); /* Determine the effective rank of A11 */ *k = 0; /* Computing MIN */ i__2 = *m, i__3 = *n - *l; i__1 = min(i__2,i__3); for (i__ = 1; i__ <= i__1; ++i__) { i__2 = a_subscr(i__, i__); if ((r__1 = a[i__2].r, dabs(r__1)) + (r__2 = r_imag(&a_ref(i__, i__)), dabs(r__2)) > *tola) { ++(*k); } /* L80: */ } /* Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N ) Computing MIN */ i__2 = *m, i__3 = *n - *l; i__1 = min(i__2,i__3); cunm2r_("Left", "Conjugate transpose", m, l, &i__1, &a[a_offset], lda, & tau[1], &a_ref(1, *n - *l + 1), lda, &work[1], info); if (wantu) { /* Copy the details of U, and form U */ claset_("Full", m, m, &c_b1, &c_b1, &u[u_offset], ldu); if (*m > 1) { i__1 = *m - 1; i__2 = *n - *l; clacpy_("Lower", &i__1, &i__2, &a_ref(2, 1), lda, &u_ref(2, 1), ldu); } /* Computing MIN */ i__2 = *m, i__3 = *n - *l; i__1 = min(i__2,i__3); cung2r_(m, m, &i__1, &u[u_offset], ldu, &tau[1], &work[1], info); } if (wantq) { /* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 */ i__1 = *n - *l; clapmt_(&forwrd, n, &i__1, &q[q_offset], ldq, &iwork[1]); } /* Clean up A: set the strictly lower triangular part of A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. */ i__1 = *k - 1; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = a_subscr(i__, j); a[i__3].r = 0.f, a[i__3].i = 0.f; /* L90: */ } /* L100: */ } if (*m > *k) { i__1 = *m - *k; i__2 = *n - *l; claset_("Full", &i__1, &i__2, &c_b1, &c_b1, &a_ref(*k + 1, 1), lda); } if (*n - *l > *k) { /* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 */ i__1 = *n - *l; cgerq2_(k, &i__1, &a[a_offset], lda, &tau[1], &work[1], info); if (wantq) { /* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1' */ i__1 = *n - *l; cunmr2_("Right", "Conjugate transpose", n, &i__1, k, &a[a_offset], lda, &tau[1], &q[q_offset], ldq, &work[1], info); } /* Clean up A */ i__1 = *n - *l - *k; claset_("Full", k, &i__1, &c_b1, &c_b1, &a[a_offset], lda); i__1 = *n - *l; for (j = *n - *l - *k + 1; j <= i__1; ++j) { i__2 = *k; for (i__ = j - *n + *l + *k + 1; i__ <= i__2; ++i__) { i__3 = a_subscr(i__, j); a[i__3].r = 0.f, a[i__3].i = 0.f; /* L110: */ } /* L120: */ } } if (*m > *k) { /* QR factorization of A( K+1:M,N-L+1:N ) */ i__1 = *m - *k; cgeqr2_(&i__1, l, &a_ref(*k + 1, *n - *l + 1), lda, &tau[1], &work[1], info); if (wantu) { /* Update U(:,K+1:M) := U(:,K+1:M)*U1 */ i__1 = *m - *k; /* Computing MIN */ i__3 = *m - *k; i__2 = min(i__3,*l); cunm2r_("Right", "No transpose", m, &i__1, &i__2, &a_ref(*k + 1, * n - *l + 1), lda, &tau[1], &u_ref(1, *k + 1), ldu, &work[ 1], info); } /* Clean up */ i__1 = *n; for (j = *n - *l + 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j - *n + *k + *l + 1; i__ <= i__2; ++i__) { i__3 = a_subscr(i__, j); a[i__3].r = 0.f, a[i__3].i = 0.f; /* L130: */ } /* L140: */ } } return 0; /* End of CGGSVP */ } /* cggsvp_ */
/* Subroutine */ int sstt21_(integer *n, integer *kband, real *ad, real *ae, real *sd, real *se, real *u, integer *ldu, real *work, real *result) { /* System generated locals */ integer u_dim1, u_offset, i__1; real r__1, r__2, r__3; /* Local variables */ static real unfl; extern /* Subroutine */ int ssyr_(char *, integer *, real *, real *, integer *, real *, integer *); static real temp1, temp2; static integer j; extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, integer *, real *, integer *, real *, integer *), sgemm_( char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static real anorm, wnorm; extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, real *, real *, integer *); extern doublereal slansy_(char *, char *, integer *, real *, integer *, real *); static real ulp; #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SSTT21 checks a decomposition of the form A = U S U' where ' means transpose, A is symmetric tridiagonal, U is orthogonal, and S is diagonal (if KBAND=0) or symmetric tridiagonal (if KBAND=1). Two tests are performed: RESULT(1) = | A - U S U' | / ( |A| n ulp ) RESULT(2) = | I - UU' | / ( n ulp ) Arguments ========= N (input) INTEGER The size of the matrix. If it is zero, SSTT21 does nothing. It must be at least zero. KBAND (input) INTEGER The bandwidth of the matrix S. It may only be zero or one. If zero, then S is diagonal, and SE is not referenced. If one, then S is symmetric tri-diagonal. AD (input) REAL array, dimension (N) The diagonal of the original (unfactored) matrix A. A is assumed to be symmetric tridiagonal. AE (input) REAL array, dimension (N-1) The off-diagonal of the original (unfactored) matrix A. A is assumed to be symmetric tridiagonal. AE(1) is the (1,2) and (2,1) element, AE(2) is the (2,3) and (3,2) element, etc. SD (input) REAL array, dimension (N) The diagonal of the (symmetric tri-) diagonal matrix S. SE (input) REAL array, dimension (N-1) The off-diagonal of the (symmetric tri-) diagonal matrix S. Not referenced if KBSND=0. If KBAND=1, then AE(1) is the (1,2) and (2,1) element, SE(2) is the (2,3) and (3,2) element, etc. U (input) REAL array, dimension (LDU, N) The orthogonal matrix in the decomposition. LDU (input) INTEGER The leading dimension of U. LDU must be at least N. WORK (workspace) REAL array, dimension (N*(N+1)) RESULT (output) REAL array, dimension (2) The values computed by the two tests described above. The values are currently limited to 1/ulp, to avoid overflow. RESULT(1) is always modified. ===================================================================== 1) Constants Parameter adjustments */ --ad; --ae; --sd; --se; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --work; --result; /* Function Body */ result[1] = 0.f; result[2] = 0.f; if (*n <= 0) { return 0; } unfl = slamch_("Safe minimum"); ulp = slamch_("Precision"); /* Do Test 1 Copy A & Compute its 1-Norm: */ slaset_("Full", n, n, &c_b5, &c_b5, &work[1], n); anorm = 0.f; temp1 = 0.f; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { work[(*n + 1) * (j - 1) + 1] = ad[j]; work[(*n + 1) * (j - 1) + 2] = ae[j]; temp2 = (r__1 = ae[j], dabs(r__1)); /* Computing MAX */ r__2 = anorm, r__3 = (r__1 = ad[j], dabs(r__1)) + temp1 + temp2; anorm = dmax(r__2,r__3); temp1 = temp2; /* L10: */ } /* Computing 2nd power */ i__1 = *n; work[i__1 * i__1] = ad[*n]; /* Computing MAX */ r__2 = anorm, r__3 = (r__1 = ad[*n], dabs(r__1)) + temp1, r__2 = max(r__2, r__3); anorm = dmax(r__2,unfl); /* Norm of A - USU' */ i__1 = *n; for (j = 1; j <= i__1; ++j) { r__1 = -sd[j]; ssyr_("L", n, &r__1, &u_ref(1, j), &c__1, &work[1], n); /* L20: */ } if (*n > 1 && *kband == 1) { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { r__1 = -se[j]; ssyr2_("L", n, &r__1, &u_ref(1, j), &c__1, &u_ref(1, j + 1), & c__1, &work[1], n); /* L30: */ } } /* Computing 2nd power */ i__1 = *n; wnorm = slansy_("1", "L", n, &work[1], n, &work[i__1 * i__1 + 1]); if (anorm > wnorm) { result[1] = wnorm / anorm / (*n * ulp); } else { if (anorm < 1.f) { /* Computing MIN */ r__1 = wnorm, r__2 = *n * anorm; result[1] = dmin(r__1,r__2) / anorm / (*n * ulp); } else { /* Computing MIN */ r__1 = wnorm / anorm, r__2 = (real) (*n); result[1] = dmin(r__1,r__2) / (*n * ulp); } } /* Do Test 2 Compute UU' - I */ sgemm_("N", "C", n, n, n, &c_b19, &u[u_offset], ldu, &u[u_offset], ldu, & c_b5, &work[1], n); i__1 = *n; for (j = 1; j <= i__1; ++j) { work[(*n + 1) * (j - 1) + 1] += -1.f; /* L40: */ } /* Computing MIN Computing 2nd power */ i__1 = *n; r__1 = (real) (*n), r__2 = slange_("1", n, n, &work[1], n, &work[i__1 * i__1 + 1]); result[2] = dmin(r__1,r__2) / (*n * ulp); return 0; /* End of SSTT21 */ } /* sstt21_ */
/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ static doublereal beta; static integer idxq, nlvl, i__, j, m; static doublereal alpha; static integer inode, ndiml, ndimr, idxqi, itemp; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer sqrei, i1; extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); static integer ic, nwork1, lf, nd, nwork2, ll, nl, vf, nr, vl; extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), dlaset_( char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); static integer im1, smlszp, ncc, nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; #define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1] #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= Using a divide and conquer approach, DLASDA computes the singular value decomposition (SVD) of a real upper bidiagonal N-by-M matrix B with diagonal D and offdiagonal E, where M = N + SQRE. The algorithm computes the singular values in the SVD B = U * S * VT. The orthogonal matrices U and VT are optionally computed in compact form. A related subroutine, DLASD0, computes the singular values and the singular vectors in explicit form. Arguments ========= ICOMPQ (input) INTEGER Specifies whether singular vectors are to be computed in compact form, as follows = 0: Compute singular values only. = 1: Compute singular vectors of upper bidiagonal matrix in compact form. SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The row dimension of the upper bidiagonal matrix. This is also the dimension of the main diagonal array D. SQRE (input) INTEGER Specifies the column dimension of the bidiagonal matrix. = 0: The bidiagonal matrix has column dimension M = N; = 1: The bidiagonal matrix has column dimension M = N + 1. D (input/output) DOUBLE PRECISION array, dimension ( N ) On entry D contains the main diagonal of the bidiagonal matrix. On exit D, if INFO = 0, contains its singular values. E (input) DOUBLE PRECISION array, dimension ( M-1 ) Contains the subdiagonal entries of the bidiagonal matrix. On exit, E has been destroyed. U (output) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left singular vector matrices of all subproblems at the bottom level. LDU (input) INTEGER, LDU = > N. The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. VT (output) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right singular vector matrices of all subproblems at the bottom level. K (output) INTEGER array, dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th secular equation on the computation tree. DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), where NLVL = floor(log_2 (N/SMLSIZ))). DIFR (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(1:N, 2 * I ) contains the normalizing factors for the right singular vector matrix. See DLASD8 for details. Z (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. The first K elements of Z(1, I) contain the components of the deflation-adjusted updating row vector for subproblems on the I-th level. POLES (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and POLES(1, 2*I) contain the new and old singular values involved in the secular equations on the I-th level. GIVPTR (output) INTEGER array, dimension ( N ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records the number of Givens rotations performed on the I-th problem on the computation tree. GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations of Givens rotations performed on the I-th level on the computation tree. LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. PERM (output) INTEGER array, dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records permutations done on the I-th level of the computation tree. GIVNUM (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- values of Givens rotations performed on the I-th level on the computation tree. C (output) DOUBLE PRECISION array, dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 and the I-th subproblem is not square, on exit, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. S (output) DOUBLE PRECISION array, dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 and the I-th subproblem is not square, on exit, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. WORK (workspace) DOUBLE PRECISION array, dimension (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). IWORK (workspace) INTEGER array. Dimension must be at least (7 * N). INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, an singular value did not converge Further Details =============== Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1 * 1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1 * 1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; --c__; --s; --work; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } else if (*ldu < *n + *sqre) { *info = -8; } else if (*ldgcol < *n) { *info = -17; } if (*info != 0) { i__1 = -(*info); xerbla_("DLASDA", &i__1); return 0; } m = *n + *sqre; /* If the input matrix is too small, call DLASDQ to find the SVD. */ if (*n <= *smlsiz) { if (*icompq == 0) { dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & work[1], info); } else { dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); } return 0; } /* Book-keeping and set up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; idxq = ndimr + *n; iwk = idxq + *n; ncc = 0; nru = 0; smlszp = *smlsiz + 1; vf = 1; vl = vf + m; nwork1 = vl + m; nwork2 = nwork1 + smlszp * smlszp; dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* for the nodes on bottom level of the tree, solve their subproblems by DLASDQ. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nlp1 = nl + 1; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; idxqi = idxq + nlf - 2; vfi = vf + nlf - 1; vli = vl + nlf - 1; sqrei = 1; if (*icompq == 0) { dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &work[nwork1], &smlszp); dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], &nl, &work[nwork2], info); itemp = nwork1 + nl * smlszp; dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); } else { dlaset_("A", &nl, &nl, &c_b11, &c_b12, &u_ref(nlf, 1), ldu); dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &vt_ref(nlf, 1), ldu); dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & vt_ref(nlf, 1), ldu, &u_ref(nlf, 1), ldu, &u_ref(nlf, 1), ldu, &work[nwork1], info); dcopy_(&nlp1, &vt_ref(nlf, 1), &c__1, &work[vfi], &c__1); dcopy_(&nlp1, &vt_ref(nlf, nlp1), &c__1, &work[vli], &c__1); } if (*info != 0) { return 0; } i__2 = nl; for (j = 1; j <= i__2; ++j) { iwork[idxqi + j] = j; /* L10: */ } if (i__ == nd && *sqre == 0) { sqrei = 0; } else { sqrei = 1; } idxqi += nlp1; vfi += nlp1; vli += nlp1; nrp1 = nr + sqrei; if (*icompq == 0) { dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &work[nwork1], &smlszp); dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], &nr, &work[nwork2], info); itemp = nwork1 + (nrp1 - 1) * smlszp; dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); } else { dlaset_("A", &nr, &nr, &c_b11, &c_b12, &u_ref(nrf, 1), ldu); dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &vt_ref(nrf, 1), ldu); dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & vt_ref(nrf, 1), ldu, &u_ref(nrf, 1), ldu, &u_ref(nrf, 1), ldu, &work[nwork1], info); dcopy_(&nrp1, &vt_ref(nrf, 1), &c__1, &work[vfi], &c__1); dcopy_(&nrp1, &vt_ref(nrf, nrp1), &c__1, &work[vli], &c__1); } if (*info != 0) { return 0; } i__2 = nr; for (j = 1; j <= i__2; ++j) { iwork[idxqi + j] = j; /* L20: */ } /* L30: */ } /* Now conquer each subproblem bottom-up. */ j = pow_ii(&c__2, &nlvl); for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqrei = *sqre; } else { sqrei = 1; } vfi = vf + nlf - 1; vli = vl + nlf - 1; idxqi = idxq + nlf - 1; alpha = d__[ic]; beta = e[ic]; if (*icompq == 0) { dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm[ perm_offset], &givptr[1], &givcol[givcol_offset], ldgcol, &givnum[givnum_offset], ldu, &poles[ poles_offset], &difl[difl_offset], &difr[difr_offset], &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], &iwork[iwk], info); } else { --j; dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm_ref( nlf, lvl), &givptr[j], &givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), & difl_ref(nlf, lvl), &difr_ref(nlf, lvl2), &z___ref( nlf, lvl), &k[j], &c__[j], &s[j], &work[nwork1], & iwork[iwk], info); } if (*info != 0) { return 0; } /* L40: */ } /* L50: */ } return 0; /* End of DLASDA */ } /* dlasda_ */
/* Subroutine */ int slasd0_(integer *n, integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, integer *iwork, real *work, integer *info) { /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ static real beta; static integer idxq, nlvl, i__, j, m; static real alpha; static integer inode, ndiml, idxqc, ndimr, itemp, sqrei, i1; extern /* Subroutine */ int slasd1_(integer *, integer *, integer *, real *, real *, real *, real *, integer *, real *, integer *, integer * , integer *, real *, integer *); static integer ic, lf, nd, ll, nl, nr; extern /* Subroutine */ int xerbla_(char *, integer *), slasdq_( char *, integer *, integer *, integer *, integer *, integer *, real *, real *, real *, integer *, real *, integer *, real *, integer *, real *, integer *), slasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *); static integer im1, ncc, nlf, nrf, iwk, lvl, ndb1, nlp1, nrp1; #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] /* -- LAPACK auxiliary routine (instrumented to count ops, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= Using a divide and conquer approach, SLASD0 computes the singular value decomposition (SVD) of a real upper bidiagonal N-by-M matrix B with diagonal D and offdiagonal E, where M = N + SQRE. The algorithm computes orthogonal matrices U and VT such that B = U * S * VT. The singular values S are overwritten on D. A related subroutine, SLASDA, computes only the singular values, and optionally, the singular vectors in compact form. Arguments ========= N (input) INTEGER On entry, the row dimension of the upper bidiagonal matrix. This is also the dimension of the main diagonal array D. SQRE (input) INTEGER Specifies the column dimension of the bidiagonal matrix. = 0: The bidiagonal matrix has column dimension M = N; = 1: The bidiagonal matrix has column dimension M = N+1; D (input/output) REAL array, dimension (N) On entry D contains the main diagonal of the bidiagonal matrix. On exit D, if INFO = 0, contains its singular values. E (input) REAL array, dimension (M-1) Contains the subdiagonal entries of the bidiagonal matrix. On exit, E has been destroyed. U (output) REAL array, dimension at least (LDQ, N) On exit, U contains the left singular vectors. LDU (input) INTEGER On entry, leading dimension of U. VT (output) REAL array, dimension at least (LDVT, M) On exit, VT' contains the right singular vectors. LDVT (input) INTEGER On entry, leading dimension of VT. SMLSIZ (input) INTEGER On entry, maximum size of the subproblems at the bottom of the computation tree. IWORK INTEGER work array. Dimension must be at least (8 * N) WORK REAL work array. Dimension must be at least (3 * M**2 + 2 * M) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, an singular value did not converge Further Details =============== Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; --iwork; --work; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*sqre < 0 || *sqre > 1) { *info = -2; } m = *n + *sqre; if (*ldu < *n) { *info = -6; } else if (*ldvt < m) { *info = -8; } else if (*smlsiz < 3) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("SLASD0", &i__1); return 0; } /* If the input matrix is too small, call SLASDQ to find the SVD. */ if (*n <= *smlsiz) { slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); return 0; } /* Set up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; idxq = ndimr + *n; iwk = idxq + *n; slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* For the nodes on bottom level of the tree, solve their subproblems by SLASDQ. */ ndb1 = (nd + 1) / 2; ncc = 0; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nlp1 = nl + 1; nr = iwork[ndimr + i1]; nrp1 = nr + 1; nlf = ic - nl; nrf = ic + 1; sqrei = 1; slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & vt_ref(nlf, nlf), ldvt, &u_ref(nlf, nlf), ldu, &u_ref(nlf, nlf), ldu, &work[1], info); if (*info != 0) { return 0; } itemp = idxq + nlf - 2; i__2 = nl; for (j = 1; j <= i__2; ++j) { iwork[itemp + j] = j; /* L10: */ } if (i__ == nd) { sqrei = *sqre; } else { sqrei = 1; } nrp1 = nr + sqrei; slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & vt_ref(nrf, nrf), ldvt, &u_ref(nrf, nrf), ldu, &u_ref(nrf, nrf), ldu, &work[1], info); if (*info != 0) { return 0; } itemp = idxq + ic; i__2 = nr; for (j = 1; j <= i__2; ++j) { iwork[itemp + j - 1] = j; /* L20: */ } /* L30: */ } /* Now conquer each subproblem bottom-up. */ for (lvl = nlvl; lvl >= 1; --lvl) { /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; if (*sqre == 0 && i__ == ll) { sqrei = *sqre; } else { sqrei = 1; } idxqc = idxq + nlf - 1; alpha = d__[ic]; beta = e[ic]; slasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u_ref(nlf, nlf), ldu, &vt_ref(nlf, nlf), ldvt, &iwork[idxqc], &iwork[ iwk], &work[1], info); if (*info != 0) { return 0; } /* L40: */ } /* L50: */ } return 0; /* End of SLASD0 */ } /* slasd0_ */
/* Subroutine */ int zbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, doublereal *d__, doublereal *e, doublecomplex *vt, integer *ldvt, doublecomplex *u, integer *ldu, doublecomplex *c__, integer *ldc, doublereal *rwork, integer *info) { /* System generated locals */ integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( doublereal *, doublereal *); /* Local variables */ static doublereal abse; static integer idir; static doublereal abss; static integer oldm; static doublereal cosl; static integer isub, iter; static doublereal unfl, sinl, cosr, smin, smax, sinr; extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); static doublereal f, g, h__; static integer i__, j, m; static doublereal r__; extern logical lsame_(char *, char *); static doublereal oldcs; static integer oldll; static doublereal shift, sigmn, oldsn; static integer maxit; static doublereal sminl, sigmx; static logical lower; extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublecomplex *, integer *), zdrot_(integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublereal *) , zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), dlasq1_(integer *, doublereal *, doublereal *, doublereal *, integer *), dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); static doublereal cs; static integer ll; extern doublereal dlamch_(char *); static doublereal sn, mu; extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), xerbla_(char *, integer *), zdscal_(integer *, doublereal *, doublecomplex *, integer *); static doublereal sminoa, thresh; static logical rotate; static doublereal sminlo; static integer nm1; static doublereal tolmul; static integer nm12, nm13, lll; static doublereal eps, sll, tol; #define c___subscr(a_1,a_2) (a_2)*c_dim1 + a_1 #define c___ref(a_1,a_2) c__[c___subscr(a_1,a_2)] #define u_subscr(a_1,a_2) (a_2)*u_dim1 + a_1 #define u_ref(a_1,a_2) u[u_subscr(a_1,a_2)] #define vt_subscr(a_1,a_2) (a_2)*vt_dim1 + a_1 #define vt_ref(a_1,a_2) vt[vt_subscr(a_1,a_2)] /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= ZBDSQR computes the singular value decomposition (SVD) of a real N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' denotes the transpose of P), where S is a diagonal matrix with non-negative diagonal elements (the singular values of B), and Q and P are orthogonal matrices. The routine computes S, and optionally computes U * Q, P' * VT, or Q' * C, for given complex input matrices U, VT, and C. See "Computing Small Singular Values of Bidiagonal Matrices With Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, no. 5, pp. 873-912, Sept 1990) and "Accurate singular values and differential qd algorithms," by B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics Department, University of California at Berkeley, July 1992 for a detailed description of the algorithm. Arguments ========= UPLO (input) CHARACTER*1 = 'U': B is upper bidiagonal; = 'L': B is lower bidiagonal. N (input) INTEGER The order of the matrix B. N >= 0. NCVT (input) INTEGER The number of columns of the matrix VT. NCVT >= 0. NRU (input) INTEGER The number of rows of the matrix U. NRU >= 0. NCC (input) INTEGER The number of columns of the matrix C. NCC >= 0. D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the n diagonal elements of the bidiagonal matrix B. On exit, if INFO=0, the singular values of B in decreasing order. E (input/output) DOUBLE PRECISION array, dimension (N) On entry, the elements of E contain the offdiagonal elements of of the bidiagonal matrix whose SVD is desired. On normal exit (INFO = 0), E is destroyed. If the algorithm does not converge (INFO > 0), D and E will contain the diagonal and superdiagonal elements of a bidiagonal matrix orthogonally equivalent to the one given as input. E(N) is used for workspace. VT (input/output) COMPLEX*16 array, dimension (LDVT, NCVT) On entry, an N-by-NCVT matrix VT. On exit, VT is overwritten by P' * VT. VT is not referenced if NCVT = 0. LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. U (input/output) COMPLEX*16 array, dimension (LDU, N) On entry, an NRU-by-N matrix U. On exit, U is overwritten by U * Q. U is not referenced if NRU = 0. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,NRU). C (input/output) COMPLEX*16 array, dimension (LDC, NCC) On entry, an N-by-NCC matrix C. On exit, C is overwritten by Q' * C. C is not referenced if NCC = 0. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. RWORK (workspace) DOUBLE PRECISION array, dimension (4*N) INFO (output) INTEGER = 0: successful exit < 0: If INFO = -i, the i-th argument had an illegal value > 0: the algorithm did not converge; D and E contain the elements of a bidiagonal matrix which is orthogonally similar to the input matrix B; if INFO = i, i elements of E have not converged to zero. Internal Parameters =================== TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) TOLMUL controls the convergence criterion of the QR loop. If it is positive, TOLMUL*EPS is the desired relative precision in the computed singular values. If it is negative, abs(TOLMUL*EPS*sigma_max) is the desired absolute accuracy in the computed singular values (corresponds to relative accuracy abs(TOLMUL*EPS) in the largest singular value. abs(TOLMUL) should be between 1 and 1/EPS, and preferably between 10 (for fast convergence) and .1/EPS (for there to be some accuracy in the results). Default is to lose at either one eighth or 2 of the available decimal digits in each computed singular value (whichever is smaller). MAXITR INTEGER, default = 6 MAXITR controls the maximum number of passes of the algorithm through its inner loop. The algorithms stops (and so fails to converge) if the number of passes through the inner loop exceeds MAXITR*N**2. ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; --rwork; /* Function Body */ *info = 0; lower = lsame_(uplo, "L"); if (! lsame_(uplo, "U") && ! lower) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ncvt < 0) { *info = -3; } else if (*nru < 0) { *info = -4; } else if (*ncc < 0) { *info = -5; } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { *info = -9; } else if (*ldu < max(1,*nru)) { *info = -11; } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { *info = -13; } if (*info != 0) { i__1 = -(*info); xerbla_("ZBDSQR", &i__1); return 0; } if (*n == 0) { return 0; } if (*n == 1) { goto L160; } /* ROTATE is true if any singular vectors desired, false otherwise */ rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; /* If no singular vectors desired, use qd algorithm */ if (! rotate) { dlasq1_(n, &d__[1], &e[1], &rwork[1], info); return 0; } nm1 = *n - 1; nm12 = nm1 + nm1; nm13 = nm12 + nm1; idir = 0; /* Get machine constants */ eps = dlamch_("Epsilon"); unfl = dlamch_("Safe minimum"); /* If matrix lower bidiagonal, rotate to be upper bidiagonal by applying Givens rotations on the left */ if (lower) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; rwork[i__] = cs; rwork[nm1 + i__] = sn; /* L10: */ } /* Update singular vectors if desired */ if (*nru > 0) { zlasr_("R", "V", "F", nru, n, &rwork[1], &rwork[*n], &u[u_offset], ldu); } if (*ncc > 0) { zlasr_("L", "V", "F", n, ncc, &rwork[1], &rwork[*n], &c__[ c_offset], ldc); } } /* Compute singular values to relative accuracy TOL (By setting TOL to be negative, algorithm will compute singular values to absolute accuracy ABS(TOL)*norm(input matrix)) Computing MAX Computing MIN */ d__3 = 100., d__4 = pow_dd(&eps, &c_b15); d__1 = 10., d__2 = min(d__3,d__4); tolmul = max(d__1,d__2); tol = tolmul * eps; /* Compute approximate maximum, minimum singular values */ smax = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); smax = max(d__2,d__3); /* L20: */ } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); smax = max(d__2,d__3); /* L30: */ } sminl = 0.; if (tol >= 0.) { /* Relative accuracy desired */ sminoa = abs(d__[1]); if (sminoa == 0.) { goto L50; } mu = sminoa; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] , abs(d__1)))); sminoa = min(sminoa,mu); if (sminoa == 0.) { goto L50; } /* L40: */ } L50: sminoa /= sqrt((doublereal) (*n)); /* Computing MAX */ d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; thresh = max(d__1,d__2); } else { /* Absolute accuracy desired Computing MAX */ d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; thresh = max(d__1,d__2); } /* Prepare for main iteration loop for the singular values (MAXIT is the maximum number of passes through the inner loop permitted before nonconvergence signalled.) */ maxit = *n * 6 * *n; iter = 0; oldll = -1; oldm = -1; /* M points to last element of unconverged part of matrix */ m = *n; /* Begin main iteration loop */ L60: /* Check for convergence or exceeding iteration count */ if (m <= 1) { goto L160; } if (iter > maxit) { goto L200; } /* Find diagonal block of matrix to work on */ if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { d__[m] = 0.; } smax = (d__1 = d__[m], abs(d__1)); smin = smax; i__1 = m - 1; for (lll = 1; lll <= i__1; ++lll) { ll = m - lll; abss = (d__1 = d__[ll], abs(d__1)); abse = (d__1 = e[ll], abs(d__1)); if (tol < 0. && abss <= thresh) { d__[ll] = 0.; } if (abse <= thresh) { goto L80; } smin = min(smin,abss); /* Computing MAX */ d__1 = max(smax,abss); smax = max(d__1,abse); /* L70: */ } ll = 0; goto L90; L80: e[ll] = 0.; /* Matrix splits since E(LL) = 0 */ if (ll == m - 1) { /* Convergence of bottom singular value, return to top of loop */ --m; goto L60; } L90: ++ll; /* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ if (ll == m - 1) { /* 2 by 2 block, handle separately */ dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl); d__[m - 1] = sigmx; e[m - 1] = 0.; d__[m] = sigmn; /* Compute singular vectors, if desired */ if (*ncvt > 0) { zdrot_(ncvt, &vt_ref(m - 1, 1), ldvt, &vt_ref(m, 1), ldvt, &cosr, &sinr); } if (*nru > 0) { zdrot_(nru, &u_ref(1, m - 1), &c__1, &u_ref(1, m), &c__1, &cosl, & sinl); } if (*ncc > 0) { zdrot_(ncc, &c___ref(m - 1, 1), ldc, &c___ref(m, 1), ldc, &cosl, & sinl); } m += -2; goto L60; } /* If working on new submatrix, choose shift direction (from larger end diagonal element towards smaller) */ if (ll > oldm || m < oldll) { if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { /* Chase bulge from top (big end) to bottom (small end) */ idir = 1; } else { /* Chase bulge from bottom (big end) to top (small end) */ idir = 2; } } /* Apply convergence tests */ if (idir == 1) { /* Run convergence test in forward direction First apply standard test to bottom of matrix */ if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) { e[m - 1] = 0.; goto L60; } if (tol >= 0.) { /* If relative accuracy desired, apply convergence criterion forward */ mu = (d__1 = d__[ll], abs(d__1)); sminl = mu; i__1 = m - 1; for (lll = ll; lll <= i__1; ++lll) { if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { e[lll] = 0.; goto L60; } sminlo = sminl; mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ lll], abs(d__1)))); sminl = min(sminl,mu); /* L100: */ } } } else { /* Run convergence test in backward direction First apply standard test to top of matrix */ if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { e[ll] = 0.; goto L60; } if (tol >= 0.) { /* If relative accuracy desired, apply convergence criterion backward */ mu = (d__1 = d__[m], abs(d__1)); sminl = mu; i__1 = ll; for (lll = m - 1; lll >= i__1; --lll) { if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { e[lll] = 0.; goto L60; } sminlo = sminl; mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] , abs(d__1)))); sminl = min(sminl,mu); /* L110: */ } } } oldll = ll; oldm = m; /* Compute shift. First, test if shifting would ruin relative accuracy, and if so set the shift to zero. Computing MAX */ d__1 = eps, d__2 = tol * .01; if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) { /* Use a zero shift to avoid loss of relative accuracy */ shift = 0.; } else { /* Compute the shift from 2-by-2 block at end of matrix */ if (idir == 1) { sll = (d__1 = d__[ll], abs(d__1)); dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); } else { sll = (d__1 = d__[m], abs(d__1)); dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); } /* Test if shift negligible, and if so set to zero */ if (sll > 0.) { /* Computing 2nd power */ d__1 = shift / sll; if (d__1 * d__1 < eps) { shift = 0.; } } } /* Increment iteration count */ iter = iter + m - ll; /* If SHIFT = 0, do simplified QR iteration */ if (shift == 0.) { if (idir == 1) { /* Chase bulge from top to bottom Save cosines and sines for later singular vector updates */ cs = 1.; oldcs = 1.; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { d__1 = d__[i__] * cs; dlartg_(&d__1, &e[i__], &cs, &sn, &r__); if (i__ > ll) { e[i__ - 1] = oldsn * r__; } d__1 = oldcs * r__; d__2 = d__[i__ + 1] * sn; dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); rwork[i__ - ll + 1] = cs; rwork[i__ - ll + 1 + nm1] = sn; rwork[i__ - ll + 1 + nm12] = oldcs; rwork[i__ - ll + 1 + nm13] = oldsn; /* L120: */ } h__ = d__[m] * cs; d__[m] = h__ * oldcs; e[m - 1] = h__ * oldsn; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "F", &i__1, ncvt, &rwork[1], &rwork[*n], & vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; zlasr_("R", "V", "F", nru, &i__1, &rwork[nm12 + 1], &rwork[ nm13 + 1], &u_ref(1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "F", &i__1, ncc, &rwork[nm12 + 1], &rwork[ nm13 + 1], &c___ref(ll, 1), ldc); } /* Test convergence */ if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { e[m - 1] = 0.; } } else { /* Chase bulge from bottom to top Save cosines and sines for later singular vector updates */ cs = 1.; oldcs = 1.; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { d__1 = d__[i__] * cs; dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); if (i__ < m) { e[i__] = oldsn * r__; } d__1 = oldcs * r__; d__2 = d__[i__ - 1] * sn; dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); rwork[i__ - ll] = cs; rwork[i__ - ll + nm1] = -sn; rwork[i__ - ll + nm12] = oldcs; rwork[i__ - ll + nm13] = -oldsn; /* L130: */ } h__ = d__[ll] * cs; d__[ll] = h__ * oldcs; e[ll] = h__ * oldsn; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "B", &i__1, ncvt, &rwork[nm12 + 1], &rwork[ nm13 + 1], &vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; zlasr_("R", "V", "B", nru, &i__1, &rwork[1], &rwork[*n], & u_ref(1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "B", &i__1, ncc, &rwork[1], &rwork[*n], & c___ref(ll, 1), ldc); } /* Test convergence */ if ((d__1 = e[ll], abs(d__1)) <= thresh) { e[ll] = 0.; } } } else { /* Use nonzero shift */ if (idir == 1) { /* Chase bulge from top to bottom Save cosines and sines for later singular vector updates */ f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[ ll]) + shift / d__[ll]); g = e[ll]; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { dlartg_(&f, &g, &cosr, &sinr, &r__); if (i__ > ll) { e[i__ - 1] = r__; } f = cosr * d__[i__] + sinr * e[i__]; e[i__] = cosr * e[i__] - sinr * d__[i__]; g = sinr * d__[i__ + 1]; d__[i__ + 1] = cosr * d__[i__ + 1]; dlartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__] + sinl * d__[i__ + 1]; d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; if (i__ < m - 1) { g = sinl * e[i__ + 1]; e[i__ + 1] = cosl * e[i__ + 1]; } rwork[i__ - ll + 1] = cosr; rwork[i__ - ll + 1 + nm1] = sinr; rwork[i__ - ll + 1 + nm12] = cosl; rwork[i__ - ll + 1 + nm13] = sinl; /* L140: */ } e[m - 1] = f; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "F", &i__1, ncvt, &rwork[1], &rwork[*n], & vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; zlasr_("R", "V", "F", nru, &i__1, &rwork[nm12 + 1], &rwork[ nm13 + 1], &u_ref(1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "F", &i__1, ncc, &rwork[nm12 + 1], &rwork[ nm13 + 1], &c___ref(ll, 1), ldc); } /* Test convergence */ if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { e[m - 1] = 0.; } } else { /* Chase bulge from bottom to top Save cosines and sines for later singular vector updates */ f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[m] ) + shift / d__[m]); g = e[m - 1]; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { dlartg_(&f, &g, &cosr, &sinr, &r__); if (i__ < m) { e[i__] = r__; } f = cosr * d__[i__] + sinr * e[i__ - 1]; e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; g = sinr * d__[i__ - 1]; d__[i__ - 1] = cosr * d__[i__ - 1]; dlartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; if (i__ > ll + 1) { g = sinl * e[i__ - 2]; e[i__ - 2] = cosl * e[i__ - 2]; } rwork[i__ - ll] = cosr; rwork[i__ - ll + nm1] = -sinr; rwork[i__ - ll + nm12] = cosl; rwork[i__ - ll + nm13] = -sinl; /* L150: */ } e[ll] = f; /* Test convergence */ if ((d__1 = e[ll], abs(d__1)) <= thresh) { e[ll] = 0.; } /* Update singular vectors if desired */ if (*ncvt > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "B", &i__1, ncvt, &rwork[nm12 + 1], &rwork[ nm13 + 1], &vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; zlasr_("R", "V", "B", nru, &i__1, &rwork[1], &rwork[*n], & u_ref(1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "B", &i__1, ncc, &rwork[1], &rwork[*n], & c___ref(ll, 1), ldc); } } } /* QR iteration finished, go back and check convergence */ goto L60; /* All singular values converged, so make them positive */ L160: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] < 0.) { d__[i__] = -d__[i__]; /* Change sign of singular vectors, if desired */ if (*ncvt > 0) { zdscal_(ncvt, &c_b72, &vt_ref(i__, 1), ldvt); } } /* L170: */ } /* Sort the singular values into decreasing order (insertion sort on singular values, but only one transposition per singular vector) */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Scan for smallest D(I) */ isub = 1; smin = d__[1]; i__2 = *n + 1 - i__; for (j = 2; j <= i__2; ++j) { if (d__[j] <= smin) { isub = j; smin = d__[j]; } /* L180: */ } if (isub != *n + 1 - i__) { /* Swap singular values and vectors */ d__[isub] = d__[*n + 1 - i__]; d__[*n + 1 - i__] = smin; if (*ncvt > 0) { zswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(*n + 1 - i__, 1), ldvt); } if (*nru > 0) { zswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, *n + 1 - i__), & c__1); } if (*ncc > 0) { zswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(*n + 1 - i__, 1), ldc); } } /* L190: */ } goto L220; /* Maximum number of iterations exceeded, failure to converge */ L200: *info = 0; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { if (e[i__] != 0.) { ++(*info); } /* L210: */ } L220: return 0; /* End of ZBDSQR */ } /* zbdsqr_ */
/* Subroutine */ int dtgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *tola, doublereal *tolb, doublereal *alpha, doublereal *beta, doublereal *u, integer *ldu, doublereal *v, integer *ldv, doublereal *q, integer * ldq, doublereal *work, integer *ncycle, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DTGSJA computes the generalized singular value decomposition (GSVD) of two real upper triangular (or trapezoidal) matrices A and B. On entry, it is assumed that matrices A and B have the following forms, which may be obtained by the preprocessing subroutine DGGSVP from a general M-by-N matrix A and P-by-N matrix B: N-K-L K L A = K ( 0 A12 A13 ) if M-K-L >= 0; L ( 0 0 A23 ) M-K-L ( 0 0 0 ) N-K-L K L A = K ( 0 A12 A13 ) if M-K-L < 0; M-K ( 0 0 A23 ) N-K-L K L B = L ( 0 0 B13 ) P-L ( 0 0 0 ) where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, otherwise A23 is (M-K)-by-L upper trapezoidal. On exit, U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ), where U, V and Q are orthogonal matrices, Z' denotes the transpose of Z, R is a nonsingular upper triangular matrix, and D1 and D2 are ``diagonal'' matrices, which are of the following structures: If M-K-L >= 0, K L D1 = K ( I 0 ) L ( 0 C ) M-K-L ( 0 0 ) K L D2 = L ( 0 S ) P-L ( 0 0 ) N-K-L K L ( 0 R ) = K ( 0 R11 R12 ) K L ( 0 0 R22 ) L where C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), S = diag( BETA(K+1), ... , BETA(K+L) ), C**2 + S**2 = I. R is stored in A(1:K+L,N-K-L+1:N) on exit. If M-K-L < 0, K M-K K+L-M D1 = K ( I 0 0 ) M-K ( 0 C 0 ) K M-K K+L-M D2 = M-K ( 0 S 0 ) K+L-M ( 0 0 I ) P-L ( 0 0 0 ) N-K-L K M-K K+L-M ( 0 R ) = K ( 0 R11 R12 R13 ) M-K ( 0 0 R22 R23 ) K+L-M ( 0 0 0 R33 ) where C = diag( ALPHA(K+1), ... , ALPHA(M) ), S = diag( BETA(K+1), ... , BETA(M) ), C**2 + S**2 = I. R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored ( 0 R22 R23 ) in B(M-K+1:L,N+M-K-L+1:N) on exit. The computation of the orthogonal transformation matrices U, V or Q is optional. These matrices may either be formed explicitly, or they may be postmultiplied into input matrices U1, V1, or Q1. Arguments ========= JOBU (input) CHARACTER*1 = 'U': U must contain an orthogonal matrix U1 on entry, and the product U1*U is returned; = 'I': U is initialized to the unit matrix, and the orthogonal matrix U is returned; = 'N': U is not computed. JOBV (input) CHARACTER*1 = 'V': V must contain an orthogonal matrix V1 on entry, and the product V1*V is returned; = 'I': V is initialized to the unit matrix, and the orthogonal matrix V is returned; = 'N': V is not computed. JOBQ (input) CHARACTER*1 = 'Q': Q must contain an orthogonal matrix Q1 on entry, and the product Q1*Q is returned; = 'I': Q is initialized to the unit matrix, and the orthogonal matrix Q is returned; = 'N': Q is not computed. M (input) INTEGER The number of rows of the matrix A. M >= 0. P (input) INTEGER The number of rows of the matrix B. P >= 0. N (input) INTEGER The number of columns of the matrices A and B. N >= 0. K (input) INTEGER L (input) INTEGER K and L specify the subblocks in the input matrices A and B: A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N) of A and B, whose GSVD is going to be computed by DTGSJA. See Further details. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular matrix R or part of R. See Purpose for details. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input/output) DOUBLE PRECISION array, dimension (LDB,N) On entry, the P-by-N matrix B. On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains a part of R. See Purpose for details. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,P). TOLA (input) DOUBLE PRECISION TOLB (input) DOUBLE PRECISION TOLA and TOLB are the convergence criteria for the Jacobi- Kogbetliantz iteration procedure. Generally, they are the same as used in the preprocessing step, say TOLA = max(M,N)*norm(A)*MAZHEPS, TOLB = max(P,N)*norm(B)*MAZHEPS. ALPHA (output) DOUBLE PRECISION array, dimension (N) BETA (output) DOUBLE PRECISION array, dimension (N) On exit, ALPHA and BETA contain the generalized singular value pairs of A and B; ALPHA(1:K) = 1, BETA(1:K) = 0, and if M-K-L >= 0, ALPHA(K+1:K+L) = diag(C), BETA(K+1:K+L) = diag(S), or if M-K-L < 0, ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 BETA(K+1:M) = S, BETA(M+1:K+L) = 1. Furthermore, if K+L < N, ALPHA(K+L+1:N) = 0 and BETA(K+L+1:N) = 0. U (input/output) DOUBLE PRECISION array, dimension (LDU,M) On entry, if JOBU = 'U', U must contain a matrix U1 (usually the orthogonal matrix returned by DGGSVP). On exit, if JOBU = 'I', U contains the orthogonal matrix U; if JOBU = 'U', U contains the product U1*U. If JOBU = 'N', U is not referenced. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,M) if JOBU = 'U'; LDU >= 1 otherwise. V (input/output) DOUBLE PRECISION array, dimension (LDV,P) On entry, if JOBV = 'V', V must contain a matrix V1 (usually the orthogonal matrix returned by DGGSVP). On exit, if JOBV = 'I', V contains the orthogonal matrix V; if JOBV = 'V', V contains the product V1*V. If JOBV = 'N', V is not referenced. LDV (input) INTEGER The leading dimension of the array V. LDV >= max(1,P) if JOBV = 'V'; LDV >= 1 otherwise. Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually the orthogonal matrix returned by DGGSVP). On exit, if JOBQ = 'I', Q contains the orthogonal matrix Q; if JOBQ = 'Q', Q contains the product Q1*Q. If JOBQ = 'N', Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,N) if JOBQ = 'Q'; LDQ >= 1 otherwise. WORK (workspace) DOUBLE PRECISION array, dimension (2*N) NCYCLE (output) INTEGER The number of cycles required for convergence. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. = 1: the procedure does not converge after MAXIT cycles. Internal Parameters =================== MAXIT INTEGER MAXIT specifies the total loops that the iterative procedure may take. If after MAXIT cycles, the routine fails to converge, we return INFO = 1. Further Details =============== DTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L matrix B13 to the form: U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1, where U1, V1 and Q1 are orthogonal matrix, and Z' is the transpose of Z. C1 and S1 are diagonal matrices satisfying C1**2 + S1**2 = I, and R1 is an L-by-L nonsingular upper triangular matrix. ===================================================================== Decode and test the input parameters Parameter adjustments */ /* Table of constant values */ static doublereal c_b13 = 0.; static doublereal c_b14 = 1.; static integer c__1 = 1; static doublereal c_b43 = -1.; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; doublereal d__1; /* Local variables */ extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); static integer i__, j; static doublereal gamma; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static doublereal a1; static logical initq; static doublereal a2, a3, b1; static logical initu, initv, wantq, upper; static doublereal b2, b3; static logical wantu, wantv; static doublereal error, ssmin; extern /* Subroutine */ int dlags2_(logical *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), dlapll_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *); static integer kcycle; extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); static doublereal csq, csu, csv, snq, rwk, snu, snv; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define v_ref(a_1,a_2) v[(a_2)*v_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --alpha; --beta; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1 * 1; v -= v_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --work; /* Function Body */ initu = lsame_(jobu, "I"); wantu = initu || lsame_(jobu, "U"); initv = lsame_(jobv, "I"); wantv = initv || lsame_(jobv, "V"); initq = lsame_(jobq, "I"); wantq = initq || lsame_(jobq, "Q"); *info = 0; if (! (initu || wantu || lsame_(jobu, "N"))) { *info = -1; } else if (! (initv || wantv || lsame_(jobv, "N"))) { *info = -2; } else if (! (initq || wantq || lsame_(jobq, "N"))) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*p < 0) { *info = -5; } else if (*n < 0) { *info = -6; } else if (*lda < max(1,*m)) { *info = -10; } else if (*ldb < max(1,*p)) { *info = -12; } else if (*ldu < 1 || wantu && *ldu < *m) { *info = -18; } else if (*ldv < 1 || wantv && *ldv < *p) { *info = -20; } else if (*ldq < 1 || wantq && *ldq < *n) { *info = -22; } if (*info != 0) { i__1 = -(*info); xerbla_("DTGSJA", &i__1); return 0; } /* Initialize U, V and Q, if necessary */ if (initu) { dlaset_("Full", m, m, &c_b13, &c_b14, &u[u_offset], ldu); } if (initv) { dlaset_("Full", p, p, &c_b13, &c_b14, &v[v_offset], ldv); } if (initq) { dlaset_("Full", n, n, &c_b13, &c_b14, &q[q_offset], ldq); } /* Loop until convergence */ upper = FALSE_; for (kcycle = 1; kcycle <= 40; ++kcycle) { upper = ! upper; i__1 = *l - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *l; for (j = i__ + 1; j <= i__2; ++j) { a1 = 0.; a2 = 0.; a3 = 0.; if (*k + i__ <= *m) { a1 = a_ref(*k + i__, *n - *l + i__); } if (*k + j <= *m) { a3 = a_ref(*k + j, *n - *l + j); } b1 = b_ref(i__, *n - *l + i__); b3 = b_ref(j, *n - *l + j); if (upper) { if (*k + i__ <= *m) { a2 = a_ref(*k + i__, *n - *l + j); } b2 = b_ref(i__, *n - *l + j); } else { if (*k + j <= *m) { a2 = a_ref(*k + j, *n - *l + i__); } b2 = b_ref(j, *n - *l + i__); } dlags2_(&upper, &a1, &a2, &a3, &b1, &b2, &b3, &csu, &snu, & csv, &snv, &csq, &snq); /* Update (K+I)-th and (K+J)-th rows of matrix A: U'*A */ if (*k + j <= *m) { drot_(l, &a_ref(*k + j, *n - *l + 1), lda, &a_ref(*k + i__, *n - *l + 1), lda, &csu, &snu); } /* Update I-th and J-th rows of matrix B: V'*B */ drot_(l, &b_ref(j, *n - *l + 1), ldb, &b_ref(i__, *n - *l + 1) , ldb, &csv, &snv); /* Update (N-L+I)-th and (N-L+J)-th columns of matrices A and B: A*Q and B*Q Computing MIN */ i__4 = *k + *l; i__3 = min(i__4,*m); drot_(&i__3, &a_ref(1, *n - *l + j), &c__1, &a_ref(1, *n - *l + i__), &c__1, &csq, &snq); drot_(l, &b_ref(1, *n - *l + j), &c__1, &b_ref(1, *n - *l + i__), &c__1, &csq, &snq); if (upper) { if (*k + i__ <= *m) { a_ref(*k + i__, *n - *l + j) = 0.; } b_ref(i__, *n - *l + j) = 0.; } else { if (*k + j <= *m) { a_ref(*k + j, *n - *l + i__) = 0.; } b_ref(j, *n - *l + i__) = 0.; } /* Update orthogonal matrices U, V, Q, if desired. */ if (wantu && *k + j <= *m) { drot_(m, &u_ref(1, *k + j), &c__1, &u_ref(1, *k + i__), & c__1, &csu, &snu); } if (wantv) { drot_(p, &v_ref(1, j), &c__1, &v_ref(1, i__), &c__1, &csv, &snv); } if (wantq) { drot_(n, &q_ref(1, *n - *l + j), &c__1, &q_ref(1, *n - *l + i__), &c__1, &csq, &snq); } /* L10: */ } /* L20: */ } if (! upper) { /* The matrices A13 and B13 were lower triangular at the start of the cycle, and are now upper triangular. Convergence test: test the parallelism of the corresponding rows of A and B. */ error = 0.; /* Computing MIN */ i__2 = *l, i__3 = *m - *k; i__1 = min(i__2,i__3); for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *l - i__ + 1; dcopy_(&i__2, &a_ref(*k + i__, *n - *l + i__), lda, &work[1], &c__1); i__2 = *l - i__ + 1; dcopy_(&i__2, &b_ref(i__, *n - *l + i__), ldb, &work[*l + 1], &c__1); i__2 = *l - i__ + 1; dlapll_(&i__2, &work[1], &c__1, &work[*l + 1], &c__1, &ssmin); error = max(error,ssmin); /* L30: */ } if (abs(error) <= min(*tola,*tolb)) { goto L50; } } /* End of cycle loop L40: */ } /* The algorithm has not converged after MAXIT cycles. */ *info = 1; goto L100; L50: /* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. Compute the generalized singular value pairs (ALPHA, BETA), and set the triangular matrix R to array A. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { alpha[i__] = 1.; beta[i__] = 0.; /* L60: */ } /* Computing MIN */ i__2 = *l, i__3 = *m - *k; i__1 = min(i__2,i__3); for (i__ = 1; i__ <= i__1; ++i__) { a1 = a_ref(*k + i__, *n - *l + i__); b1 = b_ref(i__, *n - *l + i__); if (a1 != 0.) { gamma = b1 / a1; /* change sign if necessary */ if (gamma < 0.) { i__2 = *l - i__ + 1; dscal_(&i__2, &c_b43, &b_ref(i__, *n - *l + i__), ldb); if (wantv) { dscal_(p, &c_b43, &v_ref(1, i__), &c__1); } } d__1 = abs(gamma); dlartg_(&d__1, &c_b14, &beta[*k + i__], &alpha[*k + i__], &rwk); if (alpha[*k + i__] >= beta[*k + i__]) { i__2 = *l - i__ + 1; d__1 = 1. / alpha[*k + i__]; dscal_(&i__2, &d__1, &a_ref(*k + i__, *n - *l + i__), lda); } else { i__2 = *l - i__ + 1; d__1 = 1. / beta[*k + i__]; dscal_(&i__2, &d__1, &b_ref(i__, *n - *l + i__), ldb); i__2 = *l - i__ + 1; dcopy_(&i__2, &b_ref(i__, *n - *l + i__), ldb, &a_ref(*k + i__, *n - *l + i__), lda); } } else { alpha[*k + i__] = 0.; beta[*k + i__] = 1.; i__2 = *l - i__ + 1; dcopy_(&i__2, &b_ref(i__, *n - *l + i__), ldb, &a_ref(*k + i__, * n - *l + i__), lda); } /* L70: */ } /* Post-assignment */ i__1 = *k + *l; for (i__ = *m + 1; i__ <= i__1; ++i__) { alpha[i__] = 0.; beta[i__] = 1.; /* L80: */ } if (*k + *l < *n) { i__1 = *n; for (i__ = *k + *l + 1; i__ <= i__1; ++i__) { alpha[i__] = 0.; beta[i__] = 0.; /* L90: */ } } L100: *ncycle = kcycle; return 0; /* End of DTGSJA */ } /* dtgsja_ */
/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University December 1, 1999 Purpose ======= DBDSDC computes the singular value decomposition (SVD) of a real N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, using a divide and conquer method, where S is a diagonal matrix with non-negative diagonal elements (the singular values of B), and U and VT are orthogonal matrices of left and right singular vectors, respectively. DBDSDC can be used to compute all singular values, and optionally, singular vectors or singular vectors in compact form. This code makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. See DLASD3 for details. The code currently call DLASDQ if singular values only are desired. However, it can be slightly modified to compute singular values using the divide and conquer method. Arguments ========= UPLO (input) CHARACTER*1 = 'U': B is upper bidiagonal. = 'L': B is lower bidiagonal. COMPQ (input) CHARACTER*1 Specifies whether singular vectors are to be computed as follows: = 'N': Compute singular values only; = 'P': Compute singular values and compute singular vectors in compact form; = 'I': Compute singular values and singular vectors. N (input) INTEGER The order of the matrix B. N >= 0. D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the n diagonal elements of the bidiagonal matrix B. On exit, if INFO=0, the singular values of B. E (input/output) DOUBLE PRECISION array, dimension (N) On entry, the elements of E contain the offdiagonal elements of the bidiagonal matrix whose SVD is desired. On exit, E has been destroyed. U (output) DOUBLE PRECISION array, dimension (LDU,N) If COMPQ = 'I', then: On exit, if INFO = 0, U contains the left singular vectors of the bidiagonal matrix. For other values of COMPQ, U is not referenced. LDU (input) INTEGER The leading dimension of the array U. LDU >= 1. If singular vectors are desired, then LDU >= max( 1, N ). VT (output) DOUBLE PRECISION array, dimension (LDVT,N) If COMPQ = 'I', then: On exit, if INFO = 0, VT' contains the right singular vectors of the bidiagonal matrix. For other values of COMPQ, VT is not referenced. LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= 1. If singular vectors are desired, then LDVT >= max( 1, N ). Q (output) DOUBLE PRECISION array, dimension (LDQ) If COMPQ = 'P', then: On exit, if INFO = 0, Q and IQ contain the left and right singular vectors in a compact form, requiring O(N log N) space instead of 2*N**2. In particular, Q contains all the DOUBLE PRECISION data in LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) words of memory, where SMLSIZ is returned by ILAENV and is equal to the maximum size of the subproblems at the bottom of the computation tree (usually about 25). For other values of COMPQ, Q is not referenced. IQ (output) INTEGER array, dimension (LDIQ) If COMPQ = 'P', then: On exit, if INFO = 0, Q and IQ contain the left and right singular vectors in a compact form, requiring O(N log N) space instead of 2*N**2. In particular, IQ contains all INTEGER data in LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) words of memory, where SMLSIZ is returned by ILAENV and is equal to the maximum size of the subproblems at the bottom of the computation tree (usually about 25). For other values of COMPQ, IQ is not referenced. WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) If COMPQ = 'N' then LWORK >= (4 * N). If COMPQ = 'P' then LWORK >= (6 * N). If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). IWORK (workspace) INTEGER array, dimension (8*N) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: The algorithm failed to compute an singular value. The update process of divide and conquer failed. Further Details =============== Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__9 = 9; static integer c__0 = 0; static doublereal c_b15 = 1.; static integer c__1 = 1; static doublereal c_b29 = 0.; /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *), log(doublereal); /* Local variables */ static integer difl, difr, ierr, perm, mlvl, sqre, i__, j, k; static doublereal p, r__; static integer z__; extern logical lsame_(char *, char *); extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * , doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer *); static integer poles, iuplo, nsize, start; extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); static integer ic, ii, kk; static doublereal cs; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static integer is, iu; static doublereal sn; extern /* Subroutine */ int dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); static integer givcol; extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); static integer icompq; static doublereal orgnrm; static integer givnum, givptr, nm1, qstart, smlsiz, wstart, smlszp; static doublereal eps; static integer ivt; #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] --d__; --e; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; --q; --iq; --work; --iwork; /* Function Body */ *info = 0; iuplo = 0; if (lsame_(uplo, "U")) { iuplo = 1; } if (lsame_(uplo, "L")) { iuplo = 2; } if (lsame_(compq, "N")) { icompq = 0; } else if (lsame_(compq, "P")) { icompq = 1; } else if (lsame_(compq, "I")) { icompq = 2; } else { icompq = -1; } if (iuplo == 0) { *info = -1; } else if (icompq < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { *info = -7; } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("DBDSDC", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( ftnlen)6, (ftnlen)1); if (*n == 1) { if (icompq == 1) { q[1] = d_sign(&c_b15, &d__[1]); q[smlsiz * *n + 1] = 1.; } else if (icompq == 2) { u_ref(1, 1) = d_sign(&c_b15, &d__[1]); vt_ref(1, 1) = 1.; } d__[1] = abs(d__[1]); return 0; } nm1 = *n - 1; /* If matrix lower bidiagonal, rotate to be upper bidiagonal by applying Givens rotations on the left */ wstart = 1; qstart = 3; if (icompq == 1) { dcopy_(n, &d__[1], &c__1, &q[1], &c__1); i__1 = *n - 1; dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); } if (iuplo == 2) { qstart = 5; wstart = (*n << 1) - 1; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (icompq == 1) { q[i__ + (*n << 1)] = cs; q[i__ + *n * 3] = sn; } else if (icompq == 2) { work[i__] = cs; work[nm1 + i__] = -sn; } /* L10: */ } } /* If ICOMPQ = 0, use DLASDQ to compute the singular values. */ if (icompq == 0) { dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ wstart], info); goto L40; } /* If N is smaller than the minimum divide size SMLSIZ, then solve the problem with another solver. */ if (*n <= smlsiz) { if (icompq == 2) { dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ wstart], info); } else if (icompq == 1) { iu = 1; ivt = iu + *n; dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ iu + (qstart - 1) * *n], n, &work[wstart], info); } goto L40; } if (icompq == 2) { dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); } /* Scale. */ orgnrm = dlanst_("M", n, &d__[1], &e[1]); if (orgnrm == 0.) { return 0; } dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & ierr); eps = dlamch_("Epsilon"); mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / log(2.)) + 1; smlszp = smlsiz + 1; if (icompq == 1) { iu = 1; ivt = smlsiz + 1; difl = ivt + smlszp; difr = difl + mlvl; z__ = difr + (mlvl << 1); ic = z__ + mlvl; is = ic + 1; poles = is + 1; givnum = poles + (mlvl << 1); k = 1; givptr = 2; perm = 3; givcol = perm + mlvl; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = d__[i__], abs(d__1)) < eps) { d__[i__] = d_sign(&eps, &d__[i__]); } /* L20: */ } start = 1; sqre = 0; i__1 = nm1; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { /* Subproblem found. First determine its size and then apply divide and conquer on it. */ if (i__ < nm1) { /* A subproblem with E(I) small for I < NM1. */ nsize = i__ - start + 1; } else if ((d__1 = e[i__], abs(d__1)) >= eps) { /* A subproblem with E(NM1) not too small but I = NM1. */ nsize = *n - start + 1; } else { /* A subproblem with E(NM1) small. This implies an 1-by-1 subproblem at D(N). Solve this 1-by-1 problem first. */ nsize = i__ - start + 1; if (icompq == 2) { u_ref(*n, *n) = d_sign(&c_b15, &d__[*n]); vt_ref(*n, *n) = 1.; } else if (icompq == 1) { q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]); q[*n + (smlsiz + qstart - 1) * *n] = 1.; } d__[*n] = (d__1 = d__[*n], abs(d__1)); } if (icompq == 2) { dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u_ref(start, start), ldu, &vt_ref(start, start), ldvt, &smlsiz, & iwork[1], &work[wstart], info); } else { dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ start], &q[start + (iu + qstart - 2) * *n], n, &q[ start + (ivt + qstart - 2) * *n], &iq[start + k * *n], &q[start + (difl + qstart - 2) * *n], &q[start + ( difr + qstart - 2) * *n], &q[start + (z__ + qstart - 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ start + givptr * *n], &iq[start + givcol * *n], n, & iq[start + perm * *n], &q[start + (givnum + qstart - 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ start + (is + qstart - 2) * *n], &work[wstart], & iwork[1], info); if (*info != 0) { return 0; } } start = i__ + 1; } /* L30: */ } /* Unscale */ dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); L40: /* Use Selection Sort to minimize swaps of singular vectors */ i__1 = *n; for (ii = 2; ii <= i__1; ++ii) { i__ = ii - 1; kk = i__; p = d__[i__]; i__2 = *n; for (j = ii; j <= i__2; ++j) { if (d__[j] > p) { kk = j; p = d__[j]; } /* L50: */ } if (kk != i__) { d__[kk] = d__[i__]; d__[i__] = p; if (icompq == 1) { iq[i__] = kk; } else if (icompq == 2) { dswap_(n, &u_ref(1, i__), &c__1, &u_ref(1, kk), &c__1); dswap_(n, &vt_ref(i__, 1), ldvt, &vt_ref(kk, 1), ldvt); } } else if (icompq == 1) { iq[i__] = i__; } /* L60: */ } /* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ if (icompq == 1) { if (iuplo == 1) { iq[*n] = 1; } else { iq[*n] = 0; } } /* If B is lower bidiagonal, update U by those Givens rotations which rotated B to be upper bidiagonal */ if (iuplo == 2 && icompq == 2) { dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); } return 0; /* End of DBDSDC */ } /* dbdsdc_ */
/* Subroutine */ int cunt03_(char *rc, integer *mu, integer *mv, integer *n, integer *k, complex *u, integer *ldu, complex *v, integer *ldv, complex *work, integer *lwork, real *rwork, real *result, integer * info) { /* System generated locals */ integer u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; real r__1, r__2; complex q__1, q__2; /* Builtin functions */ double c_abs(complex *); void c_div(complex *, complex *, complex *); /* Local variables */ static integer i__, j; static complex s; extern logical lsame_(char *, char *); extern /* Subroutine */ int cunt01_(char *, integer *, integer *, complex *, integer *, complex *, integer *, real *, real *); extern integer icamax_(integer *, complex *, integer *); extern doublereal slamch_(char *); static complex su, sv; extern /* Subroutine */ int xerbla_(char *, integer *); static integer irc, lmx; static real ulp, res1, res2; #define u_subscr(a_1,a_2) (a_2)*u_dim1 + a_1 #define u_ref(a_1,a_2) u[u_subscr(a_1,a_2)] #define v_subscr(a_1,a_2) (a_2)*v_dim1 + a_1 #define v_ref(a_1,a_2) v[v_subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= CUNT03 compares two unitary matrices U and V to see if their corresponding rows or columns span the same spaces. The rows are checked if RC = 'R', and the columns are checked if RC = 'C'. RESULT is the maximum of | V*V' - I | / ( MV ulp ), if RC = 'R', or | V'*V - I | / ( MV ulp ), if RC = 'C', and the maximum over rows (or columns) 1 to K of | U(i) - S*V(i) |/ ( N ulp ) where abs(S) = 1 (chosen to minimize the expression), U(i) is the i-th row (column) of U, and V(i) is the i-th row (column) of V. Arguments ========== RC (input) CHARACTER*1 If RC = 'R' the rows of U and V are to be compared. If RC = 'C' the columns of U and V are to be compared. MU (input) INTEGER The number of rows of U if RC = 'R', and the number of columns if RC = 'C'. If MU = 0 CUNT03 does nothing. MU must be at least zero. MV (input) INTEGER The number of rows of V if RC = 'R', and the number of columns if RC = 'C'. If MV = 0 CUNT03 does nothing. MV must be at least zero. N (input) INTEGER If RC = 'R', the number of columns in the matrices U and V, and if RC = 'C', the number of rows in U and V. If N = 0 CUNT03 does nothing. N must be at least zero. K (input) INTEGER The number of rows or columns of U and V to compare. 0 <= K <= max(MU,MV). U (input) COMPLEX array, dimension (LDU,N) The first matrix to compare. If RC = 'R', U is MU by N, and if RC = 'C', U is N by MU. LDU (input) INTEGER The leading dimension of U. If RC = 'R', LDU >= max(1,MU), and if RC = 'C', LDU >= max(1,N). V (input) COMPLEX array, dimension (LDV,N) The second matrix to compare. If RC = 'R', V is MV by N, and if RC = 'C', V is N by MV. LDV (input) INTEGER The leading dimension of V. If RC = 'R', LDV >= max(1,MV), and if RC = 'C', LDV >= max(1,N). WORK (workspace) COMPLEX array, dimension (LWORK) LWORK (input) INTEGER The length of the array WORK. For best performance, LWORK should be at least N*N if RC = 'C' or M*M if RC = 'R', but the tests will be done even if LWORK is 0. RWORK (workspace) REAL array, dimension (max(MV,N)) RESULT (output) REAL The value computed by the test described above. RESULT is limited to 1/ulp to avoid overflow. INFO (output) INTEGER 0 indicates a successful exit -k indicates the k-th parameter had an illegal value ===================================================================== Check inputs Parameter adjustments */ u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1 * 1; v -= v_offset; --work; --rwork; /* Function Body */ *info = 0; if (lsame_(rc, "R")) { irc = 0; } else if (lsame_(rc, "C")) { irc = 1; } else { irc = -1; } if (irc == -1) { *info = -1; } else if (*mu < 0) { *info = -2; } else if (*mv < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > max(*mu,*mv)) { *info = -5; } else if (irc == 0 && *ldu < max(1,*mu) || irc == 1 && *ldu < max(1,*n)) { *info = -7; } else if (irc == 0 && *ldv < max(1,*mv) || irc == 1 && *ldv < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("CUNT03", &i__1); return 0; } /* Initialize result */ *result = 0.f; if (*mu == 0 || *mv == 0 || *n == 0) { return 0; } /* Machine constants */ ulp = slamch_("Precision"); if (irc == 0) { /* Compare rows */ res1 = 0.f; i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { lmx = icamax_(n, &u_ref(i__, 1), ldu); i__2 = v_subscr(i__, lmx); if (v[i__2].r == 0.f && v[i__2].i == 0.f) { sv.r = 1.f, sv.i = 0.f; } else { r__1 = c_abs(&v_ref(i__, lmx)); q__2.r = r__1, q__2.i = 0.f; c_div(&q__1, &q__2, &v_ref(i__, lmx)); sv.r = q__1.r, sv.i = q__1.i; } i__2 = u_subscr(i__, lmx); if (u[i__2].r == 0.f && u[i__2].i == 0.f) { su.r = 1.f, su.i = 0.f; } else { r__1 = c_abs(&u_ref(i__, lmx)); q__2.r = r__1, q__2.i = 0.f; c_div(&q__1, &q__2, &u_ref(i__, lmx)); su.r = q__1.r, su.i = q__1.i; } c_div(&q__1, &sv, &su); s.r = q__1.r, s.i = q__1.i; i__2 = *n; for (j = 1; j <= i__2; ++j) { /* Computing MAX */ i__3 = u_subscr(i__, j); i__4 = v_subscr(i__, j); q__2.r = s.r * v[i__4].r - s.i * v[i__4].i, q__2.i = s.r * v[ i__4].i + s.i * v[i__4].r; q__1.r = u[i__3].r - q__2.r, q__1.i = u[i__3].i - q__2.i; r__1 = res1, r__2 = c_abs(&q__1); res1 = dmax(r__1,r__2); /* L10: */ } /* L20: */ } res1 /= (real) (*n) * ulp; /* Compute orthogonality of rows of V. */ cunt01_("Rows", mv, n, &v[v_offset], ldv, &work[1], lwork, &rwork[1], &res2); } else { /* Compare columns */ res1 = 0.f; i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { lmx = icamax_(n, &u_ref(1, i__), &c__1); i__2 = v_subscr(lmx, i__); if (v[i__2].r == 0.f && v[i__2].i == 0.f) { sv.r = 1.f, sv.i = 0.f; } else { r__1 = c_abs(&v_ref(lmx, i__)); q__2.r = r__1, q__2.i = 0.f; c_div(&q__1, &q__2, &v_ref(lmx, i__)); sv.r = q__1.r, sv.i = q__1.i; } i__2 = u_subscr(lmx, i__); if (u[i__2].r == 0.f && u[i__2].i == 0.f) { su.r = 1.f, su.i = 0.f; } else { r__1 = c_abs(&u_ref(lmx, i__)); q__2.r = r__1, q__2.i = 0.f; c_div(&q__1, &q__2, &u_ref(lmx, i__)); su.r = q__1.r, su.i = q__1.i; } c_div(&q__1, &sv, &su); s.r = q__1.r, s.i = q__1.i; i__2 = *n; for (j = 1; j <= i__2; ++j) { /* Computing MAX */ i__3 = u_subscr(j, i__); i__4 = v_subscr(j, i__); q__2.r = s.r * v[i__4].r - s.i * v[i__4].i, q__2.i = s.r * v[ i__4].i + s.i * v[i__4].r; q__1.r = u[i__3].r - q__2.r, q__1.i = u[i__3].i - q__2.i; r__1 = res1, r__2 = c_abs(&q__1); res1 = dmax(r__1,r__2); /* L30: */ } /* L40: */ } res1 /= (real) (*n) * ulp; /* Compute orthogonality of columns of V. */ cunt01_("Columns", n, mv, &v[v_offset], ldv, &work[1], lwork, &rwork[ 1], &res2); } /* Computing MIN */ r__1 = dmax(res1,res2), r__2 = 1.f / ulp; *result = dmin(r__1,r__2); return 0; /* End of CUNT03 */ } /* cunt03_ */
/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * poles, integer *givptr, integer *givcol, integer *ldgcol, integer * perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ static integer nlvl, sqre, i__, j; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer inode, ndiml, ndimr; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer i1; extern /* Subroutine */ int dlals0_(integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal dopbl3_(char *, integer *, integer *, integer *) ; static integer ic, lf, nd, ll, nl, nr; extern /* Subroutine */ int dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), xerbla_(char *, integer *); static integer im1, nlf, nrf, lvl, ndb1, nlp1, lvl2, nrp1; #define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1] #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_ref(a_1,a_2) bx[(a_2)*bx_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] /* -- LAPACK routine (instrumented to count ops, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DLALSA is an itermediate step in solving the least squares problem by computing the SVD of the coefficient matrix in compact form (The singular vectors are computed as products of simple orthorgonal matrices.). If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector matrix of an upper bidiagonal matrix to the right hand side; and if ICOMPQ = 1, DLALSA applies the right singular vector matrix to the right hand side. The singular vector matrices were generated in compact form by DLALSA. Arguments ========= ICOMPQ (input) INTEGER Specifies whether the left or the right singular vector matrix is involved. = 0: Left singular vector matrix = 1: Right singular vector matrix SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The row and column dimensions of the upper bidiagonal matrix. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input) DOUBLE PRECISION array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B in the calling subprogram. LDB must be at least max(1,MAX( M, N ) ). BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) On exit, the result of applying the left or right singular vector matrix to B. LDBX (input) INTEGER The leading dimension of BX. U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). On entry, U contains the left singular vector matrices of all subproblems at the bottom level. LDU (input) INTEGER, LDU = > N. The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). On entry, VT' contains the right singular vector matrices of all subproblems at the bottom level. K (input) INTEGER array, dimension ( N ). DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(*, 2 * I) record the normalizing factors of the right singular vectors matrices of subproblems on I-th level. Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). On entry, Z(1, I) contains the components of the deflation- adjusted updating row vector for subproblems on the I-th level. POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old singular values involved in the secular equations on the I-th level. GIVPTR (input) INTEGER array, dimension ( N ). On entry, GIVPTR( I ) records the number of Givens rotations performed on the I-th problem on the computation tree. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the locations of Givens rotations performed on the I-th level on the computation tree. LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). On entry, PERM(*, I) records permutations done on the I-th level of the computation tree. GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- values of Givens rotations performed on the I-th level on the computation tree. C (input) DOUBLE PRECISION array, dimension ( N ). On entry, if the I-th subproblem is not square, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. S (input) DOUBLE PRECISION array, dimension ( N ). On entry, if the I-th subproblem is not square, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. WORK (workspace) DOUBLE PRECISION array. The dimension must be at least N. IWORK (workspace) INTEGER array. The dimension must be at least 3 * N INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. ===================================================================== Test the input parameters. Parameter adjustments */ b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1 * 1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1 * 1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; --c__; --s; --work; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < *smlsiz) { *info = -3; } else if (*nrhs < 1) { *info = -4; } else if (*ldb < *n) { *info = -6; } else if (*ldbx < *n) { *info = -8; } else if (*ldu < *n) { *info = -10; } else if (*ldgcol < *n) { *info = -19; } if (*info != 0) { i__1 = -(*info); xerbla_("DLALSA", &i__1); return 0; } /* Book-keeping and setting up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* The following code applies back the left singular vector factors. For applying back the right singular vector factors, go to 50. */ if (*icompq == 1) { goto L50; } /* The nodes on the bottom level of the tree were solved by DLASDQ. The corresponding left and right singular vector matrices are in explicit form. First apply back the left singular vector matrices. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; latime_1.ops += dopbl3_("DGEMM ", &nl, nrhs, &nl); latime_1.ops += dopbl3_("DGEMM ", &nr, nrhs, &nr); dgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &b_ref( nlf, 1), ldb, &c_b10, &bx_ref(nlf, 1), ldbx); dgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &b_ref( nrf, 1), ldb, &c_b10, &bx_ref(nrf, 1), ldbx); /* L10: */ } /* Next copy the rows of B that correspond to unchanged rows in the bidiagonal matrix to BX. */ i__1 = nd; for (i__ = 1; i__ <= i__1; ++i__) { ic = iwork[inode + i__ - 1]; dcopy_(nrhs, &b_ref(ic, 1), ldb, &bx_ref(ic, 1), ldbx); /* L20: */ } /* Finally go through the left singular vector matrices of all the other subproblems bottom-up on the tree. */ j = pow_ii(&c__2, &nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* find the first node LF and last node LL on the current level LVL */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; --j; dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx_ref(nlf, 1), ldbx, & b_ref(nlf, 1), ldb, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &work[1], info); /* L30: */ } /* L40: */ } goto L90; /* ICOMPQ = 1: applying back the right singular vector factors. */ L50: /* First now go through the right singular vector matrices of all the tree nodes top-down. */ j = 0; i__1 = nlvl; for (lvl = 1; lvl <= i__1; ++lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__2 = lvl - 1; lf = pow_ii(&c__2, &i__2); ll = (lf << 1) - 1; } i__2 = lf; for (i__ = ll; i__ >= i__2; --i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqre = 0; } else { sqre = 1; } ++j; dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b_ref(nlf, 1), ldb, & bx_ref(nlf, 1), ldbx, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &work[1], info); /* L60: */ } /* L70: */ } /* The nodes on the bottom level of the tree were solved by DLASDQ. The corresponding right singular vector matrices are in explicit form. Apply them back. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlp1 = nl + 1; if (i__ == nd) { nrp1 = nr; } else { nrp1 = nr + 1; } nlf = ic - nl; nrf = ic + 1; latime_1.ops += dopbl3_("DGEMM ", &nlp1, nrhs, &nlp1); latime_1.ops += dopbl3_("DGEMM ", &nrp1, nrhs, &nrp1); dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & b_ref(nlf, 1), ldb, &c_b10, &bx_ref(nlf, 1), ldbx); dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & b_ref(nrf, 1), ldb, &c_b10, &bx_ref(nrf, 1), ldbx); /* L80: */ } L90: return 0; /* End of DLALSA */ } /* dlalsa_ */
/* Subroutine */ int zhpt21_(integer *itype, char *uplo, integer *n, integer * kband, doublecomplex *ap, doublereal *d__, doublereal *e, doublecomplex *u, integer *ldu, doublecomplex *vp, doublecomplex *tau, doublecomplex *work, doublereal *rwork, doublereal *result) { /* System generated locals */ integer u_dim1, u_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1, d__2; doublecomplex z__1, z__2, z__3; /* Local variables */ static doublereal unfl; static doublecomplex temp; extern /* Subroutine */ int zhpr_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *), zhpr2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *); static integer j; extern logical lsame_(char *, char *); static integer iinfo; static doublereal anorm; extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *); static char cuplo[1]; static doublecomplex vsave; extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static logical lower; static doublereal wnorm; extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), zhpmv_(char *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), zaxpy_( integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *); extern doublereal dlamch_(char *); static integer jp, jr; extern doublereal zlange_(char *, integer *, integer *, doublecomplex *, integer *, doublereal *), zlanhp_(char *, char *, integer *, doublecomplex *, doublereal *); extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), zlaset_(char *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *); static integer jp1; extern /* Subroutine */ int zupmtr_(char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *); static integer lap; static doublereal ulp; #define u_subscr(a_1,a_2) (a_2)*u_dim1 + a_1 #define u_ref(a_1,a_2) u[u_subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZHPT21 generally checks a decomposition of the form A = U S U* where * means conjugate transpose, A is hermitian, U is unitary, and S is diagonal (if KBAND=0) or (real) symmetric tridiagonal (if KBAND=1). If ITYPE=1, then U is represented as a dense matrix, otherwise the U is expressed as a product of Householder transformations, whose vectors are stored in the array "V" and whose scaling constants are in "TAU"; we shall use the letter "V" to refer to the product of Householder transformations (which should be equal to U). Specifically, if ITYPE=1, then: RESULT(1) = | A - U S U* | / ( |A| n ulp ) *and* RESULT(2) = | I - UU* | / ( n ulp ) If ITYPE=2, then: RESULT(1) = | A - V S V* | / ( |A| n ulp ) If ITYPE=3, then: RESULT(1) = | I - UV* | / ( n ulp ) Packed storage means that, for example, if UPLO='U', then the columns of the upper triangle of A are stored one after another, so that A(1,j+1) immediately follows A(j,j) in the array AP. Similarly, if UPLO='L', then the columns of the lower triangle of A are stored one after another in AP, so that A(j+1,j+1) immediately follows A(n,j) in the array AP. This means that A(i,j) is stored in: AP( i + j*(j-1)/2 ) if UPLO='U' AP( i + (2*n-j)*(j-1)/2 ) if UPLO='L' The array VP bears the same relation to the matrix V that A does to AP. For ITYPE > 1, the transformation U is expressed as a product of Householder transformations: If UPLO='U', then V = H(n-1)...H(1), where H(j) = I - tau(j) v(j) v(j)* and the first j-1 elements of v(j) are stored in V(1:j-1,j+1), (i.e., VP( j*(j+1)/2 + 1 : j*(j+1)/2 + j-1 ) ), the j-th element is 1, and the last n-j elements are 0. If UPLO='L', then V = H(1)...H(n-1), where H(j) = I - tau(j) v(j) v(j)* and the first j elements of v(j) are 0, the (j+1)-st is 1, and the (j+2)-nd through n-th elements are stored in V(j+2:n,j) (i.e., in VP( (2*n-j)*(j-1)/2 + j+2 : (2*n-j)*(j-1)/2 + n ) .) Arguments ========= ITYPE (input) INTEGER Specifies the type of tests to be performed. 1: U expressed as a dense unitary matrix: RESULT(1) = | A - U S U* | / ( |A| n ulp ) *and* RESULT(2) = | I - UU* | / ( n ulp ) 2: U expressed as a product V of Housholder transformations: RESULT(1) = | A - V S V* | / ( |A| n ulp ) 3: U expressed both as a dense unitary matrix and as a product of Housholder transformations: RESULT(1) = | I - UV* | / ( n ulp ) UPLO (input) CHARACTER If UPLO='U', the upper triangle of A and V will be used and the (strictly) lower triangle will not be referenced. If UPLO='L', the lower triangle of A and V will be used and the (strictly) upper triangle will not be referenced. N (input) INTEGER The size of the matrix. If it is zero, ZHPT21 does nothing. It must be at least zero. KBAND (input) INTEGER The bandwidth of the matrix. It may only be zero or one. If zero, then S is diagonal, and E is not referenced. If one, then S is symmetric tri-diagonal. AP (input) COMPLEX*16 array, dimension (N*(N+1)/2) The original (unfactored) matrix. It is assumed to be hermitian, and contains the columns of just the upper triangle (UPLO='U') or only the lower triangle (UPLO='L'), packed one after another. D (input) DOUBLE PRECISION array, dimension (N) The diagonal of the (symmetric tri-) diagonal matrix. E (input) DOUBLE PRECISION array, dimension (N) The off-diagonal of the (symmetric tri-) diagonal matrix. E(1) is the (1,2) and (2,1) element, E(2) is the (2,3) and (3,2) element, etc. Not referenced if KBAND=0. U (input) COMPLEX*16 array, dimension (LDU, N) If ITYPE=1 or 3, this contains the unitary matrix in the decomposition, expressed as a dense matrix. If ITYPE=2, then it is not referenced. LDU (input) INTEGER The leading dimension of U. LDU must be at least N and at least 1. VP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) If ITYPE=2 or 3, the columns of this array contain the Householder vectors used to describe the unitary matrix in the decomposition, as described in purpose. *NOTE* If ITYPE=2 or 3, V is modified and restored. The subdiagonal (if UPLO='L') or the superdiagonal (if UPLO='U') is set to one, and later reset to its original value, during the course of the calculation. If ITYPE=1, then it is neither referenced nor modified. TAU (input) COMPLEX*16 array, dimension (N) If ITYPE >= 2, then TAU(j) is the scalar factor of v(j) v(j)* in the Householder transformation H(j) of the product U = H(1)...H(n-2) If ITYPE < 2, then TAU is not referenced. WORK (workspace) COMPLEX*16 array, dimension (N**2) Workspace. RWORK (workspace) DOUBLE PRECISION array, dimension (N) Workspace. RESULT (output) DOUBLE PRECISION array, dimension (2) The values computed by the two tests described above. The values are currently limited to 1/ulp, to avoid overflow. RESULT(1) is always modified. RESULT(2) is modified only if ITYPE=1. ===================================================================== Constants Parameter adjustments */ --ap; --d__; --e; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --vp; --tau; --work; --rwork; --result; /* Function Body */ result[1] = 0.; if (*itype == 1) { result[2] = 0.; } if (*n <= 0) { return 0; } lap = *n * (*n + 1) / 2; if (lsame_(uplo, "U")) { lower = FALSE_; *(unsigned char *)cuplo = 'U'; } else { lower = TRUE_; *(unsigned char *)cuplo = 'L'; } unfl = dlamch_("Safe minimum"); ulp = dlamch_("Epsilon") * dlamch_("Base"); /* Some Error Checks */ if (*itype < 1 || *itype > 3) { result[1] = 10. / ulp; return 0; } /* Do Test 1 Norm of A: */ if (*itype == 3) { anorm = 1.; } else { /* Computing MAX */ d__1 = zlanhp_("1", cuplo, n, &ap[1], &rwork[1]) ; anorm = max(d__1,unfl); } /* Compute error matrix: */ if (*itype == 1) { /* ITYPE=1: error = A - U S U* */ zlaset_("Full", n, n, &c_b1, &c_b1, &work[1], n); zcopy_(&lap, &ap[1], &c__1, &work[1], &c__1); i__1 = *n; for (j = 1; j <= i__1; ++j) { d__1 = -d__[j]; zhpr_(cuplo, n, &d__1, &u_ref(1, j), &c__1, &work[1]); /* L10: */ } if (*n > 1 && *kband == 1) { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { i__2 = j; z__2.r = e[i__2], z__2.i = 0.; z__1.r = -z__2.r, z__1.i = -z__2.i; zhpr2_(cuplo, n, &z__1, &u_ref(1, j), &c__1, &u_ref(1, j - 1), &c__1, &work[1]); /* L20: */ } } wnorm = zlanhp_("1", cuplo, n, &work[1], &rwork[1]); } else if (*itype == 2) { /* ITYPE=2: error = V S V* - A */ zlaset_("Full", n, n, &c_b1, &c_b1, &work[1], n); if (lower) { i__1 = lap; i__2 = *n; work[i__1].r = d__[i__2], work[i__1].i = 0.; for (j = *n - 1; j >= 1; --j) { jp = ((*n << 1) - j) * (j - 1) / 2; jp1 = jp + *n - j; if (*kband == 1) { i__1 = jp + j + 1; i__2 = j; z__2.r = 1. - tau[i__2].r, z__2.i = 0. - tau[i__2].i; i__3 = j; z__1.r = e[i__3] * z__2.r, z__1.i = e[i__3] * z__2.i; work[i__1].r = z__1.r, work[i__1].i = z__1.i; i__1 = *n; for (jr = j + 2; jr <= i__1; ++jr) { i__2 = jp + jr; i__3 = j; z__3.r = -tau[i__3].r, z__3.i = -tau[i__3].i; i__4 = j; z__2.r = e[i__4] * z__3.r, z__2.i = e[i__4] * z__3.i; i__5 = jp + jr; z__1.r = z__2.r * vp[i__5].r - z__2.i * vp[i__5].i, z__1.i = z__2.r * vp[i__5].i + z__2.i * vp[ i__5].r; work[i__2].r = z__1.r, work[i__2].i = z__1.i; /* L30: */ } } i__1 = j; if (tau[i__1].r != 0. || tau[i__1].i != 0.) { i__1 = jp + j + 1; vsave.r = vp[i__1].r, vsave.i = vp[i__1].i; i__1 = jp + j + 1; vp[i__1].r = 1., vp[i__1].i = 0.; i__1 = *n - j; zhpmv_("L", &i__1, &c_b2, &work[jp1 + j + 1], &vp[jp + j + 1], &c__1, &c_b1, &work[lap + 1], &c__1); i__1 = j; z__2.r = tau[i__1].r * -.5, z__2.i = tau[i__1].i * -.5; i__2 = *n - j; zdotc_(&z__3, &i__2, &work[lap + 1], &c__1, &vp[jp + j + 1], &c__1); z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; temp.r = z__1.r, temp.i = z__1.i; i__1 = *n - j; zaxpy_(&i__1, &temp, &vp[jp + j + 1], &c__1, &work[lap + 1], &c__1); i__1 = *n - j; i__2 = j; z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i; zhpr2_("L", &i__1, &z__1, &vp[jp + j + 1], &c__1, &work[ lap + 1], &c__1, &work[jp1 + j + 1]); i__1 = jp + j + 1; vp[i__1].r = vsave.r, vp[i__1].i = vsave.i; } i__1 = jp + j; i__2 = j; work[i__1].r = d__[i__2], work[i__1].i = 0.; /* L40: */ } } else { work[1].r = d__[1], work[1].i = 0.; i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { jp = j * (j - 1) / 2; jp1 = jp + j; if (*kband == 1) { i__2 = jp1 + j; i__3 = j; z__2.r = 1. - tau[i__3].r, z__2.i = 0. - tau[i__3].i; i__4 = j; z__1.r = e[i__4] * z__2.r, z__1.i = e[i__4] * z__2.i; work[i__2].r = z__1.r, work[i__2].i = z__1.i; i__2 = j - 1; for (jr = 1; jr <= i__2; ++jr) { i__3 = jp1 + jr; i__4 = j; z__3.r = -tau[i__4].r, z__3.i = -tau[i__4].i; i__5 = j; z__2.r = e[i__5] * z__3.r, z__2.i = e[i__5] * z__3.i; i__6 = jp1 + jr; z__1.r = z__2.r * vp[i__6].r - z__2.i * vp[i__6].i, z__1.i = z__2.r * vp[i__6].i + z__2.i * vp[ i__6].r; work[i__3].r = z__1.r, work[i__3].i = z__1.i; /* L50: */ } } i__2 = j; if (tau[i__2].r != 0. || tau[i__2].i != 0.) { i__2 = jp1 + j; vsave.r = vp[i__2].r, vsave.i = vp[i__2].i; i__2 = jp1 + j; vp[i__2].r = 1., vp[i__2].i = 0.; zhpmv_("U", &j, &c_b2, &work[1], &vp[jp1 + 1], &c__1, & c_b1, &work[lap + 1], &c__1); i__2 = j; z__2.r = tau[i__2].r * -.5, z__2.i = tau[i__2].i * -.5; zdotc_(&z__3, &j, &work[lap + 1], &c__1, &vp[jp1 + 1], & c__1); z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; temp.r = z__1.r, temp.i = z__1.i; zaxpy_(&j, &temp, &vp[jp1 + 1], &c__1, &work[lap + 1], & c__1); i__2 = j; z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i; zhpr2_("U", &j, &z__1, &vp[jp1 + 1], &c__1, &work[lap + 1] , &c__1, &work[1]); i__2 = jp1 + j; vp[i__2].r = vsave.r, vp[i__2].i = vsave.i; } i__2 = jp1 + j + 1; i__3 = j + 1; work[i__2].r = d__[i__3], work[i__2].i = 0.; /* L60: */ } } i__1 = lap; for (j = 1; j <= i__1; ++j) { i__2 = j; i__3 = j; i__4 = j; z__1.r = work[i__3].r - ap[i__4].r, z__1.i = work[i__3].i - ap[ i__4].i; work[i__2].r = z__1.r, work[i__2].i = z__1.i; /* L70: */ } wnorm = zlanhp_("1", cuplo, n, &work[1], &rwork[1]); } else if (*itype == 3) { /* ITYPE=3: error = U V* - I */ if (*n < 2) { return 0; } zlacpy_(" ", n, n, &u[u_offset], ldu, &work[1], n); /* Computing 2nd power */ i__1 = *n; zupmtr_("R", cuplo, "C", n, n, &vp[1], &tau[1], &work[1], n, &work[ i__1 * i__1 + 1], &iinfo); if (iinfo != 0) { result[1] = 10. / ulp; return 0; } i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = (*n + 1) * (j - 1) + 1; i__3 = (*n + 1) * (j - 1) + 1; z__1.r = work[i__3].r - 1., z__1.i = work[i__3].i + 0.; work[i__2].r = z__1.r, work[i__2].i = z__1.i; /* L80: */ } wnorm = zlange_("1", n, n, &work[1], n, &rwork[1]); } if (anorm > wnorm) { result[1] = wnorm / anorm / (*n * ulp); } else { if (anorm < 1.) { /* Computing MIN */ d__1 = wnorm, d__2 = *n * anorm; result[1] = min(d__1,d__2) / anorm / (*n * ulp); } else { /* Computing MIN */ d__1 = wnorm / anorm, d__2 = (doublereal) (*n); result[1] = min(d__1,d__2) / (*n * ulp); } } /* Do Test 2 Compute UU* - I */ if (*itype == 1) { zgemm_("N", "C", n, n, n, &c_b2, &u[u_offset], ldu, &u[u_offset], ldu, &c_b1, &work[1], n); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = (*n + 1) * (j - 1) + 1; i__3 = (*n + 1) * (j - 1) + 1; z__1.r = work[i__3].r - 1., z__1.i = work[i__3].i + 0.; work[i__2].r = z__1.r, work[i__2].i = z__1.i; /* L90: */ } /* Computing MIN */ d__1 = zlange_("1", n, n, &work[1], n, &rwork[1]), d__2 = ( doublereal) (*n); result[2] = min(d__1,d__2) / (*n * ulp); } return 0; /* End of ZHPT21 */ } /* zhpt21_ */