/* Subroutine */ int clalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real *z__, real *poles, integer *givptr, integer *givcol, integer * ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CLALSA is an itermediate step in solving the least squares problem by computing the SVD of the coefficient matrix in compact form (The singular vectors are computed as products of simple orthorgonal matrices.). If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector matrix of an upper bidiagonal matrix to the right hand side; and if ICOMPQ = 1, CLALSA applies the right singular vector matrix to the right hand side. The singular vector matrices were generated in compact form by CLALSA. Arguments ========= ICOMPQ (input) INTEGER Specifies whether the left or the right singular vector matrix is involved. = 0: Left singular vector matrix = 1: Right singular vector matrix SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The row and column dimensions of the upper bidiagonal matrix. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input) COMPLEX array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B in the calling subprogram. LDB must be at least max(1,MAX( M, N ) ). BX (output) COMPLEX array, dimension ( LDBX, NRHS ) On exit, the result of applying the left or right singular vector matrix to B. LDBX (input) INTEGER The leading dimension of BX. U (input) REAL array, dimension ( LDU, SMLSIZ ). On entry, U contains the left singular vector matrices of all subproblems at the bottom level. LDU (input) INTEGER, LDU = > N. The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). On entry, VT' contains the right singular vector matrices of all subproblems at the bottom level. K (input) INTEGER array, dimension ( N ). DIFL (input) REAL array, dimension ( LDU, NLVL ). where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(*, 2 * I) record the normalizing factors of the right singular vectors matrices of subproblems on I-th level. Z (input) REAL array, dimension ( LDU, NLVL ). On entry, Z(1, I) contains the components of the deflation- adjusted updating row vector for subproblems on the I-th level. POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old singular values involved in the secular equations on the I-th level. GIVPTR (input) INTEGER array, dimension ( N ). On entry, GIVPTR( I ) records the number of Givens rotations performed on the I-th problem on the computation tree. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the locations of Givens rotations performed on the I-th level on the computation tree. LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). On entry, PERM(*, I) records permutations done on the I-th level of the computation tree. GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- values of Givens rotations performed on the I-th level on the computation tree. C (input) REAL array, dimension ( N ). On entry, if the I-th subproblem is not square, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. S (input) REAL array, dimension ( N ). On entry, if the I-th subproblem is not square, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. RWORK (workspace) REAL array, dimension at least max ( N, (SMLSZ+1)*NRHS*3 ). IWORK (workspace) INTEGER array. The dimension must be at least 3 * N INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== Based on contributions by Ming Gu and Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA Osni Marques, LBNL/NERSC, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b9 = 1.f; static real c_b10 = 0.f; static integer c__2 = 2; /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5, i__6; complex q__1; /* Builtin functions */ double r_imag(complex *); integer pow_ii(integer *, integer *); /* Local variables */ static integer jcol, nlvl, sqre, jrow, i__, j, jimag, jreal, inode, ndiml; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static integer ndimr; extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *); static integer i1; extern /* Subroutine */ int clals0_(integer *, integer *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, integer *, integer *, real *, integer *, real *, real *, real *, real *, integer *, real *, real *, real *, integer *); static integer ic, lf, nd, ll, nl, nr; extern /* Subroutine */ int xerbla_(char *, integer *), slasdt_( integer *, integer *, integer *, integer *, integer *, integer *, integer *); static integer im1, nlf, nrf, lvl, ndb1, nlp1, lvl2, nrp1; #define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1] #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_subscr(a_1,a_2) (a_2)*bx_dim1 + a_1 #define bx_ref(a_1,a_2) bx[bx_subscr(a_1,a_2)] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1 * 1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1 * 1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; --c__; --s; --rwork; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < *smlsiz) { *info = -3; } else if (*nrhs < 1) { *info = -4; } else if (*ldb < *n) { *info = -6; } else if (*ldbx < *n) { *info = -8; } else if (*ldu < *n) { *info = -10; } else if (*ldgcol < *n) { *info = -19; } if (*info != 0) { i__1 = -(*info); xerbla_("CLALSA", &i__1); return 0; } /* Book-keeping and setting up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* The following code applies back the left singular vector factors. For applying back the right singular vector factors, go to 170. */ if (*icompq == 1) { goto L170; } /* The nodes on the bottom level of the tree were solved by SLASDQ. The corresponding left and right singular vector matrices are in explicit form. First apply back the left singular vector matrices. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */ j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L10: */ } /* L20: */ } sgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &rwork[( nl * *nrhs << 1) + 1], &nl, &c_b10, &rwork[1], &nl); j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L30: */ } /* L40: */ } sgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &rwork[( nl * *nrhs << 1) + 1], &nl, &c_b10, &rwork[nl * *nrhs + 1], & nl); jreal = 0; jimag = nl * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L50: */ } /* L60: */ } /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */ j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L70: */ } /* L80: */ } sgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &rwork[( nr * *nrhs << 1) + 1], &nr, &c_b10, &rwork[1], &nr); j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L90: */ } /* L100: */ } sgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &rwork[( nr * *nrhs << 1) + 1], &nr, &c_b10, &rwork[nr * *nrhs + 1], & nr); jreal = 0; jimag = nr * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L110: */ } /* L120: */ } /* L130: */ } /* Next copy the rows of B that correspond to unchanged rows in the bidiagonal matrix to BX. */ i__1 = nd; for (i__ = 1; i__ <= i__1; ++i__) { ic = iwork[inode + i__ - 1]; ccopy_(nrhs, &b_ref(ic, 1), ldb, &bx_ref(ic, 1), ldbx); /* L140: */ } /* Finally go through the left singular vector matrices of all the other subproblems bottom-up on the tree. */ j = pow_ii(&c__2, &nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* find the first node LF and last node LL on the current level LVL */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; --j; clals0_(icompq, &nl, &nr, &sqre, nrhs, &bx_ref(nlf, 1), ldbx, & b_ref(nlf, 1), ldb, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &rwork[1], info); /* L150: */ } /* L160: */ } goto L330; /* ICOMPQ = 1: applying back the right singular vector factors. */ L170: /* First now go through the right singular vector matrices of all the tree nodes top-down. */ j = 0; i__1 = nlvl; for (lvl = 1; lvl <= i__1; ++lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__2 = lvl - 1; lf = pow_ii(&c__2, &i__2); ll = (lf << 1) - 1; } i__2 = lf; for (i__ = ll; i__ >= i__2; --i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqre = 0; } else { sqre = 1; } ++j; clals0_(icompq, &nl, &nr, &sqre, nrhs, &b_ref(nlf, 1), ldb, & bx_ref(nlf, 1), ldbx, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &rwork[1], info); /* L180: */ } /* L190: */ } /* The nodes on the bottom level of the tree were solved by SLASDQ. The corresponding right singular vector matrices are in explicit form. Apply them back. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlp1 = nl + 1; if (i__ == nd) { nrp1 = nr; } else { nrp1 = nr + 1; } nlf = ic - nl; nrf = ic + 1; /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */ j = nlp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L200: */ } /* L210: */ } sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b10, &rwork[1], & nlp1); j = nlp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L220: */ } /* L230: */ } sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b10, &rwork[nlp1 * * nrhs + 1], &nlp1); jreal = 0; jimag = nlp1 * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L240: */ } /* L250: */ } /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */ j = nrp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L260: */ } /* L270: */ } sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b10, &rwork[1], & nrp1); j = nrp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L280: */ } /* L290: */ } sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b10, &rwork[nrp1 * * nrhs + 1], &nrp1); jreal = 0; jimag = nrp1 * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L300: */ } /* L310: */ } /* L320: */ } L330: return 0; /* End of CLALSA */ } /* clalsa_ */
/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ static doublereal beta; static integer idxq, nlvl, i__, j, m; static doublereal alpha; static integer inode, ndiml, ndimr, idxqi, itemp; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer sqrei, i1; extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *); static integer ic, nwork1, lf, nd, nwork2, ll, nl, vf, nr, vl; extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), dlaset_( char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); static integer im1, smlszp, ncc, nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; #define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1] #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= Using a divide and conquer approach, DLASDA computes the singular value decomposition (SVD) of a real upper bidiagonal N-by-M matrix B with diagonal D and offdiagonal E, where M = N + SQRE. The algorithm computes the singular values in the SVD B = U * S * VT. The orthogonal matrices U and VT are optionally computed in compact form. A related subroutine, DLASD0, computes the singular values and the singular vectors in explicit form. Arguments ========= ICOMPQ (input) INTEGER Specifies whether singular vectors are to be computed in compact form, as follows = 0: Compute singular values only. = 1: Compute singular vectors of upper bidiagonal matrix in compact form. SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The row dimension of the upper bidiagonal matrix. This is also the dimension of the main diagonal array D. SQRE (input) INTEGER Specifies the column dimension of the bidiagonal matrix. = 0: The bidiagonal matrix has column dimension M = N; = 1: The bidiagonal matrix has column dimension M = N + 1. D (input/output) DOUBLE PRECISION array, dimension ( N ) On entry D contains the main diagonal of the bidiagonal matrix. On exit D, if INFO = 0, contains its singular values. E (input) DOUBLE PRECISION array, dimension ( M-1 ) Contains the subdiagonal entries of the bidiagonal matrix. On exit, E has been destroyed. U (output) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left singular vector matrices of all subproblems at the bottom level. LDU (input) INTEGER, LDU = > N. The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. VT (output) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right singular vector matrices of all subproblems at the bottom level. K (output) INTEGER array, dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th secular equation on the computation tree. DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), where NLVL = floor(log_2 (N/SMLSIZ))). DIFR (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(1:N, 2 * I ) contains the normalizing factors for the right singular vector matrix. See DLASD8 for details. Z (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. The first K elements of Z(1, I) contain the components of the deflation-adjusted updating row vector for subproblems on the I-th level. POLES (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and POLES(1, 2*I) contain the new and old singular values involved in the secular equations on the I-th level. GIVPTR (output) INTEGER array, dimension ( N ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records the number of Givens rotations performed on the I-th problem on the computation tree. GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations of Givens rotations performed on the I-th level on the computation tree. LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. PERM (output) INTEGER array, dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records permutations done on the I-th level of the computation tree. GIVNUM (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- values of Givens rotations performed on the I-th level on the computation tree. C (output) DOUBLE PRECISION array, dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 and the I-th subproblem is not square, on exit, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. S (output) DOUBLE PRECISION array, dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 and the I-th subproblem is not square, on exit, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. WORK (workspace) DOUBLE PRECISION array, dimension (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). IWORK (workspace) INTEGER array. Dimension must be at least (7 * N). INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, an singular value did not converge Further Details =============== Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1 * 1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1 * 1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; --c__; --s; --work; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } else if (*ldu < *n + *sqre) { *info = -8; } else if (*ldgcol < *n) { *info = -17; } if (*info != 0) { i__1 = -(*info); xerbla_("DLASDA", &i__1); return 0; } m = *n + *sqre; /* If the input matrix is too small, call DLASDQ to find the SVD. */ if (*n <= *smlsiz) { if (*icompq == 0) { dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & work[1], info); } else { dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); } return 0; } /* Book-keeping and set up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; idxq = ndimr + *n; iwk = idxq + *n; ncc = 0; nru = 0; smlszp = *smlsiz + 1; vf = 1; vl = vf + m; nwork1 = vl + m; nwork2 = nwork1 + smlszp * smlszp; dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* for the nodes on bottom level of the tree, solve their subproblems by DLASDQ. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nlp1 = nl + 1; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; idxqi = idxq + nlf - 2; vfi = vf + nlf - 1; vli = vl + nlf - 1; sqrei = 1; if (*icompq == 0) { dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &work[nwork1], &smlszp); dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], &nl, &work[nwork2], info); itemp = nwork1 + nl * smlszp; dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); } else { dlaset_("A", &nl, &nl, &c_b11, &c_b12, &u_ref(nlf, 1), ldu); dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &vt_ref(nlf, 1), ldu); dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & vt_ref(nlf, 1), ldu, &u_ref(nlf, 1), ldu, &u_ref(nlf, 1), ldu, &work[nwork1], info); dcopy_(&nlp1, &vt_ref(nlf, 1), &c__1, &work[vfi], &c__1); dcopy_(&nlp1, &vt_ref(nlf, nlp1), &c__1, &work[vli], &c__1); } if (*info != 0) { return 0; } i__2 = nl; for (j = 1; j <= i__2; ++j) { iwork[idxqi + j] = j; /* L10: */ } if (i__ == nd && *sqre == 0) { sqrei = 0; } else { sqrei = 1; } idxqi += nlp1; vfi += nlp1; vli += nlp1; nrp1 = nr + sqrei; if (*icompq == 0) { dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &work[nwork1], &smlszp); dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], &nr, &work[nwork2], info); itemp = nwork1 + (nrp1 - 1) * smlszp; dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); } else { dlaset_("A", &nr, &nr, &c_b11, &c_b12, &u_ref(nrf, 1), ldu); dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &vt_ref(nrf, 1), ldu); dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & vt_ref(nrf, 1), ldu, &u_ref(nrf, 1), ldu, &u_ref(nrf, 1), ldu, &work[nwork1], info); dcopy_(&nrp1, &vt_ref(nrf, 1), &c__1, &work[vfi], &c__1); dcopy_(&nrp1, &vt_ref(nrf, nrp1), &c__1, &work[vli], &c__1); } if (*info != 0) { return 0; } i__2 = nr; for (j = 1; j <= i__2; ++j) { iwork[idxqi + j] = j; /* L20: */ } /* L30: */ } /* Now conquer each subproblem bottom-up. */ j = pow_ii(&c__2, &nlvl); for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqrei = *sqre; } else { sqrei = 1; } vfi = vf + nlf - 1; vli = vl + nlf - 1; idxqi = idxq + nlf - 1; alpha = d__[ic]; beta = e[ic]; if (*icompq == 0) { dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm[ perm_offset], &givptr[1], &givcol[givcol_offset], ldgcol, &givnum[givnum_offset], ldu, &poles[ poles_offset], &difl[difl_offset], &difr[difr_offset], &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], &iwork[iwk], info); } else { --j; dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm_ref( nlf, lvl), &givptr[j], &givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), & difl_ref(nlf, lvl), &difr_ref(nlf, lvl2), &z___ref( nlf, lvl), &k[j], &c__[j], &s[j], &work[nwork1], & iwork[iwk], info); } if (*info != 0) { return 0; } /* L40: */ } /* L50: */ } return 0; /* End of DLASDA */ } /* dlasda_ */
/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer * ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; /* Local variables */ static integer isub; static doublereal smin; static integer sqre1, i__, j; static doublereal r__; extern logical lsame_(char *, char *); extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer * , doublereal *, integer *); static integer iuplo; static doublereal cs, sn; extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), xerbla_(char *, integer *), dbdsqr_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); static logical rotate; static integer np1; #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= DLASDQ computes the singular value decomposition (SVD) of a real (upper or lower) bidiagonal matrix with diagonal D and offdiagonal E, accumulating the transformations if desired. Letting B denote the input bidiagonal matrix, the algorithm computes orthogonal matrices Q and P such that B = Q * S * P' (P' denotes the transpose of P). The singular values S are overwritten on D. The input matrix U is changed to U * Q if desired. The input matrix VT is changed to P' * VT if desired. The input matrix C is changed to Q' * C if desired. See "Computing Small Singular Values of Bidiagonal Matrices With Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, LAPACK Working Note #3, for a detailed description of the algorithm. Arguments ========= UPLO (input) CHARACTER*1 On entry, UPLO specifies whether the input bidiagonal matrix is upper or lower bidiagonal, and wether it is square are not. UPLO = 'U' or 'u' B is upper bidiagonal. UPLO = 'L' or 'l' B is lower bidiagonal. SQRE (input) INTEGER = 0: then the input matrix is N-by-N. = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and (N+1)-by-N if UPLU = 'L'. The bidiagonal matrix has N = NL + NR + 1 rows and M = N + SQRE >= N columns. N (input) INTEGER On entry, N specifies the number of rows and columns in the matrix. N must be at least 0. NCVT (input) INTEGER On entry, NCVT specifies the number of columns of the matrix VT. NCVT must be at least 0. NRU (input) INTEGER On entry, NRU specifies the number of rows of the matrix U. NRU must be at least 0. NCC (input) INTEGER On entry, NCC specifies the number of columns of the matrix C. NCC must be at least 0. D (input/output) DOUBLE PRECISION array, dimension (N) On entry, D contains the diagonal entries of the bidiagonal matrix whose SVD is desired. On normal exit, D contains the singular values in ascending order. E (input/output) DOUBLE PRECISION array. dimension is (N-1) if SQRE = 0 and N if SQRE = 1. On entry, the entries of E contain the offdiagonal entries of the bidiagonal matrix whose SVD is desired. On normal exit, E will contain 0. If the algorithm does not converge, D and E will contain the diagonal and superdiagonal entries of a bidiagonal matrix orthogonally equivalent to the one given as input. VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) On entry, contains a matrix which on exit has been premultiplied by P', dimension N-by-NCVT if SQRE = 0 and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). LDVT (input) INTEGER On entry, LDVT specifies the leading dimension of VT as declared in the calling (sub) program. LDVT must be at least 1. If NCVT is nonzero LDVT must also be at least N. U (input/output) DOUBLE PRECISION array, dimension (LDU, N) On entry, contains a matrix which on exit has been postmultiplied by Q, dimension NRU-by-N if SQRE = 0 and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). LDU (input) INTEGER On entry, LDU specifies the leading dimension of U as declared in the calling (sub) program. LDU must be at least max( 1, NRU ) . C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) On entry, contains an N-by-NCC matrix which on exit has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). LDC (input) INTEGER On entry, LDC specifies the leading dimension of C as declared in the calling (sub) program. LDC must be at least 1. If NCC is nonzero, LDC must also be at least N. WORK (workspace) DOUBLE PRECISION array, dimension (4*N) Workspace. Only referenced if one of NCVT, NRU, or NCC is nonzero, and if N is at least 2. INFO (output) INTEGER On exit, a value of 0 indicates a successful exit. If INFO < 0, argument number -INFO is illegal. If INFO > 0, the algorithm did not converge, and INFO specifies how many superdiagonals did not converge. Further Details =============== Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; --work; /* Function Body */ *info = 0; iuplo = 0; if (lsame_(uplo, "U")) { iuplo = 1; } if (lsame_(uplo, "L")) { iuplo = 2; } if (iuplo == 0) { *info = -1; } else if (*sqre < 0 || *sqre > 1) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ncvt < 0) { *info = -4; } else if (*nru < 0) { *info = -5; } else if (*ncc < 0) { *info = -6; } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { *info = -10; } else if (*ldu < max(1,*nru)) { *info = -12; } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { *info = -14; } if (*info != 0) { i__1 = -(*info); xerbla_("DLASDQ", &i__1); return 0; } if (*n == 0) { return 0; } /* ROTATE is true if any singular vectors desired, false otherwise */ rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; np1 = *n + 1; sqre1 = *sqre; /* If matrix non-square upper bidiagonal, rotate to be lower bidiagonal. The rotations are on the right. */ if (iuplo == 1 && sqre1 == 1) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (rotate) { work[i__] = cs; work[*n + i__] = sn; } /* L10: */ } dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); d__[*n] = r__; e[*n] = 0.; if (rotate) { work[*n] = cs; work[*n + *n] = sn; } iuplo = 2; sqre1 = 0; /* Update singular vectors if desired. */ if (*ncvt > 0) { dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ vt_offset], ldvt); } } /* If matrix lower bidiagonal, rotate to be upper bidiagonal by applying Givens rotations on the left. */ if (iuplo == 2) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (rotate) { work[i__] = cs; work[*n + i__] = sn; } /* L20: */ } /* If matrix (N+1)-by-N lower bidiagonal, one additional rotation is needed. */ if (sqre1 == 1) { dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); d__[*n] = r__; if (rotate) { work[*n] = cs; work[*n + *n] = sn; } } /* Update singular vectors if desired. */ if (*nru > 0) { if (sqre1 == 0) { dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ u_offset], ldu); } else { dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ u_offset], ldu); } } if (*ncc > 0) { if (sqre1 == 0) { dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ c_offset], ldc); } else { dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ c_offset], ldc); } } } /* Call DBDSQR to compute the SVD of the reduced real N-by-N upper bidiagonal matrix. */ dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ u_offset], ldu, &c__[c_offset], ldc, &work[1], info); /* Sort the singular values into ascending order (insertion sort on singular values, but only one transposition per singular vector) */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Scan for smallest D(I). */ isub = i__; smin = d__[i__]; i__2 = *n; for (j = i__ + 1; j <= i__2; ++j) { if (d__[j] < smin) { isub = j; smin = d__[j]; } /* L30: */ } if (isub != i__) { /* Swap singular values and vectors. */ d__[isub] = d__[i__]; d__[i__] = smin; if (*ncvt > 0) { dswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(i__, 1), ldvt); } if (*nru > 0) { dswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, i__), &c__1); } if (*ncc > 0) { dswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(i__, 1), ldc); } } /* L40: */ } return 0; /* End of DLASDQ */ } /* dlasdq_ */
/* Subroutine */ int zbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, doublereal *d__, doublereal *e, doublecomplex *vt, integer *ldvt, doublecomplex *u, integer *ldu, doublecomplex *c__, integer *ldc, doublereal *rwork, integer *info) { /* System generated locals */ integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( doublereal *, doublereal *); /* Local variables */ static doublereal abse; static integer idir; static doublereal abss; static integer oldm; static doublereal cosl; static integer isub, iter; static doublereal unfl, sinl, cosr, smin, smax, sinr; extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); static doublereal f, g, h__; static integer i__, j, m; static doublereal r__; extern logical lsame_(char *, char *); static doublereal oldcs; static integer oldll; static doublereal shift, sigmn, oldsn; static integer maxit; static doublereal sminl, sigmx; static logical lower; extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublecomplex *, integer *), zdrot_(integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublereal *) , zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), dlasq1_(integer *, doublereal *, doublereal *, doublereal *, integer *), dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); static doublereal cs; static integer ll; extern doublereal dlamch_(char *); static doublereal sn, mu; extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), xerbla_(char *, integer *), zdscal_(integer *, doublereal *, doublecomplex *, integer *); static doublereal sminoa, thresh; static logical rotate; static doublereal sminlo; static integer nm1; static doublereal tolmul; static integer nm12, nm13, lll; static doublereal eps, sll, tol; #define c___subscr(a_1,a_2) (a_2)*c_dim1 + a_1 #define c___ref(a_1,a_2) c__[c___subscr(a_1,a_2)] #define u_subscr(a_1,a_2) (a_2)*u_dim1 + a_1 #define u_ref(a_1,a_2) u[u_subscr(a_1,a_2)] #define vt_subscr(a_1,a_2) (a_2)*vt_dim1 + a_1 #define vt_ref(a_1,a_2) vt[vt_subscr(a_1,a_2)] /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= ZBDSQR computes the singular value decomposition (SVD) of a real N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' denotes the transpose of P), where S is a diagonal matrix with non-negative diagonal elements (the singular values of B), and Q and P are orthogonal matrices. The routine computes S, and optionally computes U * Q, P' * VT, or Q' * C, for given complex input matrices U, VT, and C. See "Computing Small Singular Values of Bidiagonal Matrices With Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, no. 5, pp. 873-912, Sept 1990) and "Accurate singular values and differential qd algorithms," by B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics Department, University of California at Berkeley, July 1992 for a detailed description of the algorithm. Arguments ========= UPLO (input) CHARACTER*1 = 'U': B is upper bidiagonal; = 'L': B is lower bidiagonal. N (input) INTEGER The order of the matrix B. N >= 0. NCVT (input) INTEGER The number of columns of the matrix VT. NCVT >= 0. NRU (input) INTEGER The number of rows of the matrix U. NRU >= 0. NCC (input) INTEGER The number of columns of the matrix C. NCC >= 0. D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the n diagonal elements of the bidiagonal matrix B. On exit, if INFO=0, the singular values of B in decreasing order. E (input/output) DOUBLE PRECISION array, dimension (N) On entry, the elements of E contain the offdiagonal elements of of the bidiagonal matrix whose SVD is desired. On normal exit (INFO = 0), E is destroyed. If the algorithm does not converge (INFO > 0), D and E will contain the diagonal and superdiagonal elements of a bidiagonal matrix orthogonally equivalent to the one given as input. E(N) is used for workspace. VT (input/output) COMPLEX*16 array, dimension (LDVT, NCVT) On entry, an N-by-NCVT matrix VT. On exit, VT is overwritten by P' * VT. VT is not referenced if NCVT = 0. LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. U (input/output) COMPLEX*16 array, dimension (LDU, N) On entry, an NRU-by-N matrix U. On exit, U is overwritten by U * Q. U is not referenced if NRU = 0. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,NRU). C (input/output) COMPLEX*16 array, dimension (LDC, NCC) On entry, an N-by-NCC matrix C. On exit, C is overwritten by Q' * C. C is not referenced if NCC = 0. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. RWORK (workspace) DOUBLE PRECISION array, dimension (4*N) INFO (output) INTEGER = 0: successful exit < 0: If INFO = -i, the i-th argument had an illegal value > 0: the algorithm did not converge; D and E contain the elements of a bidiagonal matrix which is orthogonally similar to the input matrix B; if INFO = i, i elements of E have not converged to zero. Internal Parameters =================== TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) TOLMUL controls the convergence criterion of the QR loop. If it is positive, TOLMUL*EPS is the desired relative precision in the computed singular values. If it is negative, abs(TOLMUL*EPS*sigma_max) is the desired absolute accuracy in the computed singular values (corresponds to relative accuracy abs(TOLMUL*EPS) in the largest singular value. abs(TOLMUL) should be between 1 and 1/EPS, and preferably between 10 (for fast convergence) and .1/EPS (for there to be some accuracy in the results). Default is to lose at either one eighth or 2 of the available decimal digits in each computed singular value (whichever is smaller). MAXITR INTEGER, default = 6 MAXITR controls the maximum number of passes of the algorithm through its inner loop. The algorithms stops (and so fails to converge) if the number of passes through the inner loop exceeds MAXITR*N**2. ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; --rwork; /* Function Body */ *info = 0; lower = lsame_(uplo, "L"); if (! lsame_(uplo, "U") && ! lower) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ncvt < 0) { *info = -3; } else if (*nru < 0) { *info = -4; } else if (*ncc < 0) { *info = -5; } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { *info = -9; } else if (*ldu < max(1,*nru)) { *info = -11; } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { *info = -13; } if (*info != 0) { i__1 = -(*info); xerbla_("ZBDSQR", &i__1); return 0; } if (*n == 0) { return 0; } if (*n == 1) { goto L160; } /* ROTATE is true if any singular vectors desired, false otherwise */ rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; /* If no singular vectors desired, use qd algorithm */ if (! rotate) { dlasq1_(n, &d__[1], &e[1], &rwork[1], info); return 0; } nm1 = *n - 1; nm12 = nm1 + nm1; nm13 = nm12 + nm1; idir = 0; /* Get machine constants */ eps = dlamch_("Epsilon"); unfl = dlamch_("Safe minimum"); /* If matrix lower bidiagonal, rotate to be upper bidiagonal by applying Givens rotations on the left */ if (lower) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; rwork[i__] = cs; rwork[nm1 + i__] = sn; /* L10: */ } /* Update singular vectors if desired */ if (*nru > 0) { zlasr_("R", "V", "F", nru, n, &rwork[1], &rwork[*n], &u[u_offset], ldu); } if (*ncc > 0) { zlasr_("L", "V", "F", n, ncc, &rwork[1], &rwork[*n], &c__[ c_offset], ldc); } } /* Compute singular values to relative accuracy TOL (By setting TOL to be negative, algorithm will compute singular values to absolute accuracy ABS(TOL)*norm(input matrix)) Computing MAX Computing MIN */ d__3 = 100., d__4 = pow_dd(&eps, &c_b15); d__1 = 10., d__2 = min(d__3,d__4); tolmul = max(d__1,d__2); tol = tolmul * eps; /* Compute approximate maximum, minimum singular values */ smax = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); smax = max(d__2,d__3); /* L20: */ } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); smax = max(d__2,d__3); /* L30: */ } sminl = 0.; if (tol >= 0.) { /* Relative accuracy desired */ sminoa = abs(d__[1]); if (sminoa == 0.) { goto L50; } mu = sminoa; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] , abs(d__1)))); sminoa = min(sminoa,mu); if (sminoa == 0.) { goto L50; } /* L40: */ } L50: sminoa /= sqrt((doublereal) (*n)); /* Computing MAX */ d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; thresh = max(d__1,d__2); } else { /* Absolute accuracy desired Computing MAX */ d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; thresh = max(d__1,d__2); } /* Prepare for main iteration loop for the singular values (MAXIT is the maximum number of passes through the inner loop permitted before nonconvergence signalled.) */ maxit = *n * 6 * *n; iter = 0; oldll = -1; oldm = -1; /* M points to last element of unconverged part of matrix */ m = *n; /* Begin main iteration loop */ L60: /* Check for convergence or exceeding iteration count */ if (m <= 1) { goto L160; } if (iter > maxit) { goto L200; } /* Find diagonal block of matrix to work on */ if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { d__[m] = 0.; } smax = (d__1 = d__[m], abs(d__1)); smin = smax; i__1 = m - 1; for (lll = 1; lll <= i__1; ++lll) { ll = m - lll; abss = (d__1 = d__[ll], abs(d__1)); abse = (d__1 = e[ll], abs(d__1)); if (tol < 0. && abss <= thresh) { d__[ll] = 0.; } if (abse <= thresh) { goto L80; } smin = min(smin,abss); /* Computing MAX */ d__1 = max(smax,abss); smax = max(d__1,abse); /* L70: */ } ll = 0; goto L90; L80: e[ll] = 0.; /* Matrix splits since E(LL) = 0 */ if (ll == m - 1) { /* Convergence of bottom singular value, return to top of loop */ --m; goto L60; } L90: ++ll; /* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ if (ll == m - 1) { /* 2 by 2 block, handle separately */ dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl); d__[m - 1] = sigmx; e[m - 1] = 0.; d__[m] = sigmn; /* Compute singular vectors, if desired */ if (*ncvt > 0) { zdrot_(ncvt, &vt_ref(m - 1, 1), ldvt, &vt_ref(m, 1), ldvt, &cosr, &sinr); } if (*nru > 0) { zdrot_(nru, &u_ref(1, m - 1), &c__1, &u_ref(1, m), &c__1, &cosl, & sinl); } if (*ncc > 0) { zdrot_(ncc, &c___ref(m - 1, 1), ldc, &c___ref(m, 1), ldc, &cosl, & sinl); } m += -2; goto L60; } /* If working on new submatrix, choose shift direction (from larger end diagonal element towards smaller) */ if (ll > oldm || m < oldll) { if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { /* Chase bulge from top (big end) to bottom (small end) */ idir = 1; } else { /* Chase bulge from bottom (big end) to top (small end) */ idir = 2; } } /* Apply convergence tests */ if (idir == 1) { /* Run convergence test in forward direction First apply standard test to bottom of matrix */ if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) { e[m - 1] = 0.; goto L60; } if (tol >= 0.) { /* If relative accuracy desired, apply convergence criterion forward */ mu = (d__1 = d__[ll], abs(d__1)); sminl = mu; i__1 = m - 1; for (lll = ll; lll <= i__1; ++lll) { if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { e[lll] = 0.; goto L60; } sminlo = sminl; mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ lll], abs(d__1)))); sminl = min(sminl,mu); /* L100: */ } } } else { /* Run convergence test in backward direction First apply standard test to top of matrix */ if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { e[ll] = 0.; goto L60; } if (tol >= 0.) { /* If relative accuracy desired, apply convergence criterion backward */ mu = (d__1 = d__[m], abs(d__1)); sminl = mu; i__1 = ll; for (lll = m - 1; lll >= i__1; --lll) { if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { e[lll] = 0.; goto L60; } sminlo = sminl; mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] , abs(d__1)))); sminl = min(sminl,mu); /* L110: */ } } } oldll = ll; oldm = m; /* Compute shift. First, test if shifting would ruin relative accuracy, and if so set the shift to zero. Computing MAX */ d__1 = eps, d__2 = tol * .01; if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) { /* Use a zero shift to avoid loss of relative accuracy */ shift = 0.; } else { /* Compute the shift from 2-by-2 block at end of matrix */ if (idir == 1) { sll = (d__1 = d__[ll], abs(d__1)); dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); } else { sll = (d__1 = d__[m], abs(d__1)); dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); } /* Test if shift negligible, and if so set to zero */ if (sll > 0.) { /* Computing 2nd power */ d__1 = shift / sll; if (d__1 * d__1 < eps) { shift = 0.; } } } /* Increment iteration count */ iter = iter + m - ll; /* If SHIFT = 0, do simplified QR iteration */ if (shift == 0.) { if (idir == 1) { /* Chase bulge from top to bottom Save cosines and sines for later singular vector updates */ cs = 1.; oldcs = 1.; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { d__1 = d__[i__] * cs; dlartg_(&d__1, &e[i__], &cs, &sn, &r__); if (i__ > ll) { e[i__ - 1] = oldsn * r__; } d__1 = oldcs * r__; d__2 = d__[i__ + 1] * sn; dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); rwork[i__ - ll + 1] = cs; rwork[i__ - ll + 1 + nm1] = sn; rwork[i__ - ll + 1 + nm12] = oldcs; rwork[i__ - ll + 1 + nm13] = oldsn; /* L120: */ } h__ = d__[m] * cs; d__[m] = h__ * oldcs; e[m - 1] = h__ * oldsn; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "F", &i__1, ncvt, &rwork[1], &rwork[*n], & vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; zlasr_("R", "V", "F", nru, &i__1, &rwork[nm12 + 1], &rwork[ nm13 + 1], &u_ref(1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "F", &i__1, ncc, &rwork[nm12 + 1], &rwork[ nm13 + 1], &c___ref(ll, 1), ldc); } /* Test convergence */ if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { e[m - 1] = 0.; } } else { /* Chase bulge from bottom to top Save cosines and sines for later singular vector updates */ cs = 1.; oldcs = 1.; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { d__1 = d__[i__] * cs; dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); if (i__ < m) { e[i__] = oldsn * r__; } d__1 = oldcs * r__; d__2 = d__[i__ - 1] * sn; dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); rwork[i__ - ll] = cs; rwork[i__ - ll + nm1] = -sn; rwork[i__ - ll + nm12] = oldcs; rwork[i__ - ll + nm13] = -oldsn; /* L130: */ } h__ = d__[ll] * cs; d__[ll] = h__ * oldcs; e[ll] = h__ * oldsn; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "B", &i__1, ncvt, &rwork[nm12 + 1], &rwork[ nm13 + 1], &vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; zlasr_("R", "V", "B", nru, &i__1, &rwork[1], &rwork[*n], & u_ref(1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "B", &i__1, ncc, &rwork[1], &rwork[*n], & c___ref(ll, 1), ldc); } /* Test convergence */ if ((d__1 = e[ll], abs(d__1)) <= thresh) { e[ll] = 0.; } } } else { /* Use nonzero shift */ if (idir == 1) { /* Chase bulge from top to bottom Save cosines and sines for later singular vector updates */ f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[ ll]) + shift / d__[ll]); g = e[ll]; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { dlartg_(&f, &g, &cosr, &sinr, &r__); if (i__ > ll) { e[i__ - 1] = r__; } f = cosr * d__[i__] + sinr * e[i__]; e[i__] = cosr * e[i__] - sinr * d__[i__]; g = sinr * d__[i__ + 1]; d__[i__ + 1] = cosr * d__[i__ + 1]; dlartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__] + sinl * d__[i__ + 1]; d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; if (i__ < m - 1) { g = sinl * e[i__ + 1]; e[i__ + 1] = cosl * e[i__ + 1]; } rwork[i__ - ll + 1] = cosr; rwork[i__ - ll + 1 + nm1] = sinr; rwork[i__ - ll + 1 + nm12] = cosl; rwork[i__ - ll + 1 + nm13] = sinl; /* L140: */ } e[m - 1] = f; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "F", &i__1, ncvt, &rwork[1], &rwork[*n], & vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; zlasr_("R", "V", "F", nru, &i__1, &rwork[nm12 + 1], &rwork[ nm13 + 1], &u_ref(1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "F", &i__1, ncc, &rwork[nm12 + 1], &rwork[ nm13 + 1], &c___ref(ll, 1), ldc); } /* Test convergence */ if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { e[m - 1] = 0.; } } else { /* Chase bulge from bottom to top Save cosines and sines for later singular vector updates */ f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[m] ) + shift / d__[m]); g = e[m - 1]; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { dlartg_(&f, &g, &cosr, &sinr, &r__); if (i__ < m) { e[i__] = r__; } f = cosr * d__[i__] + sinr * e[i__ - 1]; e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; g = sinr * d__[i__ - 1]; d__[i__ - 1] = cosr * d__[i__ - 1]; dlartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; if (i__ > ll + 1) { g = sinl * e[i__ - 2]; e[i__ - 2] = cosl * e[i__ - 2]; } rwork[i__ - ll] = cosr; rwork[i__ - ll + nm1] = -sinr; rwork[i__ - ll + nm12] = cosl; rwork[i__ - ll + nm13] = -sinl; /* L150: */ } e[ll] = f; /* Test convergence */ if ((d__1 = e[ll], abs(d__1)) <= thresh) { e[ll] = 0.; } /* Update singular vectors if desired */ if (*ncvt > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "B", &i__1, ncvt, &rwork[nm12 + 1], &rwork[ nm13 + 1], &vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; zlasr_("R", "V", "B", nru, &i__1, &rwork[1], &rwork[*n], & u_ref(1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; zlasr_("L", "V", "B", &i__1, ncc, &rwork[1], &rwork[*n], & c___ref(ll, 1), ldc); } } } /* QR iteration finished, go back and check convergence */ goto L60; /* All singular values converged, so make them positive */ L160: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] < 0.) { d__[i__] = -d__[i__]; /* Change sign of singular vectors, if desired */ if (*ncvt > 0) { zdscal_(ncvt, &c_b72, &vt_ref(i__, 1), ldvt); } } /* L170: */ } /* Sort the singular values into decreasing order (insertion sort on singular values, but only one transposition per singular vector) */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Scan for smallest D(I) */ isub = 1; smin = d__[1]; i__2 = *n + 1 - i__; for (j = 2; j <= i__2; ++j) { if (d__[j] <= smin) { isub = j; smin = d__[j]; } /* L180: */ } if (isub != *n + 1 - i__) { /* Swap singular values and vectors */ d__[isub] = d__[*n + 1 - i__]; d__[*n + 1 - i__] = smin; if (*ncvt > 0) { zswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(*n + 1 - i__, 1), ldvt); } if (*nru > 0) { zswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, *n + 1 - i__), & c__1); } if (*ncc > 0) { zswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(*n + 1 - i__, 1), ldc); } } /* L190: */ } goto L220; /* Maximum number of iterations exceeded, failure to converge */ L200: *info = 0; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { if (e[i__] != 0.) { ++(*info); } /* L210: */ } L220: return 0; /* End of ZBDSQR */ } /* zbdsqr_ */
/* Subroutine */ int slasd0_(integer *n, integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, integer *iwork, real *work, integer *info) { /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ static real beta; static integer idxq, nlvl, i__, j, m; static real alpha; static integer inode, ndiml, idxqc, ndimr, itemp, sqrei, i1; extern /* Subroutine */ int slasd1_(integer *, integer *, integer *, real *, real *, real *, real *, integer *, real *, integer *, integer * , integer *, real *, integer *); static integer ic, lf, nd, ll, nl, nr; extern /* Subroutine */ int xerbla_(char *, integer *), slasdq_( char *, integer *, integer *, integer *, integer *, integer *, real *, real *, real *, integer *, real *, integer *, real *, integer *, real *, integer *), slasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *); static integer im1, ncc, nlf, nrf, iwk, lvl, ndb1, nlp1, nrp1; #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] /* -- LAPACK auxiliary routine (instrumented to count ops, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= Using a divide and conquer approach, SLASD0 computes the singular value decomposition (SVD) of a real upper bidiagonal N-by-M matrix B with diagonal D and offdiagonal E, where M = N + SQRE. The algorithm computes orthogonal matrices U and VT such that B = U * S * VT. The singular values S are overwritten on D. A related subroutine, SLASDA, computes only the singular values, and optionally, the singular vectors in compact form. Arguments ========= N (input) INTEGER On entry, the row dimension of the upper bidiagonal matrix. This is also the dimension of the main diagonal array D. SQRE (input) INTEGER Specifies the column dimension of the bidiagonal matrix. = 0: The bidiagonal matrix has column dimension M = N; = 1: The bidiagonal matrix has column dimension M = N+1; D (input/output) REAL array, dimension (N) On entry D contains the main diagonal of the bidiagonal matrix. On exit D, if INFO = 0, contains its singular values. E (input) REAL array, dimension (M-1) Contains the subdiagonal entries of the bidiagonal matrix. On exit, E has been destroyed. U (output) REAL array, dimension at least (LDQ, N) On exit, U contains the left singular vectors. LDU (input) INTEGER On entry, leading dimension of U. VT (output) REAL array, dimension at least (LDVT, M) On exit, VT' contains the right singular vectors. LDVT (input) INTEGER On entry, leading dimension of VT. SMLSIZ (input) INTEGER On entry, maximum size of the subproblems at the bottom of the computation tree. IWORK INTEGER work array. Dimension must be at least (8 * N) WORK REAL work array. Dimension must be at least (3 * M**2 + 2 * M) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, an singular value did not converge Further Details =============== Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; --iwork; --work; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*sqre < 0 || *sqre > 1) { *info = -2; } m = *n + *sqre; if (*ldu < *n) { *info = -6; } else if (*ldvt < m) { *info = -8; } else if (*smlsiz < 3) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("SLASD0", &i__1); return 0; } /* If the input matrix is too small, call SLASDQ to find the SVD. */ if (*n <= *smlsiz) { slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); return 0; } /* Set up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; idxq = ndimr + *n; iwk = idxq + *n; slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* For the nodes on bottom level of the tree, solve their subproblems by SLASDQ. */ ndb1 = (nd + 1) / 2; ncc = 0; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nlp1 = nl + 1; nr = iwork[ndimr + i1]; nrp1 = nr + 1; nlf = ic - nl; nrf = ic + 1; sqrei = 1; slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & vt_ref(nlf, nlf), ldvt, &u_ref(nlf, nlf), ldu, &u_ref(nlf, nlf), ldu, &work[1], info); if (*info != 0) { return 0; } itemp = idxq + nlf - 2; i__2 = nl; for (j = 1; j <= i__2; ++j) { iwork[itemp + j] = j; /* L10: */ } if (i__ == nd) { sqrei = *sqre; } else { sqrei = 1; } nrp1 = nr + sqrei; slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & vt_ref(nrf, nrf), ldvt, &u_ref(nrf, nrf), ldu, &u_ref(nrf, nrf), ldu, &work[1], info); if (*info != 0) { return 0; } itemp = idxq + ic; i__2 = nr; for (j = 1; j <= i__2; ++j) { iwork[itemp + j - 1] = j; /* L20: */ } /* L30: */ } /* Now conquer each subproblem bottom-up. */ for (lvl = nlvl; lvl >= 1; --lvl) { /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; if (*sqre == 0 && i__ == ll) { sqrei = *sqre; } else { sqrei = 1; } idxqc = idxq + nlf - 1; alpha = d__[ic]; beta = e[ic]; slasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u_ref(nlf, nlf), ldu, &vt_ref(nlf, nlf), ldvt, &iwork[idxqc], &iwork[ iwk], &work[1], info); if (*info != 0) { return 0; } /* L40: */ } /* L50: */ } return 0; /* End of SLASD0 */ } /* slasd0_ */
/* Subroutine */ int sbdt03_(char *uplo, integer *n, integer *kd, real *d__, real *e, real *u, integer *ldu, real *s, real *vt, integer *ldvt, real *work, real *resid) { /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; real r__1, r__2, r__3, r__4; /* Local variables */ static integer i__, j; extern logical lsame_(char *, char *); static real bnorm; extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); extern doublereal sasum_(integer *, real *, integer *), slamch_(char *); extern integer isamax_(integer *, real *, integer *); static real eps; #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= SBDT03 reconstructs a bidiagonal matrix B from its SVD: S = U' * B * V where U and V are orthogonal matrices and S is diagonal. The test ratio to test the singular value decomposition is RESID = norm( B - U * S * VT ) / ( n * norm(B) * EPS ) where VT = V' and EPS is the machine precision. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the matrix B is upper or lower bidiagonal. = 'U': Upper bidiagonal = 'L': Lower bidiagonal N (input) INTEGER The order of the matrix B. KD (input) INTEGER The bandwidth of the bidiagonal matrix B. If KD = 1, the matrix B is bidiagonal, and if KD = 0, B is diagonal and E is not referenced. If KD is greater than 1, it is assumed to be 1, and if KD is less than 0, it is assumed to be 0. D (input) REAL array, dimension (N) The n diagonal elements of the bidiagonal matrix B. E (input) REAL array, dimension (N-1) The (n-1) superdiagonal elements of the bidiagonal matrix B if UPLO = 'U', or the (n-1) subdiagonal elements of B if UPLO = 'L'. U (input) REAL array, dimension (LDU,N) The n by n orthogonal matrix U in the reduction B = U'*A*P. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,N) S (input) REAL array, dimension (N) The singular values from the SVD of B, sorted in decreasing order. VT (input) REAL array, dimension (LDVT,N) The n by n orthogonal matrix V' in the reduction B = U * S * V'. LDVT (input) INTEGER The leading dimension of the array VT. WORK (workspace) REAL array, dimension (2*N) RESID (output) REAL The test ratio: norm(B - U * S * V') / ( n * norm(A) * EPS ) ====================================================================== Quick return if possible Parameter adjustments */ --d__; --e; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --s; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; --work; /* Function Body */ *resid = 0.f; if (*n <= 0) { return 0; } /* Compute B - U * S * V' one column at a time. */ bnorm = 0.f; if (*kd >= 1) { /* B is bidiagonal. */ if (lsame_(uplo, "U")) { /* B is upper bidiagonal. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = s[i__] * vt_ref(i__, j); /* L10: */ } sgemv_("No transpose", n, n, &c_b6, &u[u_offset], ldu, &work[* n + 1], &c__1, &c_b8, &work[1], &c__1); work[j] += d__[j]; if (j > 1) { work[j - 1] += e[j - 1]; /* Computing MAX */ r__3 = bnorm, r__4 = (r__1 = d__[j], dabs(r__1)) + (r__2 = e[j - 1], dabs(r__2)); bnorm = dmax(r__3,r__4); } else { /* Computing MAX */ r__2 = bnorm, r__3 = (r__1 = d__[j], dabs(r__1)); bnorm = dmax(r__2,r__3); } /* Computing MAX */ r__1 = *resid, r__2 = sasum_(n, &work[1], &c__1); *resid = dmax(r__1,r__2); /* L20: */ } } else { /* B is lower bidiagonal. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = s[i__] * vt_ref(i__, j); /* L30: */ } sgemv_("No transpose", n, n, &c_b6, &u[u_offset], ldu, &work[* n + 1], &c__1, &c_b8, &work[1], &c__1); work[j] += d__[j]; if (j < *n) { work[j + 1] += e[j]; /* Computing MAX */ r__3 = bnorm, r__4 = (r__1 = d__[j], dabs(r__1)) + (r__2 = e[j], dabs(r__2)); bnorm = dmax(r__3,r__4); } else { /* Computing MAX */ r__2 = bnorm, r__3 = (r__1 = d__[j], dabs(r__1)); bnorm = dmax(r__2,r__3); } /* Computing MAX */ r__1 = *resid, r__2 = sasum_(n, &work[1], &c__1); *resid = dmax(r__1,r__2); /* L40: */ } } } else { /* B is diagonal. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] = s[i__] * vt_ref(i__, j); /* L50: */ } sgemv_("No transpose", n, n, &c_b6, &u[u_offset], ldu, &work[*n + 1], &c__1, &c_b8, &work[1], &c__1); work[j] += d__[j]; /* Computing MAX */ r__1 = *resid, r__2 = sasum_(n, &work[1], &c__1); *resid = dmax(r__1,r__2); /* L60: */ } j = isamax_(n, &d__[1], &c__1); bnorm = (r__1 = d__[j], dabs(r__1)); } /* Compute norm(B - U * S * V') / ( n * norm(B) * EPS ) */ eps = slamch_("Precision"); if (bnorm <= 0.f) { if (*resid != 0.f) { *resid = 1.f / eps; } } else { if (bnorm >= *resid) { *resid = *resid / bnorm / ((real) (*n) * eps); } else { if (bnorm < 1.f) { /* Computing MIN */ r__1 = *resid, r__2 = (real) (*n) * bnorm; *resid = dmin(r__1,r__2) / bnorm / ((real) (*n) * eps); } else { /* Computing MIN */ r__1 = *resid / bnorm, r__2 = (real) (*n); *resid = dmin(r__1,r__2) / ((real) (*n) * eps); } } } return 0; /* End of SBDT03 */ } /* sbdt03_ */
/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University December 1, 1999 Purpose ======= DBDSDC computes the singular value decomposition (SVD) of a real N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, using a divide and conquer method, where S is a diagonal matrix with non-negative diagonal elements (the singular values of B), and U and VT are orthogonal matrices of left and right singular vectors, respectively. DBDSDC can be used to compute all singular values, and optionally, singular vectors or singular vectors in compact form. This code makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. See DLASD3 for details. The code currently call DLASDQ if singular values only are desired. However, it can be slightly modified to compute singular values using the divide and conquer method. Arguments ========= UPLO (input) CHARACTER*1 = 'U': B is upper bidiagonal. = 'L': B is lower bidiagonal. COMPQ (input) CHARACTER*1 Specifies whether singular vectors are to be computed as follows: = 'N': Compute singular values only; = 'P': Compute singular values and compute singular vectors in compact form; = 'I': Compute singular values and singular vectors. N (input) INTEGER The order of the matrix B. N >= 0. D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the n diagonal elements of the bidiagonal matrix B. On exit, if INFO=0, the singular values of B. E (input/output) DOUBLE PRECISION array, dimension (N) On entry, the elements of E contain the offdiagonal elements of the bidiagonal matrix whose SVD is desired. On exit, E has been destroyed. U (output) DOUBLE PRECISION array, dimension (LDU,N) If COMPQ = 'I', then: On exit, if INFO = 0, U contains the left singular vectors of the bidiagonal matrix. For other values of COMPQ, U is not referenced. LDU (input) INTEGER The leading dimension of the array U. LDU >= 1. If singular vectors are desired, then LDU >= max( 1, N ). VT (output) DOUBLE PRECISION array, dimension (LDVT,N) If COMPQ = 'I', then: On exit, if INFO = 0, VT' contains the right singular vectors of the bidiagonal matrix. For other values of COMPQ, VT is not referenced. LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= 1. If singular vectors are desired, then LDVT >= max( 1, N ). Q (output) DOUBLE PRECISION array, dimension (LDQ) If COMPQ = 'P', then: On exit, if INFO = 0, Q and IQ contain the left and right singular vectors in a compact form, requiring O(N log N) space instead of 2*N**2. In particular, Q contains all the DOUBLE PRECISION data in LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) words of memory, where SMLSIZ is returned by ILAENV and is equal to the maximum size of the subproblems at the bottom of the computation tree (usually about 25). For other values of COMPQ, Q is not referenced. IQ (output) INTEGER array, dimension (LDIQ) If COMPQ = 'P', then: On exit, if INFO = 0, Q and IQ contain the left and right singular vectors in a compact form, requiring O(N log N) space instead of 2*N**2. In particular, IQ contains all INTEGER data in LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) words of memory, where SMLSIZ is returned by ILAENV and is equal to the maximum size of the subproblems at the bottom of the computation tree (usually about 25). For other values of COMPQ, IQ is not referenced. WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) If COMPQ = 'N' then LWORK >= (4 * N). If COMPQ = 'P' then LWORK >= (6 * N). If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). IWORK (workspace) INTEGER array, dimension (8*N) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: The algorithm failed to compute an singular value. The update process of divide and conquer failed. Further Details =============== Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__9 = 9; static integer c__0 = 0; static doublereal c_b15 = 1.; static integer c__1 = 1; static doublereal c_b29 = 0.; /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double d_sign(doublereal *, doublereal *), log(doublereal); /* Local variables */ static integer difl, difr, ierr, perm, mlvl, sqre, i__, j, k; static doublereal p, r__; static integer z__; extern logical lsame_(char *, char *); extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * , doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer *); static integer poles, iuplo, nsize, start; extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *); static integer ic, ii, kk; static doublereal cs; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static integer is, iu; static doublereal sn; extern /* Subroutine */ int dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), dlasdq_(char *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); static integer givcol; extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); static integer icompq; static doublereal orgnrm; static integer givnum, givptr, nm1, qstart, smlsiz, wstart, smlszp; static doublereal eps; static integer ivt; #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] --d__; --e; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; --q; --iq; --work; --iwork; /* Function Body */ *info = 0; iuplo = 0; if (lsame_(uplo, "U")) { iuplo = 1; } if (lsame_(uplo, "L")) { iuplo = 2; } if (lsame_(compq, "N")) { icompq = 0; } else if (lsame_(compq, "P")) { icompq = 1; } else if (lsame_(compq, "I")) { icompq = 2; } else { icompq = -1; } if (iuplo == 0) { *info = -1; } else if (icompq < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { *info = -7; } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("DBDSDC", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( ftnlen)6, (ftnlen)1); if (*n == 1) { if (icompq == 1) { q[1] = d_sign(&c_b15, &d__[1]); q[smlsiz * *n + 1] = 1.; } else if (icompq == 2) { u_ref(1, 1) = d_sign(&c_b15, &d__[1]); vt_ref(1, 1) = 1.; } d__[1] = abs(d__[1]); return 0; } nm1 = *n - 1; /* If matrix lower bidiagonal, rotate to be upper bidiagonal by applying Givens rotations on the left */ wstart = 1; qstart = 3; if (icompq == 1) { dcopy_(n, &d__[1], &c__1, &q[1], &c__1); i__1 = *n - 1; dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); } if (iuplo == 2) { qstart = 5; wstart = (*n << 1) - 1; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (icompq == 1) { q[i__ + (*n << 1)] = cs; q[i__ + *n * 3] = sn; } else if (icompq == 2) { work[i__] = cs; work[nm1 + i__] = -sn; } /* L10: */ } } /* If ICOMPQ = 0, use DLASDQ to compute the singular values. */ if (icompq == 0) { dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ wstart], info); goto L40; } /* If N is smaller than the minimum divide size SMLSIZ, then solve the problem with another solver. */ if (*n <= smlsiz) { if (icompq == 2) { dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ wstart], info); } else if (icompq == 1) { iu = 1; ivt = iu + *n; dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ iu + (qstart - 1) * *n], n, &work[wstart], info); } goto L40; } if (icompq == 2) { dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); } /* Scale. */ orgnrm = dlanst_("M", n, &d__[1], &e[1]); if (orgnrm == 0.) { return 0; } dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & ierr); eps = dlamch_("Epsilon"); mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / log(2.)) + 1; smlszp = smlsiz + 1; if (icompq == 1) { iu = 1; ivt = smlsiz + 1; difl = ivt + smlszp; difr = difl + mlvl; z__ = difr + (mlvl << 1); ic = z__ + mlvl; is = ic + 1; poles = is + 1; givnum = poles + (mlvl << 1); k = 1; givptr = 2; perm = 3; givcol = perm + mlvl; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = d__[i__], abs(d__1)) < eps) { d__[i__] = d_sign(&eps, &d__[i__]); } /* L20: */ } start = 1; sqre = 0; i__1 = nm1; for (i__ = 1; i__ <= i__1; ++i__) { if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { /* Subproblem found. First determine its size and then apply divide and conquer on it. */ if (i__ < nm1) { /* A subproblem with E(I) small for I < NM1. */ nsize = i__ - start + 1; } else if ((d__1 = e[i__], abs(d__1)) >= eps) { /* A subproblem with E(NM1) not too small but I = NM1. */ nsize = *n - start + 1; } else { /* A subproblem with E(NM1) small. This implies an 1-by-1 subproblem at D(N). Solve this 1-by-1 problem first. */ nsize = i__ - start + 1; if (icompq == 2) { u_ref(*n, *n) = d_sign(&c_b15, &d__[*n]); vt_ref(*n, *n) = 1.; } else if (icompq == 1) { q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]); q[*n + (smlsiz + qstart - 1) * *n] = 1.; } d__[*n] = (d__1 = d__[*n], abs(d__1)); } if (icompq == 2) { dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u_ref(start, start), ldu, &vt_ref(start, start), ldvt, &smlsiz, & iwork[1], &work[wstart], info); } else { dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ start], &q[start + (iu + qstart - 2) * *n], n, &q[ start + (ivt + qstart - 2) * *n], &iq[start + k * *n], &q[start + (difl + qstart - 2) * *n], &q[start + ( difr + qstart - 2) * *n], &q[start + (z__ + qstart - 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ start + givptr * *n], &iq[start + givcol * *n], n, & iq[start + perm * *n], &q[start + (givnum + qstart - 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ start + (is + qstart - 2) * *n], &work[wstart], & iwork[1], info); if (*info != 0) { return 0; } } start = i__ + 1; } /* L30: */ } /* Unscale */ dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); L40: /* Use Selection Sort to minimize swaps of singular vectors */ i__1 = *n; for (ii = 2; ii <= i__1; ++ii) { i__ = ii - 1; kk = i__; p = d__[i__]; i__2 = *n; for (j = ii; j <= i__2; ++j) { if (d__[j] > p) { kk = j; p = d__[j]; } /* L50: */ } if (kk != i__) { d__[kk] = d__[i__]; d__[i__] = p; if (icompq == 1) { iq[i__] = kk; } else if (icompq == 2) { dswap_(n, &u_ref(1, i__), &c__1, &u_ref(1, kk), &c__1); dswap_(n, &vt_ref(i__, 1), ldvt, &vt_ref(kk, 1), ldvt); } } else if (icompq == 1) { iq[i__] = i__; } /* L60: */ } /* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ if (icompq == 1) { if (iuplo == 1) { iq[*n] = 1; } else { iq[*n] = 0; } } /* If B is lower bidiagonal, update U by those Givens rotations which rotated B to be upper bidiagonal */ if (iuplo == 2 && icompq == 2) { dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); } return 0; /* End of DBDSDC */ } /* dbdsdc_ */
/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal * poles, integer *givptr, integer *givcol, integer *ldgcol, integer * perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, i__2; /* Builtin functions */ integer pow_ii(integer *, integer *); /* Local variables */ static integer nlvl, sqre, i__, j; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer inode, ndiml, ndimr; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static integer i1; extern /* Subroutine */ int dlals0_(integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal dopbl3_(char *, integer *, integer *, integer *) ; static integer ic, lf, nd, ll, nl, nr; extern /* Subroutine */ int dlasdt_(integer *, integer *, integer *, integer *, integer *, integer *, integer *), xerbla_(char *, integer *); static integer im1, nlf, nrf, lvl, ndb1, nlp1, lvl2, nrp1; #define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1] #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_ref(a_1,a_2) bx[(a_2)*bx_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] /* -- LAPACK routine (instrumented to count ops, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DLALSA is an itermediate step in solving the least squares problem by computing the SVD of the coefficient matrix in compact form (The singular vectors are computed as products of simple orthorgonal matrices.). If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector matrix of an upper bidiagonal matrix to the right hand side; and if ICOMPQ = 1, DLALSA applies the right singular vector matrix to the right hand side. The singular vector matrices were generated in compact form by DLALSA. Arguments ========= ICOMPQ (input) INTEGER Specifies whether the left or the right singular vector matrix is involved. = 0: Left singular vector matrix = 1: Right singular vector matrix SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The row and column dimensions of the upper bidiagonal matrix. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input) DOUBLE PRECISION array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B in the calling subprogram. LDB must be at least max(1,MAX( M, N ) ). BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) On exit, the result of applying the left or right singular vector matrix to B. LDBX (input) INTEGER The leading dimension of BX. U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). On entry, U contains the left singular vector matrices of all subproblems at the bottom level. LDU (input) INTEGER, LDU = > N. The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). On entry, VT' contains the right singular vector matrices of all subproblems at the bottom level. K (input) INTEGER array, dimension ( N ). DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(*, 2 * I) record the normalizing factors of the right singular vectors matrices of subproblems on I-th level. Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). On entry, Z(1, I) contains the components of the deflation- adjusted updating row vector for subproblems on the I-th level. POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old singular values involved in the secular equations on the I-th level. GIVPTR (input) INTEGER array, dimension ( N ). On entry, GIVPTR( I ) records the number of Givens rotations performed on the I-th problem on the computation tree. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the locations of Givens rotations performed on the I-th level on the computation tree. LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). On entry, PERM(*, I) records permutations done on the I-th level of the computation tree. GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- values of Givens rotations performed on the I-th level on the computation tree. C (input) DOUBLE PRECISION array, dimension ( N ). On entry, if the I-th subproblem is not square, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. S (input) DOUBLE PRECISION array, dimension ( N ). On entry, if the I-th subproblem is not square, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. WORK (workspace) DOUBLE PRECISION array. The dimension must be at least N. IWORK (workspace) INTEGER array. The dimension must be at least 3 * N INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. ===================================================================== Test the input parameters. Parameter adjustments */ b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1 * 1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1 * 1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; --c__; --s; --work; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < *smlsiz) { *info = -3; } else if (*nrhs < 1) { *info = -4; } else if (*ldb < *n) { *info = -6; } else if (*ldbx < *n) { *info = -8; } else if (*ldu < *n) { *info = -10; } else if (*ldgcol < *n) { *info = -19; } if (*info != 0) { i__1 = -(*info); xerbla_("DLALSA", &i__1); return 0; } /* Book-keeping and setting up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* The following code applies back the left singular vector factors. For applying back the right singular vector factors, go to 50. */ if (*icompq == 1) { goto L50; } /* The nodes on the bottom level of the tree were solved by DLASDQ. The corresponding left and right singular vector matrices are in explicit form. First apply back the left singular vector matrices. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; latime_1.ops += dopbl3_("DGEMM ", &nl, nrhs, &nl); latime_1.ops += dopbl3_("DGEMM ", &nr, nrhs, &nr); dgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &b_ref( nlf, 1), ldb, &c_b10, &bx_ref(nlf, 1), ldbx); dgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &b_ref( nrf, 1), ldb, &c_b10, &bx_ref(nrf, 1), ldbx); /* L10: */ } /* Next copy the rows of B that correspond to unchanged rows in the bidiagonal matrix to BX. */ i__1 = nd; for (i__ = 1; i__ <= i__1; ++i__) { ic = iwork[inode + i__ - 1]; dcopy_(nrhs, &b_ref(ic, 1), ldb, &bx_ref(ic, 1), ldbx); /* L20: */ } /* Finally go through the left singular vector matrices of all the other subproblems bottom-up on the tree. */ j = pow_ii(&c__2, &nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* find the first node LF and last node LL on the current level LVL */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; --j; dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx_ref(nlf, 1), ldbx, & b_ref(nlf, 1), ldb, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &work[1], info); /* L30: */ } /* L40: */ } goto L90; /* ICOMPQ = 1: applying back the right singular vector factors. */ L50: /* First now go through the right singular vector matrices of all the tree nodes top-down. */ j = 0; i__1 = nlvl; for (lvl = 1; lvl <= i__1; ++lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__2 = lvl - 1; lf = pow_ii(&c__2, &i__2); ll = (lf << 1) - 1; } i__2 = lf; for (i__ = ll; i__ >= i__2; --i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqre = 0; } else { sqre = 1; } ++j; dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b_ref(nlf, 1), ldb, & bx_ref(nlf, 1), ldbx, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &work[1], info); /* L60: */ } /* L70: */ } /* The nodes on the bottom level of the tree were solved by DLASDQ. The corresponding right singular vector matrices are in explicit form. Apply them back. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlp1 = nl + 1; if (i__ == nd) { nrp1 = nr; } else { nrp1 = nr + 1; } nlf = ic - nl; nrf = ic + 1; latime_1.ops += dopbl3_("DGEMM ", &nlp1, nrhs, &nlp1); latime_1.ops += dopbl3_("DGEMM ", &nrp1, nrhs, &nrp1); dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & b_ref(nlf, 1), ldb, &c_b10, &bx_ref(nlf, 1), ldbx); dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & b_ref(nrf, 1), ldb, &c_b10, &bx_ref(nrf, 1), ldbx); /* L80: */ } L90: return 0; /* End of DLALSA */ } /* dlalsa_ */