/* Subroutine */ int clalsa_(integer *icompq, integer *smlsiz, integer *n, integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real *z__, real *poles, integer *givptr, integer *givcol, integer * ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CLALSA is an itermediate step in solving the least squares problem by computing the SVD of the coefficient matrix in compact form (The singular vectors are computed as products of simple orthorgonal matrices.). If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector matrix of an upper bidiagonal matrix to the right hand side; and if ICOMPQ = 1, CLALSA applies the right singular vector matrix to the right hand side. The singular vector matrices were generated in compact form by CLALSA. Arguments ========= ICOMPQ (input) INTEGER Specifies whether the left or the right singular vector matrix is involved. = 0: Left singular vector matrix = 1: Right singular vector matrix SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The row and column dimensions of the upper bidiagonal matrix. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input) COMPLEX array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B in the calling subprogram. LDB must be at least max(1,MAX( M, N ) ). BX (output) COMPLEX array, dimension ( LDBX, NRHS ) On exit, the result of applying the left or right singular vector matrix to B. LDBX (input) INTEGER The leading dimension of BX. U (input) REAL array, dimension ( LDU, SMLSIZ ). On entry, U contains the left singular vector matrices of all subproblems at the bottom level. LDU (input) INTEGER, LDU = > N. The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). On entry, VT' contains the right singular vector matrices of all subproblems at the bottom level. K (input) INTEGER array, dimension ( N ). DIFL (input) REAL array, dimension ( LDU, NLVL ). where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(*, 2 * I) record the normalizing factors of the right singular vectors matrices of subproblems on I-th level. Z (input) REAL array, dimension ( LDU, NLVL ). On entry, Z(1, I) contains the components of the deflation- adjusted updating row vector for subproblems on the I-th level. POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old singular values involved in the secular equations on the I-th level. GIVPTR (input) INTEGER array, dimension ( N ). On entry, GIVPTR( I ) records the number of Givens rotations performed on the I-th problem on the computation tree. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the locations of Givens rotations performed on the I-th level on the computation tree. LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). On entry, PERM(*, I) records permutations done on the I-th level of the computation tree. GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- values of Givens rotations performed on the I-th level on the computation tree. C (input) REAL array, dimension ( N ). On entry, if the I-th subproblem is not square, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. S (input) REAL array, dimension ( N ). On entry, if the I-th subproblem is not square, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. RWORK (workspace) REAL array, dimension at least max ( N, (SMLSZ+1)*NRHS*3 ). IWORK (workspace) INTEGER array. The dimension must be at least 3 * N INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== Based on contributions by Ming Gu and Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA Osni Marques, LBNL/NERSC, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b9 = 1.f; static real c_b10 = 0.f; static integer c__2 = 2; /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5, i__6; complex q__1; /* Builtin functions */ double r_imag(complex *); integer pow_ii(integer *, integer *); /* Local variables */ static integer jcol, nlvl, sqre, jrow, i__, j, jimag, jreal, inode, ndiml; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static integer ndimr; extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *); static integer i1; extern /* Subroutine */ int clals0_(integer *, integer *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, integer *, integer *, real *, integer *, real *, real *, real *, real *, integer *, real *, real *, real *, integer *); static integer ic, lf, nd, ll, nl, nr; extern /* Subroutine */ int xerbla_(char *, integer *), slasdt_( integer *, integer *, integer *, integer *, integer *, integer *, integer *); static integer im1, nlf, nrf, lvl, ndb1, nlp1, lvl2, nrp1; #define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1] #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_subscr(a_1,a_2) (a_2)*bx_dim1 + a_1 #define bx_ref(a_1,a_2) bx[bx_subscr(a_1,a_2)] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; givnum_dim1 = *ldu; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; poles_dim1 = *ldu; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; z_dim1 = *ldu; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; difr_dim1 = *ldu; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; difl_dim1 = *ldu; difl_offset = 1 + difl_dim1 * 1; difl -= difl_offset; vt_dim1 = *ldu; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; --k; --givptr; perm_dim1 = *ldgcol; perm_offset = 1 + perm_dim1 * 1; perm -= perm_offset; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; --c__; --s; --rwork; --iwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < *smlsiz) { *info = -3; } else if (*nrhs < 1) { *info = -4; } else if (*ldb < *n) { *info = -6; } else if (*ldbx < *n) { *info = -8; } else if (*ldu < *n) { *info = -10; } else if (*ldgcol < *n) { *info = -19; } if (*info != 0) { i__1 = -(*info); xerbla_("CLALSA", &i__1); return 0; } /* Book-keeping and setting up the computation tree. */ inode = 1; ndiml = inode + *n; ndimr = ndiml + *n; slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* The following code applies back the left singular vector factors. For applying back the right singular vector factors, go to 170. */ if (*icompq == 1) { goto L170; } /* The nodes on the bottom level of the tree were solved by SLASDQ. The corresponding left and right singular vector matrices are in explicit form. First apply back the left singular vector matrices. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { /* IC : center row of each node NL : number of rows of left subproblem NR : number of rows of right subproblem NLF: starting row of the left subproblem NRF: starting row of the right subproblem */ i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlf = ic - nl; nrf = ic + 1; /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */ j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L10: */ } /* L20: */ } sgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &rwork[( nl * *nrhs << 1) + 1], &nl, &c_b10, &rwork[1], &nl); j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L30: */ } /* L40: */ } sgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &rwork[( nl * *nrhs << 1) + 1], &nl, &c_b10, &rwork[nl * *nrhs + 1], & nl); jreal = 0; jimag = nl * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L50: */ } /* L60: */ } /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */ j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L70: */ } /* L80: */ } sgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &rwork[( nr * *nrhs << 1) + 1], &nr, &c_b10, &rwork[1], &nr); j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L90: */ } /* L100: */ } sgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &rwork[( nr * *nrhs << 1) + 1], &nr, &c_b10, &rwork[nr * *nrhs + 1], & nr); jreal = 0; jimag = nr * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L110: */ } /* L120: */ } /* L130: */ } /* Next copy the rows of B that correspond to unchanged rows in the bidiagonal matrix to BX. */ i__1 = nd; for (i__ = 1; i__ <= i__1; ++i__) { ic = iwork[inode + i__ - 1]; ccopy_(nrhs, &b_ref(ic, 1), ldb, &bx_ref(ic, 1), ldbx); /* L140: */ } /* Finally go through the left singular vector matrices of all the other subproblems bottom-up on the tree. */ j = pow_ii(&c__2, &nlvl); sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { lvl2 = (lvl << 1) - 1; /* find the first node LF and last node LL on the current level LVL */ if (lvl == 1) { lf = 1; ll = 1; } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; --j; clals0_(icompq, &nl, &nr, &sqre, nrhs, &bx_ref(nlf, 1), ldbx, & b_ref(nlf, 1), ldb, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &rwork[1], info); /* L150: */ } /* L160: */ } goto L330; /* ICOMPQ = 1: applying back the right singular vector factors. */ L170: /* First now go through the right singular vector matrices of all the tree nodes top-down. */ j = 0; i__1 = nlvl; for (lvl = 1; lvl <= i__1; ++lvl) { lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on the current level LVL. */ if (lvl == 1) { lf = 1; ll = 1; } else { i__2 = lvl - 1; lf = pow_ii(&c__2, &i__2); ll = (lf << 1) - 1; } i__2 = lf; for (i__ = ll; i__ >= i__2; --i__) { im1 = i__ - 1; ic = iwork[inode + im1]; nl = iwork[ndiml + im1]; nr = iwork[ndimr + im1]; nlf = ic - nl; nrf = ic + 1; if (i__ == ll) { sqre = 0; } else { sqre = 1; } ++j; clals0_(icompq, &nl, &nr, &sqre, nrhs, &b_ref(nlf, 1), ldb, & bx_ref(nlf, 1), ldbx, &perm_ref(nlf, lvl), &givptr[j], & givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), & difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], & s[j], &rwork[1], info); /* L180: */ } /* L190: */ } /* The nodes on the bottom level of the tree were solved by SLASDQ. The corresponding right singular vector matrices are in explicit form. Apply them back. */ ndb1 = (nd + 1) / 2; i__1 = nd; for (i__ = ndb1; i__ <= i__1; ++i__) { i1 = i__ - 1; ic = iwork[inode + i1]; nl = iwork[ndiml + i1]; nr = iwork[ndimr + i1]; nlp1 = nl + 1; if (i__ == nd) { nrp1 = nr; } else { nrp1 = nr + 1; } nlf = ic - nl; nrf = ic + 1; /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */ j = nlp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L200: */ } /* L210: */ } sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b10, &rwork[1], & nlp1); j = nlp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L220: */ } /* L230: */ } sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, & rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b10, &rwork[nlp1 * * nrhs + 1], &nlp1); jreal = 0; jimag = nlp1 * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; for (jrow = nlf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L240: */ } /* L250: */ } /* Since B and BX are complex, the following call to SGEMM is performed in two steps (real and imaginary parts). CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */ j = nrp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; i__4 = b_subscr(jrow, jcol); rwork[j] = b[i__4].r; /* L260: */ } /* L270: */ } sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b10, &rwork[1], & nrp1); j = nrp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++j; rwork[j] = r_imag(&b_ref(jrow, jcol)); /* L280: */ } /* L290: */ } sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, & rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b10, &rwork[nrp1 * * nrhs + 1], &nrp1); jreal = 0; jimag = nrp1 * *nrhs; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; for (jrow = nrf; jrow <= i__3; ++jrow) { ++jreal; ++jimag; i__4 = bx_subscr(jrow, jcol); i__5 = jreal; i__6 = jimag; q__1.r = rwork[i__5], q__1.i = rwork[i__6]; bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; /* L300: */ } /* L310: */ } /* L320: */ } L330: return 0; /* End of CLALSA */ } /* clalsa_ */
/* Subroutine */ int clals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * rwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University December 1, 1999 Purpose ======= CLALS0 applies back the multiplying factors of either the left or the right singular vector matrix of a diagonal matrix appended by a row to the right hand side matrix B in solving the least squares problem using the divide-and-conquer SVD approach. For the left singular vector matrix, three types of orthogonal matrices are involved: (1L) Givens rotations: the number of such rotations is GIVPTR; the pairs of columns/rows they were applied to are stored in GIVCOL; and the C- and S-values of these rotations are stored in GIVNUM. (2L) Permutation. The (NL+1)-st row of B is to be moved to the first row, and for J=2:N, PERM(J)-th row of B is to be moved to the J-th row. (3L) The left singular vector matrix of the remaining matrix. For the right singular vector matrix, four types of orthogonal matrices are involved: (1R) The right singular vector matrix of the remaining matrix. (2R) If SQRE = 1, one extra Givens rotation to generate the right null space. (3R) The inverse transformation of (2L). (4R) The inverse transformation of (1L). Arguments ========= ICOMPQ (input) INTEGER Specifies whether singular vectors are to be computed in factored form: = 0: Left singular vector matrix. = 1: Right singular vector matrix. NL (input) INTEGER The row dimension of the upper block. NL >= 1. NR (input) INTEGER The row dimension of the lower block. NR >= 1. SQRE (input) INTEGER = 0: the lower block is an NR-by-NR square matrix. = 1: the lower block is an NR-by-(NR+1) rectangular matrix. The bidiagonal matrix has row dimension N = NL + NR + 1, and column dimension M = N + SQRE. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input/output) COMPLEX array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B. LDB must be at least max(1,MAX( M, N ) ). BX (workspace) COMPLEX array, dimension ( LDBX, NRHS ) LDBX (input) INTEGER The leading dimension of BX. PERM (input) INTEGER array, dimension ( N ) The permutations (from deflation and sorting) applied to the two blocks. GIVPTR (input) INTEGER The number of Givens rotations which took place in this subproblem. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) Each pair of numbers indicates a pair of rows/columns involved in a Givens rotation. LDGCOL (input) INTEGER The leading dimension of GIVCOL, must be at least N. GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) Each number indicates the C or S value used in the corresponding Givens rotation. LDGNUM (input) INTEGER The leading dimension of arrays DIFR, POLES and GIVNUM, must be at least K. POLES (input) REAL array, dimension ( LDGNUM, 2 ) On entry, POLES(1:K, 1) contains the new singular values obtained from solving the secular equation, and POLES(1:K, 2) is an array containing the poles in the secular equation. DIFL (input) REAL array, dimension ( K ). On entry, DIFL(I) is the distance between I-th updated (undeflated) singular value and the I-th (undeflated) old singular value. DIFR (input) REAL array, dimension ( LDGNUM, 2 ). On entry, DIFR(I, 1) contains the distances between I-th updated (undeflated) singular value and the I+1-th (undeflated) old singular value. And DIFR(I, 2) is the normalizing factor for the I-th right singular vector. Z (input) REAL array, dimension ( K ) Contain the components of the deflation-adjusted updating row vector. K (input) INTEGER Contains the dimension of the non-deflated matrix, This is the order of the related secular equation. 1 <= K <=N. C (input) REAL C contains garbage if SQRE =0 and the C-value of a Givens rotation related to the right null space if SQRE = 1. S (input) REAL S contains garbage if SQRE =0 and the S-value of a Givens rotation related to the right null space if SQRE = 1. RWORK (workspace) REAL array, dimension ( K*(1+NRHS) + 2*NRHS ) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== Based on contributions by Ming Gu and Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA Osni Marques, LBNL/NERSC, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b5 = -1.f; static integer c__1 = 1; static real c_b13 = 1.f; static real c_b15 = 0.f; static integer c__0 = 0; /* System generated locals */ integer givcol_dim1, givcol_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5; real r__1; complex q__1; /* Builtin functions */ double r_imag(complex *); /* Local variables */ static integer jcol; static real temp; static integer jrow; extern doublereal snrm2_(integer *, real *, integer *); static integer i__, j, m, n; static real diflj, difrj, dsigj; extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *), sgemv_(char *, integer *, integer *, real * , real *, integer *, real *, integer *, real *, real *, integer *), csrot_(integer *, complex *, integer *, complex *, integer *, real *, real *); extern doublereal slamc3_(real *, real *); static real dj; extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, real *, integer *, integer *, complex *, integer *, integer *), csscal_(integer *, real *, complex *, integer *), clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), xerbla_(char *, integer *); static real dsigjp; static integer nlp1; #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_subscr(a_1,a_2) (a_2)*bx_dim1 + a_1 #define bx_ref(a_1,a_2) bx[bx_subscr(a_1,a_2)] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; --perm; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; difr_dim1 = *ldgnum; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; poles_dim1 = *ldgnum; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; givnum_dim1 = *ldgnum; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; --difl; --z__; --rwork; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } n = *nl + *nr + 1; if (*nrhs < 1) { *info = -5; } else if (*ldb < n) { *info = -7; } else if (*ldbx < n) { *info = -9; } else if (*givptr < 0) { *info = -11; } else if (*ldgcol < n) { *info = -13; } else if (*ldgnum < n) { *info = -15; } else if (*k < 1) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("CLALS0", &i__1); return 0; } m = n + *sqre; nlp1 = *nl + 1; if (*icompq == 0) { /* Apply back orthogonal transformations from the left. Step (1L): apply back the Givens rotations performed. */ i__1 = *givptr; for (i__ = 1; i__ <= i__1; ++i__) { csrot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref( givcol_ref(i__, 1), 1), ldb, &givnum_ref(i__, 2), & givnum_ref(i__, 1)); /* L10: */ } /* Step (2L): permute rows of B. */ ccopy_(nrhs, &b_ref(nlp1, 1), ldb, &bx_ref(1, 1), ldbx); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { ccopy_(nrhs, &b_ref(perm[i__], 1), ldb, &bx_ref(i__, 1), ldbx); /* L20: */ } /* Step (3L): apply the inverse of the left singular vector matrix to BX. */ if (*k == 1) { ccopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); if (z__[1] < 0.f) { csscal_(nrhs, &c_b5, &b[b_offset], ldb); } } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = poles_ref(j, 1); dsigj = -poles_ref(j, 2); if (j < *k) { difrj = -difr_ref(j, 1); dsigjp = -poles_ref(j + 1, 2); } if (z__[j] == 0.f || poles_ref(j, 2) == 0.f) { rwork[j] = 0.f; } else { rwork[j] = -poles_ref(j, 2) * z__[j] / diflj / (poles_ref( j, 2) + dj); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) { rwork[i__] = 0.f; } else { rwork[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(& poles_ref(i__, 2), &dsigj) - diflj) / ( poles_ref(i__, 2) + dj); } /* L30: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) { rwork[i__] = 0.f; } else { rwork[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(& poles_ref(i__, 2), &dsigjp) + difrj) / ( poles_ref(i__, 2) + dj); } /* L40: */ } rwork[1] = -1.f; temp = snrm2_(k, &rwork[1], &c__1); /* Since B and BX are complex, the following call to SGEMV is performed in two steps (real and imaginary parts). CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, $ B( J, 1 ), LDB ) */ i__ = *k + (*nrhs << 1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = *k; for (jrow = 1; jrow <= i__3; ++jrow) { ++i__; i__4 = bx_subscr(jrow, jcol); rwork[i__] = bx[i__4].r; /* L50: */ } /* L60: */ } sgemv_("T", k, nrhs, &c_b13, &rwork[*k + 1 + (*nrhs << 1)], k, &rwork[1], &c__1, &c_b15, &rwork[*k + 1], &c__1); i__ = *k + (*nrhs << 1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = *k; for (jrow = 1; jrow <= i__3; ++jrow) { ++i__; rwork[i__] = r_imag(&bx_ref(jrow, jcol)); /* L70: */ } /* L80: */ } sgemv_("T", k, nrhs, &c_b13, &rwork[*k + 1 + (*nrhs << 1)], k, &rwork[1], &c__1, &c_b15, &rwork[*k + 1 + *nrhs], & c__1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = b_subscr(j, jcol); i__4 = jcol + *k; i__5 = jcol + *k + *nrhs; q__1.r = rwork[i__4], q__1.i = rwork[i__5]; b[i__3].r = q__1.r, b[i__3].i = q__1.i; /* L90: */ } clascl_("G", &c__0, &c__0, &temp, &c_b13, &c__1, nrhs, &b_ref( j, 1), ldb, info); /* L100: */ } } /* Move the deflated rows of BX to B also. */ if (*k < max(m,n)) { i__1 = n - *k; clacpy_("A", &i__1, nrhs, &bx_ref(*k + 1, 1), ldbx, &b_ref(*k + 1, 1), ldb); } } else { /* Apply back the right orthogonal transformations. Step (1R): apply back the new right singular vector matrix to B. */ if (*k == 1) { ccopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { dsigj = poles_ref(j, 2); if (z__[j] == 0.f) { rwork[j] = 0.f; } else { rwork[j] = -z__[j] / difl[j] / (dsigj + poles_ref(j, 1)) / difr_ref(j, 2); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { rwork[i__] = 0.f; } else { r__1 = -poles_ref(i__ + 1, 2); rwork[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr_ref(i__, 1)) / (dsigj + poles_ref(i__, 1) ) / difr_ref(i__, 2); } /* L110: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { rwork[i__] = 0.f; } else { r__1 = -poles_ref(i__, 2); rwork[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[ i__]) / (dsigj + poles_ref(i__, 1)) / difr_ref(i__, 2); } /* L120: */ } /* Since B and BX are complex, the following call to SGEMV is performed in two steps (real and imaginary parts). CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, $ BX( J, 1 ), LDBX ) */ i__ = *k + (*nrhs << 1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = *k; for (jrow = 1; jrow <= i__3; ++jrow) { ++i__; i__4 = b_subscr(jrow, jcol); rwork[i__] = b[i__4].r; /* L130: */ } /* L140: */ } sgemv_("T", k, nrhs, &c_b13, &rwork[*k + 1 + (*nrhs << 1)], k, &rwork[1], &c__1, &c_b15, &rwork[*k + 1], &c__1); i__ = *k + (*nrhs << 1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = *k; for (jrow = 1; jrow <= i__3; ++jrow) { ++i__; rwork[i__] = r_imag(&b_ref(jrow, jcol)); /* L150: */ } /* L160: */ } sgemv_("T", k, nrhs, &c_b13, &rwork[*k + 1 + (*nrhs << 1)], k, &rwork[1], &c__1, &c_b15, &rwork[*k + 1 + *nrhs], & c__1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = bx_subscr(j, jcol); i__4 = jcol + *k; i__5 = jcol + *k + *nrhs; q__1.r = rwork[i__4], q__1.i = rwork[i__5]; bx[i__3].r = q__1.r, bx[i__3].i = q__1.i; /* L170: */ } /* L180: */ } } /* Step (2R): if SQRE = 1, apply back the rotation that is related to the right null space of the subproblem. */ if (*sqre == 1) { ccopy_(nrhs, &b_ref(m, 1), ldb, &bx_ref(m, 1), ldbx); csrot_(nrhs, &bx_ref(1, 1), ldbx, &bx_ref(m, 1), ldbx, c__, s); } if (*k < max(m,n)) { i__1 = n - *k; clacpy_("A", &i__1, nrhs, &b_ref(*k + 1, 1), ldb, &bx_ref(*k + 1, 1), ldbx); } /* Step (3R): permute rows of B. */ ccopy_(nrhs, &bx_ref(1, 1), ldbx, &b_ref(nlp1, 1), ldb); if (*sqre == 1) { ccopy_(nrhs, &bx_ref(m, 1), ldbx, &b_ref(m, 1), ldb); } i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { ccopy_(nrhs, &bx_ref(i__, 1), ldbx, &b_ref(perm[i__], 1), ldb); /* L190: */ } /* Step (4R): apply back the Givens rotations performed. */ for (i__ = *givptr; i__ >= 1; --i__) { r__1 = -givnum_ref(i__, 1); csrot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref( givcol_ref(i__, 1), 1), ldb, &givnum_ref(i__, 2), &r__1); /* L200: */ } } return 0; /* End of CLALS0 */ } /* clals0_ */