void cblas_srot( const integer N, float *X, const integer incX, float *Y, const integer incY, const float c, const float s) { #define F77_N N #define F77_incX incX #define F77_incY incY srot_(&F77_N, X, &F77_incX, Y, &F77_incY, &c, &s); }
int f2c_srot(integer* N, real* X, integer* incX, real* Y, integer* incY, real* c, real* s) { srot_(N, X, incX, Y, incY, c, s); return 0; }
/* Subroutine */ int slaexc_(logical *wantq, integer *n, real *t, integer * ldt, real *q, integer *ldq, integer *j1, integer *n1, integer *n2, real *work, integer *info) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= SLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in an upper quasi-triangular matrix T by an orthogonal similarity transformation. T must be in Schur canonical form, that is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block has its diagonal elemnts equal and its off-diagonal elements of opposite sign. Arguments ========= WANTQ (input) LOGICAL = .TRUE. : accumulate the transformation in the matrix Q; = .FALSE.: do not accumulate the transformation. N (input) INTEGER The order of the matrix T. N >= 0. T (input/output) REAL array, dimension (LDT,N) On entry, the upper quasi-triangular matrix T, in Schur canonical form. On exit, the updated matrix T, again in Schur canonical form. LDT (input) INTEGER The leading dimension of the array T. LDT >= max(1,N). Q (input/output) REAL array, dimension (LDQ,N) On entry, if WANTQ is .TRUE., the orthogonal matrix Q. On exit, if WANTQ is .TRUE., the updated matrix Q. If WANTQ is .FALSE., Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. J1 (input) INTEGER The index of the first row of the first block T11. N1 (input) INTEGER The order of the first block T11. N1 = 0, 1 or 2. N2 (input) INTEGER The order of the second block T22. N2 = 0, 1 or 2. WORK (workspace) REAL array, dimension (N) INFO (output) INTEGER = 0: successful exit = 1: the transformed matrix T would be too far from Schur form; the blocks are not swapped and T and Q are unchanged. ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c__4 = 4; static logical c_false = FALSE_; static integer c_n1 = -1; static integer c__2 = 2; static integer c__3 = 3; /* System generated locals */ integer q_dim1, q_offset, t_dim1, t_offset, i__1; real r__1, r__2, r__3, r__4, r__5, r__6; /* Local variables */ static integer ierr; static real temp; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static real d__[16] /* was [4][4] */; static integer k; static real u[3], scale, x[4] /* was [2][2] */, dnorm; static integer j2, j3, j4; static real xnorm, u1[3], u2[3]; extern /* Subroutine */ int slanv2_(real *, real *, real *, real *, real * , real *, real *, real *, real *, real *), slasy2_(logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, integer *, real *, real *, integer *, real *, integer *); static integer nd; static real cs, t11, t22, t33, sn; extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, real *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slartg_(real *, real *, real *, real * , real *); static real thresh; extern /* Subroutine */ int slarfx_(char *, integer *, integer *, real *, real *, real *, integer *, real *); static real smlnum, wi1, wi2, wr1, wr2, eps, tau, tau1, tau2; #define d___ref(a_1,a_2) d__[(a_2)*4 + a_1 - 5] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*2 + a_1 - 3] t_dim1 = *ldt; t_offset = 1 + t_dim1 * 1; t -= t_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --work; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n == 0 || *n1 == 0 || *n2 == 0) { return 0; } if (*j1 + *n1 > *n) { return 0; } j2 = *j1 + 1; j3 = *j1 + 2; j4 = *j1 + 3; if (*n1 == 1 && *n2 == 1) { /* Swap two 1-by-1 blocks. */ t11 = t_ref(*j1, *j1); t22 = t_ref(j2, j2); /* Determine the transformation to perform the interchange. */ r__1 = t22 - t11; slartg_(&t_ref(*j1, j2), &r__1, &cs, &sn, &temp); /* Apply transformation to the matrix T. */ if (j3 <= *n) { i__1 = *n - *j1 - 1; srot_(&i__1, &t_ref(*j1, j3), ldt, &t_ref(j2, j3), ldt, &cs, &sn); } i__1 = *j1 - 1; srot_(&i__1, &t_ref(1, *j1), &c__1, &t_ref(1, j2), &c__1, &cs, &sn); t_ref(*j1, *j1) = t22; t_ref(j2, j2) = t11; if (*wantq) { /* Accumulate transformation in the matrix Q. */ srot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, j2), &c__1, &cs, &sn); } } else { /* Swapping involves at least one 2-by-2 block. Copy the diagonal block of order N1+N2 to the local array D and compute its norm. */ nd = *n1 + *n2; slacpy_("Full", &nd, &nd, &t_ref(*j1, *j1), ldt, d__, &c__4); dnorm = slange_("Max", &nd, &nd, d__, &c__4, &work[1]); /* Compute machine-dependent threshold for test for accepting swap. */ eps = slamch_("P"); smlnum = slamch_("S") / eps; /* Computing MAX */ r__1 = eps * 10.f * dnorm; thresh = dmax(r__1,smlnum); /* Solve T11*X - X*T22 = scale*T12 for X. */ slasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d___ref(*n1 + 1, *n1 + 1), &c__4, &d___ref(1, *n1 + 1), &c__4, &scale, x, & c__2, &xnorm, &ierr); /* Swap the adjacent diagonal blocks. */ k = *n1 + *n1 + *n2 - 3; switch (k) { case 1: goto L10; case 2: goto L20; case 3: goto L30; } L10: /* N1 = 1, N2 = 2: generate elementary reflector H so that: ( scale, X11, X12 ) H = ( 0, 0, * ) */ u[0] = scale; u[1] = x_ref(1, 1); u[2] = x_ref(1, 2); slarfg_(&c__3, &u[2], u, &c__1, &tau); u[2] = 1.f; t11 = t_ref(*j1, *j1); /* Perform swap provisionally on diagonal block in D. */ slarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); slarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); /* Test whether to reject swap. Computing MAX */ r__4 = (r__1 = d___ref(3, 1), dabs(r__1)), r__5 = (r__2 = d___ref(3, 2), dabs(r__2)), r__4 = max(r__4,r__5), r__5 = (r__3 = d___ref(3, 3) - t11, dabs(r__3)); if (dmax(r__4,r__5) > thresh) { goto L50; } /* Accept swap: apply transformation to the entire matrix T. */ i__1 = *n - *j1 + 1; slarfx_("L", &c__3, &i__1, u, &tau, &t_ref(*j1, *j1), ldt, &work[1]); slarfx_("R", &j2, &c__3, u, &tau, &t_ref(1, *j1), ldt, &work[1]); t_ref(j3, *j1) = 0.f; t_ref(j3, j2) = 0.f; t_ref(j3, j3) = t11; if (*wantq) { /* Accumulate transformation in the matrix Q. */ slarfx_("R", n, &c__3, u, &tau, &q_ref(1, *j1), ldq, &work[1]); } goto L40; L20: /* N1 = 2, N2 = 1: generate elementary reflector H so that: H ( -X11 ) = ( * ) ( -X21 ) = ( 0 ) ( scale ) = ( 0 ) */ u[0] = -x_ref(1, 1); u[1] = -x_ref(2, 1); u[2] = scale; slarfg_(&c__3, u, &u[1], &c__1, &tau); u[0] = 1.f; t33 = t_ref(j3, j3); /* Perform swap provisionally on diagonal block in D. */ slarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); slarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); /* Test whether to reject swap. Computing MAX */ r__4 = (r__1 = d___ref(2, 1), dabs(r__1)), r__5 = (r__2 = d___ref(3, 1), dabs(r__2)), r__4 = max(r__4,r__5), r__5 = (r__3 = d___ref(1, 1) - t33, dabs(r__3)); if (dmax(r__4,r__5) > thresh) { goto L50; } /* Accept swap: apply transformation to the entire matrix T. */ slarfx_("R", &j3, &c__3, u, &tau, &t_ref(1, *j1), ldt, &work[1]); i__1 = *n - *j1; slarfx_("L", &c__3, &i__1, u, &tau, &t_ref(*j1, j2), ldt, &work[1]); t_ref(*j1, *j1) = t33; t_ref(j2, *j1) = 0.f; t_ref(j3, *j1) = 0.f; if (*wantq) { /* Accumulate transformation in the matrix Q. */ slarfx_("R", n, &c__3, u, &tau, &q_ref(1, *j1), ldq, &work[1]); } goto L40; L30: /* N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so that: H(2) H(1) ( -X11 -X12 ) = ( * * ) ( -X21 -X22 ) ( 0 * ) ( scale 0 ) ( 0 0 ) ( 0 scale ) ( 0 0 ) */ u1[0] = -x_ref(1, 1); u1[1] = -x_ref(2, 1); u1[2] = scale; slarfg_(&c__3, u1, &u1[1], &c__1, &tau1); u1[0] = 1.f; temp = -tau1 * (x_ref(1, 2) + u1[1] * x_ref(2, 2)); u2[0] = -temp * u1[1] - x_ref(2, 2); u2[1] = -temp * u1[2]; u2[2] = scale; slarfg_(&c__3, u2, &u2[1], &c__1, &tau2); u2[0] = 1.f; /* Perform swap provisionally on diagonal block in D. */ slarfx_("L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1]) ; slarfx_("R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1]) ; slarfx_("L", &c__3, &c__4, u2, &tau2, &d___ref(2, 1), &c__4, &work[1]); slarfx_("R", &c__4, &c__3, u2, &tau2, &d___ref(1, 2), &c__4, &work[1]); /* Test whether to reject swap. Computing MAX */ r__5 = (r__1 = d___ref(3, 1), dabs(r__1)), r__6 = (r__2 = d___ref(3, 2), dabs(r__2)), r__5 = max(r__5,r__6), r__6 = (r__3 = d___ref(4, 1), dabs(r__3)), r__5 = max(r__5,r__6), r__6 = ( r__4 = d___ref(4, 2), dabs(r__4)); if (dmax(r__5,r__6) > thresh) { goto L50; } /* Accept swap: apply transformation to the entire matrix T. */ i__1 = *n - *j1 + 1; slarfx_("L", &c__3, &i__1, u1, &tau1, &t_ref(*j1, *j1), ldt, &work[1]); slarfx_("R", &j4, &c__3, u1, &tau1, &t_ref(1, *j1), ldt, &work[1]); i__1 = *n - *j1 + 1; slarfx_("L", &c__3, &i__1, u2, &tau2, &t_ref(j2, *j1), ldt, &work[1]); slarfx_("R", &j4, &c__3, u2, &tau2, &t_ref(1, j2), ldt, &work[1]); t_ref(j3, *j1) = 0.f; t_ref(j3, j2) = 0.f; t_ref(j4, *j1) = 0.f; t_ref(j4, j2) = 0.f; if (*wantq) { /* Accumulate transformation in the matrix Q. */ slarfx_("R", n, &c__3, u1, &tau1, &q_ref(1, *j1), ldq, &work[1]); slarfx_("R", n, &c__3, u2, &tau2, &q_ref(1, j2), ldq, &work[1]); } L40: if (*n2 == 2) { /* Standardize new 2-by-2 block T11 */ slanv2_(&t_ref(*j1, *j1), &t_ref(*j1, j2), &t_ref(j2, *j1), & t_ref(j2, j2), &wr1, &wi1, &wr2, &wi2, &cs, &sn); i__1 = *n - *j1 - 1; srot_(&i__1, &t_ref(*j1, *j1 + 2), ldt, &t_ref(j2, *j1 + 2), ldt, &cs, &sn); i__1 = *j1 - 1; srot_(&i__1, &t_ref(1, *j1), &c__1, &t_ref(1, j2), &c__1, &cs, & sn); if (*wantq) { srot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, j2), &c__1, &cs, & sn); } } if (*n1 == 2) { /* Standardize new 2-by-2 block T22 */ j3 = *j1 + *n2; j4 = j3 + 1; slanv2_(&t_ref(j3, j3), &t_ref(j3, j4), &t_ref(j4, j3), &t_ref(j4, j4), &wr1, &wi1, &wr2, &wi2, &cs, &sn); if (j3 + 2 <= *n) { i__1 = *n - j3 - 1; srot_(&i__1, &t_ref(j3, j3 + 2), ldt, &t_ref(j4, j3 + 2), ldt, &cs, &sn); } i__1 = j3 - 1; srot_(&i__1, &t_ref(1, j3), &c__1, &t_ref(1, j4), &c__1, &cs, &sn) ; if (*wantq) { srot_(n, &q_ref(1, j3), &c__1, &q_ref(1, j4), &c__1, &cs, &sn) ; } } } return 0; /* Exit with INFO = 1 if swap was rejected. */ L50: *info = 1; return 0; /* End of SLAEXC */ } /* slaexc_ */
/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * work, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, i__1, i__2; real r__1; /* Local variables */ integer i__, j, m, n; real dj; integer nlp1; real temp; real diflj, difrj, dsigj; real dsigjp; /* -- LAPACK routine (version 3.2) -- */ /* November 2006 */ /* Purpose */ /* ======= */ /* SLALS0 applies back the multiplying factors of either the left or the */ /* right singular vector matrix of a diagonal matrix appended by a row */ /* to the right hand side matrix B in solving the least squares problem */ /* using the divide-and-conquer SVD approach. */ /* For the left singular vector matrix, three types of orthogonal */ /* matrices are involved: */ /* (1L) Givens rotations: the number of such rotations is GIVPTR; the */ /* pairs of columns/rows they were applied to are stored in GIVCOL; */ /* and the C- and S-values of these rotations are stored in GIVNUM. */ /* (2L) Permutation. The (NL+1)-st row of B is to be moved to the first */ /* row, and for J=2:N, PERM(J)-th row of B is to be moved to the */ /* J-th row. */ /* (3L) The left singular vector matrix of the remaining matrix. */ /* For the right singular vector matrix, four types of orthogonal */ /* matrices are involved: */ /* (1R) The right singular vector matrix of the remaining matrix. */ /* (2R) If SQRE = 1, one extra Givens rotation to generate the right */ /* null space. */ /* (3R) The inverse transformation of (2L). */ /* (4R) The inverse transformation of (1L). */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* Specifies whether singular vectors are to be computed in */ /* factored form: */ /* = 0: Left singular vector matrix. */ /* = 1: Right singular vector matrix. */ /* NL (input) INTEGER */ /* The row dimension of the upper block. NL >= 1. */ /* NR (input) INTEGER */ /* The row dimension of the lower block. NR >= 1. */ /* SQRE (input) INTEGER */ /* = 0: the lower block is an NR-by-NR square matrix. */ /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ /* The bidiagonal matrix has row dimension N = NL + NR + 1, */ /* and column dimension M = N + SQRE. */ /* NRHS (input) INTEGER */ /* The number of columns of B and BX. NRHS must be at least 1. */ /* B (input/output) REAL array, dimension ( LDB, NRHS ) */ /* On input, B contains the right hand sides of the least */ /* squares problem in rows 1 through M. On output, B contains */ /* the solution X in rows 1 through N. */ /* LDB (input) INTEGER */ /* The leading dimension of B. LDB must be at least */ /* max(1,MAX( M, N ) ). */ /* BX (workspace) REAL array, dimension ( LDBX, NRHS ) */ /* LDBX (input) INTEGER */ /* The leading dimension of BX. */ /* PERM (input) INTEGER array, dimension ( N ) */ /* The permutations (from deflation and sorting) applied */ /* to the two blocks. */ /* GIVPTR (input) INTEGER */ /* The number of Givens rotations which took place in this */ /* subproblem. */ /* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) */ /* Each pair of numbers indicates a pair of rows/columns */ /* involved in a Givens rotation. */ /* LDGCOL (input) INTEGER */ /* The leading dimension of GIVCOL, must be at least N. */ /* GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) */ /* Each number indicates the C or S value used in the */ /* corresponding Givens rotation. */ /* LDGNUM (input) INTEGER */ /* The leading dimension of arrays DIFR, POLES and */ /* GIVNUM, must be at least K. */ /* POLES (input) REAL array, dimension ( LDGNUM, 2 ) */ /* On entry, POLES(1:K, 1) contains the new singular */ /* values obtained from solving the secular equation, and */ /* POLES(1:K, 2) is an array containing the poles in the secular */ /* equation. */ /* DIFL (input) REAL array, dimension ( K ). */ /* On entry, DIFL(I) is the distance between I-th updated */ /* (undeflated) singular value and the I-th (undeflated) old */ /* singular value. */ /* DIFR (input) REAL array, dimension ( LDGNUM, 2 ). */ /* On entry, DIFR(I, 1) contains the distances between I-th */ /* updated (undeflated) singular value and the I+1-th */ /* (undeflated) old singular value. And DIFR(I, 2) is the */ /* normalizing factor for the I-th right singular vector. */ /* Z (input) REAL array, dimension ( K ) */ /* Contain the components of the deflation-adjusted updating row */ /* vector. */ /* K (input) INTEGER */ /* Contains the dimension of the non-deflated matrix, */ /* This is the order of the related secular equation. 1 <= K <=N. */ /* C (input) REAL */ /* C contains garbage if SQRE =0 and the C-value of a Givens */ /* rotation related to the right null space if SQRE = 1. */ /* S (input) REAL */ /* S contains garbage if SQRE =0 and the S-value of a Givens */ /* rotation related to the right null space if SQRE = 1. */ /* WORK (workspace) REAL array, dimension ( K ) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Ren-Cang Li, Computer Science Division, University of */ /* California at Berkeley, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* ===================================================================== */ /* Test the input parameters. */ /* Parameter adjustments */ b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1; bx -= bx_offset; --perm; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1; givcol -= givcol_offset; difr_dim1 = *ldgnum; difr_offset = 1 + difr_dim1; difr -= difr_offset; poles_dim1 = *ldgnum; poles_offset = 1 + poles_dim1; poles -= poles_offset; givnum_dim1 = *ldgnum; givnum_offset = 1 + givnum_dim1; givnum -= givnum_offset; --difl; --z__; --work; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } n = *nl + *nr + 1; if (*nrhs < 1) { *info = -5; } else if (*ldb < n) { *info = -7; } else if (*ldbx < n) { *info = -9; } else if (*givptr < 0) { *info = -11; } else if (*ldgcol < n) { *info = -13; } else if (*ldgnum < n) { *info = -15; } else if (*k < 1) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("SLALS0", &i__1); return 0; } m = n + *sqre; nlp1 = *nl + 1; if (*icompq == 0) { /* Apply back orthogonal transformations from the left. */ /* Step (1L): apply back the Givens rotations performed. */ i__1 = *givptr; for (i__ = 1; i__ <= i__1; ++i__) { srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); } /* Step (2L): permute rows of B. */ scopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { scopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], ldbx); } /* Step (3L): apply the inverse of the left singular vector */ /* matrix to BX. */ if (*k == 1) { scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); if (z__[1] < 0.f) { sscal_(nrhs, &c_b5, &b[b_offset], ldb); } } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = poles[j + poles_dim1]; dsigj = -poles[j + (poles_dim1 << 1)]; if (j < *k) { difrj = -difr[j + difr_dim1]; dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; } if (z__[j] == 0.f || poles[j + (poles_dim1 << 1)] == 0.f) { work[j] = 0.f; } else { work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / (poles[j + (poles_dim1 << 1)] + dj); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == 0.f) { work[i__] = 0.f; } else { work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] / (slamc3_(&poles[i__ + (poles_dim1 << 1)], & dsigj) - diflj) / (poles[i__ + (poles_dim1 << 1)] + dj); } } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == 0.f) { work[i__] = 0.f; } else { work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] / (slamc3_(&poles[i__ + (poles_dim1 << 1)], & dsigjp) + difrj) / (poles[i__ + (poles_dim1 << 1)] + dj); } } work[1] = -1.f; temp = snrm2_(k, &work[1], &c__1); sgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], & c__1, &c_b13, &b[j + b_dim1], ldb); slascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b[j + b_dim1], ldb, info); } } /* Move the deflated rows of BX to B also. */ if (*k < max(m,n)) { i__1 = n - *k; slacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + b_dim1], ldb); } } else { /* Apply back the right orthogonal transformations. */ /* Step (1R): apply back the new right singular vector matrix */ /* to B. */ if (*k == 1) { scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { dsigj = poles[j + (poles_dim1 << 1)]; if (z__[j] == 0.f) { work[j] = 0.f; } else { work[j] = -z__[j] / difl[j] / (dsigj + poles[j + poles_dim1]) / difr[j + (difr_dim1 << 1)]; } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { work[i__] = 0.f; } else { r__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[ i__ + difr_dim1]) / (dsigj + poles[i__ + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; } } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { work[i__] = 0.f; } else { r__1 = -poles[i__ + (poles_dim1 << 1)]; work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[ i__]) / (dsigj + poles[i__ + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; } } sgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], & c__1, &c_b13, &bx[j + bx_dim1], ldbx); } } /* Step (2R): if SQRE = 1, apply back the rotation that is */ /* related to the right null space of the subproblem. */ if (*sqre == 1) { scopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); srot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, s); } if (*k < max(m,n)) { i__1 = n - *k; slacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + bx_dim1], ldbx); } /* Step (3R): permute rows of B. */ scopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); if (*sqre == 1) { scopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); } i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { scopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], ldb); } /* Step (4R): apply back the Givens rotations performed. */ for (i__ = *givptr; i__ >= 1; --i__) { r__1 = -givnum[i__ + givnum_dim1]; srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + (givnum_dim1 << 1)], &r__1); } } return 0; /* End of SLALS0 */ } /* slals0_ */
void srot(int N, float *x, int incx, float *y, int incy, float c, float s) { srot_(&N, x, &incx, y, &incy, &c, &s); }
/* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, integer *indxq, real *rho, real *z__, real * dlamda, real *w, real *q2, integer *indx, integer *indxc, integer * indxp, integer *coltyp, integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2; real r__1, r__2, r__3, r__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ real c__; integer i__, j; real s, t; integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; real eps, tau, tol; integer psm[4], imax, jmax, ctot[4]; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *), sscal_(integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer * ); extern doublereal slapy2_(real *, real *), slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer isamax_(integer *, real *, integer *); extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAED2 merges the two sets of eigenvalues together into a single */ /* sorted set. Then it tries to deflate the size of the problem. */ /* There are two ways in which deflation can occur: when two or more */ /* eigenvalues are close together or if there is a tiny entry in the */ /* Z vector. For each such occurrence the order of the related secular */ /* equation problem is reduced by one. */ /* Arguments */ /* ========= */ /* K (output) INTEGER */ /* The number of non-deflated eigenvalues, and the order of the */ /* related secular equation. 0 <= K <=N. */ /* N (input) INTEGER */ /* The dimension of the symmetric tridiagonal matrix. N >= 0. */ /* N1 (input) INTEGER */ /* The location of the last eigenvalue in the leading sub-matrix. */ /* min(1,N) <= N1 <= N/2. */ /* D (input/output) REAL array, dimension (N) */ /* On entry, D contains the eigenvalues of the two submatrices to */ /* be combined. */ /* On exit, D contains the trailing (N-K) updated eigenvalues */ /* (those which were deflated) sorted into increasing order. */ /* Q (input/output) REAL array, dimension (LDQ, N) */ /* On entry, Q contains the eigenvectors of two submatrices in */ /* the two square blocks with corners at (1,1), (N1,N1) */ /* and (N1+1, N1+1), (N,N). */ /* On exit, Q contains the trailing (N-K) updated eigenvectors */ /* (those which were deflated) in its last N-K columns. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N). */ /* INDXQ (input/output) INTEGER array, dimension (N) */ /* The permutation which separately sorts the two sub-problems */ /* in D into ascending order. Note that elements in the second */ /* half of this permutation must first have N1 added to their */ /* values. Destroyed on exit. */ /* RHO (input/output) REAL */ /* On entry, the off-diagonal element associated with the rank-1 */ /* cut which originally split the two submatrices which are now */ /* being recombined. */ /* On exit, RHO has been modified to the value required by */ /* SLAED3. */ /* Z (input) REAL array, dimension (N) */ /* On entry, Z contains the updating vector (the last */ /* row of the first sub-eigenvector matrix and the first row of */ /* the second sub-eigenvector matrix). */ /* On exit, the contents of Z have been destroyed by the updating */ /* process. */ /* DLAMDA (output) REAL array, dimension (N) */ /* A copy of the first K eigenvalues which will be used by */ /* SLAED3 to form the secular equation. */ /* W (output) REAL array, dimension (N) */ /* The first k values of the final deflation-altered z-vector */ /* which will be passed to SLAED3. */ /* Q2 (output) REAL array, dimension (N1**2+(N-N1)**2) */ /* A copy of the first K eigenvectors which will be used by */ /* SLAED3 in a matrix multiply (SGEMM) to solve for the new */ /* eigenvectors. */ /* INDX (workspace) INTEGER array, dimension (N) */ /* The permutation used to sort the contents of DLAMDA into */ /* ascending order. */ /* INDXC (output) INTEGER array, dimension (N) */ /* The permutation used to arrange the columns of the deflated */ /* Q matrix into three groups: the first group contains non-zero */ /* elements only at and above N1, the second contains */ /* non-zero elements only below N1, and the third is dense. */ /* INDXP (workspace) INTEGER array, dimension (N) */ /* The permutation used to place deflated values of D at the end */ /* of the array. INDXP(1:K) points to the nondeflated D-values */ /* and INDXP(K+1:N) points to the deflated eigenvalues. */ /* COLTYP (workspace/output) INTEGER array, dimension (N) */ /* During execution, a label which will indicate which of the */ /* following types a column in the Q2 matrix is: */ /* 1 : non-zero in the upper half only; */ /* 2 : dense; */ /* 3 : non-zero in the lower half only; */ /* 4 : deflated. */ /* On exit, COLTYP(i) is the number of columns of type i, */ /* for i=1 to 4 only. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* Modified by Francoise Tisseur, University of Tennessee. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --indxq; --z__; --dlamda; --w; --q2; --indx; --indxc; --indxp; --coltyp; /* Function Body */ *info = 0; if (*n < 0) { *info = -2; } else if (*ldq < max(1,*n)) { *info = -6; } else { /* if(complicated condition) */ /* Computing MIN */ i__1 = 1, i__2 = *n / 2; if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { *info = -3; } } if (*info != 0) { i__1 = -(*info); xerbla_("SLAED2", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } n2 = *n - *n1; n1p1 = *n1 + 1; if (*rho < 0.f) { sscal_(&n2, &c_b3, &z__[n1p1], &c__1); } /* Normalize z so that norm(z) = 1. Since z is the concatenation of */ /* two normalized vectors, norm2(z) = sqrt(2). */ t = 1.f / sqrt(2.f); sscal_(n, &t, &z__[1], &c__1); /* RHO = ABS( norm(z)**2 * RHO ) */ *rho = (r__1 = *rho * 2.f, dabs(r__1)); /* Sort the eigenvalues into increasing order */ i__1 = *n; for (i__ = n1p1; i__ <= i__1; ++i__) { indxq[i__] += *n1; /* L10: */ } /* re-integrate the deflated parts from the last pass */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = d__[indxq[i__]]; /* L20: */ } slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { indx[i__] = indxq[indxc[i__]]; /* L30: */ } /* Calculate the allowable deflation tolerance */ imax = isamax_(n, &z__[1], &c__1); jmax = isamax_(n, &d__[1], &c__1); eps = slamch_("Epsilon"); /* Computing MAX */ r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs( r__2)); tol = eps * 8.f * dmax(r__3,r__4); /* If the rank-1 modifier is small enough, no more needs to be done */ /* except to reorganize Q so that its columns correspond with the */ /* elements in D. */ if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { *k = 0; iq2 = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__ = indx[j]; scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); dlamda[j] = d__[i__]; iq2 += *n; /* L40: */ } slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); goto L190; } /* If there are multiple eigenvalues then the problem deflates. Here */ /* the number of equal eigenvalues are found. As each equal */ /* eigenvalue is found, an elementary reflector is computed to rotate */ /* the corresponding eigensubspace so that the corresponding */ /* components of Z are zero in this new basis. */ i__1 = *n1; for (i__ = 1; i__ <= i__1; ++i__) { coltyp[i__] = 1; /* L50: */ } i__1 = *n; for (i__ = n1p1; i__ <= i__1; ++i__) { coltyp[i__] = 3; /* L60: */ } *k = 0; k2 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { nj = indx[j]; if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; coltyp[nj] = 4; indxp[k2] = nj; if (j == *n) { goto L100; } } else { pj = nj; goto L80; } /* L70: */ } L80: ++j; nj = indx[j]; if (j > *n) { goto L100; } if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; coltyp[nj] = 4; indxp[k2] = nj; } else { /* Check if eigenvalues are close enough to allow deflation. */ s = z__[pj]; c__ = z__[nj]; /* Find sqrt(a**2+b**2) without overflow or */ /* destructive underflow. */ tau = slapy2_(&c__, &s); t = d__[nj] - d__[pj]; c__ /= tau; s = -s / tau; if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { /* Deflation is possible. */ z__[nj] = tau; z__[pj] = 0.f; if (coltyp[nj] != coltyp[pj]) { coltyp[nj] = 2; } coltyp[pj] = 4; srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & c__, &s); /* Computing 2nd power */ r__1 = c__; /* Computing 2nd power */ r__2 = s; t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); /* Computing 2nd power */ r__1 = s; /* Computing 2nd power */ r__2 = c__; d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); d__[pj] = t; --k2; i__ = 1; L90: if (k2 + i__ <= *n) { if (d__[pj] < d__[indxp[k2 + i__]]) { indxp[k2 + i__ - 1] = indxp[k2 + i__]; indxp[k2 + i__] = pj; ++i__; goto L90; } else { indxp[k2 + i__ - 1] = pj; } } else { indxp[k2 + i__ - 1] = pj; } pj = nj; } else { ++(*k); dlamda[*k] = d__[pj]; w[*k] = z__[pj]; indxp[*k] = pj; pj = nj; } } goto L80; L100: /* Record the last eigenvalue. */ ++(*k); dlamda[*k] = d__[pj]; w[*k] = z__[pj]; indxp[*k] = pj; /* Count up the total number of the various types of columns, then */ /* form a permutation which positions the four column types into */ /* four uniform groups (although one or more of these groups may be */ /* empty). */ for (j = 1; j <= 4; ++j) { ctot[j - 1] = 0; /* L110: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { ct = coltyp[j]; ++ctot[ct - 1]; /* L120: */ } /* PSM(*) = Position in SubMatrix (of types 1 through 4) */ psm[0] = 1; psm[1] = ctot[0] + 1; psm[2] = psm[1] + ctot[1]; psm[3] = psm[2] + ctot[2]; *k = *n - ctot[3]; /* Fill out the INDXC array so that the permutation which it induces */ /* will place all type-1 columns first, all type-2 columns next, */ /* then all type-3's, and finally all type-4's. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { js = indxp[j]; ct = coltyp[js]; indx[psm[ct - 1]] = js; indxc[psm[ct - 1]] = j; ++psm[ct - 1]; /* L130: */ } /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ /* and Q2 respectively. The eigenvalues/vectors which were not */ /* deflated go into the first K slots of DLAMDA and Q2 respectively, */ /* while those which were deflated go into the last N - K slots. */ i__ = 1; iq1 = 1; iq2 = (ctot[0] + ctot[1]) * *n1 + 1; i__1 = ctot[0]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); z__[i__] = d__[js]; ++i__; iq1 += *n1; /* L140: */ } i__1 = ctot[1]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); z__[i__] = d__[js]; ++i__; iq1 += *n1; iq2 += n2; /* L150: */ } i__1 = ctot[2]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); z__[i__] = d__[js]; ++i__; iq2 += n2; /* L160: */ } iq1 = iq2; i__1 = ctot[3]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); iq2 += *n; z__[i__] = d__[js]; ++i__; /* L170: */ } /* The deflated eigenvalues and their corresponding vectors go back */ /* into the last N - K slots of D and Q respectively. */ slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); i__1 = *n - *k; scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); /* Copy CTOT into COLTYP for referencing in SLAED3. */ for (j = 1; j <= 4; ++j) { coltyp[j] = ctot[j - 1]; /* L180: */ } L190: return 0; /* End of SLAED2 */ } /* slaed2_ */
GURLS_EXPORT void rot(int *N, float *X, int *incX, float *Y, int *incY, float *c, float *s) { srot_(N, X, incX, Y, incY, c, s); }
/* Subroutine */ int ssbtrd_(char *vect, char *uplo, integer *n, integer *kd, real *ab, integer *ldab, real *d__, real *e, real *q, integer *ldq, real *work, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ integer i__, j, k, l, i2, j1, j2, nq, nr, kd1, ibl, iqb, kdn, jin, nrt, kdm1, inca, jend, lend, jinc, incx, last; real temp; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); integer j1end, j1inc, iqend; extern logical lsame_(char *, char *); logical initq, wantq, upper; extern /* Subroutine */ int slar2v_(integer *, real *, real *, real *, integer *, real *, real *, integer *); integer iqaend; extern /* Subroutine */ int xerbla_(char *, integer *), slaset_( char *, integer *, integer *, real *, real *, real *, integer *), slartg_(real *, real *, real *, real *, real *), slargv_( integer *, real *, integer *, real *, integer *, real *, integer * ), slartv_(integer *, real *, integer *, real *, integer *, real * , real *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SSBTRD reduces a real symmetric band matrix A to symmetric */ /* tridiagonal form T by an orthogonal similarity transformation: */ /* Q**T * A * Q = T. */ /* Arguments */ /* ========= */ /* VECT (input) CHARACTER*1 */ /* = 'N': do not form Q; */ /* = 'V': form Q; */ /* = 'U': update a matrix X, by forming X*Q. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of superdiagonals of the matrix A if UPLO = 'U', */ /* or the number of subdiagonals if UPLO = 'L'. KD >= 0. */ /* AB (input/output) REAL array, dimension (LDAB,N) */ /* On entry, the upper or lower triangle of the symmetric band */ /* matrix A, stored in the first KD+1 rows of the array. The */ /* j-th column of A is stored in the j-th column of the array AB */ /* as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* On exit, the diagonal elements of AB are overwritten by the */ /* diagonal elements of the tridiagonal matrix T; if KD > 0, the */ /* elements on the first superdiagonal (if UPLO = 'U') or the */ /* first subdiagonal (if UPLO = 'L') are overwritten by the */ /* off-diagonal elements of T; the rest of AB is overwritten by */ /* values generated during the reduction. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* D (output) REAL array, dimension (N) */ /* The diagonal elements of the tridiagonal matrix T. */ /* E (output) REAL array, dimension (N-1) */ /* The off-diagonal elements of the tridiagonal matrix T: */ /* E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. */ /* Q (input/output) REAL array, dimension (LDQ,N) */ /* On entry, if VECT = 'U', then Q must contain an N-by-N */ /* matrix X; if VECT = 'N' or 'V', then Q need not be set. */ /* On exit: */ /* if VECT = 'V', Q contains the N-by-N orthogonal matrix Q; */ /* if VECT = 'U', Q contains the product X*Q; */ /* if VECT = 'N', the array Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. */ /* LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. */ /* WORK (workspace) REAL array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* Further Details */ /* =============== */ /* Modified by Linda Kaufman, Bell Labs. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --d__; --e; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --work; /* Function Body */ initq = lsame_(vect, "V"); wantq = initq || lsame_(vect, "U"); upper = lsame_(uplo, "U"); kd1 = *kd + 1; kdm1 = *kd - 1; incx = *ldab - 1; iqend = 1; *info = 0; if (! wantq && ! lsame_(vect, "N")) { *info = -1; } else if (! upper && ! lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*kd < 0) { *info = -4; } else if (*ldab < kd1) { *info = -6; } else if (*ldq < max(1,*n) && wantq) { *info = -10; } if (*info != 0) { i__1 = -(*info); xerbla_("SSBTRD", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Initialize Q to the unit matrix, if needed */ if (initq) { slaset_("Full", n, n, &c_b9, &c_b10, &q[q_offset], ldq); } /* Wherever possible, plane rotations are generated and applied in */ /* vector operations of length NR over the index set J1:J2:KD1. */ /* The cosines and sines of the plane rotations are stored in the */ /* arrays D and WORK. */ inca = kd1 * *ldab; /* Computing MIN */ i__1 = *n - 1; kdn = min(i__1,*kd); if (upper) { if (*kd > 1) { /* Reduce to tridiagonal form, working with upper triangle */ nr = 0; j1 = kdn + 2; j2 = 1; i__1 = *n - 2; for (i__ = 1; i__ <= i__1; ++i__) { /* Reduce i-th row of matrix to tridiagonal form */ for (k = kdn + 1; k >= 2; --k) { j1 += kdn; j2 += kdn; if (nr > 0) { /* generate plane rotations to annihilate nonzero */ /* elements which have been created outside the band */ slargv_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &inca, & work[j1], &kd1, &d__[j1], &kd1); /* apply rotations from the right */ /* Dependent on the the number of diagonals either */ /* SLARTV or SROT is used */ if (nr >= (*kd << 1) - 1) { i__2 = *kd - 1; for (l = 1; l <= i__2; ++l) { slartv_(&nr, &ab[l + 1 + (j1 - 1) * ab_dim1], &inca, &ab[l + j1 * ab_dim1], &inca, & d__[j1], &work[j1], &kd1); /* L10: */ } } else { jend = j1 + (nr - 1) * kd1; i__2 = jend; i__3 = kd1; for (jinc = j1; i__3 < 0 ? jinc >= i__2 : jinc <= i__2; jinc += i__3) { srot_(&kdm1, &ab[(jinc - 1) * ab_dim1 + 2], & c__1, &ab[jinc * ab_dim1 + 1], &c__1, &d__[jinc], &work[jinc]); /* L20: */ } } } if (k > 2) { if (k <= *n - i__ + 1) { /* generate plane rotation to annihilate a(i,i+k-1) */ /* within the band */ slartg_(&ab[*kd - k + 3 + (i__ + k - 2) * ab_dim1] , &ab[*kd - k + 2 + (i__ + k - 1) * ab_dim1], &d__[i__ + k - 1], &work[i__ + k - 1], &temp); ab[*kd - k + 3 + (i__ + k - 2) * ab_dim1] = temp; /* apply rotation from the right */ i__3 = k - 3; srot_(&i__3, &ab[*kd - k + 4 + (i__ + k - 2) * ab_dim1], &c__1, &ab[*kd - k + 3 + (i__ + k - 1) * ab_dim1], &c__1, &d__[i__ + k - 1], &work[i__ + k - 1]); } ++nr; j1 = j1 - kdn - 1; } /* apply plane rotations from both sides to diagonal */ /* blocks */ if (nr > 0) { slar2v_(&nr, &ab[kd1 + (j1 - 1) * ab_dim1], &ab[kd1 + j1 * ab_dim1], &ab[*kd + j1 * ab_dim1], &inca, &d__[j1], &work[j1], &kd1); } /* apply plane rotations from the left */ if (nr > 0) { if ((*kd << 1) - 1 < nr) { /* Dependent on the the number of diagonals either */ /* SLARTV or SROT is used */ i__3 = *kd - 1; for (l = 1; l <= i__3; ++l) { if (j2 + l > *n) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { slartv_(&nrt, &ab[*kd - l + (j1 + l) * ab_dim1], &inca, &ab[*kd - l + 1 + (j1 + l) * ab_dim1], &inca, & d__[j1], &work[j1], &kd1); } /* L30: */ } } else { j1end = j1 + kd1 * (nr - 2); if (j1end >= j1) { i__3 = j1end; i__2 = kd1; for (jin = j1; i__2 < 0 ? jin >= i__3 : jin <= i__3; jin += i__2) { i__4 = *kd - 1; srot_(&i__4, &ab[*kd - 1 + (jin + 1) * ab_dim1], &incx, &ab[*kd + (jin + 1) * ab_dim1], &incx, &d__[jin], & work[jin]); /* L40: */ } } /* Computing MIN */ i__2 = kdm1, i__3 = *n - j2; lend = min(i__2,i__3); last = j1end + kd1; if (lend > 0) { srot_(&lend, &ab[*kd - 1 + (last + 1) * ab_dim1], &incx, &ab[*kd + (last + 1) * ab_dim1], &incx, &d__[last], &work[ last]); } } } if (wantq) { /* accumulate product of plane rotations in Q */ if (initq) { /* take advantage of the fact that Q was */ /* initially the Identity matrix */ iqend = max(iqend,j2); /* Computing MAX */ i__2 = 0, i__3 = k - 3; i2 = max(i__2,i__3); iqaend = i__ * *kd + 1; if (k == 2) { iqaend += *kd; } iqaend = min(iqaend,iqend); i__2 = j2; i__3 = kd1; for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { ibl = i__ - i2 / kdm1; ++i2; /* Computing MAX */ i__4 = 1, i__5 = j - ibl; iqb = max(i__4,i__5); nq = iqaend + 1 - iqb; /* Computing MIN */ i__4 = iqaend + *kd; iqaend = min(i__4,iqend); srot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, &q[iqb + j * q_dim1], &c__1, &d__[j], &work[j]); /* L50: */ } } else { i__3 = j2; i__2 = kd1; for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { srot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[ j * q_dim1 + 1], &c__1, &d__[j], & work[j]); /* L60: */ } } } if (j2 + kdn > *n) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 = j2 - kdn - 1; } i__2 = j2; i__3 = kd1; for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { /* create nonzero element a(j-1,j+kd) outside the band */ /* and store it in WORK */ work[j + *kd] = work[j] * ab[(j + *kd) * ab_dim1 + 1]; ab[(j + *kd) * ab_dim1 + 1] = d__[j] * ab[(j + *kd) * ab_dim1 + 1]; /* L70: */ } /* L80: */ } /* L90: */ } } if (*kd > 0) { /* copy off-diagonal elements to E */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = ab[*kd + (i__ + 1) * ab_dim1]; /* L100: */ } } else { /* set E to zero if original matrix was diagonal */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = 0.f; /* L110: */ } } /* copy diagonal elements to D */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab[kd1 + i__ * ab_dim1]; /* L120: */ } } else { if (*kd > 1) { /* Reduce to tridiagonal form, working with lower triangle */ nr = 0; j1 = kdn + 2; j2 = 1; i__1 = *n - 2; for (i__ = 1; i__ <= i__1; ++i__) { /* Reduce i-th column of matrix to tridiagonal form */ for (k = kdn + 1; k >= 2; --k) { j1 += kdn; j2 += kdn; if (nr > 0) { /* generate plane rotations to annihilate nonzero */ /* elements which have been created outside the band */ slargv_(&nr, &ab[kd1 + (j1 - kd1) * ab_dim1], &inca, & work[j1], &kd1, &d__[j1], &kd1); /* apply plane rotations from one side */ /* Dependent on the the number of diagonals either */ /* SLARTV or SROT is used */ if (nr > (*kd << 1) - 1) { i__3 = *kd - 1; for (l = 1; l <= i__3; ++l) { slartv_(&nr, &ab[kd1 - l + (j1 - kd1 + l) * ab_dim1], &inca, &ab[kd1 - l + 1 + ( j1 - kd1 + l) * ab_dim1], &inca, &d__[ j1], &work[j1], &kd1); /* L130: */ } } else { jend = j1 + kd1 * (nr - 1); i__3 = jend; i__2 = kd1; for (jinc = j1; i__2 < 0 ? jinc >= i__3 : jinc <= i__3; jinc += i__2) { srot_(&kdm1, &ab[*kd + (jinc - *kd) * ab_dim1] , &incx, &ab[kd1 + (jinc - *kd) * ab_dim1], &incx, &d__[jinc], &work[ jinc]); /* L140: */ } } } if (k > 2) { if (k <= *n - i__ + 1) { /* generate plane rotation to annihilate a(i+k-1,i) */ /* within the band */ slartg_(&ab[k - 1 + i__ * ab_dim1], &ab[k + i__ * ab_dim1], &d__[i__ + k - 1], &work[i__ + k - 1], &temp); ab[k - 1 + i__ * ab_dim1] = temp; /* apply rotation from the left */ i__2 = k - 3; i__3 = *ldab - 1; i__4 = *ldab - 1; srot_(&i__2, &ab[k - 2 + (i__ + 1) * ab_dim1], & i__3, &ab[k - 1 + (i__ + 1) * ab_dim1], & i__4, &d__[i__ + k - 1], &work[i__ + k - 1]); } ++nr; j1 = j1 - kdn - 1; } /* apply plane rotations from both sides to diagonal */ /* blocks */ if (nr > 0) { slar2v_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &ab[j1 * ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + 2], & inca, &d__[j1], &work[j1], &kd1); } /* apply plane rotations from the right */ /* Dependent on the the number of diagonals either */ /* SLARTV or SROT is used */ if (nr > 0) { if (nr > (*kd << 1) - 1) { i__2 = *kd - 1; for (l = 1; l <= i__2; ++l) { if (j2 + l > *n) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { slartv_(&nrt, &ab[l + 2 + (j1 - 1) * ab_dim1], &inca, &ab[l + 1 + j1 * ab_dim1], &inca, &d__[j1], &work[ j1], &kd1); } /* L150: */ } } else { j1end = j1 + kd1 * (nr - 2); if (j1end >= j1) { i__2 = j1end; i__3 = kd1; for (j1inc = j1; i__3 < 0 ? j1inc >= i__2 : j1inc <= i__2; j1inc += i__3) { srot_(&kdm1, &ab[(j1inc - 1) * ab_dim1 + 3], &c__1, &ab[j1inc * ab_dim1 + 2], &c__1, &d__[j1inc], &work[ j1inc]); /* L160: */ } } /* Computing MIN */ i__3 = kdm1, i__2 = *n - j2; lend = min(i__3,i__2); last = j1end + kd1; if (lend > 0) { srot_(&lend, &ab[(last - 1) * ab_dim1 + 3], & c__1, &ab[last * ab_dim1 + 2], &c__1, &d__[last], &work[last]); } } } if (wantq) { /* accumulate product of plane rotations in Q */ if (initq) { /* take advantage of the fact that Q was */ /* initially the Identity matrix */ iqend = max(iqend,j2); /* Computing MAX */ i__3 = 0, i__2 = k - 3; i2 = max(i__3,i__2); iqaend = i__ * *kd + 1; if (k == 2) { iqaend += *kd; } iqaend = min(iqaend,iqend); i__3 = j2; i__2 = kd1; for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { ibl = i__ - i2 / kdm1; ++i2; /* Computing MAX */ i__4 = 1, i__5 = j - ibl; iqb = max(i__4,i__5); nq = iqaend + 1 - iqb; /* Computing MIN */ i__4 = iqaend + *kd; iqaend = min(i__4,iqend); srot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, &q[iqb + j * q_dim1], &c__1, &d__[j], &work[j]); /* L170: */ } } else { i__2 = j2; i__3 = kd1; for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) { srot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[ j * q_dim1 + 1], &c__1, &d__[j], & work[j]); /* L180: */ } } } if (j2 + kdn > *n) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 = j2 - kdn - 1; } i__3 = j2; i__2 = kd1; for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) { /* create nonzero element a(j+kd,j-1) outside the */ /* band and store it in WORK */ work[j + *kd] = work[j] * ab[kd1 + j * ab_dim1]; ab[kd1 + j * ab_dim1] = d__[j] * ab[kd1 + j * ab_dim1] ; /* L190: */ } /* L200: */ } /* L210: */ } } if (*kd > 0) { /* copy off-diagonal elements to E */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = ab[i__ * ab_dim1 + 2]; /* L220: */ } } else { /* set E to zero if original matrix was diagonal */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = 0.f; /* L230: */ } } /* copy diagonal elements to D */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab[i__ * ab_dim1 + 1]; /* L240: */ } } return 0; /* End of SSBTRD */ } /* ssbtrd_ */
int slasd2_(int *nl, int *nr, int *sqre, int *k, float *d__, float *z__, float *alpha, float *beta, float *u, int * ldu, float *vt, int *ldvt, float *dsigma, float *u2, int *ldu2, float *vt2, int *ldvt2, int *idxp, int *idx, int *idxc, int *idxq, int *coltyp, int *info) { /* System generated locals */ int u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1; float r__1, r__2; /* Local variables */ float c__; int i__, j, m, n; float s; int k2; float z1; int ct, jp; float eps, tau, tol; int psm[4], nlp1, nlp2, idxi, idxj, ctot[4]; extern int srot_(int *, float *, int *, float *, int *, float *, float *); int idxjp, jprev; extern int scopy_(int *, float *, int *, float *, int *); extern double slapy2_(float *, float *), slamch_(char *); extern int xerbla_(char *, int *), slamrg_( int *, int *, float *, int *, int *, int *); float hlftol; extern int slacpy_(char *, int *, int *, float *, int *, float *, int *), slaset_(char *, int *, int *, float *, float *, float *, int *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLASD2 merges the two sets of singular values together into a single */ /* sorted set. Then it tries to deflate the size of the problem. */ /* There are two ways in which deflation can occur: when two or more */ /* singular values are close together or if there is a tiny entry in the */ /* Z vector. For each such occurrence the order of the related secular */ /* equation problem is reduced by one. */ /* SLASD2 is called from SLASD1. */ /* Arguments */ /* ========= */ /* NL (input) INTEGER */ /* The row dimension of the upper block. NL >= 1. */ /* NR (input) INTEGER */ /* The row dimension of the lower block. NR >= 1. */ /* SQRE (input) INTEGER */ /* = 0: the lower block is an NR-by-NR square matrix. */ /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ /* The bidiagonal matrix has N = NL + NR + 1 rows and */ /* M = N + SQRE >= N columns. */ /* K (output) INTEGER */ /* Contains the dimension of the non-deflated matrix, */ /* This is the order of the related secular equation. 1 <= K <=N. */ /* D (input/output) REAL array, dimension (N) */ /* On entry D contains the singular values of the two submatrices */ /* to be combined. On exit D contains the trailing (N-K) updated */ /* singular values (those which were deflated) sorted into */ /* increasing order. */ /* Z (output) REAL array, dimension (N) */ /* On exit Z contains the updating row vector in the secular */ /* equation. */ /* ALPHA (input) REAL */ /* Contains the diagonal element associated with the added row. */ /* BETA (input) REAL */ /* Contains the off-diagonal element associated with the added */ /* row. */ /* U (input/output) REAL array, dimension (LDU,N) */ /* On entry U contains the left singular vectors of two */ /* submatrices in the two square blocks with corners at (1,1), */ /* (NL, NL), and (NL+2, NL+2), (N,N). */ /* On exit U contains the trailing (N-K) updated left singular */ /* vectors (those which were deflated) in its last N-K columns. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= N. */ /* VT (input/output) REAL array, dimension (LDVT,M) */ /* On entry VT' contains the right singular vectors of two */ /* submatrices in the two square blocks with corners at (1,1), */ /* (NL+1, NL+1), and (NL+2, NL+2), (M,M). */ /* On exit VT' contains the trailing (N-K) updated right singular */ /* vectors (those which were deflated) in its last N-K columns. */ /* In case SQRE =1, the last row of VT spans the right null */ /* space. */ /* LDVT (input) INTEGER */ /* The leading dimension of the array VT. LDVT >= M. */ /* DSIGMA (output) REAL array, dimension (N) */ /* Contains a copy of the diagonal elements (K-1 singular values */ /* and one zero) in the secular equation. */ /* U2 (output) REAL array, dimension (LDU2,N) */ /* Contains a copy of the first K-1 left singular vectors which */ /* will be used by SLASD3 in a matrix multiply (SGEMM) to solve */ /* for the new left singular vectors. U2 is arranged into four */ /* blocks. The first block contains a column with 1 at NL+1 and */ /* zero everywhere else; the second block contains non-zero */ /* entries only at and above NL; the third contains non-zero */ /* entries only below NL+1; and the fourth is dense. */ /* LDU2 (input) INTEGER */ /* The leading dimension of the array U2. LDU2 >= N. */ /* VT2 (output) REAL array, dimension (LDVT2,N) */ /* VT2' contains a copy of the first K right singular vectors */ /* which will be used by SLASD3 in a matrix multiply (SGEMM) to */ /* solve for the new right singular vectors. VT2 is arranged into */ /* three blocks. The first block contains a row that corresponds */ /* to the special 0 diagonal element in SIGMA; the second block */ /* contains non-zeros only at and before NL +1; the third block */ /* contains non-zeros only at and after NL +2. */ /* LDVT2 (input) INTEGER */ /* The leading dimension of the array VT2. LDVT2 >= M. */ /* IDXP (workspace) INTEGER array, dimension (N) */ /* This will contain the permutation used to place deflated */ /* values of D at the end of the array. On output IDXP(2:K) */ /* points to the nondeflated D-values and IDXP(K+1:N) */ /* points to the deflated singular values. */ /* IDX (workspace) INTEGER array, dimension (N) */ /* This will contain the permutation used to sort the contents of */ /* D into ascending order. */ /* IDXC (output) INTEGER array, dimension (N) */ /* This will contain the permutation used to arrange the columns */ /* of the deflated U matrix into three groups: the first group */ /* contains non-zero entries only at and above NL, the second */ /* contains non-zero entries only below NL+2, and the third is */ /* dense. */ /* IDXQ (input/output) INTEGER array, dimension (N) */ /* This contains the permutation which separately sorts the two */ /* sub-problems in D into ascending order. Note that entries in */ /* the first hlaf of this permutation must first be moved one */ /* position backward; and entries in the second half */ /* must first have NL+1 added to their values. */ /* COLTYP (workspace/output) INTEGER array, dimension (N) */ /* As workspace, this will contain a label which will indicate */ /* which of the following types a column in the U2 matrix or a */ /* row in the VT2 matrix is: */ /* 1 : non-zero in the upper half only */ /* 2 : non-zero in the lower half only */ /* 3 : dense */ /* 4 : deflated */ /* On exit, it is an array of dimension 4, with COLTYP(I) being */ /* the dimension of the I-th type columns. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --z__; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; --dsigma; u2_dim1 = *ldu2; u2_offset = 1 + u2_dim1; u2 -= u2_offset; vt2_dim1 = *ldvt2; vt2_offset = 1 + vt2_dim1; vt2 -= vt2_offset; --idxp; --idx; --idxc; --idxq; --coltyp; /* Function Body */ *info = 0; if (*nl < 1) { *info = -1; } else if (*nr < 1) { *info = -2; } else if (*sqre != 1 && *sqre != 0) { *info = -3; } n = *nl + *nr + 1; m = n + *sqre; if (*ldu < n) { *info = -10; } else if (*ldvt < m) { *info = -12; } else if (*ldu2 < n) { *info = -15; } else if (*ldvt2 < m) { *info = -17; } if (*info != 0) { i__1 = -(*info); xerbla_("SLASD2", &i__1); return 0; } nlp1 = *nl + 1; nlp2 = *nl + 2; /* Generate the first part of the vector Z; and move the singular */ /* values in the first part of D one position backward. */ z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; z__[1] = z1; for (i__ = *nl; i__ >= 1; --i__) { z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; d__[i__ + 1] = d__[i__]; idxq[i__ + 1] = idxq[i__] + 1; /* L10: */ } /* Generate the second part of the vector Z. */ i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; /* L20: */ } /* Initialize some reference arrays. */ i__1 = nlp1; for (i__ = 2; i__ <= i__1; ++i__) { coltyp[i__] = 1; /* L30: */ } i__1 = n; for (i__ = nlp2; i__ <= i__1; ++i__) { coltyp[i__] = 2; /* L40: */ } /* Sort the singular values into increasing order */ i__1 = n; for (i__ = nlp2; i__ <= i__1; ++i__) { idxq[i__] += nlp1; /* L50: */ } /* DSIGMA, IDXC, IDXC, and the first column of U2 */ /* are used as storage space. */ i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { dsigma[i__] = d__[idxq[i__]]; u2[i__ + u2_dim1] = z__[idxq[i__]]; idxc[i__] = coltyp[idxq[i__]]; /* L60: */ } slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { idxi = idx[i__] + 1; d__[i__] = dsigma[idxi]; z__[i__] = u2[idxi + u2_dim1]; coltyp[i__] = idxc[idxi]; /* L70: */ } /* Calculate the allowable deflation tolerance */ eps = slamch_("Epsilon"); /* Computing MAX */ r__1 = ABS(*alpha), r__2 = ABS(*beta); tol = MAX(r__1,r__2); /* Computing MAX */ r__2 = (r__1 = d__[n], ABS(r__1)); tol = eps * 8.f * MAX(r__2,tol); /* There are 2 kinds of deflation -- first a value in the z-vector */ /* is small, second two (or more) singular values are very close */ /* together (their difference is small). */ /* If the value in the z-vector is small, we simply permute the */ /* array so that the corresponding singular value is moved to the */ /* end. */ /* If two values in the D-vector are close, we perform a two-sided */ /* rotation designed to make one of the corresponding z-vector */ /* entries zero, and then permute the array so that the deflated */ /* singular value is moved to the end. */ /* If there are multiple singular values then the problem deflates. */ /* Here the number of equal singular values are found. As each equal */ /* singular value is found, an elementary reflector is computed to */ /* rotate the corresponding singular subspace so that the */ /* corresponding components of Z are zero in this new basis. */ *k = 1; k2 = n + 1; i__1 = n; for (j = 2; j <= i__1; ++j) { if ((r__1 = z__[j], ABS(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; idxp[k2] = j; coltyp[j] = 4; if (j == n) { goto L120; } } else { jprev = j; goto L90; } /* L80: */ } L90: j = jprev; L100: ++j; if (j > n) { goto L110; } if ((r__1 = z__[j], ABS(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; idxp[k2] = j; coltyp[j] = 4; } else { /* Check if singular values are close enough to allow deflation. */ if ((r__1 = d__[j] - d__[jprev], ABS(r__1)) <= tol) { /* Deflation is possible. */ s = z__[jprev]; c__ = z__[j]; /* Find sqrt(a**2+b**2) without overflow or */ /* destructive underflow. */ tau = slapy2_(&c__, &s); c__ /= tau; s = -s / tau; z__[j] = tau; z__[jprev] = 0.f; /* Apply back the Givens rotation to the left and right */ /* singular vector matrices. */ idxjp = idxq[idx[jprev] + 1]; idxj = idxq[idx[j] + 1]; if (idxjp <= nlp1) { --idxjp; } if (idxj <= nlp1) { --idxj; } srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & c__1, &c__, &s); srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & c__, &s); if (coltyp[j] != coltyp[jprev]) { coltyp[j] = 3; } coltyp[jprev] = 4; --k2; idxp[k2] = jprev; jprev = j; } else { ++(*k); u2[*k + u2_dim1] = z__[jprev]; dsigma[*k] = d__[jprev]; idxp[*k] = jprev; jprev = j; } } goto L100; L110: /* Record the last singular value. */ ++(*k); u2[*k + u2_dim1] = z__[jprev]; dsigma[*k] = d__[jprev]; idxp[*k] = jprev; L120: /* Count up the total number of the various types of columns, then */ /* form a permutation which positions the four column types into */ /* four groups of uniform structure (although one or more of these */ /* groups may be empty). */ for (j = 1; j <= 4; ++j) { ctot[j - 1] = 0; /* L130: */ } i__1 = n; for (j = 2; j <= i__1; ++j) { ct = coltyp[j]; ++ctot[ct - 1]; /* L140: */ } /* PSM(*) = Position in SubMatrix (of types 1 through 4) */ psm[0] = 2; psm[1] = ctot[0] + 2; psm[2] = psm[1] + ctot[1]; psm[3] = psm[2] + ctot[2]; /* Fill out the IDXC array so that the permutation which it induces */ /* will place all type-1 columns first, all type-2 columns next, */ /* then all type-3's, and finally all type-4's, starting from the */ /* second column. This applies similarly to the rows of VT. */ i__1 = n; for (j = 2; j <= i__1; ++j) { jp = idxp[j]; ct = coltyp[jp]; idxc[psm[ct - 1]] = j; ++psm[ct - 1]; /* L150: */ } /* Sort the singular values and corresponding singular vectors into */ /* DSIGMA, U2, and VT2 respectively. The singular values/vectors */ /* which were not deflated go into the first K slots of DSIGMA, U2, */ /* and VT2 respectively, while those which were deflated go into the */ /* last N - K slots, except that the first column/row will be treated */ /* separately. */ i__1 = n; for (j = 2; j <= i__1; ++j) { jp = idxp[j]; dsigma[j] = d__[jp]; idxj = idxq[idx[idxp[idxc[j]]] + 1]; if (idxj <= nlp1) { --idxj; } scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); /* L160: */ } /* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ dsigma[1] = 0.f; hlftol = tol / 2.f; if (ABS(dsigma[2]) <= hlftol) { dsigma[2] = hlftol; } if (m > n) { z__[1] = slapy2_(&z1, &z__[m]); if (z__[1] <= tol) { c__ = 1.f; s = 0.f; z__[1] = tol; } else { c__ = z1 / z__[1]; s = z__[m] / z__[1]; } } else { if (ABS(z1) <= tol) { z__[1] = tol; } else { z__[1] = z1; } } /* Move the rest of the updating row to Z. */ i__1 = *k - 1; scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); /* Determine the first column of U2, the first row of VT2 and the */ /* last row of VT. */ slaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2); u2[nlp1 + u2_dim1] = 1.f; if (m > n) { i__1 = nlp1; for (i__ = 1; i__ <= i__1; ++i__) { vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; /* L170: */ } i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; /* L180: */ } } else { scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); } if (m > n) { scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); } /* The deflated singular values and their corresponding vectors go */ /* into the back of D, U, and V respectively. */ if (n > *k) { i__1 = n - *k; scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); i__1 = n - *k; slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) * u_dim1 + 1], ldu); i__1 = n - *k; slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + vt_dim1], ldvt); } /* Copy CTOT into COLTYP for referencing in SLASD3. */ for (j = 1; j <= 4; ++j) { coltyp[j] = ctot[j - 1]; /* L190: */ } return 0; /* End of SLASD2 */ } /* slasd2_ */
/* Subroutine */ int slaqrb_(logical *wantt, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, integer *info) { /* System generated locals */ integer h_dim1, h_offset, i__1, i__2, i__3, i__4; real r__1, r__2; /* Local variables */ static integer i__, j, k, l, m; static real s, v[3]; static integer i1, i2; static real t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, h33, h44; static integer nh; static real cs; static integer nr; static real sn, h33s, h44s; static integer itn, its; static real ulp, sum, tst1, h43h34, unfl, ovfl, work[1]; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *), scopy_(integer *, real *, integer *, real *, integer *), slanv2_(real *, real *, real *, real *, real * , real *, real *, real *, real *, real *), slabad_(real *, real *) ; extern doublereal slamch_(char *, ftnlen); extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, real *); extern doublereal slanhs_(char *, integer *, real *, integer *, real *, ftnlen); static real smlnum; /* %------------------% */ /* | Scalar Arguments | */ /* %------------------% */ /* %-----------------% */ /* | Array Arguments | */ /* %-----------------% */ /* %------------% */ /* | Parameters | */ /* %------------% */ /* %------------------------% */ /* | Local Scalars & Arrays | */ /* %------------------------% */ /* %--------------------% */ /* | External Functions | */ /* %--------------------% */ /* %----------------------% */ /* | External Subroutines | */ /* %----------------------% */ /* %-----------------------% */ /* | Executable Statements | */ /* %-----------------------% */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --wr; --wi; --z__; /* Function Body */ *info = 0; /* %--------------------------% */ /* | Quick return if possible | */ /* %--------------------------% */ if (*n == 0) { return 0; } if (*ilo == *ihi) { wr[*ilo] = h__[*ilo + *ilo * h_dim1]; wi[*ilo] = 0.f; return 0; } /* %---------------------------------------------% */ /* | Initialize the vector of last components of | */ /* | the Schur vectors for accumulation. | */ /* %---------------------------------------------% */ i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { z__[j] = 0.f; /* L5: */ } z__[*n] = 1.f; nh = *ihi - *ilo + 1; /* %-------------------------------------------------------------% */ /* | Set machine-dependent constants for the stopping criterion. | */ /* | If norm(H) <= sqrt(OVFL), overflow should not occur. | */ /* %-------------------------------------------------------------% */ unfl = slamch_("safe minimum", (ftnlen)12); ovfl = 1.f / unfl; slabad_(&unfl, &ovfl); ulp = slamch_("precision", (ftnlen)9); smlnum = unfl * (nh / ulp); /* %---------------------------------------------------------------% */ /* | I1 and I2 are the indices of the first row and last column | */ /* | of H to which transformations must be applied. If eigenvalues | */ /* | only are computed, I1 and I2 are set inside the main loop. | */ /* | Zero out H(J+2,J) = ZERO for J=1:N if WANTT = .TRUE. | */ /* | else H(J+2,J) for J=ILO:IHI-ILO-1 if WANTT = .FALSE. | */ /* %---------------------------------------------------------------% */ if (*wantt) { i1 = 1; i2 = *n; i__1 = i2 - 2; for (i__ = 1; i__ <= i__1; ++i__) { h__[i1 + i__ + 1 + i__ * h_dim1] = 0.f; /* L8: */ } } else { i__1 = *ihi - *ilo - 1; for (i__ = 1; i__ <= i__1; ++i__) { h__[*ilo + i__ + 1 + (*ilo + i__ - 1) * h_dim1] = 0.f; /* L9: */ } } /* %---------------------------------------------------% */ /* | ITN is the total number of QR iterations allowed. | */ /* %---------------------------------------------------% */ itn = nh * 30; /* ------------------------------------------------------------------ */ /* The main loop begins here. I is the loop index and decreases from */ /* IHI to ILO in steps of 1 or 2. Each iteration of the loop works */ /* with the active submatrix in rows and columns L to I. */ /* Eigenvalues I+1 to IHI have already converged. Either L = ILO or */ /* H(L,L-1) is negligible so that the matrix splits. */ /* ------------------------------------------------------------------ */ i__ = *ihi; L10: l = *ilo; if (i__ < *ilo) { goto L150; } /* %--------------------------------------------------------------% */ /* | Perform QR iterations on rows and columns ILO to I until a | */ /* | submatrix of order 1 or 2 splits off at the bottom because a | */ /* | subdiagonal element has become negligible. | */ /* %--------------------------------------------------------------% */ i__1 = itn; for (its = 0; its <= i__1; ++its) { /* %----------------------------------------------% */ /* | Look for a single small subdiagonal element. | */ /* %----------------------------------------------% */ i__2 = l + 1; for (k = i__; k >= i__2; --k) { tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 = h__[k + k * h_dim1], dabs(r__2)); if (tst1 == 0.f) { i__3 = i__ - l + 1; tst1 = slanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work, ( ftnlen)1); } /* Computing MAX */ r__2 = ulp * tst1; if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, smlnum)) { goto L30; } /* L20: */ } L30: l = k; if (l > *ilo) { /* %------------------------% */ /* | H(L,L-1) is negligible | */ /* %------------------------% */ h__[l + (l - 1) * h_dim1] = 0.f; } /* %-------------------------------------------------------------% */ /* | Exit from loop if a submatrix of order 1 or 2 has split off | */ /* %-------------------------------------------------------------% */ if (l >= i__ - 1) { goto L140; } /* %---------------------------------------------------------% */ /* | Now the active submatrix is in rows and columns L to I. | */ /* | If eigenvalues only are being computed, only the active | */ /* | submatrix need be transformed. | */ /* %---------------------------------------------------------% */ if (! (*wantt)) { i1 = l; i2 = i__; } if (its == 10 || its == 20) { /* %-------------------% */ /* | Exceptional shift | */ /* %-------------------% */ s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2)); h44 = s * .75f; h33 = h44; h43h34 = s * -.4375f * s; } else { /* %-----------------------------------------% */ /* | Prepare to use Wilkinson's double shift | */ /* %-----------------------------------------% */ h44 = h__[i__ + i__ * h_dim1]; h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * h_dim1]; } /* %-----------------------------------------------------% */ /* | Look for two consecutive small subdiagonal elements | */ /* %-----------------------------------------------------% */ i__2 = l; for (m = i__ - 2; m >= i__2; --m) { /* %---------------------------------------------------------% */ /* | Determine the effect of starting the double-shift QR | */ /* | iteration at row M, and see if this would make H(M,M-1) | */ /* | negligible. | */ /* %---------------------------------------------------------% */ h11 = h__[m + m * h_dim1]; h22 = h__[m + 1 + (m + 1) * h_dim1]; h21 = h__[m + 1 + m * h_dim1]; h12 = h__[m + (m + 1) * h_dim1]; h44s = h44 - h11; h33s = h33 - h11; v1 = (h33s * h44s - h43h34) / h21 + h12; v2 = h22 - h11 - h33s - h44s; v3 = h__[m + 2 + (m + 1) * h_dim1]; s = dabs(v1) + dabs(v2) + dabs(v3); v1 /= s; v2 /= s; v3 /= s; v[0] = v1; v[1] = v2; v[2] = v3; if (m == l) { goto L50; } h00 = h__[m - 1 + (m - 1) * h_dim1]; h10 = h__[m + (m - 1) * h_dim1]; tst1 = dabs(v1) * (dabs(h00) + dabs(h11) + dabs(h22)); if (dabs(h10) * (dabs(v2) + dabs(v3)) <= ulp * tst1) { goto L50; } /* L40: */ } L50: /* %----------------------% */ /* | Double-shift QR step | */ /* %----------------------% */ i__2 = i__ - 1; for (k = m; k <= i__2; ++k) { /* ------------------------------------------------------------ */ /* The first iteration of this loop determines a reflection G */ /* from the vector V and applies it from left and right to H, */ /* thus creating a nonzero bulge below the subdiagonal. */ /* Each subsequent iteration determines a reflection G to */ /* restore the Hessenberg form in the (K-1)th column, and thus */ /* chases the bulge one step toward the bottom of the active */ /* submatrix. NR is the order of G. */ /* ------------------------------------------------------------ */ /* Computing MIN */ i__3 = 3, i__4 = i__ - k + 1; nr = min(i__3,i__4); if (k > m) { scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); } slarfg_(&nr, v, &v[1], &c__1, &t1); if (k > m) { h__[k + (k - 1) * h_dim1] = v[0]; h__[k + 1 + (k - 1) * h_dim1] = 0.f; if (k < i__ - 1) { h__[k + 2 + (k - 1) * h_dim1] = 0.f; } } else if (m > l) { h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; } v2 = v[1]; t2 = t1 * v2; if (nr == 3) { v3 = v[2]; t3 = t1 * v3; /* %------------------------------------------------% */ /* | Apply G from the left to transform the rows of | */ /* | the matrix in columns K to I2. | */ /* %------------------------------------------------% */ i__3 = i2; for (j = k; j <= i__3; ++j) { sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + v3 * h__[k + 2 + j * h_dim1]; h__[k + j * h_dim1] -= sum * t1; h__[k + 1 + j * h_dim1] -= sum * t2; h__[k + 2 + j * h_dim1] -= sum * t3; /* L60: */ } /* %----------------------------------------------------% */ /* | Apply G from the right to transform the columns of | */ /* | the matrix in rows I1 to min(K+3,I). | */ /* %----------------------------------------------------% */ /* Computing MIN */ i__4 = k + 3; i__3 = min(i__4,i__); for (j = i1; j <= i__3; ++j) { sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + v3 * h__[j + (k + 2) * h_dim1]; h__[j + k * h_dim1] -= sum * t1; h__[j + (k + 1) * h_dim1] -= sum * t2; h__[j + (k + 2) * h_dim1] -= sum * t3; /* L70: */ } /* %----------------------------------% */ /* | Accumulate transformations for Z | */ /* %----------------------------------% */ sum = z__[k] + v2 * z__[k + 1] + v3 * z__[k + 2]; z__[k] -= sum * t1; z__[k + 1] -= sum * t2; z__[k + 2] -= sum * t3; } else if (nr == 2) { /* %------------------------------------------------% */ /* | Apply G from the left to transform the rows of | */ /* | the matrix in columns K to I2. | */ /* %------------------------------------------------% */ i__3 = i2; for (j = k; j <= i__3; ++j) { sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; h__[k + j * h_dim1] -= sum * t1; h__[k + 1 + j * h_dim1] -= sum * t2; /* L90: */ } /* %----------------------------------------------------% */ /* | Apply G from the right to transform the columns of | */ /* | the matrix in rows I1 to min(K+3,I). | */ /* %----------------------------------------------------% */ i__3 = i__; for (j = i1; j <= i__3; ++j) { sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] ; h__[j + k * h_dim1] -= sum * t1; h__[j + (k + 1) * h_dim1] -= sum * t2; /* L100: */ } /* %----------------------------------% */ /* | Accumulate transformations for Z | */ /* %----------------------------------% */ sum = z__[k] + v2 * z__[k + 1]; z__[k] -= sum * t1; z__[k + 1] -= sum * t2; } /* L120: */ } /* L130: */ } /* %-------------------------------------------------------% */ /* | Failure to converge in remaining number of iterations | */ /* %-------------------------------------------------------% */ *info = i__; return 0; L140: if (l == i__) { /* %------------------------------------------------------% */ /* | H(I,I-1) is negligible: one eigenvalue has converged | */ /* %------------------------------------------------------% */ wr[i__] = h__[i__ + i__ * h_dim1]; wi[i__] = 0.f; } else if (l == i__ - 1) { /* %--------------------------------------------------------% */ /* | H(I-1,I-2) is negligible; | */ /* | a pair of eigenvalues have converged. | */ /* | | */ /* | Transform the 2-by-2 submatrix to standard Schur form, | */ /* | and compute and store the eigenvalues. | */ /* %--------------------------------------------------------% */ slanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, &sn); if (*wantt) { /* %-----------------------------------------------------% */ /* | Apply the transformation to the rest of H and to Z, | */ /* | as required. | */ /* %-----------------------------------------------------% */ if (i2 > i__) { i__1 = i2 - i__; srot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); } i__1 = i__ - i1 - 1; srot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * h_dim1], &c__1, &cs, &sn); sum = cs * z__[i__ - 1] + sn * z__[i__]; z__[i__] = cs * z__[i__] - sn * z__[i__ - 1]; z__[i__ - 1] = sum; } } /* %---------------------------------------------------------% */ /* | Decrement number of remaining iterations, and return to | */ /* | start of the main loop with new value of I. | */ /* %---------------------------------------------------------% */ itn -= its; i__ = l - 1; goto L10; L150: return 0; /* %---------------% */ /* | End of slaqrb | */ /* %---------------% */ } /* slaqrb_ */
/* Subroutine */ int slaeda_(integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *q, integer *qptr, real *z__, real *ztemp, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; /* Builtin functions */ integer pow_ii(integer *, integer *); double sqrt(doublereal); /* Local variables */ static integer curr; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static integer bsiz1, bsiz2, psiz1, psiz2, i__, k, zptr1; extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); static integer mid, ptr; #define givcol_ref(a_1,a_2) givcol[(a_2)*2 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*2 + a_1] /* -- LAPACK routine (instrumented to count operations, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Common block to return operation count and iteration count ITCNT is unchanged, OPS is only incremented Purpose ======= SLAEDA computes the Z vector corresponding to the merge step in the CURLVLth step of the merge process with TLVLS steps for the CURPBMth problem. Arguments ========= N (input) INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. TLVLS (input) INTEGER The total number of merging levels in the overall divide and conquer tree. CURLVL (input) INTEGER The current level in the overall merge routine, 0 <= curlvl <= tlvls. CURPBM (input) INTEGER The current problem in the current level in the overall merge routine (counting from upper left to lower right). PRMPTR (input) INTEGER array, dimension (N lg N) Contains a list of pointers which indicate where in PERM a level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) indicates the size of the permutation and incidentally the size of the full, non-deflated problem. PERM (input) INTEGER array, dimension (N lg N) Contains the permutations (from deflation and sorting) to be applied to each eigenblock. GIVPTR (input) INTEGER array, dimension (N lg N) Contains a list of pointers which indicate where in GIVCOL a level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) indicates the number of Givens rotations. GIVCOL (input) INTEGER array, dimension (2, N lg N) Each pair of numbers indicates a pair of columns to take place in a Givens rotation. GIVNUM (input) REAL array, dimension (2, N lg N) Each number indicates the S value to be used in the corresponding Givens rotation. Q (input) REAL array, dimension (N**2) Contains the square eigenblocks from previous levels, the starting positions for blocks are given by QPTR. QPTR (input) INTEGER array, dimension (N+2) Contains a list of pointers which indicate where in Q an eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates the size of the block. Z (output) REAL array, dimension (N) On output this vector contains the updating vector (the last row of the first sub-eigenvector matrix and the first row of the second sub-eigenvector matrix). ZTEMP (workspace) REAL array, dimension (N) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== Based on contributions by Jeff Rutter, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --ztemp; --z__; --qptr; --q; givnum -= 3; givcol -= 3; --givptr; --perm; --prmptr; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAEDA", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine location of first number in second half. */ mid = *n / 2 + 1; /* Gather last/first rows of appropriate eigenblocks into center of Z */ ptr = 1; /* Determine location of lowest level subproblem in the full storage scheme */ i__1 = *curlvl - 1; curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; /* Determine size of these matrices. We add HALF to the value of the SQRT in case the machine underestimates one of these square roots. */ latime_1.ops += 8; bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f); i__1 = mid - bsiz1 - 1; for (k = 1; k <= i__1; ++k) { z__[k] = 0.f; /* L10: */ } scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & c__1); scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); i__1 = *n; for (k = mid + bsiz2; k <= i__1; ++k) { z__[k] = 0.f; /* L20: */ } /* Loop thru remaining levels 1 -> CURLVL applying the Givens rotations and permutation and then multiplying the center matrices against the current Z. */ ptr = pow_ii(&c__2, tlvls) + 1; i__1 = *curlvl - 1; for (k = 1; k <= i__1; ++k) { i__2 = *curlvl - k; i__3 = *curlvl - k - 1; curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - 1; psiz1 = prmptr[curr + 1] - prmptr[curr]; psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; zptr1 = mid - psiz1; /* Apply Givens at CURR and CURR+1 */ latime_1.ops += (givptr[curr + 2] - givptr[curr]) * 6; i__2 = givptr[curr + 1] - 1; for (i__ = givptr[curr]; i__ <= i__2; ++i__) { srot_(&c__1, &z__[zptr1 + givcol_ref(1, i__) - 1], &c__1, &z__[ zptr1 + givcol_ref(2, i__) - 1], &c__1, &givnum_ref(1, i__), &givnum_ref(2, i__)); /* L30: */ } i__2 = givptr[curr + 2] - 1; for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { srot_(&c__1, &z__[mid - 1 + givcol_ref(1, i__)], &c__1, &z__[mid - 1 + givcol_ref(2, i__)], &c__1, &givnum_ref(1, i__), & givnum_ref(2, i__)); /* L40: */ } psiz1 = prmptr[curr + 1] - prmptr[curr]; psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; i__2 = psiz1 - 1; for (i__ = 0; i__ <= i__2; ++i__) { ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; /* L50: */ } i__2 = psiz2 - 1; for (i__ = 0; i__ <= i__2; ++i__) { ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - 1]; /* L60: */ } /* Multiply Blocks at CURR and CURR+1 Determine size of these matrices. We add HALF to the value of the SQRT in case the machine underestimates one of these square roots. */ latime_1.ops += 8; bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f); if (bsiz1 > 0) { latime_1.ops += (bsiz1 << 1) * bsiz1; sgemv_("T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, & ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1); } i__2 = psiz1 - bsiz1; scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); if (bsiz2 > 0) { latime_1.ops += (bsiz2 << 1) * bsiz2; sgemv_("T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, & ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1); } i__2 = psiz2 - bsiz2; scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & c__1); i__2 = *tlvls - k; ptr += pow_ii(&c__2, &i__2); /* L70: */ } return 0; /* End of SLAEDA */ } /* slaeda_ */
/* Subroutine */ int sgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, real *a, integer *lda, real *wr, real *wi, real * vl, integer *ldvl, real *vr, integer *ldvr, integer *ilo, integer * ihi, real *scale, real *abnrm, real *rconde, real *rcondv, real *work, integer *lwork, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__, k; real r__, cs, sn; char job[1]; real scl, dum[1], eps; char side[1]; real anrm; integer ierr, itau, iwrk, nout; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); extern doublereal snrm2_(integer *, real *, integer *); integer icond; extern logical lsame_(char *, char *); extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); extern doublereal slapy2_(real *, real *); extern /* Subroutine */ int slabad_(real *, real *); logical scalea; real cscale; extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, integer *, integer *, real *, integer *); extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); logical select[1]; real bignum; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *); extern integer isamax_(integer *, real *, integer *); extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slartg_(real *, real *, real *, real *, real *), sorghr_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *, integer *), shseqr_( char *, char *, integer *, integer *, integer *, real *, integer * , real *, real *, real *, integer *, real *, integer *, integer *), strevc_(char *, char *, logical *, integer *, real *, integer *, real *, integer *, real *, integer *, integer * , integer *, real *, integer *); integer minwrk, maxwrk; extern /* Subroutine */ int strsna_(char *, char *, logical *, integer *, real *, integer *, real *, integer *, real *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *, integer *); logical wantvl, wntsnb; integer hswork; logical wntsne; real smlnum; logical lquery, wantvr, wntsnn, wntsnv; /* -- LAPACK driver routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SGEEVX computes for an N-by-N real nonsymmetric matrix A, the */ /* eigenvalues and, optionally, the left and/or right eigenvectors. */ /* Optionally also, it computes a balancing transformation to improve */ /* the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */ /* SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues */ /* (RCONDE), and reciprocal condition numbers for the right */ /* eigenvectors (RCONDV). */ /* The right eigenvector v(j) of A satisfies */ /* A * v(j) = lambda(j) * v(j) */ /* where lambda(j) is its eigenvalue. */ /* The left eigenvector u(j) of A satisfies */ /* u(j)**H * A = lambda(j) * u(j)**H */ /* where u(j)**H denotes the conjugate transpose of u(j). */ /* The computed eigenvectors are normalized to have Euclidean norm */ /* equal to 1 and largest component real. */ /* Balancing a matrix means permuting the rows and columns to make it */ /* more nearly upper triangular, and applying a diagonal similarity */ /* transformation D * A * D**(-1), where D is a diagonal matrix, to */ /* make its rows and columns closer in norm and the condition numbers */ /* of its eigenvalues and eigenvectors smaller. The computed */ /* reciprocal condition numbers correspond to the balanced matrix. */ /* Permuting rows and columns will not change the condition numbers */ /* (in exact arithmetic) but diagonal scaling will. For further */ /* explanation of balancing, see section 4.10.2 of the LAPACK */ /* Users' Guide. */ /* Arguments */ /* ========= */ /* BALANC (input) CHARACTER*1 */ /* Indicates how the input matrix should be diagonally scaled */ /* and/or permuted to improve the conditioning of its */ /* eigenvalues. */ /* = 'N': Do not diagonally scale or permute; */ /* = 'P': Perform permutations to make the matrix more nearly */ /* upper triangular. Do not diagonally scale; */ /* = 'S': Diagonally scale the matrix, i.e. replace A by */ /* D*A*D**(-1), where D is a diagonal matrix chosen */ /* to make the rows and columns of A more equal in */ /* norm. Do not permute; */ /* = 'B': Both diagonally scale and permute A. */ /* Computed reciprocal condition numbers will be for the matrix */ /* after balancing and/or permuting. Permuting does not change */ /* condition numbers (in exact arithmetic), but balancing does. */ /* JOBVL (input) CHARACTER*1 */ /* = 'N': left eigenvectors of A are not computed; */ /* = 'V': left eigenvectors of A are computed. */ /* If SENSE = 'E' or 'B', JOBVL must = 'V'. */ /* JOBVR (input) CHARACTER*1 */ /* = 'N': right eigenvectors of A are not computed; */ /* = 'V': right eigenvectors of A are computed. */ /* If SENSE = 'E' or 'B', JOBVR must = 'V'. */ /* SENSE (input) CHARACTER*1 */ /* Determines which reciprocal condition numbers are computed. */ /* = 'N': None are computed; */ /* = 'E': Computed for eigenvalues only; */ /* = 'V': Computed for right eigenvectors only; */ /* = 'B': Computed for eigenvalues and right eigenvectors. */ /* If SENSE = 'E' or 'B', both left and right eigenvectors */ /* must also be computed (JOBVL = 'V' and JOBVR = 'V'). */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the N-by-N matrix A. */ /* On exit, A has been overwritten. If JOBVL = 'V' or */ /* JOBVR = 'V', A contains the real Schur form of the balanced */ /* version of the input matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* WR (output) REAL array, dimension (N) */ /* WI (output) REAL array, dimension (N) */ /* WR and WI contain the real and imaginary parts, */ /* respectively, of the computed eigenvalues. Complex */ /* conjugate pairs of eigenvalues will appear consecutively */ /* with the eigenvalue having the positive imaginary part */ /* first. */ /* VL (output) REAL array, dimension (LDVL,N) */ /* If JOBVL = 'V', the left eigenvectors u(j) are stored one */ /* after another in the columns of VL, in the same order */ /* as their eigenvalues. */ /* If JOBVL = 'N', VL is not referenced. */ /* If the j-th eigenvalue is real, then u(j) = VL(:,j), */ /* the j-th column of VL. */ /* If the j-th and (j+1)-st eigenvalues form a complex */ /* conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */ /* u(j+1) = VL(:,j) - i*VL(:,j+1). */ /* LDVL (input) INTEGER */ /* The leading dimension of the array VL. LDVL >= 1; if */ /* JOBVL = 'V', LDVL >= N. */ /* VR (output) REAL array, dimension (LDVR,N) */ /* If JOBVR = 'V', the right eigenvectors v(j) are stored one */ /* after another in the columns of VR, in the same order */ /* as their eigenvalues. */ /* If JOBVR = 'N', VR is not referenced. */ /* If the j-th eigenvalue is real, then v(j) = VR(:,j), */ /* the j-th column of VR. */ /* If the j-th and (j+1)-st eigenvalues form a complex */ /* conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */ /* v(j+1) = VR(:,j) - i*VR(:,j+1). */ /* LDVR (input) INTEGER */ /* The leading dimension of the array VR. LDVR >= 1, and if */ /* JOBVR = 'V', LDVR >= N. */ /* ILO (output) INTEGER */ /* IHI (output) INTEGER */ /* ILO and IHI are integer values determined when A was */ /* balanced. The balanced A(i,j) = 0 if I > J and */ /* J = 1,...,ILO-1 or I = IHI+1,...,N. */ /* SCALE (output) REAL array, dimension (N) */ /* Details of the permutations and scaling factors applied */ /* when balancing A. If P(j) is the index of the row and column */ /* interchanged with row and column j, and D(j) is the scaling */ /* factor applied to row and column j, then */ /* SCALE(J) = P(J), for J = 1,...,ILO-1 */ /* = D(J), for J = ILO,...,IHI */ /* = P(J) for J = IHI+1,...,N. */ /* The order in which the interchanges are made is N to IHI+1, */ /* then 1 to ILO-1. */ /* ABNRM (output) REAL */ /* The one-norm of the balanced matrix (the maximum */ /* of the sum of absolute values of elements of any column). */ /* RCONDE (output) REAL array, dimension (N) */ /* RCONDE(j) is the reciprocal condition number of the j-th */ /* eigenvalue. */ /* RCONDV (output) REAL array, dimension (N) */ /* RCONDV(j) is the reciprocal condition number of the j-th */ /* right eigenvector. */ /* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. If SENSE = 'N' or 'E', */ /* LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', */ /* LWORK >= 3*N. If SENSE = 'V' or 'B', LWORK >= N*(N+6). */ /* For good performance, LWORK must generally be larger. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (2*N-2) */ /* If SENSE = 'N' or 'E', not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = i, the QR algorithm failed to compute all the */ /* eigenvalues, and no eigenvectors or condition numbers */ /* have been computed; elements 1:ILO-1 and i+1:N of WR */ /* and WI contain eigenvalues which have converged. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --wr; --wi; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --scale; --rconde; --rcondv; --work; --iwork; /* Function Body */ *info = 0; lquery = *lwork == -1; wantvl = lsame_(jobvl, "V"); wantvr = lsame_(jobvr, "V"); wntsnn = lsame_(sense, "N"); wntsne = lsame_(sense, "E"); wntsnv = lsame_(sense, "V"); wntsnb = lsame_(sense, "B"); if (! (lsame_(balanc, "N") || lsame_(balanc, "S") || lsame_(balanc, "P") || lsame_(balanc, "B"))) { *info = -1; } else if (! wantvl && ! lsame_(jobvl, "N")) { *info = -2; } else if (! wantvr && ! lsame_(jobvr, "N")) { *info = -3; } else if (! (wntsnn || wntsne || wntsnb || wntsnv) || (wntsne || wntsnb) && ! (wantvl && wantvr)) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldvl < 1 || wantvl && *ldvl < *n) { *info = -11; } else if (*ldvr < 1 || wantvr && *ldvr < *n) { *info = -13; } /* Compute workspace */ /* (Note: Comments in the code beginning "Workspace:" describe the */ /* minimal amount of workspace needed at that point in the code, */ /* as well as the preferred amount for good performance. */ /* NB refers to the optimal block size for the immediately */ /* following subroutine, as returned by ILAENV. */ /* HSWORK refers to the workspace preferred by SHSEQR, as */ /* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */ /* the worst case.) */ if (*info == 0) { if (*n == 0) { minwrk = 1; maxwrk = 1; } else { maxwrk = *n + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, n, & c__0); if (wantvl) { shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); } else if (wantvr) { shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); } else { if (wntsnn) { shseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); } else { shseqr_("S", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); } } hswork = work[1]; if (! wantvl && ! wantvr) { minwrk = *n << 1; if (! wntsnn) { /* Computing MAX */ i__1 = minwrk, i__2 = *n * *n + *n * 6; minwrk = max(i__1,i__2); } maxwrk = max(maxwrk,hswork); if (! wntsnn) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n * *n + *n * 6; maxwrk = max(i__1,i__2); } } else { minwrk = *n * 3; if (! wntsnn && ! wntsne) { /* Computing MAX */ i__1 = minwrk, i__2 = *n * *n + *n * 6; minwrk = max(i__1,i__2); } maxwrk = max(maxwrk,hswork); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "SORGHR", " ", n, &c__1, n, &c_n1); maxwrk = max(i__1,i__2); if (! wntsnn && ! wntsne) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n * *n + *n * 6; maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3; maxwrk = max(i__1,i__2); } maxwrk = max(maxwrk,minwrk); } work[1] = (real) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -21; } } if (*info != 0) { i__1 = -(*info); xerbla_("SGEEVX", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = slamch_("P"); smlnum = slamch_("S"); bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1.f / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ icond = 0; anrm = slange_("M", n, n, &a[a_offset], lda, dum); scalea = FALSE_; if (anrm > 0.f && anrm < smlnum) { scalea = TRUE_; cscale = smlnum; } else if (anrm > bignum) { scalea = TRUE_; cscale = bignum; } if (scalea) { slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & ierr); } /* Balance the matrix and compute ABNRM */ sgebal_(balanc, n, &a[a_offset], lda, ilo, ihi, &scale[1], &ierr); *abnrm = slange_("1", n, n, &a[a_offset], lda, dum); if (scalea) { dum[0] = *abnrm; slascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &c__1, & ierr); *abnrm = dum[0]; } /* Reduce to upper Hessenberg form */ /* (Workspace: need 2*N, prefer N+N*NB) */ itau = 1; iwrk = itau + *n; i__1 = *lwork - iwrk + 1; sgehrd_(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, & ierr); if (wantvl) { /* Want left eigenvectors */ /* Copy Householder vectors to VL */ *(unsigned char *)side = 'L'; slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) ; /* Generate orthogonal matrix in VL */ /* (Workspace: need 2*N-1, prefer N+(N-1)*NB) */ i__1 = *lwork - iwrk + 1; sorghr_(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], & i__1, &ierr); /* Perform QR iteration, accumulating Schur vectors in VL */ /* (Workspace: need 1, prefer HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; shseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[ vl_offset], ldvl, &work[iwrk], &i__1, info); if (wantvr) { /* Want left and right eigenvectors */ /* Copy Schur vectors to VR */ *(unsigned char *)side = 'B'; slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); } } else if (wantvr) { /* Want right eigenvectors */ /* Copy Householder vectors to VR */ *(unsigned char *)side = 'R'; slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) ; /* Generate orthogonal matrix in VR */ /* (Workspace: need 2*N-1, prefer N+(N-1)*NB) */ i__1 = *lwork - iwrk + 1; sorghr_(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], & i__1, &ierr); /* Perform QR iteration, accumulating Schur vectors in VR */ /* (Workspace: need 1, prefer HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; shseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[ vr_offset], ldvr, &work[iwrk], &i__1, info); } else { /* Compute eigenvalues only */ /* If condition numbers desired, compute Schur form */ if (wntsnn) { *(unsigned char *)job = 'E'; } else { *(unsigned char *)job = 'S'; } /* (Workspace: need 1, prefer HSWORK (see comments) ) */ iwrk = itau; i__1 = *lwork - iwrk + 1; shseqr_(job, "N", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[ vr_offset], ldvr, &work[iwrk], &i__1, info); } /* If INFO > 0 from SHSEQR, then quit */ if (*info > 0) { goto L50; } if (wantvl || wantvr) { /* Compute left and/or right eigenvectors */ /* (Workspace: need 3*N) */ strevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); } /* Compute condition numbers if desired */ /* (Workspace: need N*N+6*N unless SENSE = 'E') */ if (! wntsnn) { strsna_(sense, "A", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &rconde[1], &rcondv[1], n, &nout, &work[iwrk], n, &iwork[1], &icond); } if (wantvl) { /* Undo balancing of left eigenvectors */ sgebak_(balanc, "L", n, ilo, ihi, &scale[1], n, &vl[vl_offset], ldvl, &ierr); /* Normalize left eigenvectors and make largest component real */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (wi[i__] == 0.f) { scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); } else if (wi[i__] > 0.f) { r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); scl = 1.f / slapy2_(&r__1, &r__2); sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); i__2 = *n; for (k = 1; k <= i__2; ++k) { /* Computing 2nd power */ r__1 = vl[k + i__ * vl_dim1]; /* Computing 2nd power */ r__2 = vl[k + (i__ + 1) * vl_dim1]; work[k] = r__1 * r__1 + r__2 * r__2; /* L10: */ } k = isamax_(n, &work[1], &c__1); slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], &cs, &sn, &r__); srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * vl_dim1 + 1], &c__1, &cs, &sn); vl[k + (i__ + 1) * vl_dim1] = 0.f; } /* L20: */ } } if (wantvr) { /* Undo balancing of right eigenvectors */ sgebak_(balanc, "R", n, ilo, ihi, &scale[1], n, &vr[vr_offset], ldvr, &ierr); /* Normalize right eigenvectors and make largest component real */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (wi[i__] == 0.f) { scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); } else if (wi[i__] > 0.f) { r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); scl = 1.f / slapy2_(&r__1, &r__2); sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); i__2 = *n; for (k = 1; k <= i__2; ++k) { /* Computing 2nd power */ r__1 = vr[k + i__ * vr_dim1]; /* Computing 2nd power */ r__2 = vr[k + (i__ + 1) * vr_dim1]; work[k] = r__1 * r__1 + r__2 * r__2; /* L30: */ } k = isamax_(n, &work[1], &c__1); slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], &cs, &sn, &r__); srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * vr_dim1 + 1], &c__1, &cs, &sn); vr[k + (i__ + 1) * vr_dim1] = 0.f; } /* L40: */ } } /* Undo scaling if necessary */ L50: if (scalea) { i__1 = *n - *info; /* Computing MAX */ i__3 = *n - *info; i__2 = max(i__3,1); slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 1], &i__2, &ierr); i__1 = *n - *info; /* Computing MAX */ i__3 = *n - *info; i__2 = max(i__3,1); slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 1], &i__2, &ierr); if (*info == 0) { if ((wntsnv || wntsnb) && icond == 0) { slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &rcondv[ 1], n, &ierr); } } else { i__1 = *ilo - 1; slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], n, &ierr); i__1 = *ilo - 1; slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], n, &ierr); } } work[1] = (real) maxwrk; return 0; /* End of SGEEVX */ } /* sgeevx_ */
/* Subroutine */ int slarot_(logical *lrows, logical *lleft, logical *lright, integer *nl, real *c__, real *s, real *a, integer *lda, real *xleft, real *xright) { /* System generated locals */ integer i__1; /* Local variables */ static integer iinc; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static integer inext, ix, iy, nt; static real xt[2], yt[2]; extern /* Subroutine */ int xerbla_(char *, integer *); static integer iyt; /* -- LAPACK auxiliary test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= SLAROT applies a (Givens) rotation to two adjacent rows or columns, where one element of the first and/or last column/row may be a separate variable. This is specifically indended for use on matrices stored in some format other than GE, so that elements of the matrix may be used or modified for which no array element is provided. One example is a symmetric matrix in SB format (bandwidth=4), for which UPLO='L': Two adjacent rows will have the format: row j: * * * * * . . . . row j+1: * * * * * . . . . '*' indicates elements for which storage is provided, '.' indicates elements for which no storage is provided, but are not necessarily zero; their values are determined by symmetry. ' ' indicates elements which are necessarily zero, and have no storage provided. Those columns which have two '*'s can be handled by SROT. Those columns which have no '*'s can be ignored, since as long as the Givens rotations are carefully applied to preserve symmetry, their values are determined. Those columns which have one '*' have to be handled separately, by using separate variables "p" and "q": row j: * * * * * p . . . row j+1: q * * * * * . . . . The element p would have to be set correctly, then that column is rotated, setting p to its new value. The next call to SLAROT would rotate columns j and j+1, using p, and restore symmetry. The element q would start out being zero, and be made non-zero by the rotation. Later, rotations would presumably be chosen to zero q out. Typical Calling Sequences: rotating the i-th and (i+1)-st rows. ------- ------- --------- General dense matrix: CALL SLAROT(.TRUE.,.FALSE.,.FALSE., N, C,S, A(i,1),LDA, DUMMY, DUMMY) General banded matrix in GB format: j = MAX(1, i-KL ) NL = MIN( N, i+KU+1 ) + 1-j CALL SLAROT( .TRUE., i-KL.GE.1, i+KU.LT.N, NL, C,S, A(KU+i+1-j,j),LDA-1, XLEFT, XRIGHT ) [ note that i+1-j is just MIN(i,KL+1) ] Symmetric banded matrix in SY format, bandwidth K, lower triangle only: j = MAX(1, i-K ) NL = MIN( K+1, i ) + 1 CALL SLAROT( .TRUE., i-K.GE.1, .TRUE., NL, C,S, A(i,j), LDA, XLEFT, XRIGHT ) Same, but upper triangle only: NL = MIN( K+1, N-i ) + 1 CALL SLAROT( .TRUE., .TRUE., i+K.LT.N, NL, C,S, A(i,i), LDA, XLEFT, XRIGHT ) Symmetric banded matrix in SB format, bandwidth K, lower triangle only: [ same as for SY, except:] . . . . A(i+1-j,j), LDA-1, XLEFT, XRIGHT ) [ note that i+1-j is just MIN(i,K+1) ] Same, but upper triangle only: . . . A(K+1,i), LDA-1, XLEFT, XRIGHT ) Rotating columns is just the transpose of rotating rows, except for GB and SB: (rotating columns i and i+1) GB: j = MAX(1, i-KU ) NL = MIN( N, i+KL+1 ) + 1-j CALL SLAROT( .TRUE., i-KU.GE.1, i+KL.LT.N, NL, C,S, A(KU+j+1-i,i),LDA-1, XTOP, XBOTTM ) [note that KU+j+1-i is just MAX(1,KU+2-i)] SB: (upper triangle) . . . . . . A(K+j+1-i,i),LDA-1, XTOP, XBOTTM ) SB: (lower triangle) . . . . . . A(1,i),LDA-1, XTOP, XBOTTM ) Arguments ========= LROWS - LOGICAL If .TRUE., then SLAROT will rotate two rows. If .FALSE., then it will rotate two columns. Not modified. LLEFT - LOGICAL If .TRUE., then XLEFT will be used instead of the corresponding element of A for the first element in the second row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) If .FALSE., then the corresponding element of A will be used. Not modified. LRIGHT - LOGICAL If .TRUE., then XRIGHT will be used instead of the corresponding element of A for the last element in the first row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) If .FALSE., then the corresponding element of A will be used. Not modified. NL - INTEGER The length of the rows (if LROWS=.TRUE.) or columns (if LROWS=.FALSE.) to be rotated. If XLEFT and/or XRIGHT are used, the columns/rows they are in should be included in NL, e.g., if LLEFT = LRIGHT = .TRUE., then NL must be at least 2. The number of rows/columns to be rotated exclusive of those involving XLEFT and/or XRIGHT may not be negative, i.e., NL minus how many of LLEFT and LRIGHT are .TRUE. must be at least zero; if not, XERBLA will be called. Not modified. C, S - REAL Specify the Givens rotation to be applied. If LROWS is true, then the matrix ( c s ) (-s c ) is applied from the left; if false, then the transpose thereof is applied from the right. For a Givens rotation, C**2 + S**2 should be 1, but this is not checked. Not modified. A - REAL array. The array containing the rows/columns to be rotated. The first element of A should be the upper left element to be rotated. Read and modified. LDA - INTEGER The "effective" leading dimension of A. If A contains a matrix stored in GE or SY format, then this is just the leading dimension of A as dimensioned in the calling routine. If A contains a matrix stored in band (GB or SB) format, then this should be *one less* than the leading dimension used in the calling routine. Thus, if A were dimensioned A(LDA,*) in SLAROT, then A(1,j) would be the j-th element in the first of the two rows to be rotated, and A(2,j) would be the j-th in the second, regardless of how the array may be stored in the calling routine. [A cannot, however, actually be dimensioned thus, since for band format, the row number may exceed LDA, which is not legal FORTRAN.] If LROWS=.TRUE., then LDA must be at least 1, otherwise it must be at least NL minus the number of .TRUE. values in XLEFT and XRIGHT. Not modified. XLEFT - REAL If LLEFT is .TRUE., then XLEFT will be used and modified instead of A(2,1) (if LROWS=.TRUE.) or A(1,2) (if LROWS=.FALSE.). Read and modified. XRIGHT - REAL If LRIGHT is .TRUE., then XRIGHT will be used and modified instead of A(1,NL) (if LROWS=.TRUE.) or A(NL,1) (if LROWS=.FALSE.). Read and modified. ===================================================================== Set up indices, arrays for ends Parameter adjustments */ --a; /* Function Body */ if (*lrows) { iinc = *lda; inext = 1; } else { iinc = 1; inext = *lda; } if (*lleft) { nt = 1; ix = iinc + 1; iy = *lda + 2; xt[0] = a[1]; yt[0] = *xleft; } else { nt = 0; ix = 1; iy = inext + 1; } if (*lright) { iyt = inext + 1 + (*nl - 1) * iinc; ++nt; xt[nt - 1] = *xright; yt[nt - 1] = a[iyt]; } /* Check for errors */ if (*nl < nt) { xerbla_("SLAROT", &c__4); return 0; } if (*lda <= 0 || ! (*lrows) && *lda < *nl - nt) { xerbla_("SLAROT", &c__8); return 0; } /* Rotate */ i__1 = *nl - nt; srot_(&i__1, &a[ix], &iinc, &a[iy], &iinc, c__, s); srot_(&nt, xt, &c__1, yt, &c__1, c__, s); /* Stuff values back into XLEFT, XRIGHT, etc. */ if (*lleft) { a[1] = xt[0]; *xleft = yt[0]; } if (*lright) { *xright = xt[nt - 1]; a[iyt] = yt[nt - 1]; } return 0; /* End of SLAROT */ } /* slarot_ */
/* Subroutine */ int sbdsqr_(char *uplo, integer *n, integer *ncvt, integer * nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real * u, integer *ldu, real *c__, integer *ldc, real *work, integer *info) { /* System generated locals */ integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; real r__1, r__2, r__3, r__4; doublereal d__1; /* Builtin functions */ double pow_dd(doublereal *, doublereal *), sqrt(doublereal), r_sign(real * , real *); /* Local variables */ static real abse; static integer idir; static real abss; static integer oldm; static real cosl; static integer isub, iter; static real unfl, sinl, cosr, smin, smax, sinr; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *), slas2_(real *, real *, real *, real *, real *); static real f, g, h__; static integer i__, j, m; static real r__; extern logical lsame_(char *, char *); static real oldcs; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static integer oldll; static real shift, sigmn, oldsn; static integer maxit; static real sminl; extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, integer *, real *, real *, real *, integer *); static real sigmx; static logical lower; extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, integer *), slasq1_(integer *, real *, real *, real *, integer *), slasv2_(real *, real *, real *, real *, real *, real *, real *, real *, real *); static real cs; static integer ll; static real sn, mu; extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *); static real sminoa; extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * ); static real thresh; static logical rotate; static real sminlo; static integer nm1; static real tolmul; static integer nm12, nm13, lll; static real eps, sll, tol; #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] #define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1] #define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1] /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= SBDSQR computes the singular value decomposition (SVD) of a real N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' denotes the transpose of P), where S is a diagonal matrix with non-negative diagonal elements (the singular values of B), and Q and P are orthogonal matrices. The routine computes S, and optionally computes U * Q, P' * VT, or Q' * C, for given real input matrices U, VT, and C. See "Computing Small Singular Values of Bidiagonal Matrices With Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, no. 5, pp. 873-912, Sept 1990) and "Accurate singular values and differential qd algorithms," by B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics Department, University of California at Berkeley, July 1992 for a detailed description of the algorithm. Arguments ========= UPLO (input) CHARACTER*1 = 'U': B is upper bidiagonal; = 'L': B is lower bidiagonal. N (input) INTEGER The order of the matrix B. N >= 0. NCVT (input) INTEGER The number of columns of the matrix VT. NCVT >= 0. NRU (input) INTEGER The number of rows of the matrix U. NRU >= 0. NCC (input) INTEGER The number of columns of the matrix C. NCC >= 0. D (input/output) REAL array, dimension (N) On entry, the n diagonal elements of the bidiagonal matrix B. On exit, if INFO=0, the singular values of B in decreasing order. E (input/output) REAL array, dimension (N) On entry, the elements of E contain the offdiagonal elements of the bidiagonal matrix whose SVD is desired. On normal exit (INFO = 0), E is destroyed. If the algorithm does not converge (INFO > 0), D and E will contain the diagonal and superdiagonal elements of a bidiagonal matrix orthogonally equivalent to the one given as input. E(N) is used for workspace. VT (input/output) REAL array, dimension (LDVT, NCVT) On entry, an N-by-NCVT matrix VT. On exit, VT is overwritten by P' * VT. VT is not referenced if NCVT = 0. LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. U (input/output) REAL array, dimension (LDU, N) On entry, an NRU-by-N matrix U. On exit, U is overwritten by U * Q. U is not referenced if NRU = 0. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,NRU). C (input/output) REAL array, dimension (LDC, NCC) On entry, an N-by-NCC matrix C. On exit, C is overwritten by Q' * C. C is not referenced if NCC = 0. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. WORK (workspace) REAL array, dimension (4*N) INFO (output) INTEGER = 0: successful exit < 0: If INFO = -i, the i-th argument had an illegal value > 0: the algorithm did not converge; D and E contain the elements of a bidiagonal matrix which is orthogonally similar to the input matrix B; if INFO = i, i elements of E have not converged to zero. Internal Parameters =================== TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) TOLMUL controls the convergence criterion of the QR loop. If it is positive, TOLMUL*EPS is the desired relative precision in the computed singular values. If it is negative, abs(TOLMUL*EPS*sigma_max) is the desired absolute accuracy in the computed singular values (corresponds to relative accuracy abs(TOLMUL*EPS) in the largest singular value. abs(TOLMUL) should be between 1 and 1/EPS, and preferably between 10 (for fast convergence) and .1/EPS (for there to be some accuracy in the results). Default is to lose at either one eighth or 2 of the available decimal digits in each computed singular value (whichever is smaller). MAXITR INTEGER, default = 6 MAXITR controls the maximum number of passes of the algorithm through its inner loop. The algorithms stops (and so fails to converge) if the number of passes through the inner loop exceeds MAXITR*N**2. ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1 * 1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1 * 1; u -= u_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; --work; /* Function Body */ *info = 0; lower = lsame_(uplo, "L"); if (! lsame_(uplo, "U") && ! lower) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ncvt < 0) { *info = -3; } else if (*nru < 0) { *info = -4; } else if (*ncc < 0) { *info = -5; } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { *info = -9; } else if (*ldu < max(1,*nru)) { *info = -11; } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { *info = -13; } if (*info != 0) { i__1 = -(*info); xerbla_("SBDSQR", &i__1); return 0; } if (*n == 0) { return 0; } if (*n == 1) { goto L160; } /* ROTATE is true if any singular vectors desired, false otherwise */ rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; /* If no singular vectors desired, use qd algorithm */ if (! rotate) { slasq1_(n, &d__[1], &e[1], &work[1], info); return 0; } nm1 = *n - 1; nm12 = nm1 + nm1; nm13 = nm12 + nm1; idir = 0; /* Get machine constants */ eps = slamch_("Epsilon"); unfl = slamch_("Safe minimum"); /* If matrix lower bidiagonal, rotate to be upper bidiagonal by applying Givens rotations on the left */ if (lower) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; work[i__] = cs; work[nm1 + i__] = sn; /* L10: */ } /* Update singular vectors if desired */ if (*nru > 0) { slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], ldu); } if (*ncc > 0) { slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], ldc); } } /* Compute singular values to relative accuracy TOL (By setting TOL to be negative, algorithm will compute singular values to absolute accuracy ABS(TOL)*norm(input matrix)) Computing MAX Computing MIN */ d__1 = (doublereal) eps; r__3 = 100.f, r__4 = pow_dd(&d__1, &c_b15); r__1 = 10.f, r__2 = dmin(r__3,r__4); tolmul = dmax(r__1,r__2); tol = tolmul * eps; /* Compute approximate maximum, minimum singular values */ smax = 0.f; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1)); smax = dmax(r__2,r__3); /* L20: */ } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1)); smax = dmax(r__2,r__3); /* L30: */ } sminl = 0.f; if (tol >= 0.f) { /* Relative accuracy desired */ sminoa = dabs(d__[1]); if (sminoa == 0.f) { goto L50; } mu = sminoa; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ - 1], dabs(r__1)))); sminoa = dmin(sminoa,mu); if (sminoa == 0.f) { goto L50; } /* L40: */ } L50: sminoa /= sqrt((real) (*n)); /* Computing MAX */ r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl; thresh = dmax(r__1,r__2); } else { /* Absolute accuracy desired Computing MAX */ r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl; thresh = dmax(r__1,r__2); } /* Prepare for main iteration loop for the singular values (MAXIT is the maximum number of passes through the inner loop permitted before nonconvergence signalled.) */ maxit = *n * 6 * *n; iter = 0; oldll = -1; oldm = -1; /* M points to last element of unconverged part of matrix */ m = *n; /* Begin main iteration loop */ L60: /* Check for convergence or exceeding iteration count */ if (m <= 1) { goto L160; } if (iter > maxit) { goto L200; } /* Find diagonal block of matrix to work on */ if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) { d__[m] = 0.f; } smax = (r__1 = d__[m], dabs(r__1)); smin = smax; i__1 = m - 1; for (lll = 1; lll <= i__1; ++lll) { ll = m - lll; abss = (r__1 = d__[ll], dabs(r__1)); abse = (r__1 = e[ll], dabs(r__1)); if (tol < 0.f && abss <= thresh) { d__[ll] = 0.f; } if (abse <= thresh) { goto L80; } smin = dmin(smin,abss); /* Computing MAX */ r__1 = max(smax,abss); smax = dmax(r__1,abse); /* L70: */ } ll = 0; goto L90; L80: e[ll] = 0.f; /* Matrix splits since E(LL) = 0 */ if (ll == m - 1) { /* Convergence of bottom singular value, return to top of loop */ --m; goto L60; } L90: ++ll; /* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ if (ll == m - 1) { /* 2 by 2 block, handle separately */ slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl); d__[m - 1] = sigmx; e[m - 1] = 0.f; d__[m] = sigmn; /* Compute singular vectors, if desired */ if (*ncvt > 0) { srot_(ncvt, &vt_ref(m - 1, 1), ldvt, &vt_ref(m, 1), ldvt, &cosr, & sinr); } if (*nru > 0) { srot_(nru, &u_ref(1, m - 1), &c__1, &u_ref(1, m), &c__1, &cosl, & sinl); } if (*ncc > 0) { srot_(ncc, &c___ref(m - 1, 1), ldc, &c___ref(m, 1), ldc, &cosl, & sinl); } m += -2; goto L60; } /* If working on new submatrix, choose shift direction (from larger end diagonal element towards smaller) */ if (ll > oldm || m < oldll) { if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) { /* Chase bulge from top (big end) to bottom (small end) */ idir = 1; } else { /* Chase bulge from bottom (big end) to top (small end) */ idir = 2; } } /* Apply convergence tests */ if (idir == 1) { /* Run convergence test in forward direction First apply standard test to bottom of matrix */ if ((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m], dabs( r__1)) || tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <= thresh) { e[m - 1] = 0.f; goto L60; } if (tol >= 0.f) { /* If relative accuracy desired, apply convergence criterion forward */ mu = (r__1 = d__[ll], dabs(r__1)); sminl = mu; i__1 = m - 1; for (lll = ll; lll <= i__1; ++lll) { if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { e[lll] = 0.f; goto L60; } sminlo = sminl; mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 = e[lll], dabs(r__1)))); sminl = dmin(sminl,mu); /* L100: */ } } } else { /* Run convergence test in backward direction First apply standard test to top of matrix */ if ((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs( r__1)) || tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh) { e[ll] = 0.f; goto L60; } if (tol >= 0.f) { /* If relative accuracy desired, apply convergence criterion backward */ mu = (r__1 = d__[m], dabs(r__1)); sminl = mu; i__1 = ll; for (lll = m - 1; lll >= i__1; --lll) { if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { e[lll] = 0.f; goto L60; } sminlo = sminl; mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[ lll], dabs(r__1)))); sminl = dmin(sminl,mu); /* L110: */ } } } oldll = ll; oldm = m; /* Compute shift. First, test if shifting would ruin relative accuracy, and if so set the shift to zero. Computing MAX */ r__1 = eps, r__2 = tol * .01f; if (tol >= 0.f && *n * tol * (sminl / smax) <= dmax(r__1,r__2)) { /* Use a zero shift to avoid loss of relative accuracy */ shift = 0.f; } else { /* Compute the shift from 2-by-2 block at end of matrix */ if (idir == 1) { sll = (r__1 = d__[ll], dabs(r__1)); slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); } else { sll = (r__1 = d__[m], dabs(r__1)); slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); } /* Test if shift negligible, and if so set to zero */ if (sll > 0.f) { /* Computing 2nd power */ r__1 = shift / sll; if (r__1 * r__1 < eps) { shift = 0.f; } } } /* Increment iteration count */ iter = iter + m - ll; /* If SHIFT = 0, do simplified QR iteration */ if (shift == 0.f) { if (idir == 1) { /* Chase bulge from top to bottom Save cosines and sines for later singular vector updates */ cs = 1.f; oldcs = 1.f; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { r__1 = d__[i__] * cs; slartg_(&r__1, &e[i__], &cs, &sn, &r__); if (i__ > ll) { e[i__ - 1] = oldsn * r__; } r__1 = oldcs * r__; r__2 = d__[i__ + 1] * sn; slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); work[i__ - ll + 1] = cs; work[i__ - ll + 1 + nm1] = sn; work[i__ - ll + 1 + nm12] = oldcs; work[i__ - ll + 1 + nm13] = oldsn; /* L120: */ } h__ = d__[m] * cs; d__[m] = h__ * oldcs; e[m - 1] = h__ * oldsn; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], & vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1], &u_ref(1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1], &c___ref(ll, 1), ldc); } /* Test convergence */ if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { e[m - 1] = 0.f; } } else { /* Chase bulge from bottom to top Save cosines and sines for later singular vector updates */ cs = 1.f; oldcs = 1.f; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { r__1 = d__[i__] * cs; slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__); if (i__ < m) { e[i__] = oldsn * r__; } r__1 = oldcs * r__; r__2 = d__[i__ - 1] * sn; slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); work[i__ - ll] = cs; work[i__ - ll + nm1] = -sn; work[i__ - ll + nm12] = oldcs; work[i__ - ll + nm13] = -oldsn; /* L130: */ } h__ = d__[ll] * cs; d__[ll] = h__ * oldcs; e[ll] = h__ * oldsn; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ nm13 + 1], &vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u_ref( 1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], & c___ref(ll, 1), ldc); } /* Test convergence */ if ((r__1 = e[ll], dabs(r__1)) <= thresh) { e[ll] = 0.f; } } } else { /* Use nonzero shift */ if (idir == 1) { /* Chase bulge from top to bottom Save cosines and sines for later singular vector updates */ f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b49, &d__[ ll]) + shift / d__[ll]); g = e[ll]; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { slartg_(&f, &g, &cosr, &sinr, &r__); if (i__ > ll) { e[i__ - 1] = r__; } f = cosr * d__[i__] + sinr * e[i__]; e[i__] = cosr * e[i__] - sinr * d__[i__]; g = sinr * d__[i__ + 1]; d__[i__ + 1] = cosr * d__[i__ + 1]; slartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__] + sinl * d__[i__ + 1]; d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; if (i__ < m - 1) { g = sinl * e[i__ + 1]; e[i__ + 1] = cosl * e[i__ + 1]; } work[i__ - ll + 1] = cosr; work[i__ - ll + 1 + nm1] = sinr; work[i__ - ll + 1 + nm12] = cosl; work[i__ - ll + 1 + nm13] = sinl; /* L140: */ } e[m - 1] = f; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], & vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1], &u_ref(1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1], &c___ref(ll, 1), ldc); } /* Test convergence */ if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { e[m - 1] = 0.f; } } else { /* Chase bulge from bottom to top Save cosines and sines for later singular vector updates */ f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b49, &d__[ m]) + shift / d__[m]); g = e[m - 1]; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { slartg_(&f, &g, &cosr, &sinr, &r__); if (i__ < m) { e[i__] = r__; } f = cosr * d__[i__] + sinr * e[i__ - 1]; e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; g = sinr * d__[i__ - 1]; d__[i__ - 1] = cosr * d__[i__ - 1]; slartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; if (i__ > ll + 1) { g = sinl * e[i__ - 2]; e[i__ - 2] = cosl * e[i__ - 2]; } work[i__ - ll] = cosr; work[i__ - ll + nm1] = -sinr; work[i__ - ll + nm12] = cosl; work[i__ - ll + nm13] = -sinl; /* L150: */ } e[ll] = f; /* Test convergence */ if ((r__1 = e[ll], dabs(r__1)) <= thresh) { e[ll] = 0.f; } /* Update singular vectors if desired */ if (*ncvt > 0) { i__1 = m - ll + 1; slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ nm13 + 1], &vt_ref(ll, 1), ldvt); } if (*nru > 0) { i__1 = m - ll + 1; slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u_ref( 1, ll), ldu); } if (*ncc > 0) { i__1 = m - ll + 1; slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], & c___ref(ll, 1), ldc); } } } /* QR iteration finished, go back and check convergence */ goto L60; /* All singular values converged, so make them positive */ L160: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] < 0.f) { d__[i__] = -d__[i__]; /* Change sign of singular vectors, if desired */ if (*ncvt > 0) { sscal_(ncvt, &c_b72, &vt_ref(i__, 1), ldvt); } } /* L170: */ } /* Sort the singular values into decreasing order (insertion sort on singular values, but only one transposition per singular vector) */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Scan for smallest D(I) */ isub = 1; smin = d__[1]; i__2 = *n + 1 - i__; for (j = 2; j <= i__2; ++j) { if (d__[j] <= smin) { isub = j; smin = d__[j]; } /* L180: */ } if (isub != *n + 1 - i__) { /* Swap singular values and vectors */ d__[isub] = d__[*n + 1 - i__]; d__[*n + 1 - i__] = smin; if (*ncvt > 0) { sswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(*n + 1 - i__, 1), ldvt); } if (*nru > 0) { sswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, *n + 1 - i__), & c__1); } if (*ncc > 0) { sswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(*n + 1 - i__, 1), ldc); } } /* L190: */ } goto L220; /* Maximum number of iterations exceeded, failure to converge */ L200: *info = 0; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { if (e[i__] != 0.f) { ++(*info); } /* L210: */ } L220: return 0; /* End of SBDSQR */ } /* sbdsqr_ */
/* Subroutine */ int slaed2_(integer *k, integer *n, real *d, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *z, real *dlamda, real *q2, integer *ldq2, integer *indxc, real *w, integer *indxp, integer *indx, integer *coltyp, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, Courant Institute, NAG Ltd., and Rice University September 30, 1994 Purpose ======= SLAED2 merges the two sets of eigenvalues together into a single sorted set. Then it tries to deflate the size of the problem. There are two ways in which deflation can occur: when two or more eigenvalues are close together or if there is a tiny entry in the Z vector. For each such occurrence the order of the related secular equation problem is reduced by one. Arguments ========= K (output) INTEGER The number of non-deflated eigenvalues, and the order of the related secular equation. 0 <= K <=N. N (input) INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. D (input/output) REAL array, dimension (N) On entry, D contains the eigenvalues of the two submatrices to be combined. On exit, D contains the trailing (N-K) updated eigenvalues (those which were deflated) sorted into increasing order. Q (input/output) REAL array, dimension (LDQ, N) On entry, Q contains the eigenvectors of two submatrices in the two square blocks with corners at (1,1), (CUTPNT,CUTPNT) and (CUTPNT+1, CUTPNT+1), (N,N). On exit, Q contains the trailing (N-K) updated eigenvectors (those which were deflated) in its last N-K columns. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,N). INDXQ (input/output) INTEGER array, dimension (N) The permutation which separately sorts the two sub-problems in D into ascending order. Note that elements in the second half of this permutation must first have CUTPNT added to their values. Destroyed on exit. RHO (input/output) REAL On entry, the off-diagonal element associated with the rank-1 cut which originally split the two submatrices which are now being recombined. On exit, RHO has been modified to the value required by SLAED3. CUTPNT (input) INTEGER The location of the last eigenvalue in the leading sub-matrix. min(1,N) <= CUTPNT <= N. Z (input) REAL array, dimension (N) On entry, Z contains the updating vector (the last row of the first sub-eigenvector matrix and the first row of the second sub-eigenvector matrix). On exit, the contents of Z have been destroyed by the updating process. DLAMDA (output) REAL array, dimension (N) A copy of the first K eigenvalues which will be used by SLAED3 to form the secular equation. Q2 (output) REAL array, dimension (LDQ2, N) A copy of the first K eigenvectors which will be used by SLAED3 in a matrix multiply (SGEMM) to solve for the new eigenvectors. Q2 is arranged into three blocks. The first block contains non-zero elements only at and above CUTPNT, the second contains non-zero elements only below CUTPNT, and the third is dense. LDQ2 (input) INTEGER The leading dimension of the array Q2. LDQ2 >= max(1,N). INDXC (output) INTEGER array, dimension (N) The permutation used to arrange the columns of the deflated Q matrix into three groups: the first group contains non-zero elements only at and above CUTPNT, the second contains non-zero elements only below CUTPNT, and the third is dense. W (output) REAL array, dimension (N) The first k values of the final deflation-altered z-vector which will be passed to SLAED3. INDXP (workspace) INTEGER array, dimension (N) The permutation used to place deflated values of D at the end of the array. INDXP(1:K) points to the nondeflated D-values and INDXP(K+1:N) points to the deflated eigenvalues. INDX (workspace) INTEGER array, dimension (N) The permutation used to sort the contents of D into ascending order. COLTYP (workspace/output) INTEGER array, dimension (N) During execution, a label which will indicate which of the following types a column in the Q2 matrix is: 1 : non-zero in the upper half only; 2 : non-zero in the lower half only; 3 : dense; 4 : deflated. On exit, COLTYP(i) is the number of columns of type i, for i=1 to 4 only. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b3 = -1.f; static integer c__1 = 1; /* System generated locals */ integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; real r__1, r__2, r__3, r__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer jlam, imax, jmax, ctot[4]; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static real c; static integer i, j; static real s, t; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static integer k2; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); static integer n1, n2; extern doublereal slapy2_(real *, real *); static integer ct, jp; extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer isamax_(integer *, real *, integer *); extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *); static integer n1p1; static real eps, tau, tol; static integer psm[4]; --d; q_dim1 = *ldq; q_offset = q_dim1 + 1; q -= q_offset; --indxq; --z; --dlamda; q2_dim1 = *ldq2; q2_offset = q2_dim1 + 1; q2 -= q2_offset; --indxc; --w; --indxp; --indx; --coltyp; /* Function Body */ *info = 0; if (*n < 0) { *info = -2; } else if (*ldq < max(1,*n)) { *info = -5; } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { *info = -8; } else if (*ldq2 < max(1,*n)) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAED2", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } n1 = *cutpnt; n2 = *n - n1; n1p1 = n1 + 1; if (*rho < 0.f) { sscal_(&n2, &c_b3, &z[n1p1], &c__1); } /* Normalize z so that norm(z) = 1. Since z is the concatenation of two normalized vectors, norm2(z) = sqrt(2). */ t = 1.f / sqrt(2.f); i__1 = *n; for (j = 1; j <= i__1; ++j) { indx[j] = j; /* L10: */ } sscal_(n, &t, &z[1], &c__1); /* RHO = ABS( norm(z)**2 * RHO ) */ *rho = (r__1 = *rho * 2.f, dabs(r__1)); i__1 = *cutpnt; for (i = 1; i <= i__1; ++i) { coltyp[i] = 1; /* L20: */ } i__1 = *n; for (i = *cutpnt + 1; i <= i__1; ++i) { coltyp[i] = 2; /* L30: */ } /* Sort the eigenvalues into increasing order */ i__1 = *n; for (i = *cutpnt + 1; i <= i__1; ++i) { indxq[i] += *cutpnt; /* L40: */ } /* re-integrate the deflated parts from the last pass */ i__1 = *n; for (i = 1; i <= i__1; ++i) { dlamda[i] = d[indxq[i]]; w[i] = z[indxq[i]]; indxc[i] = coltyp[indxq[i]]; /* L50: */ } slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); i__1 = *n; for (i = 1; i <= i__1; ++i) { d[i] = dlamda[indx[i]]; z[i] = w[indx[i]]; coltyp[i] = indxc[indx[i]]; /* L60: */ } /* Calculate the allowable deflation tolerance */ imax = isamax_(n, &z[1], &c__1); jmax = isamax_(n, &d[1], &c__1); eps = slamch_("Epsilon"); /* Computing MAX */ r__3 = (r__1 = d[jmax], dabs(r__1)), r__4 = (r__2 = z[imax], dabs(r__2)); tol = eps * 8.f * dmax(r__3,r__4); /* If the rank-1 modifier is small enough, no more needs to be done except to reorganize Q so that its columns correspond with the elements in D. */ if (*rho * (r__1 = z[imax], dabs(r__1)) <= tol) { *k = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { scopy_(n, &q[indxq[indx[j]] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], &c__1); /* L70: */ } slacpy_("A", n, n, &q2[q2_offset], ldq2, &q[q_offset], ldq); goto L180; } /* If there are multiple eigenvalues then the problem deflates. Here the number of equal eigenvalues are found. As each equal eigenvalue is found, an elementary reflector is computed to rotate the corresponding eigensubspace so that the corresponding components of Z are zero in this new basis. */ *k = 0; k2 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; indxp[k2] = j; coltyp[j] = 4; if (j == *n) { goto L120; } } else { jlam = j; goto L90; } /* L80: */ } L90: ++j; if (j > *n) { goto L110; } if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; indxp[k2] = j; coltyp[j] = 4; } else { /* Check if eigenvalues are close enough to allow deflation. */ s = z[jlam]; c = z[j]; /* Find sqrt(a**2+b**2) without overflow or destructive underflow. */ tau = slapy2_(&c, &s); t = d[j] - d[jlam]; c /= tau; s = -(doublereal)s / tau; if ((r__1 = t * c * s, dabs(r__1)) <= tol) { /* Deflation is possible. */ z[j] = tau; z[jlam] = 0.f; if (coltyp[j] != coltyp[jlam]) { coltyp[j] = 3; } coltyp[jlam] = 4; srot_(n, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[indx[ j]] * q_dim1 + 1], &c__1, &c, &s); /* Computing 2nd power */ r__1 = c; /* Computing 2nd power */ r__2 = s; t = d[jlam] * (r__1 * r__1) + d[j] * (r__2 * r__2); /* Computing 2nd power */ r__1 = s; /* Computing 2nd power */ r__2 = c; d[j] = d[jlam] * (r__1 * r__1) + d[j] * (r__2 * r__2); d[jlam] = t; --k2; i = 1; L100: if (k2 + i <= *n) { if (d[jlam] < d[indxp[k2 + i]]) { indxp[k2 + i - 1] = indxp[k2 + i]; indxp[k2 + i] = jlam; ++i; goto L100; } else { indxp[k2 + i - 1] = jlam; } } else { indxp[k2 + i - 1] = jlam; } jlam = j; } else { ++(*k); w[*k] = z[jlam]; dlamda[*k] = d[jlam]; indxp[*k] = jlam; jlam = j; } } goto L90; L110: /* Record the last eigenvalue. */ ++(*k); w[*k] = z[jlam]; dlamda[*k] = d[jlam]; indxp[*k] = jlam; L120: /* Count up the total number of the various types of columns, then form a permutation which positions the four column types into four uniform groups (although one or more of these groups may be empty). */ for (j = 1; j <= 4; ++j) { ctot[j - 1] = 0; /* L130: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { ct = coltyp[j]; ++ctot[ct - 1]; /* L140: */ } /* PSM(*) = Position in SubVISMatrix (of types 1 through 4) */ psm[0] = 1; psm[1] = ctot[0] + 1; psm[2] = psm[1] + ctot[1]; psm[3] = psm[2] + ctot[2]; /* Fill out the INDXC array so that the permutation which it induces will place all type-1 columns first, all type-2 columns next, then all type-3's, and finally all type-4's. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { jp = indxp[j]; ct = coltyp[jp]; indxc[psm[ct - 1]] = j; ++psm[ct - 1]; /* L150: */ } /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA and Q2 respectively. The eigenvalues/vectors which were not deflated go into the first K slots of DLAMDA and Q2 respectively, while those which were deflated go into the last N - K slots. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { jp = indxp[j]; dlamda[j] = d[jp]; scopy_(n, &q[indxq[indx[indxp[indxc[j]]]] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], &c__1); /* L160: */ } /* The deflated eigenvalues and their corresponding vectors go back into the last N - K slots of D and Q respectively. */ i__1 = *n - *k; scopy_(&i__1, &dlamda[*k + 1], &c__1, &d[*k + 1], &c__1); i__1 = *n - *k; slacpy_("A", n, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + 1) * q_dim1 + 1], ldq); /* Copy CTOT into COLTYP for referencing in SLAED3. */ for (j = 1; j <= 4; ++j) { coltyp[j] = ctot[j - 1]; /* L170: */ } L180: return 0; /* End of SLAED2 */ } /* slaed2_ */
/* Subroutine */ int stgex2_(logical *wantq, logical *wantz, integer *n, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, integer *lwork, integer *info) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair (A, B) by an orthogonal equivalence transformation. (A, B) must be in generalized real Schur canonical form (as returned by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper triangular. Optionally, the matrices Q and Z of generalized Schur vectors are updated. Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' Arguments ========= WANTQ (input) LOGICAL .TRUE. : update the left transformation matrix Q; .FALSE.: do not update Q. WANTZ (input) LOGICAL .TRUE. : update the right transformation matrix Z; .FALSE.: do not update Z. N (input) INTEGER The order of the matrices A and B. N >= 0. A (input/output) REAL arrays, dimensions (LDA,N) On entry, the matrix A in the pair (A, B). On exit, the updated matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) REAL arrays, dimensions (LDB,N) On entry, the matrix B in the pair (A, B). On exit, the updated matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). Q (input/output) REAL array, dimension (LDZ,N) On entry, if WANTQ = .TRUE., the orthogonal matrix Q. On exit, the updated matrix Q. Not referenced if WANTQ = .FALSE.. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= 1. If WANTQ = .TRUE., LDQ >= N. Z (input/output) REAL array, dimension (LDZ,N) On entry, if WANTZ =.TRUE., the orthogonal matrix Z. On exit, the updated matrix Z. Not referenced if WANTZ = .FALSE.. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1. If WANTZ = .TRUE., LDZ >= N. J1 (input) INTEGER The index to the first block (A11, B11). 1 <= J1 <= N. N1 (input) INTEGER The order of the first block (A11, B11). N1 = 0, 1 or 2. N2 (input) INTEGER The order of the second block (A22, B22). N2 = 0, 1 or 2. WORK (workspace) REAL array, dimension (LWORK). LWORK (input) INTEGER The dimension of the array WORK. LWORK >= MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 ) INFO (output) INTEGER =0: Successful exit >0: If INFO = 1, the transformed matrix (A, B) would be too far from generalized Schur form; the blocks are not swapped and (A, B) and (Q, Z) are unchanged. The problem of swapping is too ill-conditioned. <0: If INFO = -16: LWORK is too small. Appropriate value for LWORK is returned in WORK(1). Further Details =============== Based on contributions by Bo Kagstrom and Peter Poromaa, Department of Computing Science, Umea University, S-901 87 Umea, Sweden. In the current code both weak and strong stability tests are performed. The user can omit the strong stability test by changing the internal logical parameter WANDS to .FALSE.. See ref. [2] for details. [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the Generalized Real Schur Form of a Regular Matrix Pair (A, B), in M.S. Moonen et al (eds), Linear Algebra for Large Scale and Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified Eigenvalues of a Regular Matrix Pair (A, B) and Condition Estimation: Theory, Algorithms and Software, Report UMINF - 94.04, Department of Computing Science, Umea University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. To appear in Numerical Algorithms, 1996. ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__16 = 16; static real c_b3 = 0.f; static integer c__0 = 0; static integer c__1 = 1; static integer c__4 = 4; static integer c__2 = 2; static real c_b38 = 1.f; static real c_b44 = -1.f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static logical weak; static real ddum; static integer idum; static real taul[4], dsum, taur[4], scpy[16] /* was [4][4] */, tcpy[16] /* was [4][4] */; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static real f, g; static integer i__, m; static real s[16] /* was [4][4] */, t[16] /* was [4][4] */, scale, bqra21, brqa21; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static real licop[16] /* was [4][4] */; static integer linfo; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static real ircop[16] /* was [4][4] */, dnorm; static integer iwork[4]; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), slagv2_(real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *), sgeqr2_(integer * , integer *, real *, integer *, real *, real *, integer *), sgerq2_(integer *, integer *, real *, integer *, real *, real *, integer *); static real be[2], ai[2]; extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *), sorgr2_(integer *, integer *, integer *, real *, integer *, real *, real *, integer * ); static real ar[2], sa, sb, li[16] /* was [4][4] */; extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *), sormr2_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer * , real *, integer *); static real dscale, ir[16] /* was [4][4] */; extern /* Subroutine */ int stgsy2_(char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, integer *, real * , integer *, real *, integer *, real *, integer *, real *, real *, real *, integer *, integer *, integer *); static real ss; extern doublereal slamch_(char *); static real ws; extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slartg_(real *, real *, real *, real *, real *); static real thresh; extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, real *); static real smlnum; static logical strong; static real eps; #define scpy_ref(a_1,a_2) scpy[(a_2)*4 + a_1 - 5] #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define s_ref(a_1,a_2) s[(a_2)*4 + a_1 - 5] #define t_ref(a_1,a_2) t[(a_2)*4 + a_1 - 5] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define li_ref(a_1,a_2) li[(a_2)*4 + a_1 - 5] #define ir_ref(a_1,a_2) ir[(a_2)*4 + a_1 - 5] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n <= 1 || *n1 <= 0 || *n2 <= 0) { return 0; } if (*n1 > *n || *j1 + *n1 > *n) { return 0; } m = *n1 + *n2; /* Computing MAX */ i__1 = *n * m, i__2 = m * m << 1; if (*lwork < max(i__1,i__2)) { *info = -16; /* Computing MAX */ i__1 = *n * m, i__2 = m * m << 1; work[1] = (real) max(i__1,i__2); return 0; } weak = FALSE_; strong = FALSE_; /* Make a local copy of selected block */ scopy_(&c__16, &c_b3, &c__0, li, &c__1); scopy_(&c__16, &c_b3, &c__0, ir, &c__1); slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, s, &c__4); slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, t, &c__4); /* Compute threshold for testing acceptance of swapping. */ eps = slamch_("P"); smlnum = slamch_("S") / eps; dscale = 0.f; dsum = 1.f; slacpy_("Full", &m, &m, s, &c__4, &work[1], &m); i__1 = m * m; slassq_(&i__1, &work[1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, t, &c__4, &work[1], &m); i__1 = m * m; slassq_(&i__1, &work[1], &c__1, &dscale, &dsum); dnorm = dscale * sqrt(dsum); /* Computing MAX */ r__1 = eps * 10.f * dnorm; thresh = dmax(r__1,smlnum); if (m == 2) { /* CASE 1: Swap 1-by-1 and 1-by-1 blocks. Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks using Givens rotations and perform the swap tentatively. */ f = s_ref(2, 2) * t_ref(1, 1) - t_ref(2, 2) * s_ref(1, 1); g = s_ref(2, 2) * t_ref(1, 2) - t_ref(2, 2) * s_ref(1, 2); sb = (r__1 = t_ref(2, 2), dabs(r__1)); sa = (r__1 = s_ref(2, 2), dabs(r__1)); slartg_(&f, &g, &ir_ref(1, 2), &ir_ref(1, 1), &ddum); ir_ref(2, 1) = -ir_ref(1, 2); ir_ref(2, 2) = ir_ref(1, 1); srot_(&c__2, &s_ref(1, 1), &c__1, &s_ref(1, 2), &c__1, &ir_ref(1, 1), &ir_ref(2, 1)); srot_(&c__2, &t_ref(1, 1), &c__1, &t_ref(1, 2), &c__1, &ir_ref(1, 1), &ir_ref(2, 1)); if (sa >= sb) { slartg_(&s_ref(1, 1), &s_ref(2, 1), &li_ref(1, 1), &li_ref(2, 1), &ddum); } else { slartg_(&t_ref(1, 1), &t_ref(2, 1), &li_ref(1, 1), &li_ref(2, 1), &ddum); } srot_(&c__2, &s_ref(1, 1), &c__4, &s_ref(2, 1), &c__4, &li_ref(1, 1), &li_ref(2, 1)); srot_(&c__2, &t_ref(1, 1), &c__4, &t_ref(2, 1), &c__4, &li_ref(1, 1), &li_ref(2, 1)); li_ref(2, 2) = li_ref(1, 1); li_ref(1, 2) = -li_ref(2, 1); /* Weak stability test: |S21| + |T21| <= O(EPS * F-norm((S, T))) */ ws = (r__1 = s_ref(2, 1), dabs(r__1)) + (r__2 = t_ref(2, 1), dabs( r__2)); weak = ws <= thresh; if (! weak) { goto L70; } if (TRUE_) { /* Strong stability test: F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) */ slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); dscale = 0.f; dsum = 1.f; i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); ss = dscale * sqrt(dsum); strong = ss <= thresh; if (! strong) { goto L70; } } /* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */ i__1 = *j1 + 1; srot_(&i__1, &a_ref(1, *j1), &c__1, &a_ref(1, *j1 + 1), &c__1, & ir_ref(1, 1), &ir_ref(2, 1)); i__1 = *j1 + 1; srot_(&i__1, &b_ref(1, *j1), &c__1, &b_ref(1, *j1 + 1), &c__1, & ir_ref(1, 1), &ir_ref(2, 1)); i__1 = *n - *j1 + 1; srot_(&i__1, &a_ref(*j1, *j1), lda, &a_ref(*j1 + 1, *j1), lda, & li_ref(1, 1), &li_ref(2, 1)); i__1 = *n - *j1 + 1; srot_(&i__1, &b_ref(*j1, *j1), ldb, &b_ref(*j1 + 1, *j1), ldb, & li_ref(1, 1), &li_ref(2, 1)); /* Set N1-by-N2 (2,1) - blocks to ZERO. */ a_ref(*j1 + 1, *j1) = 0.f; b_ref(*j1 + 1, *j1) = 0.f; /* Accumulate transformations into Q and Z if requested. */ if (*wantz) { srot_(n, &z___ref(1, *j1), &c__1, &z___ref(1, *j1 + 1), &c__1, & ir_ref(1, 1), &ir_ref(2, 1)); } if (*wantq) { srot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, *j1 + 1), &c__1, & li_ref(1, 1), &li_ref(2, 1)); } /* Exit with INFO = 0 if swap was successfully performed. */ return 0; } else { /* CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 and 2-by-2 blocks. Solve the generalized Sylvester equation S11 * R - L * S22 = SCALE * S12 T11 * R - L * T22 = SCALE * T12 for R and L. Solutions in LI and IR. */ slacpy_("Full", n1, n2, &t_ref(1, *n1 + 1), &c__4, li, &c__4); slacpy_("Full", n1, n2, &s_ref(1, *n1 + 1), &c__4, &ir_ref(*n2 + 1, * n1 + 1), &c__4); stgsy2_("N", &c__0, n1, n2, s, &c__4, &s_ref(*n1 + 1, *n1 + 1), &c__4, &ir_ref(*n2 + 1, *n1 + 1), &c__4, t, &c__4, &t_ref(*n1 + 1, * n1 + 1), &c__4, li, &c__4, &scale, &dsum, &dscale, iwork, & idum, &linfo); /* Compute orthogonal matrix QL: QL' * LI = [ TL ] [ 0 ] where LI = [ -L ] [ SCALE * identity(N2) ] */ i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { sscal_(n1, &c_b44, &li_ref(1, i__), &c__1); li_ref(*n1 + i__, i__) = scale; /* L10: */ } sgeqr2_(&m, n2, li, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } sorg2r_(&m, &m, n2, li, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } /* Compute orthogonal matrix RQ: IR * RQ' = [ 0 TR], where IR = [ SCALE * identity(N1), R ] */ i__1 = *n1; for (i__ = 1; i__ <= i__1; ++i__) { ir_ref(*n2 + i__, i__) = scale; /* L20: */ } sgerq2_(n1, &m, &ir_ref(*n2 + 1, 1), &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } sorgr2_(&m, &m, n1, ir, &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } /* Perform the swapping tentatively: */ sgemm_("T", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b38, &work[1], &m, ir, &c__4, &c_b3, s, &c__4); sgemm_("T", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b38, &work[1], &m, ir, &c__4, &c_b3, t, &c__4); slacpy_("F", &m, &m, s, &c__4, scpy, &c__4); slacpy_("F", &m, &m, t, &c__4, tcpy, &c__4); slacpy_("F", &m, &m, ir, &c__4, ircop, &c__4); slacpy_("F", &m, &m, li, &c__4, licop, &c__4); /* Triangularize the B-part by an RQ factorization. Apply transformation (from left) to A-part, giving S. */ sgerq2_(&m, &m, t, &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } sormr2_("R", "T", &m, &m, &m, t, &c__4, taur, s, &c__4, &work[1], & linfo); if (linfo != 0) { goto L70; } sormr2_("L", "N", &m, &m, &m, t, &c__4, taur, ir, &c__4, &work[1], & linfo); if (linfo != 0) { goto L70; } /* Compute F-norm(S21) in BRQA21. (T21 is 0.) */ dscale = 0.f; dsum = 1.f; i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { slassq_(n1, &s_ref(*n2 + 1, i__), &c__1, &dscale, &dsum); /* L30: */ } brqa21 = dscale * sqrt(dsum); /* Triangularize the B-part by a QR factorization. Apply transformation (from right) to A-part, giving S. */ sgeqr2_(&m, &m, tcpy, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } sorm2r_("L", "T", &m, &m, &m, tcpy, &c__4, taul, scpy, &c__4, &work[1] , info); sorm2r_("R", "N", &m, &m, &m, tcpy, &c__4, taul, licop, &c__4, &work[ 1], info); if (linfo != 0) { goto L70; } /* Compute F-norm(S21) in BQRA21. (T21 is 0.) */ dscale = 0.f; dsum = 1.f; i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { slassq_(n1, &scpy_ref(*n2 + 1, i__), &c__1, &dscale, &dsum); /* L40: */ } bqra21 = dscale * sqrt(dsum); /* Decide which method to use. Weak stability test: F-norm(S21) <= O(EPS * F-norm((S, T))) */ if (bqra21 <= brqa21 && bqra21 <= thresh) { slacpy_("F", &m, &m, scpy, &c__4, s, &c__4); slacpy_("F", &m, &m, tcpy, &c__4, t, &c__4); slacpy_("F", &m, &m, ircop, &c__4, ir, &c__4); slacpy_("F", &m, &m, licop, &c__4, li, &c__4); } else if (brqa21 >= thresh) { goto L70; } /* Set lower triangle of B-part to zero */ i__1 = m; for (i__ = 2; i__ <= i__1; ++i__) { i__2 = m - i__ + 1; scopy_(&i__2, &c_b3, &c__0, &t_ref(i__, i__ - 1), &c__1); /* L50: */ } if (TRUE_) { /* Strong stability test: F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) */ slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, & work[1], &m); sgemm_("N", "N", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); dscale = 0.f; dsum = 1.f; i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, & work[1], &m); sgemm_("N", "N", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); ss = dscale * sqrt(dsum); strong = ss <= thresh; if (! strong) { goto L70; } } /* If the swap is accepted ("weakly" and "strongly"), apply the transformations and set N1-by-N2 (2,1)-block to zero. */ i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { scopy_(n1, &c_b3, &c__0, &s_ref(*n2 + 1, i__), &c__1); /* L60: */ } /* copy back M-by-M diagonal block starting at index J1 of (A, B) */ slacpy_("F", &m, &m, s, &c__4, &a_ref(*j1, *j1), lda); slacpy_("F", &m, &m, t, &c__4, &b_ref(*j1, *j1), ldb); scopy_(&c__16, &c_b3, &c__0, t, &c__1); /* Standardize existing 2-by-2 blocks. */ i__1 = m * m; scopy_(&i__1, &c_b3, &c__0, &work[1], &c__1); work[1] = 1.f; t_ref(1, 1) = 1.f; idum = *lwork - m * m - 2; if (*n2 > 1) { slagv2_(&a_ref(*j1, *j1), lda, &b_ref(*j1, *j1), ldb, ar, ai, be, &work[1], &work[2], &t_ref(1, 1), &t_ref(2, 1)); work[m + 1] = -work[2]; work[m + 2] = work[1]; t_ref(*n2, *n2) = t_ref(1, 1); t_ref(1, 2) = -t_ref(2, 1); } work[m * m] = 1.f; t_ref(m, m) = 1.f; if (*n1 > 1) { slagv2_(&a_ref(*j1 + *n2, *j1 + *n2), lda, &b_ref(*j1 + *n2, *j1 + *n2), ldb, taur, taul, &work[m * m + 1], &work[*n2 * m + *n2 + 1], &work[*n2 * m + *n2 + 2], &t_ref(*n2 + 1, *n2 + 1), &t_ref(m, m - 1)); work[m * m] = work[*n2 * m + *n2 + 1]; work[m * m - 1] = -work[*n2 * m + *n2 + 2]; t_ref(m, m) = t_ref(*n2 + 1, *n2 + 1); t_ref(m - 1, m) = -t_ref(m, m - 1); } sgemm_("T", "N", n2, n1, n2, &c_b38, &work[1], &m, &a_ref(*j1, *j1 + * n2), lda, &c_b3, &work[m * m + 1], n2); slacpy_("Full", n2, n1, &work[m * m + 1], n2, &a_ref(*j1, *j1 + *n2), lda); sgemm_("T", "N", n2, n1, n2, &c_b38, &work[1], &m, &b_ref(*j1, *j1 + * n2), ldb, &c_b3, &work[m * m + 1], n2); slacpy_("Full", n2, n1, &work[m * m + 1], n2, &b_ref(*j1, *j1 + *n2), ldb); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, &work[1], &m, &c_b3, & work[m * m + 1], &m); slacpy_("Full", &m, &m, &work[m * m + 1], &m, li, &c__4); sgemm_("N", "N", n2, n1, n1, &c_b38, &a_ref(*j1, *j1 + *n2), lda, & t_ref(*n2 + 1, *n2 + 1), &c__4, &c_b3, &work[1], n2); slacpy_("Full", n2, n1, &work[1], n2, &a_ref(*j1, *j1 + *n2), lda); sgemm_("N", "N", n2, n1, n1, &c_b38, &b_ref(*j1, *j1 + *n2), lda, & t_ref(*n2 + 1, *n2 + 1), &c__4, &c_b3, &work[1], n2); slacpy_("Full", n2, n1, &work[1], n2, &b_ref(*j1, *j1 + *n2), ldb); sgemm_("T", "N", &m, &m, &m, &c_b38, ir, &c__4, t, &c__4, &c_b3, & work[1], &m); slacpy_("Full", &m, &m, &work[1], &m, ir, &c__4); /* Accumulate transformations into Q and Z if requested. */ if (*wantq) { sgemm_("N", "N", n, &m, &m, &c_b38, &q_ref(1, *j1), ldq, li, & c__4, &c_b3, &work[1], n); slacpy_("Full", n, &m, &work[1], n, &q_ref(1, *j1), ldq); } if (*wantz) { sgemm_("N", "N", n, &m, &m, &c_b38, &z___ref(1, *j1), ldz, ir, & c__4, &c_b3, &work[1], n); slacpy_("Full", n, &m, &work[1], n, &z___ref(1, *j1), ldz); } /* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */ i__ = *j1 + m; if (i__ <= *n) { i__1 = *n - i__ + 1; sgemm_("T", "N", &m, &i__1, &m, &c_b38, li, &c__4, &a_ref(*j1, i__), lda, &c_b3, &work[1], &m); i__1 = *n - i__ + 1; slacpy_("Full", &m, &i__1, &work[1], &m, &a_ref(*j1, i__), lda); i__1 = *n - i__ + 1; sgemm_("T", "N", &m, &i__1, &m, &c_b38, li, &c__4, &b_ref(*j1, i__), lda, &c_b3, &work[1], &m); i__1 = *n - i__ + 1; slacpy_("Full", &m, &i__1, &work[1], &m, &b_ref(*j1, i__), lda); } i__ = *j1 - 1; if (i__ > 0) { sgemm_("N", "N", &i__, &m, &m, &c_b38, &a_ref(1, *j1), lda, ir, & c__4, &c_b3, &work[1], &i__); slacpy_("Full", &i__, &m, &work[1], &i__, &a_ref(1, *j1), lda); sgemm_("N", "N", &i__, &m, &m, &c_b38, &b_ref(1, *j1), ldb, ir, & c__4, &c_b3, &work[1], &i__); slacpy_("Full", &i__, &m, &work[1], &i__, &b_ref(1, *j1), ldb); } /* Exit with INFO = 0 if swap was successfully performed. */ return 0; } /* Exit with INFO = 1 if swap was rejected. */ L70: *info = 1; return 0; /* End of STGEX2 */ } /* stgex2_ */
void test09 ( void ) /******************************************************************************/ /* Purpose: TEST09 tests SROT. Modified: 29 March 2007 Author: John Burkardt */ { float c; int i; int inc1; int inc2; int n = 6; int ncopy; float s; float *x; float *y; x = malloc ( n * sizeof ( float ) ); y = malloc ( n * sizeof ( float ) ); for ( i = 0; i < n; i++ ) { x[i] = ( float ) ( i + 1 ); } for ( i = 0; i < n; i++ ) { y[i] = ( float ) ( ( i + 1 ) * ( i + 1 ) - 12 ); } printf ( "\n" ); printf ( "TEST09\n" ); printf ( " SROT carries out a Givens rotation.\n" ); printf ( "\n" ); printf ( " X and Y\n" ); printf ( "\n" ); for ( i = 0; i < n; i++ ) { printf ( " %6d %14f %14f\n", i + 1, x[i], y[i] ); } ncopy = n; inc1 = 1; inc2 = 1; c = 0.5; s = sqrt ( 1.0 - c * c ); srot_ ( &ncopy, x, &inc1, y, &inc2, &c, &s ); printf ( "\n" ); printf ( " SROT ( N, X, 1, Y, 1, %f, %f )\n", c, s ); printf ( "\n" ); for ( i = 0; i < n; i++ ) { printf ( " %6d %14f %14f\n", i + 1, x[i], y[i] ); } for ( i = 0; i < n; i++ ) { x[i] = ( float ) ( i + 1 ); } for ( i = 0; i < n; i++ ) { y[i] = ( float ) ( ( i + 1 ) * ( i + 1 ) - 12 ); } c = x[0] / sqrt ( x[0] * x[0] + y[0] * y[0] ); s = y[0] / sqrt ( x[0] * x[0] + y[0] * y[0] ); ncopy = n; inc1 = 1; inc2 = 1; srot_ ( &ncopy, x, &inc1, y, &inc2, &c, &s ); printf ( "\n" ); printf ( " SROT ( N, X, 1, Y, 1, %f, %f )\n", c, s ); printf ( "\n" ); for ( i = 0; i < n; i++ ) { printf ( " %6d %14f %14f\n", i + 1, x[i], y[i] ); } free ( x ); free ( y ); return; }
/* Subroutine */ int stgsja_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, integer *k, integer *l, real *a, integer *lda, real *b, integer *ldb, real *tola, real *tolb, real *alpha, real * beta, real *u, integer *ldu, real *v, integer *ldv, real *q, integer * ldq, real *work, integer *ncycle, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; real r__1; /* Local variables */ integer i__, j; real a1, a2, a3, b1, b2, b3, csq, csu, csv, snq, rwk, snu, snv; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); real gamma; extern logical lsame_(char *, char *); extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); logical initq, initu, initv, wantq, upper; real error, ssmin; logical wantu, wantv; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), slags2_(logical *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *); integer kcycle; extern /* Subroutine */ int xerbla_(char *, integer *), slapll_( integer *, real *, integer *, real *, integer *, real *), slartg_( real *, real *, real *, real *, real *), slaset_(char *, integer * , integer *, real *, real *, real *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* STGSJA computes the generalized singular value decomposition (GSVD) */ /* of two real upper triangular (or trapezoidal) matrices A and B. */ /* On entry, it is assumed that matrices A and B have the following */ /* forms, which may be obtained by the preprocessing subroutine SGGSVP */ /* from a general M-by-N matrix A and P-by-N matrix B: */ /* N-K-L K L */ /* A = K ( 0 A12 A13 ) if M-K-L >= 0; */ /* L ( 0 0 A23 ) */ /* M-K-L ( 0 0 0 ) */ /* N-K-L K L */ /* A = K ( 0 A12 A13 ) if M-K-L < 0; */ /* M-K ( 0 0 A23 ) */ /* N-K-L K L */ /* B = L ( 0 0 B13 ) */ /* P-L ( 0 0 0 ) */ /* where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */ /* upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */ /* otherwise A23 is (M-K)-by-L upper trapezoidal. */ /* On exit, */ /* U'*A*Q = D1*( 0 R ), V'*B*Q = D2*( 0 R ), */ /* where U, V and Q are orthogonal matrices, Z' denotes the transpose */ /* of Z, R is a nonsingular upper triangular matrix, and D1 and D2 are */ /* ``diagonal'' matrices, which are of the following structures: */ /* If M-K-L >= 0, */ /* K L */ /* D1 = K ( I 0 ) */ /* L ( 0 C ) */ /* M-K-L ( 0 0 ) */ /* K L */ /* D2 = L ( 0 S ) */ /* P-L ( 0 0 ) */ /* N-K-L K L */ /* ( 0 R ) = K ( 0 R11 R12 ) K */ /* L ( 0 0 R22 ) L */ /* where */ /* C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */ /* S = diag( BETA(K+1), ... , BETA(K+L) ), */ /* C**2 + S**2 = I. */ /* R is stored in A(1:K+L,N-K-L+1:N) on exit. */ /* If M-K-L < 0, */ /* K M-K K+L-M */ /* D1 = K ( I 0 0 ) */ /* M-K ( 0 C 0 ) */ /* K M-K K+L-M */ /* D2 = M-K ( 0 S 0 ) */ /* K+L-M ( 0 0 I ) */ /* P-L ( 0 0 0 ) */ /* N-K-L K M-K K+L-M */ /* ( 0 R ) = K ( 0 R11 R12 R13 ) */ /* M-K ( 0 0 R22 R23 ) */ /* K+L-M ( 0 0 0 R33 ) */ /* where */ /* C = diag( ALPHA(K+1), ... , ALPHA(M) ), */ /* S = diag( BETA(K+1), ... , BETA(M) ), */ /* C**2 + S**2 = I. */ /* R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored */ /* ( 0 R22 R23 ) */ /* in B(M-K+1:L,N+M-K-L+1:N) on exit. */ /* The computation of the orthogonal transformation matrices U, V or Q */ /* is optional. These matrices may either be formed explicitly, or they */ /* may be postmultiplied into input matrices U1, V1, or Q1. */ /* Arguments */ /* ========= */ /* JOBU (input) CHARACTER*1 */ /* = 'U': U must contain an orthogonal matrix U1 on entry, and */ /* the product U1*U is returned; */ /* = 'I': U is initialized to the unit matrix, and the */ /* orthogonal matrix U is returned; */ /* = 'N': U is not computed. */ /* JOBV (input) CHARACTER*1 */ /* = 'V': V must contain an orthogonal matrix V1 on entry, and */ /* the product V1*V is returned; */ /* = 'I': V is initialized to the unit matrix, and the */ /* orthogonal matrix V is returned; */ /* = 'N': V is not computed. */ /* JOBQ (input) CHARACTER*1 */ /* = 'Q': Q must contain an orthogonal matrix Q1 on entry, and */ /* the product Q1*Q is returned; */ /* = 'I': Q is initialized to the unit matrix, and the */ /* orthogonal matrix Q is returned; */ /* = 'N': Q is not computed. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* P (input) INTEGER */ /* The number of rows of the matrix B. P >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrices A and B. N >= 0. */ /* K (input) INTEGER */ /* L (input) INTEGER */ /* K and L specify the subblocks in the input matrices A and B: */ /* A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N) */ /* of A and B, whose GSVD is going to be computed by STGSJA. */ /* See Further details. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular */ /* matrix R or part of R. See Purpose for details. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) REAL array, dimension (LDB,N) */ /* On entry, the P-by-N matrix B. */ /* On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains */ /* a part of R. See Purpose for details. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,P). */ /* TOLA (input) REAL */ /* TOLB (input) REAL */ /* TOLA and TOLB are the convergence criteria for the Jacobi- */ /* Kogbetliantz iteration procedure. Generally, they are the */ /* same as used in the preprocessing step, say */ /* TOLA = max(M,N)*norm(A)*MACHEPS, */ /* TOLB = max(P,N)*norm(B)*MACHEPS. */ /* ALPHA (output) REAL array, dimension (N) */ /* BETA (output) REAL array, dimension (N) */ /* On exit, ALPHA and BETA contain the generalized singular */ /* value pairs of A and B; */ /* ALPHA(1:K) = 1, */ /* BETA(1:K) = 0, */ /* and if M-K-L >= 0, */ /* ALPHA(K+1:K+L) = diag(C), */ /* BETA(K+1:K+L) = diag(S), */ /* or if M-K-L < 0, */ /* ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 */ /* BETA(K+1:M) = S, BETA(M+1:K+L) = 1. */ /* Furthermore, if K+L < N, */ /* ALPHA(K+L+1:N) = 0 and */ /* BETA(K+L+1:N) = 0. */ /* U (input/output) REAL array, dimension (LDU,M) */ /* On entry, if JOBU = 'U', U must contain a matrix U1 (usually */ /* the orthogonal matrix returned by SGGSVP). */ /* On exit, */ /* if JOBU = 'I', U contains the orthogonal matrix U; */ /* if JOBU = 'U', U contains the product U1*U. */ /* If JOBU = 'N', U is not referenced. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= max(1,M) if */ /* JOBU = 'U'; LDU >= 1 otherwise. */ /* V (input/output) REAL array, dimension (LDV,P) */ /* On entry, if JOBV = 'V', V must contain a matrix V1 (usually */ /* the orthogonal matrix returned by SGGSVP). */ /* On exit, */ /* if JOBV = 'I', V contains the orthogonal matrix V; */ /* if JOBV = 'V', V contains the product V1*V. */ /* If JOBV = 'N', V is not referenced. */ /* LDV (input) INTEGER */ /* The leading dimension of the array V. LDV >= max(1,P) if */ /* JOBV = 'V'; LDV >= 1 otherwise. */ /* Q (input/output) REAL array, dimension (LDQ,N) */ /* On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually */ /* the orthogonal matrix returned by SGGSVP). */ /* On exit, */ /* if JOBQ = 'I', Q contains the orthogonal matrix Q; */ /* if JOBQ = 'Q', Q contains the product Q1*Q. */ /* If JOBQ = 'N', Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N) if */ /* JOBQ = 'Q'; LDQ >= 1 otherwise. */ /* WORK (workspace) REAL array, dimension (2*N) */ /* NCYCLE (output) INTEGER */ /* The number of cycles required for convergence. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* = 1: the procedure does not converge after MAXIT cycles. */ /* Internal Parameters */ /* =================== */ /* MAXIT INTEGER */ /* MAXIT specifies the total loops that the iterative procedure */ /* may take. If after MAXIT cycles, the routine fails to */ /* converge, we return INFO = 1. */ /* Further Details */ /* =============== */ /* STGSJA essentially uses a variant of Kogbetliantz algorithm to reduce */ /* min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L */ /* matrix B13 to the form: */ /* U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1, */ /* where U1, V1 and Q1 are orthogonal matrix, and Z' is the transpose */ /* of Z. C1 and S1 are diagonal matrices satisfying */ /* C1**2 + S1**2 = I, */ /* and R1 is an L-by-L nonsingular upper triangular matrix. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Decode and test the input parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --alpha; --beta; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --work; /* Function Body */ initu = lsame_(jobu, "I"); wantu = initu || lsame_(jobu, "U"); initv = lsame_(jobv, "I"); wantv = initv || lsame_(jobv, "V"); initq = lsame_(jobq, "I"); wantq = initq || lsame_(jobq, "Q"); *info = 0; if (! (initu || wantu || lsame_(jobu, "N"))) { *info = -1; } else if (! (initv || wantv || lsame_(jobv, "N"))) { *info = -2; } else if (! (initq || wantq || lsame_(jobq, "N"))) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*p < 0) { *info = -5; } else if (*n < 0) { *info = -6; } else if (*lda < max(1,*m)) { *info = -10; } else if (*ldb < max(1,*p)) { *info = -12; } else if (*ldu < 1 || wantu && *ldu < *m) { *info = -18; } else if (*ldv < 1 || wantv && *ldv < *p) { *info = -20; } else if (*ldq < 1 || wantq && *ldq < *n) { *info = -22; } if (*info != 0) { i__1 = -(*info); xerbla_("STGSJA", &i__1); return 0; } /* Initialize U, V and Q, if necessary */ if (initu) { slaset_("Full", m, m, &c_b13, &c_b14, &u[u_offset], ldu); } if (initv) { slaset_("Full", p, p, &c_b13, &c_b14, &v[v_offset], ldv); } if (initq) { slaset_("Full", n, n, &c_b13, &c_b14, &q[q_offset], ldq); } /* Loop until convergence */ upper = FALSE_; for (kcycle = 1; kcycle <= 40; ++kcycle) { upper = ! upper; i__1 = *l - 1; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *l; for (j = i__ + 1; j <= i__2; ++j) { a1 = 0.f; a2 = 0.f; a3 = 0.f; if (*k + i__ <= *m) { a1 = a[*k + i__ + (*n - *l + i__) * a_dim1]; } if (*k + j <= *m) { a3 = a[*k + j + (*n - *l + j) * a_dim1]; } b1 = b[i__ + (*n - *l + i__) * b_dim1]; b3 = b[j + (*n - *l + j) * b_dim1]; if (upper) { if (*k + i__ <= *m) { a2 = a[*k + i__ + (*n - *l + j) * a_dim1]; } b2 = b[i__ + (*n - *l + j) * b_dim1]; } else { if (*k + j <= *m) { a2 = a[*k + j + (*n - *l + i__) * a_dim1]; } b2 = b[j + (*n - *l + i__) * b_dim1]; } slags2_(&upper, &a1, &a2, &a3, &b1, &b2, &b3, &csu, &snu, & csv, &snv, &csq, &snq); /* Update (K+I)-th and (K+J)-th rows of matrix A: U'*A */ if (*k + j <= *m) { srot_(l, &a[*k + j + (*n - *l + 1) * a_dim1], lda, &a[*k + i__ + (*n - *l + 1) * a_dim1], lda, &csu, &snu); } /* Update I-th and J-th rows of matrix B: V'*B */ srot_(l, &b[j + (*n - *l + 1) * b_dim1], ldb, &b[i__ + (*n - * l + 1) * b_dim1], ldb, &csv, &snv); /* Update (N-L+I)-th and (N-L+J)-th columns of matrices */ /* A and B: A*Q and B*Q */ /* Computing MIN */ i__4 = *k + *l; i__3 = min(i__4,*m); srot_(&i__3, &a[(*n - *l + j) * a_dim1 + 1], &c__1, &a[(*n - * l + i__) * a_dim1 + 1], &c__1, &csq, &snq); srot_(l, &b[(*n - *l + j) * b_dim1 + 1], &c__1, &b[(*n - *l + i__) * b_dim1 + 1], &c__1, &csq, &snq); if (upper) { if (*k + i__ <= *m) { a[*k + i__ + (*n - *l + j) * a_dim1] = 0.f; } b[i__ + (*n - *l + j) * b_dim1] = 0.f; } else { if (*k + j <= *m) { a[*k + j + (*n - *l + i__) * a_dim1] = 0.f; } b[j + (*n - *l + i__) * b_dim1] = 0.f; } /* Update orthogonal matrices U, V, Q, if desired. */ if (wantu && *k + j <= *m) { srot_(m, &u[(*k + j) * u_dim1 + 1], &c__1, &u[(*k + i__) * u_dim1 + 1], &c__1, &csu, &snu); } if (wantv) { srot_(p, &v[j * v_dim1 + 1], &c__1, &v[i__ * v_dim1 + 1], &c__1, &csv, &snv); } if (wantq) { srot_(n, &q[(*n - *l + j) * q_dim1 + 1], &c__1, &q[(*n - * l + i__) * q_dim1 + 1], &c__1, &csq, &snq); } /* L10: */ } /* L20: */ } if (! upper) { /* The matrices A13 and B13 were lower triangular at the start */ /* of the cycle, and are now upper triangular. */ /* Convergence test: test the parallelism of the corresponding */ /* rows of A and B. */ error = 0.f; /* Computing MIN */ i__2 = *l, i__3 = *m - *k; i__1 = min(i__2,i__3); for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *l - i__ + 1; scopy_(&i__2, &a[*k + i__ + (*n - *l + i__) * a_dim1], lda, & work[1], &c__1); i__2 = *l - i__ + 1; scopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &work[* l + 1], &c__1); i__2 = *l - i__ + 1; slapll_(&i__2, &work[1], &c__1, &work[*l + 1], &c__1, &ssmin); error = dmax(error,ssmin); /* L30: */ } if (dabs(error) <= dmin(*tola,*tolb)) { goto L50; } } /* End of cycle loop */ /* L40: */ } /* The algorithm has not converged after MAXIT cycles. */ *info = 1; goto L100; L50: /* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. */ /* Compute the generalized singular value pairs (ALPHA, BETA), and */ /* set the triangular matrix R to array A. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { alpha[i__] = 1.f; beta[i__] = 0.f; /* L60: */ } /* Computing MIN */ i__2 = *l, i__3 = *m - *k; i__1 = min(i__2,i__3); for (i__ = 1; i__ <= i__1; ++i__) { a1 = a[*k + i__ + (*n - *l + i__) * a_dim1]; b1 = b[i__ + (*n - *l + i__) * b_dim1]; if (a1 != 0.f) { gamma = b1 / a1; /* change sign if necessary */ if (gamma < 0.f) { i__2 = *l - i__ + 1; sscal_(&i__2, &c_b43, &b[i__ + (*n - *l + i__) * b_dim1], ldb) ; if (wantv) { sscal_(p, &c_b43, &v[i__ * v_dim1 + 1], &c__1); } } r__1 = dabs(gamma); slartg_(&r__1, &c_b14, &beta[*k + i__], &alpha[*k + i__], &rwk); if (alpha[*k + i__] >= beta[*k + i__]) { i__2 = *l - i__ + 1; r__1 = 1.f / alpha[*k + i__]; sscal_(&i__2, &r__1, &a[*k + i__ + (*n - *l + i__) * a_dim1], lda); } else { i__2 = *l - i__ + 1; r__1 = 1.f / beta[*k + i__]; sscal_(&i__2, &r__1, &b[i__ + (*n - *l + i__) * b_dim1], ldb); i__2 = *l - i__ + 1; scopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &a[*k + i__ + (*n - *l + i__) * a_dim1], lda); } } else { alpha[*k + i__] = 0.f; beta[*k + i__] = 1.f; i__2 = *l - i__ + 1; scopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &a[*k + i__ + (*n - *l + i__) * a_dim1], lda); } /* L70: */ } /* Post-assignment */ i__1 = *k + *l; for (i__ = *m + 1; i__ <= i__1; ++i__) { alpha[i__] = 0.f; beta[i__] = 1.f; /* L80: */ } if (*k + *l < *n) { i__1 = *n; for (i__ = *k + *l + 1; i__ <= i__1; ++i__) { alpha[i__] = 0.f; beta[i__] = 0.f; /* L90: */ } } L100: *ncycle = kcycle; return 0; /* End of STGSJA */ } /* stgsja_ */
/* Subroutine */ int sgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, real *ab, integer *ldab, real *d__, real * e, real *q, integer *ldq, real *pt, integer *ldpt, real *c__, integer *ldc, real *work, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SGBBRD reduces a real general m-by-n band matrix A to upper bidiagonal form B by an orthogonal transformation: Q' * A * P = B. The routine computes B, and optionally forms Q or P', or computes Q'*C for a given matrix C. Arguments ========= VECT (input) CHARACTER*1 Specifies whether or not the matrices Q and P' are to be formed. = 'N': do not form Q or P'; = 'Q': form Q only; = 'P': form P' only; = 'B': form both. M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. NCC (input) INTEGER The number of columns of the matrix C. NCC >= 0. KL (input) INTEGER The number of subdiagonals of the matrix A. KL >= 0. KU (input) INTEGER The number of superdiagonals of the matrix A. KU >= 0. AB (input/output) REAL array, dimension (LDAB,N) On entry, the m-by-n band matrix A, stored in rows 1 to KL+KU+1. The j-th column of A is stored in the j-th column of the array AB as follows: AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). On exit, A is overwritten by values generated during the reduction. LDAB (input) INTEGER The leading dimension of the array A. LDAB >= KL+KU+1. D (output) REAL array, dimension (min(M,N)) The diagonal elements of the bidiagonal matrix B. E (output) REAL array, dimension (min(M,N)-1) The superdiagonal elements of the bidiagonal matrix B. Q (output) REAL array, dimension (LDQ,M) If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. If VECT = 'N' or 'P', the array Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. PT (output) REAL array, dimension (LDPT,N) If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. If VECT = 'N' or 'Q', the array PT is not referenced. LDPT (input) INTEGER The leading dimension of the array PT. LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. C (input/output) REAL array, dimension (LDC,NCC) On entry, an m-by-ncc matrix C. On exit, C is overwritten by Q'*C. C is not referenced if NCC = 0. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. WORK (workspace) REAL array, dimension (2*max(M,N)) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. ===================================================================== Test the input parameters Parameter adjustments */ /* Table of constant values */ static real c_b8 = 0.f; static real c_b9 = 1.f; static integer c__1 = 1; /* System generated locals */ integer ab_dim1, ab_offset, c_dim1, c_offset, pt_dim1, pt_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; /* Local variables */ static integer inca; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static integer i__, j, l; extern logical lsame_(char *, char *); static logical wantb, wantc; static integer minmn; static logical wantq; static integer j1, j2, kb; static real ra, rb, rc; static integer kk, ml, mn, nr, mu; static real rs; extern /* Subroutine */ int xerbla_(char *, integer *), slaset_( char *, integer *, integer *, real *, real *, real *, integer *), slartg_(real *, real *, real *, real *, real *); static integer kb1; extern /* Subroutine */ int slargv_(integer *, real *, integer *, real *, integer *, real *, integer *); static integer ml0; extern /* Subroutine */ int slartv_(integer *, real *, integer *, real *, integer *, real *, real *, integer *); static logical wantpt; static integer mu0, klm, kun, nrt, klu1; #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] #define pt_ref(a_1,a_2) pt[(a_2)*pt_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; --d__; --e; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; pt_dim1 = *ldpt; pt_offset = 1 + pt_dim1 * 1; pt -= pt_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; --work; /* Function Body */ wantb = lsame_(vect, "B"); wantq = lsame_(vect, "Q") || wantb; wantpt = lsame_(vect, "P") || wantb; wantc = *ncc > 0; klu1 = *kl + *ku + 1; *info = 0; if (! wantq && ! wantpt && ! lsame_(vect, "N")) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ncc < 0) { *info = -4; } else if (*kl < 0) { *info = -5; } else if (*ku < 0) { *info = -6; } else if (*ldab < klu1) { *info = -8; } else if (*ldq < 1 || wantq && *ldq < max(1,*m)) { *info = -12; } else if (*ldpt < 1 || wantpt && *ldpt < max(1,*n)) { *info = -14; } else if (*ldc < 1 || wantc && *ldc < max(1,*m)) { *info = -16; } if (*info != 0) { i__1 = -(*info); xerbla_("SGBBRD", &i__1); return 0; } /* Initialize Q and P' to the unit matrix, if needed */ if (wantq) { slaset_("Full", m, m, &c_b8, &c_b9, &q[q_offset], ldq); } if (wantpt) { slaset_("Full", n, n, &c_b8, &c_b9, &pt[pt_offset], ldpt); } /* Quick return if possible. */ if (*m == 0 || *n == 0) { return 0; } minmn = min(*m,*n); if (*kl + *ku > 1) { /* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce first to lower bidiagonal form and then transform to upper bidiagonal */ if (*ku > 0) { ml0 = 1; mu0 = 2; } else { ml0 = 2; mu0 = 1; } /* Wherever possible, plane rotations are generated and applied in vector operations of length NR over the index set J1:J2:KLU1. The sines of the plane rotations are stored in WORK(1:max(m,n)) and the cosines in WORK(max(m,n)+1:2*max(m,n)). */ mn = max(*m,*n); /* Computing MIN */ i__1 = *m - 1; klm = min(i__1,*kl); /* Computing MIN */ i__1 = *n - 1; kun = min(i__1,*ku); kb = klm + kun; kb1 = kb + 1; inca = kb1 * *ldab; nr = 0; j1 = klm + 2; j2 = 1 - kun; i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { /* Reduce i-th column and i-th row of matrix to bidiagonal form */ ml = klm + 1; mu = kun + 1; i__2 = kb; for (kk = 1; kk <= i__2; ++kk) { j1 += kb; j2 += kb; /* generate plane rotations to annihilate nonzero elements which have been created below the band */ if (nr > 0) { slargv_(&nr, &ab_ref(klu1, j1 - klm - 1), &inca, &work[j1] , &kb1, &work[mn + j1], &kb1); } /* apply plane rotations from the left */ i__3 = kb; for (l = 1; l <= i__3; ++l) { if (j2 - klm + l - 1 > *n) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { slartv_(&nrt, &ab_ref(klu1 - l, j1 - klm + l - 1), & inca, &ab_ref(klu1 - l + 1, j1 - klm + l - 1), &inca, &work[mn + j1], &work[j1], &kb1); } /* L10: */ } if (ml > ml0) { if (ml <= *m - i__ + 1) { /* generate plane rotation to annihilate a(i+ml-1,i) within the band, and apply rotation from the left */ slartg_(&ab_ref(*ku + ml - 1, i__), &ab_ref(*ku + ml, i__), &work[mn + i__ + ml - 1], &work[i__ + ml - 1], &ra); ab_ref(*ku + ml - 1, i__) = ra; if (i__ < *n) { /* Computing MIN */ i__4 = *ku + ml - 2, i__5 = *n - i__; i__3 = min(i__4,i__5); i__6 = *ldab - 1; i__7 = *ldab - 1; srot_(&i__3, &ab_ref(*ku + ml - 2, i__ + 1), & i__6, &ab_ref(*ku + ml - 1, i__ + 1), & i__7, &work[mn + i__ + ml - 1], &work[i__ + ml - 1]); } } ++nr; j1 -= kb1; } if (wantq) { /* accumulate product of plane rotations in Q */ i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { srot_(m, &q_ref(1, j - 1), &c__1, &q_ref(1, j), &c__1, &work[mn + j], &work[j]); /* L20: */ } } if (wantc) { /* apply plane rotations to C */ i__4 = j2; i__3 = kb1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { srot_(ncc, &c___ref(j - 1, 1), ldc, &c___ref(j, 1), ldc, &work[mn + j], &work[j]); /* L30: */ } } if (j2 + kun > *n) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 -= kb1; } i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* create nonzero element a(j-1,j+ku) above the band and store it in WORK(n+1:2*n) */ work[j + kun] = work[j] * ab_ref(1, j + kun); ab_ref(1, j + kun) = work[mn + j] * ab_ref(1, j + kun); /* L40: */ } /* generate plane rotations to annihilate nonzero elements which have been generated above the band */ if (nr > 0) { slargv_(&nr, &ab_ref(1, j1 + kun - 1), &inca, &work[j1 + kun], &kb1, &work[mn + j1 + kun], &kb1); } /* apply plane rotations from the right */ i__4 = kb; for (l = 1; l <= i__4; ++l) { if (j2 + l - 1 > *m) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { slartv_(&nrt, &ab_ref(l + 1, j1 + kun - 1), &inca, & ab_ref(l, j1 + kun), &inca, &work[mn + j1 + kun], &work[j1 + kun], &kb1); } /* L50: */ } if (ml == ml0 && mu > mu0) { if (mu <= *n - i__ + 1) { /* generate plane rotation to annihilate a(i,i+mu-1) within the band, and apply rotation from the right */ slartg_(&ab_ref(*ku - mu + 3, i__ + mu - 2), &ab_ref(* ku - mu + 2, i__ + mu - 1), &work[mn + i__ + mu - 1], &work[i__ + mu - 1], &ra); ab_ref(*ku - mu + 3, i__ + mu - 2) = ra; /* Computing MIN */ i__3 = *kl + mu - 2, i__5 = *m - i__; i__4 = min(i__3,i__5); srot_(&i__4, &ab_ref(*ku - mu + 4, i__ + mu - 2), & c__1, &ab_ref(*ku - mu + 3, i__ + mu - 1), & c__1, &work[mn + i__ + mu - 1], &work[i__ + mu - 1]); } ++nr; j1 -= kb1; } if (wantpt) { /* accumulate product of plane rotations in P' */ i__4 = j2; i__3 = kb1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { srot_(n, &pt_ref(j + kun - 1, 1), ldpt, &pt_ref(j + kun, 1), ldpt, &work[mn + j + kun], &work[j + kun]); /* L60: */ } } if (j2 + kb > *m) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 -= kb1; } i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* create nonzero element a(j+kl+ku,j+ku-1) below the band and store it in WORK(1:n) */ work[j + kb] = work[j + kun] * ab_ref(klu1, j + kun); ab_ref(klu1, j + kun) = work[mn + j + kun] * ab_ref(klu1, j + kun); /* L70: */ } if (ml > ml0) { --ml; } else { --mu; } /* L80: */ } /* L90: */ } } if (*ku == 0 && *kl > 0) { /* A has been reduced to lower bidiagonal form Transform lower bidiagonal form to upper bidiagonal by applying plane rotations from the left, storing diagonal elements in D and off-diagonal elements in E Computing MIN */ i__2 = *m - 1; i__1 = min(i__2,*n); for (i__ = 1; i__ <= i__1; ++i__) { slartg_(&ab_ref(1, i__), &ab_ref(2, i__), &rc, &rs, &ra); d__[i__] = ra; if (i__ < *n) { e[i__] = rs * ab_ref(1, i__ + 1); ab_ref(1, i__ + 1) = rc * ab_ref(1, i__ + 1); } if (wantq) { srot_(m, &q_ref(1, i__), &c__1, &q_ref(1, i__ + 1), &c__1, & rc, &rs); } if (wantc) { srot_(ncc, &c___ref(i__, 1), ldc, &c___ref(i__ + 1, 1), ldc, & rc, &rs); } /* L100: */ } if (*m <= *n) { d__[*m] = ab_ref(1, *m); } } else if (*ku > 0) { /* A has been reduced to upper bidiagonal form */ if (*m < *n) { /* Annihilate a(m,m+1) by applying plane rotations from the right, storing diagonal elements in D and off-diagonal elements in E */ rb = ab_ref(*ku, *m + 1); for (i__ = *m; i__ >= 1; --i__) { slartg_(&ab_ref(*ku + 1, i__), &rb, &rc, &rs, &ra); d__[i__] = ra; if (i__ > 1) { rb = -rs * ab_ref(*ku, i__); e[i__ - 1] = rc * ab_ref(*ku, i__); } if (wantpt) { srot_(n, &pt_ref(i__, 1), ldpt, &pt_ref(*m + 1, 1), ldpt, &rc, &rs); } /* L110: */ } } else { /* Copy off-diagonal elements to E and diagonal elements to D */ i__1 = minmn - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = ab_ref(*ku, i__ + 1); /* L120: */ } i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab_ref(*ku + 1, i__); /* L130: */ } } } else { /* A is diagonal. Set elements of E to zero and copy diagonal elements to D. */ i__1 = minmn - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = 0.f; /* L140: */ } i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab_ref(1, i__); /* L150: */ } } return 0; /* End of SGBBRD */ } /* sgbbrd_ */
/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * work, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University December 1, 1999 Purpose ======= SLALS0 applies back the multiplying factors of either the left or the right singular vector matrix of a diagonal matrix appended by a row to the right hand side matrix B in solving the least squares problem using the divide-and-conquer SVD approach. For the left singular vector matrix, three types of orthogonal matrices are involved: (1L) Givens rotations: the number of such rotations is GIVPTR; the pairs of columns/rows they were applied to are stored in GIVCOL; and the C- and S-values of these rotations are stored in GIVNUM. (2L) Permutation. The (NL+1)-st row of B is to be moved to the first row, and for J=2:N, PERM(J)-th row of B is to be moved to the J-th row. (3L) The left singular vector matrix of the remaining matrix. For the right singular vector matrix, four types of orthogonal matrices are involved: (1R) The right singular vector matrix of the remaining matrix. (2R) If SQRE = 1, one extra Givens rotation to generate the right null space. (3R) The inverse transformation of (2L). (4R) The inverse transformation of (1L). Arguments ========= ICOMPQ (input) INTEGER Specifies whether singular vectors are to be computed in factored form: = 0: Left singular vector matrix. = 1: Right singular vector matrix. NL (input) INTEGER The row dimension of the upper block. NL >= 1. NR (input) INTEGER The row dimension of the lower block. NR >= 1. SQRE (input) INTEGER = 0: the lower block is an NR-by-NR square matrix. = 1: the lower block is an NR-by-(NR+1) rectangular matrix. The bidiagonal matrix has row dimension N = NL + NR + 1, and column dimension M = N + SQRE. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input/output) REAL array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B. LDB must be at least max(1,MAX( M, N ) ). BX (workspace) REAL array, dimension ( LDBX, NRHS ) LDBX (input) INTEGER The leading dimension of BX. PERM (input) INTEGER array, dimension ( N ) The permutations (from deflation and sorting) applied to the two blocks. GIVPTR (input) INTEGER The number of Givens rotations which took place in this subproblem. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) Each pair of numbers indicates a pair of rows/columns involved in a Givens rotation. LDGCOL (input) INTEGER The leading dimension of GIVCOL, must be at least N. GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) Each number indicates the C or S value used in the corresponding Givens rotation. LDGNUM (input) INTEGER The leading dimension of arrays DIFR, POLES and GIVNUM, must be at least K. POLES (input) REAL array, dimension ( LDGNUM, 2 ) On entry, POLES(1:K, 1) contains the new singular values obtained from solving the secular equation, and POLES(1:K, 2) is an array containing the poles in the secular equation. DIFL (input) REAL array, dimension ( K ). On entry, DIFL(I) is the distance between I-th updated (undeflated) singular value and the I-th (undeflated) old singular value. DIFR (input) REAL array, dimension ( LDGNUM, 2 ). On entry, DIFR(I, 1) contains the distances between I-th updated (undeflated) singular value and the I+1-th (undeflated) old singular value. And DIFR(I, 2) is the normalizing factor for the I-th right singular vector. Z (input) REAL array, dimension ( K ) Contain the components of the deflation-adjusted updating row vector. K (input) INTEGER Contains the dimension of the non-deflated matrix, This is the order of the related secular equation. 1 <= K <=N. C (input) REAL C contains garbage if SQRE =0 and the C-value of a Givens rotation related to the right null space if SQRE = 1. S (input) REAL S contains garbage if SQRE =0 and the S-value of a Givens rotation related to the right null space if SQRE = 1. WORK (workspace) REAL array, dimension ( K ) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== Based on contributions by Ming Gu and Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA Osni Marques, LBNL/NERSC, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b5 = -1.f; static integer c__1 = 1; static real c_b11 = 1.f; static real c_b13 = 0.f; static integer c__0 = 0; /* System generated locals */ integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, i__1, i__2; real r__1; /* Local variables */ static real temp; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); extern doublereal snrm2_(integer *, real *, integer *); static integer i__, j, m, n; static real diflj, difrj, dsigj; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), scopy_( integer *, real *, integer *, real *, integer *); extern doublereal slamc3_(real *, real *); static real dj; extern /* Subroutine */ int xerbla_(char *, integer *); static real dsigjp; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *); static integer nlp1; #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_ref(a_1,a_2) bx[(a_2)*bx_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; --perm; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; difr_dim1 = *ldgnum; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; poles_dim1 = *ldgnum; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; givnum_dim1 = *ldgnum; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; --difl; --z__; --work; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } n = *nl + *nr + 1; if (*nrhs < 1) { *info = -5; } else if (*ldb < n) { *info = -7; } else if (*ldbx < n) { *info = -9; } else if (*givptr < 0) { *info = -11; } else if (*ldgcol < n) { *info = -13; } else if (*ldgnum < n) { *info = -15; } else if (*k < 1) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("SLALS0", &i__1); return 0; } m = n + *sqre; nlp1 = *nl + 1; if (*icompq == 0) { /* Apply back orthogonal transformations from the left. Step (1L): apply back the Givens rotations performed. */ i__1 = *givptr; for (i__ = 1; i__ <= i__1; ++i__) { srot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(givcol_ref( i__, 1), 1), ldb, &givnum_ref(i__, 2), &givnum_ref(i__, 1) ); /* L10: */ } /* Step (2L): permute rows of B. */ scopy_(nrhs, &b_ref(nlp1, 1), ldb, &bx_ref(1, 1), ldbx); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { scopy_(nrhs, &b_ref(perm[i__], 1), ldb, &bx_ref(i__, 1), ldbx); /* L20: */ } /* Step (3L): apply the inverse of the left singular vector matrix to BX. */ if (*k == 1) { scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); if (z__[1] < 0.f) { sscal_(nrhs, &c_b5, &b[b_offset], ldb); } } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = poles_ref(j, 1); dsigj = -poles_ref(j, 2); if (j < *k) { difrj = -difr_ref(j, 1); dsigjp = -poles_ref(j + 1, 2); } if (z__[j] == 0.f || poles_ref(j, 2) == 0.f) { work[j] = 0.f; } else { work[j] = -poles_ref(j, 2) * z__[j] / diflj / (poles_ref( j, 2) + dj); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) { work[i__] = 0.f; } else { work[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(& poles_ref(i__, 2), &dsigj) - diflj) / ( poles_ref(i__, 2) + dj); } /* L30: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) { work[i__] = 0.f; } else { work[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(& poles_ref(i__, 2), &dsigjp) + difrj) / ( poles_ref(i__, 2) + dj); } /* L40: */ } work[1] = -1.f; temp = snrm2_(k, &work[1], &c__1); sgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], & c__1, &c_b13, &b_ref(j, 1), ldb); slascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b_ref( j, 1), ldb, info); /* L50: */ } } /* Move the deflated rows of BX to B also. */ if (*k < max(m,n)) { i__1 = n - *k; slacpy_("A", &i__1, nrhs, &bx_ref(*k + 1, 1), ldbx, &b_ref(*k + 1, 1), ldb); } } else { /* Apply back the right orthogonal transformations. Step (1R): apply back the new right singular vector matrix to B. */ if (*k == 1) { scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { dsigj = poles_ref(j, 2); if (z__[j] == 0.f) { work[j] = 0.f; } else { work[j] = -z__[j] / difl[j] / (dsigj + poles_ref(j, 1)) / difr_ref(j, 2); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { work[i__] = 0.f; } else { r__1 = -poles_ref(i__ + 1, 2); work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr_ref(i__, 1)) / (dsigj + poles_ref(i__, 1) ) / difr_ref(i__, 2); } /* L60: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { work[i__] = 0.f; } else { r__1 = -poles_ref(i__, 2); work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[ i__]) / (dsigj + poles_ref(i__, 1)) / difr_ref(i__, 2); } /* L70: */ } sgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], & c__1, &c_b13, &bx_ref(j, 1), ldbx); /* L80: */ } } /* Step (2R): if SQRE = 1, apply back the rotation that is related to the right null space of the subproblem. */ if (*sqre == 1) { scopy_(nrhs, &b_ref(m, 1), ldb, &bx_ref(m, 1), ldbx); srot_(nrhs, &bx_ref(1, 1), ldbx, &bx_ref(m, 1), ldbx, c__, s); } if (*k < max(m,n)) { i__1 = n - *k; slacpy_("A", &i__1, nrhs, &b_ref(*k + 1, 1), ldb, &bx_ref(*k + 1, 1), ldbx); } /* Step (3R): permute rows of B. */ scopy_(nrhs, &bx_ref(1, 1), ldbx, &b_ref(nlp1, 1), ldb); if (*sqre == 1) { scopy_(nrhs, &bx_ref(m, 1), ldbx, &b_ref(m, 1), ldb); } i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { scopy_(nrhs, &bx_ref(i__, 1), ldbx, &b_ref(perm[i__], 1), ldb); /* L90: */ } /* Step (4R): apply back the Givens rotations performed. */ for (i__ = *givptr; i__ >= 1; --i__) { r__1 = -givnum_ref(i__, 1); srot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(givcol_ref( i__, 1), 1), ldb, &givnum_ref(i__, 2), &r__1); /* L100: */ } } return 0; /* End of SLALS0 */ } /* slals0_ */
/* Subroutine */ int slaeda_(integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *q, integer *qptr, real *z__, real *ztemp, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; /* Builtin functions */ integer pow_ii(integer *, integer *); double sqrt(doublereal); /* Local variables */ integer i__, k, mid, ptr, curr; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); integer bsiz1, bsiz2, psiz1, psiz2, zptr1; extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAEDA computes the Z vector corresponding to the merge step in the */ /* CURLVLth step of the merge process with TLVLS steps for the CURPBMth */ /* problem. */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The dimension of the symmetric tridiagonal matrix. N >= 0. */ /* TLVLS (input) INTEGER */ /* The total number of merging levels in the overall divide and */ /* conquer tree. */ /* CURLVL (input) INTEGER */ /* The current level in the overall merge routine, */ /* 0 <= curlvl <= tlvls. */ /* CURPBM (input) INTEGER */ /* The current problem in the current level in the overall */ /* merge routine (counting from upper left to lower right). */ /* PRMPTR (input) INTEGER array, dimension (N lg N) */ /* Contains a list of pointers which indicate where in PERM a */ /* level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) */ /* indicates the size of the permutation and incidentally the */ /* size of the full, non-deflated problem. */ /* PERM (input) INTEGER array, dimension (N lg N) */ /* Contains the permutations (from deflation and sorting) to be */ /* applied to each eigenblock. */ /* GIVPTR (input) INTEGER array, dimension (N lg N) */ /* Contains a list of pointers which indicate where in GIVCOL a */ /* level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) */ /* indicates the number of Givens rotations. */ /* GIVCOL (input) INTEGER array, dimension (2, N lg N) */ /* Each pair of numbers indicates a pair of columns to take place */ /* in a Givens rotation. */ /* GIVNUM (input) REAL array, dimension (2, N lg N) */ /* Each number indicates the S value to be used in the */ /* corresponding Givens rotation. */ /* Q (input) REAL array, dimension (N**2) */ /* Contains the square eigenblocks from previous levels, the */ /* starting positions for blocks are given by QPTR. */ /* QPTR (input) INTEGER array, dimension (N+2) */ /* Contains a list of pointers which indicate where in Q an */ /* eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates */ /* the size of the block. */ /* Z (output) REAL array, dimension (N) */ /* On output this vector contains the updating vector (the last */ /* row of the first sub-eigenvector matrix and the first row of */ /* the second sub-eigenvector matrix). */ /* ZTEMP (workspace) REAL array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ztemp; --z__; --qptr; --q; givnum -= 3; givcol -= 3; --givptr; --perm; --prmptr; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAEDA", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine location of first number in second half. */ mid = *n / 2 + 1; /* Gather last/first rows of appropriate eigenblocks into center of Z */ ptr = 1; /* Determine location of lowest level subproblem in the full storage */ /* scheme */ i__1 = *curlvl - 1; curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; /* Determine size of these matrices. We add HALF to the value of */ /* the SQRT in case the machine underestimates one of these square */ /* roots. */ bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f); i__1 = mid - bsiz1 - 1; for (k = 1; k <= i__1; ++k) { z__[k] = 0.f; /* L10: */ } scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & c__1); scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); i__1 = *n; for (k = mid + bsiz2; k <= i__1; ++k) { z__[k] = 0.f; /* L20: */ } /* Loop thru remaining levels 1 -> CURLVL applying the Givens */ /* rotations and permutation and then multiplying the center matrices */ /* against the current Z. */ ptr = pow_ii(&c__2, tlvls) + 1; i__1 = *curlvl - 1; for (k = 1; k <= i__1; ++k) { i__2 = *curlvl - k; i__3 = *curlvl - k - 1; curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - 1; psiz1 = prmptr[curr + 1] - prmptr[curr]; psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; zptr1 = mid - psiz1; /* Apply Givens at CURR and CURR+1 */ i__2 = givptr[curr + 1] - 1; for (i__ = givptr[curr]; i__ <= i__2; ++i__) { srot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( i__ << 1) + 1], &givnum[(i__ << 1) + 2]); /* L30: */ } i__2 = givptr[curr + 2] - 1; for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { srot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << 1) + 1], &givnum[(i__ << 1) + 2]); /* L40: */ } psiz1 = prmptr[curr + 1] - prmptr[curr]; psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; i__2 = psiz1 - 1; for (i__ = 0; i__ <= i__2; ++i__) { ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; /* L50: */ } i__2 = psiz2 - 1; for (i__ = 0; i__ <= i__2; ++i__) { ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - 1]; /* L60: */ } /* Multiply Blocks at CURR and CURR+1 */ /* Determine size of these matrices. We add HALF to the value of */ /* the SQRT in case the machine underestimates one of these */ /* square roots. */ bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f); if (bsiz1 > 0) { sgemv_("T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, & ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1); } i__2 = psiz1 - bsiz1; scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); if (bsiz2 > 0) { sgemv_("T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, & ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1); } i__2 = psiz2 - bsiz2; scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & c__1); i__2 = *tlvls - k; ptr += pow_ii(&c__2, &i__2); /* L70: */ } return 0; /* End of SLAEDA */ } /* slaeda_ */
/* Subroutine */ int slalsd_(char *uplo, integer *smlsiz, integer *n, integer *nrhs, real *d__, real *e, real *b, integer *ldb, real *rcond, integer *rank, real *work, integer *iwork, integer *info) { /* System generated locals */ integer b_dim1, b_offset, i__1, i__2; real r__1; /* Builtin functions */ double log(doublereal), r_sign(real *, real *); /* Local variables */ static integer difl, difr, perm, nsub, nlvl, sqre, bxst; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static integer c__, i__, j, k; static real r__; static integer s, u, z__; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static integer poles, sizei, nsize; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); static integer nwork, icmpq1, icmpq2; extern doublereal sopbl3_(char *, integer *, integer *, integer *) ; static real cs; static integer bx; static real sn; static integer st; extern /* Subroutine */ int slasda_(integer *, integer *, integer *, integer *, real *, real *, real *, integer *, real *, integer *, real *, real *, real *, real *, integer *, integer *, integer *, integer *, real *, real *, real *, real *, integer *, integer *); extern doublereal slamch_(char *); static integer vt; extern /* Subroutine */ int xerbla_(char *, integer *), slalsa_( integer *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, integer *, integer *, integer *, integer * , real *, real *, real *, real *, integer *, integer *), slascl_( char *, integer *, integer *, real *, real *, integer *, integer * , real *, integer *, integer *); static integer givcol; extern integer isamax_(integer *, real *, integer *); extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer *, integer *, integer *, real *, real *, real *, integer *, real * , integer *, real *, integer *, real *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slartg_(real *, real *, real *, real *, real * ), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); static real orgnrm; static integer givnum; extern doublereal slanst_(char *, integer *, real *, real *); extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); static integer givptr, nm1, smlszp, st1; static real eps; static integer iwk; static real tol; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] /* -- LAPACK routine (instrumented to count ops, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= SLALSD uses the singular value decomposition of A to solve the least squares problem of finding X to minimize the Euclidean norm of each column of A*X-B, where A is N-by-N upper bidiagonal, and X and B are N-by-NRHS. The solution X overwrites B. The singular values of A smaller than RCOND times the largest singular value are treated as zero in solving the least squares problem; in this case a minimum norm solution is returned. The actual singular values are returned in D in ascending order. This code makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= UPLO (input) CHARACTER*1 = 'U': D and E define an upper bidiagonal matrix. = 'L': D and E define a lower bidiagonal matrix. SMLSIZ (input) INTEGER The maximum size of the subproblems at the bottom of the computation tree. N (input) INTEGER The dimension of the bidiagonal matrix. N >= 0. NRHS (input) INTEGER The number of columns of B. NRHS must be at least 1. D (input/output) REAL array, dimension (N) On entry D contains the main diagonal of the bidiagonal matrix. On exit, if INFO = 0, D contains its singular values. E (input) REAL array, dimension (N-1) Contains the super-diagonal entries of the bidiagonal matrix. On exit, E has been destroyed. B (input/output) REAL array, dimension (LDB,NRHS) On input, B contains the right hand sides of the least squares problem. On output, B contains the solution X. LDB (input) INTEGER The leading dimension of B in the calling subprogram. LDB must be at least max(1,N). RCOND (input) REAL The singular values of A less than or equal to RCOND times the largest singular value are treated as zero in solving the least squares problem. If RCOND is negative, machine precision is used instead. For example, if diag(S)*X=B were the least squares problem, where diag(S) is a diagonal matrix of singular values, the solution would be X(i) = B(i) / S(i) if S(i) is greater than RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to RCOND*max(S). RANK (output) INTEGER The number of singular values of A greater than RCOND times the largest singular value. WORK (workspace) REAL array, dimension at least (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). IWORK (workspace) INTEGER array, dimension at least (3 * N * NLVL + 11 * N) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: The algorithm failed to compute an singular value while working on the submatrix lying in rows and columns INFO/(N+1) through MOD(INFO,N+1). ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --e; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --work; --iwork; /* Function Body */ *info = 0; if (*n < 0) { *info = -3; } else if (*nrhs < 1) { *info = -4; } else if (*ldb < 1 || *ldb < *n) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("SLALSD", &i__1); return 0; } eps = slamch_("Epsilon"); /* Set up the tolerance. */ if (*rcond <= 0.f || *rcond >= 1.f) { *rcond = eps; } *rank = 0; /* Quick return if possible. */ if (*n == 0) { return 0; } else if (*n == 1) { if (d__[1] == 0.f) { slaset_("A", &c__1, nrhs, &c_b6, &c_b6, &b[b_offset], ldb); } else { *rank = 1; latime_1.ops += (real) (*nrhs << 1); slascl_("G", &c__0, &c__0, &d__[1], &c_b11, &c__1, nrhs, &b[ b_offset], ldb, info); d__[1] = dabs(d__[1]); } return 0; } /* Rotate the matrix if it is lower bidiagonal. */ if (*(unsigned char *)uplo == 'L') { latime_1.ops += (real) ((*n - 1) * 6); i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; if (*nrhs == 1) { latime_1.ops += 6.f; srot_(&c__1, &b_ref(i__, 1), &c__1, &b_ref(i__ + 1, 1), &c__1, &cs, &sn); } else { work[(i__ << 1) - 1] = cs; work[i__ * 2] = sn; } /* L10: */ } if (*nrhs > 1) { latime_1.ops += (real) ((*n - 1) * 6 * *nrhs); i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *n - 1; for (j = 1; j <= i__2; ++j) { cs = work[(j << 1) - 1]; sn = work[j * 2]; srot_(&c__1, &b_ref(j, i__), &c__1, &b_ref(j + 1, i__), & c__1, &cs, &sn); /* L20: */ } /* L30: */ } } } /* Scale. */ nm1 = *n - 1; orgnrm = slanst_("M", n, &d__[1], &e[1]); if (orgnrm == 0.f) { slaset_("A", n, nrhs, &c_b6, &c_b6, &b[b_offset], ldb); return 0; } latime_1.ops += (real) (*n + nm1); slascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, &c__1, &d__[1], n, info); slascl_("G", &c__0, &c__0, &orgnrm, &c_b11, &nm1, &c__1, &e[1], &nm1, info); /* If N is smaller than the minimum divide size SMLSIZ, then solve the problem with another solver. */ if (*n <= *smlsiz) { nwork = *n * *n + 1; slaset_("A", n, n, &c_b6, &c_b11, &work[1], n); slasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & work[1], n, &b[b_offset], ldb, &work[nwork], info); if (*info != 0) { return 0; } latime_1.ops += 1.f; tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1)); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] <= tol) { slaset_("A", &c__1, nrhs, &c_b6, &c_b6, &b_ref(i__, 1), ldb); } else { latime_1.ops += (real) (*nrhs); slascl_("G", &c__0, &c__0, &d__[i__], &c_b11, &c__1, nrhs, & b_ref(i__, 1), ldb, info); ++(*rank); } /* L40: */ } latime_1.ops += sopbl3_("SGEMM ", n, nrhs, n); sgemm_("T", "N", n, nrhs, n, &c_b11, &work[1], n, &b[b_offset], ldb, & c_b6, &work[nwork], n); slacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); /* Unscale. */ latime_1.ops += (real) (*n + *n * *nrhs); slascl_("G", &c__0, &c__0, &c_b11, &orgnrm, n, &c__1, &d__[1], n, info); slasrt_("D", n, &d__[1], info); slascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, nrhs, &b[b_offset], ldb, info); return 0; } /* Book-keeping and setting up some constants. */ nlvl = (integer) (log((real) (*n) / (real) (*smlsiz + 1)) / log(2.f)) + 1; smlszp = *smlsiz + 1; u = 1; vt = *smlsiz * *n + 1; difl = vt + smlszp * *n; difr = difl + nlvl * *n; z__ = difr + (nlvl * *n << 1); c__ = z__ + nlvl * *n; s = c__ + *n; poles = s + *n; givnum = poles + (nlvl << 1) * *n; bx = givnum + (nlvl << 1) * *n; nwork = bx + *n * *nrhs; sizei = *n + 1; k = sizei + *n; givptr = k + *n; perm = givptr + *n; givcol = perm + nlvl * *n; iwk = givcol + (nlvl * *n << 1); st = 1; sqre = 0; icmpq1 = 1; icmpq2 = 0; nsub = 0; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if ((r__1 = d__[i__], dabs(r__1)) < eps) { d__[i__] = r_sign(&eps, &d__[i__]); } /* L50: */ } i__1 = nm1; for (i__ = 1; i__ <= i__1; ++i__) { if ((r__1 = e[i__], dabs(r__1)) < eps || i__ == nm1) { ++nsub; iwork[nsub] = st; /* Subproblem found. First determine its size and then apply divide and conquer on it. */ if (i__ < nm1) { /* A subproblem with E(I) small for I < NM1. */ nsize = i__ - st + 1; iwork[sizei + nsub - 1] = nsize; } else if ((r__1 = e[i__], dabs(r__1)) >= eps) { /* A subproblem with E(NM1) not too small but I = NM1. */ nsize = *n - st + 1; iwork[sizei + nsub - 1] = nsize; } else { /* A subproblem with E(NM1) small. This implies an 1-by-1 subproblem at D(N), which is not solved explicitly. */ nsize = i__ - st + 1; iwork[sizei + nsub - 1] = nsize; ++nsub; iwork[nsub] = *n; iwork[sizei + nsub - 1] = 1; scopy_(nrhs, &b_ref(*n, 1), ldb, &work[bx + nm1], n); } st1 = st - 1; if (nsize == 1) { /* This is a 1-by-1 subproblem and is not solved explicitly. */ scopy_(nrhs, &b_ref(st, 1), ldb, &work[bx + st1], n); } else if (nsize <= *smlsiz) { /* This is a small subproblem and is solved by SLASDQ. */ slaset_("A", &nsize, &nsize, &c_b6, &c_b11, &work[vt + st1], n); slasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ st], &work[vt + st1], n, &work[nwork], n, &b_ref(st, 1), ldb, &work[nwork], info); if (*info != 0) { return 0; } slacpy_("A", &nsize, nrhs, &b_ref(st, 1), ldb, &work[bx + st1] , n); } else { /* A large problem. Solve it using divide and conquer. */ slasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & work[u + st1], n, &work[vt + st1], &iwork[k + st1], & work[difl + st1], &work[difr + st1], &work[z__ + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[iwk], info); if (*info != 0) { return 0; } bxst = bx + st1; slalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b_ref(st, 1), ldb, & work[bxst], n, &work[u + st1], n, &work[vt + st1], & iwork[k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[givcol + st1], n, &iwork[perm + st1], & work[givnum + st1], &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[iwk], info); if (*info != 0) { return 0; } } st = i__ + 1; } /* L60: */ } /* Apply the singular values and treat the tiny ones as zero. */ tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1)); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Some of the elements in D can be negative because 1-by-1 subproblems were not solved explicitly. */ if ((r__1 = d__[i__], dabs(r__1)) <= tol) { slaset_("A", &c__1, nrhs, &c_b6, &c_b6, &work[bx + i__ - 1], n); } else { ++(*rank); latime_1.ops += (real) (*nrhs); slascl_("G", &c__0, &c__0, &d__[i__], &c_b11, &c__1, nrhs, &work[ bx + i__ - 1], n, info); } d__[i__] = (r__1 = d__[i__], dabs(r__1)); /* L70: */ } /* Now apply back the right singular vectors. */ icmpq2 = 1; i__1 = nsub; for (i__ = 1; i__ <= i__1; ++i__) { st = iwork[i__]; st1 = st - 1; nsize = iwork[sizei + i__ - 1]; bxst = bx + st1; if (nsize == 1) { scopy_(nrhs, &work[bxst], n, &b_ref(st, 1), ldb); } else if (nsize <= *smlsiz) { latime_1.ops += sopbl3_("SGEMM ", &nsize, nrhs, &nsize) ; sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b11, &work[vt + st1], n, &work[bxst], n, &c_b6, &b_ref(st, 1), ldb); } else { slalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b_ref(st, 1), ldb, &work[u + st1], n, &work[vt + st1], &iwork[k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ iwk], info); if (*info != 0) { return 0; } } /* L80: */ } /* Unscale and sort the singular values. */ latime_1.ops += (real) (*n + *n * *nrhs); slascl_("G", &c__0, &c__0, &c_b11, &orgnrm, n, &c__1, &d__[1], n, info); slasrt_("D", n, &d__[1], info); slascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, nrhs, &b[b_offset], ldb, info); return 0; /* End of SLALSD */ } /* slalsd_ */
int sbdsqr_(char *uplo, int *n, int *ncvt, int * nru, int *ncc, float *d__, float *e, float *vt, int *ldvt, float * u, int *ldu, float *c__, int *ldc, float *work, int *info) { /* System generated locals */ int c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; float r__1, r__2, r__3, r__4; double d__1; /* Builtin functions */ double pow_dd(double *, double *), sqrt(double), r_sign(float * , float *); /* Local variables */ float f, g, h__; int i__, j, m; float r__, cs; int ll; float sn, mu; int nm1, nm12, nm13, lll; float eps, sll, tol, abse; int idir; float abss; int oldm; float cosl; int isub, iter; float unfl, sinl, cosr, smin, smax, sinr; extern int srot_(int *, float *, int *, float *, int *, float *, float *), slas2_(float *, float *, float *, float *, float *); extern int lsame_(char *, char *); float oldcs; extern int sscal_(int *, float *, float *, int *); int oldll; float shift, sigmn, oldsn; int maxit; float sminl; extern int slasr_(char *, char *, char *, int *, int *, float *, float *, float *, int *); float sigmx; int lower; extern int sswap_(int *, float *, int *, float *, int *), slasq1_(int *, float *, float *, float *, int *), slasv2_(float *, float *, float *, float *, float *, float *, float *, float *, float *); extern double slamch_(char *); extern int xerbla_(char *, int *); float sminoa; extern int slartg_(float *, float *, float *, float *, float * ); float thresh; int rotate; float tolmul; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* January 2007 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SBDSQR computes the singular values and, optionally, the right and/or */ /* left singular vectors from the singular value decomposition (SVD) of */ /* a float N-by-N (upper or lower) bidiagonal matrix B using the implicit */ /* zero-shift QR algorithm. The SVD of B has the form */ /* B = Q * S * P**T */ /* where S is the diagonal matrix of singular values, Q is an orthogonal */ /* matrix of left singular vectors, and P is an orthogonal matrix of */ /* right singular vectors. If left singular vectors are requested, this */ /* subroutine actually returns U*Q instead of Q, and, if right singular */ /* vectors are requested, this subroutine returns P**T*VT instead of */ /* P**T, for given float input matrices U and VT. When U and VT are the */ /* orthogonal matrices that reduce a general matrix A to bidiagonal */ /* form: A = U*B*VT, as computed by SGEBRD, then */ /* A = (U*Q) * S * (P**T*VT) */ /* is the SVD of A. Optionally, the subroutine may also compute Q**T*C */ /* for a given float input matrix C. */ /* See "Computing Small Singular Values of Bidiagonal Matrices With */ /* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */ /* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, */ /* no. 5, pp. 873-912, Sept 1990) and */ /* "Accurate singular values and differential qd algorithms," by */ /* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics */ /* Department, University of California at Berkeley, July 1992 */ /* for a detailed description of the algorithm. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': B is upper bidiagonal; */ /* = 'L': B is lower bidiagonal. */ /* N (input) INTEGER */ /* The order of the matrix B. N >= 0. */ /* NCVT (input) INTEGER */ /* The number of columns of the matrix VT. NCVT >= 0. */ /* NRU (input) INTEGER */ /* The number of rows of the matrix U. NRU >= 0. */ /* NCC (input) INTEGER */ /* The number of columns of the matrix C. NCC >= 0. */ /* D (input/output) REAL array, dimension (N) */ /* On entry, the n diagonal elements of the bidiagonal matrix B. */ /* On exit, if INFO=0, the singular values of B in decreasing */ /* order. */ /* E (input/output) REAL array, dimension (N-1) */ /* On entry, the N-1 offdiagonal elements of the bidiagonal */ /* matrix B. */ /* On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E */ /* will contain the diagonal and superdiagonal elements of a */ /* bidiagonal matrix orthogonally equivalent to the one given */ /* as input. */ /* VT (input/output) REAL array, dimension (LDVT, NCVT) */ /* On entry, an N-by-NCVT matrix VT. */ /* On exit, VT is overwritten by P**T * VT. */ /* Not referenced if NCVT = 0. */ /* LDVT (input) INTEGER */ /* The leading dimension of the array VT. */ /* LDVT >= MAX(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. */ /* U (input/output) REAL array, dimension (LDU, N) */ /* On entry, an NRU-by-N matrix U. */ /* On exit, U is overwritten by U * Q. */ /* Not referenced if NRU = 0. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= MAX(1,NRU). */ /* C (input/output) REAL array, dimension (LDC, NCC) */ /* On entry, an N-by-NCC matrix C. */ /* On exit, C is overwritten by Q**T * C. */ /* Not referenced if NCC = 0. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. */ /* LDC >= MAX(1,N) if NCC > 0; LDC >=1 if NCC = 0. */ /* WORK (workspace) REAL array, dimension (4*N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: If INFO = -i, the i-th argument had an illegal value */ /* > 0: */ /* if NCVT = NRU = NCC = 0, */ /* = 1, a split was marked by a positive value in E */ /* = 2, current block of Z not diagonalized after 30*N */ /* iterations (in inner while loop) */ /* = 3, termination criterion of outer while loop not met */ /* (program created more than N unreduced blocks) */ /* else NCVT = NRU = NCC = 0, */ /* the algorithm did not converge; D and E contain the */ /* elements of a bidiagonal matrix which is orthogonally */ /* similar to the input matrix B; if INFO = i, i */ /* elements of E have not converged to zero. */ /* Internal Parameters */ /* =================== */ /* TOLMUL REAL, default = MAX(10,MIN(100,EPS**(-1/8))) */ /* TOLMUL controls the convergence criterion of the QR loop. */ /* If it is positive, TOLMUL*EPS is the desired relative */ /* precision in the computed singular values. */ /* If it is negative, ABS(TOLMUL*EPS*sigma_max) is the */ /* desired absolute accuracy in the computed singular */ /* values (corresponds to relative accuracy */ /* ABS(TOLMUL*EPS) in the largest singular value. */ /* ABS(TOLMUL) should be between 1 and 1/EPS, and preferably */ /* between 10 (for fast convergence) and .1/EPS */ /* (for there to be some accuracy in the results). */ /* Default is to lose at either one eighth or 2 of the */ /* available decimal digits in each computed singular value */ /* (whichever is smaller). */ /* MAXITR INTEGER, default = 6 */ /* MAXITR controls the maximum number of passes of the */ /* algorithm through its inner loop. The algorithms stops */ /* (and so fails to converge) if the number of passes */ /* through the inner loop exceeds MAXITR*N**2. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --e; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ *info = 0; lower = lsame_(uplo, "L"); if (! lsame_(uplo, "U") && ! lower) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ncvt < 0) { *info = -3; } else if (*nru < 0) { *info = -4; } else if (*ncc < 0) { *info = -5; } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < MAX(1,*n)) { *info = -9; } else if (*ldu < MAX(1,*nru)) { *info = -11; } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < MAX(1,*n)) { *info = -13; } if (*info != 0) { i__1 = -(*info); xerbla_("SBDSQR", &i__1); return 0; } if (*n == 0) { return 0; } if (*n == 1) { goto L160; } /* ROTATE is true if any singular vectors desired, false otherwise */ rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; /* If no singular vectors desired, use qd algorithm */ if (! rotate) { slasq1_(n, &d__[1], &e[1], &work[1], info); return 0; } nm1 = *n - 1; nm12 = nm1 + nm1; nm13 = nm12 + nm1; idir = 0; /* Get machine constants */ eps = slamch_("Epsilon"); unfl = slamch_("Safe minimum"); /* If matrix lower bidiagonal, rotate to be upper bidiagonal */ /* by applying Givens rotations on the left */ if (lower) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; work[i__] = cs; work[nm1 + i__] = sn; /* L10: */ } /* Update singular vectors if desired */ if (*nru > 0) { slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], ldu); } if (*ncc > 0) { slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], ldc); } } /* Compute singular values to relative accuracy TOL */ /* (By setting TOL to be negative, algorithm will compute */ /* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) */ /* Computing MAX */ /* Computing MIN */ d__1 = (double) eps; r__3 = 100.f, r__4 = pow_dd(&d__1, &c_b15); r__1 = 10.f, r__2 = MIN(r__3,r__4); tolmul = MAX(r__1,r__2); tol = tolmul * eps; /* Compute approximate maximum, minimum singular values */ smax = 0.f; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ r__2 = smax, r__3 = (r__1 = d__[i__], ABS(r__1)); smax = MAX(r__2,r__3); /* L20: */ } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ r__2 = smax, r__3 = (r__1 = e[i__], ABS(r__1)); smax = MAX(r__2,r__3); /* L30: */ } sminl = 0.f; if (tol >= 0.f) { /* Relative accuracy desired */ sminoa = ABS(d__[1]); if (sminoa == 0.f) { goto L50; } mu = sminoa; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { mu = (r__2 = d__[i__], ABS(r__2)) * (mu / (mu + (r__1 = e[i__ - 1], ABS(r__1)))); sminoa = MIN(sminoa,mu); if (sminoa == 0.f) { goto L50; } /* L40: */ } L50: sminoa /= sqrt((float) (*n)); /* Computing MAX */ r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl; thresh = MAX(r__1,r__2); } else { /* Absolute accuracy desired */ /* Computing MAX */ r__1 = ABS(tol) * smax, r__2 = *n * 6 * *n * unfl; thresh = MAX(r__1,r__2); } /* Prepare for main iteration loop for the singular values */ /* (MAXIT is the maximum number of passes through the inner */ /* loop permitted before nonconvergence signalled.) */ maxit = *n * 6 * *n; iter = 0; oldll = -1; oldm = -1; /* M points to last element of unconverged part of matrix */ m = *n; /* Begin main iteration loop */ L60: /* Check for convergence or exceeding iteration count */ if (m <= 1) { goto L160; } if (iter > maxit) { goto L200; } /* Find diagonal block of matrix to work on */ if (tol < 0.f && (r__1 = d__[m], ABS(r__1)) <= thresh) { d__[m] = 0.f; } smax = (r__1 = d__[m], ABS(r__1)); smin = smax; i__1 = m - 1; for (lll = 1; lll <= i__1; ++lll) { ll = m - lll; abss = (r__1 = d__[ll], ABS(r__1)); abse = (r__1 = e[ll], ABS(r__1)); if (tol < 0.f && abss <= thresh) { d__[ll] = 0.f; } if (abse <= thresh) { goto L80; } smin = MIN(smin,abss); /* Computing MAX */ r__1 = MAX(smax,abss); smax = MAX(r__1,abse); /* L70: */ } ll = 0; goto L90; L80: e[ll] = 0.f; /* Matrix splits since E(LL) = 0 */ if (ll == m - 1) { /* Convergence of bottom singular value, return to top of loop */ --m; goto L60; } L90: ++ll; /* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ if (ll == m - 1) { /* 2 by 2 block, handle separately */ slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, &sinl, &cosl); d__[m - 1] = sigmx; e[m - 1] = 0.f; d__[m] = sigmn; /* Compute singular vectors, if desired */ if (*ncvt > 0) { srot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & cosr, &sinr); } if (*nru > 0) { srot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & c__1, &cosl, &sinl); } if (*ncc > 0) { srot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & cosl, &sinl); } m += -2; goto L60; } /* If working on new submatrix, choose shift direction */ /* (from larger end diagonal element towards smaller) */ if (ll > oldm || m < oldll) { if ((r__1 = d__[ll], ABS(r__1)) >= (r__2 = d__[m], ABS(r__2))) { /* Chase bulge from top (big end) to bottom (small end) */ idir = 1; } else { /* Chase bulge from bottom (big end) to top (small end) */ idir = 2; } } /* Apply convergence tests */ if (idir == 1) { /* Run convergence test in forward direction */ /* First apply standard test to bottom of matrix */ if ((r__2 = e[m - 1], ABS(r__2)) <= ABS(tol) * (r__1 = d__[m], ABS( r__1)) || tol < 0.f && (r__3 = e[m - 1], ABS(r__3)) <= thresh) { e[m - 1] = 0.f; goto L60; } if (tol >= 0.f) { /* If relative accuracy desired, */ /* apply convergence criterion forward */ mu = (r__1 = d__[ll], ABS(r__1)); sminl = mu; i__1 = m - 1; for (lll = ll; lll <= i__1; ++lll) { if ((r__1 = e[lll], ABS(r__1)) <= tol * mu) { e[lll] = 0.f; goto L60; } mu = (r__2 = d__[lll + 1], ABS(r__2)) * (mu / (mu + (r__1 = e[lll], ABS(r__1)))); sminl = MIN(sminl,mu); /* L100: */ } } } else { /* Run convergence test in backward direction */ /* First apply standard test to top of matrix */ if ((r__2 = e[ll], ABS(r__2)) <= ABS(tol) * (r__1 = d__[ll], ABS( r__1)) || tol < 0.f && (r__3 = e[ll], ABS(r__3)) <= thresh) { e[ll] = 0.f; goto L60; } if (tol >= 0.f) { /* If relative accuracy desired, */ /* apply convergence criterion backward */ mu = (r__1 = d__[m], ABS(r__1)); sminl = mu; i__1 = ll; for (lll = m - 1; lll >= i__1; --lll) { if ((r__1 = e[lll], ABS(r__1)) <= tol * mu) { e[lll] = 0.f; goto L60; } mu = (r__2 = d__[lll], ABS(r__2)) * (mu / (mu + (r__1 = e[ lll], ABS(r__1)))); sminl = MIN(sminl,mu); /* L110: */ } } } oldll = ll; oldm = m; /* Compute shift. First, test if shifting would ruin relative */ /* accuracy, and if so set the shift to zero. */ /* Computing MAX */ r__1 = eps, r__2 = tol * .01f; if (tol >= 0.f && *n * tol * (sminl / smax) <= MAX(r__1,r__2)) { /* Use a zero shift to avoid loss of relative accuracy */ shift = 0.f; } else { /* Compute the shift from 2-by-2 block at end of matrix */ if (idir == 1) { sll = (r__1 = d__[ll], ABS(r__1)); slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); } else { sll = (r__1 = d__[m], ABS(r__1)); slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); } /* Test if shift negligible, and if so set to zero */ if (sll > 0.f) { /* Computing 2nd power */ r__1 = shift / sll; if (r__1 * r__1 < eps) { shift = 0.f; } } } /* Increment iteration count */ iter = iter + m - ll; /* If SHIFT = 0, do simplified QR iteration */ if (shift == 0.f) { if (idir == 1) { /* Chase bulge from top to bottom */ /* Save cosines and sines for later singular vector updates */ cs = 1.f; oldcs = 1.f; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { r__1 = d__[i__] * cs; slartg_(&r__1, &e[i__], &cs, &sn, &r__); if (i__ > ll) { e[i__ - 1] = oldsn * r__; } r__1 = oldcs * r__; r__2 = d__[i__ + 1] * sn; slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); work[i__ - ll + 1] = cs; work[i__ - ll + 1 + nm1] = sn; work[i__ - ll + 1 + nm12] = oldcs; work[i__ - ll + 1 + nm13] = oldsn; /* L120: */ } h__ = d__[m] * cs; d__[m] = h__ * oldcs; e[m - 1] = h__ * oldsn; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1], &c__[ll + c_dim1], ldc); } /* Test convergence */ if ((r__1 = e[m - 1], ABS(r__1)) <= thresh) { e[m - 1] = 0.f; } } else { /* Chase bulge from bottom to top */ /* Save cosines and sines for later singular vector updates */ cs = 1.f; oldcs = 1.f; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { r__1 = d__[i__] * cs; slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__); if (i__ < m) { e[i__] = oldsn * r__; } r__1 = oldcs * r__; r__2 = d__[i__ - 1] * sn; slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); work[i__ - ll] = cs; work[i__ - ll + nm1] = -sn; work[i__ - ll + nm12] = oldcs; work[i__ - ll + nm13] = -oldsn; /* L130: */ } h__ = d__[ll] * cs; d__[ll] = h__ * oldcs; e[ll] = h__ * oldsn; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ nm13 + 1], &vt[ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ ll + c_dim1], ldc); } /* Test convergence */ if ((r__1 = e[ll], ABS(r__1)) <= thresh) { e[ll] = 0.f; } } } else { /* Use nonzero shift */ if (idir == 1) { /* Chase bulge from top to bottom */ /* Save cosines and sines for later singular vector updates */ f = ((r__1 = d__[ll], ABS(r__1)) - shift) * (r_sign(&c_b49, &d__[ ll]) + shift / d__[ll]); g = e[ll]; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { slartg_(&f, &g, &cosr, &sinr, &r__); if (i__ > ll) { e[i__ - 1] = r__; } f = cosr * d__[i__] + sinr * e[i__]; e[i__] = cosr * e[i__] - sinr * d__[i__]; g = sinr * d__[i__ + 1]; d__[i__ + 1] = cosr * d__[i__ + 1]; slartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__] + sinl * d__[i__ + 1]; d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; if (i__ < m - 1) { g = sinl * e[i__ + 1]; e[i__ + 1] = cosl * e[i__ + 1]; } work[i__ - ll + 1] = cosr; work[i__ - ll + 1 + nm1] = sinr; work[i__ - ll + 1 + nm12] = cosl; work[i__ - ll + 1 + nm13] = sinl; /* L140: */ } e[m - 1] = f; /* Update singular vectors */ if (*ncvt > 0) { i__1 = m - ll + 1; slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1], &c__[ll + c_dim1], ldc); } /* Test convergence */ if ((r__1 = e[m - 1], ABS(r__1)) <= thresh) { e[m - 1] = 0.f; } } else { /* Chase bulge from bottom to top */ /* Save cosines and sines for later singular vector updates */ f = ((r__1 = d__[m], ABS(r__1)) - shift) * (r_sign(&c_b49, &d__[ m]) + shift / d__[m]); g = e[m - 1]; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { slartg_(&f, &g, &cosr, &sinr, &r__); if (i__ < m) { e[i__] = r__; } f = cosr * d__[i__] + sinr * e[i__ - 1]; e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; g = sinr * d__[i__ - 1]; d__[i__ - 1] = cosr * d__[i__ - 1]; slartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; if (i__ > ll + 1) { g = sinl * e[i__ - 2]; e[i__ - 2] = cosl * e[i__ - 2]; } work[i__ - ll] = cosr; work[i__ - ll + nm1] = -sinr; work[i__ - ll + nm12] = cosl; work[i__ - ll + nm13] = -sinl; /* L150: */ } e[ll] = f; /* Test convergence */ if ((r__1 = e[ll], ABS(r__1)) <= thresh) { e[ll] = 0.f; } /* Update singular vectors if desired */ if (*ncvt > 0) { i__1 = m - ll + 1; slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ nm13 + 1], &vt[ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ ll + c_dim1], ldc); } } } /* QR iteration finished, go back and check convergence */ goto L60; /* All singular values converged, so make them positive */ L160: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] < 0.f) { d__[i__] = -d__[i__]; /* Change sign of singular vectors, if desired */ if (*ncvt > 0) { sscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt); } } /* L170: */ } /* Sort the singular values into decreasing order (insertion sort on */ /* singular values, but only one transposition per singular vector) */ i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Scan for smallest D(I) */ isub = 1; smin = d__[1]; i__2 = *n + 1 - i__; for (j = 2; j <= i__2; ++j) { if (d__[j] <= smin) { isub = j; smin = d__[j]; } /* L180: */ } if (isub != *n + 1 - i__) { /* Swap singular values and vectors */ d__[isub] = d__[*n + 1 - i__]; d__[*n + 1 - i__] = smin; if (*ncvt > 0) { sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + vt_dim1], ldvt); } if (*nru > 0) { sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * u_dim1 + 1], &c__1); } if (*ncc > 0) { sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + c_dim1], ldc); } } /* L190: */ } goto L220; /* Maximum number of iterations exceeded, failure to converge */ L200: *info = 0; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { if (e[i__] != 0.f) { ++(*info); } /* L210: */ } L220: return 0; /* End of SBDSQR */ } /* sbdsqr_ */
/* Subroutine */ int slaed8_(integer* icompq, integer* k, integer* n, integer *qsiz, real* d__, real* q, integer* ldq, integer* indxq, real* rho, integer* cutpnt, real* z__, real* dlamda, real* q2, integer* ldq2, real* w, integer* perm, integer* givptr, integer* givcol, real * givnum, integer* indxp, integer* indx, integer* info) { /* System generated locals */ integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; real r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ real c__; integer i__, j; real s, t; integer k2, n1, n2, jp, n1p1; real eps, tau, tol; integer jlam, imax, jmax; extern /* Subroutine */ int srot_(integer*, real*, integer*, real*, integer*, real*, real*), sscal_(integer*, real*, real*, integer*), scopy_(integer*, real*, integer*, real*, integer * ); extern doublereal slapy2_(real*, real*), slamch_(char*); extern /* Subroutine */ int xerbla_(char*, integer*); extern integer isamax_(integer*, real*, integer*); extern /* Subroutine */ int slamrg_(integer*, integer*, real*, integer *, integer*, integer*), slacpy_(char*, integer*, integer*, real*, integer*, real*, integer*); /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAED8 merges the two sets of eigenvalues together into a single */ /* sorted set. Then it tries to deflate the size of the problem. */ /* There are two ways in which deflation can occur: when two or more */ /* eigenvalues are close together or if there is a tiny element in the */ /* Z vector. For each such occurrence the order of the related secular */ /* equation problem is reduced by one. */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* = 0: Compute eigenvalues only. */ /* = 1: Compute eigenvectors of original dense symmetric matrix */ /* also. On entry, Q contains the orthogonal matrix used */ /* to reduce the original matrix to tridiagonal form. */ /* K (output) INTEGER */ /* The number of non-deflated eigenvalues, and the order of the */ /* related secular equation. */ /* N (input) INTEGER */ /* The dimension of the symmetric tridiagonal matrix. N >= 0. */ /* QSIZ (input) INTEGER */ /* The dimension of the orthogonal matrix used to reduce */ /* the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. */ /* D (input/output) REAL array, dimension (N) */ /* On entry, the eigenvalues of the two submatrices to be */ /* combined. On exit, the trailing (N-K) updated eigenvalues */ /* (those which were deflated) sorted into increasing order. */ /* Q (input/output) REAL array, dimension (LDQ,N) */ /* If ICOMPQ = 0, Q is not referenced. Otherwise, */ /* on entry, Q contains the eigenvectors of the partially solved */ /* system which has been previously updated in matrix */ /* multiplies with other partially solved eigensystems. */ /* On exit, Q contains the trailing (N-K) updated eigenvectors */ /* (those which were deflated) in its last N-K columns. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N). */ /* INDXQ (input) INTEGER array, dimension (N) */ /* The permutation which separately sorts the two sub-problems */ /* in D into ascending order. Note that elements in the second */ /* half of this permutation must first have CUTPNT added to */ /* their values in order to be accurate. */ /* RHO (input/output) REAL */ /* On entry, the off-diagonal element associated with the rank-1 */ /* cut which originally split the two submatrices which are now */ /* being recombined. */ /* On exit, RHO has been modified to the value required by */ /* SLAED3. */ /* CUTPNT (input) INTEGER */ /* The location of the last eigenvalue in the leading */ /* sub-matrix. min(1,N) <= CUTPNT <= N. */ /* Z (input) REAL array, dimension (N) */ /* On entry, Z contains the updating vector (the last row of */ /* the first sub-eigenvector matrix and the first row of the */ /* second sub-eigenvector matrix). */ /* On exit, the contents of Z are destroyed by the updating */ /* process. */ /* DLAMDA (output) REAL array, dimension (N) */ /* A copy of the first K eigenvalues which will be used by */ /* SLAED3 to form the secular equation. */ /* Q2 (output) REAL array, dimension (LDQ2,N) */ /* If ICOMPQ = 0, Q2 is not referenced. Otherwise, */ /* a copy of the first K eigenvectors which will be used by */ /* SLAED7 in a matrix multiply (SGEMM) to update the new */ /* eigenvectors. */ /* LDQ2 (input) INTEGER */ /* The leading dimension of the array Q2. LDQ2 >= max(1,N). */ /* W (output) REAL array, dimension (N) */ /* The first k values of the final deflation-altered z-vector and */ /* will be passed to SLAED3. */ /* PERM (output) INTEGER array, dimension (N) */ /* The permutations (from deflation and sorting) to be applied */ /* to each eigenblock. */ /* GIVPTR (output) INTEGER */ /* The number of Givens rotations which took place in this */ /* subproblem. */ /* GIVCOL (output) INTEGER array, dimension (2, N) */ /* Each pair of numbers indicates a pair of columns to take place */ /* in a Givens rotation. */ /* GIVNUM (output) REAL array, dimension (2, N) */ /* Each number indicates the S value to be used in the */ /* corresponding Givens rotation. */ /* INDXP (workspace) INTEGER array, dimension (N) */ /* The permutation used to place deflated values of D at the end */ /* of the array. INDXP(1:K) points to the nondeflated D-values */ /* and INDXP(K+1:N) points to the deflated eigenvalues. */ /* INDX (workspace) INTEGER array, dimension (N) */ /* The permutation used to sort the contents of D into ascending */ /* order. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --indxq; --z__; --dlamda; q2_dim1 = *ldq2; q2_offset = 1 + q2_dim1; q2 -= q2_offset; --w; --perm; givcol -= 3; givnum -= 3; --indxp; --indx; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*n < 0) { *info = -3; } else if (*icompq == 1 && *qsiz < *n) { *info = -4; } else if (*ldq < max(1, *n)) { *info = -7; } else if (*cutpnt < min(1, *n) || *cutpnt > *n) { *info = -10; } else if (*ldq2 < max(1, *n)) { *info = -14; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAED8", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } n1 = *cutpnt; n2 = *n - n1; n1p1 = n1 + 1; if (*rho < 0.f) { sscal_(&n2, &c_b3, &z__[n1p1], &c__1); } /* Normalize z so that norm(z) = 1 */ t = 1.f / sqrt(2.f); i__1 = *n; for (j = 1; j <= i__1; ++j) { indx[j] = j; /* L10: */ } sscal_(n, &t, &z__[1], &c__1); *rho = (r__1 = *rho * 2.f, dabs(r__1)); /* Sort the eigenvalues into increasing order */ i__1 = *n; for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { indxq[i__] += *cutpnt; /* L20: */ } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = d__[indxq[i__]]; w[i__] = z__[indxq[i__]]; /* L30: */ } i__ = 1; j = *cutpnt + 1; slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = dlamda[indx[i__]]; z__[i__] = w[indx[i__]]; /* L40: */ } /* Calculate the allowable deflation tolerence */ imax = isamax_(n, &z__[1], &c__1); jmax = isamax_(n, &d__[1], &c__1); eps = slamch_("Epsilon"); tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); /* If the rank-1 modifier is small enough, no more needs to be done */ /* except to reorganize Q so that its columns correspond with the */ /* elements in D. */ if (*rho *(r__1 = z__[imax], dabs(r__1)) <= tol) { *k = 0; if (*icompq == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { perm[j] = indxq[indx[j]]; /* L50: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { perm[j] = indxq[indx[j]]; scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], &c__1); /* L60: */ } slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); } return 0; } /* If there are multiple eigenvalues then the problem deflates. Here */ /* the number of equal eigenvalues are found. As each equal */ /* eigenvalue is found, an elementary reflector is computed to rotate */ /* the corresponding eigensubspace so that the corresponding */ /* components of Z are zero in this new basis. */ *k = 0; *givptr = 0; k2 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*rho *(r__1 = z__[j], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; indxp[k2] = j; if (j == *n) { goto L110; } } else { jlam = j; goto L80; } /* L70: */ } L80: ++j; if (j > *n) { goto L100; } if (*rho *(r__1 = z__[j], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; indxp[k2] = j; } else { /* Check if eigenvalues are close enough to allow deflation. */ s = z__[jlam]; c__ = z__[j]; /* Find sqrt(a**2+b**2) without overflow or */ /* destructive underflow. */ tau = slapy2_(&c__, &s); t = d__[j] - d__[jlam]; c__ /= tau; s = -s / tau; if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { /* Deflation is possible. */ z__[j] = tau; z__[jlam] = 0.f; /* Record the appropriate Givens rotation */ ++(*givptr); givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; givcol[(*givptr << 1) + 2] = indxq[indx[j]]; givnum[(*givptr << 1) + 1] = c__; givnum[(*givptr << 1) + 2] = s; if (*icompq == 1) { srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); } t = d__[jlam] * c__ * c__ + d__[j] * s * s; d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; d__[jlam] = t; --k2; i__ = 1; L90: if (k2 + i__ <= *n) { if (d__[jlam] < d__[indxp[k2 + i__]]) { indxp[k2 + i__ - 1] = indxp[k2 + i__]; indxp[k2 + i__] = jlam; ++i__; goto L90; } else { indxp[k2 + i__ - 1] = jlam; } } else { indxp[k2 + i__ - 1] = jlam; } jlam = j; } else { ++(*k); w[*k] = z__[jlam]; dlamda[*k] = d__[jlam]; indxp[*k] = jlam; jlam = j; } } goto L80; L100: /* Record the last eigenvalue. */ ++(*k); w[*k] = z__[jlam]; dlamda[*k] = d__[jlam]; indxp[*k] = jlam; L110: /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */ /* and Q2 respectively. The eigenvalues/vectors which were not */ /* deflated go into the first K slots of DLAMDA and Q2 respectively, */ /* while those which were deflated go into the last N - K slots. */ if (*icompq == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { jp = indxp[j]; dlamda[j] = d__[jp]; perm[j] = indxq[indx[jp]]; /* L120: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { jp = indxp[j]; dlamda[j] = d__[jp]; perm[j] = indxq[indx[jp]]; scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] , &c__1); /* L130: */ } } /* The deflated eigenvalues and their corresponding vectors go back */ /* into the last N - K slots of D and Q respectively. */ if (*k < *n) { if (*icompq == 0) { i__1 = *n - *k; scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); } else { i__1 = *n - *k; scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); i__1 = *n - *k; slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* k + 1) * q_dim1 + 1], ldq); } } return 0; /* End of SLAED8 */ } /* slaed8_ */
/* Subroutine */ int slasd2_(integer *nl, integer *nr, integer *sqre, integer *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer *idxq, integer *coltyp, integer *info) { /* System generated locals */ integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1; real r__1, r__2; /* Local variables */ real c__; integer i__, j, m, n; real s; integer k2; real z1; integer ct, jp; real eps, tau, tol; integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4]; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); integer idxjp, jprev; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); extern real slapy2_(real *, real *), slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( integer *, integer *, real *, integer *, integer *, integer *); real hlftol; extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); /* -- LAPACK auxiliary routine (version 3.4.2) -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* September 2012 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --z__; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; --dsigma; u2_dim1 = *ldu2; u2_offset = 1 + u2_dim1; u2 -= u2_offset; vt2_dim1 = *ldvt2; vt2_offset = 1 + vt2_dim1; vt2 -= vt2_offset; --idxp; --idx; --idxc; --idxq; --coltyp; /* Function Body */ *info = 0; if (*nl < 1) { *info = -1; } else if (*nr < 1) { *info = -2; } else if (*sqre != 1 && *sqre != 0) { *info = -3; } n = *nl + *nr + 1; m = n + *sqre; if (*ldu < n) { *info = -10; } else if (*ldvt < m) { *info = -12; } else if (*ldu2 < n) { *info = -15; } else if (*ldvt2 < m) { *info = -17; } if (*info != 0) { i__1 = -(*info); xerbla_("SLASD2", &i__1); return 0; } nlp1 = *nl + 1; nlp2 = *nl + 2; /* Generate the first part of the vector Z; and move the singular */ /* values in the first part of D one position backward. */ z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; z__[1] = z1; for (i__ = *nl; i__ >= 1; --i__) { z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; d__[i__ + 1] = d__[i__]; idxq[i__ + 1] = idxq[i__] + 1; /* L10: */ } /* Generate the second part of the vector Z. */ i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; /* L20: */ } /* Initialize some reference arrays. */ i__1 = nlp1; for (i__ = 2; i__ <= i__1; ++i__) { coltyp[i__] = 1; /* L30: */ } i__1 = n; for (i__ = nlp2; i__ <= i__1; ++i__) { coltyp[i__] = 2; /* L40: */ } /* Sort the singular values into increasing order */ i__1 = n; for (i__ = nlp2; i__ <= i__1; ++i__) { idxq[i__] += nlp1; /* L50: */ } /* DSIGMA, IDXC, IDXC, and the first column of U2 */ /* are used as storage space. */ i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { dsigma[i__] = d__[idxq[i__]]; u2[i__ + u2_dim1] = z__[idxq[i__]]; idxc[i__] = coltyp[idxq[i__]]; /* L60: */ } slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { idxi = idx[i__] + 1; d__[i__] = dsigma[idxi]; z__[i__] = u2[idxi + u2_dim1]; coltyp[i__] = idxc[idxi]; /* L70: */ } /* Calculate the allowable deflation tolerance */ eps = slamch_("Epsilon"); /* Computing MAX */ r__1 = f2c_abs(*alpha); r__2 = f2c_abs(*beta); // , expr subst tol = max(r__1,r__2); /* Computing MAX */ r__2 = (r__1 = d__[n], f2c_abs(r__1)); tol = eps * 8.f * max(r__2,tol); /* There are 2 kinds of deflation -- first a value in the z-vector */ /* is small, second two (or more) singular values are very close */ /* together (their difference is small). */ /* If the value in the z-vector is small, we simply permute the */ /* array so that the corresponding singular value is moved to the */ /* end. */ /* If two values in the D-vector are close, we perform a two-sided */ /* rotation designed to make one of the corresponding z-vector */ /* entries zero, and then permute the array so that the deflated */ /* singular value is moved to the end. */ /* If there are multiple singular values then the problem deflates. */ /* Here the number of equal singular values are found. As each equal */ /* singular value is found, an elementary reflector is computed to */ /* rotate the corresponding singular subspace so that the */ /* corresponding components of Z are zero in this new basis. */ *k = 1; k2 = n + 1; i__1 = n; for (j = 2; j <= i__1; ++j) { if ((r__1 = z__[j], f2c_abs(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; idxp[k2] = j; coltyp[j] = 4; if (j == n) { goto L120; } } else { jprev = j; goto L90; } /* L80: */ } L90: j = jprev; L100: ++j; if (j > n) { goto L110; } if ((r__1 = z__[j], f2c_abs(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; idxp[k2] = j; coltyp[j] = 4; } else { /* Check if singular values are close enough to allow deflation. */ if ((r__1 = d__[j] - d__[jprev], f2c_abs(r__1)) <= tol) { /* Deflation is possible. */ s = z__[jprev]; c__ = z__[j]; /* Find sqrt(a**2+b**2) without overflow or */ /* destructive underflow. */ tau = slapy2_(&c__, &s); c__ /= tau; s = -s / tau; z__[j] = tau; z__[jprev] = 0.f; /* Apply back the Givens rotation to the left and right */ /* singular vector matrices. */ idxjp = idxq[idx[jprev] + 1]; idxj = idxq[idx[j] + 1]; if (idxjp <= nlp1) { --idxjp; } if (idxj <= nlp1) { --idxj; } srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & c__1, &c__, &s); srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & c__, &s); if (coltyp[j] != coltyp[jprev]) { coltyp[j] = 3; } coltyp[jprev] = 4; --k2; idxp[k2] = jprev; jprev = j; } else { ++(*k); u2[*k + u2_dim1] = z__[jprev]; dsigma[*k] = d__[jprev]; idxp[*k] = jprev; jprev = j; } } goto L100; L110: /* Record the last singular value. */ ++(*k); u2[*k + u2_dim1] = z__[jprev]; dsigma[*k] = d__[jprev]; idxp[*k] = jprev; L120: /* Count up the total number of the various types of columns, then */ /* form a permutation which positions the four column types into */ /* four groups of uniform structure (although one or more of these */ /* groups may be empty). */ for (j = 1; j <= 4; ++j) { ctot[j - 1] = 0; /* L130: */ } i__1 = n; for (j = 2; j <= i__1; ++j) { ct = coltyp[j]; ++ctot[ct - 1]; /* L140: */ } /* PSM(*) = Position in SubMatrix (of types 1 through 4) */ psm[0] = 2; psm[1] = ctot[0] + 2; psm[2] = psm[1] + ctot[1]; psm[3] = psm[2] + ctot[2]; /* Fill out the IDXC array so that the permutation which it induces */ /* will place all type-1 columns first, all type-2 columns next, */ /* then all type-3's, and finally all type-4's, starting from the */ /* second column. This applies similarly to the rows of VT. */ i__1 = n; for (j = 2; j <= i__1; ++j) { jp = idxp[j]; ct = coltyp[jp]; idxc[psm[ct - 1]] = j; ++psm[ct - 1]; /* L150: */ } /* Sort the singular values and corresponding singular vectors into */ /* DSIGMA, U2, and VT2 respectively. The singular values/vectors */ /* which were not deflated go into the first K slots of DSIGMA, U2, */ /* and VT2 respectively, while those which were deflated go into the */ /* last N - K slots, except that the first column/row will be treated */ /* separately. */ i__1 = n; for (j = 2; j <= i__1; ++j) { jp = idxp[j]; dsigma[j] = d__[jp]; idxj = idxq[idx[idxp[idxc[j]]] + 1]; if (idxj <= nlp1) { --idxj; } scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); /* L160: */ } /* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ dsigma[1] = 0.f; hlftol = tol / 2.f; if (f2c_abs(dsigma[2]) <= hlftol) { dsigma[2] = hlftol; } if (m > n) { z__[1] = slapy2_(&z1, &z__[m]); if (z__[1] <= tol) { c__ = 1.f; s = 0.f; z__[1] = tol; } else { c__ = z1 / z__[1]; s = z__[m] / z__[1]; } } else { if (f2c_abs(z1) <= tol) { z__[1] = tol; } else { z__[1] = z1; } } /* Move the rest of the updating row to Z. */ i__1 = *k - 1; scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); /* Determine the first column of U2, the first row of VT2 and the */ /* last row of VT. */ slaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2); u2[nlp1 + u2_dim1] = 1.f; if (m > n) { i__1 = nlp1; for (i__ = 1; i__ <= i__1; ++i__) { vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; /* L170: */ } i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; /* L180: */ } } else { scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); } if (m > n) { scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); } /* The deflated singular values and their corresponding vectors go */ /* into the back of D, U, and V respectively. */ if (n > *k) { i__1 = n - *k; scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); i__1 = n - *k; slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) * u_dim1 + 1], ldu); i__1 = n - *k; slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + vt_dim1], ldvt); } /* Copy CTOT into COLTYP for referencing in SLASD3. */ for (j = 1; j <= 4; ++j) { coltyp[j] = ctot[j - 1]; /* L190: */ } return 0; /* End of SLASD2 */ }
/* Subroutine */ int slaed8_(integer *icompq, integer *k, integer *n, integer *qsiz, real *d, real *q, integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *z, real *dlamda, real *q2, integer *ldq2, real *w, integer *perm, integer *givptr, integer *givcol, real *givnum, integer *indxp, integer *indx, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, Courant Institute, NAG Ltd., and Rice University September 30, 1994 Purpose ======= SLAED8 merges the two sets of eigenvalues together into a single sorted set. Then it tries to deflate the size of the problem. There are two ways in which deflation can occur: when two or more eigenvalues are close together or if there is a tiny element in the Z vector. For each such occurrence the order of the related secular equation problem is reduced by one. Arguments ========= ICOMPQ (input) INTEGER = 0: Compute eigenvalues only. = 1: Compute eigenvectors of original dense symmetric matrix also. On entry, Q contains the orthogonal matrix used to reduce the original matrix to tridiagonal form. K (output) INTEGER The number of non-deflated eigenvalues, and the order of the related secular equation. N (input) INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. QSIZ (input) INTEGER The dimension of the orthogonal matrix used to reduce the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. D (input/output) REAL array, dimension (N) On entry, the eigenvalues of the two submatrices to be combined. On exit, the trailing (N-K) updated eigenvalues (those which were deflated) sorted into increasing order. Q (input/output) REAL array, dimension (LDQ,N) If ICOMPQ = 0, Q is not referenced. Otherwise, on entry, Q contains the eigenvectors of the partially solved system which has been previously updated in matrix multiplies with other partially solved eigensystems. On exit, Q contains the trailing (N-K) updated eigenvectors (those which were deflated) in its last N-K columns. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,N). INDXQ (input) INTEGER array, dimension (N) The permutation which separately sorts the two sub-problems in D into ascending order. Note that elements in the second half of this permutation must first have CUTPNT added to their values in order to be accurate. RHO (input/output) REAL On entry, the off-diagonal element associated with the rank-1 cut which originally split the two submatrices which are now being recombined. On exit, RHO has been modified to the value required by SLAED3. CUTPNT (input) INTEGER The location of the last eigenvalue in the leading sub-matrix. min(1,N) <= CUTPNT <= N. Z (input) REAL array, dimension (N) On entry, Z contains the updating vector (the last row of the first sub-eigenvector matrix and the first row of the second sub-eigenvector matrix). On exit, the contents of Z are destroyed by the updating process. DLAMDA (output) REAL array, dimension (N) A copy of the first K eigenvalues which will be used by SLAED3 to form the secular equation. Q2 (output) REAL array, dimension (LDQ2,N) If ICOMPQ = 0, Q2 is not referenced. Otherwise, a copy of the first K eigenvectors which will be used by SLAED7 in a matrix multiply (SGEMM) to update the new eigenvectors. LDQ2 (input) INTEGER The leading dimension of the array Q2. LDQ2 >= max(1,N). W (output) REAL array, dimension (N) The first k values of the final deflation-altered z-vector and will be passed to SLAED3. PERM (output) INTEGER array, dimension (N) The permutations (from deflation and sorting) to be applied to each eigenblock. GIVPTR (output) INTEGER The number of Givens rotations which took place in this subproblem. GIVCOL (output) INTEGER array, dimension (2, N) Each pair of numbers indicates a pair of columns to take place in a Givens rotation. GIVNUM (output) REAL array, dimension (2, N) Each number indicates the S value to be used in the corresponding Givens rotation. INDXP (workspace) INTEGER array, dimension (N) The permutation used to place deflated values of D at the end of the array. INDXP(1:K) points to the nondeflated D-values and INDXP(K+1:N) points to the deflated eigenvalues. INDX (workspace) INTEGER array, dimension (N) The permutation used to sort the contents of D into ascending order. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b3 = -1.f; static integer c__1 = 1; /* System generated locals */ integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; real r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer jlam, imax, jmax; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static real c; static integer i, j; static real s, t; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static integer k2; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); static integer n1, n2; extern doublereal slapy2_(real *, real *); static integer jp; extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer isamax_(integer *, real *, integer *); extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *); static integer n1p1; static real eps, tau, tol; --d; q_dim1 = *ldq; q_offset = q_dim1 + 1; q -= q_offset; --indxq; --z; --dlamda; q2_dim1 = *ldq2; q2_offset = q2_dim1 + 1; q2 -= q2_offset; --w; --perm; givcol -= 3; givnum -= 3; --indxp; --indx; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*n < 0) { *info = -3; } else if (*icompq == 1 && *qsiz < *n) { *info = -4; } else if (*ldq < max(1,*n)) { *info = -7; } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { *info = -10; } else if (*ldq2 < max(1,*n)) { *info = -14; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAED8", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } n1 = *cutpnt; n2 = *n - n1; n1p1 = n1 + 1; if (*rho < 0.f) { sscal_(&n2, &c_b3, &z[n1p1], &c__1); } /* Normalize z so that norm(z) = 1 */ t = 1.f / sqrt(2.f); i__1 = *n; for (j = 1; j <= i__1; ++j) { indx[j] = j; /* L10: */ } sscal_(n, &t, &z[1], &c__1); *rho = (r__1 = *rho * 2.f, dabs(r__1)); /* Sort the eigenvalues into increasing order */ i__1 = *n; for (i = *cutpnt + 1; i <= i__1; ++i) { indxq[i] += *cutpnt; /* L20: */ } i__1 = *n; for (i = 1; i <= i__1; ++i) { dlamda[i] = d[indxq[i]]; w[i] = z[indxq[i]]; /* L30: */ } i = 1; j = *cutpnt + 1; slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); i__1 = *n; for (i = 1; i <= i__1; ++i) { d[i] = dlamda[indx[i]]; z[i] = w[indx[i]]; /* L40: */ } /* Calculate the allowable deflation tolerence */ imax = isamax_(n, &z[1], &c__1); jmax = isamax_(n, &d[1], &c__1); eps = slamch_("Epsilon"); tol = eps * 8.f * (r__1 = d[jmax], dabs(r__1)); /* If the rank-1 modifier is small enough, no more needs to be done except to reorganize Q so that its columns correspond with the elements in D. */ if (*rho * (r__1 = z[imax], dabs(r__1)) <= tol) { *k = 0; if (*icompq == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { perm[j] = indxq[indx[j]]; /* L50: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { perm[j] = indxq[indx[j]]; scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], &c__1); /* L60: */ } slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); } return 0; } /* If there are multiple eigenvalues then the problem deflates. Here the number of equal eigenvalues are found. As each equal eigenvalue is found, an elementary reflector is computed to rotate the corresponding eigensubspace so that the corresponding components of Z are zero in this new basis. */ *k = 0; *givptr = 0; k2 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; indxp[k2] = j; if (j == *n) { goto L110; } } else { jlam = j; goto L80; } /* L70: */ } L80: ++j; if (j > *n) { goto L100; } if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ --k2; indxp[k2] = j; } else { /* Check if eigenvalues are close enough to allow deflation. */ s = z[jlam]; c = z[j]; /* Find sqrt(a**2+b**2) without overflow or destructive underflow. */ tau = slapy2_(&c, &s); t = d[j] - d[jlam]; c /= tau; s = -(doublereal)s / tau; if ((r__1 = t * c * s, dabs(r__1)) <= tol) { /* Deflation is possible. */ z[j] = tau; z[jlam] = 0.f; /* Record the appropriate Givens rotation */ ++(*givptr); givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; givcol[(*givptr << 1) + 2] = indxq[indx[j]]; givnum[(*givptr << 1) + 1] = c; givnum[(*givptr << 1) + 2] = s; if (*icompq == 1) { srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ indxq[indx[j]] * q_dim1 + 1], &c__1, &c, &s); } t = d[jlam] * c * c + d[j] * s * s; d[j] = d[jlam] * s * s + d[j] * c * c; d[jlam] = t; --k2; i = 1; L90: if (k2 + i <= *n) { if (d[jlam] < d[indxp[k2 + i]]) { indxp[k2 + i - 1] = indxp[k2 + i]; indxp[k2 + i] = jlam; ++i; goto L90; } else { indxp[k2 + i - 1] = jlam; } } else { indxp[k2 + i - 1] = jlam; } jlam = j; } else { ++(*k); w[*k] = z[jlam]; dlamda[*k] = d[jlam]; indxp[*k] = jlam; jlam = j; } } goto L80; L100: /* Record the last eigenvalue. */ ++(*k); w[*k] = z[jlam]; dlamda[*k] = d[jlam]; indxp[*k] = jlam; L110: /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA and Q2 respectively. The eigenvalues/vectors which were not deflated go into the first K slots of DLAMDA and Q2 respectively, while those which were deflated go into the last N - K slots. */ if (*icompq == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { jp = indxp[j]; dlamda[j] = d[jp]; perm[j] = indxq[indx[jp]]; /* L120: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { jp = indxp[j]; dlamda[j] = d[jp]; perm[j] = indxq[indx[jp]]; scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] , &c__1); /* L130: */ } } /* The deflated eigenvalues and their corresponding vectors go back into the last N - K slots of D and Q respectively. */ if (*k < *n) { if (*icompq == 0) { i__1 = *n - *k; scopy_(&i__1, &dlamda[*k + 1], &c__1, &d[*k + 1], &c__1); } else { i__1 = *n - *k; scopy_(&i__1, &dlamda[*k + 1], &c__1, &d[*k + 1], &c__1); i__1 = *n - *k; slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* k + 1) * q_dim1 + 1], ldq); } } return 0; /* End of SLAED8 */ } /* slaed8_ */
/* Subroutine */ int slahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * info) { /* System generated locals */ integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3; real r__1, r__2, r__3, r__4; /* Local variables */ integer i__, j, k, l, m; real s, v[3]; integer i1, i2; real t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, cs; integer nh; real sn; integer nr; real tr; integer nz; real det, h21s; integer its; real ulp, sum, tst, rt1i, rt2i, rt1r, rt2r; real safmin; real safmax, rtdisc, smlnum; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* November 2006 */ /* Purpose */ /* ======= */ /* SLAHQR is an auxiliary routine called by SHSEQR to update the */ /* eigenvalues and Schur decomposition already computed by SHSEQR, by */ /* dealing with the Hessenberg submatrix in rows and columns ILO to */ /* IHI. */ /* Arguments */ /* ========= */ /* WANTT (input) LOGICAL */ /* = .TRUE. : the full Schur form T is required; */ /* = .FALSE.: only eigenvalues are required. */ /* WANTZ (input) LOGICAL */ /* = .TRUE. : the matrix of Schur vectors Z is required; */ /* = .FALSE.: Schur vectors are not required. */ /* N (input) INTEGER */ /* The order of the matrix H. N >= 0. */ /* ILO (input) INTEGER */ /* IHI (input) INTEGER */ /* It is assumed that H is already upper quasi-triangular in */ /* rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless */ /* ILO = 1). SLAHQR works primarily with the Hessenberg */ /* submatrix in rows and columns ILO to IHI, but applies */ /* 1 <= ILO <= max(1,IHI); IHI <= N. */ /* H (input/output) REAL array, dimension (LDH,N) */ /* On entry, the upper Hessenberg matrix H. */ /* On exit, if INFO is zero and if WANTT is .TRUE., H is upper */ /* quasi-triangular in rows and columns ILO:IHI, with any */ /* 2-by-2 diagonal blocks in standard form. If INFO is zero */ /* and WANTT is .FALSE., the contents of H are unspecified on */ /* exit. The output state of H if INFO is nonzero is given */ /* below under the description of INFO. */ /* LDH (input) INTEGER */ /* The leading dimension of the array H. LDH >= max(1,N). */ /* WR (output) REAL array, dimension (N) */ /* WI (output) REAL array, dimension (N) */ /* The real and imaginary parts, respectively, of the computed */ /* eigenvalues ILO to IHI are stored in the corresponding */ /* elements of WR and WI. If two eigenvalues are computed as a */ /* complex conjugate pair, they are stored in consecutive */ /* elements of WR and WI, say the i-th and (i+1)th, with */ /* WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the */ /* eigenvalues are stored in the same order as on the diagonal */ /* of the Schur form returned in H, with WR(i) = H(i,i), and, if */ /* H(i:i+1,i:i+1) is a 2-by-2 diagonal block, */ /* WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). */ /* ILOZ (input) INTEGER */ /* IHIZ (input) INTEGER */ /* Specify the rows of Z to which transformations must be */ /* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. */ /* Z (input/output) REAL array, dimension (LDZ,N) */ /* If WANTZ is .TRUE., on entry Z must contain the current */ /* matrix Z of transformations accumulated by SHSEQR, and on */ /* exit Z has been updated; transformations are applied only to */ /* the submatrix Z(ILOZ:IHIZ,ILO:IHI). */ /* If WANTZ is .FALSE., Z is not referenced. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* .GT. 0: If INFO = i, SLAHQR failed to compute all the */ /* eigenvalues ILO to IHI in a total of 30 iterations */ /* per eigenvalue; elements i+1:ihi of WR and WI */ /* contain those eigenvalues which have been */ /* successfully computed. */ /* If INFO .GT. 0 and WANTT is .FALSE., then on exit, */ /* the remaining unconverged eigenvalues are the */ /* eigenvalues of the upper Hessenberg matrix rows */ /* and columns ILO thorugh INFO of the final, output */ /* value of H. */ /* If INFO .GT. 0 and WANTT is .TRUE., then on exit */ /* (*) (initial value of H)*U = U*(final value of H) */ /* where U is an orthognal matrix. The final */ /* value of H is upper Hessenberg and triangular in */ /* rows and columns INFO+1 through IHI. */ /* If INFO .GT. 0 and WANTZ is .TRUE., then on exit */ /* (final value of Z) = (initial value of Z)*U */ /* where U is the orthogonal matrix in (*) */ /* (regardless of the value of WANTT.) */ /* Further Details */ /* =============== */ /* 02-96 Based on modifications by */ /* David Day, Sandia National Laboratory, USA */ /* 12-04 Further modifications by */ /* Ralph Byers, University of Kansas, USA */ /* This is a modified version of SLAHQR from LAPACK version 3.0. */ /* It is (1) more robust against overflow and underflow and */ /* (2) adopts the more conservative Ahues & Tisseur stopping */ /* criterion (LAWN 122, 1997). */ /* ========================================================= */ /* Parameter adjustments */ h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --wr; --wi; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n == 0) { return 0; } if (*ilo == *ihi) { wr[*ilo] = h__[*ilo + *ilo * h_dim1]; wi[*ilo] = 0.f; return 0; } /* ==== clear out the trash ==== */ i__1 = *ihi - 3; for (j = *ilo; j <= i__1; ++j) { h__[j + 2 + j * h_dim1] = 0.f; h__[j + 3 + j * h_dim1] = 0.f; } if (*ilo <= *ihi - 2) { h__[*ihi + (*ihi - 2) * h_dim1] = 0.f; } nh = *ihi - *ilo + 1; nz = *ihiz - *iloz + 1; /* Set machine-dependent constants for the stopping criterion. */ safmin = slamch_("SAFE MINIMUM"); safmax = 1.f / safmin; slabad_(&safmin, &safmax); ulp = slamch_("PRECISION"); smlnum = safmin * ((real) nh / ulp); /* I1 and I2 are the indices of the first row and last column of H */ /* to which transformations must be applied. If eigenvalues only are */ /* being computed, I1 and I2 are set inside the main loop. */ if (*wantt) { i1 = 1; i2 = *n; } /* The main loop begins here. I is the loop index and decreases from */ /* IHI to ILO in steps of 1 or 2. Each iteration of the loop works */ /* with the active submatrix in rows and columns L to I. */ /* Eigenvalues I+1 to IHI have already converged. Either L = ILO or */ /* H(L,L-1) is negligible so that the matrix splits. */ i__ = *ihi; L20: l = *ilo; if (i__ < *ilo) { goto L160; } /* Perform QR iterations on rows and columns ILO to I until a */ /* submatrix of order 1 or 2 splits off at the bottom because a */ /* subdiagonal element has become negligible. */ for (its = 0; its <= 30; ++its) { /* Look for a single small subdiagonal element. */ i__1 = l + 1; for (k = i__; k >= i__1; --k) { if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= smlnum) { goto L40; } tst = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 = h__[k + k * h_dim1], dabs(r__2)); if (tst == 0.f) { if (k - 2 >= *ilo) { tst += (r__1 = h__[k - 1 + (k - 2) * h_dim1], dabs(r__1)); } if (k + 1 <= *ihi) { tst += (r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)); } } /* ==== The following is a conservative small subdiagonal */ /* . deflation criterion due to Ahues & Tisseur (LAWN 122, */ /* . 1997). It has better mathematical foundation and */ /* . improves accuracy in some cases. ==== */ if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= ulp * tst) { /* Computing MAX */ r__3 = (r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)), r__4 = (r__2 = h__[k - 1 + k * h_dim1], dabs(r__2)); ab = dmax(r__3,r__4); /* Computing MIN */ r__3 = (r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)), r__4 = (r__2 = h__[k - 1 + k * h_dim1], dabs(r__2)); ba = dmin(r__3,r__4); /* Computing MAX */ r__3 = (r__1 = h__[k + k * h_dim1], dabs(r__1)), r__4 = (r__2 = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], dabs(r__2)); aa = dmax(r__3,r__4); /* Computing MIN */ r__3 = (r__1 = h__[k + k * h_dim1], dabs(r__1)), r__4 = (r__2 = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], dabs(r__2)); bb = dmin(r__3,r__4); s = aa + ab; /* Computing MAX */ r__1 = smlnum, r__2 = ulp * (bb * (aa / s)); if (ba * (ab / s) <= dmax(r__1,r__2)) { goto L40; } } } L40: l = k; if (l > *ilo) { /* H(L,L-1) is negligible */ h__[l + (l - 1) * h_dim1] = 0.f; } /* Exit from loop if a submatrix of order 1 or 2 has split off. */ if (l >= i__ - 1) { goto L150; } /* Now the active submatrix is in rows and columns L to I. If */ /* eigenvalues only are being computed, only the active submatrix */ /* need be transformed. */ if (! (*wantt)) { i1 = l; i2 = i__; } if (its == 10) { /* Exceptional shift. */ s = (r__1 = h__[l + 1 + l * h_dim1], dabs(r__1)) + (r__2 = h__[l + 2 + (l + 1) * h_dim1], dabs(r__2)); h11 = s * .75f + h__[l + l * h_dim1]; h12 = s * -.4375f; h21 = s; h22 = h11; } else if (its == 20) { /* Exceptional shift. */ s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2)); h11 = s * .75f + h__[i__ + i__ * h_dim1]; h12 = s * -.4375f; h21 = s; h22 = h11; } else { /* Prepare to use Francis' double shift */ /* (i.e. 2nd degree generalized Rayleigh quotient) */ h11 = h__[i__ - 1 + (i__ - 1) * h_dim1]; h21 = h__[i__ + (i__ - 1) * h_dim1]; h12 = h__[i__ - 1 + i__ * h_dim1]; h22 = h__[i__ + i__ * h_dim1]; } s = dabs(h11) + dabs(h12) + dabs(h21) + dabs(h22); if (s == 0.f) { rt1r = 0.f; rt1i = 0.f; rt2r = 0.f; rt2i = 0.f; } else { h11 /= s; h21 /= s; h12 /= s; h22 /= s; tr = (h11 + h22) / 2.f; det = (h11 - tr) * (h22 - tr) - h12 * h21; rtdisc = sqrt((dabs(det))); if (det >= 0.f) { /* ==== complex conjugate shifts ==== */ rt1r = tr * s; rt2r = rt1r; rt1i = rtdisc * s; rt2i = -rt1i; } else { /* ==== real shifts (use only one of them) ==== */ rt1r = tr + rtdisc; rt2r = tr - rtdisc; if ((r__1 = rt1r - h22, dabs(r__1)) <= (r__2 = rt2r - h22, dabs(r__2))) { rt1r *= s; rt2r = rt1r; } else { rt2r *= s; rt1r = rt2r; } rt1i = 0.f; rt2i = 0.f; } } /* Look for two consecutive small subdiagonal elements. */ i__1 = l; for (m = i__ - 2; m >= i__1; --m) { /* Determine the effect of starting the double-shift QR */ /* iteration at row M, and see if this would make H(M,M-1) */ /* negligible. (The following uses scaling to avoid */ /* overflows and most underflows.) */ h21s = h__[m + 1 + m * h_dim1]; s = (r__1 = h__[m + m * h_dim1] - rt2r, dabs(r__1)) + dabs(rt2i) + dabs(h21s); h21s = h__[m + 1 + m * h_dim1] / s; v[0] = h21s * h__[m + (m + 1) * h_dim1] + (h__[m + m * h_dim1] - rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - rt1i * (rt2i / s); v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1] - rt1r - rt2r); v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1]; s = dabs(v[0]) + dabs(v[1]) + dabs(v[2]); v[0] /= s; v[1] /= s; v[2] /= s; if (m == l) { goto L60; } if ((r__1 = h__[m + (m - 1) * h_dim1], dabs(r__1)) * (dabs(v[1]) + dabs(v[2])) <= ulp * dabs(v[0]) * ((r__2 = h__[m - 1 + ( m - 1) * h_dim1], dabs(r__2)) + (r__3 = h__[m + m * h_dim1], dabs(r__3)) + (r__4 = h__[m + 1 + (m + 1) * h_dim1], dabs(r__4)))) { goto L60; } } L60: /* Double-shift QR step */ i__1 = i__ - 1; for (k = m; k <= i__1; ++k) { /* The first iteration of this loop determines a reflection G */ /* from the vector V and applies it from left and right to H, */ /* thus creating a nonzero bulge below the subdiagonal. */ /* Each subsequent iteration determines a reflection G to */ /* restore the Hessenberg form in the (K-1)th column, and thus */ /* chases the bulge one step toward the bottom of the active */ /* submatrix. NR is the order of G. */ /* Computing MIN */ i__2 = 3, i__3 = i__ - k + 1; nr = min(i__2,i__3); if (k > m) { scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); } slarfg_(&nr, v, &v[1], &c__1, &t1); if (k > m) { h__[k + (k - 1) * h_dim1] = v[0]; h__[k + 1 + (k - 1) * h_dim1] = 0.f; if (k < i__ - 1) { h__[k + 2 + (k - 1) * h_dim1] = 0.f; } } else if (m > l) { /* ==== Use the following instead of */ /* . H( K, K-1 ) = -H( K, K-1 ) to */ /* . avoid a bug when v(2) and v(3) */ /* . underflow. ==== */ h__[k + (k - 1) * h_dim1] *= 1.f - t1; } v2 = v[1]; t2 = t1 * v2; if (nr == 3) { v3 = v[2]; t3 = t1 * v3; /* Apply G from the left to transform the rows of the matrix */ /* in columns K to I2. */ i__2 = i2; for (j = k; j <= i__2; ++j) { sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + v3 * h__[k + 2 + j * h_dim1]; h__[k + j * h_dim1] -= sum * t1; h__[k + 1 + j * h_dim1] -= sum * t2; h__[k + 2 + j * h_dim1] -= sum * t3; } /* Apply G from the right to transform the columns of the */ /* matrix in rows I1 to min(K+3,I). */ /* Computing MIN */ i__3 = k + 3; i__2 = min(i__3,i__); for (j = i1; j <= i__2; ++j) { sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + v3 * h__[j + (k + 2) * h_dim1]; h__[j + k * h_dim1] -= sum * t1; h__[j + (k + 1) * h_dim1] -= sum * t2; h__[j + (k + 2) * h_dim1] -= sum * t3; } if (*wantz) { /* Accumulate transformations in the matrix Z */ i__2 = *ihiz; for (j = *iloz; j <= i__2; ++j) { sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; z__[j + k * z_dim1] -= sum * t1; z__[j + (k + 1) * z_dim1] -= sum * t2; z__[j + (k + 2) * z_dim1] -= sum * t3; } } } else if (nr == 2) { /* Apply G from the left to transform the rows of the matrix */ /* in columns K to I2. */ i__2 = i2; for (j = k; j <= i__2; ++j) { sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; h__[k + j * h_dim1] -= sum * t1; h__[k + 1 + j * h_dim1] -= sum * t2; } /* Apply G from the right to transform the columns of the */ /* matrix in rows I1 to min(K+3,I). */ i__2 = i__; for (j = i1; j <= i__2; ++j) { sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] ; h__[j + k * h_dim1] -= sum * t1; h__[j + (k + 1) * h_dim1] -= sum * t2; } if (*wantz) { /* Accumulate transformations in the matrix Z */ i__2 = *ihiz; for (j = *iloz; j <= i__2; ++j) { sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1]; z__[j + k * z_dim1] -= sum * t1; z__[j + (k + 1) * z_dim1] -= sum * t2; } } } } } /* Failure to converge in remaining number of iterations */ *info = i__; return 0; L150: if (l == i__) { /* H(I,I-1) is negligible: one eigenvalue has converged. */ wr[i__] = h__[i__ + i__ * h_dim1]; wi[i__] = 0.f; } else if (l == i__ - 1) { /* H(I-1,I-2) is negligible: a pair of eigenvalues have converged. */ /* Transform the 2-by-2 submatrix to standard Schur form, */ /* and compute and store the eigenvalues. */ slanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, &sn); if (*wantt) { /* Apply the transformation to the rest of H. */ if (i2 > i__) { i__1 = i2 - i__; srot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); } i__1 = i__ - i1 - 1; srot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * h_dim1], &c__1, &cs, &sn); } if (*wantz) { /* Apply the transformation to Z. */ srot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + i__ * z_dim1], &c__1, &cs, &sn); } } /* return to start of the main loop with new value of I. */ i__ = l - 1; goto L20; L160: return 0; /* End of SLAHQR */ } /* slahqr_ */
/* Subroutine */ int sgbbrd_(char *vect, integer *m, integer *n, integer *ncc, integer *kl, integer *ku, real *ab, integer *ldab, real *d__, real * e, real *q, integer *ldq, real *pt, integer *ldpt, real *c__, integer *ldc, real *work, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, c_dim1, c_offset, pt_dim1, pt_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; /* Local variables */ integer i__, j, l, j1, j2, kb; real ra, rb, rc; integer kk, ml, mn, nr, mu; real rs; integer kb1, ml0, mu0, klm, kun, nrt, klu1, inca; logical wantb, wantc; integer minmn; logical wantq; logical wantpt; /* -- LAPACK routine (version 3.2) -- */ /* November 2006 */ /* Purpose */ /* ======= */ /* SGBBRD reduces a real general m-by-n band matrix A to upper */ /* bidiagonal form B by an orthogonal transformation: Q' * A * P = B. */ /* The routine computes B, and optionally forms Q or P', or computes */ /* Q'*C for a given matrix C. */ /* Arguments */ /* ========= */ /* VECT (input) CHARACTER*1 */ /* Specifies whether or not the matrices Q and P' are to be */ /* formed. */ /* = 'N': do not form Q or P'; */ /* = 'Q': form Q only; */ /* = 'P': form P' only; */ /* = 'B': form both. */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* NCC (input) INTEGER */ /* The number of columns of the matrix C. NCC >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals of the matrix A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals of the matrix A. KU >= 0. */ /* AB (input/output) REAL array, dimension (LDAB,N) */ /* On entry, the m-by-n band matrix A, stored in rows 1 to */ /* KL+KU+1. The j-th column of A is stored in the j-th column of */ /* the array AB as follows: */ /* AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). */ /* On exit, A is overwritten by values generated during the */ /* reduction. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array A. LDAB >= KL+KU+1. */ /* D (output) REAL array, dimension (min(M,N)) */ /* The diagonal elements of the bidiagonal matrix B. */ /* E (output) REAL array, dimension (min(M,N)-1) */ /* The superdiagonal elements of the bidiagonal matrix B. */ /* Q (output) REAL array, dimension (LDQ,M) */ /* If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. */ /* If VECT = 'N' or 'P', the array Q is not referenced. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. */ /* LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. */ /* PT (output) REAL array, dimension (LDPT,N) */ /* If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. */ /* If VECT = 'N' or 'Q', the array PT is not referenced. */ /* LDPT (input) INTEGER */ /* The leading dimension of the array PT. */ /* LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. */ /* C (input/output) REAL array, dimension (LDC,NCC) */ /* On entry, an m-by-ncc matrix C. */ /* On exit, C is overwritten by Q'*C. */ /* C is not referenced if NCC = 0. */ /* LDC (input) INTEGER */ /* The leading dimension of the array C. */ /* LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. */ /* WORK (workspace) REAL array, dimension (2*max(M,N)) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* ===================================================================== */ /* Test the input parameters */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --d__; --e; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; pt_dim1 = *ldpt; pt_offset = 1 + pt_dim1; pt -= pt_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; /* Function Body */ wantb = lsame_(vect, "B"); wantq = lsame_(vect, "Q") || wantb; wantpt = lsame_(vect, "P") || wantb; wantc = *ncc > 0; klu1 = *kl + *ku + 1; *info = 0; if (! wantq && ! wantpt && ! lsame_(vect, "N")) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*ncc < 0) { *info = -4; } else if (*kl < 0) { *info = -5; } else if (*ku < 0) { *info = -6; } else if (*ldab < klu1) { *info = -8; } else if (*ldq < 1 || wantq && *ldq < max(1,*m)) { *info = -12; } else if (*ldpt < 1 || wantpt && *ldpt < max(1,*n)) { *info = -14; } else if (*ldc < 1 || wantc && *ldc < max(1,*m)) { *info = -16; } if (*info != 0) { i__1 = -(*info); xerbla_("SGBBRD", &i__1); return 0; } /* Initialize Q and P' to the unit matrix, if needed */ if (wantq) { slaset_("Full", m, m, &c_b8, &c_b9, &q[q_offset], ldq); } if (wantpt) { slaset_("Full", n, n, &c_b8, &c_b9, &pt[pt_offset], ldpt); } /* Quick return if possible. */ if (*m == 0 || *n == 0) { return 0; } minmn = min(*m,*n); if (*kl + *ku > 1) { /* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce */ /* first to lower bidiagonal form and then transform to upper */ /* bidiagonal */ if (*ku > 0) { ml0 = 1; mu0 = 2; } else { ml0 = 2; mu0 = 1; } /* Wherever possible, plane rotations are generated and applied in */ /* vector operations of length NR over the index set J1:J2:KLU1. */ /* The sines of the plane rotations are stored in WORK(1:max(m,n)) */ /* and the cosines in WORK(max(m,n)+1:2*max(m,n)). */ mn = max(*m,*n); /* Computing MIN */ i__1 = *m - 1; klm = min(i__1,*kl); /* Computing MIN */ i__1 = *n - 1; kun = min(i__1,*ku); kb = klm + kun; kb1 = kb + 1; inca = kb1 * *ldab; nr = 0; j1 = klm + 2; j2 = 1 - kun; i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { /* Reduce i-th column and i-th row of matrix to bidiagonal form */ ml = klm + 1; mu = kun + 1; i__2 = kb; for (kk = 1; kk <= i__2; ++kk) { j1 += kb; j2 += kb; /* generate plane rotations to annihilate nonzero elements */ /* which have been created below the band */ if (nr > 0) { slargv_(&nr, &ab[klu1 + (j1 - klm - 1) * ab_dim1], &inca, &work[j1], &kb1, &work[mn + j1], &kb1); } /* apply plane rotations from the left */ i__3 = kb; for (l = 1; l <= i__3; ++l) { if (j2 - klm + l - 1 > *n) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { slartv_(&nrt, &ab[klu1 - l + (j1 - klm + l - 1) * ab_dim1], &inca, &ab[klu1 - l + 1 + (j1 - klm + l - 1) * ab_dim1], &inca, &work[mn + j1], & work[j1], &kb1); } } if (ml > ml0) { if (ml <= *m - i__ + 1) { /* generate plane rotation to annihilate a(i+ml-1,i) */ /* within the band, and apply rotation from the left */ slartg_(&ab[*ku + ml - 1 + i__ * ab_dim1], &ab[*ku + ml + i__ * ab_dim1], &work[mn + i__ + ml - 1], &work[i__ + ml - 1], &ra); ab[*ku + ml - 1 + i__ * ab_dim1] = ra; if (i__ < *n) { /* Computing MIN */ i__4 = *ku + ml - 2, i__5 = *n - i__; i__3 = min(i__4,i__5); i__6 = *ldab - 1; i__7 = *ldab - 1; srot_(&i__3, &ab[*ku + ml - 2 + (i__ + 1) * ab_dim1], &i__6, &ab[*ku + ml - 1 + (i__ + 1) * ab_dim1], &i__7, &work[mn + i__ + ml - 1], &work[i__ + ml - 1]); } } ++nr; j1 -= kb1; } if (wantq) { /* accumulate product of plane rotations in Q */ i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { srot_(m, &q[(j - 1) * q_dim1 + 1], &c__1, &q[j * q_dim1 + 1], &c__1, &work[mn + j], &work[j]); } } if (wantc) { /* apply plane rotations to C */ i__4 = j2; i__3 = kb1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { srot_(ncc, &c__[j - 1 + c_dim1], ldc, &c__[j + c_dim1] , ldc, &work[mn + j], &work[j]); } } if (j2 + kun > *n) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 -= kb1; } i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* create nonzero element a(j-1,j+ku) above the band */ /* and store it in WORK(n+1:2*n) */ work[j + kun] = work[j] * ab[(j + kun) * ab_dim1 + 1]; ab[(j + kun) * ab_dim1 + 1] = work[mn + j] * ab[(j + kun) * ab_dim1 + 1]; } /* generate plane rotations to annihilate nonzero elements */ /* which have been generated above the band */ if (nr > 0) { slargv_(&nr, &ab[(j1 + kun - 1) * ab_dim1 + 1], &inca, & work[j1 + kun], &kb1, &work[mn + j1 + kun], &kb1); } /* apply plane rotations from the right */ i__4 = kb; for (l = 1; l <= i__4; ++l) { if (j2 + l - 1 > *m) { nrt = nr - 1; } else { nrt = nr; } if (nrt > 0) { slartv_(&nrt, &ab[l + 1 + (j1 + kun - 1) * ab_dim1], & inca, &ab[l + (j1 + kun) * ab_dim1], &inca, & work[mn + j1 + kun], &work[j1 + kun], &kb1); } } if (ml == ml0 && mu > mu0) { if (mu <= *n - i__ + 1) { /* generate plane rotation to annihilate a(i,i+mu-1) */ /* within the band, and apply rotation from the right */ slartg_(&ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1], &ab[*ku - mu + 2 + (i__ + mu - 1) * ab_dim1], &work[mn + i__ + mu - 1], &work[i__ + mu - 1], &ra); ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1] = ra; /* Computing MIN */ i__3 = *kl + mu - 2, i__5 = *m - i__; i__4 = min(i__3,i__5); srot_(&i__4, &ab[*ku - mu + 4 + (i__ + mu - 2) * ab_dim1], &c__1, &ab[*ku - mu + 3 + (i__ + mu - 1) * ab_dim1], &c__1, &work[mn + i__ + mu - 1], &work[i__ + mu - 1]); } ++nr; j1 -= kb1; } if (wantpt) { /* accumulate product of plane rotations in P' */ i__4 = j2; i__3 = kb1; for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) { srot_(n, &pt[j + kun - 1 + pt_dim1], ldpt, &pt[j + kun + pt_dim1], ldpt, &work[mn + j + kun], & work[j + kun]); } } if (j2 + kb > *m) { /* adjust J2 to keep within the bounds of the matrix */ --nr; j2 -= kb1; } i__3 = j2; i__4 = kb1; for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) { /* create nonzero element a(j+kl+ku,j+ku-1) below the */ /* band and store it in WORK(1:n) */ work[j + kb] = work[j + kun] * ab[klu1 + (j + kun) * ab_dim1]; ab[klu1 + (j + kun) * ab_dim1] = work[mn + j + kun] * ab[ klu1 + (j + kun) * ab_dim1]; } if (ml > ml0) { --ml; } else { --mu; } } } } if (*ku == 0 && *kl > 0) { /* A has been reduced to lower bidiagonal form */ /* Transform lower bidiagonal form to upper bidiagonal by applying */ /* plane rotations from the left, storing diagonal elements in D */ /* and off-diagonal elements in E */ /* Computing MIN */ i__2 = *m - 1; i__1 = min(i__2,*n); for (i__ = 1; i__ <= i__1; ++i__) { slartg_(&ab[i__ * ab_dim1 + 1], &ab[i__ * ab_dim1 + 2], &rc, &rs, &ra); d__[i__] = ra; if (i__ < *n) { e[i__] = rs * ab[(i__ + 1) * ab_dim1 + 1]; ab[(i__ + 1) * ab_dim1 + 1] = rc * ab[(i__ + 1) * ab_dim1 + 1] ; } if (wantq) { srot_(m, &q[i__ * q_dim1 + 1], &c__1, &q[(i__ + 1) * q_dim1 + 1], &c__1, &rc, &rs); } if (wantc) { srot_(ncc, &c__[i__ + c_dim1], ldc, &c__[i__ + 1 + c_dim1], ldc, &rc, &rs); } } if (*m <= *n) { d__[*m] = ab[*m * ab_dim1 + 1]; } } else if (*ku > 0) { /* A has been reduced to upper bidiagonal form */ if (*m < *n) { /* Annihilate a(m,m+1) by applying plane rotations from the */ /* right, storing diagonal elements in D and off-diagonal */ /* elements in E */ rb = ab[*ku + (*m + 1) * ab_dim1]; for (i__ = *m; i__ >= 1; --i__) { slartg_(&ab[*ku + 1 + i__ * ab_dim1], &rb, &rc, &rs, &ra); d__[i__] = ra; if (i__ > 1) { rb = -rs * ab[*ku + i__ * ab_dim1]; e[i__ - 1] = rc * ab[*ku + i__ * ab_dim1]; } if (wantpt) { srot_(n, &pt[i__ + pt_dim1], ldpt, &pt[*m + 1 + pt_dim1], ldpt, &rc, &rs); } } } else { /* Copy off-diagonal elements to E and diagonal elements to D */ i__1 = minmn - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = ab[*ku + (i__ + 1) * ab_dim1]; } i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab[*ku + 1 + i__ * ab_dim1]; } } } else { /* A is diagonal. Set elements of E to zero and copy diagonal */ /* elements to D. */ i__1 = minmn - 1; for (i__ = 1; i__ <= i__1; ++i__) { e[i__] = 0.f; } i__1 = minmn; for (i__ = 1; i__ <= i__1; ++i__) { d__[i__] = ab[i__ * ab_dim1 + 1]; } } return 0; /* End of SGBBRD */ } /* sgbbrd_ */
/* Subroutine */ int stgex2_(logical *wantq, logical *wantz, integer *n, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2; real r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ real f, g; integer i__, m; real s[16] /* was [4][4] */, t[16] /* was [4][4] */, be[2], ai[2], ar[2], sa, sb, li[16] /* was [4][4] */, ir[16] /* was [4][4] */, ss, ws, eps; logical weak; real ddum; integer idum; real taul[4], dsum, taur[4], scpy[16] /* was [4][4] */, tcpy[16] /* was [4][4] */; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); real scale, bqra21, brqa21; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); real licop[16] /* was [4][4] */; integer linfo; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); real ircop[16] /* was [4][4] */, dnorm; integer iwork[4]; extern /* Subroutine */ int slagv2_(real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *), sgeqr2_( integer *, integer *, real *, integer *, real *, real *, integer * ), sgerq2_(integer *, integer *, real *, integer *, real *, real * , integer *), sorg2r_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *), sorgr2_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *), sorm2r_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *), sormr2_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *); real dscale; extern /* Subroutine */ int stgsy2_(char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, integer *, real * , integer *, real *, integer *, real *, integer *, real *, real *, real *, integer *, integer *, integer *); extern doublereal slamch_(char *); extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slartg_(real *, real *, real *, real *, real *); real thresh; extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, real *, real *, integer *), slassq_(integer *, real *, integer *, real *, real *); real smlnum; logical strong; /* -- LAPACK auxiliary routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) */ /* of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair */ /* (A, B) by an orthogonal equivalence transformation. */ /* (A, B) must be in generalized real Schur canonical form (as returned */ /* by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 */ /* diagonal blocks. B is upper triangular. */ /* Optionally, the matrices Q and Z of generalized Schur vectors are */ /* updated. */ /* Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' */ /* Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' */ /* Arguments */ /* ========= */ /* WANTQ (input) LOGICAL */ /* .TRUE. : update the left transformation matrix Q; */ /* .FALSE.: do not update Q. */ /* WANTZ (input) LOGICAL */ /* .TRUE. : update the right transformation matrix Z; */ /* .FALSE.: do not update Z. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) REAL arrays, dimensions (LDA,N) */ /* On entry, the matrix A in the pair (A, B). */ /* On exit, the updated matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) REAL arrays, dimensions (LDB,N) */ /* On entry, the matrix B in the pair (A, B). */ /* On exit, the updated matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* Q (input/output) REAL array, dimension (LDZ,N) */ /* On entry, if WANTQ = .TRUE., the orthogonal matrix Q. */ /* On exit, the updated matrix Q. */ /* Not referenced if WANTQ = .FALSE.. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= 1. */ /* If WANTQ = .TRUE., LDQ >= N. */ /* Z (input/output) REAL array, dimension (LDZ,N) */ /* On entry, if WANTZ =.TRUE., the orthogonal matrix Z. */ /* On exit, the updated matrix Z. */ /* Not referenced if WANTZ = .FALSE.. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1. */ /* If WANTZ = .TRUE., LDZ >= N. */ /* J1 (input) INTEGER */ /* The index to the first block (A11, B11). 1 <= J1 <= N. */ /* N1 (input) INTEGER */ /* The order of the first block (A11, B11). N1 = 0, 1 or 2. */ /* N2 (input) INTEGER */ /* The order of the second block (A22, B22). N2 = 0, 1 or 2. */ /* WORK (workspace) REAL array, dimension (MAX(1,LWORK)). */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* LWORK >= MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 ) */ /* INFO (output) INTEGER */ /* =0: Successful exit */ /* >0: If INFO = 1, the transformed matrix (A, B) would be */ /* too far from generalized Schur form; the blocks are */ /* not swapped and (A, B) and (Q, Z) are unchanged. */ /* The problem of swapping is too ill-conditioned. */ /* <0: If INFO = -16: LWORK is too small. Appropriate value */ /* for LWORK is returned in WORK(1). */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Bo Kagstrom and Peter Poromaa, Department of Computing Science, */ /* Umea University, S-901 87 Umea, Sweden. */ /* In the current code both weak and strong stability tests are */ /* performed. The user can omit the strong stability test by changing */ /* the internal logical parameter WANDS to .FALSE.. See ref. [2] for */ /* details. */ /* [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */ /* Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */ /* M.S. Moonen et al (eds), Linear Algebra for Large Scale and */ /* Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */ /* [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified */ /* Eigenvalues of a Regular Matrix Pair (A, B) and Condition */ /* Estimation: Theory, Algorithms and Software, */ /* Report UMINF - 94.04, Department of Computing Science, Umea */ /* University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working */ /* Note 87. To appear in Numerical Algorithms, 1996. */ /* ===================================================================== */ /* Replaced various illegal calls to SCOPY by calls to SLASET, or by DO */ /* loops. Sven Hammarling, 1/5/02. */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n <= 1 || *n1 <= 0 || *n2 <= 0) { return 0; } if (*n1 > *n || *j1 + *n1 > *n) { return 0; } m = *n1 + *n2; /* Computing MAX */ i__1 = *n * m, i__2 = m * m << 1; if (*lwork < max(i__1,i__2)) { *info = -16; /* Computing MAX */ i__1 = *n * m, i__2 = m * m << 1; work[1] = (real) max(i__1,i__2); return 0; } weak = FALSE_; strong = FALSE_; /* Make a local copy of selected block */ slaset_("Full", &c__4, &c__4, &c_b5, &c_b5, li, &c__4); slaset_("Full", &c__4, &c__4, &c_b5, &c_b5, ir, &c__4); slacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, s, &c__4); slacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, t, &c__4); /* Compute threshold for testing acceptance of swapping. */ eps = slamch_("P"); smlnum = slamch_("S") / eps; dscale = 0.f; dsum = 1.f; slacpy_("Full", &m, &m, s, &c__4, &work[1], &m); i__1 = m * m; slassq_(&i__1, &work[1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, t, &c__4, &work[1], &m); i__1 = m * m; slassq_(&i__1, &work[1], &c__1, &dscale, &dsum); dnorm = dscale * sqrt(dsum); /* Computing MAX */ r__1 = eps * 10.f * dnorm; thresh = dmax(r__1,smlnum); if (m == 2) { /* CASE 1: Swap 1-by-1 and 1-by-1 blocks. */ /* Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks */ /* using Givens rotations and perform the swap tentatively. */ f = s[5] * t[0] - t[5] * s[0]; g = s[5] * t[4] - t[5] * s[4]; sb = dabs(t[5]); sa = dabs(s[5]); slartg_(&f, &g, &ir[4], ir, &ddum); ir[1] = -ir[4]; ir[5] = ir[0]; srot_(&c__2, s, &c__1, &s[4], &c__1, ir, &ir[1]); srot_(&c__2, t, &c__1, &t[4], &c__1, ir, &ir[1]); if (sa >= sb) { slartg_(s, &s[1], li, &li[1], &ddum); } else { slartg_(t, &t[1], li, &li[1], &ddum); } srot_(&c__2, s, &c__4, &s[1], &c__4, li, &li[1]); srot_(&c__2, t, &c__4, &t[1], &c__4, li, &li[1]); li[5] = li[0]; li[4] = -li[1]; /* Weak stability test: */ /* |S21| + |T21| <= O(EPS * F-norm((S, T))) */ ws = dabs(s[1]) + dabs(t[1]); weak = ws <= thresh; if (! weak) { goto L70; } if (TRUE_) { /* Strong stability test: */ /* F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) */ slacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, &work[m * m + 1], &m); sgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & c_b42, &work[m * m + 1], &m); dscale = 0.f; dsum = 1.f; i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, &work[m * m + 1], &m); sgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & c_b42, &work[m * m + 1], &m); i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); ss = dscale * sqrt(dsum); strong = ss <= thresh; if (! strong) { goto L70; } } /* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and */ /* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */ i__1 = *j1 + 1; srot_(&i__1, &a[*j1 * a_dim1 + 1], &c__1, &a[(*j1 + 1) * a_dim1 + 1], &c__1, ir, &ir[1]); i__1 = *j1 + 1; srot_(&i__1, &b[*j1 * b_dim1 + 1], &c__1, &b[(*j1 + 1) * b_dim1 + 1], &c__1, ir, &ir[1]); i__1 = *n - *j1 + 1; srot_(&i__1, &a[*j1 + *j1 * a_dim1], lda, &a[*j1 + 1 + *j1 * a_dim1], lda, li, &li[1]); i__1 = *n - *j1 + 1; srot_(&i__1, &b[*j1 + *j1 * b_dim1], ldb, &b[*j1 + 1 + *j1 * b_dim1], ldb, li, &li[1]); /* Set N1-by-N2 (2,1) - blocks to ZERO. */ a[*j1 + 1 + *j1 * a_dim1] = 0.f; b[*j1 + 1 + *j1 * b_dim1] = 0.f; /* Accumulate transformations into Q and Z if requested. */ if (*wantz) { srot_(n, &z__[*j1 * z_dim1 + 1], &c__1, &z__[(*j1 + 1) * z_dim1 + 1], &c__1, ir, &ir[1]); } if (*wantq) { srot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[(*j1 + 1) * q_dim1 + 1], &c__1, li, &li[1]); } /* Exit with INFO = 0 if swap was successfully performed. */ return 0; } else { /* CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 */ /* and 2-by-2 blocks. */ /* Solve the generalized Sylvester equation */ /* S11 * R - L * S22 = SCALE * S12 */ /* T11 * R - L * T22 = SCALE * T12 */ /* for R and L. Solutions in LI and IR. */ slacpy_("Full", n1, n2, &t[(*n1 + 1 << 2) - 4], &c__4, li, &c__4); slacpy_("Full", n1, n2, &s[(*n1 + 1 << 2) - 4], &c__4, &ir[*n2 + 1 + ( *n1 + 1 << 2) - 5], &c__4); stgsy2_("N", &c__0, n1, n2, s, &c__4, &s[*n1 + 1 + (*n1 + 1 << 2) - 5] , &c__4, &ir[*n2 + 1 + (*n1 + 1 << 2) - 5], &c__4, t, &c__4, & t[*n1 + 1 + (*n1 + 1 << 2) - 5], &c__4, li, &c__4, &scale, & dsum, &dscale, iwork, &idum, &linfo); /* Compute orthogonal matrix QL: */ /* QL' * LI = [ TL ] */ /* [ 0 ] */ /* where */ /* LI = [ -L ] */ /* [ SCALE * identity(N2) ] */ i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { sscal_(n1, &c_b48, &li[(i__ << 2) - 4], &c__1); li[*n1 + i__ + (i__ << 2) - 5] = scale; /* L10: */ } sgeqr2_(&m, n2, li, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } sorg2r_(&m, &m, n2, li, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } /* Compute orthogonal matrix RQ: */ /* IR * RQ' = [ 0 TR], */ /* where IR = [ SCALE * identity(N1), R ] */ i__1 = *n1; for (i__ = 1; i__ <= i__1; ++i__) { ir[*n2 + i__ + (i__ << 2) - 5] = scale; /* L20: */ } sgerq2_(n1, &m, &ir[*n2], &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } sorgr2_(&m, &m, n1, ir, &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } /* Perform the swapping tentatively: */ sgemm_("T", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b42, &work[1], &m, ir, &c__4, &c_b5, s, &c__4); sgemm_("T", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b42, &work[1], &m, ir, &c__4, &c_b5, t, &c__4); slacpy_("F", &m, &m, s, &c__4, scpy, &c__4); slacpy_("F", &m, &m, t, &c__4, tcpy, &c__4); slacpy_("F", &m, &m, ir, &c__4, ircop, &c__4); slacpy_("F", &m, &m, li, &c__4, licop, &c__4); /* Triangularize the B-part by an RQ factorization. */ /* Apply transformation (from left) to A-part, giving S. */ sgerq2_(&m, &m, t, &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } sormr2_("R", "T", &m, &m, &m, t, &c__4, taur, s, &c__4, &work[1], & linfo); if (linfo != 0) { goto L70; } sormr2_("L", "N", &m, &m, &m, t, &c__4, taur, ir, &c__4, &work[1], & linfo); if (linfo != 0) { goto L70; } /* Compute F-norm(S21) in BRQA21. (T21 is 0.) */ dscale = 0.f; dsum = 1.f; i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { slassq_(n1, &s[*n2 + 1 + (i__ << 2) - 5], &c__1, &dscale, &dsum); /* L30: */ } brqa21 = dscale * sqrt(dsum); /* Triangularize the B-part by a QR factorization. */ /* Apply transformation (from right) to A-part, giving S. */ sgeqr2_(&m, &m, tcpy, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } sorm2r_("L", "T", &m, &m, &m, tcpy, &c__4, taul, scpy, &c__4, &work[1] , info); sorm2r_("R", "N", &m, &m, &m, tcpy, &c__4, taul, licop, &c__4, &work[ 1], info); if (linfo != 0) { goto L70; } /* Compute F-norm(S21) in BQRA21. (T21 is 0.) */ dscale = 0.f; dsum = 1.f; i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { slassq_(n1, &scpy[*n2 + 1 + (i__ << 2) - 5], &c__1, &dscale, & dsum); /* L40: */ } bqra21 = dscale * sqrt(dsum); /* Decide which method to use. */ /* Weak stability test: */ /* F-norm(S21) <= O(EPS * F-norm((S, T))) */ if (bqra21 <= brqa21 && bqra21 <= thresh) { slacpy_("F", &m, &m, scpy, &c__4, s, &c__4); slacpy_("F", &m, &m, tcpy, &c__4, t, &c__4); slacpy_("F", &m, &m, ircop, &c__4, ir, &c__4); slacpy_("F", &m, &m, licop, &c__4, li, &c__4); } else if (brqa21 >= thresh) { goto L70; } /* Set lower triangle of B-part to zero */ i__1 = m - 1; i__2 = m - 1; slaset_("Lower", &i__1, &i__2, &c_b5, &c_b5, &t[1], &c__4); if (TRUE_) { /* Strong stability test: */ /* F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) */ slacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, &work[m * m + 1], &m); sgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, & work[1], &m); sgemm_("N", "N", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & c_b42, &work[m * m + 1], &m); dscale = 0.f; dsum = 1.f; i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, &work[m * m + 1], &m); sgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, & work[1], &m); sgemm_("N", "N", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, & c_b42, &work[m * m + 1], &m); i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); ss = dscale * sqrt(dsum); strong = ss <= thresh; if (! strong) { goto L70; } } /* If the swap is accepted ("weakly" and "strongly"), apply the */ /* transformations and set N1-by-N2 (2,1)-block to zero. */ slaset_("Full", n1, n2, &c_b5, &c_b5, &s[*n2], &c__4); /* copy back M-by-M diagonal block starting at index J1 of (A, B) */ slacpy_("F", &m, &m, s, &c__4, &a[*j1 + *j1 * a_dim1], lda) ; slacpy_("F", &m, &m, t, &c__4, &b[*j1 + *j1 * b_dim1], ldb) ; slaset_("Full", &c__4, &c__4, &c_b5, &c_b5, t, &c__4); /* Standardize existing 2-by-2 blocks. */ i__1 = m * m; for (i__ = 1; i__ <= i__1; ++i__) { work[i__] = 0.f; /* L50: */ } work[1] = 1.f; t[0] = 1.f; idum = *lwork - m * m - 2; if (*n2 > 1) { slagv2_(&a[*j1 + *j1 * a_dim1], lda, &b[*j1 + *j1 * b_dim1], ldb, ar, ai, be, &work[1], &work[2], t, &t[1]); work[m + 1] = -work[2]; work[m + 2] = work[1]; t[*n2 + (*n2 << 2) - 5] = t[0]; t[4] = -t[1]; } work[m * m] = 1.f; t[m + (m << 2) - 5] = 1.f; if (*n1 > 1) { slagv2_(&a[*j1 + *n2 + (*j1 + *n2) * a_dim1], lda, &b[*j1 + *n2 + (*j1 + *n2) * b_dim1], ldb, taur, taul, &work[m * m + 1], &work[*n2 * m + *n2 + 1], &work[*n2 * m + *n2 + 2], &t[* n2 + 1 + (*n2 + 1 << 2) - 5], &t[m + (m - 1 << 2) - 5]); work[m * m] = work[*n2 * m + *n2 + 1]; work[m * m - 1] = -work[*n2 * m + *n2 + 2]; t[m + (m << 2) - 5] = t[*n2 + 1 + (*n2 + 1 << 2) - 5]; t[m - 1 + (m << 2) - 5] = -t[m + (m - 1 << 2) - 5]; } sgemm_("T", "N", n2, n1, n2, &c_b42, &work[1], &m, &a[*j1 + (*j1 + * n2) * a_dim1], lda, &c_b5, &work[m * m + 1], n2); slacpy_("Full", n2, n1, &work[m * m + 1], n2, &a[*j1 + (*j1 + *n2) * a_dim1], lda); sgemm_("T", "N", n2, n1, n2, &c_b42, &work[1], &m, &b[*j1 + (*j1 + * n2) * b_dim1], ldb, &c_b5, &work[m * m + 1], n2); slacpy_("Full", n2, n1, &work[m * m + 1], n2, &b[*j1 + (*j1 + *n2) * b_dim1], ldb); sgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, &work[1], &m, &c_b5, & work[m * m + 1], &m); slacpy_("Full", &m, &m, &work[m * m + 1], &m, li, &c__4); sgemm_("N", "N", n2, n1, n1, &c_b42, &a[*j1 + (*j1 + *n2) * a_dim1], lda, &t[*n2 + 1 + (*n2 + 1 << 2) - 5], &c__4, &c_b5, &work[1], n2); slacpy_("Full", n2, n1, &work[1], n2, &a[*j1 + (*j1 + *n2) * a_dim1], lda); sgemm_("N", "N", n2, n1, n1, &c_b42, &b[*j1 + (*j1 + *n2) * b_dim1], ldb, &t[*n2 + 1 + (*n2 + 1 << 2) - 5], &c__4, &c_b5, &work[1], n2); slacpy_("Full", n2, n1, &work[1], n2, &b[*j1 + (*j1 + *n2) * b_dim1], ldb); sgemm_("T", "N", &m, &m, &m, &c_b42, ir, &c__4, t, &c__4, &c_b5, & work[1], &m); slacpy_("Full", &m, &m, &work[1], &m, ir, &c__4); /* Accumulate transformations into Q and Z if requested. */ if (*wantq) { sgemm_("N", "N", n, &m, &m, &c_b42, &q[*j1 * q_dim1 + 1], ldq, li, &c__4, &c_b5, &work[1], n); slacpy_("Full", n, &m, &work[1], n, &q[*j1 * q_dim1 + 1], ldq); } if (*wantz) { sgemm_("N", "N", n, &m, &m, &c_b42, &z__[*j1 * z_dim1 + 1], ldz, ir, &c__4, &c_b5, &work[1], n); slacpy_("Full", n, &m, &work[1], n, &z__[*j1 * z_dim1 + 1], ldz); } /* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and */ /* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */ i__ = *j1 + m; if (i__ <= *n) { i__1 = *n - i__ + 1; sgemm_("T", "N", &m, &i__1, &m, &c_b42, li, &c__4, &a[*j1 + i__ * a_dim1], lda, &c_b5, &work[1], &m); i__1 = *n - i__ + 1; slacpy_("Full", &m, &i__1, &work[1], &m, &a[*j1 + i__ * a_dim1], lda); i__1 = *n - i__ + 1; sgemm_("T", "N", &m, &i__1, &m, &c_b42, li, &c__4, &b[*j1 + i__ * b_dim1], ldb, &c_b5, &work[1], &m); i__1 = *n - i__ + 1; slacpy_("Full", &m, &i__1, &work[1], &m, &b[*j1 + i__ * b_dim1], ldb); } i__ = *j1 - 1; if (i__ > 0) { sgemm_("N", "N", &i__, &m, &m, &c_b42, &a[*j1 * a_dim1 + 1], lda, ir, &c__4, &c_b5, &work[1], &i__); slacpy_("Full", &i__, &m, &work[1], &i__, &a[*j1 * a_dim1 + 1], lda); sgemm_("N", "N", &i__, &m, &m, &c_b42, &b[*j1 * b_dim1 + 1], ldb, ir, &c__4, &c_b5, &work[1], &i__); slacpy_("Full", &i__, &m, &work[1], &i__, &b[*j1 * b_dim1 + 1], ldb); } /* Exit with INFO = 0 if swap was successfully performed. */ return 0; } /* Exit with INFO = 1 if swap was rejected. */ L70: *info = 1; return 0; /* End of STGEX2 */ } /* stgex2_ */
/* Subroutine */ int check3_(real *sfac) { /* Initialized data */ static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static real dx1[7] = { .6f,.1f,-.5f,.8f,.9f,-.3f,-.4f }; static real dy1[7] = { .5f,-.9f,.3f,.7f,-.6f,.2f,.8f }; static real sc = .8f; static real ss = .6f; static real dt9x[112] /* was [7][4][4] */ = { .6f,0.f,0.f,0.f,0.f, 0.f,0.f,.78f,0.f,0.f,0.f,0.f,0.f,0.f,.78f,-.46f,0.f,0.f,0.f,0.f, 0.f,.78f,-.46f,-.22f,1.06f,0.f,0.f,0.f,.6f,0.f,0.f,0.f,0.f,0.f, 0.f,.78f,0.f,0.f,0.f,0.f,0.f,0.f,.66f,.1f,-.1f,0.f,0.f,0.f,0.f, .96f,.1f,-.76f,.8f,.9f,-.3f,-.02f,.6f,0.f,0.f,0.f,0.f,0.f,0.f, .78f,0.f,0.f,0.f,0.f,0.f,0.f,-.06f,.1f,-.1f,0.f,0.f,0.f,0.f,.9f, .1f,-.22f,.8f,.18f,-.3f,-.02f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.78f, 0.f,0.f,0.f,0.f,0.f,0.f,.78f,.26f,0.f,0.f,0.f,0.f,0.f,.78f,.26f, -.76f,1.12f,0.f,0.f,0.f }; static real dt9y[112] /* was [7][4][4] */ = { .5f,0.f,0.f,0.f,0.f, 0.f,0.f,.04f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,-.78f,0.f,0.f,0.f,0.f, 0.f,.04f,-.78f,.54f,.08f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f, .04f,0.f,0.f,0.f,0.f,0.f,0.f,.7f,-.9f,-.12f,0.f,0.f,0.f,0.f,.64f, -.9f,-.3f,.7f,-.18f,.2f,.28f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,0.f, 0.f,0.f,0.f,0.f,0.f,.7f,-1.08f,0.f,0.f,0.f,0.f,0.f,.64f,-1.26f, .54f,.2f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,0.f,0.f,0.f, 0.f,0.f,0.f,.04f,-.9f,.18f,0.f,0.f,0.f,0.f,.04f,-.9f,.18f,.7f, -.18f,.2f,.16f }; static real ssize2[28] /* was [14][2] */ = { 0.f,0.f,0.f,0.f,0.f,0.f, 0.f,0.f,0.f,0.f,0.f,0.f,0.f,0.f,1.17f,1.17f,1.17f,1.17f,1.17f, 1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f }; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k, ki, kn, mx, my; real sx[7], sy[7], stx[7], sty[7]; integer lenx, leny; real mwpc[11]; integer mwpn[11]; real mwps[11]; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); real mwpx[5], mwpy[5]; integer ksize; real copyx[5], copyy[5]; extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); real mwptx[55] /* was [11][5] */, mwpty[55] /* was [11][5] */; integer mwpinx[11], mwpiny[11]; real mwpstx[5], mwpsty[5]; /* Fortran I/O blocks */ static cilist io___88 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; if (combla_1.icase == 4) { /* .. SROT .. */ for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; stx[i__ - 1] = dt9x[i__ + (kn + (ki << 2)) * 7 - 36]; sty[i__ - 1] = dt9y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L20: */ } srot_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy, & sc, &ss); stest_(&lenx, sx, stx, &ssize2[ksize * 14 - 14], sfac); stest_(&leny, sy, sty, &ssize2[ksize * 14 - 14], sfac); } else { s_wsle(&io___88); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK3", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } mwpc[0] = 1.f; for (i__ = 2; i__ <= 11; ++i__) { mwpc[i__ - 1] = 0.f; /* L80: */ } mwps[0] = 0.f; for (i__ = 2; i__ <= 6; ++i__) { mwps[i__ - 1] = 1.f; /* L100: */ } for (i__ = 7; i__ <= 11; ++i__) { mwps[i__ - 1] = -1.f; /* L120: */ } mwpinx[0] = 1; mwpinx[1] = 1; mwpinx[2] = 1; mwpinx[3] = -1; mwpinx[4] = 1; mwpinx[5] = -1; mwpinx[6] = 1; mwpinx[7] = 1; mwpinx[8] = -1; mwpinx[9] = 1; mwpinx[10] = -1; mwpiny[0] = 1; mwpiny[1] = 1; mwpiny[2] = -1; mwpiny[3] = -1; mwpiny[4] = 2; mwpiny[5] = 1; mwpiny[6] = 1; mwpiny[7] = -1; mwpiny[8] = -1; mwpiny[9] = 2; mwpiny[10] = 1; for (i__ = 1; i__ <= 11; ++i__) { mwpn[i__ - 1] = 5; /* L140: */ } mwpn[4] = 3; mwpn[9] = 3; for (i__ = 1; i__ <= 5; ++i__) { mwpx[i__ - 1] = (real) i__; mwpy[i__ - 1] = (real) i__; mwptx[i__ * 11 - 11] = (real) i__; mwpty[i__ * 11 - 11] = (real) i__; mwptx[i__ * 11 - 10] = (real) i__; mwpty[i__ * 11 - 10] = (real) (-i__); mwptx[i__ * 11 - 9] = (real) (6 - i__); mwpty[i__ * 11 - 9] = (real) (i__ - 6); mwptx[i__ * 11 - 8] = (real) i__; mwpty[i__ * 11 - 8] = (real) (-i__); mwptx[i__ * 11 - 6] = (real) (6 - i__); mwpty[i__ * 11 - 6] = (real) (i__ - 6); mwptx[i__ * 11 - 5] = (real) (-i__); mwpty[i__ * 11 - 5] = (real) i__; mwptx[i__ * 11 - 4] = (real) (i__ - 6); mwpty[i__ * 11 - 4] = (real) (6 - i__); mwptx[i__ * 11 - 3] = (real) (-i__); mwpty[i__ * 11 - 3] = (real) i__; mwptx[i__ * 11 - 1] = (real) (i__ - 6); mwpty[i__ * 11 - 1] = (real) (6 - i__); /* L160: */ } mwptx[4] = 1.f; mwptx[15] = 3.f; mwptx[26] = 5.f; mwptx[37] = 4.f; mwptx[48] = 5.f; mwpty[4] = -1.f; mwpty[15] = 2.f; mwpty[26] = -2.f; mwpty[37] = 4.f; mwpty[48] = -3.f; mwptx[9] = -1.f; mwptx[20] = -3.f; mwptx[31] = -5.f; mwptx[42] = 4.f; mwptx[53] = 5.f; mwpty[9] = 1.f; mwpty[20] = 2.f; mwpty[31] = 2.f; mwpty[42] = 4.f; mwpty[53] = 3.f; for (i__ = 1; i__ <= 11; ++i__) { combla_1.incx = mwpinx[i__ - 1]; combla_1.incy = mwpiny[i__ - 1]; for (k = 1; k <= 5; ++k) { copyx[k - 1] = mwpx[k - 1]; copyy[k - 1] = mwpy[k - 1]; mwpstx[k - 1] = mwptx[i__ + k * 11 - 12]; mwpsty[k - 1] = mwpty[i__ + k * 11 - 12]; /* L180: */ } srot_(&mwpn[i__ - 1], copyx, &combla_1.incx, copyy, &combla_1.incy, & mwpc[i__ - 1], &mwps[i__ - 1]); stest_(&c__5, copyx, mwpstx, mwpstx, sfac); stest_(&c__5, copyy, mwpsty, mwpsty, sfac); /* L200: */ } return 0; } /* check3_ */