Пример #1
0
void cblas_srot( const integer N, float *X, const integer incX, float *Y, 
                      const integer incY, const float  c, const float  s)
{
   #define F77_N N
   #define F77_incX incX
   #define F77_incY incY
   srot_(&F77_N, X, &F77_incX, Y, &F77_incY, &c, &s);
}  
Пример #2
0
int
f2c_srot(integer* N,
         real* X, integer* incX,
         real* Y, integer* incY,
         real* c, real* s)
{
    srot_(N, X, incX, Y, incY, c, s);
    return 0;
}
Пример #3
0
/* Subroutine */ int slaexc_(logical *wantq, integer *n, real *t, integer *
	ldt, real *q, integer *ldq, integer *j1, integer *n1, integer *n2, 
	real *work, integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in   
    an upper quasi-triangular matrix T by an orthogonal similarity   
    transformation.   

    T must be in Schur canonical form, that is, block upper triangular   
    with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block   
    has its diagonal elemnts equal and its off-diagonal elements of   
    opposite sign.   

    Arguments   
    =========   

    WANTQ   (input) LOGICAL   
            = .TRUE. : accumulate the transformation in the matrix Q;   
            = .FALSE.: do not accumulate the transformation.   

    N       (input) INTEGER   
            The order of the matrix T. N >= 0.   

    T       (input/output) REAL array, dimension (LDT,N)   
            On entry, the upper quasi-triangular matrix T, in Schur   
            canonical form.   
            On exit, the updated matrix T, again in Schur canonical form.   

    LDT     (input)  INTEGER   
            The leading dimension of the array T. LDT >= max(1,N).   

    Q       (input/output) REAL array, dimension (LDQ,N)   
            On entry, if WANTQ is .TRUE., the orthogonal matrix Q.   
            On exit, if WANTQ is .TRUE., the updated matrix Q.   
            If WANTQ is .FALSE., Q is not referenced.   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.   
            LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N.   

    J1      (input) INTEGER   
            The index of the first row of the first block T11.   

    N1      (input) INTEGER   
            The order of the first block T11. N1 = 0, 1 or 2.   

    N2      (input) INTEGER   
            The order of the second block T22. N2 = 0, 1 or 2.   

    WORK    (workspace) REAL array, dimension (N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            = 1: the transformed matrix T would be too far from Schur   
                 form; the blocks are not swapped and T and Q are   
                 unchanged.   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c__4 = 4;
    static logical c_false = FALSE_;
    static integer c_n1 = -1;
    static integer c__2 = 2;
    static integer c__3 = 3;
    
    /* System generated locals */
    integer q_dim1, q_offset, t_dim1, t_offset, i__1;
    real r__1, r__2, r__3, r__4, r__5, r__6;
    /* Local variables */
    static integer ierr;
    static real temp;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static real d__[16]	/* was [4][4] */;
    static integer k;
    static real u[3], scale, x[4]	/* was [2][2] */, dnorm;
    static integer j2, j3, j4;
    static real xnorm, u1[3], u2[3];
    extern /* Subroutine */ int slanv2_(real *, real *, real *, real *, real *
	    , real *, real *, real *, real *, real *), slasy2_(logical *, 
	    logical *, integer *, integer *, integer *, real *, integer *, 
	    real *, integer *, real *, integer *, real *, real *, integer *, 
	    real *, integer *);
    static integer nd;
    static real cs, t11, t22, t33, sn;
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, 
	    real *), slacpy_(char *, integer *, integer *, real *, integer *, 
	    real *, integer *), slartg_(real *, real *, real *, real *
	    , real *);
    static real thresh;
    extern /* Subroutine */ int slarfx_(char *, integer *, integer *, real *, 
	    real *, real *, integer *, real *);
    static real smlnum, wi1, wi2, wr1, wr2, eps, tau, tau1, tau2;
#define d___ref(a_1,a_2) d__[(a_2)*4 + a_1 - 5]
#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1]
#define x_ref(a_1,a_2) x[(a_2)*2 + a_1 - 3]


    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --work;

    /* Function Body */
    *info = 0;

/*     Quick return if possible */

    if (*n == 0 || *n1 == 0 || *n2 == 0) {
	return 0;
    }
    if (*j1 + *n1 > *n) {
	return 0;
    }

    j2 = *j1 + 1;
    j3 = *j1 + 2;
    j4 = *j1 + 3;

    if (*n1 == 1 && *n2 == 1) {

/*        Swap two 1-by-1 blocks. */

	t11 = t_ref(*j1, *j1);
	t22 = t_ref(j2, j2);

/*        Determine the transformation to perform the interchange. */

	r__1 = t22 - t11;
	slartg_(&t_ref(*j1, j2), &r__1, &cs, &sn, &temp);

/*        Apply transformation to the matrix T. */

	if (j3 <= *n) {
	    i__1 = *n - *j1 - 1;
	    srot_(&i__1, &t_ref(*j1, j3), ldt, &t_ref(j2, j3), ldt, &cs, &sn);
	}
	i__1 = *j1 - 1;
	srot_(&i__1, &t_ref(1, *j1), &c__1, &t_ref(1, j2), &c__1, &cs, &sn);

	t_ref(*j1, *j1) = t22;
	t_ref(j2, j2) = t11;

	if (*wantq) {

/*           Accumulate transformation in the matrix Q. */

	    srot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, j2), &c__1, &cs, &sn);
	}

    } else {

/*        Swapping involves at least one 2-by-2 block.   

          Copy the diagonal block of order N1+N2 to the local array D   
          and compute its norm. */

	nd = *n1 + *n2;
	slacpy_("Full", &nd, &nd, &t_ref(*j1, *j1), ldt, d__, &c__4);
	dnorm = slange_("Max", &nd, &nd, d__, &c__4, &work[1]);

/*        Compute machine-dependent threshold for test for accepting   
          swap. */

	eps = slamch_("P");
	smlnum = slamch_("S") / eps;
/* Computing MAX */
	r__1 = eps * 10.f * dnorm;
	thresh = dmax(r__1,smlnum);

/*        Solve T11*X - X*T22 = scale*T12 for X. */

	slasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d___ref(*n1 + 
		1, *n1 + 1), &c__4, &d___ref(1, *n1 + 1), &c__4, &scale, x, &
		c__2, &xnorm, &ierr);

/*        Swap the adjacent diagonal blocks. */

	k = *n1 + *n1 + *n2 - 3;
	switch (k) {
	    case 1:  goto L10;
	    case 2:  goto L20;
	    case 3:  goto L30;
	}

L10:

/*        N1 = 1, N2 = 2: generate elementary reflector H so that:   

          ( scale, X11, X12 ) H = ( 0, 0, * ) */

	u[0] = scale;
	u[1] = x_ref(1, 1);
	u[2] = x_ref(1, 2);
	slarfg_(&c__3, &u[2], u, &c__1, &tau);
	u[2] = 1.f;
	t11 = t_ref(*j1, *j1);

/*        Perform swap provisionally on diagonal block in D. */

	slarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]);
	slarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]);

/*        Test whether to reject swap.   

   Computing MAX */
	r__4 = (r__1 = d___ref(3, 1), dabs(r__1)), r__5 = (r__2 = d___ref(3, 
		2), dabs(r__2)), r__4 = max(r__4,r__5), r__5 = (r__3 = 
		d___ref(3, 3) - t11, dabs(r__3));
	if (dmax(r__4,r__5) > thresh) {
	    goto L50;
	}

/*        Accept swap: apply transformation to the entire matrix T. */

	i__1 = *n - *j1 + 1;
	slarfx_("L", &c__3, &i__1, u, &tau, &t_ref(*j1, *j1), ldt, &work[1]);
	slarfx_("R", &j2, &c__3, u, &tau, &t_ref(1, *j1), ldt, &work[1]);

	t_ref(j3, *j1) = 0.f;
	t_ref(j3, j2) = 0.f;
	t_ref(j3, j3) = t11;

	if (*wantq) {

/*           Accumulate transformation in the matrix Q. */

	    slarfx_("R", n, &c__3, u, &tau, &q_ref(1, *j1), ldq, &work[1]);
	}
	goto L40;

L20:

/*        N1 = 2, N2 = 1: generate elementary reflector H so that:   

          H (  -X11 ) = ( * )   
            (  -X21 ) = ( 0 )   
            ( scale ) = ( 0 ) */

	u[0] = -x_ref(1, 1);
	u[1] = -x_ref(2, 1);
	u[2] = scale;
	slarfg_(&c__3, u, &u[1], &c__1, &tau);
	u[0] = 1.f;
	t33 = t_ref(j3, j3);

/*        Perform swap provisionally on diagonal block in D. */

	slarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]);
	slarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]);

/*        Test whether to reject swap.   

   Computing MAX */
	r__4 = (r__1 = d___ref(2, 1), dabs(r__1)), r__5 = (r__2 = d___ref(3, 
		1), dabs(r__2)), r__4 = max(r__4,r__5), r__5 = (r__3 = 
		d___ref(1, 1) - t33, dabs(r__3));
	if (dmax(r__4,r__5) > thresh) {
	    goto L50;
	}

/*        Accept swap: apply transformation to the entire matrix T. */

	slarfx_("R", &j3, &c__3, u, &tau, &t_ref(1, *j1), ldt, &work[1]);
	i__1 = *n - *j1;
	slarfx_("L", &c__3, &i__1, u, &tau, &t_ref(*j1, j2), ldt, &work[1]);

	t_ref(*j1, *j1) = t33;
	t_ref(j2, *j1) = 0.f;
	t_ref(j3, *j1) = 0.f;

	if (*wantq) {

/*           Accumulate transformation in the matrix Q. */

	    slarfx_("R", n, &c__3, u, &tau, &q_ref(1, *j1), ldq, &work[1]);
	}
	goto L40;

L30:

/*        N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so   
          that:   

          H(2) H(1) (  -X11  -X12 ) = (  *  * )   
                    (  -X21  -X22 )   (  0  * )   
                    ( scale    0  )   (  0  0 )   
                    (    0  scale )   (  0  0 ) */

	u1[0] = -x_ref(1, 1);
	u1[1] = -x_ref(2, 1);
	u1[2] = scale;
	slarfg_(&c__3, u1, &u1[1], &c__1, &tau1);
	u1[0] = 1.f;

	temp = -tau1 * (x_ref(1, 2) + u1[1] * x_ref(2, 2));
	u2[0] = -temp * u1[1] - x_ref(2, 2);
	u2[1] = -temp * u1[2];
	u2[2] = scale;
	slarfg_(&c__3, u2, &u2[1], &c__1, &tau2);
	u2[0] = 1.f;

/*        Perform swap provisionally on diagonal block in D. */

	slarfx_("L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1])
		;
	slarfx_("R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1])
		;
	slarfx_("L", &c__3, &c__4, u2, &tau2, &d___ref(2, 1), &c__4, &work[1]);
	slarfx_("R", &c__4, &c__3, u2, &tau2, &d___ref(1, 2), &c__4, &work[1]);

/*        Test whether to reject swap.   

   Computing MAX */
	r__5 = (r__1 = d___ref(3, 1), dabs(r__1)), r__6 = (r__2 = d___ref(3, 
		2), dabs(r__2)), r__5 = max(r__5,r__6), r__6 = (r__3 = 
		d___ref(4, 1), dabs(r__3)), r__5 = max(r__5,r__6), r__6 = (
		r__4 = d___ref(4, 2), dabs(r__4));
	if (dmax(r__5,r__6) > thresh) {
	    goto L50;
	}

/*        Accept swap: apply transformation to the entire matrix T. */

	i__1 = *n - *j1 + 1;
	slarfx_("L", &c__3, &i__1, u1, &tau1, &t_ref(*j1, *j1), ldt, &work[1]);
	slarfx_("R", &j4, &c__3, u1, &tau1, &t_ref(1, *j1), ldt, &work[1]);
	i__1 = *n - *j1 + 1;
	slarfx_("L", &c__3, &i__1, u2, &tau2, &t_ref(j2, *j1), ldt, &work[1]);
	slarfx_("R", &j4, &c__3, u2, &tau2, &t_ref(1, j2), ldt, &work[1]);

	t_ref(j3, *j1) = 0.f;
	t_ref(j3, j2) = 0.f;
	t_ref(j4, *j1) = 0.f;
	t_ref(j4, j2) = 0.f;

	if (*wantq) {

/*           Accumulate transformation in the matrix Q. */

	    slarfx_("R", n, &c__3, u1, &tau1, &q_ref(1, *j1), ldq, &work[1]);
	    slarfx_("R", n, &c__3, u2, &tau2, &q_ref(1, j2), ldq, &work[1]);
	}

L40:

	if (*n2 == 2) {

/*           Standardize new 2-by-2 block T11 */

	    slanv2_(&t_ref(*j1, *j1), &t_ref(*j1, j2), &t_ref(j2, *j1), &
		    t_ref(j2, j2), &wr1, &wi1, &wr2, &wi2, &cs, &sn);
	    i__1 = *n - *j1 - 1;
	    srot_(&i__1, &t_ref(*j1, *j1 + 2), ldt, &t_ref(j2, *j1 + 2), ldt, 
		    &cs, &sn);
	    i__1 = *j1 - 1;
	    srot_(&i__1, &t_ref(1, *j1), &c__1, &t_ref(1, j2), &c__1, &cs, &
		    sn);
	    if (*wantq) {
		srot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, j2), &c__1, &cs, &
			sn);
	    }
	}

	if (*n1 == 2) {

/*           Standardize new 2-by-2 block T22 */

	    j3 = *j1 + *n2;
	    j4 = j3 + 1;
	    slanv2_(&t_ref(j3, j3), &t_ref(j3, j4), &t_ref(j4, j3), &t_ref(j4,
		     j4), &wr1, &wi1, &wr2, &wi2, &cs, &sn);
	    if (j3 + 2 <= *n) {
		i__1 = *n - j3 - 1;
		srot_(&i__1, &t_ref(j3, j3 + 2), ldt, &t_ref(j4, j3 + 2), ldt,
			 &cs, &sn);
	    }
	    i__1 = j3 - 1;
	    srot_(&i__1, &t_ref(1, j3), &c__1, &t_ref(1, j4), &c__1, &cs, &sn)
		    ;
	    if (*wantq) {
		srot_(n, &q_ref(1, j3), &c__1, &q_ref(1, j4), &c__1, &cs, &sn)
			;
	    }
	}

    }
    return 0;

/*     Exit with INFO = 1 if swap was rejected. */

L50:
    *info = 1;
    return 0;

/*     End of SLAEXC */

} /* slaexc_ */
Пример #4
0
/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, 
	integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, 
	integer *ldbx, integer *perm, integer *givptr, integer *givcol, 
	integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real *
	difl, real *difr, real *z__, integer *k, real *c__, real *s, real *
	work, integer *info)
{
    /* System generated locals */
    integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, 
	    difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, 
	    poles_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    integer i__, j, m, n;
    real dj;
    integer nlp1;
    real temp;
    real diflj, difrj, dsigj;
    real dsigjp;

/*  -- LAPACK routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SLALS0 applies back the multiplying factors of either the left or the */
/*  right singular vector matrix of a diagonal matrix appended by a row */
/*  to the right hand side matrix B in solving the least squares problem */
/*  using the divide-and-conquer SVD approach. */

/*  For the left singular vector matrix, three types of orthogonal */
/*  matrices are involved: */

/*  (1L) Givens rotations: the number of such rotations is GIVPTR; the */
/*       pairs of columns/rows they were applied to are stored in GIVCOL; */
/*       and the C- and S-values of these rotations are stored in GIVNUM. */

/*  (2L) Permutation. The (NL+1)-st row of B is to be moved to the first */
/*       row, and for J=2:N, PERM(J)-th row of B is to be moved to the */
/*       J-th row. */

/*  (3L) The left singular vector matrix of the remaining matrix. */

/*  For the right singular vector matrix, four types of orthogonal */
/*  matrices are involved: */

/*  (1R) The right singular vector matrix of the remaining matrix. */

/*  (2R) If SQRE = 1, one extra Givens rotation to generate the right */
/*       null space. */

/*  (3R) The inverse transformation of (2L). */

/*  (4R) The inverse transformation of (1L). */

/*  Arguments */
/*  ========= */

/*  ICOMPQ (input) INTEGER */
/*         Specifies whether singular vectors are to be computed in */
/*         factored form: */
/*         = 0: Left singular vector matrix. */
/*         = 1: Right singular vector matrix. */

/*  NL     (input) INTEGER */
/*         The row dimension of the upper block. NL >= 1. */

/*  NR     (input) INTEGER */
/*         The row dimension of the lower block. NR >= 1. */

/*  SQRE   (input) INTEGER */
/*         = 0: the lower block is an NR-by-NR square matrix. */
/*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */

/*         The bidiagonal matrix has row dimension N = NL + NR + 1, */
/*         and column dimension M = N + SQRE. */

/*  NRHS   (input) INTEGER */
/*         The number of columns of B and BX. NRHS must be at least 1. */

/*  B      (input/output) REAL array, dimension ( LDB, NRHS ) */
/*         On input, B contains the right hand sides of the least */
/*         squares problem in rows 1 through M. On output, B contains */
/*         the solution X in rows 1 through N. */

/*  LDB    (input) INTEGER */
/*         The leading dimension of B. LDB must be at least */
/*         max(1,MAX( M, N ) ). */

/*  BX     (workspace) REAL array, dimension ( LDBX, NRHS ) */

/*  LDBX   (input) INTEGER */
/*         The leading dimension of BX. */

/*  PERM   (input) INTEGER array, dimension ( N ) */
/*         The permutations (from deflation and sorting) applied */
/*         to the two blocks. */

/*  GIVPTR (input) INTEGER */
/*         The number of Givens rotations which took place in this */
/*         subproblem. */

/*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) */
/*         Each pair of numbers indicates a pair of rows/columns */
/*         involved in a Givens rotation. */

/*  LDGCOL (input) INTEGER */
/*         The leading dimension of GIVCOL, must be at least N. */

/*  GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) */
/*         Each number indicates the C or S value used in the */
/*         corresponding Givens rotation. */

/*  LDGNUM (input) INTEGER */
/*         The leading dimension of arrays DIFR, POLES and */
/*         GIVNUM, must be at least K. */

/*  POLES  (input) REAL array, dimension ( LDGNUM, 2 ) */
/*         On entry, POLES(1:K, 1) contains the new singular */
/*         values obtained from solving the secular equation, and */
/*         POLES(1:K, 2) is an array containing the poles in the secular */
/*         equation. */

/*  DIFL   (input) REAL array, dimension ( K ). */
/*         On entry, DIFL(I) is the distance between I-th updated */
/*         (undeflated) singular value and the I-th (undeflated) old */
/*         singular value. */

/*  DIFR   (input) REAL array, dimension ( LDGNUM, 2 ). */
/*         On entry, DIFR(I, 1) contains the distances between I-th */
/*         updated (undeflated) singular value and the I+1-th */
/*         (undeflated) old singular value. And DIFR(I, 2) is the */
/*         normalizing factor for the I-th right singular vector. */

/*  Z      (input) REAL array, dimension ( K ) */
/*         Contain the components of the deflation-adjusted updating row */
/*         vector. */

/*  K      (input) INTEGER */
/*         Contains the dimension of the non-deflated matrix, */
/*         This is the order of the related secular equation. 1 <= K <=N. */

/*  C      (input) REAL */
/*         C contains garbage if SQRE =0 and the C-value of a Givens */
/*         rotation related to the right null space if SQRE = 1. */

/*  S      (input) REAL */
/*         S contains garbage if SQRE =0 and the S-value of a Givens */
/*         rotation related to the right null space if SQRE = 1. */

/*  WORK   (workspace) REAL array, dimension ( K ) */

/*  INFO   (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Ming Gu and Ren-Cang Li, Computer Science Division, University of */
/*       California at Berkeley, USA */
/*     Osni Marques, LBNL/NERSC, USA */

/*  ===================================================================== */

/*     Test the input parameters. */

    /* Parameter adjustments */
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    bx_dim1 = *ldbx;
    bx_offset = 1 + bx_dim1;
    bx -= bx_offset;
    --perm;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1;
    givcol -= givcol_offset;
    difr_dim1 = *ldgnum;
    difr_offset = 1 + difr_dim1;
    difr -= difr_offset;
    poles_dim1 = *ldgnum;
    poles_offset = 1 + poles_dim1;
    poles -= poles_offset;
    givnum_dim1 = *ldgnum;
    givnum_offset = 1 + givnum_dim1;
    givnum -= givnum_offset;
    --difl;
    --z__;
    --work;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*nl < 1) {
	*info = -2;
    } else if (*nr < 1) {
	*info = -3;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -4;
    }

    n = *nl + *nr + 1;

    if (*nrhs < 1) {
	*info = -5;
    } else if (*ldb < n) {
	*info = -7;
    } else if (*ldbx < n) {
	*info = -9;
    } else if (*givptr < 0) {
	*info = -11;
    } else if (*ldgcol < n) {
	*info = -13;
    } else if (*ldgnum < n) {
	*info = -15;
    } else if (*k < 1) {
	*info = -20;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLALS0", &i__1);
	return 0;
    }

    m = n + *sqre;
    nlp1 = *nl + 1;

    if (*icompq == 0) {

/*        Apply back orthogonal transformations from the left. */

/*        Step (1L): apply back the Givens rotations performed. */

	i__1 = *givptr;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, &
		    b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + 
		    (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]);
	}

/*        Step (2L): permute rows of B. */

	scopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx);
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    scopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], 
		    ldbx);
	}

/*        Step (3L): apply the inverse of the left singular vector */
/*        matrix to BX. */

	if (*k == 1) {
	    scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb);
	    if (z__[1] < 0.f) {
		sscal_(nrhs, &c_b5, &b[b_offset], ldb);
	    }
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		diflj = difl[j];
		dj = poles[j + poles_dim1];
		dsigj = -poles[j + (poles_dim1 << 1)];
		if (j < *k) {
		    difrj = -difr[j + difr_dim1];
		    dsigjp = -poles[j + 1 + (poles_dim1 << 1)];
		}
		if (z__[j] == 0.f || poles[j + (poles_dim1 << 1)] == 0.f) {
		    work[j] = 0.f;
		} else {
		    work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj /
			     (poles[j + (poles_dim1 << 1)] + dj);
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == 
			    0.f) {
			work[i__] = 0.f;
		    } else {
			work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] 
				/ (slamc3_(&poles[i__ + (poles_dim1 << 1)], &
				dsigj) - diflj) / (poles[i__ + (poles_dim1 << 
				1)] + dj);
		    }
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == 
			    0.f) {
			work[i__] = 0.f;
		    } else {
			work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] 
				/ (slamc3_(&poles[i__ + (poles_dim1 << 1)], &
				dsigjp) + difrj) / (poles[i__ + (poles_dim1 <<
				 1)] + dj);
		    }
		}
		work[1] = -1.f;
		temp = snrm2_(k, &work[1], &c__1);
		sgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], &
			c__1, &c_b13, &b[j + b_dim1], ldb);
		slascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b[j + 
			b_dim1], ldb, info);
	    }
	}

/*        Move the deflated rows of BX to B also. */

	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    slacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 
		    + b_dim1], ldb);
	}
    } else {

/*        Apply back the right orthogonal transformations. */

/*        Step (1R): apply back the new right singular vector matrix */
/*        to B. */

	if (*k == 1) {
	    scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx);
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		dsigj = poles[j + (poles_dim1 << 1)];
		if (z__[j] == 0.f) {
		    work[j] = 0.f;
		} else {
		    work[j] = -z__[j] / difl[j] / (dsigj + poles[j + 
			    poles_dim1]) / difr[j + (difr_dim1 << 1)];
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			work[i__] = 0.f;
		    } else {
			r__1 = -poles[i__ + 1 + (poles_dim1 << 1)];
			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[
				i__ + difr_dim1]) / (dsigj + poles[i__ + 
				poles_dim1]) / difr[i__ + (difr_dim1 << 1)];
		    }
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			work[i__] = 0.f;
		    } else {
			r__1 = -poles[i__ + (poles_dim1 << 1)];
			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[
				i__]) / (dsigj + poles[i__ + poles_dim1]) / 
				difr[i__ + (difr_dim1 << 1)];
		    }
		}
		sgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], &
			c__1, &c_b13, &bx[j + bx_dim1], ldbx);
	    }
	}

/*        Step (2R): if SQRE = 1, apply back the rotation that is */
/*        related to the right null space of the subproblem. */

	if (*sqre == 1) {
	    scopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx);
	    srot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, 
		    s);
	}
	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    slacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + 
		    bx_dim1], ldbx);
	}

/*        Step (3R): permute rows of B. */

	scopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb);
	if (*sqre == 1) {
	    scopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb);
	}
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    scopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], 
		    ldb);
	}

/*        Step (4R): apply back the Givens rotations performed. */

	for (i__ = *givptr; i__ >= 1; --i__) {
	    r__1 = -givnum[i__ + givnum_dim1];
	    srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, &
		    b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + 
		    (givnum_dim1 << 1)], &r__1);
	}
    }

    return 0;

/*     End of SLALS0 */

} /* slals0_ */
Пример #5
0
void srot(int N, float *x, int incx, float *y, int incy, float c, float s)
{
    srot_(&N, x, &incx, y, &incy, &c, &s);
}
Пример #6
0
/* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__,
                             real *q, integer *ldq, integer *indxq, real *rho, real *z__, real *
                             dlamda, real *w, real *q2, integer *indx, integer *indxc, integer *
                             indxp, integer *coltyp, integer *info)
{
    /* System generated locals */
    integer q_dim1, q_offset, i__1, i__2;
    real r__1, r__2, r__3, r__4;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    real c__;
    integer i__, j;
    real s, t;
    integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
    real eps, tau, tol;
    integer psm[4], imax, jmax, ctot[4];
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
                                      integer *, real *, real *), sscal_(integer *, real *, real *,
                                              integer *), scopy_(integer *, real *, integer *, real *, integer *
                                                                );
    extern doublereal slapy2_(real *, real *), slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer
                                        *, integer *, integer *), slacpy_(char *, integer *, integer *,
                                                real *, integer *, real *, integer *);


    /*  -- LAPACK routine (version 3.2) -- */
    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
    /*     November 2006 */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  SLAED2 merges the two sets of eigenvalues together into a single */
    /*  sorted set.  Then it tries to deflate the size of the problem. */
    /*  There are two ways in which deflation can occur:  when two or more */
    /*  eigenvalues are close together or if there is a tiny entry in the */
    /*  Z vector.  For each such occurrence the order of the related secular */
    /*  equation problem is reduced by one. */

    /*  Arguments */
    /*  ========= */

    /*  K      (output) INTEGER */
    /*         The number of non-deflated eigenvalues, and the order of the */
    /*         related secular equation. 0 <= K <=N. */

    /*  N      (input) INTEGER */
    /*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */

    /*  N1     (input) INTEGER */
    /*         The location of the last eigenvalue in the leading sub-matrix. */
    /*         min(1,N) <= N1 <= N/2. */

    /*  D      (input/output) REAL array, dimension (N) */
    /*         On entry, D contains the eigenvalues of the two submatrices to */
    /*         be combined. */
    /*         On exit, D contains the trailing (N-K) updated eigenvalues */
    /*         (those which were deflated) sorted into increasing order. */

    /*  Q      (input/output) REAL array, dimension (LDQ, N) */
    /*         On entry, Q contains the eigenvectors of two submatrices in */
    /*         the two square blocks with corners at (1,1), (N1,N1) */
    /*         and (N1+1, N1+1), (N,N). */
    /*         On exit, Q contains the trailing (N-K) updated eigenvectors */
    /*         (those which were deflated) in its last N-K columns. */

    /*  LDQ    (input) INTEGER */
    /*         The leading dimension of the array Q.  LDQ >= max(1,N). */

    /*  INDXQ  (input/output) INTEGER array, dimension (N) */
    /*         The permutation which separately sorts the two sub-problems */
    /*         in D into ascending order.  Note that elements in the second */
    /*         half of this permutation must first have N1 added to their */
    /*         values. Destroyed on exit. */

    /*  RHO    (input/output) REAL */
    /*         On entry, the off-diagonal element associated with the rank-1 */
    /*         cut which originally split the two submatrices which are now */
    /*         being recombined. */
    /*         On exit, RHO has been modified to the value required by */
    /*         SLAED3. */

    /*  Z      (input) REAL array, dimension (N) */
    /*         On entry, Z contains the updating vector (the last */
    /*         row of the first sub-eigenvector matrix and the first row of */
    /*         the second sub-eigenvector matrix). */
    /*         On exit, the contents of Z have been destroyed by the updating */
    /*         process. */

    /*  DLAMDA (output) REAL array, dimension (N) */
    /*         A copy of the first K eigenvalues which will be used by */
    /*         SLAED3 to form the secular equation. */

    /*  W      (output) REAL array, dimension (N) */
    /*         The first k values of the final deflation-altered z-vector */
    /*         which will be passed to SLAED3. */

    /*  Q2     (output) REAL array, dimension (N1**2+(N-N1)**2) */
    /*         A copy of the first K eigenvectors which will be used by */
    /*         SLAED3 in a matrix multiply (SGEMM) to solve for the new */
    /*         eigenvectors. */

    /*  INDX   (workspace) INTEGER array, dimension (N) */
    /*         The permutation used to sort the contents of DLAMDA into */
    /*         ascending order. */

    /*  INDXC  (output) INTEGER array, dimension (N) */
    /*         The permutation used to arrange the columns of the deflated */
    /*         Q matrix into three groups:  the first group contains non-zero */
    /*         elements only at and above N1, the second contains */
    /*         non-zero elements only below N1, and the third is dense. */

    /*  INDXP  (workspace) INTEGER array, dimension (N) */
    /*         The permutation used to place deflated values of D at the end */
    /*         of the array.  INDXP(1:K) points to the nondeflated D-values */
    /*         and INDXP(K+1:N) points to the deflated eigenvalues. */

    /*  COLTYP (workspace/output) INTEGER array, dimension (N) */
    /*         During execution, a label which will indicate which of the */
    /*         following types a column in the Q2 matrix is: */
    /*         1 : non-zero in the upper half only; */
    /*         2 : dense; */
    /*         3 : non-zero in the lower half only; */
    /*         4 : deflated. */
    /*         On exit, COLTYP(i) is the number of columns of type i, */
    /*         for i=1 to 4 only. */

    /*  INFO   (output) INTEGER */
    /*          = 0:  successful exit. */
    /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

    /*  Further Details */
    /*  =============== */

    /*  Based on contributions by */
    /*     Jeff Rutter, Computer Science Division, University of California */
    /*     at Berkeley, USA */
    /*  Modified by Francoise Tisseur, University of Tennessee. */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Arrays .. */
    /*     .. */
    /*     .. Local Scalars .. */
    /*     .. */
    /*     .. External Functions .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. Intrinsic Functions .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --indxq;
    --z__;
    --dlamda;
    --w;
    --q2;
    --indx;
    --indxc;
    --indxp;
    --coltyp;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
        *info = -2;
    } else if (*ldq < max(1,*n)) {
        *info = -6;
    } else { /* if(complicated condition) */
        /* Computing MIN */
        i__1 = 1, i__2 = *n / 2;
        if (min(i__1,i__2) > *n1 || *n / 2 < *n1) {
            *info = -3;
        }
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SLAED2", &i__1);
        return 0;
    }

    /*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

    n2 = *n - *n1;
    n1p1 = *n1 + 1;

    if (*rho < 0.f) {
        sscal_(&n2, &c_b3, &z__[n1p1], &c__1);
    }

    /*     Normalize z so that norm(z) = 1.  Since z is the concatenation of */
    /*     two normalized vectors, norm2(z) = sqrt(2). */

    t = 1.f / sqrt(2.f);
    sscal_(n, &t, &z__[1], &c__1);

    /*     RHO = ABS( norm(z)**2 * RHO ) */

    *rho = (r__1 = *rho * 2.f, dabs(r__1));

    /*     Sort the eigenvalues into increasing order */

    i__1 = *n;
    for (i__ = n1p1; i__ <= i__1; ++i__) {
        indxq[i__] += *n1;
        /* L10: */
    }

    /*     re-integrate the deflated parts from the last pass */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        dlamda[i__] = d__[indxq[i__]];
        /* L20: */
    }
    slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        indx[i__] = indxq[indxc[i__]];
        /* L30: */
    }

    /*     Calculate the allowable deflation tolerance */

    imax = isamax_(n, &z__[1], &c__1);
    jmax = isamax_(n, &d__[1], &c__1);
    eps = slamch_("Epsilon");
    /* Computing MAX */
    r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs(
                r__2));
    tol = eps * 8.f * dmax(r__3,r__4);

    /*     If the rank-1 modifier is small enough, no more needs to be done */
    /*     except to reorganize Q so that its columns correspond with the */
    /*     elements in D. */

    if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
        *k = 0;
        iq2 = 1;
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__ = indx[j];
            scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
            dlamda[j] = d__[i__];
            iq2 += *n;
            /* L40: */
        }
        slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
        scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
        goto L190;
    }

    /*     If there are multiple eigenvalues then the problem deflates.  Here */
    /*     the number of equal eigenvalues are found.  As each equal */
    /*     eigenvalue is found, an elementary reflector is computed to rotate */
    /*     the corresponding eigensubspace so that the corresponding */
    /*     components of Z are zero in this new basis. */

    i__1 = *n1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        coltyp[i__] = 1;
        /* L50: */
    }
    i__1 = *n;
    for (i__ = n1p1; i__ <= i__1; ++i__) {
        coltyp[i__] = 3;
        /* L60: */
    }


    *k = 0;
    k2 = *n + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        nj = indx[j];
        if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {

            /*           Deflate due to small z component. */

            --k2;
            coltyp[nj] = 4;
            indxp[k2] = nj;
            if (j == *n) {
                goto L100;
            }
        } else {
            pj = nj;
            goto L80;
        }
        /* L70: */
    }
L80:
    ++j;
    nj = indx[j];
    if (j > *n) {
        goto L100;
    }
    if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {

        /*        Deflate due to small z component. */

        --k2;
        coltyp[nj] = 4;
        indxp[k2] = nj;
    } else {

        /*        Check if eigenvalues are close enough to allow deflation. */

        s = z__[pj];
        c__ = z__[nj];

        /*        Find sqrt(a**2+b**2) without overflow or */
        /*        destructive underflow. */

        tau = slapy2_(&c__, &s);
        t = d__[nj] - d__[pj];
        c__ /= tau;
        s = -s / tau;
        if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {

            /*           Deflation is possible. */

            z__[nj] = tau;
            z__[pj] = 0.f;
            if (coltyp[nj] != coltyp[pj]) {
                coltyp[nj] = 2;
            }
            coltyp[pj] = 4;
            srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &
                  c__, &s);
            /* Computing 2nd power */
            r__1 = c__;
            /* Computing 2nd power */
            r__2 = s;
            t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
            /* Computing 2nd power */
            r__1 = s;
            /* Computing 2nd power */
            r__2 = c__;
            d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
            d__[pj] = t;
            --k2;
            i__ = 1;
L90:
            if (k2 + i__ <= *n) {
                if (d__[pj] < d__[indxp[k2 + i__]]) {
                    indxp[k2 + i__ - 1] = indxp[k2 + i__];
                    indxp[k2 + i__] = pj;
                    ++i__;
                    goto L90;
                } else {
                    indxp[k2 + i__ - 1] = pj;
                }
            } else {
                indxp[k2 + i__ - 1] = pj;
            }
            pj = nj;
        } else {
            ++(*k);
            dlamda[*k] = d__[pj];
            w[*k] = z__[pj];
            indxp[*k] = pj;
            pj = nj;
        }
    }
    goto L80;
L100:

    /*     Record the last eigenvalue. */

    ++(*k);
    dlamda[*k] = d__[pj];
    w[*k] = z__[pj];
    indxp[*k] = pj;

    /*     Count up the total number of the various types of columns, then */
    /*     form a permutation which positions the four column types into */
    /*     four uniform groups (although one or more of these groups may be */
    /*     empty). */

    for (j = 1; j <= 4; ++j) {
        ctot[j - 1] = 0;
        /* L110: */
    }
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        ct = coltyp[j];
        ++ctot[ct - 1];
        /* L120: */
    }

    /*     PSM(*) = Position in SubMatrix (of types 1 through 4) */

    psm[0] = 1;
    psm[1] = ctot[0] + 1;
    psm[2] = psm[1] + ctot[1];
    psm[3] = psm[2] + ctot[2];
    *k = *n - ctot[3];

    /*     Fill out the INDXC array so that the permutation which it induces */
    /*     will place all type-1 columns first, all type-2 columns next, */
    /*     then all type-3's, and finally all type-4's. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        js = indxp[j];
        ct = coltyp[js];
        indx[psm[ct - 1]] = js;
        indxc[psm[ct - 1]] = j;
        ++psm[ct - 1];
        /* L130: */
    }

    /*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
    /*     and Q2 respectively.  The eigenvalues/vectors which were not */
    /*     deflated go into the first K slots of DLAMDA and Q2 respectively, */
    /*     while those which were deflated go into the last N - K slots. */

    i__ = 1;
    iq1 = 1;
    iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
    i__1 = ctot[0];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq1 += *n1;
        /* L140: */
    }

    i__1 = ctot[1];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
        scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq1 += *n1;
        iq2 += n2;
        /* L150: */
    }

    i__1 = ctot[2];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq2 += n2;
        /* L160: */
    }

    iq1 = iq2;
    i__1 = ctot[3];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
        iq2 += *n;
        z__[i__] = d__[js];
        ++i__;
        /* L170: */
    }

    /*     The deflated eigenvalues and their corresponding vectors go back */
    /*     into the last N - K slots of D and Q respectively. */

    slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq);
    i__1 = *n - *k;
    scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);

    /*     Copy CTOT into COLTYP for referencing in SLAED3. */

    for (j = 1; j <= 4; ++j) {
        coltyp[j] = ctot[j - 1];
        /* L180: */
    }

L190:
    return 0;

    /*     End of SLAED2 */

} /* slaed2_ */
Пример #7
0
GURLS_EXPORT void rot(int *N, float *X, int *incX, float *Y, int *incY, float *c, float *s)
{
srot_(N, X, incX, Y, incY, c, s);
}
Пример #8
0
/* Subroutine */ int ssbtrd_(char *vect, char *uplo, integer *n, integer *kd, 
	real *ab, integer *ldab, real *d__, real *e, real *q, integer *ldq, 
	real *work, integer *info)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, q_dim1, q_offset, i__1, i__2, i__3, i__4, 
	    i__5;

    /* Local variables */
    integer i__, j, k, l, i2, j1, j2, nq, nr, kd1, ibl, iqb, kdn, jin, nrt, 
	    kdm1, inca, jend, lend, jinc, incx, last;
    real temp;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    integer j1end, j1inc, iqend;
    extern logical lsame_(char *, char *);
    logical initq, wantq, upper;
    extern /* Subroutine */ int slar2v_(integer *, real *, real *, real *, 
	    integer *, real *, real *, integer *);
    integer iqaend;
    extern /* Subroutine */ int xerbla_(char *, integer *), slaset_(
	    char *, integer *, integer *, real *, real *, real *, integer *), slartg_(real *, real *, real *, real *, real *), slargv_(
	    integer *, real *, integer *, real *, integer *, real *, integer *
), slartv_(integer *, real *, integer *, real *, integer *, real *
, real *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SSBTRD reduces a real symmetric band matrix A to symmetric */
/*  tridiagonal form T by an orthogonal similarity transformation: */
/*  Q**T * A * Q = T. */

/*  Arguments */
/*  ========= */

/*  VECT    (input) CHARACTER*1 */
/*          = 'N':  do not form Q; */
/*          = 'V':  form Q; */
/*          = 'U':  update a matrix X, by forming X*Q. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A is stored; */
/*          = 'L':  Lower triangle of A is stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  KD      (input) INTEGER */
/*          The number of superdiagonals of the matrix A if UPLO = 'U', */
/*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */

/*  AB      (input/output) REAL array, dimension (LDAB,N) */
/*          On entry, the upper or lower triangle of the symmetric band */
/*          matrix A, stored in the first KD+1 rows of the array.  The */
/*          j-th column of A is stored in the j-th column of the array AB */
/*          as follows: */
/*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */
/*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd). */
/*          On exit, the diagonal elements of AB are overwritten by the */
/*          diagonal elements of the tridiagonal matrix T; if KD > 0, the */
/*          elements on the first superdiagonal (if UPLO = 'U') or the */
/*          first subdiagonal (if UPLO = 'L') are overwritten by the */
/*          off-diagonal elements of T; the rest of AB is overwritten by */
/*          values generated during the reduction. */

/*  LDAB    (input) INTEGER */
/*          The leading dimension of the array AB.  LDAB >= KD+1. */

/*  D       (output) REAL array, dimension (N) */
/*          The diagonal elements of the tridiagonal matrix T. */

/*  E       (output) REAL array, dimension (N-1) */
/*          The off-diagonal elements of the tridiagonal matrix T: */
/*          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. */

/*  Q       (input/output) REAL array, dimension (LDQ,N) */
/*          On entry, if VECT = 'U', then Q must contain an N-by-N */
/*          matrix X; if VECT = 'N' or 'V', then Q need not be set. */

/*          On exit: */
/*          if VECT = 'V', Q contains the N-by-N orthogonal matrix Q; */
/*          if VECT = 'U', Q contains the product X*Q; */
/*          if VECT = 'N', the array Q is not referenced. */

/*  LDQ     (input) INTEGER */
/*          The leading dimension of the array Q. */
/*          LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. */

/*  WORK    (workspace) REAL array, dimension (N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  Further Details */
/*  =============== */

/*  Modified by Linda Kaufman, Bell Labs. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;
    --d__;
    --e;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --work;

    /* Function Body */
    initq = lsame_(vect, "V");
    wantq = initq || lsame_(vect, "U");
    upper = lsame_(uplo, "U");
    kd1 = *kd + 1;
    kdm1 = *kd - 1;
    incx = *ldab - 1;
    iqend = 1;

    *info = 0;
    if (! wantq && ! lsame_(vect, "N")) {
	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*kd < 0) {
	*info = -4;
    } else if (*ldab < kd1) {
	*info = -6;
    } else if (*ldq < max(1,*n) && wantq) {
	*info = -10;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSBTRD", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Initialize Q to the unit matrix, if needed */

    if (initq) {
	slaset_("Full", n, n, &c_b9, &c_b10, &q[q_offset], ldq);
    }

/*     Wherever possible, plane rotations are generated and applied in */
/*     vector operations of length NR over the index set J1:J2:KD1. */

/*     The cosines and sines of the plane rotations are stored in the */
/*     arrays D and WORK. */

    inca = kd1 * *ldab;
/* Computing MIN */
    i__1 = *n - 1;
    kdn = min(i__1,*kd);
    if (upper) {

	if (*kd > 1) {

/*           Reduce to tridiagonal form, working with upper triangle */

	    nr = 0;
	    j1 = kdn + 2;
	    j2 = 1;

	    i__1 = *n - 2;
	    for (i__ = 1; i__ <= i__1; ++i__) {

/*              Reduce i-th row of matrix to tridiagonal form */

		for (k = kdn + 1; k >= 2; --k) {
		    j1 += kdn;
		    j2 += kdn;

		    if (nr > 0) {

/*                    generate plane rotations to annihilate nonzero */
/*                    elements which have been created outside the band */

			slargv_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &inca, &
				work[j1], &kd1, &d__[j1], &kd1);

/*                    apply rotations from the right */


/*                    Dependent on the the number of diagonals either */
/*                    SLARTV or SROT is used */

			if (nr >= (*kd << 1) - 1) {
			    i__2 = *kd - 1;
			    for (l = 1; l <= i__2; ++l) {
				slartv_(&nr, &ab[l + 1 + (j1 - 1) * ab_dim1], 
					&inca, &ab[l + j1 * ab_dim1], &inca, &
					d__[j1], &work[j1], &kd1);
/* L10: */
			    }

			} else {
			    jend = j1 + (nr - 1) * kd1;
			    i__2 = jend;
			    i__3 = kd1;
			    for (jinc = j1; i__3 < 0 ? jinc >= i__2 : jinc <= 
				    i__2; jinc += i__3) {
				srot_(&kdm1, &ab[(jinc - 1) * ab_dim1 + 2], &
					c__1, &ab[jinc * ab_dim1 + 1], &c__1, 
					&d__[jinc], &work[jinc]);
/* L20: */
			    }
			}
		    }


		    if (k > 2) {
			if (k <= *n - i__ + 1) {

/*                       generate plane rotation to annihilate a(i,i+k-1) */
/*                       within the band */

			    slartg_(&ab[*kd - k + 3 + (i__ + k - 2) * ab_dim1]
, &ab[*kd - k + 2 + (i__ + k - 1) * 
				    ab_dim1], &d__[i__ + k - 1], &work[i__ + 
				    k - 1], &temp);
			    ab[*kd - k + 3 + (i__ + k - 2) * ab_dim1] = temp;

/*                       apply rotation from the right */

			    i__3 = k - 3;
			    srot_(&i__3, &ab[*kd - k + 4 + (i__ + k - 2) * 
				    ab_dim1], &c__1, &ab[*kd - k + 3 + (i__ + 
				    k - 1) * ab_dim1], &c__1, &d__[i__ + k - 
				    1], &work[i__ + k - 1]);
			}
			++nr;
			j1 = j1 - kdn - 1;
		    }

/*                 apply plane rotations from both sides to diagonal */
/*                 blocks */

		    if (nr > 0) {
			slar2v_(&nr, &ab[kd1 + (j1 - 1) * ab_dim1], &ab[kd1 + 
				j1 * ab_dim1], &ab[*kd + j1 * ab_dim1], &inca, 
				 &d__[j1], &work[j1], &kd1);
		    }

/*                 apply plane rotations from the left */

		    if (nr > 0) {
			if ((*kd << 1) - 1 < nr) {

/*                    Dependent on the the number of diagonals either */
/*                    SLARTV or SROT is used */

			    i__3 = *kd - 1;
			    for (l = 1; l <= i__3; ++l) {
				if (j2 + l > *n) {
				    nrt = nr - 1;
				} else {
				    nrt = nr;
				}
				if (nrt > 0) {
				    slartv_(&nrt, &ab[*kd - l + (j1 + l) * 
					    ab_dim1], &inca, &ab[*kd - l + 1 
					    + (j1 + l) * ab_dim1], &inca, &
					    d__[j1], &work[j1], &kd1);
				}
/* L30: */
			    }
			} else {
			    j1end = j1 + kd1 * (nr - 2);
			    if (j1end >= j1) {
				i__3 = j1end;
				i__2 = kd1;
				for (jin = j1; i__2 < 0 ? jin >= i__3 : jin <=
					 i__3; jin += i__2) {
				    i__4 = *kd - 1;
				    srot_(&i__4, &ab[*kd - 1 + (jin + 1) * 
					    ab_dim1], &incx, &ab[*kd + (jin + 
					    1) * ab_dim1], &incx, &d__[jin], &
					    work[jin]);
/* L40: */
				}
			    }
/* Computing MIN */
			    i__2 = kdm1, i__3 = *n - j2;
			    lend = min(i__2,i__3);
			    last = j1end + kd1;
			    if (lend > 0) {
				srot_(&lend, &ab[*kd - 1 + (last + 1) * 
					ab_dim1], &incx, &ab[*kd + (last + 1) 
					* ab_dim1], &incx, &d__[last], &work[
					last]);
			    }
			}
		    }

		    if (wantq) {

/*                    accumulate product of plane rotations in Q */

			if (initq) {

/*                 take advantage of the fact that Q was */
/*                 initially the Identity matrix */

			    iqend = max(iqend,j2);
/* Computing MAX */
			    i__2 = 0, i__3 = k - 3;
			    i2 = max(i__2,i__3);
			    iqaend = i__ * *kd + 1;
			    if (k == 2) {
				iqaend += *kd;
			    }
			    iqaend = min(iqaend,iqend);
			    i__2 = j2;
			    i__3 = kd1;
			    for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j 
				    += i__3) {
				ibl = i__ - i2 / kdm1;
				++i2;
/* Computing MAX */
				i__4 = 1, i__5 = j - ibl;
				iqb = max(i__4,i__5);
				nq = iqaend + 1 - iqb;
/* Computing MIN */
				i__4 = iqaend + *kd;
				iqaend = min(i__4,iqend);
				srot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, 
					&q[iqb + j * q_dim1], &c__1, &d__[j], 
					&work[j]);
/* L50: */
			    }
			} else {

			    i__3 = j2;
			    i__2 = kd1;
			    for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j 
				    += i__2) {
				srot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[
					j * q_dim1 + 1], &c__1, &d__[j], &
					work[j]);
/* L60: */
			    }
			}

		    }

		    if (j2 + kdn > *n) {

/*                    adjust J2 to keep within the bounds of the matrix */

			--nr;
			j2 = j2 - kdn - 1;
		    }

		    i__2 = j2;
		    i__3 = kd1;
		    for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j += i__3) 
			    {

/*                    create nonzero element a(j-1,j+kd) outside the band */
/*                    and store it in WORK */

			work[j + *kd] = work[j] * ab[(j + *kd) * ab_dim1 + 1];
			ab[(j + *kd) * ab_dim1 + 1] = d__[j] * ab[(j + *kd) * 
				ab_dim1 + 1];
/* L70: */
		    }
/* L80: */
		}
/* L90: */
	    }
	}

	if (*kd > 0) {

/*           copy off-diagonal elements to E */

	    i__1 = *n - 1;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		e[i__] = ab[*kd + (i__ + 1) * ab_dim1];
/* L100: */
	    }
	} else {

/*           set E to zero if original matrix was diagonal */

	    i__1 = *n - 1;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		e[i__] = 0.f;
/* L110: */
	    }
	}

/*        copy diagonal elements to D */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    d__[i__] = ab[kd1 + i__ * ab_dim1];
/* L120: */
	}

    } else {

	if (*kd > 1) {

/*           Reduce to tridiagonal form, working with lower triangle */

	    nr = 0;
	    j1 = kdn + 2;
	    j2 = 1;

	    i__1 = *n - 2;
	    for (i__ = 1; i__ <= i__1; ++i__) {

/*              Reduce i-th column of matrix to tridiagonal form */

		for (k = kdn + 1; k >= 2; --k) {
		    j1 += kdn;
		    j2 += kdn;

		    if (nr > 0) {

/*                    generate plane rotations to annihilate nonzero */
/*                    elements which have been created outside the band */

			slargv_(&nr, &ab[kd1 + (j1 - kd1) * ab_dim1], &inca, &
				work[j1], &kd1, &d__[j1], &kd1);

/*                    apply plane rotations from one side */


/*                    Dependent on the the number of diagonals either */
/*                    SLARTV or SROT is used */

			if (nr > (*kd << 1) - 1) {
			    i__3 = *kd - 1;
			    for (l = 1; l <= i__3; ++l) {
				slartv_(&nr, &ab[kd1 - l + (j1 - kd1 + l) * 
					ab_dim1], &inca, &ab[kd1 - l + 1 + (
					j1 - kd1 + l) * ab_dim1], &inca, &d__[
					j1], &work[j1], &kd1);
/* L130: */
			    }
			} else {
			    jend = j1 + kd1 * (nr - 1);
			    i__3 = jend;
			    i__2 = kd1;
			    for (jinc = j1; i__2 < 0 ? jinc >= i__3 : jinc <= 
				    i__3; jinc += i__2) {
				srot_(&kdm1, &ab[*kd + (jinc - *kd) * ab_dim1]
, &incx, &ab[kd1 + (jinc - *kd) * 
					ab_dim1], &incx, &d__[jinc], &work[
					jinc]);
/* L140: */
			    }
			}

		    }

		    if (k > 2) {
			if (k <= *n - i__ + 1) {

/*                       generate plane rotation to annihilate a(i+k-1,i) */
/*                       within the band */

			    slartg_(&ab[k - 1 + i__ * ab_dim1], &ab[k + i__ * 
				    ab_dim1], &d__[i__ + k - 1], &work[i__ + 
				    k - 1], &temp);
			    ab[k - 1 + i__ * ab_dim1] = temp;

/*                       apply rotation from the left */

			    i__2 = k - 3;
			    i__3 = *ldab - 1;
			    i__4 = *ldab - 1;
			    srot_(&i__2, &ab[k - 2 + (i__ + 1) * ab_dim1], &
				    i__3, &ab[k - 1 + (i__ + 1) * ab_dim1], &
				    i__4, &d__[i__ + k - 1], &work[i__ + k - 
				    1]);
			}
			++nr;
			j1 = j1 - kdn - 1;
		    }

/*                 apply plane rotations from both sides to diagonal */
/*                 blocks */

		    if (nr > 0) {
			slar2v_(&nr, &ab[(j1 - 1) * ab_dim1 + 1], &ab[j1 * 
				ab_dim1 + 1], &ab[(j1 - 1) * ab_dim1 + 2], &
				inca, &d__[j1], &work[j1], &kd1);
		    }

/*                 apply plane rotations from the right */


/*                    Dependent on the the number of diagonals either */
/*                    SLARTV or SROT is used */

		    if (nr > 0) {
			if (nr > (*kd << 1) - 1) {
			    i__2 = *kd - 1;
			    for (l = 1; l <= i__2; ++l) {
				if (j2 + l > *n) {
				    nrt = nr - 1;
				} else {
				    nrt = nr;
				}
				if (nrt > 0) {
				    slartv_(&nrt, &ab[l + 2 + (j1 - 1) * 
					    ab_dim1], &inca, &ab[l + 1 + j1 * 
					    ab_dim1], &inca, &d__[j1], &work[
					    j1], &kd1);
				}
/* L150: */
			    }
			} else {
			    j1end = j1 + kd1 * (nr - 2);
			    if (j1end >= j1) {
				i__2 = j1end;
				i__3 = kd1;
				for (j1inc = j1; i__3 < 0 ? j1inc >= i__2 : 
					j1inc <= i__2; j1inc += i__3) {
				    srot_(&kdm1, &ab[(j1inc - 1) * ab_dim1 + 
					    3], &c__1, &ab[j1inc * ab_dim1 + 
					    2], &c__1, &d__[j1inc], &work[
					    j1inc]);
/* L160: */
				}
			    }
/* Computing MIN */
			    i__3 = kdm1, i__2 = *n - j2;
			    lend = min(i__3,i__2);
			    last = j1end + kd1;
			    if (lend > 0) {
				srot_(&lend, &ab[(last - 1) * ab_dim1 + 3], &
					c__1, &ab[last * ab_dim1 + 2], &c__1, 
					&d__[last], &work[last]);
			    }
			}
		    }



		    if (wantq) {

/*                    accumulate product of plane rotations in Q */

			if (initq) {

/*                 take advantage of the fact that Q was */
/*                 initially the Identity matrix */

			    iqend = max(iqend,j2);
/* Computing MAX */
			    i__3 = 0, i__2 = k - 3;
			    i2 = max(i__3,i__2);
			    iqaend = i__ * *kd + 1;
			    if (k == 2) {
				iqaend += *kd;
			    }
			    iqaend = min(iqaend,iqend);
			    i__3 = j2;
			    i__2 = kd1;
			    for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j 
				    += i__2) {
				ibl = i__ - i2 / kdm1;
				++i2;
/* Computing MAX */
				i__4 = 1, i__5 = j - ibl;
				iqb = max(i__4,i__5);
				nq = iqaend + 1 - iqb;
/* Computing MIN */
				i__4 = iqaend + *kd;
				iqaend = min(i__4,iqend);
				srot_(&nq, &q[iqb + (j - 1) * q_dim1], &c__1, 
					&q[iqb + j * q_dim1], &c__1, &d__[j], 
					&work[j]);
/* L170: */
			    }
			} else {

			    i__2 = j2;
			    i__3 = kd1;
			    for (j = j1; i__3 < 0 ? j >= i__2 : j <= i__2; j 
				    += i__3) {
				srot_(n, &q[(j - 1) * q_dim1 + 1], &c__1, &q[
					j * q_dim1 + 1], &c__1, &d__[j], &
					work[j]);
/* L180: */
			    }
			}
		    }

		    if (j2 + kdn > *n) {

/*                    adjust J2 to keep within the bounds of the matrix */

			--nr;
			j2 = j2 - kdn - 1;
		    }

		    i__3 = j2;
		    i__2 = kd1;
		    for (j = j1; i__2 < 0 ? j >= i__3 : j <= i__3; j += i__2) 
			    {

/*                    create nonzero element a(j+kd,j-1) outside the */
/*                    band and store it in WORK */

			work[j + *kd] = work[j] * ab[kd1 + j * ab_dim1];
			ab[kd1 + j * ab_dim1] = d__[j] * ab[kd1 + j * ab_dim1]
				;
/* L190: */
		    }
/* L200: */
		}
/* L210: */
	    }
	}

	if (*kd > 0) {

/*           copy off-diagonal elements to E */

	    i__1 = *n - 1;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		e[i__] = ab[i__ * ab_dim1 + 2];
/* L220: */
	    }
	} else {

/*           set E to zero if original matrix was diagonal */

	    i__1 = *n - 1;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		e[i__] = 0.f;
/* L230: */
	    }
	}

/*        copy diagonal elements to D */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    d__[i__] = ab[i__ * ab_dim1 + 1];
/* L240: */
	}
    }

    return 0;

/*     End of SSBTRD */

} /* ssbtrd_ */
Пример #9
0
 int slasd2_(int *nl, int *nr, int *sqre, int 
	*k, float *d__, float *z__, float *alpha, float *beta, float *u, int *
	ldu, float *vt, int *ldvt, float *dsigma, float *u2, int *ldu2, 
	float *vt2, int *ldvt2, int *idxp, int *idx, int *idxc, 
	 int *idxq, int *coltyp, int *info)
{
    /* System generated locals */
    int u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, 
	    vt2_dim1, vt2_offset, i__1;
    float r__1, r__2;

    /* Local variables */
    float c__;
    int i__, j, m, n;
    float s;
    int k2;
    float z1;
    int ct, jp;
    float eps, tau, tol;
    int psm[4], nlp1, nlp2, idxi, idxj, ctot[4];
    extern  int srot_(int *, float *, int *, float *, 
	    int *, float *, float *);
    int idxjp, jprev;
    extern  int scopy_(int *, float *, int *, float *, 
	    int *);
    extern double slapy2_(float *, float *), slamch_(char *);
    extern  int xerbla_(char *, int *), slamrg_(
	    int *, int *, float *, int *, int *, int *);
    float hlftol;
    extern  int slacpy_(char *, int *, int *, float *, 
	    int *, float *, int *), slaset_(char *, int *, 
	    int *, float *, float *, float *, int *);


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLASD2 merges the two sets of singular values together into a single */
/*  sorted set.  Then it tries to deflate the size of the problem. */
/*  There are two ways in which deflation can occur:  when two or more */
/*  singular values are close together or if there is a tiny entry in the */
/*  Z vector.  For each such occurrence the order of the related secular */
/*  equation problem is reduced by one. */

/*  SLASD2 is called from SLASD1. */

/*  Arguments */
/*  ========= */

/*  NL     (input) INTEGER */
/*         The row dimension of the upper block.  NL >= 1. */

/*  NR     (input) INTEGER */
/*         The row dimension of the lower block.  NR >= 1. */

/*  SQRE   (input) INTEGER */
/*         = 0: the lower block is an NR-by-NR square matrix. */
/*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */

/*         The bidiagonal matrix has N = NL + NR + 1 rows and */
/*         M = N + SQRE >= N columns. */

/*  K      (output) INTEGER */
/*         Contains the dimension of the non-deflated matrix, */
/*         This is the order of the related secular equation. 1 <= K <=N. */

/*  D      (input/output) REAL array, dimension (N) */
/*         On entry D contains the singular values of the two submatrices */
/*         to be combined.  On exit D contains the trailing (N-K) updated */
/*         singular values (those which were deflated) sorted into */
/*         increasing order. */

/*  Z      (output) REAL array, dimension (N) */
/*         On exit Z contains the updating row vector in the secular */
/*         equation. */

/*  ALPHA  (input) REAL */
/*         Contains the diagonal element associated with the added row. */

/*  BETA   (input) REAL */
/*         Contains the off-diagonal element associated with the added */
/*         row. */

/*  U      (input/output) REAL array, dimension (LDU,N) */
/*         On entry U contains the left singular vectors of two */
/*         submatrices in the two square blocks with corners at (1,1), */
/*         (NL, NL), and (NL+2, NL+2), (N,N). */
/*         On exit U contains the trailing (N-K) updated left singular */
/*         vectors (those which were deflated) in its last N-K columns. */

/*  LDU    (input) INTEGER */
/*         The leading dimension of the array U.  LDU >= N. */

/*  VT     (input/output) REAL array, dimension (LDVT,M) */
/*         On entry VT' contains the right singular vectors of two */
/*         submatrices in the two square blocks with corners at (1,1), */
/*         (NL+1, NL+1), and (NL+2, NL+2), (M,M). */
/*         On exit VT' contains the trailing (N-K) updated right singular */
/*         vectors (those which were deflated) in its last N-K columns. */
/*         In case SQRE =1, the last row of VT spans the right null */
/*         space. */

/*  LDVT   (input) INTEGER */
/*         The leading dimension of the array VT.  LDVT >= M. */

/*  DSIGMA (output) REAL array, dimension (N) */
/*         Contains a copy of the diagonal elements (K-1 singular values */
/*         and one zero) in the secular equation. */

/*  U2     (output) REAL array, dimension (LDU2,N) */
/*         Contains a copy of the first K-1 left singular vectors which */
/*         will be used by SLASD3 in a matrix multiply (SGEMM) to solve */
/*         for the new left singular vectors. U2 is arranged into four */
/*         blocks. The first block contains a column with 1 at NL+1 and */
/*         zero everywhere else; the second block contains non-zero */
/*         entries only at and above NL; the third contains non-zero */
/*         entries only below NL+1; and the fourth is dense. */

/*  LDU2   (input) INTEGER */
/*         The leading dimension of the array U2.  LDU2 >= N. */

/*  VT2    (output) REAL array, dimension (LDVT2,N) */
/*         VT2' contains a copy of the first K right singular vectors */
/*         which will be used by SLASD3 in a matrix multiply (SGEMM) to */
/*         solve for the new right singular vectors. VT2 is arranged into */
/*         three blocks. The first block contains a row that corresponds */
/*         to the special 0 diagonal element in SIGMA; the second block */
/*         contains non-zeros only at and before NL +1; the third block */
/*         contains non-zeros only at and after  NL +2. */

/*  LDVT2  (input) INTEGER */
/*         The leading dimension of the array VT2.  LDVT2 >= M. */

/*  IDXP   (workspace) INTEGER array, dimension (N) */
/*         This will contain the permutation used to place deflated */
/*         values of D at the end of the array. On output IDXP(2:K) */
/*         points to the nondeflated D-values and IDXP(K+1:N) */
/*         points to the deflated singular values. */

/*  IDX    (workspace) INTEGER array, dimension (N) */
/*         This will contain the permutation used to sort the contents of */
/*         D into ascending order. */

/*  IDXC   (output) INTEGER array, dimension (N) */
/*         This will contain the permutation used to arrange the columns */
/*         of the deflated U matrix into three groups:  the first group */
/*         contains non-zero entries only at and above NL, the second */
/*         contains non-zero entries only below NL+2, and the third is */
/*         dense. */

/*  IDXQ   (input/output) INTEGER array, dimension (N) */
/*         This contains the permutation which separately sorts the two */
/*         sub-problems in D into ascending order.  Note that entries in */
/*         the first hlaf of this permutation must first be moved one */
/*         position backward; and entries in the second half */
/*         must first have NL+1 added to their values. */

/*  COLTYP (workspace/output) INTEGER array, dimension (N) */
/*         As workspace, this will contain a label which will indicate */
/*         which of the following types a column in the U2 matrix or a */
/*         row in the VT2 matrix is: */
/*         1 : non-zero in the upper half only */
/*         2 : non-zero in the lower half only */
/*         3 : dense */
/*         4 : deflated */

/*         On exit, it is an array of dimension 4, with COLTYP(I) being */
/*         the dimension of the I-th type columns. */

/*  INFO   (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Ming Gu and Huan Ren, Computer Science Division, University of */
/*     California at Berkeley, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    --z__;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1;
    vt -= vt_offset;
    --dsigma;
    u2_dim1 = *ldu2;
    u2_offset = 1 + u2_dim1;
    u2 -= u2_offset;
    vt2_dim1 = *ldvt2;
    vt2_offset = 1 + vt2_dim1;
    vt2 -= vt2_offset;
    --idxp;
    --idx;
    --idxc;
    --idxq;
    --coltyp;

    /* Function Body */
    *info = 0;

    if (*nl < 1) {
	*info = -1;
    } else if (*nr < 1) {
	*info = -2;
    } else if (*sqre != 1 && *sqre != 0) {
	*info = -3;
    }

    n = *nl + *nr + 1;
    m = n + *sqre;

    if (*ldu < n) {
	*info = -10;
    } else if (*ldvt < m) {
	*info = -12;
    } else if (*ldu2 < n) {
	*info = -15;
    } else if (*ldvt2 < m) {
	*info = -17;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLASD2", &i__1);
	return 0;
    }

    nlp1 = *nl + 1;
    nlp2 = *nl + 2;

/*     Generate the first part of the vector Z; and move the singular */
/*     values in the first part of D one position backward. */

    z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1];
    z__[1] = z1;
    for (i__ = *nl; i__ >= 1; --i__) {
	z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1];
	d__[i__ + 1] = d__[i__];
	idxq[i__ + 1] = idxq[i__] + 1;
/* L10: */
    }

/*     Generate the second part of the vector Z. */

    i__1 = m;
    for (i__ = nlp2; i__ <= i__1; ++i__) {
	z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1];
/* L20: */
    }

/*     Initialize some reference arrays. */

    i__1 = nlp1;
    for (i__ = 2; i__ <= i__1; ++i__) {
	coltyp[i__] = 1;
/* L30: */
    }
    i__1 = n;
    for (i__ = nlp2; i__ <= i__1; ++i__) {
	coltyp[i__] = 2;
/* L40: */
    }

/*     Sort the singular values into increasing order */

    i__1 = n;
    for (i__ = nlp2; i__ <= i__1; ++i__) {
	idxq[i__] += nlp1;
/* L50: */
    }

/*     DSIGMA, IDXC, IDXC, and the first column of U2 */
/*     are used as storage space. */

    i__1 = n;
    for (i__ = 2; i__ <= i__1; ++i__) {
	dsigma[i__] = d__[idxq[i__]];
	u2[i__ + u2_dim1] = z__[idxq[i__]];
	idxc[i__] = coltyp[idxq[i__]];
/* L60: */
    }

    slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);

    i__1 = n;
    for (i__ = 2; i__ <= i__1; ++i__) {
	idxi = idx[i__] + 1;
	d__[i__] = dsigma[idxi];
	z__[i__] = u2[idxi + u2_dim1];
	coltyp[i__] = idxc[idxi];
/* L70: */
    }

/*     Calculate the allowable deflation tolerance */

    eps = slamch_("Epsilon");
/* Computing MAX */
    r__1 = ABS(*alpha), r__2 = ABS(*beta);
    tol = MAX(r__1,r__2);
/* Computing MAX */
    r__2 = (r__1 = d__[n], ABS(r__1));
    tol = eps * 8.f * MAX(r__2,tol);

/*     There are 2 kinds of deflation -- first a value in the z-vector */
/*     is small, second two (or more) singular values are very close */
/*     together (their difference is small). */

/*     If the value in the z-vector is small, we simply permute the */
/*     array so that the corresponding singular value is moved to the */
/*     end. */

/*     If two values in the D-vector are close, we perform a two-sided */
/*     rotation designed to make one of the corresponding z-vector */
/*     entries zero, and then permute the array so that the deflated */
/*     singular value is moved to the end. */

/*     If there are multiple singular values then the problem deflates. */
/*     Here the number of equal singular values are found.  As each equal */
/*     singular value is found, an elementary reflector is computed to */
/*     rotate the corresponding singular subspace so that the */
/*     corresponding components of Z are zero in this new basis. */

    *k = 1;
    k2 = n + 1;
    i__1 = n;
    for (j = 2; j <= i__1; ++j) {
	if ((r__1 = z__[j], ABS(r__1)) <= tol) {

/*           Deflate due to small z component. */

	    --k2;
	    idxp[k2] = j;
	    coltyp[j] = 4;
	    if (j == n) {
		goto L120;
	    }
	} else {
	    jprev = j;
	    goto L90;
	}
/* L80: */
    }
L90:
    j = jprev;
L100:
    ++j;
    if (j > n) {
	goto L110;
    }
    if ((r__1 = z__[j], ABS(r__1)) <= tol) {

/*        Deflate due to small z component. */

	--k2;
	idxp[k2] = j;
	coltyp[j] = 4;
    } else {

/*        Check if singular values are close enough to allow deflation. */

	if ((r__1 = d__[j] - d__[jprev], ABS(r__1)) <= tol) {

/*           Deflation is possible. */

	    s = z__[jprev];
	    c__ = z__[j];

/*           Find sqrt(a**2+b**2) without overflow or */
/*           destructive underflow. */

	    tau = slapy2_(&c__, &s);
	    c__ /= tau;
	    s = -s / tau;
	    z__[j] = tau;
	    z__[jprev] = 0.f;

/*           Apply back the Givens rotation to the left and right */
/*           singular vector matrices. */

	    idxjp = idxq[idx[jprev] + 1];
	    idxj = idxq[idx[j] + 1];
	    if (idxjp <= nlp1) {
		--idxjp;
	    }
	    if (idxj <= nlp1) {
		--idxj;
	    }
	    srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], &
		    c__1, &c__, &s);
	    srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, &
		    c__, &s);
	    if (coltyp[j] != coltyp[jprev]) {
		coltyp[j] = 3;
	    }
	    coltyp[jprev] = 4;
	    --k2;
	    idxp[k2] = jprev;
	    jprev = j;
	} else {
	    ++(*k);
	    u2[*k + u2_dim1] = z__[jprev];
	    dsigma[*k] = d__[jprev];
	    idxp[*k] = jprev;
	    jprev = j;
	}
    }
    goto L100;
L110:

/*     Record the last singular value. */

    ++(*k);
    u2[*k + u2_dim1] = z__[jprev];
    dsigma[*k] = d__[jprev];
    idxp[*k] = jprev;

L120:

/*     Count up the total number of the various types of columns, then */
/*     form a permutation which positions the four column types into */
/*     four groups of uniform structure (although one or more of these */
/*     groups may be empty). */

    for (j = 1; j <= 4; ++j) {
	ctot[j - 1] = 0;
/* L130: */
    }
    i__1 = n;
    for (j = 2; j <= i__1; ++j) {
	ct = coltyp[j];
	++ctot[ct - 1];
/* L140: */
    }

/*     PSM(*) = Position in SubMatrix (of types 1 through 4) */

    psm[0] = 2;
    psm[1] = ctot[0] + 2;
    psm[2] = psm[1] + ctot[1];
    psm[3] = psm[2] + ctot[2];

/*     Fill out the IDXC array so that the permutation which it induces */
/*     will place all type-1 columns first, all type-2 columns next, */
/*     then all type-3's, and finally all type-4's, starting from the */
/*     second column. This applies similarly to the rows of VT. */

    i__1 = n;
    for (j = 2; j <= i__1; ++j) {
	jp = idxp[j];
	ct = coltyp[jp];
	idxc[psm[ct - 1]] = j;
	++psm[ct - 1];
/* L150: */
    }

/*     Sort the singular values and corresponding singular vectors into */
/*     DSIGMA, U2, and VT2 respectively.  The singular values/vectors */
/*     which were not deflated go into the first K slots of DSIGMA, U2, */
/*     and VT2 respectively, while those which were deflated go into the */
/*     last N - K slots, except that the first column/row will be treated */
/*     separately. */

    i__1 = n;
    for (j = 2; j <= i__1; ++j) {
	jp = idxp[j];
	dsigma[j] = d__[jp];
	idxj = idxq[idx[idxp[idxc[j]]] + 1];
	if (idxj <= nlp1) {
	    --idxj;
	}
	scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1);
	scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2);
/* L160: */
    }

/*     Determine DSIGMA(1), DSIGMA(2) and Z(1) */

    dsigma[1] = 0.f;
    hlftol = tol / 2.f;
    if (ABS(dsigma[2]) <= hlftol) {
	dsigma[2] = hlftol;
    }
    if (m > n) {
	z__[1] = slapy2_(&z1, &z__[m]);
	if (z__[1] <= tol) {
	    c__ = 1.f;
	    s = 0.f;
	    z__[1] = tol;
	} else {
	    c__ = z1 / z__[1];
	    s = z__[m] / z__[1];
	}
    } else {
	if (ABS(z1) <= tol) {
	    z__[1] = tol;
	} else {
	    z__[1] = z1;
	}
    }

/*     Move the rest of the updating row to Z. */

    i__1 = *k - 1;
    scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1);

/*     Determine the first column of U2, the first row of VT2 and the */
/*     last row of VT. */

    slaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2);
    u2[nlp1 + u2_dim1] = 1.f;
    if (m > n) {
	i__1 = nlp1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1];
	    vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1];
/* L170: */
	}
	i__1 = m;
	for (i__ = nlp2; i__ <= i__1; ++i__) {
	    vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1];
	    vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1];
/* L180: */
	}
    } else {
	scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2);
    }
    if (m > n) {
	scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2);
    }

/*     The deflated singular values and their corresponding vectors go */
/*     into the back of D, U, and V respectively. */

    if (n > *k) {
	i__1 = n - *k;
	scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
	i__1 = n - *k;
	slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1)
		 * u_dim1 + 1], ldu);
	i__1 = n - *k;
	slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + 
		vt_dim1], ldvt);
    }

/*     Copy CTOT into COLTYP for referencing in SLASD3. */

    for (j = 1; j <= 4; ++j) {
	coltyp[j] = ctot[j - 1];
/* L190: */
    }

    return 0;

/*     End of SLASD2 */

} /* slasd2_ */
Пример #10
0
/* Subroutine */ int slaqrb_(logical *wantt, integer *n, integer *ilo, 
	integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, 
	integer *info)
{
    /* System generated locals */
    integer h_dim1, h_offset, i__1, i__2, i__3, i__4;
    real r__1, r__2;

    /* Local variables */
    static integer i__, j, k, l, m;
    static real s, v[3];
    static integer i1, i2;
    static real t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, h33, 
	    h44;
    static integer nh;
    static real cs;
    static integer nr;
    static real sn, h33s, h44s;
    static integer itn, its;
    static real ulp, sum, tst1, h43h34, unfl, ovfl, work[1];
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *), scopy_(integer *, real *, integer *, 
	    real *, integer *), slanv2_(real *, real *, real *, real *, real *
	    , real *, real *, real *, real *, real *), slabad_(real *, real *)
	    ;
    extern doublereal slamch_(char *, ftnlen);
    extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, 
	    real *);
    extern doublereal slanhs_(char *, integer *, real *, integer *, real *, 
	    ftnlen);
    static real smlnum;


/*     %------------------% */
/*     | Scalar Arguments | */
/*     %------------------% */


/*     %-----------------% */
/*     | Array Arguments | */
/*     %-----------------% */


/*     %------------% */
/*     | Parameters | */
/*     %------------% */


/*     %------------------------% */
/*     | Local Scalars & Arrays | */
/*     %------------------------% */


/*     %--------------------% */
/*     | External Functions | */
/*     %--------------------% */


/*     %----------------------% */
/*     | External Subroutines | */
/*     %----------------------% */


/*     %-----------------------% */
/*     | Executable Statements | */
/*     %-----------------------% */

    /* Parameter adjustments */
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    --wr;
    --wi;
    --z__;

    /* Function Body */
    *info = 0;

/*     %--------------------------% */
/*     | Quick return if possible | */
/*     %--------------------------% */

    if (*n == 0) {
	return 0;
    }
    if (*ilo == *ihi) {
	wr[*ilo] = h__[*ilo + *ilo * h_dim1];
	wi[*ilo] = 0.f;
	return 0;
    }

/*     %---------------------------------------------% */
/*     | Initialize the vector of last components of | */
/*     | the Schur vectors for accumulation.         | */
/*     %---------------------------------------------% */

    i__1 = *n - 1;
    for (j = 1; j <= i__1; ++j) {
	z__[j] = 0.f;
/* L5: */
    }
    z__[*n] = 1.f;

    nh = *ihi - *ilo + 1;

/*     %-------------------------------------------------------------% */
/*     | Set machine-dependent constants for the stopping criterion. | */
/*     | If norm(H) <= sqrt(OVFL), overflow should not occur.        | */
/*     %-------------------------------------------------------------% */

    unfl = slamch_("safe minimum", (ftnlen)12);
    ovfl = 1.f / unfl;
    slabad_(&unfl, &ovfl);
    ulp = slamch_("precision", (ftnlen)9);
    smlnum = unfl * (nh / ulp);

/*     %---------------------------------------------------------------% */
/*     | I1 and I2 are the indices of the first row and last column    | */
/*     | of H to which transformations must be applied. If eigenvalues | */
/*     | only are computed, I1 and I2 are set inside the main loop.    | */
/*     | Zero out H(J+2,J) = ZERO for J=1:N if WANTT = .TRUE.          | */
/*     | else H(J+2,J) for J=ILO:IHI-ILO-1 if WANTT = .FALSE.          | */
/*     %---------------------------------------------------------------% */

    if (*wantt) {
	i1 = 1;
	i2 = *n;
	i__1 = i2 - 2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    h__[i1 + i__ + 1 + i__ * h_dim1] = 0.f;
/* L8: */
	}
    } else {
	i__1 = *ihi - *ilo - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    h__[*ilo + i__ + 1 + (*ilo + i__ - 1) * h_dim1] = 0.f;
/* L9: */
	}
    }

/*     %---------------------------------------------------% */
/*     | ITN is the total number of QR iterations allowed. | */
/*     %---------------------------------------------------% */

    itn = nh * 30;

/*     ------------------------------------------------------------------ */
/*     The main loop begins here. I is the loop index and decreases from */
/*     IHI to ILO in steps of 1 or 2. Each iteration of the loop works */
/*     with the active submatrix in rows and columns L to I. */
/*     Eigenvalues I+1 to IHI have already converged. Either L = ILO or */
/*     H(L,L-1) is negligible so that the matrix splits. */
/*     ------------------------------------------------------------------ */

    i__ = *ihi;
L10:
    l = *ilo;
    if (i__ < *ilo) {
	goto L150;
    }
/*     %--------------------------------------------------------------% */
/*     | Perform QR iterations on rows and columns ILO to I until a   | */
/*     | submatrix of order 1 or 2 splits off at the bottom because a | */
/*     | subdiagonal element has become negligible.                   | */
/*     %--------------------------------------------------------------% */
    i__1 = itn;
    for (its = 0; its <= i__1; ++its) {

/*        %----------------------------------------------% */
/*        | Look for a single small subdiagonal element. | */
/*        %----------------------------------------------% */

	i__2 = l + 1;
	for (k = i__; k >= i__2; --k) {
	    tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 
		    = h__[k + k * h_dim1], dabs(r__2));
	    if (tst1 == 0.f) {
		i__3 = i__ - l + 1;
		tst1 = slanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work, (
			ftnlen)1);
	    }
/* Computing MAX */
	    r__2 = ulp * tst1;
	    if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2,
		    smlnum)) {
		goto L30;
	    }
/* L20: */
	}
L30:
	l = k;
	if (l > *ilo) {

/*           %------------------------% */
/*           | H(L,L-1) is negligible | */
/*           %------------------------% */

	    h__[l + (l - 1) * h_dim1] = 0.f;
	}

/*        %-------------------------------------------------------------% */
/*        | Exit from loop if a submatrix of order 1 or 2 has split off | */
/*        %-------------------------------------------------------------% */

	if (l >= i__ - 1) {
	    goto L140;
	}

/*        %---------------------------------------------------------% */
/*        | Now the active submatrix is in rows and columns L to I. | */
/*        | If eigenvalues only are being computed, only the active | */
/*        | submatrix need be transformed.                          | */
/*        %---------------------------------------------------------% */

	if (! (*wantt)) {
	    i1 = l;
	    i2 = i__;
	}

	if (its == 10 || its == 20) {

/*           %-------------------% */
/*           | Exceptional shift | */
/*           %-------------------% */

	    s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 = 
		    h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2));
	    h44 = s * .75f;
	    h33 = h44;
	    h43h34 = s * -.4375f * s;

	} else {

/*           %-----------------------------------------% */
/*           | Prepare to use Wilkinson's double shift | */
/*           %-----------------------------------------% */

	    h44 = h__[i__ + i__ * h_dim1];
	    h33 = h__[i__ - 1 + (i__ - 1) * h_dim1];
	    h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * 
		    h_dim1];
	}

/*        %-----------------------------------------------------% */
/*        | Look for two consecutive small subdiagonal elements | */
/*        %-----------------------------------------------------% */

	i__2 = l;
	for (m = i__ - 2; m >= i__2; --m) {

/*           %---------------------------------------------------------% */
/*           | Determine the effect of starting the double-shift QR    | */
/*           | iteration at row M, and see if this would make H(M,M-1) | */
/*           | negligible.                                             | */
/*           %---------------------------------------------------------% */

	    h11 = h__[m + m * h_dim1];
	    h22 = h__[m + 1 + (m + 1) * h_dim1];
	    h21 = h__[m + 1 + m * h_dim1];
	    h12 = h__[m + (m + 1) * h_dim1];
	    h44s = h44 - h11;
	    h33s = h33 - h11;
	    v1 = (h33s * h44s - h43h34) / h21 + h12;
	    v2 = h22 - h11 - h33s - h44s;
	    v3 = h__[m + 2 + (m + 1) * h_dim1];
	    s = dabs(v1) + dabs(v2) + dabs(v3);
	    v1 /= s;
	    v2 /= s;
	    v3 /= s;
	    v[0] = v1;
	    v[1] = v2;
	    v[2] = v3;
	    if (m == l) {
		goto L50;
	    }
	    h00 = h__[m - 1 + (m - 1) * h_dim1];
	    h10 = h__[m + (m - 1) * h_dim1];
	    tst1 = dabs(v1) * (dabs(h00) + dabs(h11) + dabs(h22));
	    if (dabs(h10) * (dabs(v2) + dabs(v3)) <= ulp * tst1) {
		goto L50;
	    }
/* L40: */
	}
L50:

/*        %----------------------% */
/*        | Double-shift QR step | */
/*        %----------------------% */

	i__2 = i__ - 1;
	for (k = m; k <= i__2; ++k) {

/*           ------------------------------------------------------------ */
/*           The first iteration of this loop determines a reflection G */
/*           from the vector V and applies it from left and right to H, */
/*           thus creating a nonzero bulge below the subdiagonal. */

/*           Each subsequent iteration determines a reflection G to */
/*           restore the Hessenberg form in the (K-1)th column, and thus */
/*           chases the bulge one step toward the bottom of the active */
/*           submatrix. NR is the order of G. */
/*           ------------------------------------------------------------ */

/* Computing MIN */
	    i__3 = 3, i__4 = i__ - k + 1;
	    nr = min(i__3,i__4);
	    if (k > m) {
		scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1);
	    }
	    slarfg_(&nr, v, &v[1], &c__1, &t1);
	    if (k > m) {
		h__[k + (k - 1) * h_dim1] = v[0];
		h__[k + 1 + (k - 1) * h_dim1] = 0.f;
		if (k < i__ - 1) {
		    h__[k + 2 + (k - 1) * h_dim1] = 0.f;
		}
	    } else if (m > l) {
		h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1];
	    }
	    v2 = v[1];
	    t2 = t1 * v2;
	    if (nr == 3) {
		v3 = v[2];
		t3 = t1 * v3;

/*              %------------------------------------------------% */
/*              | Apply G from the left to transform the rows of | */
/*              | the matrix in columns K to I2.                 | */
/*              %------------------------------------------------% */

		i__3 = i2;
		for (j = k; j <= i__3; ++j) {
		    sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] 
			    + v3 * h__[k + 2 + j * h_dim1];
		    h__[k + j * h_dim1] -= sum * t1;
		    h__[k + 1 + j * h_dim1] -= sum * t2;
		    h__[k + 2 + j * h_dim1] -= sum * t3;
/* L60: */
		}

/*              %----------------------------------------------------% */
/*              | Apply G from the right to transform the columns of | */
/*              | the matrix in rows I1 to min(K+3,I).               | */
/*              %----------------------------------------------------% */

/* Computing MIN */
		i__4 = k + 3;
		i__3 = min(i__4,i__);
		for (j = i1; j <= i__3; ++j) {
		    sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]
			     + v3 * h__[j + (k + 2) * h_dim1];
		    h__[j + k * h_dim1] -= sum * t1;
		    h__[j + (k + 1) * h_dim1] -= sum * t2;
		    h__[j + (k + 2) * h_dim1] -= sum * t3;
/* L70: */
		}

/*              %----------------------------------% */
/*              | Accumulate transformations for Z | */
/*              %----------------------------------% */

		sum = z__[k] + v2 * z__[k + 1] + v3 * z__[k + 2];
		z__[k] -= sum * t1;
		z__[k + 1] -= sum * t2;
		z__[k + 2] -= sum * t3;
	    } else if (nr == 2) {

/*              %------------------------------------------------% */
/*              | Apply G from the left to transform the rows of | */
/*              | the matrix in columns K to I2.                 | */
/*              %------------------------------------------------% */

		i__3 = i2;
		for (j = k; j <= i__3; ++j) {
		    sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1];
		    h__[k + j * h_dim1] -= sum * t1;
		    h__[k + 1 + j * h_dim1] -= sum * t2;
/* L90: */
		}

/*              %----------------------------------------------------% */
/*              | Apply G from the right to transform the columns of | */
/*              | the matrix in rows I1 to min(K+3,I).               | */
/*              %----------------------------------------------------% */

		i__3 = i__;
		for (j = i1; j <= i__3; ++j) {
		    sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]
			    ;
		    h__[j + k * h_dim1] -= sum * t1;
		    h__[j + (k + 1) * h_dim1] -= sum * t2;
/* L100: */
		}

/*              %----------------------------------% */
/*              | Accumulate transformations for Z | */
/*              %----------------------------------% */

		sum = z__[k] + v2 * z__[k + 1];
		z__[k] -= sum * t1;
		z__[k + 1] -= sum * t2;
	    }
/* L120: */
	}
/* L130: */
    }

/*     %-------------------------------------------------------% */
/*     | Failure to converge in remaining number of iterations | */
/*     %-------------------------------------------------------% */

    *info = i__;
    return 0;
L140:
    if (l == i__) {

/*        %------------------------------------------------------% */
/*        | H(I,I-1) is negligible: one eigenvalue has converged | */
/*        %------------------------------------------------------% */

	wr[i__] = h__[i__ + i__ * h_dim1];
	wi[i__] = 0.f;
    } else if (l == i__ - 1) {

/*        %--------------------------------------------------------% */
/*        | H(I-1,I-2) is negligible;                              | */
/*        | a pair of eigenvalues have converged.                  | */
/*        |                                                        | */
/*        | Transform the 2-by-2 submatrix to standard Schur form, | */
/*        | and compute and store the eigenvalues.                 | */
/*        %--------------------------------------------------------% */

	slanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * 
		h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * 
		h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, 
		&sn);
	if (*wantt) {

/*           %-----------------------------------------------------% */
/*           | Apply the transformation to the rest of H and to Z, | */
/*           | as required.                                        | */
/*           %-----------------------------------------------------% */

	    if (i2 > i__) {
		i__1 = i2 - i__;
		srot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[
			i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn);
	    }
	    i__1 = i__ - i1 - 1;
	    srot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ *
		     h_dim1], &c__1, &cs, &sn);
	    sum = cs * z__[i__ - 1] + sn * z__[i__];
	    z__[i__] = cs * z__[i__] - sn * z__[i__ - 1];
	    z__[i__ - 1] = sum;
	}
    }

/*     %---------------------------------------------------------% */
/*     | Decrement number of remaining iterations, and return to | */
/*     | start of the main loop with new value of I.             | */
/*     %---------------------------------------------------------% */

    itn -= its;
    i__ = l - 1;
    goto L10;
L150:
    return 0;

/*     %---------------% */
/*     | End of slaqrb | */
/*     %---------------% */

} /* slaqrb_ */
Пример #11
0
/* Subroutine */ int slaeda_(integer *n, integer *tlvls, integer *curlvl, 
	integer *curpbm, integer *prmptr, integer *perm, integer *givptr, 
	integer *givcol, real *givnum, real *q, integer *qptr, real *z__, 
	real *ztemp, integer *info)
{
    /* System generated locals */
    integer i__1, i__2, i__3;

    /* Builtin functions */
    integer pow_ii(integer *, integer *);
    double sqrt(doublereal);

    /* Local variables */
    static integer curr;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static integer bsiz1, bsiz2, psiz1, psiz2, i__, k, zptr1;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), 
	    xerbla_(char *, integer *);
    static integer mid, ptr;


#define givcol_ref(a_1,a_2) givcol[(a_2)*2 + a_1]
#define givnum_ref(a_1,a_2) givnum[(a_2)*2 + a_1]


/*  -- LAPACK routine (instrumented to count operations, version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   

       Common block to return operation count and iteration count   
       ITCNT is unchanged, OPS is only incremented   

    Purpose   
    =======   

    SLAEDA computes the Z vector corresponding to the merge step in the   
    CURLVLth step of the merge process with TLVLS steps for the CURPBMth   
    problem.   

    Arguments   
    =========   

    N      (input) INTEGER   
           The dimension of the symmetric tridiagonal matrix.  N >= 0.   

    TLVLS  (input) INTEGER   
           The total number of merging levels in the overall divide and   
           conquer tree.   

    CURLVL (input) INTEGER   
           The current level in the overall merge routine,   
           0 <= curlvl <= tlvls.   

    CURPBM (input) INTEGER   
           The current problem in the current level in the overall   
           merge routine (counting from upper left to lower right).   

    PRMPTR (input) INTEGER array, dimension (N lg N)   
           Contains a list of pointers which indicate where in PERM a   
           level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)   
           indicates the size of the permutation and incidentally the   
           size of the full, non-deflated problem.   

    PERM   (input) INTEGER array, dimension (N lg N)   
           Contains the permutations (from deflation and sorting) to be   
           applied to each eigenblock.   

    GIVPTR (input) INTEGER array, dimension (N lg N)   
           Contains a list of pointers which indicate where in GIVCOL a   
           level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)   
           indicates the number of Givens rotations.   

    GIVCOL (input) INTEGER array, dimension (2, N lg N)   
           Each pair of numbers indicates a pair of columns to take place   
           in a Givens rotation.   

    GIVNUM (input) REAL array, dimension (2, N lg N)   
           Each number indicates the S value to be used in the   
           corresponding Givens rotation.   

    Q      (input) REAL array, dimension (N**2)   
           Contains the square eigenblocks from previous levels, the   
           starting positions for blocks are given by QPTR.   

    QPTR   (input) INTEGER array, dimension (N+2)   
           Contains a list of pointers which indicate where in Q an   
           eigenblock is stored.  SQRT( QPTR(i+1) - QPTR(i) ) indicates   
           the size of the block.   

    Z      (output) REAL array, dimension (N)   
           On output this vector contains the updating vector (the last   
           row of the first sub-eigenvector matrix and the first row of   
           the second sub-eigenvector matrix).   

    ZTEMP  (workspace) REAL array, dimension (N)   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    Based on contributions by   
       Jeff Rutter, Computer Science Division, University of California   
       at Berkeley, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --ztemp;
    --z__;
    --qptr;
    --q;
    givnum -= 3;
    givcol -= 3;
    --givptr;
    --perm;
    --prmptr;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
	*info = -1;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAEDA", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine location of first number in second half. */

    mid = *n / 2 + 1;

/*     Gather last/first rows of appropriate eigenblocks into center of Z */

    ptr = 1;

/*     Determine location of lowest level subproblem in the full storage   
       scheme */

    i__1 = *curlvl - 1;
    curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;

/*     Determine size of these matrices.  We add HALF to the value of   
       the SQRT in case the machine underestimates one of these square   
       roots. */

    latime_1.ops += 8;
    bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f);
    bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f);
    i__1 = mid - bsiz1 - 1;
    for (k = 1; k <= i__1; ++k) {
	z__[k] = 0.f;
/* L10: */
    }
    scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], &
	    c__1);
    scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1);
    i__1 = *n;
    for (k = mid + bsiz2; k <= i__1; ++k) {
	z__[k] = 0.f;
/* L20: */
    }

/*     Loop thru remaining levels 1 -> CURLVL applying the Givens   
       rotations and permutation and then multiplying the center matrices   
       against the current Z. */

    ptr = pow_ii(&c__2, tlvls) + 1;
    i__1 = *curlvl - 1;
    for (k = 1; k <= i__1; ++k) {
	i__2 = *curlvl - k;
	i__3 = *curlvl - k - 1;
	curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - 
		1;
	psiz1 = prmptr[curr + 1] - prmptr[curr];
	psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
	zptr1 = mid - psiz1;

/*       Apply Givens at CURR and CURR+1 */

	latime_1.ops += (givptr[curr + 2] - givptr[curr]) * 6;
	i__2 = givptr[curr + 1] - 1;
	for (i__ = givptr[curr]; i__ <= i__2; ++i__) {
	    srot_(&c__1, &z__[zptr1 + givcol_ref(1, i__) - 1], &c__1, &z__[
		    zptr1 + givcol_ref(2, i__) - 1], &c__1, &givnum_ref(1, 
		    i__), &givnum_ref(2, i__));
/* L30: */
	}
	i__2 = givptr[curr + 2] - 1;
	for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) {
	    srot_(&c__1, &z__[mid - 1 + givcol_ref(1, i__)], &c__1, &z__[mid 
		    - 1 + givcol_ref(2, i__)], &c__1, &givnum_ref(1, i__), &
		    givnum_ref(2, i__));
/* L40: */
	}
	psiz1 = prmptr[curr + 1] - prmptr[curr];
	psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
	i__2 = psiz1 - 1;
	for (i__ = 0; i__ <= i__2; ++i__) {
	    ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1];
/* L50: */
	}
	i__2 = psiz2 - 1;
	for (i__ = 0; i__ <= i__2; ++i__) {
	    ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - 
		    1];
/* L60: */
	}

/*        Multiply Blocks at CURR and CURR+1   

          Determine size of these matrices.  We add HALF to the value of   
          the SQRT in case the machine underestimates one of these   
          square roots. */

	latime_1.ops += 8;
	bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f);
	bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + 
		.5f);
	if (bsiz1 > 0) {
	    latime_1.ops += (bsiz1 << 1) * bsiz1;
	    sgemv_("T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, &
		    ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1);
	}
	i__2 = psiz1 - bsiz1;
	scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1);
	if (bsiz2 > 0) {
	    latime_1.ops += (bsiz2 << 1) * bsiz2;
	    sgemv_("T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, &
		    ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1);
	}
	i__2 = psiz2 - bsiz2;
	scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], &
		c__1);

	i__2 = *tlvls - k;
	ptr += pow_ii(&c__2, &i__2);
/* L70: */
    }

    return 0;

/*     End of SLAEDA */

} /* slaeda_ */
Пример #12
0
/* Subroutine */ int sgeevx_(char *balanc, char *jobvl, char *jobvr, char *
	sense, integer *n, real *a, integer *lda, real *wr, real *wi, real *
	vl, integer *ldvl, real *vr, integer *ldvr, integer *ilo, integer *
	ihi, real *scale, real *abnrm, real *rconde, real *rcondv, real *work, 
	 integer *lwork, integer *iwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
	    i__2, i__3;
    real r__1, r__2;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    integer i__, k;
    real r__, cs, sn;
    char job[1];
    real scl, dum[1], eps;
    char side[1];
    real anrm;
    integer ierr, itau, iwrk, nout;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    extern doublereal snrm2_(integer *, real *, integer *);
    integer icond;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    extern doublereal slapy2_(real *, real *);
    extern /* Subroutine */ int slabad_(real *, real *);
    logical scalea;
    real cscale;
    extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, 
	    integer *, integer *, real *, integer *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *, integer *), xerbla_(char 
	    *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);
    logical select[1];
    real bignum;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *);
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slartg_(real *, real *, 
	    real *, real *, real *), sorghr_(integer *, integer *, integer *, 
	    real *, integer *, real *, real *, integer *, integer *), shseqr_(
	    char *, char *, integer *, integer *, integer *, real *, integer *
, real *, real *, real *, integer *, real *, integer *, integer *), strevc_(char *, char *, logical *, integer *, 
	    real *, integer *, real *, integer *, real *, integer *, integer *
, integer *, real *, integer *);
    integer minwrk, maxwrk;
    extern /* Subroutine */ int strsna_(char *, char *, logical *, integer *, 
	    real *, integer *, real *, integer *, real *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *, 
	    integer *);
    logical wantvl, wntsnb;
    integer hswork;
    logical wntsne;
    real smlnum;
    logical lquery, wantvr, wntsnn, wntsnv;


/*  -- LAPACK driver routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SGEEVX computes for an N-by-N real nonsymmetric matrix A, the */
/*  eigenvalues and, optionally, the left and/or right eigenvectors. */

/*  Optionally also, it computes a balancing transformation to improve */
/*  the conditioning of the eigenvalues and eigenvectors (ILO, IHI, */
/*  SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues */
/*  (RCONDE), and reciprocal condition numbers for the right */
/*  eigenvectors (RCONDV). */

/*  The right eigenvector v(j) of A satisfies */
/*                   A * v(j) = lambda(j) * v(j) */
/*  where lambda(j) is its eigenvalue. */
/*  The left eigenvector u(j) of A satisfies */
/*                u(j)**H * A = lambda(j) * u(j)**H */
/*  where u(j)**H denotes the conjugate transpose of u(j). */

/*  The computed eigenvectors are normalized to have Euclidean norm */
/*  equal to 1 and largest component real. */

/*  Balancing a matrix means permuting the rows and columns to make it */
/*  more nearly upper triangular, and applying a diagonal similarity */
/*  transformation D * A * D**(-1), where D is a diagonal matrix, to */
/*  make its rows and columns closer in norm and the condition numbers */
/*  of its eigenvalues and eigenvectors smaller.  The computed */
/*  reciprocal condition numbers correspond to the balanced matrix. */
/*  Permuting rows and columns will not change the condition numbers */
/*  (in exact arithmetic) but diagonal scaling will.  For further */
/*  explanation of balancing, see section 4.10.2 of the LAPACK */
/*  Users' Guide. */

/*  Arguments */
/*  ========= */

/*  BALANC  (input) CHARACTER*1 */
/*          Indicates how the input matrix should be diagonally scaled */
/*          and/or permuted to improve the conditioning of its */
/*          eigenvalues. */
/*          = 'N': Do not diagonally scale or permute; */
/*          = 'P': Perform permutations to make the matrix more nearly */
/*                 upper triangular. Do not diagonally scale; */
/*          = 'S': Diagonally scale the matrix, i.e. replace A by */
/*                 D*A*D**(-1), where D is a diagonal matrix chosen */
/*                 to make the rows and columns of A more equal in */
/*                 norm. Do not permute; */
/*          = 'B': Both diagonally scale and permute A. */

/*          Computed reciprocal condition numbers will be for the matrix */
/*          after balancing and/or permuting. Permuting does not change */
/*          condition numbers (in exact arithmetic), but balancing does. */

/*  JOBVL   (input) CHARACTER*1 */
/*          = 'N': left eigenvectors of A are not computed; */
/*          = 'V': left eigenvectors of A are computed. */
/*          If SENSE = 'E' or 'B', JOBVL must = 'V'. */

/*  JOBVR   (input) CHARACTER*1 */
/*          = 'N': right eigenvectors of A are not computed; */
/*          = 'V': right eigenvectors of A are computed. */
/*          If SENSE = 'E' or 'B', JOBVR must = 'V'. */

/*  SENSE   (input) CHARACTER*1 */
/*          Determines which reciprocal condition numbers are computed. */
/*          = 'N': None are computed; */
/*          = 'E': Computed for eigenvalues only; */
/*          = 'V': Computed for right eigenvectors only; */
/*          = 'B': Computed for eigenvalues and right eigenvectors. */

/*          If SENSE = 'E' or 'B', both left and right eigenvectors */
/*          must also be computed (JOBVL = 'V' and JOBVR = 'V'). */

/*  N       (input) INTEGER */
/*          The order of the matrix A. N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the N-by-N matrix A. */
/*          On exit, A has been overwritten.  If JOBVL = 'V' or */
/*          JOBVR = 'V', A contains the real Schur form of the balanced */
/*          version of the input matrix A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  WR      (output) REAL array, dimension (N) */
/*  WI      (output) REAL array, dimension (N) */
/*          WR and WI contain the real and imaginary parts, */
/*          respectively, of the computed eigenvalues.  Complex */
/*          conjugate pairs of eigenvalues will appear consecutively */
/*          with the eigenvalue having the positive imaginary part */
/*          first. */

/*  VL      (output) REAL array, dimension (LDVL,N) */
/*          If JOBVL = 'V', the left eigenvectors u(j) are stored one */
/*          after another in the columns of VL, in the same order */
/*          as their eigenvalues. */
/*          If JOBVL = 'N', VL is not referenced. */
/*          If the j-th eigenvalue is real, then u(j) = VL(:,j), */
/*          the j-th column of VL. */
/*          If the j-th and (j+1)-st eigenvalues form a complex */
/*          conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and */
/*          u(j+1) = VL(:,j) - i*VL(:,j+1). */

/*  LDVL    (input) INTEGER */
/*          The leading dimension of the array VL.  LDVL >= 1; if */
/*          JOBVL = 'V', LDVL >= N. */

/*  VR      (output) REAL array, dimension (LDVR,N) */
/*          If JOBVR = 'V', the right eigenvectors v(j) are stored one */
/*          after another in the columns of VR, in the same order */
/*          as their eigenvalues. */
/*          If JOBVR = 'N', VR is not referenced. */
/*          If the j-th eigenvalue is real, then v(j) = VR(:,j), */
/*          the j-th column of VR. */
/*          If the j-th and (j+1)-st eigenvalues form a complex */
/*          conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and */
/*          v(j+1) = VR(:,j) - i*VR(:,j+1). */

/*  LDVR    (input) INTEGER */
/*          The leading dimension of the array VR.  LDVR >= 1, and if */
/*          JOBVR = 'V', LDVR >= N. */

/*  ILO     (output) INTEGER */
/*  IHI     (output) INTEGER */
/*          ILO and IHI are integer values determined when A was */
/*          balanced.  The balanced A(i,j) = 0 if I > J and */
/*          J = 1,...,ILO-1 or I = IHI+1,...,N. */

/*  SCALE   (output) REAL array, dimension (N) */
/*          Details of the permutations and scaling factors applied */
/*          when balancing A.  If P(j) is the index of the row and column */
/*          interchanged with row and column j, and D(j) is the scaling */
/*          factor applied to row and column j, then */
/*          SCALE(J) = P(J),    for J = 1,...,ILO-1 */
/*                   = D(J),    for J = ILO,...,IHI */
/*                   = P(J)     for J = IHI+1,...,N. */
/*          The order in which the interchanges are made is N to IHI+1, */
/*          then 1 to ILO-1. */

/*  ABNRM   (output) REAL */
/*          The one-norm of the balanced matrix (the maximum */
/*          of the sum of absolute values of elements of any column). */

/*  RCONDE  (output) REAL array, dimension (N) */
/*          RCONDE(j) is the reciprocal condition number of the j-th */
/*          eigenvalue. */

/*  RCONDV  (output) REAL array, dimension (N) */
/*          RCONDV(j) is the reciprocal condition number of the j-th */
/*          right eigenvector. */

/*  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK.   If SENSE = 'N' or 'E', */
/*          LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', */
/*          LWORK >= 3*N.  If SENSE = 'V' or 'B', LWORK >= N*(N+6). */
/*          For good performance, LWORK must generally be larger. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  IWORK   (workspace) INTEGER array, dimension (2*N-2) */
/*          If SENSE = 'N' or 'E', not referenced. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          > 0:  if INFO = i, the QR algorithm failed to compute all the */
/*                eigenvalues, and no eigenvectors or condition numbers */
/*                have been computed; elements 1:ILO-1 and i+1:N of WR */
/*                and WI contain eigenvalues which have converged. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input arguments */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --wr;
    --wi;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1;
    vr -= vr_offset;
    --scale;
    --rconde;
    --rcondv;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1;
    wantvl = lsame_(jobvl, "V");
    wantvr = lsame_(jobvr, "V");
    wntsnn = lsame_(sense, "N");
    wntsne = lsame_(sense, "E");
    wntsnv = lsame_(sense, "V");
    wntsnb = lsame_(sense, "B");
    if (! (lsame_(balanc, "N") || lsame_(balanc, "S") || lsame_(balanc, "P") 
	    || lsame_(balanc, "B"))) {
	*info = -1;
    } else if (! wantvl && ! lsame_(jobvl, "N")) {
	*info = -2;
    } else if (! wantvr && ! lsame_(jobvr, "N")) {
	*info = -3;
    } else if (! (wntsnn || wntsne || wntsnb || wntsnv) || (wntsne || wntsnb) 
	    && ! (wantvl && wantvr)) {
	*info = -4;
    } else if (*n < 0) {
	*info = -5;
    } else if (*lda < max(1,*n)) {
	*info = -7;
    } else if (*ldvl < 1 || wantvl && *ldvl < *n) {
	*info = -11;
    } else if (*ldvr < 1 || wantvr && *ldvr < *n) {
	*info = -13;
    }

/*     Compute workspace */
/*      (Note: Comments in the code beginning "Workspace:" describe the */
/*       minimal amount of workspace needed at that point in the code, */
/*       as well as the preferred amount for good performance. */
/*       NB refers to the optimal block size for the immediately */
/*       following subroutine, as returned by ILAENV. */
/*       HSWORK refers to the workspace preferred by SHSEQR, as */
/*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N, */
/*       the worst case.) */

    if (*info == 0) {
	if (*n == 0) {
	    minwrk = 1;
	    maxwrk = 1;
	} else {
	    maxwrk = *n + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, n, &
		    c__0);

	    if (wantvl) {
		shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
			1], &vl[vl_offset], ldvl, &work[1], &c_n1, info);
	    } else if (wantvr) {
		shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[
			1], &vr[vr_offset], ldvr, &work[1], &c_n1, info);
	    } else {
		if (wntsnn) {
		    shseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], 
			    &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, 
			    info);
		} else {
		    shseqr_("S", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], 
			    &wi[1], &vr[vr_offset], ldvr, &work[1], &c_n1, 
			    info);
		}
	    }
	    hswork = work[1];

	    if (! wantvl && ! wantvr) {
		minwrk = *n << 1;
		if (! wntsnn) {
/* Computing MAX */
		    i__1 = minwrk, i__2 = *n * *n + *n * 6;
		    minwrk = max(i__1,i__2);
		}
		maxwrk = max(maxwrk,hswork);
		if (! wntsnn) {
/* Computing MAX */
		    i__1 = maxwrk, i__2 = *n * *n + *n * 6;
		    maxwrk = max(i__1,i__2);
		}
	    } else {
		minwrk = *n * 3;
		if (! wntsnn && ! wntsne) {
/* Computing MAX */
		    i__1 = minwrk, i__2 = *n * *n + *n * 6;
		    minwrk = max(i__1,i__2);
		}
		maxwrk = max(maxwrk,hswork);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "SORGHR", 
			 " ", n, &c__1, n, &c_n1);
		maxwrk = max(i__1,i__2);
		if (! wntsnn && ! wntsne) {
/* Computing MAX */
		    i__1 = maxwrk, i__2 = *n * *n + *n * 6;
		    maxwrk = max(i__1,i__2);
		}
/* Computing MAX */
		i__1 = maxwrk, i__2 = *n * 3;
		maxwrk = max(i__1,i__2);
	    }
	    maxwrk = max(maxwrk,minwrk);
	}
	work[1] = (real) maxwrk;

	if (*lwork < minwrk && ! lquery) {
	    *info = -21;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGEEVX", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Get machine constants */

    eps = slamch_("P");
    smlnum = slamch_("S");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);
    smlnum = sqrt(smlnum) / eps;
    bignum = 1.f / smlnum;

/*     Scale A if max element outside range [SMLNUM,BIGNUM] */

    icond = 0;
    anrm = slange_("M", n, n, &a[a_offset], lda, dum);
    scalea = FALSE_;
    if (anrm > 0.f && anrm < smlnum) {
	scalea = TRUE_;
	cscale = smlnum;
    } else if (anrm > bignum) {
	scalea = TRUE_;
	cscale = bignum;
    }
    if (scalea) {
	slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
		ierr);
    }

/*     Balance the matrix and compute ABNRM */

    sgebal_(balanc, n, &a[a_offset], lda, ilo, ihi, &scale[1], &ierr);
    *abnrm = slange_("1", n, n, &a[a_offset], lda, dum);
    if (scalea) {
	dum[0] = *abnrm;
	slascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &c__1, &
		ierr);
	*abnrm = dum[0];
    }

/*     Reduce to upper Hessenberg form */
/*     (Workspace: need 2*N, prefer N+N*NB) */

    itau = 1;
    iwrk = itau + *n;
    i__1 = *lwork - iwrk + 1;
    sgehrd_(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &
	    ierr);

    if (wantvl) {

/*        Want left eigenvectors */
/*        Copy Householder vectors to VL */

	*(unsigned char *)side = 'L';
	slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl)
		;

/*        Generate orthogonal matrix in VL */
/*        (Workspace: need 2*N-1, prefer N+(N-1)*NB) */

	i__1 = *lwork - iwrk + 1;
	sorghr_(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], &
		i__1, &ierr);

/*        Perform QR iteration, accumulating Schur vectors in VL */
/*        (Workspace: need 1, prefer HSWORK (see comments) ) */

	iwrk = itau;
	i__1 = *lwork - iwrk + 1;
	shseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vl[
		vl_offset], ldvl, &work[iwrk], &i__1, info);

	if (wantvr) {

/*           Want left and right eigenvectors */
/*           Copy Schur vectors to VR */

	    *(unsigned char *)side = 'B';
	    slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr);
	}

    } else if (wantvr) {

/*        Want right eigenvectors */
/*        Copy Householder vectors to VR */

	*(unsigned char *)side = 'R';
	slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr)
		;

/*        Generate orthogonal matrix in VR */
/*        (Workspace: need 2*N-1, prefer N+(N-1)*NB) */

	i__1 = *lwork - iwrk + 1;
	sorghr_(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], &
		i__1, &ierr);

/*        Perform QR iteration, accumulating Schur vectors in VR */
/*        (Workspace: need 1, prefer HSWORK (see comments) ) */

	iwrk = itau;
	i__1 = *lwork - iwrk + 1;
	shseqr_("S", "V", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[
		vr_offset], ldvr, &work[iwrk], &i__1, info);

    } else {

/*        Compute eigenvalues only */
/*        If condition numbers desired, compute Schur form */

	if (wntsnn) {
	    *(unsigned char *)job = 'E';
	} else {
	    *(unsigned char *)job = 'S';
	}

/*        (Workspace: need 1, prefer HSWORK (see comments) ) */

	iwrk = itau;
	i__1 = *lwork - iwrk + 1;
	shseqr_(job, "N", n, ilo, ihi, &a[a_offset], lda, &wr[1], &wi[1], &vr[
		vr_offset], ldvr, &work[iwrk], &i__1, info);
    }

/*     If INFO > 0 from SHSEQR, then quit */

    if (*info > 0) {
	goto L50;
    }

    if (wantvl || wantvr) {

/*        Compute left and/or right eigenvectors */
/*        (Workspace: need 3*N) */

	strevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, 
		 &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr);
    }

/*     Compute condition numbers if desired */
/*     (Workspace: need N*N+6*N unless SENSE = 'E') */

    if (! wntsnn) {
	strsna_(sense, "A", select, n, &a[a_offset], lda, &vl[vl_offset], 
		ldvl, &vr[vr_offset], ldvr, &rconde[1], &rcondv[1], n, &nout, 
		&work[iwrk], n, &iwork[1], &icond);
    }

    if (wantvl) {

/*        Undo balancing of left eigenvectors */

	sgebak_(balanc, "L", n, ilo, ihi, &scale[1], n, &vl[vl_offset], ldvl, 
		&ierr);

/*        Normalize left eigenvectors and make largest component real */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (wi[i__] == 0.f) {
		scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
		sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
	    } else if (wi[i__] > 0.f) {
		r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
		r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
		scl = 1.f / slapy2_(&r__1, &r__2);
		sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
		sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
		i__2 = *n;
		for (k = 1; k <= i__2; ++k) {
/* Computing 2nd power */
		    r__1 = vl[k + i__ * vl_dim1];
/* Computing 2nd power */
		    r__2 = vl[k + (i__ + 1) * vl_dim1];
		    work[k] = r__1 * r__1 + r__2 * r__2;
/* L10: */
		}
		k = isamax_(n, &work[1], &c__1);
		slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], 
			&cs, &sn, &r__);
		srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * 
			vl_dim1 + 1], &c__1, &cs, &sn);
		vl[k + (i__ + 1) * vl_dim1] = 0.f;
	    }
/* L20: */
	}
    }

    if (wantvr) {

/*        Undo balancing of right eigenvectors */

	sgebak_(balanc, "R", n, ilo, ihi, &scale[1], n, &vr[vr_offset], ldvr, 
		&ierr);

/*        Normalize right eigenvectors and make largest component real */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (wi[i__] == 0.f) {
		scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
		sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
	    } else if (wi[i__] > 0.f) {
		r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
		r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
		scl = 1.f / slapy2_(&r__1, &r__2);
		sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
		sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
		i__2 = *n;
		for (k = 1; k <= i__2; ++k) {
/* Computing 2nd power */
		    r__1 = vr[k + i__ * vr_dim1];
/* Computing 2nd power */
		    r__2 = vr[k + (i__ + 1) * vr_dim1];
		    work[k] = r__1 * r__1 + r__2 * r__2;
/* L30: */
		}
		k = isamax_(n, &work[1], &c__1);
		slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], 
			&cs, &sn, &r__);
		srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * 
			vr_dim1 + 1], &c__1, &cs, &sn);
		vr[k + (i__ + 1) * vr_dim1] = 0.f;
	    }
/* L40: */
	}
    }

/*     Undo scaling if necessary */

L50:
    if (scalea) {
	i__1 = *n - *info;
/* Computing MAX */
	i__3 = *n - *info;
	i__2 = max(i__3,1);
	slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + 
		1], &i__2, &ierr);
	i__1 = *n - *info;
/* Computing MAX */
	i__3 = *n - *info;
	i__2 = max(i__3,1);
	slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 
		1], &i__2, &ierr);
	if (*info == 0) {
	    if ((wntsnv || wntsnb) && icond == 0) {
		slascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &rcondv[
			1], n, &ierr);
	    }
	} else {
	    i__1 = *ilo - 1;
	    slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], 
		    n, &ierr);
	    i__1 = *ilo - 1;
	    slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], 
		    n, &ierr);
	}
    }

    work[1] = (real) maxwrk;
    return 0;

/*     End of SGEEVX */

} /* sgeevx_ */
Пример #13
0
/* Subroutine */ int slarot_(logical *lrows, logical *lleft, logical *lright, 
	integer *nl, real *c__, real *s, real *a, integer *lda, real *xleft, 
	real *xright)
{
    /* System generated locals */
    integer i__1;

    /* Local variables */
    static integer iinc;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static integer inext, ix, iy, nt;
    static real xt[2], yt[2];
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static integer iyt;


/*  -- LAPACK auxiliary test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

       SLAROT applies a (Givens) rotation to two adjacent rows or   
       columns, where one element of the first and/or last column/row   
       may be a separate variable.  This is specifically indended   
       for use on matrices stored in some format other than GE, so   
       that elements of the matrix may be used or modified for which   
       no array element is provided.   

       One example is a symmetric matrix in SB format (bandwidth=4), for   
       which UPLO='L':  Two adjacent rows will have the format:   

       row j:     *  *  *  *  *  .  .  .  .   
       row j+1:      *  *  *  *  *  .  .  .  .   

       '*' indicates elements for which storage is provided,   
       '.' indicates elements for which no storage is provided, but   
       are not necessarily zero; their values are determined by   
       symmetry.  ' ' indicates elements which are necessarily zero,   
        and have no storage provided.   

       Those columns which have two '*'s can be handled by SROT.   
       Those columns which have no '*'s can be ignored, since as long   
       as the Givens rotations are carefully applied to preserve   
       symmetry, their values are determined.   
       Those columns which have one '*' have to be handled separately,   
       by using separate variables "p" and "q":   

       row j:     *  *  *  *  *  p  .  .  .   
       row j+1:   q  *  *  *  *  *  .  .  .  .   

       The element p would have to be set correctly, then that column   
       is rotated, setting p to its new value.  The next call to   
       SLAROT would rotate columns j and j+1, using p, and restore   
       symmetry.  The element q would start out being zero, and be   
       made non-zero by the rotation.  Later, rotations would presumably   
       be chosen to zero q out.   

       Typical Calling Sequences: rotating the i-th and (i+1)-st rows.   
       ------- ------- ---------   

         General dense matrix:   

                 CALL SLAROT(.TRUE.,.FALSE.,.FALSE., N, C,S,   
                         A(i,1),LDA, DUMMY, DUMMY)   

         General banded matrix in GB format:   

                 j = MAX(1, i-KL )   
                 NL = MIN( N, i+KU+1 ) + 1-j   
                 CALL SLAROT( .TRUE., i-KL.GE.1, i+KU.LT.N, NL, C,S,   
                         A(KU+i+1-j,j),LDA-1, XLEFT, XRIGHT )   

                 [ note that i+1-j is just MIN(i,KL+1) ]   

         Symmetric banded matrix in SY format, bandwidth K,   
         lower triangle only:   

                 j = MAX(1, i-K )   
                 NL = MIN( K+1, i ) + 1   
                 CALL SLAROT( .TRUE., i-K.GE.1, .TRUE., NL, C,S,   
                         A(i,j), LDA, XLEFT, XRIGHT )   

         Same, but upper triangle only:   

                 NL = MIN( K+1, N-i ) + 1   
                 CALL SLAROT( .TRUE., .TRUE., i+K.LT.N, NL, C,S,   
                         A(i,i), LDA, XLEFT, XRIGHT )   

         Symmetric banded matrix in SB format, bandwidth K,   
         lower triangle only:   

                 [ same as for SY, except:]   
                     . . . .   
                         A(i+1-j,j), LDA-1, XLEFT, XRIGHT )   

                 [ note that i+1-j is just MIN(i,K+1) ]   

         Same, but upper triangle only:   
                      . . .   
                         A(K+1,i), LDA-1, XLEFT, XRIGHT )   

         Rotating columns is just the transpose of rotating rows, except   
         for GB and SB: (rotating columns i and i+1)   

         GB:   
                 j = MAX(1, i-KU )   
                 NL = MIN( N, i+KL+1 ) + 1-j   
                 CALL SLAROT( .TRUE., i-KU.GE.1, i+KL.LT.N, NL, C,S,   
                         A(KU+j+1-i,i),LDA-1, XTOP, XBOTTM )   

                 [note that KU+j+1-i is just MAX(1,KU+2-i)]   

         SB: (upper triangle)   

                      . . . . . .   
                         A(K+j+1-i,i),LDA-1, XTOP, XBOTTM )   

         SB: (lower triangle)   

                      . . . . . .   
                         A(1,i),LDA-1, XTOP, XBOTTM )   

    Arguments   
    =========   

    LROWS  - LOGICAL   
             If .TRUE., then SLAROT will rotate two rows.  If .FALSE.,   
             then it will rotate two columns.   
             Not modified.   

    LLEFT  - LOGICAL   
             If .TRUE., then XLEFT will be used instead of the   
             corresponding element of A for the first element in the   
             second row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.)   
             If .FALSE., then the corresponding element of A will be   
             used.   
             Not modified.   

    LRIGHT - LOGICAL   
             If .TRUE., then XRIGHT will be used instead of the   
             corresponding element of A for the last element in the   
             first row (if LROWS=.FALSE.) or column (if LROWS=.TRUE.) If   
             .FALSE., then the corresponding element of A will be used.   
             Not modified.   

    NL     - INTEGER   
             The length of the rows (if LROWS=.TRUE.) or columns (if   
             LROWS=.FALSE.) to be rotated.  If XLEFT and/or XRIGHT are   
             used, the columns/rows they are in should be included in   
             NL, e.g., if LLEFT = LRIGHT = .TRUE., then NL must be at   
             least 2.  The number of rows/columns to be rotated   
             exclusive of those involving XLEFT and/or XRIGHT may   
             not be negative, i.e., NL minus how many of LLEFT and   
             LRIGHT are .TRUE. must be at least zero; if not, XERBLA   
             will be called.   
             Not modified.   

    C, S   - REAL   
             Specify the Givens rotation to be applied.  If LROWS is   
             true, then the matrix ( c  s )   
                                   (-s  c )  is applied from the left;   
             if false, then the transpose thereof is applied from the   
             right.  For a Givens rotation, C**2 + S**2 should be 1,   
             but this is not checked.   
             Not modified.   

    A      - REAL array.   
             The array containing the rows/columns to be rotated.  The   
             first element of A should be the upper left element to   
             be rotated.   
             Read and modified.   

    LDA    - INTEGER   
             The "effective" leading dimension of A.  If A contains   
             a matrix stored in GE or SY format, then this is just   
             the leading dimension of A as dimensioned in the calling   
             routine.  If A contains a matrix stored in band (GB or SB)   
             format, then this should be *one less* than the leading   
             dimension used in the calling routine.  Thus, if   
             A were dimensioned A(LDA,*) in SLAROT, then A(1,j) would   
             be the j-th element in the first of the two rows   
             to be rotated, and A(2,j) would be the j-th in the second,   
             regardless of how the array may be stored in the calling   
             routine.  [A cannot, however, actually be dimensioned thus,   
             since for band format, the row number may exceed LDA, which   
             is not legal FORTRAN.]   
             If LROWS=.TRUE., then LDA must be at least 1, otherwise   
             it must be at least NL minus the number of .TRUE. values   
             in XLEFT and XRIGHT.   
             Not modified.   

    XLEFT  - REAL   
             If LLEFT is .TRUE., then XLEFT will be used and modified   
             instead of A(2,1) (if LROWS=.TRUE.) or A(1,2)   
             (if LROWS=.FALSE.).   
             Read and modified.   

    XRIGHT - REAL   
             If LRIGHT is .TRUE., then XRIGHT will be used and modified   
             instead of A(1,NL) (if LROWS=.TRUE.) or A(NL,1)   
             (if LROWS=.FALSE.).   
             Read and modified.   

    =====================================================================   


       Set up indices, arrays for ends   

       Parameter adjustments */
    --a;

    /* Function Body */
    if (*lrows) {
	iinc = *lda;
	inext = 1;
    } else {
	iinc = 1;
	inext = *lda;
    }

    if (*lleft) {
	nt = 1;
	ix = iinc + 1;
	iy = *lda + 2;
	xt[0] = a[1];
	yt[0] = *xleft;
    } else {
	nt = 0;
	ix = 1;
	iy = inext + 1;
    }

    if (*lright) {
	iyt = inext + 1 + (*nl - 1) * iinc;
	++nt;
	xt[nt - 1] = *xright;
	yt[nt - 1] = a[iyt];
    }

/*     Check for errors */

    if (*nl < nt) {
	xerbla_("SLAROT", &c__4);
	return 0;
    }
    if (*lda <= 0 || ! (*lrows) && *lda < *nl - nt) {
	xerbla_("SLAROT", &c__8);
	return 0;
    }

/*     Rotate */

    i__1 = *nl - nt;
    srot_(&i__1, &a[ix], &iinc, &a[iy], &iinc, c__, s);
    srot_(&nt, xt, &c__1, yt, &c__1, c__, s);

/*     Stuff values back into XLEFT, XRIGHT, etc. */

    if (*lleft) {
	a[1] = xt[0];
	*xleft = yt[0];
    }

    if (*lright) {
	*xright = xt[nt - 1];
	a[iyt] = yt[nt - 1];
    }

    return 0;

/*     End of SLAROT */

} /* slarot_ */
Пример #14
0
/* Subroutine */ int sbdsqr_(char *uplo, integer *n, integer *ncvt, integer *
	nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real *
	u, integer *ldu, real *c__, integer *ldc, real *work, integer *info)
{
    /* System generated locals */
    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, 
	    i__2;
    real r__1, r__2, r__3, r__4;
    doublereal d__1;

    /* Builtin functions */
    double pow_dd(doublereal *, doublereal *), sqrt(doublereal), r_sign(real *
	    , real *);

    /* Local variables */
    static real abse;
    static integer idir;
    static real abss;
    static integer oldm;
    static real cosl;
    static integer isub, iter;
    static real unfl, sinl, cosr, smin, smax, sinr;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *), slas2_(real *, real *, real *, real *,
	     real *);
    static real f, g, h__;
    static integer i__, j, m;
    static real r__;
    extern logical lsame_(char *, char *);
    static real oldcs;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer oldll;
    static real shift, sigmn, oldsn;
    static integer maxit;
    static real sminl;
    extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, 
	    integer *, real *, real *, real *, integer *);
    static real sigmx;
    static logical lower;
    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
	    integer *), slasq1_(integer *, real *, real *, real *, integer *),
	     slasv2_(real *, real *, real *, real *, real *, real *, real *, 
	    real *, real *);
    static real cs;
    static integer ll;
    static real sn, mu;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static real sminoa;
    extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real *
	    );
    static real thresh;
    static logical rotate;
    static real sminlo;
    static integer nm1;
    static real tolmul;
    static integer nm12, nm13, lll;
    static real eps, sll, tol;


#define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
#define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]


/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    SBDSQR computes the singular value decomposition (SVD) of a real   
    N-by-N (upper or lower) bidiagonal matrix B:  B = Q * S * P' (P'   
    denotes the transpose of P), where S is a diagonal matrix with   
    non-negative diagonal elements (the singular values of B), and Q   
    and P are orthogonal matrices.   

    The routine computes S, and optionally computes U * Q, P' * VT,   
    or Q' * C, for given real input matrices U, VT, and C.   

    See "Computing  Small Singular Values of Bidiagonal Matrices With   
    Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,   
    LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,   
    no. 5, pp. 873-912, Sept 1990) and   
    "Accurate singular values and differential qd algorithms," by   
    B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics   
    Department, University of California at Berkeley, July 1992   
    for a detailed description of the algorithm.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  B is upper bidiagonal;   
            = 'L':  B is lower bidiagonal.   

    N       (input) INTEGER   
            The order of the matrix B.  N >= 0.   

    NCVT    (input) INTEGER   
            The number of columns of the matrix VT. NCVT >= 0.   

    NRU     (input) INTEGER   
            The number of rows of the matrix U. NRU >= 0.   

    NCC     (input) INTEGER   
            The number of columns of the matrix C. NCC >= 0.   

    D       (input/output) REAL array, dimension (N)   
            On entry, the n diagonal elements of the bidiagonal matrix B.   
            On exit, if INFO=0, the singular values of B in decreasing   
            order.   

    E       (input/output) REAL array, dimension (N)   
            On entry, the elements of E contain the   
            offdiagonal elements of the bidiagonal matrix whose SVD   
            is desired. On normal exit (INFO = 0), E is destroyed.   
            If the algorithm does not converge (INFO > 0), D and E   
            will contain the diagonal and superdiagonal elements of a   
            bidiagonal matrix orthogonally equivalent to the one given   
            as input. E(N) is used for workspace.   

    VT      (input/output) REAL array, dimension (LDVT, NCVT)   
            On entry, an N-by-NCVT matrix VT.   
            On exit, VT is overwritten by P' * VT.   
            VT is not referenced if NCVT = 0.   

    LDVT    (input) INTEGER   
            The leading dimension of the array VT.   
            LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.   

    U       (input/output) REAL array, dimension (LDU, N)   
            On entry, an NRU-by-N matrix U.   
            On exit, U is overwritten by U * Q.   
            U is not referenced if NRU = 0.   

    LDU     (input) INTEGER   
            The leading dimension of the array U.  LDU >= max(1,NRU).   

    C       (input/output) REAL array, dimension (LDC, NCC)   
            On entry, an N-by-NCC matrix C.   
            On exit, C is overwritten by Q' * C.   
            C is not referenced if NCC = 0.   

    LDC     (input) INTEGER   
            The leading dimension of the array C.   
            LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.   

    WORK    (workspace) REAL array, dimension (4*N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  If INFO = -i, the i-th argument had an illegal value   
            > 0:  the algorithm did not converge; D and E contain the   
                  elements of a bidiagonal matrix which is orthogonally   
                  similar to the input matrix B;  if INFO = i, i   
                  elements of E have not converged to zero.   

    Internal Parameters   
    ===================   

    TOLMUL  REAL, default = max(10,min(100,EPS**(-1/8)))   
            TOLMUL controls the convergence criterion of the QR loop.   
            If it is positive, TOLMUL*EPS is the desired relative   
               precision in the computed singular values.   
            If it is negative, abs(TOLMUL*EPS*sigma_max) is the   
               desired absolute accuracy in the computed singular   
               values (corresponds to relative accuracy   
               abs(TOLMUL*EPS) in the largest singular value.   
            abs(TOLMUL) should be between 1 and 1/EPS, and preferably   
               between 10 (for fast convergence) and .1/EPS   
               (for there to be some accuracy in the results).   
            Default is to lose at either one eighth or 2 of the   
               available decimal digits in each computed singular value   
               (whichever is smaller).   

    MAXITR  INTEGER, default = 6   
            MAXITR controls the maximum number of passes of the   
            algorithm through its inner loop. The algorithms stops   
            (and so fails to converge) if the number of passes   
            through the inner loop exceeds MAXITR*N**2.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --d__;
    --e;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    *info = 0;
    lower = lsame_(uplo, "L");
    if (! lsame_(uplo, "U") && ! lower) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*ncvt < 0) {
	*info = -3;
    } else if (*nru < 0) {
	*info = -4;
    } else if (*ncc < 0) {
	*info = -5;
    } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) {
	*info = -9;
    } else if (*ldu < max(1,*nru)) {
	*info = -11;
    } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) {
	*info = -13;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SBDSQR", &i__1);
	return 0;
    }
    if (*n == 0) {
	return 0;
    }
    if (*n == 1) {
	goto L160;
    }

/*     ROTATE is true if any singular vectors desired, false otherwise */

    rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;

/*     If no singular vectors desired, use qd algorithm */

    if (! rotate) {
	slasq1_(n, &d__[1], &e[1], &work[1], info);
	return 0;
    }

    nm1 = *n - 1;
    nm12 = nm1 + nm1;
    nm13 = nm12 + nm1;
    idir = 0;

/*     Get machine constants */

    eps = slamch_("Epsilon");
    unfl = slamch_("Safe minimum");

/*     If matrix lower bidiagonal, rotate to be upper bidiagonal   
       by applying Givens rotations on the left */

    if (lower) {
	i__1 = *n - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
	    d__[i__] = r__;
	    e[i__] = sn * d__[i__ + 1];
	    d__[i__ + 1] = cs * d__[i__ + 1];
	    work[i__] = cs;
	    work[nm1 + i__] = sn;
/* L10: */
	}

/*        Update singular vectors if desired */

	if (*nru > 0) {
	    slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], 
		    ldu);
	}
	if (*ncc > 0) {
	    slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset],
		     ldc);
	}
    }

/*     Compute singular values to relative accuracy TOL   
       (By setting TOL to be negative, algorithm will compute   
       singular values to absolute accuracy ABS(TOL)*norm(input matrix))   

   Computing MAX   
   Computing MIN */
    d__1 = (doublereal) eps;
    r__3 = 100.f, r__4 = pow_dd(&d__1, &c_b15);
    r__1 = 10.f, r__2 = dmin(r__3,r__4);
    tolmul = dmax(r__1,r__2);
    tol = tolmul * eps;

/*     Compute approximate maximum, minimum singular values */

    smax = 0.f;
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
/* Computing MAX */
	r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1));
	smax = dmax(r__2,r__3);
/* L20: */
    }
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
/* Computing MAX */
	r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1));
	smax = dmax(r__2,r__3);
/* L30: */
    }
    sminl = 0.f;
    if (tol >= 0.f) {

/*        Relative accuracy desired */

	sminoa = dabs(d__[1]);
	if (sminoa == 0.f) {
	    goto L50;
	}
	mu = sminoa;
	i__1 = *n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ - 
		    1], dabs(r__1))));
	    sminoa = dmin(sminoa,mu);
	    if (sminoa == 0.f) {
		goto L50;
	    }
/* L40: */
	}
L50:
	sminoa /= sqrt((real) (*n));
/* Computing MAX */
	r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl;
	thresh = dmax(r__1,r__2);
    } else {

/*        Absolute accuracy desired   

   Computing MAX */
	r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl;
	thresh = dmax(r__1,r__2);
    }

/*     Prepare for main iteration loop for the singular values   
       (MAXIT is the maximum number of passes through the inner   
       loop permitted before nonconvergence signalled.) */

    maxit = *n * 6 * *n;
    iter = 0;
    oldll = -1;
    oldm = -1;

/*     M points to last element of unconverged part of matrix */

    m = *n;

/*     Begin main iteration loop */

L60:

/*     Check for convergence or exceeding iteration count */

    if (m <= 1) {
	goto L160;
    }
    if (iter > maxit) {
	goto L200;
    }

/*     Find diagonal block of matrix to work on */

    if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) {
	d__[m] = 0.f;
    }
    smax = (r__1 = d__[m], dabs(r__1));
    smin = smax;
    i__1 = m - 1;
    for (lll = 1; lll <= i__1; ++lll) {
	ll = m - lll;
	abss = (r__1 = d__[ll], dabs(r__1));
	abse = (r__1 = e[ll], dabs(r__1));
	if (tol < 0.f && abss <= thresh) {
	    d__[ll] = 0.f;
	}
	if (abse <= thresh) {
	    goto L80;
	}
	smin = dmin(smin,abss);
/* Computing MAX */
	r__1 = max(smax,abss);
	smax = dmax(r__1,abse);
/* L70: */
    }
    ll = 0;
    goto L90;
L80:
    e[ll] = 0.f;

/*     Matrix splits since E(LL) = 0 */

    if (ll == m - 1) {

/*        Convergence of bottom singular value, return to top of loop */

	--m;
	goto L60;
    }
L90:
    ++ll;

/*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero */

    if (ll == m - 1) {

/*        2 by 2 block, handle separately */

	slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr,
		 &sinl, &cosl);
	d__[m - 1] = sigmx;
	e[m - 1] = 0.f;
	d__[m] = sigmn;

/*        Compute singular vectors, if desired */

	if (*ncvt > 0) {
	    srot_(ncvt, &vt_ref(m - 1, 1), ldvt, &vt_ref(m, 1), ldvt, &cosr, &
		    sinr);
	}
	if (*nru > 0) {
	    srot_(nru, &u_ref(1, m - 1), &c__1, &u_ref(1, m), &c__1, &cosl, &
		    sinl);
	}
	if (*ncc > 0) {
	    srot_(ncc, &c___ref(m - 1, 1), ldc, &c___ref(m, 1), ldc, &cosl, &
		    sinl);
	}
	m += -2;
	goto L60;
    }

/*     If working on new submatrix, choose shift direction   
       (from larger end diagonal element towards smaller) */

    if (ll > oldm || m < oldll) {
	if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) {

/*           Chase bulge from top (big end) to bottom (small end) */

	    idir = 1;
	} else {

/*           Chase bulge from bottom (big end) to top (small end) */

	    idir = 2;
	}
    }

/*     Apply convergence tests */

    if (idir == 1) {

/*        Run convergence test in forward direction   
          First apply standard test to bottom of matrix */

	if ((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m], dabs(
		r__1)) || tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <= 
		thresh) {
	    e[m - 1] = 0.f;
	    goto L60;
	}

	if (tol >= 0.f) {

/*           If relative accuracy desired,   
             apply convergence criterion forward */

	    mu = (r__1 = d__[ll], dabs(r__1));
	    sminl = mu;
	    i__1 = m - 1;
	    for (lll = ll; lll <= i__1; ++lll) {
		if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) {
		    e[lll] = 0.f;
		    goto L60;
		}
		sminlo = sminl;
		mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 = 
			e[lll], dabs(r__1))));
		sminl = dmin(sminl,mu);
/* L100: */
	    }
	}

    } else {

/*        Run convergence test in backward direction   
          First apply standard test to top of matrix */

	if ((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs(
		r__1)) || tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh) {
	    e[ll] = 0.f;
	    goto L60;
	}

	if (tol >= 0.f) {

/*           If relative accuracy desired,   
             apply convergence criterion backward */

	    mu = (r__1 = d__[m], dabs(r__1));
	    sminl = mu;
	    i__1 = ll;
	    for (lll = m - 1; lll >= i__1; --lll) {
		if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) {
		    e[lll] = 0.f;
		    goto L60;
		}
		sminlo = sminl;
		mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[
			lll], dabs(r__1))));
		sminl = dmin(sminl,mu);
/* L110: */
	    }
	}
    }
    oldll = ll;
    oldm = m;

/*     Compute shift.  First, test if shifting would ruin relative   
       accuracy, and if so set the shift to zero.   

   Computing MAX */
    r__1 = eps, r__2 = tol * .01f;
    if (tol >= 0.f && *n * tol * (sminl / smax) <= dmax(r__1,r__2)) {

/*        Use a zero shift to avoid loss of relative accuracy */

	shift = 0.f;
    } else {

/*        Compute the shift from 2-by-2 block at end of matrix */

	if (idir == 1) {
	    sll = (r__1 = d__[ll], dabs(r__1));
	    slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
	} else {
	    sll = (r__1 = d__[m], dabs(r__1));
	    slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
	}

/*        Test if shift negligible, and if so set to zero */

	if (sll > 0.f) {
/* Computing 2nd power */
	    r__1 = shift / sll;
	    if (r__1 * r__1 < eps) {
		shift = 0.f;
	    }
	}
    }

/*     Increment iteration count */

    iter = iter + m - ll;

/*     If SHIFT = 0, do simplified QR iteration */

    if (shift == 0.f) {
	if (idir == 1) {

/*           Chase bulge from top to bottom   
             Save cosines and sines for later singular vector updates */

	    cs = 1.f;
	    oldcs = 1.f;
	    i__1 = m - 1;
	    for (i__ = ll; i__ <= i__1; ++i__) {
		r__1 = d__[i__] * cs;
		slartg_(&r__1, &e[i__], &cs, &sn, &r__);
		if (i__ > ll) {
		    e[i__ - 1] = oldsn * r__;
		}
		r__1 = oldcs * r__;
		r__2 = d__[i__ + 1] * sn;
		slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]);
		work[i__ - ll + 1] = cs;
		work[i__ - ll + 1 + nm1] = sn;
		work[i__ - ll + 1 + nm12] = oldcs;
		work[i__ - ll + 1 + nm13] = oldsn;
/* L120: */
	    }
	    h__ = d__[m] * cs;
	    d__[m] = h__ * oldcs;
	    e[m - 1] = h__ * oldsn;

/*           Update singular vectors */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &
			vt_ref(ll, 1), ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 
			+ 1], &u_ref(1, ll), ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 
			+ 1], &c___ref(ll, 1), ldc);
	    }

/*           Test convergence */

	    if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) {
		e[m - 1] = 0.f;
	    }

	} else {

/*           Chase bulge from bottom to top   
             Save cosines and sines for later singular vector updates */

	    cs = 1.f;
	    oldcs = 1.f;
	    i__1 = ll + 1;
	    for (i__ = m; i__ >= i__1; --i__) {
		r__1 = d__[i__] * cs;
		slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__);
		if (i__ < m) {
		    e[i__] = oldsn * r__;
		}
		r__1 = oldcs * r__;
		r__2 = d__[i__ - 1] * sn;
		slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]);
		work[i__ - ll] = cs;
		work[i__ - ll + nm1] = -sn;
		work[i__ - ll + nm12] = oldcs;
		work[i__ - ll + nm13] = -oldsn;
/* L130: */
	    }
	    h__ = d__[ll] * cs;
	    d__[ll] = h__ * oldcs;
	    e[ll] = h__ * oldsn;

/*           Update singular vectors */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
			nm13 + 1], &vt_ref(ll, 1), ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u_ref(
			1, ll), ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &
			c___ref(ll, 1), ldc);
	    }

/*           Test convergence */

	    if ((r__1 = e[ll], dabs(r__1)) <= thresh) {
		e[ll] = 0.f;
	    }
	}
    } else {

/*        Use nonzero shift */

	if (idir == 1) {

/*           Chase bulge from top to bottom   
             Save cosines and sines for later singular vector updates */

	    f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b49, &d__[
		    ll]) + shift / d__[ll]);
	    g = e[ll];
	    i__1 = m - 1;
	    for (i__ = ll; i__ <= i__1; ++i__) {
		slartg_(&f, &g, &cosr, &sinr, &r__);
		if (i__ > ll) {
		    e[i__ - 1] = r__;
		}
		f = cosr * d__[i__] + sinr * e[i__];
		e[i__] = cosr * e[i__] - sinr * d__[i__];
		g = sinr * d__[i__ + 1];
		d__[i__ + 1] = cosr * d__[i__ + 1];
		slartg_(&f, &g, &cosl, &sinl, &r__);
		d__[i__] = r__;
		f = cosl * e[i__] + sinl * d__[i__ + 1];
		d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
		if (i__ < m - 1) {
		    g = sinl * e[i__ + 1];
		    e[i__ + 1] = cosl * e[i__ + 1];
		}
		work[i__ - ll + 1] = cosr;
		work[i__ - ll + 1 + nm1] = sinr;
		work[i__ - ll + 1 + nm12] = cosl;
		work[i__ - ll + 1 + nm13] = sinl;
/* L140: */
	    }
	    e[m - 1] = f;

/*           Update singular vectors */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &
			vt_ref(ll, 1), ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 
			+ 1], &u_ref(1, ll), ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 
			+ 1], &c___ref(ll, 1), ldc);
	    }

/*           Test convergence */

	    if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) {
		e[m - 1] = 0.f;
	    }

	} else {

/*           Chase bulge from bottom to top   
             Save cosines and sines for later singular vector updates */

	    f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b49, &d__[
		    m]) + shift / d__[m]);
	    g = e[m - 1];
	    i__1 = ll + 1;
	    for (i__ = m; i__ >= i__1; --i__) {
		slartg_(&f, &g, &cosr, &sinr, &r__);
		if (i__ < m) {
		    e[i__] = r__;
		}
		f = cosr * d__[i__] + sinr * e[i__ - 1];
		e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
		g = sinr * d__[i__ - 1];
		d__[i__ - 1] = cosr * d__[i__ - 1];
		slartg_(&f, &g, &cosl, &sinl, &r__);
		d__[i__] = r__;
		f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
		d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
		if (i__ > ll + 1) {
		    g = sinl * e[i__ - 2];
		    e[i__ - 2] = cosl * e[i__ - 2];
		}
		work[i__ - ll] = cosr;
		work[i__ - ll + nm1] = -sinr;
		work[i__ - ll + nm12] = cosl;
		work[i__ - ll + nm13] = -sinl;
/* L150: */
	    }
	    e[ll] = f;

/*           Test convergence */

	    if ((r__1 = e[ll], dabs(r__1)) <= thresh) {
		e[ll] = 0.f;
	    }

/*           Update singular vectors if desired */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
			nm13 + 1], &vt_ref(ll, 1), ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u_ref(
			1, ll), ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &
			c___ref(ll, 1), ldc);
	    }
	}
    }

/*     QR iteration finished, go back and check convergence */

    goto L60;

/*     All singular values converged, so make them positive */

L160:
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (d__[i__] < 0.f) {
	    d__[i__] = -d__[i__];

/*           Change sign of singular vectors, if desired */

	    if (*ncvt > 0) {
		sscal_(ncvt, &c_b72, &vt_ref(i__, 1), ldvt);
	    }
	}
/* L170: */
    }

/*     Sort the singular values into decreasing order (insertion sort on   
       singular values, but only one transposition per singular vector) */

    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        Scan for smallest D(I) */

	isub = 1;
	smin = d__[1];
	i__2 = *n + 1 - i__;
	for (j = 2; j <= i__2; ++j) {
	    if (d__[j] <= smin) {
		isub = j;
		smin = d__[j];
	    }
/* L180: */
	}
	if (isub != *n + 1 - i__) {

/*           Swap singular values and vectors */

	    d__[isub] = d__[*n + 1 - i__];
	    d__[*n + 1 - i__] = smin;
	    if (*ncvt > 0) {
		sswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(*n + 1 - i__, 1),
			 ldvt);
	    }
	    if (*nru > 0) {
		sswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, *n + 1 - i__), &
			c__1);
	    }
	    if (*ncc > 0) {
		sswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(*n + 1 - i__, 1),
			 ldc);
	    }
	}
/* L190: */
    }
    goto L220;

/*     Maximum number of iterations exceeded, failure to converge */

L200:
    *info = 0;
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (e[i__] != 0.f) {
	    ++(*info);
	}
/* L210: */
    }
L220:
    return 0;

/*     End of SBDSQR */

} /* sbdsqr_ */
Пример #15
0
/* Subroutine */ int slaed2_(integer *k, integer *n, real *d, real *q, 
	integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *z, 
	real *dlamda, real *q2, integer *ldq2, integer *indxc, real *w, 
	integer *indxp, integer *indx, integer *coltyp, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, 
  
       Courant Institute, NAG Ltd., and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SLAED2 merges the two sets of eigenvalues together into a single   
    sorted set.  Then it tries to deflate the size of the problem.   
    There are two ways in which deflation can occur:  when two or more   
    eigenvalues are close together or if there is a tiny entry in the   
    Z vector.  For each such occurrence the order of the related secular 
  
    equation problem is reduced by one.   

    Arguments   
    =========   

    K      (output) INTEGER   
           The number of non-deflated eigenvalues, and the order of the   
           related secular equation. 0 <= K <=N.   

    N      (input) INTEGER   
           The dimension of the symmetric tridiagonal matrix.  N >= 0.   

    D      (input/output) REAL array, dimension (N)   
           On entry, D contains the eigenvalues of the two submatrices to 
  
           be combined.   
           On exit, D contains the trailing (N-K) updated eigenvalues   
           (those which were deflated) sorted into increasing order.   

    Q      (input/output) REAL array, dimension (LDQ, N)   
           On entry, Q contains the eigenvectors of two submatrices in   
           the two square blocks with corners at (1,1), (CUTPNT,CUTPNT)   
           and (CUTPNT+1, CUTPNT+1), (N,N).   
           On exit, Q contains the trailing (N-K) updated eigenvectors   
           (those which were deflated) in its last N-K columns.   

    LDQ    (input) INTEGER   
           The leading dimension of the array Q.  LDQ >= max(1,N).   

    INDXQ  (input/output) INTEGER array, dimension (N)   
           The permutation which separately sorts the two sub-problems   
           in D into ascending order.  Note that elements in the second   
           half of this permutation must first have CUTPNT added to their 
  
           values. Destroyed on exit.   

    RHO    (input/output) REAL   
           On entry, the off-diagonal element associated with the rank-1 
  
           cut which originally split the two submatrices which are now   
           being recombined.   
           On exit, RHO has been modified to the value required by   
           SLAED3.   

    CUTPNT (input) INTEGER   
           The location of the last eigenvalue in the leading sub-matrix. 
  
           min(1,N) <= CUTPNT <= N.   

    Z      (input) REAL array, dimension (N)   
           On entry, Z contains the updating vector (the last   
           row of the first sub-eigenvector matrix and the first row of   
           the second sub-eigenvector matrix).   
           On exit, the contents of Z have been destroyed by the updating 
  
           process.   

    DLAMDA (output) REAL array, dimension (N)   
           A copy of the first K eigenvalues which will be used by   
           SLAED3 to form the secular equation.   

    Q2     (output) REAL array, dimension (LDQ2, N)   
           A copy of the first K eigenvectors which will be used by   
           SLAED3 in a matrix multiply (SGEMM) to solve for the new   
           eigenvectors.   Q2 is arranged into three blocks.  The   
           first block contains non-zero elements only at and above   
           CUTPNT, the second contains non-zero elements only below   
           CUTPNT, and the third is dense.   

    LDQ2   (input) INTEGER   
           The leading dimension of the array Q2.  LDQ2 >= max(1,N).   

    INDXC  (output) INTEGER array, dimension (N)   
           The permutation used to arrange the columns of the deflated   
           Q matrix into three groups:  the first group contains non-zero 
  
           elements only at and above CUTPNT, the second contains   
           non-zero elements only below CUTPNT, and the third is dense.   

    W      (output) REAL array, dimension (N)   
           The first k values of the final deflation-altered z-vector   
           which will be passed to SLAED3.   

    INDXP  (workspace) INTEGER array, dimension (N)   
           The permutation used to place deflated values of D at the end 
  
           of the array.  INDXP(1:K) points to the nondeflated D-values   
           and INDXP(K+1:N) points to the deflated eigenvalues.   

    INDX   (workspace) INTEGER array, dimension (N)   
           The permutation used to sort the contents of D into ascending 
  
           order.   

    COLTYP (workspace/output) INTEGER array, dimension (N)   
           During execution, a label which will indicate which of the   
           following types a column in the Q2 matrix is:   
           1 : non-zero in the upper half only;   
           2 : non-zero in the lower half only;   
           3 : dense;   
           4 : deflated.   
           On exit, COLTYP(i) is the number of columns of type i,   
           for i=1 to 4 only.   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    ===================================================================== 
  


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static real c_b3 = -1.f;
    static integer c__1 = 1;
    
    /* System generated locals */
    integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
    real r__1, r__2, r__3, r__4;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer jlam, imax, jmax, ctot[4];
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static real c;
    static integer i, j;
    static real s, t;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer k2;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static integer n1, n2;
    extern doublereal slapy2_(real *, real *);
    static integer ct, jp;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer 
	    *, integer *, integer *), slacpy_(char *, integer *, integer *, 
	    real *, integer *, real *, integer *);
    static integer n1p1;
    static real eps, tau, tol;
    static integer psm[4];


    --d;
    q_dim1 = *ldq;
    q_offset = q_dim1 + 1;
    q -= q_offset;
    --indxq;
    --z;
    --dlamda;
    q2_dim1 = *ldq2;
    q2_offset = q2_dim1 + 1;
    q2 -= q2_offset;
    --indxc;
    --w;
    --indxp;
    --indx;
    --coltyp;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
	*info = -2;
    } else if (*ldq < max(1,*n)) {
	*info = -5;
    } else if (min(1,*n) > *cutpnt || *n < *cutpnt) {
	*info = -8;
    } else if (*ldq2 < max(1,*n)) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAED2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    n1 = *cutpnt;
    n2 = *n - n1;
    n1p1 = n1 + 1;

    if (*rho < 0.f) {
	sscal_(&n2, &c_b3, &z[n1p1], &c__1);
    }

/*     Normalize z so that norm(z) = 1.  Since z is the concatenation of 
  
       two normalized vectors, norm2(z) = sqrt(2). */

    t = 1.f / sqrt(2.f);
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	indx[j] = j;
/* L10: */
    }
    sscal_(n, &t, &z[1], &c__1);

/*     RHO = ABS( norm(z)**2 * RHO ) */

    *rho = (r__1 = *rho * 2.f, dabs(r__1));

    i__1 = *cutpnt;
    for (i = 1; i <= i__1; ++i) {
	coltyp[i] = 1;
/* L20: */
    }
    i__1 = *n;
    for (i = *cutpnt + 1; i <= i__1; ++i) {
	coltyp[i] = 2;
/* L30: */
    }

/*     Sort the eigenvalues into increasing order */

    i__1 = *n;
    for (i = *cutpnt + 1; i <= i__1; ++i) {
	indxq[i] += *cutpnt;
/* L40: */
    }

/*     re-integrate the deflated parts from the last pass */

    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	dlamda[i] = d[indxq[i]];
	w[i] = z[indxq[i]];
	indxc[i] = coltyp[indxq[i]];
/* L50: */
    }
    slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	d[i] = dlamda[indx[i]];
	z[i] = w[indx[i]];
	coltyp[i] = indxc[indx[i]];
/* L60: */
    }

/*     Calculate the allowable deflation tolerance */

    imax = isamax_(n, &z[1], &c__1);
    jmax = isamax_(n, &d[1], &c__1);
    eps = slamch_("Epsilon");
/* Computing MAX */
    r__3 = (r__1 = d[jmax], dabs(r__1)), r__4 = (r__2 = z[imax], dabs(r__2));
    tol = eps * 8.f * dmax(r__3,r__4);

/*     If the rank-1 modifier is small enough, no more needs to be done   
       except to reorganize Q so that its columns correspond with the   
       elements in D. */

    if (*rho * (r__1 = z[imax], dabs(r__1)) <= tol) {
	*k = 0;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    scopy_(n, &q[indxq[indx[j]] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 
		    + 1], &c__1);
/* L70: */
	}
	slacpy_("A", n, n, &q2[q2_offset], ldq2, &q[q_offset], ldq);
	goto L180;
    }

/*     If there are multiple eigenvalues then the problem deflates.  Here 
  
       the number of equal eigenvalues are found.  As each equal   
       eigenvalue is found, an elementary reflector is computed to rotate 
  
       the corresponding eigensubspace so that the corresponding   
       components of Z are zero in this new basis. */

    *k = 0;
    k2 = *n + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) {

/*           Deflate due to small z component. */

	    --k2;
	    indxp[k2] = j;
	    coltyp[j] = 4;
	    if (j == *n) {
		goto L120;
	    }
	} else {
	    jlam = j;
	    goto L90;
	}
/* L80: */
    }
L90:
    ++j;
    if (j > *n) {
	goto L110;
    }
    if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) {

/*        Deflate due to small z component. */

	--k2;
	indxp[k2] = j;
	coltyp[j] = 4;
    } else {

/*        Check if eigenvalues are close enough to allow deflation. */

	s = z[jlam];
	c = z[j];

/*        Find sqrt(a**2+b**2) without overflow or   
          destructive underflow. */

	tau = slapy2_(&c, &s);
	t = d[j] - d[jlam];
	c /= tau;
	s = -(doublereal)s / tau;
	if ((r__1 = t * c * s, dabs(r__1)) <= tol) {

/*           Deflation is possible. */

	    z[j] = tau;
	    z[jlam] = 0.f;
	    if (coltyp[j] != coltyp[jlam]) {
		coltyp[j] = 3;
	    }
	    coltyp[jlam] = 4;
	    srot_(n, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[indx[
		    j]] * q_dim1 + 1], &c__1, &c, &s);
/* Computing 2nd power */
	    r__1 = c;
/* Computing 2nd power */
	    r__2 = s;
	    t = d[jlam] * (r__1 * r__1) + d[j] * (r__2 * r__2);
/* Computing 2nd power */
	    r__1 = s;
/* Computing 2nd power */
	    r__2 = c;
	    d[j] = d[jlam] * (r__1 * r__1) + d[j] * (r__2 * r__2);
	    d[jlam] = t;
	    --k2;
	    i = 1;
L100:
	    if (k2 + i <= *n) {
		if (d[jlam] < d[indxp[k2 + i]]) {
		    indxp[k2 + i - 1] = indxp[k2 + i];
		    indxp[k2 + i] = jlam;
		    ++i;
		    goto L100;
		} else {
		    indxp[k2 + i - 1] = jlam;
		}
	    } else {
		indxp[k2 + i - 1] = jlam;
	    }
	    jlam = j;
	} else {
	    ++(*k);
	    w[*k] = z[jlam];
	    dlamda[*k] = d[jlam];
	    indxp[*k] = jlam;
	    jlam = j;
	}
    }
    goto L90;
L110:

/*     Record the last eigenvalue. */

    ++(*k);
    w[*k] = z[jlam];
    dlamda[*k] = d[jlam];
    indxp[*k] = jlam;

L120:

/*     Count up the total number of the various types of columns, then   
       form a permutation which positions the four column types into   
       four uniform groups (although one or more of these groups may be   
       empty). */

    for (j = 1; j <= 4; ++j) {
	ctot[j - 1] = 0;
/* L130: */
    }
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	ct = coltyp[j];
	++ctot[ct - 1];
/* L140: */
    }

/*     PSM(*) = Position in SubVISMatrix (of types 1 through 4) */

    psm[0] = 1;
    psm[1] = ctot[0] + 1;
    psm[2] = psm[1] + ctot[1];
    psm[3] = psm[2] + ctot[2];

/*     Fill out the INDXC array so that the permutation which it induces 
  
       will place all type-1 columns first, all type-2 columns next,   
       then all type-3's, and finally all type-4's. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	jp = indxp[j];
	ct = coltyp[jp];
	indxc[psm[ct - 1]] = j;
	++psm[ct - 1];
/* L150: */
    }

/*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA   
       and Q2 respectively.  The eigenvalues/vectors which were not   
       deflated go into the first K slots of DLAMDA and Q2 respectively, 
  
       while those which were deflated go into the last N - K slots. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	jp = indxp[j];
	dlamda[j] = d[jp];
	scopy_(n, &q[indxq[indx[indxp[indxc[j]]]] * q_dim1 + 1], &c__1, &q2[j 
		* q2_dim1 + 1], &c__1);
/* L160: */
    }

/*     The deflated eigenvalues and their corresponding vectors go back   
       into the last N - K slots of D and Q respectively. */

    i__1 = *n - *k;
    scopy_(&i__1, &dlamda[*k + 1], &c__1, &d[*k + 1], &c__1);
    i__1 = *n - *k;
    slacpy_("A", n, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + 1) * 
	    q_dim1 + 1], ldq);

/*     Copy CTOT into COLTYP for referencing in SLAED3. */

    for (j = 1; j <= 4; ++j) {
	coltyp[j] = ctot[j - 1];
/* L170: */
    }

L180:
    return 0;

/*     End of SLAED2 */

} /* slaed2_ */
Пример #16
0
/* Subroutine */ int stgex2_(logical *wantq, logical *wantz, integer *n, real 
	*a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real *
	z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, 
	integer *lwork, integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22)   
    of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair   
    (A, B) by an orthogonal equivalence transformation.   

    (A, B) must be in generalized real Schur canonical form (as returned   
    by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2   
    diagonal blocks. B is upper triangular.   

    Optionally, the matrices Q and Z of generalized Schur vectors are   
    updated.   

           Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)'   
           Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)'   


    Arguments   
    =========   

    WANTQ   (input) LOGICAL   
            .TRUE. : update the left transformation matrix Q;   
            .FALSE.: do not update Q.   

    WANTZ   (input) LOGICAL   
            .TRUE. : update the right transformation matrix Z;   
            .FALSE.: do not update Z.   

    N       (input) INTEGER   
            The order of the matrices A and B. N >= 0.   

    A      (input/output) REAL arrays, dimensions (LDA,N)   
            On entry, the matrix A in the pair (A, B).   
            On exit, the updated matrix A.   

    LDA     (input)  INTEGER   
            The leading dimension of the array A. LDA >= max(1,N).   

    B      (input/output) REAL arrays, dimensions (LDB,N)   
            On entry, the matrix B in the pair (A, B).   
            On exit, the updated matrix B.   

    LDB     (input)  INTEGER   
            The leading dimension of the array B. LDB >= max(1,N).   

    Q       (input/output) REAL array, dimension (LDZ,N)   
            On entry, if WANTQ = .TRUE., the orthogonal matrix Q.   
            On exit, the updated matrix Q.   
            Not referenced if WANTQ = .FALSE..   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q. LDQ >= 1.   
            If WANTQ = .TRUE., LDQ >= N.   

    Z       (input/output) REAL array, dimension (LDZ,N)   
            On entry, if WANTZ =.TRUE., the orthogonal matrix Z.   
            On exit, the updated matrix Z.   
            Not referenced if WANTZ = .FALSE..   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z. LDZ >= 1.   
            If WANTZ = .TRUE., LDZ >= N.   

    J1      (input) INTEGER   
            The index to the first block (A11, B11). 1 <= J1 <= N.   

    N1      (input) INTEGER   
            The order of the first block (A11, B11). N1 = 0, 1 or 2.   

    N2      (input) INTEGER   
            The order of the second block (A22, B22). N2 = 0, 1 or 2.   

    WORK    (workspace) REAL array, dimension (LWORK).   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.   
            LWORK >=  MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 )   

    INFO    (output) INTEGER   
              =0: Successful exit   
              >0: If INFO = 1, the transformed matrix (A, B) would be   
                  too far from generalized Schur form; the blocks are   
                  not swapped and (A, B) and (Q, Z) are unchanged.   
                  The problem of swapping is too ill-conditioned.   
              <0: If INFO = -16: LWORK is too small. Appropriate value   
                  for LWORK is returned in WORK(1).   

    Further Details   
    ===============   

    Based on contributions by   
       Bo Kagstrom and Peter Poromaa, Department of Computing Science,   
       Umea University, S-901 87 Umea, Sweden.   

    In the current code both weak and strong stability tests are   
    performed. The user can omit the strong stability test by changing   
    the internal logical parameter WANDS to .FALSE.. See ref. [2] for   
    details.   

    [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the   
        Generalized Real Schur Form of a Regular Matrix Pair (A, B), in   
        M.S. Moonen et al (eds), Linear Algebra for Large Scale and   
        Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.   

    [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified   
        Eigenvalues of a Regular Matrix Pair (A, B) and Condition   
        Estimation: Theory, Algorithms and Software,   
        Report UMINF - 94.04, Department of Computing Science, Umea   
        University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working   
        Note 87. To appear in Numerical Algorithms, 1996.   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static integer c__16 = 16;
    static real c_b3 = 0.f;
    static integer c__0 = 0;
    static integer c__1 = 1;
    static integer c__4 = 4;
    static integer c__2 = 2;
    static real c_b38 = 1.f;
    static real c_b44 = -1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, 
	    z_offset, i__1, i__2;
    real r__1, r__2;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static logical weak;
    static real ddum;
    static integer idum;
    static real taul[4], dsum, taur[4], scpy[16]	/* was [4][4] */, 
	    tcpy[16]	/* was [4][4] */;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static real f, g;
    static integer i__, m;
    static real s[16]	/* was [4][4] */, t[16]	/* was [4][4] */, scale, 
	    bqra21, brqa21;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real licop[16]	/* was [4][4] */;
    static integer linfo;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static real ircop[16]	/* was [4][4] */, dnorm;
    static integer iwork[4];
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), slagv2_(real *, integer *, real *, integer *, real *, 
	    real *, real *, real *, real *, real *, real *), sgeqr2_(integer *
	    , integer *, real *, integer *, real *, real *, integer *), 
	    sgerq2_(integer *, integer *, real *, integer *, real *, real *, 
	    integer *);
    static real be[2], ai[2];
    extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *), sorgr2_(integer *, 
	    integer *, integer *, real *, integer *, real *, real *, integer *
	    );
    static real ar[2], sa, sb, li[16]	/* was [4][4] */;
    extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *), sormr2_(char *, char *, integer *, 
	    integer *, integer *, real *, integer *, real *, real *, integer *
	    , real *, integer *);
    static real dscale, ir[16]	/* was [4][4] */;
    extern /* Subroutine */ int stgsy2_(char *, integer *, integer *, integer 
	    *, real *, integer *, real *, integer *, real *, integer *, real *
	    , integer *, real *, integer *, real *, integer *, real *, real *,
	     real *, integer *, integer *, integer *);
    static real ss;
    extern doublereal slamch_(char *);
    static real ws;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slartg_(real *, real *, 
	    real *, real *, real *);
    static real thresh;
    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, 
	    real *);
    static real smlnum;
    static logical strong;
    static real eps;
#define scpy_ref(a_1,a_2) scpy[(a_2)*4 + a_1 - 5]
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define s_ref(a_1,a_2) s[(a_2)*4 + a_1 - 5]
#define t_ref(a_1,a_2) t[(a_2)*4 + a_1 - 5]
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
#define li_ref(a_1,a_2) li[(a_2)*4 + a_1 - 5]
#define ir_ref(a_1,a_2) ir[(a_2)*4 + a_1 - 5]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;

    /* Function Body */
    *info = 0;

/*     Quick return if possible */

    if (*n <= 1 || *n1 <= 0 || *n2 <= 0) {
	return 0;
    }
    if (*n1 > *n || *j1 + *n1 > *n) {
	return 0;
    }
    m = *n1 + *n2;
/* Computing MAX */
    i__1 = *n * m, i__2 = m * m << 1;
    if (*lwork < max(i__1,i__2)) {
	*info = -16;
/* Computing MAX */
	i__1 = *n * m, i__2 = m * m << 1;
	work[1] = (real) max(i__1,i__2);
	return 0;
    }

    weak = FALSE_;
    strong = FALSE_;

/*     Make a local copy of selected block */

    scopy_(&c__16, &c_b3, &c__0, li, &c__1);
    scopy_(&c__16, &c_b3, &c__0, ir, &c__1);
    slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, s, &c__4);
    slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, t, &c__4);

/*     Compute threshold for testing acceptance of swapping. */

    eps = slamch_("P");
    smlnum = slamch_("S") / eps;
    dscale = 0.f;
    dsum = 1.f;
    slacpy_("Full", &m, &m, s, &c__4, &work[1], &m);
    i__1 = m * m;
    slassq_(&i__1, &work[1], &c__1, &dscale, &dsum);
    slacpy_("Full", &m, &m, t, &c__4, &work[1], &m);
    i__1 = m * m;
    slassq_(&i__1, &work[1], &c__1, &dscale, &dsum);
    dnorm = dscale * sqrt(dsum);
/* Computing MAX */
    r__1 = eps * 10.f * dnorm;
    thresh = dmax(r__1,smlnum);

    if (m == 2) {

/*        CASE 1: Swap 1-by-1 and 1-by-1 blocks.   

          Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks   
          using Givens rotations and perform the swap tentatively. */

	f = s_ref(2, 2) * t_ref(1, 1) - t_ref(2, 2) * s_ref(1, 1);
	g = s_ref(2, 2) * t_ref(1, 2) - t_ref(2, 2) * s_ref(1, 2);
	sb = (r__1 = t_ref(2, 2), dabs(r__1));
	sa = (r__1 = s_ref(2, 2), dabs(r__1));
	slartg_(&f, &g, &ir_ref(1, 2), &ir_ref(1, 1), &ddum);
	ir_ref(2, 1) = -ir_ref(1, 2);
	ir_ref(2, 2) = ir_ref(1, 1);
	srot_(&c__2, &s_ref(1, 1), &c__1, &s_ref(1, 2), &c__1, &ir_ref(1, 1), 
		&ir_ref(2, 1));
	srot_(&c__2, &t_ref(1, 1), &c__1, &t_ref(1, 2), &c__1, &ir_ref(1, 1), 
		&ir_ref(2, 1));
	if (sa >= sb) {
	    slartg_(&s_ref(1, 1), &s_ref(2, 1), &li_ref(1, 1), &li_ref(2, 1), 
		    &ddum);
	} else {
	    slartg_(&t_ref(1, 1), &t_ref(2, 1), &li_ref(1, 1), &li_ref(2, 1), 
		    &ddum);
	}
	srot_(&c__2, &s_ref(1, 1), &c__4, &s_ref(2, 1), &c__4, &li_ref(1, 1), 
		&li_ref(2, 1));
	srot_(&c__2, &t_ref(1, 1), &c__4, &t_ref(2, 1), &c__4, &li_ref(1, 1), 
		&li_ref(2, 1));
	li_ref(2, 2) = li_ref(1, 1);
	li_ref(1, 2) = -li_ref(2, 1);

/*        Weak stability test:   
             |S21| + |T21| <= O(EPS * F-norm((S, T))) */

	ws = (r__1 = s_ref(2, 1), dabs(r__1)) + (r__2 = t_ref(2, 1), dabs(
		r__2));
	weak = ws <= thresh;
	if (! weak) {
	    goto L70;
	}

	if (TRUE_) {

/*           Strong stability test:   
               F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) */

	    slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, &work[m * m + 1], &
		    m);
	    sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, &
		    work[1], &m);
	    sgemm_("N", "T", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, &
		    c_b38, &work[m * m + 1], &m);
	    dscale = 0.f;
	    dsum = 1.f;
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);

	    slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, &work[m * m + 1], &
		    m);
	    sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, &
		    work[1], &m);
	    sgemm_("N", "T", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, &
		    c_b38, &work[m * m + 1], &m);
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);
	    ss = dscale * sqrt(dsum);
	    strong = ss <= thresh;
	    if (! strong) {
		goto L70;
	    }
	}

/*        Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and   
                 (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */

	i__1 = *j1 + 1;
	srot_(&i__1, &a_ref(1, *j1), &c__1, &a_ref(1, *j1 + 1), &c__1, &
		ir_ref(1, 1), &ir_ref(2, 1));
	i__1 = *j1 + 1;
	srot_(&i__1, &b_ref(1, *j1), &c__1, &b_ref(1, *j1 + 1), &c__1, &
		ir_ref(1, 1), &ir_ref(2, 1));
	i__1 = *n - *j1 + 1;
	srot_(&i__1, &a_ref(*j1, *j1), lda, &a_ref(*j1 + 1, *j1), lda, &
		li_ref(1, 1), &li_ref(2, 1));
	i__1 = *n - *j1 + 1;
	srot_(&i__1, &b_ref(*j1, *j1), ldb, &b_ref(*j1 + 1, *j1), ldb, &
		li_ref(1, 1), &li_ref(2, 1));

/*        Set  N1-by-N2 (2,1) - blocks to ZERO. */

	a_ref(*j1 + 1, *j1) = 0.f;
	b_ref(*j1 + 1, *j1) = 0.f;

/*        Accumulate transformations into Q and Z if requested. */

	if (*wantz) {
	    srot_(n, &z___ref(1, *j1), &c__1, &z___ref(1, *j1 + 1), &c__1, &
		    ir_ref(1, 1), &ir_ref(2, 1));
	}
	if (*wantq) {
	    srot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, *j1 + 1), &c__1, &
		    li_ref(1, 1), &li_ref(2, 1));
	}

/*        Exit with INFO = 0 if swap was successfully performed. */

	return 0;

    } else {

/*        CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2   
                  and 2-by-2 blocks.   

          Solve the generalized Sylvester equation   
                   S11 * R - L * S22 = SCALE * S12   
                   T11 * R - L * T22 = SCALE * T12   
          for R and L. Solutions in LI and IR. */

	slacpy_("Full", n1, n2, &t_ref(1, *n1 + 1), &c__4, li, &c__4);
	slacpy_("Full", n1, n2, &s_ref(1, *n1 + 1), &c__4, &ir_ref(*n2 + 1, *
		n1 + 1), &c__4);
	stgsy2_("N", &c__0, n1, n2, s, &c__4, &s_ref(*n1 + 1, *n1 + 1), &c__4,
		 &ir_ref(*n2 + 1, *n1 + 1), &c__4, t, &c__4, &t_ref(*n1 + 1, *
		n1 + 1), &c__4, li, &c__4, &scale, &dsum, &dscale, iwork, &
		idum, &linfo);

/*        Compute orthogonal matrix QL:   

                      QL' * LI = [ TL ]   
                                 [ 0  ]   
          where   
                      LI =  [      -L              ]   
                            [ SCALE * identity(N2) ] */

	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    sscal_(n1, &c_b44, &li_ref(1, i__), &c__1);
	    li_ref(*n1 + i__, i__) = scale;
/* L10: */
	}
	sgeqr2_(&m, n2, li, &c__4, taul, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sorg2r_(&m, &m, n2, li, &c__4, taul, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}

/*        Compute orthogonal matrix RQ:   

                      IR * RQ' =   [ 0  TR],   

           where IR = [ SCALE * identity(N1), R ] */

	i__1 = *n1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    ir_ref(*n2 + i__, i__) = scale;
/* L20: */
	}
	sgerq2_(n1, &m, &ir_ref(*n2 + 1, 1), &c__4, taur, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sorgr2_(&m, &m, n1, ir, &c__4, taur, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}

/*        Perform the swapping tentatively: */

	sgemm_("T", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, &
		work[1], &m);
	sgemm_("N", "T", &m, &m, &m, &c_b38, &work[1], &m, ir, &c__4, &c_b3, 
		s, &c__4);
	sgemm_("T", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, &
		work[1], &m);
	sgemm_("N", "T", &m, &m, &m, &c_b38, &work[1], &m, ir, &c__4, &c_b3, 
		t, &c__4);
	slacpy_("F", &m, &m, s, &c__4, scpy, &c__4);
	slacpy_("F", &m, &m, t, &c__4, tcpy, &c__4);
	slacpy_("F", &m, &m, ir, &c__4, ircop, &c__4);
	slacpy_("F", &m, &m, li, &c__4, licop, &c__4);

/*        Triangularize the B-part by an RQ factorization.   
          Apply transformation (from left) to A-part, giving S. */

	sgerq2_(&m, &m, t, &c__4, taur, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sormr2_("R", "T", &m, &m, &m, t, &c__4, taur, s, &c__4, &work[1], &
		linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sormr2_("L", "N", &m, &m, &m, t, &c__4, taur, ir, &c__4, &work[1], &
		linfo);
	if (linfo != 0) {
	    goto L70;
	}

/*        Compute F-norm(S21) in BRQA21. (T21 is 0.) */

	dscale = 0.f;
	dsum = 1.f;
	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slassq_(n1, &s_ref(*n2 + 1, i__), &c__1, &dscale, &dsum);
/* L30: */
	}
	brqa21 = dscale * sqrt(dsum);

/*        Triangularize the B-part by a QR factorization.   
          Apply transformation (from right) to A-part, giving S. */

	sgeqr2_(&m, &m, tcpy, &c__4, taul, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sorm2r_("L", "T", &m, &m, &m, tcpy, &c__4, taul, scpy, &c__4, &work[1]
		, info);
	sorm2r_("R", "N", &m, &m, &m, tcpy, &c__4, taul, licop, &c__4, &work[
		1], info);
	if (linfo != 0) {
	    goto L70;
	}

/*        Compute F-norm(S21) in BQRA21. (T21 is 0.) */

	dscale = 0.f;
	dsum = 1.f;
	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slassq_(n1, &scpy_ref(*n2 + 1, i__), &c__1, &dscale, &dsum);
/* L40: */
	}
	bqra21 = dscale * sqrt(dsum);

/*        Decide which method to use.   
            Weak stability test:   
               F-norm(S21) <= O(EPS * F-norm((S, T))) */

	if (bqra21 <= brqa21 && bqra21 <= thresh) {
	    slacpy_("F", &m, &m, scpy, &c__4, s, &c__4);
	    slacpy_("F", &m, &m, tcpy, &c__4, t, &c__4);
	    slacpy_("F", &m, &m, ircop, &c__4, ir, &c__4);
	    slacpy_("F", &m, &m, licop, &c__4, li, &c__4);
	} else if (brqa21 >= thresh) {
	    goto L70;
	}

/*        Set lower triangle of B-part to zero */

	i__1 = m;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    i__2 = m - i__ + 1;
	    scopy_(&i__2, &c_b3, &c__0, &t_ref(i__, i__ - 1), &c__1);
/* L50: */
	}

	if (TRUE_) {

/*           Strong stability test:   
                F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) */

	    slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, &work[m * m + 1], &
		    m);
	    sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, &
		    work[1], &m);
	    sgemm_("N", "N", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, &
		    c_b38, &work[m * m + 1], &m);
	    dscale = 0.f;
	    dsum = 1.f;
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);

	    slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, &work[m * m + 1], &
		    m);
	    sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, &
		    work[1], &m);
	    sgemm_("N", "N", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, &
		    c_b38, &work[m * m + 1], &m);
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);
	    ss = dscale * sqrt(dsum);
	    strong = ss <= thresh;
	    if (! strong) {
		goto L70;
	    }

	}

/*        If the swap is accepted ("weakly" and "strongly"), apply the   
          transformations and set N1-by-N2 (2,1)-block to zero. */

	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    scopy_(n1, &c_b3, &c__0, &s_ref(*n2 + 1, i__), &c__1);
/* L60: */
	}

/*        copy back M-by-M diagonal block starting at index J1 of (A, B) */

	slacpy_("F", &m, &m, s, &c__4, &a_ref(*j1, *j1), lda);
	slacpy_("F", &m, &m, t, &c__4, &b_ref(*j1, *j1), ldb);
	scopy_(&c__16, &c_b3, &c__0, t, &c__1);

/*        Standardize existing 2-by-2 blocks. */

	i__1 = m * m;
	scopy_(&i__1, &c_b3, &c__0, &work[1], &c__1);
	work[1] = 1.f;
	t_ref(1, 1) = 1.f;
	idum = *lwork - m * m - 2;
	if (*n2 > 1) {
	    slagv2_(&a_ref(*j1, *j1), lda, &b_ref(*j1, *j1), ldb, ar, ai, be, 
		    &work[1], &work[2], &t_ref(1, 1), &t_ref(2, 1));
	    work[m + 1] = -work[2];
	    work[m + 2] = work[1];
	    t_ref(*n2, *n2) = t_ref(1, 1);
	    t_ref(1, 2) = -t_ref(2, 1);
	}
	work[m * m] = 1.f;
	t_ref(m, m) = 1.f;

	if (*n1 > 1) {
	    slagv2_(&a_ref(*j1 + *n2, *j1 + *n2), lda, &b_ref(*j1 + *n2, *j1 
		    + *n2), ldb, taur, taul, &work[m * m + 1], &work[*n2 * m 
		    + *n2 + 1], &work[*n2 * m + *n2 + 2], &t_ref(*n2 + 1, *n2 
		    + 1), &t_ref(m, m - 1));
	    work[m * m] = work[*n2 * m + *n2 + 1];
	    work[m * m - 1] = -work[*n2 * m + *n2 + 2];
	    t_ref(m, m) = t_ref(*n2 + 1, *n2 + 1);
	    t_ref(m - 1, m) = -t_ref(m, m - 1);
	}
	sgemm_("T", "N", n2, n1, n2, &c_b38, &work[1], &m, &a_ref(*j1, *j1 + *
		n2), lda, &c_b3, &work[m * m + 1], n2);
	slacpy_("Full", n2, n1, &work[m * m + 1], n2, &a_ref(*j1, *j1 + *n2), 
		lda);
	sgemm_("T", "N", n2, n1, n2, &c_b38, &work[1], &m, &b_ref(*j1, *j1 + *
		n2), ldb, &c_b3, &work[m * m + 1], n2);
	slacpy_("Full", n2, n1, &work[m * m + 1], n2, &b_ref(*j1, *j1 + *n2), 
		ldb);
	sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, &work[1], &m, &c_b3, &
		work[m * m + 1], &m);
	slacpy_("Full", &m, &m, &work[m * m + 1], &m, li, &c__4);
	sgemm_("N", "N", n2, n1, n1, &c_b38, &a_ref(*j1, *j1 + *n2), lda, &
		t_ref(*n2 + 1, *n2 + 1), &c__4, &c_b3, &work[1], n2);
	slacpy_("Full", n2, n1, &work[1], n2, &a_ref(*j1, *j1 + *n2), lda);
	sgemm_("N", "N", n2, n1, n1, &c_b38, &b_ref(*j1, *j1 + *n2), lda, &
		t_ref(*n2 + 1, *n2 + 1), &c__4, &c_b3, &work[1], n2);
	slacpy_("Full", n2, n1, &work[1], n2, &b_ref(*j1, *j1 + *n2), ldb);
	sgemm_("T", "N", &m, &m, &m, &c_b38, ir, &c__4, t, &c__4, &c_b3, &
		work[1], &m);
	slacpy_("Full", &m, &m, &work[1], &m, ir, &c__4);

/*        Accumulate transformations into Q and Z if requested. */

	if (*wantq) {
	    sgemm_("N", "N", n, &m, &m, &c_b38, &q_ref(1, *j1), ldq, li, &
		    c__4, &c_b3, &work[1], n);
	    slacpy_("Full", n, &m, &work[1], n, &q_ref(1, *j1), ldq);

	}

	if (*wantz) {
	    sgemm_("N", "N", n, &m, &m, &c_b38, &z___ref(1, *j1), ldz, ir, &
		    c__4, &c_b3, &work[1], n);
	    slacpy_("Full", n, &m, &work[1], n, &z___ref(1, *j1), ldz);

	}

/*        Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and   
                  (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */

	i__ = *j1 + m;
	if (i__ <= *n) {
	    i__1 = *n - i__ + 1;
	    sgemm_("T", "N", &m, &i__1, &m, &c_b38, li, &c__4, &a_ref(*j1, 
		    i__), lda, &c_b3, &work[1], &m);
	    i__1 = *n - i__ + 1;
	    slacpy_("Full", &m, &i__1, &work[1], &m, &a_ref(*j1, i__), lda);
	    i__1 = *n - i__ + 1;
	    sgemm_("T", "N", &m, &i__1, &m, &c_b38, li, &c__4, &b_ref(*j1, 
		    i__), lda, &c_b3, &work[1], &m);
	    i__1 = *n - i__ + 1;
	    slacpy_("Full", &m, &i__1, &work[1], &m, &b_ref(*j1, i__), lda);
	}
	i__ = *j1 - 1;
	if (i__ > 0) {
	    sgemm_("N", "N", &i__, &m, &m, &c_b38, &a_ref(1, *j1), lda, ir, &
		    c__4, &c_b3, &work[1], &i__);
	    slacpy_("Full", &i__, &m, &work[1], &i__, &a_ref(1, *j1), lda);
	    sgemm_("N", "N", &i__, &m, &m, &c_b38, &b_ref(1, *j1), ldb, ir, &
		    c__4, &c_b3, &work[1], &i__);
	    slacpy_("Full", &i__, &m, &work[1], &i__, &b_ref(1, *j1), ldb);
	}

/*        Exit with INFO = 0 if swap was successfully performed. */

	return 0;

    }

/*     Exit with INFO = 1 if swap was rejected. */

L70:

    *info = 1;
    return 0;

/*     End of STGEX2 */

} /* stgex2_ */
Пример #17
0
void test09 ( void )

/******************************************************************************/
/*
  Purpose:

    TEST09 tests SROT.

  Modified:

    29 March 2007

  Author:

    John Burkardt
*/
{
  float c;
  int i;
  int inc1;
  int inc2;
  int n = 6;
  int ncopy;
  float s;
  float *x;
  float *y;

  x = malloc ( n * sizeof ( float ) );
  y = malloc ( n * sizeof ( float ) );

  for ( i = 0; i < n; i++ )
  {
    x[i] = ( float ) ( i + 1 );
  }

  for ( i = 0; i < n; i++ )
  {
    y[i] = ( float ) ( ( i + 1 ) * ( i + 1 ) - 12 );
  }

  printf ( "\n" );
  printf ( "TEST09\n" );
  printf ( "  SROT carries out a Givens rotation.\n" );
  printf ( "\n" );
  printf ( "  X and Y\n" );
  printf ( "\n" );
  for ( i = 0; i < n; i++ )
  {
    printf ( "  %6d  %14f  %14f\n", i + 1, x[i], y[i] );
  }

  ncopy = n;
  inc1 = 1;
  inc2 = 1;
  c = 0.5;
  s = sqrt ( 1.0 - c * c );

  srot_ ( &ncopy, x, &inc1, y, &inc2, &c, &s );

  printf ( "\n" );
  printf ( "  SROT ( N, X, 1, Y, 1, %f, %f )\n", c, s );
  printf ( "\n" );
  for ( i = 0; i < n; i++ )
  {
    printf ( "  %6d  %14f  %14f\n", i + 1, x[i], y[i] );
  }

  for ( i = 0; i < n; i++ )
  {
    x[i] = ( float ) ( i + 1 );
  }

  for ( i = 0; i < n; i++ )
  {
    y[i] = ( float ) ( ( i + 1 ) * ( i + 1 ) - 12 );
  }

  c = x[0] / sqrt ( x[0] * x[0] + y[0] * y[0] );
  s = y[0] / sqrt ( x[0] * x[0] + y[0] * y[0] );

  ncopy = n;
  inc1 = 1;
  inc2 = 1;

  srot_ ( &ncopy, x, &inc1, y, &inc2, &c, &s );

  printf ( "\n" );
  printf ( "  SROT ( N, X, 1, Y, 1, %f, %f )\n", c, s );
  printf ( "\n" );
  for ( i = 0; i < n; i++ )
  {
    printf ( "  %6d  %14f  %14f\n", i + 1, x[i], y[i] );
  }

  free ( x );
  free ( y );

  return;
}
Пример #18
0
/* Subroutine */ int stgsja_(char *jobu, char *jobv, char *jobq, integer *m, 
	integer *p, integer *n, integer *k, integer *l, real *a, integer *lda, 
	 real *b, integer *ldb, real *tola, real *tolb, real *alpha, real *
	beta, real *u, integer *ldu, real *v, integer *ldv, real *q, integer *
	ldq, real *work, integer *ncycle, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, 
	    u_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4;
    real r__1;

    /* Local variables */
    integer i__, j;
    real a1, a2, a3, b1, b2, b3, csq, csu, csv, snq, rwk, snu, snv;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    real gamma;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    logical initq, initu, initv, wantq, upper;
    real error, ssmin;
    logical wantu, wantv;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), slags2_(logical *, real *, real *, real *, real *, 
	    real *, real *, real *, real *, real *, real *, real *, real *);
    integer kcycle;
    extern /* Subroutine */ int xerbla_(char *, integer *), slapll_(
	    integer *, real *, integer *, real *, integer *, real *), slartg_(
	    real *, real *, real *, real *, real *), slaset_(char *, integer *
, integer *, real *, real *, real *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  STGSJA computes the generalized singular value decomposition (GSVD) */
/*  of two real upper triangular (or trapezoidal) matrices A and B. */

/*  On entry, it is assumed that matrices A and B have the following */
/*  forms, which may be obtained by the preprocessing subroutine SGGSVP */
/*  from a general M-by-N matrix A and P-by-N matrix B: */

/*               N-K-L  K    L */
/*     A =    K ( 0    A12  A13 ) if M-K-L >= 0; */
/*            L ( 0     0   A23 ) */
/*        M-K-L ( 0     0    0  ) */

/*             N-K-L  K    L */
/*     A =  K ( 0    A12  A13 ) if M-K-L < 0; */
/*        M-K ( 0     0   A23 ) */

/*             N-K-L  K    L */
/*     B =  L ( 0     0   B13 ) */
/*        P-L ( 0     0    0  ) */

/*  where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular */
/*  upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, */
/*  otherwise A23 is (M-K)-by-L upper trapezoidal. */

/*  On exit, */

/*              U'*A*Q = D1*( 0 R ),    V'*B*Q = D2*( 0 R ), */

/*  where U, V and Q are orthogonal matrices, Z' denotes the transpose */
/*  of Z, R is a nonsingular upper triangular matrix, and D1 and D2 are */
/*  ``diagonal'' matrices, which are of the following structures: */

/*  If M-K-L >= 0, */

/*                      K  L */
/*         D1 =     K ( I  0 ) */
/*                  L ( 0  C ) */
/*              M-K-L ( 0  0 ) */

/*                    K  L */
/*         D2 = L   ( 0  S ) */
/*              P-L ( 0  0 ) */

/*                 N-K-L  K    L */
/*    ( 0 R ) = K (  0   R11  R12 ) K */
/*              L (  0    0   R22 ) L */

/*  where */

/*    C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), */
/*    S = diag( BETA(K+1),  ... , BETA(K+L) ), */
/*    C**2 + S**2 = I. */

/*    R is stored in A(1:K+L,N-K-L+1:N) on exit. */

/*  If M-K-L < 0, */

/*                 K M-K K+L-M */
/*      D1 =   K ( I  0    0   ) */
/*           M-K ( 0  C    0   ) */

/*                   K M-K K+L-M */
/*      D2 =   M-K ( 0  S    0   ) */
/*           K+L-M ( 0  0    I   ) */
/*             P-L ( 0  0    0   ) */

/*                 N-K-L  K   M-K  K+L-M */
/* ( 0 R ) =    K ( 0    R11  R12  R13  ) */
/*            M-K ( 0     0   R22  R23  ) */
/*          K+L-M ( 0     0    0   R33  ) */

/*  where */
/*  C = diag( ALPHA(K+1), ... , ALPHA(M) ), */
/*  S = diag( BETA(K+1),  ... , BETA(M) ), */
/*  C**2 + S**2 = I. */

/*  R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored */
/*      (  0  R22 R23 ) */
/*  in B(M-K+1:L,N+M-K-L+1:N) on exit. */

/*  The computation of the orthogonal transformation matrices U, V or Q */
/*  is optional.  These matrices may either be formed explicitly, or they */
/*  may be postmultiplied into input matrices U1, V1, or Q1. */

/*  Arguments */
/*  ========= */

/*  JOBU    (input) CHARACTER*1 */
/*          = 'U':  U must contain an orthogonal matrix U1 on entry, and */
/*                  the product U1*U is returned; */
/*          = 'I':  U is initialized to the unit matrix, and the */
/*                  orthogonal matrix U is returned; */
/*          = 'N':  U is not computed. */

/*  JOBV    (input) CHARACTER*1 */
/*          = 'V':  V must contain an orthogonal matrix V1 on entry, and */
/*                  the product V1*V is returned; */
/*          = 'I':  V is initialized to the unit matrix, and the */
/*                  orthogonal matrix V is returned; */
/*          = 'N':  V is not computed. */

/*  JOBQ    (input) CHARACTER*1 */
/*          = 'Q':  Q must contain an orthogonal matrix Q1 on entry, and */
/*                  the product Q1*Q is returned; */
/*          = 'I':  Q is initialized to the unit matrix, and the */
/*                  orthogonal matrix Q is returned; */
/*          = 'N':  Q is not computed. */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  P       (input) INTEGER */
/*          The number of rows of the matrix B.  P >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrices A and B.  N >= 0. */

/*  K       (input) INTEGER */
/*  L       (input) INTEGER */
/*          K and L specify the subblocks in the input matrices A and B: */
/*          A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N) */
/*          of A and B, whose GSVD is going to be computed by STGSJA. */
/*          See Further details. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix A. */
/*          On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular */
/*          matrix R or part of R.  See Purpose for details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= max(1,M). */

/*  B       (input/output) REAL array, dimension (LDB,N) */
/*          On entry, the P-by-N matrix B. */
/*          On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains */
/*          a part of R.  See Purpose for details. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. LDB >= max(1,P). */

/*  TOLA    (input) REAL */
/*  TOLB    (input) REAL */
/*          TOLA and TOLB are the convergence criteria for the Jacobi- */
/*          Kogbetliantz iteration procedure. Generally, they are the */
/*          same as used in the preprocessing step, say */
/*              TOLA = max(M,N)*norm(A)*MACHEPS, */
/*              TOLB = max(P,N)*norm(B)*MACHEPS. */

/*  ALPHA   (output) REAL array, dimension (N) */
/*  BETA    (output) REAL array, dimension (N) */
/*          On exit, ALPHA and BETA contain the generalized singular */
/*          value pairs of A and B; */
/*            ALPHA(1:K) = 1, */
/*            BETA(1:K)  = 0, */
/*          and if M-K-L >= 0, */
/*            ALPHA(K+1:K+L) = diag(C), */
/*            BETA(K+1:K+L)  = diag(S), */
/*          or if M-K-L < 0, */
/*            ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 */
/*            BETA(K+1:M) = S, BETA(M+1:K+L) = 1. */
/*          Furthermore, if K+L < N, */
/*            ALPHA(K+L+1:N) = 0 and */
/*            BETA(K+L+1:N)  = 0. */

/*  U       (input/output) REAL array, dimension (LDU,M) */
/*          On entry, if JOBU = 'U', U must contain a matrix U1 (usually */
/*          the orthogonal matrix returned by SGGSVP). */
/*          On exit, */
/*          if JOBU = 'I', U contains the orthogonal matrix U; */
/*          if JOBU = 'U', U contains the product U1*U. */
/*          If JOBU = 'N', U is not referenced. */

/*  LDU     (input) INTEGER */
/*          The leading dimension of the array U. LDU >= max(1,M) if */
/*          JOBU = 'U'; LDU >= 1 otherwise. */

/*  V       (input/output) REAL array, dimension (LDV,P) */
/*          On entry, if JOBV = 'V', V must contain a matrix V1 (usually */
/*          the orthogonal matrix returned by SGGSVP). */
/*          On exit, */
/*          if JOBV = 'I', V contains the orthogonal matrix V; */
/*          if JOBV = 'V', V contains the product V1*V. */
/*          If JOBV = 'N', V is not referenced. */

/*  LDV     (input) INTEGER */
/*          The leading dimension of the array V. LDV >= max(1,P) if */
/*          JOBV = 'V'; LDV >= 1 otherwise. */

/*  Q       (input/output) REAL array, dimension (LDQ,N) */
/*          On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually */
/*          the orthogonal matrix returned by SGGSVP). */
/*          On exit, */
/*          if JOBQ = 'I', Q contains the orthogonal matrix Q; */
/*          if JOBQ = 'Q', Q contains the product Q1*Q. */
/*          If JOBQ = 'N', Q is not referenced. */

/*  LDQ     (input) INTEGER */
/*          The leading dimension of the array Q. LDQ >= max(1,N) if */
/*          JOBQ = 'Q'; LDQ >= 1 otherwise. */

/*  WORK    (workspace) REAL array, dimension (2*N) */

/*  NCYCLE  (output) INTEGER */
/*          The number of cycles required for convergence. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          = 1:  the procedure does not converge after MAXIT cycles. */

/*  Internal Parameters */
/*  =================== */

/*  MAXIT   INTEGER */
/*          MAXIT specifies the total loops that the iterative procedure */
/*          may take. If after MAXIT cycles, the routine fails to */
/*          converge, we return INFO = 1. */

/*  Further Details */
/*  =============== */

/*  STGSJA essentially uses a variant of Kogbetliantz algorithm to reduce */
/*  min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L */
/*  matrix B13 to the form: */

/*           U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1, */

/*  where U1, V1 and Q1 are orthogonal matrix, and Z' is the transpose */
/*  of Z.  C1 and S1 are diagonal matrices satisfying */

/*                C1**2 + S1**2 = I, */

/*  and R1 is an L-by-L nonsingular upper triangular matrix. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */

/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Decode and test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --alpha;
    --beta;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --work;

    /* Function Body */
    initu = lsame_(jobu, "I");
    wantu = initu || lsame_(jobu, "U");

    initv = lsame_(jobv, "I");
    wantv = initv || lsame_(jobv, "V");

    initq = lsame_(jobq, "I");
    wantq = initq || lsame_(jobq, "Q");

    *info = 0;
    if (! (initu || wantu || lsame_(jobu, "N"))) {
	*info = -1;
    } else if (! (initv || wantv || lsame_(jobv, "N"))) 
	    {
	*info = -2;
    } else if (! (initq || wantq || lsame_(jobq, "N"))) 
	    {
	*info = -3;
    } else if (*m < 0) {
	*info = -4;
    } else if (*p < 0) {
	*info = -5;
    } else if (*n < 0) {
	*info = -6;
    } else if (*lda < max(1,*m)) {
	*info = -10;
    } else if (*ldb < max(1,*p)) {
	*info = -12;
    } else if (*ldu < 1 || wantu && *ldu < *m) {
	*info = -18;
    } else if (*ldv < 1 || wantv && *ldv < *p) {
	*info = -20;
    } else if (*ldq < 1 || wantq && *ldq < *n) {
	*info = -22;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("STGSJA", &i__1);
	return 0;
    }

/*     Initialize U, V and Q, if necessary */

    if (initu) {
	slaset_("Full", m, m, &c_b13, &c_b14, &u[u_offset], ldu);
    }
    if (initv) {
	slaset_("Full", p, p, &c_b13, &c_b14, &v[v_offset], ldv);
    }
    if (initq) {
	slaset_("Full", n, n, &c_b13, &c_b14, &q[q_offset], ldq);
    }

/*     Loop until convergence */

    upper = FALSE_;
    for (kcycle = 1; kcycle <= 40; ++kcycle) {

	upper = ! upper;

	i__1 = *l - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    i__2 = *l;
	    for (j = i__ + 1; j <= i__2; ++j) {

		a1 = 0.f;
		a2 = 0.f;
		a3 = 0.f;
		if (*k + i__ <= *m) {
		    a1 = a[*k + i__ + (*n - *l + i__) * a_dim1];
		}
		if (*k + j <= *m) {
		    a3 = a[*k + j + (*n - *l + j) * a_dim1];
		}

		b1 = b[i__ + (*n - *l + i__) * b_dim1];
		b3 = b[j + (*n - *l + j) * b_dim1];

		if (upper) {
		    if (*k + i__ <= *m) {
			a2 = a[*k + i__ + (*n - *l + j) * a_dim1];
		    }
		    b2 = b[i__ + (*n - *l + j) * b_dim1];
		} else {
		    if (*k + j <= *m) {
			a2 = a[*k + j + (*n - *l + i__) * a_dim1];
		    }
		    b2 = b[j + (*n - *l + i__) * b_dim1];
		}

		slags2_(&upper, &a1, &a2, &a3, &b1, &b2, &b3, &csu, &snu, &
			csv, &snv, &csq, &snq);

/*              Update (K+I)-th and (K+J)-th rows of matrix A: U'*A */

		if (*k + j <= *m) {
		    srot_(l, &a[*k + j + (*n - *l + 1) * a_dim1], lda, &a[*k 
			    + i__ + (*n - *l + 1) * a_dim1], lda, &csu, &snu);
		}

/*              Update I-th and J-th rows of matrix B: V'*B */

		srot_(l, &b[j + (*n - *l + 1) * b_dim1], ldb, &b[i__ + (*n - *
			l + 1) * b_dim1], ldb, &csv, &snv);

/*              Update (N-L+I)-th and (N-L+J)-th columns of matrices */
/*              A and B: A*Q and B*Q */

/* Computing MIN */
		i__4 = *k + *l;
		i__3 = min(i__4,*m);
		srot_(&i__3, &a[(*n - *l + j) * a_dim1 + 1], &c__1, &a[(*n - *
			l + i__) * a_dim1 + 1], &c__1, &csq, &snq);

		srot_(l, &b[(*n - *l + j) * b_dim1 + 1], &c__1, &b[(*n - *l + 
			i__) * b_dim1 + 1], &c__1, &csq, &snq);

		if (upper) {
		    if (*k + i__ <= *m) {
			a[*k + i__ + (*n - *l + j) * a_dim1] = 0.f;
		    }
		    b[i__ + (*n - *l + j) * b_dim1] = 0.f;
		} else {
		    if (*k + j <= *m) {
			a[*k + j + (*n - *l + i__) * a_dim1] = 0.f;
		    }
		    b[j + (*n - *l + i__) * b_dim1] = 0.f;
		}

/*              Update orthogonal matrices U, V, Q, if desired. */

		if (wantu && *k + j <= *m) {
		    srot_(m, &u[(*k + j) * u_dim1 + 1], &c__1, &u[(*k + i__) *
			     u_dim1 + 1], &c__1, &csu, &snu);
		}

		if (wantv) {
		    srot_(p, &v[j * v_dim1 + 1], &c__1, &v[i__ * v_dim1 + 1], 
			    &c__1, &csv, &snv);
		}

		if (wantq) {
		    srot_(n, &q[(*n - *l + j) * q_dim1 + 1], &c__1, &q[(*n - *
			    l + i__) * q_dim1 + 1], &c__1, &csq, &snq);
		}

/* L10: */
	    }
/* L20: */
	}

	if (! upper) {

/*           The matrices A13 and B13 were lower triangular at the start */
/*           of the cycle, and are now upper triangular. */

/*           Convergence test: test the parallelism of the corresponding */
/*           rows of A and B. */

	    error = 0.f;
/* Computing MIN */
	    i__2 = *l, i__3 = *m - *k;
	    i__1 = min(i__2,i__3);
	    for (i__ = 1; i__ <= i__1; ++i__) {
		i__2 = *l - i__ + 1;
		scopy_(&i__2, &a[*k + i__ + (*n - *l + i__) * a_dim1], lda, &
			work[1], &c__1);
		i__2 = *l - i__ + 1;
		scopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &work[*
			l + 1], &c__1);
		i__2 = *l - i__ + 1;
		slapll_(&i__2, &work[1], &c__1, &work[*l + 1], &c__1, &ssmin);
		error = dmax(error,ssmin);
/* L30: */
	    }

	    if (dabs(error) <= dmin(*tola,*tolb)) {
		goto L50;
	    }
	}

/*        End of cycle loop */

/* L40: */
    }

/*     The algorithm has not converged after MAXIT cycles. */

    *info = 1;
    goto L100;

L50:

/*     If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. */
/*     Compute the generalized singular value pairs (ALPHA, BETA), and */
/*     set the triangular matrix R to array A. */

    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	alpha[i__] = 1.f;
	beta[i__] = 0.f;
/* L60: */
    }

/* Computing MIN */
    i__2 = *l, i__3 = *m - *k;
    i__1 = min(i__2,i__3);
    for (i__ = 1; i__ <= i__1; ++i__) {

	a1 = a[*k + i__ + (*n - *l + i__) * a_dim1];
	b1 = b[i__ + (*n - *l + i__) * b_dim1];

	if (a1 != 0.f) {
	    gamma = b1 / a1;

/*           change sign if necessary */

	    if (gamma < 0.f) {
		i__2 = *l - i__ + 1;
		sscal_(&i__2, &c_b43, &b[i__ + (*n - *l + i__) * b_dim1], ldb)
			;
		if (wantv) {
		    sscal_(p, &c_b43, &v[i__ * v_dim1 + 1], &c__1);
		}
	    }

	    r__1 = dabs(gamma);
	    slartg_(&r__1, &c_b14, &beta[*k + i__], &alpha[*k + i__], &rwk);

	    if (alpha[*k + i__] >= beta[*k + i__]) {
		i__2 = *l - i__ + 1;
		r__1 = 1.f / alpha[*k + i__];
		sscal_(&i__2, &r__1, &a[*k + i__ + (*n - *l + i__) * a_dim1], 
			lda);
	    } else {
		i__2 = *l - i__ + 1;
		r__1 = 1.f / beta[*k + i__];
		sscal_(&i__2, &r__1, &b[i__ + (*n - *l + i__) * b_dim1], ldb);
		i__2 = *l - i__ + 1;
		scopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &a[*k 
			+ i__ + (*n - *l + i__) * a_dim1], lda);
	    }

	} else {

	    alpha[*k + i__] = 0.f;
	    beta[*k + i__] = 1.f;
	    i__2 = *l - i__ + 1;
	    scopy_(&i__2, &b[i__ + (*n - *l + i__) * b_dim1], ldb, &a[*k + 
		    i__ + (*n - *l + i__) * a_dim1], lda);

	}

/* L70: */
    }

/*     Post-assignment */

    i__1 = *k + *l;
    for (i__ = *m + 1; i__ <= i__1; ++i__) {
	alpha[i__] = 0.f;
	beta[i__] = 1.f;
/* L80: */
    }

    if (*k + *l < *n) {
	i__1 = *n;
	for (i__ = *k + *l + 1; i__ <= i__1; ++i__) {
	    alpha[i__] = 0.f;
	    beta[i__] = 0.f;
/* L90: */
	}
    }

L100:
    *ncycle = kcycle;
    return 0;

/*     End of STGSJA */

} /* stgsja_ */
Пример #19
0
/* Subroutine */ int sgbbrd_(char *vect, integer *m, integer *n, integer *ncc,
	 integer *kl, integer *ku, real *ab, integer *ldab, real *d__, real *
	e, real *q, integer *ldq, real *pt, integer *ldpt, real *c__, integer 
	*ldc, real *work, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SGBBRD reduces a real general m-by-n band matrix A to upper   
    bidiagonal form B by an orthogonal transformation: Q' * A * P = B.   

    The routine computes B, and optionally forms Q or P', or computes   
    Q'*C for a given matrix C.   

    Arguments   
    =========   

    VECT    (input) CHARACTER*1   
            Specifies whether or not the matrices Q and P' are to be   
            formed.   
            = 'N': do not form Q or P';   
            = 'Q': form Q only;   
            = 'P': form P' only;   
            = 'B': form both.   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    NCC     (input) INTEGER   
            The number of columns of the matrix C.  NCC >= 0.   

    KL      (input) INTEGER   
            The number of subdiagonals of the matrix A. KL >= 0.   

    KU      (input) INTEGER   
            The number of superdiagonals of the matrix A. KU >= 0.   

    AB      (input/output) REAL array, dimension (LDAB,N)   
            On entry, the m-by-n band matrix A, stored in rows 1 to   
            KL+KU+1. The j-th column of A is stored in the j-th column of   
            the array AB as follows:   
            AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl).   
            On exit, A is overwritten by values generated during the   
            reduction.   

    LDAB    (input) INTEGER   
            The leading dimension of the array A. LDAB >= KL+KU+1.   

    D       (output) REAL array, dimension (min(M,N))   
            The diagonal elements of the bidiagonal matrix B.   

    E       (output) REAL array, dimension (min(M,N)-1)   
            The superdiagonal elements of the bidiagonal matrix B.   

    Q       (output) REAL array, dimension (LDQ,M)   
            If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q.   
            If VECT = 'N' or 'P', the array Q is not referenced.   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.   
            LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise.   

    PT      (output) REAL array, dimension (LDPT,N)   
            If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'.   
            If VECT = 'N' or 'Q', the array PT is not referenced.   

    LDPT    (input) INTEGER   
            The leading dimension of the array PT.   
            LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise.   

    C       (input/output) REAL array, dimension (LDC,NCC)   
            On entry, an m-by-ncc matrix C.   
            On exit, C is overwritten by Q'*C.   
            C is not referenced if NCC = 0.   

    LDC     (input) INTEGER   
            The leading dimension of the array C.   
            LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0.   

    WORK    (workspace) REAL array, dimension (2*max(M,N))   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    =====================================================================   


       Test the input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static real c_b8 = 0.f;
    static real c_b9 = 1.f;
    static integer c__1 = 1;
    
    /* System generated locals */
    integer ab_dim1, ab_offset, c_dim1, c_offset, pt_dim1, pt_offset, q_dim1, 
	    q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
    /* Local variables */
    static integer inca;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static integer i__, j, l;
    extern logical lsame_(char *, char *);
    static logical wantb, wantc;
    static integer minmn;
    static logical wantq;
    static integer j1, j2, kb;
    static real ra, rb, rc;
    static integer kk, ml, mn, nr, mu;
    static real rs;
    extern /* Subroutine */ int xerbla_(char *, integer *), slaset_(
	    char *, integer *, integer *, real *, real *, real *, integer *), slartg_(real *, real *, real *, real *, real *);
    static integer kb1;
    extern /* Subroutine */ int slargv_(integer *, real *, integer *, real *, 
	    integer *, real *, integer *);
    static integer ml0;
    extern /* Subroutine */ int slartv_(integer *, real *, integer *, real *, 
	    integer *, real *, real *, integer *);
    static logical wantpt;
    static integer mu0, klm, kun, nrt, klu1;
#define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1]
#define pt_ref(a_1,a_2) pt[(a_2)*pt_dim1 + a_1]


    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --d__;
    --e;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    pt_dim1 = *ldpt;
    pt_offset = 1 + pt_dim1 * 1;
    pt -= pt_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    wantb = lsame_(vect, "B");
    wantq = lsame_(vect, "Q") || wantb;
    wantpt = lsame_(vect, "P") || wantb;
    wantc = *ncc > 0;
    klu1 = *kl + *ku + 1;
    *info = 0;
    if (! wantq && ! wantpt && ! lsame_(vect, "N")) {
	*info = -1;
    } else if (*m < 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*ncc < 0) {
	*info = -4;
    } else if (*kl < 0) {
	*info = -5;
    } else if (*ku < 0) {
	*info = -6;
    } else if (*ldab < klu1) {
	*info = -8;
    } else if (*ldq < 1 || wantq && *ldq < max(1,*m)) {
	*info = -12;
    } else if (*ldpt < 1 || wantpt && *ldpt < max(1,*n)) {
	*info = -14;
    } else if (*ldc < 1 || wantc && *ldc < max(1,*m)) {
	*info = -16;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGBBRD", &i__1);
	return 0;
    }

/*     Initialize Q and P' to the unit matrix, if needed */

    if (wantq) {
	slaset_("Full", m, m, &c_b8, &c_b9, &q[q_offset], ldq);
    }
    if (wantpt) {
	slaset_("Full", n, n, &c_b8, &c_b9, &pt[pt_offset], ldpt);
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0) {
	return 0;
    }

    minmn = min(*m,*n);

    if (*kl + *ku > 1) {

/*        Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce   
          first to lower bidiagonal form and then transform to upper   
          bidiagonal */

	if (*ku > 0) {
	    ml0 = 1;
	    mu0 = 2;
	} else {
	    ml0 = 2;
	    mu0 = 1;
	}

/*        Wherever possible, plane rotations are generated and applied in   
          vector operations of length NR over the index set J1:J2:KLU1.   

          The sines of the plane rotations are stored in WORK(1:max(m,n))   
          and the cosines in WORK(max(m,n)+1:2*max(m,n)). */

	mn = max(*m,*n);
/* Computing MIN */
	i__1 = *m - 1;
	klm = min(i__1,*kl);
/* Computing MIN */
	i__1 = *n - 1;
	kun = min(i__1,*ku);
	kb = klm + kun;
	kb1 = kb + 1;
	inca = kb1 * *ldab;
	nr = 0;
	j1 = klm + 2;
	j2 = 1 - kun;

	i__1 = minmn;
	for (i__ = 1; i__ <= i__1; ++i__) {

/*           Reduce i-th column and i-th row of matrix to bidiagonal form */

	    ml = klm + 1;
	    mu = kun + 1;
	    i__2 = kb;
	    for (kk = 1; kk <= i__2; ++kk) {
		j1 += kb;
		j2 += kb;

/*              generate plane rotations to annihilate nonzero elements   
                which have been created below the band */

		if (nr > 0) {
		    slargv_(&nr, &ab_ref(klu1, j1 - klm - 1), &inca, &work[j1]
			    , &kb1, &work[mn + j1], &kb1);
		}

/*              apply plane rotations from the left */

		i__3 = kb;
		for (l = 1; l <= i__3; ++l) {
		    if (j2 - klm + l - 1 > *n) {
			nrt = nr - 1;
		    } else {
			nrt = nr;
		    }
		    if (nrt > 0) {
			slartv_(&nrt, &ab_ref(klu1 - l, j1 - klm + l - 1), &
				inca, &ab_ref(klu1 - l + 1, j1 - klm + l - 1),
				 &inca, &work[mn + j1], &work[j1], &kb1);
		    }
/* L10: */
		}

		if (ml > ml0) {
		    if (ml <= *m - i__ + 1) {

/*                    generate plane rotation to annihilate a(i+ml-1,i)   
                      within the band, and apply rotation from the left */

			slartg_(&ab_ref(*ku + ml - 1, i__), &ab_ref(*ku + ml, 
				i__), &work[mn + i__ + ml - 1], &work[i__ + 
				ml - 1], &ra);
			ab_ref(*ku + ml - 1, i__) = ra;
			if (i__ < *n) {
/* Computing MIN */
			    i__4 = *ku + ml - 2, i__5 = *n - i__;
			    i__3 = min(i__4,i__5);
			    i__6 = *ldab - 1;
			    i__7 = *ldab - 1;
			    srot_(&i__3, &ab_ref(*ku + ml - 2, i__ + 1), &
				    i__6, &ab_ref(*ku + ml - 1, i__ + 1), &
				    i__7, &work[mn + i__ + ml - 1], &work[i__ 
				    + ml - 1]);
			}
		    }
		    ++nr;
		    j1 -= kb1;
		}

		if (wantq) {

/*                 accumulate product of plane rotations in Q */

		    i__3 = j2;
		    i__4 = kb1;
		    for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) 
			    {
			srot_(m, &q_ref(1, j - 1), &c__1, &q_ref(1, j), &c__1,
				 &work[mn + j], &work[j]);
/* L20: */
		    }
		}

		if (wantc) {

/*                 apply plane rotations to C */

		    i__4 = j2;
		    i__3 = kb1;
		    for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) 
			    {
			srot_(ncc, &c___ref(j - 1, 1), ldc, &c___ref(j, 1), 
				ldc, &work[mn + j], &work[j]);
/* L30: */
		    }
		}

		if (j2 + kun > *n) {

/*                 adjust J2 to keep within the bounds of the matrix */

		    --nr;
		    j2 -= kb1;
		}

		i__3 = j2;
		i__4 = kb1;
		for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) {

/*                 create nonzero element a(j-1,j+ku) above the band   
                   and store it in WORK(n+1:2*n) */

		    work[j + kun] = work[j] * ab_ref(1, j + kun);
		    ab_ref(1, j + kun) = work[mn + j] * ab_ref(1, j + kun);
/* L40: */
		}

/*              generate plane rotations to annihilate nonzero elements   
                which have been generated above the band */

		if (nr > 0) {
		    slargv_(&nr, &ab_ref(1, j1 + kun - 1), &inca, &work[j1 + 
			    kun], &kb1, &work[mn + j1 + kun], &kb1);
		}

/*              apply plane rotations from the right */

		i__4 = kb;
		for (l = 1; l <= i__4; ++l) {
		    if (j2 + l - 1 > *m) {
			nrt = nr - 1;
		    } else {
			nrt = nr;
		    }
		    if (nrt > 0) {
			slartv_(&nrt, &ab_ref(l + 1, j1 + kun - 1), &inca, &
				ab_ref(l, j1 + kun), &inca, &work[mn + j1 + 
				kun], &work[j1 + kun], &kb1);
		    }
/* L50: */
		}

		if (ml == ml0 && mu > mu0) {
		    if (mu <= *n - i__ + 1) {

/*                    generate plane rotation to annihilate a(i,i+mu-1)   
                      within the band, and apply rotation from the right */

			slartg_(&ab_ref(*ku - mu + 3, i__ + mu - 2), &ab_ref(*
				ku - mu + 2, i__ + mu - 1), &work[mn + i__ + 
				mu - 1], &work[i__ + mu - 1], &ra);
			ab_ref(*ku - mu + 3, i__ + mu - 2) = ra;
/* Computing MIN */
			i__3 = *kl + mu - 2, i__5 = *m - i__;
			i__4 = min(i__3,i__5);
			srot_(&i__4, &ab_ref(*ku - mu + 4, i__ + mu - 2), &
				c__1, &ab_ref(*ku - mu + 3, i__ + mu - 1), &
				c__1, &work[mn + i__ + mu - 1], &work[i__ + 
				mu - 1]);
		    }
		    ++nr;
		    j1 -= kb1;
		}

		if (wantpt) {

/*                 accumulate product of plane rotations in P' */

		    i__4 = j2;
		    i__3 = kb1;
		    for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) 
			    {
			srot_(n, &pt_ref(j + kun - 1, 1), ldpt, &pt_ref(j + 
				kun, 1), ldpt, &work[mn + j + kun], &work[j + 
				kun]);
/* L60: */
		    }
		}

		if (j2 + kb > *m) {

/*                 adjust J2 to keep within the bounds of the matrix */

		    --nr;
		    j2 -= kb1;
		}

		i__3 = j2;
		i__4 = kb1;
		for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) {

/*                 create nonzero element a(j+kl+ku,j+ku-1) below the   
                   band and store it in WORK(1:n) */

		    work[j + kb] = work[j + kun] * ab_ref(klu1, j + kun);
		    ab_ref(klu1, j + kun) = work[mn + j + kun] * ab_ref(klu1, 
			    j + kun);
/* L70: */
		}

		if (ml > ml0) {
		    --ml;
		} else {
		    --mu;
		}
/* L80: */
	    }
/* L90: */
	}
    }

    if (*ku == 0 && *kl > 0) {

/*        A has been reduced to lower bidiagonal form   

          Transform lower bidiagonal form to upper bidiagonal by applying   
          plane rotations from the left, storing diagonal elements in D   
          and off-diagonal elements in E   

   Computing MIN */
	i__2 = *m - 1;
	i__1 = min(i__2,*n);
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slartg_(&ab_ref(1, i__), &ab_ref(2, i__), &rc, &rs, &ra);
	    d__[i__] = ra;
	    if (i__ < *n) {
		e[i__] = rs * ab_ref(1, i__ + 1);
		ab_ref(1, i__ + 1) = rc * ab_ref(1, i__ + 1);
	    }
	    if (wantq) {
		srot_(m, &q_ref(1, i__), &c__1, &q_ref(1, i__ + 1), &c__1, &
			rc, &rs);
	    }
	    if (wantc) {
		srot_(ncc, &c___ref(i__, 1), ldc, &c___ref(i__ + 1, 1), ldc, &
			rc, &rs);
	    }
/* L100: */
	}
	if (*m <= *n) {
	    d__[*m] = ab_ref(1, *m);
	}
    } else if (*ku > 0) {

/*        A has been reduced to upper bidiagonal form */

	if (*m < *n) {

/*           Annihilate a(m,m+1) by applying plane rotations from the   
             right, storing diagonal elements in D and off-diagonal   
             elements in E */

	    rb = ab_ref(*ku, *m + 1);
	    for (i__ = *m; i__ >= 1; --i__) {
		slartg_(&ab_ref(*ku + 1, i__), &rb, &rc, &rs, &ra);
		d__[i__] = ra;
		if (i__ > 1) {
		    rb = -rs * ab_ref(*ku, i__);
		    e[i__ - 1] = rc * ab_ref(*ku, i__);
		}
		if (wantpt) {
		    srot_(n, &pt_ref(i__, 1), ldpt, &pt_ref(*m + 1, 1), ldpt, 
			    &rc, &rs);
		}
/* L110: */
	    }
	} else {

/*           Copy off-diagonal elements to E and diagonal elements to D */

	    i__1 = minmn - 1;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		e[i__] = ab_ref(*ku, i__ + 1);
/* L120: */
	    }
	    i__1 = minmn;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		d__[i__] = ab_ref(*ku + 1, i__);
/* L130: */
	    }
	}
    } else {

/*        A is diagonal. Set elements of E to zero and copy diagonal   
          elements to D. */

	i__1 = minmn - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    e[i__] = 0.f;
/* L140: */
	}
	i__1 = minmn;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    d__[i__] = ab_ref(1, i__);
/* L150: */
	}
    }
    return 0;

/*     End of SGBBRD */

} /* sgbbrd_ */
Пример #20
0
/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, 
	integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, 
	integer *ldbx, integer *perm, integer *givptr, integer *givcol, 
	integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real *
	difl, real *difr, real *z__, integer *k, real *c__, real *s, real *
	work, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       December 1, 1999   


    Purpose   
    =======   

    SLALS0 applies back the multiplying factors of either the left or the   
    right singular vector matrix of a diagonal matrix appended by a row   
    to the right hand side matrix B in solving the least squares problem   
    using the divide-and-conquer SVD approach.   

    For the left singular vector matrix, three types of orthogonal   
    matrices are involved:   

    (1L) Givens rotations: the number of such rotations is GIVPTR; the   
         pairs of columns/rows they were applied to are stored in GIVCOL;   
         and the C- and S-values of these rotations are stored in GIVNUM.   

    (2L) Permutation. The (NL+1)-st row of B is to be moved to the first   
         row, and for J=2:N, PERM(J)-th row of B is to be moved to the   
         J-th row.   

    (3L) The left singular vector matrix of the remaining matrix.   

    For the right singular vector matrix, four types of orthogonal   
    matrices are involved:   

    (1R) The right singular vector matrix of the remaining matrix.   

    (2R) If SQRE = 1, one extra Givens rotation to generate the right   
         null space.   

    (3R) The inverse transformation of (2L).   

    (4R) The inverse transformation of (1L).   

    Arguments   
    =========   

    ICOMPQ (input) INTEGER   
           Specifies whether singular vectors are to be computed in   
           factored form:   
           = 0: Left singular vector matrix.   
           = 1: Right singular vector matrix.   

    NL     (input) INTEGER   
           The row dimension of the upper block. NL >= 1.   

    NR     (input) INTEGER   
           The row dimension of the lower block. NR >= 1.   

    SQRE   (input) INTEGER   
           = 0: the lower block is an NR-by-NR square matrix.   
           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.   

           The bidiagonal matrix has row dimension N = NL + NR + 1,   
           and column dimension M = N + SQRE.   

    NRHS   (input) INTEGER   
           The number of columns of B and BX. NRHS must be at least 1.   

    B      (input/output) REAL array, dimension ( LDB, NRHS )   
           On input, B contains the right hand sides of the least   
           squares problem in rows 1 through M. On output, B contains   
           the solution X in rows 1 through N.   

    LDB    (input) INTEGER   
           The leading dimension of B. LDB must be at least   
           max(1,MAX( M, N ) ).   

    BX     (workspace) REAL array, dimension ( LDBX, NRHS )   

    LDBX   (input) INTEGER   
           The leading dimension of BX.   

    PERM   (input) INTEGER array, dimension ( N )   
           The permutations (from deflation and sorting) applied   
           to the two blocks.   

    GIVPTR (input) INTEGER   
           The number of Givens rotations which took place in this   
           subproblem.   

    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )   
           Each pair of numbers indicates a pair of rows/columns   
           involved in a Givens rotation.   

    LDGCOL (input) INTEGER   
           The leading dimension of GIVCOL, must be at least N.   

    GIVNUM (input) REAL array, dimension ( LDGNUM, 2 )   
           Each number indicates the C or S value used in the   
           corresponding Givens rotation.   

    LDGNUM (input) INTEGER   
           The leading dimension of arrays DIFR, POLES and   
           GIVNUM, must be at least K.   

    POLES  (input) REAL array, dimension ( LDGNUM, 2 )   
           On entry, POLES(1:K, 1) contains the new singular   
           values obtained from solving the secular equation, and   
           POLES(1:K, 2) is an array containing the poles in the secular   
           equation.   

    DIFL   (input) REAL array, dimension ( K ).   
           On entry, DIFL(I) is the distance between I-th updated   
           (undeflated) singular value and the I-th (undeflated) old   
           singular value.   

    DIFR   (input) REAL array, dimension ( LDGNUM, 2 ).   
           On entry, DIFR(I, 1) contains the distances between I-th   
           updated (undeflated) singular value and the I+1-th   
           (undeflated) old singular value. And DIFR(I, 2) is the   
           normalizing factor for the I-th right singular vector.   

    Z      (input) REAL array, dimension ( K )   
           Contain the components of the deflation-adjusted updating row   
           vector.   

    K      (input) INTEGER   
           Contains the dimension of the non-deflated matrix,   
           This is the order of the related secular equation. 1 <= K <=N.   

    C      (input) REAL   
           C contains garbage if SQRE =0 and the C-value of a Givens   
           rotation related to the right null space if SQRE = 1.   

    S      (input) REAL   
           S contains garbage if SQRE =0 and the S-value of a Givens   
           rotation related to the right null space if SQRE = 1.   

    WORK   (workspace) REAL array, dimension ( K )   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    Based on contributions by   
       Ming Gu and Ren-Cang Li, Computer Science Division, University of   
         California at Berkeley, USA   
       Osni Marques, LBNL/NERSC, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static real c_b5 = -1.f;
    static integer c__1 = 1;
    static real c_b11 = 1.f;
    static real c_b13 = 0.f;
    static integer c__0 = 0;
    
    /* System generated locals */
    integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, 
	    difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, 
	    poles_offset, i__1, i__2;
    real r__1;
    /* Local variables */
    static real temp;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer i__, j, m, n;
    static real diflj, difrj, dsigj;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
	    sgemv_(char *, integer *, integer *, real *, real *, integer *, 
	    real *, integer *, real *, real *, integer *), scopy_(
	    integer *, real *, integer *, real *, integer *);
    extern doublereal slamc3_(real *, real *);
    static real dj;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static real dsigjp;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, 
	    real *, integer *);
    static integer nlp1;
#define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1]
#define bx_ref(a_1,a_2) bx[(a_2)*bx_dim1 + a_1]
#define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1]
#define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1]


    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    bx_dim1 = *ldbx;
    bx_offset = 1 + bx_dim1 * 1;
    bx -= bx_offset;
    --perm;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1 * 1;
    givcol -= givcol_offset;
    difr_dim1 = *ldgnum;
    difr_offset = 1 + difr_dim1 * 1;
    difr -= difr_offset;
    poles_dim1 = *ldgnum;
    poles_offset = 1 + poles_dim1 * 1;
    poles -= poles_offset;
    givnum_dim1 = *ldgnum;
    givnum_offset = 1 + givnum_dim1 * 1;
    givnum -= givnum_offset;
    --difl;
    --z__;
    --work;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*nl < 1) {
	*info = -2;
    } else if (*nr < 1) {
	*info = -3;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -4;
    }

    n = *nl + *nr + 1;

    if (*nrhs < 1) {
	*info = -5;
    } else if (*ldb < n) {
	*info = -7;
    } else if (*ldbx < n) {
	*info = -9;
    } else if (*givptr < 0) {
	*info = -11;
    } else if (*ldgcol < n) {
	*info = -13;
    } else if (*ldgnum < n) {
	*info = -15;
    } else if (*k < 1) {
	*info = -20;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLALS0", &i__1);
	return 0;
    }

    m = n + *sqre;
    nlp1 = *nl + 1;

    if (*icompq == 0) {

/*        Apply back orthogonal transformations from the left.   

          Step (1L): apply back the Givens rotations performed. */

	i__1 = *givptr;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    srot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(givcol_ref(
		    i__, 1), 1), ldb, &givnum_ref(i__, 2), &givnum_ref(i__, 1)
		    );
/* L10: */
	}

/*        Step (2L): permute rows of B. */

	scopy_(nrhs, &b_ref(nlp1, 1), ldb, &bx_ref(1, 1), ldbx);
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    scopy_(nrhs, &b_ref(perm[i__], 1), ldb, &bx_ref(i__, 1), ldbx);
/* L20: */
	}

/*        Step (3L): apply the inverse of the left singular vector   
          matrix to BX. */

	if (*k == 1) {
	    scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb);
	    if (z__[1] < 0.f) {
		sscal_(nrhs, &c_b5, &b[b_offset], ldb);
	    }
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		diflj = difl[j];
		dj = poles_ref(j, 1);
		dsigj = -poles_ref(j, 2);
		if (j < *k) {
		    difrj = -difr_ref(j, 1);
		    dsigjp = -poles_ref(j + 1, 2);
		}
		if (z__[j] == 0.f || poles_ref(j, 2) == 0.f) {
		    work[j] = 0.f;
		} else {
		    work[j] = -poles_ref(j, 2) * z__[j] / diflj / (poles_ref(
			    j, 2) + dj);
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) {
			work[i__] = 0.f;
		    } else {
			work[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(&
				poles_ref(i__, 2), &dsigj) - diflj) / (
				poles_ref(i__, 2) + dj);
		    }
/* L30: */
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) {
			work[i__] = 0.f;
		    } else {
			work[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(&
				poles_ref(i__, 2), &dsigjp) + difrj) / (
				poles_ref(i__, 2) + dj);
		    }
/* L40: */
		}
		work[1] = -1.f;
		temp = snrm2_(k, &work[1], &c__1);
		sgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], &
			c__1, &c_b13, &b_ref(j, 1), ldb);
		slascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b_ref(
			j, 1), ldb, info);
/* L50: */
	    }
	}

/*        Move the deflated rows of BX to B also. */

	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    slacpy_("A", &i__1, nrhs, &bx_ref(*k + 1, 1), ldbx, &b_ref(*k + 1,
		     1), ldb);
	}
    } else {

/*        Apply back the right orthogonal transformations.   

          Step (1R): apply back the new right singular vector matrix   
          to B. */

	if (*k == 1) {
	    scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx);
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		dsigj = poles_ref(j, 2);
		if (z__[j] == 0.f) {
		    work[j] = 0.f;
		} else {
		    work[j] = -z__[j] / difl[j] / (dsigj + poles_ref(j, 1)) / 
			    difr_ref(j, 2);
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			work[i__] = 0.f;
		    } else {
			r__1 = -poles_ref(i__ + 1, 2);
			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - 
				difr_ref(i__, 1)) / (dsigj + poles_ref(i__, 1)
				) / difr_ref(i__, 2);
		    }
/* L60: */
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			work[i__] = 0.f;
		    } else {
			r__1 = -poles_ref(i__, 2);
			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[
				i__]) / (dsigj + poles_ref(i__, 1)) / 
				difr_ref(i__, 2);
		    }
/* L70: */
		}
		sgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], &
			c__1, &c_b13, &bx_ref(j, 1), ldbx);
/* L80: */
	    }
	}

/*        Step (2R): if SQRE = 1, apply back the rotation that is   
          related to the right null space of the subproblem. */

	if (*sqre == 1) {
	    scopy_(nrhs, &b_ref(m, 1), ldb, &bx_ref(m, 1), ldbx);
	    srot_(nrhs, &bx_ref(1, 1), ldbx, &bx_ref(m, 1), ldbx, c__, s);
	}
	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    slacpy_("A", &i__1, nrhs, &b_ref(*k + 1, 1), ldb, &bx_ref(*k + 1, 
		    1), ldbx);
	}

/*        Step (3R): permute rows of B. */

	scopy_(nrhs, &bx_ref(1, 1), ldbx, &b_ref(nlp1, 1), ldb);
	if (*sqre == 1) {
	    scopy_(nrhs, &bx_ref(m, 1), ldbx, &b_ref(m, 1), ldb);
	}
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    scopy_(nrhs, &bx_ref(i__, 1), ldbx, &b_ref(perm[i__], 1), ldb);
/* L90: */
	}

/*        Step (4R): apply back the Givens rotations performed. */

	for (i__ = *givptr; i__ >= 1; --i__) {
	    r__1 = -givnum_ref(i__, 1);
	    srot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(givcol_ref(
		    i__, 1), 1), ldb, &givnum_ref(i__, 2), &r__1);
/* L100: */
	}
    }

    return 0;

/*     End of SLALS0 */

} /* slals0_ */
Пример #21
0
/* Subroutine */ int slaeda_(integer *n, integer *tlvls, integer *curlvl, 
	integer *curpbm, integer *prmptr, integer *perm, integer *givptr, 
	integer *givcol, real *givnum, real *q, integer *qptr, real *z__, 
	real *ztemp, integer *info)
{
    /* System generated locals */
    integer i__1, i__2, i__3;

    /* Builtin functions */
    integer pow_ii(integer *, integer *);
    double sqrt(doublereal);

    /* Local variables */
    integer i__, k, mid, ptr, curr;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    integer bsiz1, bsiz2, psiz1, psiz2, zptr1;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), 
	    xerbla_(char *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAEDA computes the Z vector corresponding to the merge step in the */
/*  CURLVLth step of the merge process with TLVLS steps for the CURPBMth */
/*  problem. */

/*  Arguments */
/*  ========= */

/*  N      (input) INTEGER */
/*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */

/*  TLVLS  (input) INTEGER */
/*         The total number of merging levels in the overall divide and */
/*         conquer tree. */

/*  CURLVL (input) INTEGER */
/*         The current level in the overall merge routine, */
/*         0 <= curlvl <= tlvls. */

/*  CURPBM (input) INTEGER */
/*         The current problem in the current level in the overall */
/*         merge routine (counting from upper left to lower right). */

/*  PRMPTR (input) INTEGER array, dimension (N lg N) */
/*         Contains a list of pointers which indicate where in PERM a */
/*         level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i) */
/*         indicates the size of the permutation and incidentally the */
/*         size of the full, non-deflated problem. */

/*  PERM   (input) INTEGER array, dimension (N lg N) */
/*         Contains the permutations (from deflation and sorting) to be */
/*         applied to each eigenblock. */

/*  GIVPTR (input) INTEGER array, dimension (N lg N) */
/*         Contains a list of pointers which indicate where in GIVCOL a */
/*         level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i) */
/*         indicates the number of Givens rotations. */

/*  GIVCOL (input) INTEGER array, dimension (2, N lg N) */
/*         Each pair of numbers indicates a pair of columns to take place */
/*         in a Givens rotation. */

/*  GIVNUM (input) REAL array, dimension (2, N lg N) */
/*         Each number indicates the S value to be used in the */
/*         corresponding Givens rotation. */

/*  Q      (input) REAL array, dimension (N**2) */
/*         Contains the square eigenblocks from previous levels, the */
/*         starting positions for blocks are given by QPTR. */

/*  QPTR   (input) INTEGER array, dimension (N+2) */
/*         Contains a list of pointers which indicate where in Q an */
/*         eigenblock is stored.  SQRT( QPTR(i+1) - QPTR(i) ) indicates */
/*         the size of the block. */

/*  Z      (output) REAL array, dimension (N) */
/*         On output this vector contains the updating vector (the last */
/*         row of the first sub-eigenvector matrix and the first row of */
/*         the second sub-eigenvector matrix). */

/*  ZTEMP  (workspace) REAL array, dimension (N) */

/*  INFO   (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Jeff Rutter, Computer Science Division, University of California */
/*     at Berkeley, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --ztemp;
    --z__;
    --qptr;
    --q;
    givnum -= 3;
    givcol -= 3;
    --givptr;
    --perm;
    --prmptr;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
	*info = -1;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAEDA", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine location of first number in second half. */

    mid = *n / 2 + 1;

/*     Gather last/first rows of appropriate eigenblocks into center of Z */

    ptr = 1;

/*     Determine location of lowest level subproblem in the full storage */
/*     scheme */

    i__1 = *curlvl - 1;
    curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;

/*     Determine size of these matrices.  We add HALF to the value of */
/*     the SQRT in case the machine underestimates one of these square */
/*     roots. */

    bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f);
    bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f);
    i__1 = mid - bsiz1 - 1;
    for (k = 1; k <= i__1; ++k) {
	z__[k] = 0.f;
/* L10: */
    }
    scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], &
	    c__1);
    scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1);
    i__1 = *n;
    for (k = mid + bsiz2; k <= i__1; ++k) {
	z__[k] = 0.f;
/* L20: */
    }

/*     Loop thru remaining levels 1 -> CURLVL applying the Givens */
/*     rotations and permutation and then multiplying the center matrices */
/*     against the current Z. */

    ptr = pow_ii(&c__2, tlvls) + 1;
    i__1 = *curlvl - 1;
    for (k = 1; k <= i__1; ++k) {
	i__2 = *curlvl - k;
	i__3 = *curlvl - k - 1;
	curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - 
		1;
	psiz1 = prmptr[curr + 1] - prmptr[curr];
	psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
	zptr1 = mid - psiz1;

/*       Apply Givens at CURR and CURR+1 */

	i__2 = givptr[curr + 1] - 1;
	for (i__ = givptr[curr]; i__ <= i__2; ++i__) {
	    srot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, &
		    z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[(
		    i__ << 1) + 1], &givnum[(i__ << 1) + 2]);
/* L30: */
	}
	i__2 = givptr[curr + 2] - 1;
	for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) {
	    srot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[
		    mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << 
		    1) + 1], &givnum[(i__ << 1) + 2]);
/* L40: */
	}
	psiz1 = prmptr[curr + 1] - prmptr[curr];
	psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
	i__2 = psiz1 - 1;
	for (i__ = 0; i__ <= i__2; ++i__) {
	    ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1];
/* L50: */
	}
	i__2 = psiz2 - 1;
	for (i__ = 0; i__ <= i__2; ++i__) {
	    ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - 
		    1];
/* L60: */
	}

/*        Multiply Blocks at CURR and CURR+1 */

/*        Determine size of these matrices.  We add HALF to the value of */
/*        the SQRT in case the machine underestimates one of these */
/*        square roots. */

	bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f);
	bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + 
		.5f);
	if (bsiz1 > 0) {
	    sgemv_("T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, &
		    ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1);
	}
	i__2 = psiz1 - bsiz1;
	scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1);
	if (bsiz2 > 0) {
	    sgemv_("T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, &
		    ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1);
	}
	i__2 = psiz2 - bsiz2;
	scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], &
		c__1);

	i__2 = *tlvls - k;
	ptr += pow_ii(&c__2, &i__2);
/* L70: */
    }

    return 0;

/*     End of SLAEDA */

} /* slaeda_ */
Пример #22
0
/* Subroutine */ int slalsd_(char *uplo, integer *smlsiz, integer *n, integer 
	*nrhs, real *d__, real *e, real *b, integer *ldb, real *rcond, 
	integer *rank, real *work, integer *iwork, integer *info)
{
    /* System generated locals */
    integer b_dim1, b_offset, i__1, i__2;
    real r__1;

    /* Builtin functions */
    double log(doublereal), r_sign(real *, real *);

    /* Local variables */
    static integer difl, difr, perm, nsub, nlvl, sqre, bxst;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static integer c__, i__, j, k;
    static real r__;
    static integer s, u, z__;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static integer poles, sizei, nsize;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static integer nwork, icmpq1, icmpq2;
    extern doublereal sopbl3_(char *, integer *, integer *, integer *)
	    ;
    static real cs;
    static integer bx;
    static real sn;
    static integer st;
    extern /* Subroutine */ int slasda_(integer *, integer *, integer *, 
	    integer *, real *, real *, real *, integer *, real *, integer *, 
	    real *, real *, real *, real *, integer *, integer *, integer *, 
	    integer *, real *, real *, real *, real *, integer *, integer *);
    extern doublereal slamch_(char *);
    static integer vt;
    extern /* Subroutine */ int xerbla_(char *, integer *), slalsa_(
	    integer *, integer *, integer *, integer *, real *, integer *, 
	    real *, integer *, real *, integer *, real *, integer *, real *, 
	    real *, real *, real *, integer *, integer *, integer *, integer *
	    , real *, real *, real *, real *, integer *, integer *), slascl_(
	    char *, integer *, integer *, real *, real *, integer *, integer *
	    , real *, integer *, integer *);
    static integer givcol;
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer 
	    *, integer *, integer *, real *, real *, real *, integer *, real *
	    , integer *, real *, integer *, real *, integer *), 
	    slacpy_(char *, integer *, integer *, real *, integer *, real *, 
	    integer *), slartg_(real *, real *, real *, real *, real *
	    ), slaset_(char *, integer *, integer *, real *, real *, real *, 
	    integer *);
    static real orgnrm;
    static integer givnum;
    extern doublereal slanst_(char *, integer *, real *, real *);
    extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *);
    static integer givptr, nm1, smlszp, st1;
    static real eps;
    static integer iwk;
    static real tol;


#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]


/*  -- LAPACK routine (instrumented to count ops, version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    SLALSD uses the singular value decomposition of A to solve the least   
    squares problem of finding X to minimize the Euclidean norm of each   
    column of A*X-B, where A is N-by-N upper bidiagonal, and X and B   
    are N-by-NRHS. The solution X overwrites B.   

    The singular values of A smaller than RCOND times the largest   
    singular value are treated as zero in solving the least squares   
    problem; in this case a minimum norm solution is returned.   
    The actual singular values are returned in D in ascending order.   

    This code makes very mild assumptions about floating point   
    arithmetic. It will work on machines with a guard digit in   
    add/subtract, or on those binary machines without guard digits   
    which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.   
    It could conceivably fail on hexadecimal or decimal machines   
    without guard digits, but we know of none.   

    Arguments   
    =========   

    UPLO   (input) CHARACTER*1   
           = 'U': D and E define an upper bidiagonal matrix.   
           = 'L': D and E define a  lower bidiagonal matrix.   

    SMLSIZ (input) INTEGER   
           The maximum size of the subproblems at the bottom of the   
           computation tree.   

    N      (input) INTEGER   
           The dimension of the  bidiagonal matrix.  N >= 0.   

    NRHS   (input) INTEGER   
           The number of columns of B. NRHS must be at least 1.   

    D      (input/output) REAL array, dimension (N)   
           On entry D contains the main diagonal of the bidiagonal   
           matrix. On exit, if INFO = 0, D contains its singular values.   

    E      (input) REAL array, dimension (N-1)   
           Contains the super-diagonal entries of the bidiagonal matrix.   
           On exit, E has been destroyed.   

    B      (input/output) REAL array, dimension (LDB,NRHS)   
           On input, B contains the right hand sides of the least   
           squares problem. On output, B contains the solution X.   

    LDB    (input) INTEGER   
           The leading dimension of B in the calling subprogram.   
           LDB must be at least max(1,N).   

    RCOND  (input) REAL   
           The singular values of A less than or equal to RCOND times   
           the largest singular value are treated as zero in solving   
           the least squares problem. If RCOND is negative,   
           machine precision is used instead.   
           For example, if diag(S)*X=B were the least squares problem,   
           where diag(S) is a diagonal matrix of singular values, the   
           solution would be X(i) = B(i) / S(i) if S(i) is greater than   
           RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to   
           RCOND*max(S).   

    RANK   (output) INTEGER   
           The number of singular values of A greater than RCOND times   
           the largest singular value.   

    WORK   (workspace) REAL array, dimension at least   
           (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2),   
           where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1).   

    IWORK  (workspace) INTEGER array, dimension at least   
           (3 * N * NLVL + 11 * N)   

    INFO   (output) INTEGER   
           = 0:  successful exit.   
           < 0:  if INFO = -i, the i-th argument had an illegal value.   
           > 0:  The algorithm failed to compute an singular value while   
                 working on the submatrix lying in rows and columns   
                 INFO/(N+1) through MOD(INFO,N+1).   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --d__;
    --e;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
	*info = -3;
    } else if (*nrhs < 1) {
	*info = -4;
    } else if (*ldb < 1 || *ldb < *n) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLALSD", &i__1);
	return 0;
    }

    eps = slamch_("Epsilon");

/*     Set up the tolerance. */

    if (*rcond <= 0.f || *rcond >= 1.f) {
	*rcond = eps;
    }

    *rank = 0;

/*     Quick return if possible. */

    if (*n == 0) {
	return 0;
    } else if (*n == 1) {
	if (d__[1] == 0.f) {
	    slaset_("A", &c__1, nrhs, &c_b6, &c_b6, &b[b_offset], ldb);
	} else {
	    *rank = 1;
	    latime_1.ops += (real) (*nrhs << 1);
	    slascl_("G", &c__0, &c__0, &d__[1], &c_b11, &c__1, nrhs, &b[
		    b_offset], ldb, info);
	    d__[1] = dabs(d__[1]);
	}
	return 0;
    }

/*     Rotate the matrix if it is lower bidiagonal. */

    if (*(unsigned char *)uplo == 'L') {
	latime_1.ops += (real) ((*n - 1) * 6);
	i__1 = *n - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
	    d__[i__] = r__;
	    e[i__] = sn * d__[i__ + 1];
	    d__[i__ + 1] = cs * d__[i__ + 1];
	    if (*nrhs == 1) {
		latime_1.ops += 6.f;
		srot_(&c__1, &b_ref(i__, 1), &c__1, &b_ref(i__ + 1, 1), &c__1,
			 &cs, &sn);
	    } else {
		work[(i__ << 1) - 1] = cs;
		work[i__ * 2] = sn;
	    }
/* L10: */
	}
	if (*nrhs > 1) {
	    latime_1.ops += (real) ((*n - 1) * 6 * *nrhs);
	    i__1 = *nrhs;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		i__2 = *n - 1;
		for (j = 1; j <= i__2; ++j) {
		    cs = work[(j << 1) - 1];
		    sn = work[j * 2];
		    srot_(&c__1, &b_ref(j, i__), &c__1, &b_ref(j + 1, i__), &
			    c__1, &cs, &sn);
/* L20: */
		}
/* L30: */
	    }
	}
    }

/*     Scale. */

    nm1 = *n - 1;
    orgnrm = slanst_("M", n, &d__[1], &e[1]);
    if (orgnrm == 0.f) {
	slaset_("A", n, nrhs, &c_b6, &c_b6, &b[b_offset], ldb);
	return 0;
    }

    latime_1.ops += (real) (*n + nm1);
    slascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, &c__1, &d__[1], n, info);
    slascl_("G", &c__0, &c__0, &orgnrm, &c_b11, &nm1, &c__1, &e[1], &nm1, 
	    info);

/*     If N is smaller than the minimum divide size SMLSIZ, then solve   
       the problem with another solver. */

    if (*n <= *smlsiz) {
	nwork = *n * *n + 1;
	slaset_("A", n, n, &c_b6, &c_b11, &work[1], n);
	slasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, &
		work[1], n, &b[b_offset], ldb, &work[nwork], info);
	if (*info != 0) {
	    return 0;
	}
	latime_1.ops += 1.f;
	tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1));
	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (d__[i__] <= tol) {
		slaset_("A", &c__1, nrhs, &c_b6, &c_b6, &b_ref(i__, 1), ldb);
	    } else {
		latime_1.ops += (real) (*nrhs);
		slascl_("G", &c__0, &c__0, &d__[i__], &c_b11, &c__1, nrhs, &
			b_ref(i__, 1), ldb, info);
		++(*rank);
	    }
/* L40: */
	}
	latime_1.ops += sopbl3_("SGEMM ", n, nrhs, n);
	sgemm_("T", "N", n, nrhs, n, &c_b11, &work[1], n, &b[b_offset], ldb, &
		c_b6, &work[nwork], n);
	slacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb);

/*        Unscale. */

	latime_1.ops += (real) (*n + *n * *nrhs);
	slascl_("G", &c__0, &c__0, &c_b11, &orgnrm, n, &c__1, &d__[1], n, 
		info);
	slasrt_("D", n, &d__[1], info);
	slascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, nrhs, &b[b_offset], 
		ldb, info);

	return 0;
    }

/*     Book-keeping and setting up some constants. */

    nlvl = (integer) (log((real) (*n) / (real) (*smlsiz + 1)) / log(2.f)) + 1;

    smlszp = *smlsiz + 1;

    u = 1;
    vt = *smlsiz * *n + 1;
    difl = vt + smlszp * *n;
    difr = difl + nlvl * *n;
    z__ = difr + (nlvl * *n << 1);
    c__ = z__ + nlvl * *n;
    s = c__ + *n;
    poles = s + *n;
    givnum = poles + (nlvl << 1) * *n;
    bx = givnum + (nlvl << 1) * *n;
    nwork = bx + *n * *nrhs;

    sizei = *n + 1;
    k = sizei + *n;
    givptr = k + *n;
    perm = givptr + *n;
    givcol = perm + nlvl * *n;
    iwk = givcol + (nlvl * *n << 1);

    st = 1;
    sqre = 0;
    icmpq1 = 1;
    icmpq2 = 0;
    nsub = 0;

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if ((r__1 = d__[i__], dabs(r__1)) < eps) {
	    d__[i__] = r_sign(&eps, &d__[i__]);
	}
/* L50: */
    }

    i__1 = nm1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if ((r__1 = e[i__], dabs(r__1)) < eps || i__ == nm1) {
	    ++nsub;
	    iwork[nsub] = st;

/*           Subproblem found. First determine its size and then   
             apply divide and conquer on it. */

	    if (i__ < nm1) {

/*              A subproblem with E(I) small for I < NM1. */

		nsize = i__ - st + 1;
		iwork[sizei + nsub - 1] = nsize;
	    } else if ((r__1 = e[i__], dabs(r__1)) >= eps) {

/*              A subproblem with E(NM1) not too small but I = NM1. */

		nsize = *n - st + 1;
		iwork[sizei + nsub - 1] = nsize;
	    } else {

/*              A subproblem with E(NM1) small. This implies an   
                1-by-1 subproblem at D(N), which is not solved   
                explicitly. */

		nsize = i__ - st + 1;
		iwork[sizei + nsub - 1] = nsize;
		++nsub;
		iwork[nsub] = *n;
		iwork[sizei + nsub - 1] = 1;
		scopy_(nrhs, &b_ref(*n, 1), ldb, &work[bx + nm1], n);
	    }
	    st1 = st - 1;
	    if (nsize == 1) {

/*              This is a 1-by-1 subproblem and is not solved   
                explicitly. */

		scopy_(nrhs, &b_ref(st, 1), ldb, &work[bx + st1], n);
	    } else if (nsize <= *smlsiz) {

/*              This is a small subproblem and is solved by SLASDQ. */

		slaset_("A", &nsize, &nsize, &c_b6, &c_b11, &work[vt + st1], 
			n);
		slasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[
			st], &work[vt + st1], n, &work[nwork], n, &b_ref(st, 
			1), ldb, &work[nwork], info);
		if (*info != 0) {
		    return 0;
		}
		slacpy_("A", &nsize, nrhs, &b_ref(st, 1), ldb, &work[bx + st1]
			, n);
	    } else {

/*              A large problem. Solve it using divide and conquer. */

		slasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], &
			work[u + st1], n, &work[vt + st1], &iwork[k + st1], &
			work[difl + st1], &work[difr + st1], &work[z__ + st1],
			 &work[poles + st1], &iwork[givptr + st1], &iwork[
			givcol + st1], n, &iwork[perm + st1], &work[givnum + 
			st1], &work[c__ + st1], &work[s + st1], &work[nwork], 
			&iwork[iwk], info);
		if (*info != 0) {
		    return 0;
		}
		bxst = bx + st1;
		slalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b_ref(st, 1), ldb, &
			work[bxst], n, &work[u + st1], n, &work[vt + st1], &
			iwork[k + st1], &work[difl + st1], &work[difr + st1], 
			&work[z__ + st1], &work[poles + st1], &iwork[givptr + 
			st1], &iwork[givcol + st1], n, &iwork[perm + st1], &
			work[givnum + st1], &work[c__ + st1], &work[s + st1], 
			&work[nwork], &iwork[iwk], info);
		if (*info != 0) {
		    return 0;
		}
	    }
	    st = i__ + 1;
	}
/* L60: */
    }

/*     Apply the singular values and treat the tiny ones as zero. */

    tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1));

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        Some of the elements in D can be negative because 1-by-1   
          subproblems were not solved explicitly. */

	if ((r__1 = d__[i__], dabs(r__1)) <= tol) {
	    slaset_("A", &c__1, nrhs, &c_b6, &c_b6, &work[bx + i__ - 1], n);
	} else {
	    ++(*rank);
	    latime_1.ops += (real) (*nrhs);
	    slascl_("G", &c__0, &c__0, &d__[i__], &c_b11, &c__1, nrhs, &work[
		    bx + i__ - 1], n, info);
	}
	d__[i__] = (r__1 = d__[i__], dabs(r__1));
/* L70: */
    }

/*     Now apply back the right singular vectors. */

    icmpq2 = 1;
    i__1 = nsub;
    for (i__ = 1; i__ <= i__1; ++i__) {
	st = iwork[i__];
	st1 = st - 1;
	nsize = iwork[sizei + i__ - 1];
	bxst = bx + st1;
	if (nsize == 1) {
	    scopy_(nrhs, &work[bxst], n, &b_ref(st, 1), ldb);
	} else if (nsize <= *smlsiz) {
	    latime_1.ops += sopbl3_("SGEMM ", &nsize, nrhs, &nsize)
		    ;
	    sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b11, &work[vt + st1], n,
		     &work[bxst], n, &c_b6, &b_ref(st, 1), ldb);
	} else {
	    slalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b_ref(st, 
		    1), ldb, &work[u + st1], n, &work[vt + st1], &iwork[k + 
		    st1], &work[difl + st1], &work[difr + st1], &work[z__ + 
		    st1], &work[poles + st1], &iwork[givptr + st1], &iwork[
		    givcol + st1], n, &iwork[perm + st1], &work[givnum + st1],
		     &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[
		    iwk], info);
	    if (*info != 0) {
		return 0;
	    }
	}
/* L80: */
    }

/*     Unscale and sort the singular values. */

    latime_1.ops += (real) (*n + *n * *nrhs);
    slascl_("G", &c__0, &c__0, &c_b11, &orgnrm, n, &c__1, &d__[1], n, info);
    slasrt_("D", n, &d__[1], info);
    slascl_("G", &c__0, &c__0, &orgnrm, &c_b11, n, nrhs, &b[b_offset], ldb, 
	    info);

    return 0;

/*     End of SLALSD */

} /* slalsd_ */
Пример #23
0
 int sbdsqr_(char *uplo, int *n, int *ncvt, int *
	nru, int *ncc, float *d__, float *e, float *vt, int *ldvt, float *
	u, int *ldu, float *c__, int *ldc, float *work, int *info)
{
    /* System generated locals */
    int c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, 
	    i__2;
    float r__1, r__2, r__3, r__4;
    double d__1;

    /* Builtin functions */
    double pow_dd(double *, double *), sqrt(double), r_sign(float *
	    , float *);

    /* Local variables */
    float f, g, h__;
    int i__, j, m;
    float r__, cs;
    int ll;
    float sn, mu;
    int nm1, nm12, nm13, lll;
    float eps, sll, tol, abse;
    int idir;
    float abss;
    int oldm;
    float cosl;
    int isub, iter;
    float unfl, sinl, cosr, smin, smax, sinr;
    extern  int srot_(int *, float *, int *, float *, 
	    int *, float *, float *), slas2_(float *, float *, float *, float *, 
	     float *);
    extern int lsame_(char *, char *);
    float oldcs;
    extern  int sscal_(int *, float *, float *, int *);
    int oldll;
    float shift, sigmn, oldsn;
    int maxit;
    float sminl;
    extern  int slasr_(char *, char *, char *, int *, 
	    int *, float *, float *, float *, int *);
    float sigmx;
    int lower;
    extern  int sswap_(int *, float *, int *, float *, 
	    int *), slasq1_(int *, float *, float *, float *, int *),
	     slasv2_(float *, float *, float *, float *, float *, float *, float *, 
	    float *, float *);
    extern double slamch_(char *);
    extern  int xerbla_(char *, int *);
    float sminoa;
    extern  int slartg_(float *, float *, float *, float *, float *
);
    float thresh;
    int rotate;
    float tolmul;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     January 2007 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SBDSQR computes the singular values and, optionally, the right and/or */
/*  left singular vectors from the singular value decomposition (SVD) of */
/*  a float N-by-N (upper or lower) bidiagonal matrix B using the implicit */
/*  zero-shift QR algorithm.  The SVD of B has the form */

/*     B = Q * S * P**T */

/*  where S is the diagonal matrix of singular values, Q is an orthogonal */
/*  matrix of left singular vectors, and P is an orthogonal matrix of */
/*  right singular vectors.  If left singular vectors are requested, this */
/*  subroutine actually returns U*Q instead of Q, and, if right singular */
/*  vectors are requested, this subroutine returns P**T*VT instead of */
/*  P**T, for given float input matrices U and VT.  When U and VT are the */
/*  orthogonal matrices that reduce a general matrix A to bidiagonal */
/*  form:  A = U*B*VT, as computed by SGEBRD, then */

/*     A = (U*Q) * S * (P**T*VT) */

/*  is the SVD of A.  Optionally, the subroutine may also compute Q**T*C */
/*  for a given float input matrix C. */

/*  See "Computing  Small Singular Values of Bidiagonal Matrices With */
/*  Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */
/*  LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, */
/*  no. 5, pp. 873-912, Sept 1990) and */
/*  "Accurate singular values and differential qd algorithms," by */
/*  B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics */
/*  Department, University of California at Berkeley, July 1992 */
/*  for a detailed description of the algorithm. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  B is upper bidiagonal; */
/*          = 'L':  B is lower bidiagonal. */

/*  N       (input) INTEGER */
/*          The order of the matrix B.  N >= 0. */

/*  NCVT    (input) INTEGER */
/*          The number of columns of the matrix VT. NCVT >= 0. */

/*  NRU     (input) INTEGER */
/*          The number of rows of the matrix U. NRU >= 0. */

/*  NCC     (input) INTEGER */
/*          The number of columns of the matrix C. NCC >= 0. */

/*  D       (input/output) REAL array, dimension (N) */
/*          On entry, the n diagonal elements of the bidiagonal matrix B. */
/*          On exit, if INFO=0, the singular values of B in decreasing */
/*          order. */

/*  E       (input/output) REAL array, dimension (N-1) */
/*          On entry, the N-1 offdiagonal elements of the bidiagonal */
/*          matrix B. */
/*          On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E */
/*          will contain the diagonal and superdiagonal elements of a */
/*          bidiagonal matrix orthogonally equivalent to the one given */
/*          as input. */

/*  VT      (input/output) REAL array, dimension (LDVT, NCVT) */
/*          On entry, an N-by-NCVT matrix VT. */
/*          On exit, VT is overwritten by P**T * VT. */
/*          Not referenced if NCVT = 0. */

/*  LDVT    (input) INTEGER */
/*          The leading dimension of the array VT. */
/*          LDVT >= MAX(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. */

/*  U       (input/output) REAL array, dimension (LDU, N) */
/*          On entry, an NRU-by-N matrix U. */
/*          On exit, U is overwritten by U * Q. */
/*          Not referenced if NRU = 0. */

/*  LDU     (input) INTEGER */
/*          The leading dimension of the array U.  LDU >= MAX(1,NRU). */

/*  C       (input/output) REAL array, dimension (LDC, NCC) */
/*          On entry, an N-by-NCC matrix C. */
/*          On exit, C is overwritten by Q**T * C. */
/*          Not referenced if NCC = 0. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C. */
/*          LDC >= MAX(1,N) if NCC > 0; LDC >=1 if NCC = 0. */

/*  WORK    (workspace) REAL array, dimension (4*N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  If INFO = -i, the i-th argument had an illegal value */
/*          > 0: */
/*             if NCVT = NRU = NCC = 0, */
/*                = 1, a split was marked by a positive value in E */
/*                = 2, current block of Z not diagonalized after 30*N */
/*                     iterations (in inner while loop) */
/*                = 3, termination criterion of outer while loop not met */
/*                     (program created more than N unreduced blocks) */
/*             else NCVT = NRU = NCC = 0, */
/*                   the algorithm did not converge; D and E contain the */
/*                   elements of a bidiagonal matrix which is orthogonally */
/*                   similar to the input matrix B;  if INFO = i, i */
/*                   elements of E have not converged to zero. */

/*  Internal Parameters */
/*  =================== */

/*  TOLMUL  REAL, default = MAX(10,MIN(100,EPS**(-1/8))) */
/*          TOLMUL controls the convergence criterion of the QR loop. */
/*          If it is positive, TOLMUL*EPS is the desired relative */
/*             precision in the computed singular values. */
/*          If it is negative, ABS(TOLMUL*EPS*sigma_max) is the */
/*             desired absolute accuracy in the computed singular */
/*             values (corresponds to relative accuracy */
/*             ABS(TOLMUL*EPS) in the largest singular value. */
/*          ABS(TOLMUL) should be between 1 and 1/EPS, and preferably */
/*             between 10 (for fast convergence) and .1/EPS */
/*             (for there to be some accuracy in the results). */
/*          Default is to lose at either one eighth or 2 of the */
/*             available decimal digits in each computed singular value */
/*             (whichever is smaller). */

/*  MAXITR  INTEGER, default = 6 */
/*          MAXITR controls the maximum number of passes of the */
/*          algorithm through its inner loop. The algorithms stops */
/*          (and so fails to converge) if the number of passes */
/*          through the inner loop exceeds MAXITR*N**2. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    --e;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1;
    vt -= vt_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    *info = 0;
    lower = lsame_(uplo, "L");
    if (! lsame_(uplo, "U") && ! lower) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*ncvt < 0) {
	*info = -3;
    } else if (*nru < 0) {
	*info = -4;
    } else if (*ncc < 0) {
	*info = -5;
    } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < MAX(1,*n)) {
	*info = -9;
    } else if (*ldu < MAX(1,*nru)) {
	*info = -11;
    } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < MAX(1,*n)) {
	*info = -13;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SBDSQR", &i__1);
	return 0;
    }
    if (*n == 0) {
	return 0;
    }
    if (*n == 1) {
	goto L160;
    }

/*     ROTATE is true if any singular vectors desired, false otherwise */

    rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;

/*     If no singular vectors desired, use qd algorithm */

    if (! rotate) {
	slasq1_(n, &d__[1], &e[1], &work[1], info);
	return 0;
    }

    nm1 = *n - 1;
    nm12 = nm1 + nm1;
    nm13 = nm12 + nm1;
    idir = 0;

/*     Get machine constants */

    eps = slamch_("Epsilon");
    unfl = slamch_("Safe minimum");

/*     If matrix lower bidiagonal, rotate to be upper bidiagonal */
/*     by applying Givens rotations on the left */

    if (lower) {
	i__1 = *n - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
	    d__[i__] = r__;
	    e[i__] = sn * d__[i__ + 1];
	    d__[i__ + 1] = cs * d__[i__ + 1];
	    work[i__] = cs;
	    work[nm1 + i__] = sn;
/* L10: */
	}

/*        Update singular vectors if desired */

	if (*nru > 0) {
	    slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], 
		    ldu);
	}
	if (*ncc > 0) {
	    slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], 
		     ldc);
	}
    }

/*     Compute singular values to relative accuracy TOL */
/*     (By setting TOL to be negative, algorithm will compute */
/*     singular values to absolute accuracy ABS(TOL)*norm(input matrix)) */

/* Computing MAX */
/* Computing MIN */
    d__1 = (double) eps;
    r__3 = 100.f, r__4 = pow_dd(&d__1, &c_b15);
    r__1 = 10.f, r__2 = MIN(r__3,r__4);
    tolmul = MAX(r__1,r__2);
    tol = tolmul * eps;

/*     Compute approximate maximum, minimum singular values */

    smax = 0.f;
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
/* Computing MAX */
	r__2 = smax, r__3 = (r__1 = d__[i__], ABS(r__1));
	smax = MAX(r__2,r__3);
/* L20: */
    }
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
/* Computing MAX */
	r__2 = smax, r__3 = (r__1 = e[i__], ABS(r__1));
	smax = MAX(r__2,r__3);
/* L30: */
    }
    sminl = 0.f;
    if (tol >= 0.f) {

/*        Relative accuracy desired */

	sminoa = ABS(d__[1]);
	if (sminoa == 0.f) {
	    goto L50;
	}
	mu = sminoa;
	i__1 = *n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    mu = (r__2 = d__[i__], ABS(r__2)) * (mu / (mu + (r__1 = e[i__ - 
		    1], ABS(r__1))));
	    sminoa = MIN(sminoa,mu);
	    if (sminoa == 0.f) {
		goto L50;
	    }
/* L40: */
	}
L50:
	sminoa /= sqrt((float) (*n));
/* Computing MAX */
	r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl;
	thresh = MAX(r__1,r__2);
    } else {

/*        Absolute accuracy desired */

/* Computing MAX */
	r__1 = ABS(tol) * smax, r__2 = *n * 6 * *n * unfl;
	thresh = MAX(r__1,r__2);
    }

/*     Prepare for main iteration loop for the singular values */
/*     (MAXIT is the maximum number of passes through the inner */
/*     loop permitted before nonconvergence signalled.) */

    maxit = *n * 6 * *n;
    iter = 0;
    oldll = -1;
    oldm = -1;

/*     M points to last element of unconverged part of matrix */

    m = *n;

/*     Begin main iteration loop */

L60:

/*     Check for convergence or exceeding iteration count */

    if (m <= 1) {
	goto L160;
    }
    if (iter > maxit) {
	goto L200;
    }

/*     Find diagonal block of matrix to work on */

    if (tol < 0.f && (r__1 = d__[m], ABS(r__1)) <= thresh) {
	d__[m] = 0.f;
    }
    smax = (r__1 = d__[m], ABS(r__1));
    smin = smax;
    i__1 = m - 1;
    for (lll = 1; lll <= i__1; ++lll) {
	ll = m - lll;
	abss = (r__1 = d__[ll], ABS(r__1));
	abse = (r__1 = e[ll], ABS(r__1));
	if (tol < 0.f && abss <= thresh) {
	    d__[ll] = 0.f;
	}
	if (abse <= thresh) {
	    goto L80;
	}
	smin = MIN(smin,abss);
/* Computing MAX */
	r__1 = MAX(smax,abss);
	smax = MAX(r__1,abse);
/* L70: */
    }
    ll = 0;
    goto L90;
L80:
    e[ll] = 0.f;

/*     Matrix splits since E(LL) = 0 */

    if (ll == m - 1) {

/*        Convergence of bottom singular value, return to top of loop */

	--m;
	goto L60;
    }
L90:
    ++ll;

/*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero */

    if (ll == m - 1) {

/*        2 by 2 block, handle separately */

	slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, 
		 &sinl, &cosl);
	d__[m - 1] = sigmx;
	e[m - 1] = 0.f;
	d__[m] = sigmn;

/*        Compute singular vectors, if desired */

	if (*ncvt > 0) {
	    srot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, &
		    cosr, &sinr);
	}
	if (*nru > 0) {
	    srot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], &
		    c__1, &cosl, &sinl);
	}
	if (*ncc > 0) {
	    srot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, &
		    cosl, &sinl);
	}
	m += -2;
	goto L60;
    }

/*     If working on new submatrix, choose shift direction */
/*     (from larger end diagonal element towards smaller) */

    if (ll > oldm || m < oldll) {
	if ((r__1 = d__[ll], ABS(r__1)) >= (r__2 = d__[m], ABS(r__2))) {

/*           Chase bulge from top (big end) to bottom (small end) */

	    idir = 1;
	} else {

/*           Chase bulge from bottom (big end) to top (small end) */

	    idir = 2;
	}
    }

/*     Apply convergence tests */

    if (idir == 1) {

/*        Run convergence test in forward direction */
/*        First apply standard test to bottom of matrix */

	if ((r__2 = e[m - 1], ABS(r__2)) <= ABS(tol) * (r__1 = d__[m], ABS(
		r__1)) || tol < 0.f && (r__3 = e[m - 1], ABS(r__3)) <= 
		thresh) {
	    e[m - 1] = 0.f;
	    goto L60;
	}

	if (tol >= 0.f) {

/*           If relative accuracy desired, */
/*           apply convergence criterion forward */

	    mu = (r__1 = d__[ll], ABS(r__1));
	    sminl = mu;
	    i__1 = m - 1;
	    for (lll = ll; lll <= i__1; ++lll) {
		if ((r__1 = e[lll], ABS(r__1)) <= tol * mu) {
		    e[lll] = 0.f;
		    goto L60;
		}
		mu = (r__2 = d__[lll + 1], ABS(r__2)) * (mu / (mu + (r__1 = 
			e[lll], ABS(r__1))));
		sminl = MIN(sminl,mu);
/* L100: */
	    }
	}

    } else {

/*        Run convergence test in backward direction */
/*        First apply standard test to top of matrix */

	if ((r__2 = e[ll], ABS(r__2)) <= ABS(tol) * (r__1 = d__[ll], ABS(
		r__1)) || tol < 0.f && (r__3 = e[ll], ABS(r__3)) <= thresh) {
	    e[ll] = 0.f;
	    goto L60;
	}

	if (tol >= 0.f) {

/*           If relative accuracy desired, */
/*           apply convergence criterion backward */

	    mu = (r__1 = d__[m], ABS(r__1));
	    sminl = mu;
	    i__1 = ll;
	    for (lll = m - 1; lll >= i__1; --lll) {
		if ((r__1 = e[lll], ABS(r__1)) <= tol * mu) {
		    e[lll] = 0.f;
		    goto L60;
		}
		mu = (r__2 = d__[lll], ABS(r__2)) * (mu / (mu + (r__1 = e[
			lll], ABS(r__1))));
		sminl = MIN(sminl,mu);
/* L110: */
	    }
	}
    }
    oldll = ll;
    oldm = m;

/*     Compute shift.  First, test if shifting would ruin relative */
/*     accuracy, and if so set the shift to zero. */

/* Computing MAX */
    r__1 = eps, r__2 = tol * .01f;
    if (tol >= 0.f && *n * tol * (sminl / smax) <= MAX(r__1,r__2)) {

/*        Use a zero shift to avoid loss of relative accuracy */

	shift = 0.f;
    } else {

/*        Compute the shift from 2-by-2 block at end of matrix */

	if (idir == 1) {
	    sll = (r__1 = d__[ll], ABS(r__1));
	    slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
	} else {
	    sll = (r__1 = d__[m], ABS(r__1));
	    slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
	}

/*        Test if shift negligible, and if so set to zero */

	if (sll > 0.f) {
/* Computing 2nd power */
	    r__1 = shift / sll;
	    if (r__1 * r__1 < eps) {
		shift = 0.f;
	    }
	}
    }

/*     Increment iteration count */

    iter = iter + m - ll;

/*     If SHIFT = 0, do simplified QR iteration */

    if (shift == 0.f) {
	if (idir == 1) {

/*           Chase bulge from top to bottom */
/*           Save cosines and sines for later singular vector updates */

	    cs = 1.f;
	    oldcs = 1.f;
	    i__1 = m - 1;
	    for (i__ = ll; i__ <= i__1; ++i__) {
		r__1 = d__[i__] * cs;
		slartg_(&r__1, &e[i__], &cs, &sn, &r__);
		if (i__ > ll) {
		    e[i__ - 1] = oldsn * r__;
		}
		r__1 = oldcs * r__;
		r__2 = d__[i__ + 1] * sn;
		slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]);
		work[i__ - ll + 1] = cs;
		work[i__ - ll + 1 + nm1] = sn;
		work[i__ - ll + 1 + nm12] = oldcs;
		work[i__ - ll + 1 + nm13] = oldsn;
/* L120: */
	    }
	    h__ = d__[m] * cs;
	    d__[m] = h__ * oldcs;
	    e[m - 1] = h__ * oldsn;

/*           Update singular vectors */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[
			ll + vt_dim1], ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 
			+ 1], &u[ll * u_dim1 + 1], ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 
			+ 1], &c__[ll + c_dim1], ldc);
	    }

/*           Test convergence */

	    if ((r__1 = e[m - 1], ABS(r__1)) <= thresh) {
		e[m - 1] = 0.f;
	    }

	} else {

/*           Chase bulge from bottom to top */
/*           Save cosines and sines for later singular vector updates */

	    cs = 1.f;
	    oldcs = 1.f;
	    i__1 = ll + 1;
	    for (i__ = m; i__ >= i__1; --i__) {
		r__1 = d__[i__] * cs;
		slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__);
		if (i__ < m) {
		    e[i__] = oldsn * r__;
		}
		r__1 = oldcs * r__;
		r__2 = d__[i__ - 1] * sn;
		slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]);
		work[i__ - ll] = cs;
		work[i__ - ll + nm1] = -sn;
		work[i__ - ll + nm12] = oldcs;
		work[i__ - ll + nm13] = -oldsn;
/* L130: */
	    }
	    h__ = d__[ll] * cs;
	    d__[ll] = h__ * oldcs;
	    e[ll] = h__ * oldsn;

/*           Update singular vectors */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
			nm13 + 1], &vt[ll + vt_dim1], ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll *
			 u_dim1 + 1], ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[
			ll + c_dim1], ldc);
	    }

/*           Test convergence */

	    if ((r__1 = e[ll], ABS(r__1)) <= thresh) {
		e[ll] = 0.f;
	    }
	}
    } else {

/*        Use nonzero shift */

	if (idir == 1) {

/*           Chase bulge from top to bottom */
/*           Save cosines and sines for later singular vector updates */

	    f = ((r__1 = d__[ll], ABS(r__1)) - shift) * (r_sign(&c_b49, &d__[
		    ll]) + shift / d__[ll]);
	    g = e[ll];
	    i__1 = m - 1;
	    for (i__ = ll; i__ <= i__1; ++i__) {
		slartg_(&f, &g, &cosr, &sinr, &r__);
		if (i__ > ll) {
		    e[i__ - 1] = r__;
		}
		f = cosr * d__[i__] + sinr * e[i__];
		e[i__] = cosr * e[i__] - sinr * d__[i__];
		g = sinr * d__[i__ + 1];
		d__[i__ + 1] = cosr * d__[i__ + 1];
		slartg_(&f, &g, &cosl, &sinl, &r__);
		d__[i__] = r__;
		f = cosl * e[i__] + sinl * d__[i__ + 1];
		d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
		if (i__ < m - 1) {
		    g = sinl * e[i__ + 1];
		    e[i__ + 1] = cosl * e[i__ + 1];
		}
		work[i__ - ll + 1] = cosr;
		work[i__ - ll + 1 + nm1] = sinr;
		work[i__ - ll + 1 + nm12] = cosl;
		work[i__ - ll + 1 + nm13] = sinl;
/* L140: */
	    }
	    e[m - 1] = f;

/*           Update singular vectors */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[
			ll + vt_dim1], ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 
			+ 1], &u[ll * u_dim1 + 1], ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 
			+ 1], &c__[ll + c_dim1], ldc);
	    }

/*           Test convergence */

	    if ((r__1 = e[m - 1], ABS(r__1)) <= thresh) {
		e[m - 1] = 0.f;
	    }

	} else {

/*           Chase bulge from bottom to top */
/*           Save cosines and sines for later singular vector updates */

	    f = ((r__1 = d__[m], ABS(r__1)) - shift) * (r_sign(&c_b49, &d__[
		    m]) + shift / d__[m]);
	    g = e[m - 1];
	    i__1 = ll + 1;
	    for (i__ = m; i__ >= i__1; --i__) {
		slartg_(&f, &g, &cosr, &sinr, &r__);
		if (i__ < m) {
		    e[i__] = r__;
		}
		f = cosr * d__[i__] + sinr * e[i__ - 1];
		e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
		g = sinr * d__[i__ - 1];
		d__[i__ - 1] = cosr * d__[i__ - 1];
		slartg_(&f, &g, &cosl, &sinl, &r__);
		d__[i__] = r__;
		f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
		d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
		if (i__ > ll + 1) {
		    g = sinl * e[i__ - 2];
		    e[i__ - 2] = cosl * e[i__ - 2];
		}
		work[i__ - ll] = cosr;
		work[i__ - ll + nm1] = -sinr;
		work[i__ - ll + nm12] = cosl;
		work[i__ - ll + nm13] = -sinl;
/* L150: */
	    }
	    e[ll] = f;

/*           Test convergence */

	    if ((r__1 = e[ll], ABS(r__1)) <= thresh) {
		e[ll] = 0.f;
	    }

/*           Update singular vectors if desired */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
			nm13 + 1], &vt[ll + vt_dim1], ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll *
			 u_dim1 + 1], ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[
			ll + c_dim1], ldc);
	    }
	}
    }

/*     QR iteration finished, go back and check convergence */

    goto L60;

/*     All singular values converged, so make them positive */

L160:
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (d__[i__] < 0.f) {
	    d__[i__] = -d__[i__];

/*           Change sign of singular vectors, if desired */

	    if (*ncvt > 0) {
		sscal_(ncvt, &c_b72, &vt[i__ + vt_dim1], ldvt);
	    }
	}
/* L170: */
    }

/*     Sort the singular values into decreasing order (insertion sort on */
/*     singular values, but only one transposition per singular vector) */

    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        Scan for smallest D(I) */

	isub = 1;
	smin = d__[1];
	i__2 = *n + 1 - i__;
	for (j = 2; j <= i__2; ++j) {
	    if (d__[j] <= smin) {
		isub = j;
		smin = d__[j];
	    }
/* L180: */
	}
	if (isub != *n + 1 - i__) {

/*           Swap singular values and vectors */

	    d__[isub] = d__[*n + 1 - i__];
	    d__[*n + 1 - i__] = smin;
	    if (*ncvt > 0) {
		sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + 
			vt_dim1], ldvt);
	    }
	    if (*nru > 0) {
		sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * 
			u_dim1 + 1], &c__1);
	    }
	    if (*ncc > 0) {
		sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + 
			c_dim1], ldc);
	    }
	}
/* L190: */
    }
    goto L220;

/*     Maximum number of iterations exceeded, failure to converge */

L200:
    *info = 0;
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (e[i__] != 0.f) {
	    ++(*info);
	}
/* L210: */
    }
L220:
    return 0;

/*     End of SBDSQR */

} /* sbdsqr_ */
Пример #24
0
/* Subroutine */ int slaed8_(integer* icompq, integer* k, integer* n, integer
                             *qsiz, real* d__, real* q, integer* ldq, integer* indxq, real* rho,
                             integer* cutpnt, real* z__, real* dlamda, real* q2, integer* ldq2,
                             real* w, integer* perm, integer* givptr, integer* givcol, real *
                             givnum, integer* indxp, integer* indx, integer* info) {
    /* System generated locals */
    integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
    real r__1;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    real c__;
    integer i__, j;
    real s, t;
    integer k2, n1, n2, jp, n1p1;
    real eps, tau, tol;
    integer jlam, imax, jmax;
    extern /* Subroutine */ int srot_(integer*, real*, integer*, real*,
                                      integer*, real*, real*), sscal_(integer*, real*, real*,
                                              integer*), scopy_(integer*, real*, integer*, real*, integer *
                                                               );
    extern doublereal slapy2_(real*, real*), slamch_(char*);
    extern /* Subroutine */ int xerbla_(char*, integer*);
    extern integer isamax_(integer*, real*, integer*);
    extern /* Subroutine */ int slamrg_(integer*, integer*, real*, integer
                                        *, integer*, integer*), slacpy_(char*, integer*, integer*,
                                                real*, integer*, real*, integer*);


    /*  -- LAPACK routine (version 3.1) -- */
    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
    /*     November 2006 */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  SLAED8 merges the two sets of eigenvalues together into a single */
    /*  sorted set.  Then it tries to deflate the size of the problem. */
    /*  There are two ways in which deflation can occur:  when two or more */
    /*  eigenvalues are close together or if there is a tiny element in the */
    /*  Z vector.  For each such occurrence the order of the related secular */
    /*  equation problem is reduced by one. */

    /*  Arguments */
    /*  ========= */

    /*  ICOMPQ  (input) INTEGER */
    /*          = 0:  Compute eigenvalues only. */
    /*          = 1:  Compute eigenvectors of original dense symmetric matrix */
    /*                also.  On entry, Q contains the orthogonal matrix used */
    /*                to reduce the original matrix to tridiagonal form. */

    /*  K      (output) INTEGER */
    /*         The number of non-deflated eigenvalues, and the order of the */
    /*         related secular equation. */

    /*  N      (input) INTEGER */
    /*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */

    /*  QSIZ   (input) INTEGER */
    /*         The dimension of the orthogonal matrix used to reduce */
    /*         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1. */

    /*  D      (input/output) REAL array, dimension (N) */
    /*         On entry, the eigenvalues of the two submatrices to be */
    /*         combined.  On exit, the trailing (N-K) updated eigenvalues */
    /*         (those which were deflated) sorted into increasing order. */

    /*  Q      (input/output) REAL array, dimension (LDQ,N) */
    /*         If ICOMPQ = 0, Q is not referenced.  Otherwise, */
    /*         on entry, Q contains the eigenvectors of the partially solved */
    /*         system which has been previously updated in matrix */
    /*         multiplies with other partially solved eigensystems. */
    /*         On exit, Q contains the trailing (N-K) updated eigenvectors */
    /*         (those which were deflated) in its last N-K columns. */

    /*  LDQ    (input) INTEGER */
    /*         The leading dimension of the array Q.  LDQ >= max(1,N). */

    /*  INDXQ  (input) INTEGER array, dimension (N) */
    /*         The permutation which separately sorts the two sub-problems */
    /*         in D into ascending order.  Note that elements in the second */
    /*         half of this permutation must first have CUTPNT added to */
    /*         their values in order to be accurate. */

    /*  RHO    (input/output) REAL */
    /*         On entry, the off-diagonal element associated with the rank-1 */
    /*         cut which originally split the two submatrices which are now */
    /*         being recombined. */
    /*         On exit, RHO has been modified to the value required by */
    /*         SLAED3. */

    /*  CUTPNT (input) INTEGER */
    /*         The location of the last eigenvalue in the leading */
    /*         sub-matrix.  min(1,N) <= CUTPNT <= N. */

    /*  Z      (input) REAL array, dimension (N) */
    /*         On entry, Z contains the updating vector (the last row of */
    /*         the first sub-eigenvector matrix and the first row of the */
    /*         second sub-eigenvector matrix). */
    /*         On exit, the contents of Z are destroyed by the updating */
    /*         process. */

    /*  DLAMDA (output) REAL array, dimension (N) */
    /*         A copy of the first K eigenvalues which will be used by */
    /*         SLAED3 to form the secular equation. */

    /*  Q2     (output) REAL array, dimension (LDQ2,N) */
    /*         If ICOMPQ = 0, Q2 is not referenced.  Otherwise, */
    /*         a copy of the first K eigenvectors which will be used by */
    /*         SLAED7 in a matrix multiply (SGEMM) to update the new */
    /*         eigenvectors. */

    /*  LDQ2   (input) INTEGER */
    /*         The leading dimension of the array Q2.  LDQ2 >= max(1,N). */

    /*  W      (output) REAL array, dimension (N) */
    /*         The first k values of the final deflation-altered z-vector and */
    /*         will be passed to SLAED3. */

    /*  PERM   (output) INTEGER array, dimension (N) */
    /*         The permutations (from deflation and sorting) to be applied */
    /*         to each eigenblock. */

    /*  GIVPTR (output) INTEGER */
    /*         The number of Givens rotations which took place in this */
    /*         subproblem. */

    /*  GIVCOL (output) INTEGER array, dimension (2, N) */
    /*         Each pair of numbers indicates a pair of columns to take place */
    /*         in a Givens rotation. */

    /*  GIVNUM (output) REAL array, dimension (2, N) */
    /*         Each number indicates the S value to be used in the */
    /*         corresponding Givens rotation. */

    /*  INDXP  (workspace) INTEGER array, dimension (N) */
    /*         The permutation used to place deflated values of D at the end */
    /*         of the array.  INDXP(1:K) points to the nondeflated D-values */
    /*         and INDXP(K+1:N) points to the deflated eigenvalues. */

    /*  INDX   (workspace) INTEGER array, dimension (N) */
    /*         The permutation used to sort the contents of D into ascending */
    /*         order. */

    /*  INFO   (output) INTEGER */
    /*          = 0:  successful exit. */
    /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

    /*  Further Details */
    /*  =============== */

    /*  Based on contributions by */
    /*     Jeff Rutter, Computer Science Division, University of California */
    /*     at Berkeley, USA */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Scalars .. */

    /*     .. */
    /*     .. External Functions .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. Intrinsic Functions .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --indxq;
    --z__;
    --dlamda;
    q2_dim1 = *ldq2;
    q2_offset = 1 + q2_dim1;
    q2 -= q2_offset;
    --w;
    --perm;
    givcol -= 3;
    givnum -= 3;
    --indxp;
    --indx;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
        *info = -1;
    } else if (*n < 0) {
        *info = -3;
    } else if (*icompq == 1 && *qsiz < *n) {
        *info = -4;
    } else if (*ldq < max(1, *n)) {
        *info = -7;
    } else if (*cutpnt < min(1, *n) || *cutpnt > *n) {
        *info = -10;
    } else if (*ldq2 < max(1, *n)) {
        *info = -14;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SLAED8", &i__1);
        return 0;
    }

    /*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

    n1 = *cutpnt;
    n2 = *n - n1;
    n1p1 = n1 + 1;

    if (*rho < 0.f) {
        sscal_(&n2, &c_b3, &z__[n1p1], &c__1);
    }

    /*     Normalize z so that norm(z) = 1 */

    t = 1.f / sqrt(2.f);
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        indx[j] = j;
        /* L10: */
    }
    sscal_(n, &t, &z__[1], &c__1);
    *rho = (r__1 = *rho * 2.f, dabs(r__1));

    /*     Sort the eigenvalues into increasing order */

    i__1 = *n;
    for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) {
        indxq[i__] += *cutpnt;
        /* L20: */
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        dlamda[i__] = d__[indxq[i__]];
        w[i__] = z__[indxq[i__]];
        /* L30: */
    }
    i__ = 1;
    j = *cutpnt + 1;
    slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        d__[i__] = dlamda[indx[i__]];
        z__[i__] = w[indx[i__]];
        /* L40: */
    }

    /*     Calculate the allowable deflation tolerence */

    imax = isamax_(n, &z__[1], &c__1);
    jmax = isamax_(n, &d__[1], &c__1);
    eps = slamch_("Epsilon");
    tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1));

    /*     If the rank-1 modifier is small enough, no more needs to be done */
    /*     except to reorganize Q so that its columns correspond with the */
    /*     elements in D. */

    if (*rho *(r__1 = z__[imax], dabs(r__1)) <= tol) {
        *k = 0;
        if (*icompq == 0) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                perm[j] = indxq[indx[j]];
                /* L50: */
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                perm[j] = indxq[indx[j]];
                scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1
                        + 1], &c__1);
                /* L60: */
            }
            slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq);
        }
        return 0;
    }

    /*     If there are multiple eigenvalues then the problem deflates.  Here */
    /*     the number of equal eigenvalues are found.  As each equal */
    /*     eigenvalue is found, an elementary reflector is computed to rotate */
    /*     the corresponding eigensubspace so that the corresponding */
    /*     components of Z are zero in this new basis. */

    *k = 0;
    *givptr = 0;
    k2 = *n + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        if (*rho *(r__1 = z__[j], dabs(r__1)) <= tol) {

            /*           Deflate due to small z component. */

            --k2;
            indxp[k2] = j;
            if (j == *n) {
                goto L110;
            }
        } else {
            jlam = j;
            goto L80;
        }
        /* L70: */
    }
L80:
    ++j;
    if (j > *n) {
        goto L100;
    }
    if (*rho *(r__1 = z__[j], dabs(r__1)) <= tol) {

        /*        Deflate due to small z component. */

        --k2;
        indxp[k2] = j;
    } else {

        /*        Check if eigenvalues are close enough to allow deflation. */

        s = z__[jlam];
        c__ = z__[j];

        /*        Find sqrt(a**2+b**2) without overflow or */
        /*        destructive underflow. */

        tau = slapy2_(&c__, &s);
        t = d__[j] - d__[jlam];
        c__ /= tau;
        s = -s / tau;
        if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {

            /*           Deflation is possible. */

            z__[j] = tau;
            z__[jlam] = 0.f;

            /*           Record the appropriate Givens rotation */

            ++(*givptr);
            givcol[(*givptr << 1) + 1] = indxq[indx[jlam]];
            givcol[(*givptr << 1) + 2] = indxq[indx[j]];
            givnum[(*givptr << 1) + 1] = c__;
            givnum[(*givptr << 1) + 2] = s;
            if (*icompq == 1) {
                srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[
                          indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s);
            }
            t = d__[jlam] * c__ * c__ + d__[j] * s * s;
            d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__;
            d__[jlam] = t;
            --k2;
            i__ = 1;
L90:
            if (k2 + i__ <= *n) {
                if (d__[jlam] < d__[indxp[k2 + i__]]) {
                    indxp[k2 + i__ - 1] = indxp[k2 + i__];
                    indxp[k2 + i__] = jlam;
                    ++i__;
                    goto L90;
                } else {
                    indxp[k2 + i__ - 1] = jlam;
                }
            } else {
                indxp[k2 + i__ - 1] = jlam;
            }
            jlam = j;
        } else {
            ++(*k);
            w[*k] = z__[jlam];
            dlamda[*k] = d__[jlam];
            indxp[*k] = jlam;
            jlam = j;
        }
    }
    goto L80;
L100:

    /*     Record the last eigenvalue. */

    ++(*k);
    w[*k] = z__[jlam];
    dlamda[*k] = d__[jlam];
    indxp[*k] = jlam;

L110:

    /*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
    /*     and Q2 respectively.  The eigenvalues/vectors which were not */
    /*     deflated go into the first K slots of DLAMDA and Q2 respectively, */
    /*     while those which were deflated go into the last N - K slots. */

    if (*icompq == 0) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            jp = indxp[j];
            dlamda[j] = d__[jp];
            perm[j] = indxq[indx[jp]];
            /* L120: */
        }
    } else {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            jp = indxp[j];
            dlamda[j] = d__[jp];
            perm[j] = indxq[indx[jp]];
            scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1]
                   , &c__1);
            /* L130: */
        }
    }

    /*     The deflated eigenvalues and their corresponding vectors go back */
    /*     into the last N - K slots of D and Q respectively. */

    if (*k < *n) {
        if (*icompq == 0) {
            i__1 = *n - *k;
            scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
        } else {
            i__1 = *n - *k;
            scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
            i__1 = *n - *k;
            slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*
                    k + 1) * q_dim1 + 1], ldq);
        }
    }

    return 0;

    /*     End of SLAED8 */

} /* slaed8_ */
Пример #25
0
/* Subroutine */
int slasd2_(integer *nl, integer *nr, integer *sqre, integer *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer *idxq, integer *coltyp, integer *info)
{
    /* System generated locals */
    integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1;
    real r__1, r__2;
    /* Local variables */
    real c__;
    integer i__, j, m, n;
    real s;
    integer k2;
    real z1;
    integer ct, jp;
    real eps, tau, tol;
    integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4];
    extern /* Subroutine */
    int srot_(integer *, real *, integer *, real *, integer *, real *, real *);
    integer idxjp, jprev;
    extern /* Subroutine */
    int scopy_(integer *, real *, integer *, real *, integer *);
    extern real slapy2_(real *, real *), slamch_(char *);
    extern /* Subroutine */
    int xerbla_(char *, integer *), slamrg_( integer *, integer *, real *, integer *, integer *, integer *);
    real hlftol;
    extern /* Subroutine */
    int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *);
    /* -- LAPACK auxiliary routine (version 3.4.2) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* September 2012 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Arrays .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Test the input parameters. */
    /* Parameter adjustments */
    --d__;
    --z__;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1;
    vt -= vt_offset;
    --dsigma;
    u2_dim1 = *ldu2;
    u2_offset = 1 + u2_dim1;
    u2 -= u2_offset;
    vt2_dim1 = *ldvt2;
    vt2_offset = 1 + vt2_dim1;
    vt2 -= vt2_offset;
    --idxp;
    --idx;
    --idxc;
    --idxq;
    --coltyp;
    /* Function Body */
    *info = 0;
    if (*nl < 1)
    {
        *info = -1;
    }
    else if (*nr < 1)
    {
        *info = -2;
    }
    else if (*sqre != 1 && *sqre != 0)
    {
        *info = -3;
    }
    n = *nl + *nr + 1;
    m = n + *sqre;
    if (*ldu < n)
    {
        *info = -10;
    }
    else if (*ldvt < m)
    {
        *info = -12;
    }
    else if (*ldu2 < n)
    {
        *info = -15;
    }
    else if (*ldvt2 < m)
    {
        *info = -17;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("SLASD2", &i__1);
        return 0;
    }
    nlp1 = *nl + 1;
    nlp2 = *nl + 2;
    /* Generate the first part of the vector Z;
    and move the singular */
    /* values in the first part of D one position backward. */
    z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1];
    z__[1] = z1;
    for (i__ = *nl;
            i__ >= 1;
            --i__)
    {
        z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1];
        d__[i__ + 1] = d__[i__];
        idxq[i__ + 1] = idxq[i__] + 1;
        /* L10: */
    }
    /* Generate the second part of the vector Z. */
    i__1 = m;
    for (i__ = nlp2;
            i__ <= i__1;
            ++i__)
    {
        z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1];
        /* L20: */
    }
    /* Initialize some reference arrays. */
    i__1 = nlp1;
    for (i__ = 2;
            i__ <= i__1;
            ++i__)
    {
        coltyp[i__] = 1;
        /* L30: */
    }
    i__1 = n;
    for (i__ = nlp2;
            i__ <= i__1;
            ++i__)
    {
        coltyp[i__] = 2;
        /* L40: */
    }
    /* Sort the singular values into increasing order */
    i__1 = n;
    for (i__ = nlp2;
            i__ <= i__1;
            ++i__)
    {
        idxq[i__] += nlp1;
        /* L50: */
    }
    /* DSIGMA, IDXC, IDXC, and the first column of U2 */
    /* are used as storage space. */
    i__1 = n;
    for (i__ = 2;
            i__ <= i__1;
            ++i__)
    {
        dsigma[i__] = d__[idxq[i__]];
        u2[i__ + u2_dim1] = z__[idxq[i__]];
        idxc[i__] = coltyp[idxq[i__]];
        /* L60: */
    }
    slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);
    i__1 = n;
    for (i__ = 2;
            i__ <= i__1;
            ++i__)
    {
        idxi = idx[i__] + 1;
        d__[i__] = dsigma[idxi];
        z__[i__] = u2[idxi + u2_dim1];
        coltyp[i__] = idxc[idxi];
        /* L70: */
    }
    /* Calculate the allowable deflation tolerance */
    eps = slamch_("Epsilon");
    /* Computing MAX */
    r__1 = f2c_abs(*alpha);
    r__2 = f2c_abs(*beta); // , expr subst
    tol = max(r__1,r__2);
    /* Computing MAX */
    r__2 = (r__1 = d__[n], f2c_abs(r__1));
    tol = eps * 8.f * max(r__2,tol);
    /* There are 2 kinds of deflation -- first a value in the z-vector */
    /* is small, second two (or more) singular values are very close */
    /* together (their difference is small). */
    /* If the value in the z-vector is small, we simply permute the */
    /* array so that the corresponding singular value is moved to the */
    /* end. */
    /* If two values in the D-vector are close, we perform a two-sided */
    /* rotation designed to make one of the corresponding z-vector */
    /* entries zero, and then permute the array so that the deflated */
    /* singular value is moved to the end. */
    /* If there are multiple singular values then the problem deflates. */
    /* Here the number of equal singular values are found. As each equal */
    /* singular value is found, an elementary reflector is computed to */
    /* rotate the corresponding singular subspace so that the */
    /* corresponding components of Z are zero in this new basis. */
    *k = 1;
    k2 = n + 1;
    i__1 = n;
    for (j = 2;
            j <= i__1;
            ++j)
    {
        if ((r__1 = z__[j], f2c_abs(r__1)) <= tol)
        {
            /* Deflate due to small z component. */
            --k2;
            idxp[k2] = j;
            coltyp[j] = 4;
            if (j == n)
            {
                goto L120;
            }
        }
        else
        {
            jprev = j;
            goto L90;
        }
        /* L80: */
    }
L90:
    j = jprev;
L100:
    ++j;
    if (j > n)
    {
        goto L110;
    }
    if ((r__1 = z__[j], f2c_abs(r__1)) <= tol)
    {
        /* Deflate due to small z component. */
        --k2;
        idxp[k2] = j;
        coltyp[j] = 4;
    }
    else
    {
        /* Check if singular values are close enough to allow deflation. */
        if ((r__1 = d__[j] - d__[jprev], f2c_abs(r__1)) <= tol)
        {
            /* Deflation is possible. */
            s = z__[jprev];
            c__ = z__[j];
            /* Find sqrt(a**2+b**2) without overflow or */
            /* destructive underflow. */
            tau = slapy2_(&c__, &s);
            c__ /= tau;
            s = -s / tau;
            z__[j] = tau;
            z__[jprev] = 0.f;
            /* Apply back the Givens rotation to the left and right */
            /* singular vector matrices. */
            idxjp = idxq[idx[jprev] + 1];
            idxj = idxq[idx[j] + 1];
            if (idxjp <= nlp1)
            {
                --idxjp;
            }
            if (idxj <= nlp1)
            {
                --idxj;
            }
            srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & c__1, &c__, &s);
            srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & c__, &s);
            if (coltyp[j] != coltyp[jprev])
            {
                coltyp[j] = 3;
            }
            coltyp[jprev] = 4;
            --k2;
            idxp[k2] = jprev;
            jprev = j;
        }
        else
        {
            ++(*k);
            u2[*k + u2_dim1] = z__[jprev];
            dsigma[*k] = d__[jprev];
            idxp[*k] = jprev;
            jprev = j;
        }
    }
    goto L100;
L110: /* Record the last singular value. */
    ++(*k);
    u2[*k + u2_dim1] = z__[jprev];
    dsigma[*k] = d__[jprev];
    idxp[*k] = jprev;
L120: /* Count up the total number of the various types of columns, then */
    /* form a permutation which positions the four column types into */
    /* four groups of uniform structure (although one or more of these */
    /* groups may be empty). */
    for (j = 1;
            j <= 4;
            ++j)
    {
        ctot[j - 1] = 0;
        /* L130: */
    }
    i__1 = n;
    for (j = 2;
            j <= i__1;
            ++j)
    {
        ct = coltyp[j];
        ++ctot[ct - 1];
        /* L140: */
    }
    /* PSM(*) = Position in SubMatrix (of types 1 through 4) */
    psm[0] = 2;
    psm[1] = ctot[0] + 2;
    psm[2] = psm[1] + ctot[1];
    psm[3] = psm[2] + ctot[2];
    /* Fill out the IDXC array so that the permutation which it induces */
    /* will place all type-1 columns first, all type-2 columns next, */
    /* then all type-3's, and finally all type-4's, starting from the */
    /* second column. This applies similarly to the rows of VT. */
    i__1 = n;
    for (j = 2;
            j <= i__1;
            ++j)
    {
        jp = idxp[j];
        ct = coltyp[jp];
        idxc[psm[ct - 1]] = j;
        ++psm[ct - 1];
        /* L150: */
    }
    /* Sort the singular values and corresponding singular vectors into */
    /* DSIGMA, U2, and VT2 respectively. The singular values/vectors */
    /* which were not deflated go into the first K slots of DSIGMA, U2, */
    /* and VT2 respectively, while those which were deflated go into the */
    /* last N - K slots, except that the first column/row will be treated */
    /* separately. */
    i__1 = n;
    for (j = 2;
            j <= i__1;
            ++j)
    {
        jp = idxp[j];
        dsigma[j] = d__[jp];
        idxj = idxq[idx[idxp[idxc[j]]] + 1];
        if (idxj <= nlp1)
        {
            --idxj;
        }
        scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1);
        scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2);
        /* L160: */
    }
    /* Determine DSIGMA(1), DSIGMA(2) and Z(1) */
    dsigma[1] = 0.f;
    hlftol = tol / 2.f;
    if (f2c_abs(dsigma[2]) <= hlftol)
    {
        dsigma[2] = hlftol;
    }
    if (m > n)
    {
        z__[1] = slapy2_(&z1, &z__[m]);
        if (z__[1] <= tol)
        {
            c__ = 1.f;
            s = 0.f;
            z__[1] = tol;
        }
        else
        {
            c__ = z1 / z__[1];
            s = z__[m] / z__[1];
        }
    }
    else
    {
        if (f2c_abs(z1) <= tol)
        {
            z__[1] = tol;
        }
        else
        {
            z__[1] = z1;
        }
    }
    /* Move the rest of the updating row to Z. */
    i__1 = *k - 1;
    scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1);
    /* Determine the first column of U2, the first row of VT2 and the */
    /* last row of VT. */
    slaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2);
    u2[nlp1 + u2_dim1] = 1.f;
    if (m > n)
    {
        i__1 = nlp1;
        for (i__ = 1;
                i__ <= i__1;
                ++i__)
        {
            vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1];
            vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1];
            /* L170: */
        }
        i__1 = m;
        for (i__ = nlp2;
                i__ <= i__1;
                ++i__)
        {
            vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1];
            vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1];
            /* L180: */
        }
    }
    else
    {
        scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2);
    }
    if (m > n)
    {
        scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2);
    }
    /* The deflated singular values and their corresponding vectors go */
    /* into the back of D, U, and V respectively. */
    if (n > *k)
    {
        i__1 = n - *k;
        scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
        i__1 = n - *k;
        slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) * u_dim1 + 1], ldu);
        i__1 = n - *k;
        slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + vt_dim1], ldvt);
    }
    /* Copy CTOT into COLTYP for referencing in SLASD3. */
    for (j = 1;
            j <= 4;
            ++j)
    {
        coltyp[j] = ctot[j - 1];
        /* L190: */
    }
    return 0;
    /* End of SLASD2 */
}
Пример #26
0
/* Subroutine */ int slaed8_(integer *icompq, integer *k, integer *n, integer 
	*qsiz, real *d, real *q, integer *ldq, integer *indxq, real *rho, 
	integer *cutpnt, real *z, real *dlamda, real *q2, integer *ldq2, real 
	*w, integer *perm, integer *givptr, integer *givcol, real *givnum, 
	integer *indxp, integer *indx, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, 
  
       Courant Institute, NAG Ltd., and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SLAED8 merges the two sets of eigenvalues together into a single   
    sorted set.  Then it tries to deflate the size of the problem.   
    There are two ways in which deflation can occur:  when two or more   
    eigenvalues are close together or if there is a tiny element in the   
    Z vector.  For each such occurrence the order of the related secular 
  
    equation problem is reduced by one.   

    Arguments   
    =========   

    ICOMPQ  (input) INTEGER   
            = 0:  Compute eigenvalues only.   
            = 1:  Compute eigenvectors of original dense symmetric matrix 
  
                  also.  On entry, Q contains the orthogonal matrix used 
  
                  to reduce the original matrix to tridiagonal form.   

    K      (output) INTEGER   
           The number of non-deflated eigenvalues, and the order of the   
           related secular equation.   

    N      (input) INTEGER   
           The dimension of the symmetric tridiagonal matrix.  N >= 0.   

    QSIZ   (input) INTEGER   
           The dimension of the orthogonal matrix used to reduce   
           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1. 
  

    D      (input/output) REAL array, dimension (N)   
           On entry, the eigenvalues of the two submatrices to be   
           combined.  On exit, the trailing (N-K) updated eigenvalues   
           (those which were deflated) sorted into increasing order.   

    Q      (input/output) REAL array, dimension (LDQ,N)   
           If ICOMPQ = 0, Q is not referenced.  Otherwise,   
           on entry, Q contains the eigenvectors of the partially solved 
  
           system which has been previously updated in matrix   
           multiplies with other partially solved eigensystems.   
           On exit, Q contains the trailing (N-K) updated eigenvectors   
           (those which were deflated) in its last N-K columns.   

    LDQ    (input) INTEGER   
           The leading dimension of the array Q.  LDQ >= max(1,N).   

    INDXQ  (input) INTEGER array, dimension (N)   
           The permutation which separately sorts the two sub-problems   
           in D into ascending order.  Note that elements in the second   
           half of this permutation must first have CUTPNT added to   
           their values in order to be accurate.   

    RHO    (input/output) REAL   
           On entry, the off-diagonal element associated with the rank-1 
  
           cut which originally split the two submatrices which are now   
           being recombined.   
           On exit, RHO has been modified to the value required by   
           SLAED3.   

    CUTPNT (input) INTEGER   
           The location of the last eigenvalue in the leading   
           sub-matrix.  min(1,N) <= CUTPNT <= N.   

    Z      (input) REAL array, dimension (N)   
           On entry, Z contains the updating vector (the last row of   
           the first sub-eigenvector matrix and the first row of the   
           second sub-eigenvector matrix).   
           On exit, the contents of Z are destroyed by the updating   
           process.   

    DLAMDA (output) REAL array, dimension (N)   
           A copy of the first K eigenvalues which will be used by   
           SLAED3 to form the secular equation.   

    Q2     (output) REAL array, dimension (LDQ2,N)   
           If ICOMPQ = 0, Q2 is not referenced.  Otherwise,   
           a copy of the first K eigenvectors which will be used by   
           SLAED7 in a matrix multiply (SGEMM) to update the new   
           eigenvectors.   

    LDQ2   (input) INTEGER   
           The leading dimension of the array Q2.  LDQ2 >= max(1,N).   

    W      (output) REAL array, dimension (N)   
           The first k values of the final deflation-altered z-vector and 
  
           will be passed to SLAED3.   

    PERM   (output) INTEGER array, dimension (N)   
           The permutations (from deflation and sorting) to be applied   
           to each eigenblock.   

    GIVPTR (output) INTEGER   
           The number of Givens rotations which took place in this   
           subproblem.   

    GIVCOL (output) INTEGER array, dimension (2, N)   
           Each pair of numbers indicates a pair of columns to take place 
  
           in a Givens rotation.   

    GIVNUM (output) REAL array, dimension (2, N)   
           Each number indicates the S value to be used in the   
           corresponding Givens rotation.   

    INDXP  (workspace) INTEGER array, dimension (N)   
           The permutation used to place deflated values of D at the end 
  
           of the array.  INDXP(1:K) points to the nondeflated D-values   
           and INDXP(K+1:N) points to the deflated eigenvalues.   

    INDX   (workspace) INTEGER array, dimension (N)   
           The permutation used to sort the contents of D into ascending 
  
           order.   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    ===================================================================== 
  



       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static real c_b3 = -1.f;
    static integer c__1 = 1;
    
    /* System generated locals */
    integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
    real r__1;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer jlam, imax, jmax;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static real c;
    static integer i, j;
    static real s, t;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer k2;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static integer n1, n2;
    extern doublereal slapy2_(real *, real *);
    static integer jp;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer 
	    *, integer *, integer *), slacpy_(char *, integer *, integer *, 
	    real *, integer *, real *, integer *);
    static integer n1p1;
    static real eps, tau, tol;


    --d;
    q_dim1 = *ldq;
    q_offset = q_dim1 + 1;
    q -= q_offset;
    --indxq;
    --z;
    --dlamda;
    q2_dim1 = *ldq2;
    q2_offset = q2_dim1 + 1;
    q2 -= q2_offset;
    --w;
    --perm;
    givcol -= 3;
    givnum -= 3;
    --indxp;
    --indx;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*n < 0) {
	*info = -3;
    } else if (*icompq == 1 && *qsiz < *n) {
	*info = -4;
    } else if (*ldq < max(1,*n)) {
	*info = -7;
    } else if (*cutpnt < min(1,*n) || *cutpnt > *n) {
	*info = -10;
    } else if (*ldq2 < max(1,*n)) {
	*info = -14;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAED8", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    n1 = *cutpnt;
    n2 = *n - n1;
    n1p1 = n1 + 1;

    if (*rho < 0.f) {
	sscal_(&n2, &c_b3, &z[n1p1], &c__1);
    }

/*     Normalize z so that norm(z) = 1 */

    t = 1.f / sqrt(2.f);
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	indx[j] = j;
/* L10: */
    }
    sscal_(n, &t, &z[1], &c__1);
    *rho = (r__1 = *rho * 2.f, dabs(r__1));

/*     Sort the eigenvalues into increasing order */

    i__1 = *n;
    for (i = *cutpnt + 1; i <= i__1; ++i) {
	indxq[i] += *cutpnt;
/* L20: */
    }
    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	dlamda[i] = d[indxq[i]];
	w[i] = z[indxq[i]];
/* L30: */
    }
    i = 1;
    j = *cutpnt + 1;
    slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	d[i] = dlamda[indx[i]];
	z[i] = w[indx[i]];
/* L40: */
    }

/*     Calculate the allowable deflation tolerence */

    imax = isamax_(n, &z[1], &c__1);
    jmax = isamax_(n, &d[1], &c__1);
    eps = slamch_("Epsilon");
    tol = eps * 8.f * (r__1 = d[jmax], dabs(r__1));

/*     If the rank-1 modifier is small enough, no more needs to be done   
       except to reorganize Q so that its columns correspond with the   
       elements in D. */

    if (*rho * (r__1 = z[imax], dabs(r__1)) <= tol) {
	*k = 0;
	if (*icompq == 0) {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		perm[j] = indxq[indx[j]];
/* L50: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		perm[j] = indxq[indx[j]];
		scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 
			+ 1], &c__1);
/* L60: */
	    }
	    slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq);
	}
	return 0;
    }

/*     If there are multiple eigenvalues then the problem deflates.  Here 
  
       the number of equal eigenvalues are found.  As each equal   
       eigenvalue is found, an elementary reflector is computed to rotate 
  
       the corresponding eigensubspace so that the corresponding   
       components of Z are zero in this new basis. */

    *k = 0;
    *givptr = 0;
    k2 = *n + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) {

/*           Deflate due to small z component. */

	    --k2;
	    indxp[k2] = j;
	    if (j == *n) {
		goto L110;
	    }
	} else {
	    jlam = j;
	    goto L80;
	}
/* L70: */
    }
L80:
    ++j;
    if (j > *n) {
	goto L100;
    }
    if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) {

/*        Deflate due to small z component. */

	--k2;
	indxp[k2] = j;
    } else {

/*        Check if eigenvalues are close enough to allow deflation. */

	s = z[jlam];
	c = z[j];

/*        Find sqrt(a**2+b**2) without overflow or   
          destructive underflow. */

	tau = slapy2_(&c, &s);
	t = d[j] - d[jlam];
	c /= tau;
	s = -(doublereal)s / tau;
	if ((r__1 = t * c * s, dabs(r__1)) <= tol) {

/*           Deflation is possible. */

	    z[j] = tau;
	    z[jlam] = 0.f;

/*           Record the appropriate Givens rotation */

	    ++(*givptr);
	    givcol[(*givptr << 1) + 1] = indxq[indx[jlam]];
	    givcol[(*givptr << 1) + 2] = indxq[indx[j]];
	    givnum[(*givptr << 1) + 1] = c;
	    givnum[(*givptr << 1) + 2] = s;
	    if (*icompq == 1) {
		srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[
			indxq[indx[j]] * q_dim1 + 1], &c__1, &c, &s);
	    }
	    t = d[jlam] * c * c + d[j] * s * s;
	    d[j] = d[jlam] * s * s + d[j] * c * c;
	    d[jlam] = t;
	    --k2;
	    i = 1;
L90:
	    if (k2 + i <= *n) {
		if (d[jlam] < d[indxp[k2 + i]]) {
		    indxp[k2 + i - 1] = indxp[k2 + i];
		    indxp[k2 + i] = jlam;
		    ++i;
		    goto L90;
		} else {
		    indxp[k2 + i - 1] = jlam;
		}
	    } else {
		indxp[k2 + i - 1] = jlam;
	    }
	    jlam = j;
	} else {
	    ++(*k);
	    w[*k] = z[jlam];
	    dlamda[*k] = d[jlam];
	    indxp[*k] = jlam;
	    jlam = j;
	}
    }
    goto L80;
L100:

/*     Record the last eigenvalue. */

    ++(*k);
    w[*k] = z[jlam];
    dlamda[*k] = d[jlam];
    indxp[*k] = jlam;

L110:

/*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA   
       and Q2 respectively.  The eigenvalues/vectors which were not   
       deflated go into the first K slots of DLAMDA and Q2 respectively, 
  
       while those which were deflated go into the last N - K slots. */

    if (*icompq == 0) {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    jp = indxp[j];
	    dlamda[j] = d[jp];
	    perm[j] = indxq[indx[jp]];
/* L120: */
	}
    } else {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    jp = indxp[j];
	    dlamda[j] = d[jp];
	    perm[j] = indxq[indx[jp]];
	    scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1]
		    , &c__1);
/* L130: */
	}
    }

/*     The deflated eigenvalues and their corresponding vectors go back   
       into the last N - K slots of D and Q respectively. */

    if (*k < *n) {
	if (*icompq == 0) {
	    i__1 = *n - *k;
	    scopy_(&i__1, &dlamda[*k + 1], &c__1, &d[*k + 1], &c__1);
	} else {
	    i__1 = *n - *k;
	    scopy_(&i__1, &dlamda[*k + 1], &c__1, &d[*k + 1], &c__1);
	    i__1 = *n - *k;
	    slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*
		    k + 1) * q_dim1 + 1], ldq);
	}
    }

    return 0;

/*     End of SLAED8 */

} /* slaed8_ */
Пример #27
0
/* Subroutine */ int slahqr_(logical *wantt, logical *wantz, integer *n, 
	integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real *
	wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *
	info)
{
    /* System generated locals */
    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3;
    real r__1, r__2, r__3, r__4;

    /* Local variables */
    integer i__, j, k, l, m;
    real s, v[3];
    integer i1, i2;
    real t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, cs;
    integer nh;
    real sn;
    integer nr;
    real tr;
    integer nz;
    real det, h21s;
    integer its;
    real ulp, sum, tst, rt1i, rt2i, rt1r, rt2r;
    real safmin;
    real safmax, rtdisc, smlnum;

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     November 2006 */

/*     Purpose */
/*     ======= */

/*     SLAHQR is an auxiliary routine called by SHSEQR to update the */
/*     eigenvalues and Schur decomposition already computed by SHSEQR, by */
/*     dealing with the Hessenberg submatrix in rows and columns ILO to */
/*     IHI. */

/*     Arguments */
/*     ========= */

/*     WANTT   (input) LOGICAL */
/*          = .TRUE. : the full Schur form T is required; */
/*          = .FALSE.: only eigenvalues are required. */

/*     WANTZ   (input) LOGICAL */
/*          = .TRUE. : the matrix of Schur vectors Z is required; */
/*          = .FALSE.: Schur vectors are not required. */

/*     N       (input) INTEGER */
/*          The order of the matrix H.  N >= 0. */

/*     ILO     (input) INTEGER */
/*     IHI     (input) INTEGER */
/*          It is assumed that H is already upper quasi-triangular in */
/*          rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless */
/*          ILO = 1). SLAHQR works primarily with the Hessenberg */
/*          submatrix in rows and columns ILO to IHI, but applies */
/*          1 <= ILO <= max(1,IHI); IHI <= N. */

/*     H       (input/output) REAL array, dimension (LDH,N) */
/*          On entry, the upper Hessenberg matrix H. */
/*          On exit, if INFO is zero and if WANTT is .TRUE., H is upper */
/*          quasi-triangular in rows and columns ILO:IHI, with any */
/*          2-by-2 diagonal blocks in standard form. If INFO is zero */
/*          and WANTT is .FALSE., the contents of H are unspecified on */
/*          exit.  The output state of H if INFO is nonzero is given */
/*          below under the description of INFO. */

/*     LDH     (input) INTEGER */
/*          The leading dimension of the array H. LDH >= max(1,N). */

/*     WR      (output) REAL array, dimension (N) */
/*     WI      (output) REAL array, dimension (N) */
/*          The real and imaginary parts, respectively, of the computed */
/*          eigenvalues ILO to IHI are stored in the corresponding */
/*          elements of WR and WI. If two eigenvalues are computed as a */
/*          complex conjugate pair, they are stored in consecutive */
/*          elements of WR and WI, say the i-th and (i+1)th, with */
/*          WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the */
/*          eigenvalues are stored in the same order as on the diagonal */
/*          of the Schur form returned in H, with WR(i) = H(i,i), and, if */
/*          H(i:i+1,i:i+1) is a 2-by-2 diagonal block, */
/*          WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). */

/*     ILOZ    (input) INTEGER */
/*     IHIZ    (input) INTEGER */
/*          Specify the rows of Z to which transformations must be */
/*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N. */

/*     Z       (input/output) REAL array, dimension (LDZ,N) */
/*          If WANTZ is .TRUE., on entry Z must contain the current */
/*          matrix Z of transformations accumulated by SHSEQR, and on */
/*          exit Z has been updated; transformations are applied only to */
/*          the submatrix Z(ILOZ:IHIZ,ILO:IHI). */
/*          If WANTZ is .FALSE., Z is not referenced. */

/*     LDZ     (input) INTEGER */
/*          The leading dimension of the array Z. LDZ >= max(1,N). */

/*     INFO    (output) INTEGER */
/*           =   0: successful exit */
/*          .GT. 0: If INFO = i, SLAHQR failed to compute all the */
/*                  eigenvalues ILO to IHI in a total of 30 iterations */
/*                  per eigenvalue; elements i+1:ihi of WR and WI */
/*                  contain those eigenvalues which have been */
/*                  successfully computed. */

/*                  If INFO .GT. 0 and WANTT is .FALSE., then on exit, */
/*                  the remaining unconverged eigenvalues are the */
/*                  eigenvalues of the upper Hessenberg matrix rows */
/*                  and columns ILO thorugh INFO of the final, output */
/*                  value of H. */

/*                  If INFO .GT. 0 and WANTT is .TRUE., then on exit */
/*          (*)       (initial value of H)*U  = U*(final value of H) */
/*                  where U is an orthognal matrix.    The final */
/*                  value of H is upper Hessenberg and triangular in */
/*                  rows and columns INFO+1 through IHI. */

/*                  If INFO .GT. 0 and WANTZ is .TRUE., then on exit */
/*                      (final value of Z)  = (initial value of Z)*U */
/*                  where U is the orthogonal matrix in (*) */
/*                  (regardless of the value of WANTT.) */

/*     Further Details */
/*     =============== */

/*     02-96 Based on modifications by */
/*     David Day, Sandia National Laboratory, USA */

/*     12-04 Further modifications by */
/*     Ralph Byers, University of Kansas, USA */
/*     This is a modified version of SLAHQR from LAPACK version 3.0. */
/*     It is (1) more robust against overflow and underflow and */
/*     (2) adopts the more conservative Ahues & Tisseur stopping */
/*     criterion (LAWN 122, 1997). */

/*     ========================================================= */

    /* Parameter adjustments */
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    --wr;
    --wi;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;

    /* Function Body */
    *info = 0;

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }
    if (*ilo == *ihi) {
	wr[*ilo] = h__[*ilo + *ilo * h_dim1];
	wi[*ilo] = 0.f;
	return 0;
    }

/*     ==== clear out the trash ==== */
    i__1 = *ihi - 3;
    for (j = *ilo; j <= i__1; ++j) {
	h__[j + 2 + j * h_dim1] = 0.f;
	h__[j + 3 + j * h_dim1] = 0.f;
    }
    if (*ilo <= *ihi - 2) {
	h__[*ihi + (*ihi - 2) * h_dim1] = 0.f;
    }

    nh = *ihi - *ilo + 1;
    nz = *ihiz - *iloz + 1;

/*     Set machine-dependent constants for the stopping criterion. */

    safmin = slamch_("SAFE MINIMUM");
    safmax = 1.f / safmin;
    slabad_(&safmin, &safmax);
    ulp = slamch_("PRECISION");
    smlnum = safmin * ((real) nh / ulp);

/*     I1 and I2 are the indices of the first row and last column of H */
/*     to which transformations must be applied. If eigenvalues only are */
/*     being computed, I1 and I2 are set inside the main loop. */

    if (*wantt) {
	i1 = 1;
	i2 = *n;
    }

/*     The main loop begins here. I is the loop index and decreases from */
/*     IHI to ILO in steps of 1 or 2. Each iteration of the loop works */
/*     with the active submatrix in rows and columns L to I. */
/*     Eigenvalues I+1 to IHI have already converged. Either L = ILO or */
/*     H(L,L-1) is negligible so that the matrix splits. */

    i__ = *ihi;
L20:
    l = *ilo;
    if (i__ < *ilo) {
	goto L160;
    }

/*     Perform QR iterations on rows and columns ILO to I until a */
/*     submatrix of order 1 or 2 splits off at the bottom because a */
/*     subdiagonal element has become negligible. */

    for (its = 0; its <= 30; ++its) {

/*        Look for a single small subdiagonal element. */

	i__1 = l + 1;
	for (k = i__; k >= i__1; --k) {
	    if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= smlnum) {
		goto L40;
	    }
	    tst = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 =
		     h__[k + k * h_dim1], dabs(r__2));
	    if (tst == 0.f) {
		if (k - 2 >= *ilo) {
		    tst += (r__1 = h__[k - 1 + (k - 2) * h_dim1], dabs(r__1));
		}
		if (k + 1 <= *ihi) {
		    tst += (r__1 = h__[k + 1 + k * h_dim1], dabs(r__1));
		}
	    }
/*           ==== The following is a conservative small subdiagonal */
/*           .    deflation  criterion due to Ahues & Tisseur (LAWN 122, */
/*           .    1997). It has better mathematical foundation and */
/*           .    improves accuracy in some cases.  ==== */
	    if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= ulp * tst) {
/* Computing MAX */
		r__3 = (r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)), r__4 = 
			(r__2 = h__[k - 1 + k * h_dim1], dabs(r__2));
		ab = dmax(r__3,r__4);
/* Computing MIN */
		r__3 = (r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)), r__4 = 
			(r__2 = h__[k - 1 + k * h_dim1], dabs(r__2));
		ba = dmin(r__3,r__4);
/* Computing MAX */
		r__3 = (r__1 = h__[k + k * h_dim1], dabs(r__1)), r__4 = (r__2 
			= h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1],
			 dabs(r__2));
		aa = dmax(r__3,r__4);
/* Computing MIN */
		r__3 = (r__1 = h__[k + k * h_dim1], dabs(r__1)), r__4 = (r__2 
			= h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1],
			 dabs(r__2));
		bb = dmin(r__3,r__4);
		s = aa + ab;
/* Computing MAX */
		r__1 = smlnum, r__2 = ulp * (bb * (aa / s));
		if (ba * (ab / s) <= dmax(r__1,r__2)) {
		    goto L40;
		}
	    }
	}
L40:
	l = k;
	if (l > *ilo) {

/*           H(L,L-1) is negligible */

	    h__[l + (l - 1) * h_dim1] = 0.f;
	}

/*        Exit from loop if a submatrix of order 1 or 2 has split off. */

	if (l >= i__ - 1) {
	    goto L150;
	}

/*        Now the active submatrix is in rows and columns L to I. If */
/*        eigenvalues only are being computed, only the active submatrix */
/*        need be transformed. */

	if (! (*wantt)) {
	    i1 = l;
	    i2 = i__;
	}

	if (its == 10) {

/*           Exceptional shift. */

	    s = (r__1 = h__[l + 1 + l * h_dim1], dabs(r__1)) + (r__2 = h__[l 
		    + 2 + (l + 1) * h_dim1], dabs(r__2));
	    h11 = s * .75f + h__[l + l * h_dim1];
	    h12 = s * -.4375f;
	    h21 = s;
	    h22 = h11;
	} else if (its == 20) {

/*           Exceptional shift. */

	    s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 = 
		    h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2));
	    h11 = s * .75f + h__[i__ + i__ * h_dim1];
	    h12 = s * -.4375f;
	    h21 = s;
	    h22 = h11;
	} else {

/*           Prepare to use Francis' double shift */
/*           (i.e. 2nd degree generalized Rayleigh quotient) */

	    h11 = h__[i__ - 1 + (i__ - 1) * h_dim1];
	    h21 = h__[i__ + (i__ - 1) * h_dim1];
	    h12 = h__[i__ - 1 + i__ * h_dim1];
	    h22 = h__[i__ + i__ * h_dim1];
	}
	s = dabs(h11) + dabs(h12) + dabs(h21) + dabs(h22);
	if (s == 0.f) {
	    rt1r = 0.f;
	    rt1i = 0.f;
	    rt2r = 0.f;
	    rt2i = 0.f;
	} else {
	    h11 /= s;
	    h21 /= s;
	    h12 /= s;
	    h22 /= s;
	    tr = (h11 + h22) / 2.f;
	    det = (h11 - tr) * (h22 - tr) - h12 * h21;
	    rtdisc = sqrt((dabs(det)));
	    if (det >= 0.f) {

/*              ==== complex conjugate shifts ==== */

		rt1r = tr * s;
		rt2r = rt1r;
		rt1i = rtdisc * s;
		rt2i = -rt1i;
	    } else {

/*              ==== real shifts (use only one of them)  ==== */

		rt1r = tr + rtdisc;
		rt2r = tr - rtdisc;
		if ((r__1 = rt1r - h22, dabs(r__1)) <= (r__2 = rt2r - h22, 
			dabs(r__2))) {
		    rt1r *= s;
		    rt2r = rt1r;
		} else {
		    rt2r *= s;
		    rt1r = rt2r;
		}
		rt1i = 0.f;
		rt2i = 0.f;
	    }
	}

/*        Look for two consecutive small subdiagonal elements. */

	i__1 = l;
	for (m = i__ - 2; m >= i__1; --m) {
/*           Determine the effect of starting the double-shift QR */
/*           iteration at row M, and see if this would make H(M,M-1) */
/*           negligible.  (The following uses scaling to avoid */
/*           overflows and most underflows.) */

	    h21s = h__[m + 1 + m * h_dim1];
	    s = (r__1 = h__[m + m * h_dim1] - rt2r, dabs(r__1)) + dabs(rt2i) 
		    + dabs(h21s);
	    h21s = h__[m + 1 + m * h_dim1] / s;
	    v[0] = h21s * h__[m + (m + 1) * h_dim1] + (h__[m + m * h_dim1] - 
		    rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - rt1i * (rt2i 
		    / s);
	    v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1]
		     - rt1r - rt2r);
	    v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1];
	    s = dabs(v[0]) + dabs(v[1]) + dabs(v[2]);
	    v[0] /= s;
	    v[1] /= s;
	    v[2] /= s;
	    if (m == l) {
		goto L60;
	    }
	    if ((r__1 = h__[m + (m - 1) * h_dim1], dabs(r__1)) * (dabs(v[1]) 
		    + dabs(v[2])) <= ulp * dabs(v[0]) * ((r__2 = h__[m - 1 + (
		    m - 1) * h_dim1], dabs(r__2)) + (r__3 = h__[m + m * 
		    h_dim1], dabs(r__3)) + (r__4 = h__[m + 1 + (m + 1) * 
		    h_dim1], dabs(r__4)))) {
		goto L60;
	    }
	}
L60:

/*        Double-shift QR step */

	i__1 = i__ - 1;
	for (k = m; k <= i__1; ++k) {

/*           The first iteration of this loop determines a reflection G */
/*           from the vector V and applies it from left and right to H, */
/*           thus creating a nonzero bulge below the subdiagonal. */

/*           Each subsequent iteration determines a reflection G to */
/*           restore the Hessenberg form in the (K-1)th column, and thus */
/*           chases the bulge one step toward the bottom of the active */
/*           submatrix. NR is the order of G. */

/* Computing MIN */
	    i__2 = 3, i__3 = i__ - k + 1;
	    nr = min(i__2,i__3);
	    if (k > m) {
		scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1);
	    }
	    slarfg_(&nr, v, &v[1], &c__1, &t1);
	    if (k > m) {
		h__[k + (k - 1) * h_dim1] = v[0];
		h__[k + 1 + (k - 1) * h_dim1] = 0.f;
		if (k < i__ - 1) {
		    h__[k + 2 + (k - 1) * h_dim1] = 0.f;
		}
	    } else if (m > l) {
/*               ==== Use the following instead of */
/*               .    H( K, K-1 ) = -H( K, K-1 ) to */
/*               .    avoid a bug when v(2) and v(3) */
/*               .    underflow. ==== */
		h__[k + (k - 1) * h_dim1] *= 1.f - t1;
	    }
	    v2 = v[1];
	    t2 = t1 * v2;
	    if (nr == 3) {
		v3 = v[2];
		t3 = t1 * v3;

/*              Apply G from the left to transform the rows of the matrix */
/*              in columns K to I2. */

		i__2 = i2;
		for (j = k; j <= i__2; ++j) {
		    sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] 
			    + v3 * h__[k + 2 + j * h_dim1];
		    h__[k + j * h_dim1] -= sum * t1;
		    h__[k + 1 + j * h_dim1] -= sum * t2;
		    h__[k + 2 + j * h_dim1] -= sum * t3;
		}

/*              Apply G from the right to transform the columns of the */
/*              matrix in rows I1 to min(K+3,I). */

/* Computing MIN */
		i__3 = k + 3;
		i__2 = min(i__3,i__);
		for (j = i1; j <= i__2; ++j) {
		    sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]
			     + v3 * h__[j + (k + 2) * h_dim1];
		    h__[j + k * h_dim1] -= sum * t1;
		    h__[j + (k + 1) * h_dim1] -= sum * t2;
		    h__[j + (k + 2) * h_dim1] -= sum * t3;
		}

		if (*wantz) {

/*                 Accumulate transformations in the matrix Z */

		    i__2 = *ihiz;
		    for (j = *iloz; j <= i__2; ++j) {
			sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * 
				z_dim1] + v3 * z__[j + (k + 2) * z_dim1];
			z__[j + k * z_dim1] -= sum * t1;
			z__[j + (k + 1) * z_dim1] -= sum * t2;
			z__[j + (k + 2) * z_dim1] -= sum * t3;
		    }
		}
	    } else if (nr == 2) {

/*              Apply G from the left to transform the rows of the matrix */
/*              in columns K to I2. */

		i__2 = i2;
		for (j = k; j <= i__2; ++j) {
		    sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1];
		    h__[k + j * h_dim1] -= sum * t1;
		    h__[k + 1 + j * h_dim1] -= sum * t2;
		}

/*              Apply G from the right to transform the columns of the */
/*              matrix in rows I1 to min(K+3,I). */

		i__2 = i__;
		for (j = i1; j <= i__2; ++j) {
		    sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]
			    ;
		    h__[j + k * h_dim1] -= sum * t1;
		    h__[j + (k + 1) * h_dim1] -= sum * t2;
		}

		if (*wantz) {

/*                 Accumulate transformations in the matrix Z */

		    i__2 = *ihiz;
		    for (j = *iloz; j <= i__2; ++j) {
			sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * 
				z_dim1];
			z__[j + k * z_dim1] -= sum * t1;
			z__[j + (k + 1) * z_dim1] -= sum * t2;
		    }
		}
	    }
	}

    }

/*     Failure to converge in remaining number of iterations */

    *info = i__;
    return 0;

L150:

    if (l == i__) {

/*        H(I,I-1) is negligible: one eigenvalue has converged. */

	wr[i__] = h__[i__ + i__ * h_dim1];
	wi[i__] = 0.f;
    } else if (l == i__ - 1) {

/*        H(I-1,I-2) is negligible: a pair of eigenvalues have converged. */

/*        Transform the 2-by-2 submatrix to standard Schur form, */
/*        and compute and store the eigenvalues. */

	slanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * 
		h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * 
		h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, 
		&sn);

	if (*wantt) {

/*           Apply the transformation to the rest of H. */

	    if (i2 > i__) {
		i__1 = i2 - i__;
		srot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[
			i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn);
	    }
	    i__1 = i__ - i1 - 1;
	    srot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ *
		     h_dim1], &c__1, &cs, &sn);
	}
	if (*wantz) {

/*           Apply the transformation to Z. */

	    srot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + 
		    i__ * z_dim1], &c__1, &cs, &sn);
	}
    }

/*     return to start of the main loop with new value of I. */

    i__ = l - 1;
    goto L20;

L160:
    return 0;

/*     End of SLAHQR */

} /* slahqr_ */
Пример #28
0
/* Subroutine */ int sgbbrd_(char *vect, integer *m, integer *n, integer *ncc, 
	 integer *kl, integer *ku, real *ab, integer *ldab, real *d__, real *
	e, real *q, integer *ldq, real *pt, integer *ldpt, real *c__, integer 
	*ldc, real *work, integer *info)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, c_dim1, c_offset, pt_dim1, pt_offset, q_dim1, 
	    q_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;

    /* Local variables */
    integer i__, j, l, j1, j2, kb;
    real ra, rb, rc;
    integer kk, ml, mn, nr, mu;
    real rs;
    integer kb1, ml0, mu0, klm, kun, nrt, klu1, inca;
    logical wantb, wantc;
    integer minmn;
    logical wantq;
    logical wantpt;

/*  -- LAPACK routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SGBBRD reduces a real general m-by-n band matrix A to upper */
/*  bidiagonal form B by an orthogonal transformation: Q' * A * P = B. */

/*  The routine computes B, and optionally forms Q or P', or computes */
/*  Q'*C for a given matrix C. */

/*  Arguments */
/*  ========= */

/*  VECT    (input) CHARACTER*1 */
/*          Specifies whether or not the matrices Q and P' are to be */
/*          formed. */
/*          = 'N': do not form Q or P'; */
/*          = 'Q': form Q only; */
/*          = 'P': form P' only; */
/*          = 'B': form both. */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A.  N >= 0. */

/*  NCC     (input) INTEGER */
/*          The number of columns of the matrix C.  NCC >= 0. */

/*  KL      (input) INTEGER */
/*          The number of subdiagonals of the matrix A. KL >= 0. */

/*  KU      (input) INTEGER */
/*          The number of superdiagonals of the matrix A. KU >= 0. */

/*  AB      (input/output) REAL array, dimension (LDAB,N) */
/*          On entry, the m-by-n band matrix A, stored in rows 1 to */
/*          KL+KU+1. The j-th column of A is stored in the j-th column of */
/*          the array AB as follows: */
/*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). */
/*          On exit, A is overwritten by values generated during the */
/*          reduction. */

/*  LDAB    (input) INTEGER */
/*          The leading dimension of the array A. LDAB >= KL+KU+1. */

/*  D       (output) REAL array, dimension (min(M,N)) */
/*          The diagonal elements of the bidiagonal matrix B. */

/*  E       (output) REAL array, dimension (min(M,N)-1) */
/*          The superdiagonal elements of the bidiagonal matrix B. */

/*  Q       (output) REAL array, dimension (LDQ,M) */
/*          If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. */
/*          If VECT = 'N' or 'P', the array Q is not referenced. */

/*  LDQ     (input) INTEGER */
/*          The leading dimension of the array Q. */
/*          LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. */

/*  PT      (output) REAL array, dimension (LDPT,N) */
/*          If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. */
/*          If VECT = 'N' or 'Q', the array PT is not referenced. */

/*  LDPT    (input) INTEGER */
/*          The leading dimension of the array PT. */
/*          LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. */

/*  C       (input/output) REAL array, dimension (LDC,NCC) */
/*          On entry, an m-by-ncc matrix C. */
/*          On exit, C is overwritten by Q'*C. */
/*          C is not referenced if NCC = 0. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C. */
/*          LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. */

/*  WORK    (workspace) REAL array, dimension (2*max(M,N)) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  ===================================================================== */

/*     Test the input parameters */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;
    --d__;
    --e;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    pt_dim1 = *ldpt;
    pt_offset = 1 + pt_dim1;
    pt -= pt_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    wantb = lsame_(vect, "B");
    wantq = lsame_(vect, "Q") || wantb;
    wantpt = lsame_(vect, "P") || wantb;
    wantc = *ncc > 0;
    klu1 = *kl + *ku + 1;
    *info = 0;
    if (! wantq && ! wantpt && ! lsame_(vect, "N")) {
	*info = -1;
    } else if (*m < 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*ncc < 0) {
	*info = -4;
    } else if (*kl < 0) {
	*info = -5;
    } else if (*ku < 0) {
	*info = -6;
    } else if (*ldab < klu1) {
	*info = -8;
    } else if (*ldq < 1 || wantq && *ldq < max(1,*m)) {
	*info = -12;
    } else if (*ldpt < 1 || wantpt && *ldpt < max(1,*n)) {
	*info = -14;
    } else if (*ldc < 1 || wantc && *ldc < max(1,*m)) {
	*info = -16;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGBBRD", &i__1);
	return 0;
    }

/*     Initialize Q and P' to the unit matrix, if needed */

    if (wantq) {
	slaset_("Full", m, m, &c_b8, &c_b9, &q[q_offset], ldq);
    }
    if (wantpt) {
	slaset_("Full", n, n, &c_b8, &c_b9, &pt[pt_offset], ldpt);
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0) {
	return 0;
    }

    minmn = min(*m,*n);

    if (*kl + *ku > 1) {

/*        Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce */
/*        first to lower bidiagonal form and then transform to upper */
/*        bidiagonal */

	if (*ku > 0) {
	    ml0 = 1;
	    mu0 = 2;
	} else {
	    ml0 = 2;
	    mu0 = 1;
	}

/*        Wherever possible, plane rotations are generated and applied in */
/*        vector operations of length NR over the index set J1:J2:KLU1. */

/*        The sines of the plane rotations are stored in WORK(1:max(m,n)) */
/*        and the cosines in WORK(max(m,n)+1:2*max(m,n)). */

	mn = max(*m,*n);
/* Computing MIN */
	i__1 = *m - 1;
	klm = min(i__1,*kl);
/* Computing MIN */
	i__1 = *n - 1;
	kun = min(i__1,*ku);
	kb = klm + kun;
	kb1 = kb + 1;
	inca = kb1 * *ldab;
	nr = 0;
	j1 = klm + 2;
	j2 = 1 - kun;

	i__1 = minmn;
	for (i__ = 1; i__ <= i__1; ++i__) {

/*           Reduce i-th column and i-th row of matrix to bidiagonal form */

	    ml = klm + 1;
	    mu = kun + 1;
	    i__2 = kb;
	    for (kk = 1; kk <= i__2; ++kk) {
		j1 += kb;
		j2 += kb;

/*              generate plane rotations to annihilate nonzero elements */
/*              which have been created below the band */

		if (nr > 0) {
		    slargv_(&nr, &ab[klu1 + (j1 - klm - 1) * ab_dim1], &inca, 
			    &work[j1], &kb1, &work[mn + j1], &kb1);
		}

/*              apply plane rotations from the left */

		i__3 = kb;
		for (l = 1; l <= i__3; ++l) {
		    if (j2 - klm + l - 1 > *n) {
			nrt = nr - 1;
		    } else {
			nrt = nr;
		    }
		    if (nrt > 0) {
			slartv_(&nrt, &ab[klu1 - l + (j1 - klm + l - 1) * 
				ab_dim1], &inca, &ab[klu1 - l + 1 + (j1 - klm 
				+ l - 1) * ab_dim1], &inca, &work[mn + j1], &
				work[j1], &kb1);
		    }
		}

		if (ml > ml0) {
		    if (ml <= *m - i__ + 1) {

/*                    generate plane rotation to annihilate a(i+ml-1,i) */
/*                    within the band, and apply rotation from the left */

			slartg_(&ab[*ku + ml - 1 + i__ * ab_dim1], &ab[*ku + 
				ml + i__ * ab_dim1], &work[mn + i__ + ml - 1], 
				 &work[i__ + ml - 1], &ra);
			ab[*ku + ml - 1 + i__ * ab_dim1] = ra;
			if (i__ < *n) {
/* Computing MIN */
			    i__4 = *ku + ml - 2, i__5 = *n - i__;
			    i__3 = min(i__4,i__5);
			    i__6 = *ldab - 1;
			    i__7 = *ldab - 1;
			    srot_(&i__3, &ab[*ku + ml - 2 + (i__ + 1) * 
				    ab_dim1], &i__6, &ab[*ku + ml - 1 + (i__ 
				    + 1) * ab_dim1], &i__7, &work[mn + i__ + 
				    ml - 1], &work[i__ + ml - 1]);
			}
		    }
		    ++nr;
		    j1 -= kb1;
		}

		if (wantq) {

/*                 accumulate product of plane rotations in Q */

		    i__3 = j2;
		    i__4 = kb1;
		    for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) 
			    {
			srot_(m, &q[(j - 1) * q_dim1 + 1], &c__1, &q[j * 
				q_dim1 + 1], &c__1, &work[mn + j], &work[j]);
		    }
		}

		if (wantc) {

/*                 apply plane rotations to C */

		    i__4 = j2;
		    i__3 = kb1;
		    for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) 
			    {
			srot_(ncc, &c__[j - 1 + c_dim1], ldc, &c__[j + c_dim1]
, ldc, &work[mn + j], &work[j]);
		    }
		}

		if (j2 + kun > *n) {

/*                 adjust J2 to keep within the bounds of the matrix */

		    --nr;
		    j2 -= kb1;
		}

		i__3 = j2;
		i__4 = kb1;
		for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) {

/*                 create nonzero element a(j-1,j+ku) above the band */
/*                 and store it in WORK(n+1:2*n) */

		    work[j + kun] = work[j] * ab[(j + kun) * ab_dim1 + 1];
		    ab[(j + kun) * ab_dim1 + 1] = work[mn + j] * ab[(j + kun) 
			    * ab_dim1 + 1];
		}

/*              generate plane rotations to annihilate nonzero elements */
/*              which have been generated above the band */

		if (nr > 0) {
		    slargv_(&nr, &ab[(j1 + kun - 1) * ab_dim1 + 1], &inca, &
			    work[j1 + kun], &kb1, &work[mn + j1 + kun], &kb1);
		}

/*              apply plane rotations from the right */

		i__4 = kb;
		for (l = 1; l <= i__4; ++l) {
		    if (j2 + l - 1 > *m) {
			nrt = nr - 1;
		    } else {
			nrt = nr;
		    }
		    if (nrt > 0) {
			slartv_(&nrt, &ab[l + 1 + (j1 + kun - 1) * ab_dim1], &
				inca, &ab[l + (j1 + kun) * ab_dim1], &inca, &
				work[mn + j1 + kun], &work[j1 + kun], &kb1);
		    }
		}

		if (ml == ml0 && mu > mu0) {
		    if (mu <= *n - i__ + 1) {

/*                    generate plane rotation to annihilate a(i,i+mu-1) */
/*                    within the band, and apply rotation from the right */

			slartg_(&ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1], 
				&ab[*ku - mu + 2 + (i__ + mu - 1) * ab_dim1], 
				&work[mn + i__ + mu - 1], &work[i__ + mu - 1], 
				 &ra);
			ab[*ku - mu + 3 + (i__ + mu - 2) * ab_dim1] = ra;
/* Computing MIN */
			i__3 = *kl + mu - 2, i__5 = *m - i__;
			i__4 = min(i__3,i__5);
			srot_(&i__4, &ab[*ku - mu + 4 + (i__ + mu - 2) * 
				ab_dim1], &c__1, &ab[*ku - mu + 3 + (i__ + mu 
				- 1) * ab_dim1], &c__1, &work[mn + i__ + mu - 
				1], &work[i__ + mu - 1]);
		    }
		    ++nr;
		    j1 -= kb1;
		}

		if (wantpt) {

/*                 accumulate product of plane rotations in P' */

		    i__4 = j2;
		    i__3 = kb1;
		    for (j = j1; i__3 < 0 ? j >= i__4 : j <= i__4; j += i__3) 
			    {
			srot_(n, &pt[j + kun - 1 + pt_dim1], ldpt, &pt[j + 
				kun + pt_dim1], ldpt, &work[mn + j + kun], &
				work[j + kun]);
		    }
		}

		if (j2 + kb > *m) {

/*                 adjust J2 to keep within the bounds of the matrix */

		    --nr;
		    j2 -= kb1;
		}

		i__3 = j2;
		i__4 = kb1;
		for (j = j1; i__4 < 0 ? j >= i__3 : j <= i__3; j += i__4) {

/*                 create nonzero element a(j+kl+ku,j+ku-1) below the */
/*                 band and store it in WORK(1:n) */

		    work[j + kb] = work[j + kun] * ab[klu1 + (j + kun) * 
			    ab_dim1];
		    ab[klu1 + (j + kun) * ab_dim1] = work[mn + j + kun] * ab[
			    klu1 + (j + kun) * ab_dim1];
		}

		if (ml > ml0) {
		    --ml;
		} else {
		    --mu;
		}
	    }
	}
    }

    if (*ku == 0 && *kl > 0) {

/*        A has been reduced to lower bidiagonal form */

/*        Transform lower bidiagonal form to upper bidiagonal by applying */
/*        plane rotations from the left, storing diagonal elements in D */
/*        and off-diagonal elements in E */

/* Computing MIN */
	i__2 = *m - 1;
	i__1 = min(i__2,*n);
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slartg_(&ab[i__ * ab_dim1 + 1], &ab[i__ * ab_dim1 + 2], &rc, &rs, 
		    &ra);
	    d__[i__] = ra;
	    if (i__ < *n) {
		e[i__] = rs * ab[(i__ + 1) * ab_dim1 + 1];
		ab[(i__ + 1) * ab_dim1 + 1] = rc * ab[(i__ + 1) * ab_dim1 + 1]
			;
	    }
	    if (wantq) {
		srot_(m, &q[i__ * q_dim1 + 1], &c__1, &q[(i__ + 1) * q_dim1 + 
			1], &c__1, &rc, &rs);
	    }
	    if (wantc) {
		srot_(ncc, &c__[i__ + c_dim1], ldc, &c__[i__ + 1 + c_dim1], 
			ldc, &rc, &rs);
	    }
	}
	if (*m <= *n) {
	    d__[*m] = ab[*m * ab_dim1 + 1];
	}
    } else if (*ku > 0) {

/*        A has been reduced to upper bidiagonal form */

	if (*m < *n) {

/*           Annihilate a(m,m+1) by applying plane rotations from the */
/*           right, storing diagonal elements in D and off-diagonal */
/*           elements in E */

	    rb = ab[*ku + (*m + 1) * ab_dim1];
	    for (i__ = *m; i__ >= 1; --i__) {
		slartg_(&ab[*ku + 1 + i__ * ab_dim1], &rb, &rc, &rs, &ra);
		d__[i__] = ra;
		if (i__ > 1) {
		    rb = -rs * ab[*ku + i__ * ab_dim1];
		    e[i__ - 1] = rc * ab[*ku + i__ * ab_dim1];
		}
		if (wantpt) {
		    srot_(n, &pt[i__ + pt_dim1], ldpt, &pt[*m + 1 + pt_dim1], 
			    ldpt, &rc, &rs);
		}
	    }
	} else {

/*           Copy off-diagonal elements to E and diagonal elements to D */

	    i__1 = minmn - 1;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		e[i__] = ab[*ku + (i__ + 1) * ab_dim1];
	    }
	    i__1 = minmn;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		d__[i__] = ab[*ku + 1 + i__ * ab_dim1];
	    }
	}
    } else {

/*        A is diagonal. Set elements of E to zero and copy diagonal */
/*        elements to D. */

	i__1 = minmn - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    e[i__] = 0.f;
	}
	i__1 = minmn;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    d__[i__] = ab[i__ * ab_dim1 + 1];
	}
    }
    return 0;

/*     End of SGBBRD */

} /* sgbbrd_ */
Пример #29
0
/* Subroutine */ int stgex2_(logical *wantq, logical *wantz, integer *n, real 
	*a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real *
	z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, 
	integer *lwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, 
	    z_offset, i__1, i__2;
    real r__1;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    real f, g;
    integer i__, m;
    real s[16]	/* was [4][4] */, t[16]	/* was [4][4] */, be[2], ai[2], ar[2],
	     sa, sb, li[16]	/* was [4][4] */, ir[16]	/* was [4][4] 
	    */, ss, ws, eps;
    logical weak;
    real ddum;
    integer idum;
    real taul[4], dsum, taur[4], scpy[16]	/* was [4][4] */, tcpy[16]	
	    /* was [4][4] */;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    real scale, bqra21, brqa21;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    real licop[16]	/* was [4][4] */;
    integer linfo;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    real ircop[16]	/* was [4][4] */, dnorm;
    integer iwork[4];
    extern /* Subroutine */ int slagv2_(real *, integer *, real *, integer *, 
	    real *, real *, real *, real *, real *, real *, real *), sgeqr2_(
	    integer *, integer *, real *, integer *, real *, real *, integer *
), sgerq2_(integer *, integer *, real *, integer *, real *, real *
, integer *), sorg2r_(integer *, integer *, integer *, real *, 
	    integer *, real *, real *, integer *), sorgr2_(integer *, integer 
	    *, integer *, real *, integer *, real *, real *, integer *), 
	    sorm2r_(char *, char *, integer *, integer *, integer *, real *, 
	    integer *, real *, real *, integer *, real *, integer *), sormr2_(char *, char *, integer *, integer *, integer *, 
	    real *, integer *, real *, real *, integer *, real *, integer *);
    real dscale;
    extern /* Subroutine */ int stgsy2_(char *, integer *, integer *, integer 
	    *, real *, integer *, real *, integer *, real *, integer *, real *
, integer *, real *, integer *, real *, integer *, real *, real *, 
	     real *, integer *, integer *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slartg_(real *, real *, 
	    real *, real *, real *);
    real thresh;
    extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, 
	    real *, real *, integer *), slassq_(integer *, real *, 
	    integer *, real *, real *);
    real smlnum;
    logical strong;


/*  -- LAPACK auxiliary routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) */
/*  of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair */
/*  (A, B) by an orthogonal equivalence transformation. */

/*  (A, B) must be in generalized real Schur canonical form (as returned */
/*  by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 */
/*  diagonal blocks. B is upper triangular. */

/*  Optionally, the matrices Q and Z of generalized Schur vectors are */
/*  updated. */

/*         Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' */
/*         Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' */


/*  Arguments */
/*  ========= */

/*  WANTQ   (input) LOGICAL */
/*          .TRUE. : update the left transformation matrix Q; */
/*          .FALSE.: do not update Q. */

/*  WANTZ   (input) LOGICAL */
/*          .TRUE. : update the right transformation matrix Z; */
/*          .FALSE.: do not update Z. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B. N >= 0. */

/*  A      (input/output) REAL arrays, dimensions (LDA,N) */
/*          On entry, the matrix A in the pair (A, B). */
/*          On exit, the updated matrix A. */

/*  LDA     (input)  INTEGER */
/*          The leading dimension of the array A. LDA >= max(1,N). */

/*  B      (input/output) REAL arrays, dimensions (LDB,N) */
/*          On entry, the matrix B in the pair (A, B). */
/*          On exit, the updated matrix B. */

/*  LDB     (input)  INTEGER */
/*          The leading dimension of the array B. LDB >= max(1,N). */

/*  Q       (input/output) REAL array, dimension (LDZ,N) */
/*          On entry, if WANTQ = .TRUE., the orthogonal matrix Q. */
/*          On exit, the updated matrix Q. */
/*          Not referenced if WANTQ = .FALSE.. */

/*  LDQ     (input) INTEGER */
/*          The leading dimension of the array Q. LDQ >= 1. */
/*          If WANTQ = .TRUE., LDQ >= N. */

/*  Z       (input/output) REAL array, dimension (LDZ,N) */
/*          On entry, if WANTZ =.TRUE., the orthogonal matrix Z. */
/*          On exit, the updated matrix Z. */
/*          Not referenced if WANTZ = .FALSE.. */

/*  LDZ     (input) INTEGER */
/*          The leading dimension of the array Z. LDZ >= 1. */
/*          If WANTZ = .TRUE., LDZ >= N. */

/*  J1      (input) INTEGER */
/*          The index to the first block (A11, B11). 1 <= J1 <= N. */

/*  N1      (input) INTEGER */
/*          The order of the first block (A11, B11). N1 = 0, 1 or 2. */

/*  N2      (input) INTEGER */
/*          The order of the second block (A22, B22). N2 = 0, 1 or 2. */

/*  WORK    (workspace) REAL array, dimension (MAX(1,LWORK)). */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. */
/*          LWORK >=  MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 ) */

/*  INFO    (output) INTEGER */
/*            =0: Successful exit */
/*            >0: If INFO = 1, the transformed matrix (A, B) would be */
/*                too far from generalized Schur form; the blocks are */
/*                not swapped and (A, B) and (Q, Z) are unchanged. */
/*                The problem of swapping is too ill-conditioned. */
/*            <0: If INFO = -16: LWORK is too small. Appropriate value */
/*                for LWORK is returned in WORK(1). */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
/*     Umea University, S-901 87 Umea, Sweden. */

/*  In the current code both weak and strong stability tests are */
/*  performed. The user can omit the strong stability test by changing */
/*  the internal logical parameter WANDS to .FALSE.. See ref. [2] for */
/*  details. */

/*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */
/*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */
/*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and */
/*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */

/*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified */
/*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition */
/*      Estimation: Theory, Algorithms and Software, */
/*      Report UMINF - 94.04, Department of Computing Science, Umea */
/*      University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working */
/*      Note 87. To appear in Numerical Algorithms, 1996. */

/*  ===================================================================== */
/*  Replaced various illegal calls to SCOPY by calls to SLASET, or by DO */
/*  loops. Sven Hammarling, 1/5/02. */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --work;

    /* Function Body */
    *info = 0;

/*     Quick return if possible */

    if (*n <= 1 || *n1 <= 0 || *n2 <= 0) {
	return 0;
    }
    if (*n1 > *n || *j1 + *n1 > *n) {
	return 0;
    }
    m = *n1 + *n2;
/* Computing MAX */
    i__1 = *n * m, i__2 = m * m << 1;
    if (*lwork < max(i__1,i__2)) {
	*info = -16;
/* Computing MAX */
	i__1 = *n * m, i__2 = m * m << 1;
	work[1] = (real) max(i__1,i__2);
	return 0;
    }

    weak = FALSE_;
    strong = FALSE_;

/*     Make a local copy of selected block */

    slaset_("Full", &c__4, &c__4, &c_b5, &c_b5, li, &c__4);
    slaset_("Full", &c__4, &c__4, &c_b5, &c_b5, ir, &c__4);
    slacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, s, &c__4);
    slacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, t, &c__4);

/*     Compute threshold for testing acceptance of swapping. */

    eps = slamch_("P");
    smlnum = slamch_("S") / eps;
    dscale = 0.f;
    dsum = 1.f;
    slacpy_("Full", &m, &m, s, &c__4, &work[1], &m);
    i__1 = m * m;
    slassq_(&i__1, &work[1], &c__1, &dscale, &dsum);
    slacpy_("Full", &m, &m, t, &c__4, &work[1], &m);
    i__1 = m * m;
    slassq_(&i__1, &work[1], &c__1, &dscale, &dsum);
    dnorm = dscale * sqrt(dsum);
/* Computing MAX */
    r__1 = eps * 10.f * dnorm;
    thresh = dmax(r__1,smlnum);

    if (m == 2) {

/*        CASE 1: Swap 1-by-1 and 1-by-1 blocks. */

/*        Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks */
/*        using Givens rotations and perform the swap tentatively. */

	f = s[5] * t[0] - t[5] * s[0];
	g = s[5] * t[4] - t[5] * s[4];
	sb = dabs(t[5]);
	sa = dabs(s[5]);
	slartg_(&f, &g, &ir[4], ir, &ddum);
	ir[1] = -ir[4];
	ir[5] = ir[0];
	srot_(&c__2, s, &c__1, &s[4], &c__1, ir, &ir[1]);
	srot_(&c__2, t, &c__1, &t[4], &c__1, ir, &ir[1]);
	if (sa >= sb) {
	    slartg_(s, &s[1], li, &li[1], &ddum);
	} else {
	    slartg_(t, &t[1], li, &li[1], &ddum);
	}
	srot_(&c__2, s, &c__4, &s[1], &c__4, li, &li[1]);
	srot_(&c__2, t, &c__4, &t[1], &c__4, li, &li[1]);
	li[5] = li[0];
	li[4] = -li[1];

/*        Weak stability test: */
/*           |S21| + |T21| <= O(EPS * F-norm((S, T))) */

	ws = dabs(s[1]) + dabs(t[1]);
	weak = ws <= thresh;
	if (! weak) {
	    goto L70;
	}

	if (TRUE_) {

/*           Strong stability test: */
/*             F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) */

	    slacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, &work[m * m 
		    + 1], &m);
	    sgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, &
		    work[1], &m);
	    sgemm_("N", "T", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, &
		    c_b42, &work[m * m + 1], &m);
	    dscale = 0.f;
	    dsum = 1.f;
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);

	    slacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, &work[m * m 
		    + 1], &m);
	    sgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, &
		    work[1], &m);
	    sgemm_("N", "T", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, &
		    c_b42, &work[m * m + 1], &m);
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);
	    ss = dscale * sqrt(dsum);
	    strong = ss <= thresh;
	    if (! strong) {
		goto L70;
	    }
	}

/*        Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and */
/*               (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */

	i__1 = *j1 + 1;
	srot_(&i__1, &a[*j1 * a_dim1 + 1], &c__1, &a[(*j1 + 1) * a_dim1 + 1], 
		&c__1, ir, &ir[1]);
	i__1 = *j1 + 1;
	srot_(&i__1, &b[*j1 * b_dim1 + 1], &c__1, &b[(*j1 + 1) * b_dim1 + 1], 
		&c__1, ir, &ir[1]);
	i__1 = *n - *j1 + 1;
	srot_(&i__1, &a[*j1 + *j1 * a_dim1], lda, &a[*j1 + 1 + *j1 * a_dim1], 
		lda, li, &li[1]);
	i__1 = *n - *j1 + 1;
	srot_(&i__1, &b[*j1 + *j1 * b_dim1], ldb, &b[*j1 + 1 + *j1 * b_dim1], 
		ldb, li, &li[1]);

/*        Set  N1-by-N2 (2,1) - blocks to ZERO. */

	a[*j1 + 1 + *j1 * a_dim1] = 0.f;
	b[*j1 + 1 + *j1 * b_dim1] = 0.f;

/*        Accumulate transformations into Q and Z if requested. */

	if (*wantz) {
	    srot_(n, &z__[*j1 * z_dim1 + 1], &c__1, &z__[(*j1 + 1) * z_dim1 + 
		    1], &c__1, ir, &ir[1]);
	}
	if (*wantq) {
	    srot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[(*j1 + 1) * q_dim1 + 1], 
		    &c__1, li, &li[1]);
	}

/*        Exit with INFO = 0 if swap was successfully performed. */

	return 0;

    } else {

/*        CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 */
/*                and 2-by-2 blocks. */

/*        Solve the generalized Sylvester equation */
/*                 S11 * R - L * S22 = SCALE * S12 */
/*                 T11 * R - L * T22 = SCALE * T12 */
/*        for R and L. Solutions in LI and IR. */

	slacpy_("Full", n1, n2, &t[(*n1 + 1 << 2) - 4], &c__4, li, &c__4);
	slacpy_("Full", n1, n2, &s[(*n1 + 1 << 2) - 4], &c__4, &ir[*n2 + 1 + (
		*n1 + 1 << 2) - 5], &c__4);
	stgsy2_("N", &c__0, n1, n2, s, &c__4, &s[*n1 + 1 + (*n1 + 1 << 2) - 5]
, &c__4, &ir[*n2 + 1 + (*n1 + 1 << 2) - 5], &c__4, t, &c__4, &
		t[*n1 + 1 + (*n1 + 1 << 2) - 5], &c__4, li, &c__4, &scale, &
		dsum, &dscale, iwork, &idum, &linfo);

/*        Compute orthogonal matrix QL: */

/*                    QL' * LI = [ TL ] */
/*                               [ 0  ] */
/*        where */
/*                    LI =  [      -L              ] */
/*                          [ SCALE * identity(N2) ] */

	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    sscal_(n1, &c_b48, &li[(i__ << 2) - 4], &c__1);
	    li[*n1 + i__ + (i__ << 2) - 5] = scale;
/* L10: */
	}
	sgeqr2_(&m, n2, li, &c__4, taul, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sorg2r_(&m, &m, n2, li, &c__4, taul, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}

/*        Compute orthogonal matrix RQ: */

/*                    IR * RQ' =   [ 0  TR], */

/*         where IR = [ SCALE * identity(N1), R ] */

	i__1 = *n1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    ir[*n2 + i__ + (i__ << 2) - 5] = scale;
/* L20: */
	}
	sgerq2_(n1, &m, &ir[*n2], &c__4, taur, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sorgr2_(&m, &m, n1, ir, &c__4, taur, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}

/*        Perform the swapping tentatively: */

	sgemm_("T", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, &
		work[1], &m);
	sgemm_("N", "T", &m, &m, &m, &c_b42, &work[1], &m, ir, &c__4, &c_b5, 
		s, &c__4);
	sgemm_("T", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, &
		work[1], &m);
	sgemm_("N", "T", &m, &m, &m, &c_b42, &work[1], &m, ir, &c__4, &c_b5, 
		t, &c__4);
	slacpy_("F", &m, &m, s, &c__4, scpy, &c__4);
	slacpy_("F", &m, &m, t, &c__4, tcpy, &c__4);
	slacpy_("F", &m, &m, ir, &c__4, ircop, &c__4);
	slacpy_("F", &m, &m, li, &c__4, licop, &c__4);

/*        Triangularize the B-part by an RQ factorization. */
/*        Apply transformation (from left) to A-part, giving S. */

	sgerq2_(&m, &m, t, &c__4, taur, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sormr2_("R", "T", &m, &m, &m, t, &c__4, taur, s, &c__4, &work[1], &
		linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sormr2_("L", "N", &m, &m, &m, t, &c__4, taur, ir, &c__4, &work[1], &
		linfo);
	if (linfo != 0) {
	    goto L70;
	}

/*        Compute F-norm(S21) in BRQA21. (T21 is 0.) */

	dscale = 0.f;
	dsum = 1.f;
	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slassq_(n1, &s[*n2 + 1 + (i__ << 2) - 5], &c__1, &dscale, &dsum);
/* L30: */
	}
	brqa21 = dscale * sqrt(dsum);

/*        Triangularize the B-part by a QR factorization. */
/*        Apply transformation (from right) to A-part, giving S. */

	sgeqr2_(&m, &m, tcpy, &c__4, taul, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sorm2r_("L", "T", &m, &m, &m, tcpy, &c__4, taul, scpy, &c__4, &work[1]
, info);
	sorm2r_("R", "N", &m, &m, &m, tcpy, &c__4, taul, licop, &c__4, &work[
		1], info);
	if (linfo != 0) {
	    goto L70;
	}

/*        Compute F-norm(S21) in BQRA21. (T21 is 0.) */

	dscale = 0.f;
	dsum = 1.f;
	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slassq_(n1, &scpy[*n2 + 1 + (i__ << 2) - 5], &c__1, &dscale, &
		    dsum);
/* L40: */
	}
	bqra21 = dscale * sqrt(dsum);

/*        Decide which method to use. */
/*          Weak stability test: */
/*             F-norm(S21) <= O(EPS * F-norm((S, T))) */

	if (bqra21 <= brqa21 && bqra21 <= thresh) {
	    slacpy_("F", &m, &m, scpy, &c__4, s, &c__4);
	    slacpy_("F", &m, &m, tcpy, &c__4, t, &c__4);
	    slacpy_("F", &m, &m, ircop, &c__4, ir, &c__4);
	    slacpy_("F", &m, &m, licop, &c__4, li, &c__4);
	} else if (brqa21 >= thresh) {
	    goto L70;
	}

/*        Set lower triangle of B-part to zero */

	i__1 = m - 1;
	i__2 = m - 1;
	slaset_("Lower", &i__1, &i__2, &c_b5, &c_b5, &t[1], &c__4);

	if (TRUE_) {

/*           Strong stability test: */
/*              F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) */

	    slacpy_("Full", &m, &m, &a[*j1 + *j1 * a_dim1], lda, &work[m * m 
		    + 1], &m);
	    sgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, s, &c__4, &c_b5, &
		    work[1], &m);
	    sgemm_("N", "N", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, &
		    c_b42, &work[m * m + 1], &m);
	    dscale = 0.f;
	    dsum = 1.f;
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);

	    slacpy_("Full", &m, &m, &b[*j1 + *j1 * b_dim1], ldb, &work[m * m 
		    + 1], &m);
	    sgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, t, &c__4, &c_b5, &
		    work[1], &m);
	    sgemm_("N", "N", &m, &m, &m, &c_b48, &work[1], &m, ir, &c__4, &
		    c_b42, &work[m * m + 1], &m);
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);
	    ss = dscale * sqrt(dsum);
	    strong = ss <= thresh;
	    if (! strong) {
		goto L70;
	    }

	}

/*        If the swap is accepted ("weakly" and "strongly"), apply the */
/*        transformations and set N1-by-N2 (2,1)-block to zero. */

	slaset_("Full", n1, n2, &c_b5, &c_b5, &s[*n2], &c__4);

/*        copy back M-by-M diagonal block starting at index J1 of (A, B) */

	slacpy_("F", &m, &m, s, &c__4, &a[*j1 + *j1 * a_dim1], lda)
		;
	slacpy_("F", &m, &m, t, &c__4, &b[*j1 + *j1 * b_dim1], ldb)
		;
	slaset_("Full", &c__4, &c__4, &c_b5, &c_b5, t, &c__4);

/*        Standardize existing 2-by-2 blocks. */

	i__1 = m * m;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    work[i__] = 0.f;
/* L50: */
	}
	work[1] = 1.f;
	t[0] = 1.f;
	idum = *lwork - m * m - 2;
	if (*n2 > 1) {
	    slagv2_(&a[*j1 + *j1 * a_dim1], lda, &b[*j1 + *j1 * b_dim1], ldb, 
		    ar, ai, be, &work[1], &work[2], t, &t[1]);
	    work[m + 1] = -work[2];
	    work[m + 2] = work[1];
	    t[*n2 + (*n2 << 2) - 5] = t[0];
	    t[4] = -t[1];
	}
	work[m * m] = 1.f;
	t[m + (m << 2) - 5] = 1.f;

	if (*n1 > 1) {
	    slagv2_(&a[*j1 + *n2 + (*j1 + *n2) * a_dim1], lda, &b[*j1 + *n2 + 
		    (*j1 + *n2) * b_dim1], ldb, taur, taul, &work[m * m + 1], 
		    &work[*n2 * m + *n2 + 1], &work[*n2 * m + *n2 + 2], &t[*
		    n2 + 1 + (*n2 + 1 << 2) - 5], &t[m + (m - 1 << 2) - 5]);
	    work[m * m] = work[*n2 * m + *n2 + 1];
	    work[m * m - 1] = -work[*n2 * m + *n2 + 2];
	    t[m + (m << 2) - 5] = t[*n2 + 1 + (*n2 + 1 << 2) - 5];
	    t[m - 1 + (m << 2) - 5] = -t[m + (m - 1 << 2) - 5];
	}
	sgemm_("T", "N", n2, n1, n2, &c_b42, &work[1], &m, &a[*j1 + (*j1 + *
		n2) * a_dim1], lda, &c_b5, &work[m * m + 1], n2);
	slacpy_("Full", n2, n1, &work[m * m + 1], n2, &a[*j1 + (*j1 + *n2) * 
		a_dim1], lda);
	sgemm_("T", "N", n2, n1, n2, &c_b42, &work[1], &m, &b[*j1 + (*j1 + *
		n2) * b_dim1], ldb, &c_b5, &work[m * m + 1], n2);
	slacpy_("Full", n2, n1, &work[m * m + 1], n2, &b[*j1 + (*j1 + *n2) * 
		b_dim1], ldb);
	sgemm_("N", "N", &m, &m, &m, &c_b42, li, &c__4, &work[1], &m, &c_b5, &
		work[m * m + 1], &m);
	slacpy_("Full", &m, &m, &work[m * m + 1], &m, li, &c__4);
	sgemm_("N", "N", n2, n1, n1, &c_b42, &a[*j1 + (*j1 + *n2) * a_dim1], 
		lda, &t[*n2 + 1 + (*n2 + 1 << 2) - 5], &c__4, &c_b5, &work[1], 
		 n2);
	slacpy_("Full", n2, n1, &work[1], n2, &a[*j1 + (*j1 + *n2) * a_dim1], 
		lda);
	sgemm_("N", "N", n2, n1, n1, &c_b42, &b[*j1 + (*j1 + *n2) * b_dim1], 
		ldb, &t[*n2 + 1 + (*n2 + 1 << 2) - 5], &c__4, &c_b5, &work[1], 
		 n2);
	slacpy_("Full", n2, n1, &work[1], n2, &b[*j1 + (*j1 + *n2) * b_dim1], 
		ldb);
	sgemm_("T", "N", &m, &m, &m, &c_b42, ir, &c__4, t, &c__4, &c_b5, &
		work[1], &m);
	slacpy_("Full", &m, &m, &work[1], &m, ir, &c__4);

/*        Accumulate transformations into Q and Z if requested. */

	if (*wantq) {
	    sgemm_("N", "N", n, &m, &m, &c_b42, &q[*j1 * q_dim1 + 1], ldq, li, 
		     &c__4, &c_b5, &work[1], n);
	    slacpy_("Full", n, &m, &work[1], n, &q[*j1 * q_dim1 + 1], ldq);

	}

	if (*wantz) {
	    sgemm_("N", "N", n, &m, &m, &c_b42, &z__[*j1 * z_dim1 + 1], ldz, 
		    ir, &c__4, &c_b5, &work[1], n);
	    slacpy_("Full", n, &m, &work[1], n, &z__[*j1 * z_dim1 + 1], ldz);

	}

/*        Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and */
/*                (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */

	i__ = *j1 + m;
	if (i__ <= *n) {
	    i__1 = *n - i__ + 1;
	    sgemm_("T", "N", &m, &i__1, &m, &c_b42, li, &c__4, &a[*j1 + i__ * 
		    a_dim1], lda, &c_b5, &work[1], &m);
	    i__1 = *n - i__ + 1;
	    slacpy_("Full", &m, &i__1, &work[1], &m, &a[*j1 + i__ * a_dim1], 
		    lda);
	    i__1 = *n - i__ + 1;
	    sgemm_("T", "N", &m, &i__1, &m, &c_b42, li, &c__4, &b[*j1 + i__ * 
		    b_dim1], ldb, &c_b5, &work[1], &m);
	    i__1 = *n - i__ + 1;
	    slacpy_("Full", &m, &i__1, &work[1], &m, &b[*j1 + i__ * b_dim1], 
		    ldb);
	}
	i__ = *j1 - 1;
	if (i__ > 0) {
	    sgemm_("N", "N", &i__, &m, &m, &c_b42, &a[*j1 * a_dim1 + 1], lda, 
		    ir, &c__4, &c_b5, &work[1], &i__);
	    slacpy_("Full", &i__, &m, &work[1], &i__, &a[*j1 * a_dim1 + 1], 
		    lda);
	    sgemm_("N", "N", &i__, &m, &m, &c_b42, &b[*j1 * b_dim1 + 1], ldb, 
		    ir, &c__4, &c_b5, &work[1], &i__);
	    slacpy_("Full", &i__, &m, &work[1], &i__, &b[*j1 * b_dim1 + 1], 
		    ldb);
	}

/*        Exit with INFO = 0 if swap was successfully performed. */

	return 0;

    }

/*     Exit with INFO = 1 if swap was rejected. */

L70:

    *info = 1;
    return 0;

/*     End of STGEX2 */

} /* stgex2_ */
Пример #30
0
/* Subroutine */ int check3_(real *sfac)
{
    /* Initialized data */

    static integer incxs[4] = { 1,2,-2,-1 };
    static integer incys[4] = { 1,-2,1,-2 };
    static integer lens[8]	/* was [4][2] */ = { 1,1,2,4,1,1,3,7 };
    static integer ns[4] = { 0,1,2,4 };
    static real dx1[7] = { .6f,.1f,-.5f,.8f,.9f,-.3f,-.4f };
    static real dy1[7] = { .5f,-.9f,.3f,.7f,-.6f,.2f,.8f };
    static real sc = .8f;
    static real ss = .6f;
    static real dt9x[112]	/* was [7][4][4] */ = { .6f,0.f,0.f,0.f,0.f,
	    0.f,0.f,.78f,0.f,0.f,0.f,0.f,0.f,0.f,.78f,-.46f,0.f,0.f,0.f,0.f,
	    0.f,.78f,-.46f,-.22f,1.06f,0.f,0.f,0.f,.6f,0.f,0.f,0.f,0.f,0.f,
	    0.f,.78f,0.f,0.f,0.f,0.f,0.f,0.f,.66f,.1f,-.1f,0.f,0.f,0.f,0.f,
	    .96f,.1f,-.76f,.8f,.9f,-.3f,-.02f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,
	    .78f,0.f,0.f,0.f,0.f,0.f,0.f,-.06f,.1f,-.1f,0.f,0.f,0.f,0.f,.9f,
	    .1f,-.22f,.8f,.18f,-.3f,-.02f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.78f,
	    0.f,0.f,0.f,0.f,0.f,0.f,.78f,.26f,0.f,0.f,0.f,0.f,0.f,.78f,.26f,
	    -.76f,1.12f,0.f,0.f,0.f };
    static real dt9y[112]	/* was [7][4][4] */ = { .5f,0.f,0.f,0.f,0.f,
	    0.f,0.f,.04f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,-.78f,0.f,0.f,0.f,0.f,
	    0.f,.04f,-.78f,.54f,.08f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,
	    .04f,0.f,0.f,0.f,0.f,0.f,0.f,.7f,-.9f,-.12f,0.f,0.f,0.f,0.f,.64f,
	    -.9f,-.3f,.7f,-.18f,.2f,.28f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,0.f,
	    0.f,0.f,0.f,0.f,0.f,.7f,-1.08f,0.f,0.f,0.f,0.f,0.f,.64f,-1.26f,
	    .54f,.2f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,0.f,0.f,0.f,
	    0.f,0.f,0.f,.04f,-.9f,.18f,0.f,0.f,0.f,0.f,.04f,-.9f,.18f,.7f,
	    -.18f,.2f,.16f };
    static real ssize2[28]	/* was [14][2] */ = { 0.f,0.f,0.f,0.f,0.f,0.f,
	    0.f,0.f,0.f,0.f,0.f,0.f,0.f,0.f,1.17f,1.17f,1.17f,1.17f,1.17f,
	    1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f };

    /* Builtin functions */
    integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), 
	    e_wsle(void);
    /* Subroutine */ int s_stop(char *, ftnlen);

    /* Local variables */
    integer i__, k, ki, kn, mx, my;
    real sx[7], sy[7], stx[7], sty[7];
    integer lenx, leny;
    real mwpc[11];
    integer mwpn[11];
    real mwps[11];
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    real mwpx[5], mwpy[5];
    integer ksize;
    real copyx[5], copyy[5];
    extern /* Subroutine */ int stest_(integer *, real *, real *, real *, 
	    real *);
    real mwptx[55]	/* was [11][5] */, mwpty[55]	/* was [11][5] */;
    integer mwpinx[11], mwpiny[11];
    real mwpstx[5], mwpsty[5];

    /* Fortran I/O blocks */
    static cilist io___88 = { 0, 6, 0, 0, 0 };


/*     .. Parameters .. */
/*     .. Scalar Arguments .. */
/*     .. Scalars in Common .. */
/*     .. Local Scalars .. */
/*     .. Local Arrays .. */
/*     .. External Subroutines .. */
/*     .. Intrinsic Functions .. */
/*     .. Common blocks .. */
/*     .. Data statements .. */
/*     .. Executable Statements .. */

    for (ki = 1; ki <= 4; ++ki) {
	combla_1.incx = incxs[ki - 1];
	combla_1.incy = incys[ki - 1];
	mx = abs(combla_1.incx);
	my = abs(combla_1.incy);

	for (kn = 1; kn <= 4; ++kn) {
	    combla_1.n = ns[kn - 1];
	    ksize = min(2,kn);
	    lenx = lens[kn + (mx << 2) - 5];
	    leny = lens[kn + (my << 2) - 5];

	    if (combla_1.icase == 4) {
/*              .. SROT .. */
		for (i__ = 1; i__ <= 7; ++i__) {
		    sx[i__ - 1] = dx1[i__ - 1];
		    sy[i__ - 1] = dy1[i__ - 1];
		    stx[i__ - 1] = dt9x[i__ + (kn + (ki << 2)) * 7 - 36];
		    sty[i__ - 1] = dt9y[i__ + (kn + (ki << 2)) * 7 - 36];
/* L20: */
		}
		srot_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy, &
			sc, &ss);
		stest_(&lenx, sx, stx, &ssize2[ksize * 14 - 14], sfac);
		stest_(&leny, sy, sty, &ssize2[ksize * 14 - 14], sfac);
	    } else {
		s_wsle(&io___88);
		do_lio(&c__9, &c__1, " Shouldn't be here in CHECK3", (ftnlen)
			28);
		e_wsle();
		s_stop("", (ftnlen)0);
	    }
/* L40: */
	}
/* L60: */
    }

    mwpc[0] = 1.f;
    for (i__ = 2; i__ <= 11; ++i__) {
	mwpc[i__ - 1] = 0.f;
/* L80: */
    }
    mwps[0] = 0.f;
    for (i__ = 2; i__ <= 6; ++i__) {
	mwps[i__ - 1] = 1.f;
/* L100: */
    }
    for (i__ = 7; i__ <= 11; ++i__) {
	mwps[i__ - 1] = -1.f;
/* L120: */
    }
    mwpinx[0] = 1;
    mwpinx[1] = 1;
    mwpinx[2] = 1;
    mwpinx[3] = -1;
    mwpinx[4] = 1;
    mwpinx[5] = -1;
    mwpinx[6] = 1;
    mwpinx[7] = 1;
    mwpinx[8] = -1;
    mwpinx[9] = 1;
    mwpinx[10] = -1;
    mwpiny[0] = 1;
    mwpiny[1] = 1;
    mwpiny[2] = -1;
    mwpiny[3] = -1;
    mwpiny[4] = 2;
    mwpiny[5] = 1;
    mwpiny[6] = 1;
    mwpiny[7] = -1;
    mwpiny[8] = -1;
    mwpiny[9] = 2;
    mwpiny[10] = 1;
    for (i__ = 1; i__ <= 11; ++i__) {
	mwpn[i__ - 1] = 5;
/* L140: */
    }
    mwpn[4] = 3;
    mwpn[9] = 3;
    for (i__ = 1; i__ <= 5; ++i__) {
	mwpx[i__ - 1] = (real) i__;
	mwpy[i__ - 1] = (real) i__;
	mwptx[i__ * 11 - 11] = (real) i__;
	mwpty[i__ * 11 - 11] = (real) i__;
	mwptx[i__ * 11 - 10] = (real) i__;
	mwpty[i__ * 11 - 10] = (real) (-i__);
	mwptx[i__ * 11 - 9] = (real) (6 - i__);
	mwpty[i__ * 11 - 9] = (real) (i__ - 6);
	mwptx[i__ * 11 - 8] = (real) i__;
	mwpty[i__ * 11 - 8] = (real) (-i__);
	mwptx[i__ * 11 - 6] = (real) (6 - i__);
	mwpty[i__ * 11 - 6] = (real) (i__ - 6);
	mwptx[i__ * 11 - 5] = (real) (-i__);
	mwpty[i__ * 11 - 5] = (real) i__;
	mwptx[i__ * 11 - 4] = (real) (i__ - 6);
	mwpty[i__ * 11 - 4] = (real) (6 - i__);
	mwptx[i__ * 11 - 3] = (real) (-i__);
	mwpty[i__ * 11 - 3] = (real) i__;
	mwptx[i__ * 11 - 1] = (real) (i__ - 6);
	mwpty[i__ * 11 - 1] = (real) (6 - i__);
/* L160: */
    }
    mwptx[4] = 1.f;
    mwptx[15] = 3.f;
    mwptx[26] = 5.f;
    mwptx[37] = 4.f;
    mwptx[48] = 5.f;
    mwpty[4] = -1.f;
    mwpty[15] = 2.f;
    mwpty[26] = -2.f;
    mwpty[37] = 4.f;
    mwpty[48] = -3.f;
    mwptx[9] = -1.f;
    mwptx[20] = -3.f;
    mwptx[31] = -5.f;
    mwptx[42] = 4.f;
    mwptx[53] = 5.f;
    mwpty[9] = 1.f;
    mwpty[20] = 2.f;
    mwpty[31] = 2.f;
    mwpty[42] = 4.f;
    mwpty[53] = 3.f;
    for (i__ = 1; i__ <= 11; ++i__) {
	combla_1.incx = mwpinx[i__ - 1];
	combla_1.incy = mwpiny[i__ - 1];
	for (k = 1; k <= 5; ++k) {
	    copyx[k - 1] = mwpx[k - 1];
	    copyy[k - 1] = mwpy[k - 1];
	    mwpstx[k - 1] = mwptx[i__ + k * 11 - 12];
	    mwpsty[k - 1] = mwpty[i__ + k * 11 - 12];
/* L180: */
	}
	srot_(&mwpn[i__ - 1], copyx, &combla_1.incx, copyy, &combla_1.incy, &
		mwpc[i__ - 1], &mwps[i__ - 1]);
	stest_(&c__5, copyx, mwpstx, mwpstx, sfac);
	stest_(&c__5, copyy, mwpsty, mwpsty, sfac);
/* L200: */
    }
    return 0;
} /* check3_ */