예제 #1
0
파일: clalsa.c 프로젝트: MichaelH13/sdkpub
/* Subroutine */ int clalsa_(integer *icompq, integer *smlsiz, integer *n, 
	integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, 
	real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, 
	real *z__, real *poles, integer *givptr, integer *givcol, integer *
	ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, 
	integer *iwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CLALSA is an itermediate step in solving the least squares problem   
    by computing the SVD of the coefficient matrix in compact form (The   
    singular vectors are computed as products of simple orthorgonal   
    matrices.).   

    If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector   
    matrix of an upper bidiagonal matrix to the right hand side; and if   
    ICOMPQ = 1, CLALSA applies the right singular vector matrix to the   
    right hand side. The singular vector matrices were generated in   
    compact form by CLALSA.   

    Arguments   
    =========   

    ICOMPQ (input) INTEGER   
           Specifies whether the left or the right singular vector   
           matrix is involved.   
           = 0: Left singular vector matrix   
           = 1: Right singular vector matrix   

    SMLSIZ (input) INTEGER   
           The maximum size of the subproblems at the bottom of the   
           computation tree.   

    N      (input) INTEGER   
           The row and column dimensions of the upper bidiagonal matrix.   

    NRHS   (input) INTEGER   
           The number of columns of B and BX. NRHS must be at least 1.   

    B      (input) COMPLEX array, dimension ( LDB, NRHS )   
           On input, B contains the right hand sides of the least   
           squares problem in rows 1 through M. On output, B contains   
           the solution X in rows 1 through N.   

    LDB    (input) INTEGER   
           The leading dimension of B in the calling subprogram.   
           LDB must be at least max(1,MAX( M, N ) ).   

    BX     (output) COMPLEX array, dimension ( LDBX, NRHS )   
           On exit, the result of applying the left or right singular   
           vector matrix to B.   

    LDBX   (input) INTEGER   
           The leading dimension of BX.   

    U      (input) REAL array, dimension ( LDU, SMLSIZ ).   
           On entry, U contains the left singular vector matrices of all   
           subproblems at the bottom level.   

    LDU    (input) INTEGER, LDU = > N.   
           The leading dimension of arrays U, VT, DIFL, DIFR,   
           POLES, GIVNUM, and Z.   

    VT     (input) REAL array, dimension ( LDU, SMLSIZ+1 ).   
           On entry, VT' contains the right singular vector matrices of   
           all subproblems at the bottom level.   

    K      (input) INTEGER array, dimension ( N ).   

    DIFL   (input) REAL array, dimension ( LDU, NLVL ).   
           where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.   

    DIFR   (input) REAL array, dimension ( LDU, 2 * NLVL ).   
           On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record   
           distances between singular values on the I-th level and   
           singular values on the (I -1)-th level, and DIFR(*, 2 * I)   
           record the normalizing factors of the right singular vectors   
           matrices of subproblems on I-th level.   

    Z      (input) REAL array, dimension ( LDU, NLVL ).   
           On entry, Z(1, I) contains the components of the deflation-   
           adjusted updating row vector for subproblems on the I-th   
           level.   

    POLES  (input) REAL array, dimension ( LDU, 2 * NLVL ).   
           On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old   
           singular values involved in the secular equations on the I-th   
           level.   

    GIVPTR (input) INTEGER array, dimension ( N ).   
           On entry, GIVPTR( I ) records the number of Givens   
           rotations performed on the I-th problem on the computation   
           tree.   

    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).   
           On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the   
           locations of Givens rotations performed on the I-th level on   
           the computation tree.   

    LDGCOL (input) INTEGER, LDGCOL = > N.   
           The leading dimension of arrays GIVCOL and PERM.   

    PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).   
           On entry, PERM(*, I) records permutations done on the I-th   
           level of the computation tree.   

    GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ).   
           On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-   
           values of Givens rotations performed on the I-th level on the   
           computation tree.   

    C      (input) REAL array, dimension ( N ).   
           On entry, if the I-th subproblem is not square,   
           C( I ) contains the C-value of a Givens rotation related to   
           the right null space of the I-th subproblem.   

    S      (input) REAL array, dimension ( N ).   
           On entry, if the I-th subproblem is not square,   
           S( I ) contains the S-value of a Givens rotation related to   
           the right null space of the I-th subproblem.   

    RWORK  (workspace) REAL array, dimension at least   
           max ( N, (SMLSZ+1)*NRHS*3 ).   

    IWORK  (workspace) INTEGER array.   
           The dimension must be at least 3 * N   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    Based on contributions by   
       Ming Gu and Ren-Cang Li, Computer Science Division, University of   
         California at Berkeley, USA   
       Osni Marques, LBNL/NERSC, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static real c_b9 = 1.f;
    static real c_b10 = 0.f;
    static integer c__2 = 2;
    
    /* System generated locals */
    integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, 
	    difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, 
	    poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, 
	    z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, 
	    i__2, i__3, i__4, i__5, i__6;
    complex q__1;
    /* Builtin functions */
    double r_imag(complex *);
    integer pow_ii(integer *, integer *);
    /* Local variables */
    static integer jcol, nlvl, sqre, jrow, i__, j, jimag, jreal, inode, ndiml;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static integer ndimr;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *);
    static integer i1;
    extern /* Subroutine */ int clals0_(integer *, integer *, integer *, 
	    integer *, integer *, complex *, integer *, complex *, integer *, 
	    integer *, integer *, integer *, integer *, real *, integer *, 
	    real *, real *, real *, real *, integer *, real *, real *, real *,
	     integer *);
    static integer ic, lf, nd, ll, nl, nr;
    extern /* Subroutine */ int xerbla_(char *, integer *), slasdt_(
	    integer *, integer *, integer *, integer *, integer *, integer *, 
	    integer *);
    static integer im1, nlf, nrf, lvl, ndb1, nlp1, lvl2, nrp1;
#define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1]
#define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1]
#define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
#define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1]
#define bx_subscr(a_1,a_2) (a_2)*bx_dim1 + a_1
#define bx_ref(a_1,a_2) bx[bx_subscr(a_1,a_2)]
#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]
#define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1]
#define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1]


    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    bx_dim1 = *ldbx;
    bx_offset = 1 + bx_dim1 * 1;
    bx -= bx_offset;
    givnum_dim1 = *ldu;
    givnum_offset = 1 + givnum_dim1 * 1;
    givnum -= givnum_offset;
    poles_dim1 = *ldu;
    poles_offset = 1 + poles_dim1 * 1;
    poles -= poles_offset;
    z_dim1 = *ldu;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    difr_dim1 = *ldu;
    difr_offset = 1 + difr_dim1 * 1;
    difr -= difr_offset;
    difl_dim1 = *ldu;
    difl_offset = 1 + difl_dim1 * 1;
    difl -= difl_offset;
    vt_dim1 = *ldu;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    --k;
    --givptr;
    perm_dim1 = *ldgcol;
    perm_offset = 1 + perm_dim1 * 1;
    perm -= perm_offset;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1 * 1;
    givcol -= givcol_offset;
    --c__;
    --s;
    --rwork;
    --iwork;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*smlsiz < 3) {
	*info = -2;
    } else if (*n < *smlsiz) {
	*info = -3;
    } else if (*nrhs < 1) {
	*info = -4;
    } else if (*ldb < *n) {
	*info = -6;
    } else if (*ldbx < *n) {
	*info = -8;
    } else if (*ldu < *n) {
	*info = -10;
    } else if (*ldgcol < *n) {
	*info = -19;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CLALSA", &i__1);
	return 0;
    }

/*     Book-keeping and  setting up the computation tree. */

    inode = 1;
    ndiml = inode + *n;
    ndimr = ndiml + *n;

    slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], 
	    smlsiz);

/*     The following code applies back the left singular vector factors.   
       For applying back the right singular vector factors, go to 170. */

    if (*icompq == 1) {
	goto L170;
    }

/*     The nodes on the bottom level of the tree were solved   
       by SLASDQ. The corresponding left and right singular vector   
       matrices are in explicit form. First apply back the left   
       singular vector matrices. */

    ndb1 = (nd + 1) / 2;
    i__1 = nd;
    for (i__ = ndb1; i__ <= i__1; ++i__) {

/*        IC : center row of each node   
          NL : number of rows of left  subproblem   
          NR : number of rows of right subproblem   
          NLF: starting row of the left   subproblem   
          NRF: starting row of the right  subproblem */

	i1 = i__ - 1;
	ic = iwork[inode + i1];
	nl = iwork[ndiml + i1];
	nr = iwork[ndimr + i1];
	nlf = ic - nl;
	nrf = ic + 1;

/*        Since B and BX are complex, the following call to SGEMM   
          is performed in two steps (real and imaginary parts).   

          CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,   
       $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */

	j = nl * *nrhs << 1;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nlf + nl - 1;
	    for (jrow = nlf; jrow <= i__3; ++jrow) {
		++j;
		i__4 = b_subscr(jrow, jcol);
		rwork[j] = b[i__4].r;
/* L10: */
	    }
/* L20: */
	}
	sgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &rwork[(
		nl * *nrhs << 1) + 1], &nl, &c_b10, &rwork[1], &nl);
	j = nl * *nrhs << 1;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nlf + nl - 1;
	    for (jrow = nlf; jrow <= i__3; ++jrow) {
		++j;
		rwork[j] = r_imag(&b_ref(jrow, jcol));
/* L30: */
	    }
/* L40: */
	}
	sgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &rwork[(
		nl * *nrhs << 1) + 1], &nl, &c_b10, &rwork[nl * *nrhs + 1], &
		nl);
	jreal = 0;
	jimag = nl * *nrhs;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nlf + nl - 1;
	    for (jrow = nlf; jrow <= i__3; ++jrow) {
		++jreal;
		++jimag;
		i__4 = bx_subscr(jrow, jcol);
		i__5 = jreal;
		i__6 = jimag;
		q__1.r = rwork[i__5], q__1.i = rwork[i__6];
		bx[i__4].r = q__1.r, bx[i__4].i = q__1.i;
/* L50: */
	    }
/* L60: */
	}

/*        Since B and BX are complex, the following call to SGEMM   
          is performed in two steps (real and imaginary parts).   

          CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,   
      $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */

	j = nr * *nrhs << 1;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nrf + nr - 1;
	    for (jrow = nrf; jrow <= i__3; ++jrow) {
		++j;
		i__4 = b_subscr(jrow, jcol);
		rwork[j] = b[i__4].r;
/* L70: */
	    }
/* L80: */
	}
	sgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &rwork[(
		nr * *nrhs << 1) + 1], &nr, &c_b10, &rwork[1], &nr);
	j = nr * *nrhs << 1;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nrf + nr - 1;
	    for (jrow = nrf; jrow <= i__3; ++jrow) {
		++j;
		rwork[j] = r_imag(&b_ref(jrow, jcol));
/* L90: */
	    }
/* L100: */
	}
	sgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &rwork[(
		nr * *nrhs << 1) + 1], &nr, &c_b10, &rwork[nr * *nrhs + 1], &
		nr);
	jreal = 0;
	jimag = nr * *nrhs;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nrf + nr - 1;
	    for (jrow = nrf; jrow <= i__3; ++jrow) {
		++jreal;
		++jimag;
		i__4 = bx_subscr(jrow, jcol);
		i__5 = jreal;
		i__6 = jimag;
		q__1.r = rwork[i__5], q__1.i = rwork[i__6];
		bx[i__4].r = q__1.r, bx[i__4].i = q__1.i;
/* L110: */
	    }
/* L120: */
	}

/* L130: */
    }

/*     Next copy the rows of B that correspond to unchanged rows   
       in the bidiagonal matrix to BX. */

    i__1 = nd;
    for (i__ = 1; i__ <= i__1; ++i__) {
	ic = iwork[inode + i__ - 1];
	ccopy_(nrhs, &b_ref(ic, 1), ldb, &bx_ref(ic, 1), ldbx);
/* L140: */
    }

/*     Finally go through the left singular vector matrices of all   
       the other subproblems bottom-up on the tree. */

    j = pow_ii(&c__2, &nlvl);
    sqre = 0;

    for (lvl = nlvl; lvl >= 1; --lvl) {
	lvl2 = (lvl << 1) - 1;

/*        find the first node LF and last node LL on   
          the current level LVL */

	if (lvl == 1) {
	    lf = 1;
	    ll = 1;
	} else {
	    i__1 = lvl - 1;
	    lf = pow_ii(&c__2, &i__1);
	    ll = (lf << 1) - 1;
	}
	i__1 = ll;
	for (i__ = lf; i__ <= i__1; ++i__) {
	    im1 = i__ - 1;
	    ic = iwork[inode + im1];
	    nl = iwork[ndiml + im1];
	    nr = iwork[ndimr + im1];
	    nlf = ic - nl;
	    nrf = ic + 1;
	    --j;
	    clals0_(icompq, &nl, &nr, &sqre, nrhs, &bx_ref(nlf, 1), ldbx, &
		    b_ref(nlf, 1), ldb, &perm_ref(nlf, lvl), &givptr[j], &
		    givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), 
		    ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), &
		    difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], &
		    s[j], &rwork[1], info);
/* L150: */
	}
/* L160: */
    }
    goto L330;

/*     ICOMPQ = 1: applying back the right singular vector factors. */

L170:

/*     First now go through the right singular vector matrices of all   
       the tree nodes top-down. */

    j = 0;
    i__1 = nlvl;
    for (lvl = 1; lvl <= i__1; ++lvl) {
	lvl2 = (lvl << 1) - 1;

/*        Find the first node LF and last node LL on   
          the current level LVL. */

	if (lvl == 1) {
	    lf = 1;
	    ll = 1;
	} else {
	    i__2 = lvl - 1;
	    lf = pow_ii(&c__2, &i__2);
	    ll = (lf << 1) - 1;
	}
	i__2 = lf;
	for (i__ = ll; i__ >= i__2; --i__) {
	    im1 = i__ - 1;
	    ic = iwork[inode + im1];
	    nl = iwork[ndiml + im1];
	    nr = iwork[ndimr + im1];
	    nlf = ic - nl;
	    nrf = ic + 1;
	    if (i__ == ll) {
		sqre = 0;
	    } else {
		sqre = 1;
	    }
	    ++j;
	    clals0_(icompq, &nl, &nr, &sqre, nrhs, &b_ref(nlf, 1), ldb, &
		    bx_ref(nlf, 1), ldbx, &perm_ref(nlf, lvl), &givptr[j], &
		    givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), 
		    ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), &
		    difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], &
		    s[j], &rwork[1], info);
/* L180: */
	}
/* L190: */
    }

/*     The nodes on the bottom level of the tree were solved   
       by SLASDQ. The corresponding right singular vector   
       matrices are in explicit form. Apply them back. */

    ndb1 = (nd + 1) / 2;
    i__1 = nd;
    for (i__ = ndb1; i__ <= i__1; ++i__) {
	i1 = i__ - 1;
	ic = iwork[inode + i1];
	nl = iwork[ndiml + i1];
	nr = iwork[ndimr + i1];
	nlp1 = nl + 1;
	if (i__ == nd) {
	    nrp1 = nr;
	} else {
	    nrp1 = nr + 1;
	}
	nlf = ic - nl;
	nrf = ic + 1;

/*        Since B and BX are complex, the following call to SGEMM is   
          performed in two steps (real and imaginary parts).   

          CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,   
      $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */

	j = nlp1 * *nrhs << 1;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nlf + nlp1 - 1;
	    for (jrow = nlf; jrow <= i__3; ++jrow) {
		++j;
		i__4 = b_subscr(jrow, jcol);
		rwork[j] = b[i__4].r;
/* L200: */
	    }
/* L210: */
	}
	sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, &
		rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b10, &rwork[1], &
		nlp1);
	j = nlp1 * *nrhs << 1;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nlf + nlp1 - 1;
	    for (jrow = nlf; jrow <= i__3; ++jrow) {
		++j;
		rwork[j] = r_imag(&b_ref(jrow, jcol));
/* L220: */
	    }
/* L230: */
	}
	sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, &
		rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b10, &rwork[nlp1 * *
		nrhs + 1], &nlp1);
	jreal = 0;
	jimag = nlp1 * *nrhs;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nlf + nlp1 - 1;
	    for (jrow = nlf; jrow <= i__3; ++jrow) {
		++jreal;
		++jimag;
		i__4 = bx_subscr(jrow, jcol);
		i__5 = jreal;
		i__6 = jimag;
		q__1.r = rwork[i__5], q__1.i = rwork[i__6];
		bx[i__4].r = q__1.r, bx[i__4].i = q__1.i;
/* L240: */
	    }
/* L250: */
	}

/*        Since B and BX are complex, the following call to SGEMM is   
          performed in two steps (real and imaginary parts).   

          CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,   
      $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */

	j = nrp1 * *nrhs << 1;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nrf + nrp1 - 1;
	    for (jrow = nrf; jrow <= i__3; ++jrow) {
		++j;
		i__4 = b_subscr(jrow, jcol);
		rwork[j] = b[i__4].r;
/* L260: */
	    }
/* L270: */
	}
	sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, &
		rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b10, &rwork[1], &
		nrp1);
	j = nrp1 * *nrhs << 1;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nrf + nrp1 - 1;
	    for (jrow = nrf; jrow <= i__3; ++jrow) {
		++j;
		rwork[j] = r_imag(&b_ref(jrow, jcol));
/* L280: */
	    }
/* L290: */
	}
	sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, &
		rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b10, &rwork[nrp1 * *
		nrhs + 1], &nrp1);
	jreal = 0;
	jimag = nrp1 * *nrhs;
	i__2 = *nrhs;
	for (jcol = 1; jcol <= i__2; ++jcol) {
	    i__3 = nrf + nrp1 - 1;
	    for (jrow = nrf; jrow <= i__3; ++jrow) {
		++jreal;
		++jimag;
		i__4 = bx_subscr(jrow, jcol);
		i__5 = jreal;
		i__6 = jimag;
		q__1.r = rwork[i__5], q__1.i = rwork[i__6];
		bx[i__4].r = q__1.r, bx[i__4].i = q__1.i;
/* L300: */
	    }
/* L310: */
	}

/* L320: */
    }

L330:

    return 0;

/*     End of CLALSA */

} /* clalsa_ */
예제 #2
0
/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, 
	integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer 
	*ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, 
	doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, 
	integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, 
	doublereal *s, doublereal *work, integer *iwork, integer *info)
{
    /* System generated locals */
    integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, 
	    difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, 
	    poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, 
	    z_dim1, z_offset, i__1, i__2;

    /* Builtin functions */
    integer pow_ii(integer *, integer *);

    /* Local variables */
    static doublereal beta;
    static integer idxq, nlvl, i__, j, m;
    static doublereal alpha;
    static integer inode, ndiml, ndimr, idxqi, itemp;
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
	    doublereal *, integer *);
    static integer sqrei, i1;
    extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
	     doublereal *, integer *, integer *, integer *, integer *, 
	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
	    doublereal *, doublereal *, integer *, doublereal *, doublereal *,
	     doublereal *, integer *, integer *);
    static integer ic, nwork1, lf, nd, nwork2, ll, nl, vf, nr, vl;
    extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer 
	    *, integer *, integer *, doublereal *, doublereal *, doublereal *,
	     integer *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, integer *), dlasdt_(integer *, integer *, 
	    integer *, integer *, integer *, integer *, integer *), dlaset_(
	    char *, integer *, integer *, doublereal *, doublereal *, 
	    doublereal *, integer *), xerbla_(char *, integer *);
    static integer im1, smlszp, ncc, nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, 
	    nlp1, lvl2, nrp1;


#define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1]
#define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1]
#define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1]
#define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
#define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1]
#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]
#define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1]
#define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1]


/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    Using a divide and conquer approach, DLASDA computes the singular   
    value decomposition (SVD) of a real upper bidiagonal N-by-M matrix   
    B with diagonal D and offdiagonal E, where M = N + SQRE. The   
    algorithm computes the singular values in the SVD B = U * S * VT.   
    The orthogonal matrices U and VT are optionally computed in   
    compact form.   

    A related subroutine, DLASD0, computes the singular values and   
    the singular vectors in explicit form.   

    Arguments   
    =========   

    ICOMPQ (input) INTEGER   
           Specifies whether singular vectors are to be computed   
           in compact form, as follows   
           = 0: Compute singular values only.   
           = 1: Compute singular vectors of upper bidiagonal   
                matrix in compact form.   

    SMLSIZ (input) INTEGER   
           The maximum size of the subproblems at the bottom of the   
           computation tree.   

    N      (input) INTEGER   
           The row dimension of the upper bidiagonal matrix. This is   
           also the dimension of the main diagonal array D.   

    SQRE   (input) INTEGER   
           Specifies the column dimension of the bidiagonal matrix.   
           = 0: The bidiagonal matrix has column dimension M = N;   
           = 1: The bidiagonal matrix has column dimension M = N + 1.   

    D      (input/output) DOUBLE PRECISION array, dimension ( N )   
           On entry D contains the main diagonal of the bidiagonal   
           matrix. On exit D, if INFO = 0, contains its singular values.   

    E      (input) DOUBLE PRECISION array, dimension ( M-1 )   
           Contains the subdiagonal entries of the bidiagonal matrix.   
           On exit, E has been destroyed.   

    U      (output) DOUBLE PRECISION array,   
           dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced   
           if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left   
           singular vector matrices of all subproblems at the bottom   
           level.   

    LDU    (input) INTEGER, LDU = > N.   
           The leading dimension of arrays U, VT, DIFL, DIFR, POLES,   
           GIVNUM, and Z.   

    VT     (output) DOUBLE PRECISION array,   
           dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced   
           if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right   
           singular vector matrices of all subproblems at the bottom   
           level.   

    K      (output) INTEGER array,   
           dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0.   
           If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th   
           secular equation on the computation tree.   

    DIFL   (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ),   
           where NLVL = floor(log_2 (N/SMLSIZ))).   

    DIFR   (output) DOUBLE PRECISION array,   
                    dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and   
                    dimension ( N ) if ICOMPQ = 0.   
           If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1)   
           record distances between singular values on the I-th   
           level and singular values on the (I -1)-th level, and   
           DIFR(1:N, 2 * I ) contains the normalizing factors for   
           the right singular vector matrix. See DLASD8 for details.   

    Z      (output) DOUBLE PRECISION array,   
                    dimension ( LDU, NLVL ) if ICOMPQ = 1 and   
                    dimension ( N ) if ICOMPQ = 0.   
           The first K elements of Z(1, I) contain the components of   
           the deflation-adjusted updating row vector for subproblems   
           on the I-th level.   

    POLES  (output) DOUBLE PRECISION array,   
           dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced   
           if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and   
           POLES(1, 2*I) contain  the new and old singular values   
           involved in the secular equations on the I-th level.   

    GIVPTR (output) INTEGER array,   
           dimension ( N ) if ICOMPQ = 1, and not referenced if   
           ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records   
           the number of Givens rotations performed on the I-th   
           problem on the computation tree.   

    GIVCOL (output) INTEGER array,   
           dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not   
           referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,   
           GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations   
           of Givens rotations performed on the I-th level on the   
           computation tree.   

    LDGCOL (input) INTEGER, LDGCOL = > N.   
           The leading dimension of arrays GIVCOL and PERM.   

    PERM   (output) INTEGER array,   
           dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced   
           if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records   
           permutations done on the I-th level of the computation tree.   

    GIVNUM (output) DOUBLE PRECISION array,   
           dimension ( LDU,  2 * NLVL ) if ICOMPQ = 1, and not   
           referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,   
           GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S-   
           values of Givens rotations performed on the I-th level on   
           the computation tree.   

    C      (output) DOUBLE PRECISION array,   
           dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0.   
           If ICOMPQ = 1 and the I-th subproblem is not square, on exit,   
           C( I ) contains the C-value of a Givens rotation related to   
           the right null space of the I-th subproblem.   

    S      (output) DOUBLE PRECISION array, dimension ( N ) if   
           ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1   
           and the I-th subproblem is not square, on exit, S( I )   
           contains the S-value of a Givens rotation related to   
           the right null space of the I-th subproblem.   

    WORK   (workspace) DOUBLE PRECISION array, dimension   
           (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)).   

    IWORK  (workspace) INTEGER array.   
           Dimension must be at least (7 * N).   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if INFO = 1, an singular value did not converge   

    Further Details   
    ===============   

    Based on contributions by   
       Ming Gu and Huan Ren, Computer Science Division, University of   
       California at Berkeley, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --d__;
    --e;
    givnum_dim1 = *ldu;
    givnum_offset = 1 + givnum_dim1 * 1;
    givnum -= givnum_offset;
    poles_dim1 = *ldu;
    poles_offset = 1 + poles_dim1 * 1;
    poles -= poles_offset;
    z_dim1 = *ldu;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    difr_dim1 = *ldu;
    difr_offset = 1 + difr_dim1 * 1;
    difr -= difr_offset;
    difl_dim1 = *ldu;
    difl_offset = 1 + difl_dim1 * 1;
    difl -= difl_offset;
    vt_dim1 = *ldu;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    --k;
    --givptr;
    perm_dim1 = *ldgcol;
    perm_offset = 1 + perm_dim1 * 1;
    perm -= perm_offset;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1 * 1;
    givcol -= givcol_offset;
    --c__;
    --s;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*smlsiz < 3) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -4;
    } else if (*ldu < *n + *sqre) {
	*info = -8;
    } else if (*ldgcol < *n) {
	*info = -17;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DLASDA", &i__1);
	return 0;
    }

    m = *n + *sqre;

/*     If the input matrix is too small, call DLASDQ to find the SVD. */

    if (*n <= *smlsiz) {
	if (*icompq == 0) {
	    dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[
		    vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, &
		    work[1], info);
	} else {
	    dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset]
		    , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], 
		    info);
	}
	return 0;
    }

/*     Book-keeping and  set up the computation tree. */

    inode = 1;
    ndiml = inode + *n;
    ndimr = ndiml + *n;
    idxq = ndimr + *n;
    iwk = idxq + *n;

    ncc = 0;
    nru = 0;

    smlszp = *smlsiz + 1;
    vf = 1;
    vl = vf + m;
    nwork1 = vl + m;
    nwork2 = nwork1 + smlszp * smlszp;

    dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], 
	    smlsiz);

/*     for the nodes on bottom level of the tree, solve   
       their subproblems by DLASDQ. */

    ndb1 = (nd + 1) / 2;
    i__1 = nd;
    for (i__ = ndb1; i__ <= i__1; ++i__) {

/*        IC : center row of each node   
          NL : number of rows of left  subproblem   
          NR : number of rows of right subproblem   
          NLF: starting row of the left   subproblem   
          NRF: starting row of the right  subproblem */

	i1 = i__ - 1;
	ic = iwork[inode + i1];
	nl = iwork[ndiml + i1];
	nlp1 = nl + 1;
	nr = iwork[ndimr + i1];
	nlf = ic - nl;
	nrf = ic + 1;
	idxqi = idxq + nlf - 2;
	vfi = vf + nlf - 1;
	vli = vl + nlf - 1;
	sqrei = 1;
	if (*icompq == 0) {
	    dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &work[nwork1], &smlszp);
	    dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], &
		    work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], 
		    &nl, &work[nwork2], info);
	    itemp = nwork1 + nl * smlszp;
	    dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1);
	    dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1);
	} else {
	    dlaset_("A", &nl, &nl, &c_b11, &c_b12, &u_ref(nlf, 1), ldu);
	    dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &vt_ref(nlf, 1), ldu);
	    dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &
		    vt_ref(nlf, 1), ldu, &u_ref(nlf, 1), ldu, &u_ref(nlf, 1), 
		    ldu, &work[nwork1], info);
	    dcopy_(&nlp1, &vt_ref(nlf, 1), &c__1, &work[vfi], &c__1);
	    dcopy_(&nlp1, &vt_ref(nlf, nlp1), &c__1, &work[vli], &c__1);
	}
	if (*info != 0) {
	    return 0;
	}
	i__2 = nl;
	for (j = 1; j <= i__2; ++j) {
	    iwork[idxqi + j] = j;
/* L10: */
	}
	if (i__ == nd && *sqre == 0) {
	    sqrei = 0;
	} else {
	    sqrei = 1;
	}
	idxqi += nlp1;
	vfi += nlp1;
	vli += nlp1;
	nrp1 = nr + sqrei;
	if (*icompq == 0) {
	    dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &work[nwork1], &smlszp);
	    dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], &
		    work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], 
		    &nr, &work[nwork2], info);
	    itemp = nwork1 + (nrp1 - 1) * smlszp;
	    dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1);
	    dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1);
	} else {
	    dlaset_("A", &nr, &nr, &c_b11, &c_b12, &u_ref(nrf, 1), ldu);
	    dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &vt_ref(nrf, 1), ldu);
	    dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &
		    vt_ref(nrf, 1), ldu, &u_ref(nrf, 1), ldu, &u_ref(nrf, 1), 
		    ldu, &work[nwork1], info);
	    dcopy_(&nrp1, &vt_ref(nrf, 1), &c__1, &work[vfi], &c__1);
	    dcopy_(&nrp1, &vt_ref(nrf, nrp1), &c__1, &work[vli], &c__1);
	}
	if (*info != 0) {
	    return 0;
	}
	i__2 = nr;
	for (j = 1; j <= i__2; ++j) {
	    iwork[idxqi + j] = j;
/* L20: */
	}
/* L30: */
    }

/*     Now conquer each subproblem bottom-up. */

    j = pow_ii(&c__2, &nlvl);
    for (lvl = nlvl; lvl >= 1; --lvl) {
	lvl2 = (lvl << 1) - 1;

/*        Find the first node LF and last node LL on   
          the current level LVL. */

	if (lvl == 1) {
	    lf = 1;
	    ll = 1;
	} else {
	    i__1 = lvl - 1;
	    lf = pow_ii(&c__2, &i__1);
	    ll = (lf << 1) - 1;
	}
	i__1 = ll;
	for (i__ = lf; i__ <= i__1; ++i__) {
	    im1 = i__ - 1;
	    ic = iwork[inode + im1];
	    nl = iwork[ndiml + im1];
	    nr = iwork[ndimr + im1];
	    nlf = ic - nl;
	    nrf = ic + 1;
	    if (i__ == ll) {
		sqrei = *sqre;
	    } else {
		sqrei = 1;
	    }
	    vfi = vf + nlf - 1;
	    vli = vl + nlf - 1;
	    idxqi = idxq + nlf - 1;
	    alpha = d__[ic];
	    beta = e[ic];
	    if (*icompq == 0) {
		dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], &
			work[vli], &alpha, &beta, &iwork[idxqi], &perm[
			perm_offset], &givptr[1], &givcol[givcol_offset], 
			ldgcol, &givnum[givnum_offset], ldu, &poles[
			poles_offset], &difl[difl_offset], &difr[difr_offset],
			 &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1],
			 &iwork[iwk], info);
	    } else {
		--j;
		dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], &
			work[vli], &alpha, &beta, &iwork[idxqi], &perm_ref(
			nlf, lvl), &givptr[j], &givcol_ref(nlf, lvl2), ldgcol,
			 &givnum_ref(nlf, lvl2), ldu, &poles_ref(nlf, lvl2), &
			difl_ref(nlf, lvl), &difr_ref(nlf, lvl2), &z___ref(
			nlf, lvl), &k[j], &c__[j], &s[j], &work[nwork1], &
			iwork[iwk], info);
	    }
	    if (*info != 0) {
		return 0;
	    }
/* L40: */
	}
/* L50: */
    }

    return 0;

/*     End of DLASDA */

} /* dlasda_ */
예제 #3
0
/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer *
	ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, 
	doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, 
	doublereal *c__, integer *ldc, doublereal *work, integer *info)
{
    /* System generated locals */
    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, 
	    i__2;

    /* Local variables */
    static integer isub;
    static doublereal smin;
    static integer sqre1, i__, j;
    static doublereal r__;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, 
	    integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *
	    , doublereal *, integer *);
    static integer iuplo;
    static doublereal cs, sn;
    extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *), xerbla_(char *, 
	    integer *), dbdsqr_(char *, integer *, integer *, integer 
	    *, integer *, doublereal *, doublereal *, doublereal *, integer *,
	     doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    integer *);
    static logical rotate;
    static integer np1;


#define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
#define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]


/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    DLASDQ computes the singular value decomposition (SVD) of a real   
    (upper or lower) bidiagonal matrix with diagonal D and offdiagonal   
    E, accumulating the transformations if desired. Letting B denote   
    the input bidiagonal matrix, the algorithm computes orthogonal   
    matrices Q and P such that B = Q * S * P' (P' denotes the transpose   
    of P). The singular values S are overwritten on D.   

    The input matrix U  is changed to U  * Q  if desired.   
    The input matrix VT is changed to P' * VT if desired.   
    The input matrix C  is changed to Q' * C  if desired.   

    See "Computing  Small Singular Values of Bidiagonal Matrices With   
    Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,   
    LAPACK Working Note #3, for a detailed description of the algorithm.   

    Arguments   
    =========   

    UPLO  (input) CHARACTER*1   
          On entry, UPLO specifies whether the input bidiagonal matrix   
          is upper or lower bidiagonal, and wether it is square are   
          not.   
             UPLO = 'U' or 'u'   B is upper bidiagonal.   
             UPLO = 'L' or 'l'   B is lower bidiagonal.   

    SQRE  (input) INTEGER   
          = 0: then the input matrix is N-by-N.   
          = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and   
               (N+1)-by-N if UPLU = 'L'.   

          The bidiagonal matrix has   
          N = NL + NR + 1 rows and   
          M = N + SQRE >= N columns.   

    N     (input) INTEGER   
          On entry, N specifies the number of rows and columns   
          in the matrix. N must be at least 0.   

    NCVT  (input) INTEGER   
          On entry, NCVT specifies the number of columns of   
          the matrix VT. NCVT must be at least 0.   

    NRU   (input) INTEGER   
          On entry, NRU specifies the number of rows of   
          the matrix U. NRU must be at least 0.   

    NCC   (input) INTEGER   
          On entry, NCC specifies the number of columns of   
          the matrix C. NCC must be at least 0.   

    D     (input/output) DOUBLE PRECISION array, dimension (N)   
          On entry, D contains the diagonal entries of the   
          bidiagonal matrix whose SVD is desired. On normal exit,   
          D contains the singular values in ascending order.   

    E     (input/output) DOUBLE PRECISION array.   
          dimension is (N-1) if SQRE = 0 and N if SQRE = 1.   
          On entry, the entries of E contain the offdiagonal entries   
          of the bidiagonal matrix whose SVD is desired. On normal   
          exit, E will contain 0. If the algorithm does not converge,   
          D and E will contain the diagonal and superdiagonal entries   
          of a bidiagonal matrix orthogonally equivalent to the one   
          given as input.   

    VT    (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT)   
          On entry, contains a matrix which on exit has been   
          premultiplied by P', dimension N-by-NCVT if SQRE = 0   
          and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0).   

    LDVT  (input) INTEGER   
          On entry, LDVT specifies the leading dimension of VT as   
          declared in the calling (sub) program. LDVT must be at   
          least 1. If NCVT is nonzero LDVT must also be at least N.   

    U     (input/output) DOUBLE PRECISION array, dimension (LDU, N)   
          On entry, contains a  matrix which on exit has been   
          postmultiplied by Q, dimension NRU-by-N if SQRE = 0   
          and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0).   

    LDU   (input) INTEGER   
          On entry, LDU  specifies the leading dimension of U as   
          declared in the calling (sub) program. LDU must be at   
          least max( 1, NRU ) .   

    C     (input/output) DOUBLE PRECISION array, dimension (LDC, NCC)   
          On entry, contains an N-by-NCC matrix which on exit   
          has been premultiplied by Q'  dimension N-by-NCC if SQRE = 0   
          and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0).   

    LDC   (input) INTEGER   
          On entry, LDC  specifies the leading dimension of C as   
          declared in the calling (sub) program. LDC must be at   
          least 1. If NCC is nonzero, LDC must also be at least N.   

    WORK  (workspace) DOUBLE PRECISION array, dimension (4*N)   
          Workspace. Only referenced if one of NCVT, NRU, or NCC is   
          nonzero, and if N is at least 2.   

    INFO  (output) INTEGER   
          On exit, a value of 0 indicates a successful exit.   
          If INFO < 0, argument number -INFO is illegal.   
          If INFO > 0, the algorithm did not converge, and INFO   
          specifies how many superdiagonals did not converge.   

    Further Details   
    ===============   

    Based on contributions by   
       Ming Gu and Huan Ren, Computer Science Division, University of   
       California at Berkeley, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --d__;
    --e;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    *info = 0;
    iuplo = 0;
    if (lsame_(uplo, "U")) {
	iuplo = 1;
    }
    if (lsame_(uplo, "L")) {
	iuplo = 2;
    }
    if (iuplo == 0) {
	*info = -1;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*ncvt < 0) {
	*info = -4;
    } else if (*nru < 0) {
	*info = -5;
    } else if (*ncc < 0) {
	*info = -6;
    } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) {
	*info = -10;
    } else if (*ldu < max(1,*nru)) {
	*info = -12;
    } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) {
	*info = -14;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DLASDQ", &i__1);
	return 0;
    }
    if (*n == 0) {
	return 0;
    }

/*     ROTATE is true if any singular vectors desired, false otherwise */

    rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;
    np1 = *n + 1;
    sqre1 = *sqre;

/*     If matrix non-square upper bidiagonal, rotate to be lower   
       bidiagonal.  The rotations are on the right. */

    if (iuplo == 1 && sqre1 == 1) {
	i__1 = *n - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
	    d__[i__] = r__;
	    e[i__] = sn * d__[i__ + 1];
	    d__[i__ + 1] = cs * d__[i__ + 1];
	    if (rotate) {
		work[i__] = cs;
		work[*n + i__] = sn;
	    }
/* L10: */
	}
	dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__);
	d__[*n] = r__;
	e[*n] = 0.;
	if (rotate) {
	    work[*n] = cs;
	    work[*n + *n] = sn;
	}
	iuplo = 2;
	sqre1 = 0;

/*        Update singular vectors if desired. */

	if (*ncvt > 0) {
	    dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[
		    vt_offset], ldvt);
	}
    }

/*     If matrix lower bidiagonal, rotate to be upper bidiagonal   
       by applying Givens rotations on the left. */

    if (iuplo == 2) {
	i__1 = *n - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
	    d__[i__] = r__;
	    e[i__] = sn * d__[i__ + 1];
	    d__[i__ + 1] = cs * d__[i__ + 1];
	    if (rotate) {
		work[i__] = cs;
		work[*n + i__] = sn;
	    }
/* L20: */
	}

/*        If matrix (N+1)-by-N lower bidiagonal, one additional   
          rotation is needed. */

	if (sqre1 == 1) {
	    dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__);
	    d__[*n] = r__;
	    if (rotate) {
		work[*n] = cs;
		work[*n + *n] = sn;
	    }
	}

/*        Update singular vectors if desired. */

	if (*nru > 0) {
	    if (sqre1 == 0) {
		dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[
			u_offset], ldu);
	    } else {
		dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[
			u_offset], ldu);
	    }
	}
	if (*ncc > 0) {
	    if (sqre1 == 0) {
		dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[
			c_offset], ldc);
	    } else {
		dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[
			c_offset], ldc);
	    }
	}
    }

/*     Call DBDSQR to compute the SVD of the reduced real   
       N-by-N upper bidiagonal matrix. */

    dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[
	    u_offset], ldu, &c__[c_offset], ldc, &work[1], info);

/*     Sort the singular values into ascending order (insertion sort on   
       singular values, but only one transposition per singular vector) */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        Scan for smallest D(I). */

	isub = i__;
	smin = d__[i__];
	i__2 = *n;
	for (j = i__ + 1; j <= i__2; ++j) {
	    if (d__[j] < smin) {
		isub = j;
		smin = d__[j];
	    }
/* L30: */
	}
	if (isub != i__) {

/*           Swap singular values and vectors. */

	    d__[isub] = d__[i__];
	    d__[i__] = smin;
	    if (*ncvt > 0) {
		dswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(i__, 1), ldvt);
	    }
	    if (*nru > 0) {
		dswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, i__), &c__1);
	    }
	    if (*ncc > 0) {
		dswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(i__, 1), ldc);
	    }
	}
/* L40: */
    }

    return 0;

/*     End of DLASDQ */

} /* dlasdq_ */
예제 #4
0
파일: zbdsqr.c 프로젝트: MichaelH13/sdkpub
/* Subroutine */ int zbdsqr_(char *uplo, integer *n, integer *ncvt, integer *
	nru, integer *ncc, doublereal *d__, doublereal *e, doublecomplex *vt, 
	integer *ldvt, doublecomplex *u, integer *ldu, doublecomplex *c__, 
	integer *ldc, doublereal *rwork, integer *info)
{
    /* System generated locals */
    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, 
	    i__2;
    doublereal d__1, d__2, d__3, d__4;

    /* Builtin functions */
    double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign(
	    doublereal *, doublereal *);

    /* Local variables */
    static doublereal abse;
    static integer idir;
    static doublereal abss;
    static integer oldm;
    static doublereal cosl;
    static integer isub, iter;
    static doublereal unfl, sinl, cosr, smin, smax, sinr;
    extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal 
	    *, doublereal *, doublereal *);
    static doublereal f, g, h__;
    static integer i__, j, m;
    static doublereal r__;
    extern logical lsame_(char *, char *);
    static doublereal oldcs;
    static integer oldll;
    static doublereal shift, sigmn, oldsn;
    static integer maxit;
    static doublereal sminl, sigmx;
    static logical lower;
    extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, 
	    integer *, doublereal *, doublereal *, doublecomplex *, integer *), zdrot_(integer *, doublecomplex *, 
	    integer *, doublecomplex *, integer *, doublereal *, doublereal *)
	    , zswap_(integer *, doublecomplex *, integer *, doublecomplex *, 
	    integer *), dlasq1_(integer *, doublereal *, doublereal *, 
	    doublereal *, integer *), dlasv2_(doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *);
    static doublereal cs;
    static integer ll;
    extern doublereal dlamch_(char *);
    static doublereal sn, mu;
    extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *), xerbla_(char *, 
	    integer *), zdscal_(integer *, doublereal *, 
	    doublecomplex *, integer *);
    static doublereal sminoa, thresh;
    static logical rotate;
    static doublereal sminlo;
    static integer nm1;
    static doublereal tolmul;
    static integer nm12, nm13, lll;
    static doublereal eps, sll, tol;


#define c___subscr(a_1,a_2) (a_2)*c_dim1 + a_1
#define c___ref(a_1,a_2) c__[c___subscr(a_1,a_2)]
#define u_subscr(a_1,a_2) (a_2)*u_dim1 + a_1
#define u_ref(a_1,a_2) u[u_subscr(a_1,a_2)]
#define vt_subscr(a_1,a_2) (a_2)*vt_dim1 + a_1
#define vt_ref(a_1,a_2) vt[vt_subscr(a_1,a_2)]


/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    ZBDSQR computes the singular value decomposition (SVD) of a real   
    N-by-N (upper or lower) bidiagonal matrix B:  B = Q * S * P' (P'   
    denotes the transpose of P), where S is a diagonal matrix with   
    non-negative diagonal elements (the singular values of B), and Q   
    and P are orthogonal matrices.   

    The routine computes S, and optionally computes U * Q, P' * VT,   
    or Q' * C, for given complex input matrices U, VT, and C.   

    See "Computing  Small Singular Values of Bidiagonal Matrices With   
    Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,   
    LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,   
    no. 5, pp. 873-912, Sept 1990) and   
    "Accurate singular values and differential qd algorithms," by   
    B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics   
    Department, University of California at Berkeley, July 1992   
    for a detailed description of the algorithm.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  B is upper bidiagonal;   
            = 'L':  B is lower bidiagonal.   

    N       (input) INTEGER   
            The order of the matrix B.  N >= 0.   

    NCVT    (input) INTEGER   
            The number of columns of the matrix VT. NCVT >= 0.   

    NRU     (input) INTEGER   
            The number of rows of the matrix U. NRU >= 0.   

    NCC     (input) INTEGER   
            The number of columns of the matrix C. NCC >= 0.   

    D       (input/output) DOUBLE PRECISION array, dimension (N)   
            On entry, the n diagonal elements of the bidiagonal matrix B.   
            On exit, if INFO=0, the singular values of B in decreasing   
            order.   

    E       (input/output) DOUBLE PRECISION array, dimension (N)   
            On entry, the elements of E contain the   
            offdiagonal elements of of the bidiagonal matrix whose SVD   
            is desired. On normal exit (INFO = 0), E is destroyed.   
            If the algorithm does not converge (INFO > 0), D and E   
            will contain the diagonal and superdiagonal elements of a   
            bidiagonal matrix orthogonally equivalent to the one given   
            as input. E(N) is used for workspace.   

    VT      (input/output) COMPLEX*16 array, dimension (LDVT, NCVT)   
            On entry, an N-by-NCVT matrix VT.   
            On exit, VT is overwritten by P' * VT.   
            VT is not referenced if NCVT = 0.   

    LDVT    (input) INTEGER   
            The leading dimension of the array VT.   
            LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.   

    U       (input/output) COMPLEX*16 array, dimension (LDU, N)   
            On entry, an NRU-by-N matrix U.   
            On exit, U is overwritten by U * Q.   
            U is not referenced if NRU = 0.   

    LDU     (input) INTEGER   
            The leading dimension of the array U.  LDU >= max(1,NRU).   

    C       (input/output) COMPLEX*16 array, dimension (LDC, NCC)   
            On entry, an N-by-NCC matrix C.   
            On exit, C is overwritten by Q' * C.   
            C is not referenced if NCC = 0.   

    LDC     (input) INTEGER   
            The leading dimension of the array C.   
            LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.   

    RWORK   (workspace) DOUBLE PRECISION array, dimension (4*N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  If INFO = -i, the i-th argument had an illegal value   
            > 0:  the algorithm did not converge; D and E contain the   
                  elements of a bidiagonal matrix which is orthogonally   
                  similar to the input matrix B;  if INFO = i, i   
                  elements of E have not converged to zero.   

    Internal Parameters   
    ===================   

    TOLMUL  DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8)))   
            TOLMUL controls the convergence criterion of the QR loop.   
            If it is positive, TOLMUL*EPS is the desired relative   
               precision in the computed singular values.   
            If it is negative, abs(TOLMUL*EPS*sigma_max) is the   
               desired absolute accuracy in the computed singular   
               values (corresponds to relative accuracy   
               abs(TOLMUL*EPS) in the largest singular value.   
            abs(TOLMUL) should be between 1 and 1/EPS, and preferably   
               between 10 (for fast convergence) and .1/EPS   
               (for there to be some accuracy in the results).   
            Default is to lose at either one eighth or 2 of the   
               available decimal digits in each computed singular value   
               (whichever is smaller).   

    MAXITR  INTEGER, default = 6   
            MAXITR controls the maximum number of passes of the   
            algorithm through its inner loop. The algorithms stops   
            (and so fails to converge) if the number of passes   
            through the inner loop exceeds MAXITR*N**2.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --d__;
    --e;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --rwork;

    /* Function Body */
    *info = 0;
    lower = lsame_(uplo, "L");
    if (! lsame_(uplo, "U") && ! lower) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*ncvt < 0) {
	*info = -3;
    } else if (*nru < 0) {
	*info = -4;
    } else if (*ncc < 0) {
	*info = -5;
    } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) {
	*info = -9;
    } else if (*ldu < max(1,*nru)) {
	*info = -11;
    } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) {
	*info = -13;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZBDSQR", &i__1);
	return 0;
    }
    if (*n == 0) {
	return 0;
    }
    if (*n == 1) {
	goto L160;
    }

/*     ROTATE is true if any singular vectors desired, false otherwise */

    rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;

/*     If no singular vectors desired, use qd algorithm */

    if (! rotate) {
	dlasq1_(n, &d__[1], &e[1], &rwork[1], info);
	return 0;
    }

    nm1 = *n - 1;
    nm12 = nm1 + nm1;
    nm13 = nm12 + nm1;
    idir = 0;

/*     Get machine constants */

    eps = dlamch_("Epsilon");
    unfl = dlamch_("Safe minimum");

/*     If matrix lower bidiagonal, rotate to be upper bidiagonal   
       by applying Givens rotations on the left */

    if (lower) {
	i__1 = *n - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
	    d__[i__] = r__;
	    e[i__] = sn * d__[i__ + 1];
	    d__[i__ + 1] = cs * d__[i__ + 1];
	    rwork[i__] = cs;
	    rwork[nm1 + i__] = sn;
/* L10: */
	}

/*        Update singular vectors if desired */

	if (*nru > 0) {
	    zlasr_("R", "V", "F", nru, n, &rwork[1], &rwork[*n], &u[u_offset],
		     ldu);
	}
	if (*ncc > 0) {
	    zlasr_("L", "V", "F", n, ncc, &rwork[1], &rwork[*n], &c__[
		    c_offset], ldc);
	}
    }

/*     Compute singular values to relative accuracy TOL   
       (By setting TOL to be negative, algorithm will compute   
       singular values to absolute accuracy ABS(TOL)*norm(input matrix))   

   Computing MAX   
   Computing MIN */
    d__3 = 100., d__4 = pow_dd(&eps, &c_b15);
    d__1 = 10., d__2 = min(d__3,d__4);
    tolmul = max(d__1,d__2);
    tol = tolmul * eps;

/*     Compute approximate maximum, minimum singular values */

    smax = 0.;
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
/* Computing MAX */
	d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1));
	smax = max(d__2,d__3);
/* L20: */
    }
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
/* Computing MAX */
	d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1));
	smax = max(d__2,d__3);
/* L30: */
    }
    sminl = 0.;
    if (tol >= 0.) {

/*        Relative accuracy desired */

	sminoa = abs(d__[1]);
	if (sminoa == 0.) {
	    goto L50;
	}
	mu = sminoa;
	i__1 = *n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1]
		    , abs(d__1))));
	    sminoa = min(sminoa,mu);
	    if (sminoa == 0.) {
		goto L50;
	    }
/* L40: */
	}
L50:
	sminoa /= sqrt((doublereal) (*n));
/* Computing MAX */
	d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl;
	thresh = max(d__1,d__2);
    } else {

/*        Absolute accuracy desired   

   Computing MAX */
	d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl;
	thresh = max(d__1,d__2);
    }

/*     Prepare for main iteration loop for the singular values   
       (MAXIT is the maximum number of passes through the inner   
       loop permitted before nonconvergence signalled.) */

    maxit = *n * 6 * *n;
    iter = 0;
    oldll = -1;
    oldm = -1;

/*     M points to last element of unconverged part of matrix */

    m = *n;

/*     Begin main iteration loop */

L60:

/*     Check for convergence or exceeding iteration count */

    if (m <= 1) {
	goto L160;
    }
    if (iter > maxit) {
	goto L200;
    }

/*     Find diagonal block of matrix to work on */

    if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) {
	d__[m] = 0.;
    }
    smax = (d__1 = d__[m], abs(d__1));
    smin = smax;
    i__1 = m - 1;
    for (lll = 1; lll <= i__1; ++lll) {
	ll = m - lll;
	abss = (d__1 = d__[ll], abs(d__1));
	abse = (d__1 = e[ll], abs(d__1));
	if (tol < 0. && abss <= thresh) {
	    d__[ll] = 0.;
	}
	if (abse <= thresh) {
	    goto L80;
	}
	smin = min(smin,abss);
/* Computing MAX */
	d__1 = max(smax,abss);
	smax = max(d__1,abse);
/* L70: */
    }
    ll = 0;
    goto L90;
L80:
    e[ll] = 0.;

/*     Matrix splits since E(LL) = 0 */

    if (ll == m - 1) {

/*        Convergence of bottom singular value, return to top of loop */

	--m;
	goto L60;
    }
L90:
    ++ll;

/*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero */

    if (ll == m - 1) {

/*        2 by 2 block, handle separately */

	dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr,
		 &sinl, &cosl);
	d__[m - 1] = sigmx;
	e[m - 1] = 0.;
	d__[m] = sigmn;

/*        Compute singular vectors, if desired */

	if (*ncvt > 0) {
	    zdrot_(ncvt, &vt_ref(m - 1, 1), ldvt, &vt_ref(m, 1), ldvt, &cosr, 
		    &sinr);
	}
	if (*nru > 0) {
	    zdrot_(nru, &u_ref(1, m - 1), &c__1, &u_ref(1, m), &c__1, &cosl, &
		    sinl);
	}
	if (*ncc > 0) {
	    zdrot_(ncc, &c___ref(m - 1, 1), ldc, &c___ref(m, 1), ldc, &cosl, &
		    sinl);
	}
	m += -2;
	goto L60;
    }

/*     If working on new submatrix, choose shift direction   
       (from larger end diagonal element towards smaller) */

    if (ll > oldm || m < oldll) {
	if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) {

/*           Chase bulge from top (big end) to bottom (small end) */

	    idir = 1;
	} else {

/*           Chase bulge from bottom (big end) to top (small end) */

	    idir = 2;
	}
    }

/*     Apply convergence tests */

    if (idir == 1) {

/*        Run convergence test in forward direction   
          First apply standard test to bottom of matrix */

	if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs(
		d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) 
		{
	    e[m - 1] = 0.;
	    goto L60;
	}

	if (tol >= 0.) {

/*           If relative accuracy desired,   
             apply convergence criterion forward */

	    mu = (d__1 = d__[ll], abs(d__1));
	    sminl = mu;
	    i__1 = m - 1;
	    for (lll = ll; lll <= i__1; ++lll) {
		if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
		    e[lll] = 0.;
		    goto L60;
		}
		sminlo = sminl;
		mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[
			lll], abs(d__1))));
		sminl = min(sminl,mu);
/* L100: */
	    }
	}

    } else {

/*        Run convergence test in backward direction   
          First apply standard test to top of matrix */

	if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1)
		) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) {
	    e[ll] = 0.;
	    goto L60;
	}

	if (tol >= 0.) {

/*           If relative accuracy desired,   
             apply convergence criterion backward */

	    mu = (d__1 = d__[m], abs(d__1));
	    sminl = mu;
	    i__1 = ll;
	    for (lll = m - 1; lll >= i__1; --lll) {
		if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
		    e[lll] = 0.;
		    goto L60;
		}
		sminlo = sminl;
		mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll]
			, abs(d__1))));
		sminl = min(sminl,mu);
/* L110: */
	    }
	}
    }
    oldll = ll;
    oldm = m;

/*     Compute shift.  First, test if shifting would ruin relative   
       accuracy, and if so set the shift to zero.   

   Computing MAX */
    d__1 = eps, d__2 = tol * .01;
    if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) {

/*        Use a zero shift to avoid loss of relative accuracy */

	shift = 0.;
    } else {

/*        Compute the shift from 2-by-2 block at end of matrix */

	if (idir == 1) {
	    sll = (d__1 = d__[ll], abs(d__1));
	    dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
	} else {
	    sll = (d__1 = d__[m], abs(d__1));
	    dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
	}

/*        Test if shift negligible, and if so set to zero */

	if (sll > 0.) {
/* Computing 2nd power */
	    d__1 = shift / sll;
	    if (d__1 * d__1 < eps) {
		shift = 0.;
	    }
	}
    }

/*     Increment iteration count */

    iter = iter + m - ll;

/*     If SHIFT = 0, do simplified QR iteration */

    if (shift == 0.) {
	if (idir == 1) {

/*           Chase bulge from top to bottom   
             Save cosines and sines for later singular vector updates */

	    cs = 1.;
	    oldcs = 1.;
	    i__1 = m - 1;
	    for (i__ = ll; i__ <= i__1; ++i__) {
		d__1 = d__[i__] * cs;
		dlartg_(&d__1, &e[i__], &cs, &sn, &r__);
		if (i__ > ll) {
		    e[i__ - 1] = oldsn * r__;
		}
		d__1 = oldcs * r__;
		d__2 = d__[i__ + 1] * sn;
		dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
		rwork[i__ - ll + 1] = cs;
		rwork[i__ - ll + 1 + nm1] = sn;
		rwork[i__ - ll + 1 + nm12] = oldcs;
		rwork[i__ - ll + 1 + nm13] = oldsn;
/* L120: */
	    }
	    h__ = d__[m] * cs;
	    d__[m] = h__ * oldcs;
	    e[m - 1] = h__ * oldsn;

/*           Update singular vectors */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		zlasr_("L", "V", "F", &i__1, ncvt, &rwork[1], &rwork[*n], &
			vt_ref(ll, 1), ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		zlasr_("R", "V", "F", nru, &i__1, &rwork[nm12 + 1], &rwork[
			nm13 + 1], &u_ref(1, ll), ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		zlasr_("L", "V", "F", &i__1, ncc, &rwork[nm12 + 1], &rwork[
			nm13 + 1], &c___ref(ll, 1), ldc);
	    }

/*           Test convergence */

	    if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
		e[m - 1] = 0.;
	    }

	} else {

/*           Chase bulge from bottom to top   
             Save cosines and sines for later singular vector updates */

	    cs = 1.;
	    oldcs = 1.;
	    i__1 = ll + 1;
	    for (i__ = m; i__ >= i__1; --i__) {
		d__1 = d__[i__] * cs;
		dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__);
		if (i__ < m) {
		    e[i__] = oldsn * r__;
		}
		d__1 = oldcs * r__;
		d__2 = d__[i__ - 1] * sn;
		dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
		rwork[i__ - ll] = cs;
		rwork[i__ - ll + nm1] = -sn;
		rwork[i__ - ll + nm12] = oldcs;
		rwork[i__ - ll + nm13] = -oldsn;
/* L130: */
	    }
	    h__ = d__[ll] * cs;
	    d__[ll] = h__ * oldcs;
	    e[ll] = h__ * oldsn;

/*           Update singular vectors */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		zlasr_("L", "V", "B", &i__1, ncvt, &rwork[nm12 + 1], &rwork[
			nm13 + 1], &vt_ref(ll, 1), ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		zlasr_("R", "V", "B", nru, &i__1, &rwork[1], &rwork[*n], &
			u_ref(1, ll), ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		zlasr_("L", "V", "B", &i__1, ncc, &rwork[1], &rwork[*n], &
			c___ref(ll, 1), ldc);
	    }

/*           Test convergence */

	    if ((d__1 = e[ll], abs(d__1)) <= thresh) {
		e[ll] = 0.;
	    }
	}
    } else {

/*        Use nonzero shift */

	if (idir == 1) {

/*           Chase bulge from top to bottom   
             Save cosines and sines for later singular vector updates */

	    f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[
		    ll]) + shift / d__[ll]);
	    g = e[ll];
	    i__1 = m - 1;
	    for (i__ = ll; i__ <= i__1; ++i__) {
		dlartg_(&f, &g, &cosr, &sinr, &r__);
		if (i__ > ll) {
		    e[i__ - 1] = r__;
		}
		f = cosr * d__[i__] + sinr * e[i__];
		e[i__] = cosr * e[i__] - sinr * d__[i__];
		g = sinr * d__[i__ + 1];
		d__[i__ + 1] = cosr * d__[i__ + 1];
		dlartg_(&f, &g, &cosl, &sinl, &r__);
		d__[i__] = r__;
		f = cosl * e[i__] + sinl * d__[i__ + 1];
		d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
		if (i__ < m - 1) {
		    g = sinl * e[i__ + 1];
		    e[i__ + 1] = cosl * e[i__ + 1];
		}
		rwork[i__ - ll + 1] = cosr;
		rwork[i__ - ll + 1 + nm1] = sinr;
		rwork[i__ - ll + 1 + nm12] = cosl;
		rwork[i__ - ll + 1 + nm13] = sinl;
/* L140: */
	    }
	    e[m - 1] = f;

/*           Update singular vectors */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		zlasr_("L", "V", "F", &i__1, ncvt, &rwork[1], &rwork[*n], &
			vt_ref(ll, 1), ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		zlasr_("R", "V", "F", nru, &i__1, &rwork[nm12 + 1], &rwork[
			nm13 + 1], &u_ref(1, ll), ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		zlasr_("L", "V", "F", &i__1, ncc, &rwork[nm12 + 1], &rwork[
			nm13 + 1], &c___ref(ll, 1), ldc);
	    }

/*           Test convergence */

	    if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
		e[m - 1] = 0.;
	    }

	} else {

/*           Chase bulge from bottom to top   
             Save cosines and sines for later singular vector updates */

	    f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b49, &d__[m]
		    ) + shift / d__[m]);
	    g = e[m - 1];
	    i__1 = ll + 1;
	    for (i__ = m; i__ >= i__1; --i__) {
		dlartg_(&f, &g, &cosr, &sinr, &r__);
		if (i__ < m) {
		    e[i__] = r__;
		}
		f = cosr * d__[i__] + sinr * e[i__ - 1];
		e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
		g = sinr * d__[i__ - 1];
		d__[i__ - 1] = cosr * d__[i__ - 1];
		dlartg_(&f, &g, &cosl, &sinl, &r__);
		d__[i__] = r__;
		f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
		d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
		if (i__ > ll + 1) {
		    g = sinl * e[i__ - 2];
		    e[i__ - 2] = cosl * e[i__ - 2];
		}
		rwork[i__ - ll] = cosr;
		rwork[i__ - ll + nm1] = -sinr;
		rwork[i__ - ll + nm12] = cosl;
		rwork[i__ - ll + nm13] = -sinl;
/* L150: */
	    }
	    e[ll] = f;

/*           Test convergence */

	    if ((d__1 = e[ll], abs(d__1)) <= thresh) {
		e[ll] = 0.;
	    }

/*           Update singular vectors if desired */

	    if (*ncvt > 0) {
		i__1 = m - ll + 1;
		zlasr_("L", "V", "B", &i__1, ncvt, &rwork[nm12 + 1], &rwork[
			nm13 + 1], &vt_ref(ll, 1), ldvt);
	    }
	    if (*nru > 0) {
		i__1 = m - ll + 1;
		zlasr_("R", "V", "B", nru, &i__1, &rwork[1], &rwork[*n], &
			u_ref(1, ll), ldu);
	    }
	    if (*ncc > 0) {
		i__1 = m - ll + 1;
		zlasr_("L", "V", "B", &i__1, ncc, &rwork[1], &rwork[*n], &
			c___ref(ll, 1), ldc);
	    }
	}
    }

/*     QR iteration finished, go back and check convergence */

    goto L60;

/*     All singular values converged, so make them positive */

L160:
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (d__[i__] < 0.) {
	    d__[i__] = -d__[i__];

/*           Change sign of singular vectors, if desired */

	    if (*ncvt > 0) {
		zdscal_(ncvt, &c_b72, &vt_ref(i__, 1), ldvt);
	    }
	}
/* L170: */
    }

/*     Sort the singular values into decreasing order (insertion sort on   
       singular values, but only one transposition per singular vector) */

    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        Scan for smallest D(I) */

	isub = 1;
	smin = d__[1];
	i__2 = *n + 1 - i__;
	for (j = 2; j <= i__2; ++j) {
	    if (d__[j] <= smin) {
		isub = j;
		smin = d__[j];
	    }
/* L180: */
	}
	if (isub != *n + 1 - i__) {

/*           Swap singular values and vectors */

	    d__[isub] = d__[*n + 1 - i__];
	    d__[*n + 1 - i__] = smin;
	    if (*ncvt > 0) {
		zswap_(ncvt, &vt_ref(isub, 1), ldvt, &vt_ref(*n + 1 - i__, 1),
			 ldvt);
	    }
	    if (*nru > 0) {
		zswap_(nru, &u_ref(1, isub), &c__1, &u_ref(1, *n + 1 - i__), &
			c__1);
	    }
	    if (*ncc > 0) {
		zswap_(ncc, &c___ref(isub, 1), ldc, &c___ref(*n + 1 - i__, 1),
			 ldc);
	    }
	}
/* L190: */
    }
    goto L220;

/*     Maximum number of iterations exceeded, failure to converge */

L200:
    *info = 0;
    i__1 = *n - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (e[i__] != 0.) {
	    ++(*info);
	}
/* L210: */
    }
L220:
    return 0;

/*     End of ZBDSQR */

} /* zbdsqr_ */
예제 #5
0
파일: slasd0.c 프로젝트: zangel/uquad
/* Subroutine */ int slasd0_(integer *n, integer *sqre, real *d__, real *e, 
	real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, 
	integer *iwork, real *work, integer *info)
{
    /* System generated locals */
    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;

    /* Builtin functions */
    integer pow_ii(integer *, integer *);

    /* Local variables */
    static real beta;
    static integer idxq, nlvl, i__, j, m;
    static real alpha;
    static integer inode, ndiml, idxqc, ndimr, itemp, sqrei, i1;
    extern /* Subroutine */ int slasd1_(integer *, integer *, integer *, real 
	    *, real *, real *, real *, integer *, real *, integer *, integer *
	    , integer *, real *, integer *);
    static integer ic, lf, nd, ll, nl, nr;
    extern /* Subroutine */ int xerbla_(char *, integer *), slasdq_(
	    char *, integer *, integer *, integer *, integer *, integer *, 
	    real *, real *, real *, integer *, real *, integer *, real *, 
	    integer *, real *, integer *), slasdt_(integer *, integer 
	    *, integer *, integer *, integer *, integer *, integer *);
    static integer im1, ncc, nlf, nrf, iwk, lvl, ndb1, nlp1, nrp1;


#define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]


/*  -- LAPACK auxiliary routine (instrumented to count ops, version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    Using a divide and conquer approach, SLASD0 computes the singular   
    value decomposition (SVD) of a real upper bidiagonal N-by-M   
    matrix B with diagonal D and offdiagonal E, where M = N + SQRE.   
    The algorithm computes orthogonal matrices U and VT such that   
    B = U * S * VT. The singular values S are overwritten on D.   

    A related subroutine, SLASDA, computes only the singular values,   
    and optionally, the singular vectors in compact form.   

    Arguments   
    =========   

    N      (input) INTEGER   
           On entry, the row dimension of the upper bidiagonal matrix.   
           This is also the dimension of the main diagonal array D.   

    SQRE   (input) INTEGER   
           Specifies the column dimension of the bidiagonal matrix.   
           = 0: The bidiagonal matrix has column dimension M = N;   
           = 1: The bidiagonal matrix has column dimension M = N+1;   

    D      (input/output) REAL array, dimension (N)   
           On entry D contains the main diagonal of the bidiagonal   
           matrix.   
           On exit D, if INFO = 0, contains its singular values.   

    E      (input) REAL array, dimension (M-1)   
           Contains the subdiagonal entries of the bidiagonal matrix.   
           On exit, E has been destroyed.   

    U      (output) REAL array, dimension at least (LDQ, N)   
           On exit, U contains the left singular vectors.   

    LDU    (input) INTEGER   
           On entry, leading dimension of U.   

    VT     (output) REAL array, dimension at least (LDVT, M)   
           On exit, VT' contains the right singular vectors.   

    LDVT   (input) INTEGER   
           On entry, leading dimension of VT.   

    SMLSIZ (input) INTEGER   
           On entry, maximum size of the subproblems at the   
           bottom of the computation tree.   

    IWORK  INTEGER work array.   
           Dimension must be at least (8 * N)   

    WORK   REAL work array.   
           Dimension must be at least (3 * M**2 + 2 * M)   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if INFO = 1, an singular value did not converge   

    Further Details   
    ===============   

    Based on contributions by   
       Ming Gu and Huan Ren, Computer Science Division, University of   
       California at Berkeley, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --d__;
    --e;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    --iwork;
    --work;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
	*info = -1;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -2;
    }

    m = *n + *sqre;

    if (*ldu < *n) {
	*info = -6;
    } else if (*ldvt < m) {
	*info = -8;
    } else if (*smlsiz < 3) {
	*info = -9;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLASD0", &i__1);
	return 0;
    }

/*     If the input matrix is too small, call SLASDQ to find the SVD. */

    if (*n <= *smlsiz) {
	slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], 
		ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info);
	return 0;
    }

/*     Set up the computation tree. */

    inode = 1;
    ndiml = inode + *n;
    ndimr = ndiml + *n;
    idxq = ndimr + *n;
    iwk = idxq + *n;
    slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], 
	    smlsiz);

/*     For the nodes on bottom level of the tree, solve   
       their subproblems by SLASDQ. */

    ndb1 = (nd + 1) / 2;
    ncc = 0;
    i__1 = nd;
    for (i__ = ndb1; i__ <= i__1; ++i__) {

/*     IC : center row of each node   
       NL : number of rows of left  subproblem   
       NR : number of rows of right subproblem   
       NLF: starting row of the left   subproblem   
       NRF: starting row of the right  subproblem */

	i1 = i__ - 1;
	ic = iwork[inode + i1];
	nl = iwork[ndiml + i1];
	nlp1 = nl + 1;
	nr = iwork[ndimr + i1];
	nrp1 = nr + 1;
	nlf = ic - nl;
	nrf = ic + 1;
	sqrei = 1;
	slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &
		vt_ref(nlf, nlf), ldvt, &u_ref(nlf, nlf), ldu, &u_ref(nlf, 
		nlf), ldu, &work[1], info);
	if (*info != 0) {
	    return 0;
	}
	itemp = idxq + nlf - 2;
	i__2 = nl;
	for (j = 1; j <= i__2; ++j) {
	    iwork[itemp + j] = j;
/* L10: */
	}
	if (i__ == nd) {
	    sqrei = *sqre;
	} else {
	    sqrei = 1;
	}
	nrp1 = nr + sqrei;
	slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &
		vt_ref(nrf, nrf), ldvt, &u_ref(nrf, nrf), ldu, &u_ref(nrf, 
		nrf), ldu, &work[1], info);
	if (*info != 0) {
	    return 0;
	}
	itemp = idxq + ic;
	i__2 = nr;
	for (j = 1; j <= i__2; ++j) {
	    iwork[itemp + j - 1] = j;
/* L20: */
	}
/* L30: */
    }

/*     Now conquer each subproblem bottom-up. */

    for (lvl = nlvl; lvl >= 1; --lvl) {

/*        Find the first node LF and last node LL on the   
          current level LVL. */

	if (lvl == 1) {
	    lf = 1;
	    ll = 1;
	} else {
	    i__1 = lvl - 1;
	    lf = pow_ii(&c__2, &i__1);
	    ll = (lf << 1) - 1;
	}
	i__1 = ll;
	for (i__ = lf; i__ <= i__1; ++i__) {
	    im1 = i__ - 1;
	    ic = iwork[inode + im1];
	    nl = iwork[ndiml + im1];
	    nr = iwork[ndimr + im1];
	    nlf = ic - nl;
	    if (*sqre == 0 && i__ == ll) {
		sqrei = *sqre;
	    } else {
		sqrei = 1;
	    }
	    idxqc = idxq + nlf - 1;
	    alpha = d__[ic];
	    beta = e[ic];
	    slasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u_ref(nlf, 
		    nlf), ldu, &vt_ref(nlf, nlf), ldvt, &iwork[idxqc], &iwork[
		    iwk], &work[1], info);
	    if (*info != 0) {
		return 0;
	    }
/* L40: */
	}
/* L50: */
    }

    return 0;

/*     End of SLASD0 */

} /* slasd0_ */
예제 #6
0
파일: sbdt03.c 프로젝트: zangel/uquad
/* Subroutine */ int sbdt03_(char *uplo, integer *n, integer *kd, real *d__, 
	real *e, real *u, integer *ldu, real *s, real *vt, integer *ldvt, 
	real *work, real *resid)
{
    /* System generated locals */
    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
    real r__1, r__2, r__3, r__4;

    /* Local variables */
    static integer i__, j;
    extern logical lsame_(char *, char *);
    static real bnorm;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *);
    extern doublereal sasum_(integer *, real *, integer *), slamch_(char *);
    extern integer isamax_(integer *, real *, integer *);
    static real eps;


#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SBDT03 reconstructs a bidiagonal matrix B from its SVD:   
       S = U' * B * V   
    where U and V are orthogonal matrices and S is diagonal.   

    The test ratio to test the singular value decomposition is   
       RESID = norm( B - U * S * VT ) / ( n * norm(B) * EPS )   
    where VT = V' and EPS is the machine precision.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the matrix B is upper or lower bidiagonal.   
            = 'U':  Upper bidiagonal   
            = 'L':  Lower bidiagonal   

    N       (input) INTEGER   
            The order of the matrix B.   

    KD      (input) INTEGER   
            The bandwidth of the bidiagonal matrix B.  If KD = 1, the   
            matrix B is bidiagonal, and if KD = 0, B is diagonal and E is   
            not referenced.  If KD is greater than 1, it is assumed to be   
            1, and if KD is less than 0, it is assumed to be 0.   

    D       (input) REAL array, dimension (N)   
            The n diagonal elements of the bidiagonal matrix B.   

    E       (input) REAL array, dimension (N-1)   
            The (n-1) superdiagonal elements of the bidiagonal matrix B   
            if UPLO = 'U', or the (n-1) subdiagonal elements of B if   
            UPLO = 'L'.   

    U       (input) REAL array, dimension (LDU,N)   
            The n by n orthogonal matrix U in the reduction B = U'*A*P.   

    LDU     (input) INTEGER   
            The leading dimension of the array U.  LDU >= max(1,N)   

    S       (input) REAL array, dimension (N)   
            The singular values from the SVD of B, sorted in decreasing   
            order.   

    VT      (input) REAL array, dimension (LDVT,N)   
            The n by n orthogonal matrix V' in the reduction   
            B = U * S * V'.   

    LDVT    (input) INTEGER   
            The leading dimension of the array VT.   

    WORK    (workspace) REAL array, dimension (2*N)   

    RESID   (output) REAL   
            The test ratio:  norm(B - U * S * V') / ( n * norm(A) * EPS )   

   ======================================================================   


       Quick return if possible   

       Parameter adjustments */
    --d__;
    --e;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    --s;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    --work;

    /* Function Body */
    *resid = 0.f;
    if (*n <= 0) {
	return 0;
    }

/*     Compute B - U * S * V' one column at a time. */

    bnorm = 0.f;
    if (*kd >= 1) {

/*        B is bidiagonal. */

	if (lsame_(uplo, "U")) {

/*           B is upper bidiagonal. */

	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    work[*n + i__] = s[i__] * vt_ref(i__, j);
/* L10: */
		}
		sgemv_("No transpose", n, n, &c_b6, &u[u_offset], ldu, &work[*
			n + 1], &c__1, &c_b8, &work[1], &c__1);
		work[j] += d__[j];
		if (j > 1) {
		    work[j - 1] += e[j - 1];
/* Computing MAX */
		    r__3 = bnorm, r__4 = (r__1 = d__[j], dabs(r__1)) + (r__2 =
			     e[j - 1], dabs(r__2));
		    bnorm = dmax(r__3,r__4);
		} else {
/* Computing MAX */
		    r__2 = bnorm, r__3 = (r__1 = d__[j], dabs(r__1));
		    bnorm = dmax(r__2,r__3);
		}
/* Computing MAX */
		r__1 = *resid, r__2 = sasum_(n, &work[1], &c__1);
		*resid = dmax(r__1,r__2);
/* L20: */
	    }
	} else {

/*           B is lower bidiagonal. */

	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    work[*n + i__] = s[i__] * vt_ref(i__, j);
/* L30: */
		}
		sgemv_("No transpose", n, n, &c_b6, &u[u_offset], ldu, &work[*
			n + 1], &c__1, &c_b8, &work[1], &c__1);
		work[j] += d__[j];
		if (j < *n) {
		    work[j + 1] += e[j];
/* Computing MAX */
		    r__3 = bnorm, r__4 = (r__1 = d__[j], dabs(r__1)) + (r__2 =
			     e[j], dabs(r__2));
		    bnorm = dmax(r__3,r__4);
		} else {
/* Computing MAX */
		    r__2 = bnorm, r__3 = (r__1 = d__[j], dabs(r__1));
		    bnorm = dmax(r__2,r__3);
		}
/* Computing MAX */
		r__1 = *resid, r__2 = sasum_(n, &work[1], &c__1);
		*resid = dmax(r__1,r__2);
/* L40: */
	    }
	}
    } else {

/*        B is diagonal. */

	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = *n;
	    for (i__ = 1; i__ <= i__2; ++i__) {
		work[*n + i__] = s[i__] * vt_ref(i__, j);
/* L50: */
	    }
	    sgemv_("No transpose", n, n, &c_b6, &u[u_offset], ldu, &work[*n + 
		    1], &c__1, &c_b8, &work[1], &c__1);
	    work[j] += d__[j];
/* Computing MAX */
	    r__1 = *resid, r__2 = sasum_(n, &work[1], &c__1);
	    *resid = dmax(r__1,r__2);
/* L60: */
	}
	j = isamax_(n, &d__[1], &c__1);
	bnorm = (r__1 = d__[j], dabs(r__1));
    }

/*     Compute norm(B - U * S * V') / ( n * norm(B) * EPS ) */

    eps = slamch_("Precision");

    if (bnorm <= 0.f) {
	if (*resid != 0.f) {
	    *resid = 1.f / eps;
	}
    } else {
	if (bnorm >= *resid) {
	    *resid = *resid / bnorm / ((real) (*n) * eps);
	} else {
	    if (bnorm < 1.f) {
/* Computing MIN */
		r__1 = *resid, r__2 = (real) (*n) * bnorm;
		*resid = dmin(r__1,r__2) / bnorm / ((real) (*n) * eps);
	    } else {
/* Computing MIN */
		r__1 = *resid / bnorm, r__2 = (real) (*n);
		*resid = dmin(r__1,r__2) / ((real) (*n) * eps);
	    }
	}
    }

    return 0;

/*     End of SBDT03 */

} /* sbdt03_ */
예제 #7
0
파일: dbdsdc.c 프로젝트: MichaelH13/sdkpub
/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal *
	d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, 
	integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer *
	iwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       December 1, 1999   


    Purpose   
    =======   

    DBDSDC computes the singular value decomposition (SVD) of a real   
    N-by-N (upper or lower) bidiagonal matrix B:  B = U * S * VT,   
    using a divide and conquer method, where S is a diagonal matrix   
    with non-negative diagonal elements (the singular values of B), and   
    U and VT are orthogonal matrices of left and right singular vectors,   
    respectively. DBDSDC can be used to compute all singular values,   
    and optionally, singular vectors or singular vectors in compact form.   

    This code makes very mild assumptions about floating point   
    arithmetic. It will work on machines with a guard digit in   
    add/subtract, or on those binary machines without guard digits   
    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.   
    It could conceivably fail on hexadecimal or decimal machines   
    without guard digits, but we know of none.  See DLASD3 for details.   

    The code currently call DLASDQ if singular values only are desired.   
    However, it can be slightly modified to compute singular values   
    using the divide and conquer method.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  B is upper bidiagonal.   
            = 'L':  B is lower bidiagonal.   

    COMPQ   (input) CHARACTER*1   
            Specifies whether singular vectors are to be computed   
            as follows:   
            = 'N':  Compute singular values only;   
            = 'P':  Compute singular values and compute singular   
                    vectors in compact form;   
            = 'I':  Compute singular values and singular vectors.   

    N       (input) INTEGER   
            The order of the matrix B.  N >= 0.   

    D       (input/output) DOUBLE PRECISION array, dimension (N)   
            On entry, the n diagonal elements of the bidiagonal matrix B.   
            On exit, if INFO=0, the singular values of B.   

    E       (input/output) DOUBLE PRECISION array, dimension (N)   
            On entry, the elements of E contain the offdiagonal   
            elements of the bidiagonal matrix whose SVD is desired.   
            On exit, E has been destroyed.   

    U       (output) DOUBLE PRECISION array, dimension (LDU,N)   
            If  COMPQ = 'I', then:   
               On exit, if INFO = 0, U contains the left singular vectors   
               of the bidiagonal matrix.   
            For other values of COMPQ, U is not referenced.   

    LDU     (input) INTEGER   
            The leading dimension of the array U.  LDU >= 1.   
            If singular vectors are desired, then LDU >= max( 1, N ).   

    VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)   
            If  COMPQ = 'I', then:   
               On exit, if INFO = 0, VT' contains the right singular   
               vectors of the bidiagonal matrix.   
            For other values of COMPQ, VT is not referenced.   

    LDVT    (input) INTEGER   
            The leading dimension of the array VT.  LDVT >= 1.   
            If singular vectors are desired, then LDVT >= max( 1, N ).   

    Q       (output) DOUBLE PRECISION array, dimension (LDQ)   
            If  COMPQ = 'P', then:   
               On exit, if INFO = 0, Q and IQ contain the left   
               and right singular vectors in a compact form,   
               requiring O(N log N) space instead of 2*N**2.   
               In particular, Q contains all the DOUBLE PRECISION data in   
               LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1))))   
               words of memory, where SMLSIZ is returned by ILAENV and   
               is equal to the maximum size of the subproblems at the   
               bottom of the computation tree (usually about 25).   
            For other values of COMPQ, Q is not referenced.   

    IQ      (output) INTEGER array, dimension (LDIQ)   
            If  COMPQ = 'P', then:   
               On exit, if INFO = 0, Q and IQ contain the left   
               and right singular vectors in a compact form,   
               requiring O(N log N) space instead of 2*N**2.   
               In particular, IQ contains all INTEGER data in   
               LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1))))   
               words of memory, where SMLSIZ is returned by ILAENV and   
               is equal to the maximum size of the subproblems at the   
               bottom of the computation tree (usually about 25).   
            For other values of COMPQ, IQ is not referenced.   

    WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)   
            If COMPQ = 'N' then LWORK >= (4 * N).   
            If COMPQ = 'P' then LWORK >= (6 * N).   
            If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N).   

    IWORK   (workspace) INTEGER array, dimension (8*N)   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  The algorithm failed to compute an singular value.   
                  The update process of divide and conquer failed.   

    Further Details   
    ===============   

    Based on contributions by   
       Ming Gu and Huan Ren, Computer Science Division, University of   
       California at Berkeley, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__9 = 9;
    static integer c__0 = 0;
    static doublereal c_b15 = 1.;
    static integer c__1 = 1;
    static doublereal c_b29 = 0.;
    
    /* System generated locals */
    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
    doublereal d__1;
    /* Builtin functions */
    double d_sign(doublereal *, doublereal *), log(doublereal);
    /* Local variables */
    static integer difl, difr, ierr, perm, mlvl, sqre, i__, j, k;
    static doublereal p, r__;
    static integer z__;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, 
	    integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer *
	    , doublereal *, integer *), dswap_(integer *, doublereal *, 
	    integer *, doublereal *, integer *);
    static integer poles, iuplo, nsize, start;
    extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    integer *, integer *, doublereal *, integer *);
    static integer ic, ii, kk;
    static doublereal cs;
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, doublereal *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
	     doublereal *, integer *, integer *, integer *, integer *, 
	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
	     integer *);
    static integer is, iu;
    static doublereal sn;
    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
	    integer *, integer *), dlasdq_(char *, integer *, integer 
	    *, integer *, integer *, integer *, doublereal *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    integer *, doublereal *, integer *), dlaset_(char *, 
	    integer *, integer *, doublereal *, doublereal *, doublereal *, 
	    integer *), dlartg_(doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static integer givcol;
    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
    static integer icompq;
    static doublereal orgnrm;
    static integer givnum, givptr, nm1, qstart, smlsiz, wstart, smlszp;
    static doublereal eps;
    static integer ivt;
#define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]


    --d__;
    --e;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    --q;
    --iq;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;

    iuplo = 0;
    if (lsame_(uplo, "U")) {
	iuplo = 1;
    }
    if (lsame_(uplo, "L")) {
	iuplo = 2;
    }
    if (lsame_(compq, "N")) {
	icompq = 0;
    } else if (lsame_(compq, "P")) {
	icompq = 1;
    } else if (lsame_(compq, "I")) {
	icompq = 2;
    } else {
	icompq = -1;
    }
    if (iuplo == 0) {
	*info = -1;
    } else if (icompq < 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*ldu < 1 || icompq == 2 && *ldu < *n) {
	*info = -7;
    } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) {
	*info = -9;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DBDSDC", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }
    smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, (
	    ftnlen)6, (ftnlen)1);
    if (*n == 1) {
	if (icompq == 1) {
	    q[1] = d_sign(&c_b15, &d__[1]);
	    q[smlsiz * *n + 1] = 1.;
	} else if (icompq == 2) {
	    u_ref(1, 1) = d_sign(&c_b15, &d__[1]);
	    vt_ref(1, 1) = 1.;
	}
	d__[1] = abs(d__[1]);
	return 0;
    }
    nm1 = *n - 1;

/*     If matrix lower bidiagonal, rotate to be upper bidiagonal   
       by applying Givens rotations on the left */

    wstart = 1;
    qstart = 3;
    if (icompq == 1) {
	dcopy_(n, &d__[1], &c__1, &q[1], &c__1);
	i__1 = *n - 1;
	dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1);
    }
    if (iuplo == 2) {
	qstart = 5;
	wstart = (*n << 1) - 1;
	i__1 = *n - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
	    d__[i__] = r__;
	    e[i__] = sn * d__[i__ + 1];
	    d__[i__ + 1] = cs * d__[i__ + 1];
	    if (icompq == 1) {
		q[i__ + (*n << 1)] = cs;
		q[i__ + *n * 3] = sn;
	    } else if (icompq == 2) {
		work[i__] = cs;
		work[nm1 + i__] = -sn;
	    }
/* L10: */
	}
    }

/*     If ICOMPQ = 0, use DLASDQ to compute the singular values. */

    if (icompq == 0) {
	dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[
		vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[
		wstart], info);
	goto L40;
    }

/*     If N is smaller than the minimum divide size SMLSIZ, then solve   
       the problem with another solver. */

    if (*n <= smlsiz) {
	if (icompq == 2) {
	    dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu);
	    dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt);
	    dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset]
		    , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[
		    wstart], info);
	} else if (icompq == 1) {
	    iu = 1;
	    ivt = iu + *n;
	    dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n);
	    dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n);
	    dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + (
		    qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[
		    iu + (qstart - 1) * *n], n, &work[wstart], info);
	}
	goto L40;
    }

    if (icompq == 2) {
	dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu);
	dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt);
    }

/*     Scale. */

    orgnrm = dlanst_("M", n, &d__[1], &e[1]);
    if (orgnrm == 0.) {
	return 0;
    }
    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr);
    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, &
	    ierr);

    eps = dlamch_("Epsilon");

    mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / 
	    log(2.)) + 1;
    smlszp = smlsiz + 1;

    if (icompq == 1) {
	iu = 1;
	ivt = smlsiz + 1;
	difl = ivt + smlszp;
	difr = difl + mlvl;
	z__ = difr + (mlvl << 1);
	ic = z__ + mlvl;
	is = ic + 1;
	poles = is + 1;
	givnum = poles + (mlvl << 1);

	k = 1;
	givptr = 2;
	perm = 3;
	givcol = perm + mlvl;
    }

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if ((d__1 = d__[i__], abs(d__1)) < eps) {
	    d__[i__] = d_sign(&eps, &d__[i__]);
	}
/* L20: */
    }

    start = 1;
    sqre = 0;

    i__1 = nm1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) {

/*        Subproblem found. First determine its size and then   
          apply divide and conquer on it. */

	    if (i__ < nm1) {

/*        A subproblem with E(I) small for I < NM1. */

		nsize = i__ - start + 1;
	    } else if ((d__1 = e[i__], abs(d__1)) >= eps) {

/*        A subproblem with E(NM1) not too small but I = NM1. */

		nsize = *n - start + 1;
	    } else {

/*        A subproblem with E(NM1) small. This implies an   
          1-by-1 subproblem at D(N). Solve this 1-by-1 problem   
          first. */

		nsize = i__ - start + 1;
		if (icompq == 2) {
		    u_ref(*n, *n) = d_sign(&c_b15, &d__[*n]);
		    vt_ref(*n, *n) = 1.;
		} else if (icompq == 1) {
		    q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]);
		    q[*n + (smlsiz + qstart - 1) * *n] = 1.;
		}
		d__[*n] = (d__1 = d__[*n], abs(d__1));
	    }
	    if (icompq == 2) {
		dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u_ref(start, 
			start), ldu, &vt_ref(start, start), ldvt, &smlsiz, &
			iwork[1], &work[wstart], info);
	    } else {
		dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[
			start], &q[start + (iu + qstart - 2) * *n], n, &q[
			start + (ivt + qstart - 2) * *n], &iq[start + k * *n],
			 &q[start + (difl + qstart - 2) * *n], &q[start + (
			difr + qstart - 2) * *n], &q[start + (z__ + qstart - 
			2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[
			start + givptr * *n], &iq[start + givcol * *n], n, &
			iq[start + perm * *n], &q[start + (givnum + qstart - 
			2) * *n], &q[start + (ic + qstart - 2) * *n], &q[
			start + (is + qstart - 2) * *n], &work[wstart], &
			iwork[1], info);
		if (*info != 0) {
		    return 0;
		}
	    }
	    start = i__ + 1;
	}
/* L30: */
    }

/*     Unscale */

    dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr);
L40:

/*     Use Selection Sort to minimize swaps of singular vectors */

    i__1 = *n;
    for (ii = 2; ii <= i__1; ++ii) {
	i__ = ii - 1;
	kk = i__;
	p = d__[i__];
	i__2 = *n;
	for (j = ii; j <= i__2; ++j) {
	    if (d__[j] > p) {
		kk = j;
		p = d__[j];
	    }
/* L50: */
	}
	if (kk != i__) {
	    d__[kk] = d__[i__];
	    d__[i__] = p;
	    if (icompq == 1) {
		iq[i__] = kk;
	    } else if (icompq == 2) {
		dswap_(n, &u_ref(1, i__), &c__1, &u_ref(1, kk), &c__1);
		dswap_(n, &vt_ref(i__, 1), ldvt, &vt_ref(kk, 1), ldvt);
	    }
	} else if (icompq == 1) {
	    iq[i__] = i__;
	}
/* L60: */
    }

/*     If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */

    if (icompq == 1) {
	if (iuplo == 1) {
	    iq[*n] = 1;
	} else {
	    iq[*n] = 0;
	}
    }

/*     If B is lower bidiagonal, update U by those Givens rotations   
       which rotated B to be upper bidiagonal */

    if (iuplo == 2 && icompq == 2) {
	dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu);
    }

    return 0;

/*     End of DBDSDC */

} /* dbdsdc_ */
예제 #8
0
파일: dlalsa.c 프로젝트: zangel/uquad
/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, 
	integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer *
	ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, 
	doublereal *difl, doublereal *difr, doublereal *z__, doublereal *
	poles, integer *givptr, integer *givcol, integer *ldgcol, integer *
	perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal *
	work, integer *iwork, integer *info)
{
    /* System generated locals */
    integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, 
	    b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, 
	    difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset,
	     u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, 
	    i__2;

    /* Builtin functions */
    integer pow_ii(integer *, integer *);

    /* Local variables */
    static integer nlvl, sqre, i__, j;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *);
    static integer inode, ndiml, ndimr;
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
	    doublereal *, integer *);
    static integer i1;
    extern /* Subroutine */ int dlals0_(integer *, integer *, integer *, 
	    integer *, integer *, doublereal *, integer *, doublereal *, 
	    integer *, integer *, integer *, integer *, integer *, doublereal 
	    *, integer *, doublereal *, doublereal *, doublereal *, 
	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
	     integer *);
    extern doublereal dopbl3_(char *, integer *, integer *, integer *)
	    ;
    static integer ic, lf, nd, ll, nl, nr;
    extern /* Subroutine */ int dlasdt_(integer *, integer *, integer *, 
	    integer *, integer *, integer *, integer *), xerbla_(char *, 
	    integer *);
    static integer im1, nlf, nrf, lvl, ndb1, nlp1, lvl2, nrp1;


#define difl_ref(a_1,a_2) difl[(a_2)*difl_dim1 + a_1]
#define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1]
#define perm_ref(a_1,a_2) perm[(a_2)*perm_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define u_ref(a_1,a_2) u[(a_2)*u_dim1 + a_1]
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
#define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1]
#define bx_ref(a_1,a_2) bx[(a_2)*bx_dim1 + a_1]
#define vt_ref(a_1,a_2) vt[(a_2)*vt_dim1 + a_1]
#define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1]
#define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1]


/*  -- LAPACK routine (instrumented to count ops, version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    DLALSA is an itermediate step in solving the least squares problem   
    by computing the SVD of the coefficient matrix in compact form (The   
    singular vectors are computed as products of simple orthorgonal   
    matrices.).   

    If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector   
    matrix of an upper bidiagonal matrix to the right hand side; and if   
    ICOMPQ = 1, DLALSA applies the right singular vector matrix to the   
    right hand side. The singular vector matrices were generated in   
    compact form by DLALSA.   

    Arguments   
    =========   


    ICOMPQ (input) INTEGER   
           Specifies whether the left or the right singular vector   
           matrix is involved.   
           = 0: Left singular vector matrix   
           = 1: Right singular vector matrix   

    SMLSIZ (input) INTEGER   
           The maximum size of the subproblems at the bottom of the   
           computation tree.   

    N      (input) INTEGER   
           The row and column dimensions of the upper bidiagonal matrix.   

    NRHS   (input) INTEGER   
           The number of columns of B and BX. NRHS must be at least 1.   

    B      (input) DOUBLE PRECISION array, dimension ( LDB, NRHS )   
           On input, B contains the right hand sides of the least   
           squares problem in rows 1 through M. On output, B contains   
           the solution X in rows 1 through N.   

    LDB    (input) INTEGER   
           The leading dimension of B in the calling subprogram.   
           LDB must be at least max(1,MAX( M, N ) ).   

    BX     (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS )   
           On exit, the result of applying the left or right singular   
           vector matrix to B.   

    LDBX   (input) INTEGER   
           The leading dimension of BX.   

    U      (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).   
           On entry, U contains the left singular vector matrices of all   
           subproblems at the bottom level.   

    LDU    (input) INTEGER, LDU = > N.   
           The leading dimension of arrays U, VT, DIFL, DIFR,   
           POLES, GIVNUM, and Z.   

    VT     (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).   
           On entry, VT' contains the right singular vector matrices of   
           all subproblems at the bottom level.   

    K      (input) INTEGER array, dimension ( N ).   

    DIFL   (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).   
           where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.   

    DIFR   (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).   
           On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record   
           distances between singular values on the I-th level and   
           singular values on the (I -1)-th level, and DIFR(*, 2 * I)   
           record the normalizing factors of the right singular vectors   
           matrices of subproblems on I-th level.   

    Z      (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).   
           On entry, Z(1, I) contains the components of the deflation-   
           adjusted updating row vector for subproblems on the I-th   
           level.   

    POLES  (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).   
           On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old   
           singular values involved in the secular equations on the I-th   
           level.   

    GIVPTR (input) INTEGER array, dimension ( N ).   
           On entry, GIVPTR( I ) records the number of Givens   
           rotations performed on the I-th problem on the computation   
           tree.   

    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).   
           On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the   
           locations of Givens rotations performed on the I-th level on   
           the computation tree.   

    LDGCOL (input) INTEGER, LDGCOL = > N.   
           The leading dimension of arrays GIVCOL and PERM.   

    PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).   
           On entry, PERM(*, I) records permutations done on the I-th   
           level of the computation tree.   

    GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).   
           On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-   
           values of Givens rotations performed on the I-th level on the   
           computation tree.   

    C      (input) DOUBLE PRECISION array, dimension ( N ).   
           On entry, if the I-th subproblem is not square,   
           C( I ) contains the C-value of a Givens rotation related to   
           the right null space of the I-th subproblem.   

    S      (input) DOUBLE PRECISION array, dimension ( N ).   
           On entry, if the I-th subproblem is not square,   
           S( I ) contains the S-value of a Givens rotation related to   
           the right null space of the I-th subproblem.   

    WORK   (workspace) DOUBLE PRECISION array.   
           The dimension must be at least N.   

    IWORK  (workspace) INTEGER array.   
           The dimension must be at least 3 * N   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    bx_dim1 = *ldbx;
    bx_offset = 1 + bx_dim1 * 1;
    bx -= bx_offset;
    givnum_dim1 = *ldu;
    givnum_offset = 1 + givnum_dim1 * 1;
    givnum -= givnum_offset;
    poles_dim1 = *ldu;
    poles_offset = 1 + poles_dim1 * 1;
    poles -= poles_offset;
    z_dim1 = *ldu;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    difr_dim1 = *ldu;
    difr_offset = 1 + difr_dim1 * 1;
    difr -= difr_offset;
    difl_dim1 = *ldu;
    difl_offset = 1 + difl_dim1 * 1;
    difl -= difl_offset;
    vt_dim1 = *ldu;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    --k;
    --givptr;
    perm_dim1 = *ldgcol;
    perm_offset = 1 + perm_dim1 * 1;
    perm -= perm_offset;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1 * 1;
    givcol -= givcol_offset;
    --c__;
    --s;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*smlsiz < 3) {
	*info = -2;
    } else if (*n < *smlsiz) {
	*info = -3;
    } else if (*nrhs < 1) {
	*info = -4;
    } else if (*ldb < *n) {
	*info = -6;
    } else if (*ldbx < *n) {
	*info = -8;
    } else if (*ldu < *n) {
	*info = -10;
    } else if (*ldgcol < *n) {
	*info = -19;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DLALSA", &i__1);
	return 0;
    }

/*     Book-keeping and  setting up the computation tree. */

    inode = 1;
    ndiml = inode + *n;
    ndimr = ndiml + *n;

    dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], 
	    smlsiz);

/*     The following code applies back the left singular vector factors.   
       For applying back the right singular vector factors, go to 50. */

    if (*icompq == 1) {
	goto L50;
    }

/*     The nodes on the bottom level of the tree were solved by DLASDQ.   
       The corresponding left and right singular vector matrices are in   
       explicit form. First apply back the left singular vector matrices. */

    ndb1 = (nd + 1) / 2;
    i__1 = nd;
    for (i__ = ndb1; i__ <= i__1; ++i__) {

/*        IC : center row of each node   
          NL : number of rows of left  subproblem   
          NR : number of rows of right subproblem   
          NLF: starting row of the left   subproblem   
          NRF: starting row of the right  subproblem */

	i1 = i__ - 1;
	ic = iwork[inode + i1];
	nl = iwork[ndiml + i1];
	nr = iwork[ndimr + i1];
	nlf = ic - nl;
	nrf = ic + 1;
	latime_1.ops += dopbl3_("DGEMM ", &nl, nrhs, &nl);
	latime_1.ops += dopbl3_("DGEMM ", &nr, nrhs, &nr);
	dgemm_("T", "N", &nl, nrhs, &nl, &c_b9, &u_ref(nlf, 1), ldu, &b_ref(
		nlf, 1), ldb, &c_b10, &bx_ref(nlf, 1), ldbx);
	dgemm_("T", "N", &nr, nrhs, &nr, &c_b9, &u_ref(nrf, 1), ldu, &b_ref(
		nrf, 1), ldb, &c_b10, &bx_ref(nrf, 1), ldbx);
/* L10: */
    }

/*     Next copy the rows of B that correspond to unchanged rows   
       in the bidiagonal matrix to BX. */

    i__1 = nd;
    for (i__ = 1; i__ <= i__1; ++i__) {
	ic = iwork[inode + i__ - 1];
	dcopy_(nrhs, &b_ref(ic, 1), ldb, &bx_ref(ic, 1), ldbx);
/* L20: */
    }

/*     Finally go through the left singular vector matrices of all   
       the other subproblems bottom-up on the tree. */

    j = pow_ii(&c__2, &nlvl);
    sqre = 0;

    for (lvl = nlvl; lvl >= 1; --lvl) {
	lvl2 = (lvl << 1) - 1;

/*        find the first node LF and last node LL on   
          the current level LVL */

	if (lvl == 1) {
	    lf = 1;
	    ll = 1;
	} else {
	    i__1 = lvl - 1;
	    lf = pow_ii(&c__2, &i__1);
	    ll = (lf << 1) - 1;
	}
	i__1 = ll;
	for (i__ = lf; i__ <= i__1; ++i__) {
	    im1 = i__ - 1;
	    ic = iwork[inode + im1];
	    nl = iwork[ndiml + im1];
	    nr = iwork[ndimr + im1];
	    nlf = ic - nl;
	    nrf = ic + 1;
	    --j;
	    dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx_ref(nlf, 1), ldbx, &
		    b_ref(nlf, 1), ldb, &perm_ref(nlf, lvl), &givptr[j], &
		    givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), 
		    ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), &
		    difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], &
		    s[j], &work[1], info);
/* L30: */
	}
/* L40: */
    }
    goto L90;

/*     ICOMPQ = 1: applying back the right singular vector factors. */

L50:

/*     First now go through the right singular vector matrices of all   
       the tree nodes top-down. */

    j = 0;
    i__1 = nlvl;
    for (lvl = 1; lvl <= i__1; ++lvl) {
	lvl2 = (lvl << 1) - 1;

/*        Find the first node LF and last node LL on   
          the current level LVL. */

	if (lvl == 1) {
	    lf = 1;
	    ll = 1;
	} else {
	    i__2 = lvl - 1;
	    lf = pow_ii(&c__2, &i__2);
	    ll = (lf << 1) - 1;
	}
	i__2 = lf;
	for (i__ = ll; i__ >= i__2; --i__) {
	    im1 = i__ - 1;
	    ic = iwork[inode + im1];
	    nl = iwork[ndiml + im1];
	    nr = iwork[ndimr + im1];
	    nlf = ic - nl;
	    nrf = ic + 1;
	    if (i__ == ll) {
		sqre = 0;
	    } else {
		sqre = 1;
	    }
	    ++j;
	    dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b_ref(nlf, 1), ldb, &
		    bx_ref(nlf, 1), ldbx, &perm_ref(nlf, lvl), &givptr[j], &
		    givcol_ref(nlf, lvl2), ldgcol, &givnum_ref(nlf, lvl2), 
		    ldu, &poles_ref(nlf, lvl2), &difl_ref(nlf, lvl), &
		    difr_ref(nlf, lvl2), &z___ref(nlf, lvl), &k[j], &c__[j], &
		    s[j], &work[1], info);
/* L60: */
	}
/* L70: */
    }

/*     The nodes on the bottom level of the tree were solved by DLASDQ.   
       The corresponding right singular vector matrices are in explicit   
       form. Apply them back. */

    ndb1 = (nd + 1) / 2;
    i__1 = nd;
    for (i__ = ndb1; i__ <= i__1; ++i__) {
	i1 = i__ - 1;
	ic = iwork[inode + i1];
	nl = iwork[ndiml + i1];
	nr = iwork[ndimr + i1];
	nlp1 = nl + 1;
	if (i__ == nd) {
	    nrp1 = nr;
	} else {
	    nrp1 = nr + 1;
	}
	nlf = ic - nl;
	nrf = ic + 1;
	latime_1.ops += dopbl3_("DGEMM ", &nlp1, nrhs, &nlp1);
	latime_1.ops += dopbl3_("DGEMM ", &nrp1, nrhs, &nrp1);
	dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b9, &vt_ref(nlf, 1), ldu, &
		b_ref(nlf, 1), ldb, &c_b10, &bx_ref(nlf, 1), ldbx);
	dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b9, &vt_ref(nrf, 1), ldu, &
		b_ref(nrf, 1), ldb, &c_b10, &bx_ref(nrf, 1), ldbx);
/* L80: */
    }

L90:

    return 0;

/*     End of DLALSA */

} /* dlalsa_ */