Ejemplo n.º 1
0
/* Subroutine */
int slasd2_(integer *nl, integer *nr, integer *sqre, integer *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer *idxq, integer *coltyp, integer *info)
{
    /* System generated locals */
    integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1;
    real r__1, r__2;
    /* Local variables */
    real c__;
    integer i__, j, m, n;
    real s;
    integer k2;
    real z1;
    integer ct, jp;
    real eps, tau, tol;
    integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4];
    extern /* Subroutine */
    int srot_(integer *, real *, integer *, real *, integer *, real *, real *);
    integer idxjp, jprev;
    extern /* Subroutine */
    int scopy_(integer *, real *, integer *, real *, integer *);
    extern real slapy2_(real *, real *), slamch_(char *);
    extern /* Subroutine */
    int xerbla_(char *, integer *), slamrg_( integer *, integer *, real *, integer *, integer *, integer *);
    real hlftol;
    extern /* Subroutine */
    int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *);
    /* -- LAPACK auxiliary routine (version 3.4.2) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* September 2012 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Arrays .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Test the input parameters. */
    /* Parameter adjustments */
    --d__;
    --z__;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1;
    vt -= vt_offset;
    --dsigma;
    u2_dim1 = *ldu2;
    u2_offset = 1 + u2_dim1;
    u2 -= u2_offset;
    vt2_dim1 = *ldvt2;
    vt2_offset = 1 + vt2_dim1;
    vt2 -= vt2_offset;
    --idxp;
    --idx;
    --idxc;
    --idxq;
    --coltyp;
    /* Function Body */
    *info = 0;
    if (*nl < 1)
    {
        *info = -1;
    }
    else if (*nr < 1)
    {
        *info = -2;
    }
    else if (*sqre != 1 && *sqre != 0)
    {
        *info = -3;
    }
    n = *nl + *nr + 1;
    m = n + *sqre;
    if (*ldu < n)
    {
        *info = -10;
    }
    else if (*ldvt < m)
    {
        *info = -12;
    }
    else if (*ldu2 < n)
    {
        *info = -15;
    }
    else if (*ldvt2 < m)
    {
        *info = -17;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("SLASD2", &i__1);
        return 0;
    }
    nlp1 = *nl + 1;
    nlp2 = *nl + 2;
    /* Generate the first part of the vector Z;
    and move the singular */
    /* values in the first part of D one position backward. */
    z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1];
    z__[1] = z1;
    for (i__ = *nl;
            i__ >= 1;
            --i__)
    {
        z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1];
        d__[i__ + 1] = d__[i__];
        idxq[i__ + 1] = idxq[i__] + 1;
        /* L10: */
    }
    /* Generate the second part of the vector Z. */
    i__1 = m;
    for (i__ = nlp2;
            i__ <= i__1;
            ++i__)
    {
        z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1];
        /* L20: */
    }
    /* Initialize some reference arrays. */
    i__1 = nlp1;
    for (i__ = 2;
            i__ <= i__1;
            ++i__)
    {
        coltyp[i__] = 1;
        /* L30: */
    }
    i__1 = n;
    for (i__ = nlp2;
            i__ <= i__1;
            ++i__)
    {
        coltyp[i__] = 2;
        /* L40: */
    }
    /* Sort the singular values into increasing order */
    i__1 = n;
    for (i__ = nlp2;
            i__ <= i__1;
            ++i__)
    {
        idxq[i__] += nlp1;
        /* L50: */
    }
    /* DSIGMA, IDXC, IDXC, and the first column of U2 */
    /* are used as storage space. */
    i__1 = n;
    for (i__ = 2;
            i__ <= i__1;
            ++i__)
    {
        dsigma[i__] = d__[idxq[i__]];
        u2[i__ + u2_dim1] = z__[idxq[i__]];
        idxc[i__] = coltyp[idxq[i__]];
        /* L60: */
    }
    slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);
    i__1 = n;
    for (i__ = 2;
            i__ <= i__1;
            ++i__)
    {
        idxi = idx[i__] + 1;
        d__[i__] = dsigma[idxi];
        z__[i__] = u2[idxi + u2_dim1];
        coltyp[i__] = idxc[idxi];
        /* L70: */
    }
    /* Calculate the allowable deflation tolerance */
    eps = slamch_("Epsilon");
    /* Computing MAX */
    r__1 = f2c_abs(*alpha);
    r__2 = f2c_abs(*beta); // , expr subst
    tol = max(r__1,r__2);
    /* Computing MAX */
    r__2 = (r__1 = d__[n], f2c_abs(r__1));
    tol = eps * 8.f * max(r__2,tol);
    /* There are 2 kinds of deflation -- first a value in the z-vector */
    /* is small, second two (or more) singular values are very close */
    /* together (their difference is small). */
    /* If the value in the z-vector is small, we simply permute the */
    /* array so that the corresponding singular value is moved to the */
    /* end. */
    /* If two values in the D-vector are close, we perform a two-sided */
    /* rotation designed to make one of the corresponding z-vector */
    /* entries zero, and then permute the array so that the deflated */
    /* singular value is moved to the end. */
    /* If there are multiple singular values then the problem deflates. */
    /* Here the number of equal singular values are found. As each equal */
    /* singular value is found, an elementary reflector is computed to */
    /* rotate the corresponding singular subspace so that the */
    /* corresponding components of Z are zero in this new basis. */
    *k = 1;
    k2 = n + 1;
    i__1 = n;
    for (j = 2;
            j <= i__1;
            ++j)
    {
        if ((r__1 = z__[j], f2c_abs(r__1)) <= tol)
        {
            /* Deflate due to small z component. */
            --k2;
            idxp[k2] = j;
            coltyp[j] = 4;
            if (j == n)
            {
                goto L120;
            }
        }
        else
        {
            jprev = j;
            goto L90;
        }
        /* L80: */
    }
L90:
    j = jprev;
L100:
    ++j;
    if (j > n)
    {
        goto L110;
    }
    if ((r__1 = z__[j], f2c_abs(r__1)) <= tol)
    {
        /* Deflate due to small z component. */
        --k2;
        idxp[k2] = j;
        coltyp[j] = 4;
    }
    else
    {
        /* Check if singular values are close enough to allow deflation. */
        if ((r__1 = d__[j] - d__[jprev], f2c_abs(r__1)) <= tol)
        {
            /* Deflation is possible. */
            s = z__[jprev];
            c__ = z__[j];
            /* Find sqrt(a**2+b**2) without overflow or */
            /* destructive underflow. */
            tau = slapy2_(&c__, &s);
            c__ /= tau;
            s = -s / tau;
            z__[j] = tau;
            z__[jprev] = 0.f;
            /* Apply back the Givens rotation to the left and right */
            /* singular vector matrices. */
            idxjp = idxq[idx[jprev] + 1];
            idxj = idxq[idx[j] + 1];
            if (idxjp <= nlp1)
            {
                --idxjp;
            }
            if (idxj <= nlp1)
            {
                --idxj;
            }
            srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & c__1, &c__, &s);
            srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & c__, &s);
            if (coltyp[j] != coltyp[jprev])
            {
                coltyp[j] = 3;
            }
            coltyp[jprev] = 4;
            --k2;
            idxp[k2] = jprev;
            jprev = j;
        }
        else
        {
            ++(*k);
            u2[*k + u2_dim1] = z__[jprev];
            dsigma[*k] = d__[jprev];
            idxp[*k] = jprev;
            jprev = j;
        }
    }
    goto L100;
L110: /* Record the last singular value. */
    ++(*k);
    u2[*k + u2_dim1] = z__[jprev];
    dsigma[*k] = d__[jprev];
    idxp[*k] = jprev;
L120: /* Count up the total number of the various types of columns, then */
    /* form a permutation which positions the four column types into */
    /* four groups of uniform structure (although one or more of these */
    /* groups may be empty). */
    for (j = 1;
            j <= 4;
            ++j)
    {
        ctot[j - 1] = 0;
        /* L130: */
    }
    i__1 = n;
    for (j = 2;
            j <= i__1;
            ++j)
    {
        ct = coltyp[j];
        ++ctot[ct - 1];
        /* L140: */
    }
    /* PSM(*) = Position in SubMatrix (of types 1 through 4) */
    psm[0] = 2;
    psm[1] = ctot[0] + 2;
    psm[2] = psm[1] + ctot[1];
    psm[3] = psm[2] + ctot[2];
    /* Fill out the IDXC array so that the permutation which it induces */
    /* will place all type-1 columns first, all type-2 columns next, */
    /* then all type-3's, and finally all type-4's, starting from the */
    /* second column. This applies similarly to the rows of VT. */
    i__1 = n;
    for (j = 2;
            j <= i__1;
            ++j)
    {
        jp = idxp[j];
        ct = coltyp[jp];
        idxc[psm[ct - 1]] = j;
        ++psm[ct - 1];
        /* L150: */
    }
    /* Sort the singular values and corresponding singular vectors into */
    /* DSIGMA, U2, and VT2 respectively. The singular values/vectors */
    /* which were not deflated go into the first K slots of DSIGMA, U2, */
    /* and VT2 respectively, while those which were deflated go into the */
    /* last N - K slots, except that the first column/row will be treated */
    /* separately. */
    i__1 = n;
    for (j = 2;
            j <= i__1;
            ++j)
    {
        jp = idxp[j];
        dsigma[j] = d__[jp];
        idxj = idxq[idx[idxp[idxc[j]]] + 1];
        if (idxj <= nlp1)
        {
            --idxj;
        }
        scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1);
        scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2);
        /* L160: */
    }
    /* Determine DSIGMA(1), DSIGMA(2) and Z(1) */
    dsigma[1] = 0.f;
    hlftol = tol / 2.f;
    if (f2c_abs(dsigma[2]) <= hlftol)
    {
        dsigma[2] = hlftol;
    }
    if (m > n)
    {
        z__[1] = slapy2_(&z1, &z__[m]);
        if (z__[1] <= tol)
        {
            c__ = 1.f;
            s = 0.f;
            z__[1] = tol;
        }
        else
        {
            c__ = z1 / z__[1];
            s = z__[m] / z__[1];
        }
    }
    else
    {
        if (f2c_abs(z1) <= tol)
        {
            z__[1] = tol;
        }
        else
        {
            z__[1] = z1;
        }
    }
    /* Move the rest of the updating row to Z. */
    i__1 = *k - 1;
    scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1);
    /* Determine the first column of U2, the first row of VT2 and the */
    /* last row of VT. */
    slaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2);
    u2[nlp1 + u2_dim1] = 1.f;
    if (m > n)
    {
        i__1 = nlp1;
        for (i__ = 1;
                i__ <= i__1;
                ++i__)
        {
            vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1];
            vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1];
            /* L170: */
        }
        i__1 = m;
        for (i__ = nlp2;
                i__ <= i__1;
                ++i__)
        {
            vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1];
            vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1];
            /* L180: */
        }
    }
    else
    {
        scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2);
    }
    if (m > n)
    {
        scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2);
    }
    /* The deflated singular values and their corresponding vectors go */
    /* into the back of D, U, and V respectively. */
    if (n > *k)
    {
        i__1 = n - *k;
        scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
        i__1 = n - *k;
        slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) * u_dim1 + 1], ldu);
        i__1 = n - *k;
        slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + vt_dim1], ldvt);
    }
    /* Copy CTOT into COLTYP for referencing in SLASD3. */
    for (j = 1;
            j <= 4;
            ++j)
    {
        coltyp[j] = ctot[j - 1];
        /* L190: */
    }
    return 0;
    /* End of SLASD2 */
}
Ejemplo n.º 2
0
Archivo: slaed2.c Proyecto: RebUT/REBUT
/* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__,
                             real *q, integer *ldq, integer *indxq, real *rho, real *z__, real *
                             dlamda, real *w, real *q2, integer *indx, integer *indxc, integer *
                             indxp, integer *coltyp, integer *info)
{
    /* System generated locals */
    integer q_dim1, q_offset, i__1, i__2;
    real r__1, r__2, r__3, r__4;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    real c__;
    integer i__, j;
    real s, t;
    integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
    real eps, tau, tol;
    integer psm[4], imax, jmax, ctot[4];
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
                                      integer *, real *, real *), sscal_(integer *, real *, real *,
                                              integer *), scopy_(integer *, real *, integer *, real *, integer *
                                                                );
    extern doublereal slapy2_(real *, real *), slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer
                                        *, integer *, integer *), slacpy_(char *, integer *, integer *,
                                                real *, integer *, real *, integer *);


    /*  -- LAPACK routine (version 3.2) -- */
    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
    /*     November 2006 */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  SLAED2 merges the two sets of eigenvalues together into a single */
    /*  sorted set.  Then it tries to deflate the size of the problem. */
    /*  There are two ways in which deflation can occur:  when two or more */
    /*  eigenvalues are close together or if there is a tiny entry in the */
    /*  Z vector.  For each such occurrence the order of the related secular */
    /*  equation problem is reduced by one. */

    /*  Arguments */
    /*  ========= */

    /*  K      (output) INTEGER */
    /*         The number of non-deflated eigenvalues, and the order of the */
    /*         related secular equation. 0 <= K <=N. */

    /*  N      (input) INTEGER */
    /*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */

    /*  N1     (input) INTEGER */
    /*         The location of the last eigenvalue in the leading sub-matrix. */
    /*         min(1,N) <= N1 <= N/2. */

    /*  D      (input/output) REAL array, dimension (N) */
    /*         On entry, D contains the eigenvalues of the two submatrices to */
    /*         be combined. */
    /*         On exit, D contains the trailing (N-K) updated eigenvalues */
    /*         (those which were deflated) sorted into increasing order. */

    /*  Q      (input/output) REAL array, dimension (LDQ, N) */
    /*         On entry, Q contains the eigenvectors of two submatrices in */
    /*         the two square blocks with corners at (1,1), (N1,N1) */
    /*         and (N1+1, N1+1), (N,N). */
    /*         On exit, Q contains the trailing (N-K) updated eigenvectors */
    /*         (those which were deflated) in its last N-K columns. */

    /*  LDQ    (input) INTEGER */
    /*         The leading dimension of the array Q.  LDQ >= max(1,N). */

    /*  INDXQ  (input/output) INTEGER array, dimension (N) */
    /*         The permutation which separately sorts the two sub-problems */
    /*         in D into ascending order.  Note that elements in the second */
    /*         half of this permutation must first have N1 added to their */
    /*         values. Destroyed on exit. */

    /*  RHO    (input/output) REAL */
    /*         On entry, the off-diagonal element associated with the rank-1 */
    /*         cut which originally split the two submatrices which are now */
    /*         being recombined. */
    /*         On exit, RHO has been modified to the value required by */
    /*         SLAED3. */

    /*  Z      (input) REAL array, dimension (N) */
    /*         On entry, Z contains the updating vector (the last */
    /*         row of the first sub-eigenvector matrix and the first row of */
    /*         the second sub-eigenvector matrix). */
    /*         On exit, the contents of Z have been destroyed by the updating */
    /*         process. */

    /*  DLAMDA (output) REAL array, dimension (N) */
    /*         A copy of the first K eigenvalues which will be used by */
    /*         SLAED3 to form the secular equation. */

    /*  W      (output) REAL array, dimension (N) */
    /*         The first k values of the final deflation-altered z-vector */
    /*         which will be passed to SLAED3. */

    /*  Q2     (output) REAL array, dimension (N1**2+(N-N1)**2) */
    /*         A copy of the first K eigenvectors which will be used by */
    /*         SLAED3 in a matrix multiply (SGEMM) to solve for the new */
    /*         eigenvectors. */

    /*  INDX   (workspace) INTEGER array, dimension (N) */
    /*         The permutation used to sort the contents of DLAMDA into */
    /*         ascending order. */

    /*  INDXC  (output) INTEGER array, dimension (N) */
    /*         The permutation used to arrange the columns of the deflated */
    /*         Q matrix into three groups:  the first group contains non-zero */
    /*         elements only at and above N1, the second contains */
    /*         non-zero elements only below N1, and the third is dense. */

    /*  INDXP  (workspace) INTEGER array, dimension (N) */
    /*         The permutation used to place deflated values of D at the end */
    /*         of the array.  INDXP(1:K) points to the nondeflated D-values */
    /*         and INDXP(K+1:N) points to the deflated eigenvalues. */

    /*  COLTYP (workspace/output) INTEGER array, dimension (N) */
    /*         During execution, a label which will indicate which of the */
    /*         following types a column in the Q2 matrix is: */
    /*         1 : non-zero in the upper half only; */
    /*         2 : dense; */
    /*         3 : non-zero in the lower half only; */
    /*         4 : deflated. */
    /*         On exit, COLTYP(i) is the number of columns of type i, */
    /*         for i=1 to 4 only. */

    /*  INFO   (output) INTEGER */
    /*          = 0:  successful exit. */
    /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

    /*  Further Details */
    /*  =============== */

    /*  Based on contributions by */
    /*     Jeff Rutter, Computer Science Division, University of California */
    /*     at Berkeley, USA */
    /*  Modified by Francoise Tisseur, University of Tennessee. */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Arrays .. */
    /*     .. */
    /*     .. Local Scalars .. */
    /*     .. */
    /*     .. External Functions .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. Intrinsic Functions .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --indxq;
    --z__;
    --dlamda;
    --w;
    --q2;
    --indx;
    --indxc;
    --indxp;
    --coltyp;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
        *info = -2;
    } else if (*ldq < max(1,*n)) {
        *info = -6;
    } else { /* if(complicated condition) */
        /* Computing MIN */
        i__1 = 1, i__2 = *n / 2;
        if (min(i__1,i__2) > *n1 || *n / 2 < *n1) {
            *info = -3;
        }
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SLAED2", &i__1);
        return 0;
    }

    /*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

    n2 = *n - *n1;
    n1p1 = *n1 + 1;

    if (*rho < 0.f) {
        sscal_(&n2, &c_b3, &z__[n1p1], &c__1);
    }

    /*     Normalize z so that norm(z) = 1.  Since z is the concatenation of */
    /*     two normalized vectors, norm2(z) = sqrt(2). */

    t = 1.f / sqrt(2.f);
    sscal_(n, &t, &z__[1], &c__1);

    /*     RHO = ABS( norm(z)**2 * RHO ) */

    *rho = (r__1 = *rho * 2.f, dabs(r__1));

    /*     Sort the eigenvalues into increasing order */

    i__1 = *n;
    for (i__ = n1p1; i__ <= i__1; ++i__) {
        indxq[i__] += *n1;
        /* L10: */
    }

    /*     re-integrate the deflated parts from the last pass */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        dlamda[i__] = d__[indxq[i__]];
        /* L20: */
    }
    slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
        indx[i__] = indxq[indxc[i__]];
        /* L30: */
    }

    /*     Calculate the allowable deflation tolerance */

    imax = isamax_(n, &z__[1], &c__1);
    jmax = isamax_(n, &d__[1], &c__1);
    eps = slamch_("Epsilon");
    /* Computing MAX */
    r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs(
                r__2));
    tol = eps * 8.f * dmax(r__3,r__4);

    /*     If the rank-1 modifier is small enough, no more needs to be done */
    /*     except to reorganize Q so that its columns correspond with the */
    /*     elements in D. */

    if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
        *k = 0;
        iq2 = 1;
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__ = indx[j];
            scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
            dlamda[j] = d__[i__];
            iq2 += *n;
            /* L40: */
        }
        slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
        scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
        goto L190;
    }

    /*     If there are multiple eigenvalues then the problem deflates.  Here */
    /*     the number of equal eigenvalues are found.  As each equal */
    /*     eigenvalue is found, an elementary reflector is computed to rotate */
    /*     the corresponding eigensubspace so that the corresponding */
    /*     components of Z are zero in this new basis. */

    i__1 = *n1;
    for (i__ = 1; i__ <= i__1; ++i__) {
        coltyp[i__] = 1;
        /* L50: */
    }
    i__1 = *n;
    for (i__ = n1p1; i__ <= i__1; ++i__) {
        coltyp[i__] = 3;
        /* L60: */
    }


    *k = 0;
    k2 = *n + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        nj = indx[j];
        if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {

            /*           Deflate due to small z component. */

            --k2;
            coltyp[nj] = 4;
            indxp[k2] = nj;
            if (j == *n) {
                goto L100;
            }
        } else {
            pj = nj;
            goto L80;
        }
        /* L70: */
    }
L80:
    ++j;
    nj = indx[j];
    if (j > *n) {
        goto L100;
    }
    if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {

        /*        Deflate due to small z component. */

        --k2;
        coltyp[nj] = 4;
        indxp[k2] = nj;
    } else {

        /*        Check if eigenvalues are close enough to allow deflation. */

        s = z__[pj];
        c__ = z__[nj];

        /*        Find sqrt(a**2+b**2) without overflow or */
        /*        destructive underflow. */

        tau = slapy2_(&c__, &s);
        t = d__[nj] - d__[pj];
        c__ /= tau;
        s = -s / tau;
        if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {

            /*           Deflation is possible. */

            z__[nj] = tau;
            z__[pj] = 0.f;
            if (coltyp[nj] != coltyp[pj]) {
                coltyp[nj] = 2;
            }
            coltyp[pj] = 4;
            srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &
                  c__, &s);
            /* Computing 2nd power */
            r__1 = c__;
            /* Computing 2nd power */
            r__2 = s;
            t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
            /* Computing 2nd power */
            r__1 = s;
            /* Computing 2nd power */
            r__2 = c__;
            d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
            d__[pj] = t;
            --k2;
            i__ = 1;
L90:
            if (k2 + i__ <= *n) {
                if (d__[pj] < d__[indxp[k2 + i__]]) {
                    indxp[k2 + i__ - 1] = indxp[k2 + i__];
                    indxp[k2 + i__] = pj;
                    ++i__;
                    goto L90;
                } else {
                    indxp[k2 + i__ - 1] = pj;
                }
            } else {
                indxp[k2 + i__ - 1] = pj;
            }
            pj = nj;
        } else {
            ++(*k);
            dlamda[*k] = d__[pj];
            w[*k] = z__[pj];
            indxp[*k] = pj;
            pj = nj;
        }
    }
    goto L80;
L100:

    /*     Record the last eigenvalue. */

    ++(*k);
    dlamda[*k] = d__[pj];
    w[*k] = z__[pj];
    indxp[*k] = pj;

    /*     Count up the total number of the various types of columns, then */
    /*     form a permutation which positions the four column types into */
    /*     four uniform groups (although one or more of these groups may be */
    /*     empty). */

    for (j = 1; j <= 4; ++j) {
        ctot[j - 1] = 0;
        /* L110: */
    }
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        ct = coltyp[j];
        ++ctot[ct - 1];
        /* L120: */
    }

    /*     PSM(*) = Position in SubMatrix (of types 1 through 4) */

    psm[0] = 1;
    psm[1] = ctot[0] + 1;
    psm[2] = psm[1] + ctot[1];
    psm[3] = psm[2] + ctot[2];
    *k = *n - ctot[3];

    /*     Fill out the INDXC array so that the permutation which it induces */
    /*     will place all type-1 columns first, all type-2 columns next, */
    /*     then all type-3's, and finally all type-4's. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        js = indxp[j];
        ct = coltyp[js];
        indx[psm[ct - 1]] = js;
        indxc[psm[ct - 1]] = j;
        ++psm[ct - 1];
        /* L130: */
    }

    /*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
    /*     and Q2 respectively.  The eigenvalues/vectors which were not */
    /*     deflated go into the first K slots of DLAMDA and Q2 respectively, */
    /*     while those which were deflated go into the last N - K slots. */

    i__ = 1;
    iq1 = 1;
    iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
    i__1 = ctot[0];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq1 += *n1;
        /* L140: */
    }

    i__1 = ctot[1];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
        scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq1 += *n1;
        iq2 += n2;
        /* L150: */
    }

    i__1 = ctot[2];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
        z__[i__] = d__[js];
        ++i__;
        iq2 += n2;
        /* L160: */
    }

    iq1 = iq2;
    i__1 = ctot[3];
    for (j = 1; j <= i__1; ++j) {
        js = indx[i__];
        scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
        iq2 += *n;
        z__[i__] = d__[js];
        ++i__;
        /* L170: */
    }

    /*     The deflated eigenvalues and their corresponding vectors go back */
    /*     into the last N - K slots of D and Q respectively. */

    slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq);
    i__1 = *n - *k;
    scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);

    /*     Copy CTOT into COLTYP for referencing in SLAED3. */

    for (j = 1; j <= 4; ++j) {
        coltyp[j] = ctot[j - 1];
        /* L180: */
    }

L190:
    return 0;

    /*     End of SLAED2 */

} /* slaed2_ */
Ejemplo n.º 3
0
 int claed7_(int *n, int *cutpnt, int *qsiz, 
	int *tlvls, int *curlvl, int *curpbm, float *d__, complex *
	q, int *ldq, float *rho, int *indxq, float *qstore, int *
	qptr, int *prmptr, int *perm, int *givptr, int *
	givcol, float *givnum, complex *work, float *rwork, int *iwork, 
	int *info)
{
    /* System generated locals */
    int q_dim1, q_offset, i__1, i__2;

    /* Builtin functions */
    int pow_ii(int *, int *);

    /* Local variables */
    int i__, k, n1, n2, iq, iw, iz, ptr, indx, curr, indxc, indxp;
    extern  int claed8_(int *, int *, int *, 
	    complex *, int *, float *, float *, int *, float *, float *, 
	    complex *, int *, float *, int *, int *, int *, 
	    int *, int *, int *, float *, int *), slaed9_(
	    int *, int *, int *, int *, float *, float *, 
	    int *, float *, float *, float *, float *, int *, int *), 
	    slaeda_(int *, int *, int *, int *, int *, 
	    int *, int *, int *, float *, float *, int *, float *
, float *, int *);
    int idlmda;
    extern  int clacrm_(int *, int *, complex *, 
	    int *, float *, int *, complex *, int *, float *), 
	    xerbla_(char *, int *), slamrg_(int *, int *, 
	    float *, int *, int *, int *);
    int coltyp;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CLAED7 computes the updated eigensystem of a diagonal */
/*  matrix after modification by a rank-one symmetric matrix. This */
/*  routine is used only for the eigenproblem which requires all */
/*  eigenvalues and optionally eigenvectors of a dense or banded */
/*  Hermitian matrix that has been reduced to tridiagonal form. */

/*    T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) */

/*    where Z = Q'u, u is a vector of length N with ones in the */
/*    CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. */

/*     The eigenvectors of the original matrix are stored in Q, and the */
/*     eigenvalues are in D.  The algorithm consists of three stages: */

/*        The first stage consists of deflating the size of the problem */
/*        when there are multiple eigenvalues or if there is a zero in */
/*        the Z vector.  For each such occurence the dimension of the */
/*        secular equation problem is reduced by one.  This stage is */
/*        performed by the routine SLAED2. */

/*        The second stage consists of calculating the updated */
/*        eigenvalues. This is done by finding the roots of the secular */
/*        equation via the routine SLAED4 (as called by SLAED3). */
/*        This routine also calculates the eigenvectors of the current */
/*        problem. */

/*        The final stage consists of computing the updated eigenvectors */
/*        directly using the updated eigenvalues.  The eigenvectors for */
/*        the current problem are multiplied with the eigenvectors from */
/*        the overall problem. */

/*  Arguments */
/*  ========= */

/*  N      (input) INTEGER */
/*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */

/*  CUTPNT (input) INTEGER */
/*         Contains the location of the last eigenvalue in the leading */
/*         sub-matrix.  MIN(1,N) <= CUTPNT <= N. */

/*  QSIZ   (input) INTEGER */
/*         The dimension of the unitary matrix used to reduce */
/*         the full matrix to tridiagonal form.  QSIZ >= N. */

/*  TLVLS  (input) INTEGER */
/*         The total number of merging levels in the overall divide and */
/*         conquer tree. */

/*  CURLVL (input) INTEGER */
/*         The current level in the overall merge routine, */
/*         0 <= curlvl <= tlvls. */

/*  CURPBM (input) INTEGER */
/*         The current problem in the current level in the overall */
/*         merge routine (counting from upper left to lower right). */

/*  D      (input/output) REAL array, dimension (N) */
/*         On entry, the eigenvalues of the rank-1-perturbed matrix. */
/*         On exit, the eigenvalues of the repaired matrix. */

/*  Q      (input/output) COMPLEX array, dimension (LDQ,N) */
/*         On entry, the eigenvectors of the rank-1-perturbed matrix. */
/*         On exit, the eigenvectors of the repaired tridiagonal matrix. */

/*  LDQ    (input) INTEGER */
/*         The leading dimension of the array Q.  LDQ >= MAX(1,N). */

/*  RHO    (input) REAL */
/*         Contains the subdiagonal element used to create the rank-1 */
/*         modification. */

/*  INDXQ  (output) INTEGER array, dimension (N) */
/*         This contains the permutation which will reintegrate the */
/*         subproblem just solved back into sorted order, */
/*         ie. D( INDXQ( I = 1, N ) ) will be in ascending order. */

/*  IWORK  (workspace) INTEGER array, dimension (4*N) */

/*  RWORK  (workspace) REAL array, */
/*                                 dimension (3*N+2*QSIZ*N) */

/*  WORK   (workspace) COMPLEX array, dimension (QSIZ*N) */

/*  QSTORE (input/output) REAL array, dimension (N**2+1) */
/*         Stores eigenvectors of submatrices encountered during */
/*         divide and conquer, packed together. QPTR points to */
/*         beginning of the submatrices. */

/*  QPTR   (input/output) INTEGER array, dimension (N+2) */
/*         List of indices pointing to beginning of submatrices stored */
/*         in QSTORE. The submatrices are numbered starting at the */
/*         bottom left of the divide and conquer tree, from left to */
/*         right and bottom to top. */

/*  PRMPTR (input) INTEGER array, dimension (N lg N) */
/*         Contains a list of pointers which indicate where in PERM a */
/*         level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i) */
/*         indicates the size of the permutation and also the size of */
/*         the full, non-deflated problem. */

/*  PERM   (input) INTEGER array, dimension (N lg N) */
/*         Contains the permutations (from deflation and sorting) to be */
/*         applied to each eigenblock. */

/*  GIVPTR (input) INTEGER array, dimension (N lg N) */
/*         Contains a list of pointers which indicate where in GIVCOL a */
/*         level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i) */
/*         indicates the number of Givens rotations. */

/*  GIVCOL (input) INTEGER array, dimension (2, N lg N) */
/*         Each pair of numbers indicates a pair of columns to take place */
/*         in a Givens rotation. */

/*  GIVNUM (input) REAL array, dimension (2, N lg N) */
/*         Each number indicates the S value to be used in the */
/*         corresponding Givens rotation. */

/*  INFO   (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          > 0:  if INFO = 1, an eigenvalue did not converge */

/*  ===================================================================== */

/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --indxq;
    --qstore;
    --qptr;
    --prmptr;
    --perm;
    --givptr;
    givcol -= 3;
    givnum -= 3;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    *info = 0;

/*     IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN */
/*        INFO = -1 */
/*     ELSE IF( N.LT.0 ) THEN */
    if (*n < 0) {
	*info = -1;
    } else if (MIN(1,*n) > *cutpnt || *n < *cutpnt) {
	*info = -2;
    } else if (*qsiz < *n) {
	*info = -3;
    } else if (*ldq < MAX(1,*n)) {
	*info = -9;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CLAED7", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     The following values are for bookkeeping purposes only.  They are */
/*     int pointers which indicate the portion of the workspace */
/*     used by a particular array in SLAED2 and SLAED3. */

    iz = 1;
    idlmda = iz + *n;
    iw = idlmda + *n;
    iq = iw + *n;

    indx = 1;
    indxc = indx + *n;
    coltyp = indxc + *n;
    indxp = coltyp + *n;

/*     Form the z-vector which consists of the last row of Q_1 and the */
/*     first row of Q_2. */

    ptr = pow_ii(&c__2, tlvls) + 1;
    i__1 = *curlvl - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = *tlvls - i__;
	ptr += pow_ii(&c__2, &i__2);
/* L10: */
    }
    curr = ptr + *curpbm;
    slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &
	    givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[
	    iz + *n], info);

/*     When solving the final problem, we no longer need the stored data, */
/*     so we will overwrite the data from this level onto the previously */
/*     used storage space. */

    if (*curlvl == *tlvls) {
	qptr[curr] = 1;
	prmptr[curr] = 1;
	givptr[curr] = 1;
    }

/*     Sort and Deflate eigenvalues. */

    claed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], 
	    &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[
	    indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[
	    (givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], info);
    prmptr[curr + 1] = prmptr[curr] + *n;
    givptr[curr + 1] += givptr[curr];

/*     Solve Secular Equation. */

    if (k != 0) {
	slaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda]
, &rwork[iw], &qstore[qptr[curr]], &k, info);
	clacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[
		q_offset], ldq, &rwork[iq]);
/* Computing 2nd power */
	i__1 = k;
	qptr[curr + 1] = qptr[curr] + i__1 * i__1;
	if (*info != 0) {
	    return 0;
	}

/*     Prepare the INDXQ sorting premutation. */

	n1 = k;
	n2 = *n - k;
	slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
    } else {
	qptr[curr + 1] = qptr[curr];
	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    indxq[i__] = i__;
/* L20: */
	}
    }

    return 0;

/*     End of CLAED7 */

} /* claed7_ */
Ejemplo n.º 4
0
/* Subroutine */ int slaed8_(integer *icompq, integer *k, integer *n, integer 
	*qsiz, real *d, real *q, integer *ldq, integer *indxq, real *rho, 
	integer *cutpnt, real *z, real *dlamda, real *q2, integer *ldq2, real 
	*w, integer *perm, integer *givptr, integer *givcol, real *givnum, 
	integer *indxp, integer *indx, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, 
  
       Courant Institute, NAG Ltd., and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SLAED8 merges the two sets of eigenvalues together into a single   
    sorted set.  Then it tries to deflate the size of the problem.   
    There are two ways in which deflation can occur:  when two or more   
    eigenvalues are close together or if there is a tiny element in the   
    Z vector.  For each such occurrence the order of the related secular 
  
    equation problem is reduced by one.   

    Arguments   
    =========   

    ICOMPQ  (input) INTEGER   
            = 0:  Compute eigenvalues only.   
            = 1:  Compute eigenvectors of original dense symmetric matrix 
  
                  also.  On entry, Q contains the orthogonal matrix used 
  
                  to reduce the original matrix to tridiagonal form.   

    K      (output) INTEGER   
           The number of non-deflated eigenvalues, and the order of the   
           related secular equation.   

    N      (input) INTEGER   
           The dimension of the symmetric tridiagonal matrix.  N >= 0.   

    QSIZ   (input) INTEGER   
           The dimension of the orthogonal matrix used to reduce   
           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1. 
  

    D      (input/output) REAL array, dimension (N)   
           On entry, the eigenvalues of the two submatrices to be   
           combined.  On exit, the trailing (N-K) updated eigenvalues   
           (those which were deflated) sorted into increasing order.   

    Q      (input/output) REAL array, dimension (LDQ,N)   
           If ICOMPQ = 0, Q is not referenced.  Otherwise,   
           on entry, Q contains the eigenvectors of the partially solved 
  
           system which has been previously updated in matrix   
           multiplies with other partially solved eigensystems.   
           On exit, Q contains the trailing (N-K) updated eigenvectors   
           (those which were deflated) in its last N-K columns.   

    LDQ    (input) INTEGER   
           The leading dimension of the array Q.  LDQ >= max(1,N).   

    INDXQ  (input) INTEGER array, dimension (N)   
           The permutation which separately sorts the two sub-problems   
           in D into ascending order.  Note that elements in the second   
           half of this permutation must first have CUTPNT added to   
           their values in order to be accurate.   

    RHO    (input/output) REAL   
           On entry, the off-diagonal element associated with the rank-1 
  
           cut which originally split the two submatrices which are now   
           being recombined.   
           On exit, RHO has been modified to the value required by   
           SLAED3.   

    CUTPNT (input) INTEGER   
           The location of the last eigenvalue in the leading   
           sub-matrix.  min(1,N) <= CUTPNT <= N.   

    Z      (input) REAL array, dimension (N)   
           On entry, Z contains the updating vector (the last row of   
           the first sub-eigenvector matrix and the first row of the   
           second sub-eigenvector matrix).   
           On exit, the contents of Z are destroyed by the updating   
           process.   

    DLAMDA (output) REAL array, dimension (N)   
           A copy of the first K eigenvalues which will be used by   
           SLAED3 to form the secular equation.   

    Q2     (output) REAL array, dimension (LDQ2,N)   
           If ICOMPQ = 0, Q2 is not referenced.  Otherwise,   
           a copy of the first K eigenvectors which will be used by   
           SLAED7 in a matrix multiply (SGEMM) to update the new   
           eigenvectors.   

    LDQ2   (input) INTEGER   
           The leading dimension of the array Q2.  LDQ2 >= max(1,N).   

    W      (output) REAL array, dimension (N)   
           The first k values of the final deflation-altered z-vector and 
  
           will be passed to SLAED3.   

    PERM   (output) INTEGER array, dimension (N)   
           The permutations (from deflation and sorting) to be applied   
           to each eigenblock.   

    GIVPTR (output) INTEGER   
           The number of Givens rotations which took place in this   
           subproblem.   

    GIVCOL (output) INTEGER array, dimension (2, N)   
           Each pair of numbers indicates a pair of columns to take place 
  
           in a Givens rotation.   

    GIVNUM (output) REAL array, dimension (2, N)   
           Each number indicates the S value to be used in the   
           corresponding Givens rotation.   

    INDXP  (workspace) INTEGER array, dimension (N)   
           The permutation used to place deflated values of D at the end 
  
           of the array.  INDXP(1:K) points to the nondeflated D-values   
           and INDXP(K+1:N) points to the deflated eigenvalues.   

    INDX   (workspace) INTEGER array, dimension (N)   
           The permutation used to sort the contents of D into ascending 
  
           order.   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    ===================================================================== 
  



       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static real c_b3 = -1.f;
    static integer c__1 = 1;
    
    /* System generated locals */
    integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
    real r__1;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer jlam, imax, jmax;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static real c;
    static integer i, j;
    static real s, t;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer k2;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static integer n1, n2;
    extern doublereal slapy2_(real *, real *);
    static integer jp;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer 
	    *, integer *, integer *), slacpy_(char *, integer *, integer *, 
	    real *, integer *, real *, integer *);
    static integer n1p1;
    static real eps, tau, tol;


    --d;
    q_dim1 = *ldq;
    q_offset = q_dim1 + 1;
    q -= q_offset;
    --indxq;
    --z;
    --dlamda;
    q2_dim1 = *ldq2;
    q2_offset = q2_dim1 + 1;
    q2 -= q2_offset;
    --w;
    --perm;
    givcol -= 3;
    givnum -= 3;
    --indxp;
    --indx;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*n < 0) {
	*info = -3;
    } else if (*icompq == 1 && *qsiz < *n) {
	*info = -4;
    } else if (*ldq < max(1,*n)) {
	*info = -7;
    } else if (*cutpnt < min(1,*n) || *cutpnt > *n) {
	*info = -10;
    } else if (*ldq2 < max(1,*n)) {
	*info = -14;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAED8", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    n1 = *cutpnt;
    n2 = *n - n1;
    n1p1 = n1 + 1;

    if (*rho < 0.f) {
	sscal_(&n2, &c_b3, &z[n1p1], &c__1);
    }

/*     Normalize z so that norm(z) = 1 */

    t = 1.f / sqrt(2.f);
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	indx[j] = j;
/* L10: */
    }
    sscal_(n, &t, &z[1], &c__1);
    *rho = (r__1 = *rho * 2.f, dabs(r__1));

/*     Sort the eigenvalues into increasing order */

    i__1 = *n;
    for (i = *cutpnt + 1; i <= i__1; ++i) {
	indxq[i] += *cutpnt;
/* L20: */
    }
    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	dlamda[i] = d[indxq[i]];
	w[i] = z[indxq[i]];
/* L30: */
    }
    i = 1;
    j = *cutpnt + 1;
    slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	d[i] = dlamda[indx[i]];
	z[i] = w[indx[i]];
/* L40: */
    }

/*     Calculate the allowable deflation tolerence */

    imax = isamax_(n, &z[1], &c__1);
    jmax = isamax_(n, &d[1], &c__1);
    eps = slamch_("Epsilon");
    tol = eps * 8.f * (r__1 = d[jmax], dabs(r__1));

/*     If the rank-1 modifier is small enough, no more needs to be done   
       except to reorganize Q so that its columns correspond with the   
       elements in D. */

    if (*rho * (r__1 = z[imax], dabs(r__1)) <= tol) {
	*k = 0;
	if (*icompq == 0) {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		perm[j] = indxq[indx[j]];
/* L50: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		perm[j] = indxq[indx[j]];
		scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 
			+ 1], &c__1);
/* L60: */
	    }
	    slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq);
	}
	return 0;
    }

/*     If there are multiple eigenvalues then the problem deflates.  Here 
  
       the number of equal eigenvalues are found.  As each equal   
       eigenvalue is found, an elementary reflector is computed to rotate 
  
       the corresponding eigensubspace so that the corresponding   
       components of Z are zero in this new basis. */

    *k = 0;
    *givptr = 0;
    k2 = *n + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) {

/*           Deflate due to small z component. */

	    --k2;
	    indxp[k2] = j;
	    if (j == *n) {
		goto L110;
	    }
	} else {
	    jlam = j;
	    goto L80;
	}
/* L70: */
    }
L80:
    ++j;
    if (j > *n) {
	goto L100;
    }
    if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) {

/*        Deflate due to small z component. */

	--k2;
	indxp[k2] = j;
    } else {

/*        Check if eigenvalues are close enough to allow deflation. */

	s = z[jlam];
	c = z[j];

/*        Find sqrt(a**2+b**2) without overflow or   
          destructive underflow. */

	tau = slapy2_(&c, &s);
	t = d[j] - d[jlam];
	c /= tau;
	s = -(doublereal)s / tau;
	if ((r__1 = t * c * s, dabs(r__1)) <= tol) {

/*           Deflation is possible. */

	    z[j] = tau;
	    z[jlam] = 0.f;

/*           Record the appropriate Givens rotation */

	    ++(*givptr);
	    givcol[(*givptr << 1) + 1] = indxq[indx[jlam]];
	    givcol[(*givptr << 1) + 2] = indxq[indx[j]];
	    givnum[(*givptr << 1) + 1] = c;
	    givnum[(*givptr << 1) + 2] = s;
	    if (*icompq == 1) {
		srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[
			indxq[indx[j]] * q_dim1 + 1], &c__1, &c, &s);
	    }
	    t = d[jlam] * c * c + d[j] * s * s;
	    d[j] = d[jlam] * s * s + d[j] * c * c;
	    d[jlam] = t;
	    --k2;
	    i = 1;
L90:
	    if (k2 + i <= *n) {
		if (d[jlam] < d[indxp[k2 + i]]) {
		    indxp[k2 + i - 1] = indxp[k2 + i];
		    indxp[k2 + i] = jlam;
		    ++i;
		    goto L90;
		} else {
		    indxp[k2 + i - 1] = jlam;
		}
	    } else {
		indxp[k2 + i - 1] = jlam;
	    }
	    jlam = j;
	} else {
	    ++(*k);
	    w[*k] = z[jlam];
	    dlamda[*k] = d[jlam];
	    indxp[*k] = jlam;
	    jlam = j;
	}
    }
    goto L80;
L100:

/*     Record the last eigenvalue. */

    ++(*k);
    w[*k] = z[jlam];
    dlamda[*k] = d[jlam];
    indxp[*k] = jlam;

L110:

/*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA   
       and Q2 respectively.  The eigenvalues/vectors which were not   
       deflated go into the first K slots of DLAMDA and Q2 respectively, 
  
       while those which were deflated go into the last N - K slots. */

    if (*icompq == 0) {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    jp = indxp[j];
	    dlamda[j] = d[jp];
	    perm[j] = indxq[indx[jp]];
/* L120: */
	}
    } else {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    jp = indxp[j];
	    dlamda[j] = d[jp];
	    perm[j] = indxq[indx[jp]];
	    scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1]
		    , &c__1);
/* L130: */
	}
    }

/*     The deflated eigenvalues and their corresponding vectors go back   
       into the last N - K slots of D and Q respectively. */

    if (*k < *n) {
	if (*icompq == 0) {
	    i__1 = *n - *k;
	    scopy_(&i__1, &dlamda[*k + 1], &c__1, &d[*k + 1], &c__1);
	} else {
	    i__1 = *n - *k;
	    scopy_(&i__1, &dlamda[*k + 1], &c__1, &d[*k + 1], &c__1);
	    i__1 = *n - *k;
	    slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*
		    k + 1) * q_dim1 + 1], ldq);
	}
    }

    return 0;

/*     End of SLAED8 */

} /* slaed8_ */
Ejemplo n.º 5
0
/* Subroutine */ int claed8_(integer *k, integer *n, integer *qsiz, complex *
	q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__, 
	real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp, 
	integer *indx, integer *indxq, integer *perm, integer *givptr, 
	integer *givcol, real *givnum, integer *info)
{
    /* System generated locals */
    integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
    real r__1;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    real c__;
    integer i__, j;
    real s, t;
    integer k2, n1, n2, jp, n1p1;
    real eps, tau, tol;
    integer jlam, imax, jmax;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
	    ccopy_(integer *, complex *, integer *, complex *, integer *), 
	    csrot_(integer *, complex *, integer *, complex *, integer *, 
	    real *, real *), scopy_(integer *, real *, integer *, real *, 
	    integer *);
    extern doublereal slapy2_(real *, real *), slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *), xerbla_(char *, 
	    integer *);
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer 
	    *, integer *, integer *);


/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CLAED8 merges the two sets of eigenvalues together into a single */
/*  sorted set.  Then it tries to deflate the size of the problem. */
/*  There are two ways in which deflation can occur:  when two or more */
/*  eigenvalues are close together or if there is a tiny element in the */
/*  Z vector.  For each such occurrence the order of the related secular */
/*  equation problem is reduced by one. */

/*  Arguments */
/*  ========= */

/*  K      (output) INTEGER */
/*         Contains the number of non-deflated eigenvalues. */
/*         This is the order of the related secular equation. */

/*  N      (input) INTEGER */
/*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */

/*  QSIZ   (input) INTEGER */
/*         The dimension of the unitary matrix used to reduce */
/*         the dense or band matrix to tridiagonal form. */
/*         QSIZ >= N if ICOMPQ = 1. */

/*  Q      (input/output) COMPLEX array, dimension (LDQ,N) */
/*         On entry, Q contains the eigenvectors of the partially solved */
/*         system which has been previously updated in matrix */
/*         multiplies with other partially solved eigensystems. */
/*         On exit, Q contains the trailing (N-K) updated eigenvectors */
/*         (those which were deflated) in its last N-K columns. */

/*  LDQ    (input) INTEGER */
/*         The leading dimension of the array Q.  LDQ >= max( 1, N ). */

/*  D      (input/output) REAL array, dimension (N) */
/*         On entry, D contains the eigenvalues of the two submatrices to */
/*         be combined.  On exit, D contains the trailing (N-K) updated */
/*         eigenvalues (those which were deflated) sorted into increasing */
/*         order. */

/*  RHO    (input/output) REAL */
/*         Contains the off diagonal element associated with the rank-1 */
/*         cut which originally split the two submatrices which are now */
/*         being recombined. RHO is modified during the computation to */
/*         the value required by SLAED3. */

/*  CUTPNT (input) INTEGER */
/*         Contains the location of the last eigenvalue in the leading */
/*         sub-matrix.  MIN(1,N) <= CUTPNT <= N. */

/*  Z      (input) REAL array, dimension (N) */
/*         On input this vector contains the updating vector (the last */
/*         row of the first sub-eigenvector matrix and the first row of */
/*         the second sub-eigenvector matrix).  The contents of Z are */
/*         destroyed during the updating process. */

/*  DLAMDA (output) REAL array, dimension (N) */
/*         Contains a copy of the first K eigenvalues which will be used */
/*         by SLAED3 to form the secular equation. */

/*  Q2     (output) COMPLEX array, dimension (LDQ2,N) */
/*         If ICOMPQ = 0, Q2 is not referenced.  Otherwise, */
/*         Contains a copy of the first K eigenvectors which will be used */
/*         by SLAED7 in a matrix multiply (SGEMM) to update the new */
/*         eigenvectors. */

/*  LDQ2   (input) INTEGER */
/*         The leading dimension of the array Q2.  LDQ2 >= max( 1, N ). */

/*  W      (output) REAL array, dimension (N) */
/*         This will hold the first k values of the final */
/*         deflation-altered z-vector and will be passed to SLAED3. */

/*  INDXP  (workspace) INTEGER array, dimension (N) */
/*         This will contain the permutation used to place deflated */
/*         values of D at the end of the array. On output INDXP(1:K) */
/*         points to the nondeflated D-values and INDXP(K+1:N) */
/*         points to the deflated eigenvalues. */

/*  INDX   (workspace) INTEGER array, dimension (N) */
/*         This will contain the permutation used to sort the contents of */
/*         D into ascending order. */

/*  INDXQ  (input) INTEGER array, dimension (N) */
/*         This contains the permutation which separately sorts the two */
/*         sub-problems in D into ascending order.  Note that elements in */
/*         the second half of this permutation must first have CUTPNT */
/*         added to their values in order to be accurate. */

/*  PERM   (output) INTEGER array, dimension (N) */
/*         Contains the permutations (from deflation and sorting) to be */
/*         applied to each eigenblock. */

/*  GIVPTR (output) INTEGER */
/*         Contains the number of Givens rotations which took place in */
/*         this subproblem. */

/*  GIVCOL (output) INTEGER array, dimension (2, N) */
/*         Each pair of numbers indicates a pair of columns to take place */
/*         in a Givens rotation. */

/*  GIVNUM (output) REAL array, dimension (2, N) */
/*         Each number indicates the S value to be used in the */
/*         corresponding Givens rotation. */

/*  INFO   (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --d__;
    --z__;
    --dlamda;
    q2_dim1 = *ldq2;
    q2_offset = 1 + q2_dim1;
    q2 -= q2_offset;
    --w;
    --indxp;
    --indx;
    --indxq;
    --perm;
    givcol -= 3;
    givnum -= 3;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
	*info = -2;
    } else if (*qsiz < *n) {
	*info = -3;
    } else if (*ldq < max(1,*n)) {
	*info = -5;
    } else if (*cutpnt < min(1,*n) || *cutpnt > *n) {
	*info = -8;
    } else if (*ldq2 < max(1,*n)) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CLAED8", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    n1 = *cutpnt;
    n2 = *n - n1;
    n1p1 = n1 + 1;

    if (*rho < 0.f) {
	sscal_(&n2, &c_b3, &z__[n1p1], &c__1);
    }

/*     Normalize z so that norm(z) = 1 */

    t = 1.f / sqrt(2.f);
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	indx[j] = j;
/* L10: */
    }
    sscal_(n, &t, &z__[1], &c__1);
    *rho = (r__1 = *rho * 2.f, dabs(r__1));

/*     Sort the eigenvalues into increasing order */

    i__1 = *n;
    for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) {
	indxq[i__] += *cutpnt;
/* L20: */
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	dlamda[i__] = d__[indxq[i__]];
	w[i__] = z__[indxq[i__]];
/* L30: */
    }
    i__ = 1;
    j = *cutpnt + 1;
    slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	d__[i__] = dlamda[indx[i__]];
	z__[i__] = w[indx[i__]];
/* L40: */
    }

/*     Calculate the allowable deflation tolerance */

    imax = isamax_(n, &z__[1], &c__1);
    jmax = isamax_(n, &d__[1], &c__1);
    eps = slamch_("Epsilon");
    tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1));

/*     If the rank-1 modifier is small enough, no more needs to be done */
/*     -- except to reorganize Q so that its columns correspond with the */
/*     elements in D. */

    if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
	*k = 0;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    perm[j] = indxq[indx[j]];
	    ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1]
, &c__1);
/* L50: */
	}
	clacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq);
	return 0;
    }

/*     If there are multiple eigenvalues then the problem deflates.  Here */
/*     the number of equal eigenvalues are found.  As each equal */
/*     eigenvalue is found, an elementary reflector is computed to rotate */
/*     the corresponding eigensubspace so that the corresponding */
/*     components of Z are zero in this new basis. */

    *k = 0;
    *givptr = 0;
    k2 = *n + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) {

/*           Deflate due to small z component. */

	    --k2;
	    indxp[k2] = j;
	    if (j == *n) {
		goto L100;
	    }
	} else {
	    jlam = j;
	    goto L70;
	}
/* L60: */
    }
L70:
    ++j;
    if (j > *n) {
	goto L90;
    }
    if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) {

/*        Deflate due to small z component. */

	--k2;
	indxp[k2] = j;
    } else {

/*        Check if eigenvalues are close enough to allow deflation. */

	s = z__[jlam];
	c__ = z__[j];

/*        Find sqrt(a**2+b**2) without overflow or */
/*        destructive underflow. */

	tau = slapy2_(&c__, &s);
	t = d__[j] - d__[jlam];
	c__ /= tau;
	s = -s / tau;
	if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {

/*           Deflation is possible. */

	    z__[j] = tau;
	    z__[jlam] = 0.f;

/*           Record the appropriate Givens rotation */

	    ++(*givptr);
	    givcol[(*givptr << 1) + 1] = indxq[indx[jlam]];
	    givcol[(*givptr << 1) + 2] = indxq[indx[j]];
	    givnum[(*givptr << 1) + 1] = c__;
	    givnum[(*givptr << 1) + 2] = s;
	    csrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[
		    indx[j]] * q_dim1 + 1], &c__1, &c__, &s);
	    t = d__[jlam] * c__ * c__ + d__[j] * s * s;
	    d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__;
	    d__[jlam] = t;
	    --k2;
	    i__ = 1;
L80:
	    if (k2 + i__ <= *n) {
		if (d__[jlam] < d__[indxp[k2 + i__]]) {
		    indxp[k2 + i__ - 1] = indxp[k2 + i__];
		    indxp[k2 + i__] = jlam;
		    ++i__;
		    goto L80;
		} else {
		    indxp[k2 + i__ - 1] = jlam;
		}
	    } else {
		indxp[k2 + i__ - 1] = jlam;
	    }
	    jlam = j;
	} else {
	    ++(*k);
	    w[*k] = z__[jlam];
	    dlamda[*k] = d__[jlam];
	    indxp[*k] = jlam;
	    jlam = j;
	}
    }
    goto L70;
L90:

/*     Record the last eigenvalue. */

    ++(*k);
    w[*k] = z__[jlam];
    dlamda[*k] = d__[jlam];
    indxp[*k] = jlam;

L100:

/*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
/*     and Q2 respectively.  The eigenvalues/vectors which were not */
/*     deflated go into the first K slots of DLAMDA and Q2 respectively, */
/*     while those which were deflated go into the last N - K slots. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	jp = indxp[j];
	dlamda[j] = d__[jp];
	perm[j] = indxq[indx[jp]];
	ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], &
		c__1);
/* L110: */
    }

/*     The deflated eigenvalues and their corresponding vectors go back */
/*     into the last N - K slots of D and Q respectively. */

    if (*k < *n) {
	i__1 = *n - *k;
	scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
	i__1 = *n - *k;
	clacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + 
		1) * q_dim1 + 1], ldq);
    }

    return 0;

/*     End of CLAED8 */

} /* claed8_ */
Ejemplo n.º 6
0
 int slasd2_(int *nl, int *nr, int *sqre, int 
	*k, float *d__, float *z__, float *alpha, float *beta, float *u, int *
	ldu, float *vt, int *ldvt, float *dsigma, float *u2, int *ldu2, 
	float *vt2, int *ldvt2, int *idxp, int *idx, int *idxc, 
	 int *idxq, int *coltyp, int *info)
{
    /* System generated locals */
    int u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, 
	    vt2_dim1, vt2_offset, i__1;
    float r__1, r__2;

    /* Local variables */
    float c__;
    int i__, j, m, n;
    float s;
    int k2;
    float z1;
    int ct, jp;
    float eps, tau, tol;
    int psm[4], nlp1, nlp2, idxi, idxj, ctot[4];
    extern  int srot_(int *, float *, int *, float *, 
	    int *, float *, float *);
    int idxjp, jprev;
    extern  int scopy_(int *, float *, int *, float *, 
	    int *);
    extern double slapy2_(float *, float *), slamch_(char *);
    extern  int xerbla_(char *, int *), slamrg_(
	    int *, int *, float *, int *, int *, int *);
    float hlftol;
    extern  int slacpy_(char *, int *, int *, float *, 
	    int *, float *, int *), slaset_(char *, int *, 
	    int *, float *, float *, float *, int *);


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLASD2 merges the two sets of singular values together into a single */
/*  sorted set.  Then it tries to deflate the size of the problem. */
/*  There are two ways in which deflation can occur:  when two or more */
/*  singular values are close together or if there is a tiny entry in the */
/*  Z vector.  For each such occurrence the order of the related secular */
/*  equation problem is reduced by one. */

/*  SLASD2 is called from SLASD1. */

/*  Arguments */
/*  ========= */

/*  NL     (input) INTEGER */
/*         The row dimension of the upper block.  NL >= 1. */

/*  NR     (input) INTEGER */
/*         The row dimension of the lower block.  NR >= 1. */

/*  SQRE   (input) INTEGER */
/*         = 0: the lower block is an NR-by-NR square matrix. */
/*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */

/*         The bidiagonal matrix has N = NL + NR + 1 rows and */
/*         M = N + SQRE >= N columns. */

/*  K      (output) INTEGER */
/*         Contains the dimension of the non-deflated matrix, */
/*         This is the order of the related secular equation. 1 <= K <=N. */

/*  D      (input/output) REAL array, dimension (N) */
/*         On entry D contains the singular values of the two submatrices */
/*         to be combined.  On exit D contains the trailing (N-K) updated */
/*         singular values (those which were deflated) sorted into */
/*         increasing order. */

/*  Z      (output) REAL array, dimension (N) */
/*         On exit Z contains the updating row vector in the secular */
/*         equation. */

/*  ALPHA  (input) REAL */
/*         Contains the diagonal element associated with the added row. */

/*  BETA   (input) REAL */
/*         Contains the off-diagonal element associated with the added */
/*         row. */

/*  U      (input/output) REAL array, dimension (LDU,N) */
/*         On entry U contains the left singular vectors of two */
/*         submatrices in the two square blocks with corners at (1,1), */
/*         (NL, NL), and (NL+2, NL+2), (N,N). */
/*         On exit U contains the trailing (N-K) updated left singular */
/*         vectors (those which were deflated) in its last N-K columns. */

/*  LDU    (input) INTEGER */
/*         The leading dimension of the array U.  LDU >= N. */

/*  VT     (input/output) REAL array, dimension (LDVT,M) */
/*         On entry VT' contains the right singular vectors of two */
/*         submatrices in the two square blocks with corners at (1,1), */
/*         (NL+1, NL+1), and (NL+2, NL+2), (M,M). */
/*         On exit VT' contains the trailing (N-K) updated right singular */
/*         vectors (those which were deflated) in its last N-K columns. */
/*         In case SQRE =1, the last row of VT spans the right null */
/*         space. */

/*  LDVT   (input) INTEGER */
/*         The leading dimension of the array VT.  LDVT >= M. */

/*  DSIGMA (output) REAL array, dimension (N) */
/*         Contains a copy of the diagonal elements (K-1 singular values */
/*         and one zero) in the secular equation. */

/*  U2     (output) REAL array, dimension (LDU2,N) */
/*         Contains a copy of the first K-1 left singular vectors which */
/*         will be used by SLASD3 in a matrix multiply (SGEMM) to solve */
/*         for the new left singular vectors. U2 is arranged into four */
/*         blocks. The first block contains a column with 1 at NL+1 and */
/*         zero everywhere else; the second block contains non-zero */
/*         entries only at and above NL; the third contains non-zero */
/*         entries only below NL+1; and the fourth is dense. */

/*  LDU2   (input) INTEGER */
/*         The leading dimension of the array U2.  LDU2 >= N. */

/*  VT2    (output) REAL array, dimension (LDVT2,N) */
/*         VT2' contains a copy of the first K right singular vectors */
/*         which will be used by SLASD3 in a matrix multiply (SGEMM) to */
/*         solve for the new right singular vectors. VT2 is arranged into */
/*         three blocks. The first block contains a row that corresponds */
/*         to the special 0 diagonal element in SIGMA; the second block */
/*         contains non-zeros only at and before NL +1; the third block */
/*         contains non-zeros only at and after  NL +2. */

/*  LDVT2  (input) INTEGER */
/*         The leading dimension of the array VT2.  LDVT2 >= M. */

/*  IDXP   (workspace) INTEGER array, dimension (N) */
/*         This will contain the permutation used to place deflated */
/*         values of D at the end of the array. On output IDXP(2:K) */
/*         points to the nondeflated D-values and IDXP(K+1:N) */
/*         points to the deflated singular values. */

/*  IDX    (workspace) INTEGER array, dimension (N) */
/*         This will contain the permutation used to sort the contents of */
/*         D into ascending order. */

/*  IDXC   (output) INTEGER array, dimension (N) */
/*         This will contain the permutation used to arrange the columns */
/*         of the deflated U matrix into three groups:  the first group */
/*         contains non-zero entries only at and above NL, the second */
/*         contains non-zero entries only below NL+2, and the third is */
/*         dense. */

/*  IDXQ   (input/output) INTEGER array, dimension (N) */
/*         This contains the permutation which separately sorts the two */
/*         sub-problems in D into ascending order.  Note that entries in */
/*         the first hlaf of this permutation must first be moved one */
/*         position backward; and entries in the second half */
/*         must first have NL+1 added to their values. */

/*  COLTYP (workspace/output) INTEGER array, dimension (N) */
/*         As workspace, this will contain a label which will indicate */
/*         which of the following types a column in the U2 matrix or a */
/*         row in the VT2 matrix is: */
/*         1 : non-zero in the upper half only */
/*         2 : non-zero in the lower half only */
/*         3 : dense */
/*         4 : deflated */

/*         On exit, it is an array of dimension 4, with COLTYP(I) being */
/*         the dimension of the I-th type columns. */

/*  INFO   (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Ming Gu and Huan Ren, Computer Science Division, University of */
/*     California at Berkeley, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    --z__;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1;
    vt -= vt_offset;
    --dsigma;
    u2_dim1 = *ldu2;
    u2_offset = 1 + u2_dim1;
    u2 -= u2_offset;
    vt2_dim1 = *ldvt2;
    vt2_offset = 1 + vt2_dim1;
    vt2 -= vt2_offset;
    --idxp;
    --idx;
    --idxc;
    --idxq;
    --coltyp;

    /* Function Body */
    *info = 0;

    if (*nl < 1) {
	*info = -1;
    } else if (*nr < 1) {
	*info = -2;
    } else if (*sqre != 1 && *sqre != 0) {
	*info = -3;
    }

    n = *nl + *nr + 1;
    m = n + *sqre;

    if (*ldu < n) {
	*info = -10;
    } else if (*ldvt < m) {
	*info = -12;
    } else if (*ldu2 < n) {
	*info = -15;
    } else if (*ldvt2 < m) {
	*info = -17;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLASD2", &i__1);
	return 0;
    }

    nlp1 = *nl + 1;
    nlp2 = *nl + 2;

/*     Generate the first part of the vector Z; and move the singular */
/*     values in the first part of D one position backward. */

    z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1];
    z__[1] = z1;
    for (i__ = *nl; i__ >= 1; --i__) {
	z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1];
	d__[i__ + 1] = d__[i__];
	idxq[i__ + 1] = idxq[i__] + 1;
/* L10: */
    }

/*     Generate the second part of the vector Z. */

    i__1 = m;
    for (i__ = nlp2; i__ <= i__1; ++i__) {
	z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1];
/* L20: */
    }

/*     Initialize some reference arrays. */

    i__1 = nlp1;
    for (i__ = 2; i__ <= i__1; ++i__) {
	coltyp[i__] = 1;
/* L30: */
    }
    i__1 = n;
    for (i__ = nlp2; i__ <= i__1; ++i__) {
	coltyp[i__] = 2;
/* L40: */
    }

/*     Sort the singular values into increasing order */

    i__1 = n;
    for (i__ = nlp2; i__ <= i__1; ++i__) {
	idxq[i__] += nlp1;
/* L50: */
    }

/*     DSIGMA, IDXC, IDXC, and the first column of U2 */
/*     are used as storage space. */

    i__1 = n;
    for (i__ = 2; i__ <= i__1; ++i__) {
	dsigma[i__] = d__[idxq[i__]];
	u2[i__ + u2_dim1] = z__[idxq[i__]];
	idxc[i__] = coltyp[idxq[i__]];
/* L60: */
    }

    slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);

    i__1 = n;
    for (i__ = 2; i__ <= i__1; ++i__) {
	idxi = idx[i__] + 1;
	d__[i__] = dsigma[idxi];
	z__[i__] = u2[idxi + u2_dim1];
	coltyp[i__] = idxc[idxi];
/* L70: */
    }

/*     Calculate the allowable deflation tolerance */

    eps = slamch_("Epsilon");
/* Computing MAX */
    r__1 = ABS(*alpha), r__2 = ABS(*beta);
    tol = MAX(r__1,r__2);
/* Computing MAX */
    r__2 = (r__1 = d__[n], ABS(r__1));
    tol = eps * 8.f * MAX(r__2,tol);

/*     There are 2 kinds of deflation -- first a value in the z-vector */
/*     is small, second two (or more) singular values are very close */
/*     together (their difference is small). */

/*     If the value in the z-vector is small, we simply permute the */
/*     array so that the corresponding singular value is moved to the */
/*     end. */

/*     If two values in the D-vector are close, we perform a two-sided */
/*     rotation designed to make one of the corresponding z-vector */
/*     entries zero, and then permute the array so that the deflated */
/*     singular value is moved to the end. */

/*     If there are multiple singular values then the problem deflates. */
/*     Here the number of equal singular values are found.  As each equal */
/*     singular value is found, an elementary reflector is computed to */
/*     rotate the corresponding singular subspace so that the */
/*     corresponding components of Z are zero in this new basis. */

    *k = 1;
    k2 = n + 1;
    i__1 = n;
    for (j = 2; j <= i__1; ++j) {
	if ((r__1 = z__[j], ABS(r__1)) <= tol) {

/*           Deflate due to small z component. */

	    --k2;
	    idxp[k2] = j;
	    coltyp[j] = 4;
	    if (j == n) {
		goto L120;
	    }
	} else {
	    jprev = j;
	    goto L90;
	}
/* L80: */
    }
L90:
    j = jprev;
L100:
    ++j;
    if (j > n) {
	goto L110;
    }
    if ((r__1 = z__[j], ABS(r__1)) <= tol) {

/*        Deflate due to small z component. */

	--k2;
	idxp[k2] = j;
	coltyp[j] = 4;
    } else {

/*        Check if singular values are close enough to allow deflation. */

	if ((r__1 = d__[j] - d__[jprev], ABS(r__1)) <= tol) {

/*           Deflation is possible. */

	    s = z__[jprev];
	    c__ = z__[j];

/*           Find sqrt(a**2+b**2) without overflow or */
/*           destructive underflow. */

	    tau = slapy2_(&c__, &s);
	    c__ /= tau;
	    s = -s / tau;
	    z__[j] = tau;
	    z__[jprev] = 0.f;

/*           Apply back the Givens rotation to the left and right */
/*           singular vector matrices. */

	    idxjp = idxq[idx[jprev] + 1];
	    idxj = idxq[idx[j] + 1];
	    if (idxjp <= nlp1) {
		--idxjp;
	    }
	    if (idxj <= nlp1) {
		--idxj;
	    }
	    srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], &
		    c__1, &c__, &s);
	    srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, &
		    c__, &s);
	    if (coltyp[j] != coltyp[jprev]) {
		coltyp[j] = 3;
	    }
	    coltyp[jprev] = 4;
	    --k2;
	    idxp[k2] = jprev;
	    jprev = j;
	} else {
	    ++(*k);
	    u2[*k + u2_dim1] = z__[jprev];
	    dsigma[*k] = d__[jprev];
	    idxp[*k] = jprev;
	    jprev = j;
	}
    }
    goto L100;
L110:

/*     Record the last singular value. */

    ++(*k);
    u2[*k + u2_dim1] = z__[jprev];
    dsigma[*k] = d__[jprev];
    idxp[*k] = jprev;

L120:

/*     Count up the total number of the various types of columns, then */
/*     form a permutation which positions the four column types into */
/*     four groups of uniform structure (although one or more of these */
/*     groups may be empty). */

    for (j = 1; j <= 4; ++j) {
	ctot[j - 1] = 0;
/* L130: */
    }
    i__1 = n;
    for (j = 2; j <= i__1; ++j) {
	ct = coltyp[j];
	++ctot[ct - 1];
/* L140: */
    }

/*     PSM(*) = Position in SubMatrix (of types 1 through 4) */

    psm[0] = 2;
    psm[1] = ctot[0] + 2;
    psm[2] = psm[1] + ctot[1];
    psm[3] = psm[2] + ctot[2];

/*     Fill out the IDXC array so that the permutation which it induces */
/*     will place all type-1 columns first, all type-2 columns next, */
/*     then all type-3's, and finally all type-4's, starting from the */
/*     second column. This applies similarly to the rows of VT. */

    i__1 = n;
    for (j = 2; j <= i__1; ++j) {
	jp = idxp[j];
	ct = coltyp[jp];
	idxc[psm[ct - 1]] = j;
	++psm[ct - 1];
/* L150: */
    }

/*     Sort the singular values and corresponding singular vectors into */
/*     DSIGMA, U2, and VT2 respectively.  The singular values/vectors */
/*     which were not deflated go into the first K slots of DSIGMA, U2, */
/*     and VT2 respectively, while those which were deflated go into the */
/*     last N - K slots, except that the first column/row will be treated */
/*     separately. */

    i__1 = n;
    for (j = 2; j <= i__1; ++j) {
	jp = idxp[j];
	dsigma[j] = d__[jp];
	idxj = idxq[idx[idxp[idxc[j]]] + 1];
	if (idxj <= nlp1) {
	    --idxj;
	}
	scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1);
	scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2);
/* L160: */
    }

/*     Determine DSIGMA(1), DSIGMA(2) and Z(1) */

    dsigma[1] = 0.f;
    hlftol = tol / 2.f;
    if (ABS(dsigma[2]) <= hlftol) {
	dsigma[2] = hlftol;
    }
    if (m > n) {
	z__[1] = slapy2_(&z1, &z__[m]);
	if (z__[1] <= tol) {
	    c__ = 1.f;
	    s = 0.f;
	    z__[1] = tol;
	} else {
	    c__ = z1 / z__[1];
	    s = z__[m] / z__[1];
	}
    } else {
	if (ABS(z1) <= tol) {
	    z__[1] = tol;
	} else {
	    z__[1] = z1;
	}
    }

/*     Move the rest of the updating row to Z. */

    i__1 = *k - 1;
    scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1);

/*     Determine the first column of U2, the first row of VT2 and the */
/*     last row of VT. */

    slaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2);
    u2[nlp1 + u2_dim1] = 1.f;
    if (m > n) {
	i__1 = nlp1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1];
	    vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1];
/* L170: */
	}
	i__1 = m;
	for (i__ = nlp2; i__ <= i__1; ++i__) {
	    vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1];
	    vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1];
/* L180: */
	}
    } else {
	scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2);
    }
    if (m > n) {
	scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2);
    }

/*     The deflated singular values and their corresponding vectors go */
/*     into the back of D, U, and V respectively. */

    if (n > *k) {
	i__1 = n - *k;
	scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
	i__1 = n - *k;
	slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1)
		 * u_dim1 + 1], ldu);
	i__1 = n - *k;
	slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + 
		vt_dim1], ldvt);
    }

/*     Copy CTOT into COLTYP for referencing in SLASD3. */

    for (j = 1; j <= 4; ++j) {
	coltyp[j] = ctot[j - 1];
/* L190: */
    }

    return 0;

/*     End of SLASD2 */

} /* slasd2_ */
Ejemplo n.º 7
0
/* Subroutine */ int slaed7_(integer *icompq, integer *n, integer *qsiz, 
	integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, 
	integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *
	qstore, integer *qptr, integer *prmptr, integer *perm, integer *
	givptr, integer *givcol, real *givnum, real *work, integer *iwork, 
	integer *info)
{
    /* System generated locals */
    integer q_dim1, q_offset, i__1, i__2;

    /* Builtin functions */
    integer pow_ii(integer *, integer *);

    /* Local variables */
    static integer indx, curr, i__, k, indxc;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static integer indxp, n1, n2;
    extern /* Subroutine */ int slaed8_(integer *, integer *, integer *, 
	    integer *, real *, real *, integer *, integer *, real *, integer *
	    , real *, real *, real *, integer *, real *, integer *, integer *,
	     integer *, real *, integer *, integer *, integer *), slaed9_(
	    integer *, integer *, integer *, integer *, real *, real *, 
	    integer *, real *, real *, real *, real *, integer *, integer *), 
	    slaeda_(integer *, integer *, integer *, integer *, integer *, 
	    integer *, integer *, integer *, real *, real *, integer *, real *
	    , real *, integer *);
    static integer idlmda, is, iw, iz;
    extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_(
	    integer *, integer *, real *, integer *, integer *, integer *);
    static integer coltyp, iq2, ptr, ldq2;


#define givcol_ref(a_1,a_2) givcol[(a_2)*2 + a_1]
#define givnum_ref(a_1,a_2) givnum[(a_2)*2 + a_1]


/*  -- LAPACK routine (instrumented to count operations, version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   

       Common block to return operation count and iteration count   
       ITCNT is unchanged, OPS is only incremented   

    Purpose   
    =======   

    SLAED7 computes the updated eigensystem of a diagonal   
    matrix after modification by a rank-one symmetric matrix. This   
    routine is used only for the eigenproblem which requires all   
    eigenvalues and optionally eigenvectors of a dense symmetric matrix   
    that has been reduced to tridiagonal form.  SLAED1 handles   
    the case in which all eigenvalues and eigenvectors of a symmetric   
    tridiagonal matrix are desired.   

      T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)   

       where Z = Q'u, u is a vector of length N with ones in the   
       CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.   

       The eigenvectors of the original matrix are stored in Q, and the   
       eigenvalues are in D.  The algorithm consists of three stages:   

          The first stage consists of deflating the size of the problem   
          when there are multiple eigenvalues or if there is a zero in   
          the Z vector.  For each such occurence the dimension of the   
          secular equation problem is reduced by one.  This stage is   
          performed by the routine SLAED8.   

          The second stage consists of calculating the updated   
          eigenvalues. This is done by finding the roots of the secular   
          equation via the routine SLAED4 (as called by SLAED9).   
          This routine also calculates the eigenvectors of the current   
          problem.   

          The final stage consists of computing the updated eigenvectors   
          directly using the updated eigenvalues.  The eigenvectors for   
          the current problem are multiplied with the eigenvectors from   
          the overall problem.   

    Arguments   
    =========   

    ICOMPQ  (input) INTEGER   
            = 0:  Compute eigenvalues only.   
            = 1:  Compute eigenvectors of original dense symmetric matrix   
                  also.  On entry, Q contains the orthogonal matrix used   
                  to reduce the original matrix to tridiagonal form.   

    N      (input) INTEGER   
           The dimension of the symmetric tridiagonal matrix.  N >= 0.   

    QSIZ   (input) INTEGER   
           The dimension of the orthogonal matrix used to reduce   
           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.   

    TLVLS  (input) INTEGER   
           The total number of merging levels in the overall divide and   
           conquer tree.   

    CURLVL (input) INTEGER   
           The current level in the overall merge routine,   
           0 <= CURLVL <= TLVLS.   

    CURPBM (input) INTEGER   
           The current problem in the current level in the overall   
           merge routine (counting from upper left to lower right).   

    D      (input/output) REAL array, dimension (N)   
           On entry, the eigenvalues of the rank-1-perturbed matrix.   
           On exit, the eigenvalues of the repaired matrix.   

    Q      (input/output) REAL array, dimension (LDQ, N)   
           On entry, the eigenvectors of the rank-1-perturbed matrix.   
           On exit, the eigenvectors of the repaired tridiagonal matrix.   

    LDQ    (input) INTEGER   
           The leading dimension of the array Q.  LDQ >= max(1,N).   

    INDXQ  (output) INTEGER array, dimension (N)   
           The permutation which will reintegrate the subproblem just   
           solved back into sorted order, i.e., D( INDXQ( I = 1, N ) )   
           will be in ascending order.   

    RHO    (input) REAL   
           The subdiagonal element used to create the rank-1   
           modification.   

    CUTPNT (input) INTEGER   
           Contains the location of the last eigenvalue in the leading   
           sub-matrix.  min(1,N) <= CUTPNT <= N.   

    QSTORE (input/output) REAL array, dimension (N**2+1)   
           Stores eigenvectors of submatrices encountered during   
           divide and conquer, packed together. QPTR points to   
           beginning of the submatrices.   

    QPTR   (input/output) INTEGER array, dimension (N+2)   
           List of indices pointing to beginning of submatrices stored   
           in QSTORE. The submatrices are numbered starting at the   
           bottom left of the divide and conquer tree, from left to   
           right and bottom to top.   

    PRMPTR (input) INTEGER array, dimension (N lg N)   
           Contains a list of pointers which indicate where in PERM a   
           level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)   
           indicates the size of the permutation and also the size of   
           the full, non-deflated problem.   

    PERM   (input) INTEGER array, dimension (N lg N)   
           Contains the permutations (from deflation and sorting) to be   
           applied to each eigenblock.   

    GIVPTR (input) INTEGER array, dimension (N lg N)   
           Contains a list of pointers which indicate where in GIVCOL a   
           level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)   
           indicates the number of Givens rotations.   

    GIVCOL (input) INTEGER array, dimension (2, N lg N)   
           Each pair of numbers indicates a pair of columns to take place   
           in a Givens rotation.   

    GIVNUM (input) REAL array, dimension (2, N lg N)   
           Each number indicates the S value to be used in the   
           corresponding Givens rotation.   

    WORK   (workspace) REAL array, dimension (3*N+QSIZ*N)   

    IWORK  (workspace) INTEGER array, dimension (4*N)   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if INFO = 1, an eigenvalue did not converge   

    Further Details   
    ===============   

    Based on contributions by   
       Jeff Rutter, Computer Science Division, University of California   
       at Berkeley, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --indxq;
    --qstore;
    --qptr;
    --prmptr;
    --perm;
    --givptr;
    givcol -= 3;
    givnum -= 3;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*icompq == 1 && *qsiz < *n) {
	*info = -4;
    } else if (*ldq < max(1,*n)) {
	*info = -9;
    } else if (min(1,*n) > *cutpnt || *n < *cutpnt) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAED7", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     The following values are for bookkeeping purposes only.  They are   
       integer pointers which indicate the portion of the workspace   
       used by a particular array in SLAED8 and SLAED9. */

    if (*icompq == 1) {
	ldq2 = *qsiz;
    } else {
	ldq2 = *n;
    }

    iz = 1;
    idlmda = iz + *n;
    iw = idlmda + *n;
    iq2 = iw + *n;
    is = iq2 + *n * ldq2;

    indx = 1;
    indxc = indx + *n;
    coltyp = indxc + *n;
    indxp = coltyp + *n;

/*     Form the z-vector which consists of the last row of Q_1 and the   
       first row of Q_2. */

    ptr = pow_ii(&c__2, tlvls) + 1;
    i__1 = *curlvl - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = *tlvls - i__;
	ptr += pow_ii(&c__2, &i__2);
/* L10: */
    }
    curr = ptr + *curpbm;
    slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &
	    givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz 
	    + *n], info);

/*     When solving the final problem, we no longer need the stored data,   
       so we will overwrite the data from this level onto the previously   
       used storage space. */

    if (*curlvl == *tlvls) {
	qptr[curr] = 1;
	prmptr[curr] = 1;
	givptr[curr] = 1;
    }

/*     Sort and Deflate eigenvalues. */

    slaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, 
	    cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], &
	    perm[prmptr[curr]], &givptr[curr + 1], &givcol_ref(1, givptr[curr]
	    ), &givnum_ref(1, givptr[curr]), &iwork[indxp], &iwork[indx], 
	    info);
    prmptr[curr + 1] = prmptr[curr] + *n;
    givptr[curr + 1] += givptr[curr];

/*     Solve Secular Equation. */

    if (k != 0) {
	slaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], 
		&work[iw], &qstore[qptr[curr]], &k, info);
	if (*info != 0) {
	    goto L30;
	}
	if (*icompq == 1) {
	    latime_1.ops += (real) (*qsiz) * 2 * k * k;
	    sgemm_("N", "N", qsiz, &k, &k, &c_b10, &work[iq2], &ldq2, &qstore[
		    qptr[curr]], &k, &c_b11, &q[q_offset], ldq);
	}
/* Computing 2nd power */
	i__1 = k;
	qptr[curr + 1] = qptr[curr] + i__1 * i__1;

/*     Prepare the INDXQ sorting permutation. */

	n1 = k;
	n2 = *n - k;
	slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
    } else {
	qptr[curr + 1] = qptr[curr];
	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    indxq[i__] = i__;
/* L20: */
	}
    }

L30:
    return 0;

/*     End of SLAED7 */

} /* slaed7_ */
Ejemplo n.º 8
0
/* Subroutine */ int slaed2_(integer *k, integer *n, real *d, real *q, 
	integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *z, 
	real *dlamda, real *q2, integer *ldq2, integer *indxc, real *w, 
	integer *indxp, integer *indx, integer *coltyp, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, 
  
       Courant Institute, NAG Ltd., and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SLAED2 merges the two sets of eigenvalues together into a single   
    sorted set.  Then it tries to deflate the size of the problem.   
    There are two ways in which deflation can occur:  when two or more   
    eigenvalues are close together or if there is a tiny entry in the   
    Z vector.  For each such occurrence the order of the related secular 
  
    equation problem is reduced by one.   

    Arguments   
    =========   

    K      (output) INTEGER   
           The number of non-deflated eigenvalues, and the order of the   
           related secular equation. 0 <= K <=N.   

    N      (input) INTEGER   
           The dimension of the symmetric tridiagonal matrix.  N >= 0.   

    D      (input/output) REAL array, dimension (N)   
           On entry, D contains the eigenvalues of the two submatrices to 
  
           be combined.   
           On exit, D contains the trailing (N-K) updated eigenvalues   
           (those which were deflated) sorted into increasing order.   

    Q      (input/output) REAL array, dimension (LDQ, N)   
           On entry, Q contains the eigenvectors of two submatrices in   
           the two square blocks with corners at (1,1), (CUTPNT,CUTPNT)   
           and (CUTPNT+1, CUTPNT+1), (N,N).   
           On exit, Q contains the trailing (N-K) updated eigenvectors   
           (those which were deflated) in its last N-K columns.   

    LDQ    (input) INTEGER   
           The leading dimension of the array Q.  LDQ >= max(1,N).   

    INDXQ  (input/output) INTEGER array, dimension (N)   
           The permutation which separately sorts the two sub-problems   
           in D into ascending order.  Note that elements in the second   
           half of this permutation must first have CUTPNT added to their 
  
           values. Destroyed on exit.   

    RHO    (input/output) REAL   
           On entry, the off-diagonal element associated with the rank-1 
  
           cut which originally split the two submatrices which are now   
           being recombined.   
           On exit, RHO has been modified to the value required by   
           SLAED3.   

    CUTPNT (input) INTEGER   
           The location of the last eigenvalue in the leading sub-matrix. 
  
           min(1,N) <= CUTPNT <= N.   

    Z      (input) REAL array, dimension (N)   
           On entry, Z contains the updating vector (the last   
           row of the first sub-eigenvector matrix and the first row of   
           the second sub-eigenvector matrix).   
           On exit, the contents of Z have been destroyed by the updating 
  
           process.   

    DLAMDA (output) REAL array, dimension (N)   
           A copy of the first K eigenvalues which will be used by   
           SLAED3 to form the secular equation.   

    Q2     (output) REAL array, dimension (LDQ2, N)   
           A copy of the first K eigenvectors which will be used by   
           SLAED3 in a matrix multiply (SGEMM) to solve for the new   
           eigenvectors.   Q2 is arranged into three blocks.  The   
           first block contains non-zero elements only at and above   
           CUTPNT, the second contains non-zero elements only below   
           CUTPNT, and the third is dense.   

    LDQ2   (input) INTEGER   
           The leading dimension of the array Q2.  LDQ2 >= max(1,N).   

    INDXC  (output) INTEGER array, dimension (N)   
           The permutation used to arrange the columns of the deflated   
           Q matrix into three groups:  the first group contains non-zero 
  
           elements only at and above CUTPNT, the second contains   
           non-zero elements only below CUTPNT, and the third is dense.   

    W      (output) REAL array, dimension (N)   
           The first k values of the final deflation-altered z-vector   
           which will be passed to SLAED3.   

    INDXP  (workspace) INTEGER array, dimension (N)   
           The permutation used to place deflated values of D at the end 
  
           of the array.  INDXP(1:K) points to the nondeflated D-values   
           and INDXP(K+1:N) points to the deflated eigenvalues.   

    INDX   (workspace) INTEGER array, dimension (N)   
           The permutation used to sort the contents of D into ascending 
  
           order.   

    COLTYP (workspace/output) INTEGER array, dimension (N)   
           During execution, a label which will indicate which of the   
           following types a column in the Q2 matrix is:   
           1 : non-zero in the upper half only;   
           2 : non-zero in the lower half only;   
           3 : dense;   
           4 : deflated.   
           On exit, COLTYP(i) is the number of columns of type i,   
           for i=1 to 4 only.   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    ===================================================================== 
  


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static real c_b3 = -1.f;
    static integer c__1 = 1;
    
    /* System generated locals */
    integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
    real r__1, r__2, r__3, r__4;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer jlam, imax, jmax, ctot[4];
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static real c;
    static integer i, j;
    static real s, t;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer k2;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static integer n1, n2;
    extern doublereal slapy2_(real *, real *);
    static integer ct, jp;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer 
	    *, integer *, integer *), slacpy_(char *, integer *, integer *, 
	    real *, integer *, real *, integer *);
    static integer n1p1;
    static real eps, tau, tol;
    static integer psm[4];


    --d;
    q_dim1 = *ldq;
    q_offset = q_dim1 + 1;
    q -= q_offset;
    --indxq;
    --z;
    --dlamda;
    q2_dim1 = *ldq2;
    q2_offset = q2_dim1 + 1;
    q2 -= q2_offset;
    --indxc;
    --w;
    --indxp;
    --indx;
    --coltyp;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
	*info = -2;
    } else if (*ldq < max(1,*n)) {
	*info = -5;
    } else if (min(1,*n) > *cutpnt || *n < *cutpnt) {
	*info = -8;
    } else if (*ldq2 < max(1,*n)) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAED2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    n1 = *cutpnt;
    n2 = *n - n1;
    n1p1 = n1 + 1;

    if (*rho < 0.f) {
	sscal_(&n2, &c_b3, &z[n1p1], &c__1);
    }

/*     Normalize z so that norm(z) = 1.  Since z is the concatenation of 
  
       two normalized vectors, norm2(z) = sqrt(2). */

    t = 1.f / sqrt(2.f);
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	indx[j] = j;
/* L10: */
    }
    sscal_(n, &t, &z[1], &c__1);

/*     RHO = ABS( norm(z)**2 * RHO ) */

    *rho = (r__1 = *rho * 2.f, dabs(r__1));

    i__1 = *cutpnt;
    for (i = 1; i <= i__1; ++i) {
	coltyp[i] = 1;
/* L20: */
    }
    i__1 = *n;
    for (i = *cutpnt + 1; i <= i__1; ++i) {
	coltyp[i] = 2;
/* L30: */
    }

/*     Sort the eigenvalues into increasing order */

    i__1 = *n;
    for (i = *cutpnt + 1; i <= i__1; ++i) {
	indxq[i] += *cutpnt;
/* L40: */
    }

/*     re-integrate the deflated parts from the last pass */

    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	dlamda[i] = d[indxq[i]];
	w[i] = z[indxq[i]];
	indxc[i] = coltyp[indxq[i]];
/* L50: */
    }
    slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	d[i] = dlamda[indx[i]];
	z[i] = w[indx[i]];
	coltyp[i] = indxc[indx[i]];
/* L60: */
    }

/*     Calculate the allowable deflation tolerance */

    imax = isamax_(n, &z[1], &c__1);
    jmax = isamax_(n, &d[1], &c__1);
    eps = slamch_("Epsilon");
/* Computing MAX */
    r__3 = (r__1 = d[jmax], dabs(r__1)), r__4 = (r__2 = z[imax], dabs(r__2));
    tol = eps * 8.f * dmax(r__3,r__4);

/*     If the rank-1 modifier is small enough, no more needs to be done   
       except to reorganize Q so that its columns correspond with the   
       elements in D. */

    if (*rho * (r__1 = z[imax], dabs(r__1)) <= tol) {
	*k = 0;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    scopy_(n, &q[indxq[indx[j]] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 
		    + 1], &c__1);
/* L70: */
	}
	slacpy_("A", n, n, &q2[q2_offset], ldq2, &q[q_offset], ldq);
	goto L180;
    }

/*     If there are multiple eigenvalues then the problem deflates.  Here 
  
       the number of equal eigenvalues are found.  As each equal   
       eigenvalue is found, an elementary reflector is computed to rotate 
  
       the corresponding eigensubspace so that the corresponding   
       components of Z are zero in this new basis. */

    *k = 0;
    k2 = *n + 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) {

/*           Deflate due to small z component. */

	    --k2;
	    indxp[k2] = j;
	    coltyp[j] = 4;
	    if (j == *n) {
		goto L120;
	    }
	} else {
	    jlam = j;
	    goto L90;
	}
/* L80: */
    }
L90:
    ++j;
    if (j > *n) {
	goto L110;
    }
    if (*rho * (r__1 = z[j], dabs(r__1)) <= tol) {

/*        Deflate due to small z component. */

	--k2;
	indxp[k2] = j;
	coltyp[j] = 4;
    } else {

/*        Check if eigenvalues are close enough to allow deflation. */

	s = z[jlam];
	c = z[j];

/*        Find sqrt(a**2+b**2) without overflow or   
          destructive underflow. */

	tau = slapy2_(&c, &s);
	t = d[j] - d[jlam];
	c /= tau;
	s = -(doublereal)s / tau;
	if ((r__1 = t * c * s, dabs(r__1)) <= tol) {

/*           Deflation is possible. */

	    z[j] = tau;
	    z[jlam] = 0.f;
	    if (coltyp[j] != coltyp[jlam]) {
		coltyp[j] = 3;
	    }
	    coltyp[jlam] = 4;
	    srot_(n, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[indx[
		    j]] * q_dim1 + 1], &c__1, &c, &s);
/* Computing 2nd power */
	    r__1 = c;
/* Computing 2nd power */
	    r__2 = s;
	    t = d[jlam] * (r__1 * r__1) + d[j] * (r__2 * r__2);
/* Computing 2nd power */
	    r__1 = s;
/* Computing 2nd power */
	    r__2 = c;
	    d[j] = d[jlam] * (r__1 * r__1) + d[j] * (r__2 * r__2);
	    d[jlam] = t;
	    --k2;
	    i = 1;
L100:
	    if (k2 + i <= *n) {
		if (d[jlam] < d[indxp[k2 + i]]) {
		    indxp[k2 + i - 1] = indxp[k2 + i];
		    indxp[k2 + i] = jlam;
		    ++i;
		    goto L100;
		} else {
		    indxp[k2 + i - 1] = jlam;
		}
	    } else {
		indxp[k2 + i - 1] = jlam;
	    }
	    jlam = j;
	} else {
	    ++(*k);
	    w[*k] = z[jlam];
	    dlamda[*k] = d[jlam];
	    indxp[*k] = jlam;
	    jlam = j;
	}
    }
    goto L90;
L110:

/*     Record the last eigenvalue. */

    ++(*k);
    w[*k] = z[jlam];
    dlamda[*k] = d[jlam];
    indxp[*k] = jlam;

L120:

/*     Count up the total number of the various types of columns, then   
       form a permutation which positions the four column types into   
       four uniform groups (although one or more of these groups may be   
       empty). */

    for (j = 1; j <= 4; ++j) {
	ctot[j - 1] = 0;
/* L130: */
    }
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	ct = coltyp[j];
	++ctot[ct - 1];
/* L140: */
    }

/*     PSM(*) = Position in SubVISMatrix (of types 1 through 4) */

    psm[0] = 1;
    psm[1] = ctot[0] + 1;
    psm[2] = psm[1] + ctot[1];
    psm[3] = psm[2] + ctot[2];

/*     Fill out the INDXC array so that the permutation which it induces 
  
       will place all type-1 columns first, all type-2 columns next,   
       then all type-3's, and finally all type-4's. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	jp = indxp[j];
	ct = coltyp[jp];
	indxc[psm[ct - 1]] = j;
	++psm[ct - 1];
/* L150: */
    }

/*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA   
       and Q2 respectively.  The eigenvalues/vectors which were not   
       deflated go into the first K slots of DLAMDA and Q2 respectively, 
  
       while those which were deflated go into the last N - K slots. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	jp = indxp[j];
	dlamda[j] = d[jp];
	scopy_(n, &q[indxq[indx[indxp[indxc[j]]]] * q_dim1 + 1], &c__1, &q2[j 
		* q2_dim1 + 1], &c__1);
/* L160: */
    }

/*     The deflated eigenvalues and their corresponding vectors go back   
       into the last N - K slots of D and Q respectively. */

    i__1 = *n - *k;
    scopy_(&i__1, &dlamda[*k + 1], &c__1, &d[*k + 1], &c__1);
    i__1 = *n - *k;
    slacpy_("A", n, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + 1) * 
	    q_dim1 + 1], ldq);

/*     Copy CTOT into COLTYP for referencing in SLAED3. */

    for (j = 1; j <= 4; ++j) {
	coltyp[j] = ctot[j - 1];
/* L170: */
    }

L180:
    return 0;

/*     End of SLAED2 */

} /* slaed2_ */
Ejemplo n.º 9
0
/* Subroutine */ int slaed1_(integer *n, real *d__, real *q, integer *ldq, 
	integer *indxq, real *rho, integer *cutpnt, real *work, integer *
	iwork, integer *info)
{
    /* System generated locals */
    integer q_dim1, q_offset, i__1, i__2;

    /* Local variables */
    integer i__, k, n1, n2, is, iw, iz, iq2, cpp1, indx, indxc, indxp;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), slaed2_(integer *, integer *, integer *, real *, real 
	    *, integer *, integer *, real *, real *, real *, real *, real *, 
	    integer *, integer *, integer *, integer *, integer *), slaed3_(
	    integer *, integer *, integer *, real *, real *, integer *, real *
, real *, real *, integer *, integer *, real *, real *, integer *)
	    ;
    integer idlmda;
    extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_(
	    integer *, integer *, real *, integer *, integer *, integer *);
    integer coltyp;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAED1 computes the updated eigensystem of a diagonal */
/*  matrix after modification by a rank-one symmetric matrix.  This */
/*  routine is used only for the eigenproblem which requires all */
/*  eigenvalues and eigenvectors of a tridiagonal matrix.  SLAED7 handles */
/*  the case in which eigenvalues only or eigenvalues and eigenvectors */
/*  of a full symmetric matrix (which was reduced to tridiagonal form) */
/*  are desired. */

/*    T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) */

/*     where Z = Q'u, u is a vector of length N with ones in the */
/*     CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. */

/*     The eigenvectors of the original matrix are stored in Q, and the */
/*     eigenvalues are in D.  The algorithm consists of three stages: */

/*        The first stage consists of deflating the size of the problem */
/*        when there are multiple eigenvalues or if there is a zero in */
/*        the Z vector.  For each such occurence the dimension of the */
/*        secular equation problem is reduced by one.  This stage is */
/*        performed by the routine SLAED2. */

/*        The second stage consists of calculating the updated */
/*        eigenvalues. This is done by finding the roots of the secular */
/*        equation via the routine SLAED4 (as called by SLAED3). */
/*        This routine also calculates the eigenvectors of the current */
/*        problem. */

/*        The final stage consists of computing the updated eigenvectors */
/*        directly using the updated eigenvalues.  The eigenvectors for */
/*        the current problem are multiplied with the eigenvectors from */
/*        the overall problem. */

/*  Arguments */
/*  ========= */

/*  N      (input) INTEGER */
/*         The dimension of the symmetric tridiagonal matrix.  N >= 0. */

/*  D      (input/output) REAL array, dimension (N) */
/*         On entry, the eigenvalues of the rank-1-perturbed matrix. */
/*         On exit, the eigenvalues of the repaired matrix. */

/*  Q      (input/output) REAL array, dimension (LDQ,N) */
/*         On entry, the eigenvectors of the rank-1-perturbed matrix. */
/*         On exit, the eigenvectors of the repaired tridiagonal matrix. */

/*  LDQ    (input) INTEGER */
/*         The leading dimension of the array Q.  LDQ >= max(1,N). */

/*  INDXQ  (input/output) INTEGER array, dimension (N) */
/*         On entry, the permutation which separately sorts the two */
/*         subproblems in D into ascending order. */
/*         On exit, the permutation which will reintegrate the */
/*         subproblems back into sorted order, */
/*         i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. */

/*  RHO    (input) REAL */
/*         The subdiagonal entry used to create the rank-1 modification. */

/*  CUTPNT (input) INTEGER */
/*         The location of the last eigenvalue in the leading sub-matrix. */
/*         min(1,N) <= CUTPNT <= N/2. */

/*  WORK   (workspace) REAL array, dimension (4*N + N**2) */

/*  IWORK  (workspace) INTEGER array, dimension (4*N) */

/*  INFO   (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          > 0:  if INFO = 1, an eigenvalue did not converge */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Jeff Rutter, Computer Science Division, University of California */
/*     at Berkeley, USA */
/*  Modified by Francoise Tisseur, University of Tennessee. */

/*  ===================================================================== */

/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --indxq;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;

    if (*n < 0) {
	*info = -1;
    } else if (*ldq < max(1,*n)) {
	*info = -4;
    } else /* if(complicated condition) */ {
/* Computing MIN */
	i__1 = 1, i__2 = *n / 2;
	if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) {
	    *info = -7;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAED1", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     The following values are integer pointers which indicate */
/*     the portion of the workspace */
/*     used by a particular array in SLAED2 and SLAED3. */

    iz = 1;
    idlmda = iz + *n;
    iw = idlmda + *n;
    iq2 = iw + *n;

    indx = 1;
    indxc = indx + *n;
    coltyp = indxc + *n;
    indxp = coltyp + *n;


/*     Form the z-vector which consists of the last row of Q_1 and the */
/*     first row of Q_2. */

    scopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1);
    cpp1 = *cutpnt + 1;
    i__1 = *n - *cutpnt;
    scopy_(&i__1, &q[cpp1 + cpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1);

/*     Deflate eigenvalues. */

    slaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[
	    iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[
	    indxc], &iwork[indxp], &iwork[coltyp], info);

    if (*info != 0) {
	goto L20;
    }

/*     Solve Secular Equation. */

    if (k != 0) {
	is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + 
		1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2;
	slaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], 
		 &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[
		is], info);
	if (*info != 0) {
	    goto L20;
	}

/*     Prepare the INDXQ sorting permutation. */

	n1 = k;
	n2 = *n - k;
	slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
    } else {
	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    indxq[i__] = i__;
/* L10: */
	}
    }

L20:
    return 0;

/*     End of SLAED1 */

} /* slaed1_ */
Ejemplo n.º 10
0
/* Subroutine */ int slasd6_(integer *icompq, integer *nl, integer *nr, 
	integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta, 
	 integer *idxq, integer *perm, integer *givptr, integer *givcol, 
	integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real *
	difl, real *difr, real *z__, integer *k, real *c__, real *s, real *
	work, integer *iwork, integer *info)
{
    /* System generated locals */
    integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, 
	    poles_dim1, poles_offset, i__1;
    real r__1, r__2;

    /* Local variables */
    integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), slasd7_(integer *, integer *, integer *, integer *, 
	    integer *, real *, real *, real *, real *, real *, real *, real *, 
	     real *, real *, real *, integer *, integer *, integer *, integer 
	    *, integer *, integer *, integer *, real *, integer *, real *, 
	    real *, integer *), slasd8_(integer *, integer *, real *, real *, 
	    real *, real *, real *, real *, integer *, real *, real *, 
	    integer *);
    integer isigma;
    extern /* Subroutine */ int xerbla_(char *, integer *), slascl_(
	    char *, integer *, integer *, real *, real *, integer *, integer *
, real *, integer *, integer *), slamrg_(integer *, 
	    integer *, real *, integer *, integer *, integer *);
    real orgnrm;


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLASD6 computes the SVD of an updated upper bidiagonal matrix B */
/*  obtained by merging two smaller ones by appending a row. This */
/*  routine is used only for the problem which requires all singular */
/*  values and optionally singular vector matrices in factored form. */
/*  B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. */
/*  A related subroutine, SLASD1, handles the case in which all singular */
/*  values and singular vectors of the bidiagonal matrix are desired. */

/*  SLASD6 computes the SVD as follows: */

/*                ( D1(in)  0    0     0 ) */
/*    B = U(in) * (   Z1'   a   Z2'    b ) * VT(in) */
/*                (   0     0   D2(in) 0 ) */

/*      = U(out) * ( D(out) 0) * VT(out) */

/*  where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M */
/*  with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros */
/*  elsewhere; and the entry b is empty if SQRE = 0. */

/*  The singular values of B can be computed using D1, D2, the first */
/*  components of all the right singular vectors of the lower block, and */
/*  the last components of all the right singular vectors of the upper */
/*  block. These components are stored and updated in VF and VL, */
/*  respectively, in SLASD6. Hence U and VT are not explicitly */
/*  referenced. */

/*  The singular values are stored in D. The algorithm consists of two */
/*  stages: */

/*        The first stage consists of deflating the size of the problem */
/*        when there are multiple singular values or if there is a zero */
/*        in the Z vector. For each such occurence the dimension of the */
/*        secular equation problem is reduced by one. This stage is */
/*        performed by the routine SLASD7. */

/*        The second stage consists of calculating the updated */
/*        singular values. This is done by finding the roots of the */
/*        secular equation via the routine SLASD4 (as called by SLASD8). */
/*        This routine also updates VF and VL and computes the distances */
/*        between the updated singular values and the old singular */
/*        values. */

/*  SLASD6 is called from SLASDA. */

/*  Arguments */
/*  ========= */

/*  ICOMPQ (input) INTEGER */
/*         Specifies whether singular vectors are to be computed in */
/*         factored form: */
/*         = 0: Compute singular values only. */
/*         = 1: Compute singular vectors in factored form as well. */

/*  NL     (input) INTEGER */
/*         The row dimension of the upper block.  NL >= 1. */

/*  NR     (input) INTEGER */
/*         The row dimension of the lower block.  NR >= 1. */

/*  SQRE   (input) INTEGER */
/*         = 0: the lower block is an NR-by-NR square matrix. */
/*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */

/*         The bidiagonal matrix has row dimension N = NL + NR + 1, */
/*         and column dimension M = N + SQRE. */

/*  D      (input/output) REAL array, dimension (NL+NR+1). */
/*         On entry D(1:NL,1:NL) contains the singular values of the */
/*         upper block, and D(NL+2:N) contains the singular values */
/*         of the lower block. On exit D(1:N) contains the singular */
/*         values of the modified matrix. */

/*  VF     (input/output) REAL array, dimension (M) */
/*         On entry, VF(1:NL+1) contains the first components of all */
/*         right singular vectors of the upper block; and VF(NL+2:M) */
/*         contains the first components of all right singular vectors */
/*         of the lower block. On exit, VF contains the first components */
/*         of all right singular vectors of the bidiagonal matrix. */

/*  VL     (input/output) REAL array, dimension (M) */
/*         On entry, VL(1:NL+1) contains the  last components of all */
/*         right singular vectors of the upper block; and VL(NL+2:M) */
/*         contains the last components of all right singular vectors of */
/*         the lower block. On exit, VL contains the last components of */
/*         all right singular vectors of the bidiagonal matrix. */

/*  ALPHA  (input/output) REAL */
/*         Contains the diagonal element associated with the added row. */

/*  BETA   (input/output) REAL */
/*         Contains the off-diagonal element associated with the added */
/*         row. */

/*  IDXQ   (output) INTEGER array, dimension (N) */
/*         This contains the permutation which will reintegrate the */
/*         subproblem just solved back into sorted order, i.e. */
/*         D( IDXQ( I = 1, N ) ) will be in ascending order. */

/*  PERM   (output) INTEGER array, dimension ( N ) */
/*         The permutations (from deflation and sorting) to be applied */
/*         to each block. Not referenced if ICOMPQ = 0. */

/*  GIVPTR (output) INTEGER */
/*         The number of Givens rotations which took place in this */
/*         subproblem. Not referenced if ICOMPQ = 0. */

/*  GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) */
/*         Each pair of numbers indicates a pair of columns to take place */
/*         in a Givens rotation. Not referenced if ICOMPQ = 0. */

/*  LDGCOL (input) INTEGER */
/*         leading dimension of GIVCOL, must be at least N. */

/*  GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) */
/*         Each number indicates the C or S value to be used in the */
/*         corresponding Givens rotation. Not referenced if ICOMPQ = 0. */

/*  LDGNUM (input) INTEGER */
/*         The leading dimension of GIVNUM and POLES, must be at least N. */

/*  POLES  (output) REAL array, dimension ( LDGNUM, 2 ) */
/*         On exit, POLES(1,*) is an array containing the new singular */
/*         values obtained from solving the secular equation, and */
/*         POLES(2,*) is an array containing the poles in the secular */
/*         equation. Not referenced if ICOMPQ = 0. */

/*  DIFL   (output) REAL array, dimension ( N ) */
/*         On exit, DIFL(I) is the distance between I-th updated */
/*         (undeflated) singular value and the I-th (undeflated) old */
/*         singular value. */

/*  DIFR   (output) REAL array, */
/*                  dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and */
/*                  dimension ( N ) if ICOMPQ = 0. */
/*         On exit, DIFR(I, 1) is the distance between I-th updated */
/*         (undeflated) singular value and the I+1-th (undeflated) old */
/*         singular value. */

/*         If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */
/*         normalizing factors for the right singular vector matrix. */

/*         See SLASD8 for details on DIFL and DIFR. */

/*  Z      (output) REAL array, dimension ( M ) */
/*         The first elements of this array contain the components */
/*         of the deflation-adjusted updating row vector. */

/*  K      (output) INTEGER */
/*         Contains the dimension of the non-deflated matrix, */
/*         This is the order of the related secular equation. 1 <= K <=N. */

/*  C      (output) REAL */
/*         C contains garbage if SQRE =0 and the C-value of a Givens */
/*         rotation related to the right null space if SQRE = 1. */

/*  S      (output) REAL */
/*         S contains garbage if SQRE =0 and the S-value of a Givens */
/*         rotation related to the right null space if SQRE = 1. */

/*  WORK   (workspace) REAL array, dimension ( 4 * M ) */

/*  IWORK  (workspace) INTEGER array, dimension ( 3 * N ) */

/*  INFO   (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          > 0:  if INFO = 1, an singular value did not converge */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Ming Gu and Huan Ren, Computer Science Division, University of */
/*     California at Berkeley, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    --vf;
    --vl;
    --idxq;
    --perm;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1;
    givcol -= givcol_offset;
    poles_dim1 = *ldgnum;
    poles_offset = 1 + poles_dim1;
    poles -= poles_offset;
    givnum_dim1 = *ldgnum;
    givnum_offset = 1 + givnum_dim1;
    givnum -= givnum_offset;
    --difl;
    --difr;
    --z__;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;
    n = *nl + *nr + 1;
    m = n + *sqre;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*nl < 1) {
	*info = -2;
    } else if (*nr < 1) {
	*info = -3;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -4;
    } else if (*ldgcol < n) {
	*info = -14;
    } else if (*ldgnum < n) {
	*info = -16;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLASD6", &i__1);
	return 0;
    }

/*     The following values are for bookkeeping purposes only.  They are */
/*     integer pointers which indicate the portion of the workspace */
/*     used by a particular array in SLASD7 and SLASD8. */

    isigma = 1;
    iw = isigma + n;
    ivfw = iw + m;
    ivlw = ivfw + m;

    idx = 1;
    idxc = idx + n;
    idxp = idxc + n;

/*     Scale. */

/* Computing MAX */
    r__1 = dabs(*alpha), r__2 = dabs(*beta);
    orgnrm = dmax(r__1,r__2);
    d__[*nl + 1] = 0.f;
    i__1 = n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) {
	    orgnrm = (r__1 = d__[i__], dabs(r__1));
	}
/* L10: */
    }
    slascl_("G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info);
    *alpha /= orgnrm;
    *beta /= orgnrm;

/*     Sort and Deflate singular values. */

    slasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], &
	    work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], &
	    iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[
	    givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, 
	    info);

/*     Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */

    slasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], 
	    ldgnum, &work[isigma], &work[iw], info);

/*     Save the poles if ICOMPQ = 1. */

    if (*icompq == 1) {
	scopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1);
	scopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1);
    }

/*     Unscale. */

    slascl_("G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info);

/*     Prepare the IDXQ sorting permutation. */

    n1 = *k;
    n2 = n - *k;
    slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]);

    return 0;

/*     End of SLASD6 */

} /* slasd6_ */
Ejemplo n.º 11
0
/* Subroutine */ int slasd1_(integer *nl, integer *nr, integer *sqre, real *
	d__, real *alpha, real *beta, real *u, integer *ldu, real *vt, 
	integer *ldvt, integer *idxq, integer *iwork, real *work, integer *
	info)
{
    /* System generated locals */
    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1;
    real r__1, r__2;

    /* Local variables */
    static integer idxc, idxp, ldvt2, i__, k, m, n, n1, n2;
    extern /* Subroutine */ int slasd2_(integer *, integer *, integer *, 
	    integer *, real *, real *, real *, real *, real *, integer *, 
	    real *, integer *, real *, real *, integer *, real *, integer *, 
	    integer *, integer *, integer *, integer *, integer *, integer *),
	     slasd3_(integer *, integer *, integer *, integer *, real *, real 
	    *, integer *, real *, real *, integer *, real *, integer *, real *
	    , integer *, real *, integer *, integer *, integer *, real *, 
	    integer *);
    static integer iq, iz, isigma;
    extern /* Subroutine */ int xerbla_(char *, integer *), slascl_(
	    char *, integer *, integer *, real *, real *, integer *, integer *
	    , real *, integer *, integer *), slamrg_(integer *, 
	    integer *, real *, integer *, integer *, integer *);
    static real orgnrm;
    static integer coltyp, iu2, ldq, idx, ldu2, ivt2;


/*  -- LAPACK auxiliary routine (instrumented to count ops, version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B,   
    where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0.   

    A related subroutine SLASD7 handles the case in which the singular   
    values (and the singular vectors in factored form) are desired.   

    SLASD1 computes the SVD as follows:   

                  ( D1(in)  0    0     0 )   
      B = U(in) * (   Z1'   a   Z2'    b ) * VT(in)   
                  (   0     0   D2(in) 0 )   

        = U(out) * ( D(out) 0) * VT(out)   

    where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M   
    with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros   
    elsewhere; and the entry b is empty if SQRE = 0.   

    The left singular vectors of the original matrix are stored in U, and   
    the transpose of the right singular vectors are stored in VT, and the   
    singular values are in D.  The algorithm consists of three stages:   

       The first stage consists of deflating the size of the problem   
       when there are multiple singular values or when there are zeros in   
       the Z vector.  For each such occurence the dimension of the   
       secular equation problem is reduced by one.  This stage is   
       performed by the routine SLASD2.   

       The second stage consists of calculating the updated   
       singular values. This is done by finding the square roots of the   
       roots of the secular equation via the routine SLASD4 (as called   
       by SLASD3). This routine also calculates the singular vectors of   
       the current problem.   

       The final stage consists of computing the updated singular vectors   
       directly using the updated singular values.  The singular vectors   
       for the current problem are multiplied with the singular vectors   
       from the overall problem.   

    Arguments   
    =========   

    NL     (input) INTEGER   
           The row dimension of the upper block.  NL >= 1.   

    NR     (input) INTEGER   
           The row dimension of the lower block.  NR >= 1.   

    SQRE   (input) INTEGER   
           = 0: the lower block is an NR-by-NR square matrix.   
           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.   

           The bidiagonal matrix has row dimension N = NL + NR + 1,   
           and column dimension M = N + SQRE.   

    D      (input/output) REAL array,   
                          dimension (N = NL+NR+1).   
           On entry D(1:NL,1:NL) contains the singular values of the   
           upper block; and D(NL+2:N) contains the singular values of   
           the lower block. On exit D(1:N) contains the singular values   
           of the modified matrix.   

    ALPHA  (input) REAL   
           Contains the diagonal element associated with the added row.   

    BETA   (input) REAL   
           Contains the off-diagonal element associated with the added   
           row.   

    U      (input/output) REAL array, dimension(LDU,N)   
           On entry U(1:NL, 1:NL) contains the left singular vectors of   
           the upper block; U(NL+2:N, NL+2:N) contains the left singular   
           vectors of the lower block. On exit U contains the left   
           singular vectors of the bidiagonal matrix.   

    LDU    (input) INTEGER   
           The leading dimension of the array U.  LDU >= max( 1, N ).   

    VT     (input/output) REAL array, dimension(LDVT,M)   
           where M = N + SQRE.   
           On entry VT(1:NL+1, 1:NL+1)' contains the right singular   
           vectors of the upper block; VT(NL+2:M, NL+2:M)' contains   
           the right singular vectors of the lower block. On exit   
           VT' contains the right singular vectors of the   
           bidiagonal matrix.   

    LDVT   (input) INTEGER   
           The leading dimension of the array VT.  LDVT >= max( 1, M ).   

    IDXQ  (output) INTEGER array, dimension(N)   
           This contains the permutation which will reintegrate the   
           subproblem just solved back into sorted order, i.e.   
           D( IDXQ( I = 1, N ) ) will be in ascending order.   

    IWORK  (workspace) INTEGER array, dimension( 4 * N )   

    WORK   (workspace) REAL array, dimension( 3*M**2 + 2*M )   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if INFO = 1, an singular value did not converge   

    Further Details   
    ===============   

    Based on contributions by   
       Ming Gu and Huan Ren, Computer Science Division, University of   
       California at Berkeley, USA   

    =====================================================================   



       Test the input parameters.   

       Parameter adjustments */
    --d__;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    --idxq;
    --iwork;
    --work;

    /* Function Body */
    *info = 0;

    if (*nl < 1) {
	*info = -1;
    } else if (*nr < 1) {
	*info = -2;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -3;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLASD1", &i__1);
	return 0;
    }

    n = *nl + *nr + 1;
    m = n + *sqre;

/*     The following values are for bookkeeping purposes only.  They are   
       integer pointers which indicate the portion of the workspace   
       used by a particular array in SLASD2 and SLASD3. */

    ldu2 = n;
    ldvt2 = m;

    iz = 1;
    isigma = iz + m;
    iu2 = isigma + n;
    ivt2 = iu2 + ldu2 * n;
    iq = ivt2 + ldvt2 * m;

    idx = 1;
    idxc = idx + n;
    coltyp = idxc + n;
    idxp = coltyp + n;

/*     Scale.   

   Computing MAX */
    r__1 = dabs(*alpha), r__2 = dabs(*beta);
    orgnrm = dmax(r__1,r__2);
    d__[*nl + 1] = 0.f;
    i__1 = n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) {
	    orgnrm = (r__1 = d__[i__], dabs(r__1));
	}
/* L10: */
    }
    latime_1.ops += (real) (n + 2);
    slascl_("G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info);
    *alpha /= orgnrm;
    *beta /= orgnrm;

/*     Deflate singular values. */

    slasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], 
	    ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, &
	    work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], &
	    idxq[1], &iwork[coltyp], info);

/*     Solve Secular Equation and update singular vectors. */

    ldq = k;
    slasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[
	    u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[
	    ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info);
    if (*info != 0) {
	return 0;
    }

/*     Unscale. */

    latime_1.ops += (real) n;
    slascl_("G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info);

/*     Prepare the IDXQ sorting permutation. */

    n1 = k;
    n2 = n - k;
    slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]);

    return 0;

/*     End of SLASD1 */

} /* slasd1_ */
Ejemplo n.º 12
0
/* Subroutine */ int slasd7_(integer *icompq, integer *nl, integer *nr, 
	integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf, 
	real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma, 
	 integer *idx, integer *idxp, integer *idxq, integer *perm, integer *
	givptr, integer *givcol, integer *ldgcol, real *givnum, integer *
	ldgnum, real *c__, real *s, integer *info)
{
    /* System generated locals */
    integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1;
    real r__1, r__2;

    /* Local variables */
    integer i__, j, m, n, k2;
    real z1;
    integer jp;
    real eps, tau, tol;
    integer nlp1, nlp2, idxi, idxj;
    integer idxjp, jprev;
    real hlftol;

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SLASD7 merges the two sets of singular values together into a single */
/*  sorted set. Then it tries to deflate the size of the problem. There */
/*  are two ways in which deflation can occur:  when two or more singular */
/*  values are close together or if there is a tiny entry in the Z */
/*  vector. For each such occurrence the order of the related */
/*  secular equation problem is reduced by one. */

/*  SLASD7 is called from SLASD6. */

/*  Arguments */
/*  ========= */

/*  ICOMPQ  (input) INTEGER */
/*          Specifies whether singular vectors are to be computed */
/*          in compact form, as follows: */
/*          = 0: Compute singular values only. */
/*          = 1: Compute singular vectors of upper */
/*               bidiagonal matrix in compact form. */

/*  NL     (input) INTEGER */
/*         The row dimension of the upper block. NL >= 1. */

/*  NR     (input) INTEGER */
/*         The row dimension of the lower block. NR >= 1. */

/*  SQRE   (input) INTEGER */
/*         = 0: the lower block is an NR-by-NR square matrix. */
/*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */

/*         The bidiagonal matrix has */
/*         N = NL + NR + 1 rows and */
/*         M = N + SQRE >= N columns. */

/*  K      (output) INTEGER */
/*         Contains the dimension of the non-deflated matrix, this is */
/*         the order of the related secular equation. 1 <= K <=N. */

/*  D      (input/output) REAL array, dimension ( N ) */
/*         On entry D contains the singular values of the two submatrices */
/*         to be combined. On exit D contains the trailing (N-K) updated */
/*         singular values (those which were deflated) sorted into */
/*         increasing order. */

/*  Z      (output) REAL array, dimension ( M ) */
/*         On exit Z contains the updating row vector in the secular */
/*         equation. */

/*  ZW     (workspace) REAL array, dimension ( M ) */
/*         Workspace for Z. */

/*  VF     (input/output) REAL array, dimension ( M ) */
/*         On entry, VF(1:NL+1) contains the first components of all */
/*         right singular vectors of the upper block; and VF(NL+2:M) */
/*         contains the first components of all right singular vectors */
/*         of the lower block. On exit, VF contains the first components */
/*         of all right singular vectors of the bidiagonal matrix. */

/*  VFW    (workspace) REAL array, dimension ( M ) */
/*         Workspace for VF. */

/*  VL     (input/output) REAL array, dimension ( M ) */
/*         On entry, VL(1:NL+1) contains the  last components of all */
/*         right singular vectors of the upper block; and VL(NL+2:M) */
/*         contains the last components of all right singular vectors */
/*         of the lower block. On exit, VL contains the last components */
/*         of all right singular vectors of the bidiagonal matrix. */

/*  VLW    (workspace) REAL array, dimension ( M ) */
/*         Workspace for VL. */

/*  ALPHA  (input) REAL */
/*         Contains the diagonal element associated with the added row. */

/*  BETA   (input) REAL */
/*         Contains the off-diagonal element associated with the added */
/*         row. */

/*  DSIGMA (output) REAL array, dimension ( N ) */
/*         Contains a copy of the diagonal elements (K-1 singular values */
/*         and one zero) in the secular equation. */

/*  IDX    (workspace) INTEGER array, dimension ( N ) */
/*         This will contain the permutation used to sort the contents of */
/*         D into ascending order. */

/*  IDXP   (workspace) INTEGER array, dimension ( N ) */
/*         This will contain the permutation used to place deflated */
/*         values of D at the end of the array. On output IDXP(2:K) */
/*         points to the nondeflated D-values and IDXP(K+1:N) */
/*         points to the deflated singular values. */

/*  IDXQ   (input) INTEGER array, dimension ( N ) */
/*         This contains the permutation which separately sorts the two */
/*         sub-problems in D into ascending order.  Note that entries in */
/*         the first half of this permutation must first be moved one */
/*         position backward; and entries in the second half */
/*         must first have NL+1 added to their values. */

/*  PERM   (output) INTEGER array, dimension ( N ) */
/*         The permutations (from deflation and sorting) to be applied */
/*         to each singular block. Not referenced if ICOMPQ = 0. */

/*  GIVPTR (output) INTEGER */
/*         The number of Givens rotations which took place in this */
/*         subproblem. Not referenced if ICOMPQ = 0. */

/*  GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) */
/*         Each pair of numbers indicates a pair of columns to take place */
/*         in a Givens rotation. Not referenced if ICOMPQ = 0. */

/*  LDGCOL (input) INTEGER */
/*         The leading dimension of GIVCOL, must be at least N. */

/*  GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) */
/*         Each number indicates the C or S value to be used in the */
/*         corresponding Givens rotation. Not referenced if ICOMPQ = 0. */

/*  LDGNUM (input) INTEGER */
/*         The leading dimension of GIVNUM, must be at least N. */

/*  C      (output) REAL */
/*         C contains garbage if SQRE =0 and the C-value of a Givens */
/*         rotation related to the right null space if SQRE = 1. */

/*  S      (output) REAL */
/*         S contains garbage if SQRE =0 and the S-value of a Givens */
/*         rotation related to the right null space if SQRE = 1. */

/*  INFO   (output) INTEGER */
/*         = 0:  successful exit. */
/*         < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Ming Gu and Huan Ren, Computer Science Division, University of */
/*     California at Berkeley, USA */

/*  ===================================================================== */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    --z__;
    --zw;
    --vf;
    --vfw;
    --vl;
    --vlw;
    --dsigma;
    --idx;
    --idxp;
    --idxq;
    --perm;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1;
    givcol -= givcol_offset;
    givnum_dim1 = *ldgnum;
    givnum_offset = 1 + givnum_dim1;
    givnum -= givnum_offset;

    /* Function Body */
    *info = 0;
    n = *nl + *nr + 1;
    m = n + *sqre;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*nl < 1) {
	*info = -2;
    } else if (*nr < 1) {
	*info = -3;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -4;
    } else if (*ldgcol < n) {
	*info = -22;
    } else if (*ldgnum < n) {
	*info = -24;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLASD7", &i__1);
	return 0;
    }

    nlp1 = *nl + 1;
    nlp2 = *nl + 2;
    if (*icompq == 1) {
	*givptr = 0;
    }

/*     Generate the first part of the vector Z and move the singular */
/*     values in the first part of D one position backward. */

    z1 = *alpha * vl[nlp1];
    vl[nlp1] = 0.f;
    tau = vf[nlp1];
    for (i__ = *nl; i__ >= 1; --i__) {
	z__[i__ + 1] = *alpha * vl[i__];
	vl[i__] = 0.f;
	vf[i__ + 1] = vf[i__];
	d__[i__ + 1] = d__[i__];
	idxq[i__ + 1] = idxq[i__] + 1;
    }
    vf[1] = tau;

/*     Generate the second part of the vector Z. */

    i__1 = m;
    for (i__ = nlp2; i__ <= i__1; ++i__) {
	z__[i__] = *beta * vf[i__];
	vf[i__] = 0.f;
    }

/*     Sort the singular values into increasing order */

    i__1 = n;
    for (i__ = nlp2; i__ <= i__1; ++i__) {
	idxq[i__] += nlp1;
    }

/*     DSIGMA, IDXC, IDXC, and ZW are used as storage space. */

    i__1 = n;
    for (i__ = 2; i__ <= i__1; ++i__) {
	dsigma[i__] = d__[idxq[i__]];
	zw[i__] = z__[idxq[i__]];
	vfw[i__] = vf[idxq[i__]];
	vlw[i__] = vl[idxq[i__]];
    }

    slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);

    i__1 = n;
    for (i__ = 2; i__ <= i__1; ++i__) {
	idxi = idx[i__] + 1;
	d__[i__] = dsigma[idxi];
	z__[i__] = zw[idxi];
	vf[i__] = vfw[idxi];
	vl[i__] = vlw[idxi];
    }

/*     Calculate the allowable deflation tolerence */

    eps = slamch_("Epsilon");
/* Computing MAX */
    r__1 = dabs(*alpha), r__2 = dabs(*beta);
    tol = dmax(r__1,r__2);
/* Computing MAX */
    r__2 = (r__1 = d__[n], dabs(r__1));
    tol = eps * 64.f * dmax(r__2,tol);

/*     There are 2 kinds of deflation -- first a value in the z-vector */
/*     is small, second two (or more) singular values are very close */
/*     together (their difference is small). */

/*     If the value in the z-vector is small, we simply permute the */
/*     array so that the corresponding singular value is moved to the */
/*     end. */

/*     If two values in the D-vector are close, we perform a two-sided */
/*     rotation designed to make one of the corresponding z-vector */
/*     entries zero, and then permute the array so that the deflated */
/*     singular value is moved to the end. */

/*     If there are multiple singular values then the problem deflates. */
/*     Here the number of equal singular values are found.  As each equal */
/*     singular value is found, an elementary reflector is computed to */
/*     rotate the corresponding singular subspace so that the */
/*     corresponding components of Z are zero in this new basis. */

    *k = 1;
    k2 = n + 1;
    i__1 = n;
    for (j = 2; j <= i__1; ++j) {
	if ((r__1 = z__[j], dabs(r__1)) <= tol) {

/*           Deflate due to small z component. */

	    --k2;
	    idxp[k2] = j;
	    if (j == n) {
		goto L100;
	    }
	} else {
	    jprev = j;
	    goto L70;
	}
    }
L70:
    j = jprev;
L80:
    ++j;
    if (j > n) {
	goto L90;
    }
    if ((r__1 = z__[j], dabs(r__1)) <= tol) {

/*        Deflate due to small z component. */

	--k2;
	idxp[k2] = j;
    } else {

/*        Check if singular values are close enough to allow deflation. */

	if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) {

/*           Deflation is possible. */

	    *s = z__[jprev];
	    *c__ = z__[j];

/*           Find sqrt(a**2+b**2) without overflow or */
/*           destructive underflow. */

	    tau = slapy2_(c__, s);
	    z__[j] = tau;
	    z__[jprev] = 0.f;
	    *c__ /= tau;
	    *s = -(*s) / tau;

/*           Record the appropriate Givens rotation */

	    if (*icompq == 1) {
		++(*givptr);
		idxjp = idxq[idx[jprev] + 1];
		idxj = idxq[idx[j] + 1];
		if (idxjp <= nlp1) {
		    --idxjp;
		}
		if (idxj <= nlp1) {
		    --idxj;
		}
		givcol[*givptr + (givcol_dim1 << 1)] = idxjp;
		givcol[*givptr + givcol_dim1] = idxj;
		givnum[*givptr + (givnum_dim1 << 1)] = *c__;
		givnum[*givptr + givnum_dim1] = *s;
	    }
	    srot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s);
	    srot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s);
	    --k2;
	    idxp[k2] = jprev;
	    jprev = j;
	} else {
	    ++(*k);
	    zw[*k] = z__[jprev];
	    dsigma[*k] = d__[jprev];
	    idxp[*k] = jprev;
	    jprev = j;
	}
    }
    goto L80;
L90:

/*     Record the last singular value. */

    ++(*k);
    zw[*k] = z__[jprev];
    dsigma[*k] = d__[jprev];
    idxp[*k] = jprev;

L100:

/*     Sort the singular values into DSIGMA. The singular values which */
/*     were not deflated go into the first K slots of DSIGMA, except */
/*     that DSIGMA(1) is treated separately. */

    i__1 = n;
    for (j = 2; j <= i__1; ++j) {
	jp = idxp[j];
	dsigma[j] = d__[jp];
	vfw[j] = vf[jp];
	vlw[j] = vl[jp];
    }
    if (*icompq == 1) {
	i__1 = n;
	for (j = 2; j <= i__1; ++j) {
	    jp = idxp[j];
	    perm[j] = idxq[idx[jp] + 1];
	    if (perm[j] <= nlp1) {
		--perm[j];
	    }
	}
    }

/*     The deflated singular values go back into the last N - K slots of */
/*     D. */

    i__1 = n - *k;
    scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);

/*     Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and */
/*     VL(M). */

    dsigma[1] = 0.f;
    hlftol = tol / 2.f;
    if (dabs(dsigma[2]) <= hlftol) {
	dsigma[2] = hlftol;
    }
    if (m > n) {
	z__[1] = slapy2_(&z1, &z__[m]);
	if (z__[1] <= tol) {
	    *c__ = 1.f;
	    *s = 0.f;
	    z__[1] = tol;
	} else {
	    *c__ = z1 / z__[1];
	    *s = -z__[m] / z__[1];
	}
	srot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s);
	srot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s);
    } else {
	if (dabs(z1) <= tol) {
	    z__[1] = tol;
	} else {
	    z__[1] = z1;
	}
    }

/*     Restore Z, VF, and VL. */

    i__1 = *k - 1;
    scopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1);
    i__1 = n - 1;
    scopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1);
    i__1 = n - 1;
    scopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1);

    return 0;

/*     End of SLASD7 */

} /* slasd7_ */