Esempio n. 1
0
int toScalarF(int code, KFVEC(x), FVEC(r)) {
    REQUIRES(rn==1,BAD_SIZE);
    DEBUGMSG("toScalarF");
    float res;
    integer one = 1;
    integer n = xn;
    switch(code) {
        case 0: { res = snrm2_(&n,xp,&one); break; }
        case 1: { res = sasum_(&n,xp,&one);  break; }
        case 2: { res = vector_max_index_f(V(x));  break; }
        case 3: { res = vector_max_f(V(x));  break; }
        case 4: { res = vector_min_index_f(V(x)); break; }
        case 5: { res = vector_min_f(V(x)); break; }
        default: ERROR(BAD_CODE);
    }
    rp[0] = res;
    OK
}
Esempio n. 2
0
/* Subroutine */ int slasd8_(integer *icompq, integer *k, real *d__, real *
	z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, 
	real *dsigma, real *work, integer *info)
{
    /* System generated locals */
    integer difr_dim1, difr_offset, i__1, i__2;
    real r__1, r__2;

    /* Builtin functions */
    double sqrt(doublereal), r_sign(real *, real *);

    /* Local variables */
    integer i__, j;
    real dj, rho;
    integer iwk1, iwk2, iwk3;
    real temp;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    integer iwk2i, iwk3i;
    extern doublereal snrm2_(integer *, real *, integer *);
    real diflj, difrj, dsigj;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    extern doublereal slamc3_(real *, real *);
    extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, 
	    real *, real *, real *, real *, integer *), xerbla_(char *, 
	    integer *);
    real dsigjp;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
	    real *, integer *);


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     October 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLASD8 finds the square roots of the roots of the secular equation, */
/*  as defined by the values in DSIGMA and Z. It makes the appropriate */
/*  calls to SLASD4, and stores, for each  element in D, the distance */
/*  to its two nearest poles (elements in DSIGMA). It also updates */
/*  the arrays VF and VL, the first and last components of all the */
/*  right singular vectors of the original bidiagonal matrix. */

/*  SLASD8 is called from SLASD6. */

/*  Arguments */
/*  ========= */

/*  ICOMPQ  (input) INTEGER */
/*          Specifies whether singular vectors are to be computed in */
/*          factored form in the calling routine: */
/*          = 0: Compute singular values only. */
/*          = 1: Compute singular vectors in factored form as well. */

/*  K       (input) INTEGER */
/*          The number of terms in the rational function to be solved */
/*          by SLASD4.  K >= 1. */

/*  D       (output) REAL array, dimension ( K ) */
/*          On output, D contains the updated singular values. */

/*  Z       (input/output) REAL array, dimension ( K ) */
/*          On entry, the first K elements of this array contain the */
/*          components of the deflation-adjusted updating row vector. */
/*          On exit, Z is updated. */

/*  VF      (input/output) REAL array, dimension ( K ) */
/*          On entry, VF contains  information passed through DBEDE8. */
/*          On exit, VF contains the first K components of the first */
/*          components of all right singular vectors of the bidiagonal */
/*          matrix. */

/*  VL      (input/output) REAL array, dimension ( K ) */
/*          On entry, VL contains  information passed through DBEDE8. */
/*          On exit, VL contains the first K components of the last */
/*          components of all right singular vectors of the bidiagonal */
/*          matrix. */

/*  DIFL    (output) REAL array, dimension ( K ) */
/*          On exit, DIFL(I) = D(I) - DSIGMA(I). */

/*  DIFR    (output) REAL array, */
/*                   dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and */
/*                   dimension ( K ) if ICOMPQ = 0. */
/*          On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not */
/*          defined and will not be referenced. */

/*          If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */
/*          normalizing factors for the right singular vector matrix. */

/*  LDDIFR  (input) INTEGER */
/*          The leading dimension of DIFR, must be at least K. */

/*  DSIGMA  (input/output) REAL array, dimension ( K ) */
/*          On entry, the first K elements of this array contain the old */
/*          roots of the deflated updating problem.  These are the poles */
/*          of the secular equation. */
/*          On exit, the elements of DSIGMA may be very slightly altered */
/*          in value. */

/*  WORK    (workspace) REAL array, dimension at least 3 * K */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          > 0:  if INFO = 1, an singular value did not converge */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Ming Gu and Huan Ren, Computer Science Division, University of */
/*     California at Berkeley, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    --z__;
    --vf;
    --vl;
    --difl;
    difr_dim1 = *lddifr;
    difr_offset = 1 + difr_dim1;
    difr -= difr_offset;
    --dsigma;
    --work;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*k < 1) {
	*info = -2;
    } else if (*lddifr < *k) {
	*info = -9;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLASD8", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*k == 1) {
	d__[1] = dabs(z__[1]);
	difl[1] = d__[1];
	if (*icompq == 1) {
	    difl[2] = 1.f;
	    difr[(difr_dim1 << 1) + 1] = 1.f;
	}
	return 0;
    }

/*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */
/*     be computed with high relative accuracy (barring over/underflow). */
/*     This is a problem on machines without a guard digit in */
/*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
/*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */
/*     which on any of these machines zeros out the bottommost */
/*     bit of DSIGMA(I) if it is 1; this makes the subsequent */
/*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */
/*     occurs. On binary machines with a guard digit (almost all */
/*     machines) it does not change DSIGMA(I) at all. On hexadecimal */
/*     and decimal machines with a guard digit, it slightly */
/*     changes the bottommost bits of DSIGMA(I). It does not account */
/*     for hexadecimal or decimal machines without guard digits */
/*     (we know of none). We use a subroutine call to compute */
/*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating */
/*     this code. */

    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
/* L10: */
    }

/*     Book keeping. */

    iwk1 = 1;
    iwk2 = iwk1 + *k;
    iwk3 = iwk2 + *k;
    iwk2i = iwk2 - 1;
    iwk3i = iwk3 - 1;

/*     Normalize Z. */

    rho = snrm2_(k, &z__[1], &c__1);
    slascl_("G", &c__0, &c__0, &rho, &c_b8, k, &c__1, &z__[1], k, info);
    rho *= rho;

/*     Initialize WORK(IWK3). */

    slaset_("A", k, &c__1, &c_b8, &c_b8, &work[iwk3], k);

/*     Compute the updated singular values, the arrays DIFL, DIFR, */
/*     and the updated Z. */

    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	slasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[
		iwk2], info);

/*        If the root finder fails, the computation is terminated. */

	if (*info != 0) {
	    return 0;
	}
	work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j];
	difl[j] = -work[j];
	difr[j + difr_dim1] = -work[j + 1];
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + 
		    i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[
		    j]);
/* L20: */
	}
	i__2 = *k;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + 
		    i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[
		    j]);
/* L30: */
	}
/* L40: */
    }

/*     Compute updated Z. */

    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	r__2 = sqrt((r__1 = work[iwk3i + i__], dabs(r__1)));
	z__[i__] = r_sign(&r__2, &z__[i__]);
/* L50: */
    }

/*     Update VF and VL. */

    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	diflj = difl[j];
	dj = d__[j];
	dsigj = -dsigma[j];
	if (j < *k) {
	    difrj = -difr[j + difr_dim1];
	    dsigjp = -dsigma[j + 1];
	}
	work[j] = -z__[j] / diflj / (dsigma[j] + dj);
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigj) - diflj) / (
		    dsigma[i__] + dj);
/* L60: */
	}
	i__2 = *k;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigjp) + difrj) / 
		    (dsigma[i__] + dj);
/* L70: */
	}
	temp = snrm2_(k, &work[1], &c__1);
	work[iwk2i + j] = sdot_(k, &work[1], &c__1, &vf[1], &c__1) / temp;
	work[iwk3i + j] = sdot_(k, &work[1], &c__1, &vl[1], &c__1) / temp;
	if (*icompq == 1) {
	    difr[j + (difr_dim1 << 1)] = temp;
	}
/* L80: */
    }

    scopy_(k, &work[iwk2], &c__1, &vf[1], &c__1);
    scopy_(k, &work[iwk3], &c__1, &vl[1], &c__1);

    return 0;

/*     End of SLASD8 */

} /* slasd8_ */
Esempio n. 3
0
/* Subroutine */ int slaed3_(integer *k, integer *n, integer *n1, real *d__, 
	real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer *
	indx, integer *ctot, real *w, real *s, integer *info)
{
    /* System generated locals */
    integer q_dim1, q_offset, i__1, i__2;
    real r__1;

    /* Builtin functions */
    double sqrt(doublereal), r_sign(real *, real *);

    /* Local variables */
    integer i__, j, n2, n12, ii, n23, iq2;
    real temp;
    extern doublereal snrm2_(integer *, real *, integer *);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *), scopy_(integer *, real *, 
	    integer *, real *, integer *), slaed4_(integer *, integer *, real 
	    *, real *, real *, real *, real *, integer *);
    extern doublereal slamc3_(real *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *), slacpy_(
	    char *, integer *, integer *, real *, integer *, real *, integer *
), slaset_(char *, integer *, integer *, real *, real *, 
	    real *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAED3 finds the roots of the secular equation, as defined by the */
/*  values in D, W, and RHO, between 1 and K.  It makes the */
/*  appropriate calls to SLAED4 and then updates the eigenvectors by */
/*  multiplying the matrix of eigenvectors of the pair of eigensystems */
/*  being combined by the matrix of eigenvectors of the K-by-K system */
/*  which is solved here. */

/*  This code makes very mild assumptions about floating point */
/*  arithmetic. It will work on machines with a guard digit in */
/*  add/subtract, or on those binary machines without guard digits */
/*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
/*  It could conceivably fail on hexadecimal or decimal machines */
/*  without guard digits, but we know of none. */

/*  Arguments */
/*  ========= */

/*  K       (input) INTEGER */
/*          The number of terms in the rational function to be solved by */
/*          SLAED4.  K >= 0. */

/*  N       (input) INTEGER */
/*          The number of rows and columns in the Q matrix. */
/*          N >= K (deflation may result in N>K). */

/*  N1      (input) INTEGER */
/*          The location of the last eigenvalue in the leading submatrix. */
/*          min(1,N) <= N1 <= N/2. */

/*  D       (output) REAL array, dimension (N) */
/*          D(I) contains the updated eigenvalues for */
/*          1 <= I <= K. */

/*  Q       (output) REAL array, dimension (LDQ,N) */
/*          Initially the first K columns are used as workspace. */
/*          On output the columns 1 to K contain */
/*          the updated eigenvectors. */

/*  LDQ     (input) INTEGER */
/*          The leading dimension of the array Q.  LDQ >= max(1,N). */

/*  RHO     (input) REAL */
/*          The value of the parameter in the rank one update equation. */
/*          RHO >= 0 required. */

/*  DLAMDA  (input/output) REAL array, dimension (K) */
/*          The first K elements of this array contain the old roots */
/*          of the deflated updating problem.  These are the poles */
/*          of the secular equation. May be changed on output by */
/*          having lowest order bit set to zero on Cray X-MP, Cray Y-MP, */
/*          Cray-2, or Cray C-90, as described above. */

/*  Q2      (input) REAL array, dimension (LDQ2, N) */
/*          The first K columns of this matrix contain the non-deflated */
/*          eigenvectors for the split problem. */

/*  INDX    (input) INTEGER array, dimension (N) */
/*          The permutation used to arrange the columns of the deflated */
/*          Q matrix into three groups (see SLAED2). */
/*          The rows of the eigenvectors found by SLAED4 must be likewise */
/*          permuted before the matrix multiply can take place. */

/*  CTOT    (input) INTEGER array, dimension (4) */
/*          A count of the total number of the various types of columns */
/*          in Q, as described in INDX.  The fourth column type is any */
/*          column which has been deflated. */

/*  W       (input/output) REAL array, dimension (K) */
/*          The first K elements of this array contain the components */
/*          of the deflation-adjusted updating vector. Destroyed on */
/*          output. */

/*  S       (workspace) REAL array, dimension (N1 + 1)*K */
/*          Will contain the eigenvectors of the repaired matrix which */
/*          will be multiplied by the previously accumulated eigenvectors */
/*          to update the system. */

/*  LDS     (input) INTEGER */
/*          The leading dimension of S.  LDS >= max(1,K). */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          > 0:  if INFO = 1, an eigenvalue did not converge */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Jeff Rutter, Computer Science Division, University of California */
/*     at Berkeley, USA */
/*  Modified by Francoise Tisseur, University of Tennessee. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --dlamda;
    --q2;
    --indx;
    --ctot;
    --w;
    --s;

    /* Function Body */
    *info = 0;

    if (*k < 0) {
	*info = -1;
    } else if (*n < *k) {
	*info = -2;
    } else if (*ldq < max(1,*n)) {
	*info = -6;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAED3", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*k == 0) {
	return 0;
    }

/*     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */
/*     be computed with high relative accuracy (barring over/underflow). */
/*     This is a problem on machines without a guard digit in */
/*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
/*     The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */
/*     which on any of these machines zeros out the bottommost */
/*     bit of DLAMDA(I) if it is 1; this makes the subsequent */
/*     subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */
/*     occurs. On binary machines with a guard digit (almost all */
/*     machines) it does not change DLAMDA(I) at all. On hexadecimal */
/*     and decimal machines with a guard digit, it slightly */
/*     changes the bottommost bits of DLAMDA(I). It does not account */
/*     for hexadecimal or decimal machines without guard digits */
/*     (we know of none). We use a subroutine call to compute */
/*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating */
/*     this code. */

    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
/* L10: */
    }

    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], 
		info);

/*        If the zero finder fails, the computation is terminated. */

	if (*info != 0) {
	    goto L120;
	}
/* L20: */
    }

    if (*k == 1) {
	goto L110;
    }
    if (*k == 2) {
	i__1 = *k;
	for (j = 1; j <= i__1; ++j) {
	    w[1] = q[j * q_dim1 + 1];
	    w[2] = q[j * q_dim1 + 2];
	    ii = indx[1];
	    q[j * q_dim1 + 1] = w[ii];
	    ii = indx[2];
	    q[j * q_dim1 + 2] = w[ii];
/* L30: */
	}
	goto L110;
    }

/*     Compute updated W. */

    scopy_(k, &w[1], &c__1, &s[1], &c__1);

/*     Initialize W(I) = Q(I,I) */

    i__1 = *ldq + 1;
    scopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
/* L40: */
	}
	i__2 = *k;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
/* L50: */
	}
/* L60: */
    }
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	r__1 = sqrt(-w[i__]);
	w[i__] = r_sign(&r__1, &s[i__]);
/* L70: */
    }

/*     Compute eigenvectors of the modified rank-1 modification. */

    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *k;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    s[i__] = w[i__] / q[i__ + j * q_dim1];
/* L80: */
	}
	temp = snrm2_(k, &s[1], &c__1);
	i__2 = *k;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    ii = indx[i__];
	    q[i__ + j * q_dim1] = s[ii] / temp;
/* L90: */
	}
/* L100: */
    }

/*     Compute the updated eigenvectors. */

L110:

    n2 = *n - *n1;
    n12 = ctot[1] + ctot[2];
    n23 = ctot[2] + ctot[3];

    slacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23);
    iq2 = *n1 * n12 + 1;
    if (n23 != 0) {
	sgemm_("N", "N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, &
		c_b23, &q[*n1 + 1 + q_dim1], ldq);
    } else {
	slaset_("A", &n2, k, &c_b23, &c_b23, &q[*n1 + 1 + q_dim1], ldq);
    }

    slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12);
    if (n12 != 0) {
	sgemm_("N", "N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, 
		 &q[q_offset], ldq);
    } else {
	slaset_("A", n1, k, &c_b23, &c_b23, &q[q_dim1 + 1], ldq);
    }


L120:
    return 0;

/*     End of SLAED3 */

} /* slaed3_ */
Esempio n. 4
0
doublereal sqrt12_(integer *m, integer *n, real *a, integer *lda, real *s, 
	real *work, integer *lwork)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    real ret_val;

    /* Local variables */
    integer i__, j, mn, iscl, info;
    real anrm;
    extern doublereal snrm2_(integer *, real *, integer *), sasum_(integer *, 
	    real *, integer *);
    real dummy[1];
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), sgebd2_(integer *, integer *, real *, integer 
	    *, real *, real *, real *, real *, real *, integer *), slabad_(
	    real *, real *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    real bignum;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
	    real *, integer *), sbdsqr_(char *, integer *, integer *, 
	    integer *, integer *, real *, real *, real *, integer *, real *, 
	    integer *, real *, integer *, real *, integer *);
    real smlnum, nrmsvl;


/*  -- LAPACK test routine (version 3.1.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     January 2007 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SQRT12 computes the singular values `svlues' of the upper trapezoid */
/*  of A(1:M,1:N) and returns the ratio */

/*       || s - svlues||/(||svlues||*eps*max(M,N)) */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A. */

/*  A       (input) REAL array, dimension (LDA,N) */
/*          The M-by-N matrix A. Only the upper trapezoid is referenced. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. */

/*  S       (input) REAL array, dimension (min(M,N)) */
/*          The singular values of the matrix A. */

/*  WORK    (workspace) REAL array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK. LWORK >= max(M*N + 4*min(M,N) + */
/*          max(M,N), M*N+2*MIN( M, N )+4*N). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --s;
    --work;

    /* Function Body */
    ret_val = 0.f;

/*     Test that enough workspace is supplied */

/* Computing MAX */
    i__1 = *m * *n + (min(*m,*n) << 2) + max(*m,*n), i__2 = *m * *n + (min(*m,
	    *n) << 1) + (*n << 2);
    if (*lwork < max(i__1,i__2)) {
	xerbla_("SQRT12", &c__7);
	return ret_val;
    }

/*     Quick return if possible */

    mn = min(*m,*n);
    if ((real) mn <= 0.f) {
	return ret_val;
    }

    nrmsvl = snrm2_(&mn, &s[1], &c__1);

/*     Copy upper triangle of A into work */

    slaset_("Full", m, n, &c_b6, &c_b6, &work[1], m);
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = min(j,*m);
	for (i__ = 1; i__ <= i__2; ++i__) {
	    work[(j - 1) * *m + i__] = a[i__ + j * a_dim1];
/* L10: */
	}
/* L20: */
    }

/*     Get machine parameters */

    smlnum = slamch_("S") / slamch_("P");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

/*     Scale work if max entry outside range [SMLNUM,BIGNUM] */

    anrm = slange_("M", m, n, &work[1], m, dummy);
    iscl = 0;
    if (anrm > 0.f && anrm < smlnum) {

/*        Scale matrix norm up to SMLNUM */

	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &work[1], m, &info);
	iscl = 1;
    } else if (anrm > bignum) {

/*        Scale matrix norm down to BIGNUM */

	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &work[1], m, &info);
	iscl = 1;
    }

    if (anrm != 0.f) {

/*        Compute SVD of work */

	sgebd2_(m, n, &work[1], m, &work[*m * *n + 1], &work[*m * *n + mn + 1]
, &work[*m * *n + (mn << 1) + 1], &work[*m * *n + mn * 3 + 1], 
		 &work[*m * *n + (mn << 2) + 1], &info);
	sbdsqr_("Upper", &mn, &c__0, &c__0, &c__0, &work[*m * *n + 1], &work[*
		m * *n + mn + 1], dummy, &mn, dummy, &c__1, dummy, &mn, &work[
		*m * *n + (mn << 1) + 1], &info);

	if (iscl == 1) {
	    if (anrm > bignum) {
		slascl_("G", &c__0, &c__0, &bignum, &anrm, &mn, &c__1, &work[*
			m * *n + 1], &mn, &info);
	    }
	    if (anrm < smlnum) {
		slascl_("G", &c__0, &c__0, &smlnum, &anrm, &mn, &c__1, &work[*
			m * *n + 1], &mn, &info);
	    }
	}

    } else {

	i__1 = mn;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    work[*m * *n + i__] = 0.f;
/* L30: */
	}
    }

/*     Compare s and singular values of work */

    saxpy_(&mn, &c_b33, &s[1], &c__1, &work[*m * *n + 1], &c__1);
    ret_val = sasum_(&mn, &work[*m * *n + 1], &c__1) / (slamch_("Epsilon") * (real) max(*m,*n));
    if (nrmsvl != 0.f) {
	ret_val /= nrmsvl;
    }

    return ret_val;

/*     End of SQRT12 */

} /* sqrt12_ */
Esempio n. 5
0
/* Subroutine */ int slarfp_(integer *n, real *alpha, real *x, integer *incx, 
	real *tau)
{
    /* System generated locals */
    integer i__1;
    real r__1;

    /* Local variables */
    integer j, knt;
    real beta;
    real xnorm;
    real safmin, rsafmn;

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SLARFP generates a real elementary reflector H of order n, such */
/*  that */

/*        H * ( alpha ) = ( beta ),   H' * H = I. */
/*            (   x   )   (   0  ) */

/*  where alpha and beta are scalars, beta is non-negative, and x is */
/*  an (n-1)-element real vector.  H is represented in the form */

/*        H = I - tau * ( 1 ) * ( 1 v' ) , */
/*                      ( v ) */

/*  where tau is a real scalar and v is a real (n-1)-element */
/*  vector. */

/*  If the elements of x are all zero, then tau = 0 and H is taken to be */
/*  the unit matrix. */

/*  Otherwise  1 <= tau <= 2. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the elementary reflector. */

/*  ALPHA   (input/output) REAL */
/*          On entry, the value alpha. */
/*          On exit, it is overwritten with the value beta. */

/*  X       (input/output) REAL array, dimension */
/*                         (1+(N-2)*abs(INCX)) */
/*          On entry, the vector x. */
/*          On exit, it is overwritten with the vector v. */

/*  INCX    (input) INTEGER */
/*          The increment between elements of X. INCX > 0. */

/*  TAU     (output) REAL */
/*          The value tau. */

/*  ===================================================================== */

    /* Parameter adjustments */
    --x;

    /* Function Body */
    if (*n <= 0) {
	*tau = 0.f;
	return 0;
    }

    i__1 = *n - 1;
    xnorm = snrm2_(&i__1, &x[1], incx);

    if (xnorm == 0.f) {

/*        H  =  [+/-1, 0; I], sign chosen so ALPHA >= 0. */

	if (*alpha >= 0.f) {
/*           When TAU.eq.ZERO, the vector is special-cased to be */
/*           all zeros in the application routines.  We do not need */
/*           to clear it. */
	    *tau = 0.f;
	} else {
/*           However, the application routines rely on explicit */
/*           zero checks when TAU.ne.ZERO, and we must clear X. */
	    *tau = 2.f;
	    i__1 = *n - 1;
	    for (j = 1; j <= i__1; ++j) {
		x[(j - 1) * *incx + 1] = 0.f;
	    }
	    *alpha = -(*alpha);
	}
    } else {

/*        general case */

	r__1 = slapy2_(alpha, &xnorm);
	beta = r_sign(&r__1, alpha);
	safmin = slamch_("S") / slamch_("E");
	knt = 0;
	if (dabs(beta) < safmin) {

/*           XNORM, BETA may be inaccurate; scale X and recompute them */

	    rsafmn = 1.f / safmin;
L10:
	    ++knt;
	    i__1 = *n - 1;
	    sscal_(&i__1, &rsafmn, &x[1], incx);
	    beta *= rsafmn;
	    *alpha *= rsafmn;
	    if (dabs(beta) < safmin) {
		goto L10;
	    }

/*           New BETA is at most 1, at least SAFMIN */

	    i__1 = *n - 1;
	    xnorm = snrm2_(&i__1, &x[1], incx);
	    r__1 = slapy2_(alpha, &xnorm);
	    beta = r_sign(&r__1, alpha);
	}
	*alpha += beta;
	if (beta < 0.f) {
	    beta = -beta;
	    *tau = -(*alpha) / beta;
	} else {
	    *alpha = xnorm * (xnorm / *alpha);
	    *tau = *alpha / beta;
	    *alpha = -(*alpha);
	}
	i__1 = *n - 1;
	r__1 = 1.f / *alpha;
	sscal_(&i__1, &r__1, &x[1], incx);

/*        If BETA is subnormal, it may lose relative accuracy */

	i__1 = knt;
	for (j = 1; j <= i__1; ++j) {
	    beta *= safmin;
	}
	*alpha = beta;
    }

    return 0;

/*     End of SLARFP */

} /* slarfp_ */
Esempio n. 6
0
/* Subroutine */ int sstein_(integer *n, real *d, real *e, integer *m, real *
	w, integer *iblock, integer *isplit, real *z, integer *ldz, real *
	work, integer *iwork, integer *ifail, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SSTEIN computes the eigenvectors of a real symmetric tridiagonal   
    matrix T corresponding to specified eigenvalues, using inverse   
    iteration.   

    The maximum number of iterations allowed for each eigenvector is   
    specified by an internal parameter MAXITS (currently set to 5).   

    Arguments   
    =========   

    N       (input) INTEGER   
            The order of the matrix.  N >= 0.   

    D       (input) REAL array, dimension (N)   
            The n diagonal elements of the tridiagonal matrix T.   

    E       (input) REAL array, dimension (N)   
            The (n-1) subdiagonal elements of the tridiagonal matrix   
            T, in elements 1 to N-1.  E(N) need not be set.   

    M       (input) INTEGER   
            The number of eigenvectors to be found.  0 <= M <= N.   

    W       (input) REAL array, dimension (N)   
            The first M elements of W contain the eigenvalues for   
            which eigenvectors are to be computed.  The eigenvalues   
            should be grouped by split-off block and ordered from   
            smallest to largest within the block.  ( The output array   
            W from SSTEBZ with ORDER = 'B' is expected here. )   

    IBLOCK  (input) INTEGER array, dimension (N)   
            The submatrix indices associated with the corresponding   
            eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to   
            the first submatrix from the top, =2 if W(i) belongs to   
            the second submatrix, etc.  ( The output array IBLOCK   
            from SSTEBZ is expected here. )   

    ISPLIT  (input) INTEGER array, dimension (N)   
            The splitting points, at which T breaks up into submatrices. 
  
            The first submatrix consists of rows/columns 1 to   
            ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1   
            through ISPLIT( 2 ), etc.   
            ( The output array ISPLIT from SSTEBZ is expected here. )   

    Z       (output) REAL array, dimension (LDZ, M)   
            The computed eigenvectors.  The eigenvector associated   
            with the eigenvalue W(i) is stored in the i-th column of   
            Z.  Any vector which fails to converge is set to its current 
  
            iterate after MAXITS iterations.   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z.  LDZ >= max(1,N).   

    WORK    (workspace) REAL array, dimension (5*N)   

    IWORK   (workspace) INTEGER array, dimension (N)   

    IFAIL   (output) INTEGER array, dimension (M)   
            On normal exit, all elements of IFAIL are zero.   
            If one or more eigenvectors fail to converge after   
            MAXITS iterations, then their indices are stored in   
            array IFAIL.   

    INFO    (output) INTEGER   
            = 0: successful exit.   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = i, then i eigenvectors failed to converge   
                 in MAXITS iterations.  Their indices are stored in   
                 array IFAIL.   

    Internal Parameters   
    ===================   

    MAXITS  INTEGER, default = 5   
            The maximum number of iterations performed.   

    EXTRA   INTEGER, default = 2   
            The number of iterations performed after norm growth   
            criterion is satisfied, should be at least 1.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__2 = 2;
    static integer c__1 = 1;
    static integer c_n1 = -1;
    
    /* System generated locals */
    integer z_dim1, z_offset, i__1, i__2, i__3;
    real r__1, r__2, r__3, r__4, r__5;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer jblk, nblk, jmax;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *), 
	    snrm2_(integer *, real *, integer *);
    static integer i, j, iseed[4], gpind, iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer b1;
    extern doublereal sasum_(integer *, real *, integer *);
    static integer j1;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static real ortol;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *);
    static integer indrv1, indrv2, indrv3, indrv4, indrv5, bn;
    static real xj;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *), slagtf_(
	    integer *, real *, real *, real *, real *, real *, real *, 
	    integer *, integer *);
    static integer nrmchk;
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slagts_(integer *, integer *, real *, real *, 
	    real *, real *, integer *, real *, real *, integer *);
    static integer blksiz;
    static real onenrm, pertol;
    extern /* Subroutine */ int slarnv_(integer *, integer *, integer *, real 
	    *);
    static real stpcrt, scl, eps, ctr, sep, nrm, tol;
    static integer its;
    static real xjm, eps1;



#define ISEED(I) iseed[(I)]
#define D(I) d[(I)-1]
#define E(I) e[(I)-1]
#define W(I) w[(I)-1]
#define IBLOCK(I) iblock[(I)-1]
#define ISPLIT(I) isplit[(I)-1]
#define WORK(I) work[(I)-1]
#define IWORK(I) iwork[(I)-1]
#define IFAIL(I) ifail[(I)-1]

#define Z(I,J) z[(I)-1 + ((J)-1)* ( *ldz)]

    *info = 0;
    i__1 = *m;
    for (i = 1; i <= *m; ++i) {
	IFAIL(i) = 0;
/* L10: */
    }

    if (*n < 0) {
	*info = -1;
    } else if (*m < 0 || *m > *n) {
	*info = -4;
    } else if (*ldz < max(1,*n)) {
	*info = -9;
    } else {
	i__1 = *m;
	for (j = 2; j <= *m; ++j) {
	    if (IBLOCK(j) < IBLOCK(j - 1)) {
		*info = -6;
		goto L30;
	    }
	    if (IBLOCK(j) == IBLOCK(j - 1) && W(j) < W(j - 1)) {
		*info = -5;
		goto L30;
	    }
/* L20: */
	}
L30:
	;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSTEIN", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *m == 0) {
	return 0;
    } else if (*n == 1) {
	Z(1,1) = 1.f;
	return 0;
    }

/*     Get machine constants. */

    eps = slamch_("Precision");

/*     Initialize seed for random number generator SLARNV. */

    for (i = 1; i <= 4; ++i) {
	ISEED(i - 1) = 1;
/* L40: */
    }

/*     Initialize pointers. */

    indrv1 = 0;
    indrv2 = indrv1 + *n;
    indrv3 = indrv2 + *n;
    indrv4 = indrv3 + *n;
    indrv5 = indrv4 + *n;

/*     Compute eigenvectors of matrix blocks. */

    j1 = 1;
    i__1 = IBLOCK(*m);
    for (nblk = 1; nblk <= IBLOCK(*m); ++nblk) {

/*        Find starting and ending indices of block nblk. */

	if (nblk == 1) {
	    b1 = 1;
	} else {
	    b1 = ISPLIT(nblk - 1) + 1;
	}
	bn = ISPLIT(nblk);
	blksiz = bn - b1 + 1;
	if (blksiz == 1) {
	    goto L60;
	}
	gpind = b1;

/*        Compute reorthogonalization criterion and stopping criterion
. */

	onenrm = (r__1 = D(b1), dabs(r__1)) + (r__2 = E(b1), dabs(r__2));
/* Computing MAX */
	r__3 = onenrm, r__4 = (r__1 = D(bn), dabs(r__1)) + (r__2 = E(bn - 1), 
		dabs(r__2));
	onenrm = dmax(r__3,r__4);
	i__2 = bn - 1;
	for (i = b1 + 1; i <= bn-1; ++i) {
/* Computing MAX */
	    r__4 = onenrm, r__5 = (r__1 = D(i), dabs(r__1)) + (r__2 = E(i - 1)
		    , dabs(r__2)) + (r__3 = E(i), dabs(r__3));
	    onenrm = dmax(r__4,r__5);
/* L50: */
	}
	ortol = onenrm * .001f;

	stpcrt = sqrt(.1f / blksiz);

/*        Loop through eigenvalues of block nblk. */

L60:
	jblk = 0;
	i__2 = *m;
	for (j = j1; j <= *m; ++j) {
	    if (IBLOCK(j) != nblk) {
		j1 = j;
		goto L160;
	    }
	    ++jblk;
	    xj = W(j);

/*           Skip all the work if the block size is one. */

	    if (blksiz == 1) {
		WORK(indrv1 + 1) = 1.f;
		goto L120;
	    }

/*           If eigenvalues j and j-1 are too close, add a relativ
ely   
             small perturbation. */

	    if (jblk > 1) {
		eps1 = (r__1 = eps * xj, dabs(r__1));
		pertol = eps1 * 10.f;
		sep = xj - xjm;
		if (sep < pertol) {
		    xj = xjm + pertol;
		}
	    }

	    its = 0;
	    nrmchk = 0;

/*           Get random starting vector. */

	    slarnv_(&c__2, iseed, &blksiz, &WORK(indrv1 + 1));

/*           Copy the matrix T so it won't be destroyed in factori
zation. */

	    scopy_(&blksiz, &D(b1), &c__1, &WORK(indrv4 + 1), &c__1);
	    i__3 = blksiz - 1;
	    scopy_(&i__3, &E(b1), &c__1, &WORK(indrv2 + 2), &c__1);
	    i__3 = blksiz - 1;
	    scopy_(&i__3, &E(b1), &c__1, &WORK(indrv3 + 1), &c__1);

/*           Compute LU factors with partial pivoting  ( PT = LU )
 */

	    tol = 0.f;
	    slagtf_(&blksiz, &WORK(indrv4 + 1), &xj, &WORK(indrv2 + 2), &WORK(
		    indrv3 + 1), &tol, &WORK(indrv5 + 1), &IWORK(1), &iinfo);

/*           Update iteration count. */

L70:
	    ++its;
	    if (its > 5) {
		goto L100;
	    }

/*           Normalize and scale the righthand side vector Pb.   

   Computing MAX */
	    r__2 = eps, r__3 = (r__1 = WORK(indrv4 + blksiz), dabs(r__1));
	    scl = blksiz * onenrm * dmax(r__2,r__3) / sasum_(&blksiz, &WORK(
		    indrv1 + 1), &c__1);
	    sscal_(&blksiz, &scl, &WORK(indrv1 + 1), &c__1);

/*           Solve the system LU = Pb. */

	    slagts_(&c_n1, &blksiz, &WORK(indrv4 + 1), &WORK(indrv2 + 2), &
		    WORK(indrv3 + 1), &WORK(indrv5 + 1), &IWORK(1), &WORK(
		    indrv1 + 1), &tol, &iinfo);

/*           Reorthogonalize by modified Gram-Schmidt if eigenvalu
es are   
             close enough. */

	    if (jblk == 1) {
		goto L90;
	    }
	    if ((r__1 = xj - xjm, dabs(r__1)) > ortol) {
		gpind = j;
	    }
	    if (gpind != j) {
		i__3 = j - 1;
		for (i = gpind; i <= j-1; ++i) {
		    ctr = -(doublereal)sdot_(&blksiz, &WORK(indrv1 + 1), &
			    c__1, &Z(b1,i), &c__1);
		    saxpy_(&blksiz, &ctr, &Z(b1,i), &c__1, &WORK(
			    indrv1 + 1), &c__1);
/* L80: */
		}
	    }

/*           Check the infinity norm of the iterate. */

L90:
	    jmax = isamax_(&blksiz, &WORK(indrv1 + 1), &c__1);
	    nrm = (r__1 = WORK(indrv1 + jmax), dabs(r__1));

/*           Continue for additional iterations after norm reaches
   
             stopping criterion. */

	    if (nrm < stpcrt) {
		goto L70;
	    }
	    ++nrmchk;
	    if (nrmchk < 3) {
		goto L70;
	    }

	    goto L110;

/*           If stopping criterion was not satisfied, update info 
and   
             store eigenvector number in array ifail. */

L100:
	    ++(*info);
	    IFAIL(*info) = j;

/*           Accept iterate as jth eigenvector. */

L110:
	    scl = 1.f / snrm2_(&blksiz, &WORK(indrv1 + 1), &c__1);
	    jmax = isamax_(&blksiz, &WORK(indrv1 + 1), &c__1);
	    if (WORK(indrv1 + jmax) < 0.f) {
		scl = -(doublereal)scl;
	    }
	    sscal_(&blksiz, &scl, &WORK(indrv1 + 1), &c__1);
L120:
	    i__3 = *n;
	    for (i = 1; i <= *n; ++i) {
		Z(i,j) = 0.f;
/* L130: */
	    }
	    i__3 = blksiz;
	    for (i = 1; i <= blksiz; ++i) {
		Z(b1+i-1,j) = WORK(indrv1 + i);
/* L140: */
	    }

/*           Save the shift to check eigenvalue spacing at next   
             iteration. */

	    xjm = xj;

/* L150: */
	}
L160:
	;
    }

    return 0;

/*     End of SSTEIN */

} /* sstein_ */
Esempio n. 7
0
int sfgmr(int n,
     void (*smatvec) (float, float[], float, float[]),
     void (*spsolve) (int, float[], float[]),
     float *rhs, float *sol, double tol, int im, int *itmax, FILE * fits)
{
/*----------------------------------------------------------------------
|                 *** Preconditioned FGMRES ***
+-----------------------------------------------------------------------
| This is a simple version of the ARMS preconditioned FGMRES algorithm.
+-----------------------------------------------------------------------
| Y. S. Dec. 2000. -- Apr. 2008
+-----------------------------------------------------------------------
| on entry:
|----------
|
| rhs     = real vector of length n containing the right hand side.
| sol     = real vector of length n containing an initial guess to the
|           solution on input.
| tol     = tolerance for stopping iteration
| im      = Krylov subspace dimension
| (itmax) = max number of iterations allowed.
| fits    = NULL: no output
|        != NULL: file handle to output " resid vs time and its"
|
| on return:
|----------
| fgmr      int =  0 --> successful return.
|           int =  1 --> convergence not achieved in itmax iterations.
| sol     = contains an approximate solution (upon successful return).
| itmax   = has changed. It now contains the number of steps required
|           to converge --
+-----------------------------------------------------------------------
| internal work arrays:
|----------
| vv      = work array of length [im+1][n] (used to store the Arnoldi
|           basis)
| hh      = work array of length [im][im+1] (Householder matrix)
| z       = work array of length [im][n] to store preconditioned vectors
+-----------------------------------------------------------------------
| subroutines called :
| matvec - matrix-vector multiplication operation
| psolve - (right) preconditionning operation
|	   psolve can be a NULL pointer (GMRES without preconditioner)
+---------------------------------------------------------------------*/

    int maxits = *itmax;
    int i, i1, ii, j, k, k1, its, retval, i_1 = 1, i_2 = 2;
    float beta, eps1 = 0.0, t, t0, gam;
    float **hh, *c, *s, *rs;
    float **vv, **z, tt;
    float zero = 0.0;
    float one = 1.0;

    its = 0;
    vv = (float **)SUPERLU_MALLOC((im + 1) * sizeof(float *));
    for (i = 0; i <= im; i++) vv[i] = floatMalloc(n);
    z = (float **)SUPERLU_MALLOC(im * sizeof(float *));
    hh = (float **)SUPERLU_MALLOC(im * sizeof(float *));
    for (i = 0; i < im; i++)
    {
	hh[i] = floatMalloc(i + 2);
	z[i] = floatMalloc(n);
    }
    c = floatMalloc(im);
    s = floatMalloc(im);
    rs = floatMalloc(im + 1);

    /*---- outer loop starts here ----*/
    do
    {
	/*---- compute initial residual vector ----*/
	smatvec(one, sol, zero, vv[0]);
	for (j = 0; j < n; j++)
	    vv[0][j] = rhs[j] - vv[0][j];	/* vv[0]= initial residual */
	beta = snrm2_(&n, vv[0], &i_1);

	/*---- print info if fits != null ----*/
	if (fits != NULL && its == 0)
	    fprintf(fits, "%8d   %10.2e\n", its, beta);
	/*if ( beta <= tol * dnrm2_(&n, rhs, &i_1) )*/
	if ( !(beta > tol * snrm2_(&n, rhs, &i_1)) )
	    break;
	t = 1.0 / beta;

	/*---- normalize: vv[0] = vv[0] / beta ----*/
	for (j = 0; j < n; j++)
	    vv[0][j] = vv[0][j] * t;
	if (its == 0)
	    eps1 = tol * beta;

	/*---- initialize 1-st term of rhs of hessenberg system ----*/
	rs[0] = beta;
	for (i = 0; i < im; i++)
	{
	    its++;
	    i1 = i + 1;

	    /*------------------------------------------------------------
	    |  (Right) Preconditioning Operation   z_{j} = M^{-1} v_{j}
	    +-----------------------------------------------------------*/
	    if (spsolve)
		spsolve(n, z[i], vv[i]);
	    else
		scopy_(&n, vv[i], &i_1, z[i], &i_1);

	    /*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/
	    smatvec(one, z[i], zero, vv[i1]);

	    /*------------------------------------------------------------
	    |     modified gram - schmidt...
	    |     h_{i,j} = (w,v_{i})
	    |     w  = w - h_{i,j} v_{i}
	    +------------------------------------------------------------*/
	    t0 = snrm2_(&n, vv[i1], &i_1);
	    for (j = 0; j <= i; j++)
	    {
		float negt;
		tt = sdot_(&n, vv[j], &i_1, vv[i1], &i_1);
		hh[i][j] = tt;
		negt = -tt;
		saxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
	    }

	    /*---- h_{j+1,j} = ||w||_{2} ----*/
	    t = snrm2_(&n, vv[i1], &i_1);
	    while (t < 0.5 * t0)
	    {
		t0 = t;
		for (j = 0; j <= i; j++)
		{
		    float negt;
		    tt = sdot_(&n, vv[j], &i_1, vv[i1], &i_1);
		    hh[i][j] += tt;
		    negt = -tt;
		    saxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
		}
		t = snrm2_(&n, vv[i1], &i_1);
	    }

	    hh[i][i1] = t;

	    if (t != 0.0)
	    {
		/*---- v_{j+1} = w / h_{j+1,j} ----*/
		t = 1.0 / t;
		for (k = 0; k < n; k++)
		    vv[i1][k] = vv[i1][k] * t;
	    }
	    /*---------------------------------------------------
	    |     done with modified gram schimdt and arnoldi step
	    |     now  update factorization of hh
	    +--------------------------------------------------*/

	    /*--------------------------------------------------------
	    |   perform previous transformations  on i-th column of h
	    +-------------------------------------------------------*/
	    for (k = 1; k <= i; k++)
	    {
		k1 = k - 1;
		tt = hh[i][k1];
		hh[i][k1] = c[k1] * tt + s[k1] * hh[i][k];
		hh[i][k] = -s[k1] * tt + c[k1] * hh[i][k];
	    }

	    gam = sqrt(pow(hh[i][i], 2) + pow(hh[i][i1], 2));

	    /*---------------------------------------------------
	    |     if gamma is zero then any small value will do
	    |     affect only residual estimate
	    +--------------------------------------------------*/
	    /* if (gam == 0.0) gam = epsmac; */

	    /*---- get next plane rotation ---*/
	    if (gam == 0.0)
	    {
		c[i] = one;
		s[i] = zero;
	    }
            else
	    {
		c[i] = hh[i][i] / gam;
		s[i] = hh[i][i1] / gam;
	    }

	    rs[i1] = -s[i] * rs[i];
	    rs[i] = c[i] * rs[i];

	    /*----------------------------------------------------
	    |   determine residual norm and test for convergence
	    +---------------------------------------------------*/
	    hh[i][i] = c[i] * hh[i][i] + s[i] * hh[i][i1];
	    beta = fabs(rs[i1]);
	    if (fits != NULL)
		fprintf(fits, "%8d   %10.2e\n", its, beta);
	    if (beta <= eps1 || its >= maxits)
		break;
	}

	if (i == im) i--;

	/*---- now compute solution. 1st, solve upper triangular system ----*/
	rs[i] = rs[i] / hh[i][i];

	for (ii = 1; ii <= i; ii++)
	{
	    k = i - ii;
	    k1 = k + 1;
	    tt = rs[k];
	    for (j = k1; j <= i; j++)
		tt = tt - hh[j][k] * rs[j];
	    rs[k] = tt / hh[k][k];
	}

	/*---- linear combination of v[i]'s to get sol. ----*/
	for (j = 0; j <= i; j++)
	{
	    tt = rs[j];
	    for (k = 0; k < n; k++)
		sol[k] += tt * z[j][k];
	}

	/* calculate the residual and output */
	smatvec(one, sol, zero, vv[0]);
	for (j = 0; j < n; j++)
	    vv[0][j] = rhs[j] - vv[0][j];	/* vv[0]= initial residual */

	/*---- print info if fits != null ----*/
	beta = snrm2_(&n, vv[0], &i_1);

	/*---- restart outer loop if needed ----*/
	/*if (beta >= eps1 / tol)*/
	if ( !(beta < eps1 / tol) )
	{
	    its = maxits + 10;
	    break;
	}
	if (beta <= eps1)
	    break;
    } while(its < maxits);

    retval = (its >= maxits);
    for (i = 0; i <= im; i++)
	SUPERLU_FREE(vv[i]);
    SUPERLU_FREE(vv);
    for (i = 0; i < im; i++)
    {
	SUPERLU_FREE(hh[i]);
	SUPERLU_FREE(z[i]);
    }
    SUPERLU_FREE(hh);
    SUPERLU_FREE(z);
    SUPERLU_FREE(c);
    SUPERLU_FREE(s);
    SUPERLU_FREE(rs);

    *itmax = its;

    return retval;
} /*----end of fgmr ----*/
Esempio n. 8
0
float snrm2( int n, float *x, int incx)
{
    return snrm2_(&n, x, &incx);
}
Esempio n. 9
0
/* Subroutine */ int check1_(real *sfac)
{
    /* Initialized data */

    static real sa[10] = { .3f,-1.f,0.f,1.f,.3f,.3f,.3f,.3f,.3f,.3f };
    static real dv[80]	/* was [8][5][2] */ = { .1f,2.f,2.f,2.f,2.f,2.f,2.f,
	    2.f,.3f,3.f,3.f,3.f,3.f,3.f,3.f,3.f,.3f,-.4f,4.f,4.f,4.f,4.f,4.f,
	    4.f,.2f,-.6f,.3f,5.f,5.f,5.f,5.f,5.f,.1f,-.3f,.5f,-.1f,6.f,6.f,
	    6.f,6.f,.1f,8.f,8.f,8.f,8.f,8.f,8.f,8.f,.3f,9.f,9.f,9.f,9.f,9.f,
	    9.f,9.f,.3f,2.f,-.4f,2.f,2.f,2.f,2.f,2.f,.2f,3.f,-.6f,5.f,.3f,2.f,
	    2.f,2.f,.1f,4.f,-.3f,6.f,-.5f,7.f,-.1f,3.f };
    static real dtrue1[5] = { 0.f,.3f,.5f,.7f,.6f };
    static real dtrue3[5] = { 0.f,.3f,.7f,1.1f,1.f };
    static real dtrue5[80]	/* was [8][5][2] */ = { .1f,2.f,2.f,2.f,2.f,
	    2.f,2.f,2.f,-.3f,3.f,3.f,3.f,3.f,3.f,3.f,3.f,0.f,0.f,4.f,4.f,4.f,
	    4.f,4.f,4.f,.2f,-.6f,.3f,5.f,5.f,5.f,5.f,5.f,.03f,-.09f,.15f,
	    -.03f,6.f,6.f,6.f,6.f,.1f,8.f,8.f,8.f,8.f,8.f,8.f,8.f,.09f,9.f,
	    9.f,9.f,9.f,9.f,9.f,9.f,.09f,2.f,-.12f,2.f,2.f,2.f,2.f,2.f,.06f,
	    3.f,-.18f,5.f,.09f,2.f,2.f,2.f,.03f,4.f,-.09f,6.f,-.15f,7.f,-.03f,
	    3.f };
    static integer itrue2[5] = { 0,1,2,2,3 };

    /* System generated locals */
    integer i__1;
    real r__1;

    /* Builtin functions */
    integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), 
	    e_wsle(void);
    /* Subroutine */ int s_stop(char *, ftnlen);

    /* Local variables */
    integer i__;
    real sx[8];
    integer np1, len;
    extern doublereal snrm2_(integer *, real *, integer *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    real stemp[1];
    extern doublereal sasum_(integer *, real *, integer *);
    real strue[8];
    extern /* Subroutine */ int stest_(integer *, real *, real *, real *, 
	    real *), itest1_(integer *, integer *), stest1_(real *, real *, 
	    real *, real *);
    extern integer isamax_(integer *, real *, integer *);

    /* Fortran I/O blocks */
    static cilist io___32 = { 0, 6, 0, 0, 0 };


/*     .. Parameters .. */
/*     .. Scalar Arguments .. */
/*     .. Scalars in Common .. */
/*     .. Local Scalars .. */
/*     .. Local Arrays .. */
/*     .. External Functions .. */
/*     .. External Subroutines .. */
/*     .. Intrinsic Functions .. */
/*     .. Common blocks .. */
/*     .. Data statements .. */
/*     .. Executable Statements .. */
    for (combla_1.incx = 1; combla_1.incx <= 2; ++combla_1.incx) {
	for (np1 = 1; np1 <= 5; ++np1) {
	    combla_1.n = np1 - 1;
	    len = max(combla_1.n,1) << 1;
/*           .. Set vector arguments .. */
	    i__1 = len;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		sx[i__ - 1] = dv[i__ + (np1 + combla_1.incx * 5 << 3) - 49];
/* L20: */
	    }

	    if (combla_1.icase == 7) {
/*              .. SNRM2 .. */
		stemp[0] = dtrue1[np1 - 1];
		r__1 = snrm2_(&combla_1.n, sx, &combla_1.incx);
		stest1_(&r__1, stemp, stemp, sfac);
	    } else if (combla_1.icase == 8) {
/*              .. SASUM .. */
		stemp[0] = dtrue3[np1 - 1];
		r__1 = sasum_(&combla_1.n, sx, &combla_1.incx);
		stest1_(&r__1, stemp, stemp, sfac);
	    } else if (combla_1.icase == 9) {
/*              .. SSCAL .. */
		sscal_(&combla_1.n, &sa[(combla_1.incx - 1) * 5 + np1 - 1], 
			sx, &combla_1.incx);
		i__1 = len;
		for (i__ = 1; i__ <= i__1; ++i__) {
		    strue[i__ - 1] = dtrue5[i__ + (np1 + combla_1.incx * 5 << 
			    3) - 49];
/* L40: */
		}
		stest_(&len, sx, strue, strue, sfac);
	    } else if (combla_1.icase == 10) {
/*              .. ISAMAX .. */
		i__1 = isamax_(&combla_1.n, sx, &combla_1.incx);
		itest1_(&i__1, &itrue2[np1 - 1]);
	    } else {
		s_wsle(&io___32);
		do_lio(&c__9, &c__1, " Shouldn't be here in CHECK1", (ftnlen)
			28);
		e_wsle();
		s_stop("", (ftnlen)0);
	    }
/* L60: */
	}
/* L80: */
    }
    return 0;
} /* check1_ */
Esempio n. 10
0
 int slaqps_(int *m, int *n, int *offset, int 
	*nb, int *kb, float *a, int *lda, int *jpvt, float *tau, 
	float *vn1, float *vn2, float *auxv, float *f, int *ldf)
{
    /* System generated locals */
    int a_dim1, a_offset, f_dim1, f_offset, i__1, i__2;
    float r__1, r__2;

    /* Builtin functions */
    double sqrt(double);
    int i_nint(float *);

    /* Local variables */
    int j, k, rk;
    float akk;
    int pvt;
    float temp, temp2;
    extern double snrm2_(int *, float *, int *);
    float tol3z;
    extern  int sgemm_(char *, char *, int *, int *, 
	    int *, float *, float *, int *, float *, int *, float *, 
	    float *, int *);
    int itemp;
    extern  int sgemv_(char *, int *, int *, float *, 
	    float *, int *, float *, int *, float *, float *, int *), sswap_(int *, float *, int *, float *, int *);
    extern double slamch_(char *);
    int lsticc;
    extern int isamax_(int *, float *, int *);
    extern  int slarfp_(int *, float *, float *, int *, 
	    float *);
    int lastrk;


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAQPS computes a step of QR factorization with column pivoting */
/*  of a float M-by-N matrix A by using Blas-3.  It tries to factorize */
/*  NB columns from A starting from the row OFFSET+1, and updates all */
/*  of the matrix with Blas-3 xGEMM. */

/*  In some cases, due to catastrophic cancellations, it cannot */
/*  factorize NB columns.  Hence, the actual number of factorized */
/*  columns is returned in KB. */

/*  Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A. M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A. N >= 0 */

/*  OFFSET  (input) INTEGER */
/*          The number of rows of A that have been factorized in */
/*          previous steps. */

/*  NB      (input) INTEGER */
/*          The number of columns to factorize. */

/*  KB      (output) INTEGER */
/*          The number of columns actually factorized. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix A. */
/*          On exit, block A(OFFSET+1:M,1:KB) is the triangular */
/*          factor obtained and block A(1:OFFSET,1:N) has been */
/*          accordingly pivoted, but no factorized. */
/*          The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has */
/*          been updated. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= MAX(1,M). */

/*  JPVT    (input/output) INTEGER array, dimension (N) */
/*          JPVT(I) = K <==> Column K of the full matrix A has been */
/*          permuted into position I in AP. */

/*  TAU     (output) REAL array, dimension (KB) */
/*          The scalar factors of the elementary reflectors. */

/*  VN1     (input/output) REAL array, dimension (N) */
/*          The vector with the partial column norms. */

/*  VN2     (input/output) REAL array, dimension (N) */
/*          The vector with the exact column norms. */

/*  AUXV    (input/output) REAL array, dimension (NB) */
/*          Auxiliar vector. */

/*  F       (input/output) REAL array, dimension (LDF,NB) */
/*          Matrix F' = L*Y'*A. */

/*  LDF     (input) INTEGER */
/*          The leading dimension of the array F. LDF >= MAX(1,N). */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
/*    X. Sun, Computer Science Dept., Duke University, USA */

/*  Partial column norm updating strategy modified by */
/*    Z. Drmac and Z. Bujanovic, Dept. of Mathematics, */
/*    University of Zagreb, Croatia. */
/*    June 2006. */
/*  For more details see LAPACK Working Note 176. */
/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --jpvt;
    --tau;
    --vn1;
    --vn2;
    --auxv;
    f_dim1 = *ldf;
    f_offset = 1 + f_dim1;
    f -= f_offset;

    /* Function Body */
/* Computing MIN */
    i__1 = *m, i__2 = *n + *offset;
    lastrk = MIN(i__1,i__2);
    lsticc = 0;
    k = 0;
    tol3z = sqrt(slamch_("Epsilon"));

/*     Beginning of while loop. */

L10:
    if (k < *nb && lsticc == 0) {
	++k;
	rk = *offset + k;

/*        Determine ith pivot column and swap if necessary */

	i__1 = *n - k + 1;
	pvt = k - 1 + isamax_(&i__1, &vn1[k], &c__1);
	if (pvt != k) {
	    sswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
	    i__1 = k - 1;
	    sswap_(&i__1, &f[pvt + f_dim1], ldf, &f[k + f_dim1], ldf);
	    itemp = jpvt[pvt];
	    jpvt[pvt] = jpvt[k];
	    jpvt[k] = itemp;
	    vn1[pvt] = vn1[k];
	    vn2[pvt] = vn2[k];
	}

/*        Apply previous Householder reflectors to column K: */
/*        A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */

	if (k > 1) {
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    sgemv_("No transpose", &i__1, &i__2, &c_b8, &a[rk + a_dim1], lda, 
		    &f[k + f_dim1], ldf, &c_b9, &a[rk + k * a_dim1], &c__1);
	}

/*        Generate elementary reflector H(k). */

	if (rk < *m) {
	    i__1 = *m - rk + 1;
	    slarfp_(&i__1, &a[rk + k * a_dim1], &a[rk + 1 + k * a_dim1], &
		    c__1, &tau[k]);
	} else {
	    slarfp_(&c__1, &a[rk + k * a_dim1], &a[rk + k * a_dim1], &c__1, &
		    tau[k]);
	}

	akk = a[rk + k * a_dim1];
	a[rk + k * a_dim1] = 1.f;

/*        Compute Kth column of F: */

/*        Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */

	if (k < *n) {
	    i__1 = *m - rk + 1;
	    i__2 = *n - k;
	    sgemv_("Transpose", &i__1, &i__2, &tau[k], &a[rk + (k + 1) * 
		    a_dim1], lda, &a[rk + k * a_dim1], &c__1, &c_b16, &f[k + 
		    1 + k * f_dim1], &c__1);
	}

/*        Padding F(1:K,K) with zeros. */

	i__1 = k;
	for (j = 1; j <= i__1; ++j) {
	    f[j + k * f_dim1] = 0.f;
/* L20: */
	}

/*        Incremental updating of F: */
/*        F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' */
/*                    *A(RK:M,K). */

	if (k > 1) {
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    r__1 = -tau[k];
	    sgemv_("Transpose", &i__1, &i__2, &r__1, &a[rk + a_dim1], lda, &a[
		    rk + k * a_dim1], &c__1, &c_b16, &auxv[1], &c__1);

	    i__1 = k - 1;
	    sgemv_("No transpose", n, &i__1, &c_b9, &f[f_dim1 + 1], ldf, &
		    auxv[1], &c__1, &c_b9, &f[k * f_dim1 + 1], &c__1);
	}

/*        Update the current row of A: */
/*        A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */

	if (k < *n) {
	    i__1 = *n - k;
	    sgemv_("No transpose", &i__1, &k, &c_b8, &f[k + 1 + f_dim1], ldf, 
		    &a[rk + a_dim1], lda, &c_b9, &a[rk + (k + 1) * a_dim1], 
		    lda);
	}

/*        Update partial column norms. */

	if (rk < lastrk) {
	    i__1 = *n;
	    for (j = k + 1; j <= i__1; ++j) {
		if (vn1[j] != 0.f) {

/*                 NOTE: The following 4 lines follow from the analysis in */
/*                 Lapack Working Note 176. */

		    temp = (r__1 = a[rk + j * a_dim1], ABS(r__1)) / vn1[j];
/* Computing MAX */
		    r__1 = 0.f, r__2 = (temp + 1.f) * (1.f - temp);
		    temp = MAX(r__1,r__2);
/* Computing 2nd power */
		    r__1 = vn1[j] / vn2[j];
		    temp2 = temp * (r__1 * r__1);
		    if (temp2 <= tol3z) {
			vn2[j] = (float) lsticc;
			lsticc = j;
		    } else {
			vn1[j] *= sqrt(temp);
		    }
		}
/* L30: */
	    }
	}

	a[rk + k * a_dim1] = akk;

/*        End of while loop. */

	goto L10;
    }
    *kb = k;
    rk = *offset + *kb;

/*     Apply the block reflector to the rest of the matrix: */
/*     A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - */
/*                         A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. */

/* Computing MIN */
    i__1 = *n, i__2 = *m - *offset;
    if (*kb < MIN(i__1,i__2)) {
	i__1 = *m - rk;
	i__2 = *n - *kb;
	sgemm_("No transpose", "Transpose", &i__1, &i__2, kb, &c_b8, &a[rk + 
		1 + a_dim1], lda, &f[*kb + 1 + f_dim1], ldf, &c_b9, &a[rk + 1 
		+ (*kb + 1) * a_dim1], lda);
    }

/*     Recomputation of difficult columns. */

L40:
    if (lsticc > 0) {
	itemp = i_nint(&vn2[lsticc]);
	i__1 = *m - rk;
	vn1[lsticc] = snrm2_(&i__1, &a[rk + 1 + lsticc * a_dim1], &c__1);

/*        NOTE: The computation of VN1( LSTICC ) relies on the fact that */
/*        SNRM2 does not fail on vectors with norm below the value of */
/*        SQRT(DLAMCH('S')) */

	vn2[lsticc] = vn1[lsticc];
	lsticc = itemp;
	goto L40;
    }

    return 0;

/*     End of SLAQPS */

} /* slaqps_ */
Esempio n. 11
0
/* Subroutine */ int slaror_(char *side, char *init, integer *m, integer *n, 
	real *a, integer *lda, integer *iseed, real *x, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    real r__1;

    /* Builtin functions */
    double r_sign(real *, real *);

    /* Local variables */
    static integer kbeg, jcol;
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    static integer irow;
    extern real snrm2_(integer *, real *, integer *);
    static integer j;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
	    sgemv_(char *, integer *, integer *, real *, real *, integer *, 
	    real *, integer *, real *, real *, integer *);
    static integer ixfrm, itype, nxfrm;
    static real xnorm;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static real factor;
    extern doublereal slarnd_(integer *, integer *);
    extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, 
	    real *, real *, integer *);
    static real xnorms;


/*  -- LAPACK auxiliary test routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SLAROR pre- or post-multiplies an M by N matrix A by a random   
    orthogonal matrix U, overwriting A.  A may optionally be initialized 
  
    to the identity matrix before multiplying by U.  U is generated using 
  
    the method of G.W. Stewart (SIAM J. Numer. Anal. 17, 1980, 403-409). 
  

    Arguments   
    =========   

    SIDE    (input) CHARACTER*1   
            Specifies whether A is multiplied on the left or right by U. 
  
            = 'L':         Multiply A on the left (premultiply) by U   
            = 'R':         Multiply A on the right (postmultiply) by U'   
            = 'C' or 'T':  Multiply A on the left by U and the right   
                            by U' (Here, U' means U-transpose.)   

    INIT    (input) CHARACTER*1   
            Specifies whether or not A should be initialized to the   
            identity matrix.   
            = 'I':  Initialize A to (a section of) the identity matrix   
                     before applying U.   
            = 'N':  No initialization.  Apply U to the input matrix A.   

            INIT = 'I' may be used to generate square or rectangular   
            orthogonal matrices:   

            For M = N and SIDE = 'L' or 'R', the rows will be orthogonal 
  
            to each other, as will the columns.   

            If M < N, SIDE = 'R' produces a dense matrix whose rows are   
            orthogonal and whose columns are not, while SIDE = 'L'   
            produces a matrix whose rows are orthogonal, and whose first 
  
            M columns are orthogonal, and whose remaining columns are   
            zero.   

            If M > N, SIDE = 'L' produces a dense matrix whose columns   
            are orthogonal and whose rows are not, while SIDE = 'R'   
            produces a matrix whose columns are orthogonal, and whose   
            first M rows are orthogonal, and whose remaining rows are   
            zero.   

    M       (input) INTEGER   
            The number of rows of A.   

    N       (input) INTEGER   
            The number of columns of A.   

    A       (input/output) REAL array, dimension (LDA, N)   
            On entry, the array A.   
            On exit, overwritten by U A ( if SIDE = 'L' ),   
             or by A U ( if SIDE = 'R' ),   
             or by U A U' ( if SIDE = 'C' or 'T').   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    ISEED   (input/output) INTEGER array, dimension (4)   
            On entry ISEED specifies the seed of the random number   
            generator. The array elements should be between 0 and 4095;   
            if not they will be reduced mod 4096.  Also, ISEED(4) must   
            be odd.  The random number generator uses a linear   
            congruential sequence limited to small integers, and so   
            should produce machine independent random numbers. The   
            values of ISEED are changed on exit, and can be used in the   
            next call to SLAROR to continue the same random number   
            sequence.   

    X       (workspace) REAL array, dimension (3*MAX( M, N ))   
            Workspace of length   
                2*M + N if SIDE = 'L',   
                2*N + M if SIDE = 'R',   
                3*N     if SIDE = 'C' or 'T'.   

    INFO    (output) INTEGER   
            An error flag.  It is set to:   
            = 0:  normal return   
            < 0:  if INFO = -k, the k-th argument had an illegal value   
            = 1:  if the random numbers generated by SLARND are bad.   

    ===================================================================== 
  


       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = a_dim1 + 1;
    a -= a_offset;
    --iseed;
    --x;

    /* Function Body */
    if (*n == 0 || *m == 0) {
	return 0;
    }

    itype = 0;
    if (lsame_(side, "L")) {
	itype = 1;
    } else if (lsame_(side, "R")) {
	itype = 2;
    } else if (lsame_(side, "C") || lsame_(side, "T")) {
	itype = 3;
    }

/*     Check for argument errors. */

    *info = 0;
    if (itype == 0) {
	*info = -1;
    } else if (*m < 0) {
	*info = -3;
    } else if (*n < 0 || itype == 3 && *n != *m) {
	*info = -4;
    } else if (*lda < *m) {
	*info = -6;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAROR", &i__1);
	return 0;
    }

    if (itype == 1) {
	nxfrm = *m;
    } else {
	nxfrm = *n;
    }

/*     Initialize A to the identity matrix if desired */

    if (lsame_(init, "I")) {
	slaset_("Full", m, n, &c_b9, &c_b10, &a[a_offset], lda);
    }

/*     If no rotation possible, multiply by random +/-1   

       Compute rotation by computing Householder transformations   
       H(2), H(3), ..., H(nhouse) */

    i__1 = nxfrm;
    for (j = 1; j <= i__1; ++j) {
	x[j] = 0.f;
/* L10: */
    }

    i__1 = nxfrm;
    for (ixfrm = 2; ixfrm <= i__1; ++ixfrm) {
	kbeg = nxfrm - ixfrm + 1;

/*        Generate independent normal( 0, 1 ) random numbers */

	i__2 = nxfrm;
	for (j = kbeg; j <= i__2; ++j) {
	    x[j] = slarnd_(&c__3, &iseed[1]);
/* L20: */
	}

/*        Generate a Householder transformation from the random vector
 X */

	xnorm = snrm2_(&ixfrm, &x[kbeg], &c__1);
	xnorms = r_sign(&xnorm, &x[kbeg]);
	r__1 = -(doublereal)x[kbeg];
	x[kbeg + nxfrm] = r_sign(&c_b10, &r__1);
	factor = xnorms * (xnorms + x[kbeg]);
	if (dabs(factor) < 1e-20f) {
	    *info = 1;
	    xerbla_("SLAROR", info);
	    return 0;
	} else {
	    factor = 1.f / factor;
	}
	x[kbeg] += xnorms;

/*        Apply Householder transformation to A */

	if (itype == 1 || itype == 3) {

/*           Apply H(k) from the left. */

	    sgemv_("T", &ixfrm, n, &c_b10, &a[kbeg + a_dim1], lda, &x[kbeg], &
		    c__1, &c_b9, &x[(nxfrm << 1) + 1], &c__1);
	    r__1 = -(doublereal)factor;
	    sger_(&ixfrm, n, &r__1, &x[kbeg], &c__1, &x[(nxfrm << 1) + 1], &
		    c__1, &a[kbeg + a_dim1], lda);

	}

	if (itype == 2 || itype == 3) {

/*           Apply H(k) from the right. */

	    sgemv_("N", m, &ixfrm, &c_b10, &a[kbeg * a_dim1 + 1], lda, &x[
		    kbeg], &c__1, &c_b9, &x[(nxfrm << 1) + 1], &c__1);
	    r__1 = -(doublereal)factor;
	    sger_(m, &ixfrm, &r__1, &x[(nxfrm << 1) + 1], &c__1, &x[kbeg], &
		    c__1, &a[kbeg * a_dim1 + 1], lda);

	}
/* L30: */
    }

    r__1 = slarnd_(&c__3, &iseed[1]);
    x[nxfrm * 2] = r_sign(&c_b10, &r__1);

/*     Scale the matrix A by D. */

    if (itype == 1 || itype == 3) {
	i__1 = *m;
	for (irow = 1; irow <= i__1; ++irow) {
	    sscal_(n, &x[nxfrm + irow], &a[irow + a_dim1], lda);
/* L40: */
	}
    }

    if (itype == 2 || itype == 3) {
	i__1 = *n;
	for (jcol = 1; jcol <= i__1; ++jcol) {
	    sscal_(m, &x[nxfrm + jcol], &a[jcol * a_dim1 + 1], &c__1);
/* L50: */
	}
    }
    return 0;

/*     End of SLAROR */

} /* slaror_ */
Esempio n. 12
0
/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, 
	integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, 
	integer *ldbx, integer *perm, integer *givptr, integer *givcol, 
	integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real *
	difl, real *difr, real *z__, integer *k, real *c__, real *s, real *
	work, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       December 1, 1999   


    Purpose   
    =======   

    SLALS0 applies back the multiplying factors of either the left or the   
    right singular vector matrix of a diagonal matrix appended by a row   
    to the right hand side matrix B in solving the least squares problem   
    using the divide-and-conquer SVD approach.   

    For the left singular vector matrix, three types of orthogonal   
    matrices are involved:   

    (1L) Givens rotations: the number of such rotations is GIVPTR; the   
         pairs of columns/rows they were applied to are stored in GIVCOL;   
         and the C- and S-values of these rotations are stored in GIVNUM.   

    (2L) Permutation. The (NL+1)-st row of B is to be moved to the first   
         row, and for J=2:N, PERM(J)-th row of B is to be moved to the   
         J-th row.   

    (3L) The left singular vector matrix of the remaining matrix.   

    For the right singular vector matrix, four types of orthogonal   
    matrices are involved:   

    (1R) The right singular vector matrix of the remaining matrix.   

    (2R) If SQRE = 1, one extra Givens rotation to generate the right   
         null space.   

    (3R) The inverse transformation of (2L).   

    (4R) The inverse transformation of (1L).   

    Arguments   
    =========   

    ICOMPQ (input) INTEGER   
           Specifies whether singular vectors are to be computed in   
           factored form:   
           = 0: Left singular vector matrix.   
           = 1: Right singular vector matrix.   

    NL     (input) INTEGER   
           The row dimension of the upper block. NL >= 1.   

    NR     (input) INTEGER   
           The row dimension of the lower block. NR >= 1.   

    SQRE   (input) INTEGER   
           = 0: the lower block is an NR-by-NR square matrix.   
           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.   

           The bidiagonal matrix has row dimension N = NL + NR + 1,   
           and column dimension M = N + SQRE.   

    NRHS   (input) INTEGER   
           The number of columns of B and BX. NRHS must be at least 1.   

    B      (input/output) REAL array, dimension ( LDB, NRHS )   
           On input, B contains the right hand sides of the least   
           squares problem in rows 1 through M. On output, B contains   
           the solution X in rows 1 through N.   

    LDB    (input) INTEGER   
           The leading dimension of B. LDB must be at least   
           max(1,MAX( M, N ) ).   

    BX     (workspace) REAL array, dimension ( LDBX, NRHS )   

    LDBX   (input) INTEGER   
           The leading dimension of BX.   

    PERM   (input) INTEGER array, dimension ( N )   
           The permutations (from deflation and sorting) applied   
           to the two blocks.   

    GIVPTR (input) INTEGER   
           The number of Givens rotations which took place in this   
           subproblem.   

    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )   
           Each pair of numbers indicates a pair of rows/columns   
           involved in a Givens rotation.   

    LDGCOL (input) INTEGER   
           The leading dimension of GIVCOL, must be at least N.   

    GIVNUM (input) REAL array, dimension ( LDGNUM, 2 )   
           Each number indicates the C or S value used in the   
           corresponding Givens rotation.   

    LDGNUM (input) INTEGER   
           The leading dimension of arrays DIFR, POLES and   
           GIVNUM, must be at least K.   

    POLES  (input) REAL array, dimension ( LDGNUM, 2 )   
           On entry, POLES(1:K, 1) contains the new singular   
           values obtained from solving the secular equation, and   
           POLES(1:K, 2) is an array containing the poles in the secular   
           equation.   

    DIFL   (input) REAL array, dimension ( K ).   
           On entry, DIFL(I) is the distance between I-th updated   
           (undeflated) singular value and the I-th (undeflated) old   
           singular value.   

    DIFR   (input) REAL array, dimension ( LDGNUM, 2 ).   
           On entry, DIFR(I, 1) contains the distances between I-th   
           updated (undeflated) singular value and the I+1-th   
           (undeflated) old singular value. And DIFR(I, 2) is the   
           normalizing factor for the I-th right singular vector.   

    Z      (input) REAL array, dimension ( K )   
           Contain the components of the deflation-adjusted updating row   
           vector.   

    K      (input) INTEGER   
           Contains the dimension of the non-deflated matrix,   
           This is the order of the related secular equation. 1 <= K <=N.   

    C      (input) REAL   
           C contains garbage if SQRE =0 and the C-value of a Givens   
           rotation related to the right null space if SQRE = 1.   

    S      (input) REAL   
           S contains garbage if SQRE =0 and the S-value of a Givens   
           rotation related to the right null space if SQRE = 1.   

    WORK   (workspace) REAL array, dimension ( K )   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    Based on contributions by   
       Ming Gu and Ren-Cang Li, Computer Science Division, University of   
         California at Berkeley, USA   
       Osni Marques, LBNL/NERSC, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static real c_b5 = -1.f;
    static integer c__1 = 1;
    static real c_b11 = 1.f;
    static real c_b13 = 0.f;
    static integer c__0 = 0;
    
    /* System generated locals */
    integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, 
	    difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, 
	    poles_offset, i__1, i__2;
    real r__1;
    /* Local variables */
    static real temp;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer i__, j, m, n;
    static real diflj, difrj, dsigj;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
	    sgemv_(char *, integer *, integer *, real *, real *, integer *, 
	    real *, integer *, real *, real *, integer *), scopy_(
	    integer *, real *, integer *, real *, integer *);
    extern doublereal slamc3_(real *, real *);
    static real dj;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static real dsigjp;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, 
	    real *, integer *);
    static integer nlp1;
#define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1]
#define bx_ref(a_1,a_2) bx[(a_2)*bx_dim1 + a_1]
#define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1]
#define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1]


    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    bx_dim1 = *ldbx;
    bx_offset = 1 + bx_dim1 * 1;
    bx -= bx_offset;
    --perm;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1 * 1;
    givcol -= givcol_offset;
    difr_dim1 = *ldgnum;
    difr_offset = 1 + difr_dim1 * 1;
    difr -= difr_offset;
    poles_dim1 = *ldgnum;
    poles_offset = 1 + poles_dim1 * 1;
    poles -= poles_offset;
    givnum_dim1 = *ldgnum;
    givnum_offset = 1 + givnum_dim1 * 1;
    givnum -= givnum_offset;
    --difl;
    --z__;
    --work;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*nl < 1) {
	*info = -2;
    } else if (*nr < 1) {
	*info = -3;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -4;
    }

    n = *nl + *nr + 1;

    if (*nrhs < 1) {
	*info = -5;
    } else if (*ldb < n) {
	*info = -7;
    } else if (*ldbx < n) {
	*info = -9;
    } else if (*givptr < 0) {
	*info = -11;
    } else if (*ldgcol < n) {
	*info = -13;
    } else if (*ldgnum < n) {
	*info = -15;
    } else if (*k < 1) {
	*info = -20;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLALS0", &i__1);
	return 0;
    }

    m = n + *sqre;
    nlp1 = *nl + 1;

    if (*icompq == 0) {

/*        Apply back orthogonal transformations from the left.   

          Step (1L): apply back the Givens rotations performed. */

	i__1 = *givptr;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    srot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(givcol_ref(
		    i__, 1), 1), ldb, &givnum_ref(i__, 2), &givnum_ref(i__, 1)
		    );
/* L10: */
	}

/*        Step (2L): permute rows of B. */

	scopy_(nrhs, &b_ref(nlp1, 1), ldb, &bx_ref(1, 1), ldbx);
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    scopy_(nrhs, &b_ref(perm[i__], 1), ldb, &bx_ref(i__, 1), ldbx);
/* L20: */
	}

/*        Step (3L): apply the inverse of the left singular vector   
          matrix to BX. */

	if (*k == 1) {
	    scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb);
	    if (z__[1] < 0.f) {
		sscal_(nrhs, &c_b5, &b[b_offset], ldb);
	    }
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		diflj = difl[j];
		dj = poles_ref(j, 1);
		dsigj = -poles_ref(j, 2);
		if (j < *k) {
		    difrj = -difr_ref(j, 1);
		    dsigjp = -poles_ref(j + 1, 2);
		}
		if (z__[j] == 0.f || poles_ref(j, 2) == 0.f) {
		    work[j] = 0.f;
		} else {
		    work[j] = -poles_ref(j, 2) * z__[j] / diflj / (poles_ref(
			    j, 2) + dj);
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) {
			work[i__] = 0.f;
		    } else {
			work[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(&
				poles_ref(i__, 2), &dsigj) - diflj) / (
				poles_ref(i__, 2) + dj);
		    }
/* L30: */
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) {
			work[i__] = 0.f;
		    } else {
			work[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(&
				poles_ref(i__, 2), &dsigjp) + difrj) / (
				poles_ref(i__, 2) + dj);
		    }
/* L40: */
		}
		work[1] = -1.f;
		temp = snrm2_(k, &work[1], &c__1);
		sgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], &
			c__1, &c_b13, &b_ref(j, 1), ldb);
		slascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b_ref(
			j, 1), ldb, info);
/* L50: */
	    }
	}

/*        Move the deflated rows of BX to B also. */

	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    slacpy_("A", &i__1, nrhs, &bx_ref(*k + 1, 1), ldbx, &b_ref(*k + 1,
		     1), ldb);
	}
    } else {

/*        Apply back the right orthogonal transformations.   

          Step (1R): apply back the new right singular vector matrix   
          to B. */

	if (*k == 1) {
	    scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx);
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		dsigj = poles_ref(j, 2);
		if (z__[j] == 0.f) {
		    work[j] = 0.f;
		} else {
		    work[j] = -z__[j] / difl[j] / (dsigj + poles_ref(j, 1)) / 
			    difr_ref(j, 2);
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			work[i__] = 0.f;
		    } else {
			r__1 = -poles_ref(i__ + 1, 2);
			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - 
				difr_ref(i__, 1)) / (dsigj + poles_ref(i__, 1)
				) / difr_ref(i__, 2);
		    }
/* L60: */
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			work[i__] = 0.f;
		    } else {
			r__1 = -poles_ref(i__, 2);
			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[
				i__]) / (dsigj + poles_ref(i__, 1)) / 
				difr_ref(i__, 2);
		    }
/* L70: */
		}
		sgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], &
			c__1, &c_b13, &bx_ref(j, 1), ldbx);
/* L80: */
	    }
	}

/*        Step (2R): if SQRE = 1, apply back the rotation that is   
          related to the right null space of the subproblem. */

	if (*sqre == 1) {
	    scopy_(nrhs, &b_ref(m, 1), ldb, &bx_ref(m, 1), ldbx);
	    srot_(nrhs, &bx_ref(1, 1), ldbx, &bx_ref(m, 1), ldbx, c__, s);
	}
	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    slacpy_("A", &i__1, nrhs, &b_ref(*k + 1, 1), ldb, &bx_ref(*k + 1, 
		    1), ldbx);
	}

/*        Step (3R): permute rows of B. */

	scopy_(nrhs, &bx_ref(1, 1), ldbx, &b_ref(nlp1, 1), ldb);
	if (*sqre == 1) {
	    scopy_(nrhs, &bx_ref(m, 1), ldbx, &b_ref(m, 1), ldb);
	}
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    scopy_(nrhs, &bx_ref(i__, 1), ldbx, &b_ref(perm[i__], 1), ldb);
/* L90: */
	}

/*        Step (4R): apply back the Givens rotations performed. */

	for (i__ = *givptr; i__ >= 1; --i__) {
	    r__1 = -givnum_ref(i__, 1);
	    srot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(givcol_ref(
		    i__, 1), 1), ldb, &givnum_ref(i__, 2), &r__1);
/* L100: */
	}
    }

    return 0;

/*     End of SLALS0 */

} /* slals0_ */
Esempio n. 13
0
 int slaein_(int *rightv, int *noinit, int *n, 
	float *h__, int *ldh, float *wr, float *wi, float *vr, float *vi, float 
	*b, int *ldb, float *work, float *eps3, float *smlnum, float *bignum, 
	int *info)
{
    /* System generated locals */
    int b_dim1, b_offset, h_dim1, h_offset, i__1, i__2, i__3, i__4;
    float r__1, r__2, r__3, r__4;

    /* Builtin functions */
    double sqrt(double);

    /* Local variables */
    int i__, j;
    float w, x, y;
    int i1, i2, i3;
    float w1, ei, ej, xi, xr, rec;
    int its, ierr;
    float temp, norm, vmax;
    extern double snrm2_(int *, float *, int *);
    float scale;
    extern  int sscal_(int *, float *, float *, int *);
    char trans[1];
    float vcrit;
    extern double sasum_(int *, float *, int *);
    float rootn, vnorm;
    extern double slapy2_(float *, float *);
    float absbii, absbjj;
    extern int isamax_(int *, float *, int *);
    extern  int sladiv_(float *, float *, float *, float *, float *
, float *);
    char normin[1];
    float nrmsml;
    extern  int slatrs_(char *, char *, char *, char *, 
	    int *, float *, int *, float *, float *, float *, int *);
    float growto;


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAEIN uses inverse iteration to find a right or left eigenvector */
/*  corresponding to the eigenvalue (WR,WI) of a float upper Hessenberg */
/*  matrix H. */

/*  Arguments */
/*  ========= */

/*  RIGHTV   (input) LOGICAL */
/*          = .TRUE. : compute right eigenvector; */
/*          = .FALSE.: compute left eigenvector. */

/*  NOINIT   (input) LOGICAL */
/*          = .TRUE. : no initial vector supplied in (VR,VI). */
/*          = .FALSE.: initial vector supplied in (VR,VI). */

/*  N       (input) INTEGER */
/*          The order of the matrix H.  N >= 0. */

/*  H       (input) REAL array, dimension (LDH,N) */
/*          The upper Hessenberg matrix H. */

/*  LDH     (input) INTEGER */
/*          The leading dimension of the array H.  LDH >= MAX(1,N). */

/*  WR      (input) REAL */
/*  WI      (input) REAL */
/*          The float and imaginary parts of the eigenvalue of H whose */
/*          corresponding right or left eigenvector is to be computed. */

/*  VR      (input/output) REAL array, dimension (N) */
/*  VI      (input/output) REAL array, dimension (N) */
/*          On entry, if NOINIT = .FALSE. and WI = 0.0, VR must contain */
/*          a float starting vector for inverse iteration using the float */
/*          eigenvalue WR; if NOINIT = .FALSE. and WI.ne.0.0, VR and VI */
/*          must contain the float and imaginary parts of a complex */
/*          starting vector for inverse iteration using the complex */
/*          eigenvalue (WR,WI); otherwise VR and VI need not be set. */
/*          On exit, if WI = 0.0 (float eigenvalue), VR contains the */
/*          computed float eigenvector; if WI.ne.0.0 (complex eigenvalue), */
/*          VR and VI contain the float and imaginary parts of the */
/*          computed complex eigenvector. The eigenvector is normalized */
/*          so that the component of largest magnitude has magnitude 1; */
/*          here the magnitude of a complex number (x,y) is taken to be */
/*          |x| + |y|. */
/*          VI is not referenced if WI = 0.0. */

/*  B       (workspace) REAL array, dimension (LDB,N) */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= N+1. */

/*  WORK   (workspace) REAL array, dimension (N) */

/*  EPS3    (input) REAL */
/*          A small machine-dependent value which is used to perturb */
/*          close eigenvalues, and to replace zero pivots. */

/*  SMLNUM  (input) REAL */
/*          A machine-dependent value close to the underflow threshold. */

/*  BIGNUM  (input) REAL */
/*          A machine-dependent value close to the overflow threshold. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          = 1:  inverse iteration did not converge; VR is set to the */
/*                last iterate, and so is VI if WI.ne.0.0. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    --vr;
    --vi;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;

/*     GROWTO is the threshold used in the acceptance test for an */
/*     eigenvector. */

    rootn = sqrt((float) (*n));
    growto = .1f / rootn;
/* Computing MAX */
    r__1 = 1.f, r__2 = *eps3 * rootn;
    nrmsml = MAX(r__1,r__2) * *smlnum;

/*     Form B = H - (WR,WI)*I (except that the subdiagonal elements and */
/*     the imaginary parts of the diagonal elements are not stored). */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    b[i__ + j * b_dim1] = h__[i__ + j * h_dim1];
/* L10: */
	}
	b[j + j * b_dim1] = h__[j + j * h_dim1] - *wr;
/* L20: */
    }

    if (*wi == 0.f) {

/*        Real eigenvalue. */

	if (*noinit) {

/*           Set initial vector. */

	    i__1 = *n;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		vr[i__] = *eps3;
/* L30: */
	    }
	} else {

/*           Scale supplied initial vector. */

	    vnorm = snrm2_(n, &vr[1], &c__1);
	    r__1 = *eps3 * rootn / MAX(vnorm,nrmsml);
	    sscal_(n, &r__1, &vr[1], &c__1);
	}

	if (*rightv) {

/*           LU decomposition with partial pivoting of B, replacing zero */
/*           pivots by EPS3. */

	    i__1 = *n - 1;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		ei = h__[i__ + 1 + i__ * h_dim1];
		if ((r__1 = b[i__ + i__ * b_dim1], ABS(r__1)) < ABS(ei)) {

/*                 Interchange rows and eliminate. */

		    x = b[i__ + i__ * b_dim1] / ei;
		    b[i__ + i__ * b_dim1] = ei;
		    i__2 = *n;
		    for (j = i__ + 1; j <= i__2; ++j) {
			temp = b[i__ + 1 + j * b_dim1];
			b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - x * 
				temp;
			b[i__ + j * b_dim1] = temp;
/* L40: */
		    }
		} else {

/*                 Eliminate without interchange. */

		    if (b[i__ + i__ * b_dim1] == 0.f) {
			b[i__ + i__ * b_dim1] = *eps3;
		    }
		    x = ei / b[i__ + i__ * b_dim1];
		    if (x != 0.f) {
			i__2 = *n;
			for (j = i__ + 1; j <= i__2; ++j) {
			    b[i__ + 1 + j * b_dim1] -= x * b[i__ + j * b_dim1]
				    ;
/* L50: */
			}
		    }
		}
/* L60: */
	    }
	    if (b[*n + *n * b_dim1] == 0.f) {
		b[*n + *n * b_dim1] = *eps3;
	    }

	    *(unsigned char *)trans = 'N';

	} else {

/*           UL decomposition with partial pivoting of B, replacing zero */
/*           pivots by EPS3. */

	    for (j = *n; j >= 2; --j) {
		ej = h__[j + (j - 1) * h_dim1];
		if ((r__1 = b[j + j * b_dim1], ABS(r__1)) < ABS(ej)) {

/*                 Interchange columns and eliminate. */

		    x = b[j + j * b_dim1] / ej;
		    b[j + j * b_dim1] = ej;
		    i__1 = j - 1;
		    for (i__ = 1; i__ <= i__1; ++i__) {
			temp = b[i__ + (j - 1) * b_dim1];
			b[i__ + (j - 1) * b_dim1] = b[i__ + j * b_dim1] - x * 
				temp;
			b[i__ + j * b_dim1] = temp;
/* L70: */
		    }
		} else {

/*                 Eliminate without interchange. */

		    if (b[j + j * b_dim1] == 0.f) {
			b[j + j * b_dim1] = *eps3;
		    }
		    x = ej / b[j + j * b_dim1];
		    if (x != 0.f) {
			i__1 = j - 1;
			for (i__ = 1; i__ <= i__1; ++i__) {
			    b[i__ + (j - 1) * b_dim1] -= x * b[i__ + j * 
				    b_dim1];
/* L80: */
			}
		    }
		}
/* L90: */
	    }
	    if (b[b_dim1 + 1] == 0.f) {
		b[b_dim1 + 1] = *eps3;
	    }

	    *(unsigned char *)trans = 'T';

	}

	*(unsigned char *)normin = 'N';
	i__1 = *n;
	for (its = 1; its <= i__1; ++its) {

/*           Solve U*x = scale*v for a right eigenvector */
/*             or U'*x = scale*v for a left eigenvector, */
/*           overwriting x on v. */

	    slatrs_("Upper", trans, "Nonunit", normin, n, &b[b_offset], ldb, &
		    vr[1], &scale, &work[1], &ierr);
	    *(unsigned char *)normin = 'Y';

/*           Test for sufficient growth in the norm of v. */

	    vnorm = sasum_(n, &vr[1], &c__1);
	    if (vnorm >= growto * scale) {
		goto L120;
	    }

/*           Choose new orthogonal starting vector and try again. */

	    temp = *eps3 / (rootn + 1.f);
	    vr[1] = *eps3;
	    i__2 = *n;
	    for (i__ = 2; i__ <= i__2; ++i__) {
		vr[i__] = temp;
/* L100: */
	    }
	    vr[*n - its + 1] -= *eps3 * rootn;
/* L110: */
	}

/*        Failure to find eigenvector in N iterations. */

	*info = 1;

L120:

/*        Normalize eigenvector. */

	i__ = isamax_(n, &vr[1], &c__1);
	r__2 = 1.f / (r__1 = vr[i__], ABS(r__1));
	sscal_(n, &r__2, &vr[1], &c__1);
    } else {

/*        Complex eigenvalue. */

	if (*noinit) {

/*           Set initial vector. */

	    i__1 = *n;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		vr[i__] = *eps3;
		vi[i__] = 0.f;
/* L130: */
	    }
	} else {

/*           Scale supplied initial vector. */

	    r__1 = snrm2_(n, &vr[1], &c__1);
	    r__2 = snrm2_(n, &vi[1], &c__1);
	    norm = slapy2_(&r__1, &r__2);
	    rec = *eps3 * rootn / MAX(norm,nrmsml);
	    sscal_(n, &rec, &vr[1], &c__1);
	    sscal_(n, &rec, &vi[1], &c__1);
	}

	if (*rightv) {

/*           LU decomposition with partial pivoting of B, replacing zero */
/*           pivots by EPS3. */

/*           The imaginary part of the (i,j)-th element of U is stored in */
/*           B(j+1,i). */

	    b[b_dim1 + 2] = -(*wi);
	    i__1 = *n;
	    for (i__ = 2; i__ <= i__1; ++i__) {
		b[i__ + 1 + b_dim1] = 0.f;
/* L140: */
	    }

	    i__1 = *n - 1;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		absbii = slapy2_(&b[i__ + i__ * b_dim1], &b[i__ + 1 + i__ * 
			b_dim1]);
		ei = h__[i__ + 1 + i__ * h_dim1];
		if (absbii < ABS(ei)) {

/*                 Interchange rows and eliminate. */

		    xr = b[i__ + i__ * b_dim1] / ei;
		    xi = b[i__ + 1 + i__ * b_dim1] / ei;
		    b[i__ + i__ * b_dim1] = ei;
		    b[i__ + 1 + i__ * b_dim1] = 0.f;
		    i__2 = *n;
		    for (j = i__ + 1; j <= i__2; ++j) {
			temp = b[i__ + 1 + j * b_dim1];
			b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - xr * 
				temp;
			b[j + 1 + (i__ + 1) * b_dim1] = b[j + 1 + i__ * 
				b_dim1] - xi * temp;
			b[i__ + j * b_dim1] = temp;
			b[j + 1 + i__ * b_dim1] = 0.f;
/* L150: */
		    }
		    b[i__ + 2 + i__ * b_dim1] = -(*wi);
		    b[i__ + 1 + (i__ + 1) * b_dim1] -= xi * *wi;
		    b[i__ + 2 + (i__ + 1) * b_dim1] += xr * *wi;
		} else {

/*                 Eliminate without interchanging rows. */

		    if (absbii == 0.f) {
			b[i__ + i__ * b_dim1] = *eps3;
			b[i__ + 1 + i__ * b_dim1] = 0.f;
			absbii = *eps3;
		    }
		    ei = ei / absbii / absbii;
		    xr = b[i__ + i__ * b_dim1] * ei;
		    xi = -b[i__ + 1 + i__ * b_dim1] * ei;
		    i__2 = *n;
		    for (j = i__ + 1; j <= i__2; ++j) {
			b[i__ + 1 + j * b_dim1] = b[i__ + 1 + j * b_dim1] - 
				xr * b[i__ + j * b_dim1] + xi * b[j + 1 + i__ 
				* b_dim1];
			b[j + 1 + (i__ + 1) * b_dim1] = -xr * b[j + 1 + i__ * 
				b_dim1] - xi * b[i__ + j * b_dim1];
/* L160: */
		    }
		    b[i__ + 2 + (i__ + 1) * b_dim1] -= *wi;
		}

/*              Compute 1-norm of offdiagonal elements of i-th row. */

		i__2 = *n - i__;
		i__3 = *n - i__;
		work[i__] = sasum_(&i__2, &b[i__ + (i__ + 1) * b_dim1], ldb) 
			+ sasum_(&i__3, &b[i__ + 2 + i__ * b_dim1], &c__1);
/* L170: */
	    }
	    if (b[*n + *n * b_dim1] == 0.f && b[*n + 1 + *n * b_dim1] == 0.f) 
		    {
		b[*n + *n * b_dim1] = *eps3;
	    }
	    work[*n] = 0.f;

	    i1 = *n;
	    i2 = 1;
	    i3 = -1;
	} else {

/*           UL decomposition with partial pivoting of conjg(B), */
/*           replacing zero pivots by EPS3. */

/*           The imaginary part of the (i,j)-th element of U is stored in */
/*           B(j+1,i). */

	    b[*n + 1 + *n * b_dim1] = *wi;
	    i__1 = *n - 1;
	    for (j = 1; j <= i__1; ++j) {
		b[*n + 1 + j * b_dim1] = 0.f;
/* L180: */
	    }

	    for (j = *n; j >= 2; --j) {
		ej = h__[j + (j - 1) * h_dim1];
		absbjj = slapy2_(&b[j + j * b_dim1], &b[j + 1 + j * b_dim1]);
		if (absbjj < ABS(ej)) {

/*                 Interchange columns and eliminate */

		    xr = b[j + j * b_dim1] / ej;
		    xi = b[j + 1 + j * b_dim1] / ej;
		    b[j + j * b_dim1] = ej;
		    b[j + 1 + j * b_dim1] = 0.f;
		    i__1 = j - 1;
		    for (i__ = 1; i__ <= i__1; ++i__) {
			temp = b[i__ + (j - 1) * b_dim1];
			b[i__ + (j - 1) * b_dim1] = b[i__ + j * b_dim1] - xr *
				 temp;
			b[j + i__ * b_dim1] = b[j + 1 + i__ * b_dim1] - xi * 
				temp;
			b[i__ + j * b_dim1] = temp;
			b[j + 1 + i__ * b_dim1] = 0.f;
/* L190: */
		    }
		    b[j + 1 + (j - 1) * b_dim1] = *wi;
		    b[j - 1 + (j - 1) * b_dim1] += xi * *wi;
		    b[j + (j - 1) * b_dim1] -= xr * *wi;
		} else {

/*                 Eliminate without interchange. */

		    if (absbjj == 0.f) {
			b[j + j * b_dim1] = *eps3;
			b[j + 1 + j * b_dim1] = 0.f;
			absbjj = *eps3;
		    }
		    ej = ej / absbjj / absbjj;
		    xr = b[j + j * b_dim1] * ej;
		    xi = -b[j + 1 + j * b_dim1] * ej;
		    i__1 = j - 1;
		    for (i__ = 1; i__ <= i__1; ++i__) {
			b[i__ + (j - 1) * b_dim1] = b[i__ + (j - 1) * b_dim1] 
				- xr * b[i__ + j * b_dim1] + xi * b[j + 1 + 
				i__ * b_dim1];
			b[j + i__ * b_dim1] = -xr * b[j + 1 + i__ * b_dim1] - 
				xi * b[i__ + j * b_dim1];
/* L200: */
		    }
		    b[j + (j - 1) * b_dim1] += *wi;
		}

/*              Compute 1-norm of offdiagonal elements of j-th column. */

		i__1 = j - 1;
		i__2 = j - 1;
		work[j] = sasum_(&i__1, &b[j * b_dim1 + 1], &c__1) + sasum_(&
			i__2, &b[j + 1 + b_dim1], ldb);
/* L210: */
	    }
	    if (b[b_dim1 + 1] == 0.f && b[b_dim1 + 2] == 0.f) {
		b[b_dim1 + 1] = *eps3;
	    }
	    work[1] = 0.f;

	    i1 = 1;
	    i2 = *n;
	    i3 = 1;
	}

	i__1 = *n;
	for (its = 1; its <= i__1; ++its) {
	    scale = 1.f;
	    vmax = 1.f;
	    vcrit = *bignum;

/*           Solve U*(xr,xi) = scale*(vr,vi) for a right eigenvector, */
/*             or U'*(xr,xi) = scale*(vr,vi) for a left eigenvector, */
/*           overwriting (xr,xi) on (vr,vi). */

	    i__2 = i2;
	    i__3 = i3;
	    for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) 
		    {

		if (work[i__] > vcrit) {
		    rec = 1.f / vmax;
		    sscal_(n, &rec, &vr[1], &c__1);
		    sscal_(n, &rec, &vi[1], &c__1);
		    scale *= rec;
		    vmax = 1.f;
		    vcrit = *bignum;
		}

		xr = vr[i__];
		xi = vi[i__];
		if (*rightv) {
		    i__4 = *n;
		    for (j = i__ + 1; j <= i__4; ++j) {
			xr = xr - b[i__ + j * b_dim1] * vr[j] + b[j + 1 + i__ 
				* b_dim1] * vi[j];
			xi = xi - b[i__ + j * b_dim1] * vi[j] - b[j + 1 + i__ 
				* b_dim1] * vr[j];
/* L220: */
		    }
		} else {
		    i__4 = i__ - 1;
		    for (j = 1; j <= i__4; ++j) {
			xr = xr - b[j + i__ * b_dim1] * vr[j] + b[i__ + 1 + j 
				* b_dim1] * vi[j];
			xi = xi - b[j + i__ * b_dim1] * vi[j] - b[i__ + 1 + j 
				* b_dim1] * vr[j];
/* L230: */
		    }
		}

		w = (r__1 = b[i__ + i__ * b_dim1], ABS(r__1)) + (r__2 = b[
			i__ + 1 + i__ * b_dim1], ABS(r__2));
		if (w > *smlnum) {
		    if (w < 1.f) {
			w1 = ABS(xr) + ABS(xi);
			if (w1 > w * *bignum) {
			    rec = 1.f / w1;
			    sscal_(n, &rec, &vr[1], &c__1);
			    sscal_(n, &rec, &vi[1], &c__1);
			    xr = vr[i__];
			    xi = vi[i__];
			    scale *= rec;
			    vmax *= rec;
			}
		    }

/*                 Divide by diagonal element of B. */

		    sladiv_(&xr, &xi, &b[i__ + i__ * b_dim1], &b[i__ + 1 + 
			    i__ * b_dim1], &vr[i__], &vi[i__]);
/* Computing MAX */
		    r__3 = (r__1 = vr[i__], ABS(r__1)) + (r__2 = vi[i__], 
			    ABS(r__2));
		    vmax = MAX(r__3,vmax);
		    vcrit = *bignum / vmax;
		} else {
		    i__4 = *n;
		    for (j = 1; j <= i__4; ++j) {
			vr[j] = 0.f;
			vi[j] = 0.f;
/* L240: */
		    }
		    vr[i__] = 1.f;
		    vi[i__] = 1.f;
		    scale = 0.f;
		    vmax = 1.f;
		    vcrit = *bignum;
		}
/* L250: */
	    }

/*           Test for sufficient growth in the norm of (VR,VI). */

	    vnorm = sasum_(n, &vr[1], &c__1) + sasum_(n, &vi[1], &c__1);
	    if (vnorm >= growto * scale) {
		goto L280;
	    }

/*           Choose a new orthogonal starting vector and try again. */

	    y = *eps3 / (rootn + 1.f);
	    vr[1] = *eps3;
	    vi[1] = 0.f;

	    i__3 = *n;
	    for (i__ = 2; i__ <= i__3; ++i__) {
		vr[i__] = y;
		vi[i__] = 0.f;
/* L260: */
	    }
	    vr[*n - its + 1] -= *eps3 * rootn;
/* L270: */
	}

/*        Failure to find eigenvector in N iterations */

	*info = 1;

L280:

/*        Normalize eigenvector. */

	vnorm = 0.f;
	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
/* Computing MAX */
	    r__3 = vnorm, r__4 = (r__1 = vr[i__], ABS(r__1)) + (r__2 = vi[
		    i__], ABS(r__2));
	    vnorm = MAX(r__3,r__4);
/* L290: */
	}
	r__1 = 1.f / vnorm;
	sscal_(n, &r__1, &vr[1], &c__1);
	r__1 = 1.f / vnorm;
	sscal_(n, &r__1, &vi[1], &c__1);

    }

    return 0;

/*     End of SLAEIN */

} /* slaein_ */
Esempio n. 14
0
/*! \brief
 * <pre>
 * Purpose
 * =======
 *    ilu_sdrop_row() - Drop some small rows from the previous
 *    supernode (L-part only).
 * </pre>
 */
int ilu_sdrop_row(
        superlu_options_t *options, /* options */
        int    first,       /* index of the first column in the supernode */
        int    last,        /* index of the last column in the supernode */
        double drop_tol,    /* dropping parameter */
        int    quota,       /* maximum nonzero entries allowed */
        int    *nnzLj,      /* in/out number of nonzeros in L(:, 1:last) */
        double *fill_tol,   /* in/out - on exit, fill_tol=-num_zero_pivots,
                             * does not change if options->ILU_MILU != SMILU1 */
        GlobalLU_t *Glu,    /* modified */
        float swork[],   /* working space
                             * the length of swork[] should be no less than
                             * the number of rows in the supernode */
        float swork2[], /* working space with the same size as swork[],
                             * used only by the second dropping rule */
        int    lastc        /* if lastc == 0, there is nothing after the
                             * working supernode [first:last];
                             * if lastc == 1, there is one more column after
                             * the working supernode. */ )
{
    register int i, j, k, m1;
    register int nzlc; /* number of nonzeros in column last+1 */
    register int xlusup_first, xlsub_first;
    int m, n; /* m x n is the size of the supernode */
    int r = 0; /* number of dropped rows */
    register float *temp;
    register float *lusup = Glu->lusup;
    register int *lsub = Glu->lsub;
    register int *xlsub = Glu->xlsub;
    register int *xlusup = Glu->xlusup;
    register float d_max = 0.0, d_min = 1.0;
    int    drop_rule = options->ILU_DropRule;
    milu_t milu = options->ILU_MILU;
    norm_t nrm = options->ILU_Norm;
    float zero = 0.0;
    float one = 1.0;
    float none = -1.0;
    int i_1 = 1;
    int inc_diag; /* inc_diag = m + 1 */
    int nzp = 0;  /* number of zero pivots */
    float alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim);

    xlusup_first = xlusup[first];
    xlsub_first = xlsub[first];
    m = xlusup[first + 1] - xlusup_first;
    n = last - first + 1;
    m1 = m - 1;
    inc_diag = m + 1;
    nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0;
    temp = swork - n;

    /* Quick return if nothing to do. */
    if (m == 0 || m == n || drop_rule == NODROP)
    {
        *nnzLj += m * n;
        return 0;
    }

    /* basic dropping: ILU(tau) */
    for (i = n; i <= m1; )
    {
        /* the average abs value of ith row */
        switch (nrm)
        {
            case ONE_NORM:
                temp[i] = sasum_(&n, &lusup[xlusup_first + i], &m) / (double)n;
                break;
            case TWO_NORM:
                temp[i] = snrm2_(&n, &lusup[xlusup_first + i], &m)
                    / sqrt((double)n);
                break;
            case INF_NORM:
            default:
                k = isamax_(&n, &lusup[xlusup_first + i], &m) - 1;
                temp[i] = fabs(lusup[xlusup_first + i + m * k]);
                break;
        }

        /* drop small entries due to drop_tol */
        if (drop_rule & DROP_BASIC && temp[i] < drop_tol)
        {
            r++;
            /* drop the current row and move the last undropped row here */
            if (r > 1) /* add to last row */
            {
                /* accumulate the sum (for MILU) */
                switch (milu)
                {
                    case SMILU_1:
                    case SMILU_2:
                        saxpy_(&n, &one, &lusup[xlusup_first + i], &m,
                                &lusup[xlusup_first + m - 1], &m);
                        break;
                    case SMILU_3:
                        for (j = 0; j < n; j++)
                            lusup[xlusup_first + (m - 1) + j * m] +=
                                    fabs(lusup[xlusup_first + i + j * m]);
                        break;
                    case SILU:
                    default:
                        break;
                }
                scopy_(&n, &lusup[xlusup_first + m1], &m,
                       &lusup[xlusup_first + i], &m);
            } /* if (r > 1) */
            else /* move to last row */
            {
                sswap_(&n, &lusup[xlusup_first + m1], &m,
                        &lusup[xlusup_first + i], &m);
                if (milu == SMILU_3)
                    for (j = 0; j < n; j++) {
                        lusup[xlusup_first + m1 + j * m] =
                                fabs(lusup[xlusup_first + m1 + j * m]);
                    }
            }
            lsub[xlsub_first + i] = lsub[xlsub_first + m1];
            m1--;
            continue;
        } /* if dropping */
        else
        {
            if (temp[i] > d_max) d_max = temp[i];
            if (temp[i] < d_min) d_min = temp[i];
        }
        i++;
    } /* for */

    /* Secondary dropping: drop more rows according to the quota. */
    quota = ceil((double)quota / (double)n);
    if (drop_rule & DROP_SECONDARY && m - r > quota)
    {
        register double tol = d_max;

        /* Calculate the second dropping tolerance */
        if (quota > n)
        {
            if (drop_rule & DROP_INTERP) /* by interpolation */
            {
                d_max = 1.0 / d_max; d_min = 1.0 / d_min;
                tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r));
            }
            else /* by quick select */
            {
                int len = m1 - n + 1;
                scopy_(&len, swork, &i_1, swork2, &i_1);
                tol = sqselect(len, swork2, quota - n);
#if 0
                register int *itemp = iwork - n;
                A = temp;
                for (i = n; i <= m1; i++) itemp[i] = i;
                qsort(iwork, m1 - n + 1, sizeof(int), _compare_);
                tol = temp[itemp[quota]];
#endif
            }
        }

        for (i = n; i <= m1; )
        {
            if (temp[i] <= tol)
            {
                register int j;
                r++;
                /* drop the current row and move the last undropped row here */
                if (r > 1) /* add to last row */
                {
                    /* accumulate the sum (for MILU) */
                    switch (milu)
                    {
                        case SMILU_1:
                        case SMILU_2:
                            saxpy_(&n, &one, &lusup[xlusup_first + i], &m,
                                    &lusup[xlusup_first + m - 1], &m);
                            break;
                        case SMILU_3:
                            for (j = 0; j < n; j++)
                                lusup[xlusup_first + (m - 1) + j * m] +=
                                        fabs(lusup[xlusup_first + i + j * m]);
                            break;
                        case SILU:
                        default:
                            break;
                    }
                    scopy_(&n, &lusup[xlusup_first + m1], &m,
                            &lusup[xlusup_first + i], &m);
                } /* if (r > 1) */
                else /* move to last row */
                {
                    sswap_(&n, &lusup[xlusup_first + m1], &m,
                            &lusup[xlusup_first + i], &m);
                    if (milu == SMILU_3)
                        for (j = 0; j < n; j++) {
                            lusup[xlusup_first + m1 + j * m] =
                                    fabs(lusup[xlusup_first + m1 + j * m]);
                        }
                }
                lsub[xlsub_first + i] = lsub[xlsub_first + m1];
                m1--;
                temp[i] = temp[m1];

                continue;
            }
            i++;

        } /* for */

    } /* if secondary dropping */

    for (i = n; i < m; i++) temp[i] = 0.0;

    if (r == 0)
    {
        *nnzLj += m * n;
        return 0;
    }

    /* add dropped entries to the diagnal */
    if (milu != SILU)
    {
        register int j;
        float t;
        float omega;
        for (j = 0; j < n; j++)
        {
            t = lusup[xlusup_first + (m - 1) + j * m];
            if (t == zero) continue;
            if (t > zero)
                omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / t, 1.0);
            else
                omega = SUPERLU_MAX(2.0 * (1.0 - alpha) / t, -1.0);
            t *= omega;

            switch (milu)
            {
                case SMILU_1:
                    if (t != none) {
                        lusup[xlusup_first + j * inc_diag] *= (one + t);
                    }
                    else
                    {
                        lusup[xlusup_first + j * inc_diag] *= *fill_tol;
#ifdef DEBUG
                        printf("[1] ZERO PIVOT: FILL col %d.\n", first + j);
                        fflush(stdout);
#endif
                        nzp++;
                    }
                    break;
                case SMILU_2:
                    lusup[xlusup_first + j * inc_diag] *= (1.0 + fabs(t));
                    break;
                case SMILU_3:
                    lusup[xlusup_first + j * inc_diag] *= (one + t);
                    break;
                case SILU:
                default:
                    break;
            }
        }
        if (nzp > 0) *fill_tol = -nzp;
    }

    /* Remove dropped entries from the memory and fix the pointers. */
    m1 = m - r;
    for (j = 1; j < n; j++)
    {
        register int tmp1, tmp2;
        tmp1 = xlusup_first + j * m1;
        tmp2 = xlusup_first + j * m;
        for (i = 0; i < m1; i++)
            lusup[i + tmp1] = lusup[i + tmp2];
    }
    for (i = 0; i < nzlc; i++)
        lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m];
    for (i = 0; i < nzlc; i++)
        lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i];
    for (i = first + 1; i <= last + 1; i++)
    {
        xlusup[i] -= r * (i - first);
        xlsub[i] -= r;
    }
    if (lastc)
    {
        xlusup[last + 2] -= r * n;
        xlsub[last + 2] -= r;
    }

    *nnzLj += (m - r) * n;
    return r;
}
Esempio n. 15
0
/* Subroutine */ int snaitr_(integer *ido, char *bmat, integer *n, integer *k,
	 integer *np, integer *nb, real *resid, real *rnorm, real *v, integer 
	*ldv, real *h__, integer *ldh, integer *ipntr, real *workd, integer *
	info, ftnlen bmat_len)
{
    /* Initialized data */

    static logical first = TRUE_;

    /* System generated locals */
    integer h_dim1, h_offset, v_dim1, v_offset, i__1, i__2;
    real r__1, r__2;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    static integer i__, j;
    static real t0, t1, t2, t3, t4, t5;
    static integer jj, ipj, irj, ivj;
    static real ulp, tst1;
    static integer ierr, iter;
    static real unfl, ovfl;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    static integer itry;
    static real temp1;
    static logical orth1, orth2, step3, step4;
    extern doublereal snrm2_(integer *, real *, integer *);
    static real betaj;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer infol;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *, 
	    ftnlen);
    static real xtemp[2];
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static real wnorm;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), ivout_(integer *, integer *, integer *, 
	    integer *, char *, ftnlen), smout_(integer *, integer *, integer *
	    , real *, integer *, integer *, char *, ftnlen), svout_(integer *,
	     integer *, real *, integer *, char *, ftnlen), sgetv0_(integer *,
	     char *, integer *, logical *, integer *, integer *, real *, 
	    integer *, real *, real *, integer *, real *, integer *, ftnlen);
    static real rnorm1;
    extern /* Subroutine */ int slabad_(real *, real *);
    extern doublereal slamch_(char *, ftnlen);
    extern /* Subroutine */ int second_(real *), slascl_(char *, integer *, 
	    integer *, real *, real *, integer *, integer *, real *, integer *
	    , integer *, ftnlen);
    static logical rstart;
    static integer msglvl;
    static real smlnum;
    extern doublereal slanhs_(char *, integer *, real *, integer *, real *, 
	    ftnlen);


/*     %----------------------------------------------------% */
/*     | Include files for debugging and timing information | */
/*     %----------------------------------------------------% */


/* \SCCS Information: @(#) */
/* FILE: debug.h   SID: 2.3   DATE OF SID: 11/16/95   RELEASE: 2 */

/*     %---------------------------------% */
/*     | See debug.doc for documentation | */
/*     %---------------------------------% */

/*     %------------------% */
/*     | Scalar Arguments | */
/*     %------------------% */

/*     %--------------------------------% */
/*     | See stat.doc for documentation | */
/*     %--------------------------------% */

/* \SCCS Information: @(#) */
/* FILE: stat.h   SID: 2.2   DATE OF SID: 11/16/95   RELEASE: 2 */



/*     %-----------------% */
/*     | Array Arguments | */
/*     %-----------------% */


/*     %------------% */
/*     | Parameters | */
/*     %------------% */


/*     %---------------% */
/*     | Local Scalars | */
/*     %---------------% */


/*     %-----------------------% */
/*     | Local Array Arguments | */
/*     %-----------------------% */


/*     %----------------------% */
/*     | External Subroutines | */
/*     %----------------------% */


/*     %--------------------% */
/*     | External Functions | */
/*     %--------------------% */


/*     %---------------------% */
/*     | Intrinsic Functions | */
/*     %---------------------% */


/*     %-----------------% */
/*     | Data statements | */
/*     %-----------------% */

    /* Parameter adjustments */
    --workd;
    --resid;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    --ipntr;

    /* Function Body */

/*     %-----------------------% */
/*     | Executable Statements | */
/*     %-----------------------% */

    if (first) {

/*        %-----------------------------------------% */
/*        | Set machine-dependent constants for the | */
/*        | the splitting and deflation criterion.  | */
/*        | If norm(H) <= sqrt(OVFL),               | */
/*        | overflow should not occur.              | */
/*        | REFERENCE: LAPACK subroutine slahqr     | */
/*        %-----------------------------------------% */

	unfl = slamch_("safe minimum", (ftnlen)12);
	ovfl = 1.f / unfl;
	slabad_(&unfl, &ovfl);
	ulp = slamch_("precision", (ftnlen)9);
	smlnum = unfl * (*n / ulp);
	first = FALSE_;
    }

    if (*ido == 0) {

/*        %-------------------------------% */
/*        | Initialize timing statistics  | */
/*        | & message level for debugging | */
/*        %-------------------------------% */

	second_(&t0);
	msglvl = debug_1.mnaitr;

/*        %------------------------------% */
/*        | Initial call to this routine | */
/*        %------------------------------% */

	*info = 0;
	step3 = FALSE_;
	step4 = FALSE_;
	rstart = FALSE_;
	orth1 = FALSE_;
	orth2 = FALSE_;
	j = *k + 1;
	ipj = 1;
	irj = ipj + *n;
	ivj = irj + *n;
    }

/*     %-------------------------------------------------% */
/*     | When in reverse communication mode one of:      | */
/*     | STEP3, STEP4, ORTH1, ORTH2, RSTART              | */
/*     | will be .true. when ....                        | */
/*     | STEP3: return from computing OP*v_{j}.          | */
/*     | STEP4: return from computing B-norm of OP*v_{j} | */
/*     | ORTH1: return from computing B-norm of r_{j+1}  | */
/*     | ORTH2: return from computing B-norm of          | */
/*     |        correction to the residual vector.       | */
/*     | RSTART: return from OP computations needed by   | */
/*     |         sgetv0.                                 | */
/*     %-------------------------------------------------% */

    if (step3) {
	goto L50;
    }
    if (step4) {
	goto L60;
    }
    if (orth1) {
	goto L70;
    }
    if (orth2) {
	goto L90;
    }
    if (rstart) {
	goto L30;
    }

/*     %-----------------------------% */
/*     | Else this is the first step | */
/*     %-----------------------------% */

/*     %--------------------------------------------------------------% */
/*     |                                                              | */
/*     |        A R N O L D I     I T E R A T I O N     L O O P       | */
/*     |                                                              | */
/*     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | */
/*     %--------------------------------------------------------------% */
L1000:

    if (msglvl > 1) {
	ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: generat"
		"ing Arnoldi vector number", (ftnlen)40);
	svout_(&debug_1.logfil, &c__1, rnorm, &debug_1.ndigit, "_naitr: B-no"
		"rm of the current residual is", (ftnlen)41);
    }

/*        %---------------------------------------------------% */
/*        | STEP 1: Check if the B norm of j-th residual      | */
/*        | vector is zero. Equivalent to determing whether   | */
/*        | an exact j-step Arnoldi factorization is present. | */
/*        %---------------------------------------------------% */

    betaj = *rnorm;
    if (*rnorm > 0.f) {
	goto L40;
    }

/*           %---------------------------------------------------% */
/*           | Invariant subspace found, generate a new starting | */
/*           | vector which is orthogonal to the current Arnoldi | */
/*           | basis and continue the iteration.                 | */
/*           %---------------------------------------------------% */

    if (msglvl > 0) {
	ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: ****** "
		"RESTART AT STEP ******", (ftnlen)37);
    }

/*           %---------------------------------------------% */
/*           | ITRY is the loop variable that controls the | */
/*           | maximum amount of times that a restart is   | */
/*           | attempted. NRSTRT is used by stat.h         | */
/*           %---------------------------------------------% */

    betaj = 0.f;
    ++timing_1.nrstrt;
    itry = 1;
L20:
    rstart = TRUE_;
    *ido = 0;
L30:

/*           %--------------------------------------% */
/*           | If in reverse communication mode and | */
/*           | RSTART = .true. flow returns here.   | */
/*           %--------------------------------------% */

    sgetv0_(ido, bmat, &itry, &c_false, n, &j, &v[v_offset], ldv, &resid[1], 
	    rnorm, &ipntr[1], &workd[1], &ierr, (ftnlen)1);
    if (*ido != 99) {
	goto L9000;
    }
    if (ierr < 0) {
	++itry;
	if (itry <= 3) {
	    goto L20;
	}

/*              %------------------------------------------------% */
/*              | Give up after several restart attempts.        | */
/*              | Set INFO to the size of the invariant subspace | */
/*              | which spans OP and exit.                       | */
/*              %------------------------------------------------% */

	*info = j - 1;
	second_(&t1);
	timing_1.tnaitr += t1 - t0;
	*ido = 99;
	goto L9000;
    }

L40:

/*        %---------------------------------------------------------% */
/*        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  | */
/*        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow | */
/*        | when reciprocating a small RNORM, test against lower    | */
/*        | machine bound.                                          | */
/*        %---------------------------------------------------------% */

    scopy_(n, &resid[1], &c__1, &v[j * v_dim1 + 1], &c__1);
    if (*rnorm >= unfl) {
	temp1 = 1.f / *rnorm;
	sscal_(n, &temp1, &v[j * v_dim1 + 1], &c__1);
	sscal_(n, &temp1, &workd[ipj], &c__1);
    } else {

/*            %-----------------------------------------% */
/*            | To scale both v_{j} and p_{j} carefully | */
/*            | use LAPACK routine SLASCL               | */
/*            %-----------------------------------------% */

	slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &v[j * v_dim1 
		+ 1], n, &infol, (ftnlen)7);
	slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &workd[ipj], 
		n, &infol, (ftnlen)7);
    }

/*        %------------------------------------------------------% */
/*        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} | */
/*        | Note that this is not quite yet r_{j}. See STEP 4    | */
/*        %------------------------------------------------------% */

    step3 = TRUE_;
    ++timing_1.nopx;
    second_(&t2);
    scopy_(n, &v[j * v_dim1 + 1], &c__1, &workd[ivj], &c__1);
    ipntr[1] = ivj;
    ipntr[2] = irj;
    ipntr[3] = ipj;
    *ido = 1;

/*        %-----------------------------------% */
/*        | Exit in order to compute OP*v_{j} | */
/*        %-----------------------------------% */

    goto L9000;
L50:

/*        %----------------------------------% */
/*        | Back from reverse communication; | */
/*        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}   | */
/*        | if step3 = .true.                | */
/*        %----------------------------------% */

    second_(&t3);
    timing_1.tmvopx += t3 - t2;
    step3 = FALSE_;

/*        %------------------------------------------% */
/*        | Put another copy of OP*v_{j} into RESID. | */
/*        %------------------------------------------% */

    scopy_(n, &workd[irj], &c__1, &resid[1], &c__1);

/*        %---------------------------------------% */
/*        | STEP 4:  Finish extending the Arnoldi | */
/*        |          factorization to length j.   | */
/*        %---------------------------------------% */

    second_(&t2);
    if (*(unsigned char *)bmat == 'G') {
	++timing_1.nbx;
	step4 = TRUE_;
	ipntr[1] = irj;
	ipntr[2] = ipj;
	*ido = 2;

/*           %-------------------------------------% */
/*           | Exit in order to compute B*OP*v_{j} | */
/*           %-------------------------------------% */

	goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
	scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1);
    }
L60:

/*        %----------------------------------% */
/*        | Back from reverse communication; | */
/*        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | */
/*        | if step4 = .true.                | */
/*        %----------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
	second_(&t3);
	timing_1.tmvbx += t3 - t2;
    }

    step4 = FALSE_;

/*        %-------------------------------------% */
/*        | The following is needed for STEP 5. | */
/*        | Compute the B-norm of OP*v_{j}.     | */
/*        %-------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
	wnorm = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1);
	wnorm = sqrt((dabs(wnorm)));
    } else if (*(unsigned char *)bmat == 'I') {
	wnorm = snrm2_(n, &resid[1], &c__1);
    }

/*        %-----------------------------------------% */
/*        | Compute the j-th residual corresponding | */
/*        | to the j step factorization.            | */
/*        | Use Classical Gram Schmidt and compute: | */
/*        | w_{j} <-  V_{j}^T * B * OP * v_{j}      | */
/*        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      | */
/*        %-----------------------------------------% */


/*        %------------------------------------------% */
/*        | Compute the j Fourier coefficients w_{j} | */
/*        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  | */
/*        %------------------------------------------% */

    sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b47, 
	    &h__[j * h_dim1 + 1], &c__1, (ftnlen)1);

/*        %--------------------------------------% */
/*        | Orthogonalize r_{j} against V_{j}.   | */
/*        | RESID contains OP*v_{j}. See STEP 3. | */
/*        %--------------------------------------% */

    sgemv_("N", n, &j, &c_b50, &v[v_offset], ldv, &h__[j * h_dim1 + 1], &c__1,
	     &c_b25, &resid[1], &c__1, (ftnlen)1);

    if (j > 1) {
	h__[j + (j - 1) * h_dim1] = betaj;
    }

    second_(&t4);

    orth1 = TRUE_;

    second_(&t2);
    if (*(unsigned char *)bmat == 'G') {
	++timing_1.nbx;
	scopy_(n, &resid[1], &c__1, &workd[irj], &c__1);
	ipntr[1] = irj;
	ipntr[2] = ipj;
	*ido = 2;

/*           %----------------------------------% */
/*           | Exit in order to compute B*r_{j} | */
/*           %----------------------------------% */

	goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
	scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1);
    }
L70:

/*        %---------------------------------------------------% */
/*        | Back from reverse communication if ORTH1 = .true. | */
/*        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    | */
/*        %---------------------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
	second_(&t3);
	timing_1.tmvbx += t3 - t2;
    }

    orth1 = FALSE_;

/*        %------------------------------% */
/*        | Compute the B-norm of r_{j}. | */
/*        %------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
	*rnorm = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1);
	*rnorm = sqrt((dabs(*rnorm)));
    } else if (*(unsigned char *)bmat == 'I') {
	*rnorm = snrm2_(n, &resid[1], &c__1);
    }

/*        %-----------------------------------------------------------% */
/*        | STEP 5: Re-orthogonalization / Iterative refinement phase | */
/*        | Maximum NITER_ITREF tries.                                | */
/*        |                                                           | */
/*        |          s      = V_{j}^T * B * r_{j}                     | */
/*        |          r_{j}  = r_{j} - V_{j}*s                         | */
/*        |          alphaj = alphaj + s_{j}                          | */
/*        |                                                           | */
/*        | The stopping criteria used for iterative refinement is    | */
/*        | discussed in Parlett's book SEP, page 107 and in Gragg &  | */
/*        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         | */
/*        | Determine if we need to correct the residual. The goal is | */
/*        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  | */
/*        | The following test determines whether the sine of the     | */
/*        | angle between  OP*x and the computed residual is less     | */
/*        | than or equal to 0.717.                                   | */
/*        %-----------------------------------------------------------% */

    if (*rnorm > wnorm * .717f) {
	goto L100;
    }
    iter = 0;
    ++timing_1.nrorth;

/*        %---------------------------------------------------% */
/*        | Enter the Iterative refinement phase. If further  | */
/*        | refinement is necessary, loop back here. The loop | */
/*        | variable is ITER. Perform a step of Classical     | */
/*        | Gram-Schmidt using all the Arnoldi vectors V_{j}  | */
/*        %---------------------------------------------------% */

L80:

    if (msglvl > 2) {
	xtemp[0] = wnorm;
	xtemp[1] = *rnorm;
	svout_(&debug_1.logfil, &c__2, xtemp, &debug_1.ndigit, "_naitr: re-o"
		"rthonalization; wnorm and rnorm are", (ftnlen)47);
	svout_(&debug_1.logfil, &j, &h__[j * h_dim1 + 1], &debug_1.ndigit, 
		"_naitr: j-th column of H", (ftnlen)24);
    }

/*        %----------------------------------------------------% */
/*        | Compute V_{j}^T * B * r_{j}.                       | */
/*        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | */
/*        %----------------------------------------------------% */

    sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b47, 
	    &workd[irj], &c__1, (ftnlen)1);

/*        %---------------------------------------------% */
/*        | Compute the correction to the residual:     | */
/*        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). | */
/*        | The correction to H is v(:,1:J)*H(1:J,1:J)  | */
/*        | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j.         | */
/*        %---------------------------------------------% */

    sgemv_("N", n, &j, &c_b50, &v[v_offset], ldv, &workd[irj], &c__1, &c_b25, 
	    &resid[1], &c__1, (ftnlen)1);
    saxpy_(&j, &c_b25, &workd[irj], &c__1, &h__[j * h_dim1 + 1], &c__1);

    orth2 = TRUE_;
    second_(&t2);
    if (*(unsigned char *)bmat == 'G') {
	++timing_1.nbx;
	scopy_(n, &resid[1], &c__1, &workd[irj], &c__1);
	ipntr[1] = irj;
	ipntr[2] = ipj;
	*ido = 2;

/*           %-----------------------------------% */
/*           | Exit in order to compute B*r_{j}. | */
/*           | r_{j} is the corrected residual.  | */
/*           %-----------------------------------% */

	goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
	scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1);
    }
L90:

/*        %---------------------------------------------------% */
/*        | Back from reverse communication if ORTH2 = .true. | */
/*        %---------------------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
	second_(&t3);
	timing_1.tmvbx += t3 - t2;
    }

/*        %-----------------------------------------------------% */
/*        | Compute the B-norm of the corrected residual r_{j}. | */
/*        %-----------------------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
	rnorm1 = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1);
	rnorm1 = sqrt((dabs(rnorm1)));
    } else if (*(unsigned char *)bmat == 'I') {
	rnorm1 = snrm2_(n, &resid[1], &c__1);
    }

    if (msglvl > 0 && iter > 0) {
	ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: Iterati"
		"ve refinement for Arnoldi residual", (ftnlen)49);
	if (msglvl > 2) {
	    xtemp[0] = *rnorm;
	    xtemp[1] = rnorm1;
	    svout_(&debug_1.logfil, &c__2, xtemp, &debug_1.ndigit, "_naitr: "
		    "iterative refinement ; rnorm and rnorm1 are", (ftnlen)51);
	}
    }

/*        %-----------------------------------------% */
/*        | Determine if we need to perform another | */
/*        | step of re-orthogonalization.           | */
/*        %-----------------------------------------% */

    if (rnorm1 > *rnorm * .717f) {

/*           %---------------------------------------% */
/*           | No need for further refinement.       | */
/*           | The cosine of the angle between the   | */
/*           | corrected residual vector and the old | */
/*           | residual vector is greater than 0.717 | */
/*           | In other words the corrected residual | */
/*           | and the old residual vector share an  | */
/*           | angle of less than arcCOS(0.717)      | */
/*           %---------------------------------------% */

	*rnorm = rnorm1;

    } else {

/*           %-------------------------------------------% */
/*           | Another step of iterative refinement step | */
/*           | is required. NITREF is used by stat.h     | */
/*           %-------------------------------------------% */

	++timing_1.nitref;
	*rnorm = rnorm1;
	++iter;
	if (iter <= 1) {
	    goto L80;
	}

/*           %-------------------------------------------------% */
/*           | Otherwise RESID is numerically in the span of V | */
/*           %-------------------------------------------------% */

	i__1 = *n;
	for (jj = 1; jj <= i__1; ++jj) {
	    resid[jj] = 0.f;
/* L95: */
	}
	*rnorm = 0.f;
    }

/*        %----------------------------------------------% */
/*        | Branch here directly if iterative refinement | */
/*        | wasn't necessary or after at most NITER_REF  | */
/*        | steps of iterative refinement.               | */
/*        %----------------------------------------------% */

L100:

    rstart = FALSE_;
    orth2 = FALSE_;

    second_(&t5);
    timing_1.titref += t5 - t4;

/*        %------------------------------------% */
/*        | STEP 6: Update  j = j+1;  Continue | */
/*        %------------------------------------% */

    ++j;
    if (j > *k + *np) {
	second_(&t1);
	timing_1.tnaitr += t1 - t0;
	*ido = 99;
	i__1 = *k + *np - 1;
	for (i__ = max(1,*k); i__ <= i__1; ++i__) {

/*              %--------------------------------------------% */
/*              | Check for splitting and deflation.         | */
/*              | Use a standard test as in the QR algorithm | */
/*              | REFERENCE: LAPACK subroutine slahqr        | */
/*              %--------------------------------------------% */

	    tst1 = (r__1 = h__[i__ + i__ * h_dim1], dabs(r__1)) + (r__2 = h__[
		    i__ + 1 + (i__ + 1) * h_dim1], dabs(r__2));
	    if (tst1 == 0.f) {
		i__2 = *k + *np;
		tst1 = slanhs_("1", &i__2, &h__[h_offset], ldh, &workd[*n + 1]
			, (ftnlen)1);
	    }
/* Computing MAX */
	    r__2 = ulp * tst1;
	    if ((r__1 = h__[i__ + 1 + i__ * h_dim1], dabs(r__1)) <= dmax(r__2,
		    smlnum)) {
		h__[i__ + 1 + i__ * h_dim1] = 0.f;
	    }
/* L110: */
	}

	if (msglvl > 2) {
	    i__1 = *k + *np;
	    i__2 = *k + *np;
	    smout_(&debug_1.logfil, &i__1, &i__2, &h__[h_offset], ldh, &
		    debug_1.ndigit, "_naitr: Final upper Hessenberg matrix H"
		    " of order K+NP", (ftnlen)53);
	}

	goto L9000;
    }

/*        %--------------------------------------------------------% */
/*        | Loop back to extend the factorization by another step. | */
/*        %--------------------------------------------------------% */

    goto L1000;

/*     %---------------------------------------------------------------% */
/*     |                                                               | */
/*     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  | */
/*     |                                                               | */
/*     %---------------------------------------------------------------% */

L9000:
    return 0;

/*     %---------------% */
/*     | End of snaitr | */
/*     %---------------% */

} /* snaitr_ */
Esempio n. 16
0
void test08 ( void )

/******************************************************************************/
/*
  Purpose:

    TEST08 demonstrates SNRM2.

  Modified:

    29 March 2007

  Author:

    John Burkardt
*/
{
/*
  These parameters illustrate the fact that matrices are typically
  dimensioned with more space than the user requires.
*/
  float *a;
  int i;
  int inc;
  int j;
  int lda = 10;
  int n = 5;
  int ncopy;
  float sum1;
  float *x;

  a = malloc ( lda * lda * sizeof ( float ) );
  x = malloc ( n * sizeof ( float ) );

  printf ( "\n" );
  printf ( "TEST08\n" );
  printf ( "  SNRM2 computes the Euclidean norm of a vector.\n" );
  printf ( "\n" );
/*
  Compute the euclidean norm of a vector:
*/
  for ( i = 0; i < n; i++ )
  {
    x[i] = ( float ) ( i + 1 );
  }

  printf ( "\n" );
  printf ( "  X =\n" );
  printf ( "\n" );
  for ( i = 0; i < n; i++ )
  {
    printf ( "  %6d  %14d\n", i + 1, x[i] );
  }
  printf ( "\n" );
  ncopy = n;
  inc = 1;
  printf ( "  The 2-norm of X is %f\n", snrm2_ ( &ncopy, x, &inc ) );
/*
  Compute the euclidean norm of a row or column of a matrix:
*/
  for ( i = 0; i < n; i++ )
  {
    for ( j = 0; j < n; j++ )
    {
      a[i+j*lda] = ( float ) ( i + 1 + j + 1 );
    }
  }

  printf ( "\n" );
  ncopy = n;
  inc = lda;
  printf ( "  The 2-norm of row 2 of A is %f\n", 
    snrm2_ ( &ncopy, a+1+0*lda, &inc ) );

  printf ( "\n" );
  ncopy = n;
  inc = 1;
  printf ( "  The 2-norm of column 2 of A is %f\n" ,
       snrm2_ ( &ncopy, a+0+1*lda, &inc ) );

  free ( a );
  free ( x );
 
  return;
}
Esempio n. 17
0
/* Subroutine */
int sgebal_(char *job, integer *n, real *a, integer *lda, integer *ilo, integer *ihi, real *scale, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    real r__1, r__2;
    /* Local variables */
    real c__, f, g;
    integer i__, j, k, l, m;
    real r__, s, ca, ra;
    integer ica, ira, iexc;
    extern real snrm2_(integer *, real *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */
    int sscal_(integer *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *);
    real sfmin1, sfmin2, sfmax1, sfmax2;
    extern real slamch_(char *);
    extern /* Subroutine */
    int xerbla_(char *, integer *);
    extern integer isamax_(integer *, real *, integer *);
    extern logical sisnan_(real *);
    logical noconv;
    /* -- LAPACK computational routine (version 3.5.0) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* November 2013 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Test the input parameters */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --scale;
    /* Function Body */
    *info = 0;
    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") && ! lsame_(job, "B"))
    {
        *info = -1;
    }
    else if (*n < 0)
    {
        *info = -2;
    }
    else if (*lda < max(1,*n))
    {
        *info = -4;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("SGEBAL", &i__1);
        return 0;
    }
    k = 1;
    l = *n;
    if (*n == 0)
    {
        goto L210;
    }
    if (lsame_(job, "N"))
    {
        i__1 = *n;
        for (i__ = 1;
                i__ <= i__1;
                ++i__)
        {
            scale[i__] = 1.f;
            /* L10: */
        }
        goto L210;
    }
    if (lsame_(job, "S"))
    {
        goto L120;
    }
    /* Permutation to isolate eigenvalues if possible */
    goto L50;
    /* Row and column exchange. */
L20:
    scale[m] = (real) j;
    if (j == m)
    {
        goto L30;
    }
    sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
    i__1 = *n - k + 1;
    sswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda);
L30:
    switch (iexc)
    {
    case 1:
        goto L40;
    case 2:
        goto L80;
    }
    /* Search for rows isolating an eigenvalue and push them down. */
L40:
    if (l == 1)
    {
        goto L210;
    }
    --l;
L50:
    for (j = l;
            j >= 1;
            --j)
    {
        i__1 = l;
        for (i__ = 1;
                i__ <= i__1;
                ++i__)
        {
            if (i__ == j)
            {
                goto L60;
            }
            if (a[j + i__ * a_dim1] != 0.f)
            {
                goto L70;
            }
L60:
            ;
        }
        m = l;
        iexc = 1;
        goto L20;
L70:
        ;
    }
    goto L90;
    /* Search for columns isolating an eigenvalue and push them left. */
L80:
    ++k;
L90:
    i__1 = l;
    for (j = k;
            j <= i__1;
            ++j)
    {
        i__2 = l;
        for (i__ = k;
                i__ <= i__2;
                ++i__)
        {
            if (i__ == j)
            {
                goto L100;
            }
            if (a[i__ + j * a_dim1] != 0.f)
            {
                goto L110;
            }
L100:
            ;
        }
        m = k;
        iexc = 2;
        goto L20;
L110:
        ;
    }
L120:
    i__1 = l;
    for (i__ = k;
            i__ <= i__1;
            ++i__)
    {
        scale[i__] = 1.f;
        /* L130: */
    }
    if (lsame_(job, "P"))
    {
        goto L210;
    }
    /* Balance the submatrix in rows K to L. */
    /* Iterative loop for norm reduction */
    sfmin1 = slamch_("S") / slamch_("P");
    sfmax1 = 1.f / sfmin1;
    sfmin2 = sfmin1 * 2.f;
    sfmax2 = 1.f / sfmin2;
L140:
    noconv = FALSE_;
    i__1 = l;
    for (i__ = k;
            i__ <= i__1;
            ++i__)
    {
        i__2 = l - k + 1;
        c__ = snrm2_(&i__2, &a[k + i__ * a_dim1], &c__1);
        i__2 = l - k + 1;
        r__ = snrm2_(&i__2, &a[i__ + k * a_dim1], lda);
        ica = isamax_(&l, &a[i__ * a_dim1 + 1], &c__1);
        ca = (r__1 = a[ica + i__ * a_dim1], f2c_abs(r__1));
        i__2 = *n - k + 1;
        ira = isamax_(&i__2, &a[i__ + k * a_dim1], lda);
        ra = (r__1 = a[i__ + (ira + k - 1) * a_dim1], f2c_abs(r__1));
        /* Guard against zero C or R due to underflow. */
        if (c__ == 0.f || r__ == 0.f)
        {
            goto L200;
        }
        g = r__ / 2.f;
        f = 1.f;
        s = c__ + r__;
L160: /* Computing MAX */
        r__1 = max(f,c__);
        /* Computing MIN */
        r__2 = min(r__,g);
        if (c__ >= g || max(r__1,ca) >= sfmax2 || min(r__2,ra) <= sfmin2)
        {
            goto L170;
        }
        f *= 2.f;
        c__ *= 2.f;
        ca *= 2.f;
        r__ /= 2.f;
        g /= 2.f;
        ra /= 2.f;
        goto L160;
L170:
        g = c__ / 2.f;
L180: /* Computing MIN */
        r__1 = min(f,c__);
        r__1 = min(r__1,g); // , expr subst
        if (g < r__ || max(r__,ra) >= sfmax2 || min(r__1,ca) <= sfmin2)
        {
            goto L190;
        }
        r__1 = c__ + f + ca + r__ + g + ra;
        if (sisnan_(&r__1))
        {
            /* Exit if NaN to avoid infinite loop */
            *info = -3;
            i__2 = -(*info);
            xerbla_("SGEBAL", &i__2);
            return 0;
        }
        f /= 2.f;
        c__ /= 2.f;
        g /= 2.f;
        ca /= 2.f;
        r__ *= 2.f;
        ra *= 2.f;
        goto L180;
        /* Now balance. */
L190:
        if (c__ + r__ >= s * .95f)
        {
            goto L200;
        }
        if (f < 1.f && scale[i__] < 1.f)
        {
            if (f * scale[i__] <= sfmin1)
            {
                goto L200;
            }
        }
        if (f > 1.f && scale[i__] > 1.f)
        {
            if (scale[i__] >= sfmax1 / f)
            {
                goto L200;
            }
        }
        g = 1.f / f;
        scale[i__] *= f;
        noconv = TRUE_;
        i__2 = *n - k + 1;
        sscal_(&i__2, &g, &a[i__ + k * a_dim1], lda);
        sscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1);
L200:
        ;
    }
    if (noconv)
    {
        goto L140;
    }
L210:
    *ilo = k;
    *ihi = l;
    return 0;
    /* End of SGEBAL */
}
Esempio n. 18
0
/* Subroutine */ int sqrt15_(integer *scale, integer *rksel, integer *m,
                             integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *
                             ldb, real *s, integer *rank, real *norma, real *normb, integer *iseed,
                             real *work, integer *lwork)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    static integer info;
    static real temp;
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer j;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
           slarf_(char *, integer *, integer *, real *, integer *, real *,
                  real *, integer *, real *), sgemm_(char *, char *,
                          integer *, integer *, integer *, real *, real *, integer *, real *
                          , integer *, real *, real *, integer *);
    extern doublereal sasum_(integer *, real *, integer *);
    static real dummy[1];
    static integer mn;
    extern doublereal slamch_(char *), slange_(char *, integer *,
            integer *, real *, integer *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static real bignum;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
                                        real *, integer *, integer *, real *, integer *, integer *);
    extern doublereal slarnd_(integer *, integer *);
    extern /* Subroutine */ int slaord_(char *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *,
            real *, integer *), slaror_(char *, char *, integer *,
                                        integer *, real *, integer *, integer *, real *, integer *), slarnv_(integer *, integer *, integer *, real *);
    static real smlnum, eps;


#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    /*  -- LAPACK test routine (version 3.0) --
           Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
           Courant Institute, Argonne National Lab, and Rice University
           September 30, 1994


        Purpose
        =======

        SQRT15 generates a matrix with full or deficient rank and of various
        norms.

        Arguments
        =========

        SCALE   (input) INTEGER
                SCALE = 1: normally scaled matrix
                SCALE = 2: matrix scaled up
                SCALE = 3: matrix scaled down

        RKSEL   (input) INTEGER
                RKSEL = 1: full rank matrix
                RKSEL = 2: rank-deficient matrix

        M       (input) INTEGER
                The number of rows of the matrix A.

        N       (input) INTEGER
                The number of columns of A.

        NRHS    (input) INTEGER
                The number of columns of B.

        A       (output) REAL array, dimension (LDA,N)
                The M-by-N matrix A.

        LDA     (input) INTEGER
                The leading dimension of the array A.

        B       (output) REAL array, dimension (LDB, NRHS)
                A matrix that is in the range space of matrix A.

        LDB     (input) INTEGER
                The leading dimension of the array B.

        S       (output) REAL array, dimension MIN(M,N)
                Singular values of A.

        RANK    (output) INTEGER
                number of nonzero singular values of A.

        NORMA   (output) REAL
                one-norm of A.

        NORMB   (output) REAL
                one-norm of B.

        ISEED   (input/output) integer array, dimension (4)
                seed for random number generator.

        WORK    (workspace) REAL array, dimension (LWORK)

        LWORK   (input) INTEGER
                length of work space required.
                LWORK >= MAX(M+MIN(M,N),NRHS*MIN(M,N),2*N+M)

        =====================================================================


           Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --s;
    --iseed;
    --work;

    /* Function Body */
    mn = min(*m,*n);
    /* Computing MAX */
    i__1 = *m + mn, i__2 = mn * *nrhs, i__1 = max(i__1,i__2), i__2 = (*n << 1)
                           + *m;
    if (*lwork < max(i__1,i__2)) {
        xerbla_("SQRT15", &c__16);
        return 0;
    }

    smlnum = slamch_("Safe minimum");
    bignum = 1.f / smlnum;
    eps = slamch_("Epsilon");
    smlnum = smlnum / eps / eps;
    bignum = 1.f / smlnum;

    /*     Determine rank and (unscaled) singular values */

    if (*rksel == 1) {
        *rank = mn;
    } else if (*rksel == 2) {
        *rank = mn * 3 / 4;
        i__1 = mn;
        for (j = *rank + 1; j <= i__1; ++j) {
            s[j] = 0.f;
            /* L10: */
        }
    } else {
        xerbla_("SQRT15", &c__2);
    }

    if (*rank > 0) {

        /*        Nontrivial case */

        s[1] = 1.f;
        i__1 = *rank;
        for (j = 2; j <= i__1; ++j) {
L20:
            temp = slarnd_(&c__1, &iseed[1]);
            if (temp > .1f) {
                s[j] = dabs(temp);
            } else {
                goto L20;
            }
            /* L30: */
        }
        slaord_("Decreasing", rank, &s[1], &c__1);

        /*        Generate 'rank' columns of a random orthogonal matrix in A */

        slarnv_(&c__2, &iseed[1], m, &work[1]);
        r__1 = 1.f / snrm2_(m, &work[1], &c__1);
        sscal_(m, &r__1, &work[1], &c__1);
        slaset_("Full", m, rank, &c_b18, &c_b19, &a[a_offset], lda)
        ;
        slarf_("Left", m, rank, &work[1], &c__1, &c_b22, &a[a_offset], lda, &
               work[*m + 1]);

        /*        workspace used: m+mn

                  Generate consistent rhs in the range space of A */

        i__1 = *rank * *nrhs;
        slarnv_(&c__2, &iseed[1], &i__1, &work[1]);
        sgemm_("No transpose", "No transpose", m, nrhs, rank, &c_b19, &a[
                   a_offset], lda, &work[1], rank, &c_b18, &b[b_offset], ldb);

        /*        work space used: <= mn *nrhs

                  generate (unscaled) matrix A */

        i__1 = *rank;
        for (j = 1; j <= i__1; ++j) {
            sscal_(m, &s[j], &a_ref(1, j), &c__1);
            /* L40: */
        }
        if (*rank < *n) {
            i__1 = *n - *rank;
            slaset_("Full", m, &i__1, &c_b18, &c_b18, &a_ref(1, *rank + 1),
                    lda);
        }
        slaror_("Right", "No initialization", m, n, &a[a_offset], lda, &iseed[
                    1], &work[1], &info);

    } else {

        /*        work space used 2*n+m

                  Generate null matrix and rhs */

        i__1 = mn;
        for (j = 1; j <= i__1; ++j) {
            s[j] = 0.f;
            /* L50: */
        }
        slaset_("Full", m, n, &c_b18, &c_b18, &a[a_offset], lda);
        slaset_("Full", m, nrhs, &c_b18, &c_b18, &b[b_offset], ldb)
        ;

    }

    /*     Scale the matrix */

    if (*scale != 1) {
        *norma = slange_("Max", m, n, &a[a_offset], lda, dummy);
        if (*norma != 0.f) {
            if (*scale == 2) {

                /*              matrix scaled up */

                slascl_("General", &c__0, &c__0, norma, &bignum, m, n, &a[
                            a_offset], lda, &info);
                slascl_("General", &c__0, &c__0, norma, &bignum, &mn, &c__1, &
                        s[1], &mn, &info);
                slascl_("General", &c__0, &c__0, norma, &bignum, m, nrhs, &b[
                            b_offset], ldb, &info);
            } else if (*scale == 3) {

                /*              matrix scaled down */

                slascl_("General", &c__0, &c__0, norma, &smlnum, m, n, &a[
                            a_offset], lda, &info);
                slascl_("General", &c__0, &c__0, norma, &smlnum, &mn, &c__1, &
                        s[1], &mn, &info);
                slascl_("General", &c__0, &c__0, norma, &smlnum, m, nrhs, &b[
                            b_offset], ldb, &info);
            } else {
                xerbla_("SQRT15", &c__1);
                return 0;
            }
        }
    }

    *norma = sasum_(&mn, &s[1], &c__1);
    *normb = slange_("One-norm", m, nrhs, &b[b_offset], ldb, dummy)
             ;

    return 0;

    /*     End of SQRT15 */

} /* sqrt15_ */
Esempio n. 19
0
/* Subroutine */ int slasd3_(integer *nl, integer *nr, integer *sqre, integer 
	*k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer *
	ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, 
	integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer *
	info)
{
    /* System generated locals */
    integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, 
	    vt_offset, vt2_dim1, vt2_offset, i__1, i__2;
    real r__1, r__2;

    /* Builtin functions */
    double sqrt(doublereal), r_sign(real *, real *);

    /* Local variables */
    integer i__, j, m, n, jc;
    real rho;
    integer nlp1, nlp2, nrp1;
    real temp;
    extern doublereal snrm2_(integer *, real *, integer *);
    integer ctemp;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    integer ktemp;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    extern doublereal slamc3_(real *, real *);
    extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, 
	    real *, real *, real *, real *, integer *), xerbla_(char *, 
	    integer *), slascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, 
	    real *, integer *);


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLASD3 finds all the square roots of the roots of the secular */
/*  equation, as defined by the values in D and Z.  It makes the */
/*  appropriate calls to SLASD4 and then updates the singular */
/*  vectors by matrix multiplication. */

/*  This code makes very mild assumptions about floating point */
/*  arithmetic. It will work on machines with a guard digit in */
/*  add/subtract, or on those binary machines without guard digits */
/*  which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. */
/*  It could conceivably fail on hexadecimal or decimal machines */
/*  without guard digits, but we know of none. */

/*  SLASD3 is called from SLASD1. */

/*  Arguments */
/*  ========= */

/*  NL     (input) INTEGER */
/*         The row dimension of the upper block.  NL >= 1. */

/*  NR     (input) INTEGER */
/*         The row dimension of the lower block.  NR >= 1. */

/*  SQRE   (input) INTEGER */
/*         = 0: the lower block is an NR-by-NR square matrix. */
/*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */

/*         The bidiagonal matrix has N = NL + NR + 1 rows and */
/*         M = N + SQRE >= N columns. */

/*  K      (input) INTEGER */
/*         The size of the secular equation, 1 =< K = < N. */

/*  D      (output) REAL array, dimension(K) */
/*         On exit the square roots of the roots of the secular equation, */
/*         in ascending order. */

/*  Q      (workspace) REAL array, */
/*                     dimension at least (LDQ,K). */

/*  LDQ    (input) INTEGER */
/*         The leading dimension of the array Q.  LDQ >= K. */

/*  DSIGMA (input/output) REAL array, dimension(K) */
/*         The first K elements of this array contain the old roots */
/*         of the deflated updating problem.  These are the poles */
/*         of the secular equation. */

/*  U      (output) REAL array, dimension (LDU, N) */
/*         The last N - K columns of this matrix contain the deflated */
/*         left singular vectors. */

/*  LDU    (input) INTEGER */
/*         The leading dimension of the array U.  LDU >= N. */

/*  U2     (input) REAL array, dimension (LDU2, N) */
/*         The first K columns of this matrix contain the non-deflated */
/*         left singular vectors for the split problem. */

/*  LDU2   (input) INTEGER */
/*         The leading dimension of the array U2.  LDU2 >= N. */

/*  VT     (output) REAL array, dimension (LDVT, M) */
/*         The last M - K columns of VT' contain the deflated */
/*         right singular vectors. */

/*  LDVT   (input) INTEGER */
/*         The leading dimension of the array VT.  LDVT >= N. */

/*  VT2    (input/output) REAL array, dimension (LDVT2, N) */
/*         The first K columns of VT2' contain the non-deflated */
/*         right singular vectors for the split problem. */

/*  LDVT2  (input) INTEGER */
/*         The leading dimension of the array VT2.  LDVT2 >= N. */

/*  IDXC   (input) INTEGER array, dimension (N) */
/*         The permutation used to arrange the columns of U (and rows of */
/*         VT) into three groups:  the first group contains non-zero */
/*         entries only at and above (or before) NL +1; the second */
/*         contains non-zero entries only at and below (or after) NL+2; */
/*         and the third is dense. The first column of U and the row of */
/*         VT are treated separately, however. */

/*         The rows of the singular vectors found by SLASD4 */
/*         must be likewise permuted before the matrix multiplies can */
/*         take place. */

/*  CTOT   (input) INTEGER array, dimension (4) */
/*         A count of the total number of the various types of columns */
/*         in U (or rows in VT), as described in IDXC. The fourth column */
/*         type is any column which has been deflated. */

/*  Z      (input/output) REAL array, dimension (K) */
/*         The first K elements of this array contain the components */
/*         of the deflation-adjusted updating row vector. */

/*  INFO   (output) INTEGER */
/*         = 0:  successful exit. */
/*         < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*         > 0:  if INFO = 1, an singular value did not converge */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Ming Gu and Huan Ren, Computer Science Division, University of */
/*     California at Berkeley, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --dsigma;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    u2_dim1 = *ldu2;
    u2_offset = 1 + u2_dim1;
    u2 -= u2_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1;
    vt -= vt_offset;
    vt2_dim1 = *ldvt2;
    vt2_offset = 1 + vt2_dim1;
    vt2 -= vt2_offset;
    --idxc;
    --ctot;
    --z__;

    /* Function Body */
    *info = 0;

    if (*nl < 1) {
	*info = -1;
    } else if (*nr < 1) {
	*info = -2;
    } else if (*sqre != 1 && *sqre != 0) {
	*info = -3;
    }

    n = *nl + *nr + 1;
    m = n + *sqre;
    nlp1 = *nl + 1;
    nlp2 = *nl + 2;

    if (*k < 1 || *k > n) {
	*info = -4;
    } else if (*ldq < *k) {
	*info = -7;
    } else if (*ldu < n) {
	*info = -10;
    } else if (*ldu2 < n) {
	*info = -12;
    } else if (*ldvt < m) {
	*info = -14;
    } else if (*ldvt2 < m) {
	*info = -16;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLASD3", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*k == 1) {
	d__[1] = dabs(z__[1]);
	scopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt);
	if (z__[1] > 0.f) {
	    scopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1);
	} else {
	    i__1 = n;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		u[i__ + u_dim1] = -u2[i__ + u2_dim1];
/* L10: */
	    }
	}
	return 0;
    }

/*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */
/*     be computed with high relative accuracy (barring over/underflow). */
/*     This is a problem on machines without a guard digit in */
/*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
/*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */
/*     which on any of these machines zeros out the bottommost */
/*     bit of DSIGMA(I) if it is 1; this makes the subsequent */
/*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */
/*     occurs. On binary machines with a guard digit (almost all */
/*     machines) it does not change DSIGMA(I) at all. On hexadecimal */
/*     and decimal machines with a guard digit, it slightly */
/*     changes the bottommost bits of DSIGMA(I). It does not account */
/*     for hexadecimal or decimal machines without guard digits */
/*     (we know of none). We use a subroutine call to compute */
/*     2*DSIGMA(I) to prevent optimizing compilers from eliminating */
/*     this code. */

    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
/* L20: */
    }

/*     Keep a copy of Z. */

    scopy_(k, &z__[1], &c__1, &q[q_offset], &c__1);

/*     Normalize Z. */

    rho = snrm2_(k, &z__[1], &c__1);
    slascl_("G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info);
    rho *= rho;

/*     Find the new singular values. */

    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	slasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], 
		 &vt[j * vt_dim1 + 1], info);

/*        If the zero finder fails, the computation is terminated. */

	if (*info != 0) {
	    return 0;
	}
/* L30: */
    }

/*     Compute updated Z. */

    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1];
	i__2 = i__ - 1;
	for (j = 1; j <= i__2; ++j) {
	    z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
		    i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]);
/* L40: */
	}
	i__2 = *k - 1;
	for (j = i__; j <= i__2; ++j) {
	    z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
		    i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]);
/* L50: */
	}
	r__2 = sqrt((r__1 = z__[i__], dabs(r__1)));
	z__[i__] = r_sign(&r__2, &q[i__ + q_dim1]);
/* L60: */
    }

/*     Compute left singular vectors of the modified diagonal matrix, */
/*     and store related information for the right singular vectors. */

    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * 
		vt_dim1 + 1];
	u[i__ * u_dim1 + 1] = -1.f;
	i__2 = *k;
	for (j = 2; j <= i__2; ++j) {
	    vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ 
		    * vt_dim1];
	    u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1];
/* L70: */
	}
	temp = snrm2_(k, &u[i__ * u_dim1 + 1], &c__1);
	q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp;
	i__2 = *k;
	for (j = 2; j <= i__2; ++j) {
	    jc = idxc[j];
	    q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp;
/* L80: */
	}
/* L90: */
    }

/*     Update the left singular vector matrix. */

    if (*k == 2) {
	sgemm_("N", "N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset], 
		 ldq, &c_b26, &u[u_offset], ldu);
	goto L100;
    }
    if (ctot[1] > 0) {
	sgemm_("N", "N", nl, k, &ctot[1], &c_b13, &u2[(u2_dim1 << 1) + 1], 
		ldu2, &q[q_dim1 + 2], ldq, &c_b26, &u[u_dim1 + 1], ldu);
	if (ctot[3] > 0) {
	    ktemp = ctot[1] + 2 + ctot[2];
	    sgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1]
, ldu2, &q[ktemp + q_dim1], ldq, &c_b13, &u[u_dim1 + 1], 
		    ldu);
	}
    } else if (ctot[3] > 0) {
	ktemp = ctot[1] + 2 + ctot[2];
	sgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1], 
		ldu2, &q[ktemp + q_dim1], ldq, &c_b26, &u[u_dim1 + 1], ldu);
    } else {
	slacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu);
    }
    scopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu);
    ktemp = ctot[1] + 2;
    ctemp = ctot[2] + ctot[3];
    sgemm_("N", "N", nr, k, &ctemp, &c_b13, &u2[nlp2 + ktemp * u2_dim1], ldu2, 
	     &q[ktemp + q_dim1], ldq, &c_b26, &u[nlp2 + u_dim1], ldu);

/*     Generate the right singular vectors. */

L100:
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	temp = snrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1);
	q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp;
	i__2 = *k;
	for (j = 2; j <= i__2; ++j) {
	    jc = idxc[j];
	    q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp;
/* L110: */
	}
/* L120: */
    }

/*     Update the right singular vector matrix. */

    if (*k == 2) {
	sgemm_("N", "N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset]
, ldvt2, &c_b26, &vt[vt_offset], ldvt);
	return 0;
    }
    ktemp = ctot[1] + 1;
    sgemm_("N", "N", k, &nlp1, &ktemp, &c_b13, &q[q_dim1 + 1], ldq, &vt2[
	    vt2_dim1 + 1], ldvt2, &c_b26, &vt[vt_dim1 + 1], ldvt);
    ktemp = ctot[1] + 2 + ctot[2];
    if (ktemp <= *ldvt2) {
	sgemm_("N", "N", k, &nlp1, &ctot[3], &c_b13, &q[ktemp * q_dim1 + 1], 
		ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b13, &vt[vt_dim1 + 1], 
		ldvt);
    }

    ktemp = ctot[1] + 1;
    nrp1 = *nr + *sqre;
    if (ktemp > 1) {
	i__1 = *k;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    q[i__ + ktemp * q_dim1] = q[i__ + q_dim1];
/* L130: */
	}
	i__1 = m;
	for (i__ = nlp2; i__ <= i__1; ++i__) {
	    vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1];
/* L140: */
	}
    }
    ctemp = ctot[2] + 1 + ctot[3];
    sgemm_("N", "N", k, &nrp1, &ctemp, &c_b13, &q[ktemp * q_dim1 + 1], ldq, &
	    vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b26, &vt[nlp2 * vt_dim1 + 
	    1], ldvt);

    return 0;

/*     End of SLASD3 */

} /* slasd3_ */
Esempio n. 20
0
/* ----------------------------------------------------------------------- */
/* Subroutine */ int sneupd_(logical *rvec, char *howmny, logical *select, 
	real *dr, real *di, real *z__, integer *ldz, real *sigmar, real *
	sigmai, real *workev, char *bmat, integer *n, char *which, integer *
	nev, real *tol, real *resid, integer *ncv, real *v, integer *ldv, 
	integer *iparam, integer *ipntr, real *workd, real *workl, integer *
	lworkl, integer *info, ftnlen howmny_len, ftnlen bmat_len, ftnlen 
	which_len)
{
    /* System generated locals */
    integer v_dim1, v_offset, z_dim1, z_offset, i__1;
    real r__1, r__2;
    doublereal d__1;

    /* Local variables */
    static integer j, k, ih, jj, np;
    static real vl[1]	/* was [1][1] */;
    static integer ibd, ldh, ldq, iri;
    static real sep;
    static integer irr, wri, wrr, mode;
    static real eps23;
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    static integer ierr;
    static real temp;
    static integer iwev;
    static char type__[6];
    static real temp1;
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer ihbds, iconj;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real conds;
    static logical reord;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *, 
	    ftnlen);
    static integer nconv, iwork[1];
    static real rnorm;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static integer ritzi;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    , ftnlen, ftnlen, ftnlen, ftnlen), ivout_(integer *, integer *, 
	    integer *, integer *, char *, ftnlen), smout_(integer *, integer *
	    , integer *, real *, integer *, integer *, char *, ftnlen);
    static integer ritzr;
    extern /* Subroutine */ int svout_(integer *, integer *, real *, integer *
	    , char *, ftnlen), sgeqr2_(integer *, integer *, real *, integer *
	    , real *, real *, integer *);
    static integer nconv2;
    extern doublereal slapy2_(real *, real *);
    extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *, ftnlen, ftnlen);
    static integer iheigi, iheigr, bounds, invsub, iuptri, msglvl, outncv, 
	    ishift, numcnv;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *, ftnlen), slahqr_(logical *, logical 
	    *, integer *, integer *, integer *, real *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *), 
	    slaset_(char *, integer *, integer *, real *, real *, real *, 
	    integer *, ftnlen), strevc_(char *, char *, logical *, integer *, 
	    real *, integer *, real *, integer *, real *, integer *, integer *
	    , integer *, real *, integer *, ftnlen, ftnlen), strsen_(char *, 
	    char *, logical *, integer *, real *, integer *, real *, integer *
	    , real *, real *, integer *, real *, real *, real *, integer *, 
	    integer *, integer *, integer *, ftnlen, ftnlen);
    extern doublereal slamch_(char *, ftnlen);
    extern /* Subroutine */ int sngets_(integer *, char *, integer *, integer 
	    *, real *, real *, real *, real *, real *, ftnlen);


/*     %----------------------------------------------------% */
/*     | Include files for debugging and timing information | */
/*     %----------------------------------------------------% */


/* \SCCS Information: @(#) */
/* FILE: debug.h   SID: 2.3   DATE OF SID: 11/16/95   RELEASE: 2 */

/*     %---------------------------------% */
/*     | See debug.doc for documentation | */
/*     %---------------------------------% */

/*     %------------------% */
/*     | Scalar Arguments | */
/*     %------------------% */

/*     %--------------------------------% */
/*     | See stat.doc for documentation | */
/*     %--------------------------------% */

/* \SCCS Information: @(#) */
/* FILE: stat.h   SID: 2.2   DATE OF SID: 11/16/95   RELEASE: 2 */



/*     %-----------------% */
/*     | Array Arguments | */
/*     %-----------------% */


/*     %------------% */
/*     | Parameters | */
/*     %------------% */


/*     %---------------% */
/*     | Local Scalars | */
/*     %---------------% */


/*     %----------------------% */
/*     | External Subroutines | */
/*     %----------------------% */


/*     %--------------------% */
/*     | External Functions | */
/*     %--------------------% */


/*     %---------------------% */
/*     | Intrinsic Functions | */
/*     %---------------------% */


/*     %-----------------------% */
/*     | Executable Statements | */
/*     %-----------------------% */

/*     %------------------------% */
/*     | Set default parameters | */
/*     %------------------------% */

    /* Parameter adjustments */
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --workd;
    --resid;
    --di;
    --dr;
    --workev;
    --select;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    --iparam;
    --ipntr;
    --workl;

    /* Function Body */
    msglvl = debug_1.mneupd;
    mode = iparam[7];
    nconv = iparam[5];
    *info = 0;

/*     %---------------------------------% */
/*     | Get machine dependent constant. | */
/*     %---------------------------------% */

    eps23 = slamch_("Epsilon-Machine", (ftnlen)15);
    d__1 = (doublereal) eps23;
    eps23 = pow_dd(&d__1, &c_b3);

/*     %--------------% */
/*     | Quick return | */
/*     %--------------% */

    ierr = 0;

    if (nconv <= 0) {
	ierr = -14;
    } else if (*n <= 0) {
	ierr = -1;
    } else if (*nev <= 0) {
	ierr = -2;
    } else if (*ncv <= *nev + 1 || *ncv > *n) {
	ierr = -3;
    } else if (s_cmp(which, "LM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, 
	    "SM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "LR", (ftnlen)2, 
	    (ftnlen)2) != 0 && s_cmp(which, "SR", (ftnlen)2, (ftnlen)2) != 0 
	    && s_cmp(which, "LI", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, 
	    "SI", (ftnlen)2, (ftnlen)2) != 0) {
	ierr = -5;
    } else if (*(unsigned char *)bmat != 'I' && *(unsigned char *)bmat != 'G')
	     {
	ierr = -6;
    } else /* if(complicated condition) */ {
/* Computing 2nd power */
	i__1 = *ncv;
	if (*lworkl < i__1 * i__1 * 3 + *ncv * 6) {
	    ierr = -7;
	} else if (*(unsigned char *)howmny != 'A' && *(unsigned char *)
		howmny != 'P' && *(unsigned char *)howmny != 'S' && *rvec) {
	    ierr = -13;
	} else if (*(unsigned char *)howmny == 'S') {
	    ierr = -12;
	}
    }

    if (mode == 1 || mode == 2) {
	s_copy(type__, "REGULR", (ftnlen)6, (ftnlen)6);
    } else if (mode == 3 && *sigmai == 0.f) {
	s_copy(type__, "SHIFTI", (ftnlen)6, (ftnlen)6);
    } else if (mode == 3) {
	s_copy(type__, "REALPT", (ftnlen)6, (ftnlen)6);
    } else if (mode == 4) {
	s_copy(type__, "IMAGPT", (ftnlen)6, (ftnlen)6);
    } else {
	ierr = -10;
    }
    if (mode == 1 && *(unsigned char *)bmat == 'G') {
	ierr = -11;
    }

/*     %------------% */
/*     | Error Exit | */
/*     %------------% */

    if (ierr != 0) {
	*info = ierr;
	goto L9000;
    }

/*     %--------------------------------------------------------% */
/*     | Pointer into WORKL for address of H, RITZ, BOUNDS, Q   | */
/*     | etc... and the remaining workspace.                    | */
/*     | Also update pointer to be used on output.              | */
/*     | Memory is laid out as follows:                         | */
/*     | workl(1:ncv*ncv) := generated Hessenberg matrix        | */
/*     | workl(ncv*ncv+1:ncv*ncv+2*ncv) := real and imaginary   | */
/*     |                                   parts of ritz values | */
/*     | workl(ncv*ncv+2*ncv+1:ncv*ncv+3*ncv) := error bounds   | */
/*     %--------------------------------------------------------% */

/*     %-----------------------------------------------------------% */
/*     | The following is used and set by SNEUPD.                  | */
/*     | workl(ncv*ncv+3*ncv+1:ncv*ncv+4*ncv) := The untransformed | */
/*     |                             real part of the Ritz values. | */
/*     | workl(ncv*ncv+4*ncv+1:ncv*ncv+5*ncv) := The untransformed | */
/*     |                        imaginary part of the Ritz values. | */
/*     | workl(ncv*ncv+5*ncv+1:ncv*ncv+6*ncv) := The untransformed | */
/*     |                           error bounds of the Ritz values | */
/*     | workl(ncv*ncv+6*ncv+1:2*ncv*ncv+6*ncv) := Holds the upper | */
/*     |                             quasi-triangular matrix for H | */
/*     | workl(2*ncv*ncv+6*ncv+1: 3*ncv*ncv+6*ncv) := Holds the    | */
/*     |       associated matrix representation of the invariant   | */
/*     |       subspace for H.                                     | */
/*     | GRAND total of NCV * ( 3 * NCV + 6 ) locations.           | */
/*     %-----------------------------------------------------------% */

    ih = ipntr[5];
    ritzr = ipntr[6];
    ritzi = ipntr[7];
    bounds = ipntr[8];
    ldh = *ncv;
    ldq = *ncv;
    iheigr = bounds + ldh;
    iheigi = iheigr + ldh;
    ihbds = iheigi + ldh;
    iuptri = ihbds + ldh;
    invsub = iuptri + ldh * *ncv;
    ipntr[9] = iheigr;
    ipntr[10] = iheigi;
    ipntr[11] = ihbds;
    ipntr[12] = iuptri;
    ipntr[13] = invsub;
    wrr = 1;
    wri = *ncv + 1;
    iwev = wri + *ncv;

/*     %-----------------------------------------% */
/*     | irr points to the REAL part of the Ritz | */
/*     |     values computed by _neigh before    | */
/*     |     exiting _naup2.                     | */
/*     | iri points to the IMAGINARY part of the | */
/*     |     Ritz values computed by _neigh      | */
/*     |     before exiting _naup2.              | */
/*     | ibd points to the Ritz estimates        | */
/*     |     computed by _neigh before exiting   | */
/*     |     _naup2.                             | */
/*     %-----------------------------------------% */

    irr = ipntr[14] + *ncv * *ncv;
    iri = irr + *ncv;
    ibd = iri + *ncv;

/*     %------------------------------------% */
/*     | RNORM is B-norm of the RESID(1:N). | */
/*     %------------------------------------% */

    rnorm = workl[ih + 2];
    workl[ih + 2] = 0.f;

    if (msglvl > 2) {
	svout_(&debug_1.logfil, ncv, &workl[irr], &debug_1.ndigit, "_neupd: "
		"Real part of Ritz values passed in from _NAUPD.", (ftnlen)55);
	svout_(&debug_1.logfil, ncv, &workl[iri], &debug_1.ndigit, "_neupd: "
		"Imag part of Ritz values passed in from _NAUPD.", (ftnlen)55);
	svout_(&debug_1.logfil, ncv, &workl[ibd], &debug_1.ndigit, "_neupd: "
		"Ritz estimates passed in from _NAUPD.", (ftnlen)45);
    }

    if (*rvec) {

	reord = FALSE_;

/*        %---------------------------------------------------% */
/*        | Use the temporary bounds array to store indices   | */
/*        | These will be used to mark the select array later | */
/*        %---------------------------------------------------% */

	i__1 = *ncv;
	for (j = 1; j <= i__1; ++j) {
	    workl[bounds + j - 1] = (real) j;
	    select[j] = FALSE_;
/* L10: */
	}

/*        %-------------------------------------% */
/*        | Select the wanted Ritz values.      | */
/*        | Sort the Ritz values so that the    | */
/*        | wanted ones appear at the tailing   | */
/*        | NEV positions of workl(irr) and     | */
/*        | workl(iri).  Move the corresponding | */
/*        | error estimates in workl(bound)     | */
/*        | accordingly.                        | */
/*        %-------------------------------------% */

	np = *ncv - *nev;
	ishift = 0;
	sngets_(&ishift, which, nev, &np, &workl[irr], &workl[iri], &workl[
		bounds], &workl[1], &workl[np + 1], (ftnlen)2);

	if (msglvl > 2) {
	    svout_(&debug_1.logfil, ncv, &workl[irr], &debug_1.ndigit, "_neu"
		    "pd: Real part of Ritz values after calling _NGETS.", (
		    ftnlen)54);
	    svout_(&debug_1.logfil, ncv, &workl[iri], &debug_1.ndigit, "_neu"
		    "pd: Imag part of Ritz values after calling _NGETS.", (
		    ftnlen)54);
	    svout_(&debug_1.logfil, ncv, &workl[bounds], &debug_1.ndigit, 
		    "_neupd: Ritz value indices after calling _NGETS.", (
		    ftnlen)48);
	}

/*        %-----------------------------------------------------% */
/*        | Record indices of the converged wanted Ritz values  | */
/*        | Mark the select array for possible reordering       | */
/*        %-----------------------------------------------------% */

	numcnv = 0;
	i__1 = *ncv;
	for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	    r__1 = eps23, r__2 = slapy2_(&workl[irr + *ncv - j], &workl[iri + 
		    *ncv - j]);
	    temp1 = dmax(r__1,r__2);
	    jj = workl[bounds + *ncv - j];
	    if (numcnv < nconv && workl[ibd + jj - 1] <= *tol * temp1) {
		select[jj] = TRUE_;
		++numcnv;
		if (jj > nconv) {
		    reord = TRUE_;
		}
	    }
/* L11: */
	}

/*        %-----------------------------------------------------------% */
/*        | Check the count (numcnv) of converged Ritz values with    | */
/*        | the number (nconv) reported by dnaupd.  If these two      | */
/*        | are different then there has probably been an error       | */
/*        | caused by incorrect passing of the dnaupd data.           | */
/*        %-----------------------------------------------------------% */

	if (msglvl > 2) {
	    ivout_(&debug_1.logfil, &c__1, &numcnv, &debug_1.ndigit, "_neupd"
		    ": Number of specified eigenvalues", (ftnlen)39);
	    ivout_(&debug_1.logfil, &c__1, &nconv, &debug_1.ndigit, "_neupd:"
		    " Number of \"converged\" eigenvalues", (ftnlen)41);
	}

	if (numcnv != nconv) {
	    *info = -15;
	    goto L9000;
	}

/*        %-----------------------------------------------------------% */
/*        | Call LAPACK routine slahqr to compute the real Schur form | */
/*        | of the upper Hessenberg matrix returned by SNAUPD.        | */
/*        | Make a copy of the upper Hessenberg matrix.               | */
/*        | Initialize the Schur vector matrix Q to the identity.     | */
/*        %-----------------------------------------------------------% */

	i__1 = ldh * *ncv;
	scopy_(&i__1, &workl[ih], &c__1, &workl[iuptri], &c__1);
	slaset_("All", ncv, ncv, &c_b37, &c_b38, &workl[invsub], &ldq, (
		ftnlen)3);
	slahqr_(&c_true, &c_true, ncv, &c__1, ncv, &workl[iuptri], &ldh, &
		workl[iheigr], &workl[iheigi], &c__1, ncv, &workl[invsub], &
		ldq, &ierr);
	scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &c__1);

	if (ierr != 0) {
	    *info = -8;
	    goto L9000;
	}

	if (msglvl > 1) {
	    svout_(&debug_1.logfil, ncv, &workl[iheigr], &debug_1.ndigit, 
		    "_neupd: Real part of the eigenvalues of H", (ftnlen)41);
	    svout_(&debug_1.logfil, ncv, &workl[iheigi], &debug_1.ndigit, 
		    "_neupd: Imaginary part of the Eigenvalues of H", (ftnlen)
		    46);
	    svout_(&debug_1.logfil, ncv, &workl[ihbds], &debug_1.ndigit, 
		    "_neupd: Last row of the Schur vector matrix", (ftnlen)43)
		    ;
	    if (msglvl > 3) {
		smout_(&debug_1.logfil, ncv, ncv, &workl[iuptri], &ldh, &
			debug_1.ndigit, "_neupd: The upper quasi-triangular "
			"matrix ", (ftnlen)42);
	    }
	}

	if (reord) {

/*           %-----------------------------------------------------% */
/*           | Reorder the computed upper quasi-triangular matrix. | */
/*           %-----------------------------------------------------% */

	    strsen_("None", "V", &select[1], ncv, &workl[iuptri], &ldh, &
		    workl[invsub], &ldq, &workl[iheigr], &workl[iheigi], &
		    nconv2, &conds, &sep, &workl[ihbds], ncv, iwork, &c__1, &
		    ierr, (ftnlen)4, (ftnlen)1);

	    if (nconv2 < nconv) {
		nconv = nconv2;
	    }
	    if (ierr == 1) {
		*info = 1;
		goto L9000;
	    }

	    if (msglvl > 2) {
		svout_(&debug_1.logfil, ncv, &workl[iheigr], &debug_1.ndigit, 
			"_neupd: Real part of the eigenvalues of H--reordered"
			, (ftnlen)52);
		svout_(&debug_1.logfil, ncv, &workl[iheigi], &debug_1.ndigit, 
			"_neupd: Imag part of the eigenvalues of H--reordered"
			, (ftnlen)52);
		if (msglvl > 3) {
		    smout_(&debug_1.logfil, ncv, ncv, &workl[iuptri], &ldq, &
			    debug_1.ndigit, "_neupd: Quasi-triangular matrix"
			    " after re-ordering", (ftnlen)49);
		}
	    }

	}

/*        %---------------------------------------% */
/*        | Copy the last row of the Schur vector | */
/*        | into workl(ihbds).  This will be used | */
/*        | to compute the Ritz estimates of      | */
/*        | converged Ritz values.                | */
/*        %---------------------------------------% */

	scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &c__1);

/*        %----------------------------------------------------% */
/*        | Place the computed eigenvalues of H into DR and DI | */
/*        | if a spectral transformation was not used.         | */
/*        %----------------------------------------------------% */

	if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0) {
	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);
	}

/*        %----------------------------------------------------------% */
/*        | Compute the QR factorization of the matrix representing  | */
/*        | the wanted invariant subspace located in the first NCONV | */
/*        | columns of workl(invsub,ldq).                            | */
/*        %----------------------------------------------------------% */

	sgeqr2_(ncv, &nconv, &workl[invsub], &ldq, &workev[1], &workev[*ncv + 
		1], &ierr);

/*        %---------------------------------------------------------% */
/*        | * Postmultiply V by Q using sorm2r.                     | */
/*        | * Copy the first NCONV columns of VQ into Z.            | */
/*        | * Postmultiply Z by R.                                  | */
/*        | The N by NCONV matrix Z is now a matrix representation  | */
/*        | of the approximate invariant subspace associated with   | */
/*        | the Ritz values in workl(iheigr) and workl(iheigi)      | */
/*        | The first NCONV columns of V are now approximate Schur  | */
/*        | vectors associated with the real upper quasi-triangular | */
/*        | matrix of order NCONV in workl(iuptri)                  | */
/*        %---------------------------------------------------------% */

	sorm2r_("Right", "Notranspose", n, ncv, &nconv, &workl[invsub], &ldq, 
		&workev[1], &v[v_offset], ldv, &workd[*n + 1], &ierr, (ftnlen)
		5, (ftnlen)11);
	slacpy_("All", n, &nconv, &v[v_offset], ldv, &z__[z_offset], ldz, (
		ftnlen)3);

	i__1 = nconv;
	for (j = 1; j <= i__1; ++j) {

/*           %---------------------------------------------------% */
/*           | Perform both a column and row scaling if the      | */
/*           | diagonal element of workl(invsub,ldq) is negative | */
/*           | I'm lazy and don't take advantage of the upper    | */
/*           | quasi-triangular form of workl(iuptri,ldq)        | */
/*           | Note that since Q is orthogonal, R is a diagonal  | */
/*           | matrix consisting of plus or minus ones           | */
/*           %---------------------------------------------------% */

	    if (workl[invsub + (j - 1) * ldq + j - 1] < 0.f) {
		sscal_(&nconv, &c_b64, &workl[iuptri + j - 1], &ldq);
		sscal_(&nconv, &c_b64, &workl[iuptri + (j - 1) * ldq], &c__1);
	    }

/* L20: */
	}

	if (*(unsigned char *)howmny == 'A') {

/*           %--------------------------------------------% */
/*           | Compute the NCONV wanted eigenvectors of T | */
/*           | located in workl(iuptri,ldq).              | */
/*           %--------------------------------------------% */

	    i__1 = *ncv;
	    for (j = 1; j <= i__1; ++j) {
		if (j <= nconv) {
		    select[j] = TRUE_;
		} else {
		    select[j] = FALSE_;
		}
/* L30: */
	    }

	    strevc_("Right", "Select", &select[1], ncv, &workl[iuptri], &ldq, 
		    vl, &c__1, &workl[invsub], &ldq, ncv, &outncv, &workev[1],
		     &ierr, (ftnlen)5, (ftnlen)6);

	    if (ierr != 0) {
		*info = -9;
		goto L9000;
	    }

/*           %------------------------------------------------% */
/*           | Scale the returning eigenvectors so that their | */
/*           | Euclidean norms are all one. LAPACK subroutine | */
/*           | strevc returns each eigenvector normalized so  | */
/*           | that the element of largest magnitude has      | */
/*           | magnitude 1;                                   | */
/*           %------------------------------------------------% */

	    iconj = 0;
	    i__1 = nconv;
	    for (j = 1; j <= i__1; ++j) {

		if (workl[iheigi + j - 1] == 0.f) {

/*                 %----------------------% */
/*                 | real eigenvalue case | */
/*                 %----------------------% */

		    temp = snrm2_(ncv, &workl[invsub + (j - 1) * ldq], &c__1);
		    r__1 = 1.f / temp;
		    sscal_(ncv, &r__1, &workl[invsub + (j - 1) * ldq], &c__1);

		} else {

/*                 %-------------------------------------------% */
/*                 | Complex conjugate pair case. Note that    | */
/*                 | since the real and imaginary part of      | */
/*                 | the eigenvector are stored in consecutive | */
/*                 | columns, we further normalize by the      | */
/*                 | square root of two.                       | */
/*                 %-------------------------------------------% */

		    if (iconj == 0) {
			r__1 = snrm2_(ncv, &workl[invsub + (j - 1) * ldq], &
				c__1);
			r__2 = snrm2_(ncv, &workl[invsub + j * ldq], &c__1);
			temp = slapy2_(&r__1, &r__2);
			r__1 = 1.f / temp;
			sscal_(ncv, &r__1, &workl[invsub + (j - 1) * ldq], &
				c__1);
			r__1 = 1.f / temp;
			sscal_(ncv, &r__1, &workl[invsub + j * ldq], &c__1);
			iconj = 1;
		    } else {
			iconj = 0;
		    }

		}

/* L40: */
	    }

	    sgemv_("T", ncv, &nconv, &c_b38, &workl[invsub], &ldq, &workl[
		    ihbds], &c__1, &c_b37, &workev[1], &c__1, (ftnlen)1);

	    iconj = 0;
	    i__1 = nconv;
	    for (j = 1; j <= i__1; ++j) {
		if (workl[iheigi + j - 1] != 0.f) {

/*                 %-------------------------------------------% */
/*                 | Complex conjugate pair case. Note that    | */
/*                 | since the real and imaginary part of      | */
/*                 | the eigenvector are stored in consecutive | */
/*                 %-------------------------------------------% */

		    if (iconj == 0) {
			workev[j] = slapy2_(&workev[j], &workev[j + 1]);
			workev[j + 1] = workev[j];
			iconj = 1;
		    } else {
			iconj = 0;
		    }
		}
/* L45: */
	    }

	    if (msglvl > 2) {
		scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &
			c__1);
		svout_(&debug_1.logfil, ncv, &workl[ihbds], &debug_1.ndigit, 
			"_neupd: Last row of the eigenvector matrix for T", (
			ftnlen)48);
		if (msglvl > 3) {
		    smout_(&debug_1.logfil, ncv, ncv, &workl[invsub], &ldq, &
			    debug_1.ndigit, "_neupd: The eigenvector matrix "
			    "for T", (ftnlen)36);
		}
	    }

/*           %---------------------------------------% */
/*           | Copy Ritz estimates into workl(ihbds) | */
/*           %---------------------------------------% */

	    scopy_(&nconv, &workev[1], &c__1, &workl[ihbds], &c__1);

/*           %---------------------------------------------------------% */
/*           | Compute the QR factorization of the eigenvector matrix  | */
/*           | associated with leading portion of T in the first NCONV | */
/*           | columns of workl(invsub,ldq).                           | */
/*           %---------------------------------------------------------% */

	    sgeqr2_(ncv, &nconv, &workl[invsub], &ldq, &workev[1], &workev[*
		    ncv + 1], &ierr);

/*           %----------------------------------------------% */
/*           | * Postmultiply Z by Q.                       | */
/*           | * Postmultiply Z by R.                       | */
/*           | The N by NCONV matrix Z is now contains the  | */
/*           | Ritz vectors associated with the Ritz values | */
/*           | in workl(iheigr) and workl(iheigi).          | */
/*           %----------------------------------------------% */

	    sorm2r_("Right", "Notranspose", n, ncv, &nconv, &workl[invsub], &
		    ldq, &workev[1], &z__[z_offset], ldz, &workd[*n + 1], &
		    ierr, (ftnlen)5, (ftnlen)11);

	    strmm_("Right", "Upper", "No transpose", "Non-unit", n, &nconv, &
		    c_b38, &workl[invsub], &ldq, &z__[z_offset], ldz, (ftnlen)
		    5, (ftnlen)5, (ftnlen)12, (ftnlen)8);

	}

    } else {

/*        %------------------------------------------------------% */
/*        | An approximate invariant subspace is not needed.     | */
/*        | Place the Ritz values computed SNAUPD into DR and DI | */
/*        %------------------------------------------------------% */

	scopy_(&nconv, &workl[ritzr], &c__1, &dr[1], &c__1);
	scopy_(&nconv, &workl[ritzi], &c__1, &di[1], &c__1);
	scopy_(&nconv, &workl[ritzr], &c__1, &workl[iheigr], &c__1);
	scopy_(&nconv, &workl[ritzi], &c__1, &workl[iheigi], &c__1);
	scopy_(&nconv, &workl[bounds], &c__1, &workl[ihbds], &c__1);
    }

/*     %------------------------------------------------% */
/*     | Transform the Ritz values and possibly vectors | */
/*     | and corresponding error bounds of OP to those  | */
/*     | of A*x = lambda*B*x.                           | */
/*     %------------------------------------------------% */

    if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0) {

	if (*rvec) {
	    sscal_(ncv, &rnorm, &workl[ihbds], &c__1);
	}

    } else {

/*        %---------------------------------------% */
/*        |   A spectral transformation was used. | */
/*        | * Determine the Ritz estimates of the | */
/*        |   Ritz values in the original system. | */
/*        %---------------------------------------% */

	if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0) {

	    if (*rvec) {
		sscal_(ncv, &rnorm, &workl[ihbds], &c__1);
	    }

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
		temp = slapy2_(&workl[iheigr + k - 1], &workl[iheigi + k - 1])
			;
		workl[ihbds + k - 1] = (r__1 = workl[ihbds + k - 1], dabs(
			r__1)) / temp / temp;
/* L50: */
	    }

	} else if (s_cmp(type__, "REALPT", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
/* L60: */
	    }

	} else if (s_cmp(type__, "IMAGPT", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
/* L70: */
	    }

	}

/*        %-----------------------------------------------------------% */
/*        | *  Transform the Ritz values back to the original system. | */
/*        |    For TYPE = 'SHIFTI' the transformation is              | */
/*        |             lambda = 1/theta + sigma                      | */
/*        |    For TYPE = 'REALPT' or 'IMAGPT' the user must from     | */
/*        |    Rayleigh quotients or a projection. See remark 3 above.| */
/*        | NOTES:                                                    | */
/*        | *The Ritz vectors are not affected by the transformation. | */
/*        %-----------------------------------------------------------% */

	if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
		temp = slapy2_(&workl[iheigr + k - 1], &workl[iheigi + k - 1])
			;
		workl[iheigr + k - 1] = workl[iheigr + k - 1] / temp / temp + 
			*sigmar;
		workl[iheigi + k - 1] = -workl[iheigi + k - 1] / temp / temp 
			+ *sigmai;
/* L80: */
	    }

	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);

	} else if (s_cmp(type__, "REALPT", (ftnlen)6, (ftnlen)6) == 0 || 
		s_cmp(type__, "IMAGPT", (ftnlen)6, (ftnlen)6) == 0) {

	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);

	}

    }

    if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0 && msglvl > 1) {
	svout_(&debug_1.logfil, &nconv, &dr[1], &debug_1.ndigit, "_neupd: Un"
		"transformed real part of the Ritz valuess.", (ftnlen)52);
	svout_(&debug_1.logfil, &nconv, &di[1], &debug_1.ndigit, "_neupd: Un"
		"transformed imag part of the Ritz valuess.", (ftnlen)52);
	svout_(&debug_1.logfil, &nconv, &workl[ihbds], &debug_1.ndigit, "_ne"
		"upd: Ritz estimates of untransformed Ritz values.", (ftnlen)
		52);
    } else if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0 && msglvl > 
	    1) {
	svout_(&debug_1.logfil, &nconv, &dr[1], &debug_1.ndigit, "_neupd: Re"
		"al parts of converged Ritz values.", (ftnlen)44);
	svout_(&debug_1.logfil, &nconv, &di[1], &debug_1.ndigit, "_neupd: Im"
		"ag parts of converged Ritz values.", (ftnlen)44);
	svout_(&debug_1.logfil, &nconv, &workl[ihbds], &debug_1.ndigit, "_ne"
		"upd: Associated Ritz estimates.", (ftnlen)34);
    }

/*     %-------------------------------------------------% */
/*     | Eigenvector Purification step. Formally perform | */
/*     | one of inverse subspace iteration. Only used    | */
/*     | for MODE = 2.                                   | */
/*     %-------------------------------------------------% */

    if (*rvec && *(unsigned char *)howmny == 'A' && s_cmp(type__, "SHIFTI", (
	    ftnlen)6, (ftnlen)6) == 0) {

/*        %------------------------------------------------% */
/*        | Purify the computed Ritz vectors by adding a   | */
/*        | little bit of the residual vector:             | */
/*        |                      T                         | */
/*        |          resid(:)*( e    s ) / theta           | */
/*        |                      NCV                       | */
/*        | where H s = s theta. Remember that when theta  | */
/*        | has nonzero imaginary part, the corresponding  | */
/*        | Ritz vector is stored across two columns of Z. | */
/*        %------------------------------------------------% */

	iconj = 0;
	i__1 = nconv;
	for (j = 1; j <= i__1; ++j) {
	    if (workl[iheigi + j - 1] == 0.f) {
		workev[j] = workl[invsub + (j - 1) * ldq + *ncv - 1] / workl[
			iheigr + j - 1];
	    } else if (iconj == 0) {
		temp = slapy2_(&workl[iheigr + j - 1], &workl[iheigi + j - 1])
			;
		workev[j] = (workl[invsub + (j - 1) * ldq + *ncv - 1] * workl[
			iheigr + j - 1] + workl[invsub + j * ldq + *ncv - 1] *
			 workl[iheigi + j - 1]) / temp / temp;
		workev[j + 1] = (workl[invsub + j * ldq + *ncv - 1] * workl[
			iheigr + j - 1] - workl[invsub + (j - 1) * ldq + *ncv 
			- 1] * workl[iheigi + j - 1]) / temp / temp;
		iconj = 1;
	    } else {
		iconj = 0;
	    }
/* L110: */
	}

/*        %---------------------------------------% */
/*        | Perform a rank one update to Z and    | */
/*        | purify all the Ritz vectors together. | */
/*        %---------------------------------------% */

	sger_(n, &nconv, &c_b38, &resid[1], &c__1, &workev[1], &c__1, &z__[
		z_offset], ldz);

    }

L9000:

    return 0;

/*     %---------------% */
/*     | End of SNEUPD | */
/*     %---------------% */

} /* sneupd_ */
Esempio n. 21
0
/* Subroutine */
int slaein_(logical *rightv, logical *noinit, integer *n, real *h__, integer *ldh, real *wr, real *wi, real *vr, real *vi, real *b, integer *ldb, real *work, real *eps3, real *smlnum, real *bignum, integer *info)
{
    /* System generated locals */
    integer b_dim1, b_offset, h_dim1, h_offset, i__1, i__2, i__3, i__4;
    real r__1, r__2, r__3, r__4;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    integer i__, j;
    real w, x, y;
    integer i1, i2, i3;
    real w1, ei, ej, xi, xr, rec;
    integer its, ierr;
    real temp, norm, vmax;
    extern real snrm2_(integer *, real *, integer *);
    real scale;
    extern /* Subroutine */
    int sscal_(integer *, real *, real *, integer *);
    char trans[1];
    real vcrit;
    extern real sasum_(integer *, real *, integer *);
    real rootn, vnorm;
    extern real slapy2_(real *, real *);
    real absbii, absbjj;
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */
    int sladiv_(real *, real *, real *, real *, real * , real *);
    char normin[1];
    real nrmsml;
    extern /* Subroutine */
    int slatrs_(char *, char *, char *, char *, integer *, real *, integer *, real *, real *, real *, integer *);
    real growto;
    /* -- LAPACK auxiliary routine (version 3.4.2) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* September 2012 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Parameter adjustments */
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    --vr;
    --vi;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --work;
    /* Function Body */
    *info = 0;
    /* GROWTO is the threshold used in the acceptance test for an */
    /* eigenvector. */
    rootn = sqrt((real) (*n));
    growto = .1f / rootn;
    /* Computing MAX */
    r__1 = 1.f;
    r__2 = *eps3 * rootn; // , expr subst
    nrmsml = max(r__1,r__2) * *smlnum;
    /* Form B = H - (WR,WI)*I (except that the subdiagonal elements and */
    /* the imaginary parts of the diagonal elements are not stored). */
    i__1 = *n;
    for (j = 1;
            j <= i__1;
            ++j)
    {
        i__2 = j - 1;
        for (i__ = 1;
                i__ <= i__2;
                ++i__)
        {
            b[i__ + j * b_dim1] = h__[i__ + j * h_dim1];
            /* L10: */
        }
        b[j + j * b_dim1] = h__[j + j * h_dim1] - *wr;
        /* L20: */
    }
    if (*wi == 0.f)
    {
        /* Real eigenvalue. */
        if (*noinit)
        {
            /* Set initial vector. */
            i__1 = *n;
            for (i__ = 1;
                    i__ <= i__1;
                    ++i__)
            {
                vr[i__] = *eps3;
                /* L30: */
            }
        }
        else
        {
            /* Scale supplied initial vector. */
            vnorm = snrm2_(n, &vr[1], &c__1);
            r__1 = *eps3 * rootn / max(vnorm,nrmsml);
            sscal_(n, &r__1, &vr[1], &c__1);
        }
        if (*rightv)
        {
            /* LU decomposition with partial pivoting of B, replacing zero */
            /* pivots by EPS3. */
            i__1 = *n - 1;
            for (i__ = 1;
                    i__ <= i__1;
                    ++i__)
            {
                ei = h__[i__ + 1 + i__ * h_dim1];
                if ((r__1 = b[i__ + i__ * b_dim1], f2c_abs(r__1)) < f2c_abs(ei))
                {
                    /* Interchange rows and eliminate. */
                    x = b[i__ + i__ * b_dim1] / ei;
                    b[i__ + i__ * b_dim1] = ei;
                    i__2 = *n;
                    for (j = i__ + 1;
                            j <= i__2;
                            ++j)
                    {
                        temp = b[i__ + 1 + j * b_dim1];
                        b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - x * temp;
                        b[i__ + j * b_dim1] = temp;
                        /* L40: */
                    }
                }
                else
                {
                    /* Eliminate without interchange. */
                    if (b[i__ + i__ * b_dim1] == 0.f)
                    {
                        b[i__ + i__ * b_dim1] = *eps3;
                    }
                    x = ei / b[i__ + i__ * b_dim1];
                    if (x != 0.f)
                    {
                        i__2 = *n;
                        for (j = i__ + 1;
                                j <= i__2;
                                ++j)
                        {
                            b[i__ + 1 + j * b_dim1] -= x * b[i__ + j * b_dim1] ;
                            /* L50: */
                        }
                    }
                }
                /* L60: */
            }
            if (b[*n + *n * b_dim1] == 0.f)
            {
                b[*n + *n * b_dim1] = *eps3;
            }
            *(unsigned char *)trans = 'N';
        }
        else
        {
            /* UL decomposition with partial pivoting of B, replacing zero */
            /* pivots by EPS3. */
            for (j = *n;
                    j >= 2;
                    --j)
            {
                ej = h__[j + (j - 1) * h_dim1];
                if ((r__1 = b[j + j * b_dim1], f2c_abs(r__1)) < f2c_abs(ej))
                {
                    /* Interchange columns and eliminate. */
                    x = b[j + j * b_dim1] / ej;
                    b[j + j * b_dim1] = ej;
                    i__1 = j - 1;
                    for (i__ = 1;
                            i__ <= i__1;
                            ++i__)
                    {
                        temp = b[i__ + (j - 1) * b_dim1];
                        b[i__ + (j - 1) * b_dim1] = b[i__ + j * b_dim1] - x * temp;
                        b[i__ + j * b_dim1] = temp;
                        /* L70: */
                    }
                }
                else
                {
                    /* Eliminate without interchange. */
                    if (b[j + j * b_dim1] == 0.f)
                    {
                        b[j + j * b_dim1] = *eps3;
                    }
                    x = ej / b[j + j * b_dim1];
                    if (x != 0.f)
                    {
                        i__1 = j - 1;
                        for (i__ = 1;
                                i__ <= i__1;
                                ++i__)
                        {
                            b[i__ + (j - 1) * b_dim1] -= x * b[i__ + j * b_dim1];
                            /* L80: */
                        }
                    }
                }
                /* L90: */
            }
            if (b[b_dim1 + 1] == 0.f)
            {
                b[b_dim1 + 1] = *eps3;
            }
            *(unsigned char *)trans = 'T';
        }
        *(unsigned char *)normin = 'N';
        i__1 = *n;
        for (its = 1;
                its <= i__1;
                ++its)
        {
            /* Solve U*x = scale*v for a right eigenvector */
            /* or U**T*x = scale*v for a left eigenvector, */
            /* overwriting x on v. */
            slatrs_("Upper", trans, "Nonunit", normin, n, &b[b_offset], ldb, & vr[1], &scale, &work[1], &ierr);
            *(unsigned char *)normin = 'Y';
            /* Test for sufficient growth in the norm of v. */
            vnorm = sasum_(n, &vr[1], &c__1);
            if (vnorm >= growto * scale)
            {
                goto L120;
            }
            /* Choose new orthogonal starting vector and try again. */
            temp = *eps3 / (rootn + 1.f);
            vr[1] = *eps3;
            i__2 = *n;
            for (i__ = 2;
                    i__ <= i__2;
                    ++i__)
            {
                vr[i__] = temp;
                /* L100: */
            }
            vr[*n - its + 1] -= *eps3 * rootn;
            /* L110: */
        }
        /* Failure to find eigenvector in N iterations. */
        *info = 1;
L120: /* Normalize eigenvector. */
        i__ = isamax_(n, &vr[1], &c__1);
        r__2 = 1.f / (r__1 = vr[i__], f2c_abs(r__1));
        sscal_(n, &r__2, &vr[1], &c__1);
    }
    else
    {
        /* Complex eigenvalue. */
        if (*noinit)
        {
            /* Set initial vector. */
            i__1 = *n;
            for (i__ = 1;
                    i__ <= i__1;
                    ++i__)
            {
                vr[i__] = *eps3;
                vi[i__] = 0.f;
                /* L130: */
            }
        }
        else
        {
            /* Scale supplied initial vector. */
            r__1 = snrm2_(n, &vr[1], &c__1);
            r__2 = snrm2_(n, &vi[1], &c__1);
            norm = slapy2_(&r__1, &r__2);
            rec = *eps3 * rootn / max(norm,nrmsml);
            sscal_(n, &rec, &vr[1], &c__1);
            sscal_(n, &rec, &vi[1], &c__1);
        }
        if (*rightv)
        {
            /* LU decomposition with partial pivoting of B, replacing zero */
            /* pivots by EPS3. */
            /* The imaginary part of the (i,j)-th element of U is stored in */
            /* B(j+1,i). */
            b[b_dim1 + 2] = -(*wi);
            i__1 = *n;
            for (i__ = 2;
                    i__ <= i__1;
                    ++i__)
            {
                b[i__ + 1 + b_dim1] = 0.f;
                /* L140: */
            }
            i__1 = *n - 1;
            for (i__ = 1;
                    i__ <= i__1;
                    ++i__)
            {
                absbii = slapy2_(&b[i__ + i__ * b_dim1], &b[i__ + 1 + i__ * b_dim1]);
                ei = h__[i__ + 1 + i__ * h_dim1];
                if (absbii < f2c_abs(ei))
                {
                    /* Interchange rows and eliminate. */
                    xr = b[i__ + i__ * b_dim1] / ei;
                    xi = b[i__ + 1 + i__ * b_dim1] / ei;
                    b[i__ + i__ * b_dim1] = ei;
                    b[i__ + 1 + i__ * b_dim1] = 0.f;
                    i__2 = *n;
                    for (j = i__ + 1;
                            j <= i__2;
                            ++j)
                    {
                        temp = b[i__ + 1 + j * b_dim1];
                        b[i__ + 1 + j * b_dim1] = b[i__ + j * b_dim1] - xr * temp;
                        b[j + 1 + (i__ + 1) * b_dim1] = b[j + 1 + i__ * b_dim1] - xi * temp;
                        b[i__ + j * b_dim1] = temp;
                        b[j + 1 + i__ * b_dim1] = 0.f;
                        /* L150: */
                    }
                    b[i__ + 2 + i__ * b_dim1] = -(*wi);
                    b[i__ + 1 + (i__ + 1) * b_dim1] -= xi * *wi;
                    b[i__ + 2 + (i__ + 1) * b_dim1] += xr * *wi;
                }
                else
                {
                    /* Eliminate without interchanging rows. */
                    if (absbii == 0.f)
                    {
                        b[i__ + i__ * b_dim1] = *eps3;
                        b[i__ + 1 + i__ * b_dim1] = 0.f;
                        absbii = *eps3;
                    }
                    ei = ei / absbii / absbii;
                    xr = b[i__ + i__ * b_dim1] * ei;
                    xi = -b[i__ + 1 + i__ * b_dim1] * ei;
                    i__2 = *n;
                    for (j = i__ + 1;
                            j <= i__2;
                            ++j)
                    {
                        b[i__ + 1 + j * b_dim1] = b[i__ + 1 + j * b_dim1] - xr * b[i__ + j * b_dim1] + xi * b[j + 1 + i__ * b_dim1];
                        b[j + 1 + (i__ + 1) * b_dim1] = -xr * b[j + 1 + i__ * b_dim1] - xi * b[i__ + j * b_dim1];
                        /* L160: */
                    }
                    b[i__ + 2 + (i__ + 1) * b_dim1] -= *wi;
                }
                /* Compute 1-norm of offdiagonal elements of i-th row. */
                i__2 = *n - i__;
                i__3 = *n - i__;
                work[i__] = sasum_(&i__2, &b[i__ + (i__ + 1) * b_dim1], ldb) + sasum_(&i__3, &b[i__ + 2 + i__ * b_dim1], &c__1);
                /* L170: */
            }
            if (b[*n + *n * b_dim1] == 0.f && b[*n + 1 + *n * b_dim1] == 0.f)
            {
                b[*n + *n * b_dim1] = *eps3;
            }
            work[*n] = 0.f;
            i1 = *n;
            i2 = 1;
            i3 = -1;
        }
        else
        {
            /* UL decomposition with partial pivoting of conjg(B), */
            /* replacing zero pivots by EPS3. */
            /* The imaginary part of the (i,j)-th element of U is stored in */
            /* B(j+1,i). */
            b[*n + 1 + *n * b_dim1] = *wi;
            i__1 = *n - 1;
            for (j = 1;
                    j <= i__1;
                    ++j)
            {
                b[*n + 1 + j * b_dim1] = 0.f;
                /* L180: */
            }
            for (j = *n;
                    j >= 2;
                    --j)
            {
                ej = h__[j + (j - 1) * h_dim1];
                absbjj = slapy2_(&b[j + j * b_dim1], &b[j + 1 + j * b_dim1]);
                if (absbjj < f2c_abs(ej))
                {
                    /* Interchange columns and eliminate */
                    xr = b[j + j * b_dim1] / ej;
                    xi = b[j + 1 + j * b_dim1] / ej;
                    b[j + j * b_dim1] = ej;
                    b[j + 1 + j * b_dim1] = 0.f;
                    i__1 = j - 1;
                    for (i__ = 1;
                            i__ <= i__1;
                            ++i__)
                    {
                        temp = b[i__ + (j - 1) * b_dim1];
                        b[i__ + (j - 1) * b_dim1] = b[i__ + j * b_dim1] - xr * temp;
                        b[j + i__ * b_dim1] = b[j + 1 + i__ * b_dim1] - xi * temp;
                        b[i__ + j * b_dim1] = temp;
                        b[j + 1 + i__ * b_dim1] = 0.f;
                        /* L190: */
                    }
                    b[j + 1 + (j - 1) * b_dim1] = *wi;
                    b[j - 1 + (j - 1) * b_dim1] += xi * *wi;
                    b[j + (j - 1) * b_dim1] -= xr * *wi;
                }
                else
                {
                    /* Eliminate without interchange. */
                    if (absbjj == 0.f)
                    {
                        b[j + j * b_dim1] = *eps3;
                        b[j + 1 + j * b_dim1] = 0.f;
                        absbjj = *eps3;
                    }
                    ej = ej / absbjj / absbjj;
                    xr = b[j + j * b_dim1] * ej;
                    xi = -b[j + 1 + j * b_dim1] * ej;
                    i__1 = j - 1;
                    for (i__ = 1;
                            i__ <= i__1;
                            ++i__)
                    {
                        b[i__ + (j - 1) * b_dim1] = b[i__ + (j - 1) * b_dim1] - xr * b[i__ + j * b_dim1] + xi * b[j + 1 + i__ * b_dim1];
                        b[j + i__ * b_dim1] = -xr * b[j + 1 + i__ * b_dim1] - xi * b[i__ + j * b_dim1];
                        /* L200: */
                    }
                    b[j + (j - 1) * b_dim1] += *wi;
                }
                /* Compute 1-norm of offdiagonal elements of j-th column. */
                i__1 = j - 1;
                i__2 = j - 1;
                work[j] = sasum_(&i__1, &b[j * b_dim1 + 1], &c__1) + sasum_(& i__2, &b[j + 1 + b_dim1], ldb);
                /* L210: */
            }
            if (b[b_dim1 + 1] == 0.f && b[b_dim1 + 2] == 0.f)
            {
                b[b_dim1 + 1] = *eps3;
            }
            work[1] = 0.f;
            i1 = 1;
            i2 = *n;
            i3 = 1;
        }
        i__1 = *n;
        for (its = 1;
                its <= i__1;
                ++its)
        {
            scale = 1.f;
            vmax = 1.f;
            vcrit = *bignum;
            /* Solve U*(xr,xi) = scale*(vr,vi) for a right eigenvector, */
            /* or U**T*(xr,xi) = scale*(vr,vi) for a left eigenvector, */
            /* overwriting (xr,xi) on (vr,vi). */
            i__2 = i2;
            i__3 = i3;
            for (i__ = i1;
                    i__3 < 0 ? i__ >= i__2 : i__ <= i__2;
                    i__ += i__3)
            {
                if (work[i__] > vcrit)
                {
                    rec = 1.f / vmax;
                    sscal_(n, &rec, &vr[1], &c__1);
                    sscal_(n, &rec, &vi[1], &c__1);
                    scale *= rec;
                    vmax = 1.f;
                    vcrit = *bignum;
                }
                xr = vr[i__];
                xi = vi[i__];
                if (*rightv)
                {
                    i__4 = *n;
                    for (j = i__ + 1;
                            j <= i__4;
                            ++j)
                    {
                        xr = xr - b[i__ + j * b_dim1] * vr[j] + b[j + 1 + i__ * b_dim1] * vi[j];
                        xi = xi - b[i__ + j * b_dim1] * vi[j] - b[j + 1 + i__ * b_dim1] * vr[j];
                        /* L220: */
                    }
                }
                else
                {
                    i__4 = i__ - 1;
                    for (j = 1;
                            j <= i__4;
                            ++j)
                    {
                        xr = xr - b[j + i__ * b_dim1] * vr[j] + b[i__ + 1 + j * b_dim1] * vi[j];
                        xi = xi - b[j + i__ * b_dim1] * vi[j] - b[i__ + 1 + j * b_dim1] * vr[j];
                        /* L230: */
                    }
                }
                w = (r__1 = b[i__ + i__ * b_dim1], f2c_abs(r__1)) + (r__2 = b[i__ + 1 + i__ * b_dim1], f2c_abs(r__2));
                if (w > *smlnum)
                {
                    if (w < 1.f)
                    {
                        w1 = f2c_abs(xr) + f2c_abs(xi);
                        if (w1 > w * *bignum)
                        {
                            rec = 1.f / w1;
                            sscal_(n, &rec, &vr[1], &c__1);
                            sscal_(n, &rec, &vi[1], &c__1);
                            xr = vr[i__];
                            xi = vi[i__];
                            scale *= rec;
                            vmax *= rec;
                        }
                    }
                    /* Divide by diagonal element of B. */
                    sladiv_(&xr, &xi, &b[i__ + i__ * b_dim1], &b[i__ + 1 + i__ * b_dim1], &vr[i__], &vi[i__]);
                    /* Computing MAX */
                    r__3 = (r__1 = vr[i__], f2c_abs(r__1)) + (r__2 = vi[i__], f2c_abs( r__2));
                    vmax = max(r__3,vmax);
                    vcrit = *bignum / vmax;
                }
                else
                {
                    i__4 = *n;
                    for (j = 1;
                            j <= i__4;
                            ++j)
                    {
                        vr[j] = 0.f;
                        vi[j] = 0.f;
                        /* L240: */
                    }
                    vr[i__] = 1.f;
                    vi[i__] = 1.f;
                    scale = 0.f;
                    vmax = 1.f;
                    vcrit = *bignum;
                }
                /* L250: */
            }
            /* Test for sufficient growth in the norm of (VR,VI). */
            vnorm = sasum_(n, &vr[1], &c__1) + sasum_(n, &vi[1], &c__1);
            if (vnorm >= growto * scale)
            {
                goto L280;
            }
            /* Choose a new orthogonal starting vector and try again. */
            y = *eps3 / (rootn + 1.f);
            vr[1] = *eps3;
            vi[1] = 0.f;
            i__3 = *n;
            for (i__ = 2;
                    i__ <= i__3;
                    ++i__)
            {
                vr[i__] = y;
                vi[i__] = 0.f;
                /* L260: */
            }
            vr[*n - its + 1] -= *eps3 * rootn;
            /* L270: */
        }
        /* Failure to find eigenvector in N iterations */
        *info = 1;
L280: /* Normalize eigenvector. */
        vnorm = 0.f;
        i__1 = *n;
        for (i__ = 1;
                i__ <= i__1;
                ++i__)
        {
            /* Computing MAX */
            r__3 = vnorm;
            r__4 = (r__1 = vr[i__], f2c_abs(r__1)) + (r__2 = vi[i__] , f2c_abs(r__2)); // , expr subst
            vnorm = max(r__3,r__4);
            /* L290: */
        }
        r__1 = 1.f / vnorm;
        sscal_(n, &r__1, &vr[1], &c__1);
        r__1 = 1.f / vnorm;
        sscal_(n, &r__1, &vi[1], &c__1);
    }
    return 0;
    /* End of SLAEIN */
}
Esempio n. 22
0
/* Subroutine */ int slaqp2_(integer *m, integer *n, integer *offset, real *a,
	 integer *lda, integer *jpvt, real *tau, real *vn1, real *vn2, real *
	work)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1, r__2;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    static integer i__, j, mn;
    static real aii;
    static integer pvt;
    static real temp, temp2;
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer offpi;
    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, 
	    integer *, real *, real *, integer *, real *, ftnlen);
    static integer itemp;
    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
	    integer *), slarfg_(integer *, real *, real *, integer *, real *);
    extern integer isamax_(integer *, real *, integer *);


/*  -- LAPACK auxiliary routine (version 3.0) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     June 30, 1999 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAQP2 computes a QR factorization with column pivoting of */
/*  the block A(OFFSET+1:M,1:N). */
/*  The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A. M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A. N >= 0. */

/*  OFFSET  (input) INTEGER */
/*          The number of rows of the matrix A that must be pivoted */
/*          but no factorized. OFFSET >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix A. */
/*          On exit, the upper triangle of block A(OFFSET+1:M,1:N) is */
/*          the triangular factor obtained; the elements in block */
/*          A(OFFSET+1:M,1:N) below the diagonal, together with the */
/*          array TAU, represent the orthogonal matrix Q as a product of */
/*          elementary reflectors. Block A(1:OFFSET,1:N) has been */
/*          accordingly pivoted, but no factorized. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= max(1,M). */

/*  JPVT    (input/output) INTEGER array, dimension (N) */
/*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */
/*          to the front of A*P (a leading column); if JPVT(i) = 0, */
/*          the i-th column of A is a free column. */
/*          On exit, if JPVT(i) = k, then the i-th column of A*P */
/*          was the k-th column of A. */

/*  TAU     (output) REAL array, dimension (min(M,N)) */
/*          The scalar factors of the elementary reflectors. */

/*  VN1     (input/output) REAL array, dimension (N) */
/*          The vector with the partial column norms. */

/*  VN2     (input/output) REAL array, dimension (N) */
/*          The vector with the exact column norms. */

/*  WORK    (workspace) REAL array, dimension (N) */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
/*    X. Sun, Computer Science Dept., Duke University, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --jpvt;
    --tau;
    --vn1;
    --vn2;
    --work;

    /* Function Body */
/* Computing MIN */
    i__1 = *m - *offset;
    mn = min(i__1,*n);

/*     Compute factorization. */

    i__1 = mn;
    for (i__ = 1; i__ <= i__1; ++i__) {

	offpi = *offset + i__;

/*        Determine ith pivot column and swap if necessary. */

	i__2 = *n - i__ + 1;
	pvt = i__ - 1 + isamax_(&i__2, &vn1[i__], &c__1);

	if (pvt != i__) {
	    sswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[i__ * a_dim1 + 1], &
		    c__1);
	    itemp = jpvt[pvt];
	    jpvt[pvt] = jpvt[i__];
	    jpvt[i__] = itemp;
	    vn1[pvt] = vn1[i__];
	    vn2[pvt] = vn2[i__];
	}

/*        Generate elementary reflector H(i). */

	if (offpi < *m) {
	    i__2 = *m - offpi + 1;
	    slarfg_(&i__2, &a[offpi + i__ * a_dim1], &a[offpi + 1 + i__ * 
		    a_dim1], &c__1, &tau[i__]);
	} else {
	    slarfg_(&c__1, &a[*m + i__ * a_dim1], &a[*m + i__ * a_dim1], &
		    c__1, &tau[i__]);
	}

	if (i__ < *n) {

/*           Apply H(i)' to A(offset+i:m,i+1:n) from the left. */

	    aii = a[offpi + i__ * a_dim1];
	    a[offpi + i__ * a_dim1] = 1.f;
	    i__2 = *m - offpi + 1;
	    i__3 = *n - i__;
	    slarf_("Left", &i__2, &i__3, &a[offpi + i__ * a_dim1], &c__1, &
		    tau[i__], &a[offpi + (i__ + 1) * a_dim1], lda, &work[1], (
		    ftnlen)4);
	    a[offpi + i__ * a_dim1] = aii;
	}

/*        Update partial column norms. */

	i__2 = *n;
	for (j = i__ + 1; j <= i__2; ++j) {
	    if (vn1[j] != 0.f) {
/* Computing 2nd power */
		r__2 = (r__1 = a[offpi + j * a_dim1], dabs(r__1)) / vn1[j];
		temp = 1.f - r__2 * r__2;
		temp = dmax(temp,0.f);
/* Computing 2nd power */
		r__1 = vn1[j] / vn2[j];
		temp2 = temp * .05f * (r__1 * r__1) + 1.f;
		if (temp2 == 1.f) {
		    if (offpi < *m) {
			i__3 = *m - offpi;
			vn1[j] = snrm2_(&i__3, &a[offpi + 1 + j * a_dim1], &
				c__1);
			vn2[j] = vn1[j];
		    } else {
			vn1[j] = 0.f;
			vn2[j] = 0.f;
		    }
		} else {
		    vn1[j] *= sqrt(temp);
		}
	    }
/* L10: */
	}

/* L20: */
    }

    return 0;

/*     End of SLAQP2 */

} /* slaqp2_ */
Esempio n. 23
0
/*<       SUBROUTINE SLARFG( N, ALPHA, X, INCX, TAU ) >*/
/* Subroutine */ int slarfg_(integer *n, real *alpha, real *x, integer *incx,
        real *tau)
{
    /* System generated locals */
    integer i__1;
    real r__1;

    /* Builtin functions */
    double r_sign(real *, real *);

    /* Local variables */
    integer j, knt;
    real beta;
    extern doublereal snrm2_(integer *, real *, integer *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    real xnorm;
    extern doublereal slapy2_(real *, real *), slamch_(char *, ftnlen);
    real safmin, rsafmn;


/*  -- LAPACK auxiliary routine (version 3.0) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     September 30, 1994 */

/*     .. Scalar Arguments .. */
/*<       INTEGER            INCX, N >*/
/*<       REAL               ALPHA, TAU >*/
/*     .. */
/*     .. Array Arguments .. */
/*<       REAL               X( * ) >*/
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLARFG generates a real elementary reflector H of order n, such */
/*  that */

/*        H * ( alpha ) = ( beta ),   H' * H = I. */
/*            (   x   )   (   0  ) */

/*  where alpha and beta are scalars, and x is an (n-1)-element real */
/*  vector. H is represented in the form */

/*        H = I - tau * ( 1 ) * ( 1 v' ) , */
/*                      ( v ) */

/*  where tau is a real scalar and v is a real (n-1)-element */
/*  vector. */

/*  If the elements of x are all zero, then tau = 0 and H is taken to be */
/*  the unit matrix. */

/*  Otherwise  1 <= tau <= 2. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the elementary reflector. */

/*  ALPHA   (input/output) REAL */
/*          On entry, the value alpha. */
/*          On exit, it is overwritten with the value beta. */

/*  X       (input/output) REAL array, dimension */
/*                         (1+(N-2)*abs(INCX)) */
/*          On entry, the vector x. */
/*          On exit, it is overwritten with the vector v. */

/*  INCX    (input) INTEGER */
/*          The increment between elements of X. INCX > 0. */

/*  TAU     (output) REAL */
/*          The value tau. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*<       REAL               ONE, ZERO >*/
/*<       PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 ) >*/
/*     .. */
/*     .. Local Scalars .. */
/*<       INTEGER            J, KNT >*/
/*<       REAL               BETA, RSAFMN, SAFMIN, XNORM >*/
/*     .. */
/*     .. External Functions .. */
/*<       REAL               SLAMCH, SLAPY2, SNRM2 >*/
/*<       EXTERNAL           SLAMCH, SLAPY2, SNRM2 >*/
/*     .. */
/*     .. Intrinsic Functions .. */
/*<       INTRINSIC          ABS, SIGN >*/
/*     .. */
/*     .. External Subroutines .. */
/*<       EXTERNAL           SSCAL >*/
/*     .. */
/*     .. Executable Statements .. */

/*<       IF( N.LE.1 ) THEN >*/
    /* Parameter adjustments */
    --x;

    /* Function Body */
    if (*n <= 1) {
/*<          TAU = ZERO >*/
        *tau = (float)0.;
/*<          RETURN >*/
        return 0;
/*<       END IF >*/
    }

/*<       XNORM = SNRM2( N-1, X, INCX ) >*/
    i__1 = *n - 1;
    xnorm = snrm2_(&i__1, &x[1], incx);

/*<       IF( XNORM.EQ.ZERO ) THEN >*/
    if (xnorm == (float)0.) {

/*        H  =  I */

/*<          TAU = ZERO >*/
        *tau = (float)0.;
/*<       ELSE >*/
    } else {

/*        general case */

/*<          BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) >*/
        r__1 = slapy2_(alpha, &xnorm);
        beta = -r_sign(&r__1, alpha);
/*<          SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' ) >*/
        safmin = slamch_("S", (ftnlen)1) / slamch_("E", (ftnlen)1);
/*<          IF( ABS( BETA ).LT.SAFMIN ) THEN >*/
        if (dabs(beta) < safmin) {

/*           XNORM, BETA may be inaccurate; scale X and recompute them */

/*<             RSAFMN = ONE / SAFMIN >*/
            rsafmn = (float)1. / safmin;
/*<             KNT = 0 >*/
            knt = 0;
/*<    10       CONTINUE >*/
L10:
/*<             KNT = KNT + 1 >*/
            ++knt;
/*<             CALL SSCAL( N-1, RSAFMN, X, INCX ) >*/
            i__1 = *n - 1;
            sscal_(&i__1, &rsafmn, &x[1], incx);
/*<             BETA = BETA*RSAFMN >*/
            beta *= rsafmn;
/*<             ALPHA = ALPHA*RSAFMN >*/
            *alpha *= rsafmn;
/*<    >*/
            if (dabs(beta) < safmin) {
                goto L10;
            }

/*           New BETA is at most 1, at least SAFMIN */

/*<             XNORM = SNRM2( N-1, X, INCX ) >*/
            i__1 = *n - 1;
            xnorm = snrm2_(&i__1, &x[1], incx);
/*<             BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) >*/
            r__1 = slapy2_(alpha, &xnorm);
            beta = -r_sign(&r__1, alpha);
/*<             TAU = ( BETA-ALPHA ) / BETA >*/
            *tau = (beta - *alpha) / beta;
/*<             CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) >*/
            i__1 = *n - 1;
            r__1 = (float)1. / (*alpha - beta);
            sscal_(&i__1, &r__1, &x[1], incx);

/*           If ALPHA is subnormal, it may lose relative accuracy */

/*<             ALPHA = BETA >*/
            *alpha = beta;
/*<             DO 20 J = 1, KNT >*/
            i__1 = knt;
            for (j = 1; j <= i__1; ++j) {
/*<                ALPHA = ALPHA*SAFMIN >*/
                *alpha *= safmin;
/*<    20       CONTINUE >*/
/* L20: */
            }
/*<          ELSE >*/
        } else {
/*<             TAU = ( BETA-ALPHA ) / BETA >*/
            *tau = (beta - *alpha) / beta;
/*<             CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) >*/
            i__1 = *n - 1;
            r__1 = (float)1. / (*alpha - beta);
            sscal_(&i__1, &r__1, &x[1], incx);
/*<             ALPHA = BETA >*/
            *alpha = beta;
/*<          END IF >*/
        }
/*<       END IF >*/
    }

/*<       RETURN >*/
    return 0;

/*     End of SLARFG */

/*<       END >*/
} /* slarfg_ */
Esempio n. 24
0
/* Subroutine */ int slaed3_(integer *k, integer *n, integer *n1, real *d__, 
	real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer *
	indx, integer *ctot, real *w, real *s, integer *info)
{
    /* System generated locals */
    integer q_dim1, q_offset, i__1, i__2;
    real r__1;

    /* Builtin functions */
    double sqrt(doublereal), r_sign(real *, real *);

    /* Local variables */
    static real temp;
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer i__, j;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *), scopy_(integer *, real *, 
	    integer *, real *, integer *);
    static integer n2;
    extern /* Subroutine */ int slaed4_(integer *, integer *, real *, real *, 
	    real *, real *, real *, integer *);
    extern doublereal slamc3_(real *, real *);
    static integer n12, ii, n23;
    extern /* Subroutine */ int xerbla_(char *, integer *), slacpy_(
	    char *, integer *, integer *, real *, integer *, real *, integer *
	    ), slaset_(char *, integer *, integer *, real *, real *, 
	    real *, integer *);
    static integer iq2;


#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]


/*  -- LAPACK routine (instrumented to count operations, version 3.0) --   
       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,   
       Courant Institute, NAG Ltd., and Rice University   
       June 30, 1999   

       Common block to return operation count and iteration count   
       ITCNT is unchanged, OPS is only incremented   

    Purpose   
    =======   

    SLAED3 finds the roots of the secular equation, as defined by the   
    values in D, W, and RHO, between 1 and K.  It makes the   
    appropriate calls to SLAED4 and then updates the eigenvectors by   
    multiplying the matrix of eigenvectors of the pair of eigensystems   
    being combined by the matrix of eigenvectors of the K-by-K system   
    which is solved here.   

    This code makes very mild assumptions about floating point   
    arithmetic. It will work on machines with a guard digit in   
    add/subtract, or on those binary machines without guard digits   
    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.   
    It could conceivably fail on hexadecimal or decimal machines   
    without guard digits, but we know of none.   

    Arguments   
    =========   

    K       (input) INTEGER   
            The number of terms in the rational function to be solved by   
            SLAED4.  K >= 0.   

    N       (input) INTEGER   
            The number of rows and columns in the Q matrix.   
            N >= K (deflation may result in N>K).   

    N1      (input) INTEGER   
            The location of the last eigenvalue in the leading submatrix.   
            min(1,N) <= N1 <= N/2.   

    D       (output) REAL array, dimension (N)   
            D(I) contains the updated eigenvalues for   
            1 <= I <= K.   

    Q       (output) REAL array, dimension (LDQ,N)   
            Initially the first K columns are used as workspace.   
            On output the columns 1 to K contain   
            the updated eigenvectors.   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.  LDQ >= max(1,N).   

    RHO     (input) REAL   
            The value of the parameter in the rank one update equation.   
            RHO >= 0 required.   

    DLAMDA  (input/output) REAL array, dimension (K)   
            The first K elements of this array contain the old roots   
            of the deflated updating problem.  These are the poles   
            of the secular equation. May be changed on output by   
            having lowest order bit set to zero on Cray X-MP, Cray Y-MP,   
            Cray-2, or Cray C-90, as described above.   

    Q2      (input) REAL array, dimension (LDQ2, N)   
            The first K columns of this matrix contain the non-deflated   
            eigenvectors for the split problem.   

    INDX    (input) INTEGER array, dimension (N)   
            The permutation used to arrange the columns of the deflated   
            Q matrix into three groups (see SLAED2).   
            The rows of the eigenvectors found by SLAED4 must be likewise   
            permuted before the matrix multiply can take place.   

    CTOT    (input) INTEGER array, dimension (4)   
            A count of the total number of the various types of columns   
            in Q, as described in INDX.  The fourth column type is any   
            column which has been deflated.   

    W       (input/output) REAL array, dimension (K)   
            The first K elements of this array contain the components   
            of the deflation-adjusted updating vector. Destroyed on   
            output.   

    S       (workspace) REAL array, dimension (N1 + 1)*K   
            Will contain the eigenvectors of the repaired matrix which   
            will be multiplied by the previously accumulated eigenvectors   
            to update the system.   

    LDS     (input) INTEGER   
            The leading dimension of S.  LDS >= max(1,K).   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if INFO = 1, an eigenvalue did not converge   

    Further Details   
    ===============   

    Based on contributions by   
       Jeff Rutter, Computer Science Division, University of California   
       at Berkeley, USA   
    Modified by Francoise Tisseur, University of Tennessee.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --dlamda;
    --q2;
    --indx;
    --ctot;
    --w;
    --s;

    /* Function Body */
    *info = 0;

    if (*k < 0) {
	*info = -1;
    } else if (*n < *k) {
	*info = -2;
    } else if (*ldq < max(1,*n)) {
	*info = -6;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAED3", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*k == 0) {
	return 0;
    }

/*     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can   
       be computed with high relative accuracy (barring over/underflow).   
       This is a problem on machines without a guard digit in   
       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).   
       The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),   
       which on any of these machines zeros out the bottommost   
       bit of DLAMDA(I) if it is 1; this makes the subsequent   
       subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation   
       occurs. On binary machines with a guard digit (almost all   
       machines) it does not change DLAMDA(I) at all. On hexadecimal   
       and decimal machines with a guard digit, it slightly   
       changes the bottommost bits of DLAMDA(I). It does not account   
       for hexadecimal or decimal machines without guard digits   
       (we know of none). We use a subroutine call to compute   
       2*DLAMBDA(I) to prevent optimizing compilers from eliminating   
       this code. */

    latime_1.ops += *n << 1;
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
/* L10: */
    }

    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	slaed4_(k, &j, &dlamda[1], &w[1], &q_ref(1, j), rho, &d__[j], info);

/*        If the zero finder fails, the computation is terminated. */

	if (*info != 0) {
	    goto L120;
	}
/* L20: */
    }

    if (*k == 1) {
	goto L110;
    }
    if (*k == 2) {
	i__1 = *k;
	for (j = 1; j <= i__1; ++j) {
	    w[1] = q_ref(1, j);
	    w[2] = q_ref(2, j);
	    ii = indx[1];
	    q_ref(1, j) = w[ii];
	    ii = indx[2];
	    q_ref(2, j) = w[ii];
/* L30: */
	}
	goto L110;
    }

/*     Compute updated W. */

    scopy_(k, &w[1], &c__1, &s[1], &c__1);

/*     Initialize W(I) = Q(I,I) */

    i__1 = *ldq + 1;
    scopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
    latime_1.ops += *k * 3 * (*k - 1);
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]);
/* L40: */
	}
	i__2 = *k;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]);
/* L50: */
	}
/* L60: */
    }
    latime_1.ops += *k;
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	r__1 = sqrt(-w[i__]);
	w[i__] = r_sign(&r__1, &s[i__]);
/* L70: */
    }

/*     Compute eigenvectors of the modified rank-1 modification. */

    latime_1.ops += (*k << 2) * *k;
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *k;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    s[i__] = w[i__] / q_ref(i__, j);
/* L80: */
	}
	temp = snrm2_(k, &s[1], &c__1);
	i__2 = *k;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    ii = indx[i__];
	    q_ref(i__, j) = s[ii] / temp;
/* L90: */
	}
/* L100: */
    }

/*     Compute the updated eigenvectors. */

L110:

    n2 = *n - *n1;
    n12 = ctot[1] + ctot[2];
    n23 = ctot[2] + ctot[3];

    slacpy_("A", &n23, k, &q_ref(ctot[1] + 1, 1), ldq, &s[1], &n23)
	    ;
    iq2 = *n1 * n12 + 1;
    if (n23 != 0) {
	latime_1.ops += (real) n2 * 2 * *k * n23;
	sgemm_("N", "N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, &
		c_b23, &q_ref(*n1 + 1, 1), ldq);
    } else {
	slaset_("A", &n2, k, &c_b23, &c_b23, &q_ref(*n1 + 1, 1), ldq);
    }

    slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12);
    if (n12 != 0) {
	latime_1.ops += (real) (*n1) * 2 * *k * n12;
	sgemm_("N", "N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23,
		 &q[q_offset], ldq);
    } else {
	slaset_("A", n1, k, &c_b23, &c_b23, &q_ref(1, 1), ldq);
    }


L120:
    return 0;

/*     End of SLAED3 */

} /* slaed3_ */
Esempio n. 25
0
/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, 
	integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, 
	integer *ldbx, integer *perm, integer *givptr, integer *givcol, 
	integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real *
	difl, real *difr, real *z__, integer *k, real *c__, real *s, real *
	work, integer *info)
{
    /* System generated locals */
    integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, 
	    difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, 
	    poles_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    integer i__, j, m, n;
    real dj;
    integer nlp1;
    real temp;
    real diflj, difrj, dsigj;
    real dsigjp;

/*  -- LAPACK routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SLALS0 applies back the multiplying factors of either the left or the */
/*  right singular vector matrix of a diagonal matrix appended by a row */
/*  to the right hand side matrix B in solving the least squares problem */
/*  using the divide-and-conquer SVD approach. */

/*  For the left singular vector matrix, three types of orthogonal */
/*  matrices are involved: */

/*  (1L) Givens rotations: the number of such rotations is GIVPTR; the */
/*       pairs of columns/rows they were applied to are stored in GIVCOL; */
/*       and the C- and S-values of these rotations are stored in GIVNUM. */

/*  (2L) Permutation. The (NL+1)-st row of B is to be moved to the first */
/*       row, and for J=2:N, PERM(J)-th row of B is to be moved to the */
/*       J-th row. */

/*  (3L) The left singular vector matrix of the remaining matrix. */

/*  For the right singular vector matrix, four types of orthogonal */
/*  matrices are involved: */

/*  (1R) The right singular vector matrix of the remaining matrix. */

/*  (2R) If SQRE = 1, one extra Givens rotation to generate the right */
/*       null space. */

/*  (3R) The inverse transformation of (2L). */

/*  (4R) The inverse transformation of (1L). */

/*  Arguments */
/*  ========= */

/*  ICOMPQ (input) INTEGER */
/*         Specifies whether singular vectors are to be computed in */
/*         factored form: */
/*         = 0: Left singular vector matrix. */
/*         = 1: Right singular vector matrix. */

/*  NL     (input) INTEGER */
/*         The row dimension of the upper block. NL >= 1. */

/*  NR     (input) INTEGER */
/*         The row dimension of the lower block. NR >= 1. */

/*  SQRE   (input) INTEGER */
/*         = 0: the lower block is an NR-by-NR square matrix. */
/*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */

/*         The bidiagonal matrix has row dimension N = NL + NR + 1, */
/*         and column dimension M = N + SQRE. */

/*  NRHS   (input) INTEGER */
/*         The number of columns of B and BX. NRHS must be at least 1. */

/*  B      (input/output) REAL array, dimension ( LDB, NRHS ) */
/*         On input, B contains the right hand sides of the least */
/*         squares problem in rows 1 through M. On output, B contains */
/*         the solution X in rows 1 through N. */

/*  LDB    (input) INTEGER */
/*         The leading dimension of B. LDB must be at least */
/*         max(1,MAX( M, N ) ). */

/*  BX     (workspace) REAL array, dimension ( LDBX, NRHS ) */

/*  LDBX   (input) INTEGER */
/*         The leading dimension of BX. */

/*  PERM   (input) INTEGER array, dimension ( N ) */
/*         The permutations (from deflation and sorting) applied */
/*         to the two blocks. */

/*  GIVPTR (input) INTEGER */
/*         The number of Givens rotations which took place in this */
/*         subproblem. */

/*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) */
/*         Each pair of numbers indicates a pair of rows/columns */
/*         involved in a Givens rotation. */

/*  LDGCOL (input) INTEGER */
/*         The leading dimension of GIVCOL, must be at least N. */

/*  GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) */
/*         Each number indicates the C or S value used in the */
/*         corresponding Givens rotation. */

/*  LDGNUM (input) INTEGER */
/*         The leading dimension of arrays DIFR, POLES and */
/*         GIVNUM, must be at least K. */

/*  POLES  (input) REAL array, dimension ( LDGNUM, 2 ) */
/*         On entry, POLES(1:K, 1) contains the new singular */
/*         values obtained from solving the secular equation, and */
/*         POLES(1:K, 2) is an array containing the poles in the secular */
/*         equation. */

/*  DIFL   (input) REAL array, dimension ( K ). */
/*         On entry, DIFL(I) is the distance between I-th updated */
/*         (undeflated) singular value and the I-th (undeflated) old */
/*         singular value. */

/*  DIFR   (input) REAL array, dimension ( LDGNUM, 2 ). */
/*         On entry, DIFR(I, 1) contains the distances between I-th */
/*         updated (undeflated) singular value and the I+1-th */
/*         (undeflated) old singular value. And DIFR(I, 2) is the */
/*         normalizing factor for the I-th right singular vector. */

/*  Z      (input) REAL array, dimension ( K ) */
/*         Contain the components of the deflation-adjusted updating row */
/*         vector. */

/*  K      (input) INTEGER */
/*         Contains the dimension of the non-deflated matrix, */
/*         This is the order of the related secular equation. 1 <= K <=N. */

/*  C      (input) REAL */
/*         C contains garbage if SQRE =0 and the C-value of a Givens */
/*         rotation related to the right null space if SQRE = 1. */

/*  S      (input) REAL */
/*         S contains garbage if SQRE =0 and the S-value of a Givens */
/*         rotation related to the right null space if SQRE = 1. */

/*  WORK   (workspace) REAL array, dimension ( K ) */

/*  INFO   (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Ming Gu and Ren-Cang Li, Computer Science Division, University of */
/*       California at Berkeley, USA */
/*     Osni Marques, LBNL/NERSC, USA */

/*  ===================================================================== */

/*     Test the input parameters. */

    /* Parameter adjustments */
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    bx_dim1 = *ldbx;
    bx_offset = 1 + bx_dim1;
    bx -= bx_offset;
    --perm;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1;
    givcol -= givcol_offset;
    difr_dim1 = *ldgnum;
    difr_offset = 1 + difr_dim1;
    difr -= difr_offset;
    poles_dim1 = *ldgnum;
    poles_offset = 1 + poles_dim1;
    poles -= poles_offset;
    givnum_dim1 = *ldgnum;
    givnum_offset = 1 + givnum_dim1;
    givnum -= givnum_offset;
    --difl;
    --z__;
    --work;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*nl < 1) {
	*info = -2;
    } else if (*nr < 1) {
	*info = -3;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -4;
    }

    n = *nl + *nr + 1;

    if (*nrhs < 1) {
	*info = -5;
    } else if (*ldb < n) {
	*info = -7;
    } else if (*ldbx < n) {
	*info = -9;
    } else if (*givptr < 0) {
	*info = -11;
    } else if (*ldgcol < n) {
	*info = -13;
    } else if (*ldgnum < n) {
	*info = -15;
    } else if (*k < 1) {
	*info = -20;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLALS0", &i__1);
	return 0;
    }

    m = n + *sqre;
    nlp1 = *nl + 1;

    if (*icompq == 0) {

/*        Apply back orthogonal transformations from the left. */

/*        Step (1L): apply back the Givens rotations performed. */

	i__1 = *givptr;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, &
		    b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + 
		    (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]);
	}

/*        Step (2L): permute rows of B. */

	scopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx);
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    scopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], 
		    ldbx);
	}

/*        Step (3L): apply the inverse of the left singular vector */
/*        matrix to BX. */

	if (*k == 1) {
	    scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb);
	    if (z__[1] < 0.f) {
		sscal_(nrhs, &c_b5, &b[b_offset], ldb);
	    }
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		diflj = difl[j];
		dj = poles[j + poles_dim1];
		dsigj = -poles[j + (poles_dim1 << 1)];
		if (j < *k) {
		    difrj = -difr[j + difr_dim1];
		    dsigjp = -poles[j + 1 + (poles_dim1 << 1)];
		}
		if (z__[j] == 0.f || poles[j + (poles_dim1 << 1)] == 0.f) {
		    work[j] = 0.f;
		} else {
		    work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj /
			     (poles[j + (poles_dim1 << 1)] + dj);
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == 
			    0.f) {
			work[i__] = 0.f;
		    } else {
			work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] 
				/ (slamc3_(&poles[i__ + (poles_dim1 << 1)], &
				dsigj) - diflj) / (poles[i__ + (poles_dim1 << 
				1)] + dj);
		    }
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == 
			    0.f) {
			work[i__] = 0.f;
		    } else {
			work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] 
				/ (slamc3_(&poles[i__ + (poles_dim1 << 1)], &
				dsigjp) + difrj) / (poles[i__ + (poles_dim1 <<
				 1)] + dj);
		    }
		}
		work[1] = -1.f;
		temp = snrm2_(k, &work[1], &c__1);
		sgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], &
			c__1, &c_b13, &b[j + b_dim1], ldb);
		slascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b[j + 
			b_dim1], ldb, info);
	    }
	}

/*        Move the deflated rows of BX to B also. */

	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    slacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 
		    + b_dim1], ldb);
	}
    } else {

/*        Apply back the right orthogonal transformations. */

/*        Step (1R): apply back the new right singular vector matrix */
/*        to B. */

	if (*k == 1) {
	    scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx);
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		dsigj = poles[j + (poles_dim1 << 1)];
		if (z__[j] == 0.f) {
		    work[j] = 0.f;
		} else {
		    work[j] = -z__[j] / difl[j] / (dsigj + poles[j + 
			    poles_dim1]) / difr[j + (difr_dim1 << 1)];
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			work[i__] = 0.f;
		    } else {
			r__1 = -poles[i__ + 1 + (poles_dim1 << 1)];
			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[
				i__ + difr_dim1]) / (dsigj + poles[i__ + 
				poles_dim1]) / difr[i__ + (difr_dim1 << 1)];
		    }
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			work[i__] = 0.f;
		    } else {
			r__1 = -poles[i__ + (poles_dim1 << 1)];
			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[
				i__]) / (dsigj + poles[i__ + poles_dim1]) / 
				difr[i__ + (difr_dim1 << 1)];
		    }
		}
		sgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], &
			c__1, &c_b13, &bx[j + bx_dim1], ldbx);
	    }
	}

/*        Step (2R): if SQRE = 1, apply back the rotation that is */
/*        related to the right null space of the subproblem. */

	if (*sqre == 1) {
	    scopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx);
	    srot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, 
		    s);
	}
	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    slacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + 
		    bx_dim1], ldbx);
	}

/*        Step (3R): permute rows of B. */

	scopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb);
	if (*sqre == 1) {
	    scopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb);
	}
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    scopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], 
		    ldb);
	}

/*        Step (4R): apply back the Givens rotations performed. */

	for (i__ = *givptr; i__ >= 1; --i__) {
	    r__1 = -givnum[i__ + givnum_dim1];
	    srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, &
		    b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + 
		    (givnum_dim1 << 1)], &r__1);
	}
    }

    return 0;

/*     End of SLALS0 */

} /* slals0_ */
Esempio n. 26
0
GURLS_EXPORT float nrm2(const int N, const float* X, const int incX)
{
    return snrm2_(const_cast<int*>(&N), const_cast<float*>(X), const_cast<int*>(&incX));
}
Esempio n. 27
0
/* Subroutine */ int slarfg_(integer *n, real *alpha, real *x, integer *incx, 
	real *tau)
{
    /* System generated locals */
    integer i__1;
    real r__1;

    /* Builtin functions */
    double r_sign(real *, real *);

    /* Local variables */
    static integer j, knt;
    static real beta;
    extern doublereal snrm2_(integer *, real *, integer *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real xnorm;
    extern doublereal slapy2_(real *, real *), slamch_(char *, ftnlen);
    static real safmin, rsafmn;


/*  -- LAPACK auxiliary routine (version 3.0) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     September 30, 1994 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLARFG generates a real elementary reflector H of order n, such */
/*  that */

/*        H * ( alpha ) = ( beta ),   H' * H = I. */
/*            (   x   )   (   0  ) */

/*  where alpha and beta are scalars, and x is an (n-1)-element real */
/*  vector. H is represented in the form */

/*        H = I - tau * ( 1 ) * ( 1 v' ) , */
/*                      ( v ) */

/*  where tau is a real scalar and v is a real (n-1)-element */
/*  vector. */

/*  If the elements of x are all zero, then tau = 0 and H is taken to be */
/*  the unit matrix. */

/*  Otherwise  1 <= tau <= 2. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the elementary reflector. */

/*  ALPHA   (input/output) REAL */
/*          On entry, the value alpha. */
/*          On exit, it is overwritten with the value beta. */

/*  X       (input/output) REAL array, dimension */
/*                         (1+(N-2)*abs(INCX)) */
/*          On entry, the vector x. */
/*          On exit, it is overwritten with the vector v. */

/*  INCX    (input) INTEGER */
/*          The increment between elements of X. INCX > 0. */

/*  TAU     (output) REAL */
/*          The value tau. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    --x;

    /* Function Body */
    if (*n <= 1) {
	*tau = 0.f;
	return 0;
    }

    i__1 = *n - 1;
    xnorm = snrm2_(&i__1, &x[1], incx);

    if (xnorm == 0.f) {

/*        H  =  I */

	*tau = 0.f;
    } else {

/*        general case */

	r__1 = slapy2_(alpha, &xnorm);
	beta = -r_sign(&r__1, alpha);
	safmin = slamch_("S", (ftnlen)1) / slamch_("E", (ftnlen)1);
	if (dabs(beta) < safmin) {

/*           XNORM, BETA may be inaccurate; scale X and recompute them */

	    rsafmn = 1.f / safmin;
	    knt = 0;
L10:
	    ++knt;
	    i__1 = *n - 1;
	    sscal_(&i__1, &rsafmn, &x[1], incx);
	    beta *= rsafmn;
	    *alpha *= rsafmn;
	    if (dabs(beta) < safmin) {
		goto L10;
	    }

/*           New BETA is at most 1, at least SAFMIN */

	    i__1 = *n - 1;
	    xnorm = snrm2_(&i__1, &x[1], incx);
	    r__1 = slapy2_(alpha, &xnorm);
	    beta = -r_sign(&r__1, alpha);
	    *tau = (beta - *alpha) / beta;
	    i__1 = *n - 1;
	    r__1 = 1.f / (*alpha - beta);
	    sscal_(&i__1, &r__1, &x[1], incx);

/*           If ALPHA is subnormal, it may lose relative accuracy */

	    *alpha = beta;
	    i__1 = knt;
	    for (j = 1; j <= i__1; ++j) {
		*alpha *= safmin;
/* L20: */
	    }
	} else {
	    *tau = (beta - *alpha) / beta;
	    i__1 = *n - 1;
	    r__1 = 1.f / (*alpha - beta);
	    sscal_(&i__1, &r__1, &x[1], incx);
	    *alpha = beta;
	}
    }

    return 0;

/*     End of SLARFG */

} /* slarfg_ */
Esempio n. 28
0
/* Subroutine */ int cstein_(integer *n, real *d__, real *e, integer *m, real 
	*w, integer *iblock, integer *isplit, complex *z__, integer *ldz, 
	real *work, integer *iwork, integer *ifail, integer *info)
{
    /* System generated locals */
    integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4, r__5;
    complex q__1;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    integer i__, j, b1, j1, bn, jr;
    real xj, scl, eps, ctr, sep, nrm, tol;
    integer its;
    real xjm, eps1;
    integer jblk, nblk, jmax;
    extern doublereal snrm2_(integer *, real *, integer *);
    integer iseed[4], gpind, iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    extern doublereal sasum_(integer *, real *, integer *);
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    real ortol;
    integer indrv1, indrv2, indrv3, indrv4, indrv5;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *), slagtf_(
	    integer *, real *, real *, real *, real *, real *, real *, 
	    integer *, integer *);
    integer nrmchk;
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slagts_(integer *, integer *, real *, real *, 
	    real *, real *, integer *, real *, real *, integer *);
    integer blksiz;
    real onenrm, pertol;
    extern /* Subroutine */ int slarnv_(integer *, integer *, integer *, real 
	    *);
    real stpcrt;


/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CSTEIN computes the eigenvectors of a real symmetric tridiagonal */
/*  matrix T corresponding to specified eigenvalues, using inverse */
/*  iteration. */

/*  The maximum number of iterations allowed for each eigenvector is */
/*  specified by an internal parameter MAXITS (currently set to 5). */

/*  Although the eigenvectors are real, they are stored in a complex */
/*  array, which may be passed to CUNMTR or CUPMTR for back */
/*  transformation to the eigenvectors of a complex Hermitian matrix */
/*  which was reduced to tridiagonal form. */


/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix.  N >= 0. */

/*  D       (input) REAL array, dimension (N) */
/*          The n diagonal elements of the tridiagonal matrix T. */

/*  E       (input) REAL array, dimension (N-1) */
/*          The (n-1) subdiagonal elements of the tridiagonal matrix */
/*          T, stored in elements 1 to N-1. */

/*  M       (input) INTEGER */
/*          The number of eigenvectors to be found.  0 <= M <= N. */

/*  W       (input) REAL array, dimension (N) */
/*          The first M elements of W contain the eigenvalues for */
/*          which eigenvectors are to be computed.  The eigenvalues */
/*          should be grouped by split-off block and ordered from */
/*          smallest to largest within the block.  ( The output array */
/*          W from SSTEBZ with ORDER = 'B' is expected here. ) */

/*  IBLOCK  (input) INTEGER array, dimension (N) */
/*          The submatrix indices associated with the corresponding */
/*          eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to */
/*          the first submatrix from the top, =2 if W(i) belongs to */
/*          the second submatrix, etc.  ( The output array IBLOCK */
/*          from SSTEBZ is expected here. ) */

/*  ISPLIT  (input) INTEGER array, dimension (N) */
/*          The splitting points, at which T breaks up into submatrices. */
/*          The first submatrix consists of rows/columns 1 to */
/*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
/*          through ISPLIT( 2 ), etc. */
/*          ( The output array ISPLIT from SSTEBZ is expected here. ) */

/*  Z       (output) COMPLEX array, dimension (LDZ, M) */
/*          The computed eigenvectors.  The eigenvector associated */
/*          with the eigenvalue W(i) is stored in the i-th column of */
/*          Z.  Any vector which fails to converge is set to its current */
/*          iterate after MAXITS iterations. */
/*          The imaginary parts of the eigenvectors are set to zero. */

/*  LDZ     (input) INTEGER */
/*          The leading dimension of the array Z.  LDZ >= max(1,N). */

/*  WORK    (workspace) REAL array, dimension (5*N) */

/*  IWORK   (workspace) INTEGER array, dimension (N) */

/*  IFAIL   (output) INTEGER array, dimension (M) */
/*          On normal exit, all elements of IFAIL are zero. */
/*          If one or more eigenvectors fail to converge after */
/*          MAXITS iterations, then their indices are stored in */
/*          array IFAIL. */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -i, the i-th argument had an illegal value */
/*          > 0: if INFO = i, then i eigenvectors failed to converge */
/*               in MAXITS iterations.  Their indices are stored in */
/*               array IFAIL. */

/*  Internal Parameters */
/*  =================== */

/*  MAXITS  INTEGER, default = 5 */
/*          The maximum number of iterations performed. */

/*  EXTRA   INTEGER, default = 2 */
/*          The number of iterations performed after norm growth */
/*          criterion is satisfied, should be at least 1. */

/* ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    --e;
    --w;
    --iblock;
    --isplit;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --work;
    --iwork;
    --ifail;

    /* Function Body */
    *info = 0;
    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
	ifail[i__] = 0;
/* L10: */
    }

    if (*n < 0) {
	*info = -1;
    } else if (*m < 0 || *m > *n) {
	*info = -4;
    } else if (*ldz < max(1,*n)) {
	*info = -9;
    } else {
	i__1 = *m;
	for (j = 2; j <= i__1; ++j) {
	    if (iblock[j] < iblock[j - 1]) {
		*info = -6;
		goto L30;
	    }
	    if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) {
		*info = -5;
		goto L30;
	    }
/* L20: */
	}
L30:
	;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CSTEIN", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *m == 0) {
	return 0;
    } else if (*n == 1) {
	i__1 = z_dim1 + 1;
	z__[i__1].r = 1.f, z__[i__1].i = 0.f;
	return 0;
    }

/*     Get machine constants. */

    eps = slamch_("Precision");

/*     Initialize seed for random number generator SLARNV. */

    for (i__ = 1; i__ <= 4; ++i__) {
	iseed[i__ - 1] = 1;
/* L40: */
    }

/*     Initialize pointers. */

    indrv1 = 0;
    indrv2 = indrv1 + *n;
    indrv3 = indrv2 + *n;
    indrv4 = indrv3 + *n;
    indrv5 = indrv4 + *n;

/*     Compute eigenvectors of matrix blocks. */

    j1 = 1;
    i__1 = iblock[*m];
    for (nblk = 1; nblk <= i__1; ++nblk) {

/*        Find starting and ending indices of block nblk. */

	if (nblk == 1) {
	    b1 = 1;
	} else {
	    b1 = isplit[nblk - 1] + 1;
	}
	bn = isplit[nblk];
	blksiz = bn - b1 + 1;
	if (blksiz == 1) {
	    goto L60;
	}
	gpind = b1;

/*        Compute reorthogonalization criterion and stopping criterion. */

	onenrm = (r__1 = d__[b1], dabs(r__1)) + (r__2 = e[b1], dabs(r__2));
/* Computing MAX */
	r__3 = onenrm, r__4 = (r__1 = d__[bn], dabs(r__1)) + (r__2 = e[bn - 1]
		, dabs(r__2));
	onenrm = dmax(r__3,r__4);
	i__2 = bn - 1;
	for (i__ = b1 + 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    r__4 = onenrm, r__5 = (r__1 = d__[i__], dabs(r__1)) + (r__2 = e[
		    i__ - 1], dabs(r__2)) + (r__3 = e[i__], dabs(r__3));
	    onenrm = dmax(r__4,r__5);
/* L50: */
	}
	ortol = onenrm * .001f;

	stpcrt = sqrt(.1f / blksiz);

/*        Loop through eigenvalues of block nblk. */

L60:
	jblk = 0;
	i__2 = *m;
	for (j = j1; j <= i__2; ++j) {
	    if (iblock[j] != nblk) {
		j1 = j;
		goto L180;
	    }
	    ++jblk;
	    xj = w[j];

/*           Skip all the work if the block size is one. */

	    if (blksiz == 1) {
		work[indrv1 + 1] = 1.f;
		goto L140;
	    }

/*           If eigenvalues j and j-1 are too close, add a relatively */
/*           small perturbation. */

	    if (jblk > 1) {
		eps1 = (r__1 = eps * xj, dabs(r__1));
		pertol = eps1 * 10.f;
		sep = xj - xjm;
		if (sep < pertol) {
		    xj = xjm + pertol;
		}
	    }

	    its = 0;
	    nrmchk = 0;

/*           Get random starting vector. */

	    slarnv_(&c__2, iseed, &blksiz, &work[indrv1 + 1]);

/*           Copy the matrix T so it won't be destroyed in factorization. */

	    scopy_(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1);
	    i__3 = blksiz - 1;
	    scopy_(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1);
	    i__3 = blksiz - 1;
	    scopy_(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1);

/*           Compute LU factors with partial pivoting  ( PT = LU ) */

	    tol = 0.f;
	    slagtf_(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[
		    indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo);

/*           Update iteration count. */

L70:
	    ++its;
	    if (its > 5) {
		goto L120;
	    }

/*           Normalize and scale the righthand side vector Pb. */

/* Computing MAX */
	    r__2 = eps, r__3 = (r__1 = work[indrv4 + blksiz], dabs(r__1));
	    scl = blksiz * onenrm * dmax(r__2,r__3) / sasum_(&blksiz, &work[
		    indrv1 + 1], &c__1);
	    sscal_(&blksiz, &scl, &work[indrv1 + 1], &c__1);

/*           Solve the system LU = Pb. */

	    slagts_(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], &
		    work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[
		    indrv1 + 1], &tol, &iinfo);

/*           Reorthogonalize by modified Gram-Schmidt if eigenvalues are */
/*           close enough. */

	    if (jblk == 1) {
		goto L110;
	    }
	    if ((r__1 = xj - xjm, dabs(r__1)) > ortol) {
		gpind = j;
	    }
	    if (gpind != j) {
		i__3 = j - 1;
		for (i__ = gpind; i__ <= i__3; ++i__) {
		    ctr = 0.f;
		    i__4 = blksiz;
		    for (jr = 1; jr <= i__4; ++jr) {
			i__5 = b1 - 1 + jr + i__ * z_dim1;
			ctr += work[indrv1 + jr] * z__[i__5].r;
/* L80: */
		    }
		    i__4 = blksiz;
		    for (jr = 1; jr <= i__4; ++jr) {
			i__5 = b1 - 1 + jr + i__ * z_dim1;
			work[indrv1 + jr] -= ctr * z__[i__5].r;
/* L90: */
		    }
/* L100: */
		}
	    }

/*           Check the infinity norm of the iterate. */

L110:
	    jmax = isamax_(&blksiz, &work[indrv1 + 1], &c__1);
	    nrm = (r__1 = work[indrv1 + jmax], dabs(r__1));

/*           Continue for additional iterations after norm reaches */
/*           stopping criterion. */

	    if (nrm < stpcrt) {
		goto L70;
	    }
	    ++nrmchk;
	    if (nrmchk < 3) {
		goto L70;
	    }

	    goto L130;

/*           If stopping criterion was not satisfied, update info and */
/*           store eigenvector number in array ifail. */

L120:
	    ++(*info);
	    ifail[*info] = j;

/*           Accept iterate as jth eigenvector. */

L130:
	    scl = 1.f / snrm2_(&blksiz, &work[indrv1 + 1], &c__1);
	    jmax = isamax_(&blksiz, &work[indrv1 + 1], &c__1);
	    if (work[indrv1 + jmax] < 0.f) {
		scl = -scl;
	    }
	    sscal_(&blksiz, &scl, &work[indrv1 + 1], &c__1);
L140:
	    i__3 = *n;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		i__4 = i__ + j * z_dim1;
		z__[i__4].r = 0.f, z__[i__4].i = 0.f;
/* L150: */
	    }
	    i__3 = blksiz;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		i__4 = b1 + i__ - 1 + j * z_dim1;
		i__5 = indrv1 + i__;
		q__1.r = work[i__5], q__1.i = 0.f;
		z__[i__4].r = q__1.r, z__[i__4].i = q__1.i;
/* L160: */
	    }

/*           Save the shift to check eigenvalue spacing at next */
/*           iteration. */

	    xjm = xj;

/* L170: */
	}
L180:
	;
    }

    return 0;

/*     End of CSTEIN */

} /* cstein_ */
Esempio n. 29
0
int main(int argc, char *argv[])
{
    void smatvec_mult(float alpha, float x[], float beta, float y[]);
    void spsolve(int n, float x[], float y[]);
    extern int sfgmr( int n,
	void (*matvec_mult)(float, float [], float, float []),
	void (*psolve)(int n, float [], float[]),
	float *rhs, float *sol, double tol, int restrt, int *itmax,
	FILE *fits);
    extern int sfill_diag(int n, NCformat *Astore);

    char     equed[1] = {'B'};
    yes_no_t equil;
    trans_t  trans;
    SuperMatrix A, L, U;
    SuperMatrix B, X;
    NCformat *Astore;
    NCformat *Ustore;
    SCformat *Lstore;
    GlobalLU_t	   Glu; /* facilitate multiple factorizations with 
                           SamePattern_SameRowPerm                  */
    float   *a;
    int      *asub, *xa;
    int      *etree;
    int      *perm_c; /* column permutation vector */
    int      *perm_r; /* row permutations from partial pivoting */
    int      nrhs, ldx, lwork, info, m, n, nnz;
    float   *rhsb, *rhsx, *xact;
    float   *work = NULL;
    float   *R, *C;
    float   u, rpg, rcond;
    float zero = 0.0;
    float one = 1.0;
    mem_usage_t   mem_usage;
    superlu_options_t options;
    SuperLUStat_t stat;
    FILE 	  *fp = stdin;

    int restrt, iter, maxit, i;
    double resid;
    float *x, *b;

#ifdef DEBUG
    extern int num_drop_L, num_drop_U;
#endif

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Enter main()");
#endif

    /* Defaults */
    lwork = 0;
    nrhs  = 1;
    trans = NOTRANS;

    /* Set the default input options:
	options.Fact = DOFACT;
	options.Equil = YES;
	options.ColPerm = COLAMD;
	options.DiagPivotThresh = 0.1; //different from complete LU
	options.Trans = NOTRANS;
	options.IterRefine = NOREFINE;
	options.SymmetricMode = NO;
	options.PivotGrowth = NO;
	options.ConditionNumber = NO;
	options.PrintStat = YES;
	options.RowPerm = LargeDiag;
	options.ILU_DropTol = 1e-4;
	options.ILU_FillTol = 1e-2;
	options.ILU_FillFactor = 10.0;
	options.ILU_DropRule = DROP_BASIC | DROP_AREA;
	options.ILU_Norm = INF_NORM;
	options.ILU_MILU = SILU;
     */
    ilu_set_default_options(&options);

    /* Modify the defaults. */
    options.PivotGrowth = YES;	  /* Compute reciprocal pivot growth */
    options.ConditionNumber = YES;/* Compute reciprocal condition number */

    if ( lwork > 0 ) {
	work = SUPERLU_MALLOC(lwork);
	if ( !work ) ABORT("Malloc fails for work[].");
    }

    /* Read matrix A from a file in Harwell-Boeing format.*/
    if (argc < 2)
    {
	printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n"
		"-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n"
		"-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n"
		"-t -triplet:\n\t[INPUT] is a triplet format matrix.\n",
		argv[0]);
	return 0;
    }
    else
    {
	switch (argv[1][1])
	{
	    case 'H':
	    case 'h':
		printf("Input a Harwell-Boeing format matrix:\n");
		sreadhb(fp, &m, &n, &nnz, &a, &asub, &xa);
		break;
	    case 'R':
	    case 'r':
		printf("Input a Rutherford-Boeing format matrix:\n");
		sreadrb(&m, &n, &nnz, &a, &asub, &xa);
		break;
	    case 'T':
	    case 't':
		printf("Input a triplet format matrix:\n");
		sreadtriple(&m, &n, &nnz, &a, &asub, &xa);
		break;
	    default:
		printf("Unrecognized format.\n");
		return 0;
	}
    }

    sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa,
                                SLU_NC, SLU_S, SLU_GE);
    Astore = A.Store;
    sfill_diag(n, Astore);
    printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
    fflush(stdout);

    /* Generate the right-hand side */
    if ( !(rhsb = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[].");
    if ( !(rhsx = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[].");
    sCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE);
    sCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE);
    xact = floatMalloc(n * nrhs);
    ldx = n;
    sGenXtrue(n, nrhs, xact, ldx);
    sFillRHS(trans, nrhs, xact, ldx, &A, &B);

    if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
    if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
    if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
    if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) )
	ABORT("SUPERLU_MALLOC fails for R[].");
    if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) )
	ABORT("SUPERLU_MALLOC fails for C[].");

    info = 0;
#ifdef DEBUG
    num_drop_L = 0;
    num_drop_U = 0;
#endif

    /* Initialize the statistics variables. */
    StatInit(&stat);

    /* Compute the incomplete factorization and compute the condition number
       and pivot growth using dgsisx. */
    B.ncol = 0;  /* not to perform triangular solution */
    sgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work,
	   lwork, &B, &X, &rpg, &rcond, &Glu, &mem_usage, &stat, &info);

    /* Set RHS for GMRES. */
    if (!(b = floatMalloc(m))) ABORT("Malloc fails for b[].");
    if (*equed == 'R' || *equed == 'B') {
	for (i = 0; i < n; ++i) b[i] = rhsb[i] * R[i];
    } else {
	for (i = 0; i < m; i++) b[i] = rhsb[i];
    }

    printf("sgsisx(): info %d, equed %c\n", info, equed[0]);
    if (info > 0 || rcond < 1e-8 || rpg > 1e8)
	printf("WARNING: This preconditioner might be unstable.\n");

    if ( info == 0 || info == n+1 ) {
	if ( options.PivotGrowth == YES )
	    printf("Recip. pivot growth = %e\n", rpg);
	if ( options.ConditionNumber == YES )
	    printf("Recip. condition number = %e\n", rcond);
    } else if ( info > 0 && lwork == -1 ) {
	printf("** Estimated memory: %d bytes\n", info - n);
    }

    Lstore = (SCformat *) L.Store;
    Ustore = (NCformat *) U.Store;
    printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz);
    printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
    printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
    printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
    printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n",
	    ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n)
	    / (double)Astore->nnz);
    printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
	   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
    fflush(stdout);

    /* Set the global variables. */
    GLOBAL_A = &A;
    GLOBAL_L = &L;
    GLOBAL_U = &U;
    GLOBAL_STAT = &stat;
    GLOBAL_PERM_C = perm_c;
    GLOBAL_PERM_R = perm_r;
    GLOBAL_OPTIONS = &options;
    GLOBAL_R = R;
    GLOBAL_C = C;
    GLOBAL_MEM_USAGE = &mem_usage;

    /* Set the options to do solve-only. */
    options.Fact = FACTORED;
    options.PivotGrowth = NO;
    options.ConditionNumber = NO;

    /* Set the variables used by GMRES. */
    restrt = SUPERLU_MIN(n / 3 + 1, 50);
    maxit = 1000;
    iter = maxit;
    resid = 1e-8;
    if (!(x = floatMalloc(n))) ABORT("Malloc fails for x[].");

    if (info <= n + 1)
    {
	int i_1 = 1;
	double maxferr = 0.0, nrmA, nrmB, res, t;
        float temp;
	extern float snrm2_(int *, float [], int *);
	extern void saxpy_(int *, float *, float [], int *, float [], int *);

	/* Initial guess */
	for (i = 0; i < n; i++) x[i] = zero;

	t = SuperLU_timer_();

	/* Call GMRES */
	sfgmr(n, smatvec_mult, spsolve, b, x, resid, restrt, &iter, stdout);

	t = SuperLU_timer_() - t;

	/* Output the result. */
	nrmA = snrm2_(&(Astore->nnz), (float *)((DNformat *)A.Store)->nzval,
		&i_1);
	nrmB = snrm2_(&m, b, &i_1);
	sp_sgemv("N", -1.0, &A, x, 1, 1.0, b, 1);
	res = snrm2_(&m, b, &i_1);
	resid = res / nrmB;
	printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, "
		"relres = %.1e\n", nrmA, nrmB, res, resid);

	if (iter >= maxit)
	{
	    if (resid >= 1.0) iter = -180;
	    else if (resid > 1e-8) iter = -111;
	}
	printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n",
		iter, resid, t);

	/* Scale the solution back if equilibration was performed. */
	if (*equed == 'C' || *equed == 'B') 
	    for (i = 0; i < n; i++) x[i] *= C[i];

	for (i = 0; i < m; i++) {
	    maxferr = SUPERLU_MAX(maxferr, fabs(x[i] - xact[i]));
        }
	printf("||X-X_true||_oo = %.1e\n", maxferr);
    }
#ifdef DEBUG
    printf("%d entries in L and %d entries in U dropped.\n",
	    num_drop_L, num_drop_U);
#endif
    fflush(stdout);

    if ( options.PrintStat ) StatPrint(&stat);
    StatFree(&stat);

    SUPERLU_FREE (rhsb);
    SUPERLU_FREE (rhsx);
    SUPERLU_FREE (xact);
    SUPERLU_FREE (etree);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    SUPERLU_FREE (R);
    SUPERLU_FREE (C);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperMatrix_Store(&X);
    if ( lwork >= 0 ) {
	Destroy_SuperNode_Matrix(&L);
	Destroy_CompCol_Matrix(&U);
    }
    SUPERLU_FREE(b);
    SUPERLU_FREE(x);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Exit main()");
#endif

    return 0;
}
Esempio n. 30
0
/* Subroutine */ int stgsna_(char *job, char *howmny, logical *select, 
	integer *n, real *a, integer *lda, real *b, integer *ldb, real *vl, 
	integer *ldvl, real *vr, integer *ldvr, real *s, real *dif, integer *
	mm, integer *m, real *work, integer *lwork, integer *iwork, integer *
	info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, 
	    vr_offset, i__1, i__2;
    real r__1, r__2;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    integer i__, k;
    real c1, c2;
    integer n1, n2, ks, iz;
    real eps, beta, cond;
    logical pair;
    integer ierr;
    real uhav, uhbv;
    integer ifst;
    real lnrm;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    integer ilst;
    real rnrm;
    extern /* Subroutine */ int slag2_(real *, integer *, real *, integer *, 
	    real *, real *, real *, real *, real *, real *);
    extern doublereal snrm2_(integer *, real *, integer *);
    real root1, root2, scale;
    extern logical lsame_(char *, char *);
    real uhavi, uhbvi;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *);
    real tmpii;
    integer lwmin;
    logical wants;
    real tmpir, tmpri, dummy[1], tmprr;
    extern doublereal slapy2_(real *, real *);
    real dummy1[1], alphai, alphar;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    logical wantbh, wantdf;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), stgexc_(logical *, logical 
	    *, integer *, real *, integer *, real *, integer *, real *, 
	    integer *, real *, integer *, integer *, integer *, real *, 
	    integer *, integer *);
    logical somcon;
    real alprqt, smlnum;
    logical lquery;
    extern /* Subroutine */ int stgsyl_(char *, integer *, integer *, integer 
	    *, real *, integer *, real *, integer *, real *, integer *, real *
, integer *, real *, integer *, real *, integer *, real *, real *, 
	     real *, integer *, integer *, integer *);


/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  STGSNA estimates reciprocal condition numbers for specified */
/*  eigenvalues and/or eigenvectors of a matrix pair (A, B) in */
/*  generalized real Schur canonical form (or of any matrix pair */
/*  (Q*A*Z', Q*B*Z') with orthogonal matrices Q and Z, where */
/*  Z' denotes the transpose of Z. */

/*  (A, B) must be in generalized real Schur form (as returned by SGGES), */
/*  i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal */
/*  blocks. B is upper triangular. */


/*  Arguments */
/*  ========= */

/*  JOB     (input) CHARACTER*1 */
/*          Specifies whether condition numbers are required for */
/*          eigenvalues (S) or eigenvectors (DIF): */
/*          = 'E': for eigenvalues only (S); */
/*          = 'V': for eigenvectors only (DIF); */
/*          = 'B': for both eigenvalues and eigenvectors (S and DIF). */

/*  HOWMNY  (input) CHARACTER*1 */
/*          = 'A': compute condition numbers for all eigenpairs; */
/*          = 'S': compute condition numbers for selected eigenpairs */
/*                 specified by the array SELECT. */

/*  SELECT  (input) LOGICAL array, dimension (N) */
/*          If HOWMNY = 'S', SELECT specifies the eigenpairs for which */
/*          condition numbers are required. To select condition numbers */
/*          for the eigenpair corresponding to a real eigenvalue w(j), */
/*          SELECT(j) must be set to .TRUE.. To select condition numbers */
/*          corresponding to a complex conjugate pair of eigenvalues w(j) */
/*          and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be */
/*          set to .TRUE.. */
/*          If HOWMNY = 'A', SELECT is not referenced. */

/*  N       (input) INTEGER */
/*          The order of the square matrix pair (A, B). N >= 0. */

/*  A       (input) REAL array, dimension (LDA,N) */
/*          The upper quasi-triangular matrix A in the pair (A,B). */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= max(1,N). */

/*  B       (input) REAL array, dimension (LDB,N) */
/*          The upper triangular matrix B in the pair (A,B). */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. LDB >= max(1,N). */

/*  VL      (input) REAL array, dimension (LDVL,M) */
/*          If JOB = 'E' or 'B', VL must contain left eigenvectors of */
/*          (A, B), corresponding to the eigenpairs specified by HOWMNY */
/*          and SELECT. The eigenvectors must be stored in consecutive */
/*          columns of VL, as returned by STGEVC. */
/*          If JOB = 'V', VL is not referenced. */

/*  LDVL    (input) INTEGER */
/*          The leading dimension of the array VL. LDVL >= 1. */
/*          If JOB = 'E' or 'B', LDVL >= N. */

/*  VR      (input) REAL array, dimension (LDVR,M) */
/*          If JOB = 'E' or 'B', VR must contain right eigenvectors of */
/*          (A, B), corresponding to the eigenpairs specified by HOWMNY */
/*          and SELECT. The eigenvectors must be stored in consecutive */
/*          columns ov VR, as returned by STGEVC. */
/*          If JOB = 'V', VR is not referenced. */

/*  LDVR    (input) INTEGER */
/*          The leading dimension of the array VR. LDVR >= 1. */
/*          If JOB = 'E' or 'B', LDVR >= N. */

/*  S       (output) REAL array, dimension (MM) */
/*          If JOB = 'E' or 'B', the reciprocal condition numbers of the */
/*          selected eigenvalues, stored in consecutive elements of the */
/*          array. For a complex conjugate pair of eigenvalues two */
/*          consecutive elements of S are set to the same value. Thus */
/*          S(j), DIF(j), and the j-th columns of VL and VR all */
/*          correspond to the same eigenpair (but not in general the */
/*          j-th eigenpair, unless all eigenpairs are selected). */
/*          If JOB = 'V', S is not referenced. */

/*  DIF     (output) REAL array, dimension (MM) */
/*          If JOB = 'V' or 'B', the estimated reciprocal condition */
/*          numbers of the selected eigenvectors, stored in consecutive */
/*          elements of the array. For a complex eigenvector two */
/*          consecutive elements of DIF are set to the same value. If */
/*          the eigenvalues cannot be reordered to compute DIF(j), DIF(j) */
/*          is set to 0; this can only occur when the true value would be */
/*          very small anyway. */
/*          If JOB = 'E', DIF is not referenced. */

/*  MM      (input) INTEGER */
/*          The number of elements in the arrays S and DIF. MM >= M. */

/*  M       (output) INTEGER */
/*          The number of elements of the arrays S and DIF used to store */
/*          the specified condition numbers; for each selected real */
/*          eigenvalue one element is used, and for each selected complex */
/*          conjugate pair of eigenvalues, two elements are used. */
/*          If HOWMNY = 'A', M is set to N. */

/*  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. LWORK >= max(1,N). */
/*          If JOB = 'V' or 'B' LWORK >= 2*N*(N+2)+16. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  IWORK   (workspace) INTEGER array, dimension (N + 6) */
/*          If JOB = 'E', IWORK is not referenced. */

/*  INFO    (output) INTEGER */
/*          =0: Successful exit */
/*          <0: If INFO = -i, the i-th argument had an illegal value */


/*  Further Details */
/*  =============== */

/*  The reciprocal of the condition number of a generalized eigenvalue */
/*  w = (a, b) is defined as */

/*       S(w) = (|u'Av|**2 + |u'Bv|**2)**(1/2) / (norm(u)*norm(v)) */

/*  where u and v are the left and right eigenvectors of (A, B) */
/*  corresponding to w; |z| denotes the absolute value of the complex */
/*  number, and norm(u) denotes the 2-norm of the vector u. */
/*  The pair (a, b) corresponds to an eigenvalue w = a/b (= u'Av/u'Bv) */
/*  of the matrix pair (A, B). If both a and b equal zero, then (A B) is */
/*  singular and S(I) = -1 is returned. */

/*  An approximate error bound on the chordal distance between the i-th */
/*  computed generalized eigenvalue w and the corresponding exact */
/*  eigenvalue lambda is */

/*       chord(w, lambda) <= EPS * norm(A, B) / S(I) */

/*  where EPS is the machine precision. */

/*  The reciprocal of the condition number DIF(i) of right eigenvector u */
/*  and left eigenvector v corresponding to the generalized eigenvalue w */
/*  is defined as follows: */

/*  a) If the i-th eigenvalue w = (a,b) is real */

/*     Suppose U and V are orthogonal transformations such that */

/*                U'*(A, B)*V  = (S, T) = ( a   *  ) ( b  *  )  1 */
/*                                        ( 0  S22 ),( 0 T22 )  n-1 */
/*                                          1  n-1     1 n-1 */

/*     Then the reciprocal condition number DIF(i) is */

/*                Difl((a, b), (S22, T22)) = sigma-min( Zl ), */

/*     where sigma-min(Zl) denotes the smallest singular value of the */
/*     2(n-1)-by-2(n-1) matrix */

/*         Zl = [ kron(a, In-1)  -kron(1, S22) ] */
/*              [ kron(b, In-1)  -kron(1, T22) ] . */

/*     Here In-1 is the identity matrix of size n-1. kron(X, Y) is the */
/*     Kronecker product between the matrices X and Y. */

/*     Note that if the default method for computing DIF(i) is wanted */
/*     (see SLATDF), then the parameter DIFDRI (see below) should be */
/*     changed from 3 to 4 (routine SLATDF(IJOB = 2 will be used)). */
/*     See STGSYL for more details. */

/*  b) If the i-th and (i+1)-th eigenvalues are complex conjugate pair, */

/*     Suppose U and V are orthogonal transformations such that */

/*                U'*(A, B)*V = (S, T) = ( S11  *   ) ( T11  *  )  2 */
/*                                       ( 0    S22 ),( 0    T22) n-2 */
/*                                         2    n-2     2    n-2 */

/*     and (S11, T11) corresponds to the complex conjugate eigenvalue */
/*     pair (w, conjg(w)). There exist unitary matrices U1 and V1 such */
/*     that */

/*         U1'*S11*V1 = ( s11 s12 )   and U1'*T11*V1 = ( t11 t12 ) */
/*                      (  0  s22 )                    (  0  t22 ) */

/*     where the generalized eigenvalues w = s11/t11 and */
/*     conjg(w) = s22/t22. */

/*     Then the reciprocal condition number DIF(i) is bounded by */

/*         min( d1, max( 1, |real(s11)/real(s22)| )*d2 ) */

/*     where, d1 = Difl((s11, t11), (s22, t22)) = sigma-min(Z1), where */
/*     Z1 is the complex 2-by-2 matrix */

/*              Z1 =  [ s11  -s22 ] */
/*                    [ t11  -t22 ], */

/*     This is done by computing (using real arithmetic) the */
/*     roots of the characteristical polynomial det(Z1' * Z1 - lambda I), */
/*     where Z1' denotes the conjugate transpose of Z1 and det(X) denotes */
/*     the determinant of X. */

/*     and d2 is an upper bound on Difl((S11, T11), (S22, T22)), i.e. an */
/*     upper bound on sigma-min(Z2), where Z2 is (2n-2)-by-(2n-2) */

/*              Z2 = [ kron(S11', In-2)  -kron(I2, S22) ] */
/*                   [ kron(T11', In-2)  -kron(I2, T22) ] */

/*     Note that if the default method for computing DIF is wanted (see */
/*     SLATDF), then the parameter DIFDRI (see below) should be changed */
/*     from 3 to 4 (routine SLATDF(IJOB = 2 will be used)). See STGSYL */
/*     for more details. */

/*  For each eigenvalue/vector specified by SELECT, DIF stores a */
/*  Frobenius norm-based estimate of Difl. */

/*  An approximate error bound for the i-th computed eigenvector VL(i) or */
/*  VR(i) is given by */

/*             EPS * norm(A, B) / DIF(i). */

/*  See ref. [2-3] for more details and further references. */

/*  Based on contributions by */
/*     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
/*     Umea University, S-901 87 Umea, Sweden. */

/*  References */
/*  ========== */

/*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */
/*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */
/*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and */
/*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */

/*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified */
/*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition */
/*      Estimation: Theory, Algorithms and Software, */
/*      Report UMINF - 94.04, Department of Computing Science, Umea */
/*      University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working */
/*      Note 87. To appear in Numerical Algorithms, 1996. */

/*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software */
/*      for Solving the Generalized Sylvester Equation and Estimating the */
/*      Separation between Regular Matrix Pairs, Report UMINF - 93.23, */
/*      Department of Computing Science, Umea University, S-901 87 Umea, */
/*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working */
/*      Note 75.  To appear in ACM Trans. on Math. Software, Vol 22, */
/*      No 1, 1996. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Decode and test the input parameters */

    /* Parameter adjustments */
    --select;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1;
    vr -= vr_offset;
    --s;
    --dif;
    --work;
    --iwork;

    /* Function Body */
    wantbh = lsame_(job, "B");
    wants = lsame_(job, "E") || wantbh;
    wantdf = lsame_(job, "V") || wantbh;

    somcon = lsame_(howmny, "S");

    *info = 0;
    lquery = *lwork == -1;

    if (! wants && ! wantdf) {
	*info = -1;
    } else if (! lsame_(howmny, "A") && ! somcon) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lda < max(1,*n)) {
	*info = -6;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else if (wants && *ldvl < *n) {
	*info = -10;
    } else if (wants && *ldvr < *n) {
	*info = -12;
    } else {

/*        Set M to the number of eigenpairs for which condition numbers */
/*        are required, and test MM. */

	if (somcon) {
	    *m = 0;
	    pair = FALSE_;
	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {
		if (pair) {
		    pair = FALSE_;
		} else {
		    if (k < *n) {
			if (a[k + 1 + k * a_dim1] == 0.f) {
			    if (select[k]) {
				++(*m);
			    }
			} else {
			    pair = TRUE_;
			    if (select[k] || select[k + 1]) {
				*m += 2;
			    }
			}
		    } else {
			if (select[*n]) {
			    ++(*m);
			}
		    }
		}
/* L10: */
	    }
	} else {
	    *m = *n;
	}

	if (*n == 0) {
	    lwmin = 1;
	} else if (lsame_(job, "V") || lsame_(job, 
		"B")) {
	    lwmin = (*n << 1) * (*n + 2) + 16;
	} else {
	    lwmin = *n;
	}
	work[1] = (real) lwmin;

	if (*mm < *m) {
	    *info = -15;
	} else if (*lwork < lwmin && ! lquery) {
	    *info = -18;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("STGSNA", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Get machine constants */

    eps = slamch_("P");
    smlnum = slamch_("S") / eps;
    ks = 0;
    pair = FALSE_;

    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {

/*        Determine whether A(k,k) begins a 1-by-1 or 2-by-2 block. */

	if (pair) {
	    pair = FALSE_;
	    goto L20;
	} else {
	    if (k < *n) {
		pair = a[k + 1 + k * a_dim1] != 0.f;
	    }
	}

/*        Determine whether condition numbers are required for the k-th */
/*        eigenpair. */

	if (somcon) {
	    if (pair) {
		if (! select[k] && ! select[k + 1]) {
		    goto L20;
		}
	    } else {
		if (! select[k]) {
		    goto L20;
		}
	    }
	}

	++ks;

	if (wants) {

/*           Compute the reciprocal condition number of the k-th */
/*           eigenvalue. */

	    if (pair) {

/*              Complex eigenvalue pair. */

		r__1 = snrm2_(n, &vr[ks * vr_dim1 + 1], &c__1);
		r__2 = snrm2_(n, &vr[(ks + 1) * vr_dim1 + 1], &c__1);
		rnrm = slapy2_(&r__1, &r__2);
		r__1 = snrm2_(n, &vl[ks * vl_dim1 + 1], &c__1);
		r__2 = snrm2_(n, &vl[(ks + 1) * vl_dim1 + 1], &c__1);
		lnrm = slapy2_(&r__1, &r__2);
		sgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr[ks * vr_dim1 
			+ 1], &c__1, &c_b21, &work[1], &c__1);
		tmprr = sdot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], &
			c__1);
		tmpri = sdot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], 
			 &c__1);
		sgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr[(ks + 1) * 
			vr_dim1 + 1], &c__1, &c_b21, &work[1], &c__1);
		tmpii = sdot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], 
			 &c__1);
		tmpir = sdot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], &
			c__1);
		uhav = tmprr + tmpii;
		uhavi = tmpir - tmpri;
		sgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr[ks * vr_dim1 
			+ 1], &c__1, &c_b21, &work[1], &c__1);
		tmprr = sdot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], &
			c__1);
		tmpri = sdot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], 
			 &c__1);
		sgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr[(ks + 1) * 
			vr_dim1 + 1], &c__1, &c_b21, &work[1], &c__1);
		tmpii = sdot_(n, &work[1], &c__1, &vl[(ks + 1) * vl_dim1 + 1], 
			 &c__1);
		tmpir = sdot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], &
			c__1);
		uhbv = tmprr + tmpii;
		uhbvi = tmpir - tmpri;
		uhav = slapy2_(&uhav, &uhavi);
		uhbv = slapy2_(&uhbv, &uhbvi);
		cond = slapy2_(&uhav, &uhbv);
		s[ks] = cond / (rnrm * lnrm);
		s[ks + 1] = s[ks];

	    } else {

/*              Real eigenvalue. */

		rnrm = snrm2_(n, &vr[ks * vr_dim1 + 1], &c__1);
		lnrm = snrm2_(n, &vl[ks * vl_dim1 + 1], &c__1);
		sgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr[ks * vr_dim1 
			+ 1], &c__1, &c_b21, &work[1], &c__1);
		uhav = sdot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], &c__1)
			;
		sgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr[ks * vr_dim1 
			+ 1], &c__1, &c_b21, &work[1], &c__1);
		uhbv = sdot_(n, &work[1], &c__1, &vl[ks * vl_dim1 + 1], &c__1)
			;
		cond = slapy2_(&uhav, &uhbv);
		if (cond == 0.f) {
		    s[ks] = -1.f;
		} else {
		    s[ks] = cond / (rnrm * lnrm);
		}
	    }
	}

	if (wantdf) {
	    if (*n == 1) {
		dif[ks] = slapy2_(&a[a_dim1 + 1], &b[b_dim1 + 1]);
		goto L20;
	    }

/*           Estimate the reciprocal condition number of the k-th */
/*           eigenvectors. */
	    if (pair) {

/*              Copy the  2-by 2 pencil beginning at (A(k,k), B(k, k)). */
/*              Compute the eigenvalue(s) at position K. */

		work[1] = a[k + k * a_dim1];
		work[2] = a[k + 1 + k * a_dim1];
		work[3] = a[k + (k + 1) * a_dim1];
		work[4] = a[k + 1 + (k + 1) * a_dim1];
		work[5] = b[k + k * b_dim1];
		work[6] = b[k + 1 + k * b_dim1];
		work[7] = b[k + (k + 1) * b_dim1];
		work[8] = b[k + 1 + (k + 1) * b_dim1];
		r__1 = smlnum * eps;
		slag2_(&work[1], &c__2, &work[5], &c__2, &r__1, &beta, dummy1, 
			 &alphar, dummy, &alphai);
		alprqt = 1.f;
		c1 = (alphar * alphar + alphai * alphai + beta * beta) * 2.f;
		c2 = beta * 4.f * beta * alphai * alphai;
		root1 = c1 + sqrt(c1 * c1 - c2 * 4.f);
		root2 = c2 / root1;
		root1 /= 2.f;
/* Computing MIN */
		r__1 = sqrt(root1), r__2 = sqrt(root2);
		cond = dmin(r__1,r__2);
	    }

/*           Copy the matrix (A, B) to the array WORK and swap the */
/*           diagonal block beginning at A(k,k) to the (1,1) position. */

	    slacpy_("Full", n, n, &a[a_offset], lda, &work[1], n);
	    slacpy_("Full", n, n, &b[b_offset], ldb, &work[*n * *n + 1], n);
	    ifst = k;
	    ilst = 1;

	    i__2 = *lwork - (*n << 1) * *n;
	    stgexc_(&c_false, &c_false, n, &work[1], n, &work[*n * *n + 1], n, 
		     dummy, &c__1, dummy1, &c__1, &ifst, &ilst, &work[(*n * *
		    n << 1) + 1], &i__2, &ierr);

	    if (ierr > 0) {

/*              Ill-conditioned problem - swap rejected. */

		dif[ks] = 0.f;
	    } else {

/*              Reordering successful, solve generalized Sylvester */
/*              equation for R and L, */
/*                         A22 * R - L * A11 = A12 */
/*                         B22 * R - L * B11 = B12, */
/*              and compute estimate of Difl((A11,B11), (A22, B22)). */

		n1 = 1;
		if (work[2] != 0.f) {
		    n1 = 2;
		}
		n2 = *n - n1;
		if (n2 == 0) {
		    dif[ks] = cond;
		} else {
		    i__ = *n * *n + 1;
		    iz = (*n << 1) * *n + 1;
		    i__2 = *lwork - (*n << 1) * *n;
		    stgsyl_("N", &c__3, &n2, &n1, &work[*n * n1 + n1 + 1], n, 
			    &work[1], n, &work[n1 + 1], n, &work[*n * n1 + n1 
			    + i__], n, &work[i__], n, &work[n1 + i__], n, &
			    scale, &dif[ks], &work[iz + 1], &i__2, &iwork[1], 
			    &ierr);

		    if (pair) {
/* Computing MIN */
			r__1 = dmax(1.f,alprqt) * dif[ks];
			dif[ks] = dmin(r__1,cond);
		    }
		}
	    }
	    if (pair) {
		dif[ks + 1] = dif[ks];
	    }
	}
	if (pair) {
	    ++ks;
	}

L20:
	;
    }
    work[1] = (real) lwmin;
    return 0;

/*     End of STGSNA */

} /* stgsna_ */