Exemplos de sgemv_ em C++ (Cpp)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: strevc.c Projeto: dacap/loseface

/* Subroutine */ int strevc_(char *side, char *howmny, logical *select, 
	integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, 
	integer *ldvr, integer *mm, integer *m, real *work, integer *info)
{
    /* System generated locals */
    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
	    i__2, i__3;
    real r__1, r__2, r__3, r__4;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    integer i__, j, k;
    real x[4]	/* was [2][2] */;
    integer j1, j2, n2, ii, ki, ip, is;
    real wi, wr, rec, ulp, beta, emax;
    logical pair, allv;
    integer ierr;
    real unfl, ovfl, smin;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    logical over;
    real vmax;
    integer jnxt;
    real scale;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    real remax;
    logical leftv;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *);
    logical bothv;
    real vcrit;
    logical somev;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    real xnorm;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), slaln2_(logical *, integer *, integer *, real 
	    *, real *, real *, integer *, real *, real *, real *, integer *, 
	    real *, real *, real *, integer *, real *, real *, integer *), 
	    slabad_(real *, real *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    real bignum;
    extern integer isamax_(integer *, real *, integer *);
    logical rightv;
    real smlnum;


/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  STREVC computes some or all of the right and/or left eigenvectors of */
/*  a real upper quasi-triangular matrix T. */
/*  Matrices of this type are produced by the Schur factorization of */
/*  a real general matrix:  A = Q*T*Q**T, as computed by SHSEQR. */

/*  The right eigenvector x and the left eigenvector y of T corresponding */
/*  to an eigenvalue w are defined by: */

/*     T*x = w*x,     (y**H)*T = w*(y**H) */

/*  where y**H denotes the conjugate transpose of y. */
/*  The eigenvalues are not input to this routine, but are read directly */
/*  from the diagonal blocks of T. */

/*  This routine returns the matrices X and/or Y of right and left */
/*  eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an */
/*  input matrix.  If Q is the orthogonal factor that reduces a matrix */
/*  A to Schur form T, then Q*X and Q*Y are the matrices of right and */
/*  left eigenvectors of A. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'R':  compute right eigenvectors only; */
/*          = 'L':  compute left eigenvectors only; */
/*          = 'B':  compute both right and left eigenvectors. */

/*  HOWMNY  (input) CHARACTER*1 */
/*          = 'A':  compute all right and/or left eigenvectors; */
/*          = 'B':  compute all right and/or left eigenvectors, */
/*                  backtransformed by the matrices in VR and/or VL; */
/*          = 'S':  compute selected right and/or left eigenvectors, */
/*                  as indicated by the logical array SELECT. */

/*  SELECT  (input/output) LOGICAL array, dimension (N) */
/*          If HOWMNY = 'S', SELECT specifies the eigenvectors to be */
/*          computed. */
/*          If w(j) is a real eigenvalue, the corresponding real */
/*          eigenvector is computed if SELECT(j) is .TRUE.. */
/*          If w(j) and w(j+1) are the real and imaginary parts of a */
/*          complex eigenvalue, the corresponding complex eigenvector is */
/*          computed if either SELECT(j) or SELECT(j+1) is .TRUE., and */
/*          on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to */
/*          .FALSE.. */
/*          Not referenced if HOWMNY = 'A' or 'B'. */

/*  N       (input) INTEGER */
/*          The order of the matrix T. N >= 0. */

/*  T       (input) REAL array, dimension (LDT,N) */
/*          The upper quasi-triangular matrix T in Schur canonical form. */

/*  LDT     (input) INTEGER */
/*          The leading dimension of the array T. LDT >= max(1,N). */

/*  VL      (input/output) REAL array, dimension (LDVL,MM) */
/*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must */
/*          contain an N-by-N matrix Q (usually the orthogonal matrix Q */
/*          of Schur vectors returned by SHSEQR). */
/*          On exit, if SIDE = 'L' or 'B', VL contains: */
/*          if HOWMNY = 'A', the matrix Y of left eigenvectors of T; */
/*          if HOWMNY = 'B', the matrix Q*Y; */
/*          if HOWMNY = 'S', the left eigenvectors of T specified by */
/*                           SELECT, stored consecutively in the columns */
/*                           of VL, in the same order as their */
/*                           eigenvalues. */
/*          A complex eigenvector corresponding to a complex eigenvalue */
/*          is stored in two consecutive columns, the first holding the */
/*          real part, and the second the imaginary part. */
/*          Not referenced if SIDE = 'R'. */

/*  LDVL    (input) INTEGER */
/*          The leading dimension of the array VL.  LDVL >= 1, and if */
/*          SIDE = 'L' or 'B', LDVL >= N. */

/*  VR      (input/output) REAL array, dimension (LDVR,MM) */
/*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must */
/*          contain an N-by-N matrix Q (usually the orthogonal matrix Q */
/*          of Schur vectors returned by SHSEQR). */
/*          On exit, if SIDE = 'R' or 'B', VR contains: */
/*          if HOWMNY = 'A', the matrix X of right eigenvectors of T; */
/*          if HOWMNY = 'B', the matrix Q*X; */
/*          if HOWMNY = 'S', the right eigenvectors of T specified by */
/*                           SELECT, stored consecutively in the columns */
/*                           of VR, in the same order as their */
/*                           eigenvalues. */
/*          A complex eigenvector corresponding to a complex eigenvalue */
/*          is stored in two consecutive columns, the first holding the */
/*          real part and the second the imaginary part. */
/*          Not referenced if SIDE = 'L'. */

/*  LDVR    (input) INTEGER */
/*          The leading dimension of the array VR.  LDVR >= 1, and if */
/*          SIDE = 'R' or 'B', LDVR >= N. */

/*  MM      (input) INTEGER */
/*          The number of columns in the arrays VL and/or VR. MM >= M. */

/*  M       (output) INTEGER */
/*          The number of columns in the arrays VL and/or VR actually */
/*          used to store the eigenvectors. */
/*          If HOWMNY = 'A' or 'B', M is set to N. */
/*          Each selected real eigenvector occupies one column and each */
/*          selected complex eigenvector occupies two columns. */

/*  WORK    (workspace) REAL array, dimension (3*N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  Further Details */
/*  =============== */

/*  The algorithm used in this program is basically backward (forward) */
/*  substitution, with scaling to make the the code robust against */
/*  possible overflow. */

/*  Each eigenvector is normalized so that the element of largest */
/*  magnitude has magnitude 1; here the magnitude of a complex number */
/*  (x,y) is taken to be |x| + |y|. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Decode and test the input parameters */

    /* Parameter adjustments */
    --select;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1;
    vr -= vr_offset;
    --work;

    /* Function Body */
    bothv = lsame_(side, "B");
    rightv = lsame_(side, "R") || bothv;
    leftv = lsame_(side, "L") || bothv;

    allv = lsame_(howmny, "A");
    over = lsame_(howmny, "B");
    somev = lsame_(howmny, "S");

    *info = 0;
    if (! rightv && ! leftv) {
	*info = -1;
    } else if (! allv && ! over && ! somev) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*ldt < max(1,*n)) {
	*info = -6;
    } else if (*ldvl < 1 || leftv && *ldvl < *n) {
	*info = -8;
    } else if (*ldvr < 1 || rightv && *ldvr < *n) {
	*info = -10;
    } else {

/*        Set M to the number of columns required to store the selected */
/*        eigenvectors, standardize the array SELECT if necessary, and */
/*        test MM. */

	if (somev) {
	    *m = 0;
	    pair = FALSE_;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		if (pair) {
		    pair = FALSE_;
		    select[j] = FALSE_;
		} else {
		    if (j < *n) {
			if (t[j + 1 + j * t_dim1] == 0.f) {
			    if (select[j]) {
				++(*m);
			    }
			} else {
			    pair = TRUE_;
			    if (select[j] || select[j + 1]) {
				select[j] = TRUE_;
				*m += 2;
			    }
			}
		    } else {
			if (select[*n]) {
			    ++(*m);
			}
		    }
		}
/* L10: */
	    }
	} else {
	    *m = *n;
	}

	if (*mm < *m) {
	    *info = -11;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("STREVC", &i__1);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0) {
	return 0;
    }

/*     Set the constants to control overflow. */

    unfl = slamch_("Safe minimum");
    ovfl = 1.f / unfl;
    slabad_(&unfl, &ovfl);
    ulp = slamch_("Precision");
    smlnum = unfl * (*n / ulp);
    bignum = (1.f - ulp) / smlnum;

/*     Compute 1-norm of each column of strictly upper triangular */
/*     part of T to control overflow in triangular solver. */

    work[1] = 0.f;
    i__1 = *n;
    for (j = 2; j <= i__1; ++j) {
	work[j] = 0.f;
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    work[j] += (r__1 = t[i__ + j * t_dim1], dabs(r__1));
/* L20: */
	}
/* L30: */
    }

/*     Index IP is used to specify the real or complex eigenvalue: */
/*       IP = 0, real eigenvalue, */
/*            1, first of conjugate complex pair: (wr,wi) */
/*           -1, second of conjugate complex pair: (wr,wi) */

    n2 = *n << 1;

    if (rightv) {

/*        Compute right eigenvectors. */

	ip = 0;
	is = *m;
	for (ki = *n; ki >= 1; --ki) {

	    if (ip == 1) {
		goto L130;
	    }
	    if (ki == 1) {
		goto L40;
	    }
	    if (t[ki + (ki - 1) * t_dim1] == 0.f) {
		goto L40;
	    }
	    ip = -1;

L40:
	    if (somev) {
		if (ip == 0) {
		    if (! select[ki]) {
			goto L130;
		    }
		} else {
		    if (! select[ki - 1]) {
			goto L130;
		    }
		}
	    }

/*           Compute the KI-th eigenvalue (WR,WI). */

	    wr = t[ki + ki * t_dim1];
	    wi = 0.f;
	    if (ip != 0) {
		wi = sqrt((r__1 = t[ki + (ki - 1) * t_dim1], dabs(r__1))) * 
			sqrt((r__2 = t[ki - 1 + ki * t_dim1], dabs(r__2)));
	    }
/* Computing MAX */
	    r__1 = ulp * (dabs(wr) + dabs(wi));
	    smin = dmax(r__1,smlnum);

	    if (ip == 0) {

/*              Real right eigenvector */

		work[ki + *n] = 1.f;

/*              Form right-hand side */

		i__1 = ki - 1;
		for (k = 1; k <= i__1; ++k) {
		    work[k + *n] = -t[k + ki * t_dim1];
/* L50: */
		}

/*              Solve the upper quasi-triangular system: */
/*                 (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. */

		jnxt = ki - 1;
		for (j = ki - 1; j >= 1; --j) {
		    if (j > jnxt) {
			goto L60;
		    }
		    j1 = j;
		    j2 = j;
		    jnxt = j - 1;
		    if (j > 1) {
			if (t[j + (j - 1) * t_dim1] != 0.f) {
			    j1 = j - 1;
			    jnxt = j - 2;
			}
		    }

		    if (j1 == j2) {

/*                    1-by-1 diagonal block */

			slaln2_(&c_false, &c__1, &c__1, &smin, &c_b22, &t[j + 
				j * t_dim1], ldt, &c_b22, &c_b22, &work[j + *
				n], n, &wr, &c_b25, x, &c__2, &scale, &xnorm, 
				&ierr);

/*                    Scale X(1,1) to avoid overflow when updating */
/*                    the right-hand side. */

			if (xnorm > 1.f) {
			    if (work[j] > bignum / xnorm) {
				x[0] /= xnorm;
				scale /= xnorm;
			    }
			}

/*                    Scale if necessary */

			if (scale != 1.f) {
			    sscal_(&ki, &scale, &work[*n + 1], &c__1);
			}
			work[j + *n] = x[0];

/*                    Update right-hand side */

			i__1 = j - 1;
			r__1 = -x[0];
			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
				*n + 1], &c__1);

		    } else {

/*                    2-by-2 diagonal block */

			slaln2_(&c_false, &c__2, &c__1, &smin, &c_b22, &t[j - 
				1 + (j - 1) * t_dim1], ldt, &c_b22, &c_b22, &
				work[j - 1 + *n], n, &wr, &c_b25, x, &c__2, &
				scale, &xnorm, &ierr);

/*                    Scale X(1,1) and X(2,1) to avoid overflow when */
/*                    updating the right-hand side. */

			if (xnorm > 1.f) {
/* Computing MAX */
			    r__1 = work[j - 1], r__2 = work[j];
			    beta = dmax(r__1,r__2);
			    if (beta > bignum / xnorm) {
				x[0] /= xnorm;
				x[1] /= xnorm;
				scale /= xnorm;
			    }
			}

/*                    Scale if necessary */

			if (scale != 1.f) {
			    sscal_(&ki, &scale, &work[*n + 1], &c__1);
			}
			work[j - 1 + *n] = x[0];
			work[j + *n] = x[1];

/*                    Update right-hand side */

			i__1 = j - 2;
			r__1 = -x[0];
			saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, 
				&work[*n + 1], &c__1);
			i__1 = j - 2;
			r__1 = -x[1];
			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
				*n + 1], &c__1);
		    }
L60:
		    ;
		}

/*              Copy the vector x or Q*x to VR and normalize. */

		if (! over) {
		    scopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], &
			    c__1);

		    ii = isamax_(&ki, &vr[is * vr_dim1 + 1], &c__1);
		    remax = 1.f / (r__1 = vr[ii + is * vr_dim1], dabs(r__1));
		    sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1);

		    i__1 = *n;
		    for (k = ki + 1; k <= i__1; ++k) {
			vr[k + is * vr_dim1] = 0.f;
/* L70: */
		    }
		} else {
		    if (ki > 1) {
			i__1 = ki - 1;
			sgemv_("N", n, &i__1, &c_b22, &vr[vr_offset], ldvr, &
				work[*n + 1], &c__1, &work[ki + *n], &vr[ki * 
				vr_dim1 + 1], &c__1);
		    }

		    ii = isamax_(n, &vr[ki * vr_dim1 + 1], &c__1);
		    remax = 1.f / (r__1 = vr[ii + ki * vr_dim1], dabs(r__1));
		    sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1);
		}

	    } else {

/*              Complex right eigenvector. */

/*              Initial solve */
/*                [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. */
/*                [ (T(KI,KI-1)   T(KI,KI)   )               ] */

		if ((r__1 = t[ki - 1 + ki * t_dim1], dabs(r__1)) >= (r__2 = t[
			ki + (ki - 1) * t_dim1], dabs(r__2))) {
		    work[ki - 1 + *n] = 1.f;
		    work[ki + n2] = wi / t[ki - 1 + ki * t_dim1];
		} else {
		    work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1];
		    work[ki + n2] = 1.f;
		}
		work[ki + *n] = 0.f;
		work[ki - 1 + n2] = 0.f;

/*              Form right-hand side */

		i__1 = ki - 2;
		for (k = 1; k <= i__1; ++k) {
		    work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * 
			    t_dim1];
		    work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1];
/* L80: */
		}

/*              Solve upper quasi-triangular system: */
/*              (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) */

		jnxt = ki - 2;
		for (j = ki - 2; j >= 1; --j) {
		    if (j > jnxt) {
			goto L90;
		    }
		    j1 = j;
		    j2 = j;
		    jnxt = j - 1;
		    if (j > 1) {
			if (t[j + (j - 1) * t_dim1] != 0.f) {
			    j1 = j - 1;
			    jnxt = j - 2;
			}
		    }

		    if (j1 == j2) {

/*                    1-by-1 diagonal block */

			slaln2_(&c_false, &c__1, &c__2, &smin, &c_b22, &t[j + 
				j * t_dim1], ldt, &c_b22, &c_b22, &work[j + *
				n], n, &wr, &wi, x, &c__2, &scale, &xnorm, &
				ierr);

/*                    Scale X(1,1) and X(1,2) to avoid overflow when */
/*                    updating the right-hand side. */

			if (xnorm > 1.f) {
			    if (work[j] > bignum / xnorm) {
				x[0] /= xnorm;
				x[2] /= xnorm;
				scale /= xnorm;
			    }
			}

/*                    Scale if necessary */

			if (scale != 1.f) {
			    sscal_(&ki, &scale, &work[*n + 1], &c__1);
			    sscal_(&ki, &scale, &work[n2 + 1], &c__1);
			}
			work[j + *n] = x[0];
			work[j + n2] = x[2];

/*                    Update the right-hand side */

			i__1 = j - 1;
			r__1 = -x[0];
			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
				*n + 1], &c__1);
			i__1 = j - 1;
			r__1 = -x[2];
			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
				n2 + 1], &c__1);

		    } else {

/*                    2-by-2 diagonal block */

			slaln2_(&c_false, &c__2, &c__2, &smin, &c_b22, &t[j - 
				1 + (j - 1) * t_dim1], ldt, &c_b22, &c_b22, &
				work[j - 1 + *n], n, &wr, &wi, x, &c__2, &
				scale, &xnorm, &ierr);

/*                    Scale X to avoid overflow when updating */
/*                    the right-hand side. */

			if (xnorm > 1.f) {
/* Computing MAX */
			    r__1 = work[j - 1], r__2 = work[j];
			    beta = dmax(r__1,r__2);
			    if (beta > bignum / xnorm) {
				rec = 1.f / xnorm;
				x[0] *= rec;
				x[2] *= rec;
				x[1] *= rec;
				x[3] *= rec;
				scale *= rec;
			    }
			}

/*                    Scale if necessary */

			if (scale != 1.f) {
			    sscal_(&ki, &scale, &work[*n + 1], &c__1);
			    sscal_(&ki, &scale, &work[n2 + 1], &c__1);
			}
			work[j - 1 + *n] = x[0];
			work[j + *n] = x[1];
			work[j - 1 + n2] = x[2];
			work[j + n2] = x[3];

/*                    Update the right-hand side */

			i__1 = j - 2;
			r__1 = -x[0];
			saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, 
				&work[*n + 1], &c__1);
			i__1 = j - 2;
			r__1 = -x[1];
			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
				*n + 1], &c__1);
			i__1 = j - 2;
			r__1 = -x[2];
			saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, 
				&work[n2 + 1], &c__1);
			i__1 = j - 2;
			r__1 = -x[3];
			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
				n2 + 1], &c__1);
		    }
L90:
		    ;
		}

/*              Copy the vector x or Q*x to VR and normalize. */

		if (! over) {
		    scopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 
			    + 1], &c__1);
		    scopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], &
			    c__1);

		    emax = 0.f;
		    i__1 = ki;
		    for (k = 1; k <= i__1; ++k) {
/* Computing MAX */
			r__3 = emax, r__4 = (r__1 = vr[k + (is - 1) * vr_dim1]
				, dabs(r__1)) + (r__2 = vr[k + is * vr_dim1], 
				dabs(r__2));
			emax = dmax(r__3,r__4);
/* L100: */
		    }

		    remax = 1.f / emax;
		    sscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1);
		    sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1);

		    i__1 = *n;
		    for (k = ki + 1; k <= i__1; ++k) {
			vr[k + (is - 1) * vr_dim1] = 0.f;
			vr[k + is * vr_dim1] = 0.f;
/* L110: */
		    }

		} else {

		    if (ki > 2) {
			i__1 = ki - 2;
			sgemv_("N", n, &i__1, &c_b22, &vr[vr_offset], ldvr, &
				work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[(
				ki - 1) * vr_dim1 + 1], &c__1);
			i__1 = ki - 2;
			sgemv_("N", n, &i__1, &c_b22, &vr[vr_offset], ldvr, &
				work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * 
				vr_dim1 + 1], &c__1);
		    } else {
			sscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 
				+ 1], &c__1);
			sscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], &
				c__1);
		    }

		    emax = 0.f;
		    i__1 = *n;
		    for (k = 1; k <= i__1; ++k) {
/* Computing MAX */
			r__3 = emax, r__4 = (r__1 = vr[k + (ki - 1) * vr_dim1]
				, dabs(r__1)) + (r__2 = vr[k + ki * vr_dim1], 
				dabs(r__2));
			emax = dmax(r__3,r__4);
/* L120: */
		    }
		    remax = 1.f / emax;
		    sscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1);
		    sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1);
		}
	    }

	    --is;
	    if (ip != 0) {
		--is;
	    }
L130:
	    if (ip == 1) {
		ip = 0;
	    }
	    if (ip == -1) {
		ip = 1;
	    }
/* L140: */
	}
    }

    if (leftv) {

/*        Compute left eigenvectors. */

	ip = 0;
	is = 1;
	i__1 = *n;
	for (ki = 1; ki <= i__1; ++ki) {

	    if (ip == -1) {
		goto L250;
	    }
	    if (ki == *n) {
		goto L150;
	    }
	    if (t[ki + 1 + ki * t_dim1] == 0.f) {
		goto L150;
	    }
	    ip = 1;

L150:
	    if (somev) {
		if (! select[ki]) {
		    goto L250;
		}
	    }

/*           Compute the KI-th eigenvalue (WR,WI). */

	    wr = t[ki + ki * t_dim1];
	    wi = 0.f;
	    if (ip != 0) {
		wi = sqrt((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1))) * 
			sqrt((r__2 = t[ki + 1 + ki * t_dim1], dabs(r__2)));
	    }
/* Computing MAX */
	    r__1 = ulp * (dabs(wr) + dabs(wi));
	    smin = dmax(r__1,smlnum);

	    if (ip == 0) {

/*              Real left eigenvector. */

		work[ki + *n] = 1.f;

/*              Form right-hand side */

		i__2 = *n;
		for (k = ki + 1; k <= i__2; ++k) {
		    work[k + *n] = -t[ki + k * t_dim1];
/* L160: */
		}

/*              Solve the quasi-triangular system: */
/*                 (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK */

		vmax = 1.f;
		vcrit = bignum;

		jnxt = ki + 1;
		i__2 = *n;
		for (j = ki + 1; j <= i__2; ++j) {
		    if (j < jnxt) {
			goto L170;
		    }
		    j1 = j;
		    j2 = j;
		    jnxt = j + 1;
		    if (j < *n) {
			if (t[j + 1 + j * t_dim1] != 0.f) {
			    j2 = j + 1;
			    jnxt = j + 2;
			}
		    }

		    if (j1 == j2) {

/*                    1-by-1 diagonal block */

/*                    Scale if necessary to avoid overflow when forming */
/*                    the right-hand side. */

			if (work[j] > vcrit) {
			    rec = 1.f / vmax;
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &rec, &work[ki + *n], &c__1);
			    vmax = 1.f;
			    vcrit = bignum;
			}

			i__3 = j - ki - 1;
			work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], 
				&c__1, &work[ki + 1 + *n], &c__1);

/*                    Solve (T(J,J)-WR)'*X = WORK */

			slaln2_(&c_false, &c__1, &c__1, &smin, &c_b22, &t[j + 
				j * t_dim1], ldt, &c_b22, &c_b22, &work[j + *
				n], n, &wr, &c_b25, x, &c__2, &scale, &xnorm, 
				&ierr);

/*                    Scale if necessary */

			if (scale != 1.f) {
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &scale, &work[ki + *n], &c__1);
			}
			work[j + *n] = x[0];
/* Computing MAX */
			r__2 = (r__1 = work[j + *n], dabs(r__1));
			vmax = dmax(r__2,vmax);
			vcrit = bignum / vmax;

		    } else {

/*                    2-by-2 diagonal block */

/*                    Scale if necessary to avoid overflow when forming */
/*                    the right-hand side. */

/* Computing MAX */
			r__1 = work[j], r__2 = work[j + 1];
			beta = dmax(r__1,r__2);
			if (beta > vcrit) {
			    rec = 1.f / vmax;
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &rec, &work[ki + *n], &c__1);
			    vmax = 1.f;
			    vcrit = bignum;
			}

			i__3 = j - ki - 1;
			work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], 
				&c__1, &work[ki + 1 + *n], &c__1);

			i__3 = j - ki - 1;
			work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 1 + (j + 1) *
				 t_dim1], &c__1, &work[ki + 1 + *n], &c__1);

/*                    Solve */
/*                      [T(J,J)-WR   T(J,J+1)     ]'* X = SCALE*( WORK1 ) */
/*                      [T(J+1,J)    T(J+1,J+1)-WR]             ( WORK2 ) */

			slaln2_(&c_true, &c__2, &c__1, &smin, &c_b22, &t[j + 
				j * t_dim1], ldt, &c_b22, &c_b22, &work[j + *
				n], n, &wr, &c_b25, x, &c__2, &scale, &xnorm, 
				&ierr);

/*                    Scale if necessary */

			if (scale != 1.f) {
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &scale, &work[ki + *n], &c__1);
			}
			work[j + *n] = x[0];
			work[j + 1 + *n] = x[1];

/* Computing MAX */
			r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = (
				r__2 = work[j + 1 + *n], dabs(r__2)), r__3 = 
				max(r__3,r__4);
			vmax = dmax(r__3,vmax);
			vcrit = bignum / vmax;

		    }
L170:
		    ;
		}

/*              Copy the vector x or Q*x to VL and normalize. */

		if (! over) {
		    i__2 = *n - ki + 1;
		    scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * 
			    vl_dim1], &c__1);

		    i__2 = *n - ki + 1;
		    ii = isamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 
			    1;
		    remax = 1.f / (r__1 = vl[ii + is * vl_dim1], dabs(r__1));
		    i__2 = *n - ki + 1;
		    sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1);

		    i__2 = ki - 1;
		    for (k = 1; k <= i__2; ++k) {
			vl[k + is * vl_dim1] = 0.f;
/* L180: */
		    }

		} else {

		    if (ki < *n) {
			i__2 = *n - ki;
			sgemv_("N", n, &i__2, &c_b22, &vl[(ki + 1) * vl_dim1 
				+ 1], ldvl, &work[ki + 1 + *n], &c__1, &work[
				ki + *n], &vl[ki * vl_dim1 + 1], &c__1);
		    }

		    ii = isamax_(n, &vl[ki * vl_dim1 + 1], &c__1);
		    remax = 1.f / (r__1 = vl[ii + ki * vl_dim1], dabs(r__1));
		    sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1);

		}

	    } else {

/*              Complex left eigenvector. */

/*               Initial solve: */
/*                 ((T(KI,KI)    T(KI,KI+1) )' - (WR - I* WI))*X = 0. */
/*                 ((T(KI+1,KI) T(KI+1,KI+1))                ) */

		if ((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1)) >= (r__2 = 
			t[ki + 1 + ki * t_dim1], dabs(r__2))) {
		    work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1];
		    work[ki + 1 + n2] = 1.f;
		} else {
		    work[ki + *n] = 1.f;
		    work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1];
		}
		work[ki + 1 + *n] = 0.f;
		work[ki + n2] = 0.f;

/*              Form right-hand side */

		i__2 = *n;
		for (k = ki + 2; k <= i__2; ++k) {
		    work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1];
		    work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1]
			    ;
/* L190: */
		}

/*              Solve complex quasi-triangular system: */
/*              ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 */

		vmax = 1.f;
		vcrit = bignum;

		jnxt = ki + 2;
		i__2 = *n;
		for (j = ki + 2; j <= i__2; ++j) {
		    if (j < jnxt) {
			goto L200;
		    }
		    j1 = j;
		    j2 = j;
		    jnxt = j + 1;
		    if (j < *n) {
			if (t[j + 1 + j * t_dim1] != 0.f) {
			    j2 = j + 1;
			    jnxt = j + 2;
			}
		    }

		    if (j1 == j2) {

/*                    1-by-1 diagonal block */

/*                    Scale if necessary to avoid overflow when */
/*                    forming the right-hand side elements. */

			if (work[j] > vcrit) {
			    rec = 1.f / vmax;
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &rec, &work[ki + *n], &c__1);
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &rec, &work[ki + n2], &c__1);
			    vmax = 1.f;
			    vcrit = bignum;
			}

			i__3 = j - ki - 2;
			work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], 
				&c__1, &work[ki + 2 + *n], &c__1);
			i__3 = j - ki - 2;
			work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], 
				&c__1, &work[ki + 2 + n2], &c__1);

/*                    Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */

			r__1 = -wi;
			slaln2_(&c_false, &c__1, &c__2, &smin, &c_b22, &t[j + 
				j * t_dim1], ldt, &c_b22, &c_b22, &work[j + *
				n], n, &wr, &r__1, x, &c__2, &scale, &xnorm, &
				ierr);

/*                    Scale if necessary */

			if (scale != 1.f) {
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &scale, &work[ki + *n], &c__1);
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &scale, &work[ki + n2], &c__1);
			}
			work[j + *n] = x[0];
			work[j + n2] = x[2];
/* Computing MAX */
			r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = (
				r__2 = work[j + n2], dabs(r__2)), r__3 = max(
				r__3,r__4);
			vmax = dmax(r__3,vmax);
			vcrit = bignum / vmax;

		    } else {

/*                    2-by-2 diagonal block */

/*                    Scale if necessary to avoid overflow when forming */
/*                    the right-hand side elements. */

/* Computing MAX */
			r__1 = work[j], r__2 = work[j + 1];
			beta = dmax(r__1,r__2);
			if (beta > vcrit) {
			    rec = 1.f / vmax;
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &rec, &work[ki + *n], &c__1);
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &rec, &work[ki + n2], &c__1);
			    vmax = 1.f;
			    vcrit = bignum;
			}

			i__3 = j - ki - 2;
			work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], 
				&c__1, &work[ki + 2 + *n], &c__1);

			i__3 = j - ki - 2;
			work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], 
				&c__1, &work[ki + 2 + n2], &c__1);

			i__3 = j - ki - 2;
			work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 2 + (j + 1) *
				 t_dim1], &c__1, &work[ki + 2 + *n], &c__1);

			i__3 = j - ki - 2;
			work[j + 1 + n2] -= sdot_(&i__3, &t[ki + 2 + (j + 1) *
				 t_dim1], &c__1, &work[ki + 2 + n2], &c__1);

/*                    Solve 2-by-2 complex linear equation */
/*                      ([T(j,j)   T(j,j+1)  ]'-(wr-i*wi)*I)*X = SCALE*B */
/*                      ([T(j+1,j) T(j+1,j+1)]             ) */

			r__1 = -wi;
			slaln2_(&c_true, &c__2, &c__2, &smin, &c_b22, &t[j + 
				j * t_dim1], ldt, &c_b22, &c_b22, &work[j + *
				n], n, &wr, &r__1, x, &c__2, &scale, &xnorm, &
				ierr);

/*                    Scale if necessary */

			if (scale != 1.f) {
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &scale, &work[ki + *n], &c__1);
			    i__3 = *n - ki + 1;
			    sscal_(&i__3, &scale, &work[ki + n2], &c__1);
			}
			work[j + *n] = x[0];
			work[j + n2] = x[2];
			work[j + 1 + *n] = x[1];
			work[j + 1 + n2] = x[3];
/* Computing MAX */
			r__1 = dabs(x[0]), r__2 = dabs(x[2]), r__1 = max(r__1,
				r__2), r__2 = dabs(x[1]), r__1 = max(r__1,
				r__2), r__2 = dabs(x[3]), r__1 = max(r__1,
				r__2);
			vmax = dmax(r__1,vmax);
			vcrit = bignum / vmax;

		    }
L200:
		    ;
		}

/*              Copy the vector x or Q*x to VL and normalize. */

		if (! over) {
		    i__2 = *n - ki + 1;
		    scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * 
			    vl_dim1], &c__1);
		    i__2 = *n - ki + 1;
		    scopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * 
			    vl_dim1], &c__1);

		    emax = 0.f;
		    i__2 = *n;
		    for (k = ki; k <= i__2; ++k) {
/* Computing MAX */
			r__3 = emax, r__4 = (r__1 = vl[k + is * vl_dim1], 
				dabs(r__1)) + (r__2 = vl[k + (is + 1) * 
				vl_dim1], dabs(r__2));
			emax = dmax(r__3,r__4);
/* L220: */
		    }
		    remax = 1.f / emax;
		    i__2 = *n - ki + 1;
		    sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1);
		    i__2 = *n - ki + 1;
		    sscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1)
			    ;

		    i__2 = ki - 1;
		    for (k = 1; k <= i__2; ++k) {
			vl[k + is * vl_dim1] = 0.f;
			vl[k + (is + 1) * vl_dim1] = 0.f;
/* L230: */
		    }
		} else {
		    if (ki < *n - 1) {
			i__2 = *n - ki - 1;
			sgemv_("N", n, &i__2, &c_b22, &vl[(ki + 2) * vl_dim1 
				+ 1], ldvl, &work[ki + 2 + *n], &c__1, &work[
				ki + *n], &vl[ki * vl_dim1 + 1], &c__1);
			i__2 = *n - ki - 1;
			sgemv_("N", n, &i__2, &c_b22, &vl[(ki + 2) * vl_dim1 
				+ 1], ldvl, &work[ki + 2 + n2], &c__1, &work[
				ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], &
				c__1);
		    } else {
			sscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], &
				c__1);
			sscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 
				+ 1], &c__1);
		    }

		    emax = 0.f;
		    i__2 = *n;
		    for (k = 1; k <= i__2; ++k) {
/* Computing MAX */
			r__3 = emax, r__4 = (r__1 = vl[k + ki * vl_dim1], 
				dabs(r__1)) + (r__2 = vl[k + (ki + 1) * 
				vl_dim1], dabs(r__2));
			emax = dmax(r__3,r__4);
/* L240: */
		    }
		    remax = 1.f / emax;
		    sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1);
		    sscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1);

		}

	    }

	    ++is;
	    if (ip != 0) {
		++is;
	    }
L250:
	    if (ip == -1) {
		ip = 0;
	    }
	    if (ip == 1) {
		ip = -1;
	    }

/* L260: */
	}

    }

    return 0;

/*     End of STREVC */

} /* strevc_ */

Exemplo n.º 2

0

Exibir arquivo

Arquivo: psnaitr.c Projeto: openbigdatagroup/pspectralclustering

/* Subroutine */ int psnaitr_(integer *comm, integer *ido, char *bmat,
                              integer *n, integer *k, integer *np, integer *nb, real *resid, real *
                              rnorm, real *v, integer *ldv, real *h__, integer *ldh, integer *ipntr,
                              real *workd, real *workl, integer *info, ftnlen bmat_len)
{
    /* Initialized data */

    static logical first = TRUE_;

    /* System generated locals */
    integer h_dim1, h_offset, v_dim1, v_offset, i__1, i__2;
    real r__1, r__2;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    static integer i__, j;
    static real t0, t1, t2, t3, t4, t5, rnorm_buf__;
    static integer jj, ipj, irj, ivj;
    static real ulp, tst1;
    static integer ierr, iter;
    static real unfl, ovfl;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    static integer itry;
    static real temp1;
    static logical orth1, orth2, step3, step4;
    static real betaj;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer infol;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
                                       real *, integer *, real *, integer *, real *, real *, integer *,
                                       ftnlen);
    static real xtemp[2], wnorm;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
                                       integer *), saxpy_(integer *, real *, real *, integer *, real *,
                                               integer *), mpi_allreduce__(real *, real *, integer *, integer *,
                                                       integer *, integer *, integer *);
    static real rnorm1;
    extern /* Subroutine */ int slabad_(real *, real *);
    static logical rstart;
    static integer msglvl;
    static real smlnum;
    extern /* Subroutine */ int psmout_(integer *, integer *, integer *,
                                        integer *, real *, integer *, integer *, char *, ftnlen), pivout_(
                                            integer *, integer *, integer *, integer *, integer *, char *,
                                            ftnlen), second_(real *);
    extern doublereal slanhs_(char *, integer *, real *, integer *, real *,
                              ftnlen);
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
                                        real *, integer *, integer *, real *, integer *, integer *,
                                        ftnlen), psvout_(integer *, integer *, integer *, real *, integer
                                                *, char *, ftnlen), psgetv0_(integer *, integer *, char *,
                                                        integer *, logical *, integer *, integer *, real *, integer *,
                                                        real *, real *, integer *, real *, real *, integer *, ftnlen);
    extern doublereal psnorm2_(integer *, integer *, real *, integer *),
           pslamch_(integer *, char *, ftnlen);



    /*     %---------------% */
    /*     | MPI Variables | */
    /*     %---------------% */

    /* /+ */
    /* * */
    /* *  (C) 1993 by Argonne National Laboratory and Mississipi State University. */
    /* *      All rights reserved.  See COPYRIGHT in top-level directory. */
    /* +/ */

    /* /+ user include file for MPI programs, with no dependencies +/ */

    /* /+ return codes +/ */







    /*     We handle datatypes by putting the variables that hold them into */
    /*     common.  This way, a Fortran program can directly use the various */
    /*     datatypes and can even give them to C programs. */

    /*     MPI_BOTTOM needs to be a known address; here we put it at the */
    /*     beginning of the common block.  The point-to-point and collective */
    /*     routines know about MPI_BOTTOM, but MPI_TYPE_STRUCT as yet does not. */

    /*     The types MPI_INTEGER1,2,4 and MPI_REAL4,8 are OPTIONAL. */
    /*     Their values are zero if they are not available.  Note that */
    /*     using these reduces the portability of code (though may enhance */
    /*     portability between Crays and other systems) */



    /*     All other MPI routines are subroutines */

    /*     The attribute copy/delete functions are symbols that can be passed */
    /*     to MPI routines */

    /*     %----------------------------------------------------% */
    /*     | Include files for debugging and timing information | */
    /*     %----------------------------------------------------% */


    /* \SCCS Information: @(#) */
    /* FILE: debug.h   SID: 2.3   DATE OF SID: 11/16/95   RELEASE: 2 */

    /*     %---------------------------------% */
    /*     | See debug.doc for documentation | */
    /*     %---------------------------------% */

    /*     %------------------% */
    /*     | Scalar Arguments | */
    /*     %------------------% */

    /*     %--------------------------------% */
    /*     | See stat.doc for documentation | */
    /*     %--------------------------------% */

    /* \SCCS Information: @(#) */
    /* FILE: stat.h   SID: 2.2   DATE OF SID: 11/16/95   RELEASE: 2 */



    /*     %-----------------% */
    /*     | Array Arguments | */
    /*     %-----------------% */


    /*     %------------% */
    /*     | Parameters | */
    /*     %------------% */


    /*     %---------------% */
    /*     | Local Scalars | */
    /*     %---------------% */




    /*     %-----------------------% */
    /*     | Local Array Arguments | */
    /*     %-----------------------% */


    /*     %----------------------% */
    /*     | External Subroutines | */
    /*     %----------------------% */


    /*     %--------------------% */
    /*     | External Functions | */
    /*     %--------------------% */


    /*     %---------------------% */
    /*     | Intrinsic Functions | */
    /*     %---------------------% */


    /*     %-----------------% */
    /*     | Data statements | */
    /*     %-----------------% */

    /* Parameter adjustments */
    --workd;
    --resid;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    --workl;
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    --ipntr;

    /* Function Body */

    /*     %-----------------------% */
    /*     | Executable Statements | */
    /*     %-----------------------% */

    if (first) {

        /*        %-----------------------------------------% */
        /*        | Set machine-dependent constants for the | */
        /*        | the splitting and deflation criterion.  | */
        /*        | If norm(H) <= sqrt(OVFL),               | */
        /*        | overflow should not occur.              | */
        /*        | REFERENCE: LAPACK subroutine slahqr     | */
        /*        %-----------------------------------------% */

        unfl = pslamch_(comm, "safe minimum", (ftnlen)12);
        ovfl = 1.f / unfl;
        slabad_(&unfl, &ovfl);
        ulp = pslamch_(comm, "precision", (ftnlen)9);
        smlnum = unfl * (*n / ulp);
        first = FALSE_;
    }

    if (*ido == 0) {

        /*        %-------------------------------% */
        /*        | Initialize timing statistics  | */
        /*        | & message level for debugging | */
        /*        %-------------------------------% */

        second_(&t0);
        msglvl = debug_1.mnaitr;

        /*        %------------------------------% */
        /*        | Initial call to this routine | */
        /*        %------------------------------% */

        *info = 0;
        step3 = FALSE_;
        step4 = FALSE_;
        rstart = FALSE_;
        orth1 = FALSE_;
        orth2 = FALSE_;
        j = *k + 1;
        ipj = 1;
        irj = ipj + *n;
        ivj = irj + *n;
    }

    /*     %-------------------------------------------------% */
    /*     | When in reverse communication mode one of:      | */
    /*     | STEP3, STEP4, ORTH1, ORTH2, RSTART              | */
    /*     | will be .true. when ....                        | */
    /*     | STEP3: return from computing OP*v_{j}.          | */
    /*     | STEP4: return from computing B-norm of OP*v_{j} | */
    /*     | ORTH1: return from computing B-norm of r_{j+1}  | */
    /*     | ORTH2: return from computing B-norm of          | */
    /*     |        correction to the residual vector.       | */
    /*     | RSTART: return from OP computations needed by   | */
    /*     |         psgetv0.                                | */
    /*     %-------------------------------------------------% */

    if (step3) {
        goto L50;
    }
    if (step4) {
        goto L60;
    }
    if (orth1) {
        goto L70;
    }
    if (orth2) {
        goto L90;
    }
    if (rstart) {
        goto L30;
    }

    /*     %-----------------------------% */
    /*     | Else this is the first step | */
    /*     %-----------------------------% */

    /*     %--------------------------------------------------------------% */
    /*     |                                                              | */
    /*     |        A R N O L D I     I T E R A T I O N     L O O P       | */
    /*     |                                                              | */
    /*     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | */
    /*     %--------------------------------------------------------------% */
L1000:

    if (msglvl > 1) {
        pivout_(comm, &debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: "
                "generating Arnoldi vector number", (ftnlen)40);
        psvout_(comm, &debug_1.logfil, &c__1, rnorm, &debug_1.ndigit, "_nait"
                "r: B-norm of the current residual is", (ftnlen)41);
    }

    /*        %---------------------------------------------------% */
    /*        | STEP 1: Check if the B norm of j-th residual      | */
    /*        | vector is zero. Equivalent to determing whether   | */
    /*        | an exact j-step Arnoldi factorization is present. | */
    /*        %---------------------------------------------------% */

    betaj = *rnorm;
    if (*rnorm > 0.f) {
        goto L40;
    }

    /*           %---------------------------------------------------% */
    /*           | Invariant subspace found, generate a new starting | */
    /*           | vector which is orthogonal to the current Arnoldi | */
    /*           | basis and continue the iteration.                 | */
    /*           %---------------------------------------------------% */

    if (msglvl > 0) {
        pivout_(comm, &debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: "
                "****** RESTART AT STEP ******", (ftnlen)37);
    }

    /*           %---------------------------------------------% */
    /*           | ITRY is the loop variable that controls the | */
    /*           | maximum amount of times that a restart is   | */
    /*           | attempted. NRSTRT is used by stat.h         | */
    /*           %---------------------------------------------% */

    betaj = 0.f;
    ++timing_1.nrstrt;
    itry = 1;
L20:
    rstart = TRUE_;
    *ido = 0;
L30:

    /*           %--------------------------------------% */
    /*           | If in reverse communication mode and | */
    /*           | RSTART = .true. flow returns here.   | */
    /*           %--------------------------------------% */

    psgetv0_(comm, ido, bmat, &itry, &c_false, n, &j, &v[v_offset], ldv, &
             resid[1], rnorm, &ipntr[1], &workd[1], &workl[1], &ierr, (ftnlen)
             1);
    if (*ido != 99) {
        goto L9000;
    }
    if (ierr < 0) {
        ++itry;
        if (itry <= 3) {
            goto L20;
        }

        /*              %------------------------------------------------% */
        /*              | Give up after several restart attempts.        | */
        /*              | Set INFO to the size of the invariant subspace | */
        /*              | which spans OP and exit.                       | */
        /*              %------------------------------------------------% */

        *info = j - 1;
        second_(&t1);
        timing_1.tnaitr += t1 - t0;
        *ido = 99;
        goto L9000;
    }

L40:

    /*        %---------------------------------------------------------% */
    /*        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  | */
    /*        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow | */
    /*        | when reciprocating a small RNORM, test against lower    | */
    /*        | machine bound.                                          | */
    /*        %---------------------------------------------------------% */

    scopy_(n, &resid[1], &c__1, &v[j * v_dim1 + 1], &c__1);
    if (*rnorm >= unfl) {
        temp1 = 1.f / *rnorm;
        sscal_(n, &temp1, &v[j * v_dim1 + 1], &c__1);
        sscal_(n, &temp1, &workd[ipj], &c__1);
    } else {

        /*            %-----------------------------------------% */
        /*            | To scale both v_{j} and p_{j} carefully | */
        /*            | use LAPACK routine SLASCL               | */
        /*            %-----------------------------------------% */

        slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &v[j * v_dim1
                + 1], n, &infol, (ftnlen)7);
        slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &workd[ipj],
                n, &infol, (ftnlen)7);
    }

    /*        %------------------------------------------------------% */
    /*        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} | */
    /*        | Note that this is not quite yet r_{j}. See STEP 4    | */
    /*        %------------------------------------------------------% */

    step3 = TRUE_;
    ++timing_1.nopx;
    second_(&t2);
    scopy_(n, &v[j * v_dim1 + 1], &c__1, &workd[ivj], &c__1);
    ipntr[1] = ivj;
    ipntr[2] = irj;
    ipntr[3] = ipj;
    *ido = 1;

    /*        %-----------------------------------% */
    /*        | Exit in order to compute OP*v_{j} | */
    /*        %-----------------------------------% */

    goto L9000;
L50:

    /*        %----------------------------------% */
    /*        | Back from reverse communication; | */
    /*        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}   | */
    /*        | if step3 = .true.                | */
    /*        %----------------------------------% */

    second_(&t3);
    timing_1.tmvopx += t3 - t2;
    step3 = FALSE_;

    /*        %------------------------------------------% */
    /*        | Put another copy of OP*v_{j} into RESID. | */
    /*        %------------------------------------------% */

    scopy_(n, &workd[irj], &c__1, &resid[1], &c__1);

    /*        %---------------------------------------% */
    /*        | STEP 4:  Finish extending the Arnoldi | */
    /*        |          factorization to length j.   | */
    /*        %---------------------------------------% */

    second_(&t2);
    if (*(unsigned char *)bmat == 'G') {
        ++timing_1.nbx;
        step4 = TRUE_;
        ipntr[1] = irj;
        ipntr[2] = ipj;
        *ido = 2;

        /*           %-------------------------------------% */
        /*           | Exit in order to compute B*OP*v_{j} | */
        /*           %-------------------------------------% */

        goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
        scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1);
    }
L60:

    /*        %----------------------------------% */
    /*        | Back from reverse communication; | */
    /*        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | */
    /*        | if step4 = .true.                | */
    /*        %----------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        second_(&t3);
        timing_1.tmvbx += t3 - t2;
    }

    step4 = FALSE_;

    /*        %-------------------------------------% */
    /*        | The following is needed for STEP 5. | */
    /*        | Compute the B-norm of OP*v_{j}.     | */
    /*        %-------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        rnorm_buf__ = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1);
        mpi_allreduce__(&rnorm_buf__, &wnorm, &c__1, &mpipriv_1.mpi_real__, &
                        mpipriv_1.mpi_sum__, comm, &ierr);
        wnorm = sqrt((dabs(wnorm)));
    } else if (*(unsigned char *)bmat == 'I') {
        wnorm = psnorm2_(comm, n, &resid[1], &c__1);
    }

    /*        %-----------------------------------------% */
    /*        | Compute the j-th residual corresponding | */
    /*        | to the j step factorization.            | */
    /*        | Use Classical Gram Schmidt and compute: | */
    /*        | w_{j} <-  V_{j}^T * B * OP * v_{j}      | */
    /*        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      | */
    /*        %-----------------------------------------% */


    /*        %------------------------------------------% */
    /*        | Compute the j Fourier coefficients w_{j} | */
    /*        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  | */
    /*        %------------------------------------------% */

    sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b48,
           &workl[1], &c__1, (ftnlen)1);
    mpi_allreduce__(&workl[1], &h__[j * h_dim1 + 1], &j, &
                    mpipriv_1.mpi_real__, &mpipriv_1.mpi_sum__, comm, &ierr);

    /*        %--------------------------------------% */
    /*        | Orthogonalize r_{j} against V_{j}.   | */
    /*        | RESID contains OP*v_{j}. See STEP 3. | */
    /*        %--------------------------------------% */

    sgemv_("N", n, &j, &c_b51, &v[v_offset], ldv, &h__[j * h_dim1 + 1], &c__1,
           &c_b25, &resid[1], &c__1, (ftnlen)1);

    if (j > 1) {
        h__[j + (j - 1) * h_dim1] = betaj;
    }

    second_(&t4);

    orth1 = TRUE_;

    second_(&t2);
    if (*(unsigned char *)bmat == 'G') {
        ++timing_1.nbx;
        scopy_(n, &resid[1], &c__1, &workd[irj], &c__1);
        ipntr[1] = irj;
        ipntr[2] = ipj;
        *ido = 2;

        /*           %----------------------------------% */
        /*           | Exit in order to compute B*r_{j} | */
        /*           %----------------------------------% */

        goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
        scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1);
    }
L70:

    /*        %---------------------------------------------------% */
    /*        | Back from reverse communication if ORTH1 = .true. | */
    /*        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    | */
    /*        %---------------------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        second_(&t3);
        timing_1.tmvbx += t3 - t2;
    }

    orth1 = FALSE_;

    /*        %------------------------------% */
    /*        | Compute the B-norm of r_{j}. | */
    /*        %------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        rnorm_buf__ = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1);
        mpi_allreduce__(&rnorm_buf__, rnorm, &c__1, &mpipriv_1.mpi_real__, &
                        mpipriv_1.mpi_sum__, comm, &ierr);
        *rnorm = sqrt((dabs(*rnorm)));
    } else if (*(unsigned char *)bmat == 'I') {
        *rnorm = psnorm2_(comm, n, &resid[1], &c__1);
    }

    /*        %-----------------------------------------------------------% */
    /*        | STEP 5: Re-orthogonalization / Iterative refinement phase | */
    /*        | Maximum NITER_ITREF tries.                                | */
    /*        |                                                           | */
    /*        |          s      = V_{j}^T * B * r_{j}                     | */
    /*        |          r_{j}  = r_{j} - V_{j}*s                         | */
    /*        |          alphaj = alphaj + s_{j}                          | */
    /*        |                                                           | */
    /*        | The stopping criteria used for iterative refinement is    | */
    /*        | discussed in Parlett's book SEP, page 107 and in Gragg &  | */
    /*        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         | */
    /*        | Determine if we need to correct the residual. The goal is | */
    /*        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  | */
    /*        | The following test determines whether the sine of the     | */
    /*        | angle between  OP*x and the computed residual is less     | */
    /*        | than or equal to 0.717.                                   | */
    /*        %-----------------------------------------------------------% */

    if (*rnorm > wnorm * .717f) {
        goto L100;
    }
    iter = 0;
    ++timing_1.nrorth;

    /*        %---------------------------------------------------% */
    /*        | Enter the Iterative refinement phase. If further  | */
    /*        | refinement is necessary, loop back here. The loop | */
    /*        | variable is ITER. Perform a step of Classical     | */
    /*        | Gram-Schmidt using all the Arnoldi vectors V_{j}  | */
    /*        %---------------------------------------------------% */

L80:

    if (msglvl > 2) {
        xtemp[0] = wnorm;
        xtemp[1] = *rnorm;
        psvout_(comm, &debug_1.logfil, &c__2, xtemp, &debug_1.ndigit, "_nait"
                "r: re-orthonalization; wnorm and rnorm are", (ftnlen)47);
        psvout_(comm, &debug_1.logfil, &j, &h__[j * h_dim1 + 1], &
                debug_1.ndigit, "_naitr: j-th column of H", (ftnlen)24);
    }

    /*        %----------------------------------------------------% */
    /*        | Compute V_{j}^T * B * r_{j}.                       | */
    /*        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | */
    /*        %----------------------------------------------------% */

    sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b48,
           &workl[j + 1], &c__1, (ftnlen)1);
    mpi_allreduce__(&workl[j + 1], &workl[1], &j, &mpipriv_1.mpi_real__, &
                    mpipriv_1.mpi_sum__, comm, &ierr);

    /*        %---------------------------------------------% */
    /*        | Compute the correction to the residual:     | */
    /*        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). | */
    /*        | The correction to H is v(:,1:J)*H(1:J,1:J)  | */
    /*        | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j.         | */
    /*        %---------------------------------------------% */

    sgemv_("N", n, &j, &c_b51, &v[v_offset], ldv, &workl[1], &c__1, &c_b25, &
           resid[1], &c__1, (ftnlen)1);
    saxpy_(&j, &c_b25, &workl[1], &c__1, &h__[j * h_dim1 + 1], &c__1);

    orth2 = TRUE_;
    second_(&t2);
    if (*(unsigned char *)bmat == 'G') {
        ++timing_1.nbx;
        scopy_(n, &resid[1], &c__1, &workd[irj], &c__1);
        ipntr[1] = irj;
        ipntr[2] = ipj;
        *ido = 2;

        /*           %-----------------------------------% */
        /*           | Exit in order to compute B*r_{j}. | */
        /*           | r_{j} is the corrected residual.  | */
        /*           %-----------------------------------% */

        goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
        scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1);
    }
L90:

    /*        %---------------------------------------------------% */
    /*        | Back from reverse communication if ORTH2 = .true. | */
    /*        %---------------------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        second_(&t3);
        timing_1.tmvbx += t3 - t2;
    }

    /*        %-----------------------------------------------------% */
    /*        | Compute the B-norm of the corrected residual r_{j}. | */
    /*        %-----------------------------------------------------% */

    if (*(unsigned char *)bmat == 'G') {
        rnorm_buf__ = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1);
        mpi_allreduce__(&rnorm_buf__, &rnorm1, &c__1, &mpipriv_1.mpi_real__, &
                        mpipriv_1.mpi_sum__, comm, &ierr);
        rnorm1 = sqrt((dabs(rnorm1)));
    } else if (*(unsigned char *)bmat == 'I') {
        rnorm1 = psnorm2_(comm, n, &resid[1], &c__1);
    }

    if (msglvl > 0 && iter > 0) {
        pivout_(comm, &debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: "
                "Iterative refinement for Arnoldi residual", (ftnlen)49);
        if (msglvl > 2) {
            xtemp[0] = *rnorm;
            xtemp[1] = rnorm1;
            psvout_(comm, &debug_1.logfil, &c__2, xtemp, &debug_1.ndigit,
                    "_naitr: iterative refinement ; rnorm and rnorm1 are", (
                        ftnlen)51);
        }
    }

    /*        %-----------------------------------------% */
    /*        | Determine if we need to perform another | */
    /*        | step of re-orthogonalization.           | */
    /*        %-----------------------------------------% */

    if (rnorm1 > *rnorm * .717f) {

        /*           %---------------------------------------% */
        /*           | No need for further refinement.       | */
        /*           | The cosine of the angle between the   | */
        /*           | corrected residual vector and the old | */
        /*           | residual vector is greater than 0.717 | */
        /*           | In other words the corrected residual | */
        /*           | and the old residual vector share an  | */
        /*           | angle of less than arcCOS(0.717)      | */
        /*           %---------------------------------------% */

        *rnorm = rnorm1;

    } else {

        /*           %-------------------------------------------% */
        /*           | Another step of iterative refinement step | */
        /*           | is required. NITREF is used by stat.h     | */
        /*           %-------------------------------------------% */

        ++timing_1.nitref;
        *rnorm = rnorm1;
        ++iter;
        if (iter <= 1) {
            goto L80;
        }

        /*           %-------------------------------------------------% */
        /*           | Otherwise RESID is numerically in the span of V | */
        /*           %-------------------------------------------------% */

        i__1 = *n;
        for (jj = 1; jj <= i__1; ++jj) {
            resid[jj] = 0.f;
            /* L95: */
        }
        *rnorm = 0.f;
    }

    /*        %----------------------------------------------% */
    /*        | Branch here directly if iterative refinement | */
    /*        | wasn't necessary or after at most NITER_REF  | */
    /*        | steps of iterative refinement.               | */
    /*        %----------------------------------------------% */

L100:

    rstart = FALSE_;
    orth2 = FALSE_;

    second_(&t5);
    timing_1.titref += t5 - t4;

    /*        %------------------------------------% */
    /*        | STEP 6: Update  j = j+1;  Continue | */
    /*        %------------------------------------% */

    ++j;
    if (j > *k + *np) {
        second_(&t1);
        timing_1.tnaitr += t1 - t0;
        *ido = 99;
        i__1 = *k + *np - 1;
        for (i__ = max(1,*k); i__ <= i__1; ++i__) {

            /*              %--------------------------------------------% */
            /*              | Check for splitting and deflation.         | */
            /*              | Use a standard test as in the QR algorithm | */
            /*              | REFERENCE: LAPACK subroutine slahqr        | */
            /*              %--------------------------------------------% */

            tst1 = (r__1 = h__[i__ + i__ * h_dim1], dabs(r__1)) + (r__2 = h__[
                        i__ + 1 + (i__ + 1) * h_dim1], dabs(r__2));
            if (tst1 == 0.f) {
                i__2 = *k + *np;
                tst1 = slanhs_("1", &i__2, &h__[h_offset], ldh, &workd[*n + 1]
                               , (ftnlen)1);
            }
            /* Computing MAX */
            r__2 = ulp * tst1;
            if ((r__1 = h__[i__ + 1 + i__ * h_dim1], dabs(r__1)) <= dmax(r__2,
                    smlnum)) {
                h__[i__ + 1 + i__ * h_dim1] = 0.f;
            }
            /* L110: */
        }

        if (msglvl > 2) {
            i__1 = *k + *np;
            i__2 = *k + *np;
            psmout_(comm, &debug_1.logfil, &i__1, &i__2, &h__[h_offset], ldh,
                    &debug_1.ndigit, "_naitr: Final upper Hessenberg matrix "
                    "H of order K+NP", (ftnlen)53);
        }

        goto L9000;
    }

    /*        %--------------------------------------------------------% */
    /*        | Loop back to extend the factorization by another step. | */
    /*        %--------------------------------------------------------% */

    goto L1000;

    /*     %---------------------------------------------------------------% */
    /*     |                                                               | */
    /*     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  | */
    /*     |                                                               | */
    /*     %---------------------------------------------------------------% */

L9000:
    return 0;

    /*     %----------------% */
    /*     | End of psnaitr | */
    /*     %----------------% */

} /* psnaitr_ */

Exemplo n.º 3

0

Exibir arquivo

Arquivo: slarf.c Projeto: juanjosegarciaripoll/cblapack

/* Subroutine */ int slarf_(char *side, integer *m, integer *n, real *v, 
	integer *incv, real *tau, real *c__, integer *ldc, real *work)
{
    /* System generated locals */
    integer c_dim1, c_offset;
    real r__1;

    /* Local variables */
    integer i__;
    logical applyleft;
    integer lastc;
    integer lastv;

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SLARF applies a real elementary reflector H to a real m by n matrix */
/*  C, from either the left or the right. H is represented in the form */

/*        H = I - tau * v * v' */

/*  where tau is a real scalar and v is a real vector. */

/*  If tau = 0, then H is taken to be the unit matrix. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'L': form  H * C */
/*          = 'R': form  C * H */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix C. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix C. */

/*  V       (input) REAL array, dimension */
/*                     (1 + (M-1)*abs(INCV)) if SIDE = 'L' */
/*                  or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */
/*          The vector v in the representation of H. V is not used if */
/*          TAU = 0. */

/*  INCV    (input) INTEGER */
/*          The increment between elements of v. INCV <> 0. */

/*  TAU     (input) REAL */
/*          The value tau in the representation of H. */

/*  C       (input/output) REAL array, dimension (LDC,N) */
/*          On entry, the m by n matrix C. */
/*          On exit, C is overwritten by the matrix H * C if SIDE = 'L', */
/*          or C * H if SIDE = 'R'. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C. LDC >= max(1,M). */

/*  WORK    (workspace) REAL array, dimension */
/*                         (N) if SIDE = 'L' */
/*                      or (M) if SIDE = 'R' */

/*  ===================================================================== */

    /* Parameter adjustments */
    --v;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    applyleft = lsame_(side, "L");
    lastv = 0;
    lastc = 0;
    if (*tau != 0.f) {
/*     Set up variables for scanning V.  LASTV begins pointing to the end */
/*     of V. */
	if (applyleft) {
	    lastv = *m;
	} else {
	    lastv = *n;
	}
	if (*incv > 0) {
	    i__ = (lastv - 1) * *incv + 1;
	} else {
	    i__ = 1;
	}
/*     Look for the last non-zero row in V. */
	while(lastv > 0 && v[i__] == 0.f) {
	    --lastv;
	    i__ -= *incv;
	}
	if (applyleft) {
/*     Scan for the last non-zero column in C(1:lastv,:). */
	    lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc);
	} else {
/*     Scan for the last non-zero row in C(:,1:lastv). */
	    lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc);
	}
    }
/*     Note that lastc.eq.0 renders the BLAS operations null; no special */
/*     case is needed at this level. */
    if (applyleft) {

/*        Form  H * C */

	if (lastv > 0) {

/*           w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */

	    sgemv_("Transpose", &lastv, &lastc, &c_b4, &c__[c_offset], ldc, &
		    v[1], incv, &c_b5, &work[1], &c__1);

	    r__1 = -(*tau);
	    sger_(&lastv, &lastc, &r__1, &v[1], incv, &work[1], &c__1, &c__[
		    c_offset], ldc);
	}
    } else {

/*        Form  C * H */

	if (lastv > 0) {

/*           w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */

	    sgemv_("No transpose", &lastc, &lastv, &c_b4, &c__[c_offset], ldc, 
		     &v[1], incv, &c_b5, &work[1], &c__1);

	    r__1 = -(*tau);
	    sger_(&lastc, &lastv, &r__1, &work[1], &c__1, &v[1], incv, &c__[
		    c_offset], ldc);
	}
    }
    return 0;

/*     End of SLARF */

} /* slarf_ */

Exemplo n.º 4

0

Exibir arquivo

Arquivo: slauu2.c Projeto: dacap/loseface

/* Subroutine */ int slauu2_(char *uplo, integer *n, real *a, integer *lda, 
	integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;

    /* Local variables */
    integer i__;
    real aii;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
	    sgemv_(char *, integer *, integer *, real *, real *, integer *, 
	    real *, integer *, real *, real *, integer *);
    logical upper;
    extern /* Subroutine */ int xerbla_(char *, integer *);


/*  -- LAPACK auxiliary routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAUU2 computes the product U * U' or L' * L, where the triangular */
/*  factor U or L is stored in the upper or lower triangular part of */
/*  the array A. */

/*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored, */
/*  overwriting the factor U in A. */
/*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored, */
/*  overwriting the factor L in A. */

/*  This is the unblocked form of the algorithm, calling Level 2 BLAS. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the triangular factor stored in the array A */
/*          is upper or lower triangular: */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  N       (input) INTEGER */
/*          The order of the triangular factor U or L.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the triangular factor U or L. */
/*          On exit, if UPLO = 'U', the upper triangle of A is */
/*          overwritten with the upper triangle of the product U * U'; */
/*          if UPLO = 'L', the lower triangle of A is overwritten with */
/*          the lower triangle of the product L' * L. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -k, the k-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAUU2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    if (upper) {

/*        Compute the product U * U'. */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    aii = a[i__ + i__ * a_dim1];
	    if (i__ < *n) {
		i__2 = *n - i__ + 1;
		a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], 
			lda, &a[i__ + i__ * a_dim1], lda);
		i__2 = i__ - 1;
		i__3 = *n - i__;
		sgemv_("No transpose", &i__2, &i__3, &c_b7, &a[(i__ + 1) * 
			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
			aii, &a[i__ * a_dim1 + 1], &c__1);
	    } else {
		sscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1);
	    }
/* L10: */
	}

    } else {

/*        Compute the product L' * L. */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    aii = a[i__ + i__ * a_dim1];
	    if (i__ < *n) {
		i__2 = *n - i__ + 1;
		a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], &
			c__1, &a[i__ + i__ * a_dim1], &c__1);
		i__2 = *n - i__;
		i__3 = i__ - 1;
		sgemv_("Transpose", &i__2, &i__3, &c_b7, &a[i__ + 1 + a_dim1], 
			 lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, &a[i__ 
			+ a_dim1], lda);
	    } else {
		sscal_(&i__, &aii, &a[i__ + a_dim1], lda);
	    }
/* L20: */
	}
    }

    return 0;

/*     End of SLAUU2 */

} /* slauu2_ */

Exemplo n.º 5

0

Exibir arquivo

Arquivo: sgelss.c Projeto: EugeneGalipchak/antelope_contrib

/* Subroutine */ int sgelss_(integer *m, integer *n, integer *nrhs, real *a, 
	integer *lda, real *b, integer *ldb, real *s, real *rcond, integer *
	rank, real *work, integer *lwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    real r__1;

    /* Local variables */
    static real anrm, bnrm;
    static integer itau;
    static real vdum[1];
    static integer i__, iascl, ibscl, chunk;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static real sfmin;
    static integer minmn, maxmn;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *);
    static integer itaup, itauq;
    extern /* Subroutine */ int srscl_(integer *, real *, real *, integer *);
    static integer mnthr, iwork;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static integer bl, ie, il;
    extern /* Subroutine */ int slabad_(real *, real *);
    static integer mm, bdspac;
    extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer 
	    *, real *, real *, real *, real *, real *, integer *, integer *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static real bignum;
    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
	    *, real *, real *, integer *, integer *), slascl_(char *, integer 
	    *, integer *, real *, real *, integer *, integer *, real *, 
	    integer *, integer *), sgeqrf_(integer *, integer *, real 
	    *, integer *, real *, real *, integer *, integer *), slacpy_(char 
	    *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
	    real *, integer *), sbdsqr_(char *, integer *, integer *, 
	    integer *, integer *, real *, real *, real *, integer *, real *, 
	    integer *, real *, integer *, real *, integer *), sorgbr_(
	    char *, integer *, integer *, integer *, real *, integer *, real *
	    , real *, integer *, integer *);
    static integer ldwork;
    extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, 
	    integer *, integer *, real *, integer *, real *, real *, integer *
	    , real *, integer *, integer *);
    static integer minwrk, maxwrk;
    static real smlnum;
    extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *, integer *);
    static logical lquery;
    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *, integer *);
    static real eps, thr;


#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]


/*  -- LAPACK driver routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    SGELSS computes the minimum norm solution to a real linear least   
    squares problem:   

    Minimize 2-norm(| b - A*x |).   

    using the singular value decomposition (SVD) of A. A is an M-by-N   
    matrix which may be rank-deficient.   

    Several right hand side vectors b and solution vectors x can be   
    handled in a single call; they are stored as the columns of the   
    M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix   
    X.   

    The effective rank of A is determined by treating as zero those   
    singular values which are less than RCOND times the largest singular   
    value.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A. M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A. N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrices B and X. NRHS >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the M-by-N matrix A.   
            On exit, the first min(m,n) rows of A are overwritten with   
            its right singular vectors, stored rowwise.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    B       (input/output) REAL array, dimension (LDB,NRHS)   
            On entry, the M-by-NRHS right hand side matrix B.   
            On exit, B is overwritten by the N-by-NRHS solution   
            matrix X.  If m >= n and RANK = n, the residual   
            sum-of-squares for the solution in the i-th column is given   
            by the sum of squares of elements n+1:m in that column.   

    LDB     (input) INTEGER   
            The leading dimension of the array B. LDB >= max(1,max(M,N)).   

    S       (output) REAL array, dimension (min(M,N))   
            The singular values of A in decreasing order.   
            The condition number of A in the 2-norm = S(1)/S(min(m,n)).   

    RCOND   (input) REAL   
            RCOND is used to determine the effective rank of A.   
            Singular values S(i) <= RCOND*S(1) are treated as zero.   
            If RCOND < 0, machine precision is used instead.   

    RANK    (output) INTEGER   
            The effective rank of A, i.e., the number of singular values   
            which are greater than RCOND*S(1).   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK. LWORK >= 1, and also:   
            LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )   
            For good performance, LWORK should generally be larger.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  the algorithm for computing the SVD failed to converge;   
                  if INFO = i, i off-diagonal elements of an intermediate   
                  bidiagonal form did not converge to zero.   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --s;
    --work;

    /* Function Body */
    *info = 0;
    minmn = min(*m,*n);
    maxmn = max(*m,*n);
    mnthr = ilaenv_(&c__6, "SGELSS", " ", m, n, nrhs, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    lquery = *lwork == -1;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*m)) {
	*info = -5;
    } else if (*ldb < max(1,maxmn)) {
	*info = -7;
    }

/*     Compute workspace   
        (Note: Comments in the code beginning "Workspace:" describe the   
         minimal amount of workspace needed at that point in the code,   
         as well as the preferred amount for good performance.   
         NB refers to the optimal block size for the immediately   
         following subroutine, as returned by ILAENV.) */

    minwrk = 1;
    if (*info == 0 && (*lwork >= 1 || lquery)) {
	maxwrk = 0;
	mm = *m;
	if (*m >= *n && *m >= mnthr) {

/*           Path 1a - overdetermined, with many more rows than columns */

	    mm = *n;
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, 
		    n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
	    maxwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "SORMQR", "LT", 
		    m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2);
	    maxwrk = max(i__1,i__2);
	}
	if (*m >= *n) {

/*           Path 1 - overdetermined or exactly determined   

             Compute workspace needed for SBDSQR   

   Computing MAX */
	    i__1 = 1, i__2 = *n * 5;
	    bdspac = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "SGEBRD"
		    , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
	    maxwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "SORMBR", 
		    "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
	    maxwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "SORGBR",
		     "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
	    maxwrk = max(i__1,i__2);
	    maxwrk = max(maxwrk,bdspac);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n * *nrhs;
	    maxwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2);
	    minwrk = max(i__1,bdspac);
	    maxwrk = max(minwrk,maxwrk);
	}
	if (*n > *m) {

/*           Compute workspace needed for SBDSQR   

   Computing MAX */
	    i__1 = 1, i__2 = *m * 5;
	    bdspac = max(i__1,i__2);
/* Computing MAX */
	    i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *n, i__1 = max(i__1,i__2);
	    minwrk = max(i__1,bdspac);
	    if (*n >= mnthr) {

/*              Path 2a - underdetermined, with many more columns   
                than rows */

		maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, 
			&c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * 
			ilaenv_(&c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (
			ftnlen)6, (ftnlen)1);
		maxwrk = max(i__1,i__2);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(&
			c__1, "SORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, (
			ftnlen)3);
		maxwrk = max(i__1,i__2);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * 
			ilaenv_(&c__1, "SORGBR", "P", m, m, m, &c_n1, (ftnlen)
			6, (ftnlen)1);
		maxwrk = max(i__1,i__2);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * *m + *m + bdspac;
		maxwrk = max(i__1,i__2);
		if (*nrhs > 1) {
/* Computing MAX */
		    i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
		    maxwrk = max(i__1,i__2);
		} else {
/* Computing MAX */
		    i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
		    maxwrk = max(i__1,i__2);
		}
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "SORMLQ", 
			"LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2);
		maxwrk = max(i__1,i__2);
	    } else {

/*              Path 2 - underdetermined */

		maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "SGEBRD", " ", m,
			 n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "SORMBR"
			, "QLT", m, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3);
		maxwrk = max(i__1,i__2);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORGBR", 
			"P", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
		maxwrk = max(i__1,i__2);
		maxwrk = max(maxwrk,bdspac);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *n * *nrhs;
		maxwrk = max(i__1,i__2);
	    }
	}
	maxwrk = max(minwrk,maxwrk);
	work[1] = (real) maxwrk;
    }

    minwrk = max(minwrk,1);
    if (*lwork < minwrk && ! lquery) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGELSS", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*m == 0 || *n == 0) {
	*rank = 0;
	return 0;
    }

/*     Get machine parameters */

    eps = slamch_("P");
    sfmin = slamch_("S");
    smlnum = sfmin / eps;
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

/*     Scale A if max element outside range [SMLNUM,BIGNUM] */

    anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]);
    iascl = 0;
    if (anrm > 0.f && anrm < smlnum) {

/*        Scale matrix norm up to SMLNUM */

	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
		info);
	iascl = 1;
    } else if (anrm > bignum) {

/*        Scale matrix norm down to BIGNUM */

	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
		info);
	iascl = 2;
    } else if (anrm == 0.f) {

/*        Matrix all zero. Return zero solution. */

	i__1 = max(*m,*n);
	slaset_("F", &i__1, nrhs, &c_b74, &c_b74, &b[b_offset], ldb);
	slaset_("F", &minmn, &c__1, &c_b74, &c_b74, &s[1], &c__1);
	*rank = 0;
	goto L70;
    }

/*     Scale B if max element outside range [SMLNUM,BIGNUM] */

    bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
    ibscl = 0;
    if (bnrm > 0.f && bnrm < smlnum) {

/*        Scale matrix norm up to SMLNUM */

	slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
		 info);
	ibscl = 1;
    } else if (bnrm > bignum) {

/*        Scale matrix norm down to BIGNUM */

	slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
		 info);
	ibscl = 2;
    }

/*     Overdetermined case */

    if (*m >= *n) {

/*        Path 1 - overdetermined or exactly determined */

	mm = *m;
	if (*m >= mnthr) {

/*           Path 1a - overdetermined, with many more rows than columns */

	    mm = *n;
	    itau = 1;
	    iwork = itau + *n;

/*           Compute A=Q*R   
             (Workspace: need 2*N, prefer N+N*NB) */

	    i__1 = *lwork - iwork + 1;
	    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__1,
		     info);

/*           Multiply B by transpose(Q)   
             (Workspace: need N+NRHS, prefer N+NRHS*NB) */

	    i__1 = *lwork - iwork + 1;
	    sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
		    b_offset], ldb, &work[iwork], &i__1, info);

/*           Zero out below R */

	    if (*n > 1) {
		i__1 = *n - 1;
		i__2 = *n - 1;
		slaset_("L", &i__1, &i__2, &c_b74, &c_b74, &a_ref(2, 1), lda);
	    }
	}

	ie = 1;
	itauq = ie + *n;
	itaup = itauq + *n;
	iwork = itaup + *n;

/*        Bidiagonalize R in A   
          (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */

	i__1 = *lwork - iwork + 1;
	sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
		work[itaup], &work[iwork], &i__1, info);

/*        Multiply B by transpose of left bidiagonalizing vectors of R   
          (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */

	i__1 = *lwork - iwork + 1;
	sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], 
		&b[b_offset], ldb, &work[iwork], &i__1, info);

/*        Generate right bidiagonalizing vectors of R in A   
          (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */

	i__1 = *lwork - iwork + 1;
	sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[iwork], &
		i__1, info);
	iwork = ie + *n;

/*        Perform bidiagonal QR iteration   
            multiply B by transpose of left singular vectors   
            compute right singular vectors in A   
          (Workspace: need BDSPAC) */

	sbdsqr_("U", n, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], lda, 
		vdum, &c__1, &b[b_offset], ldb, &work[iwork], info)
		;
	if (*info != 0) {
	    goto L70;
	}

/*        Multiply B by reciprocals of singular values   

   Computing MAX */
	r__1 = *rcond * s[1];
	thr = dmax(r__1,sfmin);
	if (*rcond < 0.f) {
/* Computing MAX */
	    r__1 = eps * s[1];
	    thr = dmax(r__1,sfmin);
	}
	*rank = 0;
	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (s[i__] > thr) {
		srscl_(nrhs, &s[i__], &b_ref(i__, 1), ldb);
		++(*rank);
	    } else {
		slaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b_ref(i__, 1), ldb);
	    }
/* L10: */
	}

/*        Multiply B by right singular vectors   
          (Workspace: need N, prefer N*NRHS) */

	if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
	    sgemm_("T", "N", n, nrhs, n, &c_b108, &a[a_offset], lda, &b[
		    b_offset], ldb, &c_b74, &work[1], ldb);
	    slacpy_("G", n, nrhs, &work[1], ldb, &b[b_offset], ldb)
		    ;
	} else if (*nrhs > 1) {
	    chunk = *lwork / *n;
	    i__1 = *nrhs;
	    i__2 = chunk;
	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
		i__3 = *nrhs - i__ + 1;
		bl = min(i__3,chunk);
		sgemm_("T", "N", n, &bl, n, &c_b108, &a[a_offset], lda, &
			b_ref(1, i__), ldb, &c_b74, &work[1], n);
		slacpy_("G", n, &bl, &work[1], n, &b_ref(1, i__), ldb);
/* L20: */
	    }
	} else {
	    sgemv_("T", n, n, &c_b108, &a[a_offset], lda, &b[b_offset], &c__1,
		     &c_b74, &work[1], &c__1);
	    scopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
	}

    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__2 = *m, i__1 = (*m << 1) - 4, i__2 = max(i__2,i__1), i__2 = max(
		i__2,*nrhs), i__1 = *n - *m * 3;
	if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__2,i__1)) {

/*        Path 2a - underdetermined, with many more columns than rows   
          and sufficient workspace for an efficient algorithm */

	    ldwork = *m;
/* Computing MAX   
   Computing MAX */
	    i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = 
		    max(i__3,*nrhs), i__4 = *n - *m * 3;
	    i__2 = (*m << 2) + *m * *lda + max(i__3,i__4), i__1 = *m * *lda + 
		    *m + *m * *nrhs;
	    if (*lwork >= max(i__2,i__1)) {
		ldwork = *lda;
	    }
	    itau = 1;
	    iwork = *m + 1;

/*        Compute A=L*Q   
          (Workspace: need 2*M, prefer M+M*NB) */

	    i__2 = *lwork - iwork + 1;
	    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &i__2,
		     info);
	    il = iwork;

/*        Copy L to WORK(IL), zeroing out above it */

	    slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
	    i__2 = *m - 1;
	    i__1 = *m - 1;
	    slaset_("U", &i__2, &i__1, &c_b74, &c_b74, &work[il + ldwork], &
		    ldwork);
	    ie = il + ldwork * *m;
	    itauq = ie + *m;
	    itaup = itauq + *m;
	    iwork = itaup + *m;

/*        Bidiagonalize L in WORK(IL)   
          (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */

	    i__2 = *lwork - iwork + 1;
	    sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], 
		    &work[itaup], &work[iwork], &i__2, info);

/*        Multiply B by transpose of left bidiagonalizing vectors of L   
          (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */

	    i__2 = *lwork - iwork + 1;
	    sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[
		    itauq], &b[b_offset], ldb, &work[iwork], &i__2, info);

/*        Generate right bidiagonalizing vectors of R in WORK(IL)   
          (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) */

	    i__2 = *lwork - iwork + 1;
	    sorgbr_("P", m, m, m, &work[il], &ldwork, &work[itaup], &work[
		    iwork], &i__2, info);
	    iwork = ie + *m;

/*        Perform bidiagonal QR iteration,   
             computing right singular vectors of L in WORK(IL) and   
             multiplying B by transpose of left singular vectors   
          (Workspace: need M*M+M+BDSPAC) */

	    sbdsqr_("U", m, m, &c__0, nrhs, &s[1], &work[ie], &work[il], &
		    ldwork, &a[a_offset], lda, &b[b_offset], ldb, &work[iwork]
		    , info);
	    if (*info != 0) {
		goto L70;
	    }

/*        Multiply B by reciprocals of singular values   

   Computing MAX */
	    r__1 = *rcond * s[1];
	    thr = dmax(r__1,sfmin);
	    if (*rcond < 0.f) {
/* Computing MAX */
		r__1 = eps * s[1];
		thr = dmax(r__1,sfmin);
	    }
	    *rank = 0;
	    i__2 = *m;
	    for (i__ = 1; i__ <= i__2; ++i__) {
		if (s[i__] > thr) {
		    srscl_(nrhs, &s[i__], &b_ref(i__, 1), ldb);
		    ++(*rank);
		} else {
		    slaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b_ref(i__, 1), 
			    ldb);
		}
/* L30: */
	    }
	    iwork = ie;

/*        Multiply B by right singular vectors of L in WORK(IL)   
          (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) */

	    if (*lwork >= *ldb * *nrhs + iwork - 1 && *nrhs > 1) {
		sgemm_("T", "N", m, nrhs, m, &c_b108, &work[il], &ldwork, &b[
			b_offset], ldb, &c_b74, &work[iwork], ldb);
		slacpy_("G", m, nrhs, &work[iwork], ldb, &b[b_offset], ldb);
	    } else if (*nrhs > 1) {
		chunk = (*lwork - iwork + 1) / *m;
		i__2 = *nrhs;
		i__1 = chunk;
		for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += 
			i__1) {
/* Computing MIN */
		    i__3 = *nrhs - i__ + 1;
		    bl = min(i__3,chunk);
		    sgemm_("T", "N", m, &bl, m, &c_b108, &work[il], &ldwork, &
			    b_ref(1, i__), ldb, &c_b74, &work[iwork], n);
		    slacpy_("G", m, &bl, &work[iwork], n, &b_ref(1, i__), ldb);
/* L40: */
		}
	    } else {
		sgemv_("T", m, m, &c_b108, &work[il], &ldwork, &b_ref(1, 1), &
			c__1, &c_b74, &work[iwork], &c__1);
		scopy_(m, &work[iwork], &c__1, &b_ref(1, 1), &c__1);
	    }

/*        Zero out below first M rows of B */

	    i__1 = *n - *m;
	    slaset_("F", &i__1, nrhs, &c_b74, &c_b74, &b_ref(*m + 1, 1), ldb);
	    iwork = itau + *m;

/*        Multiply transpose(Q) by B   
          (Workspace: need M+NRHS, prefer M+NRHS*NB) */

	    i__1 = *lwork - iwork + 1;
	    sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
		    b_offset], ldb, &work[iwork], &i__1, info);

	} else {

/*        Path 2 - remaining underdetermined cases */

	    ie = 1;
	    itauq = ie + *m;
	    itaup = itauq + *m;
	    iwork = itaup + *m;

/*        Bidiagonalize A   
          (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */

	    i__1 = *lwork - iwork + 1;
	    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
		    work[itaup], &work[iwork], &i__1, info);

/*        Multiply B by transpose of left bidiagonalizing vectors   
          (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */

	    i__1 = *lwork - iwork + 1;
	    sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq]
		    , &b[b_offset], ldb, &work[iwork], &i__1, info);

/*        Generate right bidiagonalizing vectors in A   
          (Workspace: need 4*M, prefer 3*M+M*NB) */

	    i__1 = *lwork - iwork + 1;
	    sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
		    iwork], &i__1, info);
	    iwork = ie + *m;

/*        Perform bidiagonal QR iteration,   
             computing right singular vectors of A in A and   
             multiplying B by transpose of left singular vectors   
          (Workspace: need BDSPAC) */

	    sbdsqr_("L", m, n, &c__0, nrhs, &s[1], &work[ie], &a[a_offset], 
		    lda, vdum, &c__1, &b[b_offset], ldb, &work[iwork], info);
	    if (*info != 0) {
		goto L70;
	    }

/*        Multiply B by reciprocals of singular values   

   Computing MAX */
	    r__1 = *rcond * s[1];
	    thr = dmax(r__1,sfmin);
	    if (*rcond < 0.f) {
/* Computing MAX */
		r__1 = eps * s[1];
		thr = dmax(r__1,sfmin);
	    }
	    *rank = 0;
	    i__1 = *m;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		if (s[i__] > thr) {
		    srscl_(nrhs, &s[i__], &b_ref(i__, 1), ldb);
		    ++(*rank);
		} else {
		    slaset_("F", &c__1, nrhs, &c_b74, &c_b74, &b_ref(i__, 1), 
			    ldb);
		}
/* L50: */
	    }

/*        Multiply B by right singular vectors of A   
          (Workspace: need N, prefer N*NRHS) */

	    if (*lwork >= *ldb * *nrhs && *nrhs > 1) {
		sgemm_("T", "N", n, nrhs, m, &c_b108, &a[a_offset], lda, &b[
			b_offset], ldb, &c_b74, &work[1], ldb);
		slacpy_("F", n, nrhs, &work[1], ldb, &b[b_offset], ldb);
	    } else if (*nrhs > 1) {
		chunk = *lwork / *n;
		i__1 = *nrhs;
		i__2 = chunk;
		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
			i__2) {
/* Computing MIN */
		    i__3 = *nrhs - i__ + 1;
		    bl = min(i__3,chunk);
		    sgemm_("T", "N", n, &bl, m, &c_b108, &a[a_offset], lda, &
			    b_ref(1, i__), ldb, &c_b74, &work[1], n);
		    slacpy_("F", n, &bl, &work[1], n, &b_ref(1, i__), ldb);
/* L60: */
		}
	    } else {
		sgemv_("T", m, n, &c_b108, &a[a_offset], lda, &b[b_offset], &
			c__1, &c_b74, &work[1], &c__1);
		scopy_(n, &work[1], &c__1, &b[b_offset], &c__1);
	    }
	}
    }

/*     Undo scaling */

    if (iascl == 1) {
	slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
		 info);
	slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
		minmn, info);
    } else if (iascl == 2) {
	slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
		 info);
	slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
		minmn, info);
    }
    if (ibscl == 1) {
	slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
		 info);
    } else if (ibscl == 2) {
	slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
		 info);
    }

L70:
    work[1] = (real) maxwrk;
    return 0;

/*     End of SGELSS */

} /* sgelss_ */

Exemplo n.º 6

0

Exibir arquivo

Arquivo: scolumn_bmod.c Projeto: saggita/RevisedThirdPartyLibraries

/* Return value:   0 - successful return
 *               > 0 - number of bytes allocated when run out of space
 */
int
scolumn_bmod (
	     const int  jcol,	  /* in */
	     const int  nseg,	  /* in */
	     float     *dense,	  /* in */
	     float     *tempv,	  /* working array */
	     int        *segrep,  /* in */
	     int        *repfnz,  /* in */
	     int        fpanelc,  /* in -- first column in the current panel */
	     GlobalLU_t *Glu      /* modified */
	     )
{
/*
 * Purpose:
 * ========
 *    Performs numeric block updates (sup-col) in topological order.
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
 *    Special processing on the supernodal portion of L\U[*,j]
 *
 */
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    int         incx = 1, incy = 1;
    float      alpha, beta;
    
    /* krep = representative of current k-th supernode
     * fsupc = first supernodal column
     * nsupc = no of columns in supernode
     * nsupr = no of rows in supernode (used as leading dimension)
     * luptr = location of supernodal LU-block in storage
     * kfnz = first nonz in the k-th supernodal segment
     * no_zeros = no of leading zeros in a supernodal U-segment
     */
    float       ukj, ukj1, ukj2;
    int          luptr, luptr1, luptr2;
    int          fsupc, nsupc, nsupr, segsze;
    int          nrow;	  /* No of rows in the matrix of matrix-vector */
    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
    register int lptr, kfnz, isub, irow, i;
    register int no_zeros, new_next; 
    int          ufirst, nextlu;
    int          fst_col; /* First column within small LU update */
    int          d_fsupc; /* Distance between the first column of the current
			     panel and the first column of the current snode. */
    int          *xsup, *supno;
    int          *lsub, *xlsub;
    float       *lusup;
    int          *xlusup;
    int          nzlumax;
    float       *tempv1;
    float      zero = 0.0;
    float      one = 1.0;
    float      none = -1.0;
    int          mem_error;
    extern SuperLUStat_t SuperLUStat;
    flops_t  *ops = SuperLUStat.ops;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    lusup   = Glu->lusup;
    xlusup  = Glu->xlusup;
    nzlumax = Glu->nzlumax;
    jcolp1 = jcol + 1;
    jsupno = supno[jcol];
    
    /* 
     * For each nonz supernode segment of U[*,j] in topological order 
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {

	krep = segrep[k];
	k--;
	ksupno = supno[krep];
	if ( jsupno != ksupno ) { /* Outside the rectangular supernode */

	    fsupc = xsup[ksupno];
	    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

  	    /* Distance from the current supernode to the current panel; 
	       d_fsupc=0 if fsupc > fpanelc. */
  	    d_fsupc = fst_col - fsupc; 

	    luptr = xlusup[fst_col] + d_fsupc;
	    lptr = xlsub[fsupc] + d_fsupc;

	    kfnz = repfnz[krep];
	    kfnz = SUPERLU_MAX ( kfnz, fpanelc );

	    segsze = krep - kfnz + 1;
	    nsupc = krep - fst_col + 1;
	    nsupr = xlsub[fsupc+1] - xlsub[fsupc];	/* Leading dimension */
	    nrow = nsupr - d_fsupc - nsupc;
	    krep_ind = lptr + nsupc - 1;

	    ops[TRSV] += segsze * (segsze - 1);
	    ops[GEMV] += 2 * nrow * segsze;


	    /* 
	     * Case 1: Update U-segment of size 1 -- col-col update 
	     */
	    if ( segsze == 1 ) {
	  	ukj = dense[lsub[krep_ind]];
		luptr += nsupr*(nsupc-1) + nsupc;

		for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
		    irow = lsub[i];
		    dense[irow] -=  ukj*lusup[luptr];
		    luptr++;
		}

	    } else if ( segsze <= 3 ) {
		ukj = dense[lsub[krep_ind]];
		luptr += nsupr*(nsupc-1) + nsupc-1;
		ukj1 = dense[lsub[krep_ind - 1]];
		luptr1 = luptr - nsupr;

		if ( segsze == 2 ) { /* Case 2: 2cols-col update */
		    ukj -= ukj1 * lusup[luptr1];
		    dense[lsub[krep_ind]] = ukj;
		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
		    	irow = lsub[i];
		    	luptr++;
		    	luptr1++;
		    	dense[irow] -= ( ukj*lusup[luptr]
					+ ukj1*lusup[luptr1] );
		    }
		} else { /* Case 3: 3cols-col update */
		    ukj2 = dense[lsub[krep_ind - 2]];
		    luptr2 = luptr1 - nsupr;
		    ukj1 -= ukj2 * lusup[luptr2-1];
		    ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
		    dense[lsub[krep_ind]] = ukj;
		    dense[lsub[krep_ind-1]] = ukj1;
		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
		    	irow = lsub[i];
		    	luptr++;
		    	luptr1++;
			luptr2++;
		    	dense[irow] -= ( ukj*lusup[luptr]
			     + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
		    }
		}



	    } else {
	  	/*
		 * Case: sup-col update
		 * Perform a triangular solve and block update,
		 * then scatter the result of sup-col update to dense
		 */

		no_zeros = kfnz - fst_col;

	        /* Copy U[*,j] segment from dense[*] to tempv[*] */
	        isub = lptr + no_zeros;
	        for (i = 0; i < segsze; i++) {
	  	    irow = lsub[isub];
		    tempv[i] = dense[irow];
		    ++isub; 
	        }

	        /* Dense triangular solve -- start effective triangle */
		luptr += nsupr * no_zeros + no_zeros; 
		
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#else		
		strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#endif		
 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
                alpha = one;
                beta = zero;
#ifdef _CRAY
		SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
		sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
		slsolve ( nsupr, segsze, &lusup[luptr], tempv );

 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
#endif
		
		
                /* Scatter tempv[] into SPA dense[] as a temporary storage */
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    dense[irow] = tempv[i];
                    tempv[i] = zero;
                    ++isub;
                }

		/* Scatter tempv1[] into SPA dense[] */
		for (i = 0; i < nrow; i++) {
		    irow = lsub[isub];
		    dense[irow] -= tempv1[i];
		    tempv1[i] = zero;
		    ++isub;
		}
	    }
	    
	} /* if jsupno ... */

    } /* for each segment... */

    /*
     *	Process the supernodal portion of L\U[*,j]
     */
    nextlu = xlusup[jcol];
    fsupc = xsup[jsupno];

    /* Copy the SPA dense into L\U[*,j] */
    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
    while ( new_next > nzlumax ) {
	if (mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
	    return (mem_error);
	lusup = Glu->lusup;
	lsub = Glu->lsub;
    }

    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
  	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
        dense[irow] = zero;
	++nextlu;
    }

    xlusup[jcolp1] = nextlu;	/* Close L\U[*,jcol] */

    /* For more updates within the panel (also within the current supernode), 
     * should start from the first column of the panel, or the first column 
     * of the supernode, whichever is bigger. There are 2 cases:
     *    1) fsupc < fpanelc, then fst_col := fpanelc
     *    2) fsupc >= fpanelc, then fst_col := fsupc
     */
    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

    if ( fst_col < jcol ) {

  	/* Distance between the current supernode and the current panel.
	   d_fsupc=0 if fsupc >= fpanelc. */
  	d_fsupc = fst_col - fsupc;

	lptr = xlsub[fsupc] + d_fsupc;
	luptr = xlusup[fst_col] + d_fsupc;
	nsupr = xlsub[fsupc+1] - xlsub[fsupc];	/* Leading dimension */
	nsupc = jcol - fst_col;	/* Excluding jcol */
	nrow = nsupr - d_fsupc - nsupc;

	/* Points to the beginning of jcol in snode L\U(jsupno) */
	ufirst = xlusup[jcol] + d_fsupc;	

	ops[TRSV] += nsupc * (nsupc - 1);
	ops[GEMV] += 2 * nrow * nsupc;
	
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
	STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
#else
	strsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
#endif
	
	alpha = none; beta = one; /* y := beta*y + alpha*A*x */

#ifdef _CRAY
	SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );

	smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
		&lusup[ufirst], tempv );
	
        /* Copy updates from tempv[*] into lusup[*] */
	isub = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
	    lusup[isub] -= tempv[i];
	    tempv[i] = 0.0;
	    ++isub;
	}

#endif
	
	
    } /* if fst_col < jcol ... */ 

    return 0;
}

Exemplo n.º 7

0

Exibir arquivo

Arquivo: blas.c Projeto: excess-project/starpu-energy-aware-extension

inline void STARPU_SGEMV(char *transa, int M, int N, float alpha, float *A, int lda,
		float *X, int incX, float beta, float *Y, int incY)
{
	sgemv_(transa, &M, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);
}

Exemplo n.º 8

0

Exibir arquivo

Arquivo: slagge.c Projeto: petsc/superlu

/* Subroutine */ int slagge_slu(integer *m, integer *n, integer *kl, integer *ku,
	 real *d, real *a, integer *lda, integer *iseed, real *work, integer *
	info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1;

    /* Builtin functions */
    double r_sign(real *, real *);

    /* Local variables */
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    extern real snrm2_(integer *, real *, integer *);
    static integer i, j;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
	    sgemv_(char *, integer *, integer *, real *, real *, integer *, 
	    real *, integer *, real *, real *, integer *);
    static real wa, wb, wn;
    extern /* Subroutine */ int slarnv_slu(integer *, integer *, integer *, real *);
    extern int input_error(char *, int *);
    static real tau;


/*  -- LAPACK auxiliary test routine (version 2.0)   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SLAGGE generates a real general m by n matrix A, by pre- and post-   
    multiplying a real diagonal matrix D with random orthogonal matrices: 
  
    A = U*D*V. The lower and upper bandwidths may then be reduced to   
    kl and ku by additional orthogonal transformations.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    KL      (input) INTEGER   
            The number of nonzero subdiagonals within the band of A.   
            0 <= KL <= M-1.   

    KU      (input) INTEGER   
            The number of nonzero superdiagonals within the band of A.   
            0 <= KU <= N-1.   

    D       (input) REAL array, dimension (min(M,N))   
            The diagonal elements of the diagonal matrix D.   

    A       (output) REAL array, dimension (LDA,N)   
            The generated m by n matrix A.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= M.   

    ISEED   (input/output) INTEGER array, dimension (4)   
            On entry, the seed of the random number generator; the array 
  
            elements must be between 0 and 4095, and ISEED(4) must be   
            odd.   
            On exit, the seed is updated.   

    WORK    (workspace) REAL array, dimension (M+N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    ===================================================================== 
  


       Test the input arguments   

       Parameter adjustments */
    --d;
    a_dim1 = *lda;
    a_offset = a_dim1 + 1;
    a -= a_offset;
    --iseed;
    --work;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*kl < 0 || *kl > *m - 1) {
	*info = -3;
    } else if (*ku < 0 || *ku > *n - 1) {
	*info = -4;
    } else if (*lda < max(1,*m)) {
	*info = -7;
    }
    if (*info < 0) {
	i__1 = -(*info);
	input_error("SLAGGE", &i__1);
	return 0;
    }

/*     initialize A to diagonal matrix */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *m;
	for (i = 1; i <= i__2; ++i) {
	    a[i + j * a_dim1] = 0.f;
/* L10: */
	}
/* L20: */
    }
    i__1 = min(*m,*n);
    for (i = 1; i <= i__1; ++i) {
	a[i + i * a_dim1] = d[i];
/* L30: */
    }

/*     pre- and post-multiply A by random orthogonal matrices */

    for (i = min(*m,*n); i >= 1; --i) {
	if (i < *m) {

/*           generate random reflection */

	    i__1 = *m - i + 1;
	    slarnv_slu(&c__3, &iseed[1], &i__1, &work[1]);
	    i__1 = *m - i + 1;
	    wn = snrm2_(&i__1, &work[1], &c__1);
	    wa = r_sign(&wn, &work[1]);
	    if (wn == 0.f) {
		tau = 0.f;
	    } else {
		wb = work[1] + wa;
		i__1 = *m - i;
		r__1 = 1.f / wb;
		sscal_(&i__1, &r__1, &work[2], &c__1);
		work[1] = 1.f;
		tau = wb / wa;
	    }

/*           multiply A(i:m,i:n) by random reflection from the lef
t */

	    i__1 = *m - i + 1;
	    i__2 = *n - i + 1;
	    sgemv_("Transpose", &i__1, &i__2, &c_b11, &a[i + i * a_dim1], lda,
		     &work[1], &c__1, &c_b13, &work[*m + 1], &c__1);
	    i__1 = *m - i + 1;
	    i__2 = *n - i + 1;
	    r__1 = -(doublereal)tau;
	    sger_(&i__1, &i__2, &r__1, &work[1], &c__1, &work[*m + 1], &c__1, 
		    &a[i + i * a_dim1], lda);
	}
	if (i < *n) {

/*           generate random reflection */

	    i__1 = *n - i + 1;
	    slarnv_slu(&c__3, &iseed[1], &i__1, &work[1]);
	    i__1 = *n - i + 1;
	    wn = snrm2_(&i__1, &work[1], &c__1);
	    wa = r_sign(&wn, &work[1]);
	    if (wn == 0.f) {
		tau = 0.f;
	    } else {
		wb = work[1] + wa;
		i__1 = *n - i;
		r__1 = 1.f / wb;
		sscal_(&i__1, &r__1, &work[2], &c__1);
		work[1] = 1.f;
		tau = wb / wa;
	    }

/*           multiply A(i:m,i:n) by random reflection from the rig
ht */

	    i__1 = *m - i + 1;
	    i__2 = *n - i + 1;
	    sgemv_("No transpose", &i__1, &i__2, &c_b11, &a[i + i * a_dim1], 
		    lda, &work[1], &c__1, &c_b13, &work[*n + 1], &c__1);
	    i__1 = *m - i + 1;
	    i__2 = *n - i + 1;
	    r__1 = -(doublereal)tau;
	    sger_(&i__1, &i__2, &r__1, &work[*n + 1], &c__1, &work[1], &c__1, 
		    &a[i + i * a_dim1], lda);
	}
/* L40: */
    }

/*     Reduce number of subdiagonals to KL and number of superdiagonals   
       to KU   

   Computing MAX */
    i__2 = *m - 1 - *kl, i__3 = *n - 1 - *ku;
    i__1 = max(i__2,i__3);
    for (i = 1; i <= i__1; ++i) {
	if (*kl <= *ku) {

/*           annihilate subdiagonal elements first (necessary if K
L = 0)   

   Computing MIN */
	    i__2 = *m - 1 - *kl;
	    if (i <= min(i__2,*n)) {

/*              generate reflection to annihilate A(kl+i+1:m,i
) */

		i__2 = *m - *kl - i + 1;
		wn = snrm2_(&i__2, &a[*kl + i + i * a_dim1], &c__1);
		wa = r_sign(&wn, &a[*kl + i + i * a_dim1]);
		if (wn == 0.f) {
		    tau = 0.f;
		} else {
		    wb = a[*kl + i + i * a_dim1] + wa;
		    i__2 = *m - *kl - i;
		    r__1 = 1.f / wb;
		    sscal_(&i__2, &r__1, &a[*kl + i + 1 + i * a_dim1], &c__1);
		    a[*kl + i + i * a_dim1] = 1.f;
		    tau = wb / wa;
		}

/*              apply reflection to A(kl+i:m,i+1:n) from the l
eft */

		i__2 = *m - *kl - i + 1;
		i__3 = *n - i;
		sgemv_("Transpose", &i__2, &i__3, &c_b11, &a[*kl + i + (i + 1)
			 * a_dim1], lda, &a[*kl + i + i * a_dim1], &c__1, &
			c_b13, &work[1], &c__1);
		i__2 = *m - *kl - i + 1;
		i__3 = *n - i;
		r__1 = -(doublereal)tau;
		sger_(&i__2, &i__3, &r__1, &a[*kl + i + i * a_dim1], &c__1, &
			work[1], &c__1, &a[*kl + i + (i + 1) * a_dim1], lda);
		a[*kl + i + i * a_dim1] = -(doublereal)wa;
	    }

/* Computing MIN */
	    i__2 = *n - 1 - *ku;
	    if (i <= min(i__2,*m)) {

/*              generate reflection to annihilate A(i,ku+i+1:n
) */

		i__2 = *n - *ku - i + 1;
		wn = snrm2_(&i__2, &a[i + (*ku + i) * a_dim1], lda);
		wa = r_sign(&wn, &a[i + (*ku + i) * a_dim1]);
		if (wn == 0.f) {
		    tau = 0.f;
		} else {
		    wb = a[i + (*ku + i) * a_dim1] + wa;
		    i__2 = *n - *ku - i;
		    r__1 = 1.f / wb;
		    sscal_(&i__2, &r__1, &a[i + (*ku + i + 1) * a_dim1], lda);
		    a[i + (*ku + i) * a_dim1] = 1.f;
		    tau = wb / wa;
		}

/*              apply reflection to A(i+1:m,ku+i:n) from the r
ight */

		i__2 = *m - i;
		i__3 = *n - *ku - i + 1;
		sgemv_("No transpose", &i__2, &i__3, &c_b11, &a[i + 1 + (*ku 
			+ i) * a_dim1], lda, &a[i + (*ku + i) * a_dim1], lda, 
			&c_b13, &work[1], &c__1);
		i__2 = *m - i;
		i__3 = *n - *ku - i + 1;
		r__1 = -(doublereal)tau;
		sger_(&i__2, &i__3, &r__1, &work[1], &c__1, &a[i + (*ku + i) *
			 a_dim1], lda, &a[i + 1 + (*ku + i) * a_dim1], lda);
		a[i + (*ku + i) * a_dim1] = -(doublereal)wa;
	    }
	} else {

/*           annihilate superdiagonal elements first (necessary if
   
             KU = 0)   

   Computing MIN */
	    i__2 = *n - 1 - *ku;
	    if (i <= min(i__2,*m)) {

/*              generate reflection to annihilate A(i,ku+i+1:n
) */

		i__2 = *n - *ku - i + 1;
		wn = snrm2_(&i__2, &a[i + (*ku + i) * a_dim1], lda);
		wa = r_sign(&wn, &a[i + (*ku + i) * a_dim1]);
		if (wn == 0.f) {
		    tau = 0.f;
		} else {
		    wb = a[i + (*ku + i) * a_dim1] + wa;
		    i__2 = *n - *ku - i;
		    r__1 = 1.f / wb;
		    sscal_(&i__2, &r__1, &a[i + (*ku + i + 1) * a_dim1], lda);
		    a[i + (*ku + i) * a_dim1] = 1.f;
		    tau = wb / wa;
		}

/*              apply reflection to A(i+1:m,ku+i:n) from the r
ight */

		i__2 = *m - i;
		i__3 = *n - *ku - i + 1;
		sgemv_("No transpose", &i__2, &i__3, &c_b11, &a[i + 1 + (*ku 
			+ i) * a_dim1], lda, &a[i + (*ku + i) * a_dim1], lda, 
			&c_b13, &work[1], &c__1);
		i__2 = *m - i;
		i__3 = *n - *ku - i + 1;
		r__1 = -(doublereal)tau;
		sger_(&i__2, &i__3, &r__1, &work[1], &c__1, &a[i + (*ku + i) *
			 a_dim1], lda, &a[i + 1 + (*ku + i) * a_dim1], lda);
		a[i + (*ku + i) * a_dim1] = -(doublereal)wa;
	    }

/* Computing MIN */
	    i__2 = *m - 1 - *kl;
	    if (i <= min(i__2,*n)) {

/*              generate reflection to annihilate A(kl+i+1:m,i
) */

		i__2 = *m - *kl - i + 1;
		wn = snrm2_(&i__2, &a[*kl + i + i * a_dim1], &c__1);
		wa = r_sign(&wn, &a[*kl + i + i * a_dim1]);
		if (wn == 0.f) {
		    tau = 0.f;
		} else {
		    wb = a[*kl + i + i * a_dim1] + wa;
		    i__2 = *m - *kl - i;
		    r__1 = 1.f / wb;
		    sscal_(&i__2, &r__1, &a[*kl + i + 1 + i * a_dim1], &c__1);
		    a[*kl + i + i * a_dim1] = 1.f;
		    tau = wb / wa;
		}

/*              apply reflection to A(kl+i:m,i+1:n) from the l
eft */

		i__2 = *m - *kl - i + 1;
		i__3 = *n - i;
		sgemv_("Transpose", &i__2, &i__3, &c_b11, &a[*kl + i + (i + 1)
			 * a_dim1], lda, &a[*kl + i + i * a_dim1], &c__1, &
			c_b13, &work[1], &c__1);
		i__2 = *m - *kl - i + 1;
		i__3 = *n - i;
		r__1 = -(doublereal)tau;
		sger_(&i__2, &i__3, &r__1, &a[*kl + i + i * a_dim1], &c__1, &
			work[1], &c__1, &a[*kl + i + (i + 1) * a_dim1], lda);
		a[*kl + i + i * a_dim1] = -(doublereal)wa;
	    }
	}

	i__2 = *m;
	for (j = *kl + i + 1; j <= i__2; ++j) {
	    a[j + i * a_dim1] = 0.f;
/* L50: */
	}

	i__2 = *n;
	for (j = *ku + i + 1; j <= i__2; ++j) {
	    a[i + j * a_dim1] = 0.f;
/* L60: */
	}
/* L70: */
    }
    return 0;

/*     End of SLAGGE */

} /* slagge_slu */

Exemplo n.º 9

0

Exibir arquivo

Arquivo: ssnode_bmod.cpp Projeto: erli2/parameterization

/*
 * Performs numeric block updates within the relaxed snode. 
 */
int
ssnode_bmod (
	    const int  jcol,	  /* in */
	    const int  fsupc,     /* in */
	    float     *dense,    /* in */
	    float     *tempv,    /* working array */
	    GlobalLU_t *Glu,      /* modified */
	    SuperLUStat_t *stat   /* output */
	    )
{
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
	 ftcs2 = _cptofcd("N", strlen("N")),
	 ftcs3 = _cptofcd("U", strlen("U"));
#endif
    int            incx = 1, incy = 1;
    float         alpha = -1.0, beta = 1.0;
#endif

    int            luptr, nsupc, nsupr, nrow;
    int            isub, irow, i, iptr; 
    register int   ufirst, nextlu;
    int            *lsub, *xlsub;
    float         *lusup;
    int            *xlusup;
    flops_t *ops = stat->ops;

    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    lusup   = Glu->lusup;
    xlusup  = Glu->xlusup;

    nextlu = xlusup[jcol];
    
    /*
     *	Process the supernodal portion of L\U[*,j]
     */
    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
  	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
	dense[irow] = 0;
	++nextlu;
    }

    xlusup[jcol + 1] = nextlu;	/* Initialize xlusup for next column */
    
    if ( fsupc < jcol ) {

	luptr = xlusup[fsupc];
	nsupr = xlsub[fsupc+1] - xlsub[fsupc];
	nsupc = jcol - fsupc;	/* Excluding jcol */
	ufirst = xlusup[jcol];	/* Points to the beginning of column
				   jcol in supernode L\U(jsupno). */
	nrow = nsupr - nsupc;

	ops[TRSV] += nsupc * (nsupc - 1);
	ops[GEMV] += 2 * nrow * nsupc;

#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
	STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
	      &lusup[ufirst], &incx );
	SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
		&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
	      &lusup[ufirst], &incx );
	sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
		&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
	smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
			&lusup[ufirst], &tempv[0] );

        /* Scatter tempv[*] into lusup[*] */
	iptr = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
	    lusup[iptr++] -= tempv[i];
	    tempv[i] = 0.0;
	}
#endif

    }

    return 0;
}

Exemplo n.º 10

0

Exibir arquivo

Arquivo: slahr2.c Projeto: juanjosegarciaripoll/cblapack

/* Subroutine */ int slahr2_(integer *n, integer *k, integer *nb, real *a, 
	integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy)
{
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, 
	    i__3;
    real r__1;

    /* Local variables */
    integer i__;
    real ei;

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) */
/*  matrix A so that elements below the k-th subdiagonal are zero. The */
/*  reduction is performed by an orthogonal similarity transformation */
/*  Q' * A * Q. The routine returns the matrices V and T which determine */
/*  Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. */

/*  This is an auxiliary routine called by SGEHRD. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix A. */

/*  K       (input) INTEGER */
/*          The offset for the reduction. Elements below the k-th */
/*          subdiagonal in the first NB columns are reduced to zero. */
/*          K < N. */

/*  NB      (input) INTEGER */
/*          The number of columns to be reduced. */

/*  A       (input/output) REAL array, dimension (LDA,N-K+1) */
/*          On entry, the n-by-(n-k+1) general matrix A. */
/*          On exit, the elements on and above the k-th subdiagonal in */
/*          the first NB columns are overwritten with the corresponding */
/*          elements of the reduced matrix; the elements below the k-th */
/*          subdiagonal, with the array TAU, represent the matrix Q as a */
/*          product of elementary reflectors. The other columns of A are */
/*          unchanged. See Further Details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  TAU     (output) REAL array, dimension (NB) */
/*          The scalar factors of the elementary reflectors. See Further */
/*          Details. */

/*  T       (output) REAL array, dimension (LDT,NB) */
/*          The upper triangular matrix T. */

/*  LDT     (input) INTEGER */
/*          The leading dimension of the array T.  LDT >= NB. */

/*  Y       (output) REAL array, dimension (LDY,NB) */
/*          The n-by-nb matrix Y. */

/*  LDY     (input) INTEGER */
/*          The leading dimension of the array Y. LDY >= N. */

/*  Further Details */
/*  =============== */

/*  The matrix Q is represented as a product of nb elementary reflectors */

/*     Q = H(1) H(2) . . . H(nb). */

/*  Each H(i) has the form */

/*     H(i) = I - tau * v * v' */

/*  where tau is a real scalar, and v is a real vector with */
/*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in */
/*  A(i+k+1:n,i), and tau in TAU(i). */

/*  The elements of the vectors v together form the (n-k+1)-by-nb matrix */
/*  V which is needed, with T and Y, to apply the transformation to the */
/*  unreduced part of the matrix, using an update of the form: */
/*  A := (I - V*T*V') * (A - Y*V'). */

/*  The contents of A on exit are illustrated by the following example */
/*  with n = 7, k = 3 and nb = 2: */

/*     ( a   a   a   a   a ) */
/*     ( a   a   a   a   a ) */
/*     ( a   a   a   a   a ) */
/*     ( h   h   a   a   a ) */
/*     ( v1  h   a   a   a ) */
/*     ( v1  v2  a   a   a ) */
/*     ( v1  v2  a   a   a ) */

/*  where a denotes an element of the original matrix A, h denotes a */
/*  modified element of the upper Hessenberg matrix H, and vi denotes an */
/*  element of the vector defining H(i). */

/*  This file is a slight modification of LAPACK-3.0's SLAHRD */
/*  incorporating improvements proposed by Quintana-Orti and Van de */
/*  Gejin. Note that the entries of A(1:K,2:NB) differ from those */
/*  returned by the original LAPACK routine. This function is */
/*  not backward compatible with LAPACK3.0. */

/*  ===================================================================== */

/*     Quick return if possible */

    /* Parameter adjustments */
    --tau;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;

    /* Function Body */
    if (*n <= 1) {
	return 0;
    }

    i__1 = *nb;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (i__ > 1) {

/*           Update A(K+1:N,I) */

/*           Update I-th column of A - Y * V' */

	    i__2 = *n - *k;
	    i__3 = i__ - 1;
	    sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], 
		    ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b5, &a[*k + 1 + 
		    i__ * a_dim1], &c__1);

/*           Apply I - V * T' * V' to this column (call it b) from the */
/*           left, using the last column of T as workspace */

/*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows) */
/*                    ( V2 )             ( b2 ) */

/*           where V1 is unit lower triangular */

/*           w := V1' * b1 */

	    i__2 = i__ - 1;
	    scopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 
		    1], &c__1);
	    i__2 = i__ - 1;
	    strmv_("Lower", "Transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1], 
		    lda, &t[*nb * t_dim1 + 1], &c__1);

/*           w := w + V2'*b2 */

	    i__2 = *n - *k - i__ + 1;
	    i__3 = i__ - 1;
	    sgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], 
		    lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b5, &t[*nb * 
		    t_dim1 + 1], &c__1);

/*           w := T'*w */

	    i__2 = i__ - 1;
	    strmv_("Upper", "Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, 
		     &t[*nb * t_dim1 + 1], &c__1);

/*           b2 := b2 - V2*w */

	    i__2 = *n - *k - i__ + 1;
	    i__3 = i__ - 1;
	    sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &a[*k + i__ + a_dim1], 
		     lda, &t[*nb * t_dim1 + 1], &c__1, &c_b5, &a[*k + i__ + 
		    i__ * a_dim1], &c__1);

/*           b1 := b1 - V1*w */

	    i__2 = i__ - 1;
	    strmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1]
, lda, &t[*nb * t_dim1 + 1], &c__1);
	    i__2 = i__ - 1;
	    saxpy_(&i__2, &c_b4, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ 
		    * a_dim1], &c__1);

	    a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei;
	}

/*        Generate the elementary reflector H(I) to annihilate */
/*        A(K+I+1:N,I) */

	i__2 = *n - *k - i__ + 1;
/* Computing MIN */
	i__3 = *k + i__ + 1;
	slarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3, *n)+ i__ * 
		a_dim1], &c__1, &tau[i__]);
	ei = a[*k + i__ + i__ * a_dim1];
	a[*k + i__ + i__ * a_dim1] = 1.f;

/*        Compute  Y(K+1:N,I) */

	i__2 = *n - *k;
	i__3 = *n - *k - i__ + 1;
	sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b5, &a[*k + 1 + (i__ + 1) * 
		a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &y[*
		k + 1 + i__ * y_dim1], &c__1);
	i__2 = *n - *k - i__ + 1;
	i__3 = i__ - 1;
	sgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, &
		a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &t[i__ * t_dim1 + 
		1], &c__1);
	i__2 = *n - *k;
	i__3 = i__ - 1;
	sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy, 
		&t[i__ * t_dim1 + 1], &c__1, &c_b5, &y[*k + 1 + i__ * y_dim1], 
		 &c__1);
	i__2 = *n - *k;
	sscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1);

/*        Compute T(1:I,I) */

	i__2 = i__ - 1;
	r__1 = -tau[i__];
	sscal_(&i__2, &r__1, &t[i__ * t_dim1 + 1], &c__1);
	i__2 = i__ - 1;
	strmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, 
		&t[i__ * t_dim1 + 1], &c__1)
		;
	t[i__ + i__ * t_dim1] = tau[i__];

    }
    a[*k + *nb + *nb * a_dim1] = ei;

/*     Compute Y(1:K,1:NB) */

    slacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy);
    strmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b5, &a[*k + 1 
	    + a_dim1], lda, &y[y_offset], ldy);
    if (*n > *k + *nb) {
	i__1 = *n - *k - *nb;
	sgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b5, &a[(*nb + 
		2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, &c_b5, 
		&y[y_offset], ldy);
    }
    strmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b5, &t[
	    t_offset], ldt, &y[y_offset], ldy);

    return 0;

/*     End of SLAHR2 */

} /* slahr2_ */

Exemplo n.º 11

0

Exibir arquivo

Arquivo: ssbevx.c Projeto: deepakantony/vispack

/* Subroutine */ int ssbevx_(char *jobz, char *range, char *uplo, integer *n, 
	integer *kd, real *ab, integer *ldab, real *q, integer *ldq, real *vl,
	 real *vu, integer *il, integer *iu, real *abstol, integer *m, real *
	w, real *z, integer *ldz, real *work, integer *iwork, integer *ifail, 
	integer *info)
{
/*  -- LAPACK driver routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SSBEVX computes selected eigenvalues and, optionally, eigenvectors   
    of a real symmetric band matrix A.  Eigenvalues and eigenvectors can 
  
    be selected by specifying either a range of values or a range of   
    indices for the desired eigenvalues.   

    Arguments   
    =========   

    JOBZ    (input) CHARACTER*1   
            = 'N':  Compute eigenvalues only;   
            = 'V':  Compute eigenvalues and eigenvectors.   

    RANGE   (input) CHARACTER*1   
            = 'A': all eigenvalues will be found;   
            = 'V': all eigenvalues in the half-open interval (VL,VU]   
                   will be found;   
            = 'I': the IL-th through IU-th eigenvalues will be found.   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    KD      (input) INTEGER   
            The number of superdiagonals of the matrix A if UPLO = 'U',   
            or the number of subdiagonals if UPLO = 'L'.  KD >= 0.   

    AB      (input/output) REAL array, dimension (LDAB, N)   
            On entry, the upper or lower triangle of the symmetric band   
            matrix A, stored in the first KD+1 rows of the array.  The   
            j-th column of A is stored in the j-th column of the array AB 
  
            as follows:   
            if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; 
  
            if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd). 
  

            On exit, AB is overwritten by values generated during the   
            reduction to tridiagonal form.  If UPLO = 'U', the first   
            superdiagonal and the diagonal of the tridiagonal matrix T   
            are returned in rows KD and KD+1 of AB, and if UPLO = 'L',   
            the diagonal and first subdiagonal of T are returned in the   
            first two rows of AB.   

    LDAB    (input) INTEGER   
            The leading dimension of the array AB.  LDAB >= KD + 1.   

    Q       (output) REAL array, dimension (LDQ, N)   
            If JOBZ = 'V', the N-by-N orthogonal matrix used in the   
                           reduction to tridiagonal form.   
            If JOBZ = 'N', the array Q is not referenced.   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.  If JOBZ = 'V', then   
            LDQ >= max(1,N).   

    VL      (input) REAL   
    VU      (input) REAL   
            If RANGE='V', the lower and upper bounds of the interval to   
            be searched for eigenvalues. VL < VU.   
            Not referenced if RANGE = 'A' or 'I'.   

    IL      (input) INTEGER   
    IU      (input) INTEGER   
            If RANGE='I', the indices (in ascending order) of the   
            smallest and largest eigenvalues to be returned.   
            1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.   
            Not referenced if RANGE = 'A' or 'V'.   

    ABSTOL  (input) REAL   
            The absolute error tolerance for the eigenvalues.   
            An approximate eigenvalue is accepted as converged   
            when it is determined to lie in an interval [a,b]   
            of width less than or equal to   

                    ABSTOL + EPS *   max( |a|,|b| ) ,   

            where EPS is the machine precision.  If ABSTOL is less than   
            or equal to zero, then  EPS*|T|  will be used in its place,   
            where |T| is the 1-norm of the tridiagonal matrix obtained   
            by reducing AB to tridiagonal form.   

            Eigenvalues will be computed most accurately when ABSTOL is   
            set to twice the underflow threshold 2*SLAMCH('S'), not zero. 
  
            If this routine returns with INFO>0, indicating that some   
            eigenvectors did not converge, try setting ABSTOL to   
            2*SLAMCH('S').   

            See "Computing Small Singular Values of Bidiagonal Matrices   
            with Guaranteed High Relative Accuracy," by Demmel and   
            Kahan, LAPACK Working Note #3.   

    M       (output) INTEGER   
            The total number of eigenvalues found.  0 <= M <= N.   
            If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.   

    W       (output) REAL array, dimension (N)   
            The first M elements contain the selected eigenvalues in   
            ascending order.   

    Z       (output) REAL array, dimension (LDZ, max(1,M))   
            If JOBZ = 'V', then if INFO = 0, the first M columns of Z   
            contain the orthonormal eigenvectors of the matrix A   
            corresponding to the selected eigenvalues, with the i-th   
            column of Z holding the eigenvector associated with W(i).   
            If an eigenvector fails to converge, then that column of Z   
            contains the latest approximation to the eigenvector, and the 
  
            index of the eigenvector is returned in IFAIL.   
            If JOBZ = 'N', then Z is not referenced.   
            Note: the user must ensure that at least max(1,M) columns are 
  
            supplied in the array Z; if RANGE = 'V', the exact value of M 
  
            is not known in advance and an upper bound must be used.   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z.  LDZ >= 1, and if   
            JOBZ = 'V', LDZ >= max(1,N).   

    WORK    (workspace) REAL array, dimension (7*N)   

    IWORK   (workspace) INTEGER array, dimension (5*N)   

    IFAIL   (output) INTEGER array, dimension (N)   
            If JOBZ = 'V', then if INFO = 0, the first M elements of   
            IFAIL are zero.  If INFO > 0, then IFAIL contains the   
            indices of the eigenvectors that failed to converge.   
            If JOBZ = 'N', then IFAIL is not referenced.   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if INFO = i, then i eigenvectors failed to converge.   
                  Their indices are stored in array IFAIL.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static real c_b14 = 1.f;
    static integer c__1 = 1;
    static real c_b34 = 0.f;
    
    /* System generated locals */
    integer ab_dim1, ab_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, 
	    i__2;
    real r__1, r__2;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer indd, inde;
    static real anrm;
    static integer imax;
    static real rmin, rmax;
    static integer itmp1, i, j, indee;
    static real sigma;
    extern logical lsame_(char *, char *);
    static integer iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static char order[1];
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *);
    static logical lower;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), sswap_(integer *, real *, integer *, real *, integer *
	    );
    static logical wantz;
    static integer jj;
    static logical alleig, indeig;
    static integer iscale, indibl;
    static logical valeig;
    extern doublereal slamch_(char *);
    static real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static real abstll, bignum;
    extern doublereal slansb_(char *, char *, integer *, integer *, real *, 
	    integer *, real *);
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *);
    static integer indisp, indiwo;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *);
    static integer indwrk;
    extern /* Subroutine */ int ssbtrd_(char *, char *, integer *, integer *, 
	    real *, integer *, real *, real *, real *, integer *, real *, 
	    integer *), sstein_(integer *, real *, real *, 
	    integer *, real *, integer *, integer *, real *, integer *, real *
	    , integer *, integer *, integer *), ssterf_(integer *, real *, 
	    real *, integer *);
    static integer nsplit;
    static real smlnum;
    extern /* Subroutine */ int sstebz_(char *, char *, integer *, real *, 
	    real *, integer *, integer *, real *, real *, real *, integer *, 
	    integer *, real *, integer *, integer *, real *, integer *, 
	    integer *), ssteqr_(char *, integer *, real *, 
	    real *, real *, integer *, real *, integer *);
    static real eps, vll, vuu, tmp1;



#define W(I) w[(I)-1]
#define WORK(I) work[(I)-1]
#define IWORK(I) iwork[(I)-1]
#define IFAIL(I) ifail[(I)-1]

#define AB(I,J) ab[(I)-1 + ((J)-1)* ( *ldab)]
#define Q(I,J) q[(I)-1 + ((J)-1)* ( *ldq)]
#define Z(I,J) z[(I)-1 + ((J)-1)* ( *ldz)]

    wantz = lsame_(jobz, "V");
    alleig = lsame_(range, "A");
    valeig = lsame_(range, "V");
    indeig = lsame_(range, "I");
    lower = lsame_(uplo, "L");

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
	*info = -1;
    } else if (! (alleig || valeig || indeig)) {
	*info = -2;
    } else if (! (lower || lsame_(uplo, "U"))) {
	*info = -3;
    } else if (*n < 0) {
	*info = -4;
    } else if (*kd < 0) {
	*info = -5;
    } else if (*ldab < *kd + 1) {
	*info = -7;
    } else if (*ldq < *n) {
	*info = -9;
    } else if (valeig && *n > 0 && *vu <= *vl) {
	*info = -11;
    } else if (indeig && *il < 1) {
	*info = -12;
    } else if (indeig && (*iu < min(*n,*il) || *iu > *n)) {
	*info = -13;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
	*info = -18;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSBEVX", &i__1);
	return 0;
    }

/*     Quick return if possible */

    *m = 0;
    if (*n == 0) {
	return 0;
    }

    if (*n == 1) {
	if (alleig || indeig) {
	    *m = 1;
	    W(1) = AB(1,1);
	} else {
	    if (*vl < AB(1,1) && *vu >= AB(1,1)) {
		*m = 1;
		W(1) = AB(1,1);
	    }
	}
	if (wantz) {
	    Z(1,1) = 1.f;
	}
	return 0;
    }

/*     Get machine constants. */

    safmin = slamch_("Safe minimum");
    eps = slamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1.f / smlnum;
    rmin = sqrt(smlnum);
/* Computing MIN */
    r__1 = sqrt(bignum), r__2 = 1.f / sqrt(sqrt(safmin));
    rmax = dmin(r__1,r__2);

/*     Scale matrix to allowable range, if necessary. */

    iscale = 0;
    abstll = *abstol;
    if (valeig) {
	vll = *vl;
	vuu = *vu;
    }
    anrm = slansb_("M", uplo, n, kd, &AB(1,1), ldab, &WORK(1));
    if (anrm > 0.f && anrm < rmin) {
	iscale = 1;
	sigma = rmin / anrm;
    } else if (anrm > rmax) {
	iscale = 1;
	sigma = rmax / anrm;
    }
    if (iscale == 1) {
	if (lower) {
	    slascl_("B", kd, kd, &c_b14, &sigma, n, n, &AB(1,1), ldab, 
		    info);
	} else {
	    slascl_("Q", kd, kd, &c_b14, &sigma, n, n, &AB(1,1), ldab, 
		    info);
	}
	if (*abstol > 0.f) {
	    abstll = *abstol * sigma;
	}
	if (valeig) {
	    vll = *vl * sigma;
	    vuu = *vu * sigma;
	}
    }

/*     Call SSBTRD to reduce symmetric band matrix to tridiagonal form. */

    indd = 1;
    inde = indd + *n;
    indwrk = inde + *n;
    ssbtrd_(jobz, uplo, n, kd, &AB(1,1), ldab, &WORK(indd), &WORK(inde),
	     &Q(1,1), ldq, &WORK(indwrk), &iinfo);

/*     If all eigenvalues are desired and ABSTOL is less than or equal   
       to zero, then call SSTERF or SSTEQR.  If this fails for some   
       eigenvalue, then try SSTEBZ. */

    if ((alleig || indeig && *il == 1 && *iu == *n) && *abstol <= 0.f) {
	scopy_(n, &WORK(indd), &c__1, &W(1), &c__1);
	indee = indwrk + (*n << 1);
	if (! wantz) {
	    i__1 = *n - 1;
	    scopy_(&i__1, &WORK(inde), &c__1, &WORK(indee), &c__1);
	    ssterf_(n, &W(1), &WORK(indee), info);
	} else {
	    slacpy_("A", n, n, &Q(1,1), ldq, &Z(1,1), ldz);
	    i__1 = *n - 1;
	    scopy_(&i__1, &WORK(inde), &c__1, &WORK(indee), &c__1);
	    ssteqr_(jobz, n, &W(1), &WORK(indee), &Z(1,1), ldz, &WORK(
		    indwrk), info);
	    if (*info == 0) {
		i__1 = *n;
		for (i = 1; i <= *n; ++i) {
		    IFAIL(i) = 0;
/* L10: */
		}
	    }
	}
	if (*info == 0) {
	    *m = *n;
	    goto L30;
	}
	*info = 0;
    }

/*     Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. */

    if (wantz) {
	*(unsigned char *)order = 'B';
    } else {
	*(unsigned char *)order = 'E';
    }
    indibl = 1;
    indisp = indibl + *n;
    indiwo = indisp + *n;
    sstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &WORK(indd), &WORK(
	    inde), m, &nsplit, &W(1), &IWORK(indibl), &IWORK(indisp), &WORK(
	    indwrk), &IWORK(indiwo), info);

    if (wantz) {
	sstein_(n, &WORK(indd), &WORK(inde), m, &W(1), &IWORK(indibl), &IWORK(
		indisp), &Z(1,1), ldz, &WORK(indwrk), &IWORK(indiwo), &
		IFAIL(1), info);

/*        Apply orthogonal matrix used in reduction to tridiagonal   
          form to eigenvectors returned by SSTEIN. */

	i__1 = *m;
	for (j = 1; j <= *m; ++j) {
	    scopy_(n, &Z(1,j), &c__1, &WORK(1), &c__1);
	    sgemv_("N", n, n, &c_b14, &Q(1,1), ldq, &WORK(1), &c__1, &
		    c_b34, &Z(1,j), &c__1);
/* L20: */
	}
    }

/*     If matrix was scaled, then rescale eigenvalues appropriately. */

L30:
    if (iscale == 1) {
	if (*info == 0) {
	    imax = *m;
	} else {
	    imax = *info - 1;
	}
	r__1 = 1.f / sigma;
	sscal_(&imax, &r__1, &W(1), &c__1);
    }

/*     If eigenvalues are not in order, then sort them, along with   
       eigenvectors. */

    if (wantz) {
	i__1 = *m - 1;
	for (j = 1; j <= *m-1; ++j) {
	    i = 0;
	    tmp1 = W(j);
	    i__2 = *m;
	    for (jj = j + 1; jj <= *m; ++jj) {
		if (W(jj) < tmp1) {
		    i = jj;
		    tmp1 = W(jj);
		}
/* L40: */
	    }

	    if (i != 0) {
		itmp1 = IWORK(indibl + i - 1);
		W(i) = W(j);
		IWORK(indibl + i - 1) = IWORK(indibl + j - 1);
		W(j) = tmp1;
		IWORK(indibl + j - 1) = itmp1;
		sswap_(n, &Z(1,i), &c__1, &Z(1,j), &
			c__1);
		if (*info != 0) {
		    itmp1 = IFAIL(i);
		    IFAIL(i) = IFAIL(j);
		    IFAIL(j) = itmp1;
		}
	    }
/* L50: */
	}
    }

    return 0;

/*     End of SSBEVX */

} /* ssbevx_ */

Exemplo n.º 12

0

Exibir arquivo

Arquivo: slals0.c Projeto: Electrostatics/FETK

/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, 
	integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, 
	integer *ldbx, integer *perm, integer *givptr, integer *givcol, 
	integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real *
	difl, real *difr, real *z__, integer *k, real *c__, real *s, real *
	work, integer *info)
{
    /* System generated locals */
    integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, 
	    difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, 
	    poles_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    static integer i__, j, m, n;
    static real dj;
    static integer nlp1;
    static real temp;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    extern doublereal snrm2_(integer *, real *, integer *);
    static real diflj, difrj, dsigj;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
	    sgemv_(char *, integer *, integer *, real *, real *, integer *, 
	    real *, integer *, real *, real *, integer *, ftnlen), scopy_(
	    integer *, real *, integer *, real *, integer *);
    extern doublereal slamc3_(real *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
    static real dsigjp;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *, 
	    ftnlen), slacpy_(char *, integer *, integer *, real *, integer *, 
	    real *, integer *, ftnlen);


/*  -- LAPACK routine (version 3.0) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     December 1, 1999 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLALS0 applies back the multiplying factors of either the left or the */
/*  right singular vector matrix of a diagonal matrix appended by a row */
/*  to the right hand side matrix B in solving the least squares problem */
/*  using the divide-and-conquer SVD approach. */

/*  For the left singular vector matrix, three types of orthogonal */
/*  matrices are involved: */

/*  (1L) Givens rotations: the number of such rotations is GIVPTR; the */
/*       pairs of columns/rows they were applied to are stored in GIVCOL; */
/*       and the C- and S-values of these rotations are stored in GIVNUM. */

/*  (2L) Permutation. The (NL+1)-st row of B is to be moved to the first */
/*       row, and for J=2:N, PERM(J)-th row of B is to be moved to the */
/*       J-th row. */

/*  (3L) The left singular vector matrix of the remaining matrix. */

/*  For the right singular vector matrix, four types of orthogonal */
/*  matrices are involved: */

/*  (1R) The right singular vector matrix of the remaining matrix. */

/*  (2R) If SQRE = 1, one extra Givens rotation to generate the right */
/*       null space. */

/*  (3R) The inverse transformation of (2L). */

/*  (4R) The inverse transformation of (1L). */

/*  Arguments */
/*  ========= */

/*  ICOMPQ (input) INTEGER */
/*         Specifies whether singular vectors are to be computed in */
/*         factored form: */
/*         = 0: Left singular vector matrix. */
/*         = 1: Right singular vector matrix. */

/*  NL     (input) INTEGER */
/*         The row dimension of the upper block. NL >= 1. */

/*  NR     (input) INTEGER */
/*         The row dimension of the lower block. NR >= 1. */

/*  SQRE   (input) INTEGER */
/*         = 0: the lower block is an NR-by-NR square matrix. */
/*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */

/*         The bidiagonal matrix has row dimension N = NL + NR + 1, */
/*         and column dimension M = N + SQRE. */

/*  NRHS   (input) INTEGER */
/*         The number of columns of B and BX. NRHS must be at least 1. */

/*  B      (input/output) REAL array, dimension ( LDB, NRHS ) */
/*         On input, B contains the right hand sides of the least */
/*         squares problem in rows 1 through M. On output, B contains */
/*         the solution X in rows 1 through N. */

/*  LDB    (input) INTEGER */
/*         The leading dimension of B. LDB must be at least */
/*         max(1,MAX( M, N ) ). */

/*  BX     (workspace) REAL array, dimension ( LDBX, NRHS ) */

/*  LDBX   (input) INTEGER */
/*         The leading dimension of BX. */

/*  PERM   (input) INTEGER array, dimension ( N ) */
/*         The permutations (from deflation and sorting) applied */
/*         to the two blocks. */

/*  GIVPTR (input) INTEGER */
/*         The number of Givens rotations which took place in this */
/*         subproblem. */

/*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) */
/*         Each pair of numbers indicates a pair of rows/columns */
/*         involved in a Givens rotation. */

/*  LDGCOL (input) INTEGER */
/*         The leading dimension of GIVCOL, must be at least N. */

/*  GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) */
/*         Each number indicates the C or S value used in the */
/*         corresponding Givens rotation. */

/*  LDGNUM (input) INTEGER */
/*         The leading dimension of arrays DIFR, POLES and */
/*         GIVNUM, must be at least K. */

/*  POLES  (input) REAL array, dimension ( LDGNUM, 2 ) */
/*         On entry, POLES(1:K, 1) contains the new singular */
/*         values obtained from solving the secular equation, and */
/*         POLES(1:K, 2) is an array containing the poles in the secular */
/*         equation. */

/*  DIFL   (input) REAL array, dimension ( K ). */
/*         On entry, DIFL(I) is the distance between I-th updated */
/*         (undeflated) singular value and the I-th (undeflated) old */
/*         singular value. */

/*  DIFR   (input) REAL array, dimension ( LDGNUM, 2 ). */
/*         On entry, DIFR(I, 1) contains the distances between I-th */
/*         updated (undeflated) singular value and the I+1-th */
/*         (undeflated) old singular value. And DIFR(I, 2) is the */
/*         normalizing factor for the I-th right singular vector. */

/*  Z      (input) REAL array, dimension ( K ) */
/*         Contain the components of the deflation-adjusted updating row */
/*         vector. */

/*  K      (input) INTEGER */
/*         Contains the dimension of the non-deflated matrix, */
/*         This is the order of the related secular equation. 1 <= K <=N. */

/*  C      (input) REAL */
/*         C contains garbage if SQRE =0 and the C-value of a Givens */
/*         rotation related to the right null space if SQRE = 1. */

/*  S      (input) REAL */
/*         S contains garbage if SQRE =0 and the S-value of a Givens */
/*         rotation related to the right null space if SQRE = 1. */

/*  WORK   (workspace) REAL array, dimension ( K ) */

/*  INFO   (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Ming Gu and Ren-Cang Li, Computer Science Division, University of */
/*       California at Berkeley, USA */
/*     Osni Marques, LBNL/NERSC, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    bx_dim1 = *ldbx;
    bx_offset = 1 + bx_dim1;
    bx -= bx_offset;
    --perm;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1;
    givcol -= givcol_offset;
    difr_dim1 = *ldgnum;
    difr_offset = 1 + difr_dim1;
    difr -= difr_offset;
    poles_dim1 = *ldgnum;
    poles_offset = 1 + poles_dim1;
    poles -= poles_offset;
    givnum_dim1 = *ldgnum;
    givnum_offset = 1 + givnum_dim1;
    givnum -= givnum_offset;
    --difl;
    --z__;
    --work;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*nl < 1) {
	*info = -2;
    } else if (*nr < 1) {
	*info = -3;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -4;
    }

    n = *nl + *nr + 1;

    if (*nrhs < 1) {
	*info = -5;
    } else if (*ldb < n) {
	*info = -7;
    } else if (*ldbx < n) {
	*info = -9;
    } else if (*givptr < 0) {
	*info = -11;
    } else if (*ldgcol < n) {
	*info = -13;
    } else if (*ldgnum < n) {
	*info = -15;
    } else if (*k < 1) {
	*info = -20;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLALS0", &i__1, (ftnlen)6);
	return 0;
    }

    m = n + *sqre;
    nlp1 = *nl + 1;

    if (*icompq == 0) {

/*        Apply back orthogonal transformations from the left. */

/*        Step (1L): apply back the Givens rotations performed. */

	i__1 = *givptr;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, &
		    b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + 
		    (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]);
/* L10: */
	}

/*        Step (2L): permute rows of B. */

	scopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx);
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    scopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], 
		    ldbx);
/* L20: */
	}

/*        Step (3L): apply the inverse of the left singular vector */
/*        matrix to BX. */

	if (*k == 1) {
	    scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb);
	    if (z__[1] < 0.f) {
		sscal_(nrhs, &c_b5, &b[b_offset], ldb);
	    }
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		diflj = difl[j];
		dj = poles[j + poles_dim1];
		dsigj = -poles[j + (poles_dim1 << 1)];
		if (j < *k) {
		    difrj = -difr[j + difr_dim1];
		    dsigjp = -poles[j + 1 + (poles_dim1 << 1)];
		}
		if (z__[j] == 0.f || poles[j + (poles_dim1 << 1)] == 0.f) {
		    work[j] = 0.f;
		} else {
		    work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj /
			     (poles[j + (poles_dim1 << 1)] + dj);
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == 
			    0.f) {
			work[i__] = 0.f;
		    } else {
			work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] 
				/ (slamc3_(&poles[i__ + (poles_dim1 << 1)], &
				dsigj) - diflj) / (poles[i__ + (poles_dim1 << 
				1)] + dj);
		    }
/* L30: */
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == 
			    0.f) {
			work[i__] = 0.f;
		    } else {
			work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] 
				/ (slamc3_(&poles[i__ + (poles_dim1 << 1)], &
				dsigjp) + difrj) / (poles[i__ + (poles_dim1 <<
				 1)] + dj);
		    }
/* L40: */
		}
		work[1] = -1.f;
		temp = snrm2_(k, &work[1], &c__1);
		sgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], &
			c__1, &c_b13, &b[j + b_dim1], ldb, (ftnlen)1);
		slascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b[j + 
			b_dim1], ldb, info, (ftnlen)1);
/* L50: */
	    }
	}

/*        Move the deflated rows of BX to B also. */

	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    slacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 
		    + b_dim1], ldb, (ftnlen)1);
	}
    } else {

/*        Apply back the right orthogonal transformations. */

/*        Step (1R): apply back the new right singular vector matrix */
/*        to B. */

	if (*k == 1) {
	    scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx);
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		dsigj = poles[j + (poles_dim1 << 1)];
		if (z__[j] == 0.f) {
		    work[j] = 0.f;
		} else {
		    work[j] = -z__[j] / difl[j] / (dsigj + poles[j + 
			    poles_dim1]) / difr[j + (difr_dim1 << 1)];
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			work[i__] = 0.f;
		    } else {
			r__1 = -poles[i__ + 1 + (poles_dim1 << 1)];
			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[
				i__ + difr_dim1]) / (dsigj + poles[i__ + 
				poles_dim1]) / difr[i__ + (difr_dim1 << 1)];
		    }
/* L60: */
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			work[i__] = 0.f;
		    } else {
			r__1 = -poles[i__ + (poles_dim1 << 1)];
			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[
				i__]) / (dsigj + poles[i__ + poles_dim1]) / 
				difr[i__ + (difr_dim1 << 1)];
		    }
/* L70: */
		}
		sgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], &
			c__1, &c_b13, &bx[j + bx_dim1], ldbx, (ftnlen)1);
/* L80: */
	    }
	}

/*        Step (2R): if SQRE = 1, apply back the rotation that is */
/*        related to the right null space of the subproblem. */

	if (*sqre == 1) {
	    scopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx);
	    srot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, 
		    s);
	}
	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    slacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + 
		    bx_dim1], ldbx, (ftnlen)1);
	}

/*        Step (3R): permute rows of B. */

	scopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb);
	if (*sqre == 1) {
	    scopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb);
	}
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    scopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], 
		    ldb);
/* L90: */
	}

/*        Step (4R): apply back the Givens rotations performed. */

	for (i__ = *givptr; i__ >= 1; --i__) {
	    r__1 = -givnum[i__ + givnum_dim1];
	    srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, &
		    b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + 
		    (givnum_dim1 << 1)], &r__1);
/* L100: */
	}
    }

    return 0;

/*     End of SLALS0 */

} /* slals0_ */

Exemplo n.º 13

0

Exibir arquivo

Arquivo: psgstrf_column_bmod.c Projeto: SanjayaDeSilva/GridLAB-D

int
psgstrf_column_bmod(
		    const int  pnum,   /* process number */
		    const int  jcol,   /* current column in the panel */
		    const int  fpanelc,/* first column in the panel */
		    const int  nseg,   /* number of s-nodes to update jcol */
		    int        *segrep,/* in */
		    int        *repfnz,/* in */
		    float     *dense, /* modified */
		    float     *tempv, /* working array */
		    pxgstrf_shared_t *pxgstrf_shared, /* modified */
		    Gstat_t *Gstat     /* modified */
		    )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose:
 * ========
 *    Performs numeric block updates (sup-col) in topological order.
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
 *    Special processing on the supernodal portion of L\U[*,j].
 *
 * Return value:
 * =============
 *      0 - successful return
 *    > 0 - number of bytes allocated when run out of space
 *
 */
#if ( MACH==CRAY_PVP )
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    
#ifdef USE_VENDOR_BLAS    
    int         incx = 1, incy = 1;
    float      alpha, beta;
#endif
    GlobalLU_t *Glu = pxgstrf_shared->Glu;   /* modified */
    
    /* krep = representative of current k-th supernode
     * fsupc = first supernodal column
     * nsupc = no of columns in supernode
     * nsupr = no of rows in supernode (used as leading dimension)
     * luptr = location of supernodal LU-block in storage
     * kfnz = first nonz in the k-th supernodal segment
     * no_zeros = no of leading zeros in a supernodal U-segment
     */
    float	  ukj, ukj1, ukj2;
    register int lptr, kfnz, isub, irow, i, no_zeros;
    register int luptr, luptr1, luptr2;
    int          fsupc, nsupc, nsupr, segsze;
    int          nrow;	  /* No of rows in the matrix of matrix-vector */
    int          jsupno, k, ksub, krep, krep_ind, ksupno;
    int          ufirst, nextlu;
    int          fst_col; /* First column within small LU update */
    int          d_fsupc; /* Distance between the first column of the current
			     panel and the first column of the current snode.*/
    int          *xsup, *supno;
    int          *lsub, *xlsub, *xlsub_end;
    float       *lusup;
    int          *xlusup, *xlusup_end;
    float       *tempv1;
    int          mem_error;
    register float flopcnt;

    float      zero = 0.0;
    float      one = 1.0;
    float      none = -1.0;

    xsup       = Glu->xsup;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    xlusup_end = Glu->xlusup_end;
    jsupno     = supno[jcol];

    /* 
     * For each nonz supernode segment of U[*,j] in topological order 
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {

	krep = segrep[k];
	k--;
	ksupno = supno[krep];
#if ( DEBUGlvel>=2 )
if (jcol==BADCOL)
printf("(%d) psgstrf_column_bmod[1]: %d, nseg %d, krep %d, jsupno %d, ksupno %d\n",
       pnum, jcol, nseg, krep, jsupno, ksupno);
#endif    
	if ( jsupno != ksupno ) { /* Outside the rectangular supernode */

	    fsupc = xsup[ksupno];
	    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

  	    /* Distance from the current supernode to the current panel; 
	       d_fsupc=0 if fsupc >= fpanelc. */
  	    d_fsupc = fst_col - fsupc; 

	    luptr = xlusup[fst_col] + d_fsupc;
	    lptr = xlsub[fsupc] + d_fsupc;
	    kfnz = repfnz[krep];
	    kfnz = SUPERLU_MAX ( kfnz, fpanelc );
	    segsze = krep - kfnz + 1;
	    nsupc = krep - fst_col + 1;
	    nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	    nrow = nsupr - d_fsupc - nsupc;
	    krep_ind = lptr + nsupc - 1;

            flopcnt = segsze * (segsze - 1) + 2 * nrow * segsze;
	    Gstat->procstat[pnum].fcops += flopcnt;

#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)	    
printf("(%d) psgstrf_column_bmod[2]: %d, krep %d, kfnz %d, segsze %d, d_fsupc %d,\
fsupc %d, nsupr %d, nsupc %d\n",
       pnum, jcol, krep, kfnz, segsze, d_fsupc, fsupc, nsupr, nsupc);

#endif


	    /* 
	     * Case 1: Update U-segment of size 1 -- col-col update 
	     */
	    if ( segsze == 1 ) {
	  	ukj = dense[lsub[krep_ind]];
		luptr += nsupr*(nsupc-1) + nsupc;

		for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
		    irow = lsub[i];
		    dense[irow] -=  ukj*lusup[luptr];
		    luptr++;
		}
	    } else if ( segsze <= 3 ) {
		ukj = dense[lsub[krep_ind]];
		luptr += nsupr*(nsupc-1) + nsupc-1;
		ukj1 = dense[lsub[krep_ind - 1]];
		luptr1 = luptr - nsupr;
		if ( segsze == 2 ) { /* Case 2: 2cols-col update */
		    ukj -= ukj1 * lusup[luptr1];
		    dense[lsub[krep_ind]] = ukj;
		    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
		    	irow = lsub[i];
		    	luptr++;
		    	luptr1++;
		    	dense[irow] -= ( ukj*lusup[luptr]
					+ ukj1*lusup[luptr1] );
		    }
		} else { /* Case 3: 3cols-col update */
		    ukj2 = dense[lsub[krep_ind - 2]];
		    luptr2 = luptr1 - nsupr;
		    ukj1 -= ukj2 * lusup[luptr2-1];
		    ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
		    dense[lsub[krep_ind]] = ukj;
		    dense[lsub[krep_ind-1]] = ukj1;
		    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
		    	irow = lsub[i];
		    	luptr++;
		    	luptr1++;
			luptr2++;
		    	dense[irow] -= ( ukj*lusup[luptr]
			     + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
		    }
		}


	    } else {
	  	/*
		 * Case: sup-col update
		 * Perform a triangular solve and block update,
		 * then scatter the result of sup-col update to dense
		 */
		no_zeros = kfnz - fst_col;

	        /* Copy U[*,j] segment from dense[*] to tempv[*] */
	        isub = lptr + no_zeros;
	        for (i = 0; i < segsze; i++) {
	  	    irow = lsub[isub];
		    tempv[i] = dense[irow];
		    ++isub; 
	        }

	        /* Dense triangular solve -- start effective triangle */
		luptr += nsupr * no_zeros + no_zeros; 
#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
		STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#else
		strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#endif
		
 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		alpha = one;
		beta = zero;
#if ( MACH==CRAY_PVP )
		SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
		sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
		slsolve ( nsupr, segsze, &lusup[luptr], tempv );

 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
#endif
                /* Scatter tempv[] into SPA dense[*] */
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    dense[irow] = tempv[i]; /* Scatter */
                    tempv[i] = zero;
                    isub++;
                }

		/* Scatter tempv1[] into SPA dense[*] */
		for (i = 0; i < nrow; i++) {
		    irow = lsub[isub];
                    dense[irow] -= tempv1[i];
		    tempv1[i] = zero;
		    ++isub;
		}
	    } /* else segsze >= 4 */
	    
	} /* if jsupno ... */

    } /* for each segment... */

    
    /* ------------------------------------------
       Process the supernodal portion of L\U[*,j]
       ------------------------------------------ */
    
    fsupc = SUPER_FSUPC (jsupno);
    nsupr = xlsub_end[fsupc] - xlsub[fsupc];
    if ( (mem_error = Glu_alloc(pnum, jcol, nsupr, LUSUP, &nextlu, 
			       pxgstrf_shared)) )
	return mem_error;
    xlusup[jcol] = nextlu;
    lusup = Glu->lusup;
    
    /* Gather the nonzeros from SPA dense[*,j] into L\U[*,j] */
    for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; ++isub) {
  	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
	dense[irow] = zero;
#ifdef DEBUG
if (jcol == -1)
    printf("(%d) psgstrf_column_bmod[lusup] jcol %d, irow %d, lusup %.10e\n",
	   pnum, jcol, irow, lusup[nextlu]);
#endif	
	++nextlu;
    }
    xlusup_end[jcol] = nextlu; /* close L\U[*,jcol] */

#if ( DEBUGlevel>=2 )
if (jcol == -1) {
    nrow = xlusup_end[jcol] - xlusup[jcol];
    print_double_vec("before sup-col update", nrow, &lsub[xlsub[fsupc]],
		     &lusup[xlusup[jcol]]);
}
#endif    
    
    /*
     * For more updates within the panel (also within the current supernode), 
     * should start from the first column of the panel, or the first column 
     * of the supernode, whichever is bigger. There are 2 cases:
     *    (1) fsupc < fpanelc,  then fst_col := fpanelc
     *    (2) fsupc >= fpanelc, then fst_col := fsupc
     */
    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

    if ( fst_col < jcol ) {

  	/* distance between the current supernode and the current panel;
	   d_fsupc=0 if fsupc >= fpanelc. */
  	d_fsupc = fst_col - fsupc;

	lptr = xlsub[fsupc] + d_fsupc;
	luptr = xlusup[fst_col] + d_fsupc;
	nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	nsupc = jcol - fst_col;	/* Excluding jcol */
	nrow = nsupr - d_fsupc - nsupc;

	/* points to the beginning of jcol in supernode L\U[*,jsupno] */
	ufirst = xlusup[jcol] + d_fsupc;	

#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)
printf("(%d) psgstrf_column_bmod[3] jcol %d, fsupc %d, nsupr %d, nsupc %d, nrow %d\n",
       pnum, jcol, fsupc, nsupr, nsupc, nrow);
#endif    

        flopcnt = nsupc * (nsupc - 1) + 2 * nrow * nsupc;
	Gstat->procstat[pnum].fcops += flopcnt;

/*	ops[TRSV] += nsupc * (nsupc - 1);
	ops[GEMV] += 2 * nrow * nsupc;    */
	
#ifdef USE_VENDOR_BLAS
	alpha = none; beta = one; /* y := beta*y + alpha*A*x */
#if ( MACH==CRAY_PVP )
	STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
	SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	strsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
	sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );

	smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
		 &lusup[ufirst], tempv );
	
        /* Copy updates from tempv[*] into lusup[*] */
	isub = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
            lusup[isub] -= tempv[i];
            tempv[i] = 0.0;
	    ++isub;
	}
#endif
    } /* if fst_col < jcol ... */ 

    return 0;
}

Exemplo n.º 14

0

Exibir arquivo

Arquivo: slabrd.c Projeto: csapng/libflame

/* Subroutine */
int slabrd_(integer *m, integer *n, integer *nb, real *a, integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, integer *ldx, real *y, integer *ldy)
{
    /* System generated locals */
    integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, i__3;
    /* Local variables */
    integer i__;
    extern /* Subroutine */
    int sscal_(integer *, real *, real *, integer *), sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), slarfg_( integer *, real *, real *, integer *, real *);
    /* -- LAPACK auxiliary routine (version 3.4.2) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* September 2012 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Quick return if possible */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;
    /* Function Body */
    if (*m <= 0 || *n <= 0)
    {
        return 0;
    }
    if (*m >= *n)
    {
        /* Reduce to upper bidiagonal form */
        i__1 = *nb;
        for (i__ = 1;
                i__ <= i__1;
                ++i__)
        {
            /* Update A(i:m,i) */
            i__2 = *m - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("No transpose", &i__2, &i__3, &c_b4, &a[i__ + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b5, &a[i__ + i__ * a_dim1], & c__1);
            i__2 = *m - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("No transpose", &i__2, &i__3, &c_b4, &x[i__ + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[i__ + i__ * a_dim1], &c__1);
            /* Generate reflection Q(i) to annihilate A(i+1:m,i) */
            i__2 = *m - i__ + 1;
            /* Computing MIN */
            i__3 = i__ + 1;
            slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1], &c__1, &tauq[i__]);
            d__[i__] = a[i__ + i__ * a_dim1];
            if (i__ < *n)
            {
                a[i__ + i__ * a_dim1] = 1.f;
                /* Compute Y(i+1:n,i) */
                i__2 = *m - i__ + 1;
                i__3 = *n - i__;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + (i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b16, & y[i__ + 1 + i__ * y_dim1], &c__1);
                i__2 = *m - i__ + 1;
                i__3 = i__ - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1);
                i__2 = *n - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[ i__ + 1 + i__ * y_dim1], &c__1);
                i__2 = *m - i__ + 1;
                i__3 = i__ - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &x[i__ + x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b16, &y[i__ * y_dim1 + 1], &c__1);
                i__2 = i__ - 1;
                i__3 = *n - i__;
                sgemv_("Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1);
                i__2 = *n - i__;
                sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
                /* Update A(i,i+1:n) */
                i__2 = *n - i__;
                sgemv_("No transpose", &i__2, &i__, &c_b4, &y[i__ + 1 + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b5, &a[i__ + ( i__ + 1) * a_dim1], lda);
                i__2 = i__ - 1;
                i__3 = *n - i__;
                sgemv_("Transpose", &i__2, &i__3, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b5, &a[ i__ + (i__ + 1) * a_dim1], lda);
                /* Generate reflection P(i) to annihilate A(i,i+2:n) */
                i__2 = *n - i__;
                /* Computing MIN */
                i__3 = i__ + 2;
                slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( i__3,*n) * a_dim1], lda, &taup[i__]);
                e[i__] = a[i__ + (i__ + 1) * a_dim1];
                a[i__ + (i__ + 1) * a_dim1] = 1.f;
                /* Compute X(i+1:m,i) */
                i__2 = *m - i__;
                i__3 = *n - i__;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[i__ + 1 + i__ * x_dim1], &c__1);
                i__2 = *n - i__;
                sgemv_("Transpose", &i__2, &i__, &c_b5, &y[i__ + 1 + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b16, &x[ i__ * x_dim1 + 1], &c__1);
                i__2 = *m - i__;
                sgemv_("No transpose", &i__2, &i__, &c_b4, &a[i__ + 1 + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ i__ + 1 + i__ * x_dim1], &c__1);
                i__2 = i__ - 1;
                i__3 = *n - i__;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) * a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & c_b16, &x[i__ * x_dim1 + 1], &c__1);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ i__ + 1 + i__ * x_dim1], &c__1);
                i__2 = *m - i__;
                sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
            }
            /* L10: */
        }
    }
    else
    {
        /* Reduce to lower bidiagonal form */
        i__1 = *nb;
        for (i__ = 1;
                i__ <= i__1;
                ++i__)
        {
            /* Update A(i,i:n) */
            i__2 = *n - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("No transpose", &i__2, &i__3, &c_b4, &y[i__ + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b5, &a[i__ + i__ * a_dim1], lda);
            i__2 = i__ - 1;
            i__3 = *n - i__ + 1;
            sgemv_("Transpose", &i__2, &i__3, &c_b4, &a[i__ * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b5, &a[i__ + i__ * a_dim1], lda);
            /* Generate reflection P(i) to annihilate A(i,i+1:n) */
            i__2 = *n - i__ + 1;
            /* Computing MIN */
            i__3 = i__ + 1;
            slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1], lda, &taup[i__]);
            d__[i__] = a[i__ + i__ * a_dim1];
            if (i__ < *m)
            {
                a[i__ + i__ * a_dim1] = 1.f;
                /* Compute X(i+1:m,i) */
                i__2 = *m - i__;
                i__3 = *n - i__ + 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b16, & x[i__ + 1 + i__ * x_dim1], &c__1);
                i__2 = *n - i__ + 1;
                i__3 = i__ - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &y[i__ + y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ i__ + 1 + i__ * x_dim1], &c__1);
                i__2 = i__ - 1;
                i__3 = *n - i__ + 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ * a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b16, &x[i__ * x_dim1 + 1], &c__1);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &x[i__ + 1 + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b5, &x[ i__ + 1 + i__ * x_dim1], &c__1);
                i__2 = *m - i__;
                sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
                /* Update A(i+1:m,i) */
                i__2 = *m - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &a[i__ + 1 + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b5, &a[i__ + 1 + i__ * a_dim1], &c__1);
                i__2 = *m - i__;
                sgemv_("No transpose", &i__2, &i__, &c_b4, &x[i__ + 1 + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b5, &a[ i__ + 1 + i__ * a_dim1], &c__1);
                /* Generate reflection Q(i) to annihilate A(i+2:m,i) */
                i__2 = *m - i__;
                /* Computing MIN */
                i__3 = i__ + 2;
                slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1], &c__1, &tauq[i__]);
                e[i__] = a[i__ + 1 + i__ * a_dim1];
                a[i__ + 1 + i__ * a_dim1] = 1.f;
                /* Compute Y(i+1:n,i) */
                i__2 = *m - i__;
                i__3 = *n - i__;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[i__ + 1 + i__ * y_dim1], &c__1);
                i__2 = *m - i__;
                i__3 = i__ - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[ i__ * y_dim1 + 1], &c__1);
                i__2 = *n - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b4, &y[i__ + 1 + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[ i__ + 1 + i__ * y_dim1], &c__1);
                i__2 = *m - i__;
                sgemv_("Transpose", &i__2, &i__, &c_b5, &x[i__ + 1 + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &y[ i__ * y_dim1 + 1], &c__1);
                i__2 = *n - i__;
                sgemv_("Transpose", &i__, &i__2, &c_b4, &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b5, &y[i__ + 1 + i__ * y_dim1], &c__1);
                i__2 = *n - i__;
                sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
            }
            /* L20: */
        }
    }
    return 0;
    /* End of SLABRD */
}

Exemplo n.º 15

0

Exibir arquivo

Arquivo: clals0.c Projeto: EugeneGalipchak/antelope_contrib

/* Subroutine */ int clals0_(integer *icompq, integer *nl, integer *nr, 
	integer *sqre, integer *nrhs, complex *b, integer *ldb, complex *bx, 
	integer *ldbx, integer *perm, integer *givptr, integer *givcol, 
	integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real *
	difl, real *difr, real *z__, integer *k, real *c__, real *s, real *
	rwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       December 1, 1999   


    Purpose   
    =======   

    CLALS0 applies back the multiplying factors of either the left or the   
    right singular vector matrix of a diagonal matrix appended by a row   
    to the right hand side matrix B in solving the least squares problem   
    using the divide-and-conquer SVD approach.   

    For the left singular vector matrix, three types of orthogonal   
    matrices are involved:   

    (1L) Givens rotations: the number of such rotations is GIVPTR; the   
         pairs of columns/rows they were applied to are stored in GIVCOL;   
         and the C- and S-values of these rotations are stored in GIVNUM.   

    (2L) Permutation. The (NL+1)-st row of B is to be moved to the first   
         row, and for J=2:N, PERM(J)-th row of B is to be moved to the   
         J-th row.   

    (3L) The left singular vector matrix of the remaining matrix.   

    For the right singular vector matrix, four types of orthogonal   
    matrices are involved:   

    (1R) The right singular vector matrix of the remaining matrix.   

    (2R) If SQRE = 1, one extra Givens rotation to generate the right   
         null space.   

    (3R) The inverse transformation of (2L).   

    (4R) The inverse transformation of (1L).   

    Arguments   
    =========   

    ICOMPQ (input) INTEGER   
           Specifies whether singular vectors are to be computed in   
           factored form:   
           = 0: Left singular vector matrix.   
           = 1: Right singular vector matrix.   

    NL     (input) INTEGER   
           The row dimension of the upper block. NL >= 1.   

    NR     (input) INTEGER   
           The row dimension of the lower block. NR >= 1.   

    SQRE   (input) INTEGER   
           = 0: the lower block is an NR-by-NR square matrix.   
           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.   

           The bidiagonal matrix has row dimension N = NL + NR + 1,   
           and column dimension M = N + SQRE.   

    NRHS   (input) INTEGER   
           The number of columns of B and BX. NRHS must be at least 1.   

    B      (input/output) COMPLEX array, dimension ( LDB, NRHS )   
           On input, B contains the right hand sides of the least   
           squares problem in rows 1 through M. On output, B contains   
           the solution X in rows 1 through N.   

    LDB    (input) INTEGER   
           The leading dimension of B. LDB must be at least   
           max(1,MAX( M, N ) ).   

    BX     (workspace) COMPLEX array, dimension ( LDBX, NRHS )   

    LDBX   (input) INTEGER   
           The leading dimension of BX.   

    PERM   (input) INTEGER array, dimension ( N )   
           The permutations (from deflation and sorting) applied   
           to the two blocks.   

    GIVPTR (input) INTEGER   
           The number of Givens rotations which took place in this   
           subproblem.   

    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )   
           Each pair of numbers indicates a pair of rows/columns   
           involved in a Givens rotation.   

    LDGCOL (input) INTEGER   
           The leading dimension of GIVCOL, must be at least N.   

    GIVNUM (input) REAL array, dimension ( LDGNUM, 2 )   
           Each number indicates the C or S value used in the   
           corresponding Givens rotation.   

    LDGNUM (input) INTEGER   
           The leading dimension of arrays DIFR, POLES and   
           GIVNUM, must be at least K.   

    POLES  (input) REAL array, dimension ( LDGNUM, 2 )   
           On entry, POLES(1:K, 1) contains the new singular   
           values obtained from solving the secular equation, and   
           POLES(1:K, 2) is an array containing the poles in the secular   
           equation.   

    DIFL   (input) REAL array, dimension ( K ).   
           On entry, DIFL(I) is the distance between I-th updated   
           (undeflated) singular value and the I-th (undeflated) old   
           singular value.   

    DIFR   (input) REAL array, dimension ( LDGNUM, 2 ).   
           On entry, DIFR(I, 1) contains the distances between I-th   
           updated (undeflated) singular value and the I+1-th   
           (undeflated) old singular value. And DIFR(I, 2) is the   
           normalizing factor for the I-th right singular vector.   

    Z      (input) REAL array, dimension ( K )   
           Contain the components of the deflation-adjusted updating row   
           vector.   

    K      (input) INTEGER   
           Contains the dimension of the non-deflated matrix,   
           This is the order of the related secular equation. 1 <= K <=N.   

    C      (input) REAL   
           C contains garbage if SQRE =0 and the C-value of a Givens   
           rotation related to the right null space if SQRE = 1.   

    S      (input) REAL   
           S contains garbage if SQRE =0 and the S-value of a Givens   
           rotation related to the right null space if SQRE = 1.   

    RWORK  (workspace) REAL array, dimension   
           ( K*(1+NRHS) + 2*NRHS )   

    INFO   (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    Based on contributions by   
       Ming Gu and Ren-Cang Li, Computer Science Division, University of   
         California at Berkeley, USA   
       Osni Marques, LBNL/NERSC, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static real c_b5 = -1.f;
    static integer c__1 = 1;
    static real c_b13 = 1.f;
    static real c_b15 = 0.f;
    static integer c__0 = 0;
    
    /* System generated locals */
    integer givcol_dim1, givcol_offset, difr_dim1, difr_offset, givnum_dim1, 
	    givnum_offset, poles_dim1, poles_offset, b_dim1, b_offset, 
	    bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1;
    complex q__1;
    /* Builtin functions */
    double r_imag(complex *);
    /* Local variables */
    static integer jcol;
    static real temp;
    static integer jrow;
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer i__, j, m, n;
    static real diflj, difrj, dsigj;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), sgemv_(char *, integer *, integer *, real *
	    , real *, integer *, real *, integer *, real *, real *, integer *), csrot_(integer *, complex *, integer *, complex *, 
	    integer *, real *, real *);
    extern doublereal slamc3_(real *, real *);
    static real dj;
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, complex *, integer *, integer *), csscal_(integer *, real *, complex *, integer *), 
	    clacpy_(char *, integer *, integer *, complex *, integer *, 
	    complex *, integer *), xerbla_(char *, integer *);
    static real dsigjp;
    static integer nlp1;
#define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1]
#define bx_subscr(a_1,a_2) (a_2)*bx_dim1 + a_1
#define bx_ref(a_1,a_2) bx[bx_subscr(a_1,a_2)]
#define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1]
#define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1]


    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    bx_dim1 = *ldbx;
    bx_offset = 1 + bx_dim1 * 1;
    bx -= bx_offset;
    --perm;
    givcol_dim1 = *ldgcol;
    givcol_offset = 1 + givcol_dim1 * 1;
    givcol -= givcol_offset;
    difr_dim1 = *ldgnum;
    difr_offset = 1 + difr_dim1 * 1;
    difr -= difr_offset;
    poles_dim1 = *ldgnum;
    poles_offset = 1 + poles_dim1 * 1;
    poles -= poles_offset;
    givnum_dim1 = *ldgnum;
    givnum_offset = 1 + givnum_dim1 * 1;
    givnum -= givnum_offset;
    --difl;
    --z__;
    --rwork;

    /* Function Body */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*nl < 1) {
	*info = -2;
    } else if (*nr < 1) {
	*info = -3;
    } else if (*sqre < 0 || *sqre > 1) {
	*info = -4;
    }

    n = *nl + *nr + 1;

    if (*nrhs < 1) {
	*info = -5;
    } else if (*ldb < n) {
	*info = -7;
    } else if (*ldbx < n) {
	*info = -9;
    } else if (*givptr < 0) {
	*info = -11;
    } else if (*ldgcol < n) {
	*info = -13;
    } else if (*ldgnum < n) {
	*info = -15;
    } else if (*k < 1) {
	*info = -20;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CLALS0", &i__1);
	return 0;
    }

    m = n + *sqre;
    nlp1 = *nl + 1;

    if (*icompq == 0) {

/*        Apply back orthogonal transformations from the left.   

          Step (1L): apply back the Givens rotations performed. */

	i__1 = *givptr;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    csrot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(
		    givcol_ref(i__, 1), 1), ldb, &givnum_ref(i__, 2), &
		    givnum_ref(i__, 1));
/* L10: */
	}

/*        Step (2L): permute rows of B. */

	ccopy_(nrhs, &b_ref(nlp1, 1), ldb, &bx_ref(1, 1), ldbx);
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    ccopy_(nrhs, &b_ref(perm[i__], 1), ldb, &bx_ref(i__, 1), ldbx);
/* L20: */
	}

/*        Step (3L): apply the inverse of the left singular vector   
          matrix to BX. */

	if (*k == 1) {
	    ccopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb);
	    if (z__[1] < 0.f) {
		csscal_(nrhs, &c_b5, &b[b_offset], ldb);
	    }
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		diflj = difl[j];
		dj = poles_ref(j, 1);
		dsigj = -poles_ref(j, 2);
		if (j < *k) {
		    difrj = -difr_ref(j, 1);
		    dsigjp = -poles_ref(j + 1, 2);
		}
		if (z__[j] == 0.f || poles_ref(j, 2) == 0.f) {
		    rwork[j] = 0.f;
		} else {
		    rwork[j] = -poles_ref(j, 2) * z__[j] / diflj / (poles_ref(
			    j, 2) + dj);
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) {
			rwork[i__] = 0.f;
		    } else {
			rwork[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(&
				poles_ref(i__, 2), &dsigj) - diflj) / (
				poles_ref(i__, 2) + dj);
		    }
/* L30: */
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) {
			rwork[i__] = 0.f;
		    } else {
			rwork[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(&
				poles_ref(i__, 2), &dsigjp) + difrj) / (
				poles_ref(i__, 2) + dj);
		    }
/* L40: */
		}
		rwork[1] = -1.f;
		temp = snrm2_(k, &rwork[1], &c__1);

/*              Since B and BX are complex, the following call to SGEMV   
                is performed in two steps (real and imaginary parts).   

                CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO,   
      $                     B( J, 1 ), LDB ) */

		i__ = *k + (*nrhs << 1);
		i__2 = *nrhs;
		for (jcol = 1; jcol <= i__2; ++jcol) {
		    i__3 = *k;
		    for (jrow = 1; jrow <= i__3; ++jrow) {
			++i__;
			i__4 = bx_subscr(jrow, jcol);
			rwork[i__] = bx[i__4].r;
/* L50: */
		    }
/* L60: */
		}
		sgemv_("T", k, nrhs, &c_b13, &rwork[*k + 1 + (*nrhs << 1)], k,
			 &rwork[1], &c__1, &c_b15, &rwork[*k + 1], &c__1);
		i__ = *k + (*nrhs << 1);
		i__2 = *nrhs;
		for (jcol = 1; jcol <= i__2; ++jcol) {
		    i__3 = *k;
		    for (jrow = 1; jrow <= i__3; ++jrow) {
			++i__;
			rwork[i__] = r_imag(&bx_ref(jrow, jcol));
/* L70: */
		    }
/* L80: */
		}
		sgemv_("T", k, nrhs, &c_b13, &rwork[*k + 1 + (*nrhs << 1)], k,
			 &rwork[1], &c__1, &c_b15, &rwork[*k + 1 + *nrhs], &
			c__1);
		i__2 = *nrhs;
		for (jcol = 1; jcol <= i__2; ++jcol) {
		    i__3 = b_subscr(j, jcol);
		    i__4 = jcol + *k;
		    i__5 = jcol + *k + *nrhs;
		    q__1.r = rwork[i__4], q__1.i = rwork[i__5];
		    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
/* L90: */
		}
		clascl_("G", &c__0, &c__0, &temp, &c_b13, &c__1, nrhs, &b_ref(
			j, 1), ldb, info);
/* L100: */
	    }
	}

/*        Move the deflated rows of BX to B also. */

	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    clacpy_("A", &i__1, nrhs, &bx_ref(*k + 1, 1), ldbx, &b_ref(*k + 1,
		     1), ldb);
	}
    } else {

/*        Apply back the right orthogonal transformations.   

          Step (1R): apply back the new right singular vector matrix   
          to B. */

	if (*k == 1) {
	    ccopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx);
	} else {
	    i__1 = *k;
	    for (j = 1; j <= i__1; ++j) {
		dsigj = poles_ref(j, 2);
		if (z__[j] == 0.f) {
		    rwork[j] = 0.f;
		} else {
		    rwork[j] = -z__[j] / difl[j] / (dsigj + poles_ref(j, 1)) /
			     difr_ref(j, 2);
		}
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			rwork[i__] = 0.f;
		    } else {
			r__1 = -poles_ref(i__ + 1, 2);
			rwork[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - 
				difr_ref(i__, 1)) / (dsigj + poles_ref(i__, 1)
				) / difr_ref(i__, 2);
		    }
/* L110: */
		}
		i__2 = *k;
		for (i__ = j + 1; i__ <= i__2; ++i__) {
		    if (z__[j] == 0.f) {
			rwork[i__] = 0.f;
		    } else {
			r__1 = -poles_ref(i__, 2);
			rwork[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[
				i__]) / (dsigj + poles_ref(i__, 1)) / 
				difr_ref(i__, 2);
		    }
/* L120: */
		}

/*              Since B and BX are complex, the following call to SGEMV   
                is performed in two steps (real and imaginary parts).   

                CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO,   
      $                     BX( J, 1 ), LDBX ) */

		i__ = *k + (*nrhs << 1);
		i__2 = *nrhs;
		for (jcol = 1; jcol <= i__2; ++jcol) {
		    i__3 = *k;
		    for (jrow = 1; jrow <= i__3; ++jrow) {
			++i__;
			i__4 = b_subscr(jrow, jcol);
			rwork[i__] = b[i__4].r;
/* L130: */
		    }
/* L140: */
		}
		sgemv_("T", k, nrhs, &c_b13, &rwork[*k + 1 + (*nrhs << 1)], k,
			 &rwork[1], &c__1, &c_b15, &rwork[*k + 1], &c__1);
		i__ = *k + (*nrhs << 1);
		i__2 = *nrhs;
		for (jcol = 1; jcol <= i__2; ++jcol) {
		    i__3 = *k;
		    for (jrow = 1; jrow <= i__3; ++jrow) {
			++i__;
			rwork[i__] = r_imag(&b_ref(jrow, jcol));
/* L150: */
		    }
/* L160: */
		}
		sgemv_("T", k, nrhs, &c_b13, &rwork[*k + 1 + (*nrhs << 1)], k,
			 &rwork[1], &c__1, &c_b15, &rwork[*k + 1 + *nrhs], &
			c__1);
		i__2 = *nrhs;
		for (jcol = 1; jcol <= i__2; ++jcol) {
		    i__3 = bx_subscr(j, jcol);
		    i__4 = jcol + *k;
		    i__5 = jcol + *k + *nrhs;
		    q__1.r = rwork[i__4], q__1.i = rwork[i__5];
		    bx[i__3].r = q__1.r, bx[i__3].i = q__1.i;
/* L170: */
		}
/* L180: */
	    }
	}

/*        Step (2R): if SQRE = 1, apply back the rotation that is   
          related to the right null space of the subproblem. */

	if (*sqre == 1) {
	    ccopy_(nrhs, &b_ref(m, 1), ldb, &bx_ref(m, 1), ldbx);
	    csrot_(nrhs, &bx_ref(1, 1), ldbx, &bx_ref(m, 1), ldbx, c__, s);
	}
	if (*k < max(m,n)) {
	    i__1 = n - *k;
	    clacpy_("A", &i__1, nrhs, &b_ref(*k + 1, 1), ldb, &bx_ref(*k + 1, 
		    1), ldbx);
	}

/*        Step (3R): permute rows of B. */

	ccopy_(nrhs, &bx_ref(1, 1), ldbx, &b_ref(nlp1, 1), ldb);
	if (*sqre == 1) {
	    ccopy_(nrhs, &bx_ref(m, 1), ldbx, &b_ref(m, 1), ldb);
	}
	i__1 = n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    ccopy_(nrhs, &bx_ref(i__, 1), ldbx, &b_ref(perm[i__], 1), ldb);
/* L190: */
	}

/*        Step (4R): apply back the Givens rotations performed. */

	for (i__ = *givptr; i__ >= 1; --i__) {
	    r__1 = -givnum_ref(i__, 1);
	    csrot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(
		    givcol_ref(i__, 1), 1), ldb, &givnum_ref(i__, 2), &r__1);
/* L200: */
	}
    }

    return 0;

/*     End of CLALS0 */

} /* clals0_ */

Exemplo n.º 16

0

Exibir arquivo

Arquivo: slaeda.c Projeto: csapng/libflame

/* Subroutine */
int slaeda_(integer *n, integer *tlvls, integer *curlvl, integer *curpbm, integer *prmptr, integer *perm, integer *givptr, integer *givcol, real *givnum, real *q, integer *qptr, real *z__, real *ztemp, integer *info)
{
    /* System generated locals */
    integer i__1, i__2, i__3;
    /* Builtin functions */
    integer pow_ii(integer *, integer *);
    double sqrt(doublereal);
    /* Local variables */
    integer i__, k, mid, ptr, curr;
    extern /* Subroutine */
    int srot_(integer *, real *, integer *, real *, integer *, real *, real *);
    integer bsiz1, bsiz2, psiz1, psiz2, zptr1;
    extern /* Subroutine */
    int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *);
    /* -- LAPACK computational routine (version 3.4.2) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* September 2012 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Test the input parameters. */
    /* Parameter adjustments */
    --ztemp;
    --z__;
    --qptr;
    --q;
    givnum -= 3;
    givcol -= 3;
    --givptr;
    --perm;
    --prmptr;
    /* Function Body */
    *info = 0;
    if (*n < 0)
    {
        *info = -1;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("SLAEDA", &i__1);
        return 0;
    }
    /* Quick return if possible */
    if (*n == 0)
    {
        return 0;
    }
    /* Determine location of first number in second half. */
    mid = *n / 2 + 1;
    /* Gather last/first rows of appropriate eigenblocks into center of Z */
    ptr = 1;
    /* Determine location of lowest level subproblem in the full storage */
    /* scheme */
    i__1 = *curlvl - 1;
    curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
    /* Determine size of these matrices. We add HALF to the value of */
    /* the SQRT in case the machine underestimates one of these square */
    /* roots. */
    bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f);
    bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f);
    i__1 = mid - bsiz1 - 1;
    for (k = 1;
            k <= i__1;
            ++k)
    {
        z__[k] = 0.f;
        /* L10: */
    }
    scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & c__1);
    scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1);
    i__1 = *n;
    for (k = mid + bsiz2;
            k <= i__1;
            ++k)
    {
        z__[k] = 0.f;
        /* L20: */
    }
    /* Loop through remaining levels 1 -> CURLVL applying the Givens */
    /* rotations and permutation and then multiplying the center matrices */
    /* against the current Z. */
    ptr = pow_ii(&c__2, tlvls) + 1;
    i__1 = *curlvl - 1;
    for (k = 1;
            k <= i__1;
            ++k)
    {
        i__2 = *curlvl - k;
        i__3 = *curlvl - k - 1;
        curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - 1;
        psiz1 = prmptr[curr + 1] - prmptr[curr];
        psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
        zptr1 = mid - psiz1;
        /* Apply Givens at CURR and CURR+1 */
        i__2 = givptr[curr + 1] - 1;
        for (i__ = givptr[curr];
                i__ <= i__2;
                ++i__)
        {
            srot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( i__ << 1) + 1], &givnum[(i__ << 1) + 2]);
            /* L30: */
        }
        i__2 = givptr[curr + 2] - 1;
        for (i__ = givptr[curr + 1];
                i__ <= i__2;
                ++i__)
        {
            srot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << 1) + 1], &givnum[(i__ << 1) + 2]);
            /* L40: */
        }
        psiz1 = prmptr[curr + 1] - prmptr[curr];
        psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
        i__2 = psiz1 - 1;
        for (i__ = 0;
                i__ <= i__2;
                ++i__)
        {
            ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1];
            /* L50: */
        }
        i__2 = psiz2 - 1;
        for (i__ = 0;
                i__ <= i__2;
                ++i__)
        {
            ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - 1];
            /* L60: */
        }
        /* Multiply Blocks at CURR and CURR+1 */
        /* Determine size of these matrices. We add HALF to the value of */
        /* the SQRT in case the machine underestimates one of these */
        /* square roots. */
        bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f);
        bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f);
        if (bsiz1 > 0)
        {
            sgemv_("T", &bsiz1, &bsiz1, &c_b24, &q[qptr[curr]], &bsiz1, & ztemp[1], &c__1, &c_b26, &z__[zptr1], &c__1);
        }
        i__2 = psiz1 - bsiz1;
        scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1);
        if (bsiz2 > 0)
        {
            sgemv_("T", &bsiz2, &bsiz2, &c_b24, &q[qptr[curr + 1]], &bsiz2, & ztemp[psiz1 + 1], &c__1, &c_b26, &z__[mid], &c__1);
        }
        i__2 = psiz2 - bsiz2;
        scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & c__1);
        i__2 = *tlvls - k;
        ptr += pow_ii(&c__2, &i__2);
        /* L70: */
    }
    return 0;
    /* End of SLAEDA */
}

Exemplo n.º 17

0

Exibir arquivo

Arquivo: ssapps.c Projeto: openbigdatagroup/pspectralclustering

/* Subroutine */ int ssapps_(integer *n, integer *kev, integer *np, real *
                             shift, real *v, integer *ldv, real *h__, integer *ldh, real *resid,
                             real *q, integer *ldq, real *workd)
{
    /* Initialized data */

    static logical first = TRUE_;

    /* System generated locals */
    integer h_dim1, h_offset, q_dim1, q_offset, v_dim1, v_offset, i__1, i__2,
            i__3, i__4;
    real r__1, r__2;

    /* Local variables */
    static real c__, f, g;
    static integer i__, j;
    static real r__, s, a1, a2, a3, a4, t0, t1;
    static integer jj;
    static real big;
    static integer iend, itop;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
           sgemv_(char *, integer *, integer *, real *, real *, integer *,
                  real *, integer *, real *, real *, integer *, ftnlen), scopy_(
               integer *, real *, integer *, real *, integer *), saxpy_(integer *
                       , real *, real *, integer *, real *, integer *), ivout_(integer *,
                               integer *, integer *, integer *, char *, ftnlen), svout_(integer
                                       *, integer *, real *, integer *, char *, ftnlen);
    extern doublereal slamch_(char *, ftnlen);
    extern /* Subroutine */ int second_(real *);
    static real epsmch;
    static integer istart, kplusp, msglvl;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *,
                                        integer *, real *, integer *, ftnlen), slartg_(real *, real *,
                                                real *, real *, real *), slaset_(char *, integer *, integer *,
                                                        real *, real *, real *, integer *, ftnlen);


    /*     %----------------------------------------------------% */
    /*     | Include files for debugging and timing information | */
    /*     %----------------------------------------------------% */


    /* \SCCS Information: @(#) */
    /* FILE: debug.h   SID: 2.3   DATE OF SID: 11/16/95   RELEASE: 2 */

    /*     %---------------------------------% */
    /*     | See debug.doc for documentation | */
    /*     %---------------------------------% */

    /*     %------------------% */
    /*     | Scalar Arguments | */
    /*     %------------------% */

    /*     %--------------------------------% */
    /*     | See stat.doc for documentation | */
    /*     %--------------------------------% */

    /* \SCCS Information: @(#) */
    /* FILE: stat.h   SID: 2.2   DATE OF SID: 11/16/95   RELEASE: 2 */



    /*     %-----------------% */
    /*     | Array Arguments | */
    /*     %-----------------% */


    /*     %------------% */
    /*     | Parameters | */
    /*     %------------% */


    /*     %---------------% */
    /*     | Local Scalars | */
    /*     %---------------% */



    /*     %----------------------% */
    /*     | External Subroutines | */
    /*     %----------------------% */


    /*     %--------------------% */
    /*     | External Functions | */
    /*     %--------------------% */


    /*     %----------------------% */
    /*     | Intrinsics Functions | */
    /*     %----------------------% */


    /*     %----------------% */
    /*     | Data statments | */
    /*     %----------------% */

    /* Parameter adjustments */
    --workd;
    --resid;
    --shift;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;

    /* Function Body */

    /*     %-----------------------% */
    /*     | Executable Statements | */
    /*     %-----------------------% */

    if (first) {
        epsmch = slamch_("Epsilon-Machine", (ftnlen)15);
        first = FALSE_;
    }
    itop = 1;

    /*     %-------------------------------% */
    /*     | Initialize timing statistics  | */
    /*     | & message level for debugging | */
    /*     %-------------------------------% */

    second_(&t0);
    msglvl = debug_1.msapps;

    kplusp = *kev + *np;

    /*     %----------------------------------------------% */
    /*     | Initialize Q to the identity matrix of order | */
    /*     | kplusp used to accumulate the rotations.     | */
    /*     %----------------------------------------------% */

    slaset_("All", &kplusp, &kplusp, &c_b4, &c_b5, &q[q_offset], ldq, (ftnlen)
            3);

    /*     %----------------------------------------------% */
    /*     | Quick return if there are no shifts to apply | */
    /*     %----------------------------------------------% */

    if (*np == 0) {
        goto L9000;
    }

    /*     %----------------------------------------------------------% */
    /*     | Apply the np shifts implicitly. Apply each shift to the  | */
    /*     | whole matrix and not just to the submatrix from which it | */
    /*     | comes.                                                   | */
    /*     %----------------------------------------------------------% */

    i__1 = *np;
    for (jj = 1; jj <= i__1; ++jj) {

        istart = itop;

        /*        %----------------------------------------------------------% */
        /*        | Check for splitting and deflation. Currently we consider | */
        /*        | an off-diagonal element h(i+1,1) negligible if           | */
        /*        |         h(i+1,1) .le. epsmch*( |h(i,2)| + |h(i+1,2)| )   | */
        /*        | for i=1:KEV+NP-1.                                        | */
        /*        | If above condition tests true then we set h(i+1,1) = 0.  | */
        /*        | Note that h(1:KEV+NP,1) are assumed to be non negative.  | */
        /*        %----------------------------------------------------------% */

L20:

        /*        %------------------------------------------------% */
        /*        | The following loop exits early if we encounter | */
        /*        | a negligible off diagonal element.             | */
        /*        %------------------------------------------------% */

        i__2 = kplusp - 1;
        for (i__ = istart; i__ <= i__2; ++i__) {
            big = (r__1 = h__[i__ + (h_dim1 << 1)], dabs(r__1)) + (r__2 = h__[
                        i__ + 1 + (h_dim1 << 1)], dabs(r__2));
            if (h__[i__ + 1 + h_dim1] <= epsmch * big) {
                if (msglvl > 0) {
                    ivout_(&debug_1.logfil, &c__1, &i__, &debug_1.ndigit,
                           "_sapps: deflation at row/column no.", (ftnlen)35)
                    ;
                    ivout_(&debug_1.logfil, &c__1, &jj, &debug_1.ndigit,
                           "_sapps: occured before shift number.", (ftnlen)
                           36);
                    svout_(&debug_1.logfil, &c__1, &h__[i__ + 1 + h_dim1], &
                           debug_1.ndigit, "_sapps: the corresponding off d"
                           "iagonal element", (ftnlen)46);
                }
                h__[i__ + 1 + h_dim1] = 0.f;
                iend = i__;
                goto L40;
            }
            /* L30: */
        }
        iend = kplusp;
L40:

        if (istart < iend) {

            /*           %--------------------------------------------------------% */
            /*           | Construct the plane rotation G'(istart,istart+1,theta) | */
            /*           | that attempts to drive h(istart+1,1) to zero.          | */
            /*           %--------------------------------------------------------% */

            f = h__[istart + (h_dim1 << 1)] - shift[jj];
            g = h__[istart + 1 + h_dim1];
            slartg_(&f, &g, &c__, &s, &r__);

            /*            %-------------------------------------------------------% */
            /*            | Apply rotation to the left and right of H;            | */
            /*            | H <- G' * H * G,  where G = G(istart,istart+1,theta). | */
            /*            | This will create a "bulge".                           | */
            /*            %-------------------------------------------------------% */

            a1 = c__ * h__[istart + (h_dim1 << 1)] + s * h__[istart + 1 +
                    h_dim1];
            a2 = c__ * h__[istart + 1 + h_dim1] + s * h__[istart + 1 + (
                        h_dim1 << 1)];
            a4 = c__ * h__[istart + 1 + (h_dim1 << 1)] - s * h__[istart + 1 +
                    h_dim1];
            a3 = c__ * h__[istart + 1 + h_dim1] - s * h__[istart + (h_dim1 <<
                    1)];
            h__[istart + (h_dim1 << 1)] = c__ * a1 + s * a2;
            h__[istart + 1 + (h_dim1 << 1)] = c__ * a4 - s * a3;
            h__[istart + 1 + h_dim1] = c__ * a3 + s * a4;

            /*            %----------------------------------------------------% */
            /*            | Accumulate the rotation in the matrix Q;  Q <- Q*G | */
            /*            %----------------------------------------------------% */

            /* Computing MIN */
            i__3 = istart + jj;
            i__2 = min(i__3,kplusp);
            for (j = 1; j <= i__2; ++j) {
                a1 = c__ * q[j + istart * q_dim1] + s * q[j + (istart + 1) *
                        q_dim1];
                q[j + (istart + 1) * q_dim1] = -s * q[j + istart * q_dim1] +
                                               c__ * q[j + (istart + 1) * q_dim1];
                q[j + istart * q_dim1] = a1;
                /* L60: */
            }


            /*            %----------------------------------------------% */
            /*            | The following loop chases the bulge created. | */
            /*            | Note that the previous rotation may also be  | */
            /*            | done within the following loop. But it is    | */
            /*            | kept separate to make the distinction among  | */
            /*            | the bulge chasing sweeps and the first plane | */
            /*            | rotation designed to drive h(istart+1,1) to  | */
            /*            | zero.                                        | */
            /*            %----------------------------------------------% */

            i__2 = iend - 1;
            for (i__ = istart + 1; i__ <= i__2; ++i__) {

                /*               %----------------------------------------------% */
                /*               | Construct the plane rotation G'(i,i+1,theta) | */
                /*               | that zeros the i-th bulge that was created   | */
                /*               | by G(i-1,i,theta). g represents the bulge.   | */
                /*               %----------------------------------------------% */

                f = h__[i__ + h_dim1];
                g = s * h__[i__ + 1 + h_dim1];

                /*               %----------------------------------% */
                /*               | Final update with G(i-1,i,theta) | */
                /*               %----------------------------------% */

                h__[i__ + 1 + h_dim1] = c__ * h__[i__ + 1 + h_dim1];
                slartg_(&f, &g, &c__, &s, &r__);

                /*               %-------------------------------------------% */
                /*               | The following ensures that h(1:iend-1,1), | */
                /*               | the first iend-2 off diagonal of elements | */
                /*               | H, remain non negative.                   | */
                /*               %-------------------------------------------% */

                if (r__ < 0.f) {
                    r__ = -r__;
                    c__ = -c__;
                    s = -s;
                }

                /*               %--------------------------------------------% */
                /*               | Apply rotation to the left and right of H; | */
                /*               | H <- G * H * G',  where G = G(i,i+1,theta) | */
                /*               %--------------------------------------------% */

                h__[i__ + h_dim1] = r__;

                a1 = c__ * h__[i__ + (h_dim1 << 1)] + s * h__[i__ + 1 +
                        h_dim1];
                a2 = c__ * h__[i__ + 1 + h_dim1] + s * h__[i__ + 1 + (h_dim1
                        << 1)];
                a3 = c__ * h__[i__ + 1 + h_dim1] - s * h__[i__ + (h_dim1 << 1)
                                                          ];
                a4 = c__ * h__[i__ + 1 + (h_dim1 << 1)] - s * h__[i__ + 1 +
                        h_dim1];

                h__[i__ + (h_dim1 << 1)] = c__ * a1 + s * a2;
                h__[i__ + 1 + (h_dim1 << 1)] = c__ * a4 - s * a3;
                h__[i__ + 1 + h_dim1] = c__ * a3 + s * a4;

                /*               %----------------------------------------------------% */
                /*               | Accumulate the rotation in the matrix Q;  Q <- Q*G | */
                /*               %----------------------------------------------------% */

                /* Computing MIN */
                i__4 = i__ + jj;
                i__3 = min(i__4,kplusp);
                for (j = 1; j <= i__3; ++j) {
                    a1 = c__ * q[j + i__ * q_dim1] + s * q[j + (i__ + 1) *
                                                           q_dim1];
                    q[j + (i__ + 1) * q_dim1] = -s * q[j + i__ * q_dim1] +
                                                c__ * q[j + (i__ + 1) * q_dim1];
                    q[j + i__ * q_dim1] = a1;
                    /* L50: */
                }

                /* L70: */
            }

        }

        /*        %--------------------------% */
        /*        | Update the block pointer | */
        /*        %--------------------------% */

        istart = iend + 1;

        /*        %------------------------------------------% */
        /*        | Make sure that h(iend,1) is non-negative | */
        /*        | If not then set h(iend,1) <-- -h(iend,1) | */
        /*        | and negate the last column of Q.         | */
        /*        | We have effectively carried out a        | */
        /*        | similarity on transformation H           | */
        /*        %------------------------------------------% */

        if (h__[iend + h_dim1] < 0.f) {
            h__[iend + h_dim1] = -h__[iend + h_dim1];
            sscal_(&kplusp, &c_b20, &q[iend * q_dim1 + 1], &c__1);
        }

        /*        %--------------------------------------------------------% */
        /*        | Apply the same shift to the next block if there is any | */
        /*        %--------------------------------------------------------% */

        if (iend < kplusp) {
            goto L20;
        }

        /*        %-----------------------------------------------------% */
        /*        | Check if we can increase the the start of the block | */
        /*        %-----------------------------------------------------% */

        i__2 = kplusp - 1;
        for (i__ = itop; i__ <= i__2; ++i__) {
            if (h__[i__ + 1 + h_dim1] > 0.f) {
                goto L90;
            }
            ++itop;
            /* L80: */
        }

        /*        %-----------------------------------% */
        /*        | Finished applying the jj-th shift | */
        /*        %-----------------------------------% */

L90:
        ;
    }

    /*     %------------------------------------------% */
    /*     | All shifts have been applied. Check for  | */
    /*     | more possible deflation that might occur | */
    /*     | after the last shift is applied.         | */
    /*     %------------------------------------------% */

    i__1 = kplusp - 1;
    for (i__ = itop; i__ <= i__1; ++i__) {
        big = (r__1 = h__[i__ + (h_dim1 << 1)], dabs(r__1)) + (r__2 = h__[i__
                + 1 + (h_dim1 << 1)], dabs(r__2));
        if (h__[i__ + 1 + h_dim1] <= epsmch * big) {
            if (msglvl > 0) {
                ivout_(&debug_1.logfil, &c__1, &i__, &debug_1.ndigit, "_sapp"
                       "s: deflation at row/column no.", (ftnlen)35);
                svout_(&debug_1.logfil, &c__1, &h__[i__ + 1 + h_dim1], &
                       debug_1.ndigit, "_sapps: the corresponding off diago"
                       "nal element", (ftnlen)46);
            }
            h__[i__ + 1 + h_dim1] = 0.f;
        }
        /* L100: */
    }

    /*     %-------------------------------------------------% */
    /*     | Compute the (kev+1)-st column of (V*Q) and      | */
    /*     | temporarily store the result in WORKD(N+1:2*N). | */
    /*     | This is not necessary if h(kev+1,1) = 0.         | */
    /*     %-------------------------------------------------% */

    if (h__[*kev + 1 + h_dim1] > 0.f) {
        sgemv_("N", n, &kplusp, &c_b5, &v[v_offset], ldv, &q[(*kev + 1) *
                q_dim1 + 1], &c__1, &c_b4, &workd[*n + 1], &c__1, (ftnlen)1);
    }

    /*     %-------------------------------------------------------% */
    /*     | Compute column 1 to kev of (V*Q) in backward order    | */
    /*     | taking advantage that Q is an upper triangular matrix | */
    /*     | with lower bandwidth np.                              | */
    /*     | Place results in v(:,kplusp-kev:kplusp) temporarily.  | */
    /*     %-------------------------------------------------------% */

    i__1 = *kev;
    for (i__ = 1; i__ <= i__1; ++i__) {
        i__2 = kplusp - i__ + 1;
        sgemv_("N", n, &i__2, &c_b5, &v[v_offset], ldv, &q[(*kev - i__ + 1) *
                q_dim1 + 1], &c__1, &c_b4, &workd[1], &c__1, (ftnlen)1);
        scopy_(n, &workd[1], &c__1, &v[(kplusp - i__ + 1) * v_dim1 + 1], &
               c__1);
        /* L130: */
    }

    /*     %-------------------------------------------------% */
    /*     |  Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). | */
    /*     %-------------------------------------------------% */

    slacpy_("All", n, kev, &v[(*np + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv,
            (ftnlen)3);

    /*     %--------------------------------------------% */
    /*     | Copy the (kev+1)-st column of (V*Q) in the | */
    /*     | appropriate place if h(kev+1,1) .ne. zero. | */
    /*     %--------------------------------------------% */

    if (h__[*kev + 1 + h_dim1] > 0.f) {
        scopy_(n, &workd[*n + 1], &c__1, &v[(*kev + 1) * v_dim1 + 1], &c__1);
    }

    /*     %-------------------------------------% */
    /*     | Update the residual vector:         | */
    /*     |    r <- sigmak*r + betak*v(:,kev+1) | */
    /*     | where                               | */
    /*     |    sigmak = (e_{kev+p}'*Q)*e_{kev}  | */
    /*     |    betak = e_{kev+1}'*H*e_{kev}     | */
    /*     %-------------------------------------% */

    sscal_(n, &q[kplusp + *kev * q_dim1], &resid[1], &c__1);
    if (h__[*kev + 1 + h_dim1] > 0.f) {
        saxpy_(n, &h__[*kev + 1 + h_dim1], &v[(*kev + 1) * v_dim1 + 1], &c__1,
               &resid[1], &c__1);
    }

    if (msglvl > 1) {
        svout_(&debug_1.logfil, &c__1, &q[kplusp + *kev * q_dim1], &
               debug_1.ndigit, "_sapps: sigmak of the updated residual vect"
               "or", (ftnlen)45);
        svout_(&debug_1.logfil, &c__1, &h__[*kev + 1 + h_dim1], &
               debug_1.ndigit, "_sapps: betak of the updated residual vector"
               , (ftnlen)44);
        svout_(&debug_1.logfil, kev, &h__[(h_dim1 << 1) + 1], &debug_1.ndigit,
               "_sapps: updated main diagonal of H for next iteration", (
                   ftnlen)53);
        if (*kev > 1) {
            i__1 = *kev - 1;
            svout_(&debug_1.logfil, &i__1, &h__[h_dim1 + 2], &debug_1.ndigit,
                   "_sapps: updated sub diagonal of H for next iteration", (
                       ftnlen)52);
        }
    }

    second_(&t1);
    timing_1.tsapps += t1 - t0;

L9000:
    return 0;

    /*     %---------------% */
    /*     | End of ssapps | */
    /*     %---------------% */

} /* ssapps_ */

Exemplo n.º 18

0

Exibir arquivo

Arquivo: slapack_lite.c Projeto: KuoHugo/pocketsphinx-wp-demo

/* Subroutine */ int spotf2_(char *uplo, integer *n, real *a, integer *lda,
	integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    static integer j;
    static real ajj;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
	    sgemv_(char *, integer *, integer *, real *, real *, integer *,
	    real *, integer *, real *, real *, integer *);
    static logical upper;
    extern /* Subroutine */ int xerbla_(char *, integer *);


/*
    -- LAPACK routine (version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       February 29, 1992


    Purpose
    =======

    SPOTF2 computes the Cholesky factorization of a real symmetric
    positive definite matrix A.

    The factorization has the form
       A = U' * U ,  if UPLO = 'U', or
       A = L  * L',  if UPLO = 'L',
    where U is an upper triangular matrix and L is lower triangular.

    This is the unblocked version of the algorithm, calling Level 2 BLAS.

    Arguments
    =========

    UPLO    (input) CHARACTER*1
            Specifies whether the upper or lower triangular part of the
            symmetric matrix A is stored.
            = 'U':  Upper triangular
            = 'L':  Lower triangular

    N       (input) INTEGER
            The order of the matrix A.  N >= 0.

    A       (input/output) REAL array, dimension (LDA,N)
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
            n by n upper triangular part of A contains the upper
            triangular part of the matrix A, and the strictly lower
            triangular part of A is not referenced.  If UPLO = 'L', the
            leading n by n lower triangular part of A contains the lower
            triangular part of the matrix A, and the strictly upper
            triangular part of A is not referenced.

            On exit, if INFO = 0, the factor U or L from the Cholesky
            factorization A = U'*U  or A = L*L'.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: if INFO = -k, the k-th argument had an illegal value
            > 0: if INFO = k, the leading minor of order k is not
                 positive definite, and the factorization could not be
                 completed.

    =====================================================================


       Test the input parameters.
*/

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SPOTF2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    if (upper) {

/*        Compute the Cholesky factorization A = U'*U. */

	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {

/*           Compute U(J,J) and test for non-positive-definiteness. */

	    i__2 = j - 1;
	    ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j * a_dim1 + 1], &c__1,
		    &a[j * a_dim1 + 1], &c__1);
	    if (ajj <= 0.f) {
		a[j + j * a_dim1] = ajj;
		goto L30;
	    }
	    ajj = sqrt(ajj);
	    a[j + j * a_dim1] = ajj;

/*           Compute elements J+1:N of row J. */

	    if (j < *n) {
		i__2 = j - 1;
		i__3 = *n - j;
		sgemv_("Transpose", &i__2, &i__3, &c_b181, &a[(j + 1) *
			a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b164,
			&a[j + (j + 1) * a_dim1], lda);
		i__2 = *n - j;
		r__1 = 1.f / ajj;
		sscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda);
	    }
/* L10: */
	}
    } else {

/*        Compute the Cholesky factorization A = L*L'. */

	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {

/*           Compute L(J,J) and test for non-positive-definiteness. */

	    i__2 = j - 1;
	    ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j + a_dim1], lda, &a[j
		    + a_dim1], lda);
	    if (ajj <= 0.f) {
		a[j + j * a_dim1] = ajj;
		goto L30;
	    }
	    ajj = sqrt(ajj);
	    a[j + j * a_dim1] = ajj;

/*           Compute elements J+1:N of column J. */

	    if (j < *n) {
		i__2 = *n - j;
		i__3 = j - 1;
		sgemv_("No transpose", &i__2, &i__3, &c_b181, &a[j + 1 +
			a_dim1], lda, &a[j + a_dim1], lda, &c_b164, &a[j + 1
			+ j * a_dim1], &c__1);
		i__2 = *n - j;
		r__1 = 1.f / ajj;
		sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1);
	    }
/* L20: */
	}
    }
    goto L40;

L30:
    *info = j;

L40:
    return 0;

/*     End of SPOTF2 */

} /* spotf2_ */

Exemplo n.º 19

0

Exibir arquivo

Arquivo: sneupd.c Projeto: Electrostatics/FETK

/* ----------------------------------------------------------------------- */
/* Subroutine */ int sneupd_(logical *rvec, char *howmny, logical *select, 
	real *dr, real *di, real *z__, integer *ldz, real *sigmar, real *
	sigmai, real *workev, char *bmat, integer *n, char *which, integer *
	nev, real *tol, real *resid, integer *ncv, real *v, integer *ldv, 
	integer *iparam, integer *ipntr, real *workd, real *workl, integer *
	lworkl, integer *info, ftnlen howmny_len, ftnlen bmat_len, ftnlen 
	which_len)
{
    /* System generated locals */
    integer v_dim1, v_offset, z_dim1, z_offset, i__1;
    real r__1, r__2;
    doublereal d__1;

    /* Builtin functions */
    double pow_dd(doublereal *, doublereal *);
    integer s_cmp(char *, char *, ftnlen, ftnlen);
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    static integer j, k, ih, jj, np;
    static real vl[1]	/* was [1][1] */;
    static integer ibd, ldh, ldq, iri;
    static real sep;
    static integer irr, wri, wrr, mode;
    static real eps23;
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    static integer ierr;
    static real temp;
    static integer iwev;
    static char type__[6];
    static real temp1;
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer ihbds, iconj;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real conds;
    static logical reord;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *, 
	    ftnlen);
    static integer nconv, iwork[1];
    static real rnorm;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static integer ritzi;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    , ftnlen, ftnlen, ftnlen, ftnlen), ivout_(integer *, integer *, 
	    integer *, integer *, char *, ftnlen), smout_(integer *, integer *
	    , integer *, real *, integer *, integer *, char *, ftnlen);
    static integer ritzr;
    extern /* Subroutine */ int svout_(integer *, integer *, real *, integer *
	    , char *, ftnlen), sgeqr2_(integer *, integer *, real *, integer *
	    , real *, real *, integer *);
    extern doublereal slapy2_(real *, real *);
    extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *, ftnlen, ftnlen);
    static integer iheigi, iheigr, bounds, invsub, iuptri, msglvl, outncv, 
	    ishift, numcnv;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *, ftnlen), slahqr_(logical *, logical 
	    *, integer *, integer *, integer *, real *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *), 
	    slaset_(char *, integer *, integer *, real *, real *, real *, 
	    integer *, ftnlen), strevc_(char *, char *, logical *, integer *, 
	    real *, integer *, real *, integer *, real *, integer *, integer *
	    , integer *, real *, integer *, ftnlen, ftnlen), strsen_(char *, 
	    char *, logical *, integer *, real *, integer *, real *, integer *
	    , real *, real *, integer *, real *, real *, real *, integer *, 
	    integer *, integer *, integer *, ftnlen, ftnlen);
    extern doublereal slamch_(char *, ftnlen);
    extern /* Subroutine */ int sngets_(integer *, char *, integer *, integer 
	    *, real *, real *, real *, real *, real *, ftnlen);


/*     %----------------------------------------------------% */
/*     | Include files for debugging and timing information | */
/*     %----------------------------------------------------% */


/* \SCCS Information: @(#) */
/* FILE: debug.h   SID: 2.3   DATE OF SID: 11/16/95   RELEASE: 2 */

/*     %---------------------------------% */
/*     | See debug.doc for documentation | */
/*     %---------------------------------% */

/*     %------------------% */
/*     | Scalar Arguments | */
/*     %------------------% */

/*     %--------------------------------% */
/*     | See stat.doc for documentation | */
/*     %--------------------------------% */

/* \SCCS Information: @(#) */
/* FILE: stat.h   SID: 2.2   DATE OF SID: 11/16/95   RELEASE: 2 */



/*     %-----------------% */
/*     | Array Arguments | */
/*     %-----------------% */


/*     %------------% */
/*     | Parameters | */
/*     %------------% */


/*     %---------------% */
/*     | Local Scalars | */
/*     %---------------% */


/*     %----------------------% */
/*     | External Subroutines | */
/*     %----------------------% */


/*     %--------------------% */
/*     | External Functions | */
/*     %--------------------% */


/*     %---------------------% */
/*     | Intrinsic Functions | */
/*     %---------------------% */


/*     %-----------------------% */
/*     | Executable Statements | */
/*     %-----------------------% */

/*     %------------------------% */
/*     | Set default parameters | */
/*     %------------------------% */

    /* Parameter adjustments */
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --workd;
    --resid;
    --di;
    --dr;
    --workev;
    --select;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    --iparam;
    --ipntr;
    --workl;

    /* Function Body */
    msglvl = debug_1.mneupd;
    mode = iparam[7];
    nconv = iparam[5];
    *info = 0;

/*     %---------------------------------% */
/*     | Get machine dependent constant. | */
/*     %---------------------------------% */

    eps23 = slamch_("Epsilon-Machine", (ftnlen)15);
    d__1 = (doublereal) eps23;
    eps23 = pow_dd(&d__1, &c_b3);

/*     %--------------% */
/*     | Quick return | */
/*     %--------------% */

    ierr = 0;

    if (nconv <= 0) {
	ierr = -14;
    } else if (*n <= 0) {
	ierr = -1;
    } else if (*nev <= 0) {
	ierr = -2;
    } else if (*ncv <= *nev + 1 || *ncv > *n) {
	ierr = -3;
    } else if (s_cmp(which, "LM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, 
	    "SM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "LR", (ftnlen)2, 
	    (ftnlen)2) != 0 && s_cmp(which, "SR", (ftnlen)2, (ftnlen)2) != 0 
	    && s_cmp(which, "LI", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, 
	    "SI", (ftnlen)2, (ftnlen)2) != 0) {
	ierr = -5;
    } else if (*(unsigned char *)bmat != 'I' && *(unsigned char *)bmat != 'G')
	     {
	ierr = -6;
    } else /* if(complicated condition) */ {
/* Computing 2nd power */
	i__1 = *ncv;
	if (*lworkl < i__1 * i__1 * 3 + *ncv * 6) {
	    ierr = -7;
	} else if (*(unsigned char *)howmny != 'A' && *(unsigned char *)
		howmny != 'P' && *(unsigned char *)howmny != 'S' && *rvec) {
	    ierr = -13;
	} else if (*(unsigned char *)howmny == 'S') {
	    ierr = -12;
	}
    }

    if (mode == 1 || mode == 2) {
	s_copy(type__, "REGULR", (ftnlen)6, (ftnlen)6);
    } else if (mode == 3 && *sigmai == 0.f) {
	s_copy(type__, "SHIFTI", (ftnlen)6, (ftnlen)6);
    } else if (mode == 3) {
	s_copy(type__, "REALPT", (ftnlen)6, (ftnlen)6);
    } else if (mode == 4) {
	s_copy(type__, "IMAGPT", (ftnlen)6, (ftnlen)6);
    } else {
	ierr = -10;
    }
    if (mode == 1 && *(unsigned char *)bmat == 'G') {
	ierr = -11;
    }

/*     %------------% */
/*     | Error Exit | */
/*     %------------% */

    if (ierr != 0) {
	*info = ierr;
	goto L9000;
    }

/*     %--------------------------------------------------------% */
/*     | Pointer into WORKL for address of H, RITZ, BOUNDS, Q   | */
/*     | etc... and the remaining workspace.                    | */
/*     | Also update pointer to be used on output.              | */
/*     | Memory is laid out as follows:                         | */
/*     | workl(1:ncv*ncv) := generated Hessenberg matrix        | */
/*     | workl(ncv*ncv+1:ncv*ncv+2*ncv) := real and imaginary   | */
/*     |                                   parts of ritz values | */
/*     | workl(ncv*ncv+2*ncv+1:ncv*ncv+3*ncv) := error bounds   | */
/*     %--------------------------------------------------------% */

/*     %-----------------------------------------------------------% */
/*     | The following is used and set by SNEUPD.                  | */
/*     | workl(ncv*ncv+3*ncv+1:ncv*ncv+4*ncv) := The untransformed | */
/*     |                             real part of the Ritz values. | */
/*     | workl(ncv*ncv+4*ncv+1:ncv*ncv+5*ncv) := The untransformed | */
/*     |                        imaginary part of the Ritz values. | */
/*     | workl(ncv*ncv+5*ncv+1:ncv*ncv+6*ncv) := The untransformed | */
/*     |                           error bounds of the Ritz values | */
/*     | workl(ncv*ncv+6*ncv+1:2*ncv*ncv+6*ncv) := Holds the upper | */
/*     |                             quasi-triangular matrix for H | */
/*     | workl(2*ncv*ncv+6*ncv+1: 3*ncv*ncv+6*ncv) := Holds the    | */
/*     |       associated matrix representation of the invariant   | */
/*     |       subspace for H.                                     | */
/*     | GRAND total of NCV * ( 3 * NCV + 6 ) locations.           | */
/*     %-----------------------------------------------------------% */

    ih = ipntr[5];
    ritzr = ipntr[6];
    ritzi = ipntr[7];
    bounds = ipntr[8];
    ldh = *ncv;
    ldq = *ncv;
    iheigr = bounds + ldh;
    iheigi = iheigr + ldh;
    ihbds = iheigi + ldh;
    iuptri = ihbds + ldh;
    invsub = iuptri + ldh * *ncv;
    ipntr[9] = iheigr;
    ipntr[10] = iheigi;
    ipntr[11] = ihbds;
    ipntr[12] = iuptri;
    ipntr[13] = invsub;
    wrr = 1;
    wri = *ncv + 1;
    iwev = wri + *ncv;

/*     %-----------------------------------------% */
/*     | irr points to the REAL part of the Ritz | */
/*     |     values computed by _neigh before    | */
/*     |     exiting _naup2.                     | */
/*     | iri points to the IMAGINARY part of the | */
/*     |     Ritz values computed by _neigh      | */
/*     |     before exiting _naup2.              | */
/*     | ibd points to the Ritz estimates        | */
/*     |     computed by _neigh before exiting   | */
/*     |     _naup2.                             | */
/*     %-----------------------------------------% */

    irr = ipntr[14] + *ncv * *ncv;
    iri = irr + *ncv;
    ibd = iri + *ncv;

/*     %------------------------------------% */
/*     | RNORM is B-norm of the RESID(1:N). | */
/*     %------------------------------------% */

    rnorm = workl[ih + 2];
    workl[ih + 2] = 0.f;

    if (msglvl > 2) {
	svout_(&debug_1.logfil, ncv, &workl[irr], &debug_1.ndigit, "_neupd: "
		"Real part of Ritz values passed in from _NAUPD.", (ftnlen)55);
	svout_(&debug_1.logfil, ncv, &workl[iri], &debug_1.ndigit, "_neupd: "
		"Imag part of Ritz values passed in from _NAUPD.", (ftnlen)55);
	svout_(&debug_1.logfil, ncv, &workl[ibd], &debug_1.ndigit, "_neupd: "
		"Ritz estimates passed in from _NAUPD.", (ftnlen)45);
    }

    if (*rvec) {

	reord = FALSE_;

/*        %---------------------------------------------------% */
/*        | Use the temporary bounds array to store indices   | */
/*        | These will be used to mark the select array later | */
/*        %---------------------------------------------------% */

	i__1 = *ncv;
	for (j = 1; j <= i__1; ++j) {
	    workl[bounds + j - 1] = (real) j;
	    select[j] = FALSE_;
/* L10: */
	}

/*        %-------------------------------------% */
/*        | Select the wanted Ritz values.      | */
/*        | Sort the Ritz values so that the    | */
/*        | wanted ones appear at the tailing   | */
/*        | NEV positions of workl(irr) and     | */
/*        | workl(iri).  Move the corresponding | */
/*        | error estimates in workl(bound)     | */
/*        | accordingly.                        | */
/*        %-------------------------------------% */

	np = *ncv - *nev;
	ishift = 0;
	sngets_(&ishift, which, nev, &np, &workl[irr], &workl[iri], &workl[
		bounds], &workl[1], &workl[np + 1], (ftnlen)2);

	if (msglvl > 2) {
	    svout_(&debug_1.logfil, ncv, &workl[irr], &debug_1.ndigit, "_neu"
		    "pd: Real part of Ritz values after calling _NGETS.", (
		    ftnlen)54);
	    svout_(&debug_1.logfil, ncv, &workl[iri], &debug_1.ndigit, "_neu"
		    "pd: Imag part of Ritz values after calling _NGETS.", (
		    ftnlen)54);
	    svout_(&debug_1.logfil, ncv, &workl[bounds], &debug_1.ndigit, 
		    "_neupd: Ritz value indices after calling _NGETS.", (
		    ftnlen)48);
	}

/*        %-----------------------------------------------------% */
/*        | Record indices of the converged wanted Ritz values  | */
/*        | Mark the select array for possible reordering       | */
/*        %-----------------------------------------------------% */

	numcnv = 0;
	i__1 = *ncv;
	for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	    r__1 = eps23, r__2 = slapy2_(&workl[irr + *ncv - j], &workl[iri + 
		    *ncv - j]);
	    temp1 = dmax(r__1,r__2);
	    jj = workl[bounds + *ncv - j];
	    if (numcnv < nconv && workl[ibd + jj - 1] <= *tol * temp1) {
		select[jj] = TRUE_;
		++numcnv;
		if (jj > *nev) {
		    reord = TRUE_;
		}
	    }
/* L11: */
	}

/*        %-----------------------------------------------------------% */
/*        | Check the count (numcnv) of converged Ritz values with    | */
/*        | the number (nconv) reported by dnaupd.  If these two      | */
/*        | are different then there has probably been an error       | */
/*        | caused by incorrect passing of the dnaupd data.           | */
/*        %-----------------------------------------------------------% */

	if (msglvl > 2) {
	    ivout_(&debug_1.logfil, &c__1, &numcnv, &debug_1.ndigit, "_neupd"
		    ": Number of specified eigenvalues", (ftnlen)39);
	    ivout_(&debug_1.logfil, &c__1, &nconv, &debug_1.ndigit, "_neupd:"
		    " Number of \"converged\" eigenvalues", (ftnlen)41);
	}

	if (numcnv != nconv) {
	    *info = -15;
	    goto L9000;
	}

/*        %-----------------------------------------------------------% */
/*        | Call LAPACK routine slahqr to compute the real Schur form | */
/*        | of the upper Hessenberg matrix returned by SNAUPD.        | */
/*        | Make a copy of the upper Hessenberg matrix.               | */
/*        | Initialize the Schur vector matrix Q to the identity.     | */
/*        %-----------------------------------------------------------% */

	i__1 = ldh * *ncv;
	scopy_(&i__1, &workl[ih], &c__1, &workl[iuptri], &c__1);
	slaset_("All", ncv, ncv, &c_b37, &c_b38, &workl[invsub], &ldq, (
		ftnlen)3);
	slahqr_(&c_true, &c_true, ncv, &c__1, ncv, &workl[iuptri], &ldh, &
		workl[iheigr], &workl[iheigi], &c__1, ncv, &workl[invsub], &
		ldq, &ierr);
	scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &c__1);

	if (ierr != 0) {
	    *info = -8;
	    goto L9000;
	}

	if (msglvl > 1) {
	    svout_(&debug_1.logfil, ncv, &workl[iheigr], &debug_1.ndigit, 
		    "_neupd: Real part of the eigenvalues of H", (ftnlen)41);
	    svout_(&debug_1.logfil, ncv, &workl[iheigi], &debug_1.ndigit, 
		    "_neupd: Imaginary part of the Eigenvalues of H", (ftnlen)
		    46);
	    svout_(&debug_1.logfil, ncv, &workl[ihbds], &debug_1.ndigit, 
		    "_neupd: Last row of the Schur vector matrix", (ftnlen)43)
		    ;
	    if (msglvl > 3) {
		smout_(&debug_1.logfil, ncv, ncv, &workl[iuptri], &ldh, &
			debug_1.ndigit, "_neupd: The upper quasi-triangular "
			"matrix ", (ftnlen)42);
	    }
	}

	if (reord) {

/*           %-----------------------------------------------------% */
/*           | Reorder the computed upper quasi-triangular matrix. | */
/*           %-----------------------------------------------------% */

	    strsen_("None", "V", &select[1], ncv, &workl[iuptri], &ldh, &
		    workl[invsub], &ldq, &workl[iheigr], &workl[iheigi], &
		    nconv, &conds, &sep, &workl[ihbds], ncv, iwork, &c__1, &
		    ierr, (ftnlen)4, (ftnlen)1);

	    if (ierr == 1) {
		*info = 1;
		goto L9000;
	    }

	    if (msglvl > 2) {
		svout_(&debug_1.logfil, ncv, &workl[iheigr], &debug_1.ndigit, 
			"_neupd: Real part of the eigenvalues of H--reordered"
			, (ftnlen)52);
		svout_(&debug_1.logfil, ncv, &workl[iheigi], &debug_1.ndigit, 
			"_neupd: Imag part of the eigenvalues of H--reordered"
			, (ftnlen)52);
		if (msglvl > 3) {
		    smout_(&debug_1.logfil, ncv, ncv, &workl[iuptri], &ldq, &
			    debug_1.ndigit, "_neupd: Quasi-triangular matrix"
			    " after re-ordering", (ftnlen)49);
		}
	    }

	}

/*        %---------------------------------------% */
/*        | Copy the last row of the Schur vector | */
/*        | into workl(ihbds).  This will be used | */
/*        | to compute the Ritz estimates of      | */
/*        | converged Ritz values.                | */
/*        %---------------------------------------% */

	scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &c__1);

/*        %----------------------------------------------------% */
/*        | Place the computed eigenvalues of H into DR and DI | */
/*        | if a spectral transformation was not used.         | */
/*        %----------------------------------------------------% */

	if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0) {
	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);
	}

/*        %----------------------------------------------------------% */
/*        | Compute the QR factorization of the matrix representing  | */
/*        | the wanted invariant subspace located in the first NCONV | */
/*        | columns of workl(invsub,ldq).                            | */
/*        %----------------------------------------------------------% */

	sgeqr2_(ncv, &nconv, &workl[invsub], &ldq, &workev[1], &workev[*ncv + 
		1], &ierr);

/*        %---------------------------------------------------------% */
/*        | * Postmultiply V by Q using sorm2r.                     | */
/*        | * Copy the first NCONV columns of VQ into Z.            | */
/*        | * Postmultiply Z by R.                                  | */
/*        | The N by NCONV matrix Z is now a matrix representation  | */
/*        | of the approximate invariant subspace associated with   | */
/*        | the Ritz values in workl(iheigr) and workl(iheigi)      | */
/*        | The first NCONV columns of V are now approximate Schur  | */
/*        | vectors associated with the real upper quasi-triangular | */
/*        | matrix of order NCONV in workl(iuptri)                  | */
/*        %---------------------------------------------------------% */

	sorm2r_("Right", "Notranspose", n, ncv, &nconv, &workl[invsub], &ldq, 
		&workev[1], &v[v_offset], ldv, &workd[*n + 1], &ierr, (ftnlen)
		5, (ftnlen)11);
	slacpy_("All", n, &nconv, &v[v_offset], ldv, &z__[z_offset], ldz, (
		ftnlen)3);

	i__1 = nconv;
	for (j = 1; j <= i__1; ++j) {

/*           %---------------------------------------------------% */
/*           | Perform both a column and row scaling if the      | */
/*           | diagonal element of workl(invsub,ldq) is negative | */
/*           | I'm lazy and don't take advantage of the upper    | */
/*           | quasi-triangular form of workl(iuptri,ldq)        | */
/*           | Note that since Q is orthogonal, R is a diagonal  | */
/*           | matrix consisting of plus or minus ones           | */
/*           %---------------------------------------------------% */

	    if (workl[invsub + (j - 1) * ldq + j - 1] < 0.f) {
		sscal_(&nconv, &c_b64, &workl[iuptri + j - 1], &ldq);
		sscal_(&nconv, &c_b64, &workl[iuptri + (j - 1) * ldq], &c__1);
	    }

/* L20: */
	}

	if (*(unsigned char *)howmny == 'A') {

/*           %--------------------------------------------% */
/*           | Compute the NCONV wanted eigenvectors of T | */
/*           | located in workl(iuptri,ldq).              | */
/*           %--------------------------------------------% */

	    i__1 = *ncv;
	    for (j = 1; j <= i__1; ++j) {
		if (j <= nconv) {
		    select[j] = TRUE_;
		} else {
		    select[j] = FALSE_;
		}
/* L30: */
	    }

	    strevc_("Right", "Select", &select[1], ncv, &workl[iuptri], &ldq, 
		    vl, &c__1, &workl[invsub], &ldq, ncv, &outncv, &workev[1],
		     &ierr, (ftnlen)5, (ftnlen)6);

	    if (ierr != 0) {
		*info = -9;
		goto L9000;
	    }

/*           %------------------------------------------------% */
/*           | Scale the returning eigenvectors so that their | */
/*           | Euclidean norms are all one. LAPACK subroutine | */
/*           | strevc returns each eigenvector normalized so  | */
/*           | that the element of largest magnitude has      | */
/*           | magnitude 1;                                   | */
/*           %------------------------------------------------% */

	    iconj = 0;
	    i__1 = nconv;
	    for (j = 1; j <= i__1; ++j) {

		if (workl[iheigi + j - 1] == 0.f) {

/*                 %----------------------% */
/*                 | real eigenvalue case | */
/*                 %----------------------% */

		    temp = snrm2_(ncv, &workl[invsub + (j - 1) * ldq], &c__1);
		    r__1 = 1.f / temp;
		    sscal_(ncv, &r__1, &workl[invsub + (j - 1) * ldq], &c__1);

		} else {

/*                 %-------------------------------------------% */
/*                 | Complex conjugate pair case. Note that    | */
/*                 | since the real and imaginary part of      | */
/*                 | the eigenvector are stored in consecutive | */
/*                 | columns, we further normalize by the      | */
/*                 | square root of two.                       | */
/*                 %-------------------------------------------% */

		    if (iconj == 0) {
			r__1 = snrm2_(ncv, &workl[invsub + (j - 1) * ldq], &
				c__1);
			r__2 = snrm2_(ncv, &workl[invsub + j * ldq], &c__1);
			temp = slapy2_(&r__1, &r__2);
			r__1 = 1.f / temp;
			sscal_(ncv, &r__1, &workl[invsub + (j - 1) * ldq], &
				c__1);
			r__1 = 1.f / temp;
			sscal_(ncv, &r__1, &workl[invsub + j * ldq], &c__1);
			iconj = 1;
		    } else {
			iconj = 0;
		    }

		}

/* L40: */
	    }

	    sgemv_("T", ncv, &nconv, &c_b38, &workl[invsub], &ldq, &workl[
		    ihbds], &c__1, &c_b37, &workev[1], &c__1, (ftnlen)1);

	    iconj = 0;
	    i__1 = nconv;
	    for (j = 1; j <= i__1; ++j) {
		if (workl[iheigi + j - 1] != 0.f) {

/*                 %-------------------------------------------% */
/*                 | Complex conjugate pair case. Note that    | */
/*                 | since the real and imaginary part of      | */
/*                 | the eigenvector are stored in consecutive | */
/*                 %-------------------------------------------% */

		    if (iconj == 0) {
			workev[j] = slapy2_(&workev[j], &workev[j + 1]);
			workev[j + 1] = workev[j];
			iconj = 1;
		    } else {
			iconj = 0;
		    }
		}
/* L45: */
	    }

	    if (msglvl > 2) {
		scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &
			c__1);
		svout_(&debug_1.logfil, ncv, &workl[ihbds], &debug_1.ndigit, 
			"_neupd: Last row of the eigenvector matrix for T", (
			ftnlen)48);
		if (msglvl > 3) {
		    smout_(&debug_1.logfil, ncv, ncv, &workl[invsub], &ldq, &
			    debug_1.ndigit, "_neupd: The eigenvector matrix "
			    "for T", (ftnlen)36);
		}
	    }

/*           %---------------------------------------% */
/*           | Copy Ritz estimates into workl(ihbds) | */
/*           %---------------------------------------% */

	    scopy_(&nconv, &workev[1], &c__1, &workl[ihbds], &c__1);

/*           %---------------------------------------------------------% */
/*           | Compute the QR factorization of the eigenvector matrix  | */
/*           | associated with leading portion of T in the first NCONV | */
/*           | columns of workl(invsub,ldq).                           | */
/*           %---------------------------------------------------------% */

	    sgeqr2_(ncv, &nconv, &workl[invsub], &ldq, &workev[1], &workev[*
		    ncv + 1], &ierr);

/*           %----------------------------------------------% */
/*           | * Postmultiply Z by Q.                       | */
/*           | * Postmultiply Z by R.                       | */
/*           | The N by NCONV matrix Z is now contains the  | */
/*           | Ritz vectors associated with the Ritz values | */
/*           | in workl(iheigr) and workl(iheigi).          | */
/*           %----------------------------------------------% */

	    sorm2r_("Right", "Notranspose", n, ncv, &nconv, &workl[invsub], &
		    ldq, &workev[1], &z__[z_offset], ldz, &workd[*n + 1], &
		    ierr, (ftnlen)5, (ftnlen)11);

	    strmm_("Right", "Upper", "No transpose", "Non-unit", n, &nconv, &
		    c_b38, &workl[invsub], &ldq, &z__[z_offset], ldz, (ftnlen)
		    5, (ftnlen)5, (ftnlen)12, (ftnlen)8);

	}

    } else {

/*        %------------------------------------------------------% */
/*        | An approximate invariant subspace is not needed.     | */
/*        | Place the Ritz values computed SNAUPD into DR and DI | */
/*        %------------------------------------------------------% */

	scopy_(&nconv, &workl[ritzr], &c__1, &dr[1], &c__1);
	scopy_(&nconv, &workl[ritzi], &c__1, &di[1], &c__1);
	scopy_(&nconv, &workl[ritzr], &c__1, &workl[iheigr], &c__1);
	scopy_(&nconv, &workl[ritzi], &c__1, &workl[iheigi], &c__1);
	scopy_(&nconv, &workl[bounds], &c__1, &workl[ihbds], &c__1);
    }

/*     %------------------------------------------------% */
/*     | Transform the Ritz values and possibly vectors | */
/*     | and corresponding error bounds of OP to those  | */
/*     | of A*x = lambda*B*x.                           | */
/*     %------------------------------------------------% */

    if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0) {

	if (*rvec) {
	    sscal_(ncv, &rnorm, &workl[ihbds], &c__1);
	}

    } else {

/*        %---------------------------------------% */
/*        |   A spectral transformation was used. | */
/*        | * Determine the Ritz estimates of the | */
/*        |   Ritz values in the original system. | */
/*        %---------------------------------------% */

	if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0) {

	    if (*rvec) {
		sscal_(ncv, &rnorm, &workl[ihbds], &c__1);
	    }

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
		temp = slapy2_(&workl[iheigr + k - 1], &workl[iheigi + k - 1])
			;
		workl[ihbds + k - 1] = (r__1 = workl[ihbds + k - 1], dabs(
			r__1)) / temp / temp;
/* L50: */
	    }

	} else if (s_cmp(type__, "REALPT", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
/* L60: */
	    }

	} else if (s_cmp(type__, "IMAGPT", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
/* L70: */
	    }

	}

/*        %-----------------------------------------------------------% */
/*        | *  Transform the Ritz values back to the original system. | */
/*        |    For TYPE = 'SHIFTI' the transformation is              | */
/*        |             lambda = 1/theta + sigma                      | */
/*        |    For TYPE = 'REALPT' or 'IMAGPT' the user must from     | */
/*        |    Rayleigh quotients or a projection. See remark 3 above.| */
/*        | NOTES:                                                    | */
/*        | *The Ritz vectors are not affected by the transformation. | */
/*        %-----------------------------------------------------------% */

	if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
		temp = slapy2_(&workl[iheigr + k - 1], &workl[iheigi + k - 1])
			;
		workl[iheigr + k - 1] = workl[iheigr + k - 1] / temp / temp + 
			*sigmar;
		workl[iheigi + k - 1] = -workl[iheigi + k - 1] / temp / temp 
			+ *sigmai;
/* L80: */
	    }

	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);

	} else if (s_cmp(type__, "REALPT", (ftnlen)6, (ftnlen)6) == 0 || 
		s_cmp(type__, "IMAGPT", (ftnlen)6, (ftnlen)6) == 0) {

	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);

	}

    }

    if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0 && msglvl > 1) {
	svout_(&debug_1.logfil, &nconv, &dr[1], &debug_1.ndigit, "_neupd: Un"
		"transformed real part of the Ritz valuess.", (ftnlen)52);
	svout_(&debug_1.logfil, &nconv, &di[1], &debug_1.ndigit, "_neupd: Un"
		"transformed imag part of the Ritz valuess.", (ftnlen)52);
	svout_(&debug_1.logfil, &nconv, &workl[ihbds], &debug_1.ndigit, "_ne"
		"upd: Ritz estimates of untransformed Ritz values.", (ftnlen)
		52);
    } else if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0 && msglvl > 
	    1) {
	svout_(&debug_1.logfil, &nconv, &dr[1], &debug_1.ndigit, "_neupd: Re"
		"al parts of converged Ritz values.", (ftnlen)44);
	svout_(&debug_1.logfil, &nconv, &di[1], &debug_1.ndigit, "_neupd: Im"
		"ag parts of converged Ritz values.", (ftnlen)44);
	svout_(&debug_1.logfil, &nconv, &workl[ihbds], &debug_1.ndigit, "_ne"
		"upd: Associated Ritz estimates.", (ftnlen)34);
    }

/*     %-------------------------------------------------% */
/*     | Eigenvector Purification step. Formally perform | */
/*     | one of inverse subspace iteration. Only used    | */
/*     | for MODE = 2.                                   | */
/*     %-------------------------------------------------% */

    if (*rvec && *(unsigned char *)howmny == 'A' && s_cmp(type__, "SHIFTI", (
	    ftnlen)6, (ftnlen)6) == 0) {

/*        %------------------------------------------------% */
/*        | Purify the computed Ritz vectors by adding a   | */
/*        | little bit of the residual vector:             | */
/*        |                      T                         | */
/*        |          resid(:)*( e    s ) / theta           | */
/*        |                      NCV                       | */
/*        | where H s = s theta. Remember that when theta  | */
/*        | has nonzero imaginary part, the corresponding  | */
/*        | Ritz vector is stored across two columns of Z. | */
/*        %------------------------------------------------% */

	iconj = 0;
	i__1 = nconv;
	for (j = 1; j <= i__1; ++j) {
	    if (workl[iheigi + j - 1] == 0.f) {
		workev[j] = workl[invsub + (j - 1) * ldq + *ncv - 1] / workl[
			iheigr + j - 1];
	    } else if (iconj == 0) {
		temp = slapy2_(&workl[iheigr + j - 1], &workl[iheigi + j - 1])
			;
		workev[j] = (workl[invsub + (j - 1) * ldq + *ncv - 1] * workl[
			iheigr + j - 1] + workl[invsub + j * ldq + *ncv - 1] *
			 workl[iheigi + j - 1]) / temp / temp;
		workev[j + 1] = (workl[invsub + j * ldq + *ncv - 1] * workl[
			iheigr + j - 1] - workl[invsub + (j - 1) * ldq + *ncv 
			- 1] * workl[iheigi + j - 1]) / temp / temp;
		iconj = 1;
	    } else {
		iconj = 0;
	    }
/* L110: */
	}

/*        %---------------------------------------% */
/*        | Perform a rank one update to Z and    | */
/*        | purify all the Ritz vectors together. | */
/*        %---------------------------------------% */

	sger_(n, &nconv, &c_b38, &resid[1], &c__1, &workev[1], &c__1, &z__[
		z_offset], ldz);

    }

L9000:

    return 0;

/*     %---------------% */
/*     | End of SNEUPD | */
/*     %---------------% */

} /* sneupd_ */

Exemplo n.º 20

0

Exibir arquivo

Arquivo: slatrd.c Projeto: RebUT/REBUT

/* Subroutine */ int slatrd_(char *uplo, integer *n, integer *nb, real *a,
                             integer *lda, real *e, real *tau, real *w, integer *ldw)
{
    /* System generated locals */
    integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;

    /* Local variables */
    integer i__, iw;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    real alpha;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
           sgemv_(char *, integer *, integer *, real *, real *, integer *,
                  real *, integer *, real *, real *, integer *), saxpy_(
               integer *, real *, real *, integer *, real *, integer *), ssymv_(
               char *, integer *, real *, real *, integer *, real *, integer *,
               real *, real *, integer *), slarfg_(integer *, real *,
                       real *, integer *, real *);


    /*  -- LAPACK auxiliary routine (version 3.2) -- */
    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
    /*     November 2006 */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  SLATRD reduces NB rows and columns of a real symmetric matrix A to */
    /*  symmetric tridiagonal form by an orthogonal similarity */
    /*  transformation Q' * A * Q, and returns the matrices V and W which are */
    /*  needed to apply the transformation to the unreduced part of A. */

    /*  If UPLO = 'U', SLATRD reduces the last NB rows and columns of a */
    /*  matrix, of which the upper triangle is supplied; */
    /*  if UPLO = 'L', SLATRD reduces the first NB rows and columns of a */
    /*  matrix, of which the lower triangle is supplied. */

    /*  This is an auxiliary routine called by SSYTRD. */

    /*  Arguments */
    /*  ========= */

    /*  UPLO    (input) CHARACTER*1 */
    /*          Specifies whether the upper or lower triangular part of the */
    /*          symmetric matrix A is stored: */
    /*          = 'U': Upper triangular */
    /*          = 'L': Lower triangular */

    /*  N       (input) INTEGER */
    /*          The order of the matrix A. */

    /*  NB      (input) INTEGER */
    /*          The number of rows and columns to be reduced. */

    /*  A       (input/output) REAL array, dimension (LDA,N) */
    /*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading */
    /*          n-by-n upper triangular part of A contains the upper */
    /*          triangular part of the matrix A, and the strictly lower */
    /*          triangular part of A is not referenced.  If UPLO = 'L', the */
    /*          leading n-by-n lower triangular part of A contains the lower */
    /*          triangular part of the matrix A, and the strictly upper */
    /*          triangular part of A is not referenced. */
    /*          On exit: */
    /*          if UPLO = 'U', the last NB columns have been reduced to */
    /*            tridiagonal form, with the diagonal elements overwriting */
    /*            the diagonal elements of A; the elements above the diagonal */
    /*            with the array TAU, represent the orthogonal matrix Q as a */
    /*            product of elementary reflectors; */
    /*          if UPLO = 'L', the first NB columns have been reduced to */
    /*            tridiagonal form, with the diagonal elements overwriting */
    /*            the diagonal elements of A; the elements below the diagonal */
    /*            with the array TAU, represent the  orthogonal matrix Q as a */
    /*            product of elementary reflectors. */
    /*          See Further Details. */

    /*  LDA     (input) INTEGER */
    /*          The leading dimension of the array A.  LDA >= (1,N). */

    /*  E       (output) REAL array, dimension (N-1) */
    /*          If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal */
    /*          elements of the last NB columns of the reduced matrix; */
    /*          if UPLO = 'L', E(1:nb) contains the subdiagonal elements of */
    /*          the first NB columns of the reduced matrix. */

    /*  TAU     (output) REAL array, dimension (N-1) */
    /*          The scalar factors of the elementary reflectors, stored in */
    /*          TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. */
    /*          See Further Details. */

    /*  W       (output) REAL array, dimension (LDW,NB) */
    /*          The n-by-nb matrix W required to update the unreduced part */
    /*          of A. */

    /*  LDW     (input) INTEGER */
    /*          The leading dimension of the array W. LDW >= max(1,N). */

    /*  Further Details */
    /*  =============== */

    /*  If UPLO = 'U', the matrix Q is represented as a product of elementary */
    /*  reflectors */

    /*     Q = H(n) H(n-1) . . . H(n-nb+1). */

    /*  Each H(i) has the form */

    /*     H(i) = I - tau * v * v' */

    /*  where tau is a real scalar, and v is a real vector with */
    /*  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), */
    /*  and tau in TAU(i-1). */

    /*  If UPLO = 'L', the matrix Q is represented as a product of elementary */
    /*  reflectors */

    /*     Q = H(1) H(2) . . . H(nb). */

    /*  Each H(i) has the form */

    /*     H(i) = I - tau * v * v' */

    /*  where tau is a real scalar, and v is a real vector with */
    /*  v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */
    /*  and tau in TAU(i). */

    /*  The elements of the vectors v together form the n-by-nb matrix V */
    /*  which is needed, with W, to apply the transformation to the unreduced */
    /*  part of the matrix, using a symmetric rank-2k update of the form: */
    /*  A := A - V*W' - W*V'. */

    /*  The contents of A on exit are illustrated by the following examples */
    /*  with n = 5 and nb = 2: */

    /*  if UPLO = 'U':                       if UPLO = 'L': */

    /*    (  a   a   a   v4  v5 )              (  d                  ) */
    /*    (      a   a   v4  v5 )              (  1   d              ) */
    /*    (          a   1   v5 )              (  v1  1   a          ) */
    /*    (              d   1  )              (  v1  v2  a   a      ) */
    /*    (                  d  )              (  v1  v2  a   a   a  ) */

    /*  where d denotes a diagonal element of the reduced matrix, a denotes */
    /*  an element of the original matrix that is unchanged, and vi denotes */
    /*  an element of the vector defining H(i). */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Scalars .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. External Functions .. */
    /*     .. */
    /*     .. Intrinsic Functions .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /*     Quick return if possible */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --e;
    --tau;
    w_dim1 = *ldw;
    w_offset = 1 + w_dim1;
    w -= w_offset;

    /* Function Body */
    if (*n <= 0) {
        return 0;
    }

    if (lsame_(uplo, "U")) {

        /*        Reduce last NB columns of upper triangle */

        i__1 = *n - *nb + 1;
        for (i__ = *n; i__ >= i__1; --i__) {
            iw = i__ - *n + *nb;
            if (i__ < *n) {

                /*              Update A(1:i,i) */

                i__2 = *n - i__;
                sgemv_("No transpose", &i__, &i__2, &c_b5, &a[(i__ + 1) *
                        a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, &
                       c_b6, &a[i__ * a_dim1 + 1], &c__1);
                i__2 = *n - i__;
                sgemv_("No transpose", &i__, &i__2, &c_b5, &w[(iw + 1) *
                        w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, &
                       c_b6, &a[i__ * a_dim1 + 1], &c__1);
            }
            if (i__ > 1) {

                /*              Generate elementary reflector H(i) to annihilate */
                /*              A(1:i-2,i) */

                i__2 = i__ - 1;
                slarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 +
                        1], &c__1, &tau[i__ - 1]);
                e[i__ - 1] = a[i__ - 1 + i__ * a_dim1];
                a[i__ - 1 + i__ * a_dim1] = 1.f;

                /*              Compute W(1:i-1,i) */

                i__2 = i__ - 1;
                ssymv_("Upper", &i__2, &c_b6, &a[a_offset], lda, &a[i__ *
                        a_dim1 + 1], &c__1, &c_b16, &w[iw * w_dim1 + 1], &
                       c__1);
                if (i__ < *n) {
                    i__2 = i__ - 1;
                    i__3 = *n - i__;
                    sgemv_("Transpose", &i__2, &i__3, &c_b6, &w[(iw + 1) *
                            w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, &
                           c_b16, &w[i__ + 1 + iw * w_dim1], &c__1);
                    i__2 = i__ - 1;
                    i__3 = *n - i__;
                    sgemv_("No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) *
                            a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], &
                           c__1, &c_b6, &w[iw * w_dim1 + 1], &c__1);
                    i__2 = i__ - 1;
                    i__3 = *n - i__;
                    sgemv_("Transpose", &i__2, &i__3, &c_b6, &a[(i__ + 1) *
                            a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, &
                           c_b16, &w[i__ + 1 + iw * w_dim1], &c__1);
                    i__2 = i__ - 1;
                    i__3 = *n - i__;
                    sgemv_("No transpose", &i__2, &i__3, &c_b5, &w[(iw + 1) *
                            w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], &
                           c__1, &c_b6, &w[iw * w_dim1 + 1], &c__1);
                }
                i__2 = i__ - 1;
                sscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1);
                i__2 = i__ - 1;
                alpha = tau[i__ - 1] * -.5f * sdot_(&i__2, &w[iw * w_dim1 + 1]
                                                    , &c__1, &a[i__ * a_dim1 + 1], &c__1);
                i__2 = i__ - 1;
                saxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw *
                        w_dim1 + 1], &c__1);
            }

            /* L10: */
        }
    } else {

        /*        Reduce first NB columns of lower triangle */

        i__1 = *nb;
        for (i__ = 1; i__ <= i__1; ++i__) {

            /*           Update A(i:n,i) */

            i__2 = *n - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda,
                   &w[i__ + w_dim1], ldw, &c_b6, &a[i__ + i__ * a_dim1], &
                   c__1);
            i__2 = *n - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("No transpose", &i__2, &i__3, &c_b5, &w[i__ + w_dim1], ldw,
                   &a[i__ + a_dim1], lda, &c_b6, &a[i__ + i__ * a_dim1], &
                   c__1);
            if (i__ < *n) {

                /*              Generate elementary reflector H(i) to annihilate */
                /*              A(i+2:n,i) */

                i__2 = *n - i__;
                /* Computing MIN */
                i__3 = i__ + 2;
                slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n)+
                        i__ * a_dim1], &c__1, &tau[i__]);
                e[i__] = a[i__ + 1 + i__ * a_dim1];
                a[i__ + 1 + i__ * a_dim1] = 1.f;

                /*              Compute W(i+1:n,i) */

                i__2 = *n - i__;
                ssymv_("Lower", &i__2, &c_b6, &a[i__ + 1 + (i__ + 1) * a_dim1]
                       , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[
                           i__ + 1 + i__ * w_dim1], &c__1);
                i__2 = *n - i__;
                i__3 = i__ - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b6, &w[i__ + 1 + w_dim1],
                       ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[
                           i__ * w_dim1 + 1], &c__1);
                i__2 = *n - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 +
                        a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b6, &w[
                           i__ + 1 + i__ * w_dim1], &c__1);
                i__2 = *n - i__;
                i__3 = i__ - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b6, &a[i__ + 1 + a_dim1],
                       lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[
                           i__ * w_dim1 + 1], &c__1);
                i__2 = *n - i__;
                i__3 = i__ - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &w[i__ + 1 +
                        w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b6, &w[
                           i__ + 1 + i__ * w_dim1], &c__1);
                i__2 = *n - i__;
                sscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1);
                i__2 = *n - i__;
                alpha = tau[i__] * -.5f * sdot_(&i__2, &w[i__ + 1 + i__ *
                                                w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1);
                i__2 = *n - i__;
                saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[
                           i__ + 1 + i__ * w_dim1], &c__1);
            }

            /* L20: */
        }
    }

    return 0;

    /*     End of SLATRD */

} /* slatrd_ */

Exemplo n.º 21

0

Exibir arquivo

Arquivo: sget52.c Projeto: kstraube/hysim

/* Subroutine */ int sget52_(logical *left, integer *n, real *a, integer *lda, 
	 real *b, integer *ldb, real *e, integer *lde, real *alphar, real *
	alphai, real *beta, real *work, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, e_dim1, e_offset, i__1, i__2;
    real r__1, r__2, r__3, r__4;

    /* Local variables */
    integer j;
    real ulp;
    integer jvec;
    real temp1, acoef, scale, abmax, salfi, sbeta, salfr, anorm, bnorm, enorm;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *);
    char trans[1];
    real bcoefi, bcoefr, alfmax;
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    real safmin;
    char normab[1];
    real safmax, betmax, enrmer;
    logical ilcplx;
    real errnrm;


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SGET52  does an eigenvector check for the generalized eigenvalue */
/*  problem. */

/*  The basic test for right eigenvectors is: */

/*                            | b(j) A E(j) -  a(j) B E(j) | */
/*          RESULT(1) = max   ------------------------------- */
/*                       j    n ulp max( |b(j) A|, |a(j) B| ) */

/*  using the 1-norm.  Here, a(j)/b(j) = w is the j-th generalized */
/*  eigenvalue of A - w B, or, equivalently, b(j)/a(j) = m is the j-th */
/*  generalized eigenvalue of m A - B. */

/*  For real eigenvalues, the test is straightforward.  For complex */
/*  eigenvalues, E(j) and a(j) are complex, represented by */
/*  Er(j) + i*Ei(j) and ar(j) + i*ai(j), resp., so the test for that */
/*  eigenvector becomes */

/*                  max( |Wr|, |Wi| ) */
/*      -------------------------------------------- */
/*      n ulp max( |b(j) A|, (|ar(j)|+|ai(j)|) |B| ) */

/*  where */

/*      Wr = b(j) A Er(j) - ar(j) B Er(j) + ai(j) B Ei(j) */

/*      Wi = b(j) A Ei(j) - ai(j) B Er(j) - ar(j) B Ei(j) */

/*                          T   T  _ */
/*  For left eigenvectors, A , B , a, and b  are used. */

/*  SGET52 also tests the normalization of E.  Each eigenvector is */
/*  supposed to be normalized so that the maximum "absolute value" */
/*  of its elements is 1, where in this case, "absolute value" */
/*  of a complex value x is  |Re(x)| + |Im(x)| ; let us call this */
/*  maximum "absolute value" norm of a vector v  M(v). */
/*  if a(j)=b(j)=0, then the eigenvector is set to be the jth coordinate */
/*  vector.  The normalization test is: */

/*          RESULT(2) =      max       | M(v(j)) - 1 | / ( n ulp ) */
/*                     eigenvectors v(j) */

/*  Arguments */
/*  ========= */

/*  LEFT    (input) LOGICAL */
/*          =.TRUE.:  The eigenvectors in the columns of E are assumed */
/*                    to be *left* eigenvectors. */
/*          =.FALSE.: The eigenvectors in the columns of E are assumed */
/*                    to be *right* eigenvectors. */

/*  N       (input) INTEGER */
/*          The size of the matrices.  If it is zero, SGET52 does */
/*          nothing.  It must be at least zero. */

/*  A       (input) REAL array, dimension (LDA, N) */
/*          The matrix A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of A.  It must be at least 1 */
/*          and at least N. */

/*  B       (input) REAL array, dimension (LDB, N) */
/*          The matrix B. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of B.  It must be at least 1 */
/*          and at least N. */

/*  E       (input) REAL array, dimension (LDE, N) */
/*          The matrix of eigenvectors.  It must be O( 1 ).  Complex */
/*          eigenvalues and eigenvectors always come in pairs, the */
/*          eigenvalue and its conjugate being stored in adjacent */
/*          elements of ALPHAR, ALPHAI, and BETA.  Thus, if a(j)/b(j) */
/*          and a(j+1)/b(j+1) are a complex conjugate pair of */
/*          generalized eigenvalues, then E(,j) contains the real part */
/*          of the eigenvector and E(,j+1) contains the imaginary part. */
/*          Note that whether E(,j) is a real eigenvector or part of a */
/*          complex one is specified by whether ALPHAI(j) is zero or not. */

/*  LDE     (input) INTEGER */
/*          The leading dimension of E.  It must be at least 1 and at */
/*          least N. */

/*  ALPHAR  (input) REAL array, dimension (N) */
/*          The real parts of the values a(j) as described above, which, */
/*          along with b(j), define the generalized eigenvalues. */
/*          Complex eigenvalues always come in complex conjugate pairs */
/*          a(j)/b(j) and a(j+1)/b(j+1), which are stored in adjacent */
/*          elements in ALPHAR, ALPHAI, and BETA.  Thus, if the j-th */
/*          and (j+1)-st eigenvalues form a pair, ALPHAR(j+1)/BETA(j+1) */
/*          is assumed to be equal to ALPHAR(j)/BETA(j). */

/*  ALPHAI  (input) REAL array, dimension (N) */
/*          The imaginary parts of the values a(j) as described above, */
/*          which, along with b(j), define the generalized eigenvalues. */
/*          If ALPHAI(j)=0, then the eigenvalue is real, otherwise it */
/*          is part of a complex conjugate pair.  Complex eigenvalues */
/*          always come in complex conjugate pairs a(j)/b(j) and */
/*          a(j+1)/b(j+1), which are stored in adjacent elements in */
/*          ALPHAR, ALPHAI, and BETA.  Thus, if the j-th and (j+1)-st */
/*          eigenvalues form a pair, ALPHAI(j+1)/BETA(j+1) is assumed to */
/*          be equal to  -ALPHAI(j)/BETA(j).  Also, nonzero values in */
/*          ALPHAI are assumed to always come in adjacent pairs. */

/*  BETA    (input) REAL array, dimension (N) */
/*          The values b(j) as described above, which, along with a(j), */
/*          define the generalized eigenvalues. */

/*  WORK    (workspace) REAL array, dimension (N**2+N) */

/*  RESULT  (output) REAL array, dimension (2) */
/*          The values computed by the test described above.  If A E or */
/*          B E is likely to overflow, then RESULT(1:2) is set to */
/*          10 / ulp. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    e_dim1 = *lde;
    e_offset = 1 + e_dim1;
    e -= e_offset;
    --alphar;
    --alphai;
    --beta;
    --work;
    --result;

    /* Function Body */
    result[1] = 0.f;
    result[2] = 0.f;
    if (*n <= 0) {
	return 0;
    }

    safmin = slamch_("Safe minimum");
    safmax = 1.f / safmin;
    ulp = slamch_("Epsilon") * slamch_("Base");

    if (*left) {
	*(unsigned char *)trans = 'T';
	*(unsigned char *)normab = 'I';
    } else {
	*(unsigned char *)trans = 'N';
	*(unsigned char *)normab = 'O';
    }

/*     Norm of A, B, and E: */

/* Computing MAX */
    r__1 = slange_(normab, n, n, &a[a_offset], lda, &work[1]);
    anorm = dmax(r__1,safmin);
/* Computing MAX */
    r__1 = slange_(normab, n, n, &b[b_offset], ldb, &work[1]);
    bnorm = dmax(r__1,safmin);
/* Computing MAX */
    r__1 = slange_("O", n, n, &e[e_offset], lde, &work[1]);
    enorm = dmax(r__1,ulp);
    alfmax = safmax / dmax(1.f,bnorm);
    betmax = safmax / dmax(1.f,anorm);

/*     Compute error matrix. */
/*     Column i = ( b(i) A - a(i) B ) E(i) / max( |a(i) B| |b(i) A| ) */

    ilcplx = FALSE_;
    i__1 = *n;
    for (jvec = 1; jvec <= i__1; ++jvec) {
	if (ilcplx) {

/*           2nd Eigenvalue/-vector of pair -- do nothing */

	    ilcplx = FALSE_;
	} else {
	    salfr = alphar[jvec];
	    salfi = alphai[jvec];
	    sbeta = beta[jvec];
	    if (salfi == 0.f) {

/*              Real eigenvalue and -vector */

/* Computing MAX */
		r__1 = dabs(salfr), r__2 = dabs(sbeta);
		abmax = dmax(r__1,r__2);
		if (dabs(salfr) > alfmax || dabs(sbeta) > betmax || abmax < 
			1.f) {
		    scale = 1.f / dmax(abmax,safmin);
		    salfr = scale * salfr;
		    sbeta = scale * sbeta;
		}
/* Computing MAX */
		r__1 = dabs(salfr) * bnorm, r__2 = dabs(sbeta) * anorm, r__1 =
			 max(r__1,r__2);
		scale = 1.f / dmax(r__1,safmin);
		acoef = scale * sbeta;
		bcoefr = scale * salfr;
		sgemv_(trans, n, n, &acoef, &a[a_offset], lda, &e[jvec * 
			e_dim1 + 1], &c__1, &c_b12, &work[*n * (jvec - 1) + 1]
, &c__1);
		r__1 = -bcoefr;
		sgemv_(trans, n, n, &r__1, &b[b_offset], lda, &e[jvec * 
			e_dim1 + 1], &c__1, &c_b15, &work[*n * (jvec - 1) + 1]
, &c__1);
	    } else {

/*              Complex conjugate pair */

		ilcplx = TRUE_;
		if (jvec == *n) {
		    result[1] = 10.f / ulp;
		    return 0;
		}
/* Computing MAX */
		r__1 = dabs(salfr) + dabs(salfi), r__2 = dabs(sbeta);
		abmax = dmax(r__1,r__2);
		if (dabs(salfr) + dabs(salfi) > alfmax || dabs(sbeta) > 
			betmax || abmax < 1.f) {
		    scale = 1.f / dmax(abmax,safmin);
		    salfr = scale * salfr;
		    salfi = scale * salfi;
		    sbeta = scale * sbeta;
		}
/* Computing MAX */
		r__1 = (dabs(salfr) + dabs(salfi)) * bnorm, r__2 = dabs(sbeta)
			 * anorm, r__1 = max(r__1,r__2);
		scale = 1.f / dmax(r__1,safmin);
		acoef = scale * sbeta;
		bcoefr = scale * salfr;
		bcoefi = scale * salfi;
		if (*left) {
		    bcoefi = -bcoefi;
		}

		sgemv_(trans, n, n, &acoef, &a[a_offset], lda, &e[jvec * 
			e_dim1 + 1], &c__1, &c_b12, &work[*n * (jvec - 1) + 1]
, &c__1);
		r__1 = -bcoefr;
		sgemv_(trans, n, n, &r__1, &b[b_offset], lda, &e[jvec * 
			e_dim1 + 1], &c__1, &c_b15, &work[*n * (jvec - 1) + 1]
, &c__1);
		sgemv_(trans, n, n, &bcoefi, &b[b_offset], lda, &e[(jvec + 1) 
			* e_dim1 + 1], &c__1, &c_b15, &work[*n * (jvec - 1) + 
			1], &c__1);

		sgemv_(trans, n, n, &acoef, &a[a_offset], lda, &e[(jvec + 1) *
			 e_dim1 + 1], &c__1, &c_b12, &work[*n * jvec + 1], &
			c__1);
		r__1 = -bcoefi;
		sgemv_(trans, n, n, &r__1, &b[b_offset], lda, &e[jvec * 
			e_dim1 + 1], &c__1, &c_b15, &work[*n * jvec + 1], &
			c__1);
		r__1 = -bcoefr;
		sgemv_(trans, n, n, &r__1, &b[b_offset], lda, &e[(jvec + 1) * 
			e_dim1 + 1], &c__1, &c_b15, &work[*n * jvec + 1], &
			c__1);
	    }
	}
/* L10: */
    }

/* Computing 2nd power */
    i__1 = *n;
    errnrm = slange_("One", n, n, &work[1], n, &work[i__1 * i__1 + 1]) / enorm;

/*     Compute RESULT(1) */

    result[1] = errnrm / ulp;

/*     Normalization of E: */

    enrmer = 0.f;
    ilcplx = FALSE_;
    i__1 = *n;
    for (jvec = 1; jvec <= i__1; ++jvec) {
	if (ilcplx) {
	    ilcplx = FALSE_;
	} else {
	    temp1 = 0.f;
	    if (alphai[jvec] == 0.f) {
		i__2 = *n;
		for (j = 1; j <= i__2; ++j) {
/* Computing MAX */
		    r__2 = temp1, r__3 = (r__1 = e[j + jvec * e_dim1], dabs(
			    r__1));
		    temp1 = dmax(r__2,r__3);
/* L20: */
		}
/* Computing MAX */
		r__1 = enrmer, r__2 = temp1 - 1.f;
		enrmer = dmax(r__1,r__2);
	    } else {
		ilcplx = TRUE_;
		i__2 = *n;
		for (j = 1; j <= i__2; ++j) {
/* Computing MAX */
		    r__3 = temp1, r__4 = (r__1 = e[j + jvec * e_dim1], dabs(
			    r__1)) + (r__2 = e[j + (jvec + 1) * e_dim1], dabs(
			    r__2));
		    temp1 = dmax(r__3,r__4);
/* L30: */
		}
/* Computing MAX */
		r__1 = enrmer, r__2 = temp1 - 1.f;
		enrmer = dmax(r__1,r__2);
	    }
	}
/* L40: */
    }

/*     Compute RESULT(2) : the normalization error in E. */

    result[2] = enrmer / ((real) (*n) * ulp);

    return 0;

/*     End of SGET52 */

} /* sget52_ */

Exemplo n.º 22

0

Exibir arquivo

Arquivo: stgevc.c Projeto: juanjosegarciaripoll/cblapack

/* Subroutine */ int stgevc_(char *side, char *howmny, logical *select, 
	integer *n, real *s, integer *lds, real *p, integer *ldp, real *vl, 
	integer *ldvl, real *vr, integer *ldvr, integer *mm, integer *m, real 
	*work, integer *info)
{
    /* System generated locals */
    integer p_dim1, p_offset, s_dim1, s_offset, vl_dim1, vl_offset, vr_dim1, 
	    vr_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4, r__5, r__6;

    /* Local variables */
    integer i__, j, ja, jc, je, na, im, jr, jw, nw;
    real big;
    logical lsa, lsb;
    real ulp, sum[4]	/* was [2][2] */;
    integer ibeg, ieig, iend;
    real dmin__, temp, xmax, sump[4]	/* was [2][2] */, sums[4]	/* 
	    was [2][2] */, cim2a, cim2b, cre2a, cre2b;
    real temp2, bdiag[2], acoef, scale;
    logical ilall;
    integer iside;
    real sbeta;
    logical il2by2;
    integer iinfo;
    real small;
    logical compl;
    real anorm, bnorm;
    logical compr;
    real temp2i, temp2r;
    logical ilabad, ilbbad;
    real acoefa, bcoefa, cimaga, cimagb;
    logical ilback;
    real bcoefi, ascale, bscale, creala, crealb, bcoefr;
    real salfar, safmin;
    real xscale, bignum;
    logical ilcomp, ilcplx;
    integer ihwmny;

/*  -- LAPACK routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  STGEVC computes some or all of the right and/or left eigenvectors of */
/*  a pair of real matrices (S,P), where S is a quasi-triangular matrix */
/*  and P is upper triangular.  Matrix pairs of this type are produced by */
/*  the generalized Schur factorization of a matrix pair (A,B): */

/*     A = Q*S*Z**T,  B = Q*P*Z**T */

/*  as computed by SGGHRD + SHGEQZ. */

/*  The right eigenvector x and the left eigenvector y of (S,P) */
/*  corresponding to an eigenvalue w are defined by: */

/*     S*x = w*P*x,  (y**H)*S = w*(y**H)*P, */

/*  where y**H denotes the conjugate tranpose of y. */
/*  The eigenvalues are not input to this routine, but are computed */
/*  directly from the diagonal blocks of S and P. */

/*  This routine returns the matrices X and/or Y of right and left */
/*  eigenvectors of (S,P), or the products Z*X and/or Q*Y, */
/*  where Z and Q are input matrices. */
/*  If Q and Z are the orthogonal factors from the generalized Schur */
/*  factorization of a matrix pair (A,B), then Z*X and Q*Y */
/*  are the matrices of right and left eigenvectors of (A,B). */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'R': compute right eigenvectors only; */
/*          = 'L': compute left eigenvectors only; */
/*          = 'B': compute both right and left eigenvectors. */

/*  HOWMNY  (input) CHARACTER*1 */
/*          = 'A': compute all right and/or left eigenvectors; */
/*          = 'B': compute all right and/or left eigenvectors, */
/*                 backtransformed by the matrices in VR and/or VL; */
/*          = 'S': compute selected right and/or left eigenvectors, */
/*                 specified by the logical array SELECT. */

/*  SELECT  (input) LOGICAL array, dimension (N) */
/*          If HOWMNY='S', SELECT specifies the eigenvectors to be */
/*          computed.  If w(j) is a real eigenvalue, the corresponding */
/*          If w(j) and w(j+1) are the real and imaginary parts of a */
/*          complex eigenvalue, the corresponding complex eigenvector */
/*          is computed if either SELECT(j) or SELECT(j+1) is .TRUE., */
/*          and on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is */
/*          Not referenced if HOWMNY = 'A' or 'B'. */

/*  N       (input) INTEGER */
/*          The order of the matrices S and P.  N >= 0. */

/*  S       (input) REAL array, dimension (LDS,N) */
/*          The upper quasi-triangular matrix S from a generalized Schur */
/*          factorization, as computed by SHGEQZ. */

/*  LDS     (input) INTEGER */
/*          The leading dimension of array S.  LDS >= max(1,N). */

/*  P       (input) REAL array, dimension (LDP,N) */
/*          The upper triangular matrix P from a generalized Schur */
/*          factorization, as computed by SHGEQZ. */
/*          2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks */
/*          of S must be in positive diagonal form. */

/*  LDP     (input) INTEGER */
/*          The leading dimension of array P.  LDP >= max(1,N). */

/*  VL      (input/output) REAL array, dimension (LDVL,MM) */
/*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must */
/*          contain an N-by-N matrix Q (usually the orthogonal matrix Q */
/*          of left Schur vectors returned by SHGEQZ). */
/*          On exit, if SIDE = 'L' or 'B', VL contains: */
/*          if HOWMNY = 'A', the matrix Y of left eigenvectors of (S,P); */
/*          if HOWMNY = 'B', the matrix Q*Y; */
/*          if HOWMNY = 'S', the left eigenvectors of (S,P) specified by */
/*                      SELECT, stored consecutively in the columns of */
/*                      VL, in the same order as their eigenvalues. */

/*          A complex eigenvector corresponding to a complex eigenvalue */
/*          is stored in two consecutive columns, the first holding the */
/*          real part, and the second the imaginary part. */

/*          Not referenced if SIDE = 'R'. */

/*  LDVL    (input) INTEGER */
/*          The leading dimension of array VL.  LDVL >= 1, and if */
/*          SIDE = 'L' or 'B', LDVL >= N. */

/*  VR      (input/output) REAL array, dimension (LDVR,MM) */
/*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must */
/*          contain an N-by-N matrix Z (usually the orthogonal matrix Z */
/*          of right Schur vectors returned by SHGEQZ). */

/*          On exit, if SIDE = 'R' or 'B', VR contains: */
/*          if HOWMNY = 'A', the matrix X of right eigenvectors of (S,P); */
/*          if HOWMNY = 'B' or 'b', the matrix Z*X; */
/*          if HOWMNY = 'S' or 's', the right eigenvectors of (S,P) */
/*                      specified by SELECT, stored consecutively in the */
/*                      columns of VR, in the same order as their */
/*                      eigenvalues. */

/*          A complex eigenvector corresponding to a complex eigenvalue */
/*          is stored in two consecutive columns, the first holding the */
/*          real part and the second the imaginary part. */

/*          Not referenced if SIDE = 'L'. */

/*  LDVR    (input) INTEGER */
/*          The leading dimension of the array VR.  LDVR >= 1, and if */
/*          SIDE = 'R' or 'B', LDVR >= N. */

/*  MM      (input) INTEGER */
/*          The number of columns in the arrays VL and/or VR. MM >= M. */

/*  M       (output) INTEGER */
/*          The number of columns in the arrays VL and/or VR actually */
/*          used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M */
/*          is set to N.  Each selected real eigenvector occupies one */
/*          column and each selected complex eigenvector occupies two */
/*          columns. */

/*  WORK    (workspace) REAL array, dimension (6*N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          > 0:  the 2-by-2 block (INFO:INFO+1) does not have a complex */
/*                eigenvalue. */

/*  Further Details */
/*  =============== */

/*  Allocation of workspace: */
/*  ---------- -- --------- */

/*     WORK( j ) = 1-norm of j-th column of A, above the diagonal */
/*     WORK( N+j ) = 1-norm of j-th column of B, above the diagonal */
/*     WORK( 2*N+1:3*N ) = real part of eigenvector */
/*     WORK( 3*N+1:4*N ) = imaginary part of eigenvector */
/*     WORK( 4*N+1:5*N ) = real part of back-transformed eigenvector */
/*     WORK( 5*N+1:6*N ) = imaginary part of back-transformed eigenvector */

/*  Rowwise vs. columnwise solution methods: */
/*  ------- --  ---------- -------- ------- */

/*  Finding a generalized eigenvector consists basically of solving the */
/*  singular triangular system */

/*   (A - w B) x = 0     (for right) or:   (A - w B)**H y = 0  (for left) */

/*  Consider finding the i-th right eigenvector (assume all eigenvalues */
/*  are real). The equation to be solved is: */
/*       n                   i */
/*  0 = sum  C(j,k) v(k)  = sum  C(j,k) v(k)     for j = i,. . .,1 */
/*      k=j                 k=j */

/*  where  C = (A - w B)  (The components v(i+1:n) are 0.) */

/*  The "rowwise" method is: */

/*  (1)  v(i) := 1 */
/*  for j = i-1,. . .,1: */
/*                          i */
/*      (2) compute  s = - sum C(j,k) v(k)   and */
/*                        k=j+1 */

/*      (3) v(j) := s / C(j,j) */

/*  Step 2 is sometimes called the "dot product" step, since it is an */
/*  inner product between the j-th row and the portion of the eigenvector */
/*  that has been computed so far. */

/*  The "columnwise" method consists basically in doing the sums */
/*  for all the rows in parallel.  As each v(j) is computed, the */
/*  contribution of v(j) times the j-th column of C is added to the */
/*  partial sums.  Since FORTRAN arrays are stored columnwise, this has */
/*  the advantage that at each step, the elements of C that are accessed */
/*  are adjacent to one another, whereas with the rowwise method, the */
/*  elements accessed at a step are spaced LDS (and LDP) words apart. */

/*  When finding left eigenvectors, the matrix in question is the */
/*  transpose of the one in storage, so the rowwise method then */
/*  actually accesses columns of A and B at each step, and so is the */
/*  preferred method. */

/*  ===================================================================== */

/*     Decode and Test the input parameters */

    /* Parameter adjustments */
    --select;
    s_dim1 = *lds;
    s_offset = 1 + s_dim1;
    s -= s_offset;
    p_dim1 = *ldp;
    p_offset = 1 + p_dim1;
    p -= p_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1;
    vr -= vr_offset;
    --work;

    /* Function Body */
    if (lsame_(howmny, "A")) {
	ihwmny = 1;
	ilall = TRUE_;
	ilback = FALSE_;
    } else if (lsame_(howmny, "S")) {
	ihwmny = 2;
	ilall = FALSE_;
	ilback = FALSE_;
    } else if (lsame_(howmny, "B")) {
	ihwmny = 3;
	ilall = TRUE_;
	ilback = TRUE_;
    } else {
	ihwmny = -1;
	ilall = TRUE_;
    }

    if (lsame_(side, "R")) {
	iside = 1;
	compl = FALSE_;
	compr = TRUE_;
    } else if (lsame_(side, "L")) {
	iside = 2;
	compl = TRUE_;
	compr = FALSE_;
    } else if (lsame_(side, "B")) {
	iside = 3;
	compl = TRUE_;
	compr = TRUE_;
    } else {
	iside = -1;
    }

    *info = 0;
    if (iside < 0) {
	*info = -1;
    } else if (ihwmny < 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lds < max(1,*n)) {
	*info = -6;
    } else if (*ldp < max(1,*n)) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("STGEVC", &i__1);
	return 0;
    }

/*     Count the number of eigenvectors to be computed */

    if (! ilall) {
	im = 0;
	ilcplx = FALSE_;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    if (ilcplx) {
		ilcplx = FALSE_;
		goto L10;
	    }
	    if (j < *n) {
		if (s[j + 1 + j * s_dim1] != 0.f) {
		    ilcplx = TRUE_;
		}
	    }
	    if (ilcplx) {
		if (select[j] || select[j + 1]) {
		    im += 2;
		}
	    } else {
		if (select[j]) {
		    ++im;
		}
	    }
L10:
	    ;
	}
    } else {
	im = *n;
    }

/*     Check 2-by-2 diagonal blocks of A, B */

    ilabad = FALSE_;
    ilbbad = FALSE_;
    i__1 = *n - 1;
    for (j = 1; j <= i__1; ++j) {
	if (s[j + 1 + j * s_dim1] != 0.f) {
	    if (p[j + j * p_dim1] == 0.f || p[j + 1 + (j + 1) * p_dim1] == 
		    0.f || p[j + (j + 1) * p_dim1] != 0.f) {
		ilbbad = TRUE_;
	    }
	    if (j < *n - 1) {
		if (s[j + 2 + (j + 1) * s_dim1] != 0.f) {
		    ilabad = TRUE_;
		}
	    }
	}
    }

    if (ilabad) {
	*info = -5;
    } else if (ilbbad) {
	*info = -7;
    } else if (compl && *ldvl < *n || *ldvl < 1) {
	*info = -10;
    } else if (compr && *ldvr < *n || *ldvr < 1) {
	*info = -12;
    } else if (*mm < im) {
	*info = -13;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("STGEVC", &i__1);
	return 0;
    }

/*     Quick return if possible */

    *m = im;
    if (*n == 0) {
	return 0;
    }

/*     Machine Constants */

    safmin = slamch_("Safe minimum");
    big = 1.f / safmin;
    slabad_(&safmin, &big);
    ulp = slamch_("Epsilon") * slamch_("Base");
    small = safmin * *n / ulp;
    big = 1.f / small;
    bignum = 1.f / (safmin * *n);

/*     Compute the 1-norm of each column of the strictly upper triangular */
/*     part (i.e., excluding all elements belonging to the diagonal */
/*     blocks) of A and B to check for possible overflow in the */
/*     triangular solver. */

    anorm = (r__1 = s[s_dim1 + 1], dabs(r__1));
    if (*n > 1) {
	anorm += (r__1 = s[s_dim1 + 2], dabs(r__1));
    }
    bnorm = (r__1 = p[p_dim1 + 1], dabs(r__1));
    work[1] = 0.f;
    work[*n + 1] = 0.f;

    i__1 = *n;
    for (j = 2; j <= i__1; ++j) {
	temp = 0.f;
	temp2 = 0.f;
	if (s[j + (j - 1) * s_dim1] == 0.f) {
	    iend = j - 1;
	} else {
	    iend = j - 2;
	}
	i__2 = iend;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    temp += (r__1 = s[i__ + j * s_dim1], dabs(r__1));
	    temp2 += (r__1 = p[i__ + j * p_dim1], dabs(r__1));
	}
	work[j] = temp;
	work[*n + j] = temp2;
/* Computing MIN */
	i__3 = j + 1;
	i__2 = min(i__3,*n);
	for (i__ = iend + 1; i__ <= i__2; ++i__) {
	    temp += (r__1 = s[i__ + j * s_dim1], dabs(r__1));
	    temp2 += (r__1 = p[i__ + j * p_dim1], dabs(r__1));
	}
	anorm = dmax(anorm,temp);
	bnorm = dmax(bnorm,temp2);
    }

    ascale = 1.f / dmax(anorm,safmin);
    bscale = 1.f / dmax(bnorm,safmin);

/*     Left eigenvectors */

    if (compl) {
	ieig = 0;

/*        Main loop over eigenvalues */

	ilcplx = FALSE_;
	i__1 = *n;
	for (je = 1; je <= i__1; ++je) {

/*           Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or */
/*           (b) this would be the second of a complex pair. */
/*           Check for complex eigenvalue, so as to be sure of which */
/*           entry(-ies) of SELECT to look at. */

	    if (ilcplx) {
		ilcplx = FALSE_;
		goto L220;
	    }
	    nw = 1;
	    if (je < *n) {
		if (s[je + 1 + je * s_dim1] != 0.f) {
		    ilcplx = TRUE_;
		    nw = 2;
		}
	    }
	    if (ilall) {
		ilcomp = TRUE_;
	    } else if (ilcplx) {
		ilcomp = select[je] || select[je + 1];
	    } else {
		ilcomp = select[je];
	    }
	    if (! ilcomp) {
		goto L220;
	    }

/*           Decide if (a) singular pencil, (b) real eigenvalue, or */
/*           (c) complex eigenvalue. */

	    if (! ilcplx) {
		if ((r__1 = s[je + je * s_dim1], dabs(r__1)) <= safmin && (
			r__2 = p[je + je * p_dim1], dabs(r__2)) <= safmin) {

/*                 Singular matrix pencil -- return unit eigenvector */

		    ++ieig;
		    i__2 = *n;
		    for (jr = 1; jr <= i__2; ++jr) {
			vl[jr + ieig * vl_dim1] = 0.f;
		    }
		    vl[ieig + ieig * vl_dim1] = 1.f;
		    goto L220;
		}
	    }

/*           Clear vector */

	    i__2 = nw * *n;
	    for (jr = 1; jr <= i__2; ++jr) {
		work[(*n << 1) + jr] = 0.f;
	    }
/*                                                 T */
/*           Compute coefficients in  ( a A - b B )  y = 0 */
/*              a  is  ACOEF */
/*              b  is  BCOEFR + i*BCOEFI */

	    if (! ilcplx) {

/*              Real eigenvalue */

/* Computing MAX */
		r__3 = (r__1 = s[je + je * s_dim1], dabs(r__1)) * ascale, 
			r__4 = (r__2 = p[je + je * p_dim1], dabs(r__2)) * 
			bscale, r__3 = max(r__3,r__4);
		temp = 1.f / dmax(r__3,safmin);
		salfar = temp * s[je + je * s_dim1] * ascale;
		sbeta = temp * p[je + je * p_dim1] * bscale;
		acoef = sbeta * ascale;
		bcoefr = salfar * bscale;
		bcoefi = 0.f;

/*              Scale to avoid underflow */

		scale = 1.f;
		lsa = dabs(sbeta) >= safmin && dabs(acoef) < small;
		lsb = dabs(salfar) >= safmin && dabs(bcoefr) < small;
		if (lsa) {
		    scale = small / dabs(sbeta) * dmin(anorm,big);
		}
		if (lsb) {
/* Computing MAX */
		    r__1 = scale, r__2 = small / dabs(salfar) * dmin(bnorm,
			    big);
		    scale = dmax(r__1,r__2);
		}
		if (lsa || lsb) {
/* Computing MIN */
/* Computing MAX */
		    r__3 = 1.f, r__4 = dabs(acoef), r__3 = max(r__3,r__4), 
			    r__4 = dabs(bcoefr);
		    r__1 = scale, r__2 = 1.f / (safmin * dmax(r__3,r__4));
		    scale = dmin(r__1,r__2);
		    if (lsa) {
			acoef = ascale * (scale * sbeta);
		    } else {
			acoef = scale * acoef;
		    }
		    if (lsb) {
			bcoefr = bscale * (scale * salfar);
		    } else {
			bcoefr = scale * bcoefr;
		    }
		}
		acoefa = dabs(acoef);
		bcoefa = dabs(bcoefr);

/*              First component is 1 */

		work[(*n << 1) + je] = 1.f;
		xmax = 1.f;
	    } else {

/*              Complex eigenvalue */

		r__1 = safmin * 100.f;
		slag2_(&s[je + je * s_dim1], lds, &p[je + je * p_dim1], ldp, &
			r__1, &acoef, &temp, &bcoefr, &temp2, &bcoefi);
		bcoefi = -bcoefi;
		if (bcoefi == 0.f) {
		    *info = je;
		    return 0;
		}

/*              Scale to avoid over/underflow */

		acoefa = dabs(acoef);
		bcoefa = dabs(bcoefr) + dabs(bcoefi);
		scale = 1.f;
		if (acoefa * ulp < safmin && acoefa >= safmin) {
		    scale = safmin / ulp / acoefa;
		}
		if (bcoefa * ulp < safmin && bcoefa >= safmin) {
/* Computing MAX */
		    r__1 = scale, r__2 = safmin / ulp / bcoefa;
		    scale = dmax(r__1,r__2);
		}
		if (safmin * acoefa > ascale) {
		    scale = ascale / (safmin * acoefa);
		}
		if (safmin * bcoefa > bscale) {
/* Computing MIN */
		    r__1 = scale, r__2 = bscale / (safmin * bcoefa);
		    scale = dmin(r__1,r__2);
		}
		if (scale != 1.f) {
		    acoef = scale * acoef;
		    acoefa = dabs(acoef);
		    bcoefr = scale * bcoefr;
		    bcoefi = scale * bcoefi;
		    bcoefa = dabs(bcoefr) + dabs(bcoefi);
		}

/*              Compute first two components of eigenvector */

		temp = acoef * s[je + 1 + je * s_dim1];
		temp2r = acoef * s[je + je * s_dim1] - bcoefr * p[je + je * 
			p_dim1];
		temp2i = -bcoefi * p[je + je * p_dim1];
		if (dabs(temp) > dabs(temp2r) + dabs(temp2i)) {
		    work[(*n << 1) + je] = 1.f;
		    work[*n * 3 + je] = 0.f;
		    work[(*n << 1) + je + 1] = -temp2r / temp;
		    work[*n * 3 + je + 1] = -temp2i / temp;
		} else {
		    work[(*n << 1) + je + 1] = 1.f;
		    work[*n * 3 + je + 1] = 0.f;
		    temp = acoef * s[je + (je + 1) * s_dim1];
		    work[(*n << 1) + je] = (bcoefr * p[je + 1 + (je + 1) * 
			    p_dim1] - acoef * s[je + 1 + (je + 1) * s_dim1]) /
			     temp;
		    work[*n * 3 + je] = bcoefi * p[je + 1 + (je + 1) * p_dim1]
			     / temp;
		}
/* Computing MAX */
		r__5 = (r__1 = work[(*n << 1) + je], dabs(r__1)) + (r__2 = 
			work[*n * 3 + je], dabs(r__2)), r__6 = (r__3 = work[(*
			n << 1) + je + 1], dabs(r__3)) + (r__4 = work[*n * 3 
			+ je + 1], dabs(r__4));
		xmax = dmax(r__5,r__6);
	    }

/* Computing MAX */
	    r__1 = ulp * acoefa * anorm, r__2 = ulp * bcoefa * bnorm, r__1 = 
		    max(r__1,r__2);
	    dmin__ = dmax(r__1,safmin);

/*                                           T */
/*           Triangular solve of  (a A - b B)  y = 0 */

/*                                   T */
/*           (rowwise in  (a A - b B) , or columnwise in (a A - b B) ) */

	    il2by2 = FALSE_;

	    i__2 = *n;
	    for (j = je + nw; j <= i__2; ++j) {
		if (il2by2) {
		    il2by2 = FALSE_;
		    goto L160;
		}

		na = 1;
		bdiag[0] = p[j + j * p_dim1];
		if (j < *n) {
		    if (s[j + 1 + j * s_dim1] != 0.f) {
			il2by2 = TRUE_;
			bdiag[1] = p[j + 1 + (j + 1) * p_dim1];
			na = 2;
		    }
		}

/*              Check whether scaling is necessary for dot products */

		xscale = 1.f / dmax(1.f,xmax);
/* Computing MAX */
		r__1 = work[j], r__2 = work[*n + j], r__1 = max(r__1,r__2), 
			r__2 = acoefa * work[j] + bcoefa * work[*n + j];
		temp = dmax(r__1,r__2);
		if (il2by2) {
/* Computing MAX */
		    r__1 = temp, r__2 = work[j + 1], r__1 = max(r__1,r__2), 
			    r__2 = work[*n + j + 1], r__1 = max(r__1,r__2), 
			    r__2 = acoefa * work[j + 1] + bcoefa * work[*n + 
			    j + 1];
		    temp = dmax(r__1,r__2);
		}
		if (temp > bignum * xscale) {
		    i__3 = nw - 1;
		    for (jw = 0; jw <= i__3; ++jw) {
			i__4 = j - 1;
			for (jr = je; jr <= i__4; ++jr) {
			    work[(jw + 2) * *n + jr] = xscale * work[(jw + 2) 
				    * *n + jr];
			}
		    }
		    xmax *= xscale;
		}

/*              Compute dot products */

/*                    j-1 */
/*              SUM = sum  conjg( a*S(k,j) - b*P(k,j) )*x(k) */
/*                    k=je */

/*              To reduce the op count, this is done as */

/*              _        j-1                  _        j-1 */
/*              a*conjg( sum  S(k,j)*x(k) ) - b*conjg( sum  P(k,j)*x(k) ) */
/*                       k=je                          k=je */

/*              which may cause underflow problems if A or B are close */
/*              to underflow.  (E.g., less than SMALL.) */

/*              A series of compiler directives to defeat vectorization */
/*              for the next loop */

/* $PL$ CMCHAR=' ' */
/* DIR$          NEXTSCALAR */
/* $DIR          SCALAR */
/* DIR$          NEXT SCALAR */
/* VD$L          NOVECTOR */
/* DEC$          NOVECTOR */
/* VD$           NOVECTOR */
/* VDIR          NOVECTOR */
/* VOCL          LOOP,SCALAR */
/* IBM           PREFER SCALAR */
/* $PL$ CMCHAR='*' */

		i__3 = nw;
		for (jw = 1; jw <= i__3; ++jw) {

/* $PL$ CMCHAR=' ' */
/* DIR$             NEXTSCALAR */
/* $DIR             SCALAR */
/* DIR$             NEXT SCALAR */
/* VD$L             NOVECTOR */
/* DEC$             NOVECTOR */
/* VD$              NOVECTOR */
/* VDIR             NOVECTOR */
/* VOCL             LOOP,SCALAR */
/* IBM              PREFER SCALAR */
/* $PL$ CMCHAR='*' */

		    i__4 = na;
		    for (ja = 1; ja <= i__4; ++ja) {
			sums[ja + (jw << 1) - 3] = 0.f;
			sump[ja + (jw << 1) - 3] = 0.f;

			i__5 = j - 1;
			for (jr = je; jr <= i__5; ++jr) {
			    sums[ja + (jw << 1) - 3] += s[jr + (j + ja - 1) * 
				    s_dim1] * work[(jw + 1) * *n + jr];
			    sump[ja + (jw << 1) - 3] += p[jr + (j + ja - 1) * 
				    p_dim1] * work[(jw + 1) * *n + jr];
			}
		    }
		}

/* $PL$ CMCHAR=' ' */
/* DIR$          NEXTSCALAR */
/* $DIR          SCALAR */
/* DIR$          NEXT SCALAR */
/* VD$L          NOVECTOR */
/* DEC$          NOVECTOR */
/* VD$           NOVECTOR */
/* VDIR          NOVECTOR */
/* VOCL          LOOP,SCALAR */
/* IBM           PREFER SCALAR */
/* $PL$ CMCHAR='*' */

		i__3 = na;
		for (ja = 1; ja <= i__3; ++ja) {
		    if (ilcplx) {
			sum[ja - 1] = -acoef * sums[ja - 1] + bcoefr * sump[
				ja - 1] - bcoefi * sump[ja + 1];
			sum[ja + 1] = -acoef * sums[ja + 1] + bcoefr * sump[
				ja + 1] + bcoefi * sump[ja - 1];
		    } else {
			sum[ja - 1] = -acoef * sums[ja - 1] + bcoefr * sump[
				ja - 1];
		    }
		}

/*                                  T */
/*              Solve  ( a A - b B )  y = SUM(,) */
/*              with scaling and perturbation of the denominator */

		slaln2_(&c_true, &na, &nw, &dmin__, &acoef, &s[j + j * s_dim1]
, lds, bdiag, &bdiag[1], sum, &c__2, &bcoefr, &bcoefi, 
			 &work[(*n << 1) + j], n, &scale, &temp, &iinfo);
		if (scale < 1.f) {
		    i__3 = nw - 1;
		    for (jw = 0; jw <= i__3; ++jw) {
			i__4 = j - 1;
			for (jr = je; jr <= i__4; ++jr) {
			    work[(jw + 2) * *n + jr] = scale * work[(jw + 2) *
				     *n + jr];
			}
		    }
		    xmax = scale * xmax;
		}
		xmax = dmax(xmax,temp);
L160:
		;
	    }

/*           Copy eigenvector to VL, back transforming if */
/*           HOWMNY='B'. */

	    ++ieig;
	    if (ilback) {
		i__2 = nw - 1;
		for (jw = 0; jw <= i__2; ++jw) {
		    i__3 = *n + 1 - je;
		    sgemv_("N", n, &i__3, &c_b34, &vl[je * vl_dim1 + 1], ldvl, 
			     &work[(jw + 2) * *n + je], &c__1, &c_b36, &work[(
			    jw + 4) * *n + 1], &c__1);
		}
		slacpy_(" ", n, &nw, &work[(*n << 2) + 1], n, &vl[je * 
			vl_dim1 + 1], ldvl);
		ibeg = 1;
	    } else {
		slacpy_(" ", n, &nw, &work[(*n << 1) + 1], n, &vl[ieig * 
			vl_dim1 + 1], ldvl);
		ibeg = je;
	    }

/*           Scale eigenvector */

	    xmax = 0.f;
	    if (ilcplx) {
		i__2 = *n;
		for (j = ibeg; j <= i__2; ++j) {
/* Computing MAX */
		    r__3 = xmax, r__4 = (r__1 = vl[j + ieig * vl_dim1], dabs(
			    r__1)) + (r__2 = vl[j + (ieig + 1) * vl_dim1], 
			    dabs(r__2));
		    xmax = dmax(r__3,r__4);
		}
	    } else {
		i__2 = *n;
		for (j = ibeg; j <= i__2; ++j) {
/* Computing MAX */
		    r__2 = xmax, r__3 = (r__1 = vl[j + ieig * vl_dim1], dabs(
			    r__1));
		    xmax = dmax(r__2,r__3);
		}
	    }

	    if (xmax > safmin) {
		xscale = 1.f / xmax;

		i__2 = nw - 1;
		for (jw = 0; jw <= i__2; ++jw) {
		    i__3 = *n;
		    for (jr = ibeg; jr <= i__3; ++jr) {
			vl[jr + (ieig + jw) * vl_dim1] = xscale * vl[jr + (
				ieig + jw) * vl_dim1];
		    }
		}
	    }
	    ieig = ieig + nw - 1;

L220:
	    ;
	}
    }

/*     Right eigenvectors */

    if (compr) {
	ieig = im + 1;

/*        Main loop over eigenvalues */

	ilcplx = FALSE_;
	for (je = *n; je >= 1; --je) {

/*           Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or */
/*           (b) this would be the second of a complex pair. */
/*           Check for complex eigenvalue, so as to be sure of which */
/*           entry(-ies) of SELECT to look at -- if complex, SELECT(JE) */
/*           or SELECT(JE-1). */
/*           If this is a complex pair, the 2-by-2 diagonal block */
/*           corresponding to the eigenvalue is in rows/columns JE-1:JE */

	    if (ilcplx) {
		ilcplx = FALSE_;
		goto L500;
	    }
	    nw = 1;
	    if (je > 1) {
		if (s[je + (je - 1) * s_dim1] != 0.f) {
		    ilcplx = TRUE_;
		    nw = 2;
		}
	    }
	    if (ilall) {
		ilcomp = TRUE_;
	    } else if (ilcplx) {
		ilcomp = select[je] || select[je - 1];
	    } else {
		ilcomp = select[je];
	    }
	    if (! ilcomp) {
		goto L500;
	    }

/*           Decide if (a) singular pencil, (b) real eigenvalue, or */
/*           (c) complex eigenvalue. */

	    if (! ilcplx) {
		if ((r__1 = s[je + je * s_dim1], dabs(r__1)) <= safmin && (
			r__2 = p[je + je * p_dim1], dabs(r__2)) <= safmin) {

/*                 Singular matrix pencil -- unit eigenvector */

		    --ieig;
		    i__1 = *n;
		    for (jr = 1; jr <= i__1; ++jr) {
			vr[jr + ieig * vr_dim1] = 0.f;
		    }
		    vr[ieig + ieig * vr_dim1] = 1.f;
		    goto L500;
		}
	    }

/*           Clear vector */

	    i__1 = nw - 1;
	    for (jw = 0; jw <= i__1; ++jw) {
		i__2 = *n;
		for (jr = 1; jr <= i__2; ++jr) {
		    work[(jw + 2) * *n + jr] = 0.f;
		}
	    }

/*           Compute coefficients in  ( a A - b B ) x = 0 */
/*              a  is  ACOEF */
/*              b  is  BCOEFR + i*BCOEFI */

	    if (! ilcplx) {

/*              Real eigenvalue */

/* Computing MAX */
		r__3 = (r__1 = s[je + je * s_dim1], dabs(r__1)) * ascale, 
			r__4 = (r__2 = p[je + je * p_dim1], dabs(r__2)) * 
			bscale, r__3 = max(r__3,r__4);
		temp = 1.f / dmax(r__3,safmin);
		salfar = temp * s[je + je * s_dim1] * ascale;
		sbeta = temp * p[je + je * p_dim1] * bscale;
		acoef = sbeta * ascale;
		bcoefr = salfar * bscale;
		bcoefi = 0.f;

/*              Scale to avoid underflow */

		scale = 1.f;
		lsa = dabs(sbeta) >= safmin && dabs(acoef) < small;
		lsb = dabs(salfar) >= safmin && dabs(bcoefr) < small;
		if (lsa) {
		    scale = small / dabs(sbeta) * dmin(anorm,big);
		}
		if (lsb) {
/* Computing MAX */
		    r__1 = scale, r__2 = small / dabs(salfar) * dmin(bnorm,
			    big);
		    scale = dmax(r__1,r__2);
		}
		if (lsa || lsb) {
/* Computing MIN */
/* Computing MAX */
		    r__3 = 1.f, r__4 = dabs(acoef), r__3 = max(r__3,r__4), 
			    r__4 = dabs(bcoefr);
		    r__1 = scale, r__2 = 1.f / (safmin * dmax(r__3,r__4));
		    scale = dmin(r__1,r__2);
		    if (lsa) {
			acoef = ascale * (scale * sbeta);
		    } else {
			acoef = scale * acoef;
		    }
		    if (lsb) {
			bcoefr = bscale * (scale * salfar);
		    } else {
			bcoefr = scale * bcoefr;
		    }
		}
		acoefa = dabs(acoef);
		bcoefa = dabs(bcoefr);

/*              First component is 1 */

		work[(*n << 1) + je] = 1.f;
		xmax = 1.f;

/*              Compute contribution from column JE of A and B to sum */
/*              (See "Further Details", above.) */

		i__1 = je - 1;
		for (jr = 1; jr <= i__1; ++jr) {
		    work[(*n << 1) + jr] = bcoefr * p[jr + je * p_dim1] - 
			    acoef * s[jr + je * s_dim1];
		}
	    } else {

/*              Complex eigenvalue */

		r__1 = safmin * 100.f;
		slag2_(&s[je - 1 + (je - 1) * s_dim1], lds, &p[je - 1 + (je - 
			1) * p_dim1], ldp, &r__1, &acoef, &temp, &bcoefr, &
			temp2, &bcoefi);
		if (bcoefi == 0.f) {
		    *info = je - 1;
		    return 0;
		}

/*              Scale to avoid over/underflow */

		acoefa = dabs(acoef);
		bcoefa = dabs(bcoefr) + dabs(bcoefi);
		scale = 1.f;
		if (acoefa * ulp < safmin && acoefa >= safmin) {
		    scale = safmin / ulp / acoefa;
		}
		if (bcoefa * ulp < safmin && bcoefa >= safmin) {
/* Computing MAX */
		    r__1 = scale, r__2 = safmin / ulp / bcoefa;
		    scale = dmax(r__1,r__2);
		}
		if (safmin * acoefa > ascale) {
		    scale = ascale / (safmin * acoefa);
		}
		if (safmin * bcoefa > bscale) {
/* Computing MIN */
		    r__1 = scale, r__2 = bscale / (safmin * bcoefa);
		    scale = dmin(r__1,r__2);
		}
		if (scale != 1.f) {
		    acoef = scale * acoef;
		    acoefa = dabs(acoef);
		    bcoefr = scale * bcoefr;
		    bcoefi = scale * bcoefi;
		    bcoefa = dabs(bcoefr) + dabs(bcoefi);
		}

/*              Compute first two components of eigenvector */
/*              and contribution to sums */

		temp = acoef * s[je + (je - 1) * s_dim1];
		temp2r = acoef * s[je + je * s_dim1] - bcoefr * p[je + je * 
			p_dim1];
		temp2i = -bcoefi * p[je + je * p_dim1];
		if (dabs(temp) >= dabs(temp2r) + dabs(temp2i)) {
		    work[(*n << 1) + je] = 1.f;
		    work[*n * 3 + je] = 0.f;
		    work[(*n << 1) + je - 1] = -temp2r / temp;
		    work[*n * 3 + je - 1] = -temp2i / temp;
		} else {
		    work[(*n << 1) + je - 1] = 1.f;
		    work[*n * 3 + je - 1] = 0.f;
		    temp = acoef * s[je - 1 + je * s_dim1];
		    work[(*n << 1) + je] = (bcoefr * p[je - 1 + (je - 1) * 
			    p_dim1] - acoef * s[je - 1 + (je - 1) * s_dim1]) /
			     temp;
		    work[*n * 3 + je] = bcoefi * p[je - 1 + (je - 1) * p_dim1]
			     / temp;
		}

/* Computing MAX */
		r__5 = (r__1 = work[(*n << 1) + je], dabs(r__1)) + (r__2 = 
			work[*n * 3 + je], dabs(r__2)), r__6 = (r__3 = work[(*
			n << 1) + je - 1], dabs(r__3)) + (r__4 = work[*n * 3 
			+ je - 1], dabs(r__4));
		xmax = dmax(r__5,r__6);

/*              Compute contribution from columns JE and JE-1 */
/*              of A and B to the sums. */

		creala = acoef * work[(*n << 1) + je - 1];
		cimaga = acoef * work[*n * 3 + je - 1];
		crealb = bcoefr * work[(*n << 1) + je - 1] - bcoefi * work[*n 
			* 3 + je - 1];
		cimagb = bcoefi * work[(*n << 1) + je - 1] + bcoefr * work[*n 
			* 3 + je - 1];
		cre2a = acoef * work[(*n << 1) + je];
		cim2a = acoef * work[*n * 3 + je];
		cre2b = bcoefr * work[(*n << 1) + je] - bcoefi * work[*n * 3 
			+ je];
		cim2b = bcoefi * work[(*n << 1) + je] + bcoefr * work[*n * 3 
			+ je];
		i__1 = je - 2;
		for (jr = 1; jr <= i__1; ++jr) {
		    work[(*n << 1) + jr] = -creala * s[jr + (je - 1) * s_dim1]
			     + crealb * p[jr + (je - 1) * p_dim1] - cre2a * s[
			    jr + je * s_dim1] + cre2b * p[jr + je * p_dim1];
		    work[*n * 3 + jr] = -cimaga * s[jr + (je - 1) * s_dim1] + 
			    cimagb * p[jr + (je - 1) * p_dim1] - cim2a * s[jr 
			    + je * s_dim1] + cim2b * p[jr + je * p_dim1];
		}
	    }

/* Computing MAX */
	    r__1 = ulp * acoefa * anorm, r__2 = ulp * bcoefa * bnorm, r__1 = 
		    max(r__1,r__2);
	    dmin__ = dmax(r__1,safmin);

/*           Columnwise triangular solve of  (a A - b B)  x = 0 */

	    il2by2 = FALSE_;
	    for (j = je - nw; j >= 1; --j) {

/*              If a 2-by-2 block, is in position j-1:j, wait until */
/*              next iteration to process it (when it will be j:j+1) */

		if (! il2by2 && j > 1) {
		    if (s[j + (j - 1) * s_dim1] != 0.f) {
			il2by2 = TRUE_;
			goto L370;
		    }
		}
		bdiag[0] = p[j + j * p_dim1];
		if (il2by2) {
		    na = 2;
		    bdiag[1] = p[j + 1 + (j + 1) * p_dim1];
		} else {
		    na = 1;
		}

/*              Compute x(j) (and x(j+1), if 2-by-2 block) */

		slaln2_(&c_false, &na, &nw, &dmin__, &acoef, &s[j + j * 
			s_dim1], lds, bdiag, &bdiag[1], &work[(*n << 1) + j], 
			n, &bcoefr, &bcoefi, sum, &c__2, &scale, &temp, &
			iinfo);
		if (scale < 1.f) {

		    i__1 = nw - 1;
		    for (jw = 0; jw <= i__1; ++jw) {
			i__2 = je;
			for (jr = 1; jr <= i__2; ++jr) {
			    work[(jw + 2) * *n + jr] = scale * work[(jw + 2) *
				     *n + jr];
			}
		    }
		}
/* Computing MAX */
		r__1 = scale * xmax;
		xmax = dmax(r__1,temp);

		i__1 = nw;
		for (jw = 1; jw <= i__1; ++jw) {
		    i__2 = na;
		    for (ja = 1; ja <= i__2; ++ja) {
			work[(jw + 1) * *n + j + ja - 1] = sum[ja + (jw << 1) 
				- 3];
		    }
		}

/*              w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling */

		if (j > 1) {

/*                 Check whether scaling is necessary for sum. */

		    xscale = 1.f / dmax(1.f,xmax);
		    temp = acoefa * work[j] + bcoefa * work[*n + j];
		    if (il2by2) {
/* Computing MAX */
			r__1 = temp, r__2 = acoefa * work[j + 1] + bcoefa * 
				work[*n + j + 1];
			temp = dmax(r__1,r__2);
		    }
/* Computing MAX */
		    r__1 = max(temp,acoefa);
		    temp = dmax(r__1,bcoefa);
		    if (temp > bignum * xscale) {

			i__1 = nw - 1;
			for (jw = 0; jw <= i__1; ++jw) {
			    i__2 = je;
			    for (jr = 1; jr <= i__2; ++jr) {
				work[(jw + 2) * *n + jr] = xscale * work[(jw 
					+ 2) * *n + jr];
			    }
			}
			xmax *= xscale;
		    }

/*                 Compute the contributions of the off-diagonals of */
/*                 column j (and j+1, if 2-by-2 block) of A and B to the */
/*                 sums. */

		    i__1 = na;
		    for (ja = 1; ja <= i__1; ++ja) {
			if (ilcplx) {
			    creala = acoef * work[(*n << 1) + j + ja - 1];
			    cimaga = acoef * work[*n * 3 + j + ja - 1];
			    crealb = bcoefr * work[(*n << 1) + j + ja - 1] - 
				    bcoefi * work[*n * 3 + j + ja - 1];
			    cimagb = bcoefi * work[(*n << 1) + j + ja - 1] + 
				    bcoefr * work[*n * 3 + j + ja - 1];
			    i__2 = j - 1;
			    for (jr = 1; jr <= i__2; ++jr) {
				work[(*n << 1) + jr] = work[(*n << 1) + jr] - 
					creala * s[jr + (j + ja - 1) * s_dim1]
					 + crealb * p[jr + (j + ja - 1) * 
					p_dim1];
				work[*n * 3 + jr] = work[*n * 3 + jr] - 
					cimaga * s[jr + (j + ja - 1) * s_dim1]
					 + cimagb * p[jr + (j + ja - 1) * 
					p_dim1];
			    }
			} else {
			    creala = acoef * work[(*n << 1) + j + ja - 1];
			    crealb = bcoefr * work[(*n << 1) + j + ja - 1];
			    i__2 = j - 1;
			    for (jr = 1; jr <= i__2; ++jr) {
				work[(*n << 1) + jr] = work[(*n << 1) + jr] - 
					creala * s[jr + (j + ja - 1) * s_dim1]
					 + crealb * p[jr + (j + ja - 1) * 
					p_dim1];
			    }
			}
		    }
		}

		il2by2 = FALSE_;
L370:
		;
	    }

/*           Copy eigenvector to VR, back transforming if */
/*           HOWMNY='B'. */

	    ieig -= nw;
	    if (ilback) {

		i__1 = nw - 1;
		for (jw = 0; jw <= i__1; ++jw) {
		    i__2 = *n;
		    for (jr = 1; jr <= i__2; ++jr) {
			work[(jw + 4) * *n + jr] = work[(jw + 2) * *n + 1] * 
				vr[jr + vr_dim1];
		    }

/*                 A series of compiler directives to defeat */
/*                 vectorization for the next loop */

		    i__2 = je;
		    for (jc = 2; jc <= i__2; ++jc) {
			i__3 = *n;
			for (jr = 1; jr <= i__3; ++jr) {
			    work[(jw + 4) * *n + jr] += work[(jw + 2) * *n + 
				    jc] * vr[jr + jc * vr_dim1];
			}
		    }
		}

		i__1 = nw - 1;
		for (jw = 0; jw <= i__1; ++jw) {
		    i__2 = *n;
		    for (jr = 1; jr <= i__2; ++jr) {
			vr[jr + (ieig + jw) * vr_dim1] = work[(jw + 4) * *n + 
				jr];
		    }
		}

		iend = *n;
	    } else {
		i__1 = nw - 1;
		for (jw = 0; jw <= i__1; ++jw) {
		    i__2 = *n;
		    for (jr = 1; jr <= i__2; ++jr) {
			vr[jr + (ieig + jw) * vr_dim1] = work[(jw + 2) * *n + 
				jr];
		    }
		}

		iend = je;
	    }

/*           Scale eigenvector */

	    xmax = 0.f;
	    if (ilcplx) {
		i__1 = iend;
		for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
		    r__3 = xmax, r__4 = (r__1 = vr[j + ieig * vr_dim1], dabs(
			    r__1)) + (r__2 = vr[j + (ieig + 1) * vr_dim1], 
			    dabs(r__2));
		    xmax = dmax(r__3,r__4);
		}
	    } else {
		i__1 = iend;
		for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
		    r__2 = xmax, r__3 = (r__1 = vr[j + ieig * vr_dim1], dabs(
			    r__1));
		    xmax = dmax(r__2,r__3);
		}
	    }

	    if (xmax > safmin) {
		xscale = 1.f / xmax;
		i__1 = nw - 1;
		for (jw = 0; jw <= i__1; ++jw) {
		    i__2 = iend;
		    for (jr = 1; jr <= i__2; ++jr) {
			vr[jr + (ieig + jw) * vr_dim1] = xscale * vr[jr + (
				ieig + jw) * vr_dim1];
		    }
		}
	    }
L500:
	    ;
	}
    }

    return 0;

/*     End of STGEVC */

} /* stgevc_ */

Exemplo n.º 23

0

Exibir arquivo

Arquivo: slagsy.c Projeto: AmEv7Fam/opentoonz

/* Subroutine */ int slagsy_(integer *n, integer *k, real *d, real *a, 
	integer *lda, integer *iseed, real *work, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1;

    /* Builtin functions */
    double r_sign(real *, real *);

    /* Local variables */
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    extern real sdot_(integer *, real *, integer *, real *, integer *), 
	    snrm2_(integer *, real *, integer *);
    static integer i, j;
    extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    static real alpha;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
	    sgemv_(char *, integer *, integer *, real *, real *, integer *, 
	    real *, integer *, real *, real *, integer *), saxpy_(
	    integer *, real *, real *, integer *, real *, integer *), ssymv_(
	    char *, integer *, real *, real *, integer *, real *, integer *, 
	    real *, real *, integer *);
    static real wa, wb, wn;
    extern /* Subroutine */ int xerbla_(char *, integer *), slarnv_(
	    integer *, integer *, integer *, real *);
    static real tau;


/*  -- LAPACK auxiliary test routine (version 2.0)   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SLAGSY generates a real symmetric matrix A, by pre- and post-   
    multiplying a real diagonal matrix D with a random orthogonal matrix: 
  
    A = U*D*U'. The semi-bandwidth may then be reduced to k by additional 
  
    orthogonal transformations.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    K       (input) INTEGER   
            The number of nonzero subdiagonals within the band of A.   
            0 <= K <= N-1.   

    D       (input) REAL array, dimension (N)   
            The diagonal elements of the diagonal matrix D.   

    A       (output) REAL array, dimension (LDA,N)   
            The generated n by n symmetric matrix A (the full matrix is   
            stored).   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= N.   

    ISEED   (input/output) INTEGER array, dimension (4)   
            On entry, the seed of the random number generator; the array 
  
            elements must be between 0 and 4095, and ISEED(4) must be   
            odd.   
            On exit, the seed is updated.   

    WORK    (workspace) REAL array, dimension (2*N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    ===================================================================== 
  


       Test the input arguments   

       Parameter adjustments */
    --d;
    a_dim1 = *lda;
    a_offset = a_dim1 + 1;
    a -= a_offset;
    --iseed;
    --work;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
	*info = -1;
    } else if (*k < 0 || *k > *n - 1) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    }
    if (*info < 0) {
	i__1 = -(*info);
	xerbla_("SLAGSY", &i__1);
	return 0;
    }

/*     initialize lower triangle of A to diagonal matrix */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i = j + 1; i <= i__2; ++i) {
	    a[i + j * a_dim1] = 0.f;
/* L10: */
	}
/* L20: */
    }
    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	a[i + i * a_dim1] = d[i];
/* L30: */
    }

/*     Generate lower triangle of symmetric matrix */

    for (i = *n - 1; i >= 1; --i) {

/*        generate random reflection */

	i__1 = *n - i + 1;
	slarnv_(&c__3, &iseed[1], &i__1, &work[1]);
	i__1 = *n - i + 1;
	wn = snrm2_(&i__1, &work[1], &c__1);
	wa = r_sign(&wn, &work[1]);
	if (wn == 0.f) {
	    tau = 0.f;
	} else {
	    wb = work[1] + wa;
	    i__1 = *n - i;
	    r__1 = 1.f / wb;
	    sscal_(&i__1, &r__1, &work[2], &c__1);
	    work[1] = 1.f;
	    tau = wb / wa;
	}

/*        apply random reflection to A(i:n,i:n) from the left   
          and the right   

          compute  y := tau * A * u */

	i__1 = *n - i + 1;
	ssymv_("Lower", &i__1, &tau, &a[i + i * a_dim1], lda, &work[1], &c__1,
		 &c_b12, &work[*n + 1], &c__1);

/*        compute  v := y - 1/2 * tau * ( y, u ) * u */

	i__1 = *n - i + 1;
	alpha = tau * -.5f * sdot_(&i__1, &work[*n + 1], &c__1, &work[1], &
		c__1);
	i__1 = *n - i + 1;
	saxpy_(&i__1, &alpha, &work[1], &c__1, &work[*n + 1], &c__1);

/*        apply the transformation as a rank-2 update to A(i:n,i:n) */

	i__1 = *n - i + 1;
	ssyr2_("Lower", &i__1, &c_b19, &work[1], &c__1, &work[*n + 1], &c__1, 
		&a[i + i * a_dim1], lda);
/* L40: */
    }

/*     Reduce number of subdiagonals to K */

    i__1 = *n - 1 - *k;
    for (i = 1; i <= i__1; ++i) {

/*        generate reflection to annihilate A(k+i+1:n,i) */

	i__2 = *n - *k - i + 1;
	wn = snrm2_(&i__2, &a[*k + i + i * a_dim1], &c__1);
	wa = r_sign(&wn, &a[*k + i + i * a_dim1]);
	if (wn == 0.f) {
	    tau = 0.f;
	} else {
	    wb = a[*k + i + i * a_dim1] + wa;
	    i__2 = *n - *k - i;
	    r__1 = 1.f / wb;
	    sscal_(&i__2, &r__1, &a[*k + i + 1 + i * a_dim1], &c__1);
	    a[*k + i + i * a_dim1] = 1.f;
	    tau = wb / wa;
	}

/*        apply reflection to A(k+i:n,i+1:k+i-1) from the left */

	i__2 = *n - *k - i + 1;
	i__3 = *k - 1;
	sgemv_("Transpose", &i__2, &i__3, &c_b26, &a[*k + i + (i + 1) * 
		a_dim1], lda, &a[*k + i + i * a_dim1], &c__1, &c_b12, &work[1]
		, &c__1);
	i__2 = *n - *k - i + 1;
	i__3 = *k - 1;
	r__1 = -(doublereal)tau;
	sger_(&i__2, &i__3, &r__1, &a[*k + i + i * a_dim1], &c__1, &work[1], &
		c__1, &a[*k + i + (i + 1) * a_dim1], lda);

/*        apply reflection to A(k+i:n,k+i:n) from the left and the rig
ht   

          compute  y := tau * A * u */

	i__2 = *n - *k - i + 1;
	ssymv_("Lower", &i__2, &tau, &a[*k + i + (*k + i) * a_dim1], lda, &a[*
		k + i + i * a_dim1], &c__1, &c_b12, &work[1], &c__1);

/*        compute  v := y - 1/2 * tau * ( y, u ) * u */

	i__2 = *n - *k - i + 1;
	alpha = tau * -.5f * sdot_(&i__2, &work[1], &c__1, &a[*k + i + i * 
		a_dim1], &c__1);
	i__2 = *n - *k - i + 1;
	saxpy_(&i__2, &alpha, &a[*k + i + i * a_dim1], &c__1, &work[1], &c__1)
		;

/*        apply symmetric rank-2 update to A(k+i:n,k+i:n) */

	i__2 = *n - *k - i + 1;
	ssyr2_("Lower", &i__2, &c_b19, &a[*k + i + i * a_dim1], &c__1, &work[
		1], &c__1, &a[*k + i + (*k + i) * a_dim1], lda);

	a[*k + i + i * a_dim1] = -(doublereal)wa;
	i__2 = *n;
	for (j = *k + i + 1; j <= i__2; ++j) {
	    a[j + i * a_dim1] = 0.f;
/* L50: */
	}
/* L60: */
    }

/*     Store full symmetric matrix */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i = j + 1; i <= i__2; ++i) {
	    a[j + i * a_dim1] = a[i + j * a_dim1];
/* L70: */
	}
/* L80: */
    }
    return 0;

/*     End of SLAGSY */

} /* slagsy_ */

Exemplo n.º 24

0

Exibir arquivo

Arquivo: slaqps.c Projeto: Electrostatics/FETK

/* Subroutine */ int slaqps_(integer *m, integer *n, integer *offset, integer 
	*nb, integer *kb, real *a, integer *lda, integer *jpvt, real *tau, 
	real *vn1, real *vn2, real *auxv, real *f, integer *ldf)
{
    /* System generated locals */
    integer a_dim1, a_offset, f_dim1, f_offset, i__1, i__2;
    real r__1, r__2;

    /* Builtin functions */
    double sqrt(doublereal);
    integer i_nint(real *);

    /* Local variables */
    static integer j, k, rk;
    static real akk;
    static integer pvt;
    static real temp, temp2;
    extern doublereal snrm2_(integer *, real *, integer *);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *, ftnlen, ftnlen);
    static integer itemp;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *, 
	    ftnlen), sswap_(integer *, real *, integer *, real *, integer *), 
	    slarfg_(integer *, real *, real *, integer *, real *);
    static integer lsticc;
    extern integer isamax_(integer *, real *, integer *);
    static integer lastrk;


/*  -- LAPACK auxiliary routine (version 3.0) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     June 30, 1999 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAQPS computes a step of QR factorization with column pivoting */
/*  of a real M-by-N matrix A by using Blas-3.  It tries to factorize */
/*  NB columns from A starting from the row OFFSET+1, and updates all */
/*  of the matrix with Blas-3 xGEMM. */

/*  In some cases, due to catastrophic cancellations, it cannot */
/*  factorize NB columns.  Hence, the actual number of factorized */
/*  columns is returned in KB. */

/*  Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A. M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A. N >= 0 */

/*  OFFSET  (input) INTEGER */
/*          The number of rows of A that have been factorized in */
/*          previous steps. */

/*  NB      (input) INTEGER */
/*          The number of columns to factorize. */

/*  KB      (output) INTEGER */
/*          The number of columns actually factorized. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix A. */
/*          On exit, block A(OFFSET+1:M,1:KB) is the triangular */
/*          factor obtained and block A(1:OFFSET,1:N) has been */
/*          accordingly pivoted, but no factorized. */
/*          The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has */
/*          been updated. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= max(1,M). */

/*  JPVT    (input/output) INTEGER array, dimension (N) */
/*          JPVT(I) = K <==> Column K of the full matrix A has been */
/*          permuted into position I in AP. */

/*  TAU     (output) REAL array, dimension (KB) */
/*          The scalar factors of the elementary reflectors. */

/*  VN1     (input/output) REAL array, dimension (N) */
/*          The vector with the partial column norms. */

/*  VN2     (input/output) REAL array, dimension (N) */
/*          The vector with the exact column norms. */

/*  AUXV    (input/output) REAL array, dimension (NB) */
/*          Auxiliar vector. */

/*  F       (input/output) REAL array, dimension (LDF,NB) */
/*          Matrix F' = L*Y'*A. */

/*  LDF     (input) INTEGER */
/*          The leading dimension of the array F. LDF >= max(1,N). */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
/*    X. Sun, Computer Science Dept., Duke University, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --jpvt;
    --tau;
    --vn1;
    --vn2;
    --auxv;
    f_dim1 = *ldf;
    f_offset = 1 + f_dim1;
    f -= f_offset;

    /* Function Body */
/* Computing MIN */
    i__1 = *m, i__2 = *n + *offset;
    lastrk = min(i__1,i__2);
    lsticc = 0;
    k = 0;

/*     Beginning of while loop. */

L10:
    if (k < *nb && lsticc == 0) {
	++k;
	rk = *offset + k;

/*        Determine ith pivot column and swap if necessary */

	i__1 = *n - k + 1;
	pvt = k - 1 + isamax_(&i__1, &vn1[k], &c__1);
	if (pvt != k) {
	    sswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
	    i__1 = k - 1;
	    sswap_(&i__1, &f[pvt + f_dim1], ldf, &f[k + f_dim1], ldf);
	    itemp = jpvt[pvt];
	    jpvt[pvt] = jpvt[k];
	    jpvt[k] = itemp;
	    vn1[pvt] = vn1[k];
	    vn2[pvt] = vn2[k];
	}

/*        Apply previous Householder reflectors to column K: */
/*        A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */

	if (k > 1) {
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    sgemv_("No transpose", &i__1, &i__2, &c_b7, &a[rk + a_dim1], lda, 
		    &f[k + f_dim1], ldf, &c_b8, &a[rk + k * a_dim1], &c__1, (
		    ftnlen)12);
	}

/*        Generate elementary reflector H(k). */

	if (rk < *m) {
	    i__1 = *m - rk + 1;
	    slarfg_(&i__1, &a[rk + k * a_dim1], &a[rk + 1 + k * a_dim1], &
		    c__1, &tau[k]);
	} else {
	    slarfg_(&c__1, &a[rk + k * a_dim1], &a[rk + k * a_dim1], &c__1, &
		    tau[k]);
	}

	akk = a[rk + k * a_dim1];
	a[rk + k * a_dim1] = 1.f;

/*        Compute Kth column of F: */

/*        Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */

	if (k < *n) {
	    i__1 = *m - rk + 1;
	    i__2 = *n - k;
	    sgemv_("Transpose", &i__1, &i__2, &tau[k], &a[rk + (k + 1) * 
		    a_dim1], lda, &a[rk + k * a_dim1], &c__1, &c_b15, &f[k + 
		    1 + k * f_dim1], &c__1, (ftnlen)9);
	}

/*        Padding F(1:K,K) with zeros. */

	i__1 = k;
	for (j = 1; j <= i__1; ++j) {
	    f[j + k * f_dim1] = 0.f;
/* L20: */
	}

/*        Incremental updating of F: */
/*        F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' */
/*                    *A(RK:M,K). */

	if (k > 1) {
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    r__1 = -tau[k];
	    sgemv_("Transpose", &i__1, &i__2, &r__1, &a[rk + a_dim1], lda, &a[
		    rk + k * a_dim1], &c__1, &c_b15, &auxv[1], &c__1, (ftnlen)
		    9);

	    i__1 = k - 1;
	    sgemv_("No transpose", n, &i__1, &c_b8, &f[f_dim1 + 1], ldf, &
		    auxv[1], &c__1, &c_b8, &f[k * f_dim1 + 1], &c__1, (ftnlen)
		    12);
	}

/*        Update the current row of A: */
/*        A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */

	if (k < *n) {
	    i__1 = *n - k;
	    sgemv_("No transpose", &i__1, &k, &c_b7, &f[k + 1 + f_dim1], ldf, 
		    &a[rk + a_dim1], lda, &c_b8, &a[rk + (k + 1) * a_dim1], 
		    lda, (ftnlen)12);
	}

/*        Update partial column norms. */

	if (rk < lastrk) {
	    i__1 = *n;
	    for (j = k + 1; j <= i__1; ++j) {
		if (vn1[j] != 0.f) {
		    temp = (r__1 = a[rk + j * a_dim1], dabs(r__1)) / vn1[j];
/* Computing MAX */
		    r__1 = 0.f, r__2 = (temp + 1.f) * (1.f - temp);
		    temp = dmax(r__1,r__2);
/* Computing 2nd power */
		    r__1 = vn1[j] / vn2[j];
		    temp2 = temp * .05f * (r__1 * r__1) + 1.f;
		    if (temp2 == 1.f) {
			vn2[j] = (real) lsticc;
			lsticc = j;
		    } else {
			vn1[j] *= sqrt(temp);
		    }
		}
/* L30: */
	    }
	}

	a[rk + k * a_dim1] = akk;

/*        End of while loop. */

	goto L10;
    }
    *kb = k;
    rk = *offset + *kb;

/*     Apply the block reflector to the rest of the matrix: */
/*     A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - */
/*                         A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. */

/* Computing MIN */
    i__1 = *n, i__2 = *m - *offset;
    if (*kb < min(i__1,i__2)) {
	i__1 = *m - rk;
	i__2 = *n - *kb;
	sgemm_("No transpose", "Transpose", &i__1, &i__2, kb, &c_b7, &a[rk + 
		1 + a_dim1], lda, &f[*kb + 1 + f_dim1], ldf, &c_b8, &a[rk + 1 
		+ (*kb + 1) * a_dim1], lda, (ftnlen)12, (ftnlen)9);
    }

/*     Recomputation of difficult columns. */

L40:
    if (lsticc > 0) {
	itemp = i_nint(&vn2[lsticc]);
	i__1 = *m - rk;
	vn1[lsticc] = snrm2_(&i__1, &a[rk + 1 + lsticc * a_dim1], &c__1);
	vn2[lsticc] = vn1[lsticc];
	lsticc = itemp;
	goto L40;
    }

    return 0;

/*     End of SLAQPS */

} /* slaqps_ */

Exemplo n.º 25

0

Exibir arquivo

Arquivo: slaror.c Projeto: juanjosegarciaripoll/cblapack

/* Subroutine */ int slaror_(char *side, char *init, integer *m, integer *n, 
	real *a, integer *lda, integer *iseed, real *x, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    integer j, kbeg, jcol;
    integer irow;
    integer ixfrm, itype, nxfrm;
    real xnorm;
    real factor;
    extern doublereal slarnd_(integer *, integer *);
    real xnorms;


/*  -- LAPACK auxiliary test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAROR pre- or post-multiplies an M by N matrix A by a random */
/*  orthogonal matrix U, overwriting A.  A may optionally be initialized */
/*  to the identity matrix before multiplying by U.  U is generated using */
/*  the method of G.W. Stewart (SIAM J. Numer. Anal. 17, 1980, 403-409). */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          Specifies whether A is multiplied on the left or right by U. */
/*          = 'L':         Multiply A on the left (premultiply) by U */
/*          = 'R':         Multiply A on the right (postmultiply) by U' */
/*          = 'C' or 'T':  Multiply A on the left by U and the right */
/*                          by U' (Here, U' means U-transpose.) */

/*  INIT    (input) CHARACTER*1 */
/*          Specifies whether or not A should be initialized to the */
/*          identity matrix. */
/*          = 'I':  Initialize A to (a section of) the identity matrix */
/*                   before applying U. */
/*          = 'N':  No initialization.  Apply U to the input matrix A. */

/*          INIT = 'I' may be used to generate square or rectangular */
/*          orthogonal matrices: */

/*          For M = N and SIDE = 'L' or 'R', the rows will be orthogonal */
/*          to each other, as will the columns. */

/*          If M < N, SIDE = 'R' produces a dense matrix whose rows are */
/*          orthogonal and whose columns are not, while SIDE = 'L' */
/*          produces a matrix whose rows are orthogonal, and whose first */
/*          M columns are orthogonal, and whose remaining columns are */
/*          zero. */

/*          If M > N, SIDE = 'L' produces a dense matrix whose columns */
/*          are orthogonal and whose rows are not, while SIDE = 'R' */
/*          produces a matrix whose columns are orthogonal, and whose */
/*          first M rows are orthogonal, and whose remaining rows are */
/*          zero. */

/*  M       (input) INTEGER */
/*          The number of rows of A. */

/*  N       (input) INTEGER */
/*          The number of columns of A. */

/*  A       (input/output) REAL array, dimension (LDA, N) */
/*          On entry, the array A. */
/*          On exit, overwritten by U A ( if SIDE = 'L' ), */
/*           or by A U ( if SIDE = 'R' ), */
/*           or by U A U' ( if SIDE = 'C' or 'T'). */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,M). */

/*  ISEED   (input/output) INTEGER array, dimension (4) */
/*          On entry ISEED specifies the seed of the random number */
/*          generator. The array elements should be between 0 and 4095; */
/*          if not they will be reduced mod 4096.  Also, ISEED(4) must */
/*          be odd.  The random number generator uses a linear */
/*          congruential sequence limited to small integers, and so */
/*          should produce machine independent random numbers. The */
/*          values of ISEED are changed on exit, and can be used in the */
/*          next call to SLAROR to continue the same random number */
/*          sequence. */

/*  X       (workspace) REAL array, dimension (3*MAX( M, N )) */
/*          Workspace of length */
/*              2*M + N if SIDE = 'L', */
/*              2*N + M if SIDE = 'R', */
/*              3*N     if SIDE = 'C' or 'T'. */

/*  INFO    (output) INTEGER */
/*          An error flag.  It is set to: */
/*          = 0:  normal return */
/*          < 0:  if INFO = -k, the k-th argument had an illegal value */
/*          = 1:  if the random numbers generated by SLARND are bad. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --iseed;
    --x;

    /* Function Body */
    if (*n == 0 || *m == 0) {
	return 0;
    }

    itype = 0;
    if (lsame_(side, "L")) {
	itype = 1;
    } else if (lsame_(side, "R")) {
	itype = 2;
    } else if (lsame_(side, "C") || lsame_(side, "T")) {
	itype = 3;
    }

/*     Check for argument errors. */

    *info = 0;
    if (itype == 0) {
	*info = -1;
    } else if (*m < 0) {
	*info = -3;
    } else if (*n < 0 || (itype == 3 && *n != *m)) {
	*info = -4;
    } else if (*lda < *m) {
	*info = -6;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAROR", &i__1);
	return 0;
    }

    if (itype == 1) {
	nxfrm = *m;
    } else {
	nxfrm = *n;
    }

/*     Initialize A to the identity matrix if desired */

    if (lsame_(init, "I")) {
	slaset_("Full", m, n, &c_b9, &c_b10, &a[a_offset], lda);
    }

/*     If no rotation possible, multiply by random +/-1 */

/*     Compute rotation by computing Householder transformations */
/*     H(2), H(3), ..., H(nhouse) */

    i__1 = nxfrm;
    for (j = 1; j <= i__1; ++j) {
	x[j] = 0.f;
/* L10: */
    }

    i__1 = nxfrm;
    for (ixfrm = 2; ixfrm <= i__1; ++ixfrm) {
	kbeg = nxfrm - ixfrm + 1;

/*        Generate independent normal( 0, 1 ) random numbers */

	i__2 = nxfrm;
	for (j = kbeg; j <= i__2; ++j) {
	    x[j] = slarnd_(&c__3, &iseed[1]);
/* L20: */
	}

/*        Generate a Householder transformation from the random vector X */

	xnorm = snrm2_(&ixfrm, &x[kbeg], &c__1);
	xnorms = r_sign(&xnorm, &x[kbeg]);
	r__1 = -x[kbeg];
	x[kbeg + nxfrm] = r_sign(&c_b10, &r__1);
	factor = xnorms * (xnorms + x[kbeg]);
	if (dabs(factor) < 1e-20f) {
	    *info = 1;
	    xerbla_("SLAROR", info);
	    return 0;
	} else {
	    factor = 1.f / factor;
	}
	x[kbeg] += xnorms;

/*        Apply Householder transformation to A */

	if (itype == 1 || itype == 3) {

/*           Apply H(k) from the left. */

	    sgemv_("T", &ixfrm, n, &c_b10, &a[kbeg + a_dim1], lda, &x[kbeg], &
		    c__1, &c_b9, &x[(nxfrm << 1) + 1], &c__1);
	    r__1 = -factor;
	    sger_(&ixfrm, n, &r__1, &x[kbeg], &c__1, &x[(nxfrm << 1) + 1], &
		    c__1, &a[kbeg + a_dim1], lda);

	}

	if (itype == 2 || itype == 3) {

/*           Apply H(k) from the right. */

	    sgemv_("N", m, &ixfrm, &c_b10, &a[kbeg * a_dim1 + 1], lda, &x[
		    kbeg], &c__1, &c_b9, &x[(nxfrm << 1) + 1], &c__1);
	    r__1 = -factor;
	    sger_(m, &ixfrm, &r__1, &x[(nxfrm << 1) + 1], &c__1, &x[kbeg], &
		    c__1, &a[kbeg * a_dim1 + 1], lda);

	}
/* L30: */
    }

    r__1 = slarnd_(&c__3, &iseed[1]);
    x[nxfrm * 2] = r_sign(&c_b10, &r__1);

/*     Scale the matrix A by D. */

    if (itype == 1 || itype == 3) {
	i__1 = *m;
	for (irow = 1; irow <= i__1; ++irow) {
	    sscal_(n, &x[nxfrm + irow], &a[irow + a_dim1], lda);
/* L40: */
	}
    }

    if (itype == 2 || itype == 3) {
	i__1 = *n;
	for (jcol = 1; jcol <= i__1; ++jcol) {
	    sscal_(m, &x[nxfrm + jcol], &a[jcol * a_dim1 + 1], &c__1);
/* L50: */
	}
    }
    return 0;

/*     End of SLAROR */

} /* slaror_ */