Ejemplo n.º 1
0
/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, 
	integer *m, integer *n, doublereal *alpha, doublereal *a, integer *
	lda, doublereal *b, integer *ldb)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
    /* Local variables */
    static integer info;
    static doublereal temp;
    static integer i__, j, k;
    static logical lside;
    extern logical lsame_(char *, char *);
    static integer nrowa;
    static logical upper;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static logical nounit;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
/*  Purpose   
    =======   
    DTRMM  performs one of the matrix-matrix operations   
       B := alpha*op( A )*B,   or   B := alpha*B*op( A ),   
    where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or   
    non-unit,  upper or lower triangular matrix  and  op( A )  is one  of   
       op( A ) = A   or   op( A ) = A'.   
    Parameters   
    ==========   
    SIDE   - CHARACTER*1.   
             On entry,  SIDE specifies whether  op( A ) multiplies B from   
             the left or right as follows:   
                SIDE = 'L' or 'l'   B := alpha*op( A )*B.   
                SIDE = 'R' or 'r'   B := alpha*B*op( A ).   
             Unchanged on exit.   
    UPLO   - CHARACTER*1.   
             On entry, UPLO specifies whether the matrix A is an upper or   
             lower triangular matrix as follows:   
                UPLO = 'U' or 'u'   A is an upper triangular matrix.   
                UPLO = 'L' or 'l'   A is a lower triangular matrix.   
             Unchanged on exit.   
    TRANSA - CHARACTER*1.   
             On entry, TRANSA specifies the form of op( A ) to be used in   
             the matrix multiplication as follows:   
                TRANSA = 'N' or 'n'   op( A ) = A.   
                TRANSA = 'T' or 't'   op( A ) = A'.   
                TRANSA = 'C' or 'c'   op( A ) = A'.   
             Unchanged on exit.   
    DIAG   - CHARACTER*1.   
             On entry, DIAG specifies whether or not A is unit triangular   
             as follows:   
                DIAG = 'U' or 'u'   A is assumed to be unit triangular.   
                DIAG = 'N' or 'n'   A is not assumed to be unit   
                                    triangular.   
             Unchanged on exit.   
    M      - INTEGER.   
             On entry, M specifies the number of rows of B. M must be at   
             least zero.   
             Unchanged on exit.   
    N      - INTEGER.   
             On entry, N specifies the number of columns of B.  N must be   
             at least zero.   
             Unchanged on exit.   
    ALPHA  - DOUBLE PRECISION.   
             On entry,  ALPHA specifies the scalar  alpha. When  alpha is   
             zero then  A is not referenced and  B need not be set before   
             entry.   
             Unchanged on exit.   
    A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m   
             when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.   
             Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k   
             upper triangular part of the array  A must contain the upper   
             triangular matrix  and the strictly lower triangular part of   
             A is not referenced.   
             Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k   
             lower triangular part of the array  A must contain the lower   
             triangular matrix  and the strictly upper triangular part of   
             A is not referenced.   
             Note that when  DIAG = 'U' or 'u',  the diagonal elements of   
             A  are not referenced either,  but are assumed to be  unity.   
             Unchanged on exit.   
    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared   
             in the calling (sub) program.  When  SIDE = 'L' or 'l'  then   
             LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'   
             then LDA must be at least max( 1, n ).   
             Unchanged on exit.   
    B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).   
             Before entry,  the leading  m by n part of the array  B must   
             contain the matrix  B,  and  on exit  is overwritten  by the   
             transformed matrix.   
    LDB    - INTEGER.   
             On entry, LDB specifies the first dimension of B as declared   
             in  the  calling  (sub)  program.   LDB  must  be  at  least   
             max( 1, m ).   
             Unchanged on exit.   
    Level 3 Blas routine.   
    -- Written on 8-February-1989.   
       Jack Dongarra, Argonne National Laboratory.   
       Iain Duff, AERE Harwell.   
       Jeremy Du Croz, Numerical Algorithms Group Ltd.   
       Sven Hammarling, Numerical Algorithms Group Ltd.   
       Test the input parameters.   
       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    /* Function Body */
    lside = lsame_(side, "L");
    if (lside) {
	nrowa = *m;
    } else {
	nrowa = *n;
    }
    nounit = lsame_(diag, "N");
    upper = lsame_(uplo, "U");
    info = 0;
    if (! lside && ! lsame_(side, "R")) {
	info = 1;
    } else if (! upper && ! lsame_(uplo, "L")) {
	info = 2;
    } else if (! lsame_(transa, "N") && ! lsame_(transa,
	     "T") && ! lsame_(transa, "C")) {
	info = 3;
    } else if (! lsame_(diag, "U") && ! lsame_(diag, 
	    "N")) {
	info = 4;
    } else if (*m < 0) {
	info = 5;
    } else if (*n < 0) {
	info = 6;
    } else if (*lda < max(1,nrowa)) {
	info = 9;
    } else if (*ldb < max(1,*m)) {
	info = 11;
    }
    if (info != 0) {
	xerbla_("DTRMM ", &info);
	return 0;
    }
/*     Quick return if possible. */
    if (*n == 0) {
	return 0;
    }
/*     And when  alpha.eq.zero. */
    if (*alpha == 0.) {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = *m;
	    for (i__ = 1; i__ <= i__2; ++i__) {
		b_ref(i__, j) = 0.;
/* L10: */
	    }
/* L20: */
	}
	return 0;
    }
/*     Start the operations. */
    if (lside) {
	if (lsame_(transa, "N")) {
/*           Form  B := alpha*A*B. */
	    if (upper) {
		i__1 = *n;
		for (j = 1; j <= i__1; ++j) {
		    i__2 = *m;
		    for (k = 1; k <= i__2; ++k) {
			if (b_ref(k, j) != 0.) {
			    temp = *alpha * b_ref(k, j);
			    i__3 = k - 1;
			    for (i__ = 1; i__ <= i__3; ++i__) {
				b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
					i__, k);
/* L30: */
			    }
			    if (nounit) {
				temp *= a_ref(k, k);
			    }
			    b_ref(k, j) = temp;
			}
/* L40: */
		    }
/* L50: */
		}
	    } else {
		i__1 = *n;
		for (j = 1; j <= i__1; ++j) {
		    for (k = *m; k >= 1; --k) {
			if (b_ref(k, j) != 0.) {
			    temp = *alpha * b_ref(k, j);
			    b_ref(k, j) = temp;
			    if (nounit) {
				b_ref(k, j) = b_ref(k, j) * a_ref(k, k);
			    }
			    i__2 = *m;
			    for (i__ = k + 1; i__ <= i__2; ++i__) {
				b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
					i__, k);
/* L60: */
			    }
			}
/* L70: */
		    }
/* L80: */
		}
	    }
	} else {
/*           Form  B := alpha*A'*B. */
	    if (upper) {
		i__1 = *n;
		for (j = 1; j <= i__1; ++j) {
		    for (i__ = *m; i__ >= 1; --i__) {
			temp = b_ref(i__, j);
			if (nounit) {
			    temp *= a_ref(i__, i__);
			}
			i__2 = i__ - 1;
			for (k = 1; k <= i__2; ++k) {
			    temp += a_ref(k, i__) * b_ref(k, j);
/* L90: */
			}
			b_ref(i__, j) = *alpha * temp;
/* L100: */
		    }
/* L110: */
		}
	    } else {
		i__1 = *n;
		for (j = 1; j <= i__1; ++j) {
		    i__2 = *m;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			temp = b_ref(i__, j);
			if (nounit) {
			    temp *= a_ref(i__, i__);
			}
			i__3 = *m;
			for (k = i__ + 1; k <= i__3; ++k) {
			    temp += a_ref(k, i__) * b_ref(k, j);
/* L120: */
			}
			b_ref(i__, j) = *alpha * temp;
/* L130: */
		    }
/* L140: */
		}
	    }
	}
    } else {
	if (lsame_(transa, "N")) {
/*           Form  B := alpha*B*A. */
	    if (upper) {
		for (j = *n; j >= 1; --j) {
		    temp = *alpha;
		    if (nounit) {
			temp *= a_ref(j, j);
		    }
		    i__1 = *m;
		    for (i__ = 1; i__ <= i__1; ++i__) {
			b_ref(i__, j) = temp * b_ref(i__, j);
/* L150: */
		    }
		    i__1 = j - 1;
		    for (k = 1; k <= i__1; ++k) {
			if (a_ref(k, j) != 0.) {
			    temp = *alpha * a_ref(k, j);
			    i__2 = *m;
			    for (i__ = 1; i__ <= i__2; ++i__) {
				b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
					i__, k);
/* L160: */
			    }
			}
/* L170: */
		    }
/* L180: */
		}
	    } else {
		i__1 = *n;
		for (j = 1; j <= i__1; ++j) {
		    temp = *alpha;
		    if (nounit) {
			temp *= a_ref(j, j);
		    }
		    i__2 = *m;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			b_ref(i__, j) = temp * b_ref(i__, j);
/* L190: */
		    }
		    i__2 = *n;
		    for (k = j + 1; k <= i__2; ++k) {
			if (a_ref(k, j) != 0.) {
			    temp = *alpha * a_ref(k, j);
			    i__3 = *m;
			    for (i__ = 1; i__ <= i__3; ++i__) {
				b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
					i__, k);
/* L200: */
			    }
			}
/* L210: */
		    }
/* L220: */
		}
	    }
	} else {
/*           Form  B := alpha*B*A'. */
	    if (upper) {
		i__1 = *n;
		for (k = 1; k <= i__1; ++k) {
		    i__2 = k - 1;
		    for (j = 1; j <= i__2; ++j) {
			if (a_ref(j, k) != 0.) {
			    temp = *alpha * a_ref(j, k);
			    i__3 = *m;
			    for (i__ = 1; i__ <= i__3; ++i__) {
				b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
					i__, k);
/* L230: */
			    }
			}
/* L240: */
		    }
		    temp = *alpha;
		    if (nounit) {
			temp *= a_ref(k, k);
		    }
		    if (temp != 1.) {
			i__2 = *m;
			for (i__ = 1; i__ <= i__2; ++i__) {
			    b_ref(i__, k) = temp * b_ref(i__, k);
/* L250: */
			}
		    }
/* L260: */
		}
	    } else {
		for (k = *n; k >= 1; --k) {
		    i__1 = *n;
		    for (j = k + 1; j <= i__1; ++j) {
			if (a_ref(j, k) != 0.) {
			    temp = *alpha * a_ref(j, k);
			    i__2 = *m;
			    for (i__ = 1; i__ <= i__2; ++i__) {
				b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
					i__, k);
/* L270: */
			    }
			}
/* L280: */
		    }
		    temp = *alpha;
		    if (nounit) {
			temp *= a_ref(k, k);
		    }
		    if (temp != 1.) {
			i__1 = *m;
			for (i__ = 1; i__ <= i__1; ++i__) {
			    b_ref(i__, k) = temp * b_ref(i__, k);
/* L290: */
			}
		    }
/* L300: */
		}
	    }
	}
    }
    return 0;
/*     End of DTRMM . */
} /* dtrmm_ */
Ejemplo n.º 2
0
/* Subroutine */ int dlagv2_(doublereal *a, integer *lda, doublereal *b, 
	integer *ldb, doublereal *alphar, doublereal *alphai, doublereal *
	beta, doublereal *csl, doublereal *snl, doublereal *csr, doublereal *
	snr)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    DLAGV2 computes the Generalized Schur factorization of a real 2-by-2   
    matrix pencil (A,B) where B is upper triangular. This routine   
    computes orthogonal (rotation) matrices given by CSL, SNL and CSR,   
    SNR such that   

    1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0   
       types), then   

       [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]   
       [  0  a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]   

       [ b11 b12 ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]   
       [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ],   

    2) if the pencil (A,B) has a pair of complex conjugate eigenvalues,   
       then   

       [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]   
       [ a21 a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]   

       [ b11  0  ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]   
       [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ]   

       where b11 >= b22 > 0.   


    Arguments   
    =========   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA, 2)   
            On entry, the 2 x 2 matrix A.   
            On exit, A is overwritten by the ``A-part'' of the   
            generalized Schur form.   

    LDA     (input) INTEGER   
            THe leading dimension of the array A.  LDA >= 2.   

    B       (input/output) DOUBLE PRECISION array, dimension (LDB, 2)   
            On entry, the upper triangular 2 x 2 matrix B.   
            On exit, B is overwritten by the ``B-part'' of the   
            generalized Schur form.   

    LDB     (input) INTEGER   
            THe leading dimension of the array B.  LDB >= 2.   

    ALPHAR  (output) DOUBLE PRECISION array, dimension (2)   
    ALPHAI  (output) DOUBLE PRECISION array, dimension (2)   
    BETA    (output) DOUBLE PRECISION array, dimension (2)   
            (ALPHAR(k)+i*ALPHAI(k))/BETA(k) are the eigenvalues of the   
            pencil (A,B), k=1,2, i = sqrt(-1).  Note that BETA(k) may   
            be zero.   

    CSL     (output) DOUBLE PRECISION   
            The cosine of the left rotation matrix.   

    SNL     (output) DOUBLE PRECISION   
            The sine of the left rotation matrix.   

    CSR     (output) DOUBLE PRECISION   
            The cosine of the right rotation matrix.   

    SNR     (output) DOUBLE PRECISION   
            The sine of the right rotation matrix.   

    Further Details   
    ===============   

    Based on contributions by   
       Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static integer c__2 = 2;
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset;
    doublereal d__1, d__2, d__3, d__4, d__5, d__6;
    /* Local variables */
    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *), dlag2_(
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    doublereal *, doublereal *, doublereal *, doublereal *, 
	    doublereal *);
    static doublereal r__, t, anorm, bnorm, h1, h2, h3, scale1, scale2;
    extern /* Subroutine */ int dlasv2_(doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *);
    extern doublereal dlapy2_(doublereal *, doublereal *);
    static doublereal ascale, bscale;
    extern doublereal dlamch_(char *);
    static doublereal wi, qq, rr, safmin;
    extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *);
    static doublereal wr1, wr2, ulp;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --alphar;
    --alphai;
    --beta;

    /* Function Body */
    safmin = dlamch_("S");
    ulp = dlamch_("P");

/*     Scale A   

   Computing MAX */
    d__5 = (d__1 = a_ref(1, 1), abs(d__1)) + (d__2 = a_ref(2, 1), abs(d__2)), 
	    d__6 = (d__3 = a_ref(1, 2), abs(d__3)) + (d__4 = a_ref(2, 2), abs(
	    d__4)), d__5 = max(d__5,d__6);
    anorm = max(d__5,safmin);
    ascale = 1. / anorm;
    a_ref(1, 1) = ascale * a_ref(1, 1);
    a_ref(1, 2) = ascale * a_ref(1, 2);
    a_ref(2, 1) = ascale * a_ref(2, 1);
    a_ref(2, 2) = ascale * a_ref(2, 2);

/*     Scale B   

   Computing MAX */
    d__4 = (d__3 = b_ref(1, 1), abs(d__3)), d__5 = (d__1 = b_ref(1, 2), abs(
	    d__1)) + (d__2 = b_ref(2, 2), abs(d__2)), d__4 = max(d__4,d__5);
    bnorm = max(d__4,safmin);
    bscale = 1. / bnorm;
    b_ref(1, 1) = bscale * b_ref(1, 1);
    b_ref(1, 2) = bscale * b_ref(1, 2);
    b_ref(2, 2) = bscale * b_ref(2, 2);

/*     Check if A can be deflated */

    if ((d__1 = a_ref(2, 1), abs(d__1)) <= ulp) {
	*csl = 1.;
	*snl = 0.;
	*csr = 1.;
	*snr = 0.;
	a_ref(2, 1) = 0.;
	b_ref(2, 1) = 0.;

/*     Check if B is singular */

    } else if ((d__1 = b_ref(1, 1), abs(d__1)) <= ulp) {
	dlartg_(&a_ref(1, 1), &a_ref(2, 1), csl, snl, &r__);
	*csr = 1.;
	*snr = 0.;
	drot_(&c__2, &a_ref(1, 1), lda, &a_ref(2, 1), lda, csl, snl);
	drot_(&c__2, &b_ref(1, 1), ldb, &b_ref(2, 1), ldb, csl, snl);
	a_ref(2, 1) = 0.;
	b_ref(1, 1) = 0.;
	b_ref(2, 1) = 0.;

    } else if ((d__1 = b_ref(2, 2), abs(d__1)) <= ulp) {
	dlartg_(&a_ref(2, 2), &a_ref(2, 1), csr, snr, &t);
	*snr = -(*snr);
	drot_(&c__2, &a_ref(1, 1), &c__1, &a_ref(1, 2), &c__1, csr, snr);
	drot_(&c__2, &b_ref(1, 1), &c__1, &b_ref(1, 2), &c__1, csr, snr);
	*csl = 1.;
	*snl = 0.;
	a_ref(2, 1) = 0.;
	b_ref(2, 1) = 0.;
	b_ref(2, 2) = 0.;

    } else {

/*        B is nonsingular, first compute the eigenvalues of (A,B) */

	dlag2_(&a[a_offset], lda, &b[b_offset], ldb, &safmin, &scale1, &
		scale2, &wr1, &wr2, &wi);

	if (wi == 0.) {

/*           two real eigenvalues, compute s*A-w*B */

	    h1 = scale1 * a_ref(1, 1) - wr1 * b_ref(1, 1);
	    h2 = scale1 * a_ref(1, 2) - wr1 * b_ref(1, 2);
	    h3 = scale1 * a_ref(2, 2) - wr1 * b_ref(2, 2);

	    rr = dlapy2_(&h1, &h2);
	    d__1 = scale1 * a_ref(2, 1);
	    qq = dlapy2_(&d__1, &h3);

	    if (rr > qq) {

/*              find right rotation matrix to zero 1,1 element of   
                (sA - wB) */

		dlartg_(&h2, &h1, csr, snr, &t);

	    } else {

/*              find right rotation matrix to zero 2,1 element of   
                (sA - wB) */

		d__1 = scale1 * a_ref(2, 1);
		dlartg_(&h3, &d__1, csr, snr, &t);

	    }

	    *snr = -(*snr);
	    drot_(&c__2, &a_ref(1, 1), &c__1, &a_ref(1, 2), &c__1, csr, snr);
	    drot_(&c__2, &b_ref(1, 1), &c__1, &b_ref(1, 2), &c__1, csr, snr);

/*           compute inf norms of A and B   

   Computing MAX */
	    d__5 = (d__1 = a_ref(1, 1), abs(d__1)) + (d__2 = a_ref(1, 2), abs(
		    d__2)), d__6 = (d__3 = a_ref(2, 1), abs(d__3)) + (d__4 = 
		    a_ref(2, 2), abs(d__4));
	    h1 = max(d__5,d__6);
/* Computing MAX */
	    d__5 = (d__1 = b_ref(1, 1), abs(d__1)) + (d__2 = b_ref(1, 2), abs(
		    d__2)), d__6 = (d__3 = b_ref(2, 1), abs(d__3)) + (d__4 = 
		    b_ref(2, 2), abs(d__4));
	    h2 = max(d__5,d__6);

	    if (scale1 * h1 >= abs(wr1) * h2) {

/*              find left rotation matrix Q to zero out B(2,1) */

		dlartg_(&b_ref(1, 1), &b_ref(2, 1), csl, snl, &r__);

	    } else {

/*              find left rotation matrix Q to zero out A(2,1) */

		dlartg_(&a_ref(1, 1), &a_ref(2, 1), csl, snl, &r__);

	    }

	    drot_(&c__2, &a_ref(1, 1), lda, &a_ref(2, 1), lda, csl, snl);
	    drot_(&c__2, &b_ref(1, 1), ldb, &b_ref(2, 1), ldb, csl, snl);

	    a_ref(2, 1) = 0.;
	    b_ref(2, 1) = 0.;

	} else {

/*           a pair of complex conjugate eigenvalues   
             first compute the SVD of the matrix B */

	    dlasv2_(&b_ref(1, 1), &b_ref(1, 2), &b_ref(2, 2), &r__, &t, snr, 
		    csr, snl, csl);

/*           Form (A,B) := Q(A,B)Z' where Q is left rotation matrix and   
             Z is right rotation matrix computed from DLASV2 */

	    drot_(&c__2, &a_ref(1, 1), lda, &a_ref(2, 1), lda, csl, snl);
	    drot_(&c__2, &b_ref(1, 1), ldb, &b_ref(2, 1), ldb, csl, snl);
	    drot_(&c__2, &a_ref(1, 1), &c__1, &a_ref(1, 2), &c__1, csr, snr);
	    drot_(&c__2, &b_ref(1, 1), &c__1, &b_ref(1, 2), &c__1, csr, snr);

	    b_ref(2, 1) = 0.;
	    b_ref(1, 2) = 0.;

	}

    }

/*     Unscaling */

    a_ref(1, 1) = anorm * a_ref(1, 1);
    a_ref(2, 1) = anorm * a_ref(2, 1);
    a_ref(1, 2) = anorm * a_ref(1, 2);
    a_ref(2, 2) = anorm * a_ref(2, 2);
    b_ref(1, 1) = bnorm * b_ref(1, 1);
    b_ref(2, 1) = bnorm * b_ref(2, 1);
    b_ref(1, 2) = bnorm * b_ref(1, 2);
    b_ref(2, 2) = bnorm * b_ref(2, 2);

    if (wi == 0.) {
	alphar[1] = a_ref(1, 1);
	alphar[2] = a_ref(2, 2);
	alphai[1] = 0.;
	alphai[2] = 0.;
	beta[1] = b_ref(1, 1);
	beta[2] = b_ref(2, 2);
    } else {
	alphar[1] = anorm * wr1 / scale1 / bnorm;
	alphai[1] = anorm * wi / scale1 / bnorm;
	alphar[2] = alphar[1];
	alphai[2] = -alphai[1];
	beta[1] = 1.;
	beta[2] = 1.;
    }

/* L10: */

    return 0;

/*     End of DLAGV2 */

} /* dlagv2_ */
Ejemplo n.º 3
0
/* Subroutine */ int sgelsd_(integer *m, integer *n, integer *nrhs, real *a, 
	integer *lda, real *b, integer *ldb, real *s, real *rcond, integer *
	rank, real *work, integer *lwork, integer *iwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;

    /* Builtin functions */
    double log(doublereal);

    /* Local variables */
    static real anrm, bnrm;
    static integer itau, nlvl, iascl, ibscl;
    static real sfmin;
    static integer minmn, maxmn, itaup, itauq, mnthr, nwork, ie, il;
    extern /* Subroutine */ int slabad_(real *, real *);
    static integer mm;
    extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer 
	    *, real *, real *, real *, real *, real *, integer *, integer *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static real bignum;
    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer 
	    *, real *, real *, integer *, integer *), slalsd_(char *, integer 
	    *, integer *, integer *, real *, real *, real *, integer *, real *
	    , integer *, real *, integer *, integer *), slascl_(char *
	    , integer *, integer *, real *, real *, integer *, integer *, 
	    real *, integer *, integer *);
    static integer wlalsd;
    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
	    *, real *, real *, integer *, integer *), slacpy_(char *, integer 
	    *, integer *, real *, integer *, real *, integer *), 
	    slaset_(char *, integer *, integer *, real *, real *, real *, 
	    integer *);
    static integer ldwork;
    extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, 
	    integer *, integer *, real *, integer *, real *, real *, integer *
	    , real *, integer *, integer *);
    static integer minwrk, maxwrk;
    static real smlnum;
    extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *, integer *);
    static logical lquery;
    static integer smlsiz;
    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *, integer *);
    static real eps;


#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]


/*  -- LAPACK driver routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    SGELSD computes the minimum-norm solution to a real linear least   
    squares problem:   
        minimize 2-norm(| b - A*x |)   
    using the singular value decomposition (SVD) of A. A is an M-by-N   
    matrix which may be rank-deficient.   

    Several right hand side vectors b and solution vectors x can be   
    handled in a single call; they are stored as the columns of the   
    M-by-NRHS right hand side matrix B and the N-by-NRHS solution   
    matrix X.   

    The problem is solved in three steps:   
    (1) Reduce the coefficient matrix A to bidiagonal form with   
        Householder transformations, reducing the original problem   
        into a "bidiagonal least squares problem" (BLS)   
    (2) Solve the BLS using a divide and conquer approach.   
    (3) Apply back all the Householder tranformations to solve   
        the original least squares problem.   

    The effective rank of A is determined by treating as zero those   
    singular values which are less than RCOND times the largest singular   
    value.   

    The divide and conquer algorithm makes very mild assumptions about   
    floating point arithmetic. It will work on machines with a guard   
    digit in add/subtract, or on those binary machines without guard   
    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or   
    Cray-2. It could conceivably fail on hexadecimal or decimal machines   
    without guard digits, but we know of none.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of A. M >= 0.   

    N       (input) INTEGER   
            The number of columns of A. N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrices B and X. NRHS >= 0.   

    A       (input) REAL array, dimension (LDA,N)   
            On entry, the M-by-N matrix A.   
            On exit, A has been destroyed.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    B       (input/output) REAL array, dimension (LDB,NRHS)   
            On entry, the M-by-NRHS right hand side matrix B.   
            On exit, B is overwritten by the N-by-NRHS solution   
            matrix X.  If m >= n and RANK = n, the residual   
            sum-of-squares for the solution in the i-th column is given   
            by the sum of squares of elements n+1:m in that column.   

    LDB     (input) INTEGER   
            The leading dimension of the array B. LDB >= max(1,max(M,N)).   

    S       (output) REAL array, dimension (min(M,N))   
            The singular values of A in decreasing order.   
            The condition number of A in the 2-norm = S(1)/S(min(m,n)).   

    RCOND   (input) REAL   
            RCOND is used to determine the effective rank of A.   
            Singular values S(i) <= RCOND*S(1) are treated as zero.   
            If RCOND < 0, machine precision is used instead.   

    RANK    (output) INTEGER   
            The effective rank of A, i.e., the number of singular values   
            which are greater than RCOND*S(1).   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK. LWORK must be at least 1.   
            The exact minimum amount of workspace needed depends on M,   
            N and NRHS. As long as LWORK is at least   
                12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,   
            if M is greater than or equal to N or   
                12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,   
            if M is less than N, the code will execute correctly.   
            SMLSIZ is returned by ILAENV and is equal to the maximum   
            size of the subproblems at the bottom of the computation   
            tree (usually about 25), and   
               NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )   
            For good performance, LWORK should generally be larger.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    IWORK   (workspace) INTEGER array, dimension (LIWORK)   
            LIWORK >= 3 * MINMN * NLVL + 11 * MINMN,   
            where MINMN = MIN( M,N ).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  the algorithm for computing the SVD failed to converge;   
                  if INFO = i, i off-diagonal elements of an intermediate   
                  bidiagonal form did not converge to zero.   

    Further Details   
    ===============   

    Based on contributions by   
       Ming Gu and Ren-Cang Li, Computer Science Division, University of   
         California at Berkeley, USA   
       Osni Marques, LBNL/NERSC, USA   

    =====================================================================   


       Test the input arguments.   

       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --s;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;
    minmn = min(*m,*n);
    maxmn = max(*m,*n);
    mnthr = ilaenv_(&c__6, "SGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    lquery = *lwork == -1;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*m)) {
	*info = -5;
    } else if (*ldb < max(1,maxmn)) {
	*info = -7;
    }

    smlsiz = ilaenv_(&c__9, "SGELSD", " ", &c__0, &c__0, &c__0, &c__0, (
	    ftnlen)6, (ftnlen)1);

/*     Compute workspace.   
       (Note: Comments in the code beginning "Workspace:" describe the   
       minimal amount of workspace needed at that point in the code,   
       as well as the preferred amount for good performance.   
       NB refers to the optimal block size for the immediately   
       following subroutine, as returned by ILAENV.) */

    minwrk = 1;
    minmn = max(1,minmn);
/* Computing MAX */
    i__1 = (integer) (log((real) minmn / (real) (smlsiz + 1)) / log(2.f)) + 1;
    nlvl = max(i__1,0);

    if (*info == 0) {
	maxwrk = 0;
	mm = *m;
	if (*m >= *n && *m >= mnthr) {

/*           Path 1a - overdetermined, with many more rows than columns. */

	    mm = *n;
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, 
		    n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
	    maxwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "SORMQR", "LT", 
		    m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2);
	    maxwrk = max(i__1,i__2);
	}
	if (*m >= *n) {

/*           Path 1 - overdetermined or exactly determined.   

   Computing MAX */
	    i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "SGEBRD"
		    , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
	    maxwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "SORMBR", 
		    "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
	    maxwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "SORMBR",
		     "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
	    maxwrk = max(i__1,i__2);
/* Computing 2nd power */
	    i__1 = smlsiz + 1;
	    wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * *
		    nrhs + i__1 * i__1;
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
	    maxwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), 
		    i__2 = *n * 3 + wlalsd;
	    minwrk = max(i__1,i__2);
	}
	if (*n > *m) {
/* Computing 2nd power */
	    i__1 = smlsiz + 1;
	    wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * *
		    nrhs + i__1 * i__1;
	    if (*n >= mnthr) {

/*              Path 2a - underdetermined, with many more columns   
                than rows. */

		maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, 
			&c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * 
			ilaenv_(&c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (
			ftnlen)6, (ftnlen)1);
		maxwrk = max(i__1,i__2);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(&
			c__1, "SORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, (
			ftnlen)3);
		maxwrk = max(i__1,i__2);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * 
			ilaenv_(&c__1, "SORMBR", "PLN", m, nrhs, m, &c_n1, (
			ftnlen)6, (ftnlen)3);
		maxwrk = max(i__1,i__2);
		if (*nrhs > 1) {
/* Computing MAX */
		    i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
		    maxwrk = max(i__1,i__2);
		} else {
/* Computing MAX */
		    i__1 = maxwrk, i__2 = *m * *m + (*m << 1);
		    maxwrk = max(i__1,i__2);
		}
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "SORMLQ", 
			"LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2);
		maxwrk = max(i__1,i__2);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd;
		maxwrk = max(i__1,i__2);
	    } else {

/*              Path 2 - remaining underdetermined cases. */

		maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "SGEBRD", " ", m,
			 n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "SORMBR"
			, "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
		maxwrk = max(i__1,i__2);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR", 
			"PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3);
		maxwrk = max(i__1,i__2);
/* Computing MAX */
		i__1 = maxwrk, i__2 = *m * 3 + wlalsd;
		maxwrk = max(i__1,i__2);
	    }
/* Computing MAX */
	    i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), 
		    i__2 = *m * 3 + wlalsd;
	    minwrk = max(i__1,i__2);
	}
	minwrk = min(minwrk,maxwrk);
	work[1] = (real) maxwrk;
	if (*lwork < minwrk && ! lquery) {
	    *info = -12;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGELSD", &i__1);
	return 0;
    } else if (lquery) {
	goto L10;
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0) {
	*rank = 0;
	return 0;
    }

/*     Get machine parameters. */

    eps = slamch_("P");
    sfmin = slamch_("S");
    smlnum = sfmin / eps;
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

/*     Scale A if max entry outside range [SMLNUM,BIGNUM]. */

    anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]);
    iascl = 0;
    if (anrm > 0.f && anrm < smlnum) {

/*        Scale matrix norm up to SMLNUM. */

	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
		info);
	iascl = 1;
    } else if (anrm > bignum) {

/*        Scale matrix norm down to BIGNUM. */

	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
		info);
	iascl = 2;
    } else if (anrm == 0.f) {

/*        Matrix all zero. Return zero solution. */

	i__1 = max(*m,*n);
	slaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb);
	slaset_("F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1);
	*rank = 0;
	goto L10;
    }

/*     Scale B if max entry outside range [SMLNUM,BIGNUM]. */

    bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
    ibscl = 0;
    if (bnrm > 0.f && bnrm < smlnum) {

/*        Scale matrix norm up to SMLNUM. */

	slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
		 info);
	ibscl = 1;
    } else if (bnrm > bignum) {

/*        Scale matrix norm down to BIGNUM. */

	slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
		 info);
	ibscl = 2;
    }

/*     If M < N make sure certain entries of B are zero. */

    if (*m < *n) {
	i__1 = *n - *m;
	slaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b_ref(*m + 1, 1), ldb);
    }

/*     Overdetermined case. */

    if (*m >= *n) {

/*        Path 1 - overdetermined or exactly determined. */

	mm = *m;
	if (*m >= mnthr) {

/*           Path 1a - overdetermined, with many more rows than columns. */

	    mm = *n;
	    itau = 1;
	    nwork = itau + *n;

/*           Compute A=Q*R.   
             (Workspace: need 2*N, prefer N+N*NB) */

	    i__1 = *lwork - nwork + 1;
	    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
		     info);

/*           Multiply B by transpose(Q).   
             (Workspace: need N+NRHS, prefer N+NRHS*NB) */

	    i__1 = *lwork - nwork + 1;
	    sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
		    b_offset], ldb, &work[nwork], &i__1, info);

/*           Zero out below R. */

	    if (*n > 1) {
		i__1 = *n - 1;
		i__2 = *n - 1;
		slaset_("L", &i__1, &i__2, &c_b82, &c_b82, &a_ref(2, 1), lda);
	    }
	}

	ie = 1;
	itauq = ie + *n;
	itaup = itauq + *n;
	nwork = itaup + *n;

/*        Bidiagonalize R in A.   
          (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */

	i__1 = *lwork - nwork + 1;
	sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
		work[itaup], &work[nwork], &i__1, info);

/*        Multiply B by transpose of left bidiagonalizing vectors of R.   
          (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */

	i__1 = *lwork - nwork + 1;
	sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], 
		&b[b_offset], ldb, &work[nwork], &i__1, info);

/*        Solve the bidiagonal least squares problem. */

	slalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, 
		rcond, rank, &work[nwork], &iwork[1], info);
	if (*info != 0) {
	    goto L10;
	}

/*        Multiply B by right bidiagonalizing vectors of R. */

	i__1 = *lwork - nwork + 1;
	sormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &
		b[b_offset], ldb, &work[nwork], &i__1, info);

    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max(
		i__1,*nrhs), i__2 = *n - *m * 3;
	if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) {

/*        Path 2a - underdetermined, with many more columns than rows   
          and sufficient workspace for an efficient algorithm. */

	    ldwork = *m;
/* Computing MAX   
   Computing MAX */
	    i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = 
		    max(i__3,*nrhs), i__4 = *n - *m * 3;
	    i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + 
		    *m + *m * *nrhs;
	    if (*lwork >= max(i__1,i__2)) {
		ldwork = *lda;
	    }
	    itau = 1;
	    nwork = *m + 1;

/*        Compute A=L*Q.   
          (Workspace: need 2*M, prefer M+M*NB) */

	    i__1 = *lwork - nwork + 1;
	    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
		     info);
	    il = nwork;

/*        Copy L to WORK(IL), zeroing out above its diagonal. */

	    slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
	    i__1 = *m - 1;
	    i__2 = *m - 1;
	    slaset_("U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], &
		    ldwork);
	    ie = il + ldwork * *m;
	    itauq = ie + *m;
	    itaup = itauq + *m;
	    nwork = itaup + *m;

/*        Bidiagonalize L in WORK(IL).   
          (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */

	    i__1 = *lwork - nwork + 1;
	    sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], 
		    &work[itaup], &work[nwork], &i__1, info);

/*        Multiply B by transpose of left bidiagonalizing vectors of L.   
          (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */

	    i__1 = *lwork - nwork + 1;
	    sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[
		    itauq], &b[b_offset], ldb, &work[nwork], &i__1, info);

/*        Solve the bidiagonal least squares problem. */

	    slalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], 
		    ldb, rcond, rank, &work[nwork], &iwork[1], info);
	    if (*info != 0) {
		goto L10;
	    }

/*        Multiply B by right bidiagonalizing vectors of L. */

	    i__1 = *lwork - nwork + 1;
	    sormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[
		    itaup], &b[b_offset], ldb, &work[nwork], &i__1, info);

/*        Zero out below first M rows of B. */

	    i__1 = *n - *m;
	    slaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b_ref(*m + 1, 1), ldb);
	    nwork = itau + *m;

/*        Multiply transpose(Q) by B.   
          (Workspace: need M+NRHS, prefer M+NRHS*NB) */

	    i__1 = *lwork - nwork + 1;
	    sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
		    b_offset], ldb, &work[nwork], &i__1, info);

	} else {

/*        Path 2 - remaining underdetermined cases. */

	    ie = 1;
	    itauq = ie + *m;
	    itaup = itauq + *m;
	    nwork = itaup + *m;

/*        Bidiagonalize A.   
          (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */

	    i__1 = *lwork - nwork + 1;
	    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
		    work[itaup], &work[nwork], &i__1, info);

/*        Multiply B by transpose of left bidiagonalizing vectors.   
          (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */

	    i__1 = *lwork - nwork + 1;
	    sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq]
		    , &b[b_offset], ldb, &work[nwork], &i__1, info);

/*        Solve the bidiagonal least squares problem. */

	    slalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], 
		    ldb, rcond, rank, &work[nwork], &iwork[1], info);
	    if (*info != 0) {
		goto L10;
	    }

/*        Multiply B by right bidiagonalizing vectors of A. */

	    i__1 = *lwork - nwork + 1;
	    sormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup]
		    , &b[b_offset], ldb, &work[nwork], &i__1, info);

	}
    }

/*     Undo scaling. */

    if (iascl == 1) {
	slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
		 info);
	slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
		minmn, info);
    } else if (iascl == 2) {
	slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
		 info);
	slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
		minmn, info);
    }
    if (ibscl == 1) {
	slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
		 info);
    } else if (ibscl == 2) {
	slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
		 info);
    }

L10:
    work[1] = (real) maxwrk;
    return 0;

/*     End of SGELSD */

} /* sgelsd_ */
Ejemplo n.º 4
0
/* Subroutine */ int dget10_(integer *m, integer *n, doublereal *a, integer *
	lda, doublereal *b, integer *ldb, doublereal *work, doublereal *
	result)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    doublereal d__1, d__2;

    /* Local variables */
    static doublereal unfl;
    static integer j;
    extern doublereal dasum_(integer *, doublereal *, integer *);
    static doublereal anorm;
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
	    doublereal *, integer *), daxpy_(integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *);
    static doublereal wnorm;
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
	    integer *, doublereal *, integer *, doublereal *);
    static doublereal eps;


#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    DGET10 compares two matrices A and B and computes the ratio   
    RESULT = norm( A - B ) / ( norm(A) * M * EPS )   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrices A and B.   

    N       (input) INTEGER   
            The number of columns of the matrices A and B.   

    A       (input) DOUBLE PRECISION array, dimension (LDA,N)   
            The m by n matrix A.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    B       (input) DOUBLE PRECISION array, dimension (LDB,N)   
            The m by n matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,M).   

    WORK    (workspace) DOUBLE PRECISION array, dimension (M)   

    RESULT  (output) DOUBLE PRECISION   
            RESULT = norm( A - B ) / ( norm(A) * M * EPS )   

    =====================================================================   


       Quick return if possible   

       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    if (*m <= 0 || *n <= 0) {
	*result = 0.;
	return 0;
    }

    unfl = dlamch_("Safe minimum");
    eps = dlamch_("Precision");

    wnorm = 0.;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	dcopy_(m, &a_ref(1, j), &c__1, &work[1], &c__1);
	daxpy_(m, &c_b7, &b_ref(1, j), &c__1, &work[1], &c__1);
/* Computing MAX */
	d__1 = wnorm, d__2 = dasum_(n, &work[1], &c__1);
	wnorm = max(d__1,d__2);
/* L10: */
    }

/* Computing MAX */
    d__1 = dlange_("1", m, n, &a[a_offset], lda, &work[1]);
    anorm = max(d__1,unfl);

    if (anorm > wnorm) {
	*result = wnorm / anorm / (*m * eps);
    } else {
	if (anorm < 1.) {
/* Computing MIN */
	    d__1 = wnorm, d__2 = *m * anorm;
	    *result = min(d__1,d__2) / anorm / (*m * eps);
	} else {
/* Computing MIN */
	    d__1 = wnorm / anorm, d__2 = (doublereal) (*m);
	    *result = min(d__1,d__2) / (*m * eps);
	}
    }

    return 0;

/*     End of DGET10 */

} /* dget10_ */
Ejemplo n.º 5
0
/* Subroutine */ int ctrtri_(char *uplo, char *diag, integer *n, complex *a, 
	integer *lda, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CTRTRI computes the inverse of a complex upper or lower triangular   
    matrix A.   

    This is the Level 3 BLAS version of the algorithm.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  A is upper triangular;   
            = 'L':  A is lower triangular.   

    DIAG    (input) CHARACTER*1   
            = 'N':  A is non-unit triangular;   
            = 'U':  A is unit triangular.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the triangular matrix A.  If UPLO = 'U', the   
            leading N-by-N upper triangular part of the array A contains   
            the upper triangular matrix, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of the array A contains   
            the lower triangular matrix, and the strictly upper   
            triangular part of A is not referenced.  If DIAG = 'U', the   
            diagonal elements of A are also not referenced and are   
            assumed to be 1.   
            On exit, the (triangular) inverse of the original matrix, in   
            the same storage format.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = i, A(i,i) is exactly zero.  The triangular   
                 matrix is singular and its inverse can not be computed.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static complex c_b1 = {1.f,0.f};
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__2 = 2;
    
    /* System generated locals */
    address a__1[2];
    integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5;
    complex q__1;
    char ch__1[2];
    /* Builtin functions   
       Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
    /* Local variables */
    static integer j;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *), ctrsm_(char *, char *,
	     char *, char *, integer *, integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    static logical upper;
    extern /* Subroutine */ int ctrti2_(char *, char *, integer *, complex *, 
	    integer *, integer *);
    static integer jb, nb, nn;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static logical nounit;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    nounit = lsame_(diag, "N");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (! nounit && ! lsame_(diag, "U")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CTRTRI", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Check for singularity if non-unit. */

    if (nounit) {
	i__1 = *n;
	for (*info = 1; *info <= i__1; ++(*info)) {
	    i__2 = a_subscr(*info, *info);
	    if (a[i__2].r == 0.f && a[i__2].i == 0.f) {
		return 0;
	    }
/* L10: */
	}
	*info = 0;
    }

/*     Determine the block size for this environment.   

   Writing concatenation */
    i__3[0] = 1, a__1[0] = uplo;
    i__3[1] = 1, a__1[1] = diag;
    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
    nb = ilaenv_(&c__1, "CTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)2);
    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	ctrti2_(uplo, diag, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute inverse of upper triangular matrix */

	    i__1 = *n;
	    i__2 = nb;
	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
/* Computing MIN */
		i__4 = nb, i__5 = *n - j + 1;
		jb = min(i__4,i__5);

/*              Compute rows 1:j-1 of current block column */

		i__4 = j - 1;
		ctrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, &
			c_b1, &a[a_offset], lda, &a_ref(1, j), lda);
		i__4 = j - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		ctrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, &
			q__1, &a_ref(j, j), lda, &a_ref(1, j), lda);

/*              Compute inverse of current diagonal block */

		ctrti2_("Upper", diag, &jb, &a_ref(j, j), lda, info);
/* L20: */
	    }
	} else {

/*           Compute inverse of lower triangular matrix */

	    nn = (*n - 1) / nb * nb + 1;
	    i__2 = -nb;
	    for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) {
/* Computing MIN */
		i__1 = nb, i__4 = *n - j + 1;
		jb = min(i__1,i__4);
		if (j + jb <= *n) {

/*                 Compute rows j+jb:n of current block column */

		    i__1 = *n - j - jb + 1;
		    ctrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, 
			    &c_b1, &a_ref(j + jb, j + jb), lda, &a_ref(j + jb,
			     j), lda);
		    i__1 = *n - j - jb + 1;
		    q__1.r = -1.f, q__1.i = 0.f;
		    ctrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb,
			     &q__1, &a_ref(j, j), lda, &a_ref(j + jb, j), lda);
		}

/*              Compute inverse of current diagonal block */

		ctrti2_("Lower", diag, &jb, &a_ref(j, j), lda, info);
/* L30: */
	    }
	}
    }

    return 0;

/*     End of CTRTRI */

} /* ctrtri_ */
Ejemplo n.º 6
0
/* Subroutine */ int derrlq_(char *path, integer *nunit)
{
    /* Builtin functions */
    integer s_wsle(cilist *), e_wsle(void);
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    static integer info;
    static doublereal a[4]	/* was [2][2] */, b[2];
    static integer i__, j;
    static doublereal w[2], x[2];
    extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *), dorgl2_(
	    integer *, integer *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *), dorml2_(char *, char *, 
	    integer *, integer *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *);
    static doublereal af[4]	/* was [2][2] */;
    extern /* Subroutine */ int alaesm_(char *, logical *, integer *),
	     dgelqf_(integer *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *, integer *), dgelqs_(
	    integer *, integer *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    integer *), chkxer_(char *, integer *, integer *, logical *, 
	    logical *), dorglq_(integer *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
	    integer *), dormlq_(char *, char *, integer *, integer *, integer 
	    *, doublereal *, integer *, doublereal *, doublereal *, integer *,
	     doublereal *, integer *, integer *);

    /* Fortran I/O blocks */
    static cilist io___1 = { 0, 0, 0, 0, 0 };



#define a_ref(a_1,a_2) a[(a_2)*2 + a_1 - 3]
#define af_ref(a_1,a_2) af[(a_2)*2 + a_1 - 3]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    DERRLQ tests the error exits for the DOUBLE PRECISION routines   
    that use the LQ decomposition of a general matrix.   

    Arguments   
    =========   

    PATH    (input) CHARACTER*3   
            The LAPACK path name for the routines to be tested.   

    NUNIT   (input) INTEGER   
            The unit number for output.   

    ===================================================================== */


    infoc_1.nout = *nunit;
    io___1.ciunit = infoc_1.nout;
    s_wsle(&io___1);
    e_wsle();

/*     Set the variables to innocuous values. */

    for (j = 1; j <= 2; ++j) {
	for (i__ = 1; i__ <= 2; ++i__) {
	    a_ref(i__, j) = 1. / (doublereal) (i__ + j);
	    af_ref(i__, j) = 1. / (doublereal) (i__ + j);
/* L10: */
	}
	b[j - 1] = 0.;
	w[j - 1] = 0.;
	x[j - 1] = 0.;
/* L20: */
    }
    infoc_1.ok = TRUE_;

/*     Error exits for LQ factorization   

       DGELQF */

    s_copy(srnamc_1.srnamt, "DGELQF", (ftnlen)6, (ftnlen)6);
    infoc_1.infot = 1;
    dgelqf_(&c_n1, &c__0, a, &c__1, b, w, &c__1, &info);
    chkxer_("DGELQF", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 2;
    dgelqf_(&c__0, &c_n1, a, &c__1, b, w, &c__1, &info);
    chkxer_("DGELQF", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 4;
    dgelqf_(&c__2, &c__1, a, &c__1, b, w, &c__2, &info);
    chkxer_("DGELQF", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 7;
    dgelqf_(&c__2, &c__1, a, &c__2, b, w, &c__1, &info);
    chkxer_("DGELQF", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);

/*     DGELQ2 */

    s_copy(srnamc_1.srnamt, "DGELQ2", (ftnlen)6, (ftnlen)6);
    infoc_1.infot = 1;
    dgelq2_(&c_n1, &c__0, a, &c__1, b, w, &info);
    chkxer_("DGELQ2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 2;
    dgelq2_(&c__0, &c_n1, a, &c__1, b, w, &info);
    chkxer_("DGELQ2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 4;
    dgelq2_(&c__2, &c__1, a, &c__1, b, w, &info);
    chkxer_("DGELQ2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);

/*     DGELQS */

    s_copy(srnamc_1.srnamt, "DGELQS", (ftnlen)6, (ftnlen)6);
    infoc_1.infot = 1;
    dgelqs_(&c_n1, &c__0, &c__0, a, &c__1, x, b, &c__1, w, &c__1, &info);
    chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 2;
    dgelqs_(&c__0, &c_n1, &c__0, a, &c__1, x, b, &c__1, w, &c__1, &info);
    chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 2;
    dgelqs_(&c__2, &c__1, &c__0, a, &c__2, x, b, &c__1, w, &c__1, &info);
    chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 3;
    dgelqs_(&c__0, &c__0, &c_n1, a, &c__1, x, b, &c__1, w, &c__1, &info);
    chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 5;
    dgelqs_(&c__2, &c__2, &c__0, a, &c__1, x, b, &c__2, w, &c__1, &info);
    chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 8;
    dgelqs_(&c__1, &c__2, &c__0, a, &c__1, x, b, &c__1, w, &c__1, &info);
    chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 10;
    dgelqs_(&c__1, &c__1, &c__2, a, &c__1, x, b, &c__1, w, &c__1, &info);
    chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);

/*     DORGLQ */

    s_copy(srnamc_1.srnamt, "DORGLQ", (ftnlen)6, (ftnlen)6);
    infoc_1.infot = 1;
    dorglq_(&c_n1, &c__0, &c__0, a, &c__1, x, w, &c__1, &info);
    chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 2;
    dorglq_(&c__0, &c_n1, &c__0, a, &c__1, x, w, &c__1, &info);
    chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 2;
    dorglq_(&c__2, &c__1, &c__0, a, &c__2, x, w, &c__2, &info);
    chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 3;
    dorglq_(&c__0, &c__0, &c_n1, a, &c__1, x, w, &c__1, &info);
    chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 3;
    dorglq_(&c__1, &c__1, &c__2, a, &c__1, x, w, &c__1, &info);
    chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 5;
    dorglq_(&c__2, &c__2, &c__0, a, &c__1, x, w, &c__2, &info);
    chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 8;
    dorglq_(&c__2, &c__2, &c__0, a, &c__2, x, w, &c__1, &info);
    chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);

/*     DORGL2 */

    s_copy(srnamc_1.srnamt, "DORGL2", (ftnlen)6, (ftnlen)6);
    infoc_1.infot = 1;
    dorgl2_(&c_n1, &c__0, &c__0, a, &c__1, x, w, &info);
    chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 2;
    dorgl2_(&c__0, &c_n1, &c__0, a, &c__1, x, w, &info);
    chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 2;
    dorgl2_(&c__2, &c__1, &c__0, a, &c__2, x, w, &info);
    chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 3;
    dorgl2_(&c__0, &c__0, &c_n1, a, &c__1, x, w, &info);
    chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 3;
    dorgl2_(&c__1, &c__1, &c__2, a, &c__1, x, w, &info);
    chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 5;
    dorgl2_(&c__2, &c__2, &c__0, a, &c__1, x, w, &info);
    chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);

/*     DORMLQ */

    s_copy(srnamc_1.srnamt, "DORMLQ", (ftnlen)6, (ftnlen)6);
    infoc_1.infot = 1;
    dormlq_("/", "N", &c__0, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 2;
    dormlq_("L", "/", &c__0, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 3;
    dormlq_("L", "N", &c_n1, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 4;
    dormlq_("L", "N", &c__0, &c_n1, &c__0, a, &c__1, x, af, &c__1, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 5;
    dormlq_("L", "N", &c__0, &c__0, &c_n1, a, &c__1, x, af, &c__1, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 5;
    dormlq_("L", "N", &c__0, &c__1, &c__1, a, &c__1, x, af, &c__1, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 5;
    dormlq_("R", "N", &c__1, &c__0, &c__1, a, &c__1, x, af, &c__1, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 7;
    dormlq_("L", "N", &c__2, &c__0, &c__2, a, &c__1, x, af, &c__2, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 7;
    dormlq_("R", "N", &c__0, &c__2, &c__2, a, &c__1, x, af, &c__1, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 10;
    dormlq_("L", "N", &c__2, &c__1, &c__0, a, &c__2, x, af, &c__1, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 12;
    dormlq_("L", "N", &c__1, &c__2, &c__0, a, &c__1, x, af, &c__1, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 12;
    dormlq_("R", "N", &c__2, &c__1, &c__0, a, &c__1, x, af, &c__2, w, &c__1, &
	    info);
    chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);

/*     DORML2 */

    s_copy(srnamc_1.srnamt, "DORML2", (ftnlen)6, (ftnlen)6);
    infoc_1.infot = 1;
    dorml2_("/", "N", &c__0, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &info);
    chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 2;
    dorml2_("L", "/", &c__0, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &info);
    chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 3;
    dorml2_("L", "N", &c_n1, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &info);
    chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 4;
    dorml2_("L", "N", &c__0, &c_n1, &c__0, a, &c__1, x, af, &c__1, w, &info);
    chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 5;
    dorml2_("L", "N", &c__0, &c__0, &c_n1, a, &c__1, x, af, &c__1, w, &info);
    chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 5;
    dorml2_("L", "N", &c__0, &c__1, &c__1, a, &c__1, x, af, &c__1, w, &info);
    chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 5;
    dorml2_("R", "N", &c__1, &c__0, &c__1, a, &c__1, x, af, &c__1, w, &info);
    chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 7;
    dorml2_("L", "N", &c__2, &c__1, &c__2, a, &c__1, x, af, &c__2, w, &info);
    chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 7;
    dorml2_("R", "N", &c__1, &c__2, &c__2, a, &c__1, x, af, &c__1, w, &info);
    chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);
    infoc_1.infot = 10;
    dorml2_("L", "N", &c__2, &c__1, &c__0, a, &c__2, x, af, &c__1, w, &info);
    chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
	    infoc_1.ok);

/*     Print a summary line. */

    alaesm_(path, &infoc_1.ok, &infoc_1.nout);

    return 0;

/*     End of DERRLQ */

} /* derrlq_ */
Ejemplo n.º 7
0
/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer *
	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    DGELQF computes an LQ factorization of a real M-by-N matrix A:   
    A = L * Q.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
            On entry, the M-by-N matrix A.   
            On exit, the elements on and below the diagonal of the array   
            contain the m-by-min(m,n) lower trapezoidal matrix L (L is   
            lower triangular if m <= n); the elements above the diagonal,   
            with the array TAU, represent the orthogonal matrix Q as a   
            product of elementary reflectors (see Further Details).   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors (see Further   
            Details).   

    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.  LWORK >= max(1,M).   
            For optimum performance LWORK >= M*NB, where NB is the   
            optimal blocksize.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The matrix Q is represented as a product of elementary reflectors   

       Q = H(k) . . . H(2) H(1), where k = min(m,n).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),   
    and tau in TAU(i).   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__3 = 3;
    static integer c__2 = 2;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer i__, k, nbmin, iinfo;
    extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *);
    static integer ib, nb;
    extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, 
	    integer *, integer *, integer *, doublereal *, integer *, 
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    integer *);
    static integer nx;
    extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static integer ldwork, lwkopt;
    static logical lquery;
    static integer iws;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
	    1);
    lwkopt = *m * nb;
    work[1] = (doublereal) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*m)) {
	*info = -4;
    } else if (*lwork < max(1,*m) && ! lquery) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DGELQF", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    k = min(*m,*n);
    if (k == 0) {
	work[1] = 1.;
	return 0;
    }

    nbmin = 2;
    nx = 0;
    iws = *m;
    if (nb > 1 && nb < k) {

/*        Determine when to cross over from blocked to unblocked code.   

   Computing MAX */
	i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, (
		ftnlen)6, (ftnlen)1);
	nx = max(i__1,i__2);
	if (nx < k) {

/*           Determine if workspace is large enough for blocked code. */

	    ldwork = *m;
	    iws = ldwork * nb;
	    if (*lwork < iws) {

/*              Not enough workspace to use optimal NB:  reduce NB and   
                determine the minimum value of NB. */

		nb = *lwork / ldwork;
/* Computing MAX */
		i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, &
			c_n1, (ftnlen)6, (ftnlen)1);
		nbmin = max(i__1,i__2);
	    }
	}
    }

    if (nb >= nbmin && nb < k && nx < k) {

/*        Use blocked code initially */

	i__1 = k - nx;
	i__2 = nb;
	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
	    i__3 = k - i__ + 1;
	    ib = min(i__3,nb);

/*           Compute the LQ factorization of the current block   
             A(i:i+ib-1,i:n) */

	    i__3 = *n - i__ + 1;
	    dgelq2_(&ib, &i__3, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
		    iinfo);
	    if (i__ + ib <= *m) {

/*              Form the triangular factor of the block reflector   
                H = H(i) H(i+1) . . . H(i+ib-1) */

		i__3 = *n - i__ + 1;
		dlarft_("Forward", "Rowwise", &i__3, &ib, &a_ref(i__, i__), 
			lda, &tau[i__], &work[1], &ldwork);

/*              Apply H to A(i+ib:m,i:n) from the right */

		i__3 = *m - i__ - ib + 1;
		i__4 = *n - i__ + 1;
		dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, 
			&i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, 
			&a_ref(i__ + ib, i__), lda, &work[ib + 1], &ldwork);
	    }
/* L10: */
	}
    } else {
	i__ = 1;
    }

/*     Use unblocked code to factor the last or only block. */

    if (i__ <= k) {
	i__2 = *m - i__ + 1;
	i__1 = *n - i__ + 1;
	dgelq2_(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
		iinfo);
    }

    work[1] = (doublereal) iws;
    return 0;

/*     End of DGELQF */

} /* dgelqf_ */
Ejemplo n.º 8
0
/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer *
	lda, doublereal *tau, doublereal *work, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    DGEQR2 computes a QR factorization of a real m by n matrix A:   
    A = Q * R.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
            On entry, the m by n matrix A.   
            On exit, the elements on and above the diagonal of the array   
            contain the min(m,n) by n upper trapezoidal matrix R (R is   
            upper triangular if m >= n); the elements below the diagonal,   
            with the array TAU, represent the orthogonal matrix Q as a   
            product of elementary reflectors (see Further Details).   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors (see Further   
            Details).   

    WORK    (workspace) DOUBLE PRECISION array, dimension (N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The matrix Q is represented as a product of elementary reflectors   

       Q = H(1) H(2) . . . H(k), where k = min(m,n).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),   
    and tau in TAU(i).   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    static integer i__, k;
    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
	    doublereal *), dlarfg_(integer *, doublereal *, 
	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *);
    static doublereal aii;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*m)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DGEQR2", &i__1);
	return 0;
    }

    k = min(*m,*n);

    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        Generate elementary reflector H(i) to annihilate A(i+1:m,i)   

   Computing MIN */
	i__2 = i__ + 1;
	i__3 = *m - i__ + 1;
	dlarfg_(&i__3, &a_ref(i__, i__), &a_ref(min(i__2,*m), i__), &c__1, &
		tau[i__]);
	if (i__ < *n) {

/*           Apply H(i) to A(i:m,i+1:n) from the left */

	    aii = a_ref(i__, i__);
	    a_ref(i__, i__) = 1.;
	    i__2 = *m - i__ + 1;
	    i__3 = *n - i__;
	    dlarf_("Left", &i__2, &i__3, &a_ref(i__, i__), &c__1, &tau[i__], &
		    a_ref(i__, i__ + 1), lda, &work[1]);
	    a_ref(i__, i__) = aii;
	}
/* L10: */
    }
    return 0;

/*     End of DGEQR2 */

} /* dgeqr2_ */
Ejemplo n.º 9
0
/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, 
	doublereal *x, integer *incx, doublereal *y, integer *incy, 
	doublereal *a, integer *lda)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    /* Local variables */
    static integer info;
    static doublereal temp1, temp2;
    static integer i__, j;
    extern logical lsame_(char *, char *);
    static integer ix, iy, jx, jy, kx, ky;
    extern /* Subroutine */ int xerbla_(char *, integer *);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
/*  Purpose   
    =======   
    DSYR2  performs the symmetric rank 2 operation   
       A := alpha*x*y' + alpha*y*x' + A,   
    where alpha is a scalar, x and y are n element vectors and A is an n   
    by n symmetric matrix.   
    Parameters   
    ==========   
    UPLO   - CHARACTER*1.   
             On entry, UPLO specifies whether the upper or lower   
             triangular part of the array A is to be referenced as   
             follows:   
                UPLO = 'U' or 'u'   Only the upper triangular part of A   
                                    is to be referenced.   
                UPLO = 'L' or 'l'   Only the lower triangular part of A   
                                    is to be referenced.   
             Unchanged on exit.   
    N      - INTEGER.   
             On entry, N specifies the order of the matrix A.   
             N must be at least zero.   
             Unchanged on exit.   
    ALPHA  - DOUBLE PRECISION.   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   
    X      - DOUBLE PRECISION array of dimension at least   
             ( 1 + ( n - 1 )*abs( INCX ) ).   
             Before entry, the incremented array X must contain the n   
             element vector x.   
             Unchanged on exit.   
    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   
    Y      - DOUBLE PRECISION array of dimension at least   
             ( 1 + ( n - 1 )*abs( INCY ) ).   
             Before entry, the incremented array Y must contain the n   
             element vector y.   
             Unchanged on exit.   
    INCY   - INTEGER.   
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   
             Unchanged on exit.   
    A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).   
             Before entry with  UPLO = 'U' or 'u', the leading n by n   
             upper triangular part of the array A must contain the upper   
             triangular part of the symmetric matrix and the strictly   
             lower triangular part of A is not referenced. On exit, the   
             upper triangular part of the array A is overwritten by the   
             upper triangular part of the updated matrix.   
             Before entry with UPLO = 'L' or 'l', the leading n by n   
             lower triangular part of the array A must contain the lower   
             triangular part of the symmetric matrix and the strictly   
             upper triangular part of A is not referenced. On exit, the   
             lower triangular part of the array A is overwritten by the   
             lower triangular part of the updated matrix.   
    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared   
             in the calling (sub) program. LDA must be at least   
             max( 1, n ).   
             Unchanged on exit.   
    Level 2 Blas routine.   
    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   
       Test the input parameters.   
       Parameter adjustments */
    --x;
    --y;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    /* Function Body */
    info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
	info = 1;
    } else if (*n < 0) {
	info = 2;
    } else if (*incx == 0) {
	info = 5;
    } else if (*incy == 0) {
	info = 7;
    } else if (*lda < max(1,*n)) {
	info = 9;
    }
    if (info != 0) {
	xerbla_("DSYR2 ", &info);
	return 0;
    }
/*     Quick return if possible. */
    if (*n == 0 || *alpha == 0.) {
	return 0;
    }
/*     Set up the start points in X and Y if the increments are not both   
       unity. */
    if (*incx != 1 || *incy != 1) {
	if (*incx > 0) {
	    kx = 1;
	} else {
	    kx = 1 - (*n - 1) * *incx;
	}
	if (*incy > 0) {
	    ky = 1;
	} else {
	    ky = 1 - (*n - 1) * *incy;
	}
	jx = kx;
	jy = ky;
    }
/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through the triangular part   
       of A. */
    if (lsame_(uplo, "U")) {
/*        Form  A  when A is stored in the upper triangle. */
	if (*incx == 1 && *incy == 1) {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		if (x[j] != 0. || y[j] != 0.) {
		    temp1 = *alpha * y[j];
		    temp2 = *alpha * x[j];
		    i__2 = j;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp1 + y[
				i__] * temp2;
/* L10: */
		    }
		}
/* L20: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		if (x[jx] != 0. || y[jy] != 0.) {
		    temp1 = *alpha * y[jy];
		    temp2 = *alpha * x[jx];
		    ix = kx;
		    iy = ky;
		    i__2 = j;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp1 + y[iy] 
				* temp2;
			ix += *incx;
			iy += *incy;
/* L30: */
		    }
		}
		jx += *incx;
		jy += *incy;
/* L40: */
	    }
	}
    } else {
/*        Form  A  when A is stored in the lower triangle. */
	if (*incx == 1 && *incy == 1) {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		if (x[j] != 0. || y[j] != 0.) {
		    temp1 = *alpha * y[j];
		    temp2 = *alpha * x[j];
		    i__2 = *n;
		    for (i__ = j; i__ <= i__2; ++i__) {
			a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp1 + y[
				i__] * temp2;
/* L50: */
		    }
		}
/* L60: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		if (x[jx] != 0. || y[jy] != 0.) {
		    temp1 = *alpha * y[jy];
		    temp2 = *alpha * x[jx];
		    ix = jx;
		    iy = jy;
		    i__2 = *n;
		    for (i__ = j; i__ <= i__2; ++i__) {
			a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp1 + y[iy] 
				* temp2;
			ix += *incx;
			iy += *incy;
/* L70: */
		    }
		}
		jx += *incx;
		jy += *incy;
/* L80: */
	    }
	}
    }
    return 0;
/*     End of DSYR2 . */
} /* dsyr2_ */
Ejemplo n.º 10
0
/* Subroutine */ int ztrt05_(char *uplo, char *trans, char *diag, integer *n, 
	integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, 
	integer *ldb, doublecomplex *x, integer *ldx, doublecomplex *xact, 
	integer *ldxact, doublereal *ferr, doublereal *berr, doublereal *
	reslts)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, xact_dim1, 
	    xact_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2;

    /* Builtin functions */
    double d_imag(doublecomplex *);

    /* Local variables */
    static doublereal diff, axbi;
    static integer imax;
    static doublereal unfl, ovfl;
    static logical unit;
    static integer i__, j, k;
    extern logical lsame_(char *, char *);
    static logical upper;
    static doublereal xnorm;
    extern doublereal dlamch_(char *);
    static doublereal errbnd;
    extern integer izamax_(integer *, doublecomplex *, integer *);
    static logical notran;
    static integer ifu;
    static doublereal eps, tmp;


#define xact_subscr(a_1,a_2) (a_2)*xact_dim1 + a_1
#define xact_ref(a_1,a_2) xact[xact_subscr(a_1,a_2)]
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define x_subscr(a_1,a_2) (a_2)*x_dim1 + a_1
#define x_ref(a_1,a_2) x[x_subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    ZTRT05 tests the error bounds from iterative refinement for the   
    computed solution to a system of equations A*X = B, where A is a   
    triangular n by n matrix.   

    RESLTS(1) = test of the error bound   
              = norm(X - XACT) / ( norm(X) * FERR )   

    A large value is returned if this ratio is not less than one.   

    RESLTS(2) = residual from the iterative refinement routine   
              = the maximum of BERR / ( (n+1)*EPS + (*) ), where   
                (*) = (n+1)*UNFL / (min_i (abs(A)*abs(X) +abs(b))_i )   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the matrix A is upper or lower triangular.   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    TRANS   (input) CHARACTER*1   
            Specifies the form of the system of equations.   
            = 'N':  A * X = B  (No transpose)   
            = 'T':  A'* X = B  (Transpose)   
            = 'C':  A'* X = B  (Conjugate transpose = Transpose)   

    DIAG    (input) CHARACTER*1   
            Specifies whether or not the matrix A is unit triangular.   
            = 'N':  Non-unit triangular   
            = 'U':  Unit triangular   

    N       (input) INTEGER   
            The number of rows of the matrices X, B, and XACT, and the   
            order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of columns of the matrices X, B, and XACT.   
            NRHS >= 0.   

    A       (input) COMPLEX*16 array, dimension (LDA,N)   
            The triangular matrix A.  If UPLO = 'U', the leading n by n   
            upper triangular part of the array A contains the upper   
            triangular matrix, and the strictly lower triangular part of   
            A is not referenced.  If UPLO = 'L', the leading n by n lower   
            triangular part of the array A contains the lower triangular   
            matrix, and the strictly upper triangular part of A is not   
            referenced.  If DIAG = 'U', the diagonal elements of A are   
            also not referenced and are assumed to be 1.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    B       (input) COMPLEX*16 array, dimension (LDB,NRHS)   
            The right hand side vectors for the system of linear   
            equations.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    X       (input) COMPLEX*16 array, dimension (LDX,NRHS)   
            The computed solution vectors.  Each vector is stored as a   
            column of the matrix X.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  LDX >= max(1,N).   

    XACT    (input) COMPLEX*16 array, dimension (LDX,NRHS)   
            The exact solution vectors.  Each vector is stored as a   
            column of the matrix XACT.   

    LDXACT  (input) INTEGER   
            The leading dimension of the array XACT.  LDXACT >= max(1,N).   

    FERR    (input) DOUBLE PRECISION array, dimension (NRHS)   
            The estimated forward error bounds for each solution vector   
            X.  If XTRUE is the true solution, FERR bounds the magnitude   
            of the largest entry in (X - XTRUE) divided by the magnitude   
            of the largest entry in X.   

    BERR    (input) DOUBLE PRECISION array, dimension (NRHS)   
            The componentwise relative backward error of each solution   
            vector (i.e., the smallest relative change in any entry of A   
            or B that makes X an exact solution).   

    RESLTS  (output) DOUBLE PRECISION array, dimension (2)   
            The maximum over the NRHS solution vectors of the ratios:   
            RESLTS(1) = norm(X - XACT) / ( norm(X) * FERR )   
            RESLTS(2) = BERR / ( (n+1)*EPS + (*) )   

    =====================================================================   


       Quick exit if N = 0 or NRHS = 0.   

       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    xact_dim1 = *ldxact;
    xact_offset = 1 + xact_dim1 * 1;
    xact -= xact_offset;
    --ferr;
    --berr;
    --reslts;

    /* Function Body */
    if (*n <= 0 || *nrhs <= 0) {
	reslts[1] = 0.;
	reslts[2] = 0.;
	return 0;
    }

    eps = dlamch_("Epsilon");
    unfl = dlamch_("Safe minimum");
    ovfl = 1. / unfl;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    unit = lsame_(diag, "U");

/*     Test 1:  Compute the maximum of   
          norm(X - XACT) / ( norm(X) * FERR )   
       over all the vectors X and XACT using the infinity-norm. */

    errbnd = 0.;
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	imax = izamax_(n, &x_ref(1, j), &c__1);
/* Computing MAX */
	i__2 = x_subscr(imax, j);
	d__3 = (d__1 = x[i__2].r, abs(d__1)) + (d__2 = d_imag(&x_ref(imax, j))
		, abs(d__2));
	xnorm = max(d__3,unfl);
	diff = 0.;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = x_subscr(i__, j);
	    i__4 = xact_subscr(i__, j);
	    z__2.r = x[i__3].r - xact[i__4].r, z__2.i = x[i__3].i - xact[i__4]
		    .i;
	    z__1.r = z__2.r, z__1.i = z__2.i;
/* Computing MAX */
	    d__3 = diff, d__4 = (d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&
		    z__1), abs(d__2));
	    diff = max(d__3,d__4);
/* L10: */
	}

	if (xnorm > 1.) {
	    goto L20;
	} else if (diff <= ovfl * xnorm) {
	    goto L20;
	} else {
	    errbnd = 1. / eps;
	    goto L30;
	}

L20:
	if (diff / xnorm <= ferr[j]) {
/* Computing MAX */
	    d__1 = errbnd, d__2 = diff / xnorm / ferr[j];
	    errbnd = max(d__1,d__2);
	} else {
	    errbnd = 1. / eps;
	}
L30:
	;
    }
    reslts[1] = errbnd;

/*     Test 2:  Compute the maximum of BERR / ( (n+1)*EPS + (*) ), where   
       (*) = (n+1)*UNFL / (min_i (abs(A)*abs(X) +abs(b))_i ) */

    ifu = 0;
    if (unit) {
	ifu = 1;
    }
    i__1 = *nrhs;
    for (k = 1; k <= i__1; ++k) {
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = b_subscr(i__, k);
	    tmp = (d__1 = b[i__3].r, abs(d__1)) + (d__2 = d_imag(&b_ref(i__, 
		    k)), abs(d__2));
	    if (upper) {
		if (! notran) {
		    i__3 = i__ - ifu;
		    for (j = 1; j <= i__3; ++j) {
			i__4 = a_subscr(j, i__);
			i__5 = x_subscr(j, k);
			tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = 
				d_imag(&a_ref(j, i__)), abs(d__2))) * ((d__3 =
				 x[i__5].r, abs(d__3)) + (d__4 = d_imag(&
				x_ref(j, k)), abs(d__4)));
/* L40: */
		    }
		    if (unit) {
			i__3 = x_subscr(i__, k);
			tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(
				&x_ref(i__, k)), abs(d__2));
		    }
		} else {
		    if (unit) {
			i__3 = x_subscr(i__, k);
			tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(
				&x_ref(i__, k)), abs(d__2));
		    }
		    i__3 = *n;
		    for (j = i__ + ifu; j <= i__3; ++j) {
			i__4 = a_subscr(i__, j);
			i__5 = x_subscr(j, k);
			tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = 
				d_imag(&a_ref(i__, j)), abs(d__2))) * ((d__3 =
				 x[i__5].r, abs(d__3)) + (d__4 = d_imag(&
				x_ref(j, k)), abs(d__4)));
/* L50: */
		    }
		}
	    } else {
		if (notran) {
		    i__3 = i__ - ifu;
		    for (j = 1; j <= i__3; ++j) {
			i__4 = a_subscr(i__, j);
			i__5 = x_subscr(j, k);
			tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = 
				d_imag(&a_ref(i__, j)), abs(d__2))) * ((d__3 =
				 x[i__5].r, abs(d__3)) + (d__4 = d_imag(&
				x_ref(j, k)), abs(d__4)));
/* L60: */
		    }
		    if (unit) {
			i__3 = x_subscr(i__, k);
			tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(
				&x_ref(i__, k)), abs(d__2));
		    }
		} else {
		    if (unit) {
			i__3 = x_subscr(i__, k);
			tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(
				&x_ref(i__, k)), abs(d__2));
		    }
		    i__3 = *n;
		    for (j = i__ + ifu; j <= i__3; ++j) {
			i__4 = a_subscr(j, i__);
			i__5 = x_subscr(j, k);
			tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = 
				d_imag(&a_ref(j, i__)), abs(d__2))) * ((d__3 =
				 x[i__5].r, abs(d__3)) + (d__4 = d_imag(&
				x_ref(j, k)), abs(d__4)));
/* L70: */
		    }
		}
	    }
	    if (i__ == 1) {
		axbi = tmp;
	    } else {
		axbi = min(axbi,tmp);
	    }
/* L80: */
	}
/* Computing MAX */
	d__1 = axbi, d__2 = (*n + 1) * unfl;
	tmp = berr[k] / ((*n + 1) * eps + (*n + 1) * unfl / max(d__1,d__2));
	if (k == 1) {
	    reslts[2] = tmp;
	} else {
	    reslts[2] = max(reslts[2],tmp);
	}
/* L90: */
    }

    return 0;

/*     End of ZTRT05 */

} /* ztrt05_ */
Ejemplo n.º 11
0
/* Subroutine */ int ztgsna_(char *job, char *howmny, logical *select, 
	integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer 
	*ldb, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *
	ldvr, doublereal *s, doublereal *dif, integer *mm, integer *m, 
	doublecomplex *work, integer *lwork, integer *iwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    ZTGSNA estimates reciprocal condition numbers for specified   
    eigenvalues and/or eigenvectors of a matrix pair (A, B).   

    (A, B) must be in generalized Schur canonical form, that is, A and   
    B are both upper triangular.   

    Arguments   
    =========   

    JOB     (input) CHARACTER*1   
            Specifies whether condition numbers are required for   
            eigenvalues (S) or eigenvectors (DIF):   
            = 'E': for eigenvalues only (S);   
            = 'V': for eigenvectors only (DIF);   
            = 'B': for both eigenvalues and eigenvectors (S and DIF).   

    HOWMNY  (input) CHARACTER*1   
            = 'A': compute condition numbers for all eigenpairs;   
            = 'S': compute condition numbers for selected eigenpairs   
                   specified by the array SELECT.   

    SELECT  (input) LOGICAL array, dimension (N)   
            If HOWMNY = 'S', SELECT specifies the eigenpairs for which   
            condition numbers are required. To select condition numbers   
            for the corresponding j-th eigenvalue and/or eigenvector,   
            SELECT(j) must be set to .TRUE..   
            If HOWMNY = 'A', SELECT is not referenced.   

    N       (input) INTEGER   
            The order of the square matrix pair (A, B). N >= 0.   

    A       (input) COMPLEX*16 array, dimension (LDA,N)   
            The upper triangular matrix A in the pair (A,B).   

    LDA     (input) INTEGER   
            The leading dimension of the array A. LDA >= max(1,N).   

    B       (input) COMPLEX*16 array, dimension (LDB,N)   
            The upper triangular matrix B in the pair (A, B).   

    LDB     (input) INTEGER   
            The leading dimension of the array B. LDB >= max(1,N).   

    VL      (input) COMPLEX*16 array, dimension (LDVL,M)   
            IF JOB = 'E' or 'B', VL must contain left eigenvectors of   
            (A, B), corresponding to the eigenpairs specified by HOWMNY   
            and SELECT.  The eigenvectors must be stored in consecutive   
            columns of VL, as returned by ZTGEVC.   
            If JOB = 'V', VL is not referenced.   

    LDVL    (input) INTEGER   
            The leading dimension of the array VL. LDVL >= 1; and   
            If JOB = 'E' or 'B', LDVL >= N.   

    VR      (input) COMPLEX*16 array, dimension (LDVR,M)   
            IF JOB = 'E' or 'B', VR must contain right eigenvectors of   
            (A, B), corresponding to the eigenpairs specified by HOWMNY   
            and SELECT.  The eigenvectors must be stored in consecutive   
            columns of VR, as returned by ZTGEVC.   
            If JOB = 'V', VR is not referenced.   

    LDVR    (input) INTEGER   
            The leading dimension of the array VR. LDVR >= 1;   
            If JOB = 'E' or 'B', LDVR >= N.   

    S       (output) DOUBLE PRECISION array, dimension (MM)   
            If JOB = 'E' or 'B', the reciprocal condition numbers of the   
            selected eigenvalues, stored in consecutive elements of the   
            array.   
            If JOB = 'V', S is not referenced.   

    DIF     (output) DOUBLE PRECISION array, dimension (MM)   
            If JOB = 'V' or 'B', the estimated reciprocal condition   
            numbers of the selected eigenvectors, stored in consecutive   
            elements of the array.   
            If the eigenvalues cannot be reordered to compute DIF(j),   
            DIF(j) is set to 0; this can only occur when the true value   
            would be very small anyway.   
            For each eigenvalue/vector specified by SELECT, DIF stores   
            a Frobenius norm-based estimate of Difl.   
            If JOB = 'E', DIF is not referenced.   

    MM      (input) INTEGER   
            The number of elements in the arrays S and DIF. MM >= M.   

    M       (output) INTEGER   
            The number of elements of the arrays S and DIF used to store   
            the specified condition numbers; for each selected eigenvalue   
            one element is used. If HOWMNY = 'A', M is set to N.   

    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)   
            If JOB = 'E', WORK is not referenced.  Otherwise,   
            on exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK  (input) INTEGER   
            The dimension of the array WORK. LWORK >= 1.   
            If JOB = 'V' or 'B', LWORK >= 2*N*N.   

    IWORK   (workspace) INTEGER array, dimension (N+2)   
            If JOB = 'E', IWORK is not referenced.   

    INFO    (output) INTEGER   
            = 0: Successful exit   
            < 0: If INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The reciprocal of the condition number of the i-th generalized   
    eigenvalue w = (a, b) is defined as   

            S(I) = (|v'Au|**2 + |v'Bu|**2)**(1/2) / (norm(u)*norm(v))   

    where u and v are the right and left eigenvectors of (A, B)   
    corresponding to w; |z| denotes the absolute value of the complex   
    number, and norm(u) denotes the 2-norm of the vector u. The pair   
    (a, b) corresponds to an eigenvalue w = a/b (= v'Au/v'Bu) of the   
    matrix pair (A, B). If both a and b equal zero, then (A,B) is   
    singular and S(I) = -1 is returned.   

    An approximate error bound on the chordal distance between the i-th   
    computed generalized eigenvalue w and the corresponding exact   
    eigenvalue lambda is   

            chord(w, lambda) <=   EPS * norm(A, B) / S(I),   

    where EPS is the machine precision.   

    The reciprocal of the condition number of the right eigenvector u   
    and left eigenvector v corresponding to the generalized eigenvalue w   
    is defined as follows. Suppose   

                     (A, B) = ( a   *  ) ( b  *  )  1   
                              ( 0  A22 ),( 0 B22 )  n-1   
                                1  n-1     1 n-1   

    Then the reciprocal condition number DIF(I) is   

            Difl[(a, b), (A22, B22)]  = sigma-min( Zl )   

    where sigma-min(Zl) denotes the smallest singular value of   

           Zl = [ kron(a, In-1) -kron(1, A22) ]   
                [ kron(b, In-1) -kron(1, B22) ].   

    Here In-1 is the identity matrix of size n-1 and X' is the conjugate   
    transpose of X. kron(X, Y) is the Kronecker product between the   
    matrices X and Y.   

    We approximate the smallest singular value of Zl with an upper   
    bound. This is done by ZLATDF.   

    An approximate error bound for a computed eigenvector VL(i) or   
    VR(i) is given by   

                        EPS * norm(A, B) / DIF(i).   

    See ref. [2-3] for more details and further references.   

    Based on contributions by   
       Bo Kagstrom and Peter Poromaa, Department of Computing Science,   
       Umea University, S-901 87 Umea, Sweden.   

    References   
    ==========   

    [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the   
        Generalized Real Schur Form of a Regular Matrix Pair (A, B), in   
        M.S. Moonen et al (eds), Linear Algebra for Large Scale and   
        Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.   

    [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified   
        Eigenvalues of a Regular Matrix Pair (A, B) and Condition   
        Estimation: Theory, Algorithms and Software, Report   
        UMINF - 94.04, Department of Computing Science, Umea University,   
        S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87.   
        To appear in Numerical Algorithms, 1996.   

    [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software   
        for Solving the Generalized Sylvester Equation and Estimating the   
        Separation between Regular Matrix Pairs, Report UMINF - 93.23,   
        Department of Computing Science, Umea University, S-901 87 Umea,   
        Sweden, December 1993, Revised April 1994, Also as LAPACK Working   
        Note 75.   
        To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996.   

    =====================================================================   


       Decode and test the input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static doublecomplex c_b19 = {1.,0.};
    static doublecomplex c_b20 = {0.,0.};
    static logical c_false = FALSE_;
    static integer c__3 = 3;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, 
	    vr_offset, i__1, i__2;
    doublereal d__1, d__2;
    doublecomplex z__1;
    /* Builtin functions */
    double z_abs(doublecomplex *);
    /* Local variables */
    static doublereal cond;
    static integer ierr, ifst;
    static doublereal lnrm;
    static doublecomplex yhax, yhbx;
    static integer ilst;
    static doublereal rnrm;
    static integer i__, k;
    static doublereal scale;
    extern logical lsame_(char *, char *);
    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *);
    static integer lwmin;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *, 
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
	    integer *, doublecomplex *, doublecomplex *, integer *);
    static logical wants;
    static integer llwrk, n1, n2;
    static doublecomplex dummy[1];
    extern doublereal dlapy2_(doublereal *, doublereal *);
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    static doublecomplex dummy1[1];
    extern doublereal dznrm2_(integer *, doublecomplex *, integer *), dlamch_(
	    char *);
    static integer ks;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal bignum;
    static logical wantbh, wantdf, somcon;
    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *), 
	    ztgexc_(logical *, logical *, integer *, doublecomplex *, integer 
	    *, doublecomplex *, integer *, doublecomplex *, integer *, 
	    doublecomplex *, integer *, integer *, integer *, integer *);
    static doublereal smlnum;
    static logical lquery;
    extern /* Subroutine */ int ztgsyl_(char *, integer *, integer *, integer 
	    *, doublecomplex *, integer *, doublecomplex *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *, 
	    doublereal *, doublereal *, doublecomplex *, integer *, integer *,
	     integer *);
    static doublereal eps;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1
#define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)]
#define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1
#define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)]


    --select;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1 * 1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1 * 1;
    vr -= vr_offset;
    --s;
    --dif;
    --work;
    --iwork;

    /* Function Body */
    wantbh = lsame_(job, "B");
    wants = lsame_(job, "E") || wantbh;
    wantdf = lsame_(job, "V") || wantbh;

    somcon = lsame_(howmny, "S");

    *info = 0;
    lquery = *lwork == -1;

    if (lsame_(job, "V") || lsame_(job, "B")) {
/* Computing MAX */
	i__1 = 1, i__2 = (*n << 1) * *n;
	lwmin = max(i__1,i__2);
    } else {
	lwmin = 1;
    }

    if (! wants && ! wantdf) {
	*info = -1;
    } else if (! lsame_(howmny, "A") && ! somcon) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lda < max(1,*n)) {
	*info = -6;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else if (wants && *ldvl < *n) {
	*info = -10;
    } else if (wants && *ldvr < *n) {
	*info = -12;
    } else {

/*        Set M to the number of eigenpairs for which condition numbers   
          are required, and test MM. */

	if (somcon) {
	    *m = 0;
	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {
		if (select[k]) {
		    ++(*m);
		}
/* L10: */
	    }
	} else {
	    *m = *n;
	}

	if (*mm < *m) {
	    *info = -15;
	} else if (*lwork < lwmin && ! lquery) {
	    *info = -18;
	}
    }

    if (*info == 0) {
	work[1].r = (doublereal) lwmin, work[1].i = 0.;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTGSNA", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Get machine constants */

    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
    llwrk = *lwork - (*n << 1) * *n;
    ks = 0;
    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {

/*        Determine whether condition numbers are required for the k-th   
          eigenpair. */

	if (somcon) {
	    if (! select[k]) {
		goto L20;
	    }
	}

	++ks;

	if (wants) {

/*           Compute the reciprocal condition number of the k-th   
             eigenvalue. */

	    rnrm = dznrm2_(n, &vr_ref(1, ks), &c__1);
	    lnrm = dznrm2_(n, &vl_ref(1, ks), &c__1);
	    zgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr_ref(1, ks), &
		    c__1, &c_b20, &work[1], &c__1);
	    zdotc_(&z__1, n, &work[1], &c__1, &vl_ref(1, ks), &c__1);
	    yhax.r = z__1.r, yhax.i = z__1.i;
	    zgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr_ref(1, ks), &
		    c__1, &c_b20, &work[1], &c__1);
	    zdotc_(&z__1, n, &work[1], &c__1, &vl_ref(1, ks), &c__1);
	    yhbx.r = z__1.r, yhbx.i = z__1.i;
	    d__1 = z_abs(&yhax);
	    d__2 = z_abs(&yhbx);
	    cond = dlapy2_(&d__1, &d__2);
	    if (cond == 0.) {
		s[ks] = -1.;
	    } else {
		s[ks] = cond / (rnrm * lnrm);
	    }
	}

	if (wantdf) {
	    if (*n == 1) {
		d__1 = z_abs(&a_ref(1, 1));
		d__2 = z_abs(&b_ref(1, 1));
		dif[ks] = dlapy2_(&d__1, &d__2);
		goto L20;
	    }

/*           Estimate the reciprocal condition number of the k-th   
             eigenvectors.   

             Copy the matrix (A, B) to the array WORK and move the   
             (k,k)th pair to the (1,1) position. */

	    zlacpy_("Full", n, n, &a[a_offset], lda, &work[1], n);
	    zlacpy_("Full", n, n, &b[b_offset], ldb, &work[*n * *n + 1], n);
	    ifst = k;
	    ilst = 1;

	    ztgexc_(&c_false, &c_false, n, &work[1], n, &work[*n * *n + 1], n,
		     dummy, &c__1, dummy1, &c__1, &ifst, &ilst, &ierr);

	    if (ierr > 0) {

/*              Ill-conditioned problem - swap rejected. */

		dif[ks] = 0.;
	    } else {

/*              Reordering successful, solve generalized Sylvester   
                equation for R and L,   
                           A22 * R - L * A11 = A12   
                           B22 * R - L * B11 = B12,   
                and compute estimate of Difl[(A11,B11), (A22, B22)]. */

		n1 = 1;
		n2 = *n - n1;
		i__ = *n * *n + 1;
		ztgsyl_("N", &c__3, &n2, &n1, &work[*n * n1 + n1 + 1], n, &
			work[1], n, &work[n1 + 1], n, &work[*n * n1 + n1 + 
			i__], n, &work[i__], n, &work[n1 + i__], n, &scale, &
			dif[ks], &work[(*n * *n << 1) + 1], &llwrk, &iwork[1],
			 &ierr);
	    }
	}

L20:
	;
    }
    work[1].r = (doublereal) lwmin, work[1].i = 0.;
    return 0;

/*     End of ZTGSNA */

} /* ztgsna_ */
Ejemplo n.º 12
0
/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku,
                             doublereal *cfrom, doublereal *cto, integer *m, integer *n,
                             doublereal *a, integer *lda, integer *info)
{
    /*  -- LAPACK auxiliary routine (version 3.0) --
           Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
           Courant Institute, Argonne National Lab, and Rice University
           February 29, 1992


        Purpose
        =======

        DLASCL multiplies the M by N real matrix A by the real scalar
        CTO/CFROM.  This is done without over/underflow as long as the final
        result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
        A may be full, upper triangular, lower triangular, upper Hessenberg,
        or banded.

        Arguments
        =========

        TYPE    (input) CHARACTER*1
                TYPE indices the storage type of the input matrix.
                = 'G':  A is a full matrix.
                = 'L':  A is a lower triangular matrix.
                = 'U':  A is an upper triangular matrix.
                = 'H':  A is an upper Hessenberg matrix.
                = 'B':  A is a symmetric band matrix with lower bandwidth KL
                        and upper bandwidth KU and with the only the lower
                        half stored.
                = 'Q':  A is a symmetric band matrix with lower bandwidth KL
                        and upper bandwidth KU and with the only the upper
                        half stored.
                = 'Z':  A is a band matrix with lower bandwidth KL and upper
                        bandwidth KU.

        KL      (input) INTEGER
                The lower bandwidth of A.  Referenced only if TYPE = 'B',
                'Q' or 'Z'.

        KU      (input) INTEGER
                The upper bandwidth of A.  Referenced only if TYPE = 'B',
                'Q' or 'Z'.

        CFROM   (input) DOUBLE PRECISION
        CTO     (input) DOUBLE PRECISION
                The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
                without over/underflow if the final result CTO*A(I,J)/CFROM
                can be represented without over/underflow.  CFROM must be
                nonzero.

        M       (input) INTEGER
                The number of rows of the matrix A.  M >= 0.

        N       (input) INTEGER
                The number of columns of the matrix A.  N >= 0.

        A       (input/output) DOUBLE PRECISION array, dimension (LDA,M)
                The matrix to be multiplied by CTO/CFROM.  See TYPE for the
                storage type.

        LDA     (input) INTEGER
                The leading dimension of the array A.  LDA >= max(1,M).

        INFO    (output) INTEGER
                0  - successful exit
                <0 - if INFO = -i, the i-th argument had an illegal value.

        =====================================================================


           Test the input arguments

           Parameter adjustments */
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    /* Local variables */
    static logical done;
    static doublereal ctoc;
    static integer i__, j;
    extern logical lsame_(char *, char *);
    static integer itype, k1, k2, k3, k4;
    static doublereal cfrom1;
    extern doublereal dlamch_(char *);
    static doublereal cfromc;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal bignum, smlnum, mul, cto1;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]

    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    *info = 0;

    if (lsame_(type__, "G")) {
        itype = 0;
    } else if (lsame_(type__, "L")) {
        itype = 1;
    } else if (lsame_(type__, "U")) {
        itype = 2;
    } else if (lsame_(type__, "H")) {
        itype = 3;
    } else if (lsame_(type__, "B")) {
        itype = 4;
    } else if (lsame_(type__, "Q")) {
        itype = 5;
    } else if (lsame_(type__, "Z")) {
        itype = 6;
    } else {
        itype = -1;
    }

    if (itype == -1) {
        *info = -1;
    } else if (*cfrom == 0.) {
        *info = -4;
    } else if (*m < 0) {
        *info = -6;
    } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) {
        *info = -7;
    } else if (itype <= 3 && *lda < max(1,*m)) {
        *info = -9;
    } else if (itype >= 4) {
        /* Computing MAX */
        i__1 = *m - 1;
        if (*kl < 0 || *kl > max(i__1,0)) {
            *info = -2;
        } else { /* if(complicated condition) */
            /* Computing MAX */
            i__1 = *n - 1;
            if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) &&
                    *kl != *ku) {
                *info = -3;
            } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < *
                       ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) {
                *info = -9;
            }
        }
    }

    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("DLASCL", &i__1);
        return 0;
    }

    /*     Quick return if possible */

    if (*n == 0 || *m == 0) {
        return 0;
    }

    /*     Get machine parameters */

    smlnum = dlamch_("S");
    bignum = 1. / smlnum;

    cfromc = *cfrom;
    ctoc = *cto;

L10:
    cfrom1 = cfromc * smlnum;
    cto1 = ctoc / bignum;
    if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) {
        mul = smlnum;
        done = FALSE_;
        cfromc = cfrom1;
    } else if (abs(cto1) > abs(cfromc)) {
        mul = bignum;
        done = FALSE_;
        ctoc = cto1;
    } else {
        mul = ctoc / cfromc;
        done = TRUE_;
    }

    if (itype == 0) {

        /*        Full matrix */

        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *m;
            for (i__ = 1; i__ <= i__2; ++i__) {
                a_ref(i__, j) = a_ref(i__, j) * mul;
                /* L20: */
            }
            /* L30: */
        }

    } else if (itype == 1) {

        /*        Lower triangular matrix */

        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *m;
            for (i__ = j; i__ <= i__2; ++i__) {
                a_ref(i__, j) = a_ref(i__, j) * mul;
                /* L40: */
            }
            /* L50: */
        }

    } else if (itype == 2) {

        /*        Upper triangular matrix */

        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = min(j,*m);
            for (i__ = 1; i__ <= i__2; ++i__) {
                a_ref(i__, j) = a_ref(i__, j) * mul;
                /* L60: */
            }
            /* L70: */
        }

    } else if (itype == 3) {

        /*        Upper Hessenberg matrix */

        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            /* Computing MIN */
            i__3 = j + 1;
            i__2 = min(i__3,*m);
            for (i__ = 1; i__ <= i__2; ++i__) {
                a_ref(i__, j) = a_ref(i__, j) * mul;
                /* L80: */
            }
            /* L90: */
        }

    } else if (itype == 4) {

        /*        Lower half of a symmetric band matrix */

        k3 = *kl + 1;
        k4 = *n + 1;
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            /* Computing MIN */
            i__3 = k3, i__4 = k4 - j;
            i__2 = min(i__3,i__4);
            for (i__ = 1; i__ <= i__2; ++i__) {
                a_ref(i__, j) = a_ref(i__, j) * mul;
                /* L100: */
            }
            /* L110: */
        }

    } else if (itype == 5) {

        /*        Upper half of a symmetric band matrix */

        k1 = *ku + 2;
        k3 = *ku + 1;
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            /* Computing MAX */
            i__2 = k1 - j;
            i__3 = k3;
            for (i__ = max(i__2,1); i__ <= i__3; ++i__) {
                a_ref(i__, j) = a_ref(i__, j) * mul;
                /* L120: */
            }
            /* L130: */
        }

    } else if (itype == 6) {

        /*        Band matrix */

        k1 = *kl + *ku + 2;
        k2 = *kl + 1;
        k3 = (*kl << 1) + *ku + 1;
        k4 = *kl + *ku + 1 + *m;
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            /* Computing MAX */
            i__3 = k1 - j;
            /* Computing MIN */
            i__4 = k3, i__5 = k4 - j;
            i__2 = min(i__4,i__5);
            for (i__ = max(i__3,k2); i__ <= i__2; ++i__) {
                a_ref(i__, j) = a_ref(i__, j) * mul;
                /* L140: */
            }
            /* L150: */
        }

    }

    if (! done) {
        goto L10;
    }

    return 0;

    /*     End of DLASCL */

} /* dlascl_ */
Ejemplo n.º 13
0
/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, 
	integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SLAUUM computes the product U * U' or L' * L, where the triangular   
    factor U or L is stored in the upper or lower triangular part of   
    the array A.   

    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,   
    overwriting the factor U in A.   
    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,   
    overwriting the factor L in A.   

    This is the blocked form of the algorithm, calling Level 3 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the triangular factor stored in the array A   
            is upper or lower triangular:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the triangular factor U or L.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the triangular factor U or L.   
            On exit, if UPLO = 'U', the upper triangle of A is   
            overwritten with the upper triangle of the product U * U';   
            if UPLO = 'L', the lower triangle of A is overwritten with   
            the lower triangle of the product L' * L.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -k, the k-th argument had an illegal value   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b15 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer i__;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    ), ssyrk_(char *, char *, integer 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
	    );
    static integer ib;
    extern /* Subroutine */ int slauu2_(char *, integer *, real *, integer *, 
	    integer *);
    static integer nb;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAUUM", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	slauu2_(uplo, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute the product U * U'. */

	    i__1 = *n;
	    i__2 = nb;
	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, 
			&c_b15, &a_ref(i__, i__), lda, &a_ref(1, i__), lda);
		slauu2_("Upper", &ib, &a_ref(i__, i__), lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, &
			    c_b15, &a_ref(1, i__ + ib), lda, &a_ref(i__, i__ 
			    + ib), lda, &c_b15, &a_ref(1, i__), lda);
		    i__3 = *n - i__ - ib + 1;
		    ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &
			    a_ref(i__, i__ + ib), lda, &c_b15, &a_ref(i__, 
			    i__), lda);
		}
/* L10: */
	    }
	} else {

/*           Compute the product L' * L. */

	    i__2 = *n;
	    i__1 = nb;
	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, &
			c_b15, &a_ref(i__, i__), lda, &a_ref(i__, 1), lda);
		slauu2_("Lower", &ib, &a_ref(i__, i__), lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, &
			    c_b15, &a_ref(i__ + ib, i__), lda, &a_ref(i__ + 
			    ib, 1), lda, &c_b15, &a_ref(i__, 1), lda);
		    i__3 = *n - i__ - ib + 1;
		    ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a_ref(
			    i__ + ib, i__), lda, &c_b15, &a_ref(i__, i__), 
			    lda);
		}
/* L20: */
	    }
	}
    }

    return 0;

/*     End of SLAUUM */

} /* slauum_ */
Ejemplo n.º 14
0
/* Subroutine */ int clasyf_(char *uplo, integer *n, integer *nb, integer *kb,
	 complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, 
	integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    CLASYF computes a partial factorization of a complex symmetric matrix   
    A using the Bunch-Kaufman diagonal pivoting method. The partial   
    factorization has the form:   

    A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or:   
          ( 0  U22 ) (  0   D  ) ( U12' U22' )   

    A  =  ( L11  0 ) ( D    0  ) ( L11' L21' )  if UPLO = 'L'   
          ( L21  I ) ( 0   A22 ) (  0    I   )   

    where the order of D is at most NB. The actual order is returned in   
    the argument KB, and is either NB or NB-1, or N if N <= NB.   
    Note that U' denotes the transpose of U.   

    CLASYF is an auxiliary routine called by CSYTRF. It uses blocked code   
    (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or   
    A22 (if UPLO = 'L').   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the upper or lower triangular part of the   
            symmetric matrix A is stored:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NB      (input) INTEGER   
            The maximum number of columns of the matrix A that should be   
            factored.  NB should be at least 2 to allow for 2-by-2 pivot   
            blocks.   

    KB      (output) INTEGER   
            The number of columns of A that were actually factored.   
            KB is either NB-1 or NB, or N if N <= NB.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
            n-by-n upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading n-by-n lower triangular part of A contains the lower   
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   
            On exit, A contains details of the partial factorization.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (output) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D.   
            If UPLO = 'U', only the last KB elements of IPIV are set;   
            if UPLO = 'L', only the first KB elements are set.   

            If IPIV(k) > 0, then rows and columns k and IPIV(k) were   
            interchanged and D(k,k) is a 1-by-1 diagonal block.   
            If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and   
            columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)   
            is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =   
            IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were   
            interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.   

    W       (workspace) COMPLEX array, dimension (LDW,NB)   

    LDW     (input) INTEGER   
            The leading dimension of the array W.  LDW >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            > 0: if INFO = k, D(k,k) is exactly zero.  The factorization   
                 has been completed, but the block diagonal matrix D is   
                 exactly singular.   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static complex c_b1 = {1.f,0.f};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4;
    complex q__1, q__2, q__3;
    /* Builtin functions */
    double sqrt(doublereal), r_imag(complex *);
    void c_div(complex *, complex *, complex *);
    /* Local variables */
    static integer imax, jmax, j, k;
    static complex t;
    static real alpha;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *), cgemm_(char *, char *, integer *, integer *, integer *
	    , complex *, complex *, integer *, complex *, integer *, complex *
	    , complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
	    , complex *, integer *, complex *, integer *, complex *, complex *
	    , integer *), ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), cswap_(integer *, complex *, integer *, 
	    complex *, integer *);
    static integer kstep;
    static complex r1, d11, d21, d22;
    static integer jb, jj, kk, jp, kp;
    static real absakk;
    static integer kw;
    extern integer icamax_(integer *, complex *, integer *);
    static real colmax, rowmax;
    static integer kkw;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define w_subscr(a_1,a_2) (a_2)*w_dim1 + a_1
#define w_ref(a_1,a_2) w[w_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    w_dim1 = *ldw;
    w_offset = 1 + w_dim1 * 1;
    w -= w_offset;

    /* Function Body */
    *info = 0;

/*     Initialize ALPHA for use in choosing pivot block size. */

    alpha = (sqrt(17.f) + 1.f) / 8.f;

    if (lsame_(uplo, "U")) {

/*        Factorize the trailing columns of A using the upper triangle   
          of A and working backwards, and compute the matrix W = U12*D   
          for use in updating A11   

          K is the main loop index, decreasing from N in steps of 1 or 2   

          KW is the column of W which corresponds to column K of A */

	k = *n;
L10:
	kw = *nb + k - *n;

/*        Exit from loop */

	if (k <= *n - *nb + 1 && *nb < *n || k < 1) {
	    goto L30;
	}

/*        Copy column K of A to column KW of W and update it */

	ccopy_(&k, &a_ref(1, k), &c__1, &w_ref(1, kw), &c__1);
	if (k < *n) {
	    i__1 = *n - k;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgemv_("No transpose", &k, &i__1, &q__1, &a_ref(1, k + 1), lda, &
		    w_ref(k, kw + 1), ldw, &c_b1, &w_ref(1, kw), &c__1);
	}

	kstep = 1;

/*        Determine rows and columns to be interchanged and whether   
          a 1-by-1 or 2-by-2 pivot block will be used */

	i__1 = w_subscr(k, kw);
	absakk = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w_ref(k, kw)
		), dabs(r__2));

/*        IMAX is the row-index of the largest off-diagonal element in   
          column K, and COLMAX is its absolute value */

	if (k > 1) {
	    i__1 = k - 1;
	    imax = icamax_(&i__1, &w_ref(1, kw), &c__1);
	    i__1 = w_subscr(imax, kw);
	    colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w_ref(
		    imax, kw)), dabs(r__2));
	} else {
	    colmax = 0.f;
	}

	if (dmax(absakk,colmax) == 0.f) {

/*           Column K is zero: set INFO and continue */

	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	} else {
	    if (absakk >= alpha * colmax) {

/*              no interchange, use 1-by-1 pivot block */

		kp = k;
	    } else {

/*              Copy column IMAX to column KW-1 of W and update it */

		ccopy_(&imax, &a_ref(1, imax), &c__1, &w_ref(1, kw - 1), &
			c__1);
		i__1 = k - imax;
		ccopy_(&i__1, &a_ref(imax, imax + 1), lda, &w_ref(imax + 1, 
			kw - 1), &c__1);
		if (k < *n) {
		    i__1 = *n - k;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgemv_("No transpose", &k, &i__1, &q__1, &a_ref(1, k + 1),
			     lda, &w_ref(imax, kw + 1), ldw, &c_b1, &w_ref(1, 
			    kw - 1), &c__1);
		}

/*              JMAX is the column-index of the largest off-diagonal   
                element in row IMAX, and ROWMAX is its absolute value */

		i__1 = k - imax;
		jmax = imax + icamax_(&i__1, &w_ref(imax + 1, kw - 1), &c__1);
		i__1 = w_subscr(jmax, kw - 1);
		rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
			w_ref(jmax, kw - 1)), dabs(r__2));
		if (imax > 1) {
		    i__1 = imax - 1;
		    jmax = icamax_(&i__1, &w_ref(1, kw - 1), &c__1);
/* Computing MAX */
		    i__1 = w_subscr(jmax, kw - 1);
		    r__3 = rowmax, r__4 = (r__1 = w[i__1].r, dabs(r__1)) + (
			    r__2 = r_imag(&w_ref(jmax, kw - 1)), dabs(r__2));
		    rowmax = dmax(r__3,r__4);
		}

		if (absakk >= alpha * colmax * (colmax / rowmax)) {

/*                 no interchange, use 1-by-1 pivot block */

		    kp = k;
		} else /* if(complicated condition) */ {
		    i__1 = w_subscr(imax, kw - 1);
		    if ((r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
			    w_ref(imax, kw - 1)), dabs(r__2)) >= alpha * 
			    rowmax) {

/*                 interchange rows and columns K and IMAX, use 1-by-1   
                   pivot block */

			kp = imax;

/*                 copy column KW-1 of W to column KW */

			ccopy_(&k, &w_ref(1, kw - 1), &c__1, &w_ref(1, kw), &
				c__1);
		    } else {

/*                 interchange rows and columns K-1 and IMAX, use 2-by-2   
                   pivot block */

			kp = imax;
			kstep = 2;
		    }
		}
	    }

	    kk = k - kstep + 1;
	    kkw = *nb + kk - *n;

/*           Updated column KP is already stored in column KKW of W */

	    if (kp != kk) {

/*              Copy non-updated column KK to column KP */

		i__1 = a_subscr(kp, k);
		i__2 = a_subscr(kk, k);
		a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
		i__1 = k - 1 - kp;
		ccopy_(&i__1, &a_ref(kp + 1, kk), &c__1, &a_ref(kp, kp + 1), 
			lda);
		ccopy_(&kp, &a_ref(1, kk), &c__1, &a_ref(1, kp), &c__1);

/*              Interchange rows KK and KP in last KK columns of A and W */

		i__1 = *n - kk + 1;
		cswap_(&i__1, &a_ref(kk, kk), lda, &a_ref(kp, kk), lda);
		i__1 = *n - kk + 1;
		cswap_(&i__1, &w_ref(kk, kkw), ldw, &w_ref(kp, kkw), ldw);
	    }

	    if (kstep == 1) {

/*              1-by-1 pivot block D(k): column KW of W now holds   

                W(k) = U(k)*D(k)   

                where U(k) is the k-th column of U   

                Store U(k) in column k of A */

		ccopy_(&k, &w_ref(1, kw), &c__1, &a_ref(1, k), &c__1);
		c_div(&q__1, &c_b1, &a_ref(k, k));
		r1.r = q__1.r, r1.i = q__1.i;
		i__1 = k - 1;
		cscal_(&i__1, &r1, &a_ref(1, k), &c__1);
	    } else {

/*              2-by-2 pivot block D(k): columns KW and KW-1 of W now   
                hold   

                ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)   

                where U(k) and U(k-1) are the k-th and (k-1)-th columns   
                of U */

		if (k > 2) {

/*                 Store U(k) and U(k-1) in columns k and k-1 of A */

		    i__1 = w_subscr(k - 1, kw);
		    d21.r = w[i__1].r, d21.i = w[i__1].i;
		    c_div(&q__1, &w_ref(k, kw), &d21);
		    d11.r = q__1.r, d11.i = q__1.i;
		    c_div(&q__1, &w_ref(k - 1, kw - 1), &d21);
		    d22.r = q__1.r, d22.i = q__1.i;
		    q__3.r = d11.r * d22.r - d11.i * d22.i, q__3.i = d11.r * 
			    d22.i + d11.i * d22.r;
		    q__2.r = q__3.r - 1.f, q__2.i = q__3.i + 0.f;
		    c_div(&q__1, &c_b1, &q__2);
		    t.r = q__1.r, t.i = q__1.i;
		    c_div(&q__1, &t, &d21);
		    d21.r = q__1.r, d21.i = q__1.i;
		    i__1 = k - 2;
		    for (j = 1; j <= i__1; ++j) {
			i__2 = a_subscr(j, k - 1);
			i__3 = w_subscr(j, kw - 1);
			q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, 
				q__3.i = d11.r * w[i__3].i + d11.i * w[i__3]
				.r;
			i__4 = w_subscr(j, kw);
			q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4]
				.i;
			q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i = 
				d21.r * q__2.i + d21.i * q__2.r;
			a[i__2].r = q__1.r, a[i__2].i = q__1.i;
			i__2 = a_subscr(j, k);
			i__3 = w_subscr(j, kw);
			q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, 
				q__3.i = d22.r * w[i__3].i + d22.i * w[i__3]
				.r;
			i__4 = w_subscr(j, kw - 1);
			q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4]
				.i;
			q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i = 
				d21.r * q__2.i + d21.i * q__2.r;
			a[i__2].r = q__1.r, a[i__2].i = q__1.i;
/* L20: */
		    }
		}

/*              Copy D(k) to A */

		i__1 = a_subscr(k - 1, k - 1);
		i__2 = w_subscr(k - 1, kw - 1);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = a_subscr(k - 1, k);
		i__2 = w_subscr(k - 1, kw);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = a_subscr(k, k);
		i__2 = w_subscr(k, kw);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
	    }
	}

/*        Store details of the interchanges in IPIV */

	if (kstep == 1) {
	    ipiv[k] = kp;
	} else {
	    ipiv[k] = -kp;
	    ipiv[k - 1] = -kp;
	}

/*        Decrease K and return to the start of the main loop */

	k -= kstep;
	goto L10;

L30:

/*        Update the upper triangle of A11 (= A(1:k,1:k)) as   

          A11 := A11 - U12*D*U12' = A11 - U12*W'   

          computing blocks of NB columns at a time */

	i__1 = -(*nb);
	for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += 
		i__1) {
/* Computing MIN */
	    i__2 = *nb, i__3 = k - j + 1;
	    jb = min(i__2,i__3);

/*           Update the upper triangle of the diagonal block */

	    i__2 = j + jb - 1;
	    for (jj = j; jj <= i__2; ++jj) {
		i__3 = jj - j + 1;
		i__4 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("No transpose", &i__3, &i__4, &q__1, &a_ref(j, k + 1), 
			lda, &w_ref(jj, kw + 1), ldw, &c_b1, &a_ref(j, jj), &
			c__1);
/* L40: */
	    }

/*           Update the rectangular superdiagonal block */

	    i__2 = j - 1;
	    i__3 = *n - k;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &q__1, &
		    a_ref(1, k + 1), lda, &w_ref(j, kw + 1), ldw, &c_b1, &
		    a_ref(1, j), lda);
/* L50: */
	}

/*        Put U12 in standard form by partially undoing the interchanges   
          in columns k+1:n */

	j = k + 1;
L60:
	jj = j;
	jp = ipiv[j];
	if (jp < 0) {
	    jp = -jp;
	    ++j;
	}
	++j;
	if (jp != jj && j <= *n) {
	    i__1 = *n - j + 1;
	    cswap_(&i__1, &a_ref(jp, j), lda, &a_ref(jj, j), lda);
	}
	if (j <= *n) {
	    goto L60;
	}

/*        Set KB to the number of columns factorized */

	*kb = *n - k;

    } else {

/*        Factorize the leading columns of A using the lower triangle   
          of A and working forwards, and compute the matrix W = L21*D   
          for use in updating A22   

          K is the main loop index, increasing from 1 in steps of 1 or 2 */

	k = 1;
L70:

/*        Exit from loop */

	if (k >= *nb && *nb < *n || k > *n) {
	    goto L90;
	}

/*        Copy column K of A to column K of W and update it */

	i__1 = *n - k + 1;
	ccopy_(&i__1, &a_ref(k, k), &c__1, &w_ref(k, k), &c__1);
	i__1 = *n - k + 1;
	i__2 = k - 1;
	q__1.r = -1.f, q__1.i = 0.f;
	cgemv_("No transpose", &i__1, &i__2, &q__1, &a_ref(k, 1), lda, &w_ref(
		k, 1), ldw, &c_b1, &w_ref(k, k), &c__1);

	kstep = 1;

/*        Determine rows and columns to be interchanged and whether   
          a 1-by-1 or 2-by-2 pivot block will be used */

	i__1 = w_subscr(k, k);
	absakk = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w_ref(k, k))
		, dabs(r__2));

/*        IMAX is the row-index of the largest off-diagonal element in   
          column K, and COLMAX is its absolute value */

	if (k < *n) {
	    i__1 = *n - k;
	    imax = k + icamax_(&i__1, &w_ref(k + 1, k), &c__1);
	    i__1 = w_subscr(imax, k);
	    colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w_ref(
		    imax, k)), dabs(r__2));
	} else {
	    colmax = 0.f;
	}

	if (dmax(absakk,colmax) == 0.f) {

/*           Column K is zero: set INFO and continue */

	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	} else {
	    if (absakk >= alpha * colmax) {

/*              no interchange, use 1-by-1 pivot block */

		kp = k;
	    } else {

/*              Copy column IMAX to column K+1 of W and update it */

		i__1 = imax - k;
		ccopy_(&i__1, &a_ref(imax, k), lda, &w_ref(k, k + 1), &c__1);
		i__1 = *n - imax + 1;
		ccopy_(&i__1, &a_ref(imax, imax), &c__1, &w_ref(imax, k + 1), 
			&c__1);
		i__1 = *n - k + 1;
		i__2 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("No transpose", &i__1, &i__2, &q__1, &a_ref(k, 1), lda,
			 &w_ref(imax, 1), ldw, &c_b1, &w_ref(k, k + 1), &c__1);

/*              JMAX is the column-index of the largest off-diagonal   
                element in row IMAX, and ROWMAX is its absolute value */

		i__1 = imax - k;
		jmax = k - 1 + icamax_(&i__1, &w_ref(k, k + 1), &c__1);
		i__1 = w_subscr(jmax, k + 1);
		rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
			w_ref(jmax, k + 1)), dabs(r__2));
		if (imax < *n) {
		    i__1 = *n - imax;
		    jmax = imax + icamax_(&i__1, &w_ref(imax + 1, k + 1), &
			    c__1);
/* Computing MAX */
		    i__1 = w_subscr(jmax, k + 1);
		    r__3 = rowmax, r__4 = (r__1 = w[i__1].r, dabs(r__1)) + (
			    r__2 = r_imag(&w_ref(jmax, k + 1)), dabs(r__2));
		    rowmax = dmax(r__3,r__4);
		}

		if (absakk >= alpha * colmax * (colmax / rowmax)) {

/*                 no interchange, use 1-by-1 pivot block */

		    kp = k;
		} else /* if(complicated condition) */ {
		    i__1 = w_subscr(imax, k + 1);
		    if ((r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
			    w_ref(imax, k + 1)), dabs(r__2)) >= alpha * 
			    rowmax) {

/*                 interchange rows and columns K and IMAX, use 1-by-1   
                   pivot block */

			kp = imax;

/*                 copy column K+1 of W to column K */

			i__1 = *n - k + 1;
			ccopy_(&i__1, &w_ref(k, k + 1), &c__1, &w_ref(k, k), &
				c__1);
		    } else {

/*                 interchange rows and columns K+1 and IMAX, use 2-by-2   
                   pivot block */

			kp = imax;
			kstep = 2;
		    }
		}
	    }

	    kk = k + kstep - 1;

/*           Updated column KP is already stored in column KK of W */

	    if (kp != kk) {

/*              Copy non-updated column KK to column KP */

		i__1 = a_subscr(kp, k);
		i__2 = a_subscr(kk, k);
		a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
		i__1 = kp - k - 1;
		ccopy_(&i__1, &a_ref(k + 1, kk), &c__1, &a_ref(kp, k + 1), 
			lda);
		i__1 = *n - kp + 1;
		ccopy_(&i__1, &a_ref(kp, kk), &c__1, &a_ref(kp, kp), &c__1);

/*              Interchange rows KK and KP in first KK columns of A and W */

		cswap_(&kk, &a_ref(kk, 1), lda, &a_ref(kp, 1), lda);
		cswap_(&kk, &w_ref(kk, 1), ldw, &w_ref(kp, 1), ldw);
	    }

	    if (kstep == 1) {

/*              1-by-1 pivot block D(k): column k of W now holds   

                W(k) = L(k)*D(k)   

                where L(k) is the k-th column of L   

                Store L(k) in column k of A */

		i__1 = *n - k + 1;
		ccopy_(&i__1, &w_ref(k, k), &c__1, &a_ref(k, k), &c__1);
		if (k < *n) {
		    c_div(&q__1, &c_b1, &a_ref(k, k));
		    r1.r = q__1.r, r1.i = q__1.i;
		    i__1 = *n - k;
		    cscal_(&i__1, &r1, &a_ref(k + 1, k), &c__1);
		}
	    } else {

/*              2-by-2 pivot block D(k): columns k and k+1 of W now hold   

                ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)   

                where L(k) and L(k+1) are the k-th and (k+1)-th columns   
                of L */

		if (k < *n - 1) {

/*                 Store L(k) and L(k+1) in columns k and k+1 of A */

		    i__1 = w_subscr(k + 1, k);
		    d21.r = w[i__1].r, d21.i = w[i__1].i;
		    c_div(&q__1, &w_ref(k + 1, k + 1), &d21);
		    d11.r = q__1.r, d11.i = q__1.i;
		    c_div(&q__1, &w_ref(k, k), &d21);
		    d22.r = q__1.r, d22.i = q__1.i;
		    q__3.r = d11.r * d22.r - d11.i * d22.i, q__3.i = d11.r * 
			    d22.i + d11.i * d22.r;
		    q__2.r = q__3.r - 1.f, q__2.i = q__3.i + 0.f;
		    c_div(&q__1, &c_b1, &q__2);
		    t.r = q__1.r, t.i = q__1.i;
		    c_div(&q__1, &t, &d21);
		    d21.r = q__1.r, d21.i = q__1.i;
		    i__1 = *n;
		    for (j = k + 2; j <= i__1; ++j) {
			i__2 = a_subscr(j, k);
			i__3 = w_subscr(j, k);
			q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, 
				q__3.i = d11.r * w[i__3].i + d11.i * w[i__3]
				.r;
			i__4 = w_subscr(j, k + 1);
			q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4]
				.i;
			q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i = 
				d21.r * q__2.i + d21.i * q__2.r;
			a[i__2].r = q__1.r, a[i__2].i = q__1.i;
			i__2 = a_subscr(j, k + 1);
			i__3 = w_subscr(j, k + 1);
			q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, 
				q__3.i = d22.r * w[i__3].i + d22.i * w[i__3]
				.r;
			i__4 = w_subscr(j, k);
			q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4]
				.i;
			q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i = 
				d21.r * q__2.i + d21.i * q__2.r;
			a[i__2].r = q__1.r, a[i__2].i = q__1.i;
/* L80: */
		    }
		}

/*              Copy D(k) to A */

		i__1 = a_subscr(k, k);
		i__2 = w_subscr(k, k);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = a_subscr(k + 1, k);
		i__2 = w_subscr(k + 1, k);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = a_subscr(k + 1, k + 1);
		i__2 = w_subscr(k + 1, k + 1);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
	    }
	}

/*        Store details of the interchanges in IPIV */

	if (kstep == 1) {
	    ipiv[k] = kp;
	} else {
	    ipiv[k] = -kp;
	    ipiv[k + 1] = -kp;
	}

/*        Increase K and return to the start of the main loop */

	k += kstep;
	goto L70;

L90:

/*        Update the lower triangle of A22 (= A(k:n,k:n)) as   

          A22 := A22 - L21*D*L21' = A22 - L21*W'   

          computing blocks of NB columns at a time */

	i__1 = *n;
	i__2 = *nb;
	for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
/* Computing MIN */
	    i__3 = *nb, i__4 = *n - j + 1;
	    jb = min(i__3,i__4);

/*           Update the lower triangle of the diagonal block */

	    i__3 = j + jb - 1;
	    for (jj = j; jj <= i__3; ++jj) {
		i__4 = j + jb - jj;
		i__5 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("No transpose", &i__4, &i__5, &q__1, &a_ref(jj, 1), 
			lda, &w_ref(jj, 1), ldw, &c_b1, &a_ref(jj, jj), &c__1);
/* L100: */
	    }

/*           Update the rectangular subdiagonal block */

	    if (j + jb <= *n) {
		i__3 = *n - j - jb + 1;
		i__4 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &q__1, 
			&a_ref(j + jb, 1), lda, &w_ref(j, 1), ldw, &c_b1, &
			a_ref(j + jb, j), lda);
	    }
/* L110: */
	}

/*        Put L21 in standard form by partially undoing the interchanges   
          in columns 1:k-1 */

	j = k - 1;
L120:
	jj = j;
	jp = ipiv[j];
	if (jp < 0) {
	    jp = -jp;
	    --j;
	}
	--j;
	if (jp != jj && j >= 1) {
	    cswap_(&j, &a_ref(jp, 1), lda, &a_ref(jj, 1), lda);
	}
	if (j >= 1) {
	    goto L120;
	}

/*        Set KB to the number of columns factorized */

	*kb = k - 1;

    }
    return 0;

/*     End of CLASYF */

} /* clasyf_ */
Ejemplo n.º 15
0
extern "C" magma_int_t
magma_dgeqrf_gpu(
    magma_int_t m, magma_int_t n,
    magmaDouble_ptr dA, size_t dA_offset,  magma_int_t ldda,
    double *tau, magmaDouble_ptr dT, size_t dT_offset,
    magma_queue_t queue,
    magma_int_t *info)
{
/*  -- clMAGMA (version 1.3.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date November 2014

    Purpose
    =======
    DGEQRF computes a QR factorization of a real M-by-N matrix A:
    A = Q * R.
    
    This version stores the triangular dT matrices used in
    the block QR factorization so that they can be applied directly (i.e.,
    without being recomputed) later. As a result, the application
    of Q is much faster. Also, the upper triangular matrices for V have 0s
    in them. The corresponding parts of the upper triangular R are inverted
    and stored separately in dT.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix A.  M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A.  N >= 0.

    dA      (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDDA,N)
            On entry, the M-by-N matrix A.
            On exit, the elements on and above the diagonal of the array
            contain the min(M,N)-by-N upper trapezoidal matrix R (R is
            upper triangular if m >= n); the elements below the diagonal,
            with the array TAU, represent the orthogonal matrix Q as a
            product of min(m,n) elementary reflectors (see Further
            Details).

    LDDA     (input) INTEGER
            The leading dimension of the array dA.  LDDA >= max(1,M).
            To benefit from coalescent memory accesses LDDA must be
            divisible by 16.

    TAU     (output) DOUBLE_PRECISION array, dimension (min(M,N))
            The scalar factors of the elementary reflectors (see Further
            Details).

    dT      (workspace/output)  DOUBLE_PRECISION array on the GPU,
            dimension (2*MIN(M, N) + (N+31)/32*32 )*NB,
            where NB can be obtained through magma_get_dgeqrf_nb(M).
            It starts with MIN(M,N)*NB block that store the triangular T
            matrices, followed by the MIN(M,N)*NB block of the diagonal
            inverses for the R matrix. The rest of the array is used as workspace.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
                  or another error occured, such as memory allocation failed.

    Further Details
    ===============
    The matrix Q is represented as a product of elementary reflectors

       Q = H(1) H(2) . . . H(k), where k = min(m,n).

    Each H(i) has the form

       H(i) = I - tau * v * v'

    where tau is a real scalar, and v is a real vector with
    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
    and tau in TAU(i).
    =====================================================================    */

    #define a_ref(a_1,a_2) dA, (dA_offset + (a_1) + (a_2)*(ldda))
    #define t_ref(a_1)     dT, (dT_offset + (a_1)*nb)
    #define d_ref(a_1)     dT, (dT_offset + (minmn + (a_1))*nb)
    #define dd_ref(a_1)    dT, (dT_offset + (2*minmn+(a_1))*nb)
    #define work_ref(a_1)  ( work + (a_1))
    #define hwork          ( work + (nb)*(m))

    magma_int_t i, k, minmn, old_i, old_ib, rows, cols;
    magma_int_t ib, nb;
    magma_int_t ldwork, lddwork, lwork, lhwork;
    double *work, *ut;

    /* check arguments */
    *info = 0;
    if (m < 0) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (ldda < max(1,m)) {
        *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    k = minmn = min(m,n);
    if (k == 0)
        return *info;

    nb = magma_get_dgeqrf_nb(m);

    lwork  = (m + n + nb)*nb;
    lhwork = lwork - m*nb;

    if (MAGMA_SUCCESS != magma_dmalloc_cpu( &work, lwork )) {
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    
    ut = hwork+nb*(n);
    memset( ut, 0, nb*nb*sizeof(double));

    magma_event_t event[2] = {NULL, NULL};

    ldwork = m;
    lddwork= n;

    if ( (nb > 1) && (nb < k) ) {
        /* Use blocked code initially */
        old_i = 0; old_ib = nb;
        for (i = 0; i < k-nb; i += nb) {
            ib = min(k-i, nb);
            rows = m -i;
            magma_dgetmatrix_async( rows, ib,
                                    a_ref(i,i),  ldda,
                                    work_ref(i), ldwork, queue, &event[1] );
            if (i>0){
                /* Apply H' to A(i:m,i+2*ib:n) from the left */
                cols = n-old_i-2*old_ib;
                magma_dlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
                                  m-old_i, cols, old_ib,
                                  a_ref(old_i, old_i         ), ldda, t_ref(old_i), nb,
                                  a_ref(old_i, old_i+2*old_ib), ldda, dd_ref(0),    lddwork, queue);
                
                /* store the diagonal */
                magma_dsetmatrix_async( old_ib, old_ib,
                                        ut, old_ib,
                                        d_ref(old_i), old_ib, queue, &event[0] );
            }
            
            magma_event_sync(event[1]);
            lapackf77_dgeqrf(&rows, &ib, work_ref(i), &ldwork, tau+i, hwork, &lhwork, info);
            /* Form the triangular factor of the block reflector
               H = H(i) H(i+1) . . . H(i+ib-1) */
            lapackf77_dlarft( MagmaForwardStr, MagmaColumnwiseStr,
                              &rows, &ib,
                              work_ref(i), &ldwork, tau+i, hwork, &ib);

            /* Put 0s in the upper triangular part of a panel (and 1s on the
               diagonal); copy the upper triangular in ut and invert it. */
            magma_event_sync(event[0]);
            dsplit_diag_block(ib, work_ref(i), ldwork, ut);
            magma_dsetmatrix( rows, ib, work_ref(i), ldwork, a_ref(i,i), ldda, queue);
            
            if (i + ib < n) {
                /* Send the triangular factor T to the GPU */
                magma_dsetmatrix( ib, ib, hwork, ib, t_ref(i), nb, queue );

                if (i+nb < k-nb){
                    /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */
                    magma_dlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
                                      rows, ib, ib,
                                      a_ref(i, i   ), ldda, t_ref(i),  nb,
                                      a_ref(i, i+ib), ldda, dd_ref(0), lddwork, queue);
                }
                else {
                    cols = n-i-ib;
                    magma_dlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
                                      rows, cols, ib,
                                      a_ref(i, i   ), ldda, t_ref(i),  nb,
                                      a_ref(i, i+ib), ldda, dd_ref(0), lddwork, queue);
                    /* Fix the diagonal block */
                    magma_dsetmatrix( ib, ib, ut, ib, d_ref(i), ib , queue);
                }
                old_i  = i;
                old_ib = ib;
            }
        }
    } else {
        i = 0;
    }

    /* Use unblocked code to factor the last or only block. */
    if (i < k) {
        ib   = n-i;
        rows = m-i;
        magma_dgetmatrix( rows, ib, a_ref(i, i), ldda, work, rows, queue );
        lhwork = lwork - rows*ib;
        lapackf77_dgeqrf(&rows, &ib, work, &rows, tau+i, work+ib*rows, &lhwork, info);
        
        magma_dsetmatrix( rows, ib, work, rows, a_ref(i, i), ldda, queue );
    }

    magma_free_cpu( work );
    return *info;
} /* magma_dgeqrf_gpu */
Ejemplo n.º 16
0
/* Subroutine */ int derred_(char *path, integer *nunit)
{
    /* Format strings */
    static char fmt_9999[] = "(1x,a6,\002 passed the tests of the error exit"
	    "s (\002,i3,\002 tests done)\002)";
    static char fmt_9998[] = "(\002 *** \002,a6,\002 failed the tests of the"
	    " error exits ***\002)";

    /* Builtin functions */
    integer s_wsle(cilist *), e_wsle(void);
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Local variables */
    static integer info, sdim;
    static doublereal a[16]	/* was [4][4] */;
    static logical b[4];
    static integer i__, j;
    static doublereal s[4], u[16]	/* was [4][4] */, w[16];
    extern /* Subroutine */ int dgees_(char *, char *, L_fp, integer *, 
	    doublereal *, integer *, integer *, doublereal *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, logical *, 
	    integer *), dgeev_(char *, char *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
	     integer *, doublereal *, integer *, doublereal *, integer *, 
	    integer *);
    static doublereal abnrm;
    static char c2[2];
    static doublereal r1[4], r2[4];
    extern /* Subroutine */ int dgesdd_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
	    doublereal *, integer *, doublereal *, integer *, integer *, 
	    integer *);
    static integer iw[8];
    static doublereal wi[4];
    static integer nt;
    static doublereal vl[16]	/* was [4][4] */, vr[16]	/* was [4][4] 
	    */, wr[4], vt[16]	/* was [4][4] */;
    extern /* Subroutine */ int dgesvd_(char *, char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
	    doublereal *, integer *, doublereal *, integer *, integer *);
    extern logical dslect_();
    extern /* Subroutine */ int dgeesx_(char *, char *, L_fp, char *, integer 
	    *, doublereal *, integer *, integer *, doublereal *, doublereal *,
	     doublereal *, integer *, doublereal *, doublereal *, doublereal *
	    , integer *, integer *, integer *, logical *, integer *);
    extern logical lsamen_(integer *, char *, char *);
    extern /* Subroutine */ int dgeevx_(char *, char *, char *, char *, 
	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
	     doublereal *, integer *, integer *, integer *), chkxer_(char *, integer *, integer *, logical *, 
	    logical *);
    static integer ihi, ilo;

    /* Fortran I/O blocks */
    static cilist io___1 = { 0, 0, 0, 0, 0 };
    static cilist io___24 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___25 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___26 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___27 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___28 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___29 = { 0, 0, 0, fmt_9998, 0 };



#define a_ref(a_1,a_2) a[(a_2)*4 + a_1 - 5]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       December 22, 1999   


    Purpose   
    =======   

    DERRED tests the error exits for the eigenvalue driver routines for   
    DOUBLE PRECISION matrices:   

    PATH  driver   description   
    ----  ------   -----------   
    SEV   DGEEV    find eigenvalues/eigenvectors for nonsymmetric A   
    SES   DGEES    find eigenvalues/Schur form for nonsymmetric A   
    SVX   DGEEVX   SGEEV + balancing and condition estimation   
    SSX   DGEESX   SGEES + balancing and condition estimation   
    DBD   DGESVD   compute SVD of an M-by-N matrix A   
          DGESDD   compute SVD of an M-by-N matrix A (by divide and   
                   conquer)   

    Arguments   
    =========   

    PATH    (input) CHARACTER*3   
            The LAPACK path name for the routines to be tested.   

    NUNIT   (input) INTEGER   
            The unit number for output.   

    ===================================================================== */


    infoc_1.nout = *nunit;
    io___1.ciunit = infoc_1.nout;
    s_wsle(&io___1);
    e_wsle();
    s_copy(c2, path + 1, (ftnlen)2, (ftnlen)2);

/*     Initialize A */

    for (j = 1; j <= 4; ++j) {
	for (i__ = 1; i__ <= 4; ++i__) {
	    a_ref(i__, j) = 0.;
/* L10: */
	}
/* L20: */
    }
    for (i__ = 1; i__ <= 4; ++i__) {
	a_ref(i__, i__) = 1.;
/* L30: */
    }
    infoc_1.ok = TRUE_;
    nt = 0;

    if (lsamen_(&c__2, c2, "EV")) {

/*        Test DGEEV */

	s_copy(srnamc_1.srnamt, "DGEEV ", (ftnlen)6, (ftnlen)6);
	infoc_1.infot = 1;
	dgeev_("X", "N", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, &c__1, w, &
		c__1, &info);
	chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 2;
	dgeev_("N", "X", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, &c__1, w, &
		c__1, &info);
	chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 3;
	dgeev_("N", "N", &c_n1, a, &c__1, wr, wi, vl, &c__1, vr, &c__1, w, &
		c__1, &info);
	chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 5;
	dgeev_("N", "N", &c__2, a, &c__1, wr, wi, vl, &c__1, vr, &c__1, w, &
		c__6, &info);
	chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 9;
	dgeev_("V", "N", &c__2, a, &c__2, wr, wi, vl, &c__1, vr, &c__1, w, &
		c__8, &info);
	chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 11;
	dgeev_("N", "V", &c__2, a, &c__2, wr, wi, vl, &c__1, vr, &c__1, w, &
		c__8, &info);
	chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 13;
	dgeev_("V", "V", &c__1, a, &c__1, wr, wi, vl, &c__1, vr, &c__1, w, &
		c__3, &info);
	chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	nt += 7;

    } else if (lsamen_(&c__2, c2, "ES")) {

/*        Test DGEES */

	s_copy(srnamc_1.srnamt, "DGEES ", (ftnlen)6, (ftnlen)6);
	infoc_1.infot = 1;
	dgees_("X", "N", (L_fp)dslect_, &c__0, a, &c__1, &sdim, wr, wi, vl, &
		c__1, w, &c__1, b, &info);
	chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 2;
	dgees_("N", "X", (L_fp)dslect_, &c__0, a, &c__1, &sdim, wr, wi, vl, &
		c__1, w, &c__1, b, &info);
	chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 4;
	dgees_("N", "S", (L_fp)dslect_, &c_n1, a, &c__1, &sdim, wr, wi, vl, &
		c__1, w, &c__1, b, &info);
	chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 6;
	dgees_("N", "S", (L_fp)dslect_, &c__2, a, &c__1, &sdim, wr, wi, vl, &
		c__1, w, &c__6, b, &info);
	chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 11;
	dgees_("V", "S", (L_fp)dslect_, &c__2, a, &c__2, &sdim, wr, wi, vl, &
		c__1, w, &c__6, b, &info);
	chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 13;
	dgees_("N", "S", (L_fp)dslect_, &c__1, a, &c__1, &sdim, wr, wi, vl, &
		c__1, w, &c__2, b, &info);
	chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	nt += 6;

    } else if (lsamen_(&c__2, c2, "VX")) {

/*        Test DGEEVX */

	s_copy(srnamc_1.srnamt, "DGEEVX", (ftnlen)6, (ftnlen)6);
	infoc_1.infot = 1;
	dgeevx_("X", "N", "N", "N", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 2;
	dgeevx_("N", "X", "N", "N", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 3;
	dgeevx_("N", "N", "X", "N", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 4;
	dgeevx_("N", "N", "N", "X", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 5;
	dgeevx_("N", "N", "N", "N", &c_n1, a, &c__1, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 7;
	dgeevx_("N", "N", "N", "N", &c__2, a, &c__1, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 11;
	dgeevx_("N", "V", "N", "N", &c__2, a, &c__2, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__6, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 13;
	dgeevx_("N", "N", "V", "N", &c__2, a, &c__2, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__6, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 21;
	dgeevx_("N", "N", "N", "N", &c__1, a, &c__1, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 21;
	dgeevx_("N", "V", "N", "N", &c__1, a, &c__1, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__2, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 21;
	dgeevx_("N", "N", "V", "V", &c__1, a, &c__1, wr, wi, vl, &c__1, vr, &
		c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__3, iw, &info);
	chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	nt += 11;

    } else if (lsamen_(&c__2, c2, "SX")) {

/*        Test DGEESX */

	s_copy(srnamc_1.srnamt, "DGEESX", (ftnlen)6, (ftnlen)6);
	infoc_1.infot = 1;
	dgeesx_("X", "N", (L_fp)dslect_, "N", &c__0, a, &c__1, &sdim, wr, wi, 
		vl, &c__1, r1, r2, w, &c__1, iw, &c__1, b, &info);
	chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 2;
	dgeesx_("N", "X", (L_fp)dslect_, "N", &c__0, a, &c__1, &sdim, wr, wi, 
		vl, &c__1, r1, r2, w, &c__1, iw, &c__1, b, &info);
	chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 4;
	dgeesx_("N", "N", (L_fp)dslect_, "X", &c__0, a, &c__1, &sdim, wr, wi, 
		vl, &c__1, r1, r2, w, &c__1, iw, &c__1, b, &info);
	chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 5;
	dgeesx_("N", "N", (L_fp)dslect_, "N", &c_n1, a, &c__1, &sdim, wr, wi, 
		vl, &c__1, r1, r2, w, &c__1, iw, &c__1, b, &info);
	chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 7;
	dgeesx_("N", "N", (L_fp)dslect_, "N", &c__2, a, &c__1, &sdim, wr, wi, 
		vl, &c__1, r1, r2, w, &c__6, iw, &c__1, b, &info);
	chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 12;
	dgeesx_("V", "N", (L_fp)dslect_, "N", &c__2, a, &c__2, &sdim, wr, wi, 
		vl, &c__1, r1, r2, w, &c__6, iw, &c__1, b, &info);
	chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 16;
	dgeesx_("N", "N", (L_fp)dslect_, "N", &c__1, a, &c__1, &sdim, wr, wi, 
		vl, &c__1, r1, r2, w, &c__2, iw, &c__1, b, &info);
	chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	nt += 7;

    } else if (lsamen_(&c__2, c2, "BD")) {

/*        Test DGESVD */

	s_copy(srnamc_1.srnamt, "DGESVD", (ftnlen)6, (ftnlen)6);
	infoc_1.infot = 1;
	dgesvd_("X", "N", &c__0, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, &
		c__1, &info);
	chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 2;
	dgesvd_("N", "X", &c__0, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, &
		c__1, &info);
	chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 2;
	dgesvd_("O", "O", &c__0, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, &
		c__1, &info);
	chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 3;
	dgesvd_("N", "N", &c_n1, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, &
		c__1, &info);
	chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 4;
	dgesvd_("N", "N", &c__0, &c_n1, a, &c__1, s, u, &c__1, vt, &c__1, w, &
		c__1, &info);
	chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 6;
	dgesvd_("N", "N", &c__2, &c__1, a, &c__1, s, u, &c__1, vt, &c__1, w, &
		c__5, &info);
	chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 9;
	dgesvd_("A", "N", &c__2, &c__1, a, &c__2, s, u, &c__1, vt, &c__1, w, &
		c__5, &info);
	chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 11;
	dgesvd_("N", "A", &c__1, &c__2, a, &c__1, s, u, &c__1, vt, &c__1, w, &
		c__5, &info);
	chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	nt += 8;
	if (infoc_1.ok) {
	    io___24.ciunit = infoc_1.nout;
	    s_wsfe(&io___24);
	    do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6);
	    do_fio(&c__1, (char *)&nt, (ftnlen)sizeof(integer));
	    e_wsfe();
	} else {
	    io___25.ciunit = infoc_1.nout;
	    s_wsfe(&io___25);
	    e_wsfe();
	}

/*        Test DGESDD */

	s_copy(srnamc_1.srnamt, "DGESDD", (ftnlen)6, (ftnlen)6);
	infoc_1.infot = 1;
	dgesdd_("X", &c__0, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, &c__1,
		 iw, &info);
	chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 2;
	dgesdd_("N", &c_n1, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, &c__1,
		 iw, &info);
	chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 3;
	dgesdd_("N", &c__0, &c_n1, a, &c__1, s, u, &c__1, vt, &c__1, w, &c__1,
		 iw, &info);
	chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 5;
	dgesdd_("N", &c__2, &c__1, a, &c__1, s, u, &c__1, vt, &c__1, w, &c__5,
		 iw, &info);
	chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 8;
	dgesdd_("A", &c__2, &c__1, a, &c__2, s, u, &c__1, vt, &c__1, w, &c__5,
		 iw, &info);
	chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	infoc_1.infot = 10;
	dgesdd_("A", &c__1, &c__2, a, &c__1, s, u, &c__1, vt, &c__1, w, &c__5,
		 iw, &info);
	chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, &
		infoc_1.ok);
	nt += -2;
	if (infoc_1.ok) {
	    io___26.ciunit = infoc_1.nout;
	    s_wsfe(&io___26);
	    do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6);
	    do_fio(&c__1, (char *)&nt, (ftnlen)sizeof(integer));
	    e_wsfe();
	} else {
	    io___27.ciunit = infoc_1.nout;
	    s_wsfe(&io___27);
	    e_wsfe();
	}
    }

/*     Print a summary line. */

    if (! lsamen_(&c__2, c2, "BD")) {
	if (infoc_1.ok) {
	    io___28.ciunit = infoc_1.nout;
	    s_wsfe(&io___28);
	    do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6);
	    do_fio(&c__1, (char *)&nt, (ftnlen)sizeof(integer));
	    e_wsfe();
	} else {
	    io___29.ciunit = infoc_1.nout;
	    s_wsfe(&io___29);
	    e_wsfe();
	}
    }

    return 0;

/*     End of DERRED */
} /* derred_ */
Ejemplo n.º 17
0
/* Subroutine */ int strt03_(char *uplo, char *trans, char *diag, integer *n, 
	integer *nrhs, real *a, integer *lda, real *scale, real *cnorm, real *
	tscal, real *x, integer *ldx, real *b, integer *ldb, real *work, real 
	*resid)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1;
    real r__1, r__2, r__3;

    /* Local variables */
    static integer j;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real xscal;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static real tnorm, xnorm;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), strmv_(char *, char *, char *, integer *, 
	    real *, integer *, real *, integer *), 
	    slabad_(real *, real *);
    static integer ix;
    extern doublereal slamch_(char *);
    static real bignum;
    extern integer isamax_(integer *, real *, integer *);
    static real smlnum, eps, err;


#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    STRT03 computes the residual for the solution to a scaled triangular   
    system of equations A*x = s*b  or  A'*x = s*b.   
    Here A is a triangular matrix, A' is the transpose of A, s is a   
    scalar, and x and b are N by NRHS matrices.  The test ratio is the   
    maximum over the number of right hand sides of   
       norm(s*b - op(A)*x) / ( norm(op(A)) * norm(x) * EPS ),   
    where op(A) denotes A or A' and EPS is the machine epsilon.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the matrix A is upper or lower triangular.   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    TRANS   (input) CHARACTER*1   
            Specifies the operation applied to A.   
            = 'N':  A *x = s*b  (No transpose)   
            = 'T':  A'*x = s*b  (Transpose)   
            = 'C':  A'*x = s*b  (Conjugate transpose = Transpose)   

    DIAG    (input) CHARACTER*1   
            Specifies whether or not the matrix A is unit triangular.   
            = 'N':  Non-unit triangular   
            = 'U':  Unit triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrices X and B.  NRHS >= 0.   

    A       (input) REAL array, dimension (LDA,N)   
            The triangular matrix A.  If UPLO = 'U', the leading n by n   
            upper triangular part of the array A contains the upper   
            triangular matrix, and the strictly lower triangular part of   
            A is not referenced.  If UPLO = 'L', the leading n by n lower   
            triangular part of the array A contains the lower triangular   
            matrix, and the strictly upper triangular part of A is not   
            referenced.  If DIAG = 'U', the diagonal elements of A are   
            also not referenced and are assumed to be 1.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    SCALE   (input) REAL   
            The scaling factor s used in solving the triangular system.   

    CNORM   (input) REAL array, dimension (N)   
            The 1-norms of the columns of A, not counting the diagonal.   

    TSCAL   (input) REAL   
            The scaling factor used in computing the 1-norms in CNORM.   
            CNORM actually contains the column norms of TSCAL*A.   

    X       (input) REAL array, dimension (LDX,NRHS)   
            The computed solution vectors for the system of linear   
            equations.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  LDX >= max(1,N).   

    B       (input) REAL array, dimension (LDB,NRHS)   
            The right hand side vectors for the system of linear   
            equations.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    WORK    (workspace) REAL array, dimension (N)   

    RESID   (output) REAL   
            The maximum over the number of right hand sides of   
            norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ).   

    =====================================================================   


       Quick exit if N = 0   

       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --cnorm;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    if (*n <= 0 || *nrhs <= 0) {
	*resid = 0.f;
	return 0;
    }
    eps = slamch_("Epsilon");
    smlnum = slamch_("Safe minimum");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

/*     Compute the norm of the triangular matrix A using the column   
       norms already computed by SLATRS. */

    tnorm = 0.f;
    if (lsame_(diag, "N")) {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	    r__2 = tnorm, r__3 = *tscal * (r__1 = a_ref(j, j), dabs(r__1)) + 
		    cnorm[j];
	    tnorm = dmax(r__2,r__3);
/* L10: */
	}
    } else {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	    r__1 = tnorm, r__2 = *tscal + cnorm[j];
	    tnorm = dmax(r__1,r__2);
/* L20: */
	}
    }

/*     Compute the maximum over the number of right hand sides of   
          norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ). */

    *resid = 0.f;
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	scopy_(n, &x_ref(1, j), &c__1, &work[1], &c__1);
	ix = isamax_(n, &work[1], &c__1);
/* Computing MAX */
	r__2 = 1.f, r__3 = (r__1 = x_ref(ix, j), dabs(r__1));
	xnorm = dmax(r__2,r__3);
	xscal = 1.f / xnorm / (real) (*n);
	sscal_(n, &xscal, &work[1], &c__1);
	strmv_(uplo, trans, diag, n, &a[a_offset], lda, &work[1], &c__1);
	r__1 = -(*scale) * xscal;
	saxpy_(n, &r__1, &b_ref(1, j), &c__1, &work[1], &c__1);
	ix = isamax_(n, &work[1], &c__1);
	err = *tscal * (r__1 = work[ix], dabs(r__1));
	ix = isamax_(n, &x_ref(1, j), &c__1);
	xnorm = (r__1 = x_ref(ix, j), dabs(r__1));
	if (err * smlnum <= xnorm) {
	    if (xnorm > 0.f) {
		err /= xnorm;
	    }
	} else {
	    if (err > 0.f) {
		err = 1.f / eps;
	    }
	}
	if (err * smlnum <= tnorm) {
	    if (tnorm > 0.f) {
		err /= tnorm;
	    }
	} else {
	    if (err > 0.f) {
		err = 1.f / eps;
	    }
	}
	*resid = dmax(*resid,err);
/* L30: */
    }

    return 0;

/*     End of STRT03 */

} /* strt03_ */
Ejemplo n.º 18
0
/* Subroutine */ int sgegs_(char *jobvsl, char *jobvsr, integer *n, real *a, 
	integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real 
	*beta, real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real *
	work, integer *lwork, integer *info)
{
/*  -- LAPACK driver routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    This routine is deprecated and has been replaced by routine SGGES.   

    SGEGS computes for a pair of N-by-N real nonsymmetric matrices A, B:   
    the generalized eigenvalues (alphar +/- alphai*i, beta), the real   
    Schur form (A, B), and optionally left and/or right Schur vectors   
    (VSL and VSR).   

    (If only the generalized eigenvalues are needed, use the driver SGEGV   
    instead.)   

    A generalized eigenvalue for a pair of matrices (A,B) is, roughly   
    speaking, a scalar w or a ratio  alpha/beta = w, such that  A - w*B   
    is singular.  It is usually represented as the pair (alpha,beta),   
    as there is a reasonable interpretation for beta=0, and even for   
    both being zero.  A good beginning reference is the book, "Matrix   
    Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press)   

    The (generalized) Schur form of a pair of matrices is the result of   
    multiplying both matrices on the left by one orthogonal matrix and   
    both on the right by another orthogonal matrix, these two orthogonal   
    matrices being chosen so as to bring the pair of matrices into   
    (real) Schur form.   

    A pair of matrices A, B is in generalized real Schur form if B is   
    upper triangular with non-negative diagonal and A is block upper   
    triangular with 1-by-1 and 2-by-2 blocks.  1-by-1 blocks correspond   
    to real generalized eigenvalues, while 2-by-2 blocks of A will be   
    "standardized" by making the corresponding elements of B have the   
    form:   
            [  a  0  ]   
            [  0  b  ]   

    and the pair of corresponding 2-by-2 blocks in A and B will   
    have a complex conjugate pair of generalized eigenvalues.   

    The left and right Schur vectors are the columns of VSL and VSR,   
    respectively, where VSL and VSR are the orthogonal matrices   
    which reduce A and B to Schur form:   

    Schur form of (A,B) = ( (VSL)**T A (VSR), (VSL)**T B (VSR) )   

    Arguments   
    =========   

    JOBVSL  (input) CHARACTER*1   
            = 'N':  do not compute the left Schur vectors;   
            = 'V':  compute the left Schur vectors.   

    JOBVSR  (input) CHARACTER*1   
            = 'N':  do not compute the right Schur vectors;   
            = 'V':  compute the right Schur vectors.   

    N       (input) INTEGER   
            The order of the matrices A, B, VSL, and VSR.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA, N)   
            On entry, the first of the pair of matrices whose generalized   
            eigenvalues and (optionally) Schur vectors are to be   
            computed.   
            On exit, the generalized Schur form of A.   
            Note: to avoid overflow, the Frobenius norm of the matrix   
            A should be less than the overflow threshold.   

    LDA     (input) INTEGER   
            The leading dimension of A.  LDA >= max(1,N).   

    B       (input/output) REAL array, dimension (LDB, N)   
            On entry, the second of the pair of matrices whose   
            generalized eigenvalues and (optionally) Schur vectors are   
            to be computed.   
            On exit, the generalized Schur form of B.   
            Note: to avoid overflow, the Frobenius norm of the matrix   
            B should be less than the overflow threshold.   

    LDB     (input) INTEGER   
            The leading dimension of B.  LDB >= max(1,N).   

    ALPHAR  (output) REAL array, dimension (N)   
    ALPHAI  (output) REAL array, dimension (N)   
    BETA    (output) REAL array, dimension (N)   
            On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will   
            be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i,   
            j=1,...,N  and  BETA(j),j=1,...,N  are the diagonals of the   
            complex Schur form (A,B) that would result if the 2-by-2   
            diagonal blocks of the real Schur form of (A,B) were further   
            reduced to triangular form using 2-by-2 complex unitary   
            transformations.  If ALPHAI(j) is zero, then the j-th   
            eigenvalue is real; if positive, then the j-th and (j+1)-st   
            eigenvalues are a complex conjugate pair, with ALPHAI(j+1)   
            negative.   

            Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)   
            may easily over- or underflow, and BETA(j) may even be zero.   
            Thus, the user should avoid naively computing the ratio   
            alpha/beta.  However, ALPHAR and ALPHAI will be always less   
            than and usually comparable with norm(A) in magnitude, and   
            BETA always less than and usually comparable with norm(B).   

    VSL     (output) REAL array, dimension (LDVSL,N)   
            If JOBVSL = 'V', VSL will contain the left Schur vectors.   
            (See "Purpose", above.)   
            Not referenced if JOBVSL = 'N'.   

    LDVSL   (input) INTEGER   
            The leading dimension of the matrix VSL. LDVSL >=1, and   
            if JOBVSL = 'V', LDVSL >= N.   

    VSR     (output) REAL array, dimension (LDVSR,N)   
            If JOBVSR = 'V', VSR will contain the right Schur vectors.   
            (See "Purpose", above.)   
            Not referenced if JOBVSR = 'N'.   

    LDVSR   (input) INTEGER   
            The leading dimension of the matrix VSR. LDVSR >= 1, and   
            if JOBVSR = 'V', LDVSR >= N.   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.  LWORK >= max(1,4*N).   
            For good performance, LWORK must generally be larger.   
            To compute the optimal value of LWORK, call ILAENV to get   
            blocksizes (for SGEQRF, SORMQR, and SORGQR.)  Then compute:   
            NB  -- MAX of the blocksizes for SGEQRF, SORMQR, and SORGQR   
            The optimal LWORK is  2*N + N*(NB+1).   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            = 1,...,N:   
                  The QZ iteration failed.  (A,B) are not in Schur   
                  form, but ALPHAR(j), ALPHAI(j), and BETA(j) should   
                  be correct for j=INFO+1,...,N.   
            > N:  errors that usually indicate LAPACK problems:   
                  =N+1: error return from SGGBAL   
                  =N+2: error return from SGEQRF   
                  =N+3: error return from SORMQR   
                  =N+4: error return from SORGQR   
                  =N+5: error return from SGGHRD   
                  =N+6: error return from SHGEQZ (other than failed   
                                                  iteration)   
                  =N+7: error return from SGGBAK (computing VSL)   
                  =N+8: error return from SGGBAK (computing VSR)   
                  =N+9: error return from SLASCL (various places)   

    =====================================================================   


       Decode the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b36 = 0.f;
    static real c_b37 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, 
	    vsr_dim1, vsr_offset, i__1, i__2;
    /* Local variables */
    static real anrm, bnrm;
    static integer itau, lopt;
    extern logical lsame_(char *, char *);
    static integer ileft, iinfo, icols;
    static logical ilvsl;
    static integer iwork;
    static logical ilvsr;
    static integer irows, nb;
    extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, integer *
	    ), sggbal_(char *, integer *, real *, integer *, 
	    real *, integer *, integer *, integer *, real *, real *, real *, 
	    integer *);
    static logical ilascl, ilbscl;
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    static real safmin;
    extern /* Subroutine */ int sgghrd_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, integer *, real *, integer *
	    , real *, integer *, integer *), xerbla_(char *, 
	    integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static real bignum;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *);
    static integer ijobvl, iright;
    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer 
	    *, real *, real *, integer *, integer *);
    static integer ijobvr;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slaset_(char *, integer *, 
	    integer *, real *, real *, real *, integer *);
    static real anrmto;
    static integer lwkmin, nb1, nb2, nb3;
    static real bnrmto;
    extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, 
	    integer *, integer *, real *, integer *, real *, integer *, real *
	    , real *, real *, real *, integer *, real *, integer *, real *, 
	    integer *, integer *);
    static real smlnum;
    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *, integer *);
    static integer lwkopt;
    static logical lquery;
    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *, integer *);
    static integer ihi, ilo;
    static real eps;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define vsl_ref(a_1,a_2) vsl[(a_2)*vsl_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --alphar;
    --alphai;
    --beta;
    vsl_dim1 = *ldvsl;
    vsl_offset = 1 + vsl_dim1 * 1;
    vsl -= vsl_offset;
    vsr_dim1 = *ldvsr;
    vsr_offset = 1 + vsr_dim1 * 1;
    vsr -= vsr_offset;
    --work;

    /* Function Body */
    if (lsame_(jobvsl, "N")) {
	ijobvl = 1;
	ilvsl = FALSE_;
    } else if (lsame_(jobvsl, "V")) {
	ijobvl = 2;
	ilvsl = TRUE_;
    } else {
	ijobvl = -1;
	ilvsl = FALSE_;
    }

    if (lsame_(jobvsr, "N")) {
	ijobvr = 1;
	ilvsr = FALSE_;
    } else if (lsame_(jobvsr, "V")) {
	ijobvr = 2;
	ilvsr = TRUE_;
    } else {
	ijobvr = -1;
	ilvsr = FALSE_;
    }

/*     Test the input arguments   

   Computing MAX */
    i__1 = *n << 2;
    lwkmin = max(i__1,1);
    lwkopt = lwkmin;
    work[1] = (real) lwkopt;
    lquery = *lwork == -1;
    *info = 0;
    if (ijobvl <= 0) {
	*info = -1;
    } else if (ijobvr <= 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) {
	*info = -12;
    } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) {
	*info = -14;
    } else if (*lwork < lwkmin && ! lquery) {
	*info = -16;
    }

    if (*info == 0) {
	nb1 = ilaenv_(&c__1, "SGEQRF", " ", n, n, &c_n1, &c_n1, (ftnlen)6, (
		ftnlen)1);
	nb2 = ilaenv_(&c__1, "SORMQR", " ", n, n, n, &c_n1, (ftnlen)6, (
		ftnlen)1);
	nb3 = ilaenv_(&c__1, "SORGQR", " ", n, n, n, &c_n1, (ftnlen)6, (
		ftnlen)1);
/* Computing MAX */
	i__1 = max(nb1,nb2);
	nb = max(i__1,nb3);
	lopt = (*n << 1) + *n * (nb + 1);
	work[1] = (real) lopt;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGEGS ", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Get machine constants */

    eps = slamch_("E") * slamch_("B");
    safmin = slamch_("S");
    smlnum = *n * safmin / eps;
    bignum = 1.f / smlnum;

/*     Scale A if max element outside range [SMLNUM,BIGNUM] */

    anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]);
    ilascl = FALSE_;
    if (anrm > 0.f && anrm < smlnum) {
	anrmto = smlnum;
	ilascl = TRUE_;
    } else if (anrm > bignum) {
	anrmto = bignum;
	ilascl = TRUE_;
    }

    if (ilascl) {
	slascl_("G", &c_n1, &c_n1, &anrm, &anrmto, n, n, &a[a_offset], lda, &
		iinfo);
	if (iinfo != 0) {
	    *info = *n + 9;
	    return 0;
	}
    }

/*     Scale B if max element outside range [SMLNUM,BIGNUM] */

    bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]);
    ilbscl = FALSE_;
    if (bnrm > 0.f && bnrm < smlnum) {
	bnrmto = smlnum;
	ilbscl = TRUE_;
    } else if (bnrm > bignum) {
	bnrmto = bignum;
	ilbscl = TRUE_;
    }

    if (ilbscl) {
	slascl_("G", &c_n1, &c_n1, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, &
		iinfo);
	if (iinfo != 0) {
	    *info = *n + 9;
	    return 0;
	}
    }

/*     Permute the matrix to make it more nearly triangular   
       Workspace layout:  (2*N words -- "work..." not actually used)   
          left_permutation, right_permutation, work... */

    ileft = 1;
    iright = *n + 1;
    iwork = iright + *n;
    sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[
	    ileft], &work[iright], &work[iwork], &iinfo);
    if (iinfo != 0) {
	*info = *n + 1;
	goto L10;
    }

/*     Reduce B to triangular form, and initialize VSL and/or VSR   
       Workspace layout:  ("work..." must have at least N words)   
          left_permutation, right_permutation, tau, work... */

    irows = ihi + 1 - ilo;
    icols = *n + 1 - ilo;
    itau = iwork;
    iwork = itau + irows;
    i__1 = *lwork + 1 - iwork;
    sgeqrf_(&irows, &icols, &b_ref(ilo, ilo), ldb, &work[itau], &work[iwork], 
	    &i__1, &iinfo);
    if (iinfo >= 0) {
/* Computing MAX */
	i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1;
	lwkopt = max(i__1,i__2);
    }
    if (iinfo != 0) {
	*info = *n + 2;
	goto L10;
    }

    i__1 = *lwork + 1 - iwork;
    sormqr_("L", "T", &irows, &icols, &irows, &b_ref(ilo, ilo), ldb, &work[
	    itau], &a_ref(ilo, ilo), lda, &work[iwork], &i__1, &iinfo);
    if (iinfo >= 0) {
/* Computing MAX */
	i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1;
	lwkopt = max(i__1,i__2);
    }
    if (iinfo != 0) {
	*info = *n + 3;
	goto L10;
    }

    if (ilvsl) {
	slaset_("Full", n, n, &c_b36, &c_b37, &vsl[vsl_offset], ldvsl);
	i__1 = irows - 1;
	i__2 = irows - 1;
	slacpy_("L", &i__1, &i__2, &b_ref(ilo + 1, ilo), ldb, &vsl_ref(ilo + 
		1, ilo), ldvsl);
	i__1 = *lwork + 1 - iwork;
	sorgqr_(&irows, &irows, &irows, &vsl_ref(ilo, ilo), ldvsl, &work[itau]
		, &work[iwork], &i__1, &iinfo);
	if (iinfo >= 0) {
/* Computing MAX */
	    i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1;
	    lwkopt = max(i__1,i__2);
	}
	if (iinfo != 0) {
	    *info = *n + 4;
	    goto L10;
	}
    }

    if (ilvsr) {
	slaset_("Full", n, n, &c_b36, &c_b37, &vsr[vsr_offset], ldvsr);
    }

/*     Reduce to generalized Hessenberg form */

    sgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], 
	    ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &iinfo);
    if (iinfo != 0) {
	*info = *n + 5;
	goto L10;
    }

/*     Perform QZ algorithm, computing Schur vectors if desired   
       Workspace layout:  ("work..." must have at least 1 word)   
          left_permutation, right_permutation, work... */

    iwork = itau;
    i__1 = *lwork + 1 - iwork;
    shgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[
	    b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset]
	    , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwork], &i__1, &iinfo);
    if (iinfo >= 0) {
/* Computing MAX */
	i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1;
	lwkopt = max(i__1,i__2);
    }
    if (iinfo != 0) {
	if (iinfo > 0 && iinfo <= *n) {
	    *info = iinfo;
	} else if (iinfo > *n && iinfo <= *n << 1) {
	    *info = iinfo - *n;
	} else {
	    *info = *n + 6;
	}
	goto L10;
    }

/*     Apply permutation to VSL and VSR */

    if (ilvsl) {
	sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[
		vsl_offset], ldvsl, &iinfo);
	if (iinfo != 0) {
	    *info = *n + 7;
	    goto L10;
	}
    }
    if (ilvsr) {
	sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[
		vsr_offset], ldvsr, &iinfo);
	if (iinfo != 0) {
	    *info = *n + 8;
	    goto L10;
	}
    }

/*     Undo scaling */

    if (ilascl) {
	slascl_("H", &c_n1, &c_n1, &anrmto, &anrm, n, n, &a[a_offset], lda, &
		iinfo);
	if (iinfo != 0) {
	    *info = *n + 9;
	    return 0;
	}
	slascl_("G", &c_n1, &c_n1, &anrmto, &anrm, n, &c__1, &alphar[1], n, &
		iinfo);
	if (iinfo != 0) {
	    *info = *n + 9;
	    return 0;
	}
	slascl_("G", &c_n1, &c_n1, &anrmto, &anrm, n, &c__1, &alphai[1], n, &
		iinfo);
	if (iinfo != 0) {
	    *info = *n + 9;
	    return 0;
	}
    }

    if (ilbscl) {
	slascl_("U", &c_n1, &c_n1, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, &
		iinfo);
	if (iinfo != 0) {
	    *info = *n + 9;
	    return 0;
	}
	slascl_("G", &c_n1, &c_n1, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &
		iinfo);
	if (iinfo != 0) {
	    *info = *n + 9;
	    return 0;
	}
    }

L10:
    work[1] = (real) lwkopt;

    return 0;

/*     End of SGEGS */

} /* sgegs_ */
Ejemplo n.º 19
0
/* Subroutine */ int zgglse_(integer *m, integer *n, integer *p, 
	doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, 
	doublecomplex *c__, doublecomplex *d__, doublecomplex *x, 
	doublecomplex *work, integer *lwork, integer *info)
{
/*  -- LAPACK driver routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    ZGGLSE solves the linear equality-constrained least squares (LSE)   
    problem:   

            minimize || c - A*x ||_2   subject to   B*x = d   

    where A is an M-by-N matrix, B is a P-by-N matrix, c is a given   
    M-vector, and d is a given P-vector. It is assumed that   
    P <= N <= M+P, and   

             rank(B) = P and  rank( ( A ) ) = N.   
                                  ( ( B ) )   

    These conditions ensure that the LSE problem has a unique solution,   
    which is obtained using a GRQ factorization of the matrices B and A.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrices A and B. N >= 0.   

    P       (input) INTEGER   
            The number of rows of the matrix B. 0 <= P <= N <= M+P.   

    A       (input/output) COMPLEX*16 array, dimension (LDA,N)   
            On entry, the M-by-N matrix A.   
            On exit, A is destroyed.   

    LDA     (input) INTEGER   
            The leading dimension of the array A. LDA >= max(1,M).   

    B       (input/output) COMPLEX*16 array, dimension (LDB,N)   
            On entry, the P-by-N matrix B.   
            On exit, B is destroyed.   

    LDB     (input) INTEGER   
            The leading dimension of the array B. LDB >= max(1,P).   

    C       (input/output) COMPLEX*16 array, dimension (M)   
            On entry, C contains the right hand side vector for the   
            least squares part of the LSE problem.   
            On exit, the residual sum of squares for the solution   
            is given by the sum of squares of elements N-P+1 to M of   
            vector C.   

    D       (input/output) COMPLEX*16 array, dimension (P)   
            On entry, D contains the right hand side vector for the   
            constrained equation.   
            On exit, D is destroyed.   

    X       (output) COMPLEX*16 array, dimension (N)   
            On exit, X is the solution of the LSE problem.   

    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK. LWORK >= max(1,M+N+P).   
            For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB,   
            where NB is an upper bound for the optimal blocksizes for   
            ZGEQRF, CGERQF, ZUNMQR and CUNMRQ.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    =====================================================================   


       Test the input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static doublecomplex c_b1 = {1.,0.};
    static integer c__1 = 1;
    static integer c_n1 = -1;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    doublecomplex z__1;
    /* Local variables */
    static integer lopt;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *, 
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
	    integer *, doublecomplex *, doublecomplex *, integer *), 
	    zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, 
	    integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, 
	    integer *, doublecomplex *, integer *), ztrmv_(char *, char *, 
	    char *, integer *, doublecomplex *, integer *, doublecomplex *, 
	    integer *), ztrsv_(char *, char *, char *,
	     integer *, doublecomplex *, integer *, doublecomplex *, integer *
	    );
    static integer nb, mn, nr;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int zggrqf_(integer *, integer *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
	    integer *, doublecomplex *, doublecomplex *, integer *, integer *)
	    ;
    static integer nb1, nb2, nb3, nb4, lwkopt;
    static logical lquery;
    extern /* Subroutine */ int zunmqr_(char *, char *, integer *, integer *, 
	    integer *, doublecomplex *, integer *, doublecomplex *, 
	    doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmrq_(char *, char *, integer *, integer *, 
	    integer *, doublecomplex *, integer *, doublecomplex *, 
	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --c__;
    --d__;
    --x;
    --work;

    /* Function Body */
    *info = 0;
    mn = min(*m,*n);
    nb1 = ilaenv_(&c__1, "ZGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    nb2 = ilaenv_(&c__1, "ZGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    nb3 = ilaenv_(&c__1, "ZUNMQR", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1);
    nb4 = ilaenv_(&c__1, "ZUNMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
    i__1 = max(nb1,nb2), i__1 = max(i__1,nb3);
    nb = max(i__1,nb4);
    lwkopt = *p + mn + max(*m,*n) * nb;
    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
    lquery = *lwork == -1;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*p < 0 || *p > *n || *p < *n - *m) {
	*info = -3;
    } else if (*lda < max(1,*m)) {
	*info = -5;
    } else if (*ldb < max(1,*p)) {
	*info = -7;
    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__1 = 1, i__2 = *m + *n + *p;
	if (*lwork < max(i__1,i__2) && ! lquery) {
	    *info = -12;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZGGLSE", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Compute the GRQ factorization of matrices B and A:   

              B*Q' = (  0  T12 ) P   Z'*A*Q' = ( R11 R12 ) N-P   
                       N-P  P                  (  0  R22 ) M+P-N   
                                                 N-P  P   

       where T12 and R11 are upper triangular, and Q and Z are   
       unitary. */

    i__1 = *lwork - *p - mn;
    zggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p 
	    + 1], &work[*p + mn + 1], &i__1, info);
    i__1 = *p + mn + 1;
    lopt = (integer) work[i__1].r;

/*     Update c = Z'*c = ( c1 ) N-P   
                         ( c2 ) M+P-N */

    i__1 = max(1,*m);
    i__2 = *lwork - *p - mn;
    zunmqr_("Left", "Conjugate Transpose", m, &c__1, &mn, &a[a_offset], lda, &
	    work[*p + 1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info);
/* Computing MAX */
    i__3 = *p + mn + 1;
    i__1 = lopt, i__2 = (integer) work[i__3].r;
    lopt = max(i__1,i__2);

/*     Solve T12*x2 = d for x2 */

    ztrsv_("Upper", "No transpose", "Non unit", p, &b_ref(1, *n - *p + 1), 
	    ldb, &d__[1], &c__1);

/*     Update c1 */

    i__1 = *n - *p;
    z__1.r = -1., z__1.i = 0.;
    zgemv_("No transpose", &i__1, p, &z__1, &a_ref(1, *n - *p + 1), lda, &d__[
	    1], &c__1, &c_b1, &c__[1], &c__1);

/*     Sovle R11*x1 = c1 for x1 */

    i__1 = *n - *p;
    ztrsv_("Upper", "No transpose", "Non unit", &i__1, &a[a_offset], lda, &
	    c__[1], &c__1);

/*     Put the solutions in X */

    i__1 = *n - *p;
    zcopy_(&i__1, &c__[1], &c__1, &x[1], &c__1);
    zcopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1);

/*     Compute the residual vector: */

    if (*m < *n) {
	nr = *m + *p - *n;
	i__1 = *n - *m;
	z__1.r = -1., z__1.i = 0.;
	zgemv_("No transpose", &nr, &i__1, &z__1, &a_ref(*n - *p + 1, *m + 1),
		 lda, &d__[nr + 1], &c__1, &c_b1, &c__[*n - *p + 1], &c__1);
    } else {
	nr = *p;
    }
    ztrmv_("Upper", "No transpose", "Non unit", &nr, &a_ref(*n - *p + 1, *n - 
	    *p + 1), lda, &d__[1], &c__1);
    z__1.r = -1., z__1.i = 0.;
    zaxpy_(&nr, &z__1, &d__[1], &c__1, &c__[*n - *p + 1], &c__1);

/*     Backward transformation x = Q'*x */

    i__1 = *lwork - *p - mn;
    zunmrq_("Left", "Conjugate Transpose", n, &c__1, p, &b[b_offset], ldb, &
	    work[1], &x[1], n, &work[*p + mn + 1], &i__1, info);
/* Computing MAX */
    i__4 = *p + mn + 1;
    i__2 = lopt, i__3 = (integer) work[i__4].r;
    i__1 = *p + mn + max(i__2,i__3);
    work[1].r = (doublereal) i__1, work[1].i = 0.;

    return 0;

/*     End of ZGGLSE */

} /* zgglse_ */
Ejemplo n.º 20
0
/* Subroutine */ int cggev_(char *jobvl, char *jobvr, integer *n, complex *a, 
	integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta,
	 complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex *
	work, integer *lwork, real *rwork, integer *info)
{
/*  -- LAPACK driver routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CGGEV computes for a pair of N-by-N complex nonsymmetric matrices   
    (A,B), the generalized eigenvalues, and optionally, the left and/or   
    right generalized eigenvectors.   

    A generalized eigenvalue for a pair of matrices (A,B) is a scalar   
    lambda or a ratio alpha/beta = lambda, such that A - lambda*B is   
    singular. It is usually represented as the pair (alpha,beta), as   
    there is a reasonable interpretation for beta=0, and even for both   
    being zero.   

    The right generalized eigenvector v(j) corresponding to the   
    generalized eigenvalue lambda(j) of (A,B) satisfies   

                 A * v(j) = lambda(j) * B * v(j).   

    The left generalized eigenvector u(j) corresponding to the   
    generalized eigenvalues lambda(j) of (A,B) satisfies   

                 u(j)**H * A = lambda(j) * u(j)**H * B   

    where u(j)**H is the conjugate-transpose of u(j).   

    Arguments   
    =========   

    JOBVL   (input) CHARACTER*1   
            = 'N':  do not compute the left generalized eigenvectors;   
            = 'V':  compute the left generalized eigenvectors.   

    JOBVR   (input) CHARACTER*1   
            = 'N':  do not compute the right generalized eigenvectors;   
            = 'V':  compute the right generalized eigenvectors.   

    N       (input) INTEGER   
            The order of the matrices A, B, VL, and VR.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA, N)   
            On entry, the matrix A in the pair (A,B).   
            On exit, A has been overwritten.   

    LDA     (input) INTEGER   
            The leading dimension of A.  LDA >= max(1,N).   

    B       (input/output) COMPLEX array, dimension (LDB, N)   
            On entry, the matrix B in the pair (A,B).   
            On exit, B has been overwritten.   

    LDB     (input) INTEGER   
            The leading dimension of B.  LDB >= max(1,N).   

    ALPHA   (output) COMPLEX array, dimension (N)   
    BETA    (output) COMPLEX array, dimension (N)   
            On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the   
            generalized eigenvalues.   

            Note: the quotients ALPHA(j)/BETA(j) may easily over- or   
            underflow, and BETA(j) may even be zero.  Thus, the user   
            should avoid naively computing the ratio alpha/beta.   
            However, ALPHA will be always less than and usually   
            comparable with norm(A) in magnitude, and BETA always less   
            than and usually comparable with norm(B).   

    VL      (output) COMPLEX array, dimension (LDVL,N)   
            If JOBVL = 'V', the left generalized eigenvectors u(j) are   
            stored one after another in the columns of VL, in the same   
            order as their eigenvalues.   
            Each eigenvector will be scaled so the largest component   
            will have abs(real part) + abs(imag. part) = 1.   
            Not referenced if JOBVL = 'N'.   

    LDVL    (input) INTEGER   
            The leading dimension of the matrix VL. LDVL >= 1, and   
            if JOBVL = 'V', LDVL >= N.   

    VR      (output) COMPLEX array, dimension (LDVR,N)   
            If JOBVR = 'V', the right generalized eigenvectors v(j) are   
            stored one after another in the columns of VR, in the same   
            order as their eigenvalues.   
            Each eigenvector will be scaled so the largest component   
            will have abs(real part) + abs(imag. part) = 1.   
            Not referenced if JOBVR = 'N'.   

    LDVR    (input) INTEGER   
            The leading dimension of the matrix VR. LDVR >= 1, and   
            if JOBVR = 'V', LDVR >= N.   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.  LWORK >= max(1,2*N).   
            For good performance, LWORK must generally be larger.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    RWORK   (workspace/output) REAL array, dimension (8*N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            =1,...,N:   
                  The QZ iteration failed.  No eigenvectors have been   
                  calculated, but ALPHA(j) and BETA(j) should be   
                  correct for j=INFO+1,...,N.   
            > N:  =N+1: other then QZ iteration failed in SHGEQZ,   
                  =N+2: error return from STGEVC.   

    =====================================================================   


       Decode the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static complex c_b1 = {0.f,0.f};
    static complex c_b2 = {1.f,0.f};
    static integer c__1 = 1;
    static integer c__0 = 0;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, 
	    vr_offset, i__1, i__2, i__3, i__4;
    real r__1, r__2, r__3, r__4;
    complex q__1;
    /* Builtin functions */
    double sqrt(doublereal), r_imag(complex *);
    /* Local variables */
    static real anrm, bnrm;
    static integer ierr, itau;
    static real temp;
    static logical ilvl, ilvr;
    static integer iwrk;
    extern logical lsame_(char *, char *);
    static integer ileft, icols, irwrk, irows, jc;
    extern /* Subroutine */ int cggbak_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, complex *, integer *, 
	    integer *), cggbal_(char *, integer *, complex *, 
	    integer *, complex *, integer *, integer *, integer *, real *, 
	    real *, real *, integer *), slabad_(real *, real *);
    static integer in;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *);
    static integer jr;
    extern /* Subroutine */ int cgghrd_(char *, char *, integer *, integer *, 
	    integer *, complex *, integer *, complex *, integer *, complex *, 
	    integer *, complex *, integer *, integer *), 
	    clascl_(char *, integer *, integer *, real *, real *, integer *, 
	    integer *, complex *, integer *, integer *);
    static logical ilascl, ilbscl;
    extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, 
	    integer *, complex *, complex *, integer *, integer *), clacpy_(
	    char *, integer *, integer *, complex *, integer *, complex *, 
	    integer *), claset_(char *, integer *, integer *, complex 
	    *, complex *, complex *, integer *), ctgevc_(char *, char 
	    *, logical *, integer *, complex *, integer *, complex *, integer 
	    *, complex *, integer *, complex *, integer *, integer *, integer 
	    *, complex *, real *, integer *), xerbla_(char *, 
	    integer *);
    static logical ldumma[1];
    static char chtemp[1];
    static real bignum;
    extern /* Subroutine */ int chgeqz_(char *, char *, char *, integer *, 
	    integer *, integer *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, integer *, real *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern doublereal slamch_(char *);
    static integer ijobvl, iright, ijobvr;
    extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *, integer *);
    static real anrmto;
    static integer lwkmin;
    static real bnrmto;
    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, 
	    integer *, complex *, integer *, complex *, complex *, integer *, 
	    complex *, integer *, integer *);
    static real smlnum;
    static integer lwkopt;
    static logical lquery;
    static integer ihi, ilo;
    static real eps;
    static logical ilv;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1
#define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)]
#define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1
#define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --alpha;
    --beta;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1 * 1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1 * 1;
    vr -= vr_offset;
    --work;
    --rwork;

    /* Function Body */
    if (lsame_(jobvl, "N")) {
	ijobvl = 1;
	ilvl = FALSE_;
    } else if (lsame_(jobvl, "V")) {
	ijobvl = 2;
	ilvl = TRUE_;
    } else {
	ijobvl = -1;
	ilvl = FALSE_;
    }

    if (lsame_(jobvr, "N")) {
	ijobvr = 1;
	ilvr = FALSE_;
    } else if (lsame_(jobvr, "V")) {
	ijobvr = 2;
	ilvr = TRUE_;
    } else {
	ijobvr = -1;
	ilvr = FALSE_;
    }
    ilv = ilvl || ilvr;

/*     Test the input arguments */

    *info = 0;
    lquery = *lwork == -1;
    if (ijobvl <= 0) {
	*info = -1;
    } else if (ijobvr <= 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    } else if (*ldvl < 1 || ilvl && *ldvl < *n) {
	*info = -11;
    } else if (*ldvr < 1 || ilvr && *ldvr < *n) {
	*info = -13;
    }

/*     Compute workspace   
        (Note: Comments in the code beginning "Workspace:" describe the   
         minimal amount of workspace needed at that point in the code,   
         as well as the preferred amount for good performance.   
         NB refers to the optimal block size for the immediately   
         following subroutine, as returned by ILAENV. The workspace is   
         computed assuming ILO = 1 and IHI = N, the worst case.) */

    lwkmin = 1;
    if (*info == 0 && (*lwork >= 1 || lquery)) {
	lwkopt = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", n, &c__1, n, &c__0, (
		ftnlen)6, (ftnlen)1);
/* Computing MAX */
	i__1 = 1, i__2 = *n << 1;
	lwkmin = max(i__1,i__2);
	work[1].r = (real) lwkopt, work[1].i = 0.f;
    }

    if (*lwork < lwkmin && ! lquery) {
	*info = -15;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGGEV ", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    work[1].r = (real) lwkopt, work[1].i = 0.f;
    if (*n == 0) {
	return 0;
    }

/*     Get machine constants */

    eps = slamch_("E") * slamch_("B");
    smlnum = slamch_("S");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);
    smlnum = sqrt(smlnum) / eps;
    bignum = 1.f / smlnum;

/*     Scale A if max element outside range [SMLNUM,BIGNUM] */

    anrm = clange_("M", n, n, &a[a_offset], lda, &rwork[1]);
    ilascl = FALSE_;
    if (anrm > 0.f && anrm < smlnum) {
	anrmto = smlnum;
	ilascl = TRUE_;
    } else if (anrm > bignum) {
	anrmto = bignum;
	ilascl = TRUE_;
    }
    if (ilascl) {
	clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, &
		ierr);
    }

/*     Scale B if max element outside range [SMLNUM,BIGNUM] */

    bnrm = clange_("M", n, n, &b[b_offset], ldb, &rwork[1]);
    ilbscl = FALSE_;
    if (bnrm > 0.f && bnrm < smlnum) {
	bnrmto = smlnum;
	ilbscl = TRUE_;
    } else if (bnrm > bignum) {
	bnrmto = bignum;
	ilbscl = TRUE_;
    }
    if (ilbscl) {
	clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, &
		ierr);
    }

/*     Permute the matrices A, B to isolate eigenvalues if possible   
       (Real Workspace: need 6*N) */

    ileft = 1;
    iright = *n + 1;
    irwrk = iright + *n;
    cggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[
	    ileft], &rwork[iright], &rwork[irwrk], &ierr);

/*     Reduce B to triangular form (QR decomposition of B)   
       (Complex Workspace: need N, prefer N*NB) */

    irows = ihi + 1 - ilo;
    if (ilv) {
	icols = *n + 1 - ilo;
    } else {
	icols = irows;
    }
    itau = 1;
    iwrk = itau + irows;
    i__1 = *lwork + 1 - iwrk;
    cgeqrf_(&irows, &icols, &b_ref(ilo, ilo), ldb, &work[itau], &work[iwrk], &
	    i__1, &ierr);

/*     Apply the orthogonal transformation to matrix A   
       (Complex Workspace: need N, prefer N*NB) */

    i__1 = *lwork + 1 - iwrk;
    cunmqr_("L", "C", &irows, &icols, &irows, &b_ref(ilo, ilo), ldb, &work[
	    itau], &a_ref(ilo, ilo), lda, &work[iwrk], &i__1, &ierr);

/*     Initialize VL   
       (Complex Workspace: need N, prefer N*NB) */

    if (ilvl) {
	claset_("Full", n, n, &c_b1, &c_b2, &vl[vl_offset], ldvl);
	i__1 = irows - 1;
	i__2 = irows - 1;
	clacpy_("L", &i__1, &i__2, &b_ref(ilo + 1, ilo), ldb, &vl_ref(ilo + 1,
		 ilo), ldvl);
	i__1 = *lwork + 1 - iwrk;
	cungqr_(&irows, &irows, &irows, &vl_ref(ilo, ilo), ldvl, &work[itau], 
		&work[iwrk], &i__1, &ierr);
    }

/*     Initialize VR */

    if (ilvr) {
	claset_("Full", n, n, &c_b1, &c_b2, &vr[vr_offset], ldvr);
    }

/*     Reduce to generalized Hessenberg form */

    if (ilv) {

/*        Eigenvectors requested -- work on whole matrix. */

	cgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], 
		ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr);
    } else {
	cgghrd_("N", "N", &irows, &c__1, &irows, &a_ref(ilo, ilo), lda, &
		b_ref(ilo, ilo), ldb, &vl[vl_offset], ldvl, &vr[vr_offset], 
		ldvr, &ierr);
    }

/*     Perform QZ algorithm (Compute eigenvalues, and optionally, the   
       Schur form and Schur vectors)   
       (Complex Workspace: need N)   
       (Real Workspace: need N) */

    iwrk = itau;
    if (ilv) {
	*(unsigned char *)chtemp = 'S';
    } else {
	*(unsigned char *)chtemp = 'E';
    }
    i__1 = *lwork + 1 - iwrk;
    chgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[
	    b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, &vr[
	    vr_offset], ldvr, &work[iwrk], &i__1, &rwork[irwrk], &ierr);
    if (ierr != 0) {
	if (ierr > 0 && ierr <= *n) {
	    *info = ierr;
	} else if (ierr > *n && ierr <= *n << 1) {
	    *info = ierr - *n;
	} else {
	    *info = *n + 1;
	}
	goto L70;
    }

/*     Compute Eigenvectors   
       (Real Workspace: need 2*N)   
       (Complex Workspace: need 2*N) */

    if (ilv) {
	if (ilvl) {
	    if (ilvr) {
		*(unsigned char *)chtemp = 'B';
	    } else {
		*(unsigned char *)chtemp = 'L';
	    }
	} else {
	    *(unsigned char *)chtemp = 'R';
	}

	ctgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, 
		&vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[
		iwrk], &rwork[irwrk], &ierr);
	if (ierr != 0) {
	    *info = *n + 2;
	    goto L70;
	}

/*        Undo balancing on VL and VR and normalization   
          (Workspace: none needed) */

	if (ilvl) {
	    cggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n,
		     &vl[vl_offset], ldvl, &ierr);
	    i__1 = *n;
	    for (jc = 1; jc <= i__1; ++jc) {
		temp = 0.f;
		i__2 = *n;
		for (jr = 1; jr <= i__2; ++jr) {
/* Computing MAX */
		    i__3 = vl_subscr(jr, jc);
		    r__3 = temp, r__4 = (r__1 = vl[i__3].r, dabs(r__1)) + (
			    r__2 = r_imag(&vl_ref(jr, jc)), dabs(r__2));
		    temp = dmax(r__3,r__4);
/* L10: */
		}
		if (temp < smlnum) {
		    goto L30;
		}
		temp = 1.f / temp;
		i__2 = *n;
		for (jr = 1; jr <= i__2; ++jr) {
		    i__3 = vl_subscr(jr, jc);
		    i__4 = vl_subscr(jr, jc);
		    q__1.r = temp * vl[i__4].r, q__1.i = temp * vl[i__4].i;
		    vl[i__3].r = q__1.r, vl[i__3].i = q__1.i;
/* L20: */
		}
L30:
		;
	    }
	}
	if (ilvr) {
	    cggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n,
		     &vr[vr_offset], ldvr, &ierr);
	    i__1 = *n;
	    for (jc = 1; jc <= i__1; ++jc) {
		temp = 0.f;
		i__2 = *n;
		for (jr = 1; jr <= i__2; ++jr) {
/* Computing MAX */
		    i__3 = vr_subscr(jr, jc);
		    r__3 = temp, r__4 = (r__1 = vr[i__3].r, dabs(r__1)) + (
			    r__2 = r_imag(&vr_ref(jr, jc)), dabs(r__2));
		    temp = dmax(r__3,r__4);
/* L40: */
		}
		if (temp < smlnum) {
		    goto L60;
		}
		temp = 1.f / temp;
		i__2 = *n;
		for (jr = 1; jr <= i__2; ++jr) {
		    i__3 = vr_subscr(jr, jc);
		    i__4 = vr_subscr(jr, jc);
		    q__1.r = temp * vr[i__4].r, q__1.i = temp * vr[i__4].i;
		    vr[i__3].r = q__1.r, vr[i__3].i = q__1.i;
/* L50: */
		}
L60:
		;
	    }
	}
    }

/*     Undo scaling if necessary */

    if (ilascl) {
	clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, &
		ierr);
    }

    if (ilbscl) {
	clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &
		ierr);
    }

L70:
    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

/*     End of CGGEV */

} /* cggev_ */
Ejemplo n.º 21
0
extern "C" magma_int_t
magma_dgeqrf_ooc(magma_int_t m, magma_int_t n,
                 double *a,    magma_int_t lda, double *tau,
                 double *work, magma_int_t lwork,
                 magma_int_t *info )
{
/*  -- MAGMA (version 1.4.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       August 2013

    Purpose
    =======
    DGEQRF_OOC computes a QR factorization of a DOUBLE_PRECISION M-by-N matrix A:
    A = Q * R. This version does not require work space on the GPU
    passed as input. GPU memory is allocated in the routine.
    This is an out-of-core (ooc) version that is similar to magma_dgeqrf but
    the difference is that this version can use a GPU even if the matrix
    does not fit into the GPU memory at once.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix A.  M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A.  N >= 0.

    A       (input/output) DOUBLE_PRECISION array, dimension (LDA,N)
            On entry, the M-by-N matrix A.
            On exit, the elements on and above the diagonal of the array
            contain the min(M,N)-by-N upper trapezoidal matrix R (R is
            upper triangular if m >= n); the elements below the diagonal,
            with the array TAU, represent the orthogonal matrix Q as a
            product of min(m,n) elementary reflectors (see Further
            Details).

            Higher performance is achieved if A is in pinned memory, e.g.
            allocated using magma_malloc_pinned.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,M).

    TAU     (output) DOUBLE_PRECISION array, dimension (min(M,N))
            The scalar factors of the elementary reflectors (see Further
            Details).

    WORK    (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

            Higher performance is achieved if WORK is in pinned memory, e.g.
            allocated using magma_malloc_pinned.

    LWORK   (input) INTEGER
            The dimension of the array WORK.  LWORK >= N*NB,
            where NB can be obtained through magma_get_dgeqrf_nb(M).

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
                  or another error occured, such as memory allocation failed.

    Further Details
    ===============
    The matrix Q is represented as a product of elementary reflectors

       Q = H(1) H(2) . . . H(k), where k = min(m,n).

    Each H(i) has the form

       H(i) = I - tau * v * v'

    where tau is a real scalar, and v is a real vector with
    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
    and tau in TAU(i).
    =====================================================================    */

    #define  a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1))
    #define da_ref(a_1,a_2) (da+(a_2)*ldda  + (a_1))

    double *da, *dwork;
    double c_one = MAGMA_D_ONE;

    int  k, lddwork, ldda;

    *info = 0;
    int nb = magma_get_dgeqrf_nb(min(m, n));

    int lwkopt = n * nb;
    work[0] = MAGMA_D_MAKE( (double)lwkopt, 0 );
    int lquery = (lwork == -1);
    if (m < 0) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,m)) {
        *info = -4;
    } else if (lwork < max(1,n) && ! lquery) {
        *info = -7;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery)
        return *info;

    /* Check how much memory do we have */
    size_t freeMem, totalMem;
    cudaMemGetInfo( &freeMem, &totalMem );
    freeMem /= sizeof(double);
    
    magma_int_t IB, NB = (magma_int_t)(0.8*freeMem/m);
    NB = (NB / nb) * nb;

    if (NB >= n)
        return magma_dgeqrf(m, n, a, lda, tau, work, lwork, info);

    k = min(m,n);
    if (k == 0) {
        work[0] = c_one;
        return *info;
    }

    lddwork = ((NB+31)/32)*32+nb;
    ldda    = ((m+31)/32)*32;

    if (MAGMA_SUCCESS != magma_dmalloc( &da, (NB + nb)*ldda + nb*lddwork )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    magma_queue_t stream[2];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );

    //   magmablasSetKernelStream(stream[1]);

    double *ptr = da + ldda * NB;
    dwork = da + ldda*(NB + nb);

    /* start the main loop over the blocks that fit in the GPU memory */
    for(int i=0; i<n; i+=NB) {
        IB = min(n-i, NB);
        //printf("Processing %5d columns -- %5d to %5d ... \n", IB, i, i+IB);

        /* 1. Copy the next part of the matrix to the GPU */
        magma_dsetmatrix_async( (m), IB,
                                a_ref(0,i),  lda,
                                da_ref(0,0), ldda, stream[0] );
        magma_queue_sync( stream[0] );

        /* 2. Update it with the previous transformations */
        for(int j=0; j<min(i,k); j+=nb) {
            magma_int_t ib = min(k-j, nb);

            /* Get a panel in ptr.                                           */
            //   1. Form the triangular factor of the block reflector
            //   2. Send it to the GPU.
            //   3. Put 0s in the upper triangular part of V.
            //   4. Send V to the GPU in ptr.
            //   5. Update the matrix.
            //   6. Restore the upper part of V.
            magma_int_t rows = m-j;
            lapackf77_dlarft( MagmaForwardStr, MagmaColumnwiseStr,
                              &rows, &ib, a_ref(j,j), &lda, tau+j, work, &ib);
            magma_dsetmatrix_async( ib, ib,
                                    work,  ib,
                                    dwork, lddwork, stream[1] );

            dpanel_to_q(MagmaUpper, ib, a_ref(j,j), lda, work+ib*ib);
            magma_dsetmatrix_async( rows, ib,
                                    a_ref(j,j), lda,
                                    ptr,        rows, stream[1] );
            magma_queue_sync( stream[1] );

            magma_dlarfb_gpu( MagmaLeft, MagmaTrans, MagmaForward, MagmaColumnwise,
                              rows, IB, ib,
                              ptr, rows, dwork,    lddwork,
                              da_ref(j, 0), ldda, dwork+ib, lddwork);

            dq_to_panel(MagmaUpper, ib, a_ref(j,j), lda, work+ib*ib);
        }

        /* 3. Do a QR on the current part */
        if (i<k)
            magma_dgeqrf2_gpu(m-i, IB, da_ref(i,0), ldda, tau+i, info);

        /* 4. Copy the current part back to the CPU */
        magma_dgetmatrix_async( (m), IB,
                                da_ref(0,0), ldda,
                                a_ref(0,i),  lda, stream[0] );
    }

    magma_queue_sync( stream[0] );

    magma_queue_destroy( stream[0] );
    magma_queue_destroy( stream[1] );
    magma_free( da );

    return *info;
} /* magma_dgeqrf_ooc */
Ejemplo n.º 22
0
/* Subroutine */ int dsyevx_(char *jobz, char *range, char *uplo, integer *n, 
	doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer *
	il, integer *iu, doublereal *abstol, integer *m, doublereal *w, 
	doublereal *z__, integer *ldz, doublereal *work, integer *lwork, 
	integer *iwork, integer *ifail, integer *info)
{
/*  -- LAPACK driver routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    DSYEVX computes selected eigenvalues and, optionally, eigenvectors   
    of a real symmetric matrix A.  Eigenvalues and eigenvectors can be   
    selected by specifying either a range of values or a range of indices   
    for the desired eigenvalues.   

    Arguments   
    =========   

    JOBZ    (input) CHARACTER*1   
            = 'N':  Compute eigenvalues only;   
            = 'V':  Compute eigenvalues and eigenvectors.   

    RANGE   (input) CHARACTER*1   
            = 'A': all eigenvalues will be found.   
            = 'V': all eigenvalues in the half-open interval (VL,VU]   
                   will be found.   
            = 'I': the IL-th through IU-th eigenvalues will be found.   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the   
            leading N-by-N upper triangular part of A contains the   
            upper triangular part of the matrix A.  If UPLO = 'L',   
            the leading N-by-N lower triangular part of A contains   
            the lower triangular part of the matrix A.   
            On exit, the lower triangle (if UPLO='L') or the upper   
            triangle (if UPLO='U') of A, including the diagonal, is   
            destroyed.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    VL      (input) DOUBLE PRECISION   
    VU      (input) DOUBLE PRECISION   
            If RANGE='V', the lower and upper bounds of the interval to   
            be searched for eigenvalues. VL < VU.   
            Not referenced if RANGE = 'A' or 'I'.   

    IL      (input) INTEGER   
    IU      (input) INTEGER   
            If RANGE='I', the indices (in ascending order) of the   
            smallest and largest eigenvalues to be returned.   
            1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.   
            Not referenced if RANGE = 'A' or 'V'.   

    ABSTOL  (input) DOUBLE PRECISION   
            The absolute error tolerance for the eigenvalues.   
            An approximate eigenvalue is accepted as converged   
            when it is determined to lie in an interval [a,b]   
            of width less than or equal to   

                    ABSTOL + EPS *   max( |a|,|b| ) ,   

            where EPS is the machine precision.  If ABSTOL is less than   
            or equal to zero, then  EPS*|T|  will be used in its place,   
            where |T| is the 1-norm of the tridiagonal matrix obtained   
            by reducing A to tridiagonal form.   

            Eigenvalues will be computed most accurately when ABSTOL is   
            set to twice the underflow threshold 2*DLAMCH('S'), not zero.   
            If this routine returns with INFO>0, indicating that some   
            eigenvectors did not converge, try setting ABSTOL to   
            2*DLAMCH('S').   

            See "Computing Small Singular Values of Bidiagonal Matrices   
            with Guaranteed High Relative Accuracy," by Demmel and   
            Kahan, LAPACK Working Note #3.   

    M       (output) INTEGER   
            The total number of eigenvalues found.  0 <= M <= N.   
            If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.   

    W       (output) DOUBLE PRECISION array, dimension (N)   
            On normal exit, the first M elements contain the selected   
            eigenvalues in ascending order.   

    Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M))   
            If JOBZ = 'V', then if INFO = 0, the first M columns of Z   
            contain the orthonormal eigenvectors of the matrix A   
            corresponding to the selected eigenvalues, with the i-th   
            column of Z holding the eigenvector associated with W(i).   
            If an eigenvector fails to converge, then that column of Z   
            contains the latest approximation to the eigenvector, and the   
            index of the eigenvector is returned in IFAIL.   
            If JOBZ = 'N', then Z is not referenced.   
            Note: the user must ensure that at least max(1,M) columns are   
            supplied in the array Z; if RANGE = 'V', the exact value of M   
            is not known in advance and an upper bound must be used.   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z.  LDZ >= 1, and if   
            JOBZ = 'V', LDZ >= max(1,N).   

    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The length of the array WORK.  LWORK >= max(1,8*N).   
            For optimal efficiency, LWORK >= (NB+3)*N,   
            where NB is the max of the blocksize for DSYTRD and DORMTR   
            returned by ILAENV.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    IWORK   (workspace) INTEGER array, dimension (5*N)   

    IFAIL   (output) INTEGER array, dimension (N)   
            If JOBZ = 'V', then if INFO = 0, the first M elements of   
            IFAIL are zero.  If INFO > 0, then IFAIL contains the   
            indices of the eigenvectors that failed to converge.   
            If JOBZ = 'N', then IFAIL is not referenced.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i, then i eigenvectors failed to converge.   
                  Their indices are stored in array IFAIL.   

   =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    
    /* System generated locals */
    integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2;
    doublereal d__1, d__2;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer indd, inde;
    static doublereal anrm;
    static integer imax;
    static doublereal rmin, rmax;
    static integer lopt, itmp1, i__, j, indee;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *);
    static doublereal sigma;
    extern logical lsame_(char *, char *);
    static integer iinfo;
    static char order[1];
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
	    doublereal *, integer *), dswap_(integer *, doublereal *, integer 
	    *, doublereal *, integer *);
    static logical lower, wantz;
    static integer nb, jj;
    extern doublereal dlamch_(char *);
    static logical alleig, indeig;
    static integer iscale, indibl;
    static logical valeig;
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, integer *);
    static doublereal safmin;
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal abstll, bignum;
    static integer indtau, indisp;
    extern /* Subroutine */ int dstein_(integer *, doublereal *, doublereal *,
	     integer *, doublereal *, integer *, integer *, doublereal *, 
	    integer *, doublereal *, integer *, integer *, integer *), 
	    dsterf_(integer *, doublereal *, doublereal *, integer *);
    static integer indiwo, indwkn;
    extern doublereal dlansy_(char *, char *, integer *, doublereal *, 
	    integer *, doublereal *);
    extern /* Subroutine */ int dstebz_(char *, char *, integer *, doublereal 
	    *, doublereal *, integer *, integer *, doublereal *, doublereal *,
	     doublereal *, integer *, integer *, doublereal *, integer *, 
	    integer *, doublereal *, integer *, integer *);
    static integer indwrk;
    extern /* Subroutine */ int dorgtr_(char *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *, integer *), dsteqr_(char *, integer *, doublereal *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *), 
	    dormtr_(char *, char *, char *, integer *, integer *, doublereal *
	    , integer *, doublereal *, doublereal *, integer *, doublereal *, 
	    integer *, integer *);
    static integer llwrkn, llwork, nsplit;
    static doublereal smlnum;
    extern /* Subroutine */ int dsytrd_(char *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
	     integer *, integer *);
    static integer lwkopt;
    static logical lquery;
    static doublereal eps, vll, vuu, tmp1;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --iwork;
    --ifail;

    /* Function Body */
    lower = lsame_(uplo, "L");
    wantz = lsame_(jobz, "V");
    alleig = lsame_(range, "A");
    valeig = lsame_(range, "V");
    indeig = lsame_(range, "I");
    lquery = *lwork == -1;

    *info = 0;
    if (! (wantz || lsame_(jobz, "N"))) {
	*info = -1;
    } else if (! (alleig || valeig || indeig)) {
	*info = -2;
    } else if (! (lower || lsame_(uplo, "U"))) {
	*info = -3;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lda < max(1,*n)) {
	*info = -6;
    } else {
	if (valeig) {
	    if (*n > 0 && *vu <= *vl) {
		*info = -8;
	    }
	} else if (indeig) {
	    if (*il < 1 || *il > max(1,*n)) {
		*info = -9;
	    } else if (*iu < min(*n,*il) || *iu > *n) {
		*info = -10;
	    }
	}
    }
    if (*info == 0) {
	if (*ldz < 1 || wantz && *ldz < *n) {
	    *info = -15;
	} else /* if(complicated condition) */ {
/* Computing MAX */
	    i__1 = 1, i__2 = *n << 3;
	    if (*lwork < max(i__1,i__2) && ! lquery) {
		*info = -17;
	    }
	}
    }

    if (*info == 0) {
	nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
		 (ftnlen)1);
/* Computing MAX */
	i__1 = nb, i__2 = ilaenv_(&c__1, "DORMTR", uplo, n, &c_n1, &c_n1, &
		c_n1, (ftnlen)6, (ftnlen)1);
	nb = max(i__1,i__2);
	lwkopt = (nb + 3) * *n;
	work[1] = (doublereal) lwkopt;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DSYEVX", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    *m = 0;
    if (*n == 0) {
	work[1] = 1.;
	return 0;
    }

    if (*n == 1) {
	work[1] = 7.;
	if (alleig || indeig) {
	    *m = 1;
	    w[1] = a_ref(1, 1);
	} else {
	    if (*vl < a_ref(1, 1) && *vu >= a_ref(1, 1)) {
		*m = 1;
		w[1] = a_ref(1, 1);
	    }
	}
	if (wantz) {
	    z___ref(1, 1) = 1.;
	}
	return 0;
    }

/*     Get machine constants. */

    safmin = dlamch_("Safe minimum");
    eps = dlamch_("Precision");
    smlnum = safmin / eps;
    bignum = 1. / smlnum;
    rmin = sqrt(smlnum);
/* Computing MIN */
    d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin));
    rmax = min(d__1,d__2);

/*     Scale matrix to allowable range, if necessary. */

    iscale = 0;
    abstll = *abstol;
    vll = *vl;
    vuu = *vu;
    anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]);
    if (anrm > 0. && anrm < rmin) {
	iscale = 1;
	sigma = rmin / anrm;
    } else if (anrm > rmax) {
	iscale = 1;
	sigma = rmax / anrm;
    }
    if (iscale == 1) {
	if (lower) {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = *n - j + 1;
		dscal_(&i__2, &sigma, &a_ref(j, j), &c__1);
/* L10: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		dscal_(&j, &sigma, &a_ref(1, j), &c__1);
/* L20: */
	    }
	}
	if (*abstol > 0.) {
	    abstll = *abstol * sigma;
	}
	if (valeig) {
	    vll = *vl * sigma;
	    vuu = *vu * sigma;
	}
    }

/*     Call DSYTRD to reduce symmetric matrix to tridiagonal form. */

    indtau = 1;
    inde = indtau + *n;
    indd = inde + *n;
    indwrk = indd + *n;
    llwork = *lwork - indwrk + 1;
    dsytrd_(uplo, n, &a[a_offset], lda, &work[indd], &work[inde], &work[
	    indtau], &work[indwrk], &llwork, &iinfo);
    lopt = (integer) (*n * 3 + work[indwrk]);

/*     If all eigenvalues are desired and ABSTOL is less than or equal to   
       zero, then call DSTERF or DORGTR and SSTEQR.  If this fails for   
       some eigenvalue, then try DSTEBZ. */

    if ((alleig || indeig && *il == 1 && *iu == *n) && *abstol <= 0.) {
	dcopy_(n, &work[indd], &c__1, &w[1], &c__1);
	indee = indwrk + (*n << 1);
	if (! wantz) {
	    i__1 = *n - 1;
	    dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1);
	    dsterf_(n, &w[1], &work[indee], info);
	} else {
	    dlacpy_("A", n, n, &a[a_offset], lda, &z__[z_offset], ldz);
	    dorgtr_(uplo, n, &z__[z_offset], ldz, &work[indtau], &work[indwrk]
		    , &llwork, &iinfo);
	    i__1 = *n - 1;
	    dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1);
	    dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[
		    indwrk], info);
	    if (*info == 0) {
		i__1 = *n;
		for (i__ = 1; i__ <= i__1; ++i__) {
		    ifail[i__] = 0;
/* L30: */
		}
	    }
	}
	if (*info == 0) {
	    *m = *n;
	    goto L40;
	}
	*info = 0;
    }

/*     Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. */

    if (wantz) {
	*(unsigned char *)order = 'B';
    } else {
	*(unsigned char *)order = 'E';
    }
    indibl = 1;
    indisp = indibl + *n;
    indiwo = indisp + *n;
    dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[
	    inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[
	    indwrk], &iwork[indiwo], info);

    if (wantz) {
	dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[
		indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], &
		ifail[1], info);

/*        Apply orthogonal matrix used in reduction to tridiagonal   
          form to eigenvectors returned by DSTEIN. */

	indwkn = inde;
	llwrkn = *lwork - indwkn + 1;
	dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[
		z_offset], ldz, &work[indwkn], &llwrkn, &iinfo);
    }

/*     If matrix was scaled, then rescale eigenvalues appropriately. */

L40:
    if (iscale == 1) {
	if (*info == 0) {
	    imax = *m;
	} else {
	    imax = *info - 1;
	}
	d__1 = 1. / sigma;
	dscal_(&imax, &d__1, &w[1], &c__1);
    }

/*     If eigenvalues are not in order, then sort them, along with   
       eigenvectors. */

    if (wantz) {
	i__1 = *m - 1;
	for (j = 1; j <= i__1; ++j) {
	    i__ = 0;
	    tmp1 = w[j];
	    i__2 = *m;
	    for (jj = j + 1; jj <= i__2; ++jj) {
		if (w[jj] < tmp1) {
		    i__ = jj;
		    tmp1 = w[jj];
		}
/* L50: */
	    }

	    if (i__ != 0) {
		itmp1 = iwork[indibl + i__ - 1];
		w[i__] = w[j];
		iwork[indibl + i__ - 1] = iwork[indibl + j - 1];
		w[j] = tmp1;
		iwork[indibl + j - 1] = itmp1;
		dswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, j), &c__1);
		if (*info != 0) {
		    itmp1 = ifail[i__];
		    ifail[i__] = ifail[j];
		    ifail[j] = itmp1;
		}
	    }
/* L60: */
	}
    }

/*     Set WORK(1) to optimal workspace size. */

    work[1] = (doublereal) lwkopt;

    return 0;

/*     End of DSYEVX */

} /* dsyevx_ */
Ejemplo n.º 23
0
/* Subroutine */ int cggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp 
	selctg, char *sense, integer *n, complex *a, integer *lda, complex *b,
	 integer *ldb, integer *sdim, complex *alpha, complex *beta, complex *
	vsl, integer *ldvsl, complex *vsr, integer *ldvsr, real *rconde, real 
	*rcondv, complex *work, integer *lwork, real *rwork, integer *iwork, 
	integer *liwork, logical *bwork, integer *info)
{
/*  -- LAPACK driver routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CGGESX computes for a pair of N-by-N complex nonsymmetric matrices   
    (A,B), the generalized eigenvalues, the complex Schur form (S,T),   
    and, optionally, the left and/or right matrices of Schur vectors (VSL   
    and VSR).  This gives the generalized Schur factorization   

         (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H )   

    where (VSR)**H is the conjugate-transpose of VSR.   

    Optionally, it also orders the eigenvalues so that a selected cluster   
    of eigenvalues appears in the leading diagonal blocks of the upper   
    triangular matrix S and the upper triangular matrix T; computes   
    a reciprocal condition number for the average of the selected   
    eigenvalues (RCONDE); and computes a reciprocal condition number for   
    the right and left deflating subspaces corresponding to the selected   
    eigenvalues (RCONDV). The leading columns of VSL and VSR then form   
    an orthonormal basis for the corresponding left and right eigenspaces   
    (deflating subspaces).   

    A generalized eigenvalue for a pair of matrices (A,B) is a scalar w   
    or a ratio alpha/beta = w, such that  A - w*B is singular.  It is   
    usually represented as the pair (alpha,beta), as there is a   
    reasonable interpretation for beta=0 or for both being zero.   

    A pair of matrices (S,T) is in generalized complex Schur form if T is   
    upper triangular with non-negative diagonal and S is upper   
    triangular.   

    Arguments   
    =========   

    JOBVSL  (input) CHARACTER*1   
            = 'N':  do not compute the left Schur vectors;   
            = 'V':  compute the left Schur vectors.   

    JOBVSR  (input) CHARACTER*1   
            = 'N':  do not compute the right Schur vectors;   
            = 'V':  compute the right Schur vectors.   

    SORT    (input) CHARACTER*1   
            Specifies whether or not to order the eigenvalues on the   
            diagonal of the generalized Schur form.   
            = 'N':  Eigenvalues are not ordered;   
            = 'S':  Eigenvalues are ordered (see SELCTG).   

    SELCTG  (input) LOGICAL FUNCTION of two COMPLEX arguments   
            SELCTG must be declared EXTERNAL in the calling subroutine.   
            If SORT = 'N', SELCTG is not referenced.   
            If SORT = 'S', SELCTG is used to select eigenvalues to sort   
            to the top left of the Schur form.   
            Note that a selected complex eigenvalue may no longer satisfy   
            SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since   
            ordering may change the value of complex eigenvalues   
            (especially if the eigenvalue is ill-conditioned), in this   
            case INFO is set to N+3 see INFO below).   

    SENSE   (input) CHARACTER   
            Determines which reciprocal condition numbers are computed.   
            = 'N' : None are computed;   
            = 'E' : Computed for average of selected eigenvalues only;   
            = 'V' : Computed for selected deflating subspaces only;   
            = 'B' : Computed for both.   
            If SENSE = 'E', 'V', or 'B', SORT must equal 'S'.   

    N       (input) INTEGER   
            The order of the matrices A, B, VSL, and VSR.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA, N)   
            On entry, the first of the pair of matrices.   
            On exit, A has been overwritten by its generalized Schur   
            form S.   

    LDA     (input) INTEGER   
            The leading dimension of A.  LDA >= max(1,N).   

    B       (input/output) COMPLEX array, dimension (LDB, N)   
            On entry, the second of the pair of matrices.   
            On exit, B has been overwritten by its generalized Schur   
            form T.   

    LDB     (input) INTEGER   
            The leading dimension of B.  LDB >= max(1,N).   

    SDIM    (output) INTEGER   
            If SORT = 'N', SDIM = 0.   
            If SORT = 'S', SDIM = number of eigenvalues (after sorting)   
            for which SELCTG is true.   

    ALPHA   (output) COMPLEX array, dimension (N)   
    BETA    (output) COMPLEX array, dimension (N)   
            On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the   
            generalized eigenvalues.  ALPHA(j) and BETA(j),j=1,...,N  are   
            the diagonals of the complex Schur form (S,T).  BETA(j) will   
            be non-negative real.   

            Note: the quotients ALPHA(j)/BETA(j) may easily over- or   
            underflow, and BETA(j) may even be zero.  Thus, the user   
            should avoid naively computing the ratio alpha/beta.   
            However, ALPHA will be always less than and usually   
            comparable with norm(A) in magnitude, and BETA always less   
            than and usually comparable with norm(B).   

    VSL     (output) COMPLEX array, dimension (LDVSL,N)   
            If JOBVSL = 'V', VSL will contain the left Schur vectors.   
            Not referenced if JOBVSL = 'N'.   

    LDVSL   (input) INTEGER   
            The leading dimension of the matrix VSL. LDVSL >=1, and   
            if JOBVSL = 'V', LDVSL >= N.   

    VSR     (output) COMPLEX array, dimension (LDVSR,N)   
            If JOBVSR = 'V', VSR will contain the right Schur vectors.   
            Not referenced if JOBVSR = 'N'.   

    LDVSR   (input) INTEGER   
            The leading dimension of the matrix VSR. LDVSR >= 1, and   
            if JOBVSR = 'V', LDVSR >= N.   

    RCONDE  (output) REAL array, dimension ( 2 )   
            If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the   
            reciprocal condition numbers for the average of the selected   
            eigenvalues.   
            Not referenced if SENSE = 'N' or 'V'.   

    RCONDV  (output) REAL array, dimension ( 2 )   
            If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the   
            reciprocal condition number for the selected deflating   
            subspaces.   
            Not referenced if SENSE = 'N' or 'E'.   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.  LWORK >= 2*N.   
            If SENSE = 'E', 'V', or 'B',   
            LWORK >= MAX(2*N, 2*SDIM*(N-SDIM)).   

    RWORK   (workspace) REAL array, dimension ( 8*N )   
            Real workspace.   

    IWORK   (workspace/output) INTEGER array, dimension (LIWORK)   
            Not referenced if SENSE = 'N'.   
            On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.   

    LIWORK  (input) INTEGER   
            The dimension of the array WORK. LIWORK >= N+2.   

    BWORK   (workspace) LOGICAL array, dimension (N)   
            Not referenced if SORT = 'N'.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            = 1,...,N:   
                  The QZ iteration failed.  (A,B) are not in Schur   
                  form, but ALPHA(j) and BETA(j) should be correct for   
                  j=INFO+1,...,N.   
            > N:  =N+1: other than QZ iteration failed in CHGEQZ   
                  =N+2: after reordering, roundoff changed values of   
                        some complex eigenvalues so that leading   
                        eigenvalues in the Generalized Schur form no   
                        longer satisfy SELCTG=.TRUE.  This could also   
                        be caused due to scaling.   
                  =N+3: reordering failed in CTGSEN.   

    =====================================================================   


       Decode the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static complex c_b1 = {0.f,0.f};
    static complex c_b2 = {1.f,0.f};
    static integer c__1 = 1;
    static integer c__0 = 0;
    static integer c_n1 = -1;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, 
	    vsr_dim1, vsr_offset, i__1, i__2;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer ijob;
    static real anrm, bnrm;
    static integer ierr, itau, iwrk, i__;
    extern logical lsame_(char *, char *);
    static integer ileft, icols;
    static logical cursl, ilvsl, ilvsr;
    static integer irwrk, irows;
    extern /* Subroutine */ int cggbak_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, complex *, integer *, 
	    integer *), cggbal_(char *, integer *, complex *, 
	    integer *, complex *, integer *, integer *, integer *, real *, 
	    real *, real *, integer *), slabad_(real *, real *);
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *);
    static real pl;
    extern /* Subroutine */ int cgghrd_(char *, char *, integer *, integer *, 
	    integer *, complex *, integer *, complex *, integer *, complex *, 
	    integer *, complex *, integer *, integer *), 
	    clascl_(char *, integer *, integer *, real *, real *, integer *, 
	    integer *, complex *, integer *, integer *);
    static real pr;
    static logical ilascl, ilbscl;
    extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, 
	    integer *, complex *, complex *, integer *, integer *), clacpy_(
	    char *, integer *, integer *, complex *, integer *, complex *, 
	    integer *), claset_(char *, integer *, integer *, complex 
	    *, complex *, complex *, integer *), xerbla_(char *, 
	    integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern doublereal slamch_(char *);
    static real bignum;
    extern /* Subroutine */ int chgeqz_(char *, char *, char *, integer *, 
	    integer *, integer *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, integer *, real *, integer *), 
	    ctgsen_(integer *, logical *, logical *, logical *, integer *, 
	    complex *, integer *, complex *, integer *, complex *, complex *, 
	    complex *, integer *, complex *, integer *, integer *, real *, 
	    real *, real *, complex *, integer *, integer *, integer *, 
	    integer *);
    static integer ijobvl, iright, ijobvr;
    static logical wantsb;
    static integer liwmin;
    static logical wantse, lastsl;
    static real anrmto, bnrmto;
    extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *, integer *);
    static integer minwrk, maxwrk;
    static logical wantsn;
    static real smlnum;
    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, 
	    integer *, complex *, integer *, complex *, complex *, integer *, 
	    complex *, integer *, integer *);
    static logical wantst, wantsv;
    static real dif[2];
    static integer ihi, ilo;
    static real eps;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define vsl_subscr(a_1,a_2) (a_2)*vsl_dim1 + a_1
#define vsl_ref(a_1,a_2) vsl[vsl_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --alpha;
    --beta;
    vsl_dim1 = *ldvsl;
    vsl_offset = 1 + vsl_dim1 * 1;
    vsl -= vsl_offset;
    vsr_dim1 = *ldvsr;
    vsr_offset = 1 + vsr_dim1 * 1;
    vsr -= vsr_offset;
    --rconde;
    --rcondv;
    --work;
    --rwork;
    --iwork;
    --bwork;

    /* Function Body */
    if (lsame_(jobvsl, "N")) {
	ijobvl = 1;
	ilvsl = FALSE_;
    } else if (lsame_(jobvsl, "V")) {
	ijobvl = 2;
	ilvsl = TRUE_;
    } else {
	ijobvl = -1;
	ilvsl = FALSE_;
    }

    if (lsame_(jobvsr, "N")) {
	ijobvr = 1;
	ilvsr = FALSE_;
    } else if (lsame_(jobvsr, "V")) {
	ijobvr = 2;
	ilvsr = TRUE_;
    } else {
	ijobvr = -1;
	ilvsr = FALSE_;
    }

    wantst = lsame_(sort, "S");
    wantsn = lsame_(sense, "N");
    wantse = lsame_(sense, "E");
    wantsv = lsame_(sense, "V");
    wantsb = lsame_(sense, "B");
    if (wantsn) {
	ijob = 0;
	iwork[1] = 1;
    } else if (wantse) {
	ijob = 1;
    } else if (wantsv) {
	ijob = 2;
    } else if (wantsb) {
	ijob = 4;
    }

/*     Test the input arguments */

    *info = 0;
    if (ijobvl <= 0) {
	*info = -1;
    } else if (ijobvr <= 0) {
	*info = -2;
    } else if (! wantst && ! lsame_(sort, "N")) {
	*info = -3;
    } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! 
	    wantsn) {
	*info = -5;
    } else if (*n < 0) {
	*info = -6;
    } else if (*lda < max(1,*n)) {
	*info = -8;
    } else if (*ldb < max(1,*n)) {
	*info = -10;
    } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) {
	*info = -15;
    } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) {
	*info = -17;
    }

/*     Compute workspace   
        (Note: Comments in the code beginning "Workspace:" describe the   
         minimal amount of workspace needed at that point in the code,   
         as well as the preferred amount for good performance.   
         NB refers to the optimal block size for the immediately   
         following subroutine, as returned by ILAENV.) */

    minwrk = 1;
    if (*info == 0 && *lwork >= 1) {
/* Computing MAX */
	i__1 = 1, i__2 = *n << 1;
	minwrk = max(i__1,i__2);
	maxwrk = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", n, &c__1, n, &c__0, (
		ftnlen)6, (ftnlen)1);
	if (ilvsl) {
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", " ", n, &
		    c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
	    maxwrk = max(i__1,i__2);
	}
	work[1].r = (real) maxwrk, work[1].i = 0.f;
    }
    if (! wantsn) {
	liwmin = *n + 2;
    } else {
	liwmin = 1;
    }
    iwork[1] = liwmin;

    if (*info == 0 && *lwork < minwrk) {
	*info = -21;
    } else if (*info == 0 && ijob >= 1) {
	if (*liwork < liwmin) {
	    *info = -24;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGGESX", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	*sdim = 0;
	return 0;
    }

/*     Get machine constants */

    eps = slamch_("P");
    smlnum = slamch_("S");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);
    smlnum = sqrt(smlnum) / eps;
    bignum = 1.f / smlnum;

/*     Scale A if max element outside range [SMLNUM,BIGNUM] */

    anrm = clange_("M", n, n, &a[a_offset], lda, &rwork[1]);
    ilascl = FALSE_;
    if (anrm > 0.f && anrm < smlnum) {
	anrmto = smlnum;
	ilascl = TRUE_;
    } else if (anrm > bignum) {
	anrmto = bignum;
	ilascl = TRUE_;
    }
    if (ilascl) {
	clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, &
		ierr);
    }

/*     Scale B if max element outside range [SMLNUM,BIGNUM] */

    bnrm = clange_("M", n, n, &b[b_offset], ldb, &rwork[1]);
    ilbscl = FALSE_;
    if (bnrm > 0.f && bnrm < smlnum) {
	bnrmto = smlnum;
	ilbscl = TRUE_;
    } else if (bnrm > bignum) {
	bnrmto = bignum;
	ilbscl = TRUE_;
    }
    if (ilbscl) {
	clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, &
		ierr);
    }

/*     Permute the matrix to make it more nearly triangular   
       (Real Workspace: need 6*N) */

    ileft = 1;
    iright = *n + 1;
    irwrk = iright + *n;
    cggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[
	    ileft], &rwork[iright], &rwork[irwrk], &ierr);

/*     Reduce B to triangular form (QR decomposition of B)   
       (Complex Workspace: need N, prefer N*NB) */

    irows = ihi + 1 - ilo;
    icols = *n + 1 - ilo;
    itau = 1;
    iwrk = itau + irows;
    i__1 = *lwork + 1 - iwrk;
    cgeqrf_(&irows, &icols, &b_ref(ilo, ilo), ldb, &work[itau], &work[iwrk], &
	    i__1, &ierr);

/*     Apply the unitary transformation to matrix A   
       (Complex Workspace: need N, prefer N*NB) */

    i__1 = *lwork + 1 - iwrk;
    cunmqr_("L", "C", &irows, &icols, &irows, &b_ref(ilo, ilo), ldb, &work[
	    itau], &a_ref(ilo, ilo), lda, &work[iwrk], &i__1, &ierr);

/*     Initialize VSL   
       (Complex Workspace: need N, prefer N*NB) */

    if (ilvsl) {
	claset_("Full", n, n, &c_b1, &c_b2, &vsl[vsl_offset], ldvsl);
	i__1 = irows - 1;
	i__2 = irows - 1;
	clacpy_("L", &i__1, &i__2, &b_ref(ilo + 1, ilo), ldb, &vsl_ref(ilo + 
		1, ilo), ldvsl);
	i__1 = *lwork + 1 - iwrk;
	cungqr_(&irows, &irows, &irows, &vsl_ref(ilo, ilo), ldvsl, &work[itau]
		, &work[iwrk], &i__1, &ierr);
    }

/*     Initialize VSR */

    if (ilvsr) {
	claset_("Full", n, n, &c_b1, &c_b2, &vsr[vsr_offset], ldvsr);
    }

/*     Reduce to generalized Hessenberg form   
       (Workspace: none needed) */

    cgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], 
	    ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr);

    *sdim = 0;

/*     Perform QZ algorithm, computing Schur vectors if desired   
       (Complex Workspace: need N)   
       (Real Workspace:    need N) */

    iwrk = itau;
    i__1 = *lwork + 1 - iwrk;
    chgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[
	    b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, &
	    vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &rwork[irwrk], &ierr);
    if (ierr != 0) {
	if (ierr > 0 && ierr <= *n) {
	    *info = ierr;
	} else if (ierr > *n && ierr <= *n << 1) {
	    *info = ierr - *n;
	} else {
	    *info = *n + 1;
	}
	goto L40;
    }

/*     Sort eigenvalues ALPHA/BETA and compute the reciprocal of   
       condition number(s) */

    if (wantst) {

/*        Undo scaling on eigenvalues before SELCTGing */

	if (ilascl) {
	    clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n,
		     &ierr);
	}
	if (ilbscl) {
	    clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, 
		    &ierr);
	}

/*        Select eigenvalues */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    bwork[i__] = (*selctg)(&alpha[i__], &beta[i__]);
/* L10: */
	}

/*        Reorder eigenvalues, transform Generalized Schur vectors, and   
          compute reciprocal condition numbers   
          (Complex Workspace: If IJOB >= 1, need MAX(1, 2*SDIM*(N-SDIM))   
                              otherwise, need 1 ) */

	i__1 = *lwork - iwrk + 1;
	ctgsen_(&ijob, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[
		b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, 
		&vsr[vsr_offset], ldvsr, sdim, &pl, &pr, dif, &work[iwrk], &
		i__1, &iwork[1], liwork, &ierr);

	if (ijob >= 1) {
/* Computing MAX */
	    i__1 = maxwrk, i__2 = (*sdim << 1) * (*n - *sdim);
	    maxwrk = max(i__1,i__2);
	}
	if (ierr == -21) {

/*            not enough complex workspace */

	    *info = -21;
	} else {
	    rconde[1] = pl;
	    rconde[2] = pl;
	    rcondv[1] = dif[0];
	    rcondv[2] = dif[1];
	    if (ierr == 1) {
		*info = *n + 3;
	    }
	}

    }

/*     Apply permutation to VSL and VSR   
       (Workspace: none needed) */

    if (ilvsl) {
	cggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, &
		vsl[vsl_offset], ldvsl, &ierr);
    }

    if (ilvsr) {
	cggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, &
		vsr[vsr_offset], ldvsr, &ierr);
    }

/*     Undo scaling */

    if (ilascl) {
	clascl_("U", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, &
		ierr);
	clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, &
		ierr);
    }

    if (ilbscl) {
	clascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, &
		ierr);
	clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &
		ierr);
    }

/* L20: */

    if (wantst) {

/*        Check if reordering is correct */

	lastsl = TRUE_;
	*sdim = 0;
	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    cursl = (*selctg)(&alpha[i__], &beta[i__]);
	    if (cursl) {
		++(*sdim);
	    }
	    if (cursl && ! lastsl) {
		*info = *n + 2;
	    }
	    lastsl = cursl;
/* L30: */
	}

    }

L40:

    work[1].r = (real) maxwrk, work[1].i = 0.f;
    iwork[1] = liwmin;

    return 0;

/*     End of CGGESX */

} /* cggesx_ */
Ejemplo n.º 24
0
/* Subroutine */ HYPRE_Int dormbr_(const char *vect,const char *side,const char *trans, integer *m, 
	integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, 
	doublereal *c__, integer *ldc, doublereal *work, integer *lwork, 
	integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C   
    with   
                    SIDE = 'L'     SIDE = 'R'   
    TRANS = 'N':      Q * C          C * Q   
    TRANS = 'T':      Q**T * C       C * Q**T   

    If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C   
    with   
                    SIDE = 'L'     SIDE = 'R'   
    TRANS = 'N':      P * C          C * P   
    TRANS = 'T':      P**T * C       C * P**T   

    Here Q and P**T are the orthogonal matrices determined by DGEBRD when   
    reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and   
    P**T are defined as products of elementary reflectors H(i) and G(i)   
    respectively.   

    Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the   
    order of the orthogonal matrix Q or P**T that is applied.   

    If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:   
    if nq >= k, Q = H(1) H(2) . . . H(k);   
    if nq < k, Q = H(1) H(2) . . . H(nq-1).   

    If VECT = 'P', A is assumed to have been a K-by-NQ matrix:   
    if k < nq, P = G(1) G(2) . . . G(k);   
    if k >= nq, P = G(1) G(2) . . . G(nq-1).   

    Arguments   
    =========   

    VECT    (input) CHARACTER*1   
            = 'Q': apply Q or Q**T;   
            = 'P': apply P or P**T.   

    SIDE    (input) CHARACTER*1   
            = 'L': apply Q, Q**T, P or P**T from the Left;   
            = 'R': apply Q, Q**T, P or P**T from the Right.   

    TRANS   (input) CHARACTER*1   
            = 'N':  No transpose, apply Q  or P;   
            = 'T':  Transpose, apply Q**T or P**T.   

    M       (input) INTEGER   
            The number of rows of the matrix C. M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix C. N >= 0.   

    K       (input) INTEGER   
            If VECT = 'Q', the number of columns in the original   
            matrix reduced by DGEBRD.   
            If VECT = 'P', the number of rows in the original   
            matrix reduced by DGEBRD.   
            K >= 0.   

    A       (input) DOUBLE PRECISION array, dimension   
                                  (LDA,min(nq,K)) if VECT = 'Q'   
                                  (LDA,nq)        if VECT = 'P'   
            The vectors which define the elementary reflectors H(i) and   
            G(i), whose products determine the matrices Q and P, as   
            returned by DGEBRD.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.   
            If VECT = 'Q', LDA >= max(1,nq);   
            if VECT = 'P', LDA >= max(1,min(nq,K)).   

    TAU     (input) DOUBLE PRECISION array, dimension (min(nq,K))   
            TAU(i) must contain the scalar factor of the elementary   
            reflector H(i) or G(i) which determines Q or P, as returned   
            by DGEBRD in the array argument TAUQ or TAUP.   

    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)   
            On entry, the M-by-N matrix C.   
            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q   
            or P*C or P**T*C or C*P or C*P**T.   

    LDC     (input) INTEGER   
            The leading dimension of the array C. LDC >= max(1,M).   

    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.   
            If SIDE = 'L', LWORK >= max(1,N);   
            if SIDE = 'R', LWORK >= max(1,M).   
            For optimum performance LWORK >= N*NB if SIDE = 'L', and   
            LWORK >= M*NB if SIDE = 'R', where NB is the optimal   
            blocksize.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__2 = 2;
    
    /* System generated locals */
    address a__1[2];
    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2];
    char ch__1[2];
    /* Builtin functions   
       Subroutine */ HYPRE_Int s_cat(char *, char **, integer *, integer *, ftnlen);
    /* Local variables */
    static logical left;
    extern logical lsame_(const char *,const char *);
    static integer iinfo, i1, i2, nb, mi, ni, nq, nw;
    extern /* Subroutine */ HYPRE_Int xerbla_(const char *, integer *);
    extern integer ilaenv_(integer *,const char *,const char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ HYPRE_Int dormlq_(const char *,const char *, integer *, integer *, 
	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
	    integer *, doublereal *, integer *, integer *);
    static logical notran;
    extern /* Subroutine */ HYPRE_Int dormqr_(const char *,const char *, integer *, integer *, 
	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
	    integer *, doublereal *, integer *, integer *);
    static logical applyq;
    static char transt[1];
    static integer lwkopt;
    static logical lquery;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    *info = 0;
    applyq = lsame_(vect, "Q");
    left = lsame_(side, "L");
    notran = lsame_(trans, "N");
    lquery = *lwork == -1;

/*     NQ is the order of Q or P and NW is the minimum dimension of WORK */

    if (left) {
	nq = *m;
	nw = *n;
    } else {
	nq = *n;
	nw = *m;
    }
    if (! applyq && ! lsame_(vect, "P")) {
	*info = -1;
    } else if (! left && ! lsame_(side, "R")) {
	*info = -2;
    } else if (! notran && ! lsame_(trans, "T")) {
	*info = -3;
    } else if (*m < 0) {
	*info = -4;
    } else if (*n < 0) {
	*info = -5;
    } else if (*k < 0) {
	*info = -6;
    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__1 = 1, i__2 = min(nq,*k);
	if (((applyq) && (*lda < max(1,nq))) || 
            ((! applyq) && (*lda < max(i__1,i__2)))) {
	    *info = -8;
	} else if (*ldc < max(1,*m)) {
	    *info = -11;
	} else if (*lwork < max(1,nw) && ! lquery) {
	    *info = -13;
	}
    }

    if (*info == 0) {
	if (applyq) {
	    if (left) {
/* Writing concatenation */
			i__3[0] = 1, a__1[0] = (char*)side;
			i__3[1] = 1, a__1[1] = (char*)trans;
		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
		i__1 = *m - 1;
		i__2 = *m - 1;
		nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1, (
			ftnlen)6, (ftnlen)2);
	    } else {
/* Writing concatenation */
			i__3[0] = 1, a__1[0] = (char*)side;
			i__3[1] = 1, a__1[1] = (char*)trans;
		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
		i__1 = *n - 1;
		i__2 = *n - 1;
		nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1, (
			ftnlen)6, (ftnlen)2);
	    }
	} else {
	    if (left) {
/* Writing concatenation */
			i__3[0] = 1, a__1[0] = (char*)side;
			i__3[1] = 1, a__1[1] = (char*)trans;
		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
		i__1 = *m - 1;
		i__2 = *m - 1;
		nb = ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, (
			ftnlen)6, (ftnlen)2);
	    } else {
/* Writing concatenation */
			i__3[0] = 1, a__1[0] = (char*)side;
			i__3[1] = 1, a__1[1] = (char*)trans;
		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
		i__1 = *n - 1;
		i__2 = *n - 1;
		nb = ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, (
			ftnlen)6, (ftnlen)2);
	    }
	}
	lwkopt = max(1,nw) * nb;
	work[1] = (doublereal) lwkopt;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DORMBR", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    work[1] = 1.;
    if (*m == 0 || *n == 0) {
	return 0;
    }

    if (applyq) {

/*        Apply Q */

	if (nq >= *k) {

/*           Q was determined by a call to DGEBRD with nq >= k */

	    dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
		    c_offset], ldc, &work[1], lwork, &iinfo);
	} else if (nq > 1) {

/*           Q was determined by a call to DGEBRD with nq < k */

	    if (left) {
		mi = *m - 1;
		ni = *n;
		i1 = 2;
		i2 = 1;
	    } else {
		mi = *m;
		ni = *n - 1;
		i1 = 1;
		i2 = 2;
	    }
	    i__1 = nq - 1;
	    dormqr_(side, trans, &mi, &ni, &i__1, &a_ref(2, 1), lda, &tau[1], 
		    &c___ref(i1, i2), ldc, &work[1], lwork, &iinfo);
	}
    } else {

/*        Apply P */

	if (notran) {
	    *(unsigned char *)transt = 'T';
	} else {
	    *(unsigned char *)transt = 'N';
	}
	if (nq > *k) {

/*           P was determined by a call to DGEBRD with nq > k */

	    dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[
		    c_offset], ldc, &work[1], lwork, &iinfo);
	} else if (nq > 1) {

/*           P was determined by a call to DGEBRD with nq <= k */

	    if (left) {
		mi = *m - 1;
		ni = *n;
		i1 = 2;
		i2 = 1;
	    } else {
		mi = *m;
		ni = *n - 1;
		i1 = 1;
		i2 = 2;
	    }
	    i__1 = nq - 1;
	    dormlq_(side, transt, &mi, &ni, &i__1, &a_ref(1, 2), lda, &tau[1],
		     &c___ref(i1, i2), ldc, &work[1], lwork, &iinfo);
	}
    }
    work[1] = (doublereal) lwkopt;
    return 0;

/*     End of DORMBR */

} /* dormbr_ */
Ejemplo n.º 25
0
/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer *
	lda, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    DPOTF2 computes the Cholesky factorization of a real symmetric   
    positive definite matrix A.   

    The factorization has the form   
       A = U' * U ,  if UPLO = 'U', or   
       A = L  * L',  if UPLO = 'L',   
    where U is an upper triangular matrix and L is lower triangular.   

    This is the unblocked version of the algorithm, calling Level 2 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the upper or lower triangular part of the   
            symmetric matrix A is stored.   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
            n by n upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading n by n lower triangular part of A contains the lower   
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, if INFO = 0, the factor U or L from the Cholesky   
            factorization A = U'*U  or A = L*L'.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -k, the k-th argument had an illegal value   
            > 0: if INFO = k, the leading minor of order k is not   
                 positive definite, and the factorization could not be   
                 completed.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static doublereal c_b10 = -1.;
    static doublereal c_b12 = 1.;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublereal d__1;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, 
	    integer *);
    static integer j;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *);
    static logical upper;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal ajj;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DPOTF2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    if (upper) {

/*        Compute the Cholesky factorization A = U'*U. */

	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {

/*           Compute U(J,J) and test for non-positive-definiteness. */

	    i__2 = j - 1;
	    ajj = a_ref(j, j) - ddot_(&i__2, &a_ref(1, j), &c__1, &a_ref(1, j)
		    , &c__1);
	    if (ajj <= 0.) {
		a_ref(j, j) = ajj;
		goto L30;
	    }
	    ajj = sqrt(ajj);
	    a_ref(j, j) = ajj;

/*           Compute elements J+1:N of row J. */

	    if (j < *n) {
		i__2 = j - 1;
		i__3 = *n - j;
		dgemv_("Transpose", &i__2, &i__3, &c_b10, &a_ref(1, j + 1), 
			lda, &a_ref(1, j), &c__1, &c_b12, &a_ref(j, j + 1), 
			lda);
		i__2 = *n - j;
		d__1 = 1. / ajj;
		dscal_(&i__2, &d__1, &a_ref(j, j + 1), lda);
	    }
/* L10: */
	}
    } else {

/*        Compute the Cholesky factorization A = L*L'. */

	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {

/*           Compute L(J,J) and test for non-positive-definiteness. */

	    i__2 = j - 1;
	    ajj = a_ref(j, j) - ddot_(&i__2, &a_ref(j, 1), lda, &a_ref(j, 1), 
		    lda);
	    if (ajj <= 0.) {
		a_ref(j, j) = ajj;
		goto L30;
	    }
	    ajj = sqrt(ajj);
	    a_ref(j, j) = ajj;

/*           Compute elements J+1:N of column J. */

	    if (j < *n) {
		i__2 = *n - j;
		i__3 = j - 1;
		dgemv_("No transpose", &i__2, &i__3, &c_b10, &a_ref(j + 1, 1),
			 lda, &a_ref(j, 1), lda, &c_b12, &a_ref(j + 1, j), &
			c__1);
		i__2 = *n - j;
		d__1 = 1. / ajj;
		dscal_(&i__2, &d__1, &a_ref(j + 1, j), &c__1);
	    }
/* L20: */
	}
    }
    goto L40;

L30:
    *info = j;

L40:
    return 0;

/*     End of DPOTF2 */

} /* dpotf2_ */
Ejemplo n.º 26
0
/* Subroutine */ int sorgr2_(integer *m, integer *n, integer *k, real *a, 
	integer *lda, real *tau, real *work, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SORGR2 generates an m by n real matrix Q with orthonormal rows,   
    which is defined as the last m rows of a product of k elementary   
    reflectors of order n   

          Q  =  H(1) H(2) . . . H(k)   

    as returned by SGERQF.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix Q. M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix Q. N >= M.   

    K       (input) INTEGER   
            The number of elementary reflectors whose product defines the   
            matrix Q. M >= K >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the (m-k+i)-th row must contain the vector which   
            defines the elementary reflector H(i), for i = 1,2,...,k, as   
            returned by SGERQF in the last k rows of its array argument   
            A.   
            On exit, the m by n matrix Q.   

    LDA     (input) INTEGER   
            The first dimension of the array A. LDA >= max(1,M).   

    TAU     (input) REAL array, dimension (K)   
            TAU(i) must contain the scalar factor of the elementary   
            reflector H(i), as returned by SGERQF.   

    WORK    (workspace) REAL array, dimension (M)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument has an illegal value   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1;
    /* Local variables */
    static integer i__, j, l;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
	    slarf_(char *, integer *, integer *, real *, integer *, real *, 
	    real *, integer *, real *);
    static integer ii;
    extern /* Subroutine */ int xerbla_(char *, integer *);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]

    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*n < *m) {
	*info = -2;
    } else if (*k < 0 || *k > *m) {
	*info = -3;
    } else if (*lda < max(1,*m)) {
	*info = -5;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SORGR2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*m <= 0) {
	return 0;
    }

    if (*k < *m) {

/*        Initialise rows 1:m-k to rows of the unit matrix */

	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = *m - *k;
	    for (l = 1; l <= i__2; ++l) {
		a_ref(l, j) = 0.f;
/* L10: */
	    }
	    if (j > *n - *m && j <= *n - *k) {
		a_ref(*m - *n + j, j) = 1.f;
	    }
/* L20: */
	}
    }

    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	ii = *m - *k + i__;

/*        Apply H(i) to A(1:m-k+i,1:n-k+i) from the right */

	a_ref(ii, *n - *m + ii) = 1.f;
	i__2 = ii - 1;
	i__3 = *n - *m + ii;
	slarf_("Right", &i__2, &i__3, &a_ref(ii, 1), lda, &tau[i__], &a[
		a_offset], lda, &work[1]);
	i__2 = *n - *m + ii - 1;
	r__1 = -tau[i__];
	sscal_(&i__2, &r__1, &a_ref(ii, 1), lda);
	a_ref(ii, *n - *m + ii) = 1.f - tau[i__];

/*        Set A(m-k+i,n-k+i+1:n) to zero */

	i__2 = *n;
	for (l = *n - *m + ii + 1; l <= i__2; ++l) {
	    a_ref(ii, l) = 0.f;
/* L30: */
	}
/* L40: */
    }
    return 0;

/*     End of SORGR2 */

} /* sorgr2_ */
Ejemplo n.º 27
0
/* Subroutine */ int stbmv_(char *uplo, char *trans, char *diag, integer *n, 
	integer *k, real *a, integer *lda, real *x, integer *incx)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer info;
    static real temp;
    static integer i__, j, l;
    extern logical lsame_(char *, char *);
    static integer kplus1, ix, jx, kx;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static logical nounit;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
/*  Purpose   
    =======   
    STBMV  performs one of the matrix-vector operations   
       x := A*x,   or   x := A'*x,   
    where x is an n element vector and  A is an n by n unit, or non-unit,   
    upper or lower triangular band matrix, with ( k + 1 ) diagonals.   
    Parameters   
    ==========   
    UPLO   - CHARACTER*1.   
             On entry, UPLO specifies whether the matrix is an upper or   
             lower triangular matrix as follows:   
                UPLO = 'U' or 'u'   A is an upper triangular matrix.   
                UPLO = 'L' or 'l'   A is a lower triangular matrix.   
             Unchanged on exit.   
    TRANS  - CHARACTER*1.   
             On entry, TRANS specifies the operation to be performed as   
             follows:   
                TRANS = 'N' or 'n'   x := A*x.   
                TRANS = 'T' or 't'   x := A'*x.   
                TRANS = 'C' or 'c'   x := A'*x.   
             Unchanged on exit.   
    DIAG   - CHARACTER*1.   
             On entry, DIAG specifies whether or not A is unit   
             triangular as follows:   
                DIAG = 'U' or 'u'   A is assumed to be unit triangular.   
                DIAG = 'N' or 'n'   A is not assumed to be unit   
                                    triangular.   
             Unchanged on exit.   
    N      - INTEGER.   
             On entry, N specifies the order of the matrix A.   
             N must be at least zero.   
             Unchanged on exit.   
    K      - INTEGER.   
             On entry with UPLO = 'U' or 'u', K specifies the number of   
             super-diagonals of the matrix A.   
             On entry with UPLO = 'L' or 'l', K specifies the number of   
             sub-diagonals of the matrix A.   
             K must satisfy  0 .le. K.   
             Unchanged on exit.   
    A      - REAL             array of DIMENSION ( LDA, n ).   
             Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )   
             by n part of the array A must contain the upper triangular   
             band part of the matrix of coefficients, supplied column by   
             column, with the leading diagonal of the matrix in row   
             ( k + 1 ) of the array, the first super-diagonal starting at   
             position 2 in row k, and so on. The top left k by k triangle   
             of the array A is not referenced.   
             The following program segment will transfer an upper   
             triangular band matrix from conventional full matrix storage   
             to band storage:   
                   DO 20, J = 1, N   
                      M = K + 1 - J   
                      DO 10, I = MAX( 1, J - K ), J   
                         A( M + I, J ) = matrix( I, J )   
                10    CONTINUE   
                20 CONTINUE   
             Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )   
             by n part of the array A must contain the lower triangular   
             band part of the matrix of coefficients, supplied column by   
             column, with the leading diagonal of the matrix in row 1 of   
             the array, the first sub-diagonal starting at position 1 in   
             row 2, and so on. The bottom right k by k triangle of the   
             array A is not referenced.   
             The following program segment will transfer a lower   
             triangular band matrix from conventional full matrix storage   
             to band storage:   
                   DO 20, J = 1, N   
                      M = 1 - J   
                      DO 10, I = J, MIN( N, J + K )   
                         A( M + I, J ) = matrix( I, J )   
                10    CONTINUE   
                20 CONTINUE   
             Note that when DIAG = 'U' or 'u' the elements of the array A   
             corresponding to the diagonal elements of the matrix are not   
             referenced, but are assumed to be unity.   
             Unchanged on exit.   
    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared   
             in the calling (sub) program. LDA must be at least   
             ( k + 1 ).   
             Unchanged on exit.   
    X      - REAL             array of dimension at least   
             ( 1 + ( n - 1 )*abs( INCX ) ).   
             Before entry, the incremented array X must contain the n   
             element vector x. On exit, X is overwritten with the   
             tranformed vector x.   
    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   
    Level 2 Blas routine.   
    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   
       Test the input parameters.   
       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --x;
    /* Function Body */
    info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
	info = 1;
    } else if (! lsame_(trans, "N") && ! lsame_(trans, 
	    "T") && ! lsame_(trans, "C")) {
	info = 2;
    } else if (! lsame_(diag, "U") && ! lsame_(diag, 
	    "N")) {
	info = 3;
    } else if (*n < 0) {
	info = 4;
    } else if (*k < 0) {
	info = 5;
    } else if (*lda < *k + 1) {
	info = 7;
    } else if (*incx == 0) {
	info = 9;
    }
    if (info != 0) {
	xerbla_("STBMV ", &info);
	return 0;
    }
/*     Quick return if possible. */
    if (*n == 0) {
	return 0;
    }
    nounit = lsame_(diag, "N");
/*     Set up the start point in X if the increment is not unity. This   
       will be  ( N - 1 )*INCX   too small for descending loops. */
    if (*incx <= 0) {
	kx = 1 - (*n - 1) * *incx;
    } else if (*incx != 1) {
	kx = 1;
    }
/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A. */
    if (lsame_(trans, "N")) {
/*         Form  x := A*x. */
	if (lsame_(uplo, "U")) {
	    kplus1 = *k + 1;
	    if (*incx == 1) {
		i__1 = *n;
		for (j = 1; j <= i__1; ++j) {
		    if (x[j] != 0.f) {
			temp = x[j];
			l = kplus1 - j;
/* Computing MAX */
			i__2 = 1, i__3 = j - *k;
			i__4 = j - 1;
			for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
			    x[i__] += temp * a_ref(l + i__, j);
/* L10: */
			}
			if (nounit) {
			    x[j] *= a_ref(kplus1, j);
			}
		    }
/* L20: */
		}
	    } else {
		jx = kx;
		i__1 = *n;
		for (j = 1; j <= i__1; ++j) {
		    if (x[jx] != 0.f) {
			temp = x[jx];
			ix = kx;
			l = kplus1 - j;
/* Computing MAX */
			i__4 = 1, i__2 = j - *k;
			i__3 = j - 1;
			for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
			    x[ix] += temp * a_ref(l + i__, j);
			    ix += *incx;
/* L30: */
			}
			if (nounit) {
			    x[jx] *= a_ref(kplus1, j);
			}
		    }
		    jx += *incx;
		    if (j > *k) {
			kx += *incx;
		    }
/* L40: */
		}
	    }
	} else {
	    if (*incx == 1) {
		for (j = *n; j >= 1; --j) {
		    if (x[j] != 0.f) {
			temp = x[j];
			l = 1 - j;
/* Computing MIN */
			i__1 = *n, i__3 = j + *k;
			i__4 = j + 1;
			for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
			    x[i__] += temp * a_ref(l + i__, j);
/* L50: */
			}
			if (nounit) {
			    x[j] *= a_ref(1, j);
			}
		    }
/* L60: */
		}
	    } else {
		kx += (*n - 1) * *incx;
		jx = kx;
		for (j = *n; j >= 1; --j) {
		    if (x[jx] != 0.f) {
			temp = x[jx];
			ix = kx;
			l = 1 - j;
/* Computing MIN */
			i__4 = *n, i__1 = j + *k;
			i__3 = j + 1;
			for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
			    x[ix] += temp * a_ref(l + i__, j);
			    ix -= *incx;
/* L70: */
			}
			if (nounit) {
			    x[jx] *= a_ref(1, j);
			}
		    }
		    jx -= *incx;
		    if (*n - j >= *k) {
			kx -= *incx;
		    }
/* L80: */
		}
	    }
	}
    } else {
/*        Form  x := A'*x. */
	if (lsame_(uplo, "U")) {
	    kplus1 = *k + 1;
	    if (*incx == 1) {
		for (j = *n; j >= 1; --j) {
		    temp = x[j];
		    l = kplus1 - j;
		    if (nounit) {
			temp *= a_ref(kplus1, j);
		    }
/* Computing MAX */
		    i__4 = 1, i__1 = j - *k;
		    i__3 = max(i__4,i__1);
		    for (i__ = j - 1; i__ >= i__3; --i__) {
			temp += a_ref(l + i__, j) * x[i__];
/* L90: */
		    }
		    x[j] = temp;
/* L100: */
		}
	    } else {
		kx += (*n - 1) * *incx;
		jx = kx;
		for (j = *n; j >= 1; --j) {
		    temp = x[jx];
		    kx -= *incx;
		    ix = kx;
		    l = kplus1 - j;
		    if (nounit) {
			temp *= a_ref(kplus1, j);
		    }
/* Computing MAX */
		    i__4 = 1, i__1 = j - *k;
		    i__3 = max(i__4,i__1);
		    for (i__ = j - 1; i__ >= i__3; --i__) {
			temp += a_ref(l + i__, j) * x[ix];
			ix -= *incx;
/* L110: */
		    }
		    x[jx] = temp;
		    jx -= *incx;
/* L120: */
		}
	    }
	} else {
	    if (*incx == 1) {
		i__3 = *n;
		for (j = 1; j <= i__3; ++j) {
		    temp = x[j];
		    l = 1 - j;
		    if (nounit) {
			temp *= a_ref(1, j);
		    }
/* Computing MIN */
		    i__1 = *n, i__2 = j + *k;
		    i__4 = min(i__1,i__2);
		    for (i__ = j + 1; i__ <= i__4; ++i__) {
			temp += a_ref(l + i__, j) * x[i__];
/* L130: */
		    }
		    x[j] = temp;
/* L140: */
		}
	    } else {
		jx = kx;
		i__3 = *n;
		for (j = 1; j <= i__3; ++j) {
		    temp = x[jx];
		    kx += *incx;
		    ix = kx;
		    l = 1 - j;
		    if (nounit) {
			temp *= a_ref(1, j);
		    }
/* Computing MIN */
		    i__1 = *n, i__2 = j + *k;
		    i__4 = min(i__1,i__2);
		    for (i__ = j + 1; i__ <= i__4; ++i__) {
			temp += a_ref(l + i__, j) * x[ix];
			ix += *incx;
/* L150: */
		    }
		    x[jx] = temp;
		    jx += *incx;
/* L160: */
		}
	    }
	}
    }
    return 0;
/*     End of STBMV . */
} /* stbmv_ */
Ejemplo n.º 28
0
/* Subroutine */ int stgsen_(integer *ijob, logical *wantq, logical *wantz, 
	logical *select, integer *n, real *a, integer *lda, real *b, integer *
	ldb, real *alphar, real *alphai, real *beta, real *q, integer *ldq, 
	real *z__, integer *ldz, integer *m, real *pl, real *pr, real *dif, 
	real *work, integer *lwork, integer *iwork, integer *liwork, integer *
	info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    STGSEN reorders the generalized real Schur decomposition of a real   
    matrix pair (A, B) (in terms of an orthonormal equivalence trans-   
    formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues   
    appears in the leading diagonal blocks of the upper quasi-triangular   
    matrix A and the upper triangular B. The leading columns of Q and   
    Z form orthonormal bases of the corresponding left and right eigen-   
    spaces (deflating subspaces). (A, B) must be in generalized real   
    Schur canonical form (as returned by SGGES), i.e. A is block upper   
    triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper   
    triangular.   

    STGSEN also computes the generalized eigenvalues   

                w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j)   

    of the reordered matrix pair (A, B).   

    Optionally, STGSEN computes the estimates of reciprocal condition   
    numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),   
    (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)   
    between the matrix pairs (A11, B11) and (A22,B22) that correspond to   
    the selected cluster and the eigenvalues outside the cluster, resp.,   
    and norms of "projections" onto left and right eigenspaces w.r.t.   
    the selected cluster in the (1,1)-block.   

    Arguments   
    =========   

    IJOB    (input) INTEGER   
            Specifies whether condition numbers are required for the   
            cluster of eigenvalues (PL and PR) or the deflating subspaces   
            (Difu and Difl):   
             =0: Only reorder w.r.t. SELECT. No extras.   
             =1: Reciprocal of norms of "projections" onto left and right   
                 eigenspaces w.r.t. the selected cluster (PL and PR).   
             =2: Upper bounds on Difu and Difl. F-norm-based estimate   
                 (DIF(1:2)).   
             =3: Estimate of Difu and Difl. 1-norm-based estimate   
                 (DIF(1:2)).   
                 About 5 times as expensive as IJOB = 2.   
             =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic   
                 version to get it all.   
             =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above)   

    WANTQ   (input) LOGICAL   
            .TRUE. : update the left transformation matrix Q;   
            .FALSE.: do not update Q.   

    WANTZ   (input) LOGICAL   
            .TRUE. : update the right transformation matrix Z;   
            .FALSE.: do not update Z.   

    SELECT  (input) LOGICAL array, dimension (N)   
            SELECT specifies the eigenvalues in the selected cluster.   
            To select a real eigenvalue w(j), SELECT(j) must be set to   
            .TRUE.. To select a complex conjugate pair of eigenvalues   
            w(j) and w(j+1), corresponding to a 2-by-2 diagonal block,   
            either SELECT(j) or SELECT(j+1) or both must be set to   
            .TRUE.; a complex conjugate pair of eigenvalues must be   
            either both included in the cluster or both excluded.   

    N       (input) INTEGER   
            The order of the matrices A and B. N >= 0.   

    A       (input/output) REAL array, dimension(LDA,N)   
            On entry, the upper quasi-triangular matrix A, with (A, B) in   
            generalized real Schur canonical form.   
            On exit, A is overwritten by the reordered matrix A.   

    LDA     (input) INTEGER   
            The leading dimension of the array A. LDA >= max(1,N).   

    B       (input/output) REAL array, dimension(LDB,N)   
            On entry, the upper triangular matrix B, with (A, B) in   
            generalized real Schur canonical form.   
            On exit, B is overwritten by the reordered matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B. LDB >= max(1,N).   

    ALPHAR  (output) REAL array, dimension (N)   
    ALPHAI  (output) REAL array, dimension (N)   
    BETA    (output) REAL array, dimension (N)   
            On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will   
            be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i   
            and BETA(j),j=1,...,N  are the diagonals of the complex Schur   
            form (S,T) that would result if the 2-by-2 diagonal blocks of   
            the real generalized Schur form of (A,B) were further reduced   
            to triangular form using complex unitary transformations.   
            If ALPHAI(j) is zero, then the j-th eigenvalue is real; if   
            positive, then the j-th and (j+1)-st eigenvalues are a   
            complex conjugate pair, with ALPHAI(j+1) negative.   

    Q       (input/output) REAL array, dimension (LDQ,N)   
            On entry, if WANTQ = .TRUE., Q is an N-by-N matrix.   
            On exit, Q has been postmultiplied by the left orthogonal   
            transformation matrix which reorder (A, B); The leading M   
            columns of Q form orthonormal bases for the specified pair of   
            left eigenspaces (deflating subspaces).   
            If WANTQ = .FALSE., Q is not referenced.   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.  LDQ >= 1;   
            and if WANTQ = .TRUE., LDQ >= N.   

    Z       (input/output) REAL array, dimension (LDZ,N)   
            On entry, if WANTZ = .TRUE., Z is an N-by-N matrix.   
            On exit, Z has been postmultiplied by the left orthogonal   
            transformation matrix which reorder (A, B); The leading M   
            columns of Z form orthonormal bases for the specified pair of   
            left eigenspaces (deflating subspaces).   
            If WANTZ = .FALSE., Z is not referenced.   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z. LDZ >= 1;   
            If WANTZ = .TRUE., LDZ >= N.   

    M       (output) INTEGER   
            The dimension of the specified pair of left and right eigen-   
            spaces (deflating subspaces). 0 <= M <= N.   

    PL, PR  (output) REAL   
            If IJOB = 1, 4 or 5, PL, PR are lower bounds on the   
            reciprocal of the norm of "projections" onto left and right   
            eigenspaces with respect to the selected cluster.   
            0 < PL, PR <= 1.   
            If M = 0 or M = N, PL = PR  = 1.   
            If IJOB = 0, 2 or 3, PL and PR are not referenced.   

    DIF     (output) REAL array, dimension (2).   
            If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl.   
            If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on   
            Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based   
            estimates of Difu and Difl.   
            If M = 0 or N, DIF(1:2) = F-norm([A, B]).   
            If IJOB = 0 or 1, DIF is not referenced.   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            IF IJOB = 0, WORK is not referenced.  Otherwise,   
            on exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK. LWORK >=  4*N+16.   
            If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)).   
            If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)).   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    IWORK   (workspace/output) INTEGER array, dimension (LIWORK)   
            IF IJOB = 0, IWORK is not referenced.  Otherwise,   
            on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.   

    LIWORK  (input) INTEGER   
            The dimension of the array IWORK. LIWORK >= 1.   
            If IJOB = 1, 2 or 4, LIWORK >=  N+6.   
            If IJOB = 3 or 5, LIWORK >= MAX(2*M*(N-M), N+6).   

            If LIWORK = -1, then a workspace query is assumed; the   
            routine only calculates the optimal size of the IWORK array,   
            returns this value as the first entry of the IWORK array, and   
            no error message related to LIWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
              =0: Successful exit.   
              <0: If INFO = -i, the i-th argument had an illegal value.   
              =1: Reordering of (A, B) failed because the transformed   
                  matrix pair (A, B) would be too far from generalized   
                  Schur form; the problem is very ill-conditioned.   
                  (A, B) may have been partially reordered.   
                  If requested, 0 is returned in DIF(*), PL and PR.   

    Further Details   
    ===============   

    STGSEN first collects the selected eigenvalues by computing   
    orthogonal U and W that move them to the top left corner of (A, B).   
    In other words, the selected eigenvalues are the eigenvalues of   
    (A11, B11) in:   

                  U'*(A, B)*W = (A11 A12) (B11 B12) n1   
                                ( 0  A22),( 0  B22) n2   
                                  n1  n2    n1  n2   

    where N = n1+n2 and U' means the transpose of U. The first n1 columns   
    of U and W span the specified pair of left and right eigenspaces   
    (deflating subspaces) of (A, B).   

    If (A, B) has been obtained from the generalized real Schur   
    decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the   
    reordered generalized real Schur form of (C, D) is given by   

             (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)',   

    and the first n1 columns of Q*U and Z*W span the corresponding   
    deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.).   

    Note that if the selected eigenvalue is sufficiently ill-conditioned,   
    then its value may differ significantly from its value before   
    reordering.   

    The reciprocal condition numbers of the left and right eigenspaces   
    spanned by the first n1 columns of U and W (or Q*U and Z*W) may   
    be returned in DIF(1:2), corresponding to Difu and Difl, resp.   

    The Difu and Difl are defined as:   

         Difu[(A11, B11), (A22, B22)] = sigma-min( Zu )   
    and   
         Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)],   

    where sigma-min(Zu) is the smallest singular value of the   
    (2*n1*n2)-by-(2*n1*n2) matrix   

         Zu = [ kron(In2, A11)  -kron(A22', In1) ]   
              [ kron(In2, B11)  -kron(B22', In1) ].   

    Here, Inx is the identity matrix of size nx and A22' is the   
    transpose of A22. kron(X, Y) is the Kronecker product between   
    the matrices X and Y.   

    When DIF(2) is small, small changes in (A, B) can cause large changes   
    in the deflating subspace. An approximate (asymptotic) bound on the   
    maximum angular error in the computed deflating subspaces is   

         EPS * norm((A, B)) / DIF(2),   

    where EPS is the machine precision.   

    The reciprocal norm of the projectors on the left and right   
    eigenspaces associated with (A11, B11) may be returned in PL and PR.   
    They are computed as follows. First we compute L and R so that   
    P*(A, B)*Q is block diagonal, where   

         P = ( I -L ) n1           Q = ( I R ) n1   
             ( 0  I ) n2    and        ( 0 I ) n2   
               n1 n2                    n1 n2   

    and (L, R) is the solution to the generalized Sylvester equation   

         A11*R - L*A22 = -A12   
         B11*R - L*B22 = -B12   

    Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2).   
    An approximate (asymptotic) bound on the average absolute error of   
    the selected eigenvalues is   

         EPS * norm((A, B)) / PL.   

    There are also global error bounds which valid for perturbations up   
    to a certain restriction:  A lower bound (x) on the smallest   
    F-norm(E,F) for which an eigenvalue of (A11, B11) may move and   
    coalesce with an eigenvalue of (A22, B22) under perturbation (E,F),   
    (i.e. (A + E, B + F), is   

     x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)).   

    An approximate bound on x can be computed from DIF(1:2), PL and PR.   

    If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed   
    (L', R') and unperturbed (L, R) left and right deflating subspaces   
    associated with the selected cluster in the (1,1)-blocks can be   
    bounded as   

     max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2))   
     max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2))   

    See LAPACK User's Guide section 4.11 or the following references   
    for more information.   

    Note that if the default method for computing the Frobenius-norm-   
    based estimate DIF is not wanted (see SLATDF), then the parameter   
    IDIFJB (see below) should be changed from 3 to 4 (routine SLATDF   
    (IJOB = 2 will be used)). See STGSYL for more details.   

    Based on contributions by   
       Bo Kagstrom and Peter Poromaa, Department of Computing Science,   
       Umea University, S-901 87 Umea, Sweden.   

    References   
    ==========   

    [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the   
        Generalized Real Schur Form of a Regular Matrix Pair (A, B), in   
        M.S. Moonen et al (eds), Linear Algebra for Large Scale and   
        Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.   

    [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified   
        Eigenvalues of a Regular Matrix Pair (A, B) and Condition   
        Estimation: Theory, Algorithms and Software,   
        Report UMINF - 94.04, Department of Computing Science, Umea   
        University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working   
        Note 87. To appear in Numerical Algorithms, 1996.   

    [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software   
        for Solving the Generalized Sylvester Equation and Estimating the   
        Separation between Regular Matrix Pairs, Report UMINF - 93.23,   
        Department of Computing Science, Umea University, S-901 87 Umea,   
        Sweden, December 1993, Revised April 1994, Also as LAPACK Working   
        Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1,   
        1996.   

    =====================================================================   


       Decode and test the input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c__2 = 2;
    static real c_b28 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, 
	    z_offset, i__1, i__2;
    real r__1;
    /* Builtin functions */
    double sqrt(doublereal), r_sign(real *, real *);
    /* Local variables */
    static integer kase;
    static logical pair;
    static integer ierr;
    static real dsum;
    static logical swap;
    extern /* Subroutine */ int slag2_(real *, integer *, real *, integer *, 
	    real *, real *, real *, real *, real *, real *);
    static integer i__, k;
    static logical wantd;
    static integer lwmin;
    static logical wantp;
    static integer n1, n2;
    static logical wantd1, wantd2;
    static integer kk;
    static real dscale;
    static integer ks;
    static real rdscal;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *), slacon_(
	    integer *, real *, real *, integer *, real *, integer *), slacpy_(
	    char *, integer *, integer *, real *, integer *, real *, integer *
	    ), stgexc_(logical *, logical *, integer *, real *, 
	    integer *, real *, integer *, real *, integer *, real *, integer *
	    , integer *, integer *, real *, integer *, integer *);
    static integer liwmin;
    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, 
	    real *);
    static real smlnum;
    static integer mn2;
    static logical lquery;
    extern /* Subroutine */ int stgsyl_(char *, integer *, integer *, integer 
	    *, real *, integer *, real *, integer *, real *, integer *, real *
	    , integer *, real *, integer *, real *, integer *, real *, real *,
	     real *, integer *, integer *, integer *);
    static integer ijb;
    static real eps;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]


    --select;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --alphar;
    --alphai;
    --beta;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --dif;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;
    lquery = *lwork == -1 || *liwork == -1;

    if (*ijob < 0 || *ijob > 5) {
	*info = -1;
    } else if (*n < 0) {
	*info = -5;
    } else if (*lda < max(1,*n)) {
	*info = -7;
    } else if (*ldb < max(1,*n)) {
	*info = -9;
    } else if (*ldq < 1 || *wantq && *ldq < *n) {
	*info = -14;
    } else if (*ldz < 1 || *wantz && *ldz < *n) {
	*info = -16;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("STGSEN", &i__1);
	return 0;
    }

/*     Get machine constants */

    eps = slamch_("P");
    smlnum = slamch_("S") / eps;
    ierr = 0;

    wantp = *ijob == 1 || *ijob >= 4;
    wantd1 = *ijob == 2 || *ijob == 4;
    wantd2 = *ijob == 3 || *ijob == 5;
    wantd = wantd1 || wantd2;

/*     Set M to the dimension of the specified pair of deflating   
       subspaces. */

    *m = 0;
    pair = FALSE_;
    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {
	if (pair) {
	    pair = FALSE_;
	} else {
	    if (k < *n) {
		if (a_ref(k + 1, k) == 0.f) {
		    if (select[k]) {
			++(*m);
		    }
		} else {
		    pair = TRUE_;
		    if (select[k] || select[k + 1]) {
			*m += 2;
		    }
		}
	    } else {
		if (select[*n]) {
		    ++(*m);
		}
	    }
	}
/* L10: */
    }

    if (*ijob == 1 || *ijob == 2 || *ijob == 4) {
/* Computing MAX */
	i__1 = 1, i__2 = (*n << 2) + 16, i__1 = max(i__1,i__2), i__2 = (*m << 
		1) * (*n - *m);
	lwmin = max(i__1,i__2);
/* Computing MAX */
	i__1 = 1, i__2 = *n + 6;
	liwmin = max(i__1,i__2);
    } else if (*ijob == 3 || *ijob == 5) {
/* Computing MAX */
	i__1 = 1, i__2 = (*n << 2) + 16, i__1 = max(i__1,i__2), i__2 = (*m << 
		2) * (*n - *m);
	lwmin = max(i__1,i__2);
/* Computing MAX */
	i__1 = 1, i__2 = (*m << 1) * (*n - *m), i__1 = max(i__1,i__2), i__2 = 
		*n + 6;
	liwmin = max(i__1,i__2);
    } else {
/* Computing MAX */
	i__1 = 1, i__2 = (*n << 2) + 16;
	lwmin = max(i__1,i__2);
	liwmin = 1;
    }

    work[1] = (real) lwmin;
    iwork[1] = liwmin;

    if (*lwork < lwmin && ! lquery) {
	*info = -22;
    } else if (*liwork < liwmin && ! lquery) {
	*info = -24;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("STGSEN", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible. */

    if (*m == *n || *m == 0) {
	if (wantp) {
	    *pl = 1.f;
	    *pr = 1.f;
	}
	if (wantd) {
	    dscale = 0.f;
	    dsum = 1.f;
	    i__1 = *n;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		slassq_(n, &a_ref(1, i__), &c__1, &dscale, &dsum);
		slassq_(n, &b_ref(1, i__), &c__1, &dscale, &dsum);
/* L20: */
	    }
	    dif[1] = dscale * sqrt(dsum);
	    dif[2] = dif[1];
	}
	goto L60;
    }

/*     Collect the selected blocks at the top-left corner of (A, B). */

    ks = 0;
    pair = FALSE_;
    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {
	if (pair) {
	    pair = FALSE_;
	} else {

	    swap = select[k];
	    if (k < *n) {
		if (a_ref(k + 1, k) != 0.f) {
		    pair = TRUE_;
		    swap = swap || select[k + 1];
		}
	    }

	    if (swap) {
		++ks;

/*              Swap the K-th block to position KS.   
                Perform the reordering of diagonal blocks in (A, B)   
                by orthogonal transformation matrices and update   
                Q and Z accordingly (if requested): */

		kk = k;
		if (k != ks) {
		    stgexc_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], 
			    ldb, &q[q_offset], ldq, &z__[z_offset], ldz, &kk, 
			    &ks, &work[1], lwork, &ierr);
		}

		if (ierr > 0) {

/*                 Swap is rejected: exit. */

		    *info = 1;
		    if (wantp) {
			*pl = 0.f;
			*pr = 0.f;
		    }
		    if (wantd) {
			dif[1] = 0.f;
			dif[2] = 0.f;
		    }
		    goto L60;
		}

		if (pair) {
		    ++ks;
		}
	    }
	}
/* L30: */
    }
    if (wantp) {

/*        Solve generalized Sylvester equation for R and L   
          and compute PL and PR. */

	n1 = *m;
	n2 = *n - *m;
	i__ = n1 + 1;
	ijb = 0;
	slacpy_("Full", &n1, &n2, &a_ref(1, i__), lda, &work[1], &n1);
	slacpy_("Full", &n1, &n2, &b_ref(1, i__), ldb, &work[n1 * n2 + 1], &
		n1);
	i__1 = *lwork - (n1 << 1) * n2;
	stgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a_ref(i__, i__), lda,
		 &work[1], &n1, &b[b_offset], ldb, &b_ref(i__, i__), ldb, &
		work[n1 * n2 + 1], &n1, &dscale, &dif[1], &work[(n1 * n2 << 1)
		 + 1], &i__1, &iwork[1], &ierr);

/*        Estimate the reciprocal of norms of "projections" onto left   
          and right eigenspaces. */

	rdscal = 0.f;
	dsum = 1.f;
	i__1 = n1 * n2;
	slassq_(&i__1, &work[1], &c__1, &rdscal, &dsum);
	*pl = rdscal * sqrt(dsum);
	if (*pl == 0.f) {
	    *pl = 1.f;
	} else {
	    *pl = dscale / (sqrt(dscale * dscale / *pl + *pl) * sqrt(*pl));
	}
	rdscal = 0.f;
	dsum = 1.f;
	i__1 = n1 * n2;
	slassq_(&i__1, &work[n1 * n2 + 1], &c__1, &rdscal, &dsum);
	*pr = rdscal * sqrt(dsum);
	if (*pr == 0.f) {
	    *pr = 1.f;
	} else {
	    *pr = dscale / (sqrt(dscale * dscale / *pr + *pr) * sqrt(*pr));
	}
    }

    if (wantd) {

/*        Compute estimates of Difu and Difl. */

	if (wantd1) {
	    n1 = *m;
	    n2 = *n - *m;
	    i__ = n1 + 1;
	    ijb = 3;

/*           Frobenius norm-based Difu-estimate. */

	    i__1 = *lwork - (n1 << 1) * n2;
	    stgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a_ref(i__, i__), 
		    lda, &work[1], &n1, &b[b_offset], ldb, &b_ref(i__, i__), 
		    ldb, &work[n1 * n2 + 1], &n1, &dscale, &dif[1], &work[(n1 
		    << 1) * n2 + 1], &i__1, &iwork[1], &ierr);

/*           Frobenius norm-based Difl-estimate. */

	    i__1 = *lwork - (n1 << 1) * n2;
	    stgsyl_("N", &ijb, &n2, &n1, &a_ref(i__, i__), lda, &a[a_offset], 
		    lda, &work[1], &n2, &b_ref(i__, i__), ldb, &b[b_offset], 
		    ldb, &work[n1 * n2 + 1], &n2, &dscale, &dif[2], &work[(n1 
		    << 1) * n2 + 1], &i__1, &iwork[1], &ierr);
	} else {


/*           Compute 1-norm-based estimates of Difu and Difl using   
             reversed communication with SLACON. In each step a   
             generalized Sylvester equation or a transposed variant   
             is solved. */

	    kase = 0;
	    n1 = *m;
	    n2 = *n - *m;
	    i__ = n1 + 1;
	    ijb = 0;
	    mn2 = (n1 << 1) * n2;

/*           1-norm-based estimate of Difu. */

L40:
	    slacon_(&mn2, &work[mn2 + 1], &work[1], &iwork[1], &dif[1], &kase)
		    ;
	    if (kase != 0) {
		if (kase == 1) {

/*                 Solve generalized Sylvester equation. */

		    i__1 = *lwork - (n1 << 1) * n2;
		    stgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a_ref(
			    i__, i__), lda, &work[1], &n1, &b[b_offset], ldb, 
			    &b_ref(i__, i__), ldb, &work[n1 * n2 + 1], &n1, &
			    dscale, &dif[1], &work[(n1 << 1) * n2 + 1], &i__1,
			     &iwork[1], &ierr);
		} else {

/*                 Solve the transposed variant. */

		    i__1 = *lwork - (n1 << 1) * n2;
		    stgsyl_("T", &ijb, &n1, &n2, &a[a_offset], lda, &a_ref(
			    i__, i__), lda, &work[1], &n1, &b[b_offset], ldb, 
			    &b_ref(i__, i__), ldb, &work[n1 * n2 + 1], &n1, &
			    dscale, &dif[1], &work[(n1 << 1) * n2 + 1], &i__1,
			     &iwork[1], &ierr);
		}
		goto L40;
	    }
	    dif[1] = dscale / dif[1];

/*           1-norm-based estimate of Difl. */

L50:
	    slacon_(&mn2, &work[mn2 + 1], &work[1], &iwork[1], &dif[2], &kase)
		    ;
	    if (kase != 0) {
		if (kase == 1) {

/*                 Solve generalized Sylvester equation. */

		    i__1 = *lwork - (n1 << 1) * n2;
		    stgsyl_("N", &ijb, &n2, &n1, &a_ref(i__, i__), lda, &a[
			    a_offset], lda, &work[1], &n2, &b_ref(i__, i__), 
			    ldb, &b[b_offset], ldb, &work[n1 * n2 + 1], &n2, &
			    dscale, &dif[2], &work[(n1 << 1) * n2 + 1], &i__1,
			     &iwork[1], &ierr);
		} else {

/*                 Solve the transposed variant. */

		    i__1 = *lwork - (n1 << 1) * n2;
		    stgsyl_("T", &ijb, &n2, &n1, &a_ref(i__, i__), lda, &a[
			    a_offset], lda, &work[1], &n2, &b_ref(i__, i__), 
			    ldb, &b[b_offset], ldb, &work[n1 * n2 + 1], &n2, &
			    dscale, &dif[2], &work[(n1 << 1) * n2 + 1], &i__1,
			     &iwork[1], &ierr);
		}
		goto L50;
	    }
	    dif[2] = dscale / dif[2];

	}
    }

L60:

/*     Compute generalized eigenvalues of reordered pair (A, B) and   
       normalize the generalized Schur form. */

    pair = FALSE_;
    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {
	if (pair) {
	    pair = FALSE_;
	} else {

	    if (k < *n) {
		if (a_ref(k + 1, k) != 0.f) {
		    pair = TRUE_;
		}
	    }

	    if (pair) {

/*             Compute the eigenvalue(s) at position K. */

		work[1] = a_ref(k, k);
		work[2] = a_ref(k + 1, k);
		work[3] = a_ref(k, k + 1);
		work[4] = a_ref(k + 1, k + 1);
		work[5] = b_ref(k, k);
		work[6] = b_ref(k + 1, k);
		work[7] = b_ref(k, k + 1);
		work[8] = b_ref(k + 1, k + 1);
		r__1 = smlnum * eps;
		slag2_(&work[1], &c__2, &work[5], &c__2, &r__1, &beta[k], &
			beta[k + 1], &alphar[k], &alphar[k + 1], &alphai[k]);
		alphai[k + 1] = -alphai[k];

	    } else {

		if (r_sign(&c_b28, &b_ref(k, k)) < 0.f) {

/*                 If B(K,K) is negative, make it positive */

		    i__2 = *n;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			a_ref(k, i__) = -a_ref(k, i__);
			b_ref(k, i__) = -b_ref(k, i__);
			q_ref(i__, k) = -q_ref(i__, k);
/* L80: */
		    }
		}

		alphar[k] = a_ref(k, k);
		alphai[k] = 0.f;
		beta[k] = b_ref(k, k);

	    }
	}
/* L70: */
    }

    work[1] = (real) lwmin;
    iwork[1] = liwmin;

    return 0;

/*     End of STGSEN */

} /* stgsen_ */
Ejemplo n.º 29
0
/* Subroutine */ int cungbr_(char *vect, integer *m, integer *n, integer *k, 
	complex *a, integer *lda, complex *tau, complex *work, integer *lwork,
	 integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CUNGBR generates one of the complex unitary matrices Q or P**H   
    determined by CGEBRD when reducing a complex matrix A to bidiagonal   
    form: A = Q * B * P**H.  Q and P**H are defined as products of   
    elementary reflectors H(i) or G(i) respectively.   

    If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q   
    is of order M:   
    if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n   
    columns of Q, where m >= n >= k;   
    if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an   
    M-by-M matrix.   

    If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H   
    is of order N:   
    if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m   
    rows of P**H, where n >= m >= k;   
    if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as   
    an N-by-N matrix.   

    Arguments   
    =========   

    VECT    (input) CHARACTER*1   
            Specifies whether the matrix Q or the matrix P**H is   
            required, as defined in the transformation applied by CGEBRD:   
            = 'Q':  generate Q;   
            = 'P':  generate P**H.   

    M       (input) INTEGER   
            The number of rows of the matrix Q or P**H to be returned.   
            M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix Q or P**H to be returned.   
            N >= 0.   
            If VECT = 'Q', M >= N >= min(M,K);   
            if VECT = 'P', N >= M >= min(N,K).   

    K       (input) INTEGER   
            If VECT = 'Q', the number of columns in the original M-by-K   
            matrix reduced by CGEBRD.   
            If VECT = 'P', the number of rows in the original K-by-N   
            matrix reduced by CGEBRD.   
            K >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the vectors which define the elementary reflectors,   
            as returned by CGEBRD.   
            On exit, the M-by-N matrix Q or P**H.   

    LDA     (input) INTEGER   
            The leading dimension of the array A. LDA >= M.   

    TAU     (input) COMPLEX array, dimension   
                                  (min(M,K)) if VECT = 'Q'   
                                  (min(N,K)) if VECT = 'P'   
            TAU(i) must contain the scalar factor of the elementary   
            reflector H(i) or G(i), which determines Q or P**H, as   
            returned by CGEBRD in its array argument TAUQ or TAUP.   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK. LWORK >= max(1,min(M,N)).   
            For optimum performance LWORK >= min(M,N)*NB, where NB   
            is the optimal blocksize.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    static integer i__, j;
    extern logical lsame_(char *, char *);
    static integer iinfo;
    static logical wantq;
    static integer nb, mn;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int cunglq_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *, integer *),
	     cungqr_(integer *, integer *, integer *, complex *, integer *, 
	    complex *, complex *, integer *, integer *);
    static integer lwkopt;
    static logical lquery;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
    wantq = lsame_(vect, "Q");
    mn = min(*m,*n);
    lquery = *lwork == -1;
    if (! wantq && ! lsame_(vect, "P")) {
	*info = -1;
    } else if (*m < 0) {
	*info = -2;
    } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && (
	    *m > *n || *m < min(*n,*k))) {
	*info = -3;
    } else if (*k < 0) {
	*info = -4;
    } else if (*lda < max(1,*m)) {
	*info = -6;
    } else if (*lwork < max(1,mn) && ! lquery) {
	*info = -9;
    }

    if (*info == 0) {
	if (wantq) {
	    nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (
		    ftnlen)1);
	} else {
	    nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (
		    ftnlen)1);
	}
	lwkopt = max(1,mn) * nb;
	work[1].r = (real) lwkopt, work[1].i = 0.f;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CUNGBR", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*m == 0 || *n == 0) {
	work[1].r = 1.f, work[1].i = 0.f;
	return 0;
    }

    if (wantq) {

/*        Form Q, determined by a call to CGEBRD to reduce an m-by-k   
          matrix */

	if (*m >= *k) {

/*           If m >= k, assume m >= n >= k */

	    cungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
		    iinfo);

	} else {

/*           If m < k, assume m = n   

             Shift the vectors which define the elementary reflectors one   
             column to the right, and set the first row and column of Q   
             to those of the unit matrix */

	    for (j = *m; j >= 2; --j) {
		i__1 = a_subscr(1, j);
		a[i__1].r = 0.f, a[i__1].i = 0.f;
		i__1 = *m;
		for (i__ = j + 1; i__ <= i__1; ++i__) {
		    i__2 = a_subscr(i__, j);
		    i__3 = a_subscr(i__, j - 1);
		    a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
/* L10: */
		}
/* L20: */
	    }
	    i__1 = a_subscr(1, 1);
	    a[i__1].r = 1.f, a[i__1].i = 0.f;
	    i__1 = *m;
	    for (i__ = 2; i__ <= i__1; ++i__) {
		i__2 = a_subscr(i__, 1);
		a[i__2].r = 0.f, a[i__2].i = 0.f;
/* L30: */
	    }
	    if (*m > 1) {

/*              Form Q(2:m,2:m) */

		i__1 = *m - 1;
		i__2 = *m - 1;
		i__3 = *m - 1;
		cungqr_(&i__1, &i__2, &i__3, &a_ref(2, 2), lda, &tau[1], &
			work[1], lwork, &iinfo);
	    }
	}
    } else {

/*        Form P', determined by a call to CGEBRD to reduce a k-by-n   
          matrix */

	if (*k < *n) {

/*           If k < n, assume k <= m <= n */

	    cunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
		    iinfo);

	} else {

/*           If k >= n, assume m = n   

             Shift the vectors which define the elementary reflectors one   
             row downward, and set the first row and column of P' to   
             those of the unit matrix */

	    i__1 = a_subscr(1, 1);
	    a[i__1].r = 1.f, a[i__1].i = 0.f;
	    i__1 = *n;
	    for (i__ = 2; i__ <= i__1; ++i__) {
		i__2 = a_subscr(i__, 1);
		a[i__2].r = 0.f, a[i__2].i = 0.f;
/* L40: */
	    }
	    i__1 = *n;
	    for (j = 2; j <= i__1; ++j) {
		for (i__ = j - 1; i__ >= 2; --i__) {
		    i__2 = a_subscr(i__, j);
		    i__3 = a_subscr(i__ - 1, j);
		    a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
/* L50: */
		}
		i__2 = a_subscr(1, j);
		a[i__2].r = 0.f, a[i__2].i = 0.f;
/* L60: */
	    }
	    if (*n > 1) {

/*              Form P'(2:n,2:n) */

		i__1 = *n - 1;
		i__2 = *n - 1;
		i__3 = *n - 1;
		cunglq_(&i__1, &i__2, &i__3, &a_ref(2, 2), lda, &tau[1], &
			work[1], lwork, &iinfo);
	    }
	}
    }
    work[1].r = (real) lwkopt, work[1].i = 0.f;
    return 0;

/*     End of CUNGBR */

} /* cungbr_ */
Ejemplo n.º 30
0
/* Subroutine */ int cunmql_(char *side, char *trans, integer *m, integer *n, 
	integer *k, complex *a, integer *lda, complex *tau, complex *c__, 
	integer *ldc, complex *work, integer *lwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CUNMQL overwrites the general complex M-by-N matrix C with   

                    SIDE = 'L'     SIDE = 'R'   
    TRANS = 'N':      Q * C          C * Q   
    TRANS = 'C':      Q**H * C       C * Q**H   

    where Q is a complex unitary matrix defined as the product of k   
    elementary reflectors   

          Q = H(k) . . . H(2) H(1)   

    as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N   
    if SIDE = 'R'.   

    Arguments   
    =========   

    SIDE    (input) CHARACTER*1   
            = 'L': apply Q or Q**H from the Left;   
            = 'R': apply Q or Q**H from the Right.   

    TRANS   (input) CHARACTER*1   
            = 'N':  No transpose, apply Q;   
            = 'C':  Transpose, apply Q**H.   

    M       (input) INTEGER   
            The number of rows of the matrix C. M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix C. N >= 0.   

    K       (input) INTEGER   
            The number of elementary reflectors whose product defines   
            the matrix Q.   
            If SIDE = 'L', M >= K >= 0;   
            if SIDE = 'R', N >= K >= 0.   

    A       (input) COMPLEX array, dimension (LDA,K)   
            The i-th column must contain the vector which defines the   
            elementary reflector H(i), for i = 1,2,...,k, as returned by   
            CGEQLF in the last k columns of its array argument A.   
            A is modified by the routine but restored on exit.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.   
            If SIDE = 'L', LDA >= max(1,M);   
            if SIDE = 'R', LDA >= max(1,N).   

    TAU     (input) COMPLEX array, dimension (K)   
            TAU(i) must contain the scalar factor of the elementary   
            reflector H(i), as returned by CGEQLF.   

    C       (input/output) COMPLEX array, dimension (LDC,N)   
            On entry, the M-by-N matrix C.   
            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.   

    LDC     (input) INTEGER   
            The leading dimension of the array C. LDC >= max(1,M).   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.   
            If SIDE = 'L', LWORK >= max(1,N);   
            if SIDE = 'R', LWORK >= max(1,M).   
            For optimum performance LWORK >= N*NB if SIDE = 'L', and   
            LWORK >= M*NB if SIDE = 'R', where NB is the optimal   
            blocksize.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    =====================================================================   


       Test the input arguments   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__2 = 2;
    static integer c__65 = 65;
    
    /* System generated locals */
    address a__1[2];
    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, 
	    i__5;
    char ch__1[2];
    /* Builtin functions   
       Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
    /* Local variables */
    static logical left;
    static integer i__;
    static complex t[4160]	/* was [65][64] */;
    extern logical lsame_(char *, char *);
    static integer nbmin, iinfo, i1, i2, i3;
    extern /* Subroutine */ int cunm2l_(char *, char *, integer *, integer *, 
	    integer *, complex *, integer *, complex *, complex *, integer *, 
	    complex *, integer *);
    static integer ib, nb, mi, ni;
    extern /* Subroutine */ int clarfb_(char *, char *, char *, char *, 
	    integer *, integer *, integer *, complex *, integer *, complex *, 
	    integer *, complex *, integer *, complex *, integer *);
    static integer nq, nw;
    extern /* Subroutine */ int clarft_(char *, char *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static logical notran;
    static integer ldwork, lwkopt;
    static logical lquery;
    static integer iws;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    *info = 0;
    left = lsame_(side, "L");
    notran = lsame_(trans, "N");
    lquery = *lwork == -1;

/*     NQ is the order of Q and NW is the minimum dimension of WORK */

    if (left) {
	nq = *m;
	nw = *n;
    } else {
	nq = *n;
	nw = *m;
    }
    if (! left && ! lsame_(side, "R")) {
	*info = -1;
    } else if (! notran && ! lsame_(trans, "C")) {
	*info = -2;
    } else if (*m < 0) {
	*info = -3;
    } else if (*n < 0) {
	*info = -4;
    } else if (*k < 0 || *k > nq) {
	*info = -5;
    } else if (*lda < max(1,nq)) {
	*info = -7;
    } else if (*ldc < max(1,*m)) {
	*info = -10;
    } else if (*lwork < max(1,nw) && ! lquery) {
	*info = -12;
    }

    if (*info == 0) {

/*        Determine the block size.  NB may be at most NBMAX, where NBMAX   
          is used to define the local array T.   

   Computing MIN   
   Writing concatenation */
	i__3[0] = 1, a__1[0] = side;
	i__3[1] = 1, a__1[1] = trans;
	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
	i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQL", ch__1, m, n, k, &c_n1, (
		ftnlen)6, (ftnlen)2);
	nb = min(i__1,i__2);
	lwkopt = max(1,nw) * nb;
	work[1].r = (real) lwkopt, work[1].i = 0.f;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CUNMQL", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*m == 0 || *n == 0 || *k == 0) {
	work[1].r = 1.f, work[1].i = 0.f;
	return 0;
    }

    nbmin = 2;
    ldwork = nw;
    if (nb > 1 && nb < *k) {
	iws = nw * nb;
	if (*lwork < iws) {
	    nb = *lwork / ldwork;
/* Computing MAX   
   Writing concatenation */
	    i__3[0] = 1, a__1[0] = side;
	    i__3[1] = 1, a__1[1] = trans;
	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
	    i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQL", ch__1, m, n, k, &c_n1, (
		    ftnlen)6, (ftnlen)2);
	    nbmin = max(i__1,i__2);
	}
    } else {
	iws = nw;
    }

    if (nb < nbmin || nb >= *k) {

/*        Use unblocked code */

	cunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
		c_offset], ldc, &work[1], &iinfo);
    } else {

/*        Use blocked code */

	if (left && notran || ! left && ! notran) {
	    i1 = 1;
	    i2 = *k;
	    i3 = nb;
	} else {
	    i1 = (*k - 1) / nb * nb + 1;
	    i2 = 1;
	    i3 = -nb;
	}

	if (left) {
	    ni = *n;
	} else {
	    mi = *m;
	}

	i__1 = i2;
	i__2 = i3;
	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
	    i__4 = nb, i__5 = *k - i__ + 1;
	    ib = min(i__4,i__5);

/*           Form the triangular factor of the block reflector   
             H = H(i+ib-1) . . . H(i+1) H(i) */

	    i__4 = nq - *k + i__ + ib - 1;
	    clarft_("Backward", "Columnwise", &i__4, &ib, &a_ref(1, i__), lda,
		     &tau[i__], t, &c__65);
	    if (left) {

/*              H or H' is applied to C(1:m-k+i+ib-1,1:n) */

		mi = *m - *k + i__ + ib - 1;
	    } else {

/*              H or H' is applied to C(1:m,1:n-k+i+ib-1) */

		ni = *n - *k + i__ + ib - 1;
	    }

/*           Apply H or H' */

	    clarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &
		    a_ref(1, i__), lda, t, &c__65, &c__[c_offset], ldc, &work[
		    1], &ldwork);
/* L10: */
	}
    }
    work[1].r = (real) lwkopt, work[1].i = 0.f;
    return 0;

/*     End of CUNMQL */

} /* cunmql_ */