/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, integer *m, integer *n, doublereal *alpha, doublereal *a, integer * lda, doublereal *b, integer *ldb) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ static integer info; static doublereal temp; static integer i__, j, k; static logical lside; extern logical lsame_(char *, char *); static integer nrowa; static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); static logical nounit; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] /* Purpose ======= DTRMM performs one of the matrix-matrix operations B := alpha*op( A )*B, or B := alpha*B*op( A ), where alpha is a scalar, B is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of op( A ) = A or op( A ) = A'. Parameters ========== SIDE - CHARACTER*1. On entry, SIDE specifies whether op( A ) multiplies B from the left or right as follows: SIDE = 'L' or 'l' B := alpha*op( A )*B. SIDE = 'R' or 'r' B := alpha*B*op( A ). Unchanged on exit. UPLO - CHARACTER*1. On entry, UPLO specifies whether the matrix A is an upper or lower triangular matrix as follows: UPLO = 'U' or 'u' A is an upper triangular matrix. UPLO = 'L' or 'l' A is a lower triangular matrix. Unchanged on exit. TRANSA - CHARACTER*1. On entry, TRANSA specifies the form of op( A ) to be used in the matrix multiplication as follows: TRANSA = 'N' or 'n' op( A ) = A. TRANSA = 'T' or 't' op( A ) = A'. TRANSA = 'C' or 'c' op( A ) = A'. Unchanged on exit. DIAG - CHARACTER*1. On entry, DIAG specifies whether or not A is unit triangular as follows: DIAG = 'U' or 'u' A is assumed to be unit triangular. DIAG = 'N' or 'n' A is not assumed to be unit triangular. Unchanged on exit. M - INTEGER. On entry, M specifies the number of rows of B. M must be at least zero. Unchanged on exit. N - INTEGER. On entry, N specifies the number of columns of B. N must be at least zero. Unchanged on exit. ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. Unchanged on exit. A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. Before entry with UPLO = 'U' or 'u', the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. Before entry with UPLO = 'L' or 'l', the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when DIAG = 'U' or 'u', the diagonal elements of A are not referenced either, but are assumed to be unity. Unchanged on exit. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When SIDE = 'L' or 'l' then LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' then LDA must be at least max( 1, n ). Unchanged on exit. B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). Before entry, the leading m by n part of the array B must contain the matrix B, and on exit is overwritten by the transformed matrix. LDB - INTEGER. On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. LDB must be at least max( 1, m ). Unchanged on exit. Level 3 Blas routine. -- Written on 8-February-1989. Jack Dongarra, Argonne National Laboratory. Iain Duff, AERE Harwell. Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. Test the input parameters. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ lside = lsame_(side, "L"); if (lside) { nrowa = *m; } else { nrowa = *n; } nounit = lsame_(diag, "N"); upper = lsame_(uplo, "U"); info = 0; if (! lside && ! lsame_(side, "R")) { info = 1; } else if (! upper && ! lsame_(uplo, "L")) { info = 2; } else if (! lsame_(transa, "N") && ! lsame_(transa, "T") && ! lsame_(transa, "C")) { info = 3; } else if (! lsame_(diag, "U") && ! lsame_(diag, "N")) { info = 4; } else if (*m < 0) { info = 5; } else if (*n < 0) { info = 6; } else if (*lda < max(1,nrowa)) { info = 9; } else if (*ldb < max(1,*m)) { info = 11; } if (info != 0) { xerbla_("DTRMM ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } /* And when alpha.eq.zero. */ if (*alpha == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = 0.; /* L10: */ } /* L20: */ } return 0; } /* Start the operations. */ if (lside) { if (lsame_(transa, "N")) { /* Form B := alpha*A*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (k = 1; k <= i__2; ++k) { if (b_ref(k, j) != 0.) { temp = *alpha * b_ref(k, j); i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * a_ref( i__, k); /* L30: */ } if (nounit) { temp *= a_ref(k, k); } b_ref(k, j) = temp; } /* L40: */ } /* L50: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (k = *m; k >= 1; --k) { if (b_ref(k, j) != 0.) { temp = *alpha * b_ref(k, j); b_ref(k, j) = temp; if (nounit) { b_ref(k, j) = b_ref(k, j) * a_ref(k, k); } i__2 = *m; for (i__ = k + 1; i__ <= i__2; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * a_ref( i__, k); /* L60: */ } } /* L70: */ } /* L80: */ } } } else { /* Form B := alpha*A'*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { temp = b_ref(i__, j); if (nounit) { temp *= a_ref(i__, i__); } i__2 = i__ - 1; for (k = 1; k <= i__2; ++k) { temp += a_ref(k, i__) * b_ref(k, j); /* L90: */ } b_ref(i__, j) = *alpha * temp; /* L100: */ } /* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = b_ref(i__, j); if (nounit) { temp *= a_ref(i__, i__); } i__3 = *m; for (k = i__ + 1; k <= i__3; ++k) { temp += a_ref(k, i__) * b_ref(k, j); /* L120: */ } b_ref(i__, j) = *alpha * temp; /* L130: */ } /* L140: */ } } } } else { if (lsame_(transa, "N")) { /* Form B := alpha*B*A. */ if (upper) { for (j = *n; j >= 1; --j) { temp = *alpha; if (nounit) { temp *= a_ref(j, j); } i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b_ref(i__, j) = temp * b_ref(i__, j); /* L150: */ } i__1 = j - 1; for (k = 1; k <= i__1; ++k) { if (a_ref(k, j) != 0.) { temp = *alpha * a_ref(k, j); i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( i__, k); /* L160: */ } } /* L170: */ } /* L180: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = *alpha; if (nounit) { temp *= a_ref(j, j); } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = temp * b_ref(i__, j); /* L190: */ } i__2 = *n; for (k = j + 1; k <= i__2; ++k) { if (a_ref(k, j) != 0.) { temp = *alpha * a_ref(k, j); i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( i__, k); /* L200: */ } } /* L210: */ } /* L220: */ } } } else { /* Form B := alpha*B*A'. */ if (upper) { i__1 = *n; for (k = 1; k <= i__1; ++k) { i__2 = k - 1; for (j = 1; j <= i__2; ++j) { if (a_ref(j, k) != 0.) { temp = *alpha * a_ref(j, k); i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( i__, k); /* L230: */ } } /* L240: */ } temp = *alpha; if (nounit) { temp *= a_ref(k, k); } if (temp != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, k) = temp * b_ref(i__, k); /* L250: */ } } /* L260: */ } } else { for (k = *n; k >= 1; --k) { i__1 = *n; for (j = k + 1; j <= i__1; ++j) { if (a_ref(j, k) != 0.) { temp = *alpha * a_ref(j, k); i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { b_ref(i__, j) = b_ref(i__, j) + temp * b_ref( i__, k); /* L270: */ } } /* L280: */ } temp = *alpha; if (nounit) { temp *= a_ref(k, k); } if (temp != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { b_ref(i__, k) = temp * b_ref(i__, k); /* L290: */ } } /* L300: */ } } } } return 0; /* End of DTRMM . */ } /* dtrmm_ */
/* Subroutine */ int dlagv2_(doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *alphar, doublereal *alphai, doublereal * beta, doublereal *csl, doublereal *snl, doublereal *csr, doublereal * snr) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DLAGV2 computes the Generalized Schur factorization of a real 2-by-2 matrix pencil (A,B) where B is upper triangular. This routine computes orthogonal (rotation) matrices given by CSL, SNL and CSR, SNR such that 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0 types), then [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] [ 0 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] [ b11 b12 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ], 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues, then [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] [ a21 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] [ b11 0 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ] where b11 >= b22 > 0. Arguments ========= A (input/output) DOUBLE PRECISION array, dimension (LDA, 2) On entry, the 2 x 2 matrix A. On exit, A is overwritten by the ``A-part'' of the generalized Schur form. LDA (input) INTEGER THe leading dimension of the array A. LDA >= 2. B (input/output) DOUBLE PRECISION array, dimension (LDB, 2) On entry, the upper triangular 2 x 2 matrix B. On exit, B is overwritten by the ``B-part'' of the generalized Schur form. LDB (input) INTEGER THe leading dimension of the array B. LDB >= 2. ALPHAR (output) DOUBLE PRECISION array, dimension (2) ALPHAI (output) DOUBLE PRECISION array, dimension (2) BETA (output) DOUBLE PRECISION array, dimension (2) (ALPHAR(k)+i*ALPHAI(k))/BETA(k) are the eigenvalues of the pencil (A,B), k=1,2, i = sqrt(-1). Note that BETA(k) may be zero. CSL (output) DOUBLE PRECISION The cosine of the left rotation matrix. SNL (output) DOUBLE PRECISION The sine of the left rotation matrix. CSR (output) DOUBLE PRECISION The cosine of the right rotation matrix. SNR (output) DOUBLE PRECISION The sine of the right rotation matrix. Further Details =============== Based on contributions by Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__2 = 2; static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset; doublereal d__1, d__2, d__3, d__4, d__5, d__6; /* Local variables */ extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), dlag2_( doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); static doublereal r__, t, anorm, bnorm, h1, h2, h3, scale1, scale2; extern /* Subroutine */ int dlasv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); extern doublereal dlapy2_(doublereal *, doublereal *); static doublereal ascale, bscale; extern doublereal dlamch_(char *); static doublereal wi, qq, rr, safmin; extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); static doublereal wr1, wr2, ulp; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --alphar; --alphai; --beta; /* Function Body */ safmin = dlamch_("S"); ulp = dlamch_("P"); /* Scale A Computing MAX */ d__5 = (d__1 = a_ref(1, 1), abs(d__1)) + (d__2 = a_ref(2, 1), abs(d__2)), d__6 = (d__3 = a_ref(1, 2), abs(d__3)) + (d__4 = a_ref(2, 2), abs( d__4)), d__5 = max(d__5,d__6); anorm = max(d__5,safmin); ascale = 1. / anorm; a_ref(1, 1) = ascale * a_ref(1, 1); a_ref(1, 2) = ascale * a_ref(1, 2); a_ref(2, 1) = ascale * a_ref(2, 1); a_ref(2, 2) = ascale * a_ref(2, 2); /* Scale B Computing MAX */ d__4 = (d__3 = b_ref(1, 1), abs(d__3)), d__5 = (d__1 = b_ref(1, 2), abs( d__1)) + (d__2 = b_ref(2, 2), abs(d__2)), d__4 = max(d__4,d__5); bnorm = max(d__4,safmin); bscale = 1. / bnorm; b_ref(1, 1) = bscale * b_ref(1, 1); b_ref(1, 2) = bscale * b_ref(1, 2); b_ref(2, 2) = bscale * b_ref(2, 2); /* Check if A can be deflated */ if ((d__1 = a_ref(2, 1), abs(d__1)) <= ulp) { *csl = 1.; *snl = 0.; *csr = 1.; *snr = 0.; a_ref(2, 1) = 0.; b_ref(2, 1) = 0.; /* Check if B is singular */ } else if ((d__1 = b_ref(1, 1), abs(d__1)) <= ulp) { dlartg_(&a_ref(1, 1), &a_ref(2, 1), csl, snl, &r__); *csr = 1.; *snr = 0.; drot_(&c__2, &a_ref(1, 1), lda, &a_ref(2, 1), lda, csl, snl); drot_(&c__2, &b_ref(1, 1), ldb, &b_ref(2, 1), ldb, csl, snl); a_ref(2, 1) = 0.; b_ref(1, 1) = 0.; b_ref(2, 1) = 0.; } else if ((d__1 = b_ref(2, 2), abs(d__1)) <= ulp) { dlartg_(&a_ref(2, 2), &a_ref(2, 1), csr, snr, &t); *snr = -(*snr); drot_(&c__2, &a_ref(1, 1), &c__1, &a_ref(1, 2), &c__1, csr, snr); drot_(&c__2, &b_ref(1, 1), &c__1, &b_ref(1, 2), &c__1, csr, snr); *csl = 1.; *snl = 0.; a_ref(2, 1) = 0.; b_ref(2, 1) = 0.; b_ref(2, 2) = 0.; } else { /* B is nonsingular, first compute the eigenvalues of (A,B) */ dlag2_(&a[a_offset], lda, &b[b_offset], ldb, &safmin, &scale1, & scale2, &wr1, &wr2, &wi); if (wi == 0.) { /* two real eigenvalues, compute s*A-w*B */ h1 = scale1 * a_ref(1, 1) - wr1 * b_ref(1, 1); h2 = scale1 * a_ref(1, 2) - wr1 * b_ref(1, 2); h3 = scale1 * a_ref(2, 2) - wr1 * b_ref(2, 2); rr = dlapy2_(&h1, &h2); d__1 = scale1 * a_ref(2, 1); qq = dlapy2_(&d__1, &h3); if (rr > qq) { /* find right rotation matrix to zero 1,1 element of (sA - wB) */ dlartg_(&h2, &h1, csr, snr, &t); } else { /* find right rotation matrix to zero 2,1 element of (sA - wB) */ d__1 = scale1 * a_ref(2, 1); dlartg_(&h3, &d__1, csr, snr, &t); } *snr = -(*snr); drot_(&c__2, &a_ref(1, 1), &c__1, &a_ref(1, 2), &c__1, csr, snr); drot_(&c__2, &b_ref(1, 1), &c__1, &b_ref(1, 2), &c__1, csr, snr); /* compute inf norms of A and B Computing MAX */ d__5 = (d__1 = a_ref(1, 1), abs(d__1)) + (d__2 = a_ref(1, 2), abs( d__2)), d__6 = (d__3 = a_ref(2, 1), abs(d__3)) + (d__4 = a_ref(2, 2), abs(d__4)); h1 = max(d__5,d__6); /* Computing MAX */ d__5 = (d__1 = b_ref(1, 1), abs(d__1)) + (d__2 = b_ref(1, 2), abs( d__2)), d__6 = (d__3 = b_ref(2, 1), abs(d__3)) + (d__4 = b_ref(2, 2), abs(d__4)); h2 = max(d__5,d__6); if (scale1 * h1 >= abs(wr1) * h2) { /* find left rotation matrix Q to zero out B(2,1) */ dlartg_(&b_ref(1, 1), &b_ref(2, 1), csl, snl, &r__); } else { /* find left rotation matrix Q to zero out A(2,1) */ dlartg_(&a_ref(1, 1), &a_ref(2, 1), csl, snl, &r__); } drot_(&c__2, &a_ref(1, 1), lda, &a_ref(2, 1), lda, csl, snl); drot_(&c__2, &b_ref(1, 1), ldb, &b_ref(2, 1), ldb, csl, snl); a_ref(2, 1) = 0.; b_ref(2, 1) = 0.; } else { /* a pair of complex conjugate eigenvalues first compute the SVD of the matrix B */ dlasv2_(&b_ref(1, 1), &b_ref(1, 2), &b_ref(2, 2), &r__, &t, snr, csr, snl, csl); /* Form (A,B) := Q(A,B)Z' where Q is left rotation matrix and Z is right rotation matrix computed from DLASV2 */ drot_(&c__2, &a_ref(1, 1), lda, &a_ref(2, 1), lda, csl, snl); drot_(&c__2, &b_ref(1, 1), ldb, &b_ref(2, 1), ldb, csl, snl); drot_(&c__2, &a_ref(1, 1), &c__1, &a_ref(1, 2), &c__1, csr, snr); drot_(&c__2, &b_ref(1, 1), &c__1, &b_ref(1, 2), &c__1, csr, snr); b_ref(2, 1) = 0.; b_ref(1, 2) = 0.; } } /* Unscaling */ a_ref(1, 1) = anorm * a_ref(1, 1); a_ref(2, 1) = anorm * a_ref(2, 1); a_ref(1, 2) = anorm * a_ref(1, 2); a_ref(2, 2) = anorm * a_ref(2, 2); b_ref(1, 1) = bnorm * b_ref(1, 1); b_ref(2, 1) = bnorm * b_ref(2, 1); b_ref(1, 2) = bnorm * b_ref(1, 2); b_ref(2, 2) = bnorm * b_ref(2, 2); if (wi == 0.) { alphar[1] = a_ref(1, 1); alphar[2] = a_ref(2, 2); alphai[1] = 0.; alphai[2] = 0.; beta[1] = b_ref(1, 1); beta[2] = b_ref(2, 2); } else { alphar[1] = anorm * wr1 / scale1 / bnorm; alphai[1] = anorm * wi / scale1 / bnorm; alphar[2] = alphar[1]; alphai[2] = -alphai[1]; beta[1] = 1.; beta[2] = 1.; } /* L10: */ return 0; /* End of DLAGV2 */ } /* dlagv2_ */
/* Subroutine */ int sgelsd_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * rank, real *work, integer *lwork, integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; /* Builtin functions */ double log(doublereal); /* Local variables */ static real anrm, bnrm; static integer itau, nlvl, iascl, ibscl; static real sfmin; static integer minmn, maxmn, itaup, itauq, mnthr, nwork, ie, il; extern /* Subroutine */ int slabad_(real *, real *); static integer mm; extern /* Subroutine */ int sgebrd_(integer *, integer *, real *, integer *, real *, real *, real *, real *, real *, integer *, integer *); extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static real bignum; extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer *, real *, real *, integer *, integer *), slalsd_(char *, integer *, integer *, integer *, real *, real *, real *, integer *, real * , integer *, real *, integer *, integer *), slascl_(char * , integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *); static integer wlalsd; extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer *, real *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); static integer ldwork; extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer * , real *, integer *, integer *); static integer minwrk, maxwrk; static real smlnum; extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *); static logical lquery; static integer smlsiz; extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *); static real eps; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1999 Purpose ======= SGELSD computes the minimum-norm solution to a real linear least squares problem: minimize 2-norm(| b - A*x |) using the singular value decomposition (SVD) of A. A is an M-by-N matrix which may be rank-deficient. Several right hand side vectors b and solution vectors x can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X. The problem is solved in three steps: (1) Reduce the coefficient matrix A to bidiagonal form with Householder transformations, reducing the original problem into a "bidiagonal least squares problem" (BLS) (2) Solve the BLS using a divide and conquer approach. (3) Apply back all the Householder tranformations to solve the original least squares problem. The effective rank of A is determined by treating as zero those singular values which are less than RCOND times the largest singular value. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= M (input) INTEGER The number of rows of A. M >= 0. N (input) INTEGER The number of columns of A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. A (input) REAL array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, A has been destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input/output) REAL array, dimension (LDB,NRHS) On entry, the M-by-NRHS right hand side matrix B. On exit, B is overwritten by the N-by-NRHS solution matrix X. If m >= n and RANK = n, the residual sum-of-squares for the solution in the i-th column is given by the sum of squares of elements n+1:m in that column. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,max(M,N)). S (output) REAL array, dimension (min(M,N)) The singular values of A in decreasing order. The condition number of A in the 2-norm = S(1)/S(min(m,n)). RCOND (input) REAL RCOND is used to determine the effective rank of A. Singular values S(i) <= RCOND*S(1) are treated as zero. If RCOND < 0, machine precision is used instead. RANK (output) INTEGER The effective rank of A, i.e., the number of singular values which are greater than RCOND*S(1). WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK must be at least 1. The exact minimum amount of workspace needed depends on M, N and NRHS. As long as LWORK is at least 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, if M is greater than or equal to N or 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, if M is less than N, the code will execute correctly. SMLSIZ is returned by ILAENV and is equal to the maximum size of the subproblems at the bottom of the computation tree (usually about 25), and NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) For good performance, LWORK should generally be larger. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace) INTEGER array, dimension (LIWORK) LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, where MINMN = MIN( M,N ). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: the algorithm for computing the SVD failed to converge; if INFO = i, i off-diagonal elements of an intermediate bidiagonal form did not converge to zero. Further Details =============== Based on contributions by Ming Gu and Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA Osni Marques, LBNL/NERSC, USA ===================================================================== Test the input arguments. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --s; --work; --iwork; /* Function Body */ *info = 0; minmn = min(*m,*n); maxmn = max(*m,*n); mnthr = ilaenv_(&c__6, "SGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( ftnlen)1); lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,maxmn)) { *info = -7; } smlsiz = ilaenv_(&c__9, "SGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( ftnlen)6, (ftnlen)1); /* Compute workspace. (Note: Comments in the code beginning "Workspace:" describe the minimal amount of workspace needed at that point in the code, as well as the preferred amount for good performance. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV.) */ minwrk = 1; minmn = max(1,minmn); /* Computing MAX */ i__1 = (integer) (log((real) minmn / (real) (smlsiz + 1)) / log(2.f)) + 1; nlvl = max(i__1,0); if (*info == 0) { maxwrk = 0; mm = *m; if (*m >= *n && *m >= mnthr) { /* Path 1a - overdetermined, with many more rows than columns. */ mm = *n; /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "SORMQR", "LT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); maxwrk = max(i__1,i__2); } if (*m >= *n) { /* Path 1 - overdetermined or exactly determined. Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "SGEBRD" , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "SORMBR", "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "SORMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing 2nd power */ i__1 = smlsiz + 1; wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * nrhs + i__1 * i__1; /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + wlalsd; maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), i__2 = *n * 3 + wlalsd; minwrk = max(i__1,i__2); } if (*n > *m) { /* Computing 2nd power */ i__1 = smlsiz + 1; wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * nrhs + i__1 * i__1; if (*n >= mnthr) { /* Path 2a - underdetermined, with many more columns than rows. */ maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * ilaenv_(&c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& c__1, "SORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * ilaenv_(&c__1, "SORMBR", "PLN", m, nrhs, m, &c_n1, ( ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); if (*nrhs > 1) { /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; maxwrk = max(i__1,i__2); } else { /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 1); maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "SORMLQ", "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; maxwrk = max(i__1,i__2); } else { /* Path 2 - remaining underdetermined cases. */ maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "SORMBR" , "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *m * 3 + wlalsd; maxwrk = max(i__1,i__2); } /* Computing MAX */ i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), i__2 = *m * 3 + wlalsd; minwrk = max(i__1,i__2); } minwrk = min(minwrk,maxwrk); work[1] = (real) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); xerbla_("SGELSD", &i__1); return 0; } else if (lquery) { goto L10; } /* Quick return if possible. */ if (*m == 0 || *n == 0) { *rank = 0; return 0; } /* Get machine parameters. */ eps = slamch_("P"); sfmin = slamch_("S"); smlnum = sfmin / eps; bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); /* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]); iascl = 0; if (anrm > 0.f && anrm < smlnum) { /* Scale matrix norm up to SMLNUM. */ slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM. */ slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.f) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); slaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b[b_offset], ldb); slaset_("F", &minmn, &c__1, &c_b82, &c_b82, &s[1], &c__1); *rank = 0; goto L10; } /* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); ibscl = 0; if (bnrm > 0.f && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM. */ slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM. */ slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info); ibscl = 2; } /* If M < N make sure certain entries of B are zero. */ if (*m < *n) { i__1 = *n - *m; slaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b_ref(*m + 1, 1), ldb); } /* Overdetermined case. */ if (*m >= *n) { /* Path 1 - overdetermined or exactly determined. */ mm = *m; if (*m >= mnthr) { /* Path 1a - overdetermined, with many more rows than columns. */ mm = *n; itau = 1; nwork = itau + *n; /* Compute A=Q*R. (Workspace: need 2*N, prefer N+N*NB) */ i__1 = *lwork - nwork + 1; sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info); /* Multiply B by transpose(Q). (Workspace: need N+NRHS, prefer N+NRHS*NB) */ i__1 = *lwork - nwork + 1; sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[nwork], &i__1, info); /* Zero out below R. */ if (*n > 1) { i__1 = *n - 1; i__2 = *n - 1; slaset_("L", &i__1, &i__2, &c_b82, &c_b82, &a_ref(2, 1), lda); } } ie = 1; itauq = ie + *n; itaup = itauq + *n; nwork = itaup + *n; /* Bidiagonalize R in A. (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ i__1 = *lwork - nwork + 1; sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[nwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors of R. (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ i__1 = *lwork - nwork + 1; sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ slalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &iwork[1], info); if (*info != 0) { goto L10; } /* Multiply B by right bidiagonalizing vectors of R. */ i__1 = *lwork - nwork + 1; sormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & b[b_offset], ldb, &work[nwork], &i__1, info); } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( i__1,*nrhs), i__2 = *n - *m * 3; if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { /* Path 2a - underdetermined, with many more columns than rows and sufficient workspace for an efficient algorithm. */ ldwork = *m; /* Computing MAX Computing MAX */ i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + *m + *m * *nrhs; if (*lwork >= max(i__1,i__2)) { ldwork = *lda; } itau = 1; nwork = *m + 1; /* Compute A=L*Q. (Workspace: need 2*M, prefer M+M*NB) */ i__1 = *lwork - nwork + 1; sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info); il = nwork; /* Copy L to WORK(IL), zeroing out above its diagonal. */ slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); i__1 = *m - 1; i__2 = *m - 1; slaset_("U", &i__1, &i__2, &c_b82, &c_b82, &work[il + ldwork], & ldwork); ie = il + ldwork * *m; itauq = ie + *m; itaup = itauq + *m; nwork = itaup + *m; /* Bidiagonalize L in WORK(IL). (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ i__1 = *lwork - nwork + 1; sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup], &work[nwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors of L. (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ i__1 = *lwork - nwork + 1; sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ slalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &iwork[1], info); if (*info != 0) { goto L10; } /* Multiply B by right bidiagonalizing vectors of L. */ i__1 = *lwork - nwork + 1; sormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Zero out below first M rows of B. */ i__1 = *n - *m; slaset_("F", &i__1, nrhs, &c_b82, &c_b82, &b_ref(*m + 1, 1), ldb); nwork = itau + *m; /* Multiply transpose(Q) by B. (Workspace: need M+NRHS, prefer M+NRHS*NB) */ i__1 = *lwork - nwork + 1; sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[nwork], &i__1, info); } else { /* Path 2 - remaining underdetermined cases. */ ie = 1; itauq = ie + *m; itaup = itauq + *m; nwork = itaup + *m; /* Bidiagonalize A. (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ i__1 = *lwork - nwork + 1; sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[nwork], &i__1, info); /* Multiply B by transpose of left bidiagonalizing vectors. (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ i__1 = *lwork - nwork + 1; sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] , &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ slalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &iwork[1], info); if (*info != 0) { goto L10; } /* Multiply B by right bidiagonalizing vectors of A. */ i__1 = *lwork - nwork + 1; sormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] , &b[b_offset], ldb, &work[nwork], &i__1, info); } } /* Undo scaling. */ if (iascl == 1) { slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info); slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } else if (iascl == 2) { slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info); slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } if (ibscl == 1) { slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } else if (ibscl == 2) { slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } L10: work[1] = (real) maxwrk; return 0; /* End of SGELSD */ } /* sgelsd_ */
/* Subroutine */ int dget10_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *b, integer *ldb, doublereal *work, doublereal * result) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; doublereal d__1, d__2; /* Local variables */ static doublereal unfl; static integer j; extern doublereal dasum_(integer *, doublereal *, integer *); static doublereal anorm; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static doublereal wnorm; extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); static doublereal eps; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DGET10 compares two matrices A and B and computes the ratio RESULT = norm( A - B ) / ( norm(A) * M * EPS ) Arguments ========= M (input) INTEGER The number of rows of the matrices A and B. N (input) INTEGER The number of columns of the matrices A and B. A (input) DOUBLE PRECISION array, dimension (LDA,N) The m by n matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input) DOUBLE PRECISION array, dimension (LDB,N) The m by n matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,M). WORK (workspace) DOUBLE PRECISION array, dimension (M) RESULT (output) DOUBLE PRECISION RESULT = norm( A - B ) / ( norm(A) * M * EPS ) ===================================================================== Quick return if possible Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --work; /* Function Body */ if (*m <= 0 || *n <= 0) { *result = 0.; return 0; } unfl = dlamch_("Safe minimum"); eps = dlamch_("Precision"); wnorm = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { dcopy_(m, &a_ref(1, j), &c__1, &work[1], &c__1); daxpy_(m, &c_b7, &b_ref(1, j), &c__1, &work[1], &c__1); /* Computing MAX */ d__1 = wnorm, d__2 = dasum_(n, &work[1], &c__1); wnorm = max(d__1,d__2); /* L10: */ } /* Computing MAX */ d__1 = dlange_("1", m, n, &a[a_offset], lda, &work[1]); anorm = max(d__1,unfl); if (anorm > wnorm) { *result = wnorm / anorm / (*m * eps); } else { if (anorm < 1.) { /* Computing MIN */ d__1 = wnorm, d__2 = *m * anorm; *result = min(d__1,d__2) / anorm / (*m * eps); } else { /* Computing MIN */ d__1 = wnorm / anorm, d__2 = (doublereal) (*m); *result = min(d__1,d__2) / (*m * eps); } } return 0; /* End of DGET10 */ } /* dget10_ */
/* Subroutine */ int ctrtri_(char *uplo, char *diag, integer *n, complex *a, integer *lda, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CTRTRI computes the inverse of a complex upper or lower triangular matrix A. This is the Level 3 BLAS version of the algorithm. Arguments ========= UPLO (input) CHARACTER*1 = 'U': A is upper triangular; = 'L': A is lower triangular. DIAG (input) CHARACTER*1 = 'N': A is non-unit triangular; = 'U': A is unit triangular. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the triangular matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, A(i,i) is exactly zero. The triangular matrix is singular and its inverse can not be computed. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static complex c_b1 = {1.f,0.f}; static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; complex q__1; char ch__1[2]; /* Builtin functions Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ static integer j; extern logical lsame_(char *, char *); extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *), ctrsm_(char *, char *, char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *); static logical upper; extern /* Subroutine */ int ctrti2_(char *, char *, integer *, complex *, integer *, integer *); static integer jb, nb, nn; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static logical nounit; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); nounit = lsame_(diag, "N"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (! nounit && ! lsame_(diag, "U")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } if (*info != 0) { i__1 = -(*info); xerbla_("CTRTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check for singularity if non-unit. */ if (nounit) { i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { i__2 = a_subscr(*info, *info); if (a[i__2].r == 0.f && a[i__2].i == 0.f) { return 0; } /* L10: */ } *info = 0; } /* Determine the block size for this environment. Writing concatenation */ i__3[0] = 1, a__1[0] = uplo; i__3[1] = 1, a__1[1] = diag; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); nb = ilaenv_(&c__1, "CTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)2); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ ctrti2_(uplo, diag, n, &a[a_offset], lda, info); } else { /* Use blocked code */ if (upper) { /* Compute inverse of upper triangular matrix */ i__1 = *n; i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__4 = nb, i__5 = *n - j + 1; jb = min(i__4,i__5); /* Compute rows 1:j-1 of current block column */ i__4 = j - 1; ctrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & c_b1, &a[a_offset], lda, &a_ref(1, j), lda); i__4 = j - 1; q__1.r = -1.f, q__1.i = 0.f; ctrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & q__1, &a_ref(j, j), lda, &a_ref(1, j), lda); /* Compute inverse of current diagonal block */ ctrti2_("Upper", diag, &jb, &a_ref(j, j), lda, info); /* L20: */ } } else { /* Compute inverse of lower triangular matrix */ nn = (*n - 1) / nb * nb + 1; i__2 = -nb; for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) { /* Computing MIN */ i__1 = nb, i__4 = *n - j + 1; jb = min(i__1,i__4); if (j + jb <= *n) { /* Compute rows j+jb:n of current block column */ i__1 = *n - j - jb + 1; ctrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, &c_b1, &a_ref(j + jb, j + jb), lda, &a_ref(j + jb, j), lda); i__1 = *n - j - jb + 1; q__1.r = -1.f, q__1.i = 0.f; ctrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, &q__1, &a_ref(j, j), lda, &a_ref(j + jb, j), lda); } /* Compute inverse of current diagonal block */ ctrti2_("Lower", diag, &jb, &a_ref(j, j), lda, info); /* L30: */ } } } return 0; /* End of CTRTRI */ } /* ctrtri_ */
/* Subroutine */ int derrlq_(char *path, integer *nunit) { /* Builtin functions */ integer s_wsle(cilist *), e_wsle(void); /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); /* Local variables */ static integer info; static doublereal a[4] /* was [2][2] */, b[2]; static integer i__, j; static doublereal w[2], x[2]; extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dorgl2_( integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dorml2_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static doublereal af[4] /* was [2][2] */; extern /* Subroutine */ int alaesm_(char *, logical *, integer *), dgelqf_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), dgelqs_( integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), chkxer_(char *, integer *, integer *, logical *, logical *), dorglq_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), dormlq_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); /* Fortran I/O blocks */ static cilist io___1 = { 0, 0, 0, 0, 0 }; #define a_ref(a_1,a_2) a[(a_2)*2 + a_1 - 3] #define af_ref(a_1,a_2) af[(a_2)*2 + a_1 - 3] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DERRLQ tests the error exits for the DOUBLE PRECISION routines that use the LQ decomposition of a general matrix. Arguments ========= PATH (input) CHARACTER*3 The LAPACK path name for the routines to be tested. NUNIT (input) INTEGER The unit number for output. ===================================================================== */ infoc_1.nout = *nunit; io___1.ciunit = infoc_1.nout; s_wsle(&io___1); e_wsle(); /* Set the variables to innocuous values. */ for (j = 1; j <= 2; ++j) { for (i__ = 1; i__ <= 2; ++i__) { a_ref(i__, j) = 1. / (doublereal) (i__ + j); af_ref(i__, j) = 1. / (doublereal) (i__ + j); /* L10: */ } b[j - 1] = 0.; w[j - 1] = 0.; x[j - 1] = 0.; /* L20: */ } infoc_1.ok = TRUE_; /* Error exits for LQ factorization DGELQF */ s_copy(srnamc_1.srnamt, "DGELQF", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dgelqf_(&c_n1, &c__0, a, &c__1, b, w, &c__1, &info); chkxer_("DGELQF", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgelqf_(&c__0, &c_n1, a, &c__1, b, w, &c__1, &info); chkxer_("DGELQF", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 4; dgelqf_(&c__2, &c__1, a, &c__1, b, w, &c__2, &info); chkxer_("DGELQF", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 7; dgelqf_(&c__2, &c__1, a, &c__2, b, w, &c__1, &info); chkxer_("DGELQF", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); /* DGELQ2 */ s_copy(srnamc_1.srnamt, "DGELQ2", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dgelq2_(&c_n1, &c__0, a, &c__1, b, w, &info); chkxer_("DGELQ2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgelq2_(&c__0, &c_n1, a, &c__1, b, w, &info); chkxer_("DGELQ2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 4; dgelq2_(&c__2, &c__1, a, &c__1, b, w, &info); chkxer_("DGELQ2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); /* DGELQS */ s_copy(srnamc_1.srnamt, "DGELQS", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dgelqs_(&c_n1, &c__0, &c__0, a, &c__1, x, b, &c__1, w, &c__1, &info); chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgelqs_(&c__0, &c_n1, &c__0, a, &c__1, x, b, &c__1, w, &c__1, &info); chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgelqs_(&c__2, &c__1, &c__0, a, &c__2, x, b, &c__1, w, &c__1, &info); chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dgelqs_(&c__0, &c__0, &c_n1, a, &c__1, x, b, &c__1, w, &c__1, &info); chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dgelqs_(&c__2, &c__2, &c__0, a, &c__1, x, b, &c__2, w, &c__1, &info); chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 8; dgelqs_(&c__1, &c__2, &c__0, a, &c__1, x, b, &c__1, w, &c__1, &info); chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 10; dgelqs_(&c__1, &c__1, &c__2, a, &c__1, x, b, &c__1, w, &c__1, &info); chkxer_("DGELQS", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); /* DORGLQ */ s_copy(srnamc_1.srnamt, "DORGLQ", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dorglq_(&c_n1, &c__0, &c__0, a, &c__1, x, w, &c__1, &info); chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dorglq_(&c__0, &c_n1, &c__0, a, &c__1, x, w, &c__1, &info); chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dorglq_(&c__2, &c__1, &c__0, a, &c__2, x, w, &c__2, &info); chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dorglq_(&c__0, &c__0, &c_n1, a, &c__1, x, w, &c__1, &info); chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dorglq_(&c__1, &c__1, &c__2, a, &c__1, x, w, &c__1, &info); chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dorglq_(&c__2, &c__2, &c__0, a, &c__1, x, w, &c__2, &info); chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 8; dorglq_(&c__2, &c__2, &c__0, a, &c__2, x, w, &c__1, &info); chkxer_("DORGLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); /* DORGL2 */ s_copy(srnamc_1.srnamt, "DORGL2", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dorgl2_(&c_n1, &c__0, &c__0, a, &c__1, x, w, &info); chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dorgl2_(&c__0, &c_n1, &c__0, a, &c__1, x, w, &info); chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dorgl2_(&c__2, &c__1, &c__0, a, &c__2, x, w, &info); chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dorgl2_(&c__0, &c__0, &c_n1, a, &c__1, x, w, &info); chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dorgl2_(&c__1, &c__1, &c__2, a, &c__1, x, w, &info); chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dorgl2_(&c__2, &c__2, &c__0, a, &c__1, x, w, &info); chkxer_("DORGL2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); /* DORMLQ */ s_copy(srnamc_1.srnamt, "DORMLQ", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dormlq_("/", "N", &c__0, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dormlq_("L", "/", &c__0, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dormlq_("L", "N", &c_n1, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 4; dormlq_("L", "N", &c__0, &c_n1, &c__0, a, &c__1, x, af, &c__1, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dormlq_("L", "N", &c__0, &c__0, &c_n1, a, &c__1, x, af, &c__1, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dormlq_("L", "N", &c__0, &c__1, &c__1, a, &c__1, x, af, &c__1, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dormlq_("R", "N", &c__1, &c__0, &c__1, a, &c__1, x, af, &c__1, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 7; dormlq_("L", "N", &c__2, &c__0, &c__2, a, &c__1, x, af, &c__2, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 7; dormlq_("R", "N", &c__0, &c__2, &c__2, a, &c__1, x, af, &c__1, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 10; dormlq_("L", "N", &c__2, &c__1, &c__0, a, &c__2, x, af, &c__1, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 12; dormlq_("L", "N", &c__1, &c__2, &c__0, a, &c__1, x, af, &c__1, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 12; dormlq_("R", "N", &c__2, &c__1, &c__0, a, &c__1, x, af, &c__2, w, &c__1, & info); chkxer_("DORMLQ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); /* DORML2 */ s_copy(srnamc_1.srnamt, "DORML2", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dorml2_("/", "N", &c__0, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &info); chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dorml2_("L", "/", &c__0, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &info); chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dorml2_("L", "N", &c_n1, &c__0, &c__0, a, &c__1, x, af, &c__1, w, &info); chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 4; dorml2_("L", "N", &c__0, &c_n1, &c__0, a, &c__1, x, af, &c__1, w, &info); chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dorml2_("L", "N", &c__0, &c__0, &c_n1, a, &c__1, x, af, &c__1, w, &info); chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dorml2_("L", "N", &c__0, &c__1, &c__1, a, &c__1, x, af, &c__1, w, &info); chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dorml2_("R", "N", &c__1, &c__0, &c__1, a, &c__1, x, af, &c__1, w, &info); chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 7; dorml2_("L", "N", &c__2, &c__1, &c__2, a, &c__1, x, af, &c__2, w, &info); chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 7; dorml2_("R", "N", &c__1, &c__2, &c__2, a, &c__1, x, af, &c__1, w, &info); chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 10; dorml2_("L", "N", &c__2, &c__1, &c__0, a, &c__2, x, af, &c__1, w, &info); chkxer_("DORML2", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); /* Print a summary line. */ alaesm_(path, &infoc_1.ok, &infoc_1.nout); return 0; /* End of DERRLQ */ } /* derrlq_ */
/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DGELQF computes an LQ factorization of a real M-by-N matrix A: A = L * Q. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and below the diagonal of the array contain the m-by-min(m,n) lower trapezoidal matrix L (L is lower triangular if m <= n); the elements above the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of elementary reflectors (see Further Details). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,M). For optimum performance LWORK >= M*NB, where NB is the optimal blocksize. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== The matrix Q is represented as a product of elementary reflectors Q = H(k) . . . H(2) H(1), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), and tau in TAU(i). ===================================================================== Test the input arguments Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i__, k, nbmin, iinfo; extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer ib, nb; extern /* Subroutine */ int dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); static integer nx; extern /* Subroutine */ int dlarft_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ldwork, lwkopt; static logical lquery; static integer iws; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) 1); lwkopt = *m * nb; work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } else if (*lwork < max(1,*m) && ! lquery) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("DGELQF", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ k = min(*m,*n); if (k == 0) { work[1] = 1.; return 0; } nbmin = 2; nx = 0; iws = *m; if (nb > 1 && nb < k) { /* Determine when to cross over from blocked to unblocked code. Computing MAX */ i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); if (nx < k) { /* Determine if workspace is large enough for blocked code. */ ldwork = *m; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduce NB and determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & c_n1, (ftnlen)6, (ftnlen)1); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < k && nx < k) { /* Use blocked code initially */ i__1 = k - nx; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = k - i__ + 1; ib = min(i__3,nb); /* Compute the LQ factorization of the current block A(i:i+ib-1,i:n) */ i__3 = *n - i__ + 1; dgelq2_(&ib, &i__3, &a_ref(i__, i__), lda, &tau[i__], &work[1], & iinfo); if (i__ + ib <= *m) { /* Form the triangular factor of the block reflector H = H(i) H(i+1) . . . H(i+ib-1) */ i__3 = *n - i__ + 1; dlarft_("Forward", "Rowwise", &i__3, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[1], &ldwork); /* Apply H to A(i+ib:m,i:n) from the right */ i__3 = *m - i__ - ib + 1; i__4 = *n - i__ + 1; dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, &i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, &a_ref(i__ + ib, i__), lda, &work[ib + 1], &ldwork); } /* L10: */ } } else { i__ = 1; } /* Use unblocked code to factor the last or only block. */ if (i__ <= k) { i__2 = *m - i__ + 1; i__1 = *n - i__ + 1; dgelq2_(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], & iinfo); } work[1] = (doublereal) iws; return 0; /* End of DGELQF */ } /* dgelqf_ */
/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer * lda, doublereal *tau, doublereal *work, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DGEQR2 computes a QR factorization of a real m by n matrix A: A = Q * R. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the m by n matrix A. On exit, the elements on and above the diagonal of the array contain the min(m,n) by n upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of elementary reflectors (see Further Details). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). WORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i). ===================================================================== Test the input arguments Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, k; extern /* Subroutine */ int dlarf_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *), dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); static doublereal aii; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("DGEQR2", &i__1); return 0; } k = min(*m,*n); i__1 = k; for (i__ = 1; i__ <= i__1; ++i__) { /* Generate elementary reflector H(i) to annihilate A(i+1:m,i) Computing MIN */ i__2 = i__ + 1; i__3 = *m - i__ + 1; dlarfg_(&i__3, &a_ref(i__, i__), &a_ref(min(i__2,*m), i__), &c__1, & tau[i__]); if (i__ < *n) { /* Apply H(i) to A(i:m,i+1:n) from the left */ aii = a_ref(i__, i__); a_ref(i__, i__) = 1.; i__2 = *m - i__ + 1; i__3 = *n - i__; dlarf_("Left", &i__2, &i__3, &a_ref(i__, i__), &c__1, &tau[i__], & a_ref(i__, i__ + 1), lda, &work[1]); a_ref(i__, i__) = aii; } /* L10: */ } return 0; /* End of DGEQR2 */ } /* dgeqr2_ */
/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, doublereal *x, integer *incx, doublereal *y, integer *incy, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer info; static doublereal temp1, temp2; static integer i__, j; extern logical lsame_(char *, char *); static integer ix, iy, jx, jy, kx, ky; extern /* Subroutine */ int xerbla_(char *, integer *); #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] /* Purpose ======= DSYR2 performs the symmetric rank 2 operation A := alpha*x*y' + alpha*y*x' + A, where alpha is a scalar, x and y are n element vectors and A is an n by n symmetric matrix. Parameters ========== UPLO - CHARACTER*1. On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced as follows: UPLO = 'U' or 'u' Only the upper triangular part of A is to be referenced. UPLO = 'L' or 'l' Only the lower triangular part of A is to be referenced. Unchanged on exit. N - INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. Unchanged on exit. ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. Unchanged on exit. X - DOUBLE PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. Unchanged on exit. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. Y - DOUBLE PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. Unchanged on exit. INCY - INTEGER. On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. Unchanged on exit. A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of A is not referenced. On exit, the upper triangular part of the array A is overwritten by the upper triangular part of the updated matrix. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array A must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of A is not referenced. On exit, the lower triangular part of the array A is overwritten by the lower triangular part of the updated matrix. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least max( 1, n ). Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments */ --x; --y; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; /* Function Body */ info = 0; if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } else if (*lda < max(1,*n)) { info = 9; } if (info != 0) { xerbla_("DSYR2 ", &info); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.) { return 0; } /* Set up the start points in X and Y if the increments are not both unity. */ if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } jx = kx; jy = ky; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through the triangular part of A. */ if (lsame_(uplo, "U")) { /* Form A when A is stored in the upper triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0. || y[j] != 0.) { temp1 = *alpha * y[j]; temp2 = *alpha * x[j]; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp1 + y[ i__] * temp2; /* L10: */ } } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0. || y[jy] != 0.) { temp1 = *alpha * y[jy]; temp2 = *alpha * x[jx]; ix = kx; iy = ky; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L30: */ } } jx += *incx; jy += *incy; /* L40: */ } } } else { /* Form A when A is stored in the lower triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0. || y[j] != 0.) { temp1 = *alpha * y[j]; temp2 = *alpha * x[j]; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { a_ref(i__, j) = a_ref(i__, j) + x[i__] * temp1 + y[ i__] * temp2; /* L50: */ } } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0. || y[jy] != 0.) { temp1 = *alpha * y[jy]; temp2 = *alpha * x[jx]; ix = jx; iy = jy; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { a_ref(i__, j) = a_ref(i__, j) + x[ix] * temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L70: */ } } jx += *incx; jy += *incy; /* L80: */ } } } return 0; /* End of DSYR2 . */ } /* dsyr2_ */
/* Subroutine */ int ztrt05_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *x, integer *ldx, doublecomplex *xact, integer *ldxact, doublereal *ferr, doublereal *berr, doublereal * reslts) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, xact_dim1, xact_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3, d__4; doublecomplex z__1, z__2; /* Builtin functions */ double d_imag(doublecomplex *); /* Local variables */ static doublereal diff, axbi; static integer imax; static doublereal unfl, ovfl; static logical unit; static integer i__, j, k; extern logical lsame_(char *, char *); static logical upper; static doublereal xnorm; extern doublereal dlamch_(char *); static doublereal errbnd; extern integer izamax_(integer *, doublecomplex *, integer *); static logical notran; static integer ifu; static doublereal eps, tmp; #define xact_subscr(a_1,a_2) (a_2)*xact_dim1 + a_1 #define xact_ref(a_1,a_2) xact[xact_subscr(a_1,a_2)] #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define x_subscr(a_1,a_2) (a_2)*x_dim1 + a_1 #define x_ref(a_1,a_2) x[x_subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= ZTRT05 tests the error bounds from iterative refinement for the computed solution to a system of equations A*X = B, where A is a triangular n by n matrix. RESLTS(1) = test of the error bound = norm(X - XACT) / ( norm(X) * FERR ) A large value is returned if this ratio is not less than one. RESLTS(2) = residual from the iterative refinement routine = the maximum of BERR / ( (n+1)*EPS + (*) ), where (*) = (n+1)*UNFL / (min_i (abs(A)*abs(X) +abs(b))_i ) Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the matrix A is upper or lower triangular. = 'U': Upper triangular = 'L': Lower triangular TRANS (input) CHARACTER*1 Specifies the form of the system of equations. = 'N': A * X = B (No transpose) = 'T': A'* X = B (Transpose) = 'C': A'* X = B (Conjugate transpose = Transpose) DIAG (input) CHARACTER*1 Specifies whether or not the matrix A is unit triangular. = 'N': Non-unit triangular = 'U': Unit triangular N (input) INTEGER The number of rows of the matrices X, B, and XACT, and the order of the matrix A. N >= 0. NRHS (input) INTEGER The number of columns of the matrices X, B, and XACT. NRHS >= 0. A (input) COMPLEX*16 array, dimension (LDA,N) The triangular matrix A. If UPLO = 'U', the leading n by n upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading n by n lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input) COMPLEX*16 array, dimension (LDB,NRHS) The right hand side vectors for the system of linear equations. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input) COMPLEX*16 array, dimension (LDX,NRHS) The computed solution vectors. Each vector is stored as a column of the matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). XACT (input) COMPLEX*16 array, dimension (LDX,NRHS) The exact solution vectors. Each vector is stored as a column of the matrix XACT. LDXACT (input) INTEGER The leading dimension of the array XACT. LDXACT >= max(1,N). FERR (input) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bounds for each solution vector X. If XTRUE is the true solution, FERR bounds the magnitude of the largest entry in (X - XTRUE) divided by the magnitude of the largest entry in X. BERR (input) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector (i.e., the smallest relative change in any entry of A or B that makes X an exact solution). RESLTS (output) DOUBLE PRECISION array, dimension (2) The maximum over the NRHS solution vectors of the ratios: RESLTS(1) = norm(X - XACT) / ( norm(X) * FERR ) RESLTS(2) = BERR / ( (n+1)*EPS + (*) ) ===================================================================== Quick exit if N = 0 or NRHS = 0. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; xact_dim1 = *ldxact; xact_offset = 1 + xact_dim1 * 1; xact -= xact_offset; --ferr; --berr; --reslts; /* Function Body */ if (*n <= 0 || *nrhs <= 0) { reslts[1] = 0.; reslts[2] = 0.; return 0; } eps = dlamch_("Epsilon"); unfl = dlamch_("Safe minimum"); ovfl = 1. / unfl; upper = lsame_(uplo, "U"); notran = lsame_(trans, "N"); unit = lsame_(diag, "U"); /* Test 1: Compute the maximum of norm(X - XACT) / ( norm(X) * FERR ) over all the vectors X and XACT using the infinity-norm. */ errbnd = 0.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { imax = izamax_(n, &x_ref(1, j), &c__1); /* Computing MAX */ i__2 = x_subscr(imax, j); d__3 = (d__1 = x[i__2].r, abs(d__1)) + (d__2 = d_imag(&x_ref(imax, j)) , abs(d__2)); xnorm = max(d__3,unfl); diff = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = x_subscr(i__, j); i__4 = xact_subscr(i__, j); z__2.r = x[i__3].r - xact[i__4].r, z__2.i = x[i__3].i - xact[i__4] .i; z__1.r = z__2.r, z__1.i = z__2.i; /* Computing MAX */ d__3 = diff, d__4 = (d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(& z__1), abs(d__2)); diff = max(d__3,d__4); /* L10: */ } if (xnorm > 1.) { goto L20; } else if (diff <= ovfl * xnorm) { goto L20; } else { errbnd = 1. / eps; goto L30; } L20: if (diff / xnorm <= ferr[j]) { /* Computing MAX */ d__1 = errbnd, d__2 = diff / xnorm / ferr[j]; errbnd = max(d__1,d__2); } else { errbnd = 1. / eps; } L30: ; } reslts[1] = errbnd; /* Test 2: Compute the maximum of BERR / ( (n+1)*EPS + (*) ), where (*) = (n+1)*UNFL / (min_i (abs(A)*abs(X) +abs(b))_i ) */ ifu = 0; if (unit) { ifu = 1; } i__1 = *nrhs; for (k = 1; k <= i__1; ++k) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = b_subscr(i__, k); tmp = (d__1 = b[i__3].r, abs(d__1)) + (d__2 = d_imag(&b_ref(i__, k)), abs(d__2)); if (upper) { if (! notran) { i__3 = i__ - ifu; for (j = 1; j <= i__3; ++j) { i__4 = a_subscr(j, i__); i__5 = x_subscr(j, k); tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a_ref(j, i__)), abs(d__2))) * ((d__3 = x[i__5].r, abs(d__3)) + (d__4 = d_imag(& x_ref(j, k)), abs(d__4))); /* L40: */ } if (unit) { i__3 = x_subscr(i__, k); tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( &x_ref(i__, k)), abs(d__2)); } } else { if (unit) { i__3 = x_subscr(i__, k); tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( &x_ref(i__, k)), abs(d__2)); } i__3 = *n; for (j = i__ + ifu; j <= i__3; ++j) { i__4 = a_subscr(i__, j); i__5 = x_subscr(j, k); tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a_ref(i__, j)), abs(d__2))) * ((d__3 = x[i__5].r, abs(d__3)) + (d__4 = d_imag(& x_ref(j, k)), abs(d__4))); /* L50: */ } } } else { if (notran) { i__3 = i__ - ifu; for (j = 1; j <= i__3; ++j) { i__4 = a_subscr(i__, j); i__5 = x_subscr(j, k); tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a_ref(i__, j)), abs(d__2))) * ((d__3 = x[i__5].r, abs(d__3)) + (d__4 = d_imag(& x_ref(j, k)), abs(d__4))); /* L60: */ } if (unit) { i__3 = x_subscr(i__, k); tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( &x_ref(i__, k)), abs(d__2)); } } else { if (unit) { i__3 = x_subscr(i__, k); tmp += (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag( &x_ref(i__, k)), abs(d__2)); } i__3 = *n; for (j = i__ + ifu; j <= i__3; ++j) { i__4 = a_subscr(j, i__); i__5 = x_subscr(j, k); tmp += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a_ref(j, i__)), abs(d__2))) * ((d__3 = x[i__5].r, abs(d__3)) + (d__4 = d_imag(& x_ref(j, k)), abs(d__4))); /* L70: */ } } } if (i__ == 1) { axbi = tmp; } else { axbi = min(axbi,tmp); } /* L80: */ } /* Computing MAX */ d__1 = axbi, d__2 = (*n + 1) * unfl; tmp = berr[k] / ((*n + 1) * eps + (*n + 1) * unfl / max(d__1,d__2)); if (k == 1) { reslts[2] = tmp; } else { reslts[2] = max(reslts[2],tmp); } /* L90: */ } return 0; /* End of ZTRT05 */ } /* ztrt05_ */
/* Subroutine */ int ztgsna_(char *job, char *howmny, logical *select, integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer * ldvr, doublereal *s, doublereal *dif, integer *mm, integer *m, doublecomplex *work, integer *lwork, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= ZTGSNA estimates reciprocal condition numbers for specified eigenvalues and/or eigenvectors of a matrix pair (A, B). (A, B) must be in generalized Schur canonical form, that is, A and B are both upper triangular. Arguments ========= JOB (input) CHARACTER*1 Specifies whether condition numbers are required for eigenvalues (S) or eigenvectors (DIF): = 'E': for eigenvalues only (S); = 'V': for eigenvectors only (DIF); = 'B': for both eigenvalues and eigenvectors (S and DIF). HOWMNY (input) CHARACTER*1 = 'A': compute condition numbers for all eigenpairs; = 'S': compute condition numbers for selected eigenpairs specified by the array SELECT. SELECT (input) LOGICAL array, dimension (N) If HOWMNY = 'S', SELECT specifies the eigenpairs for which condition numbers are required. To select condition numbers for the corresponding j-th eigenvalue and/or eigenvector, SELECT(j) must be set to .TRUE.. If HOWMNY = 'A', SELECT is not referenced. N (input) INTEGER The order of the square matrix pair (A, B). N >= 0. A (input) COMPLEX*16 array, dimension (LDA,N) The upper triangular matrix A in the pair (A,B). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input) COMPLEX*16 array, dimension (LDB,N) The upper triangular matrix B in the pair (A, B). LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). VL (input) COMPLEX*16 array, dimension (LDVL,M) IF JOB = 'E' or 'B', VL must contain left eigenvectors of (A, B), corresponding to the eigenpairs specified by HOWMNY and SELECT. The eigenvectors must be stored in consecutive columns of VL, as returned by ZTGEVC. If JOB = 'V', VL is not referenced. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; and If JOB = 'E' or 'B', LDVL >= N. VR (input) COMPLEX*16 array, dimension (LDVR,M) IF JOB = 'E' or 'B', VR must contain right eigenvectors of (A, B), corresponding to the eigenpairs specified by HOWMNY and SELECT. The eigenvectors must be stored in consecutive columns of VR, as returned by ZTGEVC. If JOB = 'V', VR is not referenced. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; If JOB = 'E' or 'B', LDVR >= N. S (output) DOUBLE PRECISION array, dimension (MM) If JOB = 'E' or 'B', the reciprocal condition numbers of the selected eigenvalues, stored in consecutive elements of the array. If JOB = 'V', S is not referenced. DIF (output) DOUBLE PRECISION array, dimension (MM) If JOB = 'V' or 'B', the estimated reciprocal condition numbers of the selected eigenvectors, stored in consecutive elements of the array. If the eigenvalues cannot be reordered to compute DIF(j), DIF(j) is set to 0; this can only occur when the true value would be very small anyway. For each eigenvalue/vector specified by SELECT, DIF stores a Frobenius norm-based estimate of Difl. If JOB = 'E', DIF is not referenced. MM (input) INTEGER The number of elements in the arrays S and DIF. MM >= M. M (output) INTEGER The number of elements of the arrays S and DIF used to store the specified condition numbers; for each selected eigenvalue one element is used. If HOWMNY = 'A', M is set to N. WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) If JOB = 'E', WORK is not referenced. Otherwise, on exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= 1. If JOB = 'V' or 'B', LWORK >= 2*N*N. IWORK (workspace) INTEGER array, dimension (N+2) If JOB = 'E', IWORK is not referenced. INFO (output) INTEGER = 0: Successful exit < 0: If INFO = -i, the i-th argument had an illegal value Further Details =============== The reciprocal of the condition number of the i-th generalized eigenvalue w = (a, b) is defined as S(I) = (|v'Au|**2 + |v'Bu|**2)**(1/2) / (norm(u)*norm(v)) where u and v are the right and left eigenvectors of (A, B) corresponding to w; |z| denotes the absolute value of the complex number, and norm(u) denotes the 2-norm of the vector u. The pair (a, b) corresponds to an eigenvalue w = a/b (= v'Au/v'Bu) of the matrix pair (A, B). If both a and b equal zero, then (A,B) is singular and S(I) = -1 is returned. An approximate error bound on the chordal distance between the i-th computed generalized eigenvalue w and the corresponding exact eigenvalue lambda is chord(w, lambda) <= EPS * norm(A, B) / S(I), where EPS is the machine precision. The reciprocal of the condition number of the right eigenvector u and left eigenvector v corresponding to the generalized eigenvalue w is defined as follows. Suppose (A, B) = ( a * ) ( b * ) 1 ( 0 A22 ),( 0 B22 ) n-1 1 n-1 1 n-1 Then the reciprocal condition number DIF(I) is Difl[(a, b), (A22, B22)] = sigma-min( Zl ) where sigma-min(Zl) denotes the smallest singular value of Zl = [ kron(a, In-1) -kron(1, A22) ] [ kron(b, In-1) -kron(1, B22) ]. Here In-1 is the identity matrix of size n-1 and X' is the conjugate transpose of X. kron(X, Y) is the Kronecker product between the matrices X and Y. We approximate the smallest singular value of Zl with an upper bound. This is done by ZLATDF. An approximate error bound for a computed eigenvector VL(i) or VR(i) is given by EPS * norm(A, B) / DIF(i). See ref. [2-3] for more details and further references. Based on contributions by Bo Kagstrom and Peter Poromaa, Department of Computing Science, Umea University, S-901 87 Umea, Sweden. References ========== [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the Generalized Real Schur Form of a Regular Matrix Pair (A, B), in M.S. Moonen et al (eds), Linear Algebra for Large Scale and Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified Eigenvalues of a Regular Matrix Pair (A, B) and Condition Estimation: Theory, Algorithms and Software, Report UMINF - 94.04, Department of Computing Science, Umea University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. To appear in Numerical Algorithms, 1996. [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software for Solving the Generalized Sylvester Equation and Estimating the Separation between Regular Matrix Pairs, Report UMINF - 93.23, Department of Computing Science, Umea University, S-901 87 Umea, Sweden, December 1993, Revised April 1994, Also as LAPACK Working Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996. ===================================================================== Decode and test the input parameters Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublecomplex c_b19 = {1.,0.}; static doublecomplex c_b20 = {0.,0.}; static logical c_false = FALSE_; static integer c__3 = 3; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2; doublereal d__1, d__2; doublecomplex z__1; /* Builtin functions */ double z_abs(doublecomplex *); /* Local variables */ static doublereal cond; static integer ierr, ifst; static doublereal lnrm; static doublecomplex yhax, yhbx; static integer ilst; static doublereal rnrm; static integer i__, k; static doublereal scale; extern logical lsame_(char *, char *); extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static integer lwmin; extern /* Subroutine */ int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *); static logical wants; static integer llwrk, n1, n2; static doublecomplex dummy[1]; extern doublereal dlapy2_(doublereal *, doublereal *); extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); static doublecomplex dummy1[1]; extern doublereal dznrm2_(integer *, doublecomplex *, integer *), dlamch_( char *); static integer ks; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum; static logical wantbh, wantdf, somcon; extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), ztgexc_(logical *, logical *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, integer *); static doublereal smlnum; static logical lquery; extern /* Subroutine */ int ztgsyl_(char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublereal *, doublecomplex *, integer *, integer *, integer *); static doublereal eps; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1 #define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)] #define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1 #define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)] --select; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1 * 1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1 * 1; vr -= vr_offset; --s; --dif; --work; --iwork; /* Function Body */ wantbh = lsame_(job, "B"); wants = lsame_(job, "E") || wantbh; wantdf = lsame_(job, "V") || wantbh; somcon = lsame_(howmny, "S"); *info = 0; lquery = *lwork == -1; if (lsame_(job, "V") || lsame_(job, "B")) { /* Computing MAX */ i__1 = 1, i__2 = (*n << 1) * *n; lwmin = max(i__1,i__2); } else { lwmin = 1; } if (! wants && ! wantdf) { *info = -1; } else if (! lsame_(howmny, "A") && ! somcon) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } else if (wants && *ldvl < *n) { *info = -10; } else if (wants && *ldvr < *n) { *info = -12; } else { /* Set M to the number of eigenpairs for which condition numbers are required, and test MM. */ if (somcon) { *m = 0; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (select[k]) { ++(*m); } /* L10: */ } } else { *m = *n; } if (*mm < *m) { *info = -15; } else if (*lwork < lwmin && ! lquery) { *info = -18; } } if (*info == 0) { work[1].r = (doublereal) lwmin, work[1].i = 0.; } if (*info != 0) { i__1 = -(*info); xerbla_("ZTGSNA", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = dlamch_("P"); smlnum = dlamch_("S") / eps; bignum = 1. / smlnum; dlabad_(&smlnum, &bignum); llwrk = *lwork - (*n << 1) * *n; ks = 0; i__1 = *n; for (k = 1; k <= i__1; ++k) { /* Determine whether condition numbers are required for the k-th eigenpair. */ if (somcon) { if (! select[k]) { goto L20; } } ++ks; if (wants) { /* Compute the reciprocal condition number of the k-th eigenvalue. */ rnrm = dznrm2_(n, &vr_ref(1, ks), &c__1); lnrm = dznrm2_(n, &vl_ref(1, ks), &c__1); zgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr_ref(1, ks), & c__1, &c_b20, &work[1], &c__1); zdotc_(&z__1, n, &work[1], &c__1, &vl_ref(1, ks), &c__1); yhax.r = z__1.r, yhax.i = z__1.i; zgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr_ref(1, ks), & c__1, &c_b20, &work[1], &c__1); zdotc_(&z__1, n, &work[1], &c__1, &vl_ref(1, ks), &c__1); yhbx.r = z__1.r, yhbx.i = z__1.i; d__1 = z_abs(&yhax); d__2 = z_abs(&yhbx); cond = dlapy2_(&d__1, &d__2); if (cond == 0.) { s[ks] = -1.; } else { s[ks] = cond / (rnrm * lnrm); } } if (wantdf) { if (*n == 1) { d__1 = z_abs(&a_ref(1, 1)); d__2 = z_abs(&b_ref(1, 1)); dif[ks] = dlapy2_(&d__1, &d__2); goto L20; } /* Estimate the reciprocal condition number of the k-th eigenvectors. Copy the matrix (A, B) to the array WORK and move the (k,k)th pair to the (1,1) position. */ zlacpy_("Full", n, n, &a[a_offset], lda, &work[1], n); zlacpy_("Full", n, n, &b[b_offset], ldb, &work[*n * *n + 1], n); ifst = k; ilst = 1; ztgexc_(&c_false, &c_false, n, &work[1], n, &work[*n * *n + 1], n, dummy, &c__1, dummy1, &c__1, &ifst, &ilst, &ierr); if (ierr > 0) { /* Ill-conditioned problem - swap rejected. */ dif[ks] = 0.; } else { /* Reordering successful, solve generalized Sylvester equation for R and L, A22 * R - L * A11 = A12 B22 * R - L * B11 = B12, and compute estimate of Difl[(A11,B11), (A22, B22)]. */ n1 = 1; n2 = *n - n1; i__ = *n * *n + 1; ztgsyl_("N", &c__3, &n2, &n1, &work[*n * n1 + n1 + 1], n, & work[1], n, &work[n1 + 1], n, &work[*n * n1 + n1 + i__], n, &work[i__], n, &work[n1 + i__], n, &scale, & dif[ks], &work[(*n * *n << 1) + 1], &llwrk, &iwork[1], &ierr); } } L20: ; } work[1].r = (doublereal) lwmin, work[1].i = 0.; return 0; /* End of ZTGSNA */ } /* ztgsna_ */
/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku, doublereal *cfrom, doublereal *cto, integer *m, integer *n, doublereal *a, integer *lda, integer *info) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DLASCL multiplies the M by N real matrix A by the real scalar CTO/CFROM. This is done without over/underflow as long as the final result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that A may be full, upper triangular, lower triangular, upper Hessenberg, or banded. Arguments ========= TYPE (input) CHARACTER*1 TYPE indices the storage type of the input matrix. = 'G': A is a full matrix. = 'L': A is a lower triangular matrix. = 'U': A is an upper triangular matrix. = 'H': A is an upper Hessenberg matrix. = 'B': A is a symmetric band matrix with lower bandwidth KL and upper bandwidth KU and with the only the lower half stored. = 'Q': A is a symmetric band matrix with lower bandwidth KL and upper bandwidth KU and with the only the upper half stored. = 'Z': A is a band matrix with lower bandwidth KL and upper bandwidth KU. KL (input) INTEGER The lower bandwidth of A. Referenced only if TYPE = 'B', 'Q' or 'Z'. KU (input) INTEGER The upper bandwidth of A. Referenced only if TYPE = 'B', 'Q' or 'Z'. CFROM (input) DOUBLE PRECISION CTO (input) DOUBLE PRECISION The matrix A is multiplied by CTO/CFROM. A(I,J) is computed without over/underflow if the final result CTO*A(I,J)/CFROM can be represented without over/underflow. CFROM must be nonzero. M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,M) The matrix to be multiplied by CTO/CFROM. See TYPE for the storage type. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). INFO (output) INTEGER 0 - successful exit <0 - if INFO = -i, the i-th argument had an illegal value. ===================================================================== Test the input arguments Parameter adjustments */ /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ static logical done; static doublereal ctoc; static integer i__, j; extern logical lsame_(char *, char *); static integer itype, k1, k2, k3, k4; static doublereal cfrom1; extern doublereal dlamch_(char *); static doublereal cfromc; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum, smlnum, mul, cto1; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; /* Function Body */ *info = 0; if (lsame_(type__, "G")) { itype = 0; } else if (lsame_(type__, "L")) { itype = 1; } else if (lsame_(type__, "U")) { itype = 2; } else if (lsame_(type__, "H")) { itype = 3; } else if (lsame_(type__, "B")) { itype = 4; } else if (lsame_(type__, "Q")) { itype = 5; } else if (lsame_(type__, "Z")) { itype = 6; } else { itype = -1; } if (itype == -1) { *info = -1; } else if (*cfrom == 0.) { *info = -4; } else if (*m < 0) { *info = -6; } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { *info = -7; } else if (itype <= 3 && *lda < max(1,*m)) { *info = -9; } else if (itype >= 4) { /* Computing MAX */ i__1 = *m - 1; if (*kl < 0 || *kl > max(i__1,0)) { *info = -2; } else { /* if(complicated condition) */ /* Computing MAX */ i__1 = *n - 1; if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && *kl != *ku) { *info = -3; } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { *info = -9; } } } if (*info != 0) { i__1 = -(*info); xerbla_("DLASCL", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *m == 0) { return 0; } /* Get machine parameters */ smlnum = dlamch_("S"); bignum = 1. / smlnum; cfromc = *cfrom; ctoc = *cto; L10: cfrom1 = cfromc * smlnum; cto1 = ctoc / bignum; if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { mul = smlnum; done = FALSE_; cfromc = cfrom1; } else if (abs(cto1) > abs(cfromc)) { mul = bignum; done = FALSE_; ctoc = cto1; } else { mul = ctoc / cfromc; done = TRUE_; } if (itype == 0) { /* Full matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { a_ref(i__, j) = a_ref(i__, j) * mul; /* L20: */ } /* L30: */ } } else if (itype == 1) { /* Lower triangular matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = j; i__ <= i__2; ++i__) { a_ref(i__, j) = a_ref(i__, j) * mul; /* L40: */ } /* L50: */ } } else if (itype == 2) { /* Upper triangular matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = min(j,*m); for (i__ = 1; i__ <= i__2; ++i__) { a_ref(i__, j) = a_ref(i__, j) * mul; /* L60: */ } /* L70: */ } } else if (itype == 3) { /* Upper Hessenberg matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = j + 1; i__2 = min(i__3,*m); for (i__ = 1; i__ <= i__2; ++i__) { a_ref(i__, j) = a_ref(i__, j) * mul; /* L80: */ } /* L90: */ } } else if (itype == 4) { /* Lower half of a symmetric band matrix */ k3 = *kl + 1; k4 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = k3, i__4 = k4 - j; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { a_ref(i__, j) = a_ref(i__, j) * mul; /* L100: */ } /* L110: */ } } else if (itype == 5) { /* Upper half of a symmetric band matrix */ k1 = *ku + 2; k3 = *ku + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = k1 - j; i__3 = k3; for (i__ = max(i__2,1); i__ <= i__3; ++i__) { a_ref(i__, j) = a_ref(i__, j) * mul; /* L120: */ } /* L130: */ } } else if (itype == 6) { /* Band matrix */ k1 = *kl + *ku + 2; k2 = *kl + 1; k3 = (*kl << 1) + *ku + 1; k4 = *kl + *ku + 1 + *m; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__3 = k1 - j; /* Computing MIN */ i__4 = k3, i__5 = k4 - j; i__2 = min(i__4,i__5); for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { a_ref(i__, j) = a_ref(i__, j) * mul; /* L140: */ } /* L150: */ } } if (! done) { goto L10; } return 0; /* End of DLASCL */ } /* dlascl_ */
/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, integer *info) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= SLAUUM computes the product U * U' or L' * L, where the triangular factor U or L is stored in the upper or lower triangular part of the array A. If UPLO = 'U' or 'u' then the upper triangle of the result is stored, overwriting the factor U in A. If UPLO = 'L' or 'l' then the lower triangle of the result is stored, overwriting the factor L in A. This is the blocked form of the algorithm, calling Level 3 BLAS. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the triangular factor stored in the array A is upper or lower triangular: = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The order of the triangular factor U or L. N >= 0. A (input/output) REAL array, dimension (LDA,N) On entry, the triangular factor U or L. On exit, if UPLO = 'U', the upper triangle of A is overwritten with the upper triangle of the product U * U'; if UPLO = 'L', the lower triangle of A is overwritten with the lower triangle of the product L' * L. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -k, the k-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static real c_b15 = 1.f; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i__; extern logical lsame_(char *, char *); extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static logical upper; extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer * ); static integer ib; extern /* Subroutine */ int slauu2_(char *, integer *, real *, integer *, integer *); static integer nb; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAUUM", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ slauu2_(uplo, n, &a[a_offset], lda, info); } else { /* Use blocked code */ if (upper) { /* Compute the product U * U'. */ i__1 = *n; i__2 = nb; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__3 = nb, i__4 = *n - i__ + 1; ib = min(i__3,i__4); i__3 = i__ - 1; strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, &c_b15, &a_ref(i__, i__), lda, &a_ref(1, i__), lda); slauu2_("Upper", &ib, &a_ref(i__, i__), lda, info); if (i__ + ib <= *n) { i__3 = i__ - 1; i__4 = *n - i__ - ib + 1; sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & c_b15, &a_ref(1, i__ + ib), lda, &a_ref(i__, i__ + ib), lda, &c_b15, &a_ref(1, i__), lda); i__3 = *n - i__ - ib + 1; ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, & a_ref(i__, i__ + ib), lda, &c_b15, &a_ref(i__, i__), lda); } /* L10: */ } } else { /* Compute the product L' * L. */ i__2 = *n; i__1 = nb; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* Computing MIN */ i__3 = nb, i__4 = *n - i__ + 1; ib = min(i__3,i__4); i__3 = i__ - 1; strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & c_b15, &a_ref(i__, i__), lda, &a_ref(i__, 1), lda); slauu2_("Lower", &ib, &a_ref(i__, i__), lda, info); if (i__ + ib <= *n) { i__3 = i__ - 1; i__4 = *n - i__ - ib + 1; sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & c_b15, &a_ref(i__ + ib, i__), lda, &a_ref(i__ + ib, 1), lda, &c_b15, &a_ref(i__, 1), lda); i__3 = *n - i__ - ib + 1; ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a_ref( i__ + ib, i__), lda, &c_b15, &a_ref(i__, i__), lda); } /* L20: */ } } } return 0; /* End of SLAUUM */ } /* slauum_ */
/* Subroutine */ int clasyf_(char *uplo, integer *n, integer *nb, integer *kb, complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= CLASYF computes a partial factorization of a complex symmetric matrix A using the Bunch-Kaufman diagonal pivoting method. The partial factorization has the form: A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: ( 0 U22 ) ( 0 D ) ( U12' U22' ) A = ( L11 0 ) ( D 0 ) ( L11' L21' ) if UPLO = 'L' ( L21 I ) ( 0 A22 ) ( 0 I ) where the order of D is at most NB. The actual order is returned in the argument KB, and is either NB or NB-1, or N if N <= NB. Note that U' denotes the transpose of U. CLASYF is an auxiliary routine called by CSYTRF. It uses blocked code (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrix A is stored: = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The order of the matrix A. N >= 0. NB (input) INTEGER The maximum number of columns of the matrix A that should be factored. NB should be at least 2 to allow for 2-by-2 pivot blocks. KB (output) INTEGER The number of columns of A that were actually factored. KB is either NB-1 or NB, or N if N <= NB. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading n-by-n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading n-by-n lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, A contains details of the partial factorization. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (output) INTEGER array, dimension (N) Details of the interchanges and the block structure of D. If UPLO = 'U', only the last KB elements of IPIV are set; if UPLO = 'L', only the first KB elements are set. If IPIV(k) > 0, then rows and columns k and IPIV(k) were interchanged and D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. W (workspace) COMPLEX array, dimension (LDW,NB) LDW (input) INTEGER The leading dimension of the array W. LDW >= max(1,N). INFO (output) INTEGER = 0: successful exit > 0: if INFO = k, D(k,k) is exactly zero. The factorization has been completed, but the block diagonal matrix D is exactly singular. ===================================================================== Parameter adjustments */ /* Table of constant values */ static complex c_b1 = {1.f,0.f}; static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5; real r__1, r__2, r__3, r__4; complex q__1, q__2, q__3; /* Builtin functions */ double sqrt(doublereal), r_imag(complex *); void c_div(complex *, complex *, complex *); /* Local variables */ static integer imax, jmax, j, k; static complex t; static real alpha; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, integer *), cgemm_(char *, char *, integer *, integer *, integer * , complex *, complex *, integer *, complex *, integer *, complex * , complex *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *), ccopy_(integer *, complex *, integer *, complex *, integer *), cswap_(integer *, complex *, integer *, complex *, integer *); static integer kstep; static complex r1, d11, d21, d22; static integer jb, jj, kk, jp, kp; static real absakk; static integer kw; extern integer icamax_(integer *, complex *, integer *); static real colmax, rowmax; static integer kkw; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define w_subscr(a_1,a_2) (a_2)*w_dim1 + a_1 #define w_ref(a_1,a_2) w[w_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --ipiv; w_dim1 = *ldw; w_offset = 1 + w_dim1 * 1; w -= w_offset; /* Function Body */ *info = 0; /* Initialize ALPHA for use in choosing pivot block size. */ alpha = (sqrt(17.f) + 1.f) / 8.f; if (lsame_(uplo, "U")) { /* Factorize the trailing columns of A using the upper triangle of A and working backwards, and compute the matrix W = U12*D for use in updating A11 K is the main loop index, decreasing from N in steps of 1 or 2 KW is the column of W which corresponds to column K of A */ k = *n; L10: kw = *nb + k - *n; /* Exit from loop */ if (k <= *n - *nb + 1 && *nb < *n || k < 1) { goto L30; } /* Copy column K of A to column KW of W and update it */ ccopy_(&k, &a_ref(1, k), &c__1, &w_ref(1, kw), &c__1); if (k < *n) { i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; cgemv_("No transpose", &k, &i__1, &q__1, &a_ref(1, k + 1), lda, & w_ref(k, kw + 1), ldw, &c_b1, &w_ref(1, kw), &c__1); } kstep = 1; /* Determine rows and columns to be interchanged and whether a 1-by-1 or 2-by-2 pivot block will be used */ i__1 = w_subscr(k, kw); absakk = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w_ref(k, kw) ), dabs(r__2)); /* IMAX is the row-index of the largest off-diagonal element in column K, and COLMAX is its absolute value */ if (k > 1) { i__1 = k - 1; imax = icamax_(&i__1, &w_ref(1, kw), &c__1); i__1 = w_subscr(imax, kw); colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w_ref( imax, kw)), dabs(r__2)); } else { colmax = 0.f; } if (dmax(absakk,colmax) == 0.f) { /* Column K is zero: set INFO and continue */ if (*info == 0) { *info = k; } kp = k; } else { if (absakk >= alpha * colmax) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else { /* Copy column IMAX to column KW-1 of W and update it */ ccopy_(&imax, &a_ref(1, imax), &c__1, &w_ref(1, kw - 1), & c__1); i__1 = k - imax; ccopy_(&i__1, &a_ref(imax, imax + 1), lda, &w_ref(imax + 1, kw - 1), &c__1); if (k < *n) { i__1 = *n - k; q__1.r = -1.f, q__1.i = 0.f; cgemv_("No transpose", &k, &i__1, &q__1, &a_ref(1, k + 1), lda, &w_ref(imax, kw + 1), ldw, &c_b1, &w_ref(1, kw - 1), &c__1); } /* JMAX is the column-index of the largest off-diagonal element in row IMAX, and ROWMAX is its absolute value */ i__1 = k - imax; jmax = imax + icamax_(&i__1, &w_ref(imax + 1, kw - 1), &c__1); i__1 = w_subscr(jmax, kw - 1); rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(& w_ref(jmax, kw - 1)), dabs(r__2)); if (imax > 1) { i__1 = imax - 1; jmax = icamax_(&i__1, &w_ref(1, kw - 1), &c__1); /* Computing MAX */ i__1 = w_subscr(jmax, kw - 1); r__3 = rowmax, r__4 = (r__1 = w[i__1].r, dabs(r__1)) + ( r__2 = r_imag(&w_ref(jmax, kw - 1)), dabs(r__2)); rowmax = dmax(r__3,r__4); } if (absakk >= alpha * colmax * (colmax / rowmax)) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else /* if(complicated condition) */ { i__1 = w_subscr(imax, kw - 1); if ((r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(& w_ref(imax, kw - 1)), dabs(r__2)) >= alpha * rowmax) { /* interchange rows and columns K and IMAX, use 1-by-1 pivot block */ kp = imax; /* copy column KW-1 of W to column KW */ ccopy_(&k, &w_ref(1, kw - 1), &c__1, &w_ref(1, kw), & c__1); } else { /* interchange rows and columns K-1 and IMAX, use 2-by-2 pivot block */ kp = imax; kstep = 2; } } } kk = k - kstep + 1; kkw = *nb + kk - *n; /* Updated column KP is already stored in column KKW of W */ if (kp != kk) { /* Copy non-updated column KK to column KP */ i__1 = a_subscr(kp, k); i__2 = a_subscr(kk, k); a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; i__1 = k - 1 - kp; ccopy_(&i__1, &a_ref(kp + 1, kk), &c__1, &a_ref(kp, kp + 1), lda); ccopy_(&kp, &a_ref(1, kk), &c__1, &a_ref(1, kp), &c__1); /* Interchange rows KK and KP in last KK columns of A and W */ i__1 = *n - kk + 1; cswap_(&i__1, &a_ref(kk, kk), lda, &a_ref(kp, kk), lda); i__1 = *n - kk + 1; cswap_(&i__1, &w_ref(kk, kkw), ldw, &w_ref(kp, kkw), ldw); } if (kstep == 1) { /* 1-by-1 pivot block D(k): column KW of W now holds W(k) = U(k)*D(k) where U(k) is the k-th column of U Store U(k) in column k of A */ ccopy_(&k, &w_ref(1, kw), &c__1, &a_ref(1, k), &c__1); c_div(&q__1, &c_b1, &a_ref(k, k)); r1.r = q__1.r, r1.i = q__1.i; i__1 = k - 1; cscal_(&i__1, &r1, &a_ref(1, k), &c__1); } else { /* 2-by-2 pivot block D(k): columns KW and KW-1 of W now hold ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) where U(k) and U(k-1) are the k-th and (k-1)-th columns of U */ if (k > 2) { /* Store U(k) and U(k-1) in columns k and k-1 of A */ i__1 = w_subscr(k - 1, kw); d21.r = w[i__1].r, d21.i = w[i__1].i; c_div(&q__1, &w_ref(k, kw), &d21); d11.r = q__1.r, d11.i = q__1.i; c_div(&q__1, &w_ref(k - 1, kw - 1), &d21); d22.r = q__1.r, d22.i = q__1.i; q__3.r = d11.r * d22.r - d11.i * d22.i, q__3.i = d11.r * d22.i + d11.i * d22.r; q__2.r = q__3.r - 1.f, q__2.i = q__3.i + 0.f; c_div(&q__1, &c_b1, &q__2); t.r = q__1.r, t.i = q__1.i; c_div(&q__1, &t, &d21); d21.r = q__1.r, d21.i = q__1.i; i__1 = k - 2; for (j = 1; j <= i__1; ++j) { i__2 = a_subscr(j, k - 1); i__3 = w_subscr(j, kw - 1); q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, q__3.i = d11.r * w[i__3].i + d11.i * w[i__3] .r; i__4 = w_subscr(j, kw); q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4] .i; q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i = d21.r * q__2.i + d21.i * q__2.r; a[i__2].r = q__1.r, a[i__2].i = q__1.i; i__2 = a_subscr(j, k); i__3 = w_subscr(j, kw); q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, q__3.i = d22.r * w[i__3].i + d22.i * w[i__3] .r; i__4 = w_subscr(j, kw - 1); q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4] .i; q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i = d21.r * q__2.i + d21.i * q__2.r; a[i__2].r = q__1.r, a[i__2].i = q__1.i; /* L20: */ } } /* Copy D(k) to A */ i__1 = a_subscr(k - 1, k - 1); i__2 = w_subscr(k - 1, kw - 1); a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; i__1 = a_subscr(k - 1, k); i__2 = w_subscr(k - 1, kw); a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; i__1 = a_subscr(k, k); i__2 = w_subscr(k, kw); a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; } } /* Store details of the interchanges in IPIV */ if (kstep == 1) { ipiv[k] = kp; } else { ipiv[k] = -kp; ipiv[k - 1] = -kp; } /* Decrease K and return to the start of the main loop */ k -= kstep; goto L10; L30: /* Update the upper triangle of A11 (= A(1:k,1:k)) as A11 := A11 - U12*D*U12' = A11 - U12*W' computing blocks of NB columns at a time */ i__1 = -(*nb); for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { /* Computing MIN */ i__2 = *nb, i__3 = k - j + 1; jb = min(i__2,i__3); /* Update the upper triangle of the diagonal block */ i__2 = j + jb - 1; for (jj = j; jj <= i__2; ++jj) { i__3 = jj - j + 1; i__4 = *n - k; q__1.r = -1.f, q__1.i = 0.f; cgemv_("No transpose", &i__3, &i__4, &q__1, &a_ref(j, k + 1), lda, &w_ref(jj, kw + 1), ldw, &c_b1, &a_ref(j, jj), & c__1); /* L40: */ } /* Update the rectangular superdiagonal block */ i__2 = j - 1; i__3 = *n - k; q__1.r = -1.f, q__1.i = 0.f; cgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &q__1, & a_ref(1, k + 1), lda, &w_ref(j, kw + 1), ldw, &c_b1, & a_ref(1, j), lda); /* L50: */ } /* Put U12 in standard form by partially undoing the interchanges in columns k+1:n */ j = k + 1; L60: jj = j; jp = ipiv[j]; if (jp < 0) { jp = -jp; ++j; } ++j; if (jp != jj && j <= *n) { i__1 = *n - j + 1; cswap_(&i__1, &a_ref(jp, j), lda, &a_ref(jj, j), lda); } if (j <= *n) { goto L60; } /* Set KB to the number of columns factorized */ *kb = *n - k; } else { /* Factorize the leading columns of A using the lower triangle of A and working forwards, and compute the matrix W = L21*D for use in updating A22 K is the main loop index, increasing from 1 in steps of 1 or 2 */ k = 1; L70: /* Exit from loop */ if (k >= *nb && *nb < *n || k > *n) { goto L90; } /* Copy column K of A to column K of W and update it */ i__1 = *n - k + 1; ccopy_(&i__1, &a_ref(k, k), &c__1, &w_ref(k, k), &c__1); i__1 = *n - k + 1; i__2 = k - 1; q__1.r = -1.f, q__1.i = 0.f; cgemv_("No transpose", &i__1, &i__2, &q__1, &a_ref(k, 1), lda, &w_ref( k, 1), ldw, &c_b1, &w_ref(k, k), &c__1); kstep = 1; /* Determine rows and columns to be interchanged and whether a 1-by-1 or 2-by-2 pivot block will be used */ i__1 = w_subscr(k, k); absakk = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w_ref(k, k)) , dabs(r__2)); /* IMAX is the row-index of the largest off-diagonal element in column K, and COLMAX is its absolute value */ if (k < *n) { i__1 = *n - k; imax = k + icamax_(&i__1, &w_ref(k + 1, k), &c__1); i__1 = w_subscr(imax, k); colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w_ref( imax, k)), dabs(r__2)); } else { colmax = 0.f; } if (dmax(absakk,colmax) == 0.f) { /* Column K is zero: set INFO and continue */ if (*info == 0) { *info = k; } kp = k; } else { if (absakk >= alpha * colmax) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else { /* Copy column IMAX to column K+1 of W and update it */ i__1 = imax - k; ccopy_(&i__1, &a_ref(imax, k), lda, &w_ref(k, k + 1), &c__1); i__1 = *n - imax + 1; ccopy_(&i__1, &a_ref(imax, imax), &c__1, &w_ref(imax, k + 1), &c__1); i__1 = *n - k + 1; i__2 = k - 1; q__1.r = -1.f, q__1.i = 0.f; cgemv_("No transpose", &i__1, &i__2, &q__1, &a_ref(k, 1), lda, &w_ref(imax, 1), ldw, &c_b1, &w_ref(k, k + 1), &c__1); /* JMAX is the column-index of the largest off-diagonal element in row IMAX, and ROWMAX is its absolute value */ i__1 = imax - k; jmax = k - 1 + icamax_(&i__1, &w_ref(k, k + 1), &c__1); i__1 = w_subscr(jmax, k + 1); rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(& w_ref(jmax, k + 1)), dabs(r__2)); if (imax < *n) { i__1 = *n - imax; jmax = imax + icamax_(&i__1, &w_ref(imax + 1, k + 1), & c__1); /* Computing MAX */ i__1 = w_subscr(jmax, k + 1); r__3 = rowmax, r__4 = (r__1 = w[i__1].r, dabs(r__1)) + ( r__2 = r_imag(&w_ref(jmax, k + 1)), dabs(r__2)); rowmax = dmax(r__3,r__4); } if (absakk >= alpha * colmax * (colmax / rowmax)) { /* no interchange, use 1-by-1 pivot block */ kp = k; } else /* if(complicated condition) */ { i__1 = w_subscr(imax, k + 1); if ((r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(& w_ref(imax, k + 1)), dabs(r__2)) >= alpha * rowmax) { /* interchange rows and columns K and IMAX, use 1-by-1 pivot block */ kp = imax; /* copy column K+1 of W to column K */ i__1 = *n - k + 1; ccopy_(&i__1, &w_ref(k, k + 1), &c__1, &w_ref(k, k), & c__1); } else { /* interchange rows and columns K+1 and IMAX, use 2-by-2 pivot block */ kp = imax; kstep = 2; } } } kk = k + kstep - 1; /* Updated column KP is already stored in column KK of W */ if (kp != kk) { /* Copy non-updated column KK to column KP */ i__1 = a_subscr(kp, k); i__2 = a_subscr(kk, k); a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i; i__1 = kp - k - 1; ccopy_(&i__1, &a_ref(k + 1, kk), &c__1, &a_ref(kp, k + 1), lda); i__1 = *n - kp + 1; ccopy_(&i__1, &a_ref(kp, kk), &c__1, &a_ref(kp, kp), &c__1); /* Interchange rows KK and KP in first KK columns of A and W */ cswap_(&kk, &a_ref(kk, 1), lda, &a_ref(kp, 1), lda); cswap_(&kk, &w_ref(kk, 1), ldw, &w_ref(kp, 1), ldw); } if (kstep == 1) { /* 1-by-1 pivot block D(k): column k of W now holds W(k) = L(k)*D(k) where L(k) is the k-th column of L Store L(k) in column k of A */ i__1 = *n - k + 1; ccopy_(&i__1, &w_ref(k, k), &c__1, &a_ref(k, k), &c__1); if (k < *n) { c_div(&q__1, &c_b1, &a_ref(k, k)); r1.r = q__1.r, r1.i = q__1.i; i__1 = *n - k; cscal_(&i__1, &r1, &a_ref(k + 1, k), &c__1); } } else { /* 2-by-2 pivot block D(k): columns k and k+1 of W now hold ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) where L(k) and L(k+1) are the k-th and (k+1)-th columns of L */ if (k < *n - 1) { /* Store L(k) and L(k+1) in columns k and k+1 of A */ i__1 = w_subscr(k + 1, k); d21.r = w[i__1].r, d21.i = w[i__1].i; c_div(&q__1, &w_ref(k + 1, k + 1), &d21); d11.r = q__1.r, d11.i = q__1.i; c_div(&q__1, &w_ref(k, k), &d21); d22.r = q__1.r, d22.i = q__1.i; q__3.r = d11.r * d22.r - d11.i * d22.i, q__3.i = d11.r * d22.i + d11.i * d22.r; q__2.r = q__3.r - 1.f, q__2.i = q__3.i + 0.f; c_div(&q__1, &c_b1, &q__2); t.r = q__1.r, t.i = q__1.i; c_div(&q__1, &t, &d21); d21.r = q__1.r, d21.i = q__1.i; i__1 = *n; for (j = k + 2; j <= i__1; ++j) { i__2 = a_subscr(j, k); i__3 = w_subscr(j, k); q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i, q__3.i = d11.r * w[i__3].i + d11.i * w[i__3] .r; i__4 = w_subscr(j, k + 1); q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4] .i; q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i = d21.r * q__2.i + d21.i * q__2.r; a[i__2].r = q__1.r, a[i__2].i = q__1.i; i__2 = a_subscr(j, k + 1); i__3 = w_subscr(j, k + 1); q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i, q__3.i = d22.r * w[i__3].i + d22.i * w[i__3] .r; i__4 = w_subscr(j, k); q__2.r = q__3.r - w[i__4].r, q__2.i = q__3.i - w[i__4] .i; q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i = d21.r * q__2.i + d21.i * q__2.r; a[i__2].r = q__1.r, a[i__2].i = q__1.i; /* L80: */ } } /* Copy D(k) to A */ i__1 = a_subscr(k, k); i__2 = w_subscr(k, k); a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; i__1 = a_subscr(k + 1, k); i__2 = w_subscr(k + 1, k); a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; i__1 = a_subscr(k + 1, k + 1); i__2 = w_subscr(k + 1, k + 1); a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i; } } /* Store details of the interchanges in IPIV */ if (kstep == 1) { ipiv[k] = kp; } else { ipiv[k] = -kp; ipiv[k + 1] = -kp; } /* Increase K and return to the start of the main loop */ k += kstep; goto L70; L90: /* Update the lower triangle of A22 (= A(k:n,k:n)) as A22 := A22 - L21*D*L21' = A22 - L21*W' computing blocks of NB columns at a time */ i__1 = *n; i__2 = *nb; for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = *nb, i__4 = *n - j + 1; jb = min(i__3,i__4); /* Update the lower triangle of the diagonal block */ i__3 = j + jb - 1; for (jj = j; jj <= i__3; ++jj) { i__4 = j + jb - jj; i__5 = k - 1; q__1.r = -1.f, q__1.i = 0.f; cgemv_("No transpose", &i__4, &i__5, &q__1, &a_ref(jj, 1), lda, &w_ref(jj, 1), ldw, &c_b1, &a_ref(jj, jj), &c__1); /* L100: */ } /* Update the rectangular subdiagonal block */ if (j + jb <= *n) { i__3 = *n - j - jb + 1; i__4 = k - 1; q__1.r = -1.f, q__1.i = 0.f; cgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &q__1, &a_ref(j + jb, 1), lda, &w_ref(j, 1), ldw, &c_b1, & a_ref(j + jb, j), lda); } /* L110: */ } /* Put L21 in standard form by partially undoing the interchanges in columns 1:k-1 */ j = k - 1; L120: jj = j; jp = ipiv[j]; if (jp < 0) { jp = -jp; --j; } --j; if (jp != jj && j >= 1) { cswap_(&j, &a_ref(jp, 1), lda, &a_ref(jj, 1), lda); } if (j >= 1) { goto L120; } /* Set KB to the number of columns factorized */ *kb = k - 1; } return 0; /* End of CLASYF */ } /* clasyf_ */
extern "C" magma_int_t magma_dgeqrf_gpu( magma_int_t m, magma_int_t n, magmaDouble_ptr dA, size_t dA_offset, magma_int_t ldda, double *tau, magmaDouble_ptr dT, size_t dT_offset, magma_queue_t queue, magma_int_t *info) { /* -- clMAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date November 2014 Purpose ======= DGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R. This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are inverted and stored separately in dT. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. dA (input/output) DOUBLE_PRECISION array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). LDDA (input) INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be divisible by 16. TAU (output) DOUBLE_PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). dT (workspace/output) DOUBLE_PRECISION array on the GPU, dimension (2*MIN(M, N) + (N+31)/32*32 )*NB, where NB can be obtained through magma_get_dgeqrf_nb(M). It starts with MIN(M,N)*NB block that store the triangular T matrices, followed by the MIN(M,N)*NB block of the diagonal inverses for the R matrix. The rest of the array is used as workspace. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. Further Details =============== The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i). ===================================================================== */ #define a_ref(a_1,a_2) dA, (dA_offset + (a_1) + (a_2)*(ldda)) #define t_ref(a_1) dT, (dT_offset + (a_1)*nb) #define d_ref(a_1) dT, (dT_offset + (minmn + (a_1))*nb) #define dd_ref(a_1) dT, (dT_offset + (2*minmn+(a_1))*nb) #define work_ref(a_1) ( work + (a_1)) #define hwork ( work + (nb)*(m)) magma_int_t i, k, minmn, old_i, old_ib, rows, cols; magma_int_t ib, nb; magma_int_t ldwork, lddwork, lwork, lhwork; double *work, *ut; /* check arguments */ *info = 0; if (m < 0) { *info = -1; } else if (n < 0) { *info = -2; } else if (ldda < max(1,m)) { *info = -4; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } k = minmn = min(m,n); if (k == 0) return *info; nb = magma_get_dgeqrf_nb(m); lwork = (m + n + nb)*nb; lhwork = lwork - m*nb; if (MAGMA_SUCCESS != magma_dmalloc_cpu( &work, lwork )) { *info = MAGMA_ERR_HOST_ALLOC; return *info; } ut = hwork+nb*(n); memset( ut, 0, nb*nb*sizeof(double)); magma_event_t event[2] = {NULL, NULL}; ldwork = m; lddwork= n; if ( (nb > 1) && (nb < k) ) { /* Use blocked code initially */ old_i = 0; old_ib = nb; for (i = 0; i < k-nb; i += nb) { ib = min(k-i, nb); rows = m -i; magma_dgetmatrix_async( rows, ib, a_ref(i,i), ldda, work_ref(i), ldwork, queue, &event[1] ); if (i>0){ /* Apply H' to A(i:m,i+2*ib:n) from the left */ cols = n-old_i-2*old_ib; magma_dlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, m-old_i, cols, old_ib, a_ref(old_i, old_i ), ldda, t_ref(old_i), nb, a_ref(old_i, old_i+2*old_ib), ldda, dd_ref(0), lddwork, queue); /* store the diagonal */ magma_dsetmatrix_async( old_ib, old_ib, ut, old_ib, d_ref(old_i), old_ib, queue, &event[0] ); } magma_event_sync(event[1]); lapackf77_dgeqrf(&rows, &ib, work_ref(i), &ldwork, tau+i, hwork, &lhwork, info); /* Form the triangular factor of the block reflector H = H(i) H(i+1) . . . H(i+ib-1) */ lapackf77_dlarft( MagmaForwardStr, MagmaColumnwiseStr, &rows, &ib, work_ref(i), &ldwork, tau+i, hwork, &ib); /* Put 0s in the upper triangular part of a panel (and 1s on the diagonal); copy the upper triangular in ut and invert it. */ magma_event_sync(event[0]); dsplit_diag_block(ib, work_ref(i), ldwork, ut); magma_dsetmatrix( rows, ib, work_ref(i), ldwork, a_ref(i,i), ldda, queue); if (i + ib < n) { /* Send the triangular factor T to the GPU */ magma_dsetmatrix( ib, ib, hwork, ib, t_ref(i), nb, queue ); if (i+nb < k-nb){ /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */ magma_dlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, rows, ib, ib, a_ref(i, i ), ldda, t_ref(i), nb, a_ref(i, i+ib), ldda, dd_ref(0), lddwork, queue); } else { cols = n-i-ib; magma_dlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, rows, cols, ib, a_ref(i, i ), ldda, t_ref(i), nb, a_ref(i, i+ib), ldda, dd_ref(0), lddwork, queue); /* Fix the diagonal block */ magma_dsetmatrix( ib, ib, ut, ib, d_ref(i), ib , queue); } old_i = i; old_ib = ib; } } } else { i = 0; } /* Use unblocked code to factor the last or only block. */ if (i < k) { ib = n-i; rows = m-i; magma_dgetmatrix( rows, ib, a_ref(i, i), ldda, work, rows, queue ); lhwork = lwork - rows*ib; lapackf77_dgeqrf(&rows, &ib, work, &rows, tau+i, work+ib*rows, &lhwork, info); magma_dsetmatrix( rows, ib, work, rows, a_ref(i, i), ldda, queue ); } magma_free_cpu( work ); return *info; } /* magma_dgeqrf_gpu */
/* Subroutine */ int derred_(char *path, integer *nunit) { /* Format strings */ static char fmt_9999[] = "(1x,a6,\002 passed the tests of the error exit" "s (\002,i3,\002 tests done)\002)"; static char fmt_9998[] = "(\002 *** \002,a6,\002 failed the tests of the" " error exits ***\002)"; /* Builtin functions */ integer s_wsle(cilist *), e_wsle(void); /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ static integer info, sdim; static doublereal a[16] /* was [4][4] */; static logical b[4]; static integer i__, j; static doublereal s[4], u[16] /* was [4][4] */, w[16]; extern /* Subroutine */ int dgees_(char *, char *, L_fp, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, logical *, integer *), dgeev_(char *, char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); static doublereal abnrm; static char c2[2]; static doublereal r1[4], r2[4]; extern /* Subroutine */ int dgesdd_(char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *); static integer iw[8]; static doublereal wi[4]; static integer nt; static doublereal vl[16] /* was [4][4] */, vr[16] /* was [4][4] */, wr[4], vt[16] /* was [4][4] */; extern /* Subroutine */ int dgesvd_(char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); extern logical dslect_(); extern /* Subroutine */ int dgeesx_(char *, char *, L_fp, char *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal * , integer *, integer *, integer *, logical *, integer *); extern logical lsamen_(integer *, char *, char *); extern /* Subroutine */ int dgeevx_(char *, char *, char *, char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *, integer *), chkxer_(char *, integer *, integer *, logical *, logical *); static integer ihi, ilo; /* Fortran I/O blocks */ static cilist io___1 = { 0, 0, 0, 0, 0 }; static cilist io___24 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___26 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___27 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___29 = { 0, 0, 0, fmt_9998, 0 }; #define a_ref(a_1,a_2) a[(a_2)*4 + a_1 - 5] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University December 22, 1999 Purpose ======= DERRED tests the error exits for the eigenvalue driver routines for DOUBLE PRECISION matrices: PATH driver description ---- ------ ----------- SEV DGEEV find eigenvalues/eigenvectors for nonsymmetric A SES DGEES find eigenvalues/Schur form for nonsymmetric A SVX DGEEVX SGEEV + balancing and condition estimation SSX DGEESX SGEES + balancing and condition estimation DBD DGESVD compute SVD of an M-by-N matrix A DGESDD compute SVD of an M-by-N matrix A (by divide and conquer) Arguments ========= PATH (input) CHARACTER*3 The LAPACK path name for the routines to be tested. NUNIT (input) INTEGER The unit number for output. ===================================================================== */ infoc_1.nout = *nunit; io___1.ciunit = infoc_1.nout; s_wsle(&io___1); e_wsle(); s_copy(c2, path + 1, (ftnlen)2, (ftnlen)2); /* Initialize A */ for (j = 1; j <= 4; ++j) { for (i__ = 1; i__ <= 4; ++i__) { a_ref(i__, j) = 0.; /* L10: */ } /* L20: */ } for (i__ = 1; i__ <= 4; ++i__) { a_ref(i__, i__) = 1.; /* L30: */ } infoc_1.ok = TRUE_; nt = 0; if (lsamen_(&c__2, c2, "EV")) { /* Test DGEEV */ s_copy(srnamc_1.srnamt, "DGEEV ", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dgeev_("X", "N", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, &c__1, w, & c__1, &info); chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgeev_("N", "X", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, &c__1, w, & c__1, &info); chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dgeev_("N", "N", &c_n1, a, &c__1, wr, wi, vl, &c__1, vr, &c__1, w, & c__1, &info); chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dgeev_("N", "N", &c__2, a, &c__1, wr, wi, vl, &c__1, vr, &c__1, w, & c__6, &info); chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 9; dgeev_("V", "N", &c__2, a, &c__2, wr, wi, vl, &c__1, vr, &c__1, w, & c__8, &info); chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 11; dgeev_("N", "V", &c__2, a, &c__2, wr, wi, vl, &c__1, vr, &c__1, w, & c__8, &info); chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 13; dgeev_("V", "V", &c__1, a, &c__1, wr, wi, vl, &c__1, vr, &c__1, w, & c__3, &info); chkxer_("DGEEV ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); nt += 7; } else if (lsamen_(&c__2, c2, "ES")) { /* Test DGEES */ s_copy(srnamc_1.srnamt, "DGEES ", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dgees_("X", "N", (L_fp)dslect_, &c__0, a, &c__1, &sdim, wr, wi, vl, & c__1, w, &c__1, b, &info); chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgees_("N", "X", (L_fp)dslect_, &c__0, a, &c__1, &sdim, wr, wi, vl, & c__1, w, &c__1, b, &info); chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 4; dgees_("N", "S", (L_fp)dslect_, &c_n1, a, &c__1, &sdim, wr, wi, vl, & c__1, w, &c__1, b, &info); chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 6; dgees_("N", "S", (L_fp)dslect_, &c__2, a, &c__1, &sdim, wr, wi, vl, & c__1, w, &c__6, b, &info); chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 11; dgees_("V", "S", (L_fp)dslect_, &c__2, a, &c__2, &sdim, wr, wi, vl, & c__1, w, &c__6, b, &info); chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 13; dgees_("N", "S", (L_fp)dslect_, &c__1, a, &c__1, &sdim, wr, wi, vl, & c__1, w, &c__2, b, &info); chkxer_("DGEES ", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); nt += 6; } else if (lsamen_(&c__2, c2, "VX")) { /* Test DGEEVX */ s_copy(srnamc_1.srnamt, "DGEEVX", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dgeevx_("X", "N", "N", "N", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgeevx_("N", "X", "N", "N", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dgeevx_("N", "N", "X", "N", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 4; dgeevx_("N", "N", "N", "X", &c__0, a, &c__1, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dgeevx_("N", "N", "N", "N", &c_n1, a, &c__1, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 7; dgeevx_("N", "N", "N", "N", &c__2, a, &c__1, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 11; dgeevx_("N", "V", "N", "N", &c__2, a, &c__2, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__6, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 13; dgeevx_("N", "N", "V", "N", &c__2, a, &c__2, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__6, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 21; dgeevx_("N", "N", "N", "N", &c__1, a, &c__1, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__1, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 21; dgeevx_("N", "V", "N", "N", &c__1, a, &c__1, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__2, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 21; dgeevx_("N", "N", "V", "V", &c__1, a, &c__1, wr, wi, vl, &c__1, vr, & c__1, &ilo, &ihi, s, &abnrm, r1, r2, w, &c__3, iw, &info); chkxer_("DGEEVX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); nt += 11; } else if (lsamen_(&c__2, c2, "SX")) { /* Test DGEESX */ s_copy(srnamc_1.srnamt, "DGEESX", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dgeesx_("X", "N", (L_fp)dslect_, "N", &c__0, a, &c__1, &sdim, wr, wi, vl, &c__1, r1, r2, w, &c__1, iw, &c__1, b, &info); chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgeesx_("N", "X", (L_fp)dslect_, "N", &c__0, a, &c__1, &sdim, wr, wi, vl, &c__1, r1, r2, w, &c__1, iw, &c__1, b, &info); chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 4; dgeesx_("N", "N", (L_fp)dslect_, "X", &c__0, a, &c__1, &sdim, wr, wi, vl, &c__1, r1, r2, w, &c__1, iw, &c__1, b, &info); chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dgeesx_("N", "N", (L_fp)dslect_, "N", &c_n1, a, &c__1, &sdim, wr, wi, vl, &c__1, r1, r2, w, &c__1, iw, &c__1, b, &info); chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 7; dgeesx_("N", "N", (L_fp)dslect_, "N", &c__2, a, &c__1, &sdim, wr, wi, vl, &c__1, r1, r2, w, &c__6, iw, &c__1, b, &info); chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 12; dgeesx_("V", "N", (L_fp)dslect_, "N", &c__2, a, &c__2, &sdim, wr, wi, vl, &c__1, r1, r2, w, &c__6, iw, &c__1, b, &info); chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 16; dgeesx_("N", "N", (L_fp)dslect_, "N", &c__1, a, &c__1, &sdim, wr, wi, vl, &c__1, r1, r2, w, &c__2, iw, &c__1, b, &info); chkxer_("DGEESX", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); nt += 7; } else if (lsamen_(&c__2, c2, "BD")) { /* Test DGESVD */ s_copy(srnamc_1.srnamt, "DGESVD", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dgesvd_("X", "N", &c__0, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, & c__1, &info); chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgesvd_("N", "X", &c__0, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, & c__1, &info); chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgesvd_("O", "O", &c__0, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, & c__1, &info); chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dgesvd_("N", "N", &c_n1, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, & c__1, &info); chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 4; dgesvd_("N", "N", &c__0, &c_n1, a, &c__1, s, u, &c__1, vt, &c__1, w, & c__1, &info); chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 6; dgesvd_("N", "N", &c__2, &c__1, a, &c__1, s, u, &c__1, vt, &c__1, w, & c__5, &info); chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 9; dgesvd_("A", "N", &c__2, &c__1, a, &c__2, s, u, &c__1, vt, &c__1, w, & c__5, &info); chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 11; dgesvd_("N", "A", &c__1, &c__2, a, &c__1, s, u, &c__1, vt, &c__1, w, & c__5, &info); chkxer_("DGESVD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); nt += 8; if (infoc_1.ok) { io___24.ciunit = infoc_1.nout; s_wsfe(&io___24); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); do_fio(&c__1, (char *)&nt, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___25.ciunit = infoc_1.nout; s_wsfe(&io___25); e_wsfe(); } /* Test DGESDD */ s_copy(srnamc_1.srnamt, "DGESDD", (ftnlen)6, (ftnlen)6); infoc_1.infot = 1; dgesdd_("X", &c__0, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, &c__1, iw, &info); chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 2; dgesdd_("N", &c_n1, &c__0, a, &c__1, s, u, &c__1, vt, &c__1, w, &c__1, iw, &info); chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 3; dgesdd_("N", &c__0, &c_n1, a, &c__1, s, u, &c__1, vt, &c__1, w, &c__1, iw, &info); chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 5; dgesdd_("N", &c__2, &c__1, a, &c__1, s, u, &c__1, vt, &c__1, w, &c__5, iw, &info); chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 8; dgesdd_("A", &c__2, &c__1, a, &c__2, s, u, &c__1, vt, &c__1, w, &c__5, iw, &info); chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); infoc_1.infot = 10; dgesdd_("A", &c__1, &c__2, a, &c__1, s, u, &c__1, vt, &c__1, w, &c__5, iw, &info); chkxer_("DGESDD", &infoc_1.infot, &infoc_1.nout, &infoc_1.lerr, & infoc_1.ok); nt += -2; if (infoc_1.ok) { io___26.ciunit = infoc_1.nout; s_wsfe(&io___26); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); do_fio(&c__1, (char *)&nt, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___27.ciunit = infoc_1.nout; s_wsfe(&io___27); e_wsfe(); } } /* Print a summary line. */ if (! lsamen_(&c__2, c2, "BD")) { if (infoc_1.ok) { io___28.ciunit = infoc_1.nout; s_wsfe(&io___28); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); do_fio(&c__1, (char *)&nt, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___29.ciunit = infoc_1.nout; s_wsfe(&io___29); e_wsfe(); } } return 0; /* End of DERRED */ } /* derred_ */
/* Subroutine */ int strt03_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *a, integer *lda, real *scale, real *cnorm, real * tscal, real *x, integer *ldx, real *b, integer *ldb, real *work, real *resid) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1; real r__1, r__2, r__3; /* Local variables */ static integer j; extern logical lsame_(char *, char *); extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static real xscal; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); static real tnorm, xnorm; extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, real *, integer *), strmv_(char *, char *, char *, integer *, real *, integer *, real *, integer *), slabad_(real *, real *); static integer ix; extern doublereal slamch_(char *); static real bignum; extern integer isamax_(integer *, real *, integer *); static real smlnum, eps, err; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= STRT03 computes the residual for the solution to a scaled triangular system of equations A*x = s*b or A'*x = s*b. Here A is a triangular matrix, A' is the transpose of A, s is a scalar, and x and b are N by NRHS matrices. The test ratio is the maximum over the number of right hand sides of norm(s*b - op(A)*x) / ( norm(op(A)) * norm(x) * EPS ), where op(A) denotes A or A' and EPS is the machine epsilon. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the matrix A is upper or lower triangular. = 'U': Upper triangular = 'L': Lower triangular TRANS (input) CHARACTER*1 Specifies the operation applied to A. = 'N': A *x = s*b (No transpose) = 'T': A'*x = s*b (Transpose) = 'C': A'*x = s*b (Conjugate transpose = Transpose) DIAG (input) CHARACTER*1 Specifies whether or not the matrix A is unit triangular. = 'N': Non-unit triangular = 'U': Unit triangular N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices X and B. NRHS >= 0. A (input) REAL array, dimension (LDA,N) The triangular matrix A. If UPLO = 'U', the leading n by n upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading n by n lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). SCALE (input) REAL The scaling factor s used in solving the triangular system. CNORM (input) REAL array, dimension (N) The 1-norms of the columns of A, not counting the diagonal. TSCAL (input) REAL The scaling factor used in computing the 1-norms in CNORM. CNORM actually contains the column norms of TSCAL*A. X (input) REAL array, dimension (LDX,NRHS) The computed solution vectors for the system of linear equations. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). B (input) REAL array, dimension (LDB,NRHS) The right hand side vectors for the system of linear equations. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). WORK (workspace) REAL array, dimension (N) RESID (output) REAL The maximum over the number of right hand sides of norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ). ===================================================================== Quick exit if N = 0 Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --cnorm; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --work; /* Function Body */ if (*n <= 0 || *nrhs <= 0) { *resid = 0.f; return 0; } eps = slamch_("Epsilon"); smlnum = slamch_("Safe minimum"); bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); /* Compute the norm of the triangular matrix A using the column norms already computed by SLATRS. */ tnorm = 0.f; if (lsame_(diag, "N")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ r__2 = tnorm, r__3 = *tscal * (r__1 = a_ref(j, j), dabs(r__1)) + cnorm[j]; tnorm = dmax(r__2,r__3); /* L10: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ r__1 = tnorm, r__2 = *tscal + cnorm[j]; tnorm = dmax(r__1,r__2); /* L20: */ } } /* Compute the maximum over the number of right hand sides of norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ). */ *resid = 0.f; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { scopy_(n, &x_ref(1, j), &c__1, &work[1], &c__1); ix = isamax_(n, &work[1], &c__1); /* Computing MAX */ r__2 = 1.f, r__3 = (r__1 = x_ref(ix, j), dabs(r__1)); xnorm = dmax(r__2,r__3); xscal = 1.f / xnorm / (real) (*n); sscal_(n, &xscal, &work[1], &c__1); strmv_(uplo, trans, diag, n, &a[a_offset], lda, &work[1], &c__1); r__1 = -(*scale) * xscal; saxpy_(n, &r__1, &b_ref(1, j), &c__1, &work[1], &c__1); ix = isamax_(n, &work[1], &c__1); err = *tscal * (r__1 = work[ix], dabs(r__1)); ix = isamax_(n, &x_ref(1, j), &c__1); xnorm = (r__1 = x_ref(ix, j), dabs(r__1)); if (err * smlnum <= xnorm) { if (xnorm > 0.f) { err /= xnorm; } } else { if (err > 0.f) { err = 1.f / eps; } } if (err * smlnum <= tnorm) { if (tnorm > 0.f) { err /= tnorm; } } else { if (err > 0.f) { err = 1.f / eps; } } *resid = dmax(*resid,err); /* L30: */ } return 0; /* End of STRT03 */ } /* strt03_ */
/* Subroutine */ int sgegs_(char *jobvsl, char *jobvsr, integer *n, real *a, integer *lda, real *b, integer *ldb, real *alphar, real *alphai, real *beta, real *vsl, integer *ldvsl, real *vsr, integer *ldvsr, real * work, integer *lwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= This routine is deprecated and has been replaced by routine SGGES. SGEGS computes for a pair of N-by-N real nonsymmetric matrices A, B: the generalized eigenvalues (alphar +/- alphai*i, beta), the real Schur form (A, B), and optionally left and/or right Schur vectors (VSL and VSR). (If only the generalized eigenvalues are needed, use the driver SGEGV instead.) A generalized eigenvalue for a pair of matrices (A,B) is, roughly speaking, a scalar w or a ratio alpha/beta = w, such that A - w*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0, and even for both being zero. A good beginning reference is the book, "Matrix Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press) The (generalized) Schur form of a pair of matrices is the result of multiplying both matrices on the left by one orthogonal matrix and both on the right by another orthogonal matrix, these two orthogonal matrices being chosen so as to bring the pair of matrices into (real) Schur form. A pair of matrices A, B is in generalized real Schur form if B is upper triangular with non-negative diagonal and A is block upper triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond to real generalized eigenvalues, while 2-by-2 blocks of A will be "standardized" by making the corresponding elements of B have the form: [ a 0 ] [ 0 b ] and the pair of corresponding 2-by-2 blocks in A and B will have a complex conjugate pair of generalized eigenvalues. The left and right Schur vectors are the columns of VSL and VSR, respectively, where VSL and VSR are the orthogonal matrices which reduce A and B to Schur form: Schur form of (A,B) = ( (VSL)**T A (VSR), (VSL)**T B (VSR) ) Arguments ========= JOBVSL (input) CHARACTER*1 = 'N': do not compute the left Schur vectors; = 'V': compute the left Schur vectors. JOBVSR (input) CHARACTER*1 = 'N': do not compute the right Schur vectors; = 'V': compute the right Schur vectors. N (input) INTEGER The order of the matrices A, B, VSL, and VSR. N >= 0. A (input/output) REAL array, dimension (LDA, N) On entry, the first of the pair of matrices whose generalized eigenvalues and (optionally) Schur vectors are to be computed. On exit, the generalized Schur form of A. Note: to avoid overflow, the Frobenius norm of the matrix A should be less than the overflow threshold. LDA (input) INTEGER The leading dimension of A. LDA >= max(1,N). B (input/output) REAL array, dimension (LDB, N) On entry, the second of the pair of matrices whose generalized eigenvalues and (optionally) Schur vectors are to be computed. On exit, the generalized Schur form of B. Note: to avoid overflow, the Frobenius norm of the matrix B should be less than the overflow threshold. LDB (input) INTEGER The leading dimension of B. LDB >= max(1,N). ALPHAR (output) REAL array, dimension (N) ALPHAI (output) REAL array, dimension (N) BETA (output) REAL array, dimension (N) On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, j=1,...,N and BETA(j),j=1,...,N are the diagonals of the complex Schur form (A,B) that would result if the 2-by-2 diagonal blocks of the real Schur form of (A,B) were further reduced to triangular form using 2-by-2 complex unitary transformations. If ALPHAI(j) is zero, then the j-th eigenvalue is real; if positive, then the j-th and (j+1)-st eigenvalues are a complex conjugate pair, with ALPHAI(j+1) negative. Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) may easily over- or underflow, and BETA(j) may even be zero. Thus, the user should avoid naively computing the ratio alpha/beta. However, ALPHAR and ALPHAI will be always less than and usually comparable with norm(A) in magnitude, and BETA always less than and usually comparable with norm(B). VSL (output) REAL array, dimension (LDVSL,N) If JOBVSL = 'V', VSL will contain the left Schur vectors. (See "Purpose", above.) Not referenced if JOBVSL = 'N'. LDVSL (input) INTEGER The leading dimension of the matrix VSL. LDVSL >=1, and if JOBVSL = 'V', LDVSL >= N. VSR (output) REAL array, dimension (LDVSR,N) If JOBVSR = 'V', VSR will contain the right Schur vectors. (See "Purpose", above.) Not referenced if JOBVSR = 'N'. LDVSR (input) INTEGER The leading dimension of the matrix VSR. LDVSR >= 1, and if JOBVSR = 'V', LDVSR >= N. WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,4*N). For good performance, LWORK must generally be larger. To compute the optimal value of LWORK, call ILAENV to get blocksizes (for SGEQRF, SORMQR, and SORGQR.) Then compute: NB -- MAX of the blocksizes for SGEQRF, SORMQR, and SORGQR The optimal LWORK is 2*N + N*(NB+1). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. = 1,...,N: The QZ iteration failed. (A,B) are not in Schur form, but ALPHAR(j), ALPHAI(j), and BETA(j) should be correct for j=INFO+1,...,N. > N: errors that usually indicate LAPACK problems: =N+1: error return from SGGBAL =N+2: error return from SGEQRF =N+3: error return from SORMQR =N+4: error return from SORGQR =N+5: error return from SGGHRD =N+6: error return from SHGEQZ (other than failed iteration) =N+7: error return from SGGBAK (computing VSL) =N+8: error return from SGGBAK (computing VSR) =N+9: error return from SLASCL (various places) ===================================================================== Decode the input arguments Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static real c_b36 = 0.f; static real c_b37 = 1.f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, vsr_dim1, vsr_offset, i__1, i__2; /* Local variables */ static real anrm, bnrm; static integer itau, lopt; extern logical lsame_(char *, char *); static integer ileft, iinfo, icols; static logical ilvsl; static integer iwork; static logical ilvsr; static integer irows, nb; extern /* Subroutine */ int sggbak_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, integer * ), sggbal_(char *, integer *, real *, integer *, real *, integer *, integer *, integer *, real *, real *, real *, integer *); static logical ilascl, ilbscl; extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); static real safmin; extern /* Subroutine */ int sgghrd_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, integer * , real *, integer *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static real bignum; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *); static integer ijobvl, iright; extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer *, real *, real *, integer *, integer *); static integer ijobvr; extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); static real anrmto; static integer lwkmin, nb1, nb2, nb3; static real bnrmto; extern /* Subroutine */ int shgeqz_(char *, char *, char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real * , real *, real *, real *, integer *, real *, integer *, real *, integer *, integer *); static real smlnum; extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *, integer *); static integer lwkopt; static logical lquery; extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *); static integer ihi, ilo; static real eps; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define vsl_ref(a_1,a_2) vsl[(a_2)*vsl_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --alphar; --alphai; --beta; vsl_dim1 = *ldvsl; vsl_offset = 1 + vsl_dim1 * 1; vsl -= vsl_offset; vsr_dim1 = *ldvsr; vsr_offset = 1 + vsr_dim1 * 1; vsr -= vsr_offset; --work; /* Function Body */ if (lsame_(jobvsl, "N")) { ijobvl = 1; ilvsl = FALSE_; } else if (lsame_(jobvsl, "V")) { ijobvl = 2; ilvsl = TRUE_; } else { ijobvl = -1; ilvsl = FALSE_; } if (lsame_(jobvsr, "N")) { ijobvr = 1; ilvsr = FALSE_; } else if (lsame_(jobvsr, "V")) { ijobvr = 2; ilvsr = TRUE_; } else { ijobvr = -1; ilvsr = FALSE_; } /* Test the input arguments Computing MAX */ i__1 = *n << 2; lwkmin = max(i__1,1); lwkopt = lwkmin; work[1] = (real) lwkopt; lquery = *lwork == -1; *info = 0; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { *info = -12; } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { *info = -14; } else if (*lwork < lwkmin && ! lquery) { *info = -16; } if (*info == 0) { nb1 = ilaenv_(&c__1, "SGEQRF", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); nb2 = ilaenv_(&c__1, "SORMQR", " ", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)1); nb3 = ilaenv_(&c__1, "SORGQR", " ", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)1); /* Computing MAX */ i__1 = max(nb1,nb2); nb = max(i__1,nb3); lopt = (*n << 1) + *n * (nb + 1); work[1] = (real) lopt; } if (*info != 0) { i__1 = -(*info); xerbla_("SGEGS ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Get machine constants */ eps = slamch_("E") * slamch_("B"); safmin = slamch_("S"); smlnum = *n * safmin / eps; bignum = 1.f / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = slange_("M", n, n, &a[a_offset], lda, &work[1]); ilascl = FALSE_; if (anrm > 0.f && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { slascl_("G", &c_n1, &c_n1, &anrm, &anrmto, n, n, &a[a_offset], lda, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = slange_("M", n, n, &b[b_offset], ldb, &work[1]); ilbscl = FALSE_; if (bnrm > 0.f && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { slascl_("G", &c_n1, &c_n1, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } /* Permute the matrix to make it more nearly triangular Workspace layout: (2*N words -- "work..." not actually used) left_permutation, right_permutation, work... */ ileft = 1; iright = *n + 1; iwork = iright + *n; sggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &work[ ileft], &work[iright], &work[iwork], &iinfo); if (iinfo != 0) { *info = *n + 1; goto L10; } /* Reduce B to triangular form, and initialize VSL and/or VSR Workspace layout: ("work..." must have at least N words) left_permutation, right_permutation, tau, work... */ irows = ihi + 1 - ilo; icols = *n + 1 - ilo; itau = iwork; iwork = itau + irows; i__1 = *lwork + 1 - iwork; sgeqrf_(&irows, &icols, &b_ref(ilo, ilo), ldb, &work[itau], &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 2; goto L10; } i__1 = *lwork + 1 - iwork; sormqr_("L", "T", &irows, &icols, &irows, &b_ref(ilo, ilo), ldb, &work[ itau], &a_ref(ilo, ilo), lda, &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 3; goto L10; } if (ilvsl) { slaset_("Full", n, n, &c_b36, &c_b37, &vsl[vsl_offset], ldvsl); i__1 = irows - 1; i__2 = irows - 1; slacpy_("L", &i__1, &i__2, &b_ref(ilo + 1, ilo), ldb, &vsl_ref(ilo + 1, ilo), ldvsl); i__1 = *lwork + 1 - iwork; sorgqr_(&irows, &irows, &irows, &vsl_ref(ilo, ilo), ldvsl, &work[itau] , &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { *info = *n + 4; goto L10; } } if (ilvsr) { slaset_("Full", n, n, &c_b36, &c_b37, &vsr[vsr_offset], ldvsr); } /* Reduce to generalized Hessenberg form */ sgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &iinfo); if (iinfo != 0) { *info = *n + 5; goto L10; } /* Perform QZ algorithm, computing Schur vectors if desired Workspace layout: ("work..." must have at least 1 word) left_permutation, right_permutation, work... */ iwork = itau; i__1 = *lwork + 1 - iwork; shgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alphar[1], &alphai[1], &beta[1], &vsl[vsl_offset] , ldvsl, &vsr[vsr_offset], ldvsr, &work[iwork], &i__1, &iinfo); if (iinfo >= 0) { /* Computing MAX */ i__1 = lwkopt, i__2 = (integer) work[iwork] + iwork - 1; lwkopt = max(i__1,i__2); } if (iinfo != 0) { if (iinfo > 0 && iinfo <= *n) { *info = iinfo; } else if (iinfo > *n && iinfo <= *n << 1) { *info = iinfo - *n; } else { *info = *n + 6; } goto L10; } /* Apply permutation to VSL and VSR */ if (ilvsl) { sggbak_("P", "L", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsl[ vsl_offset], ldvsl, &iinfo); if (iinfo != 0) { *info = *n + 7; goto L10; } } if (ilvsr) { sggbak_("P", "R", n, &ilo, &ihi, &work[ileft], &work[iright], n, &vsr[ vsr_offset], ldvsr, &iinfo); if (iinfo != 0) { *info = *n + 8; goto L10; } } /* Undo scaling */ if (ilascl) { slascl_("H", &c_n1, &c_n1, &anrmto, &anrm, n, n, &a[a_offset], lda, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } slascl_("G", &c_n1, &c_n1, &anrmto, &anrm, n, &c__1, &alphar[1], n, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } slascl_("G", &c_n1, &c_n1, &anrmto, &anrm, n, &c__1, &alphai[1], n, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } if (ilbscl) { slascl_("U", &c_n1, &c_n1, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } slascl_("G", &c_n1, &c_n1, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & iinfo); if (iinfo != 0) { *info = *n + 9; return 0; } } L10: work[1] = (real) lwkopt; return 0; /* End of SGEGS */ } /* sgegs_ */
/* Subroutine */ int zgglse_(integer *m, integer *n, integer *p, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *c__, doublecomplex *d__, doublecomplex *x, doublecomplex *work, integer *lwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= ZGGLSE solves the linear equality-constrained least squares (LSE) problem: minimize || c - A*x ||_2 subject to B*x = d where A is an M-by-N matrix, B is a P-by-N matrix, c is a given M-vector, and d is a given P-vector. It is assumed that P <= N <= M+P, and rank(B) = P and rank( ( A ) ) = N. ( ( B ) ) These conditions ensure that the LSE problem has a unique solution, which is obtained using a GRQ factorization of the matrices B and A. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrices A and B. N >= 0. P (input) INTEGER The number of rows of the matrix B. 0 <= P <= N <= M+P. A (input/output) COMPLEX*16 array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, A is destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). B (input/output) COMPLEX*16 array, dimension (LDB,N) On entry, the P-by-N matrix B. On exit, B is destroyed. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,P). C (input/output) COMPLEX*16 array, dimension (M) On entry, C contains the right hand side vector for the least squares part of the LSE problem. On exit, the residual sum of squares for the solution is given by the sum of squares of elements N-P+1 to M of vector C. D (input/output) COMPLEX*16 array, dimension (P) On entry, D contains the right hand side vector for the constrained equation. On exit, D is destroyed. X (output) COMPLEX*16 array, dimension (N) On exit, X is the solution of the LSE problem. WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,M+N+P). For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, where NB is an upper bound for the optimal blocksizes for ZGEQRF, CGERQF, ZUNMQR and CUNMRQ. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. ===================================================================== Test the input parameters Parameter adjustments */ /* Table of constant values */ static doublecomplex c_b1 = {1.,0.}; static integer c__1 = 1; static integer c_n1 = -1; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; doublecomplex z__1; /* Local variables */ static integer lopt; extern /* Subroutine */ int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *), ztrmv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), ztrsv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, integer * ); static integer nb, mn, nr; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int zggrqf_(integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *) ; static integer nb1, nb2, nb3, nb4, lwkopt; static logical lquery; extern /* Subroutine */ int zunmqr_(char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmrq_(char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, integer *); #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --c__; --d__; --x; --work; /* Function Body */ *info = 0; mn = min(*m,*n); nb1 = ilaenv_(&c__1, "ZGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); nb2 = ilaenv_(&c__1, "ZGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); nb3 = ilaenv_(&c__1, "ZUNMQR", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1); nb4 = ilaenv_(&c__1, "ZUNMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = max(nb1,nb2), i__1 = max(i__1,nb3); nb = max(i__1,nb4); lwkopt = *p + mn + max(*m,*n) * nb; work[1].r = (doublereal) lwkopt, work[1].i = 0.; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*p < 0 || *p > *n || *p < *n - *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,*p)) { *info = -7; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = *m + *n + *p; if (*lwork < max(i__1,i__2) && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); xerbla_("ZGGLSE", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Compute the GRQ factorization of matrices B and A: B*Q' = ( 0 T12 ) P Z'*A*Q' = ( R11 R12 ) N-P N-P P ( 0 R22 ) M+P-N N-P P where T12 and R11 are upper triangular, and Q and Z are unitary. */ i__1 = *lwork - *p - mn; zggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p + 1], &work[*p + mn + 1], &i__1, info); i__1 = *p + mn + 1; lopt = (integer) work[i__1].r; /* Update c = Z'*c = ( c1 ) N-P ( c2 ) M+P-N */ i__1 = max(1,*m); i__2 = *lwork - *p - mn; zunmqr_("Left", "Conjugate Transpose", m, &c__1, &mn, &a[a_offset], lda, & work[*p + 1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info); /* Computing MAX */ i__3 = *p + mn + 1; i__1 = lopt, i__2 = (integer) work[i__3].r; lopt = max(i__1,i__2); /* Solve T12*x2 = d for x2 */ ztrsv_("Upper", "No transpose", "Non unit", p, &b_ref(1, *n - *p + 1), ldb, &d__[1], &c__1); /* Update c1 */ i__1 = *n - *p; z__1.r = -1., z__1.i = 0.; zgemv_("No transpose", &i__1, p, &z__1, &a_ref(1, *n - *p + 1), lda, &d__[ 1], &c__1, &c_b1, &c__[1], &c__1); /* Sovle R11*x1 = c1 for x1 */ i__1 = *n - *p; ztrsv_("Upper", "No transpose", "Non unit", &i__1, &a[a_offset], lda, & c__[1], &c__1); /* Put the solutions in X */ i__1 = *n - *p; zcopy_(&i__1, &c__[1], &c__1, &x[1], &c__1); zcopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1); /* Compute the residual vector: */ if (*m < *n) { nr = *m + *p - *n; i__1 = *n - *m; z__1.r = -1., z__1.i = 0.; zgemv_("No transpose", &nr, &i__1, &z__1, &a_ref(*n - *p + 1, *m + 1), lda, &d__[nr + 1], &c__1, &c_b1, &c__[*n - *p + 1], &c__1); } else { nr = *p; } ztrmv_("Upper", "No transpose", "Non unit", &nr, &a_ref(*n - *p + 1, *n - *p + 1), lda, &d__[1], &c__1); z__1.r = -1., z__1.i = 0.; zaxpy_(&nr, &z__1, &d__[1], &c__1, &c__[*n - *p + 1], &c__1); /* Backward transformation x = Q'*x */ i__1 = *lwork - *p - mn; zunmrq_("Left", "Conjugate Transpose", n, &c__1, p, &b[b_offset], ldb, & work[1], &x[1], n, &work[*p + mn + 1], &i__1, info); /* Computing MAX */ i__4 = *p + mn + 1; i__2 = lopt, i__3 = (integer) work[i__4].r; i__1 = *p + mn + max(i__2,i__3); work[1].r = (doublereal) i__1, work[1].i = 0.; return 0; /* End of ZGGLSE */ } /* zgglse_ */
/* Subroutine */ int cggev_(char *jobvl, char *jobvr, integer *n, complex *a, integer *lda, complex *b, integer *ldb, complex *alpha, complex *beta, complex *vl, integer *ldvl, complex *vr, integer *ldvr, complex * work, integer *lwork, real *rwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CGGEV computes for a pair of N-by-N complex nonsymmetric matrices (A,B), the generalized eigenvalues, and optionally, the left and/or right generalized eigenvectors. A generalized eigenvalue for a pair of matrices (A,B) is a scalar lambda or a ratio alpha/beta = lambda, such that A - lambda*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0, and even for both being zero. The right generalized eigenvector v(j) corresponding to the generalized eigenvalue lambda(j) of (A,B) satisfies A * v(j) = lambda(j) * B * v(j). The left generalized eigenvector u(j) corresponding to the generalized eigenvalues lambda(j) of (A,B) satisfies u(j)**H * A = lambda(j) * u(j)**H * B where u(j)**H is the conjugate-transpose of u(j). Arguments ========= JOBVL (input) CHARACTER*1 = 'N': do not compute the left generalized eigenvectors; = 'V': compute the left generalized eigenvectors. JOBVR (input) CHARACTER*1 = 'N': do not compute the right generalized eigenvectors; = 'V': compute the right generalized eigenvectors. N (input) INTEGER The order of the matrices A, B, VL, and VR. N >= 0. A (input/output) COMPLEX array, dimension (LDA, N) On entry, the matrix A in the pair (A,B). On exit, A has been overwritten. LDA (input) INTEGER The leading dimension of A. LDA >= max(1,N). B (input/output) COMPLEX array, dimension (LDB, N) On entry, the matrix B in the pair (A,B). On exit, B has been overwritten. LDB (input) INTEGER The leading dimension of B. LDB >= max(1,N). ALPHA (output) COMPLEX array, dimension (N) BETA (output) COMPLEX array, dimension (N) On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized eigenvalues. Note: the quotients ALPHA(j)/BETA(j) may easily over- or underflow, and BETA(j) may even be zero. Thus, the user should avoid naively computing the ratio alpha/beta. However, ALPHA will be always less than and usually comparable with norm(A) in magnitude, and BETA always less than and usually comparable with norm(B). VL (output) COMPLEX array, dimension (LDVL,N) If JOBVL = 'V', the left generalized eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. Each eigenvector will be scaled so the largest component will have abs(real part) + abs(imag. part) = 1. Not referenced if JOBVL = 'N'. LDVL (input) INTEGER The leading dimension of the matrix VL. LDVL >= 1, and if JOBVL = 'V', LDVL >= N. VR (output) COMPLEX array, dimension (LDVR,N) If JOBVR = 'V', the right generalized eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. Each eigenvector will be scaled so the largest component will have abs(real part) + abs(imag. part) = 1. Not referenced if JOBVR = 'N'. LDVR (input) INTEGER The leading dimension of the matrix VR. LDVR >= 1, and if JOBVR = 'V', LDVR >= N. WORK (workspace/output) COMPLEX array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,2*N). For good performance, LWORK must generally be larger. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. RWORK (workspace/output) REAL array, dimension (8*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. =1,...,N: The QZ iteration failed. No eigenvectors have been calculated, but ALPHA(j) and BETA(j) should be correct for j=INFO+1,...,N. > N: =N+1: other then QZ iteration failed in SHGEQZ, =N+2: error return from STGEVC. ===================================================================== Decode the input arguments Parameter adjustments */ /* Table of constant values */ static complex c_b1 = {0.f,0.f}; static complex c_b2 = {1.f,0.f}; static integer c__1 = 1; static integer c__0 = 0; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3, i__4; real r__1, r__2, r__3, r__4; complex q__1; /* Builtin functions */ double sqrt(doublereal), r_imag(complex *); /* Local variables */ static real anrm, bnrm; static integer ierr, itau; static real temp; static logical ilvl, ilvr; static integer iwrk; extern logical lsame_(char *, char *); static integer ileft, icols, irwrk, irows, jc; extern /* Subroutine */ int cggbak_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, complex *, integer *, integer *), cggbal_(char *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, real *, real *, real *, integer *), slabad_(real *, real *); static integer in; extern doublereal clange_(char *, integer *, integer *, complex *, integer *, real *); static integer jr; extern /* Subroutine */ int cgghrd_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, integer *), clascl_(char *, integer *, integer *, real *, real *, integer *, integer *, complex *, integer *, integer *); static logical ilascl, ilbscl; extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, integer *, complex *, complex *, integer *, integer *), clacpy_( char *, integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *), ctgevc_(char *, char *, logical *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, complex *, real *, integer *), xerbla_(char *, integer *); static logical ldumma[1]; static char chtemp[1]; static real bignum; extern /* Subroutine */ int chgeqz_(char *, char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, integer *, complex *, integer *, complex *, integer *, real *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern doublereal slamch_(char *); static integer ijobvl, iright, ijobvr; extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, integer *); static real anrmto; static integer lwkmin; static real bnrmto; extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *, integer *); static real smlnum; static integer lwkopt; static logical lquery; static integer ihi, ilo; static real eps; static logical ilv; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1 #define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)] #define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1 #define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --alpha; --beta; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1 * 1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1 * 1; vr -= vr_offset; --work; --rwork; /* Function Body */ if (lsame_(jobvl, "N")) { ijobvl = 1; ilvl = FALSE_; } else if (lsame_(jobvl, "V")) { ijobvl = 2; ilvl = TRUE_; } else { ijobvl = -1; ilvl = FALSE_; } if (lsame_(jobvr, "N")) { ijobvr = 1; ilvr = FALSE_; } else if (lsame_(jobvr, "V")) { ijobvr = 2; ilvr = TRUE_; } else { ijobvr = -1; ilvr = FALSE_; } ilv = ilvl || ilvr; /* Test the input arguments */ *info = 0; lquery = *lwork == -1; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldvl < 1 || ilvl && *ldvl < *n) { *info = -11; } else if (*ldvr < 1 || ilvr && *ldvr < *n) { *info = -13; } /* Compute workspace (Note: Comments in the code beginning "Workspace:" describe the minimal amount of workspace needed at that point in the code, as well as the preferred amount for good performance. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV. The workspace is computed assuming ILO = 1 and IHI = N, the worst case.) */ lwkmin = 1; if (*info == 0 && (*lwork >= 1 || lquery)) { lwkopt = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", n, &c__1, n, &c__0, ( ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = 1, i__2 = *n << 1; lwkmin = max(i__1,i__2); work[1].r = (real) lwkopt, work[1].i = 0.f; } if (*lwork < lwkmin && ! lquery) { *info = -15; } if (*info != 0) { i__1 = -(*info); xerbla_("CGGEV ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ work[1].r = (real) lwkopt, work[1].i = 0.f; if (*n == 0) { return 0; } /* Get machine constants */ eps = slamch_("E") * slamch_("B"); smlnum = slamch_("S"); bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1.f / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = clange_("M", n, n, &a[a_offset], lda, &rwork[1]); ilascl = FALSE_; if (anrm > 0.f && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & ierr); } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = clange_("M", n, n, &b[b_offset], ldb, &rwork[1]); ilbscl = FALSE_; if (bnrm > 0.f && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & ierr); } /* Permute the matrices A, B to isolate eigenvalues if possible (Real Workspace: need 6*N) */ ileft = 1; iright = *n + 1; irwrk = iright + *n; cggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ ileft], &rwork[iright], &rwork[irwrk], &ierr); /* Reduce B to triangular form (QR decomposition of B) (Complex Workspace: need N, prefer N*NB) */ irows = ihi + 1 - ilo; if (ilv) { icols = *n + 1 - ilo; } else { icols = irows; } itau = 1; iwrk = itau + irows; i__1 = *lwork + 1 - iwrk; cgeqrf_(&irows, &icols, &b_ref(ilo, ilo), ldb, &work[itau], &work[iwrk], & i__1, &ierr); /* Apply the orthogonal transformation to matrix A (Complex Workspace: need N, prefer N*NB) */ i__1 = *lwork + 1 - iwrk; cunmqr_("L", "C", &irows, &icols, &irows, &b_ref(ilo, ilo), ldb, &work[ itau], &a_ref(ilo, ilo), lda, &work[iwrk], &i__1, &ierr); /* Initialize VL (Complex Workspace: need N, prefer N*NB) */ if (ilvl) { claset_("Full", n, n, &c_b1, &c_b2, &vl[vl_offset], ldvl); i__1 = irows - 1; i__2 = irows - 1; clacpy_("L", &i__1, &i__2, &b_ref(ilo + 1, ilo), ldb, &vl_ref(ilo + 1, ilo), ldvl); i__1 = *lwork + 1 - iwrk; cungqr_(&irows, &irows, &irows, &vl_ref(ilo, ilo), ldvl, &work[itau], &work[iwrk], &i__1, &ierr); } /* Initialize VR */ if (ilvr) { claset_("Full", n, n, &c_b1, &c_b2, &vr[vr_offset], ldvr); } /* Reduce to generalized Hessenberg form */ if (ilv) { /* Eigenvectors requested -- work on whole matrix. */ cgghrd_(jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); } else { cgghrd_("N", "N", &irows, &c__1, &irows, &a_ref(ilo, ilo), lda, & b_ref(ilo, ilo), ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, &ierr); } /* Perform QZ algorithm (Compute eigenvalues, and optionally, the Schur form and Schur vectors) (Complex Workspace: need N) (Real Workspace: need N) */ iwrk = itau; if (ilv) { *(unsigned char *)chtemp = 'S'; } else { *(unsigned char *)chtemp = 'E'; } i__1 = *lwork + 1 - iwrk; chgeqz_(chtemp, jobvl, jobvr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alpha[1], &beta[1], &vl[vl_offset], ldvl, &vr[ vr_offset], ldvr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); if (ierr != 0) { if (ierr > 0 && ierr <= *n) { *info = ierr; } else if (ierr > *n && ierr <= *n << 1) { *info = ierr - *n; } else { *info = *n + 1; } goto L70; } /* Compute Eigenvectors (Real Workspace: need 2*N) (Complex Workspace: need 2*N) */ if (ilv) { if (ilvl) { if (ilvr) { *(unsigned char *)chtemp = 'B'; } else { *(unsigned char *)chtemp = 'L'; } } else { *(unsigned char *)chtemp = 'R'; } ctgevc_(chtemp, "B", ldumma, n, &a[a_offset], lda, &b[b_offset], ldb, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &in, &work[ iwrk], &rwork[irwrk], &ierr); if (ierr != 0) { *info = *n + 2; goto L70; } /* Undo balancing on VL and VR and normalization (Workspace: none needed) */ if (ilvl) { cggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, &vl[vl_offset], ldvl, &ierr); i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { temp = 0.f; i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ i__3 = vl_subscr(jr, jc); r__3 = temp, r__4 = (r__1 = vl[i__3].r, dabs(r__1)) + ( r__2 = r_imag(&vl_ref(jr, jc)), dabs(r__2)); temp = dmax(r__3,r__4); /* L10: */ } if (temp < smlnum) { goto L30; } temp = 1.f / temp; i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { i__3 = vl_subscr(jr, jc); i__4 = vl_subscr(jr, jc); q__1.r = temp * vl[i__4].r, q__1.i = temp * vl[i__4].i; vl[i__3].r = q__1.r, vl[i__3].i = q__1.i; /* L20: */ } L30: ; } } if (ilvr) { cggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, &vr[vr_offset], ldvr, &ierr); i__1 = *n; for (jc = 1; jc <= i__1; ++jc) { temp = 0.f; i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { /* Computing MAX */ i__3 = vr_subscr(jr, jc); r__3 = temp, r__4 = (r__1 = vr[i__3].r, dabs(r__1)) + ( r__2 = r_imag(&vr_ref(jr, jc)), dabs(r__2)); temp = dmax(r__3,r__4); /* L40: */ } if (temp < smlnum) { goto L60; } temp = 1.f / temp; i__2 = *n; for (jr = 1; jr <= i__2; ++jr) { i__3 = vr_subscr(jr, jc); i__4 = vr_subscr(jr, jc); q__1.r = temp * vr[i__4].r, q__1.i = temp * vr[i__4].i; vr[i__3].r = q__1.r, vr[i__3].i = q__1.i; /* L50: */ } L60: ; } } } /* Undo scaling if necessary */ if (ilascl) { clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & ierr); } if (ilbscl) { clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & ierr); } L70: work[1].r = (real) lwkopt, work[1].i = 0.f; return 0; /* End of CGGEV */ } /* cggev_ */
extern "C" magma_int_t magma_dgeqrf_ooc(magma_int_t m, magma_int_t n, double *a, magma_int_t lda, double *tau, double *work, magma_int_t lwork, magma_int_t *info ) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= DGEQRF_OOC computes a QR factorization of a DOUBLE_PRECISION M-by-N matrix A: A = Q * R. This version does not require work space on the GPU passed as input. GPU memory is allocated in the routine. This is an out-of-core (ooc) version that is similar to magma_dgeqrf but the difference is that this version can use a GPU even if the matrix does not fit into the GPU memory at once. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) DOUBLE_PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). Higher performance is achieved if A is in pinned memory, e.g. allocated using magma_malloc_pinned. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). TAU (output) DOUBLE_PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= N*NB, where NB can be obtained through magma_get_dgeqrf_nb(M). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. Further Details =============== The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i). ===================================================================== */ #define a_ref(a_1,a_2) ( a+(a_2)*(lda) + (a_1)) #define da_ref(a_1,a_2) (da+(a_2)*ldda + (a_1)) double *da, *dwork; double c_one = MAGMA_D_ONE; int k, lddwork, ldda; *info = 0; int nb = magma_get_dgeqrf_nb(min(m, n)); int lwkopt = n * nb; work[0] = MAGMA_D_MAKE( (double)lwkopt, 0 ); int lquery = (lwork == -1); if (m < 0) { *info = -1; } else if (n < 0) { *info = -2; } else if (lda < max(1,m)) { *info = -4; } else if (lwork < max(1,n) && ! lquery) { *info = -7; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) return *info; /* Check how much memory do we have */ size_t freeMem, totalMem; cudaMemGetInfo( &freeMem, &totalMem ); freeMem /= sizeof(double); magma_int_t IB, NB = (magma_int_t)(0.8*freeMem/m); NB = (NB / nb) * nb; if (NB >= n) return magma_dgeqrf(m, n, a, lda, tau, work, lwork, info); k = min(m,n); if (k == 0) { work[0] = c_one; return *info; } lddwork = ((NB+31)/32)*32+nb; ldda = ((m+31)/32)*32; if (MAGMA_SUCCESS != magma_dmalloc( &da, (NB + nb)*ldda + nb*lddwork )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } magma_queue_t stream[2]; magma_queue_create( &stream[0] ); magma_queue_create( &stream[1] ); // magmablasSetKernelStream(stream[1]); double *ptr = da + ldda * NB; dwork = da + ldda*(NB + nb); /* start the main loop over the blocks that fit in the GPU memory */ for(int i=0; i<n; i+=NB) { IB = min(n-i, NB); //printf("Processing %5d columns -- %5d to %5d ... \n", IB, i, i+IB); /* 1. Copy the next part of the matrix to the GPU */ magma_dsetmatrix_async( (m), IB, a_ref(0,i), lda, da_ref(0,0), ldda, stream[0] ); magma_queue_sync( stream[0] ); /* 2. Update it with the previous transformations */ for(int j=0; j<min(i,k); j+=nb) { magma_int_t ib = min(k-j, nb); /* Get a panel in ptr. */ // 1. Form the triangular factor of the block reflector // 2. Send it to the GPU. // 3. Put 0s in the upper triangular part of V. // 4. Send V to the GPU in ptr. // 5. Update the matrix. // 6. Restore the upper part of V. magma_int_t rows = m-j; lapackf77_dlarft( MagmaForwardStr, MagmaColumnwiseStr, &rows, &ib, a_ref(j,j), &lda, tau+j, work, &ib); magma_dsetmatrix_async( ib, ib, work, ib, dwork, lddwork, stream[1] ); dpanel_to_q(MagmaUpper, ib, a_ref(j,j), lda, work+ib*ib); magma_dsetmatrix_async( rows, ib, a_ref(j,j), lda, ptr, rows, stream[1] ); magma_queue_sync( stream[1] ); magma_dlarfb_gpu( MagmaLeft, MagmaTrans, MagmaForward, MagmaColumnwise, rows, IB, ib, ptr, rows, dwork, lddwork, da_ref(j, 0), ldda, dwork+ib, lddwork); dq_to_panel(MagmaUpper, ib, a_ref(j,j), lda, work+ib*ib); } /* 3. Do a QR on the current part */ if (i<k) magma_dgeqrf2_gpu(m-i, IB, da_ref(i,0), ldda, tau+i, info); /* 4. Copy the current part back to the CPU */ magma_dgetmatrix_async( (m), IB, da_ref(0,0), ldda, a_ref(0,i), lda, stream[0] ); } magma_queue_sync( stream[0] ); magma_queue_destroy( stream[0] ); magma_queue_destroy( stream[1] ); magma_free( da ); return *info; } /* magma_dgeqrf_ooc */
/* Subroutine */ int dsyevx_(char *jobz, char *range, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *vl, doublereal *vu, integer * il, integer *iu, doublereal *abstol, integer *m, doublereal *w, doublereal *z__, integer *ldz, doublereal *work, integer *lwork, integer *iwork, integer *ifail, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DSYEVX computes selected eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. Eigenvalues and eigenvectors can be selected by specifying either a range of values or a range of indices for the desired eigenvalues. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. RANGE (input) CHARACTER*1 = 'A': all eigenvalues will be found. = 'V': all eigenvalues in the half-open interval (VL,VU] will be found. = 'I': the IL-th through IU-th eigenvalues will be found. UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA, N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, the lower triangle (if UPLO='L') or the upper triangle (if UPLO='U') of A, including the diagonal, is destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). VL (input) DOUBLE PRECISION VU (input) DOUBLE PRECISION If RANGE='V', the lower and upper bounds of the interval to be searched for eigenvalues. VL < VU. Not referenced if RANGE = 'A' or 'I'. IL (input) INTEGER IU (input) INTEGER If RANGE='I', the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. Not referenced if RANGE = 'A' or 'V'. ABSTOL (input) DOUBLE PRECISION The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to ABSTOL + EPS * max( |a|,|b| ) , where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*|T| will be used in its place, where |T| is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form. Eigenvalues will be computed most accurately when ABSTOL is set to twice the underflow threshold 2*DLAMCH('S'), not zero. If this routine returns with INFO>0, indicating that some eigenvectors did not converge, try setting ABSTOL to 2*DLAMCH('S'). See "Computing Small Singular Values of Bidiagonal Matrices with Guaranteed High Relative Accuracy," by Demmel and Kahan, LAPACK Working Note #3. M (output) INTEGER The total number of eigenvalues found. 0 <= M <= N. If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. W (output) DOUBLE PRECISION array, dimension (N) On normal exit, the first M elements contain the selected eigenvalues in ascending order. Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M)) If JOBZ = 'V', then if INFO = 0, the first M columns of Z contain the orthonormal eigenvectors of the matrix A corresponding to the selected eigenvalues, with the i-th column of Z holding the eigenvector associated with W(i). If an eigenvector fails to converge, then that column of Z contains the latest approximation to the eigenvector, and the index of the eigenvector is returned in IFAIL. If JOBZ = 'N', then Z is not referenced. Note: the user must ensure that at least max(1,M) columns are supplied in the array Z; if RANGE = 'V', the exact value of M is not known in advance and an upper bound must be used. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1, and if JOBZ = 'V', LDZ >= max(1,N). WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The length of the array WORK. LWORK >= max(1,8*N). For optimal efficiency, LWORK >= (NB+3)*N, where NB is the max of the blocksize for DSYTRD and DORMTR returned by ILAENV. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace) INTEGER array, dimension (5*N) IFAIL (output) INTEGER array, dimension (N) If JOBZ = 'V', then if INFO = 0, the first M elements of IFAIL are zero. If INFO > 0, then IFAIL contains the indices of the eigenvectors that failed to converge. If JOBZ = 'N', then IFAIL is not referenced. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, then i eigenvectors failed to converge. Their indices are stored in array IFAIL. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer indd, inde; static doublereal anrm; static integer imax; static doublereal rmin, rmax; static integer lopt, itmp1, i__, j, indee; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static doublereal sigma; extern logical lsame_(char *, char *); static integer iinfo; static char order[1]; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *, doublereal *, integer *); static logical lower, wantz; static integer nb, jj; extern doublereal dlamch_(char *); static logical alleig, indeig; static integer iscale, indibl; static logical valeig; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal safmin; extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal abstll, bignum; static integer indtau, indisp; extern /* Subroutine */ int dstein_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *), dsterf_(integer *, doublereal *, doublereal *, integer *); static integer indiwo, indwkn; extern doublereal dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dstebz_(char *, char *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); static integer indwrk; extern /* Subroutine */ int dorgtr_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), dsteqr_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *), dormtr_(char *, char *, char *, integer *, integer *, doublereal * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static integer llwrkn, llwork, nsplit; static doublereal smlnum; extern /* Subroutine */ int dsytrd_(char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static integer lwkopt; static logical lquery; static doublereal eps, vll, vuu, tmp1; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ lower = lsame_(uplo, "L"); wantz = lsame_(jobz, "V"); alleig = lsame_(range, "A"); valeig = lsame_(range, "V"); indeig = lsame_(range, "I"); lquery = *lwork == -1; *info = 0; if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (lower || lsame_(uplo, "U"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -8; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -9; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -10; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -15; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = *n << 3; if (*lwork < max(i__1,i__2) && ! lquery) { *info = -17; } } } if (*info == 0) { nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ i__1 = nb, i__2 = ilaenv_(&c__1, "DORMTR", uplo, n, &c_n1, &c_n1, & c_n1, (ftnlen)6, (ftnlen)1); nb = max(i__1,i__2); lwkopt = (nb + 3) * *n; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); xerbla_("DSYEVX", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { work[1] = 1.; return 0; } if (*n == 1) { work[1] = 7.; if (alleig || indeig) { *m = 1; w[1] = a_ref(1, 1); } else { if (*vl < a_ref(1, 1) && *vu >= a_ref(1, 1)) { *m = 1; w[1] = a_ref(1, 1); } } if (wantz) { z___ref(1, 1) = 1.; } return 0; } /* Get machine constants. */ safmin = dlamch_("Safe minimum"); eps = dlamch_("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = sqrt(smlnum); /* Computing MIN */ d__1 = sqrt(bignum), d__2 = 1. / sqrt(sqrt(safmin)); rmax = min(d__1,d__2); /* Scale matrix to allowable range, if necessary. */ iscale = 0; abstll = *abstol; vll = *vl; vuu = *vu; anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { if (lower) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n - j + 1; dscal_(&i__2, &sigma, &a_ref(j, j), &c__1); /* L10: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { dscal_(&j, &sigma, &a_ref(1, j), &c__1); /* L20: */ } } if (*abstol > 0.) { abstll = *abstol * sigma; } if (valeig) { vll = *vl * sigma; vuu = *vu * sigma; } } /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ indtau = 1; inde = indtau + *n; indd = inde + *n; indwrk = indd + *n; llwork = *lwork - indwrk + 1; dsytrd_(uplo, n, &a[a_offset], lda, &work[indd], &work[inde], &work[ indtau], &work[indwrk], &llwork, &iinfo); lopt = (integer) (*n * 3 + work[indwrk]); /* If all eigenvalues are desired and ABSTOL is less than or equal to zero, then call DSTERF or DORGTR and SSTEQR. If this fails for some eigenvalue, then try DSTEBZ. */ if ((alleig || indeig && *il == 1 && *iu == *n) && *abstol <= 0.) { dcopy_(n, &work[indd], &c__1, &w[1], &c__1); indee = indwrk + (*n << 1); if (! wantz) { i__1 = *n - 1; dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); dsterf_(n, &w[1], &work[indee], info); } else { dlacpy_("A", n, n, &a[a_offset], lda, &z__[z_offset], ldz); dorgtr_(uplo, n, &z__[z_offset], ldz, &work[indtau], &work[indwrk] , &llwork, &iinfo); i__1 = *n - 1; dcopy_(&i__1, &work[inde], &c__1, &work[indee], &c__1); dsteqr_(jobz, n, &w[1], &work[indee], &z__[z_offset], ldz, &work[ indwrk], info); if (*info == 0) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { ifail[i__] = 0; /* L30: */ } } } if (*info == 0) { *m = *n; goto L40; } *info = 0; } /* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. */ if (wantz) { *(unsigned char *)order = 'B'; } else { *(unsigned char *)order = 'E'; } indibl = 1; indisp = indibl + *n; indiwo = indisp + *n; dstebz_(range, order, n, &vll, &vuu, il, iu, &abstll, &work[indd], &work[ inde], m, &nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[ indwrk], &iwork[indiwo], info); if (wantz) { dstein_(n, &work[indd], &work[inde], m, &w[1], &iwork[indibl], &iwork[ indisp], &z__[z_offset], ldz, &work[indwrk], &iwork[indiwo], & ifail[1], info); /* Apply orthogonal matrix used in reduction to tridiagonal form to eigenvectors returned by DSTEIN. */ indwkn = inde; llwrkn = *lwork - indwkn + 1; dormtr_("L", uplo, "N", n, m, &a[a_offset], lda, &work[indtau], &z__[ z_offset], ldz, &work[indwkn], &llwrkn, &iinfo); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ L40: if (iscale == 1) { if (*info == 0) { imax = *m; } else { imax = *info - 1; } d__1 = 1. / sigma; dscal_(&imax, &d__1, &w[1], &c__1); } /* If eigenvalues are not in order, then sort them, along with eigenvectors. */ if (wantz) { i__1 = *m - 1; for (j = 1; j <= i__1; ++j) { i__ = 0; tmp1 = w[j]; i__2 = *m; for (jj = j + 1; jj <= i__2; ++jj) { if (w[jj] < tmp1) { i__ = jj; tmp1 = w[jj]; } /* L50: */ } if (i__ != 0) { itmp1 = iwork[indibl + i__ - 1]; w[i__] = w[j]; iwork[indibl + i__ - 1] = iwork[indibl + j - 1]; w[j] = tmp1; iwork[indibl + j - 1] = itmp1; dswap_(n, &z___ref(1, i__), &c__1, &z___ref(1, j), &c__1); if (*info != 0) { itmp1 = ifail[i__]; ifail[i__] = ifail[j]; ifail[j] = itmp1; } } /* L60: */ } } /* Set WORK(1) to optimal workspace size. */ work[1] = (doublereal) lwkopt; return 0; /* End of DSYEVX */ } /* dsyevx_ */
/* Subroutine */ int cggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp selctg, char *sense, integer *n, complex *a, integer *lda, complex *b, integer *ldb, integer *sdim, complex *alpha, complex *beta, complex * vsl, integer *ldvsl, complex *vsr, integer *ldvsr, real *rconde, real *rcondv, complex *work, integer *lwork, real *rwork, integer *iwork, integer *liwork, logical *bwork, integer *info) { /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CGGESX computes for a pair of N-by-N complex nonsymmetric matrices (A,B), the generalized eigenvalues, the complex Schur form (S,T), and, optionally, the left and/or right matrices of Schur vectors (VSL and VSR). This gives the generalized Schur factorization (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H ) where (VSR)**H is the conjugate-transpose of VSR. Optionally, it also orders the eigenvalues so that a selected cluster of eigenvalues appears in the leading diagonal blocks of the upper triangular matrix S and the upper triangular matrix T; computes a reciprocal condition number for the average of the selected eigenvalues (RCONDE); and computes a reciprocal condition number for the right and left deflating subspaces corresponding to the selected eigenvalues (RCONDV). The leading columns of VSL and VSR then form an orthonormal basis for the corresponding left and right eigenspaces (deflating subspaces). A generalized eigenvalue for a pair of matrices (A,B) is a scalar w or a ratio alpha/beta = w, such that A - w*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0 or for both being zero. A pair of matrices (S,T) is in generalized complex Schur form if T is upper triangular with non-negative diagonal and S is upper triangular. Arguments ========= JOBVSL (input) CHARACTER*1 = 'N': do not compute the left Schur vectors; = 'V': compute the left Schur vectors. JOBVSR (input) CHARACTER*1 = 'N': do not compute the right Schur vectors; = 'V': compute the right Schur vectors. SORT (input) CHARACTER*1 Specifies whether or not to order the eigenvalues on the diagonal of the generalized Schur form. = 'N': Eigenvalues are not ordered; = 'S': Eigenvalues are ordered (see SELCTG). SELCTG (input) LOGICAL FUNCTION of two COMPLEX arguments SELCTG must be declared EXTERNAL in the calling subroutine. If SORT = 'N', SELCTG is not referenced. If SORT = 'S', SELCTG is used to select eigenvalues to sort to the top left of the Schur form. Note that a selected complex eigenvalue may no longer satisfy SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since ordering may change the value of complex eigenvalues (especially if the eigenvalue is ill-conditioned), in this case INFO is set to N+3 see INFO below). SENSE (input) CHARACTER Determines which reciprocal condition numbers are computed. = 'N' : None are computed; = 'E' : Computed for average of selected eigenvalues only; = 'V' : Computed for selected deflating subspaces only; = 'B' : Computed for both. If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. N (input) INTEGER The order of the matrices A, B, VSL, and VSR. N >= 0. A (input/output) COMPLEX array, dimension (LDA, N) On entry, the first of the pair of matrices. On exit, A has been overwritten by its generalized Schur form S. LDA (input) INTEGER The leading dimension of A. LDA >= max(1,N). B (input/output) COMPLEX array, dimension (LDB, N) On entry, the second of the pair of matrices. On exit, B has been overwritten by its generalized Schur form T. LDB (input) INTEGER The leading dimension of B. LDB >= max(1,N). SDIM (output) INTEGER If SORT = 'N', SDIM = 0. If SORT = 'S', SDIM = number of eigenvalues (after sorting) for which SELCTG is true. ALPHA (output) COMPLEX array, dimension (N) BETA (output) COMPLEX array, dimension (N) On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized eigenvalues. ALPHA(j) and BETA(j),j=1,...,N are the diagonals of the complex Schur form (S,T). BETA(j) will be non-negative real. Note: the quotients ALPHA(j)/BETA(j) may easily over- or underflow, and BETA(j) may even be zero. Thus, the user should avoid naively computing the ratio alpha/beta. However, ALPHA will be always less than and usually comparable with norm(A) in magnitude, and BETA always less than and usually comparable with norm(B). VSL (output) COMPLEX array, dimension (LDVSL,N) If JOBVSL = 'V', VSL will contain the left Schur vectors. Not referenced if JOBVSL = 'N'. LDVSL (input) INTEGER The leading dimension of the matrix VSL. LDVSL >=1, and if JOBVSL = 'V', LDVSL >= N. VSR (output) COMPLEX array, dimension (LDVSR,N) If JOBVSR = 'V', VSR will contain the right Schur vectors. Not referenced if JOBVSR = 'N'. LDVSR (input) INTEGER The leading dimension of the matrix VSR. LDVSR >= 1, and if JOBVSR = 'V', LDVSR >= N. RCONDE (output) REAL array, dimension ( 2 ) If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the reciprocal condition numbers for the average of the selected eigenvalues. Not referenced if SENSE = 'N' or 'V'. RCONDV (output) REAL array, dimension ( 2 ) If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the reciprocal condition number for the selected deflating subspaces. Not referenced if SENSE = 'N' or 'E'. WORK (workspace/output) COMPLEX array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= 2*N. If SENSE = 'E', 'V', or 'B', LWORK >= MAX(2*N, 2*SDIM*(N-SDIM)). RWORK (workspace) REAL array, dimension ( 8*N ) Real workspace. IWORK (workspace/output) INTEGER array, dimension (LIWORK) Not referenced if SENSE = 'N'. On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER The dimension of the array WORK. LIWORK >= N+2. BWORK (workspace) LOGICAL array, dimension (N) Not referenced if SORT = 'N'. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. = 1,...,N: The QZ iteration failed. (A,B) are not in Schur form, but ALPHA(j) and BETA(j) should be correct for j=INFO+1,...,N. > N: =N+1: other than QZ iteration failed in CHGEQZ =N+2: after reordering, roundoff changed values of some complex eigenvalues so that leading eigenvalues in the Generalized Schur form no longer satisfy SELCTG=.TRUE. This could also be caused due to scaling. =N+3: reordering failed in CTGSEN. ===================================================================== Decode the input arguments Parameter adjustments */ /* Table of constant values */ static complex c_b1 = {0.f,0.f}; static complex c_b2 = {1.f,0.f}; static integer c__1 = 1; static integer c__0 = 0; static integer c_n1 = -1; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, vsl_dim1, vsl_offset, vsr_dim1, vsr_offset, i__1, i__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer ijob; static real anrm, bnrm; static integer ierr, itau, iwrk, i__; extern logical lsame_(char *, char *); static integer ileft, icols; static logical cursl, ilvsl, ilvsr; static integer irwrk, irows; extern /* Subroutine */ int cggbak_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, complex *, integer *, integer *), cggbal_(char *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, real *, real *, real *, integer *), slabad_(real *, real *); extern doublereal clange_(char *, integer *, integer *, complex *, integer *, real *); static real pl; extern /* Subroutine */ int cgghrd_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, integer *), clascl_(char *, integer *, integer *, real *, real *, integer *, integer *, complex *, integer *, integer *); static real pr; static logical ilascl, ilbscl; extern /* Subroutine */ int cgeqrf_(integer *, integer *, complex *, integer *, complex *, complex *, integer *, integer *), clacpy_( char *, integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern doublereal slamch_(char *); static real bignum; extern /* Subroutine */ int chgeqz_(char *, char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, integer *, complex *, integer *, complex *, integer *, real *, integer *), ctgsen_(integer *, logical *, logical *, logical *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, integer *, complex *, integer *, integer *, real *, real *, real *, complex *, integer *, integer *, integer *, integer *); static integer ijobvl, iright, ijobvr; static logical wantsb; static integer liwmin; static logical wantse, lastsl; static real anrmto, bnrmto; extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, integer *); static integer minwrk, maxwrk; static logical wantsn; static real smlnum; extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *, integer *); static logical wantst, wantsv; static real dif[2]; static integer ihi, ilo; static real eps; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define vsl_subscr(a_1,a_2) (a_2)*vsl_dim1 + a_1 #define vsl_ref(a_1,a_2) vsl[vsl_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --alpha; --beta; vsl_dim1 = *ldvsl; vsl_offset = 1 + vsl_dim1 * 1; vsl -= vsl_offset; vsr_dim1 = *ldvsr; vsr_offset = 1 + vsr_dim1 * 1; vsr -= vsr_offset; --rconde; --rcondv; --work; --rwork; --iwork; --bwork; /* Function Body */ if (lsame_(jobvsl, "N")) { ijobvl = 1; ilvsl = FALSE_; } else if (lsame_(jobvsl, "V")) { ijobvl = 2; ilvsl = TRUE_; } else { ijobvl = -1; ilvsl = FALSE_; } if (lsame_(jobvsr, "N")) { ijobvr = 1; ilvsr = FALSE_; } else if (lsame_(jobvsr, "V")) { ijobvr = 2; ilvsr = TRUE_; } else { ijobvr = -1; ilvsr = FALSE_; } wantst = lsame_(sort, "S"); wantsn = lsame_(sense, "N"); wantse = lsame_(sense, "E"); wantsv = lsame_(sense, "V"); wantsb = lsame_(sense, "B"); if (wantsn) { ijob = 0; iwork[1] = 1; } else if (wantse) { ijob = 1; } else if (wantsv) { ijob = 2; } else if (wantsb) { ijob = 4; } /* Test the input arguments */ *info = 0; if (ijobvl <= 0) { *info = -1; } else if (ijobvr <= 0) { *info = -2; } else if (! wantst && ! lsame_(sort, "N")) { *info = -3; } else if (! (wantsn || wantse || wantsv || wantsb) || ! wantst && ! wantsn) { *info = -5; } else if (*n < 0) { *info = -6; } else if (*lda < max(1,*n)) { *info = -8; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldvsl < 1 || ilvsl && *ldvsl < *n) { *info = -15; } else if (*ldvsr < 1 || ilvsr && *ldvsr < *n) { *info = -17; } /* Compute workspace (Note: Comments in the code beginning "Workspace:" describe the minimal amount of workspace needed at that point in the code, as well as the preferred amount for good performance. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV.) */ minwrk = 1; if (*info == 0 && *lwork >= 1) { /* Computing MAX */ i__1 = 1, i__2 = *n << 1; minwrk = max(i__1,i__2); maxwrk = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", n, &c__1, n, &c__0, ( ftnlen)6, (ftnlen)1); if (ilvsl) { /* Computing MAX */ i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", " ", n, & c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); } work[1].r = (real) maxwrk, work[1].i = 0.f; } if (! wantsn) { liwmin = *n + 2; } else { liwmin = 1; } iwork[1] = liwmin; if (*info == 0 && *lwork < minwrk) { *info = -21; } else if (*info == 0 && ijob >= 1) { if (*liwork < liwmin) { *info = -24; } } if (*info != 0) { i__1 = -(*info); xerbla_("CGGESX", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { *sdim = 0; return 0; } /* Get machine constants */ eps = slamch_("P"); smlnum = slamch_("S"); bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; bignum = 1.f / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = clange_("M", n, n, &a[a_offset], lda, &rwork[1]); ilascl = FALSE_; if (anrm > 0.f && anrm < smlnum) { anrmto = smlnum; ilascl = TRUE_; } else if (anrm > bignum) { anrmto = bignum; ilascl = TRUE_; } if (ilascl) { clascl_("G", &c__0, &c__0, &anrm, &anrmto, n, n, &a[a_offset], lda, & ierr); } /* Scale B if max element outside range [SMLNUM,BIGNUM] */ bnrm = clange_("M", n, n, &b[b_offset], ldb, &rwork[1]); ilbscl = FALSE_; if (bnrm > 0.f && bnrm < smlnum) { bnrmto = smlnum; ilbscl = TRUE_; } else if (bnrm > bignum) { bnrmto = bignum; ilbscl = TRUE_; } if (ilbscl) { clascl_("G", &c__0, &c__0, &bnrm, &bnrmto, n, n, &b[b_offset], ldb, & ierr); } /* Permute the matrix to make it more nearly triangular (Real Workspace: need 6*N) */ ileft = 1; iright = *n + 1; irwrk = iright + *n; cggbal_("P", n, &a[a_offset], lda, &b[b_offset], ldb, &ilo, &ihi, &rwork[ ileft], &rwork[iright], &rwork[irwrk], &ierr); /* Reduce B to triangular form (QR decomposition of B) (Complex Workspace: need N, prefer N*NB) */ irows = ihi + 1 - ilo; icols = *n + 1 - ilo; itau = 1; iwrk = itau + irows; i__1 = *lwork + 1 - iwrk; cgeqrf_(&irows, &icols, &b_ref(ilo, ilo), ldb, &work[itau], &work[iwrk], & i__1, &ierr); /* Apply the unitary transformation to matrix A (Complex Workspace: need N, prefer N*NB) */ i__1 = *lwork + 1 - iwrk; cunmqr_("L", "C", &irows, &icols, &irows, &b_ref(ilo, ilo), ldb, &work[ itau], &a_ref(ilo, ilo), lda, &work[iwrk], &i__1, &ierr); /* Initialize VSL (Complex Workspace: need N, prefer N*NB) */ if (ilvsl) { claset_("Full", n, n, &c_b1, &c_b2, &vsl[vsl_offset], ldvsl); i__1 = irows - 1; i__2 = irows - 1; clacpy_("L", &i__1, &i__2, &b_ref(ilo + 1, ilo), ldb, &vsl_ref(ilo + 1, ilo), ldvsl); i__1 = *lwork + 1 - iwrk; cungqr_(&irows, &irows, &irows, &vsl_ref(ilo, ilo), ldvsl, &work[itau] , &work[iwrk], &i__1, &ierr); } /* Initialize VSR */ if (ilvsr) { claset_("Full", n, n, &c_b1, &c_b2, &vsr[vsr_offset], ldvsr); } /* Reduce to generalized Hessenberg form (Workspace: none needed) */ cgghrd_(jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[b_offset], ldb, &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, &ierr); *sdim = 0; /* Perform QZ algorithm, computing Schur vectors if desired (Complex Workspace: need N) (Real Workspace: need N) */ iwrk = itau; i__1 = *lwork + 1 - iwrk; chgeqz_("S", jobvsl, jobvsr, n, &ilo, &ihi, &a[a_offset], lda, &b[ b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, & vsr[vsr_offset], ldvsr, &work[iwrk], &i__1, &rwork[irwrk], &ierr); if (ierr != 0) { if (ierr > 0 && ierr <= *n) { *info = ierr; } else if (ierr > *n && ierr <= *n << 1) { *info = ierr - *n; } else { *info = *n + 1; } goto L40; } /* Sort eigenvalues ALPHA/BETA and compute the reciprocal of condition number(s) */ if (wantst) { /* Undo scaling on eigenvalues before SELCTGing */ if (ilascl) { clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, &ierr); } if (ilbscl) { clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, &ierr); } /* Select eigenvalues */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { bwork[i__] = (*selctg)(&alpha[i__], &beta[i__]); /* L10: */ } /* Reorder eigenvalues, transform Generalized Schur vectors, and compute reciprocal condition numbers (Complex Workspace: If IJOB >= 1, need MAX(1, 2*SDIM*(N-SDIM)) otherwise, need 1 ) */ i__1 = *lwork - iwrk + 1; ctgsen_(&ijob, &ilvsl, &ilvsr, &bwork[1], n, &a[a_offset], lda, &b[ b_offset], ldb, &alpha[1], &beta[1], &vsl[vsl_offset], ldvsl, &vsr[vsr_offset], ldvsr, sdim, &pl, &pr, dif, &work[iwrk], & i__1, &iwork[1], liwork, &ierr); if (ijob >= 1) { /* Computing MAX */ i__1 = maxwrk, i__2 = (*sdim << 1) * (*n - *sdim); maxwrk = max(i__1,i__2); } if (ierr == -21) { /* not enough complex workspace */ *info = -21; } else { rconde[1] = pl; rconde[2] = pl; rcondv[1] = dif[0]; rcondv[2] = dif[1]; if (ierr == 1) { *info = *n + 3; } } } /* Apply permutation to VSL and VSR (Workspace: none needed) */ if (ilvsl) { cggbak_("P", "L", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & vsl[vsl_offset], ldvsl, &ierr); } if (ilvsr) { cggbak_("P", "R", n, &ilo, &ihi, &rwork[ileft], &rwork[iright], n, & vsr[vsr_offset], ldvsr, &ierr); } /* Undo scaling */ if (ilascl) { clascl_("U", &c__0, &c__0, &anrmto, &anrm, n, n, &a[a_offset], lda, & ierr); clascl_("G", &c__0, &c__0, &anrmto, &anrm, n, &c__1, &alpha[1], n, & ierr); } if (ilbscl) { clascl_("U", &c__0, &c__0, &bnrmto, &bnrm, n, n, &b[b_offset], ldb, & ierr); clascl_("G", &c__0, &c__0, &bnrmto, &bnrm, n, &c__1, &beta[1], n, & ierr); } /* L20: */ if (wantst) { /* Check if reordering is correct */ lastsl = TRUE_; *sdim = 0; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { cursl = (*selctg)(&alpha[i__], &beta[i__]); if (cursl) { ++(*sdim); } if (cursl && ! lastsl) { *info = *n + 2; } lastsl = cursl; /* L30: */ } } L40: work[1].r = (real) maxwrk, work[1].i = 0.f; iwork[1] = liwmin; return 0; /* End of CGGESX */ } /* cggesx_ */
/* Subroutine */ HYPRE_Int dormbr_(const char *vect,const char *side,const char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q TRANS = 'T': Q**T * C C * Q**T If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': P * C C * P TRANS = 'T': P**T * C C * P**T Here Q and P**T are the orthogonal matrices determined by DGEBRD when reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and P**T are defined as products of elementary reflectors H(i) and G(i) respectively. Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the order of the orthogonal matrix Q or P**T that is applied. If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: if nq >= k, Q = H(1) H(2) . . . H(k); if nq < k, Q = H(1) H(2) . . . H(nq-1). If VECT = 'P', A is assumed to have been a K-by-NQ matrix: if k < nq, P = G(1) G(2) . . . G(k); if k >= nq, P = G(1) G(2) . . . G(nq-1). Arguments ========= VECT (input) CHARACTER*1 = 'Q': apply Q or Q**T; = 'P': apply P or P**T. SIDE (input) CHARACTER*1 = 'L': apply Q, Q**T, P or P**T from the Left; = 'R': apply Q, Q**T, P or P**T from the Right. TRANS (input) CHARACTER*1 = 'N': No transpose, apply Q or P; = 'T': Transpose, apply Q**T or P**T. M (input) INTEGER The number of rows of the matrix C. M >= 0. N (input) INTEGER The number of columns of the matrix C. N >= 0. K (input) INTEGER If VECT = 'Q', the number of columns in the original matrix reduced by DGEBRD. If VECT = 'P', the number of rows in the original matrix reduced by DGEBRD. K >= 0. A (input) DOUBLE PRECISION array, dimension (LDA,min(nq,K)) if VECT = 'Q' (LDA,nq) if VECT = 'P' The vectors which define the elementary reflectors H(i) and G(i), whose products determine the matrices Q and P, as returned by DGEBRD. LDA (input) INTEGER The leading dimension of the array A. If VECT = 'Q', LDA >= max(1,nq); if VECT = 'P', LDA >= max(1,min(nq,K)). TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) TAU(i) must contain the scalar factor of the elementary reflector H(i) or G(i) which determines Q or P, as returned by DGEBRD in the array argument TAUQ or TAUP. C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q or P*C or P**T*C or C*P or C*P**T. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If SIDE = 'L', LWORK >= max(1,N); if SIDE = 'R', LWORK >= max(1,M). For optimum performance LWORK >= N*NB if SIDE = 'L', and LWORK >= M*NB if SIDE = 'R', where NB is the optimal blocksize. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input arguments Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; char ch__1[2]; /* Builtin functions Subroutine */ HYPRE_Int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ static logical left; extern logical lsame_(const char *,const char *); static integer iinfo, i1, i2, nb, mi, ni, nq, nw; extern /* Subroutine */ HYPRE_Int xerbla_(const char *, integer *); extern integer ilaenv_(integer *,const char *,const char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ HYPRE_Int dormlq_(const char *,const char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static logical notran; extern /* Subroutine */ HYPRE_Int dormqr_(const char *,const char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static logical applyq; static char transt[1]; static integer lwkopt; static logical lquery; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; --work; /* Function Body */ *info = 0; applyq = lsame_(vect, "Q"); left = lsame_(side, "L"); notran = lsame_(trans, "N"); lquery = *lwork == -1; /* NQ is the order of Q or P and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = *n; } else { nq = *n; nw = *m; } if (! applyq && ! lsame_(vect, "P")) { *info = -1; } else if (! left && ! lsame_(side, "R")) { *info = -2; } else if (! notran && ! lsame_(trans, "T")) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*k < 0) { *info = -6; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = min(nq,*k); if (((applyq) && (*lda < max(1,nq))) || ((! applyq) && (*lda < max(i__1,i__2)))) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -11; } else if (*lwork < max(1,nw) && ! lquery) { *info = -13; } } if (*info == 0) { if (applyq) { if (left) { /* Writing concatenation */ i__3[0] = 1, a__1[0] = (char*)side; i__3[1] = 1, a__1[1] = (char*)trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *m - 1; i__2 = *m - 1; nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } else { /* Writing concatenation */ i__3[0] = 1, a__1[0] = (char*)side; i__3[1] = 1, a__1[1] = (char*)trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *n - 1; i__2 = *n - 1; nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } } else { if (left) { /* Writing concatenation */ i__3[0] = 1, a__1[0] = (char*)side; i__3[1] = 1, a__1[1] = (char*)trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *m - 1; i__2 = *m - 1; nb = ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } else { /* Writing concatenation */ i__3[0] = 1, a__1[0] = (char*)side; i__3[1] = 1, a__1[1] = (char*)trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *n - 1; i__2 = *n - 1; nb = ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } } lwkopt = max(1,nw) * nb; work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); xerbla_("DORMBR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ work[1] = 1.; if (*m == 0 || *n == 0) { return 0; } if (applyq) { /* Apply Q */ if (nq >= *k) { /* Q was determined by a call to DGEBRD with nq >= k */ dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], lwork, &iinfo); } else if (nq > 1) { /* Q was determined by a call to DGEBRD with nq < k */ if (left) { mi = *m - 1; ni = *n; i1 = 2; i2 = 1; } else { mi = *m; ni = *n - 1; i1 = 1; i2 = 2; } i__1 = nq - 1; dormqr_(side, trans, &mi, &ni, &i__1, &a_ref(2, 1), lda, &tau[1], &c___ref(i1, i2), ldc, &work[1], lwork, &iinfo); } } else { /* Apply P */ if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } if (nq > *k) { /* P was determined by a call to DGEBRD with nq > k */ dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], lwork, &iinfo); } else if (nq > 1) { /* P was determined by a call to DGEBRD with nq <= k */ if (left) { mi = *m - 1; ni = *n; i1 = 2; i2 = 1; } else { mi = *m; ni = *n - 1; i1 = 1; i2 = 2; } i__1 = nq - 1; dormlq_(side, transt, &mi, &ni, &i__1, &a_ref(1, 2), lda, &tau[1], &c___ref(i1, i2), ldc, &work[1], lwork, &iinfo); } } work[1] = (doublereal) lwkopt; return 0; /* End of DORMBR */ } /* dormbr_ */
/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer * lda, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DPOTF2 computes the Cholesky factorization of a real symmetric positive definite matrix A. The factorization has the form A = U' * U , if UPLO = 'U', or A = L * L', if UPLO = 'L', where U is an upper triangular matrix and L is lower triangular. This is the unblocked version of the algorithm, calling Level 2 BLAS. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrix A is stored. = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading n by n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading n by n lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the factor U or L from the Cholesky factorization A = U'*U or A = L*L'. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -k, the k-th argument had an illegal value > 0: if INFO = k, the leading minor of order k is not positive definite, and the factorization could not be completed. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b10 = -1.; static doublereal c_b12 = 1.; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); static integer j; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal ajj; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("DPOTF2", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (upper) { /* Compute the Cholesky factorization A = U'*U. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Compute U(J,J) and test for non-positive-definiteness. */ i__2 = j - 1; ajj = a_ref(j, j) - ddot_(&i__2, &a_ref(1, j), &c__1, &a_ref(1, j) , &c__1); if (ajj <= 0.) { a_ref(j, j) = ajj; goto L30; } ajj = sqrt(ajj); a_ref(j, j) = ajj; /* Compute elements J+1:N of row J. */ if (j < *n) { i__2 = j - 1; i__3 = *n - j; dgemv_("Transpose", &i__2, &i__3, &c_b10, &a_ref(1, j + 1), lda, &a_ref(1, j), &c__1, &c_b12, &a_ref(j, j + 1), lda); i__2 = *n - j; d__1 = 1. / ajj; dscal_(&i__2, &d__1, &a_ref(j, j + 1), lda); } /* L10: */ } } else { /* Compute the Cholesky factorization A = L*L'. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Compute L(J,J) and test for non-positive-definiteness. */ i__2 = j - 1; ajj = a_ref(j, j) - ddot_(&i__2, &a_ref(j, 1), lda, &a_ref(j, 1), lda); if (ajj <= 0.) { a_ref(j, j) = ajj; goto L30; } ajj = sqrt(ajj); a_ref(j, j) = ajj; /* Compute elements J+1:N of column J. */ if (j < *n) { i__2 = *n - j; i__3 = j - 1; dgemv_("No transpose", &i__2, &i__3, &c_b10, &a_ref(j + 1, 1), lda, &a_ref(j, 1), lda, &c_b12, &a_ref(j + 1, j), & c__1); i__2 = *n - j; d__1 = 1. / ajj; dscal_(&i__2, &d__1, &a_ref(j + 1, j), &c__1); } /* L20: */ } } goto L40; L30: *info = j; L40: return 0; /* End of DPOTF2 */ } /* dpotf2_ */
/* Subroutine */ int sorgr2_(integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *work, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= SORGR2 generates an m by n real matrix Q with orthonormal rows, which is defined as the last m rows of a product of k elementary reflectors of order n Q = H(1) H(2) . . . H(k) as returned by SGERQF. Arguments ========= M (input) INTEGER The number of rows of the matrix Q. M >= 0. N (input) INTEGER The number of columns of the matrix Q. N >= M. K (input) INTEGER The number of elementary reflectors whose product defines the matrix Q. M >= K >= 0. A (input/output) REAL array, dimension (LDA,N) On entry, the (m-k+i)-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by SGERQF in the last k rows of its array argument A. On exit, the m by n matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SGERQF. WORK (workspace) REAL array, dimension (M) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument has an illegal value ===================================================================== Test the input arguments Parameter adjustments */ /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; real r__1; /* Local variables */ static integer i__, j, l; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), slarf_(char *, integer *, integer *, real *, integer *, real *, real *, integer *, real *); static integer ii; extern /* Subroutine */ int xerbla_(char *, integer *); #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < *m) { *info = -2; } else if (*k < 0 || *k > *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } if (*info != 0) { i__1 = -(*info); xerbla_("SORGR2", &i__1); return 0; } /* Quick return if possible */ if (*m <= 0) { return 0; } if (*k < *m) { /* Initialise rows 1:m-k to rows of the unit matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m - *k; for (l = 1; l <= i__2; ++l) { a_ref(l, j) = 0.f; /* L10: */ } if (j > *n - *m && j <= *n - *k) { a_ref(*m - *n + j, j) = 1.f; } /* L20: */ } } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { ii = *m - *k + i__; /* Apply H(i) to A(1:m-k+i,1:n-k+i) from the right */ a_ref(ii, *n - *m + ii) = 1.f; i__2 = ii - 1; i__3 = *n - *m + ii; slarf_("Right", &i__2, &i__3, &a_ref(ii, 1), lda, &tau[i__], &a[ a_offset], lda, &work[1]); i__2 = *n - *m + ii - 1; r__1 = -tau[i__]; sscal_(&i__2, &r__1, &a_ref(ii, 1), lda); a_ref(ii, *n - *m + ii) = 1.f - tau[i__]; /* Set A(m-k+i,n-k+i+1:n) to zero */ i__2 = *n; for (l = *n - *m + ii + 1; l <= i__2; ++l) { a_ref(ii, l) = 0.f; /* L30: */ } /* L40: */ } return 0; /* End of SORGR2 */ } /* sorgr2_ */
/* Subroutine */ int stbmv_(char *uplo, char *trans, char *diag, integer *n, integer *k, real *a, integer *lda, real *x, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer info; static real temp; static integer i__, j, l; extern logical lsame_(char *, char *); static integer kplus1, ix, jx, kx; extern /* Subroutine */ int xerbla_(char *, integer *); static logical nounit; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] /* Purpose ======= STBMV performs one of the matrix-vector operations x := A*x, or x := A'*x, where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular band matrix, with ( k + 1 ) diagonals. Parameters ========== UPLO - CHARACTER*1. On entry, UPLO specifies whether the matrix is an upper or lower triangular matrix as follows: UPLO = 'U' or 'u' A is an upper triangular matrix. UPLO = 'L' or 'l' A is a lower triangular matrix. Unchanged on exit. TRANS - CHARACTER*1. On entry, TRANS specifies the operation to be performed as follows: TRANS = 'N' or 'n' x := A*x. TRANS = 'T' or 't' x := A'*x. TRANS = 'C' or 'c' x := A'*x. Unchanged on exit. DIAG - CHARACTER*1. On entry, DIAG specifies whether or not A is unit triangular as follows: DIAG = 'U' or 'u' A is assumed to be unit triangular. DIAG = 'N' or 'n' A is not assumed to be unit triangular. Unchanged on exit. N - INTEGER. On entry, N specifies the order of the matrix A. N must be at least zero. Unchanged on exit. K - INTEGER. On entry with UPLO = 'U' or 'u', K specifies the number of super-diagonals of the matrix A. On entry with UPLO = 'L' or 'l', K specifies the number of sub-diagonals of the matrix A. K must satisfy 0 .le. K. Unchanged on exit. A - REAL array of DIMENSION ( LDA, n ). Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) by n part of the array A must contain the upper triangular band part of the matrix of coefficients, supplied column by column, with the leading diagonal of the matrix in row ( k + 1 ) of the array, the first super-diagonal starting at position 2 in row k, and so on. The top left k by k triangle of the array A is not referenced. The following program segment will transfer an upper triangular band matrix from conventional full matrix storage to band storage: DO 20, J = 1, N M = K + 1 - J DO 10, I = MAX( 1, J - K ), J A( M + I, J ) = matrix( I, J ) 10 CONTINUE 20 CONTINUE Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) by n part of the array A must contain the lower triangular band part of the matrix of coefficients, supplied column by column, with the leading diagonal of the matrix in row 1 of the array, the first sub-diagonal starting at position 1 in row 2, and so on. The bottom right k by k triangle of the array A is not referenced. The following program segment will transfer a lower triangular band matrix from conventional full matrix storage to band storage: DO 20, J = 1, N M = 1 - J DO 10, I = J, MIN( N, J + K ) A( M + I, J ) = matrix( I, J ) 10 CONTINUE 20 CONTINUE Note that when DIAG = 'U' or 'u' the elements of the array A corresponding to the diagonal elements of the matrix are not referenced, but are assumed to be unity. Unchanged on exit. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. LDA must be at least ( k + 1 ). Unchanged on exit. X - REAL array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. On exit, X is overwritten with the tranformed vector x. INCX - INTEGER. On entry, INCX specifies the increment for the elements of X. INCX must not be zero. Unchanged on exit. Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. Jeremy Du Croz, Nag Central Office. Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. Test the input parameters. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; /* Function Body */ info = 0; if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { info = 1; } else if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { info = 2; } else if (! lsame_(diag, "U") && ! lsame_(diag, "N")) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { xerbla_("STBMV ", &info); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = lsame_(diag, "N"); /* Set up the start point in X if the increment is not unity. This will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. */ if (lsame_(trans, "N")) { /* Form x := A*x. */ if (lsame_(uplo, "U")) { kplus1 = *k + 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.f) { temp = x[j]; l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { x[i__] += temp * a_ref(l + i__, j); /* L10: */ } if (nounit) { x[j] *= a_ref(kplus1, j); } } /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.f) { temp = x[jx]; ix = kx; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { x[ix] += temp * a_ref(l + i__, j); ix += *incx; /* L30: */ } if (nounit) { x[jx] *= a_ref(kplus1, j); } } jx += *incx; if (j > *k) { kx += *incx; } /* L40: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.f) { temp = x[j]; l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; i__4 = j + 1; for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { x[i__] += temp * a_ref(l + i__, j); /* L50: */ } if (nounit) { x[j] *= a_ref(1, j); } } /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.f) { temp = x[jx]; ix = kx; l = 1 - j; /* Computing MIN */ i__4 = *n, i__1 = j + *k; i__3 = j + 1; for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { x[ix] += temp * a_ref(l + i__, j); ix -= *incx; /* L70: */ } if (nounit) { x[jx] *= a_ref(1, j); } } jx -= *incx; if (*n - j >= *k) { kx -= *incx; } /* L80: */ } } } } else { /* Form x := A'*x. */ if (lsame_(uplo, "U")) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; l = kplus1 - j; if (nounit) { temp *= a_ref(kplus1, j); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { temp += a_ref(l + i__, j) * x[i__]; /* L90: */ } x[j] = temp; /* L100: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { temp = x[jx]; kx -= *incx; ix = kx; l = kplus1 - j; if (nounit) { temp *= a_ref(kplus1, j); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { temp += a_ref(l + i__, j) * x[ix]; ix -= *incx; /* L110: */ } x[jx] = temp; jx -= *incx; /* L120: */ } } } else { if (*incx == 1) { i__3 = *n; for (j = 1; j <= i__3; ++j) { temp = x[j]; l = 1 - j; if (nounit) { temp *= a_ref(1, j); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { temp += a_ref(l + i__, j) * x[i__]; /* L130: */ } x[j] = temp; /* L140: */ } } else { jx = kx; i__3 = *n; for (j = 1; j <= i__3; ++j) { temp = x[jx]; kx += *incx; ix = kx; l = 1 - j; if (nounit) { temp *= a_ref(1, j); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { temp += a_ref(l + i__, j) * x[ix]; ix += *incx; /* L150: */ } x[jx] = temp; jx += *incx; /* L160: */ } } } } return 0; /* End of STBMV . */ } /* stbmv_ */
/* Subroutine */ int stgsen_(integer *ijob, logical *wantq, logical *wantz, logical *select, integer *n, real *a, integer *lda, real *b, integer * ldb, real *alphar, real *alphai, real *beta, real *q, integer *ldq, real *z__, integer *ldz, integer *m, real *pl, real *pr, real *dif, real *work, integer *lwork, integer *iwork, integer *liwork, integer * info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= STGSEN reorders the generalized real Schur decomposition of a real matrix pair (A, B) (in terms of an orthonormal equivalence trans- formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues appears in the leading diagonal blocks of the upper quasi-triangular matrix A and the upper triangular B. The leading columns of Q and Z form orthonormal bases of the corresponding left and right eigen- spaces (deflating subspaces). (A, B) must be in generalized real Schur canonical form (as returned by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper triangular. STGSEN also computes the generalized eigenvalues w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j) of the reordered matrix pair (A, B). Optionally, STGSEN computes the estimates of reciprocal condition numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) between the matrix pairs (A11, B11) and (A22,B22) that correspond to the selected cluster and the eigenvalues outside the cluster, resp., and norms of "projections" onto left and right eigenspaces w.r.t. the selected cluster in the (1,1)-block. Arguments ========= IJOB (input) INTEGER Specifies whether condition numbers are required for the cluster of eigenvalues (PL and PR) or the deflating subspaces (Difu and Difl): =0: Only reorder w.r.t. SELECT. No extras. =1: Reciprocal of norms of "projections" onto left and right eigenspaces w.r.t. the selected cluster (PL and PR). =2: Upper bounds on Difu and Difl. F-norm-based estimate (DIF(1:2)). =3: Estimate of Difu and Difl. 1-norm-based estimate (DIF(1:2)). About 5 times as expensive as IJOB = 2. =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic version to get it all. =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) WANTQ (input) LOGICAL .TRUE. : update the left transformation matrix Q; .FALSE.: do not update Q. WANTZ (input) LOGICAL .TRUE. : update the right transformation matrix Z; .FALSE.: do not update Z. SELECT (input) LOGICAL array, dimension (N) SELECT specifies the eigenvalues in the selected cluster. To select a real eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select a complex conjugate pair of eigenvalues w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, either SELECT(j) or SELECT(j+1) or both must be set to .TRUE.; a complex conjugate pair of eigenvalues must be either both included in the cluster or both excluded. N (input) INTEGER The order of the matrices A and B. N >= 0. A (input/output) REAL array, dimension(LDA,N) On entry, the upper quasi-triangular matrix A, with (A, B) in generalized real Schur canonical form. On exit, A is overwritten by the reordered matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) REAL array, dimension(LDB,N) On entry, the upper triangular matrix B, with (A, B) in generalized real Schur canonical form. On exit, B is overwritten by the reordered matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). ALPHAR (output) REAL array, dimension (N) ALPHAI (output) REAL array, dimension (N) BETA (output) REAL array, dimension (N) On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i and BETA(j),j=1,...,N are the diagonals of the complex Schur form (S,T) that would result if the 2-by-2 diagonal blocks of the real generalized Schur form of (A,B) were further reduced to triangular form using complex unitary transformations. If ALPHAI(j) is zero, then the j-th eigenvalue is real; if positive, then the j-th and (j+1)-st eigenvalues are a complex conjugate pair, with ALPHAI(j+1) negative. Q (input/output) REAL array, dimension (LDQ,N) On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. On exit, Q has been postmultiplied by the left orthogonal transformation matrix which reorder (A, B); The leading M columns of Q form orthonormal bases for the specified pair of left eigenspaces (deflating subspaces). If WANTQ = .FALSE., Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= 1; and if WANTQ = .TRUE., LDQ >= N. Z (input/output) REAL array, dimension (LDZ,N) On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. On exit, Z has been postmultiplied by the left orthogonal transformation matrix which reorder (A, B); The leading M columns of Z form orthonormal bases for the specified pair of left eigenspaces (deflating subspaces). If WANTZ = .FALSE., Z is not referenced. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1; If WANTZ = .TRUE., LDZ >= N. M (output) INTEGER The dimension of the specified pair of left and right eigen- spaces (deflating subspaces). 0 <= M <= N. PL, PR (output) REAL If IJOB = 1, 4 or 5, PL, PR are lower bounds on the reciprocal of the norm of "projections" onto left and right eigenspaces with respect to the selected cluster. 0 < PL, PR <= 1. If M = 0 or M = N, PL = PR = 1. If IJOB = 0, 2 or 3, PL and PR are not referenced. DIF (output) REAL array, dimension (2). If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based estimates of Difu and Difl. If M = 0 or N, DIF(1:2) = F-norm([A, B]). If IJOB = 0 or 1, DIF is not referenced. WORK (workspace/output) REAL array, dimension (LWORK) IF IJOB = 0, WORK is not referenced. Otherwise, on exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= 4*N+16. If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)). If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace/output) INTEGER array, dimension (LIWORK) IF IJOB = 0, IWORK is not referenced. Otherwise, on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER The dimension of the array IWORK. LIWORK >= 1. If IJOB = 1, 2 or 4, LIWORK >= N+6. If IJOB = 3 or 5, LIWORK >= MAX(2*M*(N-M), N+6). If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, returns this value as the first entry of the IWORK array, and no error message related to LIWORK is issued by XERBLA. INFO (output) INTEGER =0: Successful exit. <0: If INFO = -i, the i-th argument had an illegal value. =1: Reordering of (A, B) failed because the transformed matrix pair (A, B) would be too far from generalized Schur form; the problem is very ill-conditioned. (A, B) may have been partially reordered. If requested, 0 is returned in DIF(*), PL and PR. Further Details =============== STGSEN first collects the selected eigenvalues by computing orthogonal U and W that move them to the top left corner of (A, B). In other words, the selected eigenvalues are the eigenvalues of (A11, B11) in: U'*(A, B)*W = (A11 A12) (B11 B12) n1 ( 0 A22),( 0 B22) n2 n1 n2 n1 n2 where N = n1+n2 and U' means the transpose of U. The first n1 columns of U and W span the specified pair of left and right eigenspaces (deflating subspaces) of (A, B). If (A, B) has been obtained from the generalized real Schur decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the reordered generalized real Schur form of (C, D) is given by (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)', and the first n1 columns of Q*U and Z*W span the corresponding deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). Note that if the selected eigenvalue is sufficiently ill-conditioned, then its value may differ significantly from its value before reordering. The reciprocal condition numbers of the left and right eigenspaces spanned by the first n1 columns of U and W (or Q*U and Z*W) may be returned in DIF(1:2), corresponding to Difu and Difl, resp. The Difu and Difl are defined as: Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) and Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], where sigma-min(Zu) is the smallest singular value of the (2*n1*n2)-by-(2*n1*n2) matrix Zu = [ kron(In2, A11) -kron(A22', In1) ] [ kron(In2, B11) -kron(B22', In1) ]. Here, Inx is the identity matrix of size nx and A22' is the transpose of A22. kron(X, Y) is the Kronecker product between the matrices X and Y. When DIF(2) is small, small changes in (A, B) can cause large changes in the deflating subspace. An approximate (asymptotic) bound on the maximum angular error in the computed deflating subspaces is EPS * norm((A, B)) / DIF(2), where EPS is the machine precision. The reciprocal norm of the projectors on the left and right eigenspaces associated with (A11, B11) may be returned in PL and PR. They are computed as follows. First we compute L and R so that P*(A, B)*Q is block diagonal, where P = ( I -L ) n1 Q = ( I R ) n1 ( 0 I ) n2 and ( 0 I ) n2 n1 n2 n1 n2 and (L, R) is the solution to the generalized Sylvester equation A11*R - L*A22 = -A12 B11*R - L*B22 = -B12 Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). An approximate (asymptotic) bound on the average absolute error of the selected eigenvalues is EPS * norm((A, B)) / PL. There are also global error bounds which valid for perturbations up to a certain restriction: A lower bound (x) on the smallest F-norm(E,F) for which an eigenvalue of (A11, B11) may move and coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), (i.e. (A + E, B + F), is x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). An approximate bound on x can be computed from DIF(1:2), PL and PR. If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed (L', R') and unperturbed (L, R) left and right deflating subspaces associated with the selected cluster in the (1,1)-blocks can be bounded as max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2)) max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2)) See LAPACK User's Guide section 4.11 or the following references for more information. Note that if the default method for computing the Frobenius-norm- based estimate DIF is not wanted (see SLATDF), then the parameter IDIFJB (see below) should be changed from 3 to 4 (routine SLATDF (IJOB = 2 will be used)). See STGSYL for more details. Based on contributions by Bo Kagstrom and Peter Poromaa, Department of Computing Science, Umea University, S-901 87 Umea, Sweden. References ========== [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the Generalized Real Schur Form of a Regular Matrix Pair (A, B), in M.S. Moonen et al (eds), Linear Algebra for Large Scale and Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified Eigenvalues of a Regular Matrix Pair (A, B) and Condition Estimation: Theory, Algorithms and Software, Report UMINF - 94.04, Department of Computing Science, Umea University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. To appear in Numerical Algorithms, 1996. [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software for Solving the Generalized Sylvester Equation and Estimating the Separation between Regular Matrix Pairs, Report UMINF - 93.23, Department of Computing Science, Umea University, S-901 87 Umea, Sweden, December 1993, Revised April 1994, Also as LAPACK Working Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996. ===================================================================== Decode and test the input parameters Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c__2 = 2; static real c_b28 = 1.f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2; real r__1; /* Builtin functions */ double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ static integer kase; static logical pair; static integer ierr; static real dsum; static logical swap; extern /* Subroutine */ int slag2_(real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *); static integer i__, k; static logical wantd; static integer lwmin; static logical wantp; static integer n1, n2; static logical wantd1, wantd2; static integer kk; static real dscale; static integer ks; static real rdscal; extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *), slacon_( integer *, real *, real *, integer *, real *, integer *), slacpy_( char *, integer *, integer *, real *, integer *, real *, integer * ), stgexc_(logical *, logical *, integer *, real *, integer *, real *, integer *, real *, integer *, real *, integer * , integer *, integer *, real *, integer *, integer *); static integer liwmin; extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, real *); static real smlnum; static integer mn2; static logical lquery; extern /* Subroutine */ int stgsyl_(char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, integer *, real * , integer *, real *, integer *, real *, integer *, real *, real *, real *, integer *, integer *, integer *); static integer ijb; static real eps; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] --select; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --alphar; --alphai; --beta; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --dif; --work; --iwork; /* Function Body */ *info = 0; lquery = *lwork == -1 || *liwork == -1; if (*ijob < 0 || *ijob > 5) { *info = -1; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else if (*ldq < 1 || *wantq && *ldq < *n) { *info = -14; } else if (*ldz < 1 || *wantz && *ldz < *n) { *info = -16; } if (*info != 0) { i__1 = -(*info); xerbla_("STGSEN", &i__1); return 0; } /* Get machine constants */ eps = slamch_("P"); smlnum = slamch_("S") / eps; ierr = 0; wantp = *ijob == 1 || *ijob >= 4; wantd1 = *ijob == 2 || *ijob == 4; wantd2 = *ijob == 3 || *ijob == 5; wantd = wantd1 || wantd2; /* Set M to the dimension of the specified pair of deflating subspaces. */ *m = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; } else { if (k < *n) { if (a_ref(k + 1, k) == 0.f) { if (select[k]) { ++(*m); } } else { pair = TRUE_; if (select[k] || select[k + 1]) { *m += 2; } } } else { if (select[*n]) { ++(*m); } } } /* L10: */ } if (*ijob == 1 || *ijob == 2 || *ijob == 4) { /* Computing MAX */ i__1 = 1, i__2 = (*n << 2) + 16, i__1 = max(i__1,i__2), i__2 = (*m << 1) * (*n - *m); lwmin = max(i__1,i__2); /* Computing MAX */ i__1 = 1, i__2 = *n + 6; liwmin = max(i__1,i__2); } else if (*ijob == 3 || *ijob == 5) { /* Computing MAX */ i__1 = 1, i__2 = (*n << 2) + 16, i__1 = max(i__1,i__2), i__2 = (*m << 2) * (*n - *m); lwmin = max(i__1,i__2); /* Computing MAX */ i__1 = 1, i__2 = (*m << 1) * (*n - *m), i__1 = max(i__1,i__2), i__2 = *n + 6; liwmin = max(i__1,i__2); } else { /* Computing MAX */ i__1 = 1, i__2 = (*n << 2) + 16; lwmin = max(i__1,i__2); liwmin = 1; } work[1] = (real) lwmin; iwork[1] = liwmin; if (*lwork < lwmin && ! lquery) { *info = -22; } else if (*liwork < liwmin && ! lquery) { *info = -24; } if (*info != 0) { i__1 = -(*info); xerbla_("STGSEN", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible. */ if (*m == *n || *m == 0) { if (wantp) { *pl = 1.f; *pr = 1.f; } if (wantd) { dscale = 0.f; dsum = 1.f; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { slassq_(n, &a_ref(1, i__), &c__1, &dscale, &dsum); slassq_(n, &b_ref(1, i__), &c__1, &dscale, &dsum); /* L20: */ } dif[1] = dscale * sqrt(dsum); dif[2] = dif[1]; } goto L60; } /* Collect the selected blocks at the top-left corner of (A, B). */ ks = 0; pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; } else { swap = select[k]; if (k < *n) { if (a_ref(k + 1, k) != 0.f) { pair = TRUE_; swap = swap || select[k + 1]; } } if (swap) { ++ks; /* Swap the K-th block to position KS. Perform the reordering of diagonal blocks in (A, B) by orthogonal transformation matrices and update Q and Z accordingly (if requested): */ kk = k; if (k != ks) { stgexc_(wantq, wantz, n, &a[a_offset], lda, &b[b_offset], ldb, &q[q_offset], ldq, &z__[z_offset], ldz, &kk, &ks, &work[1], lwork, &ierr); } if (ierr > 0) { /* Swap is rejected: exit. */ *info = 1; if (wantp) { *pl = 0.f; *pr = 0.f; } if (wantd) { dif[1] = 0.f; dif[2] = 0.f; } goto L60; } if (pair) { ++ks; } } } /* L30: */ } if (wantp) { /* Solve generalized Sylvester equation for R and L and compute PL and PR. */ n1 = *m; n2 = *n - *m; i__ = n1 + 1; ijb = 0; slacpy_("Full", &n1, &n2, &a_ref(1, i__), lda, &work[1], &n1); slacpy_("Full", &n1, &n2, &b_ref(1, i__), ldb, &work[n1 * n2 + 1], & n1); i__1 = *lwork - (n1 << 1) * n2; stgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a_ref(i__, i__), lda, &work[1], &n1, &b[b_offset], ldb, &b_ref(i__, i__), ldb, & work[n1 * n2 + 1], &n1, &dscale, &dif[1], &work[(n1 * n2 << 1) + 1], &i__1, &iwork[1], &ierr); /* Estimate the reciprocal of norms of "projections" onto left and right eigenspaces. */ rdscal = 0.f; dsum = 1.f; i__1 = n1 * n2; slassq_(&i__1, &work[1], &c__1, &rdscal, &dsum); *pl = rdscal * sqrt(dsum); if (*pl == 0.f) { *pl = 1.f; } else { *pl = dscale / (sqrt(dscale * dscale / *pl + *pl) * sqrt(*pl)); } rdscal = 0.f; dsum = 1.f; i__1 = n1 * n2; slassq_(&i__1, &work[n1 * n2 + 1], &c__1, &rdscal, &dsum); *pr = rdscal * sqrt(dsum); if (*pr == 0.f) { *pr = 1.f; } else { *pr = dscale / (sqrt(dscale * dscale / *pr + *pr) * sqrt(*pr)); } } if (wantd) { /* Compute estimates of Difu and Difl. */ if (wantd1) { n1 = *m; n2 = *n - *m; i__ = n1 + 1; ijb = 3; /* Frobenius norm-based Difu-estimate. */ i__1 = *lwork - (n1 << 1) * n2; stgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a_ref(i__, i__), lda, &work[1], &n1, &b[b_offset], ldb, &b_ref(i__, i__), ldb, &work[n1 * n2 + 1], &n1, &dscale, &dif[1], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], &ierr); /* Frobenius norm-based Difl-estimate. */ i__1 = *lwork - (n1 << 1) * n2; stgsyl_("N", &ijb, &n2, &n1, &a_ref(i__, i__), lda, &a[a_offset], lda, &work[1], &n2, &b_ref(i__, i__), ldb, &b[b_offset], ldb, &work[n1 * n2 + 1], &n2, &dscale, &dif[2], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], &ierr); } else { /* Compute 1-norm-based estimates of Difu and Difl using reversed communication with SLACON. In each step a generalized Sylvester equation or a transposed variant is solved. */ kase = 0; n1 = *m; n2 = *n - *m; i__ = n1 + 1; ijb = 0; mn2 = (n1 << 1) * n2; /* 1-norm-based estimate of Difu. */ L40: slacon_(&mn2, &work[mn2 + 1], &work[1], &iwork[1], &dif[1], &kase) ; if (kase != 0) { if (kase == 1) { /* Solve generalized Sylvester equation. */ i__1 = *lwork - (n1 << 1) * n2; stgsyl_("N", &ijb, &n1, &n2, &a[a_offset], lda, &a_ref( i__, i__), lda, &work[1], &n1, &b[b_offset], ldb, &b_ref(i__, i__), ldb, &work[n1 * n2 + 1], &n1, & dscale, &dif[1], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], &ierr); } else { /* Solve the transposed variant. */ i__1 = *lwork - (n1 << 1) * n2; stgsyl_("T", &ijb, &n1, &n2, &a[a_offset], lda, &a_ref( i__, i__), lda, &work[1], &n1, &b[b_offset], ldb, &b_ref(i__, i__), ldb, &work[n1 * n2 + 1], &n1, & dscale, &dif[1], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], &ierr); } goto L40; } dif[1] = dscale / dif[1]; /* 1-norm-based estimate of Difl. */ L50: slacon_(&mn2, &work[mn2 + 1], &work[1], &iwork[1], &dif[2], &kase) ; if (kase != 0) { if (kase == 1) { /* Solve generalized Sylvester equation. */ i__1 = *lwork - (n1 << 1) * n2; stgsyl_("N", &ijb, &n2, &n1, &a_ref(i__, i__), lda, &a[ a_offset], lda, &work[1], &n2, &b_ref(i__, i__), ldb, &b[b_offset], ldb, &work[n1 * n2 + 1], &n2, & dscale, &dif[2], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], &ierr); } else { /* Solve the transposed variant. */ i__1 = *lwork - (n1 << 1) * n2; stgsyl_("T", &ijb, &n2, &n1, &a_ref(i__, i__), lda, &a[ a_offset], lda, &work[1], &n2, &b_ref(i__, i__), ldb, &b[b_offset], ldb, &work[n1 * n2 + 1], &n2, & dscale, &dif[2], &work[(n1 << 1) * n2 + 1], &i__1, &iwork[1], &ierr); } goto L50; } dif[2] = dscale / dif[2]; } } L60: /* Compute generalized eigenvalues of reordered pair (A, B) and normalize the generalized Schur form. */ pair = FALSE_; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (pair) { pair = FALSE_; } else { if (k < *n) { if (a_ref(k + 1, k) != 0.f) { pair = TRUE_; } } if (pair) { /* Compute the eigenvalue(s) at position K. */ work[1] = a_ref(k, k); work[2] = a_ref(k + 1, k); work[3] = a_ref(k, k + 1); work[4] = a_ref(k + 1, k + 1); work[5] = b_ref(k, k); work[6] = b_ref(k + 1, k); work[7] = b_ref(k, k + 1); work[8] = b_ref(k + 1, k + 1); r__1 = smlnum * eps; slag2_(&work[1], &c__2, &work[5], &c__2, &r__1, &beta[k], & beta[k + 1], &alphar[k], &alphar[k + 1], &alphai[k]); alphai[k + 1] = -alphai[k]; } else { if (r_sign(&c_b28, &b_ref(k, k)) < 0.f) { /* If B(K,K) is negative, make it positive */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { a_ref(k, i__) = -a_ref(k, i__); b_ref(k, i__) = -b_ref(k, i__); q_ref(i__, k) = -q_ref(i__, k); /* L80: */ } } alphar[k] = a_ref(k, k); alphai[k] = 0.f; beta[k] = b_ref(k, k); } } /* L70: */ } work[1] = (real) lwmin; iwork[1] = liwmin; return 0; /* End of STGSEN */ } /* stgsen_ */
/* Subroutine */ int cungbr_(char *vect, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CUNGBR generates one of the complex unitary matrices Q or P**H determined by CGEBRD when reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q and P**H are defined as products of elementary reflectors H(i) or G(i) respectively. If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q is of order M: if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n columns of Q, where m >= n >= k; if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an M-by-M matrix. If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H is of order N: if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m rows of P**H, where n >= m >= k; if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as an N-by-N matrix. Arguments ========= VECT (input) CHARACTER*1 Specifies whether the matrix Q or the matrix P**H is required, as defined in the transformation applied by CGEBRD: = 'Q': generate Q; = 'P': generate P**H. M (input) INTEGER The number of rows of the matrix Q or P**H to be returned. M >= 0. N (input) INTEGER The number of columns of the matrix Q or P**H to be returned. N >= 0. If VECT = 'Q', M >= N >= min(M,K); if VECT = 'P', N >= M >= min(N,K). K (input) INTEGER If VECT = 'Q', the number of columns in the original M-by-K matrix reduced by CGEBRD. If VECT = 'P', the number of rows in the original K-by-N matrix reduced by CGEBRD. K >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the vectors which define the elementary reflectors, as returned by CGEBRD. On exit, the M-by-N matrix Q or P**H. LDA (input) INTEGER The leading dimension of the array A. LDA >= M. TAU (input) COMPLEX array, dimension (min(M,K)) if VECT = 'Q' (min(N,K)) if VECT = 'P' TAU(i) must contain the scalar factor of the elementary reflector H(i) or G(i), which determines Q or P**H, as returned by CGEBRD in its array argument TAUQ or TAUP. WORK (workspace/output) COMPLEX array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,min(M,N)). For optimum performance LWORK >= min(M,N)*NB, where NB is the optimal blocksize. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input arguments Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, j; extern logical lsame_(char *, char *); static integer iinfo; static logical wantq; static integer nb, mn; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int cunglq_(integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, integer *), cungqr_(integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, integer *); static integer lwkopt; static logical lquery; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --tau; --work; /* Function Body */ *info = 0; wantq = lsame_(vect, "Q"); mn = min(*m,*n); lquery = *lwork == -1; if (! wantq && ! lsame_(vect, "P")) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( *m > *n || *m < min(*n,*k))) { *info = -3; } else if (*k < 0) { *info = -4; } else if (*lda < max(1,*m)) { *info = -6; } else if (*lwork < max(1,mn) && ! lquery) { *info = -9; } if (*info == 0) { if (wantq) { nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( ftnlen)1); } else { nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( ftnlen)1); } lwkopt = max(1,mn) * nb; work[1].r = (real) lwkopt, work[1].i = 0.f; } if (*info != 0) { i__1 = -(*info); xerbla_("CUNGBR", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { work[1].r = 1.f, work[1].i = 0.f; return 0; } if (wantq) { /* Form Q, determined by a call to CGEBRD to reduce an m-by-k matrix */ if (*m >= *k) { /* If m >= k, assume m >= n >= k */ cungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & iinfo); } else { /* If m < k, assume m = n Shift the vectors which define the elementary reflectors one column to the right, and set the first row and column of Q to those of the unit matrix */ for (j = *m; j >= 2; --j) { i__1 = a_subscr(1, j); a[i__1].r = 0.f, a[i__1].i = 0.f; i__1 = *m; for (i__ = j + 1; i__ <= i__1; ++i__) { i__2 = a_subscr(i__, j); i__3 = a_subscr(i__, j - 1); a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; /* L10: */ } /* L20: */ } i__1 = a_subscr(1, 1); a[i__1].r = 1.f, a[i__1].i = 0.f; i__1 = *m; for (i__ = 2; i__ <= i__1; ++i__) { i__2 = a_subscr(i__, 1); a[i__2].r = 0.f, a[i__2].i = 0.f; /* L30: */ } if (*m > 1) { /* Form Q(2:m,2:m) */ i__1 = *m - 1; i__2 = *m - 1; i__3 = *m - 1; cungqr_(&i__1, &i__2, &i__3, &a_ref(2, 2), lda, &tau[1], & work[1], lwork, &iinfo); } } } else { /* Form P', determined by a call to CGEBRD to reduce a k-by-n matrix */ if (*k < *n) { /* If k < n, assume k <= m <= n */ cunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & iinfo); } else { /* If k >= n, assume m = n Shift the vectors which define the elementary reflectors one row downward, and set the first row and column of P' to those of the unit matrix */ i__1 = a_subscr(1, 1); a[i__1].r = 1.f, a[i__1].i = 0.f; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { i__2 = a_subscr(i__, 1); a[i__2].r = 0.f, a[i__2].i = 0.f; /* L40: */ } i__1 = *n; for (j = 2; j <= i__1; ++j) { for (i__ = j - 1; i__ >= 2; --i__) { i__2 = a_subscr(i__, j); i__3 = a_subscr(i__ - 1, j); a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; /* L50: */ } i__2 = a_subscr(1, j); a[i__2].r = 0.f, a[i__2].i = 0.f; /* L60: */ } if (*n > 1) { /* Form P'(2:n,2:n) */ i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; cunglq_(&i__1, &i__2, &i__3, &a_ref(2, 2), lda, &tau[1], & work[1], lwork, &iinfo); } } } work[1].r = (real) lwkopt, work[1].i = 0.f; return 0; /* End of CUNGBR */ } /* cungbr_ */
/* Subroutine */ int cunmql_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *lwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CUNMQL overwrites the general complex M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q TRANS = 'C': Q**H * C C * Q**H where Q is a complex unitary matrix defined as the product of k elementary reflectors Q = H(k) . . . H(2) H(1) as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. Arguments ========= SIDE (input) CHARACTER*1 = 'L': apply Q or Q**H from the Left; = 'R': apply Q or Q**H from the Right. TRANS (input) CHARACTER*1 = 'N': No transpose, apply Q; = 'C': Transpose, apply Q**H. M (input) INTEGER The number of rows of the matrix C. M >= 0. N (input) INTEGER The number of columns of the matrix C. N >= 0. K (input) INTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. A (input) COMPLEX array, dimension (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CGEQLF in the last k columns of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER The leading dimension of the array A. If SIDE = 'L', LDA >= max(1,M); if SIDE = 'R', LDA >= max(1,N). TAU (input) COMPLEX array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEQLF. C (input/output) COMPLEX array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). WORK (workspace/output) COMPLEX array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If SIDE = 'L', LWORK >= max(1,N); if SIDE = 'R', LWORK >= max(1,M). For optimum performance LWORK >= N*NB if SIDE = 'L', and LWORK >= M*NB if SIDE = 'R', where NB is the optimal blocksize. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input arguments Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__2 = 2; static integer c__65 = 65; /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, i__5; char ch__1[2]; /* Builtin functions Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ static logical left; static integer i__; static complex t[4160] /* was [65][64] */; extern logical lsame_(char *, char *); static integer nbmin, iinfo, i1, i2, i3; extern /* Subroutine */ int cunm2l_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *); static integer ib, nb, mi, ni; extern /* Subroutine */ int clarfb_(char *, char *, char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *); static integer nq, nw; extern /* Subroutine */ int clarft_(char *, char *, integer *, integer *, complex *, integer *, complex *, complex *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static logical notran; static integer ldwork, lwkopt; static logical lquery; static integer iws; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --tau; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; --work; /* Function Body */ *info = 0; left = lsame_(side, "L"); notran = lsame_(trans, "N"); lquery = *lwork == -1; /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = *n; } else { nq = *n; nw = *m; } if (! left && ! lsame_(side, "R")) { *info = -1; } else if (! notran && ! lsame_(trans, "C")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } else if (*lwork < max(1,nw) && ! lquery) { *info = -12; } if (*info == 0) { /* Determine the block size. NB may be at most NBMAX, where NBMAX is used to define the local array T. Computing MIN Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQL", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; work[1].r = (real) lwkopt, work[1].i = 0.f; } if (*info != 0) { i__1 = -(*info); xerbla_("CUNMQL", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { work[1].r = 1.f, work[1].i = 0.f; return 0; } nbmin = 2; ldwork = nw; if (nb > 1 && nb < *k) { iws = nw * nb; if (*lwork < iws) { nb = *lwork / ldwork; /* Computing MAX Writing concatenation */ i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQL", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nbmin = max(i__1,i__2); } } else { iws = nw; } if (nb < nbmin || nb >= *k) { /* Use unblocked code */ cunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = nb; } else { i1 = (*k - 1) / nb * nb + 1; i2 = 1; i3 = -nb; } if (left) { ni = *n; } else { mi = *m; } i__1 = i2; i__2 = i3; for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { /* Computing MIN */ i__4 = nb, i__5 = *k - i__ + 1; ib = min(i__4,i__5); /* Form the triangular factor of the block reflector H = H(i+ib-1) . . . H(i+1) H(i) */ i__4 = nq - *k + i__ + ib - 1; clarft_("Backward", "Columnwise", &i__4, &ib, &a_ref(1, i__), lda, &tau[i__], t, &c__65); if (left) { /* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ mi = *m - *k + i__ + ib - 1; } else { /* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ ni = *n - *k + i__ + ib - 1; } /* Apply H or H' */ clarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, & a_ref(1, i__), lda, t, &c__65, &c__[c_offset], ldc, &work[ 1], &ldwork); /* L10: */ } } work[1].r = (real) lwkopt, work[1].i = 0.f; return 0; /* End of CUNMQL */ } /* cunmql_ */