/*! _zrovector*zgematrix operator */
//! Computes the row-vector/matrix product vec*mat (length mat.N) via BLAS zgemv.
//! Because BLAS has no "row vector times matrix" primitive, the product is
//! formed as (A^T)*v using the 'T' (transpose) option of zgemv.
inline _zrovector operator*(const _zrovector& vec, const zgematrix& mat)
{
#ifdef CPPL_VERBOSE
  std::cerr << "# [MARK] operator*(const _zrovector&, const zgematrix&)" << std::endl;
#endif//CPPL_VERBOSE

#ifdef CPPL_DEBUG
  // Dimension check: a 1xL row vector can only multiply an MxN matrix when L==M.
  if(vec.L!=mat.M){
    std::cerr << "[ERROR] operator*(const _zrovector&, const zgematrix&)" << std::endl
              << "These vector and matrix can not make a product." << std::endl
              << "Your input was (" << vec.L << ") * (" << mat.M << "x" << mat.N << ")." << std::endl;
    exit(1);
  }
#endif//CPPL_DEBUG

  zrovector newvec(mat.N);
  // newvec := 1.0 * mat^T * vec + 0.0 * newvec
  // NOTE(review): `mat.Array`/`vec.Array` vs `newvec.array` — member casing differs;
  // presumably zrovector uses a lowercase `array` member while the older classes use
  // `Array`. TODO: confirm against the class declarations.
  zgemv_( 'T', mat.M, mat.N, std::complex<double>(1.0,0.0), mat.Array, mat.M, vec.Array, 1, std::complex<double>(0.0,0.0), newvec.array, 1 );
  // vec is a "smart temporary" (_zrovector); destroy() presumably releases its
  // buffer now that it has been consumed — consistent with CPPL's temporary idiom.
  vec.destroy();
  return _(newvec);
}
/* Python-callable BLAS gemv wrapper: y := alpha*op(a)*x + beta*y.
 * Python signature: gemv(alpha, a, x, beta, y[, trans]) with trans in {'n','t'}
 * (default 't').  Dispatches to dgemv_ for real arrays, zgemv_ for complex.
 * Returns None; the result is written into y in place. */
PyObject* gemv(PyObject *self, PyObject *args)
{
  Py_complex alpha;
  PyArrayObject* a;
  PyArrayObject* x;
  Py_complex beta;
  PyArrayObject* y;
  char trans = 't';
  /* NOTE(review): "O" does no type checking — callers must pass ndarrays;
   * verify the Python-side wrapper guarantees this. */
  if (!PyArg_ParseTuple(args, "DOODO|c", &alpha, &a, &x, &beta, &y, &trans))
    return NULL;
  int m, n, lda, itemsize, incx, incy;
  if (trans == 'n')
    {
      /* Collapse all trailing axes of `a` into the row dimension so that an
       * N-d array is treated as a (dims[0]) x (prod of remaining dims) matrix
       * stored column-major from BLAS's point of view. */
      m = PyArray_DIMS(a)[1];
      for (int i = 2; i < PyArray_NDIM(a); i++)
        m *= PyArray_DIMS(a)[i];
      n = PyArray_DIMS(a)[0];
      lda = MAX(1, m);
    }
  else
    {
      /* Transposed case: the last axis is the fast (row) dimension. */
      n = PyArray_DIMS(a)[0];
      for (int i = 1; i < PyArray_NDIM(a)-1; i++)
        n *= PyArray_DIMS(a)[i];
      m = PyArray_DIMS(a)[PyArray_NDIM(a)-1];
      lda = MAX(1, m);
    }
  /* NOTE(review): itemsize is taken from `a`'s dtype but used to convert `x`'s
   * byte stride — assumes x has the same dtype as a; TODO confirm callers. */
  if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
    itemsize = sizeof(double);
  else
    itemsize = sizeof(double_complex);
  /* x may be strided (incx from its first-axis stride); y is assumed to be
   * contiguous (incy fixed at 1) — NOTE(review): confirm callers guarantee
   * a contiguous y. */
  incx = PyArray_STRIDES(x)[0]/itemsize;
  incy = 1;
  if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
    dgemv_(&trans, &m, &n, &(alpha.real), DOUBLEP(a), &lda, DOUBLEP(x), &incx, &(beta.real), DOUBLEP(y), &incy);
  else
    zgemv_(&trans, &m, &n, &alpha, (void*)COMPLEXP(a), &lda, (void*)COMPLEXP(x), &incx, &beta, (void*)COMPLEXP(y), &incy);
  Py_RETURN_NONE;
}
/* Thin f2c-ABI shim: forwards all arguments unchanged to the Fortran BLAS
 * routine zgemv_ (Y := alpha*op(A)*X + beta*Y).  Always returns 0, matching
 * the f2c convention for Fortran SUBROUTINEs. */
int f2c_zgemv(char* trans, integer* M, integer* N, doublecomplex* alpha, doublecomplex* A, integer* lda, doublecomplex* X, integer* incX, doublecomplex* beta, doublecomplex* Y, integer* incY)
{
    zgemv_(trans, M, N, alpha, A, lda, X, incX, beta, Y, incY);
    return 0;
}
/*! zrovector*zgematrix operator */
//! Returns the row-vector/matrix product vec*mat as a smart temporary.
//! Implemented as mat^T * vec via zgemv's transpose option, since BLAS has
//! no native row-vector-times-matrix primitive.
inline _zrovector operator*(const zrovector& vec, const zgematrix& mat)
{VERBOSE_REPORT;
#ifdef CPPL_DEBUG
  // A length-l row vector only multiplies an mxn matrix when l==m.
  if(vec.l!=mat.m){
    ERROR_REPORT;
    std::cerr << "These vector and matrix can not make a product." << std::endl
              << "Your input was (" << vec.l << ") * ("
              << mat.m << "x" << mat.n << ")." << std::endl;
    exit(1);
  }
#endif//CPPL_DEBUG
  
  zrovector product(mat.n);
  const comple one(1.0,0.0);
  const comple zero(0.0,0.0);
  // product := 1*mat^T*vec + 0*product
  zgemv_( 'T', mat.m, mat.n, one, mat.array, mat.m, vec.array, 1, zero, product.array, 1 );
  return _(product);
}
/*! _zgematrix*zcovector operator */
//! Returns the matrix/column-vector product mat*vec as a smart temporary.
//! mat is itself a smart temporary (_zgematrix) and is destroyed after use.
inline _zcovector operator*(const _zgematrix& mat, const zcovector& vec)
{VERBOSE_REPORT;
#ifdef CPPL_DEBUG
  // An mxn matrix only multiplies a length-l column vector when n==l.
  if(mat.n!=vec.l){
    ERROR_REPORT;
    std::cerr << "These matrix and vector can not make a product." << std::endl
              << "Your input was (" << mat.m << "x" << mat.n << ") * ("
              << vec.l << ")." << std::endl;
    exit(1);
  }
#endif//CPPL_DEBUG
  
  zcovector product(mat.m);
  const comple one(1.0,0.0);
  const comple zero(0.0,0.0);
  // product := 1*mat*vec + 0*product
  zgemv_( 'n', mat.m, mat.n, one, mat.array, mat.m, vec.array, 1, zero, product.array, 1 );
  mat.destroy();  // release the temporary's buffer now that it has been consumed
  return _(product);
}
/*! zgematrix*zcovector operator */
//! Computes the matrix/column-vector product mat*vec (length mat.M) via BLAS zgemv.
inline _zcovector operator*(const zgematrix& mat, const zcovector& vec)
{
#ifdef CPPL_VERBOSE
  std::cerr << "# [MARK] operator*(const zgematrix&, const zcovector&)" << std::endl;
#endif//CPPL_VERBOSE

#ifdef CPPL_DEBUG
  // Dimension check: an MxN matrix can only multiply a length-L column vector when N==L.
  if(mat.N!=vec.L){
    std::cerr << "[ERROR] operator*(const zgematrix&, const zcovector&)" << std::endl
              << "These matrix and vector can not make a product." << std::endl
              << "Your input was (" << mat.M << "x" << mat.N << ") * (" << vec.L << ")." << std::endl;
    exit(1);
  }
#endif//CPPL_DEBUG

  zcovector newvec(mat.M);
  // newvec := 1.0 * mat * vec + 0.0 * newvec  ('N' = no transpose)
  // NOTE(review): `mat.Array`/`vec.Array` vs `newvec.array` — member casing differs;
  // presumably zcovector exposes a lowercase `array` member. TODO: confirm against
  // the class declarations.
  zgemv_( 'N', mat.M, mat.N, std::complex<double>(1.0,0.0), mat.Array, mat.M, vec.Array, 1, std::complex<double>(0.0,0.0), newvec.array, 1 );
  return _(newvec);
}
/* Subroutine */ int zlarz_(char *side, integer *m, integer *n, integer *l,
	doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex *
	c__, integer *ldc, doublecomplex *work)
{
    /* System generated locals */
    integer c_dim1, c_offset;
    doublecomplex z__1;

    /* Local variables */

/*  -- LAPACK routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  ZLARZ applies a complex elementary reflector H to a complex */
/*  M-by-N matrix C, from either the left or the right. H is represented */
/*  in the form */

/*        H = I - tau * v * v' */

/*  where tau is a complex scalar and v is a complex vector. */

/*  If tau = 0, then H is taken to be the unit matrix. */

/*  To apply H' (the conjugate transpose of H), supply conjg(tau) instead */
/*  tau. */

/*  H is a product of k elementary reflectors as returned by ZTZRZF. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'L': form  H * C */
/*          = 'R': form  C * H */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix C. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix C. */

/*  L       (input) INTEGER */
/*          The number of entries of the vector V containing */
/*          the meaningful part of the Householder vectors. */
/*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */

/*  V       (input) COMPLEX*16 array, dimension (1+(L-1)*abs(INCV)) */
/*          The vector v in the representation of H as returned by */
/*          ZTZRZF. V is not used if TAU = 0. */

/*  INCV    (input) INTEGER */
/*          The increment between elements of v. INCV <> 0. */

/*  TAU     (input) COMPLEX*16 */
/*          The value tau in the representation of H. */

/*  C       (input/output) COMPLEX*16 array, dimension (LDC,N) */
/*          On entry, the M-by-N matrix C. */
/*          On exit, C is overwritten by the matrix H * C if SIDE = 'L', */
/*          or C * H if SIDE = 'R'. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C. LDC >= max(1,M). */

/*  WORK    (workspace) COMPLEX*16 array, dimension */
/*                         (N) if SIDE = 'L' */
/*                      or (M) if SIDE = 'R' */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */

/*  ===================================================================== */

    /* Parameter adjustments: shift pointers so the f2c code can use
     * Fortran-style 1-based indexing (v[1..], work[1..], c__[i + j*ldc]). */
    --v;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    /* NOTE: c_b1 (the complex constant 1.0) and c__1 (integer 1) are
     * file-scope f2c constants defined elsewhere in this file. */
    if (lsame_(side, "L")) {

/*        Form  H * C */

	if (tau->r != 0. || tau->i != 0.) {

/*           w( 1:n ) = conjg( C( 1, 1:n ) ) */

	    zcopy_(n, &c__[c_offset], ldc, &work[1], &c__1);
	    zlacgv_(n, &work[1], &c__1);

/*           w( 1:n ) = conjg( w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) ) */

	    zgemv_("Conjugate transpose", l, n, &c_b1, &c__[*m - *l + 1 +
		    c_dim1], ldc, &v[1], incv, &c_b1, &work[1], &c__1);
	    zlacgv_(n, &work[1], &c__1);

/*           C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) */

	    z__1.r = -tau->r, z__1.i = -tau->i;
	    zaxpy_(n, &z__1, &work[1], &c__1, &c__[c_offset], ldc);

/*           C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - */
/*                               tau * v( 1:l ) * conjg( w( 1:n )' ) */

	    z__1.r = -tau->r, z__1.i = -tau->i;
	    zgeru_(l, n, &z__1, &v[1], incv, &work[1], &c__1, &c__[*m - *l +
		    1 + c_dim1], ldc);
	}

    } else {

/*        Form  C * H */

	if (tau->r != 0. || tau->i != 0.) {

/*           w( 1:m ) = C( 1:m, 1 ) */

	    zcopy_(m, &c__[c_offset], &c__1, &work[1], &c__1);

/*           w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) */

	    zgemv_("No transpose", m, l, &c_b1, &c__[(*n - *l + 1) * c_dim1
		    + 1], ldc, &v[1], incv, &c_b1, &work[1], &c__1);

/*           C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) */

	    z__1.r = -tau->r, z__1.i = -tau->i;
	    zaxpy_(m, &z__1, &work[1], &c__1, &c__[c_offset], &c__1);

/*           C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - */
/*                               tau * w( 1:m ) * v( 1:l )' */

	    z__1.r = -tau->r, z__1.i = -tau->i;
	    zgerc_(m, l, &z__1, &work[1], &c__1, &v[1], incv, &c__[(*n - *l
		    + 1) * c_dim1 + 1], ldc);
	}

    }

    return 0;

/*     End of ZLARZ */

} /* zlarz_ */
/* Subroutine */ int zggglm_(integer *n, integer *m, integer *p,
	doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb,
	doublecomplex *d, doublecomplex *x, doublecomplex *y, doublecomplex *
	work, integer *lwork, integer *info)
{
/*  -- LAPACK driver routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    ZGGGLM solves a general Gauss-Markov linear model (GLM) problem:

            minimize || y ||_2   subject to   d = A*x + B*y
                x

    where A is an N-by-M matrix, B is an N-by-P matrix, and d is a
    given N-vector. It is assumed that M <= N <= M+P, and

               rank(A) = M    and    rank( A B ) = N.

    Under these assumptions, the constrained equation is always
    consistent, and there is a unique solution x and a minimal 2-norm
    solution y, which is obtained using a generalized QR factorization
    of A and B.

    In particular, if matrix B is square nonsingular, then the problem
    GLM is equivalent to the following weighted linear least squares
    problem

                 minimize || inv(B)*(d-A*x) ||_2
                     x

    where inv(B) denotes the inverse of B.

    Arguments
    =========

    N       (input) INTEGER
            The number of rows of the matrices A and B.  N >= 0.

    M       (input) INTEGER
            The number of columns of the matrix A.  0 <= M <= N.

    P       (input) INTEGER
            The number of columns of the matrix B.  P >= N-M.

    A       (input/output) COMPLEX*16 array, dimension (LDA,M)
            On entry, the N-by-M matrix A.
            On exit, A is destroyed.

    LDA     (input) INTEGER
            The leading dimension of the array A. LDA >= max(1,N).

    B       (input/output) COMPLEX*16 array, dimension (LDB,P)
            On entry, the N-by-P matrix B.
            On exit, B is destroyed.

    LDB     (input) INTEGER
            The leading dimension of the array B. LDB >= max(1,N).

    D       (input/output) COMPLEX*16 array, dimension (N)
            On entry, D is the left hand side of the GLM equation.
            On exit, D is destroyed.

    X       (output) COMPLEX*16 array, dimension (M)
    Y       (output) COMPLEX*16 array, dimension (P)
            On exit, X and Y are the solutions of the GLM problem.

    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK. LWORK >= max(1,N+M+P).
            For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB,
            where NB is an upper bound for the optimal blocksizes for
            ZGEQRF, CGERQF, ZUNMQR and CUNMRQ.

    INFO    (output) INTEGER
            = 0:  successful exit.
            < 0:  if INFO = -i, the i-th argument had an illegal value.

    ===================================================================

       Test the input parameters

       Parameter adjustments
       Function Body */
    /* Table of constant values */
    static doublecomplex c_b2 = {1.,0.};
    static integer c__1 = 1;

    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    doublereal d__1;
    doublecomplex z__1;
    /* Local variables */
    static integer lopt, i;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *), zcopy_(
	    integer *, doublecomplex *, integer *, doublecomplex *, integer *)
	    , ztrsv_(char *, char *, char *, integer *, doublecomplex *,
	    integer *, doublecomplex *, integer *);
    static integer np;
    extern /* Subroutine */ int xerbla_(char *, integer *), zggqrf_(
	    integer *, integer *, integer *, doublecomplex *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    doublecomplex *, integer *, integer *), zunmqr_(char *, char *,
	    integer *, integer *, integer *, doublecomplex *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, integer *), zunmrq_(char *, char *, integer *, integer
	    *, integer *, doublecomplex *, integer *, doublecomplex *,
	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);

    /* Fortran-style 1-based access macros over the 0-based C arrays. */
#define D(I) d[(I)-1]
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    *info = 0;
    np = min(*n,*p);
    if (*n < 0) {
	*info = -1;
    } else if (*m < 0 || *m > *n) {
	*info = -2;
    } else if (*p < 0 || *p < *n - *m) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    } else /* if(complicated condition) */ {
/*        Computing MAX */
	i__1 = 1, i__2 = *n + *m + *p;
	if (*lwork < max(i__1,i__2)) {
	    *info = -12;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZGGGLM", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Compute the GQR factorization of matrices A and B:

            Q'*A = ( R11 ) M,    Q'*B*Z' = ( T11  T12 ) M
                   (  0  ) N-M             (  0   T22 ) N-M
                      M                     M+P-N  N-M

       where R11 and T22 are upper triangular, and Q and Z are
       unitary.

       Workspace layout: WORK(1..M) = taua, WORK(M+1..M+NP) = taub,
       WORK(M+NP+1..) = scratch for the blocked routines. */

    i__1 = *lwork - *m - np;
    zggqrf_(n, m, p, &A(1,1), lda, &WORK(1), &B(1,1), ldb, &WORK(*m + 1), &
	    WORK(*m + np + 1), &i__1, info);
    i__1 = *m + np + 1;
    lopt = (integer) WORK(*m+np+1).r;

/*     Update left-hand-side vector d = Q'*d = ( d1 ) M
                                               ( d2 ) N-M */

    i__1 = max(1,*n);
    i__2 = *lwork - *m - np;
    zunmqr_("Left", "Conjugate transpose", n, &c__1, m, &A(1,1), lda, &
	    WORK(1), &D(1), &i__1, &WORK(*m + np + 1), &i__2, info);
/*     Computing MAX */
    i__3 = *m + np + 1;
    i__1 = lopt, i__2 = (integer) WORK(*m+np+1).r;
    lopt = max(i__1,i__2);

/*     Solve T22*y2 = d2 for y2 */

    i__1 = *n - *m;
    ztrsv_("Upper", "No transpose", "Non unit", &i__1, &B(*m+1,*m+*p-*n+1),
	    ldb, &D(*m + 1), &c__1);
    i__1 = *n - *m;
    zcopy_(&i__1, &D(*m + 1), &c__1, &Y(*m + *p - *n + 1), &c__1);

/*     Set y1 = 0 */

    i__1 = *m + *p - *n;
    for (i = 1; i <= *m+*p-*n; ++i) {
	i__2 = i;
	Y(i).r = 0., Y(i).i = 0.;
/* L10: */
    }

/*     Update d1 = d1 - T12*y2 */

    i__1 = *n - *m;
    z__1.r = -1., z__1.i = 0.;
    zgemv_("No transpose", m, &i__1, &z__1, &B(1,*m+*p-*n+1), ldb, &Y(*m + *
	    p - *n + 1), &c__1, &c_b2, &D(1), &c__1);

/*     Solve triangular system: R11*x = d1 */

    ztrsv_("Upper", "No Transpose", "Non unit", m, &A(1,1), lda, &D(1), &
	    c__1);

/*     Copy D to X */

    zcopy_(m, &D(1), &c__1, &X(1), &c__1);

/*     Backward transformation y = Z'*y

       Computing MAX */
    i__1 = 1, i__2 = *n - *p + 1;
    i__3 = max(1,*p);
    i__4 = *lwork - *m - np;
    zunmrq_("Left", "Conjugate transpose", p, &c__1, &np, &B(max(1,*n-*p+1),1),
	     ldb, &WORK(*m + 1), &Y(1), &i__3, &WORK(*m + np + 1), &
	    i__4, info);
/*     Computing MAX */
    i__3 = *m + np + 1;
    i__1 = lopt, i__2 = (integer) WORK(*m+np+1).r;
    d__1 = (doublereal) max(i__1,i__2);
    WORK(1).r = d__1, WORK(1).i = 0.;

    return 0;

/*     End of ZGGGLM */

} /* zggglm_ */
/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select,
	integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl,
	integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer
	*m, doublecomplex *work, doublereal *rwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       June 30, 1999

    Purpose
    =======

    ZTREVC computes some or all of the right and/or left eigenvectors of
    a complex upper triangular matrix T.

    The right eigenvector x and the left eigenvector y of T corresponding
    to an eigenvalue w are defined by:

                 T*x = w*x,     y'*T = w*y'

    where y' denotes the conjugate transpose of the vector y.

    If all eigenvectors are requested, the routine may either return the
    matrices X and/or Y of right or left eigenvectors of T, or the
    products Q*X and/or Q*Y, where Q is an input unitary
    matrix. If T was obtained from the Schur factorization of an
    original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of
    right or left eigenvectors of A.

    Arguments
    =========

    SIDE    (input) CHARACTER*1
            = 'R':  compute right eigenvectors only;
            = 'L':  compute left eigenvectors only;
            = 'B':  compute both right and left eigenvectors.

    HOWMNY  (input) CHARACTER*1
            = 'A':  compute all right and/or left eigenvectors;
            = 'B':  compute all right and/or left eigenvectors,
                    and backtransform them using the input matrices
                    supplied in VR and/or VL;
            = 'S':  compute selected right and/or left eigenvectors,
                    specified by the logical array SELECT.

    SELECT  (input) LOGICAL array, dimension (N)
            If HOWMNY = 'S', SELECT specifies the eigenvectors to be
            computed. If HOWMNY = 'A' or 'B', SELECT is not referenced.
            To select the eigenvector corresponding to the j-th
            eigenvalue, SELECT(j) must be set to .TRUE..

    N       (input) INTEGER
            The order of the matrix T. N >= 0.

    T       (input/output) COMPLEX*16 array, dimension (LDT,N)
            The upper triangular matrix T.  T is modified, but restored
            on exit.

    LDT     (input) INTEGER
            The leading dimension of the array T. LDT >= max(1,N).

    VL      (input/output) COMPLEX*16 array, dimension (LDVL,MM)
            On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
            contain an N-by-N matrix Q (usually the unitary matrix Q of
            Schur vectors returned by ZHSEQR).
            On exit, if SIDE = 'L' or 'B', VL contains:
            if HOWMNY = 'A', the matrix Y of left eigenvectors of T;
                             VL is lower triangular. The i-th column
                             VL(i) of VL is the eigenvector corresponding
                             to T(i,i).
            if HOWMNY = 'B', the matrix Q*Y;
            if HOWMNY = 'S', the left eigenvectors of T specified by
                             SELECT, stored consecutively in the columns
                             of VL, in the same order as their
                             eigenvalues.
            If SIDE = 'R', VL is not referenced.

    LDVL    (input) INTEGER
            The leading dimension of the array VL.  LDVL >= max(1,N) if
            SIDE = 'L' or 'B'; LDVL >= 1 otherwise.

    VR      (input/output) COMPLEX*16 array, dimension (LDVR,MM)
            On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
            contain an N-by-N matrix Q (usually the unitary matrix Q of
            Schur vectors returned by ZHSEQR).
            On exit, if SIDE = 'R' or 'B', VR contains:
            if HOWMNY = 'A', the matrix X of right eigenvectors of T;
                             VR is upper triangular. The i-th column
                             VR(i) of VR is the eigenvector corresponding
                             to T(i,i).
            if HOWMNY = 'B', the matrix Q*X;
            if HOWMNY = 'S', the right eigenvectors of T specified by
                             SELECT, stored consecutively in the columns
                             of VR, in the same order as their
                             eigenvalues.
            If SIDE = 'L', VR is not referenced.

    LDVR    (input) INTEGER
            The leading dimension of the array VR.  LDVR >= max(1,N) if
            SIDE = 'R' or 'B'; LDVR >= 1 otherwise.

    MM      (input) INTEGER
            The number of columns in the arrays VL and/or VR. MM >= M.

    M       (output) INTEGER
            The number of columns in the arrays VL and/or VR actually
            used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M
            is set to N.  Each selected eigenvector occupies one
            column.

    WORK    (workspace) COMPLEX*16 array, dimension (2*N)

    RWORK   (workspace) DOUBLE PRECISION array, dimension (N)

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value

    Further Details
    ===============

    The algorithm used in this program is basically backward (forward)
    substitution, with scaling to make the the code robust against
    possible overflow.

    Each eigenvector is normalized so that the element of largest
    magnitude has magnitude 1; here the magnitude of a complex number
    (x,y) is taken to be |x| + |y|.

    =====================================================================

       Decode and test the input parameters

       Parameter adjustments */
    /* Table of constant values */
    static doublecomplex c_b2 = {1.,0.};
    static integer c__1 = 1;

    /* System generated locals */
    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
	    i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3;
    doublecomplex z__1, z__2;
    /* Builtin functions */
    double d_imag(doublecomplex *);
    void d_cnjg(doublecomplex *, doublecomplex *);
    /* Local variables */
    static logical allv;
    static doublereal unfl, ovfl, smin;
    static logical over;
    static integer i__, j, k;
    static doublereal scale;
    extern logical lsame_(char *, char *);
    static doublereal remax;
    static logical leftv, bothv;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *);
    static logical somev;
    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *,
	     doublecomplex *, integer *), dlabad_(doublereal *, doublereal *);
    static integer ii, ki;
    extern doublereal dlamch_(char *);
    static integer is;
    extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_(
	    integer *, doublereal *, doublecomplex *, integer *);
    extern integer izamax_(integer *, doublecomplex *, integer *);
    static logical rightv;
    extern doublereal dzasum_(integer *, doublecomplex *, integer *);
    static doublereal smlnum;
    extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *,
	    integer *, doublecomplex *, integer *, doublecomplex *,
	    doublereal *, doublereal *, integer *);
    static doublereal ulp;

    /* Fortran-style 1-based (row, column) access macros. */
#define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1
#define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)]
#define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1
#define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)]
#define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1
#define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)]

    --select;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1 * 1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1 * 1;
    vr -= vr_offset;
    --work;
    --rwork;

    /* Function Body */
    bothv = lsame_(side, "B");
    rightv = lsame_(side, "R") || bothv;
    leftv = lsame_(side, "L") || bothv;

    allv = lsame_(howmny, "A");
    over = lsame_(howmny, "B");
    somev = lsame_(howmny, "S");

/*     Set M to the number of columns required to store the selected
       eigenvectors. */

    if (somev) {
	*m = 0;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    if (select[j]) {
		++(*m);
	    }
/* L10: */
	}
    } else {
	*m = *n;
    }

    *info = 0;
    if (! rightv && ! leftv) {
	*info = -1;
    } else if (! allv && ! over && ! somev) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*ldt < max(1,*n)) {
	*info = -6;
    } else if (*ldvl < 1 || leftv && *ldvl < *n) {
	*info = -8;
    } else if (*ldvr < 1 || rightv && *ldvr < *n) {
	*info = -10;
    } else if (*mm < *m) {
	*info = -11;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTREVC", &i__1);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0) {
	return 0;
    }

/*     Set the constants to control overflow. */

    unfl = dlamch_("Safe minimum");
    ovfl = 1. / unfl;
    dlabad_(&unfl, &ovfl);
    ulp = dlamch_("Precision");
    smlnum = unfl * (*n / ulp);

/*     Store the diagonal elements of T in working array WORK
       (slots N+1..2N); they are temporarily perturbed below and
       restored from this copy after each eigenvector is computed. */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__ + *n;
	i__3 = t_subscr(i__, i__);
	work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i;
/* L20: */
    }

/*     Compute 1-norm of each column of strictly upper triangular
       part of T to control overflow in triangular solver. */

    rwork[1] = 0.;
    i__1 = *n;
    for (j = 2; j <= i__1; ++j) {
	i__2 = j - 1;
	rwork[j] = dzasum_(&i__2, &t_ref(1, j), &c__1);
/* L30: */
    }

    if (rightv) {

/*        Compute right eigenvectors. */

	is = *m;
	for (ki = *n; ki >= 1; --ki) {

	    if (somev) {
		if (! select[ki]) {
		    goto L80;
		}
	    }
/*           Computing MAX */
	    i__1 = t_subscr(ki, ki);
	    d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(&
		    t_ref(ki, ki)), abs(d__2)));
	    smin = max(d__3,smlnum);

	    work[1].r = 1., work[1].i = 0.;

/*           Form right-hand side. */

	    i__1 = ki - 1;
	    for (k = 1; k <= i__1; ++k) {
		i__2 = k;
		i__3 = t_subscr(k, ki);
		z__1.r = -t[i__3].r, z__1.i = -t[i__3].i;
		work[i__2].r = z__1.r, work[i__2].i = z__1.i;
/* L40: */
	    }

/*           Solve the triangular system:
                (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK.
             The diagonal of T is shifted in place (and clamped to SMIN
             for numerical safety); it is restored after the solve. */

	    i__1 = ki - 1;
	    for (k = 1; k <= i__1; ++k) {
		i__2 = t_subscr(k, k);
		i__3 = t_subscr(k, k);
		i__4 = t_subscr(ki, ki);
		z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4]
			.i;
		t[i__2].r = z__1.r, t[i__2].i = z__1.i;
		i__2 = t_subscr(k, k);
		if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t_ref(k,
			k)), abs(d__2)) < smin) {
		    i__3 = t_subscr(k, k);
		    t[i__3].r = smin, t[i__3].i = 0.;
		}
/* L50: */
	    }

	    if (ki > 1) {
		i__1 = ki - 1;
		zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[
			t_offset], ldt, &work[1], &scale, &rwork[1], info);
		i__1 = ki;
		work[i__1].r = scale, work[i__1].i = 0.;
	    }

/*           Copy the vector x or Q*x to VR and normalize. */

	    if (! over) {
		zcopy_(&ki, &work[1], &c__1, &vr_ref(1, is), &c__1);

		ii = izamax_(&ki, &vr_ref(1, is), &c__1);
		i__1 = vr_subscr(ii, is);
		remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag(
			&vr_ref(ii, is)), abs(d__2)));
		zdscal_(&ki, &remax, &vr_ref(1, is), &c__1);

		i__1 = *n;
		for (k = ki + 1; k <= i__1; ++k) {
		    i__2 = vr_subscr(k, is);
		    vr[i__2].r = 0., vr[i__2].i = 0.;
/* L60: */
		}
	    } else {
		if (ki > 1) {
		    i__1 = ki - 1;
		    z__1.r = scale, z__1.i = 0.;
		    zgemv_("N", n, &i__1, &c_b2, &vr[vr_offset], ldvr, &work[
			    1], &c__1, &z__1, &vr_ref(1, ki), &c__1);
		}

		ii = izamax_(n, &vr_ref(1, ki), &c__1);
		i__1 = vr_subscr(ii, ki);
		remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag(
			&vr_ref(ii, ki)), abs(d__2)));
		zdscal_(n, &remax, &vr_ref(1, ki), &c__1);
	    }

/*           Set back the original diagonal elements of T. */

	    i__1 = ki - 1;
	    for (k = 1; k <= i__1; ++k) {
		i__2 = t_subscr(k, k);
		i__3 = k + *n;
		t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i;
/* L70: */
	    }

	    --is;
L80:
	    ;
	}
    }

    if (leftv) {

/*        Compute left eigenvectors. */

	is = 1;
	i__1 = *n;
	for (ki = 1; ki <= i__1; ++ki) {

	    if (somev) {
		if (! select[ki]) {
		    goto L130;
		}
	    }
/*           Computing MAX */
	    i__2 = t_subscr(ki, ki);
	    d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&
		    t_ref(ki, ki)), abs(d__2)));
	    smin = max(d__3,smlnum);

	    i__2 = *n;
	    work[i__2].r = 1., work[i__2].i = 0.;

/*           Form right-hand side. */

	    i__2 = *n;
	    for (k = ki + 1; k <= i__2; ++k) {
		i__3 = k;
		d_cnjg(&z__2, &t_ref(ki, k));
		z__1.r = -z__2.r, z__1.i = -z__2.i;
		work[i__3].r = z__1.r, work[i__3].i = z__1.i;
/* L90: */
	    }

/*           Solve the triangular system:
                (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. */

	    i__2 = *n;
	    for (k = ki + 1; k <= i__2; ++k) {
		i__3 = t_subscr(k, k);
		i__4 = t_subscr(k, k);
		i__5 = t_subscr(ki, ki);
		z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5]
			.i;
		t[i__3].r = z__1.r, t[i__3].i = z__1.i;
		i__3 = t_subscr(k, k);
		if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t_ref(k,
			k)), abs(d__2)) < smin) {
		    i__4 = t_subscr(k, k);
		    t[i__4].r = smin, t[i__4].i = 0.;
		}
/* L100: */
	    }

	    if (ki < *n) {
		i__2 = *n - ki;
		zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", &
			i__2, &t_ref(ki + 1, ki + 1), ldt, &work[ki + 1], &
			scale, &rwork[1], info);
		i__2 = ki;
		work[i__2].r = scale, work[i__2].i = 0.;
	    }

/*           Copy the vector x or Q*x to VL and normalize. */

	    if (! over) {
		i__2 = *n - ki + 1;
		zcopy_(&i__2, &work[ki], &c__1, &vl_ref(ki, is), &c__1);

		i__2 = *n - ki + 1;
		ii = izamax_(&i__2, &vl_ref(ki, is), &c__1) + ki - 1;
		i__2 = vl_subscr(ii, is);
		remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag(
			&vl_ref(ii, is)), abs(d__2)));
		i__2 = *n - ki + 1;
		zdscal_(&i__2, &remax, &vl_ref(ki, is), &c__1);

		i__2 = ki - 1;
		for (k = 1; k <= i__2; ++k) {
		    i__3 = vl_subscr(k, is);
		    vl[i__3].r = 0., vl[i__3].i = 0.;
/* L110: */
		}
	    } else {
		if (ki < *n) {
		    i__2 = *n - ki;
		    z__1.r = scale, z__1.i = 0.;
		    zgemv_("N", n, &i__2, &c_b2, &vl_ref(1, ki + 1), ldvl, &
			    work[ki + 1], &c__1, &z__1, &vl_ref(1, ki), &c__1);
		}

		ii = izamax_(n, &vl_ref(1, ki), &c__1);
		i__2 = vl_subscr(ii, ki);
		remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag(
			&vl_ref(ii, ki)), abs(d__2)));
		zdscal_(n, &remax, &vl_ref(1, ki), &c__1);
	    }

/*           Set back the original diagonal elements of T. */

	    i__2 = *n;
	    for (k = ki + 1; k <= i__2; ++k) {
		i__3 = t_subscr(k, k);
		i__4 = k + *n;
		t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i;
/* L120: */
	    }

	    ++is;
L130:
	    ;
	}
    }

    return 0;

/*     End of ZTREVC */

} /* ztrevc_ */
/* Subroutine */ int znaitr_(integer *ido, char *bmat, integer *n, integer *k, integer *np, integer *nb, doublecomplex *resid, doublereal *rnorm, doublecomplex *v, integer *ldv, doublecomplex *h__, integer *ldh, integer *ipntr, doublecomplex *workd, integer *info, ftnlen bmat_len) { /* Initialized data */ static logical first = TRUE_; /* System generated locals */ integer h_dim1, h_offset, v_dim1, v_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3, d__4; doublecomplex z__1; /* Builtin functions */ double d_imag(doublecomplex *), sqrt(doublereal); /* Local variables */ static integer i__, j; static real t0, t1, t2, t3, t4, t5; static integer jj, ipj, irj, ivj; static doublereal ulp, tst1; static integer ierr, iter; static doublereal unfl, ovfl; static integer itry; static doublereal temp1; static logical orth1, orth2, step3, step4; static doublereal betaj; static integer infol; static doublecomplex cnorm; extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static doublereal rtemp[2]; extern /* Subroutine */ int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); static doublereal wnorm; extern /* Subroutine */ int dvout_(integer *, integer *, doublereal *, integer *, char *, ftnlen), zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), ivout_(integer *, integer *, integer *, integer *, char *, ftnlen), zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *), zmout_(integer *, integer *, integer *, doublecomplex *, integer *, integer *, char *, ftnlen), zvout_(integer *, integer *, doublecomplex *, integer *, char *, ftnlen); extern doublereal dlapy2_(doublereal *, doublereal *); extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); extern doublereal dznrm2_(integer *, doublecomplex *, integer *); static 
doublereal rnorm1; extern /* Subroutine */ int zgetv0_(integer *, char *, integer *, logical *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublereal *, integer *, doublecomplex *, integer *, ftnlen); extern doublereal dlamch_(char *, ftnlen); extern /* Subroutine */ int second_(real *), zdscal_(integer *, doublereal *, doublecomplex *, integer *); static logical rstart; static integer msglvl; static doublereal smlnum; extern doublereal zlanhs_(char *, integer *, doublecomplex *, integer *, doublecomplex *, ftnlen); extern /* Subroutine */ int zlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublecomplex *, integer *, integer *, ftnlen); /* %----------------------------------------------------% */ /* | Include files for debugging and timing information | */ /* %----------------------------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: debug.h SID: 2.3 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %---------------------------------% */ /* | See debug.doc for documentation | */ /* %---------------------------------% */ /* %------------------% */ /* | Scalar Arguments | */ /* %------------------% */ /* %--------------------------------% */ /* | See stat.doc for documentation | */ /* %--------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: stat.h SID: 2.2 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %-----------------% */ /* | Array Arguments | */ /* %-----------------% */ /* %------------% */ /* | Parameters | */ /* %------------% */ /* %--------------% */ /* | Local Arrays | */ /* %--------------% */ /* %---------------% */ /* | Local Scalars | */ /* %---------------% */ /* %----------------------% */ /* | External Subroutines | */ /* %----------------------% */ /* %--------------------% */ /* | External Functions | */ /* %--------------------% */ /* %---------------------% */ /* | Intrinsic Functions | */ /* %---------------------% */ /* %-----------------% */ /* | Data 
statements | */ /* %-----------------% */ /* Parameter adjustments */ --workd; --resid; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --ipntr; /* Function Body */ /* %-----------------------% */ /* | Executable Statements | */ /* %-----------------------% */ if (first) { /* %-----------------------------------------% */ /* | Set machine-dependent constants for the | */ /* | the splitting and deflation criterion. | */ /* | If norm(H) <= sqrt(OVFL), | */ /* | overflow should not occur. | */ /* | REFERENCE: LAPACK subroutine zlahqr | */ /* %-----------------------------------------% */ unfl = dlamch_("safe minimum", (ftnlen)12); z__1.r = 1. / unfl, z__1.i = 0. / unfl; ovfl = z__1.r; dlabad_(&unfl, &ovfl); ulp = dlamch_("precision", (ftnlen)9); smlnum = unfl * (*n / ulp); first = FALSE_; } if (*ido == 0) { /* %-------------------------------% */ /* | Initialize timing statistics | */ /* | & message level for debugging | */ /* %-------------------------------% */ second_(&t0); msglvl = debug_1.mcaitr; /* %------------------------------% */ /* | Initial call to this routine | */ /* %------------------------------% */ *info = 0; step3 = FALSE_; step4 = FALSE_; rstart = FALSE_; orth1 = FALSE_; orth2 = FALSE_; j = *k + 1; ipj = 1; irj = ipj + *n; ivj = irj + *n; } /* %-------------------------------------------------% */ /* | When in reverse communication mode one of: | */ /* | STEP3, STEP4, ORTH1, ORTH2, RSTART | */ /* | will be .true. when .... | */ /* | STEP3: return from computing OP*v_{j}. | */ /* | STEP4: return from computing B-norm of OP*v_{j} | */ /* | ORTH1: return from computing B-norm of r_{j+1} | */ /* | ORTH2: return from computing B-norm of | */ /* | correction to the residual vector. | */ /* | RSTART: return from OP computations needed by | */ /* | zgetv0. 
| */ /* %-------------------------------------------------% */ if (step3) { goto L50; } if (step4) { goto L60; } if (orth1) { goto L70; } if (orth2) { goto L90; } if (rstart) { goto L30; } /* %-----------------------------% */ /* | Else this is the first step | */ /* %-----------------------------% */ /* %--------------------------------------------------------------% */ /* | | */ /* | A R N O L D I I T E R A T I O N L O O P | */ /* | | */ /* | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | */ /* %--------------------------------------------------------------% */ L1000: if (msglvl > 1) { ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: generat" "ing Arnoldi vector number", (ftnlen)40); dvout_(&debug_1.logfil, &c__1, rnorm, &debug_1.ndigit, "_naitr: B-no" "rm of the current residual is", (ftnlen)41); } /* %---------------------------------------------------% */ /* | STEP 1: Check if the B norm of j-th residual | */ /* | vector is zero. Equivalent to determine whether | */ /* | an exact j-step Arnoldi factorization is present. | */ /* %---------------------------------------------------% */ betaj = *rnorm; if (*rnorm > 0.) { goto L40; } /* %---------------------------------------------------% */ /* | Invariant subspace found, generate a new starting | */ /* | vector which is orthogonal to the current Arnoldi | */ /* | basis and continue the iteration. | */ /* %---------------------------------------------------% */ if (msglvl > 0) { ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: ****** " "RESTART AT STEP ******", (ftnlen)37); } /* %---------------------------------------------% */ /* | ITRY is the loop variable that controls the | */ /* | maximum amount of times that a restart is | */ /* | attempted. 
NRSTRT is used by stat.h | */ /* %---------------------------------------------% */ betaj = 0.; ++timing_1.nrstrt; itry = 1; L20: rstart = TRUE_; *ido = 0; L30: /* %--------------------------------------% */ /* | If in reverse communication mode and | */ /* | RSTART = .true. flow returns here. | */ /* %--------------------------------------% */ zgetv0_(ido, bmat, &itry, &c_false, n, &j, &v[v_offset], ldv, &resid[1], rnorm, &ipntr[1], &workd[1], &ierr, (ftnlen)1); if (*ido != 99) { goto L9000; } if (ierr < 0) { ++itry; if (itry <= 3) { goto L20; } /* %------------------------------------------------% */ /* | Give up after several restart attempts. | */ /* | Set INFO to the size of the invariant subspace | */ /* | which spans OP and exit. | */ /* %------------------------------------------------% */ *info = j - 1; second_(&t1); timing_1.tcaitr += t1 - t0; *ido = 99; goto L9000; } L40: /* %---------------------------------------------------------% */ /* | STEP 2: v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm | */ /* | Note that p_{j} = B*r_{j-1}. In order to avoid overflow | */ /* | when reciprocating a small RNORM, test against lower | */ /* | machine bound. | */ /* %---------------------------------------------------------% */ zcopy_(n, &resid[1], &c__1, &v[j * v_dim1 + 1], &c__1); if (*rnorm >= unfl) { temp1 = 1. / *rnorm; zdscal_(n, &temp1, &v[j * v_dim1 + 1], &c__1); zdscal_(n, &temp1, &workd[ipj], &c__1); } else { /* %-----------------------------------------% */ /* | To scale both v_{j} and p_{j} carefully | */ /* | use LAPACK routine zlascl | */ /* %-----------------------------------------% */ zlascl_("General", &i__, &i__, rnorm, &c_b27, n, &c__1, &v[j * v_dim1 + 1], n, &infol, (ftnlen)7); zlascl_("General", &i__, &i__, rnorm, &c_b27, n, &c__1, &workd[ipj], n, &infol, (ftnlen)7); } /* %------------------------------------------------------% */ /* | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} | */ /* | Note that this is not quite yet r_{j}. 
See STEP 4 | */ /* %------------------------------------------------------% */ step3 = TRUE_; ++timing_1.nopx; second_(&t2); zcopy_(n, &v[j * v_dim1 + 1], &c__1, &workd[ivj], &c__1); ipntr[1] = ivj; ipntr[2] = irj; ipntr[3] = ipj; *ido = 1; /* %-----------------------------------% */ /* | Exit in order to compute OP*v_{j} | */ /* %-----------------------------------% */ goto L9000; L50: /* %----------------------------------% */ /* | Back from reverse communication; | */ /* | WORKD(IRJ:IRJ+N-1) := OP*v_{j} | */ /* | if step3 = .true. | */ /* %----------------------------------% */ second_(&t3); timing_1.tmvopx += t3 - t2; step3 = FALSE_; /* %------------------------------------------% */ /* | Put another copy of OP*v_{j} into RESID. | */ /* %------------------------------------------% */ zcopy_(n, &workd[irj], &c__1, &resid[1], &c__1); /* %---------------------------------------% */ /* | STEP 4: Finish extending the Arnoldi | */ /* | factorization to length j. | */ /* %---------------------------------------% */ second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; step4 = TRUE_; ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %-------------------------------------% */ /* | Exit in order to compute B*OP*v_{j} | */ /* %-------------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { zcopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L60: /* %----------------------------------% */ /* | Back from reverse communication; | */ /* | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | */ /* | if step4 = .true. | */ /* %----------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } step4 = FALSE_; /* %-------------------------------------% */ /* | The following is needed for STEP 5. | */ /* | Compute the B-norm of OP*v_{j}. 
| */ /* %-------------------------------------% */ if (*(unsigned char *)bmat == 'G') { zdotc_(&z__1, n, &resid[1], &c__1, &workd[ipj], &c__1); cnorm.r = z__1.r, cnorm.i = z__1.i; d__1 = cnorm.r; d__2 = d_imag(&cnorm); wnorm = sqrt(dlapy2_(&d__1, &d__2)); } else if (*(unsigned char *)bmat == 'I') { wnorm = dznrm2_(n, &resid[1], &c__1); } /* %-----------------------------------------% */ /* | Compute the j-th residual corresponding | */ /* | to the j step factorization. | */ /* | Use Classical Gram Schmidt and compute: | */ /* | w_{j} <- V_{j}^T * B * OP * v_{j} | */ /* | r_{j} <- OP*v_{j} - V_{j} * w_{j} | */ /* %-----------------------------------------% */ /* %------------------------------------------% */ /* | Compute the j Fourier coefficients w_{j} | */ /* | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. | */ /* %------------------------------------------% */ zgemv_("C", n, &j, &c_b1, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b2, & h__[j * h_dim1 + 1], &c__1, (ftnlen)1); /* %--------------------------------------% */ /* | Orthogonalize r_{j} against V_{j}. | */ /* | RESID contains OP*v_{j}. See STEP 3. | */ /* %--------------------------------------% */ z__1.r = -1., z__1.i = -0.; zgemv_("N", n, &j, &z__1, &v[v_offset], ldv, &h__[j * h_dim1 + 1], &c__1, &c_b1, &resid[1], &c__1, (ftnlen)1); if (j > 1) { i__1 = j + (j - 1) * h_dim1; z__1.r = betaj, z__1.i = 0.; h__[i__1].r = z__1.r, h__[i__1].i = z__1.i; } second_(&t4); orth1 = TRUE_; second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; zcopy_(n, &resid[1], &c__1, &workd[irj], &c__1); ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %----------------------------------% */ /* | Exit in order to compute B*r_{j} | */ /* %----------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { zcopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L70: /* %---------------------------------------------------% */ /* | Back from reverse communication if ORTH1 = .true. 
| */ /* | WORKD(IPJ:IPJ+N-1) := B*r_{j}. | */ /* %---------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } orth1 = FALSE_; /* %------------------------------% */ /* | Compute the B-norm of r_{j}. | */ /* %------------------------------% */ if (*(unsigned char *)bmat == 'G') { zdotc_(&z__1, n, &resid[1], &c__1, &workd[ipj], &c__1); cnorm.r = z__1.r, cnorm.i = z__1.i; d__1 = cnorm.r; d__2 = d_imag(&cnorm); *rnorm = sqrt(dlapy2_(&d__1, &d__2)); } else if (*(unsigned char *)bmat == 'I') { *rnorm = dznrm2_(n, &resid[1], &c__1); } /* %-----------------------------------------------------------% */ /* | STEP 5: Re-orthogonalization / Iterative refinement phase | */ /* | Maximum NITER_ITREF tries. | */ /* | | */ /* | s = V_{j}^T * B * r_{j} | */ /* | r_{j} = r_{j} - V_{j}*s | */ /* | alphaj = alphaj + s_{j} | */ /* | | */ /* | The stopping criteria used for iterative refinement is | */ /* | discussed in Parlett's book SEP, page 107 and in Gragg & | */ /* | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990. | */ /* | Determine if we need to correct the residual. The goal is | */ /* | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} || | */ /* | The following test determines whether the sine of the | */ /* | angle between OP*x and the computed residual is less | */ /* | than or equal to 0.717. | */ /* %-----------------------------------------------------------% */ if (*rnorm > wnorm * .717f) { goto L100; } iter = 0; ++timing_1.nrorth; /* %---------------------------------------------------% */ /* | Enter the Iterative refinement phase. If further | */ /* | refinement is necessary, loop back here. The loop | */ /* | variable is ITER. 
Perform a step of Classical | */ /* | Gram-Schmidt using all the Arnoldi vectors V_{j} | */ /* %---------------------------------------------------% */ L80: if (msglvl > 2) { rtemp[0] = wnorm; rtemp[1] = *rnorm; dvout_(&debug_1.logfil, &c__2, rtemp, &debug_1.ndigit, "_naitr: re-o" "rthogonalization; wnorm and rnorm are", (ftnlen)49); zvout_(&debug_1.logfil, &j, &h__[j * h_dim1 + 1], &debug_1.ndigit, "_naitr: j-th column of H", (ftnlen)24); } /* %----------------------------------------------------% */ /* | Compute V_{j}^T * B * r_{j}. | */ /* | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | */ /* %----------------------------------------------------% */ zgemv_("C", n, &j, &c_b1, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b2, & workd[irj], &c__1, (ftnlen)1); /* %---------------------------------------------% */ /* | Compute the correction to the residual: | */ /* | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). | */ /* | The correction to H is v(:,1:J)*H(1:J,1:J) | */ /* | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. | */ /* %---------------------------------------------% */ z__1.r = -1., z__1.i = -0.; zgemv_("N", n, &j, &z__1, &v[v_offset], ldv, &workd[irj], &c__1, &c_b1, & resid[1], &c__1, (ftnlen)1); zaxpy_(&j, &c_b1, &workd[irj], &c__1, &h__[j * h_dim1 + 1], &c__1); orth2 = TRUE_; second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; zcopy_(n, &resid[1], &c__1, &workd[irj], &c__1); ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %-----------------------------------% */ /* | Exit in order to compute B*r_{j}. | */ /* | r_{j} is the corrected residual. | */ /* %-----------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { zcopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L90: /* %---------------------------------------------------% */ /* | Back from reverse communication if ORTH2 = .true. 
| */ /* %---------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } /* %-----------------------------------------------------% */ /* | Compute the B-norm of the corrected residual r_{j}. | */ /* %-----------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { zdotc_(&z__1, n, &resid[1], &c__1, &workd[ipj], &c__1); cnorm.r = z__1.r, cnorm.i = z__1.i; d__1 = cnorm.r; d__2 = d_imag(&cnorm); rnorm1 = sqrt(dlapy2_(&d__1, &d__2)); } else if (*(unsigned char *)bmat == 'I') { rnorm1 = dznrm2_(n, &resid[1], &c__1); } if (msglvl > 0 && iter > 0) { ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: Iterati" "ve refinement for Arnoldi residual", (ftnlen)49); if (msglvl > 2) { rtemp[0] = *rnorm; rtemp[1] = rnorm1; dvout_(&debug_1.logfil, &c__2, rtemp, &debug_1.ndigit, "_naitr: " "iterative refinement ; rnorm and rnorm1 are", (ftnlen)51); } } /* %-----------------------------------------% */ /* | Determine if we need to perform another | */ /* | step of re-orthogonalization. | */ /* %-----------------------------------------% */ if (rnorm1 > *rnorm * .717f) { /* %---------------------------------------% */ /* | No need for further refinement. | */ /* | The cosine of the angle between the | */ /* | corrected residual vector and the old | */ /* | residual vector is greater than 0.717 | */ /* | In other words the corrected residual | */ /* | and the old residual vector share an | */ /* | angle of less than arcCOS(0.717) | */ /* %---------------------------------------% */ *rnorm = rnorm1; } else { /* %-------------------------------------------% */ /* | Another step of iterative refinement step | */ /* | is required. 
NITREF is used by stat.h | */ /* %-------------------------------------------% */ ++timing_1.nitref; *rnorm = rnorm1; ++iter; if (iter <= 1) { goto L80; } /* %-------------------------------------------------% */ /* | Otherwise RESID is numerically in the span of V | */ /* %-------------------------------------------------% */ i__1 = *n; for (jj = 1; jj <= i__1; ++jj) { i__2 = jj; resid[i__2].r = 0., resid[i__2].i = 0.; /* L95: */ } *rnorm = 0.; } /* %----------------------------------------------% */ /* | Branch here directly if iterative refinement | */ /* | wasn't necessary or after at most NITER_REF | */ /* | steps of iterative refinement. | */ /* %----------------------------------------------% */ L100: rstart = FALSE_; orth2 = FALSE_; second_(&t5); timing_1.titref += t5 - t4; /* %------------------------------------% */ /* | STEP 6: Update j = j+1; Continue | */ /* %------------------------------------% */ ++j; if (j > *k + *np) { second_(&t1); timing_1.tcaitr += t1 - t0; *ido = 99; i__1 = *k + *np - 1; for (i__ = max(1,*k); i__ <= i__1; ++i__) { /* %--------------------------------------------% */ /* | Check for splitting and deflation. | */ /* | Use a standard test as in the QR algorithm | */ /* | REFERENCE: LAPACK subroutine zlahqr | */ /* %--------------------------------------------% */ i__2 = i__ + i__ * h_dim1; d__1 = h__[i__2].r; d__2 = d_imag(&h__[i__ + i__ * h_dim1]); i__3 = i__ + 1 + (i__ + 1) * h_dim1; d__3 = h__[i__3].r; d__4 = d_imag(&h__[i__ + 1 + (i__ + 1) * h_dim1]); tst1 = dlapy2_(&d__1, &d__2) + dlapy2_(&d__3, &d__4); if (tst1 == 0.) 
{ i__2 = *k + *np; tst1 = zlanhs_("1", &i__2, &h__[h_offset], ldh, &workd[*n + 1] , (ftnlen)1); } i__2 = i__ + 1 + i__ * h_dim1; d__1 = h__[i__2].r; d__2 = d_imag(&h__[i__ + 1 + i__ * h_dim1]); /* Computing MAX */ d__3 = ulp * tst1; if (dlapy2_(&d__1, &d__2) <= max(d__3,smlnum)) { i__3 = i__ + 1 + i__ * h_dim1; h__[i__3].r = 0., h__[i__3].i = 0.; } /* L110: */ } if (msglvl > 2) { i__1 = *k + *np; i__2 = *k + *np; zmout_(&debug_1.logfil, &i__1, &i__2, &h__[h_offset], ldh, & debug_1.ndigit, "_naitr: Final upper Hessenberg matrix H" " of order K+NP", (ftnlen)53); } goto L9000; } /* %--------------------------------------------------------% */ /* | Loop back to extend the factorization by another step. | */ /* %--------------------------------------------------------% */ goto L1000; /* %---------------------------------------------------------------% */ /* | | */ /* | E N D O F M A I N I T E R A T I O N L O O P | */ /* | | */ /* %---------------------------------------------------------------% */ L9000: return 0; /* %---------------% */ /* | End of znaitr | */ /* %---------------% */ } /* znaitr_ */
/* Subroutine */ int zlaqps_(integer *m, integer *n, integer *offset, integer
	*nb, integer *kb, doublecomplex *a, integer *lda, integer *jpvt,
	doublecomplex *tau, doublereal *vn1, doublereal *vn2, doublecomplex *
	auxv, doublecomplex *f, integer *ldf)
{
    /* System generated locals */
    integer a_dim1, a_offset, f_dim1, f_offset, i__1, i__2, i__3;
    doublereal d__1, d__2;
    doublecomplex z__1;

    /* Builtin functions (f2c runtime helpers) */
    double sqrt(doublereal);
    void d_cnjg(doublecomplex *, doublecomplex *);
    double z_abs(doublecomplex *);
    integer i_dnnt(doublereal *);

    /* Local variables */
    integer j, k, rk;
    doublecomplex akk;
    integer pvt;
    doublereal temp, temp2, tol3z;
    integer itemp;
    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
	    integer *, doublecomplex *, doublecomplex *, integer *,
	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
	    integer *), zgemv_(char *, integer *, integer *, doublecomplex *,
	    doublecomplex *, integer *, doublecomplex *, integer *,
	    doublecomplex *, doublecomplex *, integer *), zswap_(integer *,
	    doublecomplex *, integer *, doublecomplex *, integer *);
    extern doublereal dznrm2_(integer *, doublecomplex *, integer *), dlamch_(
	    char *);
    extern integer idamax_(integer *, doublereal *, integer *);
    integer lsticc;
    extern /* Subroutine */ int zlarfp_(integer *, doublecomplex *,
	    doublecomplex *, integer *, doublecomplex *);
    integer lastrk;

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  ZLAQPS computes a step of QR factorization with column pivoting */
/*  of a complex M-by-N matrix A by using Blas-3.  It tries to factorize */
/*  NB columns from A starting from the row OFFSET+1, and updates all */
/*  of the matrix with Blas-3 xGEMM. */

/*  In some cases, due to catastrophic cancellations, it cannot */
/*  factorize NB columns.  Hence, the actual number of factorized */
/*  columns is returned in KB. */

/*  Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A. M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A. N >= 0 */

/*  OFFSET  (input) INTEGER */
/*          The number of rows of A that have been factorized in */
/*          previous steps. */

/*  NB      (input) INTEGER */
/*          The number of columns to factorize. */

/*  KB      (output) INTEGER */
/*          The number of columns actually factorized. */

/*  A       (input/output) COMPLEX*16 array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix A. */
/*          On exit, block A(OFFSET+1:M,1:KB) is the triangular */
/*          factor obtained and block A(1:OFFSET,1:N) has been */
/*          accordingly pivoted, but not factorized. */
/*          The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has */
/*          been updated. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= max(1,M). */

/*  JPVT    (input/output) INTEGER array, dimension (N) */
/*          JPVT(I) = K <==> Column K of the full matrix A has been */
/*          permuted into position I in AP. */

/*  TAU     (output) COMPLEX*16 array, dimension (KB) */
/*          The scalar factors of the elementary reflectors. */

/*  VN1     (input/output) DOUBLE PRECISION array, dimension (N) */
/*          The vector with the partial column norms. */

/*  VN2     (input/output) DOUBLE PRECISION array, dimension (N) */
/*          The vector with the exact column norms. */

/*  AUXV    (input/output) COMPLEX*16 array, dimension (NB) */
/*          Auxiliary vector. */

/*  F       (input/output) COMPLEX*16 array, dimension (LDF,NB) */
/*          Matrix F' = L*Y'*A. */

/*  LDF     (input) INTEGER */
/*          The leading dimension of the array F. LDF >= max(1,N). */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
/*    X. Sun, Computer Science Dept., Duke University, USA */

/*  ===================================================================== */

    /* Parameter adjustments: shift each pointer so the 1-based Fortran */
    /* indexing style a[i + j*a_dim1], jpvt[j], ... works unchanged on   */
    /* the 0-based C arrays.                                             */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --jpvt;
    --tau;
    --vn1;
    --vn2;
    --auxv;
    f_dim1 = *ldf;
    f_offset = 1 + f_dim1;
    f -= f_offset;

    /* Function Body */

/*     LASTRK is the last row that can still host a pivot; columns whose */
/*     remaining height is exhausted are never re-normed below. */
/* Computing MIN */
    i__1 = *m, i__2 = *n + *offset;
    lastrk = min(i__1,i__2);
    lsticc = 0;
    k = 0;
    /* TOL3Z: threshold from LAPACK Working Note 176 for deciding when a */
    /* downdated partial column norm is too inaccurate to trust.         */
    tol3z = sqrt(dlamch_("Epsilon"));

/*     Beginning of while loop: factorize up to NB columns, stopping */
/*     early if a column norm must be recomputed (LSTICC != 0). */

L10:
    if (k < *nb && lsticc == 0) {
	++k;
	rk = *offset + k;

/*        Determine ith pivot column and swap if necessary */

	i__1 = *n - k + 1;
	pvt = k - 1 + idamax_(&i__1, &vn1[k], &c__1);
	if (pvt != k) {
	    /* Swap full columns of A, the already-built rows of F, the */
	    /* permutation entries and both norm estimates.             */
	    zswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
	    i__1 = k - 1;
	    zswap_(&i__1, &f[pvt + f_dim1], ldf, &f[k + f_dim1], ldf);
	    itemp = jpvt[pvt];
	    jpvt[pvt] = jpvt[k];
	    jpvt[k] = itemp;
	    vn1[pvt] = vn1[k];
	    vn2[pvt] = vn2[k];
	}

/*        Apply previous Householder reflectors to column K: */
/*        A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */

	if (k > 1) {
	    /* Conjugate row K of F in place so the "No transpose" GEMV   */
	    /* below effectively multiplies by F(K,1:K-1)'; the second    */
	    /* loop restores F afterwards.                                */
	    i__1 = k - 1;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = k + j * f_dim1;
		d_cnjg(&z__1, &f[k + j * f_dim1]);
		f[i__2].r = z__1.r, f[i__2].i = z__1.i;
/* L20: */
	    }
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    z__1.r = -1., z__1.i = -0.;
	    zgemv_("No transpose", &i__1, &i__2, &z__1, &a[rk + a_dim1], lda,
		    &f[k + f_dim1], ldf, &c_b2, &a[rk + k * a_dim1], &c__1);
	    i__1 = k - 1;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = k + j * f_dim1;
		d_cnjg(&z__1, &f[k + j * f_dim1]);
		f[i__2].r = z__1.r, f[i__2].i = z__1.i;
/* L30: */
	    }
	}

/*        Generate elementary reflector H(k). */

	if (rk < *m) {
	    i__1 = *m - rk + 1;
	    zlarfp_(&i__1, &a[rk + k * a_dim1], &a[rk + 1 + k * a_dim1], &
		    c__1, &tau[k]);
	} else {
	    /* Last row: degenerate length-1 reflector. */
	    zlarfp_(&c__1, &a[rk + k * a_dim1], &a[rk + k * a_dim1], &c__1, &
		    tau[k]);
	}

	/* Save the diagonal entry and temporarily set it to 1 so the */
	/* reflector vector in A(RK:M,K) can be used directly in the  */
	/* matrix-vector products below; restored before looping.     */
	i__1 = rk + k * a_dim1;
	akk.r = a[i__1].r, akk.i = a[i__1].i;
	i__1 = rk + k * a_dim1;
	a[i__1].r = 1., a[i__1].i = 0.;

/*        Compute Kth column of F: */
/*        Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */

	if (k < *n) {
	    i__1 = *m - rk + 1;
	    i__2 = *n - k;
	    zgemv_("Conjugate transpose", &i__1, &i__2, &tau[k], &a[rk + (k +
		    1) * a_dim1], lda, &a[rk + k * a_dim1], &c__1, &c_b1, &f[
		    k + 1 + k * f_dim1], &c__1);
	}

/*        Padding F(1:K,K) with zeros. */

	i__1 = k;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = j + k * f_dim1;
	    f[i__2].r = 0., f[i__2].i = 0.;
/* L40: */
	}

/*        Incremental updating of F: */
/*        F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' */
/*                    *A(RK:M,K). */

	if (k > 1) {
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    i__3 = k;
	    z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
	    zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &a[rk + a_dim1]
		    , lda, &a[rk + k * a_dim1], &c__1, &c_b1, &auxv[1], &c__1)
		    ;

	    i__1 = k - 1;
	    zgemv_("No transpose", n, &i__1, &c_b2, &f[f_dim1 + 1], ldf, &
		    auxv[1], &c__1, &c_b2, &f[k * f_dim1 + 1], &c__1);
	}

/*        Update the current row of A: */
/*        A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */

	if (k < *n) {
	    i__1 = *n - k;
	    z__1.r = -1., z__1.i = -0.;
	    zgemm_("No transpose", "Conjugate transpose", &c__1, &i__1, &k, &
		    z__1, &a[rk + a_dim1], lda, &f[k + 1 + f_dim1], ldf, &
		    c_b2, &a[rk + (k + 1) * a_dim1], lda);
	}

/*        Update partial column norms. */

	if (rk < lastrk) {
	    i__1 = *n;
	    for (j = k + 1; j <= i__1; ++j) {
		if (vn1[j] != 0.) {

/*                 NOTE: The following 4 lines follow from the analysis in */
/*                 Lapack Working Note 176. */

		    temp = z_abs(&a[rk + j * a_dim1]) / vn1[j];
/* Computing MAX */
		    d__1 = 0., d__2 = (temp + 1.) * (1. - temp);
		    temp = max(d__1,d__2);
/* Computing 2nd power */
		    d__1 = vn1[j] / vn2[j];
		    temp2 = temp * (d__1 * d__1);
		    if (temp2 <= tol3z) {
			/* Downdate is unreliable: chain column J onto the  */
			/* LSTICC "recompute later" list. VN2(J) stores the */
			/* previous list head encoded as a double.          */
			vn2[j] = (doublereal) lsticc;
			lsticc = j;
		    } else {
			vn1[j] *= sqrt(temp);
		    }
		}
/* L50: */
	    }
	}

	/* Restore the saved diagonal entry over the temporary 1. */
	i__1 = rk + k * a_dim1;
	a[i__1].r = akk.r, a[i__1].i = akk.i;

/*        End of while loop. */

	goto L10;
    }
    *kb = k;
    rk = *offset + *kb;

/*     Apply the block reflector to the rest of the matrix: */
/*     A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - */
/*                         A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. */

/* Computing MIN */
    i__1 = *n, i__2 = *m - *offset;
    if (*kb < min(i__1,i__2)) {
	i__1 = *m - rk;
	i__2 = *n - *kb;
	z__1.r = -1., z__1.i = -0.;
	zgemm_("No transpose", "Conjugate transpose", &i__1, &i__2, kb, &z__1,
		 &a[rk + 1 + a_dim1], lda, &f[*kb + 1 + f_dim1], ldf, &c_b2, &
		a[rk + 1 + (*kb + 1) * a_dim1], lda);
    }

/*     Recomputation of difficult columns: walk the linked list that was */
/*     threaded through VN2 (head in LSTICC, next index rounded back out */
/*     of the stored double with i_dnnt). */

L60:
    if (lsticc > 0) {
	itemp = i_dnnt(&vn2[lsticc]);
	i__1 = *m - rk;
	vn1[lsticc] = dznrm2_(&i__1, &a[rk + 1 + lsticc * a_dim1], &c__1);

/*        NOTE: The computation of VN1( LSTICC ) relies on the fact that */
/*        SNRM2 does not fail on vectors with norm below the value of */
/*        SQRT(DLAMCH('S')) */

	vn2[lsticc] = vn1[lsticc];
	lsticc = itemp;
	goto L60;
    }

    return 0;

/*     End of ZLAQPS */

} /* zlaqps_ */
/* Subroutine */ int zlauu2_(char *uplo, integer *n, doublecomplex *a,
	integer *lda, integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    ZLAUU2 computes the product U * U' or L' * L, where the triangular
    factor U or L is stored in the upper or lower triangular part of
    the array A.

    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
    overwriting the factor U in A.
    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
    overwriting the factor L in A.

    This is the unblocked form of the algorithm, calling Level 2 BLAS.

    Arguments
    =========

    UPLO    (input) CHARACTER*1
            Specifies whether the triangular factor stored in the array A
            is upper or lower triangular:
            = 'U':  Upper triangular
            = 'L':  Lower triangular

    N       (input) INTEGER
            The order of the triangular factor U or L.  N >= 0.

    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
            On entry, the triangular factor U or L.
            On exit, if UPLO = 'U', the upper triangle of A is
            overwritten with the upper triangle of the product U * U';
            if UPLO = 'L', the lower triangle of A is overwritten with
            the lower triangle of the product L' * L.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: if INFO = -k, the k-th argument had an illegal value

    =====================================================================
*/
    /* Table of constant values */
    static doublecomplex c_b1 = {1.,0.};
    static integer c__1 = 1;

    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublereal d__1;
    doublecomplex z__1;

    /* Local variables */
    static integer i__;
    extern logical lsame_(char *, char *);
    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *,
	    doublecomplex *, integer *, doublecomplex *, integer *);
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *);
    static logical upper;
    extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_(
	    integer *, doublereal *, doublecomplex *, integer *), zlacgv_(
	    integer *, doublecomplex *, integer *);
    static doublereal aii;

    /* 1-based (row,col) addressing into the column-major array A. */
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]

    /* Parameter adjustments: shift A so 1-based Fortran-style indexing */
    /* via a_ref works on the 0-based C array.                          */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */

    /* Test the input parameters. */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZLAUU2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    if (upper) {

/*        Compute the product U * U'. */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    /* AII is real: only the real part of the diagonal entry is */
	    /* read (the result U*U' is Hermitian with real diagonal).  */
	    i__2 = a_subscr(i__, i__);
	    aii = a[i__2].r;
	    if (i__ < *n) {
		/* A(i,i) := aii^2 + || A(i,i+1:n) ||^2 (a real value). */
		i__2 = a_subscr(i__, i__);
		i__3 = *n - i__;
		zdotc_(&z__1, &i__3, &a_ref(i__, i__ + 1), lda, &a_ref(i__,
			i__ + 1), lda);
		d__1 = aii * aii + z__1.r;
		a[i__2].r = d__1, a[i__2].i = 0.;
		/* Conjugate row i of U in place so the GEMV multiplies  */
		/* by U(i,i+1:n)'; the second zlacgv_ undoes this.       */
		i__2 = *n - i__;
		zlacgv_(&i__2, &a_ref(i__, i__ + 1), lda);
		/* A(1:i-1,i) := U(1:i-1,i+1:n) * U(i,i+1:n)' + aii * A(1:i-1,i) */
		i__2 = i__ - 1;
		i__3 = *n - i__;
		z__1.r = aii, z__1.i = 0.;
		zgemv_("No transpose", &i__2, &i__3, &c_b1, &a_ref(1, i__ + 1)
			, lda, &a_ref(i__, i__ + 1), lda, &z__1, &a_ref(1,
			i__), &c__1);
		i__2 = *n - i__;
		zlacgv_(&i__2, &a_ref(i__, i__ + 1), lda);
	    } else {
		/* Last column: scale A(1:n,n) by the real diagonal. */
		zdscal_(&i__, &aii, &a_ref(1, i__), &c__1);
	    }
/* L10: */
	}

    } else {

/*        Compute the product L' * L. */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    i__2 = a_subscr(i__, i__);
	    aii = a[i__2].r;
	    if (i__ < *n) {
		/* A(i,i) := aii^2 + || A(i+1:n,i) ||^2 (a real value). */
		i__2 = a_subscr(i__, i__);
		i__3 = *n - i__;
		zdotc_(&z__1, &i__3, &a_ref(i__ + 1, i__), &c__1, &a_ref(i__
			+ 1, i__), &c__1);
		d__1 = aii * aii + z__1.r;
		a[i__2].r = d__1, a[i__2].i = 0.;
		/* Conjugate row i of L (columns 1:i-1) in place for the */
		/* GEMV update; undone by the second zlacgv_.            */
		i__2 = i__ - 1;
		zlacgv_(&i__2, &a_ref(i__, 1), lda);
		/* A(i,1:i-1) := L(i+1:n,1:i-1)' * L(i+1:n,i) + aii * A(i,1:i-1) */
		i__2 = *n - i__;
		i__3 = i__ - 1;
		z__1.r = aii, z__1.i = 0.;
		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b1, &a_ref(i__
			+ 1, 1), lda, &a_ref(i__ + 1, i__), &c__1, &z__1, &
			a_ref(i__, 1), lda);
		i__2 = i__ - 1;
		zlacgv_(&i__2, &a_ref(i__, 1), lda);
	    } else {
		/* Last row: scale A(n,1:n) by the real diagonal. */
		zdscal_(&i__, &aii, &a_ref(i__, 1), lda);
	    }
/* L20: */
	}
    }

    return 0;

/*     End of ZLAUU2 */

} /* zlauu2_ */
/* Subroutine */ int zlaghe_(integer *n, integer *k, doublereal *d, doublecomplex *a, integer *lda, integer *iseed, doublecomplex *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; doublecomplex z__1, z__2, z__3, z__4; /* Builtin functions */ double z_abs(doublecomplex *); void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg( doublecomplex *, doublecomplex *); /* Local variables */ extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static integer i, j; static doublecomplex alpha; extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), zscal_(integer *, doublecomplex *, doublecomplex *, integer *); extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); extern /* Subroutine */ int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), zhemv_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *); extern doublereal dznrm2_(integer *, doublecomplex *, integer *); static doublecomplex wa, wb; static doublereal wn; extern /* Subroutine */ int xerbla_(char *, integer *), zlarnv_( integer *, integer *, integer *, doublecomplex *); static doublecomplex tau; /* -- LAPACK auxiliary test routine (version 2.0) -- Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZLAGHE generates a complex hermitian matrix A, by pre- and post- multiplying a real diagonal matrix D with a random unitary matrix: A = U*D*U'. The semi-bandwidth may then be reduced to k by additional unitary transformations. Arguments ========= N (input) INTEGER The order of the matrix A. N >= 0. K (input) INTEGER The number of nonzero subdiagonals within the band of A. 0 <= K <= N-1. D (input) DOUBLE PRECISION array, dimension (N) The diagonal elements of the diagonal matrix D. A (output) COMPLEX*16 array, dimension (LDA,N) The generated n by n hermitian matrix A (the full matrix is stored). LDA (input) INTEGER The leading dimension of the array A. LDA >= N. ISEED (input/output) INTEGER array, dimension (4) On entry, the seed of the random number generator; the array elements must be between 0 and 4095, and ISEED(4) must be odd. On exit, the seed is updated. 
WORK (workspace) COMPLEX*16 array, dimension (2*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input arguments Parameter adjustments */ --d; a_dim1 = *lda; a_offset = a_dim1 + 1; a -= a_offset; --iseed; --work; /* Function Body */ *info = 0; if (*n < 0) { *info = -1; } else if (*k < 0 || *k > *n - 1) { *info = -2; } else if (*lda < max(1,*n)) { *info = -5; } if (*info < 0) { i__1 = -(*info); xerbla_("ZLAGHE", &i__1); return 0; } /* initialize lower triangle of A to diagonal matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i = j + 1; i <= i__2; ++i) { i__3 = i + j * a_dim1; a[i__3].r = 0., a[i__3].i = 0.; /* L10: */ } /* L20: */ } i__1 = *n; for (i = 1; i <= i__1; ++i) { i__2 = i + i * a_dim1; i__3 = i; a[i__2].r = d[i__3], a[i__2].i = 0.; /* L30: */ } /* Generate lower triangle of hermitian matrix */ for (i = *n - 1; i >= 1; --i) { /* generate random reflection */ i__1 = *n - i + 1; zlarnv_(&c__3, &iseed[1], &i__1, &work[1]); i__1 = *n - i + 1; wn = dznrm2_(&i__1, &work[1], &c__1); d__1 = wn / z_abs(&work[1]); z__1.r = d__1 * work[1].r, z__1.i = d__1 * work[1].i; wa.r = z__1.r, wa.i = z__1.i; if (wn == 0.) 
{ tau.r = 0., tau.i = 0.; } else { z__1.r = work[1].r + wa.r, z__1.i = work[1].i + wa.i; wb.r = z__1.r, wb.i = z__1.i; i__1 = *n - i; z_div(&z__1, &c_b2, &wb); zscal_(&i__1, &z__1, &work[2], &c__1); work[1].r = 1., work[1].i = 0.; z_div(&z__1, &wb, &wa); d__1 = z__1.r; tau.r = d__1, tau.i = 0.; } /* apply random reflection to A(i:n,i:n) from the left and the right compute y := tau * A * u */ i__1 = *n - i + 1; zhemv_("Lower", &i__1, &tau, &a[i + i * a_dim1], lda, &work[1], &c__1, &c_b1, &work[*n + 1], &c__1); /* compute v := y - 1/2 * tau * ( y, u ) * u */ z__3.r = -.5, z__3.i = 0.; z__2.r = z__3.r * tau.r - z__3.i * tau.i, z__2.i = z__3.r * tau.i + z__3.i * tau.r; i__1 = *n - i + 1; zdotc_(&z__4, &i__1, &work[*n + 1], &c__1, &work[1], &c__1); z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * z__4.i + z__2.i * z__4.r; alpha.r = z__1.r, alpha.i = z__1.i; i__1 = *n - i + 1; zaxpy_(&i__1, &alpha, &work[1], &c__1, &work[*n + 1], &c__1); /* apply the transformation as a rank-2 update to A(i:n,i:n) */ i__1 = *n - i + 1; z__1.r = -1., z__1.i = 0.; zher2_("Lower", &i__1, &z__1, &work[1], &c__1, &work[*n + 1], &c__1, & a[i + i * a_dim1], lda); /* L40: */ } /* Reduce number of subdiagonals to K */ i__1 = *n - 1 - *k; for (i = 1; i <= i__1; ++i) { /* generate reflection to annihilate A(k+i+1:n,i) */ i__2 = *n - *k - i + 1; wn = dznrm2_(&i__2, &a[*k + i + i * a_dim1], &c__1); d__1 = wn / z_abs(&a[*k + i + i * a_dim1]); i__2 = *k + i + i * a_dim1; z__1.r = d__1 * a[i__2].r, z__1.i = d__1 * a[i__2].i; wa.r = z__1.r, wa.i = z__1.i; if (wn == 0.) 
{ tau.r = 0., tau.i = 0.; } else { i__2 = *k + i + i * a_dim1; z__1.r = a[i__2].r + wa.r, z__1.i = a[i__2].i + wa.i; wb.r = z__1.r, wb.i = z__1.i; i__2 = *n - *k - i; z_div(&z__1, &c_b2, &wb); zscal_(&i__2, &z__1, &a[*k + i + 1 + i * a_dim1], &c__1); i__2 = *k + i + i * a_dim1; a[i__2].r = 1., a[i__2].i = 0.; z_div(&z__1, &wb, &wa); d__1 = z__1.r; tau.r = d__1, tau.i = 0.; } /* apply reflection to A(k+i:n,i+1:k+i-1) from the left */ i__2 = *n - *k - i + 1; i__3 = *k - 1; zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[*k + i + (i + 1) * a_dim1], lda, &a[*k + i + i * a_dim1], &c__1, &c_b1, &work[ 1], &c__1); i__2 = *n - *k - i + 1; i__3 = *k - 1; z__1.r = -tau.r, z__1.i = -tau.i; zgerc_(&i__2, &i__3, &z__1, &a[*k + i + i * a_dim1], &c__1, &work[1], &c__1, &a[*k + i + (i + 1) * a_dim1], lda); /* apply reflection to A(k+i:n,k+i:n) from the left and the rig ht compute y := tau * A * u */ i__2 = *n - *k - i + 1; zhemv_("Lower", &i__2, &tau, &a[*k + i + (*k + i) * a_dim1], lda, &a[* k + i + i * a_dim1], &c__1, &c_b1, &work[1], &c__1); /* compute v := y - 1/2 * tau * ( y, u ) * u */ z__3.r = -.5, z__3.i = 0.; z__2.r = z__3.r * tau.r - z__3.i * tau.i, z__2.i = z__3.r * tau.i + z__3.i * tau.r; i__2 = *n - *k - i + 1; zdotc_(&z__4, &i__2, &work[1], &c__1, &a[*k + i + i * a_dim1], &c__1); z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r * z__4.i + z__2.i * z__4.r; alpha.r = z__1.r, alpha.i = z__1.i; i__2 = *n - *k - i + 1; zaxpy_(&i__2, &alpha, &a[*k + i + i * a_dim1], &c__1, &work[1], &c__1) ; /* apply hermitian rank-2 update to A(k+i:n,k+i:n) */ i__2 = *n - *k - i + 1; z__1.r = -1., z__1.i = 0.; zher2_("Lower", &i__2, &z__1, &a[*k + i + i * a_dim1], &c__1, &work[1] , &c__1, &a[*k + i + (*k + i) * a_dim1], lda); i__2 = *k + i + i * a_dim1; z__1.r = -wa.r, z__1.i = -wa.i; a[i__2].r = z__1.r, a[i__2].i = z__1.i; i__2 = *n; for (j = *k + i + 1; j <= i__2; ++j) { i__3 = j + i * a_dim1; a[i__3].r = 0., a[i__3].i = 0.; /* L50: */ } /* L60: */ } /* Store full 
hermitian matrix */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i = j + 1; i <= i__2; ++i) { i__3 = j + i * a_dim1; d_cnjg(&z__1, &a[i + j * a_dim1]); a[i__3].r = z__1.r, a[i__3].i = z__1.i; /* L70: */ } /* L80: */ } return 0; /* End of ZLAGHE */ } /* zlaghe_ */
/* Subroutine */ int zlahef_(char *uplo, integer *n, integer *nb, integer *kb,
	doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *w,
	integer *ldw, integer *info)
{
/*  -- LAPACK routine (version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    ZLAHEF computes a partial factorization of a complex Hermitian
    matrix A using the Bunch-Kaufman diagonal pivoting method. The
    partial factorization has the form:

    A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or:
          ( 0  U22 ) (  0   D  ) ( U12' U22' )

    A  =  ( L11  0 ) (  D   0  ) ( L11' L21' )  if UPLO = 'L'
          ( L21  I ) (  0  A22 ) (  0    I   )

    where the order of D is at most NB. The actual order is returned in
    the argument KB, and is either NB or NB-1, or N if N <= NB.
    Note that U' denotes the conjugate transpose of U.

    ZLAHEF is an auxiliary routine called by ZHETRF. It uses blocked
    code (calling Level 3 BLAS) to update the submatrix A11
    (if UPLO = 'U') or A22 (if UPLO = 'L').

    Arguments
    =========

    UPLO    (input) CHARACTER*1
            Specifies whether the upper or lower triangular part of the
            Hermitian matrix A is stored:
            = 'U':  Upper triangular
            = 'L':  Lower triangular

    N       (input) INTEGER
            The order of the matrix A.  N >= 0.

    NB      (input) INTEGER
            The maximum number of columns of the matrix A that should be
            factored.  NB should be at least 2 to allow for 2-by-2 pivot
            blocks.

    KB      (output) INTEGER
            The number of columns of A that were actually factored.
            KB is either NB-1 or NB, or N if N <= NB.

    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
            n-by-n upper triangular part of A contains the upper
            triangular part of the matrix A, and the strictly lower
            triangular part of A is not referenced.  If UPLO = 'L', the
            leading n-by-n lower triangular part of A contains the lower
            triangular part of the matrix A, and the strictly upper
            triangular part of A is not referenced.
            On exit, A contains details of the partial factorization.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    IPIV    (output) INTEGER array, dimension (N)
            Details of the interchanges and the block structure of D.
            If UPLO = 'U', only the last KB elements of IPIV are set;
            if UPLO = 'L', only the first KB elements are set.

            If IPIV(k) > 0, then rows and columns k and IPIV(k) were
            interchanged and D(k,k) is a 1-by-1 diagonal block.
            If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
            columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
            is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
            IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
            interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.

    W       (workspace) COMPLEX*16 array, dimension (LDW,NB)

    LDW     (input) INTEGER
            The leading dimension of the array W.  LDW >= max(1,N).

    INFO    (output) INTEGER
            = 0: successful exit
            > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
                 has been completed, but the block diagonal matrix D is
                 exactly singular.

    =====================================================================

       Parameter adjustments */
    /* Table of constant values */
    /* c_b1 is the complex one used as BETA in the zgemv/zgemm updates;
       c__1 is the unit stride passed to the Level 1 BLAS. */
    static doublecomplex c_b1 = {1.,0.};
    static integer c__1 = 1;
    /* System generated locals */
    integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4,
	    i__5;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2, z__3, z__4;
    /* Builtin functions (f2c runtime intrinsics) */
    double sqrt(doublereal), d_imag(doublecomplex *);
    void d_cnjg(doublecomplex *, doublecomplex *), z_div(doublecomplex *,
	    doublecomplex *, doublecomplex *);
    /* Local variables */
    static integer imax, jmax, j, k;
    static doublereal t, alpha;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
	    integer *, doublecomplex *, doublecomplex *, integer *,
	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
	    integer *);
    static integer kstep;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *);
    static doublereal r1;
    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *,
	    doublecomplex *, integer *), zswap_(integer *, doublecomplex *,
	    integer *, doublecomplex *, integer *);
    static doublecomplex d11, d21, d22;
    static integer jb, jj, kk, jp, kp;
    static doublereal absakk;
    static integer kw;
    extern /* Subroutine */ int zdscal_(integer *, doublereal *,
	    doublecomplex *, integer *);
    static doublereal colmax;
    extern /* Subroutine */ int zlacgv_(integer *, doublecomplex *, integer *)
	    ;
    extern integer izamax_(integer *, doublecomplex *, integer *);
    static doublereal rowmax;
    static integer kkw;
/* 1-based Fortran-style 2-D indexing into the flat C arrays a and w. */
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define w_subscr(a_1,a_2) (a_2)*w_dim1 + a_1
#define w_ref(a_1,a_2) w[w_subscr(a_1,a_2)]

    /* Shift the array bases so that a_ref(1,1)/w_ref(1,1)/ipiv[1] address
       the first elements, matching the original Fortran indexing. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    w_dim1 = *ldw;
    w_offset = 1 + w_dim1 * 1;
    w -= w_offset;

    /* Function Body */
    *info = 0;

/*     Initialize ALPHA for use in choosing pivot block size.
       (1+sqrt(17))/8 is the standard Bunch-Kaufman threshold. */

    alpha = (sqrt(17.) + 1.) / 8.;

    if (lsame_(uplo, "U")) {

/*        Factorize the trailing columns of A using the upper triangle
          of A and working backwards, and compute the matrix W = U12*D
          for use in updating A11 (note that conjg(W) is actually stored)

          K is the main loop index, decreasing from N in steps of 1 or 2

          KW is the column of W which corresponds to column K of A */

	k = *n;
L10:
	kw = *nb + k - *n;

/*        Exit from loop */

	if (k <= *n - *nb + 1 && *nb < *n || k < 1) {
	    goto L30;
	}

/*        Copy column K of A to column KW of W and update it */

	i__1 = k - 1;
	zcopy_(&i__1, &a_ref(1, k), &c__1, &w_ref(1, kw), &c__1);
	/* Diagonal of a Hermitian matrix is real: copy only the real part. */
	i__1 = w_subscr(k, kw);
	i__2 = a_subscr(k, k);
	d__1 = a[i__2].r;
	w[i__1].r = d__1, w[i__1].i = 0.;
	if (k < *n) {
	    i__1 = *n - k;
	    z__1.r = -1., z__1.i = 0.;
	    zgemv_("No transpose", &k, &i__1, &z__1, &a_ref(1, k + 1), lda, &
		    w_ref(k, kw + 1), ldw, &c_b1, &w_ref(1, kw), &c__1);
	    /* Re-force the updated diagonal entry to be real. */
	    i__1 = w_subscr(k, kw);
	    i__2 = w_subscr(k, kw);
	    d__1 = w[i__2].r;
	    w[i__1].r = d__1, w[i__1].i = 0.;
	}

	kstep = 1;

/*        Determine rows and columns to be interchanged and whether
          a 1-by-1 or 2-by-2 pivot block will be used */

	i__1 = w_subscr(k, kw);
	absakk = (d__1 = w[i__1].r, abs(d__1));

/*        IMAX is the row-index of the largest off-diagonal element in
          column K, and COLMAX is its absolute value */

	if (k > 1) {
	    i__1 = k - 1;
	    imax = izamax_(&i__1, &w_ref(1, kw), &c__1);
	    i__1 = w_subscr(imax, kw);
	    colmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w_ref(
		    imax, kw)), abs(d__2));
	} else {
	    colmax = 0.;
	}

	if (max(absakk,colmax) == 0.) {

/*           Column K is zero: set INFO and continue */

	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	    i__1 = a_subscr(k, k);
	    i__2 = a_subscr(k, k);
	    d__1 = a[i__2].r;
	    a[i__1].r = d__1, a[i__1].i = 0.;
	} else {
	    if (absakk >= alpha * colmax) {

/*              no interchange, use 1-by-1 pivot block */

		kp = k;
	    } else {

/*              Copy column IMAX to column KW-1 of W and update it */

		i__1 = imax - 1;
		zcopy_(&i__1, &a_ref(1, imax), &c__1, &w_ref(1, kw - 1), &
			c__1);
		i__1 = w_subscr(imax, kw - 1);
		i__2 = a_subscr(imax, imax);
		d__1 = a[i__2].r;
		w[i__1].r = d__1, w[i__1].i = 0.;
		/* Row IMAX of the upper triangle is the conjugate of the
		   missing column part, hence the zcopy along lda followed
		   by zlacgv. */
		i__1 = k - imax;
		zcopy_(&i__1, &a_ref(imax, imax + 1), lda, &w_ref(imax + 1,
			kw - 1), &c__1);
		i__1 = k - imax;
		zlacgv_(&i__1, &w_ref(imax + 1, kw - 1), &c__1);
		if (k < *n) {
		    i__1 = *n - k;
		    z__1.r = -1., z__1.i = 0.;
		    zgemv_("No transpose", &k, &i__1, &z__1, &a_ref(1, k + 1),
			     lda, &w_ref(imax, kw + 1), ldw, &c_b1, &w_ref(1,
			    kw - 1), &c__1);
		    i__1 = w_subscr(imax, kw - 1);
		    i__2 = w_subscr(imax, kw - 1);
		    d__1 = w[i__2].r;
		    w[i__1].r = d__1, w[i__1].i = 0.;
		}

/*              JMAX is the column-index of the largest off-diagonal
                element in row IMAX, and ROWMAX is its absolute value */

		i__1 = k - imax;
		jmax = imax + izamax_(&i__1, &w_ref(imax + 1, kw - 1), &c__1);
		i__1 = w_subscr(jmax, kw - 1);
		rowmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&
			w_ref(jmax, kw - 1)), abs(d__2));
		if (imax > 1) {
		    i__1 = imax - 1;
		    jmax = izamax_(&i__1, &w_ref(1, kw - 1), &c__1);
/* Computing MAX */
		    i__1 = w_subscr(jmax, kw - 1);
		    d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + (
			    d__2 = d_imag(&w_ref(jmax, kw - 1)), abs(d__2));
		    rowmax = max(d__3,d__4);
		}

		if (absakk >= alpha * colmax * (colmax / rowmax)) {

/*                 no interchange, use 1-by-1 pivot block */

		    kp = k;
		} else /* if(complicated condition) */ {
		    i__1 = w_subscr(imax, kw - 1);
		    if ((d__1 = w[i__1].r, abs(d__1)) >= alpha * rowmax) {

/*                    interchange rows and columns K and IMAX, use 1-by-1
                      pivot block */

			kp = imax;

/*                    copy column KW-1 of W to column KW */

			zcopy_(&k, &w_ref(1, kw - 1), &c__1, &w_ref(1, kw), &
				c__1);
		    } else {

/*                    interchange rows and columns K-1 and IMAX, use 2-by-2
                      pivot block */

			kp = imax;
			kstep = 2;
		    }
		}
	    }

	    /* KK is the column where the pivot row/column ends up;
	       KKW is the corresponding column of W. */
	    kk = k - kstep + 1;
	    kkw = *nb + kk - *n;

/*           Updated column KP is already stored in column KKW of W */

	    if (kp != kk) {

/*              Copy non-updated column KK to column KP */

		i__1 = a_subscr(kp, kp);
		i__2 = a_subscr(kk, kk);
		d__1 = a[i__2].r;
		a[i__1].r = d__1, a[i__1].i = 0.;
		i__1 = kk - 1 - kp;
		zcopy_(&i__1, &a_ref(kp + 1, kk), &c__1, &a_ref(kp, kp + 1),
			lda);
		i__1 = kk - 1 - kp;
		zlacgv_(&i__1, &a_ref(kp, kp + 1), lda);
		i__1 = kp - 1;
		zcopy_(&i__1, &a_ref(1, kk), &c__1, &a_ref(1, kp), &c__1);

/*              Interchange rows KK and KP in last KK columns of A and W */

		if (kk < *n) {
		    i__1 = *n - kk;
		    zswap_(&i__1, &a_ref(kk, kk + 1), lda, &a_ref(kp, kk + 1),
			     lda);
		}
		i__1 = *n - kk + 1;
		zswap_(&i__1, &w_ref(kk, kkw), ldw, &w_ref(kp, kkw), ldw);
	    }

	    if (kstep == 1) {

/*              1-by-1 pivot block D(k): column KW of W now holds

                W(k) = U(k)*D(k)

                where U(k) is the k-th column of U

                Store U(k) in column k of A */

		zcopy_(&k, &w_ref(1, kw), &c__1, &a_ref(1, k), &c__1);
		i__1 = a_subscr(k, k);
		r1 = 1. / a[i__1].r;
		i__1 = k - 1;
		zdscal_(&i__1, &r1, &a_ref(1, k), &c__1);

/*              Conjugate W(k) */

		i__1 = k - 1;
		zlacgv_(&i__1, &w_ref(1, kw), &c__1);
	    } else {

/*              2-by-2 pivot block D(k): columns KW and KW-1 of W now hold

                ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)

                where U(k) and U(k-1) are the k-th and (k-1)-th columns
                of U */

		if (k > 2) {

/*                 Store U(k) and U(k-1) in columns k and k-1 of A
                   (explicit solve of each 2-by-2 system against D(k)). */

		    i__1 = w_subscr(k - 1, kw);
		    d21.r = w[i__1].r, d21.i = w[i__1].i;
		    d_cnjg(&z__2, &d21);
		    z_div(&z__1, &w_ref(k, kw), &z__2);
		    d11.r = z__1.r, d11.i = z__1.i;
		    z_div(&z__1, &w_ref(k - 1, kw - 1), &d21);
		    d22.r = z__1.r, d22.i = z__1.i;
		    z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r *
			    d22.i + d11.i * d22.r;
		    t = 1. / (z__1.r - 1.);
		    z__2.r = t, z__2.i = 0.;
		    z_div(&z__1, &z__2, &d21);
		    d21.r = z__1.r, d21.i = z__1.i;
		    i__1 = k - 2;
		    for (j = 1; j <= i__1; ++j) {
			/* A(j,k-1) = d21 * (d11*W(j,kw-1) - W(j,kw)) */
			i__2 = a_subscr(j, k - 1);
			i__3 = w_subscr(j, kw - 1);
			z__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
				z__3.i = d11.r * w[i__3].i + d11.i * w[i__3]
				.r;
			i__4 = w_subscr(j, kw);
			z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4]
				.i;
			z__1.r = d21.r * z__2.r - d21.i * z__2.i, z__1.i =
				d21.r * z__2.i + d21.i * z__2.r;
			a[i__2].r = z__1.r, a[i__2].i = z__1.i;
			/* A(j,k) = conjg(d21) * (d22*W(j,kw) - W(j,kw-1)) */
			i__2 = a_subscr(j, k);
			d_cnjg(&z__2, &d21);
			i__3 = w_subscr(j, kw);
			z__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
				z__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
				.r;
			i__4 = w_subscr(j, kw - 1);
			z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
				.i;
			z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i =
				z__2.r * z__3.i + z__2.i * z__3.r;
			a[i__2].r = z__1.r, a[i__2].i = z__1.i;
/* L20: */
		    }
		}

/*              Copy D(k) to A */

		i__1 = a_subscr(k - 1, k - 1);
		i__2 = w_subscr(k - 1, kw - 1);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = a_subscr(k - 1, k);
		i__2 = w_subscr(k - 1, kw);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = a_subscr(k, k);
		i__2 = w_subscr(k, kw);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;

/*              Conjugate W(k) and W(k-1) */

		i__1 = k - 1;
		zlacgv_(&i__1, &w_ref(1, kw), &c__1);
		i__1 = k - 2;
		zlacgv_(&i__1, &w_ref(1, kw - 1), &c__1);
	    }
	}

/*        Store details of the interchanges in IPIV */

	if (kstep == 1) {
	    ipiv[k] = kp;
	} else {
	    ipiv[k] = -kp;
	    ipiv[k - 1] = -kp;
	}

/*        Decrease K and return to the start of the main loop */

	k -= kstep;
	goto L10;

L30:

/*        Update the upper triangle of A11 (= A(1:k,1:k)) as

          A11 := A11 - U12*D*U12' = A11 - U12*W'

          computing blocks of NB columns at a time (note that conjg(W) is
          actually stored) */

	i__1 = -(*nb);
	for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j +=
		i__1) {
/* Computing MIN */
	    i__2 = *nb, i__3 = k - j + 1;
	    jb = min(i__2,i__3);

/*           Update the upper triangle of the diagonal block */

	    i__2 = j + jb - 1;
	    for (jj = j; jj <= i__2; ++jj) {
		i__3 = a_subscr(jj, jj);
		i__4 = a_subscr(jj, jj);
		d__1 = a[i__4].r;
		a[i__3].r = d__1, a[i__3].i = 0.;
		i__3 = jj - j + 1;
		i__4 = *n - k;
		z__1.r = -1., z__1.i = 0.;
		zgemv_("No transpose", &i__3, &i__4, &z__1, &a_ref(j, k + 1),
			lda, &w_ref(jj, kw + 1), ldw, &c_b1, &a_ref(j, jj), &
			c__1);
		i__3 = a_subscr(jj, jj);
		i__4 = a_subscr(jj, jj);
		d__1 = a[i__4].r;
		a[i__3].r = d__1, a[i__3].i = 0.;
/* L40: */
	    }

/*           Update the rectangular superdiagonal block */

	    i__2 = j - 1;
	    i__3 = *n - k;
	    z__1.r = -1., z__1.i = 0.;
	    zgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &z__1, &
		    a_ref(1, k + 1), lda, &w_ref(j, kw + 1), ldw, &c_b1, &
		    a_ref(1, j), lda);
/* L50: */
	}

/*        Put U12 in standard form by partially undoing the interchanges
          in columns k+1:n */

	j = k + 1;
L60:
	jj = j;
	jp = ipiv[j];
	if (jp < 0) {
	    jp = -jp;
	    ++j;
	}
	++j;
	if (jp != jj && j <= *n) {
	    i__1 = *n - j + 1;
	    zswap_(&i__1, &a_ref(jp, j), lda, &a_ref(jj, j), lda);
	}
	if (j <= *n) {
	    goto L60;
	}

/*        Set KB to the number of columns factorized */

	*kb = *n - k;

    } else {

/*        Factorize the leading columns of A using the lower triangle
          of A and working forwards, and compute the matrix W = L21*D
          for use in updating A22 (note that conjg(W) is actually stored)

          K is the main loop index, increasing from 1 in steps of 1 or 2 */

	k = 1;
L70:

/*        Exit from loop */

	if (k >= *nb && *nb < *n || k > *n) {
	    goto L90;
	}

/*        Copy column K of A to column K of W and update it */

	i__1 = w_subscr(k, k);
	i__2 = a_subscr(k, k);
	d__1 = a[i__2].r;
	w[i__1].r = d__1, w[i__1].i = 0.;
	if (k < *n) {
	    i__1 = *n - k;
	    zcopy_(&i__1, &a_ref(k + 1, k), &c__1, &w_ref(k + 1, k), &c__1);
	}
	i__1 = *n - k + 1;
	i__2 = k - 1;
	z__1.r = -1., z__1.i = 0.;
	zgemv_("No transpose", &i__1, &i__2, &z__1, &a_ref(k, 1), lda, &w_ref(
		k, 1), ldw, &c_b1, &w_ref(k, k), &c__1);
	/* Re-force the updated diagonal entry to be real. */
	i__1 = w_subscr(k, k);
	i__2 = w_subscr(k, k);
	d__1 = w[i__2].r;
	w[i__1].r = d__1, w[i__1].i = 0.;

	kstep = 1;

/*        Determine rows and columns to be interchanged and whether
          a 1-by-1 or 2-by-2 pivot block will be used */

	i__1 = w_subscr(k, k);
	absakk = (d__1 = w[i__1].r, abs(d__1));

/*        IMAX is the row-index of the largest off-diagonal element in
          column K, and COLMAX is its absolute value */

	if (k < *n) {
	    i__1 = *n - k;
	    imax = k + izamax_(&i__1, &w_ref(k + 1, k), &c__1);
	    i__1 = w_subscr(imax, k);
	    colmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&w_ref(
		    imax, k)), abs(d__2));
	} else {
	    colmax = 0.;
	}

	if (max(absakk,colmax) == 0.) {

/*           Column K is zero: set INFO and continue */

	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	    i__1 = a_subscr(k, k);
	    i__2 = a_subscr(k, k);
	    d__1 = a[i__2].r;
	    a[i__1].r = d__1, a[i__1].i = 0.;
	} else {
	    if (absakk >= alpha * colmax) {

/*              no interchange, use 1-by-1 pivot block */

		kp = k;
	    } else {

/*              Copy column IMAX to column K+1 of W and update it
                (row part of column IMAX is conjugated on copy). */

		i__1 = imax - k;
		zcopy_(&i__1, &a_ref(imax, k), lda, &w_ref(k, k + 1), &c__1);
		i__1 = imax - k;
		zlacgv_(&i__1, &w_ref(k, k + 1), &c__1);
		i__1 = w_subscr(imax, k + 1);
		i__2 = a_subscr(imax, imax);
		d__1 = a[i__2].r;
		w[i__1].r = d__1, w[i__1].i = 0.;
		if (imax < *n) {
		    i__1 = *n - imax;
		    zcopy_(&i__1, &a_ref(imax + 1, imax), &c__1, &w_ref(imax
			    + 1, k + 1), &c__1);
		}
		i__1 = *n - k + 1;
		i__2 = k - 1;
		z__1.r = -1., z__1.i = 0.;
		zgemv_("No transpose", &i__1, &i__2, &z__1, &a_ref(k, 1), lda,
			 &w_ref(imax, 1), ldw, &c_b1, &w_ref(k, k + 1), &c__1)
			;
		i__1 = w_subscr(imax, k + 1);
		i__2 = w_subscr(imax, k + 1);
		d__1 = w[i__2].r;
		w[i__1].r = d__1, w[i__1].i = 0.;

/*              JMAX is the column-index of the largest off-diagonal
                element in row IMAX, and ROWMAX is its absolute value */

		i__1 = imax - k;
		jmax = k - 1 + izamax_(&i__1, &w_ref(k, k + 1), &c__1);
		i__1 = w_subscr(jmax, k + 1);
		rowmax = (d__1 = w[i__1].r, abs(d__1)) + (d__2 = d_imag(&
			w_ref(jmax, k + 1)), abs(d__2));
		if (imax < *n) {
		    i__1 = *n - imax;
		    jmax = imax + izamax_(&i__1, &w_ref(imax + 1, k + 1), &
			    c__1);
/* Computing MAX */
		    i__1 = w_subscr(jmax, k + 1);
		    d__3 = rowmax, d__4 = (d__1 = w[i__1].r, abs(d__1)) + (
			    d__2 = d_imag(&w_ref(jmax, k + 1)), abs(d__2));
		    rowmax = max(d__3,d__4);
		}

		if (absakk >= alpha * colmax * (colmax / rowmax)) {

/*                 no interchange, use 1-by-1 pivot block */

		    kp = k;
		} else /* if(complicated condition) */ {
		    i__1 = w_subscr(imax, k + 1);
		    if ((d__1 = w[i__1].r, abs(d__1)) >= alpha * rowmax) {

/*                    interchange rows and columns K and IMAX, use 1-by-1
                      pivot block */

			kp = imax;

/*                    copy column K+1 of W to column K */

			i__1 = *n - k + 1;
			zcopy_(&i__1, &w_ref(k, k + 1), &c__1, &w_ref(k, k), &
				c__1);
		    } else {

/*                    interchange rows and columns K+1 and IMAX, use 2-by-2
                      pivot block */

			kp = imax;
			kstep = 2;
		    }
		}
	    }

	    kk = k + kstep - 1;

/*           Updated column KP is already stored in column KK of W */

	    if (kp != kk) {

/*              Copy non-updated column KK to column KP */

		i__1 = a_subscr(kp, kp);
		i__2 = a_subscr(kk, kk);
		d__1 = a[i__2].r;
		a[i__1].r = d__1, a[i__1].i = 0.;
		i__1 = kp - kk - 1;
		zcopy_(&i__1, &a_ref(kk + 1, kk), &c__1, &a_ref(kp, kk + 1),
			lda);
		i__1 = kp - kk - 1;
		zlacgv_(&i__1, &a_ref(kp, kk + 1), lda);
		if (kp < *n) {
		    i__1 = *n - kp;
		    zcopy_(&i__1, &a_ref(kp + 1, kk), &c__1, &a_ref(kp + 1,
			    kp), &c__1);
		}

/*              Interchange rows KK and KP in first KK columns of A and W */

		i__1 = kk - 1;
		zswap_(&i__1, &a_ref(kk, 1), lda, &a_ref(kp, 1), lda);
		zswap_(&kk, &w_ref(kk, 1), ldw, &w_ref(kp, 1), ldw);
	    }

	    if (kstep == 1) {

/*              1-by-1 pivot block D(k): column k of W now holds

                W(k) = L(k)*D(k)

                where L(k) is the k-th column of L

                Store L(k) in column k of A */

		i__1 = *n - k + 1;
		zcopy_(&i__1, &w_ref(k, k), &c__1, &a_ref(k, k), &c__1);
		if (k < *n) {
		    i__1 = a_subscr(k, k);
		    r1 = 1. / a[i__1].r;
		    i__1 = *n - k;
		    zdscal_(&i__1, &r1, &a_ref(k + 1, k), &c__1);

/*                 Conjugate W(k) */

		    i__1 = *n - k;
		    zlacgv_(&i__1, &w_ref(k + 1, k), &c__1);
		}
	    } else {

/*              2-by-2 pivot block D(k): columns k and k+1 of W now hold

                ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)

                where L(k) and L(k+1) are the k-th and (k+1)-th columns
                of L */

		if (k < *n - 1) {

/*                 Store L(k) and L(k+1) in columns k and k+1 of A
                   (explicit solve of each 2-by-2 system against D(k)). */

		    i__1 = w_subscr(k + 1, k);
		    d21.r = w[i__1].r, d21.i = w[i__1].i;
		    z_div(&z__1, &w_ref(k + 1, k + 1), &d21);
		    d11.r = z__1.r, d11.i = z__1.i;
		    d_cnjg(&z__2, &d21);
		    z_div(&z__1, &w_ref(k, k), &z__2);
		    d22.r = z__1.r, d22.i = z__1.i;
		    z__1.r = d11.r * d22.r - d11.i * d22.i, z__1.i = d11.r *
			    d22.i + d11.i * d22.r;
		    t = 1. / (z__1.r - 1.);
		    z__2.r = t, z__2.i = 0.;
		    z_div(&z__1, &z__2, &d21);
		    d21.r = z__1.r, d21.i = z__1.i;
		    i__1 = *n;
		    for (j = k + 2; j <= i__1; ++j) {
			/* A(j,k) = conjg(d21) * (d11*W(j,k) - W(j,k+1)) */
			i__2 = a_subscr(j, k);
			d_cnjg(&z__2, &d21);
			i__3 = w_subscr(j, k);
			z__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
				z__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
				.r;
			i__4 = w_subscr(j, k + 1);
			z__3.r = z__4.r - w[i__4].r, z__3.i = z__4.i - w[i__4]
				.i;
			z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i =
				z__2.r * z__3.i + z__2.i * z__3.r;
			a[i__2].r = z__1.r, a[i__2].i = z__1.i;
			/* A(j,k+1) = d21 * (d22*W(j,k+1) - W(j,k)) */
			i__2 = a_subscr(j, k + 1);
			i__3 = w_subscr(j, k + 1);
			z__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
				z__3.i = d22.r * w[i__3].i + d22.i * w[i__3]
				.r;
			i__4 = w_subscr(j, k);
			z__2.r = z__3.r - w[i__4].r, z__2.i = z__3.i - w[i__4]
				.i;
			z__1.r = d21.r * z__2.r - d21.i * z__2.i, z__1.i =
				d21.r * z__2.i + d21.i * z__2.r;
			a[i__2].r = z__1.r, a[i__2].i = z__1.i;
/* L80: */
		    }
		}

/*              Copy D(k) to A */

		i__1 = a_subscr(k, k);
		i__2 = w_subscr(k, k);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = a_subscr(k + 1, k);
		i__2 = w_subscr(k + 1, k);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = a_subscr(k + 1, k + 1);
		i__2 = w_subscr(k + 1, k + 1);
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;

/*              Conjugate W(k) and W(k+1) */

		i__1 = *n - k;
		zlacgv_(&i__1, &w_ref(k + 1, k), &c__1);
		i__1 = *n - k - 1;
		zlacgv_(&i__1, &w_ref(k + 2, k + 1), &c__1);
	    }
	}

/*        Store details of the interchanges in IPIV */

	if (kstep == 1) {
	    ipiv[k] = kp;
	} else {
	    ipiv[k] = -kp;
	    ipiv[k + 1] = -kp;
	}

/*        Increase K and return to the start of the main loop */

	k += kstep;
	goto L70;

L90:

/*        Update the lower triangle of A22 (= A(k:n,k:n)) as

          A22 := A22 - L21*D*L21' = A22 - L21*W'

          computing blocks of NB columns at a time (note that conjg(W) is
          actually stored) */

	i__1 = *n;
	i__2 = *nb;
	for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
/* Computing MIN */
	    i__3 = *nb, i__4 = *n - j + 1;
	    jb = min(i__3,i__4);

/*           Update the lower triangle of the diagonal block */

	    i__3 = j + jb - 1;
	    for (jj = j; jj <= i__3; ++jj) {
		i__4 = a_subscr(jj, jj);
		i__5 = a_subscr(jj, jj);
		d__1 = a[i__5].r;
		a[i__4].r = d__1, a[i__4].i = 0.;
		i__4 = j + jb - jj;
		i__5 = k - 1;
		z__1.r = -1., z__1.i = 0.;
		zgemv_("No transpose", &i__4, &i__5, &z__1, &a_ref(jj, 1),
			lda, &w_ref(jj, 1), ldw, &c_b1, &a_ref(jj, jj), &c__1)
			;
		i__4 = a_subscr(jj, jj);
		i__5 = a_subscr(jj, jj);
		d__1 = a[i__5].r;
		a[i__4].r = d__1, a[i__4].i = 0.;
/* L100: */
	    }

/*           Update the rectangular subdiagonal block */

	    if (j + jb <= *n) {
		i__3 = *n - j - jb + 1;
		i__4 = k - 1;
		z__1.r = -1., z__1.i = 0.;
		zgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &z__1,
			&a_ref(j + jb, 1), lda, &w_ref(j, 1), ldw, &c_b1, &
			a_ref(j + jb, j), lda);
	    }
/* L110: */
	}

/*        Put L21 in standard form by partially undoing the interchanges
          in columns 1:k-1 */

	j = k - 1;
L120:
	jj = j;
	jp = ipiv[j];
	if (jp < 0) {
	    jp = -jp;
	    --j;
	}
	--j;
	if (jp != jj && j >= 1) {
	    zswap_(&j, &a_ref(jp, 1), lda, &a_ref(jj, 1), lda);
	}
	if (j >= 1) {
	    goto L120;
	}

/*        Set KB to the number of columns factorized */

	*kb = k - 1;

    }
    return 0;

/*     End of ZLAHEF */

} /* zlahef_ */
/* ----------------------------------------------------------------------| */ /* Subroutine */ int zgexpv(integer *n, integer *m, doublereal *t, doublecomplex *v, doublecomplex *w, doublereal *tol, doublereal * anorm, doublecomplex *wsp, integer *lwsp, integer *iwsp, integer * liwsp, S_fp matvec, void *matvecdata, integer *itrace, integer *iflag) { /* System generated locals */ integer i__1, i__2, i__3; doublereal d__1; complex q__1; doublecomplex z__1; /* Builtin functions */ /* Subroutine */ int s_stop(char *, ftnlen); double sqrt(doublereal), d_sign(doublereal *, doublereal *), pow_di( doublereal *, integer *), pow_dd(doublereal *, doublereal *), d_lg10(doublereal *); integer i_dnnt(doublereal *); double d_int(doublereal *); integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(); double z_abs(doublecomplex *); /* Local variables */ static integer ibrkflag; static doublereal step_min__, step_max__; static integer i__, j; static doublereal break_tol__; static integer k1; static doublereal p1, p2, p3; static integer ih, mh, iv, ns, mx; static doublereal xm; static integer j1v; static doublecomplex hij; static doublereal sgn, eps, hj1j, sqr1, beta, hump; static integer ifree, lfree; static doublereal t_old__; static integer iexph; static doublereal t_new__; static integer nexph; extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static doublereal t_now__; extern /* Subroutine */ int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); static integer nstep; static doublereal t_out__; static integer nmult; static doublereal vnorm; extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *); extern doublereal dznrm2_(integer *, 
doublecomplex *, integer *); static integer nscale; static doublereal rndoff; extern /* Subroutine */ int zdscal_(integer *, doublereal *, doublecomplex *, integer *), zgpadm_(integer *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, integer *), znchbv_( integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, doublecomplex *); static doublereal t_step__, avnorm; static integer ireject; static doublereal err_loc__; static integer nreject, mbrkdwn; static doublereal tbrkdwn, s_error__, x_error__; /* Fortran I/O blocks */ static cilist io___40 = { 0, 6, 0, 0, 0 }; static cilist io___48 = { 0, 6, 0, 0, 0 }; static cilist io___49 = { 0, 6, 0, 0, 0 }; static cilist io___50 = { 0, 6, 0, 0, 0 }; static cilist io___51 = { 0, 6, 0, 0, 0 }; static cilist io___52 = { 0, 6, 0, 0, 0 }; static cilist io___53 = { 0, 6, 0, 0, 0 }; static cilist io___54 = { 0, 6, 0, 0, 0 }; static cilist io___55 = { 0, 6, 0, 0, 0 }; static cilist io___56 = { 0, 6, 0, 0, 0 }; static cilist io___57 = { 0, 6, 0, 0, 0 }; static cilist io___58 = { 0, 6, 0, 0, 0 }; static cilist io___59 = { 0, 6, 0, 0, 0 }; /* -----Purpose----------------------------------------------------------| */ /* --- ZGEXPV computes w = exp(t*A)*v */ /* for a Zomplex (i.e., complex double precision) matrix A */ /* It does not compute the matrix exponential in isolation but */ /* instead, it computes directly the action of the exponential */ /* operator on the operand vector. This way of doing so allows */ /* for addressing large sparse problems. */ /* The method used is based on Krylov subspace projection */ /* techniques and the matrix under consideration interacts only */ /* via the external routine `matvec' performing the matrix-vector */ /* product (matrix-free method). */ /* -----Arguments--------------------------------------------------------| */ /* n : (input) order of the principal matrix A. */ /* m : (input) maximum size for the Krylov basis. 
*/ /* t : (input) time at wich the solution is needed (can be < 0). */ /* v(n) : (input) given operand vector. */ /* w(n) : (output) computed approximation of exp(t*A)*v. */ /* tol : (input/output) the requested accuracy tolerance on w. */ /* If on input tol=0.0d0 or tol is too small (tol.le.eps) */ /* the internal value sqrt(eps) is used, and tol is set to */ /* sqrt(eps) on output (`eps' denotes the machine epsilon). */ /* (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol) */ /* anorm : (input) an approximation of some norm of A. */ /* wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1 */ /* +---------+-------+---------------+ */ /* (actually, ideg=6) V H wsp for PADE */ /* iwsp(liwsp): (workspace) liwsp .ge. m+2 */ /* matvec : external subroutine for matrix-vector multiplication. */ /* synopsis: matvec( x, y ) */ /* complex*16 x(*), y(*) */ /* computes: y(1:n) <- A*x(1:n) */ /* where A is the principal matrix. */ /* itrace : (input) running mode. 0=silent, 1=print step-by-step info */ /* iflag : (output) exit flag. */ /* <0 - bad input arguments */ /* 0 - no problem */ /* 1 - maximum number of steps reached without convergence */ /* 2 - requested tolerance was too high */ /* -----Accounts on the computation--------------------------------------| */ /* Upon exit, an interested user may retrieve accounts on the */ /* computations. 
They are located in the workspace arrays wsp and */ /* iwsp as indicated below: */ /* location mnemonic description */ /* -----------------------------------------------------------------| */ /* iwsp(1) = nmult, number of matrix-vector multiplications used */ /* iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated */ /* iwsp(3) = nscale, number of repeated squaring involved in Pade */ /* iwsp(4) = nstep, number of integration steps used up to completion */ /* iwsp(5) = nreject, number of rejected step-sizes */ /* iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise */ /* iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured */ /* -----------------------------------------------------------------| */ /* wsp(1) = step_min, minimum step-size used during integration */ /* wsp(2) = step_max, maximum step-size used during integration */ /* wsp(3) = x_round, maximum among all roundoff errors (lower bound) */ /* wsp(4) = s_round, sum of roundoff errors (lower bound) */ /* wsp(5) = x_error, maximum among all local truncation errors */ /* wsp(6) = s_error, global sum of local truncation errors */ /* wsp(7) = tbrkdwn, if `happy breakdown', time when it occured */ /* wsp(8) = t_now, integration domain successfully covered */ /* wsp(9) = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0) */ /* wsp(10) = ||w||/||v||, scaled norm of the solution w. */ /* -----------------------------------------------------------------| */ /* The `hump' is a measure of the conditioning of the problem. The */ /* matrix exponential is well-conditioned if hump = 1, whereas it is */ /* poorly-conditioned if hump >> 1. However the solution can still be */ /* relatively fairly accurate even when the hump is large (the hump */ /* is an upper bound), especially when the hump and the scaled norm */ /* of w [this is also computed and returned in wsp(10)] are of the */ /* same order of magnitude (further details in reference below). 
*/ /* ----------------------------------------------------------------------| */ /* -----The following parameters may also be adjusted herein-------------| */ /* mxstep : maximum allowable number of integration steps. */ /* The value 0 means an infinite number of steps. */ /* mxreject: maximum allowable number of rejections at each step. */ /* The value 0 means an infinite number of rejections. */ /* ideg : the Pade approximation of type (ideg,ideg) is used as */ /* an approximation to exp(H). The value 0 switches to the */ /* uniform rational Chebyshev approximation of type (14,14) */ /* delta : local truncation error `safety factor' */ /* gamma : stepsize `shrinking factor' */ /* ----------------------------------------------------------------------| */ /* Roger B. Sidje ([email protected]) */ /* EXPOKIT: Software Package for Computing Matrix Exponentials. */ /* ACM - Transactions On Mathematical Software, 24(1):130-156, 1998 */ /* ----------------------------------------------------------------------| */ /* --- check restrictions on input parameters ... */ /* Parameter adjustments */ --w; --v; --wsp; --iwsp; /* Function Body */ *iflag = 0; /* Computing 2nd power */ i__1 = *m + 2; if (*lwsp < *n * (*m + 2) + i__1 * i__1 * 5 + 7) { *iflag = -1; } if (*liwsp < *m + 2) { *iflag = -2; } if (*m >= *n || *m <= 0) { *iflag = -3; } if (*iflag != 0) { s_stop("bad sizes (in input of ZGEXPV)", (ftnlen)30); } /* --- initialisations ... */ k1 = 2; mh = *m + 2; iv = 1; ih = iv + *n * (*m + 1) + *n; ifree = ih + mh * mh; lfree = *lwsp - ifree + 1; ibrkflag = 0; mbrkdwn = *m; nmult = 0; nreject = 0; nexph = 0; nscale = 0; t_out__ = abs(*t); tbrkdwn = 0.; step_min__ = t_out__; step_max__ = 0.; nstep = 0; s_error__ = 0.; x_error__ = 0.; t_now__ = 0.; t_new__ = 0.; p1 = 1.3333333333333333; L1: p2 = p1 - 1.; p3 = p2 + p2 + p2; eps = (d__1 = p3 - 1., abs(d__1)); if (eps == 0.) 
{ goto L1; } if (*tol <= eps) { *tol = sqrt(eps); } rndoff = eps * *anorm; break_tol__ = 1e-7; /* >>> break_tol = tol */ /* >>> break_tol = anorm*tol */ sgn = d_sign(&c_b6, t); zcopy_(n, &v[1], &c__1, &w[1], &c__1); beta = dznrm2_(n, &w[1], &c__1); vnorm = beta; hump = beta; /* --- obtain the very first stepsize ... */ sqr1 = sqrt(.1); xm = 1. / (doublereal) (*m); d__1 = (*m + 1) / 2.72; i__1 = *m + 1; p2 = *tol * pow_di(&d__1, &i__1) * sqrt((*m + 1) * 6.2800000000000002); d__1 = p2 / (beta * 4. * *anorm); t_new__ = 1. / *anorm * pow_dd(&d__1, &xm); d__1 = d_lg10(&t_new__) - sqr1; i__1 = i_dnnt(&d__1) - 1; p1 = pow_di(&c_b10, &i__1); d__1 = t_new__ / p1 + .55; t_new__ = d_int(&d__1) * p1; /* --- step-by-step integration ... */ L100: if (t_now__ >= t_out__) { goto L500; } ++nstep; /* Computing MIN */ d__1 = t_out__ - t_now__; t_step__ = min(d__1,t_new__); p1 = 1. / beta; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iv + i__ - 1; i__3 = i__; z__1.r = p1 * w[i__3].r, z__1.i = p1 * w[i__3].i; wsp[i__2].r = z__1.r, wsp[i__2].i = z__1.i; } i__1 = mh * mh; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = ih + i__ - 1; wsp[i__2].r = 0., wsp[i__2].i = 0.; } /* --- Arnoldi loop ... */ j1v = iv + *n; i__1 = *m; for (j = 1; j <= i__1; ++j) { ++nmult; (*matvec)(matvecdata, &wsp[j1v - *n], &wsp[j1v]); i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { zdotc_(&z__1, n, &wsp[iv + (i__ - 1) * *n], &c__1, &wsp[j1v], & c__1); hij.r = z__1.r, hij.i = z__1.i; z__1.r = -hij.r, z__1.i = -hij.i; zaxpy_(n, &z__1, &wsp[iv + (i__ - 1) * *n], &c__1, &wsp[j1v], & c__1); i__3 = ih + (j - 1) * mh + i__ - 1; wsp[i__3].r = hij.r, wsp[i__3].i = hij.i; } hj1j = dznrm2_(n, &wsp[j1v], &c__1); /* --- if `happy breakdown' go straightforward at the end ... 
*/ if (hj1j <= break_tol__) { s_wsle(&io___40); do_lio(&c__9, &c__1, "happy breakdown: mbrkdwn =", (ftnlen)26); do_lio(&c__3, &c__1, (char *)&j, (ftnlen)sizeof(integer)); do_lio(&c__9, &c__1, " h =", (ftnlen)4); do_lio(&c__5, &c__1, (char *)&hj1j, (ftnlen)sizeof(doublereal)); e_wsle(); k1 = 0; ibrkflag = 1; mbrkdwn = j; tbrkdwn = t_now__; t_step__ = t_out__ - t_now__; goto L300; } i__2 = ih + (j - 1) * mh + j; q__1.r = hj1j, q__1.i = (float)0.; wsp[i__2].r = q__1.r, wsp[i__2].i = q__1.i; d__1 = 1. / hj1j; zdscal_(n, &d__1, &wsp[j1v], &c__1); j1v += *n; /* L200: */ } ++nmult; (*matvec)(matvecdata, &wsp[j1v - *n], &wsp[j1v]); avnorm = dznrm2_(n, &wsp[j1v], &c__1); /* --- set 1 for the 2-corrected scheme ... */ L300: i__1 = ih + *m * mh + *m + 1; wsp[i__1].r = 1., wsp[i__1].i = 0.; /* --- loop while ireject<mxreject until the tolerance is reached ... */ ireject = 0; L401: /* --- compute w = beta*V*exp(t_step*H)*e1 ... */ ++nexph; mx = mbrkdwn + k1; if (TRUE_) { /* --- irreducible rational Pade approximation ... */ d__1 = sgn * t_step__; zgpadm_(&c__6, &mx, &d__1, &wsp[ih], &mh, &wsp[ifree], &lfree, &iwsp[ 1], &iexph, &ns, iflag); iexph = ifree + iexph - 1; nscale += ns; } else { /* --- uniform rational Chebyshev approximation ... */ iexph = ifree; i__1 = mx; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iexph + i__ - 1; wsp[i__2].r = 0., wsp[i__2].i = 0.; } i__1 = iexph; wsp[i__1].r = 1., wsp[i__1].i = 0.; d__1 = sgn * t_step__; znchbv_(&mx, &d__1, &wsp[ih], &mh, &wsp[iexph], &wsp[ifree + mx]); } /* L402: */ /* --- error estimate ... */ if (k1 == 0) { err_loc__ = *tol; } else { p1 = z_abs(&wsp[iexph + *m]) * beta; p2 = z_abs(&wsp[iexph + *m + 1]) * beta * avnorm; if (p1 > p2 * 10.) { err_loc__ = p2; xm = 1. / (doublereal) (*m); } else if (p1 > p2) { err_loc__ = p1 * p2 / (p1 - p2); xm = 1. / (doublereal) (*m); } else { err_loc__ = p1; xm = 1. / (doublereal) (*m - 1); } } /* --- reject the step-size if the error is not acceptable ... 
*/ if (k1 != 0 && err_loc__ > t_step__ * 1.2 * *tol) { t_old__ = t_step__; d__1 = t_step__ * *tol / err_loc__; t_step__ = t_step__ * .9 * pow_dd(&d__1, &xm); d__1 = d_lg10(&t_step__) - sqr1; i__1 = i_dnnt(&d__1) - 1; p1 = pow_di(&c_b10, &i__1); d__1 = t_step__ / p1 + .55; t_step__ = d_int(&d__1) * p1; if (*itrace != 0) { s_wsle(&io___48); do_lio(&c__9, &c__1, "t_step =", (ftnlen)8); do_lio(&c__5, &c__1, (char *)&t_old__, (ftnlen)sizeof(doublereal)) ; e_wsle(); s_wsle(&io___49); do_lio(&c__9, &c__1, "err_loc =", (ftnlen)9); do_lio(&c__5, &c__1, (char *)&err_loc__, (ftnlen)sizeof( doublereal)); e_wsle(); s_wsle(&io___50); do_lio(&c__9, &c__1, "err_required =", (ftnlen)14); d__1 = t_old__ * 1.2 * *tol; do_lio(&c__5, &c__1, (char *)&d__1, (ftnlen)sizeof(doublereal)); e_wsle(); s_wsle(&io___51); do_lio(&c__9, &c__1, "stepsize rejected, stepping down to:", ( ftnlen)36); do_lio(&c__5, &c__1, (char *)&t_step__, (ftnlen)sizeof(doublereal) ); e_wsle(); } ++ireject; ++nreject; if (FALSE_) { s_wsle(&io___52); do_lio(&c__9, &c__1, "Failure in ZGEXPV: ---", (ftnlen)22); e_wsle(); s_wsle(&io___53); do_lio(&c__9, &c__1, "The requested tolerance is too high.", ( ftnlen)36); e_wsle(); s_wsle(&io___54); do_lio(&c__9, &c__1, "Rerun with a smaller value.", (ftnlen)27); e_wsle(); *iflag = 2; return 0; } goto L401; } /* --- now update w = beta*V*exp(t_step*H)*e1 and the hump ... */ /* Computing MAX */ i__1 = 0, i__2 = k1 - 1; mx = mbrkdwn + max(i__1,i__2); q__1.r = beta, q__1.i = (float)0.; hij.r = q__1.r, hij.i = q__1.i; zgemv_("n", n, &mx, &hij, &wsp[iv], n, &wsp[iexph], &c__1, &c_b1, &w[1], & c__1, (ftnlen)1); beta = dznrm2_(n, &w[1], &c__1); hump = max(hump,beta); /* --- suggested value for the next stepsize ... 
*/ d__1 = t_step__ * *tol / err_loc__; t_new__ = t_step__ * .9 * pow_dd(&d__1, &xm); d__1 = d_lg10(&t_new__) - sqr1; i__1 = i_dnnt(&d__1) - 1; p1 = pow_di(&c_b10, &i__1); d__1 = t_new__ / p1 + .55; t_new__ = d_int(&d__1) * p1; err_loc__ = max(err_loc__,rndoff); /* --- update the time covered ... */ t_now__ += t_step__; /* --- display and keep some information ... */ if (*itrace != 0) { s_wsle(&io___55); do_lio(&c__9, &c__1, "integration", (ftnlen)11); do_lio(&c__3, &c__1, (char *)&nstep, (ftnlen)sizeof(integer)); do_lio(&c__9, &c__1, "---------------------------------", (ftnlen)33); e_wsle(); s_wsle(&io___56); do_lio(&c__9, &c__1, "scale-square =", (ftnlen)14); do_lio(&c__3, &c__1, (char *)&ns, (ftnlen)sizeof(integer)); e_wsle(); s_wsle(&io___57); do_lio(&c__9, &c__1, "step_size =", (ftnlen)11); do_lio(&c__5, &c__1, (char *)&t_step__, (ftnlen)sizeof(doublereal)); e_wsle(); s_wsle(&io___58); do_lio(&c__9, &c__1, "err_loc =", (ftnlen)11); do_lio(&c__5, &c__1, (char *)&err_loc__, (ftnlen)sizeof(doublereal)); e_wsle(); s_wsle(&io___59); do_lio(&c__9, &c__1, "next_step =", (ftnlen)11); do_lio(&c__5, &c__1, (char *)&t_new__, (ftnlen)sizeof(doublereal)); e_wsle(); } step_min__ = min(step_min__,t_step__); step_max__ = max(step_max__,t_step__); s_error__ += err_loc__; x_error__ = max(x_error__,err_loc__); if (nstep < 500) { goto L100; } *iflag = 1; L500: iwsp[1] = nmult; iwsp[2] = nexph; iwsp[3] = nscale; iwsp[4] = nstep; iwsp[5] = nreject; iwsp[6] = ibrkflag; iwsp[7] = mbrkdwn; q__1.r = step_min__, q__1.i = (float)0.; wsp[1].r = q__1.r, wsp[1].i = q__1.i; q__1.r = step_max__, q__1.i = (float)0.; wsp[2].r = q__1.r, wsp[2].i = q__1.i; wsp[3].r = (float)0., wsp[3].i = (float)0.; wsp[4].r = (float)0., wsp[4].i = (float)0.; q__1.r = x_error__, q__1.i = (float)0.; wsp[5].r = q__1.r, wsp[5].i = q__1.i; q__1.r = s_error__, q__1.i = (float)0.; wsp[6].r = q__1.r, wsp[6].i = q__1.i; q__1.r = tbrkdwn, q__1.i = (float)0.; wsp[7].r = q__1.r, wsp[7].i = q__1.i; d__1 = sgn * t_now__; 
q__1.r = d__1, q__1.i = (float)0.; wsp[8].r = q__1.r, wsp[8].i = q__1.i; d__1 = hump / vnorm; q__1.r = d__1, q__1.i = (float)0.; wsp[9].r = q__1.r, wsp[9].i = q__1.i; d__1 = beta / vnorm; q__1.r = d__1, q__1.i = (float)0.; wsp[10].r = q__1.r, wsp[10].i = q__1.i; return 0; } /* zgexpv_ */
/* ZLABRD reduces the first NB rows and columns of a complex general
   m-by-n matrix A to upper or lower real bidiagonal form by a unitary
   transformation Q' * A * P, and returns the matrices X and Y which
   are needed to apply the transformation to the unreduced part of A.

   If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
   bidiagonal form.  This is an auxiliary routine called by ZGEBRD.

   (f2c translation of the LAPACK auxiliary routine, version 3.2,
   Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.,
   November 2006.)

   Arguments
   =========

   M       (input) INTEGER
           The number of rows in the matrix A.

   N       (input) INTEGER
           The number of columns in the matrix A.

   NB      (input) INTEGER
           The number of leading rows and columns of A to be reduced.

   A       (input/output) COMPLEX*16 array, dimension (LDA,N)
           On entry, the m by n general matrix to be reduced.
           On exit, the first NB rows and columns of the matrix are
           overwritten; the rest of the array is unchanged.
           If m >= n, elements on and below the diagonal in the first NB
           columns, with the array TAUQ, represent the unitary
           matrix Q as a product of elementary reflectors; and
           elements above the diagonal in the first NB rows, with the
           array TAUP, represent the unitary matrix P as a product
           of elementary reflectors.
           If m < n, elements below the diagonal in the first NB
           columns, with the array TAUQ, represent the unitary
           matrix Q as a product of elementary reflectors, and
           elements on and above the diagonal in the first NB rows,
           with the array TAUP, represent the unitary matrix P as
           a product of elementary reflectors.
           See Further Details.

   LDA     (input) INTEGER
           The leading dimension of the array A.  LDA >= MAX(1,M).

   D       (output) DOUBLE PRECISION array, dimension (NB)
           The diagonal elements of the first NB rows and columns of
           the reduced matrix.  D(i) = A(i,i).

   E       (output) DOUBLE PRECISION array, dimension (NB)
           The off-diagonal elements of the first NB rows and columns of
           the reduced matrix.

   TAUQ    (output) COMPLEX*16 array, dimension (NB)
           The scalar factors of the elementary reflectors which
           represent the unitary matrix Q.  See Further Details.

   TAUP    (output) COMPLEX*16 array, dimension (NB)
           The scalar factors of the elementary reflectors which
           represent the unitary matrix P.  See Further Details.

   X       (output) COMPLEX*16 array, dimension (LDX,NB)
           The m-by-nb matrix X required to update the unreduced part
           of A.

   LDX     (input) INTEGER
           The leading dimension of the array X.  LDX >= MAX(1,M).

   Y       (output) COMPLEX*16 array, dimension (LDY,NB)
           The n-by-nb matrix Y required to update the unreduced part
           of A.

   LDY     (input) INTEGER
           The leading dimension of the array Y.  LDY >= MAX(1,N).

   Further Details
   ===============

   The matrices Q and P are represented as products of elementary
   reflectors:

      Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)

   Each H(i) and G(i) has the form:

      H(i) = I - tauq * v * v'  and  G(i) = I - taup * u * u'

   where tauq and taup are complex scalars, and v and u are complex
   vectors.

   If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
   A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
   A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).

   If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
   A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
   A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).

   The elements of the vectors v and u together form the m-by-nb matrix
   V and the nb-by-n matrix U' which are needed, with X and Y, to apply
   the transformation to the unreduced part of the matrix, using a block
   update of the form:  A := A - V*Y' - X*U'.

   The contents of A on exit are illustrated by the following examples
   with nb = 2:

   m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):

     (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
     (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
     (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
     (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
     (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
     (  v1  v2  a   a   a  )

   where a denotes an element of the original matrix which is unchanged,
   vi denotes an element of the vector defining H(i), and ui an element
   of the vector defining G(i).

   NOTE(review): c_b1, c_b2, c__1 are file-scope f2c-generated constants
   declared elsewhere in this translation unit (their values are not
   visible in this chunk) — confirm against the file header.
   ===================================================================== */
int zlabrd_(int *m, int *n, int *nb, doublecomplex *a, int *lda,
	double *d__, double *e, doublecomplex *tauq, doublecomplex *taup,
	doublecomplex *x, int *ldx, doublecomplex *y, int *ldy)
{
    /* System generated locals */
    int a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2,
	    i__3;
    doublecomplex z__1;

    /* Local variables */
    int i__;
    doublecomplex alpha;
    extern int zscal_(int *, doublecomplex *, doublecomplex *, int *),
	    zgemv_(char *, int *, int *, doublecomplex *, doublecomplex *,
	    int *, doublecomplex *, int *, doublecomplex *, doublecomplex *,
	    int *), zlarfg_(int *, doublecomplex *, doublecomplex *, int *,
	    doublecomplex *), zlacgv_(int *, doublecomplex *, int *);

    /* Parameter adjustments: shift all array pointers so that the
       Fortran-style 1-based indexing used below addresses the arrays
       correctly (standard f2c idiom). */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;

    /* Function Body */

    /* Quick return if possible */
    if (*m <= 0 || *n <= 0) {
	return 0;
    }

    if (*m >= *n) {

	/* Reduce to upper bidiagonal form */

	i__1 = *nb;
	for (i__ = 1; i__ <= i__1; ++i__) {

	    /* Update A(i:m,i): subtract the contributions of the previous
	       i-1 reflectors, A(i:m,i) -= V*Y(i,:)' + X*A(1:i-1,i).
	       Y is conjugated in place before and un-conjugated after the
	       first GEMV (zlacgv pair) because the update needs Y'. */
	    i__2 = i__ - 1;
	    zlacgv_(&i__2, &y[i__ + y_dim1], ldy);
	    i__2 = *m - i__ + 1;
	    i__3 = i__ - 1;
	    z__1.r = -1., z__1.i = -0.;
	    zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1],
		    lda, &y[i__ + y_dim1], ldy, &c_b2, &a[i__ + i__ *
		    a_dim1], &c__1);
	    i__2 = i__ - 1;
	    zlacgv_(&i__2, &y[i__ + y_dim1], ldy);
	    i__2 = *m - i__ + 1;
	    i__3 = i__ - 1;
	    z__1.r = -1., z__1.i = -0.;
	    zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + x_dim1],
		    ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2, &a[i__ + i__ *
		    a_dim1], &c__1);

	    /* Generate reflection Q(i) to annihilate A(i+1:m,i) */
	    i__2 = i__ + i__ * a_dim1;
	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
	    i__2 = *m - i__ + 1;
	    /* Computing MIN */
	    i__3 = i__ + 1;
	    zlarfg_(&i__2, &alpha, &a[MIN(i__3, *m)+ i__ * a_dim1], &c__1, &
		    tauq[i__]);
	    i__2 = i__;
	    d__[i__2] = alpha.r;
	    if (i__ < *n) {
		/* Temporarily set A(i,i)=1 so the stored reflector vector
		   v in A(i:m,i) can be used directly in the GEMVs below. */
		i__2 = i__ + i__ * a_dim1;
		a[i__2].r = 1., a[i__2].i = 0.;

		/* Compute Y(i+1:n,i) */
		i__2 = *m - i__ + 1;
		i__3 = *n - i__;
		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[i__ + (
			i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], &
			c__1, &c_b1, &y[i__ + 1 + i__ * y_dim1], &c__1);
		i__2 = *m - i__ + 1;
		i__3 = i__ - 1;
		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[i__ +
			a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b1, &
			y[i__ * y_dim1 + 1], &c__1);
		i__2 = *n - i__;
		i__3 = i__ - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 +
			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2, &y[
			i__ + 1 + i__ * y_dim1], &c__1);
		i__2 = *m - i__ + 1;
		i__3 = i__ - 1;
		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &x[i__ +
			x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b1, &
			y[i__ * y_dim1 + 1], &c__1);
		i__2 = i__ - 1;
		i__3 = *n - i__;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ +
			1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
			c_b2, &y[i__ + 1 + i__ * y_dim1], &c__1);
		i__2 = *n - i__;
		zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);

		/* Update A(i,i+1:n).  The zlacgv pairs conjugate a row in
		   place, run the GEMV, then restore it. */
		i__2 = *n - i__;
		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
		zlacgv_(&i__, &a[i__ + a_dim1], lda);
		i__2 = *n - i__;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__, &z__1, &y[i__ + 1 +
			y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b2, &a[i__ + (
			i__ + 1) * a_dim1], lda);
		zlacgv_(&i__, &a[i__ + a_dim1], lda);
		i__2 = i__ - 1;
		zlacgv_(&i__2, &x[i__ + x_dim1], ldx);
		i__2 = i__ - 1;
		i__3 = *n - i__;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ +
			1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b2, &
			a[i__ + (i__ + 1) * a_dim1], lda);
		i__2 = i__ - 1;
		zlacgv_(&i__2, &x[i__ + x_dim1], ldx);

		/* Generate reflection P(i) to annihilate A(i,i+2:n) */
		i__2 = i__ + (i__ + 1) * a_dim1;
		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
		i__2 = *n - i__;
		/* Computing MIN */
		i__3 = i__ + 2;
		zlarfg_(&i__2, &alpha, &a[i__ + MIN(i__3, *n)* a_dim1], lda, &
			taup[i__]);
		i__2 = i__;
		e[i__2] = alpha.r;
		i__2 = i__ + (i__ + 1) * a_dim1;
		a[i__2].r = 1., a[i__2].i = 0.;

		/* Compute X(i+1:m,i) */
		i__2 = *m - i__;
		i__3 = *n - i__;
		zgemv_("No transpose", &i__2, &i__3, &c_b2, &a[i__ + 1 + (i__
			+ 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1],
			lda, &c_b1, &x[i__ + 1 + i__ * x_dim1], &c__1);
		i__2 = *n - i__;
		zgemv_("Conjugate transpose", &i__2, &i__, &c_b2, &y[i__ + 1
			+ y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &
			c_b1, &x[i__ * x_dim1 + 1], &c__1);
		i__2 = *m - i__;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__, &z__1, &a[i__ + 1 +
			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2, &x[
			i__ + 1 + i__ * x_dim1], &c__1);
		i__2 = i__ - 1;
		i__3 = *n - i__;
		zgemv_("No transpose", &i__2, &i__3, &c_b2, &a[(i__ + 1) *
			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
			c_b1, &x[i__ * x_dim1 + 1], &c__1);
		i__2 = *m - i__;
		i__3 = i__ - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 +
			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2, &x[
			i__ + 1 + i__ * x_dim1], &c__1);
		i__2 = *m - i__;
		zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
		/* Restore the row of A conjugated before the P(i) step. */
		i__2 = *n - i__;
		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
	    }
/* L10: */
	}
    } else {

	/* Reduce to lower bidiagonal form */

	i__1 = *nb;
	for (i__ = 1; i__ <= i__1; ++i__) {

	    /* Update A(i,i:n) */
	    i__2 = *n - i__ + 1;
	    zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
	    i__2 = i__ - 1;
	    zlacgv_(&i__2, &a[i__ + a_dim1], lda);
	    i__2 = *n - i__ + 1;
	    i__3 = i__ - 1;
	    z__1.r = -1., z__1.i = -0.;
	    zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + y_dim1],
		    ldy, &a[i__ + a_dim1], lda, &c_b2, &a[i__ + i__ * a_dim1],
		     lda);
	    i__2 = i__ - 1;
	    zlacgv_(&i__2, &a[i__ + a_dim1], lda);
	    i__2 = i__ - 1;
	    zlacgv_(&i__2, &x[i__ + x_dim1], ldx);
	    i__2 = i__ - 1;
	    i__3 = *n - i__ + 1;
	    z__1.r = -1., z__1.i = -0.;
	    zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[i__ *
		    a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b2, &a[i__ +
		    i__ * a_dim1], lda);
	    i__2 = i__ - 1;
	    zlacgv_(&i__2, &x[i__ + x_dim1], ldx);

	    /* Generate reflection P(i) to annihilate A(i,i+1:n) */
	    i__2 = i__ + i__ * a_dim1;
	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
	    i__2 = *n - i__ + 1;
	    /* Computing MIN */
	    i__3 = i__ + 1;
	    zlarfg_(&i__2, &alpha, &a[i__ + MIN(i__3, *n)* a_dim1], lda, &
		    taup[i__]);
	    i__2 = i__;
	    d__[i__2] = alpha.r;
	    if (i__ < *m) {
		i__2 = i__ + i__ * a_dim1;
		a[i__2].r = 1., a[i__2].i = 0.;

		/* Compute X(i+1:m,i) */
		i__2 = *m - i__;
		i__3 = *n - i__ + 1;
		zgemv_("No transpose", &i__2, &i__3, &c_b2, &a[i__ + 1 + i__ *
			 a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b1, &x[
			i__ + 1 + i__ * x_dim1], &c__1);
		i__2 = *n - i__ + 1;
		i__3 = i__ - 1;
		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &y[i__ +
			y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b1, &x[
			i__ * x_dim1 + 1], &c__1);
		i__2 = *m - i__;
		i__3 = i__ - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 +
			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2, &x[
			i__ + 1 + i__ * x_dim1], &c__1);
		i__2 = i__ - 1;
		i__3 = *n - i__ + 1;
		zgemv_("No transpose", &i__2, &i__3, &c_b2, &a[i__ * a_dim1 +
			1], lda, &a[i__ + i__ * a_dim1], lda, &c_b1, &x[i__ *
			x_dim1 + 1], &c__1);
		i__2 = *m - i__;
		i__3 = i__ - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 +
			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2, &x[
			i__ + 1 + i__ * x_dim1], &c__1);
		i__2 = *m - i__;
		zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
		i__2 = *n - i__ + 1;
		zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);

		/* Update A(i+1:m,i) */
		i__2 = i__ - 1;
		zlacgv_(&i__2, &y[i__ + y_dim1], ldy);
		i__2 = *m - i__;
		i__3 = i__ - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 +
			a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b2, &a[i__ +
			1 + i__ * a_dim1], &c__1);
		i__2 = i__ - 1;
		zlacgv_(&i__2, &y[i__ + y_dim1], ldy);
		i__2 = *m - i__;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__, &z__1, &x[i__ + 1 +
			x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2, &a[
			i__ + 1 + i__ * a_dim1], &c__1);

		/* Generate reflection Q(i) to annihilate A(i+2:m,i) */
		i__2 = i__ + 1 + i__ * a_dim1;
		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
		i__2 = *m - i__;
		/* Computing MIN */
		i__3 = i__ + 2;
		zlarfg_(&i__2, &alpha, &a[MIN(i__3, *m)+ i__ * a_dim1], &
			c__1, &tauq[i__]);
		i__2 = i__;
		e[i__2] = alpha.r;
		i__2 = i__ + 1 + i__ * a_dim1;
		a[i__2].r = 1., a[i__2].i = 0.;

		/* Compute Y(i+1:n,i) */
		i__2 = *m - i__;
		i__3 = *n - i__;
		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[i__ + 1
			+ (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1]
			, &c__1, &c_b1, &y[i__ + 1 + i__ * y_dim1], &c__1);
		i__2 = *m - i__;
		i__3 = i__ - 1;
		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[i__ + 1
			+ a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
			c_b1, &y[i__ * y_dim1 + 1], &c__1);
		i__2 = *n - i__;
		i__3 = i__ - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 +
			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2, &y[
			i__ + 1 + i__ * y_dim1], &c__1);
		i__2 = *m - i__;
		zgemv_("Conjugate transpose", &i__2, &i__, &c_b2, &x[i__ + 1
			+ x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &
			c_b1, &y[i__ * y_dim1 + 1], &c__1);
		i__2 = *n - i__;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("Conjugate transpose", &i__, &i__2, &z__1, &a[(i__ +
			1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
			c_b2, &y[i__ + 1 + i__ * y_dim1], &c__1);
		i__2 = *n - i__;
		zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
	    } else {
		/* Last row: just undo the conjugation of A(i,i:n) done at
		   the top of the loop body. */
		i__2 = *n - i__ + 1;
		zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
	    }
/* L20: */
	}
    }
    return 0;

    /* End of ZLABRD */

} /* zlabrd_ */
/* zgetv0: generate the initial (or restart) residual vector for an
   Arnoldi factorization (ARPACK, f2c translation).

   If INITV is false a random starting vector is generated with ZLARNV;
   otherwise the user-supplied RESID is used.  For the generalized
   problem (BMAT='G') the vector is forced into the range of OP and its
   B-norm is computed; for BMAT='I' the Euclidean norm is used.  When
   J > 1 the vector is additionally B-orthogonalized against the first
   J-1 columns of V using classical Gram-Schmidt with one step of
   iterative refinement.

   Reverse communication: the routine is re-entered repeatedly through
   IDO.  On each exit with IDO = -1 or 2 the caller must apply OP or B
   to workd(ipntr(1)) and place the result at workd(ipntr(2)), then call
   again; IDO = 99 signals completion.  All locals are `static` so their
   values survive between these re-entries (this also makes the routine
   non-reentrant/non-thread-safe).  On exit IERR = 0 on success, -1 if
   the refinement step failed (RESID is zeroed and RNORM set to 0). */
/* Subroutine */ int zgetv0_(integer *ido, char *bmat, integer *itry,
	logical *initv, integer *n, integer *j, doublecomplex *v, integer *
	ldv, doublecomplex *resid, doublereal *rnorm, integer *ipntr,
	doublecomplex *workd, integer *ierr, ftnlen bmat_len)
{
    /* Initialized data */

    static logical inits = TRUE_;

    /* System generated locals */
    integer v_dim1, v_offset, i__1, i__2;
    doublereal d__1, d__2;
    doublecomplex z__1;

    /* Local variables */
    static real t0, t1, t2, t3;
    static integer jj, iter;
    static logical orth;
    static integer iseed[4], idist;
    static doublecomplex cnorm;
    extern /* Double Complex */ void zdotc_(doublecomplex *, integer *,
	    doublecomplex *, integer *, doublecomplex *, integer *);
    static logical first;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *, ftnlen),
	    dvout_(integer *, integer *, doublereal *, integer *, char *,
	    ftnlen), zcopy_(integer *, doublecomplex *, integer *,
	    doublecomplex *, integer *), zvout_(integer *, integer *,
	    doublecomplex *, integer *, char *, ftnlen);
    extern doublereal dlapy2_(doublereal *, doublereal *), dznrm2_(integer *,
	    doublecomplex *, integer *);
    static doublereal rnorm0;
    extern /* Subroutine */ int arscnd_(real *);
    static integer msglvl;
    extern /* Subroutine */ int zlarnv_(integer *, integer *, integer *,
	    doublecomplex *);

    /* Parameter adjustments: f2c shift to 1-based indexing. */
    --workd;
    --resid;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    --ipntr;

    /* Function Body */

    /* Initialize the seed of the LAPACK random number generator once,
       on the very first call (inits is a static flag). */
    if (inits) {
	iseed[0] = 1;
	iseed[1] = 3;
	iseed[2] = 5;
	iseed[3] = 7;
	inits = FALSE_;
    }

    if (*ido == 0) {

	/* Fresh start: initialize timing statistics and the message
	   level for debugging. */
	arscnd_(&t0);
	msglvl = debug_1.mgetv0;

	*ierr = 0;
	iter = 0;
	first = FALSE_;
	orth = FALSE_;

	/* Possibly generate a random starting vector in RESID, using the
	   LAPACK generator with idist = 2: uniform (-1,1) distribution. */
	if (! (*initv)) {
	    idist = 2;
	    zlarnv_(&idist, iseed, n, &resid[1]);
	}

	/* Force the starting vector into the range of OP to handle the
	   generalized problem when B is possibly singular: hand
	   r = OP*resid back to the caller (ido = -1). */
	arscnd_(&t2);
	if (*(unsigned char *)bmat == 'G') {
	    ++timing_1.nopx;
	    ipntr[1] = 1;
	    ipntr[2] = *n + 1;
	    zcopy_(n, &resid[1], &c__1, &workd[1], &c__1);
	    *ido = -1;
	    goto L9000;
	}
    }

    /* Re-entry dispatch: back from computing B*(initial-vector)? */
    if (first) {
	goto L20;
    }

    /* Back from computing B*(orthogonalized-vector)? */
    if (orth) {
	goto L40;
    }

    arscnd_(&t3);
    timing_1.tmvopx += t3 - t2;

    /* Starting vector is now in the range of OP; r = OP*r.
       Request B*r from the caller (ido = 2) to compute the B-norm. */
    arscnd_(&t2);
    first = TRUE_;
    if (*(unsigned char *)bmat == 'G') {
	++timing_1.nbx;
	zcopy_(n, &workd[*n + 1], &c__1, &resid[1], &c__1);
	ipntr[1] = *n + 1;
	ipntr[2] = 1;
	*ido = 2;
	goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
	zcopy_(n, &resid[1], &c__1, &workd[1], &c__1);
    }

L20:

    if (*(unsigned char *)bmat == 'G') {
	arscnd_(&t3);
	timing_1.tmvbx += t3 - t2;
    }

    first = FALSE_;
    /* B-norm: sqrt(|resid' * B*resid|) for 'G', plain 2-norm for 'I'. */
    if (*(unsigned char *)bmat == 'G') {
	zdotc_(&z__1, n, &resid[1], &c__1, &workd[1], &c__1);
	cnorm.r = z__1.r, cnorm.i = z__1.i;
	d__1 = cnorm.r;
	d__2 = d_imag(&cnorm);
	rnorm0 = sqrt(dlapy2_(&d__1, &d__2));
    } else if (*(unsigned char *)bmat == 'I') {
	rnorm0 = dznrm2_(n, &resid[1], &c__1);
    }
    *rnorm = rnorm0;

    /* Exit if this is the very first Arnoldi step. */
    if (*j == 1) {
	goto L50;
    }

    /* Otherwise B-orthogonalize the starting vector against the current
       Arnoldi basis using Gram-Schmidt with iterative refinement:
       s = V'*B*r;  r = r - V*s.  This handles the case where an
       invariant subspace was encountered mid-factorization.  The
       stopping criterion is from Parlett's book (p. 107) and the
       Gragg & Reichel TOMS paper. */
    orth = TRUE_;
L30:

    i__1 = *j - 1;
    zgemv_("C", n, &i__1, &c_b1, &v[v_offset], ldv, &workd[1], &c__1, &c_b2, &
	    workd[*n + 1], &c__1, (ftnlen)1);
    i__1 = *j - 1;
    z__1.r = -1., z__1.i = -0.;
    zgemv_("N", n, &i__1, &z__1, &v[v_offset], ldv, &workd[*n + 1], &c__1, &
	    c_b1, &resid[1], &c__1, (ftnlen)1);

    /* Compute the B-norm of the orthogonalized starting vector
       (another reverse-communication round trip for 'G'). */
    arscnd_(&t2);
    if (*(unsigned char *)bmat == 'G') {
	++timing_1.nbx;
	zcopy_(n, &resid[1], &c__1, &workd[*n + 1], &c__1);
	ipntr[1] = *n + 1;
	ipntr[2] = 1;
	*ido = 2;
	goto L9000;
    } else if (*(unsigned char *)bmat == 'I') {
	zcopy_(n, &resid[1], &c__1, &workd[1], &c__1);
    }

L40:

    if (*(unsigned char *)bmat == 'G') {
	arscnd_(&t3);
	timing_1.tmvbx += t3 - t2;
    }

    if (*(unsigned char *)bmat == 'G') {
	zdotc_(&z__1, n, &resid[1], &c__1, &workd[1], &c__1);
	cnorm.r = z__1.r, cnorm.i = z__1.i;
	d__1 = cnorm.r;
	d__2 = d_imag(&cnorm);
	*rnorm = sqrt(dlapy2_(&d__1, &d__2));
    } else if (*(unsigned char *)bmat == 'I') {
	*rnorm = dznrm2_(n, &resid[1], &c__1);
    }

    /* Check for further orthogonalization. */
    if (msglvl > 2) {
	dvout_(&debug_1.logfil, &c__1, &rnorm0, &debug_1.ndigit, "_getv0: re"
		"-orthonalization ; rnorm0 is", (ftnlen)38);
	dvout_(&debug_1.logfil, &c__1, rnorm, &debug_1.ndigit, "_getv0: re-o"
		"rthonalization ; rnorm is", (ftnlen)37);
    }

    /* Accept if the norm did not drop below ~0.717 (= 1/sqrt(2)-ish) of
       its pre-orthogonalization value; otherwise refine once more. */
    if (*rnorm > rnorm0 * .717f) {
	goto L50;
    }

    /* Perform one iterative refinement step (at most one retry). */
    ++iter;
    if (iter <= 1) {
	rnorm0 = *rnorm;
	goto L30;
    } else {

	/* Iterative refinement step "failed": zero out RESID and flag
	   the error to the caller. */
	i__1 = *n;
	for (jj = 1; jj <= i__1; ++jj) {
	    i__2 = jj;
	    resid[i__2].r = 0., resid[i__2].i = 0.;
/* L45: */
	}
	*rnorm = 0.;
	*ierr = -1;
    }

L50:

    if (msglvl > 0) {
	dvout_(&debug_1.logfil, &c__1, rnorm, &debug_1.ndigit, "_getv0: B-no"
		"rm of initial / restarted starting vector", (ftnlen)53);
    }
    if (msglvl > 2) {
	zvout_(&debug_1.logfil, n, &resid[1], &debug_1.ndigit, "_getv0: init"
		"ial / restarted starting vector", (ftnlen)43);
    }
    /* Done: ido = 99 tells the caller no more reverse communication. */
    *ido = 99;

    arscnd_(&t1);
    timing_1.tgetv0 += t1 - t0;

L9000:
    return 0;

    /* End of zgetv0 */

} /* zgetv0_ */
/* Subroutine */ int ztgsna_(char *job, char *howmny, logical *select,
	integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer
	*ldb, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *
	ldvr, doublereal *s, doublereal *dif, integer *mm, integer *m,
	doublecomplex *work, integer *lwork, integer *iwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       June 30, 1999

    Purpose
    =======

    ZTGSNA estimates reciprocal condition numbers for specified
    eigenvalues and/or eigenvectors of a matrix pair (A, B).
    (A, B) must be in generalized Schur canonical form, that is, A and
    B are both upper triangular.

    Arguments
    =========

    JOB     (input) CHARACTER*1
            = 'E': condition numbers for eigenvalues only (S);
            = 'V': condition numbers for eigenvectors only (DIF);
            = 'B': for both eigenvalues and eigenvectors (S and DIF).

    HOWMNY  (input) CHARACTER*1
            = 'A': compute condition numbers for all eigenpairs;
            = 'S': only for eigenpairs selected by the array SELECT.

    SELECT  (input) LOGICAL array, dimension (N)
            If HOWMNY = 'S', SELECT(j) = .TRUE. requests condition
            numbers for the j-th eigenvalue/eigenvector.
            If HOWMNY = 'A', SELECT is not referenced.

    N       (input) INTEGER
            The order of the square matrix pair (A, B).  N >= 0.

    A       (input) COMPLEX*16 array, dimension (LDA,N)
            The upper triangular matrix A in the pair (A, B).

    LDA     (input) INTEGER
            The leading dimension of A.  LDA >= max(1,N).

    B       (input) COMPLEX*16 array, dimension (LDB,N)
            The upper triangular matrix B in the pair (A, B).

    LDB     (input) INTEGER
            The leading dimension of B.  LDB >= max(1,N).

    VL      (input) COMPLEX*16 array, dimension (LDVL,M)
            If JOB = 'E' or 'B', the left eigenvectors of (A, B)
            corresponding to the eigenpairs specified by HOWMNY and
            SELECT, stored in consecutive columns as returned by
            ZTGEVC.  If JOB = 'V', VL is not referenced.

    LDVL    (input) INTEGER
            The leading dimension of VL.  LDVL >= 1; and if
            JOB = 'E' or 'B', LDVL >= N.

    VR      (input) COMPLEX*16 array, dimension (LDVR,M)
            If JOB = 'E' or 'B', the right eigenvectors of (A, B),
            stored like VL.  If JOB = 'V', VR is not referenced.

    LDVR    (input) INTEGER
            The leading dimension of VR.  LDVR >= 1; and if
            JOB = 'E' or 'B', LDVR >= N.

    S       (output) DOUBLE PRECISION array, dimension (MM)
            If JOB = 'E' or 'B', the reciprocal condition numbers of
            the selected eigenvalues, in consecutive elements.
            If JOB = 'V', S is not referenced.

    DIF     (output) DOUBLE PRECISION array, dimension (MM)
            If JOB = 'V' or 'B', the estimated reciprocal condition
            numbers of the selected eigenvectors (a Frobenius
            norm-based estimate of Difl).  If the eigenvalues cannot
            be reordered to compute DIF(j), DIF(j) is set to 0; this
            can only occur when the true value would be very small
            anyway.  If JOB = 'E', DIF is not referenced.

    MM      (input) INTEGER
            The number of elements in the arrays S and DIF.  MM >= M.

    M       (output) INTEGER
            The number of elements of S and DIF actually used; one per
            selected eigenvalue.  If HOWMNY = 'A', M is set to N.

    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
            If JOB = 'E', WORK is not referenced.  Otherwise, on exit,
            if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of WORK.  LWORK >= 1.
            If JOB = 'V' or 'B', LWORK >= 2*N*N.
            If LWORK = -1 a workspace query is assumed; the routine
            only computes the optimal size of WORK.

    IWORK   (workspace) INTEGER array, dimension (N+2)
            If JOB = 'E', IWORK is not referenced.

    INFO    (output) INTEGER
            = 0: successful exit;
            < 0: if INFO = -i, the i-th argument had an illegal value.

    Further Details
    ===============

    The reciprocal of the condition number of the i-th generalized
    eigenvalue w = (a, b) is defined as

            S(I) = (|v'Au|**2 + |v'Bu|**2)**(1/2) / (norm(u)*norm(v))

    where u and v are the right and left eigenvectors of (A, B)
    corresponding to w.  The pair (a, b) corresponds to an eigenvalue
    w = a/b (= v'Au/v'Bu) of (A, B).  If both a and b equal zero, then
    (A, B) is singular and S(I) = -1 is returned.  An approximate
    error bound on the chordal distance between the computed and exact
    eigenvalue is  chord(w, lambda) <= EPS * norm(A, B) / S(I).

    The reciprocal condition number DIF(i) of the right eigenvector u
    and left eigenvector v corresponding to w is defined as the
    smallest singular value of

            Zl = [ kron(a, In-1)  -kron(1, A22) ]
                 [ kron(b, In-1)  -kron(1, B22) ]

    after (A, B) has been reordered so the selected eigenvalue sits in
    the (1,1) position.  The smallest singular value is approximated
    by an upper bound computed via ZTGSYL/ZLATDF.  An approximate
    error bound for a computed eigenvector is
    EPS * norm(A, B) / DIF(i).  See refs. [2-3] below.

    Based on contributions by Bo Kagstrom and Peter Poromaa,
    Department of Computing Science, Umea University, Sweden.

    References:
    [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
        Generalized Real Schur Form of a Regular Matrix Pair (A, B),
        in M.S. Moonen et al (eds), Linear Algebra for Large Scale and
        Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
    [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with
        Specified Eigenvalues of a Regular Matrix Pair (A, B) and
        Condition Estimation, Report UMINF-94.04, Umea University,
        1994.  Also LAPACK Working Note 87.
    [3] B. Kagstrom and P. Poromaa; LAPACK-Style Algorithms and
        Software for Solving the Generalized Sylvester Equation and
        Estimating the Separation between Regular Matrix Pairs, Report
        UMINF-93.23, Umea University, 1993.  Also LAPACK Working
        Note 75.

    =====================================================================

       Decode and test the input parameters.  Parameter adjustments
       shift every array pointer so 1-based Fortran indexing works. */
    /* Table of constant values */
    static integer c__1 = 1;
    static doublecomplex c_b19 = {1.,0.};
    static doublecomplex c_b20 = {0.,0.};
    static logical c_false = FALSE_;
    static integer c__3 = 3;

    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1,
	    vr_offset, i__1, i__2;
    doublereal d__1, d__2;
    doublecomplex z__1;
    /* Builtin functions */
    double z_abs(doublecomplex *);
    /* Local variables */
    static doublereal cond;
    static integer ierr, ifst;
    static doublereal lnrm;
    static doublecomplex yhax, yhbx;
    static integer ilst;
    static doublereal rnrm;
    static integer i__, k;
    static doublereal scale;
    extern logical lsame_(char *, char *);
    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *,
	    doublecomplex *, integer *, doublecomplex *, integer *);
    static integer lwmin;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *);
    static logical wants;
    static integer llwrk, n1, n2;
    static doublecomplex dummy[1];
    extern doublereal dlapy2_(doublereal *, doublereal *);
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    static doublecomplex dummy1[1];
    extern doublereal dznrm2_(integer *, doublecomplex *, integer *), dlamch_(
	    char *);
    static integer ks;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal bignum;
    static logical wantbh, wantdf, somcon;
    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *,
	    doublecomplex *, integer *, doublecomplex *, integer *),
	    ztgexc_(logical *, logical *, integer *, doublecomplex *,
	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
	    doublecomplex *, integer *, integer *, integer *, integer *);
    static doublereal smlnum;
    static logical lquery;
    extern /* Subroutine */ int ztgsyl_(char *, integer *, integer *,
	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
	    doublecomplex *, integer *, doublecomplex *, integer *,
	    doublecomplex *, integer *, doublecomplex *, integer *,
	    doublereal *, doublereal *, doublecomplex *, integer *, integer *,
	    integer *);
    static doublereal eps;

/* 1-based column-major accessors over the shifted pointers. */
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1
#define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)]
#define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1
#define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)]

    --select;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1 * 1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1 * 1;
    vr -= vr_offset;
    --s;
    --dif;
    --work;
    --iwork;

    /* Function Body */
    wantbh = lsame_(job, "B");
    wants = lsame_(job, "E") || wantbh;
    wantdf = lsame_(job, "V") || wantbh;
    somcon = lsame_(howmny, "S");
    *info = 0;
    lquery = *lwork == -1;
    if (lsame_(job, "V") || lsame_(job, "B")) {
/*        Computing MAX: DIF estimation needs 2*N*N workspace. */
	i__1 = 1, i__2 = (*n << 1) * *n;
	lwmin = max(i__1,i__2);
    } else {
	lwmin = 1;
    }
    if (! wants && ! wantdf) {
	*info = -1;
    } else if (! lsame_(howmny, "A") && ! somcon) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lda < max(1,*n)) {
	*info = -6;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else if (wants && *ldvl < *n) {
	*info = -10;
    } else if (wants && *ldvr < *n) {
	*info = -12;
    } else {

/*        Set M to the number of eigenpairs for which condition
          numbers are required, and test MM. */

	if (somcon) {
	    *m = 0;
	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {
		if (select[k]) {
		    ++(*m);
		}
/* L10: */
	    }
	} else {
	    *m = *n;
	}
	if (*mm < *m) {
	    *info = -15;
	} else if (*lwork < lwmin && ! lquery) {
	    *info = -18;
	}
    }
    if (*info == 0) {
	work[1].r = (doublereal) lwmin, work[1].i = 0.;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTGSNA", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Get machine constants */

    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
    /* llwrk = workspace left over after the two N*N matrix copies below. */
    llwrk = *lwork - (*n << 1) * *n;
    ks = 0;
    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {

/*        Determine whether condition numbers are required for the
          k-th eigenpair. */

	if (somcon) {
	    if (! select[k]) {
		goto L20;
	    }
	}
	++ks;
	if (wants) {

/*           Compute the reciprocal condition number of the k-th
             eigenvalue:  S = |(v'Au, v'Bu)| / (||u|| ||v||). */

	    rnrm = dznrm2_(n, &vr_ref(1, ks), &c__1);
	    lnrm = dznrm2_(n, &vl_ref(1, ks), &c__1);
	    zgemv_("N", n, n, &c_b19, &a[a_offset], lda, &vr_ref(1, ks), &
		    c__1, &c_b20, &work[1], &c__1);
	    zdotc_(&z__1, n, &work[1], &c__1, &vl_ref(1, ks), &c__1);
	    yhax.r = z__1.r, yhax.i = z__1.i;
	    zgemv_("N", n, n, &c_b19, &b[b_offset], ldb, &vr_ref(1, ks), &
		    c__1, &c_b20, &work[1], &c__1);
	    zdotc_(&z__1, n, &work[1], &c__1, &vl_ref(1, ks), &c__1);
	    yhbx.r = z__1.r, yhbx.i = z__1.i;
	    d__1 = z_abs(&yhax);
	    d__2 = z_abs(&yhbx);
	    cond = dlapy2_(&d__1, &d__2);
	    if (cond == 0.) {
		/* Both projections vanished: (A,B) is singular here. */
		s[ks] = -1.;
	    } else {
		s[ks] = cond / (rnrm * lnrm);
	    }
	}
	if (wantdf) {
	    if (*n == 1) {
		d__1 = z_abs(&a_ref(1, 1));
		d__2 = z_abs(&b_ref(1, 1));
		dif[ks] = dlapy2_(&d__1, &d__2);
		goto L20;
	    }

/*           Estimate the reciprocal condition number of the k-th
             eigenvectors.  Copy the pair (A, B) into the workspace
             (A in WORK(1:n*n), B in WORK(n*n+1:2*n*n)) and move the
             (k,k)-th eigenvalue pair to the (1,1) position. */

	    zlacpy_("Full", n, n, &a[a_offset], lda, &work[1], n);
	    zlacpy_("Full", n, n, &b[b_offset], ldb, &work[*n * *n + 1], n);
	    ifst = k;
	    ilst = 1;
	    ztgexc_(&c_false, &c_false, n, &work[1], n, &work[*n * *n + 1],
		    n, dummy, &c__1, dummy1, &c__1, &ifst, &ilst, &ierr);
	    if (ierr > 0) {

/*              Ill-conditioned problem - swap rejected.  Report a
                zero (worst-case) reciprocal condition number. */

		dif[ks] = 0.;
	    } else {

/*              Reordering successful, solve generalized Sylvester
                equation for R and L,
                           A22 * R - L * A11 = A12
                           B22 * R - L * B11 = B12,
                and compute estimate of Difl[(A11,B11), (A22, B22)]. */

		n1 = 1;
		n2 = *n - n1;
		i__ = *n * *n + 1;
		ztgsyl_("N", &c__3, &n2, &n1, &work[*n * n1 + n1 + 1], n, &
			work[1], n, &work[n1 + 1], n, &work[*n * n1 + n1 +
			i__], n, &work[i__], n, &work[n1 + i__], n, &scale, &
			dif[ks], &work[(*n * *n << 1) + 1], &llwrk, &iwork[1],
			&ierr);
	    }
	}
L20:
	;
    }
    work[1].r = (doublereal) lwmin, work[1].i = 0.;
    return 0;

/*     End of ZTGSNA */

} /* ztgsna_ */
/* Subroutine */ int zlatzm_(char *side, integer *m, integer *n,
	doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex *
	c1, doublecomplex *c2, integer *ldc, doublecomplex *work)
{
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    ZLATZM applies a Householder matrix generated by ZTZRQF to a matrix.

    Let P = I - tau*u*u',   u = ( 1 ),
                                ( v )
    where v is an (m-1) vector if SIDE = 'L', or an (n-1) vector if
    SIDE = 'R'.

    If SIDE equals 'L', let
           C = [ C1 ] 1
               [ C2 ] m-1
                 n
    Then C is overwritten by P*C.

    If SIDE equals 'R', let
           C = [ C1, C2 ] m
                  1  n-1
    Then C is overwritten by C*P.

    Arguments
    =========

    SIDE    (input) CHARACTER*1
            = 'L': form P * C
            = 'R': form C * P

    M       (input) INTEGER
            The number of rows of the matrix C.

    N       (input) INTEGER
            The number of columns of the matrix C.

    V       (input) COMPLEX*16 array, dimension
                    (1 + (M-1)*abs(INCV)) if SIDE = 'L'
                    (1 + (N-1)*abs(INCV)) if SIDE = 'R'
            The vector v in the representation of P.  V is not used
            if TAU = 0.

    INCV    (input) INTEGER
            The increment between elements of v.  INCV <> 0.

    TAU     (input) COMPLEX*16
            The value tau in the representation of P.

    C1      (input/output) COMPLEX*16 array, dimension
                           (LDC,N) if SIDE = 'L'
                           (M,1)   if SIDE = 'R'
            On entry, the n-vector C1 if SIDE = 'L', or the m-vector
            C1 if SIDE = 'R'.  On exit, the first row of P*C if
            SIDE = 'L', or the first column of C*P if SIDE = 'R'.

    C2      (input/output) COMPLEX*16 array, dimension
                           (LDC, N)   if SIDE = 'L'
                           (LDC, N-1) if SIDE = 'R'
            On entry, the (m-1) x n matrix C2 if SIDE = 'L', or the
            m x (n-1) matrix C2 if SIDE = 'R'.  On exit, rows 2:m of
            P*C if SIDE = 'L', or columns 2:m of C*P if SIDE = 'R'.

    LDC     (input) INTEGER
            The leading dimension of the arrays C1 and C2.
            LDC >= max(1,M).

    WORK    (workspace) COMPLEX*16 array, dimension
                        (N) if SIDE = 'L'
                        (M) if SIDE = 'R'

    =====================================================================

       Parameter adjustments
       Function Body */
    /* Table of constant values */
    static doublecomplex c_b1 = {1.,0.};
    static integer c__1 = 1;

    /* System generated locals */
    integer c1_dim1, c1_offset, c2_dim1, c2_offset, i__1;
    doublecomplex z__1;
    /* Local variables */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *,
	    doublecomplex *, integer *, doublecomplex *, integer *,
	    doublecomplex *, integer *), zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *),
	    zgeru_(integer *, integer *, doublecomplex *, doublecomplex *,
	    integer *, doublecomplex *, integer *, doublecomplex *, integer *)
	    , zcopy_(integer *, doublecomplex *, integer *, doublecomplex *,
	    integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *,
	    integer *, doublecomplex *, integer *), zlacgv_(integer *,
	    doublecomplex *, integer *);

/* 1-based accessors over the caller's 0-based arrays. */
#define V(I) v[(I)-1]
#define WORK(I) work[(I)-1]
#define C2(I,J) c2[(I)-1 + ((J)-1)* ( *ldc)]
#define C1(I,J) c1[(I)-1 + ((J)-1)* ( *ldc)]

    /* Quick return: empty C or tau = 0 means P = I. */
    if (min(*m,*n) == 0 || tau->r == 0. && tau->i == 0.) {
	return 0;
    }

    if (lsame_(side, "L")) {

/*        w := conjg( C1 + v' * C2 ) */

	zcopy_(n, &C1(1,1), ldc, &WORK(1), &c__1);
	zlacgv_(n, &WORK(1), &c__1);
	i__1 = *m - 1;
	zgemv_("Conjugate transpose", &i__1, n, &c_b1, &C2(1,1), ldc, &
		V(1), incv, &c_b1, &WORK(1), &c__1);

/*        [ C1 ] := [ C1 ] - tau* [ 1 ] * w'
          [ C2 ]    [ C2 ]        [ v ] */

	zlacgv_(n, &WORK(1), &c__1);
	z__1.r = -tau->r, z__1.i = -tau->i;
	zaxpy_(n, &z__1, &WORK(1), &c__1, &C1(1,1), ldc);
	i__1 = *m - 1;
	z__1.r = -tau->r, z__1.i = -tau->i;
	zgeru_(&i__1, n, &z__1, &V(1), incv, &WORK(1), &c__1, &C2(1,1),
		ldc);

    } else if (lsame_(side, "R")) {

/*        w := C1 + C2 * v */

	zcopy_(m, &C1(1,1), &c__1, &WORK(1), &c__1);
	i__1 = *n - 1;
	zgemv_("No transpose", m, &i__1, &c_b1, &C2(1,1), ldc, &V(1),
		incv, &c_b1, &WORK(1), &c__1);

/*        [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] */

	z__1.r = -tau->r, z__1.i = -tau->i;
	zaxpy_(m, &z__1, &WORK(1), &c__1, &C1(1,1), &c__1);
	i__1 = *n - 1;
	z__1.r = -tau->r, z__1.i = -tau->i;
	zgerc_(m, &i__1, &z__1, &WORK(1), &c__1, &V(1), incv, &C2(1,1),
		ldc);
    }

    return 0;

/*     End of ZLATZM */

} /* zlatzm_ */
/* Subroutine */ int zlarf_(char *side, integer *m, integer *n,
	doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex *
	c__, integer *ldc, doublecomplex *work)
{
    /* System generated locals */
    integer c_dim1, c_offset, i__1;
    doublecomplex z__1;
    /* Local variables */
    integer i__;
    logical applyleft;
    extern logical lsame_(char *, char *);
    integer lastc;
    extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *,
	    doublecomplex *, integer *, doublecomplex *, integer *,
	    doublecomplex *, integer *), zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *);
    integer lastv;
    extern integer ilazlc_(integer *, integer *, doublecomplex *, integer *),
	    ilazlr_(integer *, integer *, doublecomplex *, integer *);

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  ZLARF applies a complex elementary reflector H to a complex M-by-N */
/*  matrix C, from either the left or the right.  H is represented in */
/*  the form */

/*        H = I - tau * v * v' */

/*  where tau is a complex scalar and v is a complex vector. */

/*  If tau = 0, then H is taken to be the unit matrix. */

/*  To apply H' (the conjugate transpose of H), supply conjg(tau) */
/*  instead of tau. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'L': form  H * C */
/*          = 'R': form  C * H */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix C. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix C. */

/*  V       (input) COMPLEX*16 array, dimension */
/*             (1 + (M-1)*abs(INCV)) if SIDE = 'L' */
/*          or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */
/*          The vector v in the representation of H.  V is not used if */
/*          TAU = 0. */

/*  INCV    (input) INTEGER */
/*          The increment between elements of v.  INCV <> 0. */

/*  TAU     (input) COMPLEX*16 */
/*          The value tau in the representation of H. */

/*  C       (input/output) COMPLEX*16 array, dimension (LDC,N) */
/*          On entry, the M-by-N matrix C. */
/*          On exit, C is overwritten by the matrix H * C if */
/*          SIDE = 'L', or C * H if SIDE = 'R'. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C.  LDC >= max(1,M). */

/*  WORK    (workspace) COMPLEX*16 array, dimension */
/*                         (N) if SIDE = 'L' */
/*                      or (M) if SIDE = 'R' */

/*  ===================================================================== */

    /* Parameter adjustments: shift pointers for 1-based indexing. */
    --v;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    applyleft = lsame_(side, "L");
    lastv = 0;
    lastc = 0;
    if (tau->r != 0. || tau->i != 0.) {

/*        Set up variables for scanning V.  LASTV begins pointing to */
/*        the end of V. */

	if (applyleft) {
	    lastv = *m;
	} else {
	    lastv = *n;
	}
	/* i__ starts at the storage index of v(lastv); with a negative
	   INCV the vector is stored backwards, so start at element 1. */
	if (*incv > 0) {
	    i__ = (lastv - 1) * *incv + 1;
	} else {
	    i__ = 1;
	}
/*        Look for the last non-zero row in V. */
	for(;;) { /* while(complicated condition) */
	    i__1 = i__;
	    if (!(lastv > 0 && (v[i__1].r == 0. && v[i__1].i == 0.)))
	    	break;
	    --lastv;
	    i__ -= *incv;
	}
	if (applyleft) {
/*        Scan for the last non-zero column in C(1:lastv,:). */
	    lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc);
	} else {
/*        Scan for the last non-zero row in C(:,1:lastv). */
	    lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc);
	}
    }
/*     Note that lastc.eq.0 renders the BLAS operations null; no special */
/*     case is needed at this level. */
    if (applyleft) {

/*        Form  H * C */

	if (lastv > 0) {

/*           w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */

	    zgemv_("Conjugate transpose", &lastv, &lastc, &c_b1, &c__[
		    c_offset], ldc, &v[1], incv, &c_b2, &work[1], &c__1);

/*           C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */

	    z__1.r = -tau->r, z__1.i = -tau->i;
	    zgerc_(&lastv, &lastc, &z__1, &v[1], incv, &work[1], &c__1, &c__[
		    c_offset], ldc);
	}
    } else {

/*        Form  C * H */

	if (lastv > 0) {

/*           w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */

	    zgemv_("No transpose", &lastc, &lastv, &c_b1, &c__[c_offset],
		    ldc, &v[1], incv, &c_b2, &work[1], &c__1);

/*           C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */

	    z__1.r = -tau->r, z__1.i = -tau->i;
	    zgerc_(&lastc, &lastv, &z__1, &work[1], &c__1, &v[1], incv, &c__[
		    c_offset], ldc);
	}
    }
    return 0;

/*     End of ZLARF */

} /* zlarf_ */
int zlatrd_(char *uplo, int *n, int *nb, doublecomplex *a, int *lda,
	double *e, doublecomplex *tau, doublecomplex *w, int *ldw)
{
    /* System generated locals */
    int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
    double d__1;
    doublecomplex z__1, z__2, z__3, z__4;
    /* Local variables */
    int i__, iw;
    doublecomplex alpha;
    extern int lsame_(char *, char *);
    extern int zscal_(int *, doublecomplex *, doublecomplex *, int *);
    extern /* Double Complex */ VOID zdotc_(doublecomplex *, int *,
	    doublecomplex *, int *, doublecomplex *, int *);
    extern int zgemv_(char *, int *, int *, doublecomplex *, doublecomplex *,
	    int *, doublecomplex *, int *, doublecomplex *, doublecomplex *,
	    int *), zhemv_(char *, int *, doublecomplex *, doublecomplex *,
	    int *, doublecomplex *, int *, doublecomplex *, doublecomplex *,
	    int *), zaxpy_(int *, doublecomplex *, doublecomplex *, int *,
	    doublecomplex *, int *), zlarfg_(int *, doublecomplex *,
	    doublecomplex *, int *, doublecomplex *), zlacgv_(int *,
	    doublecomplex *, int *);

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  ZLATRD reduces NB rows and columns of a complex Hermitian matrix A */
/*  to Hermitian tridiagonal form by a unitary similarity */
/*  transformation Q' * A * Q, and returns the matrices V and W which */
/*  are needed to apply the transformation to the unreduced part of A. */

/*  If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a */
/*  matrix, of which the upper triangle is supplied; */
/*  if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a */
/*  matrix, of which the lower triangle is supplied. */

/*  This is an auxiliary routine called by ZHETRD. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the upper or lower triangular part of */
/*          the Hermitian matrix A is stored: */
/*          = 'U': Upper triangular */
/*          = 'L': Lower triangular */

/*  N       (input) INTEGER */
/*          The order of the matrix A. */

/*  NB      (input) INTEGER */
/*          The number of rows and columns to be reduced. */

/*  A       (input/output) COMPLEX*16 array, dimension (LDA,N) */
/*          On entry, the Hermitian matrix A (upper or lower triangle */
/*          per UPLO; the opposite strict triangle is not referenced). */
/*          On exit, the last NB (UPLO='U') or first NB (UPLO='L') */
/*          columns have been reduced to tridiagonal form; the */
/*          off-diagonal elements, with the array TAU, represent the */
/*          unitary matrix Q as a product of elementary reflectors. */
/*          See Further Details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= MAX(1,N). */

/*  E       (output) DOUBLE PRECISION array, dimension (N-1) */
/*          If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal */
/*          elements of the last NB columns of the reduced matrix; */
/*          if UPLO = 'L', E(1:nb) contains the subdiagonal elements */
/*          of the first NB columns of the reduced matrix. */

/*  TAU     (output) COMPLEX*16 array, dimension (N-1) */
/*          The scalar factors of the elementary reflectors, stored in */
/*          TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if */
/*          UPLO = 'L'.  See Further Details. */

/*  W       (output) COMPLEX*16 array, dimension (LDW,NB) */
/*          The n-by-nb matrix W required to update the unreduced part */
/*          of A. */

/*  LDW     (input) INTEGER */
/*          The leading dimension of the array W.  LDW >= MAX(1,N). */

/*  Further Details */
/*  =============== */

/*  If UPLO = 'U', Q = H(n) H(n-1) . . . H(n-nb+1); each H(i) has the */
/*  form H(i) = I - tau * v * v', with v(i:n) = 0, v(i-1) = 1; */
/*  v(1:i-1) is stored on exit in A(1:i-1,i), and tau in TAU(i-1). */

/*  If UPLO = 'L', Q = H(1) H(2) . . . H(nb); each H(i) has the form */
/*  H(i) = I - tau * v * v', with v(1:i) = 0, v(i+1) = 1; v(i+1:n) is */
/*  stored on exit in A(i+1:n,i), and tau in TAU(i). */

/*  The vectors v form the n-by-nb matrix V which is needed, with W, */
/*  to apply the transformation to the unreduced part of the matrix, */
/*  using a Hermitian rank-2k update of the form: */
/*  A := A - V*W' - W*V'. */

/*  The contents of A on exit are illustrated by the following */
/*  examples with n = 5 and nb = 2: */

/*  if UPLO = 'U':                       if UPLO = 'L': */

/*    (  a   a   a   v4  v5 )              (  d                  ) */
/*    (      a   a   v4  v5 )              (  1   d              ) */
/*    (          a   1   v5 )              (  v1  1   a          ) */
/*    (              d   1  )              (  v1  v2  a   a      ) */
/*    (                  d  )              (  v1  v2  a   a   a  ) */

/*  where d denotes a diagonal element of the reduced matrix, a */
/*  denotes an element of the original matrix that is unchanged, and */
/*  vi denotes an element of the vector defining H(i). */

/*  ===================================================================== */

/*     Quick return if possible */

    /* Parameter adjustments: shift pointers for 1-based indexing. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --e;
    --tau;
    w_dim1 = *ldw;
    w_offset = 1 + w_dim1;
    w -= w_offset;

    /* Function Body */
    if (*n <= 0) {
	return 0;
    }

    if (lsame_(uplo, "U")) {

/*        Reduce last NB columns of upper triangle, working from */
/*        column n down; iw is the corresponding column of W. */

	i__1 = *n - *nb + 1;
	for (i__ = *n; i__ >= i__1; --i__) {
	    iw = i__ - *n + *nb;
	    if (i__ < *n) {

/*              Update A(1:i,i) with the reflectors found so far: */
/*              A(1:i,i) -= A(:,i+1:n)*conj(W(i,iw+1:nb)) */
/*                        + W(:,iw+1:nb)*conj(A(i,i+1:n)). */
/*              The conjugations are done in place and then undone. */

		i__2 = i__ + i__ * a_dim1;
		i__3 = i__ + i__ * a_dim1;
		d__1 = a[i__3].r;
		/* Force the diagonal to be real (Hermitian invariant). */
		a[i__2].r = d__1, a[i__2].i = 0.;
		i__2 = *n - i__;
		zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw);
		i__2 = *n - i__;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__, &i__2, &z__1, &a[(i__ + 1) *
			a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, &
			c_b2, &a[i__ * a_dim1 + 1], &c__1);
		i__2 = *n - i__;
		zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw);
		i__2 = *n - i__;
		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
		i__2 = *n - i__;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__, &i__2, &z__1, &w[(iw + 1) *
			w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, &
			c_b2, &a[i__ * a_dim1 + 1], &c__1);
		i__2 = *n - i__;
		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
		i__2 = i__ + i__ * a_dim1;
		i__3 = i__ + i__ * a_dim1;
		d__1 = a[i__3].r;
		a[i__2].r = d__1, a[i__2].i = 0.;
	    }
	    if (i__ > 1) {

/*              Generate elementary reflector H(i) to annihilate */
/*              A(1:i-2,i) */

		i__2 = i__ - 1 + i__ * a_dim1;
		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
		i__2 = i__ - 1;
		zlarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__
			- 1]);
		i__2 = i__ - 1;
		e[i__2] = alpha.r;
		i__2 = i__ - 1 + i__ * a_dim1;
		a[i__2].r = 1., a[i__2].i = 0.;

/*              Compute W(1:i-1,i) */

		i__2 = i__ - 1;
		zhemv_("Upper", &i__2, &c_b2, &a[a_offset], lda, &a[i__ *
			a_dim1 + 1], &c__1, &c_b1, &w[iw * w_dim1 + 1], &c__1);
		if (i__ < *n) {
		    i__2 = i__ - 1;
		    i__3 = *n - i__;
		    zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &w[(
			    iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1],
			    &c__1, &c_b1, &w[i__ + 1 + iw * w_dim1], &c__1);
		    i__2 = i__ - 1;
		    i__3 = *n - i__;
		    z__1.r = -1., z__1.i = -0.;
		    zgemv_("No transpose", &i__2, &i__3, &z__1, &a[(i__ + 1)
			    * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], &
			    c__1, &c_b2, &w[iw * w_dim1 + 1], &c__1);
		    i__2 = i__ - 1;
		    i__3 = *n - i__;
		    zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[(
			    i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1],
			    &c__1, &c_b1, &w[i__ + 1 + iw * w_dim1], &c__1);
		    i__2 = i__ - 1;
		    i__3 = *n - i__;
		    z__1.r = -1., z__1.i = -0.;
		    zgemv_("No transpose", &i__2, &i__3, &z__1, &w[(iw + 1) *
			     w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], &
			    c__1, &c_b2, &w[iw * w_dim1 + 1], &c__1);
		}
		i__2 = i__ - 1;
		zscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1);

		/* alpha = -(1/2) * tau(i-1) * w' * a(:,i); then */
		/* w := w + alpha*a(:,i) restores W's defining relation. */
		z__3.r = -.5, z__3.i = -0.;
		i__2 = i__ - 1;
		z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i =
			 z__3.r * tau[i__2].i + z__3.i * tau[i__2].r;
		i__3 = i__ - 1;
		zdotc_(&z__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ *
			a_dim1 + 1], &c__1);
		z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r *
			z__4.i + z__2.i * z__4.r;
		alpha.r = z__1.r, alpha.i = z__1.i;
		i__2 = i__ - 1;
		zaxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw *
			w_dim1 + 1], &c__1);
	    }
/* L10: */
	}
    } else {

/*        Reduce first NB columns of lower triangle */

	i__1 = *nb;
	for (i__ = 1; i__ <= i__1; ++i__) {

/*           Update A(i:n,i) */

	    i__2 = i__ + i__ * a_dim1;
	    i__3 = i__ + i__ * a_dim1;
	    d__1 = a[i__3].r;
	    /* Force the diagonal to be real (Hermitian invariant). */
	    a[i__2].r = d__1, a[i__2].i = 0.;
	    i__2 = i__ - 1;
	    zlacgv_(&i__2, &w[i__ + w_dim1], ldw);
	    i__2 = *n - i__ + 1;
	    i__3 = i__ - 1;
	    z__1.r = -1., z__1.i = -0.;
	    zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1],
		    lda, &w[i__ + w_dim1], ldw, &c_b2, &a[i__ + i__ * a_dim1],
		    &c__1);
	    i__2 = i__ - 1;
	    zlacgv_(&i__2, &w[i__ + w_dim1], ldw);
	    i__2 = i__ - 1;
	    zlacgv_(&i__2, &a[i__ + a_dim1], lda);
	    i__2 = *n - i__ + 1;
	    i__3 = i__ - 1;
	    z__1.r = -1., z__1.i = -0.;
	    zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + w_dim1],
		    ldw, &a[i__ + a_dim1], lda, &c_b2, &a[i__ + i__ * a_dim1],
		    &c__1);
	    i__2 = i__ - 1;
	    zlacgv_(&i__2, &a[i__ + a_dim1], lda);
	    i__2 = i__ + i__ * a_dim1;
	    i__3 = i__ + i__ * a_dim1;
	    d__1 = a[i__3].r;
	    a[i__2].r = d__1, a[i__2].i = 0.;
	    if (i__ < *n) {

/*              Generate elementary reflector H(i) to annihilate */
/*              A(i+2:n,i) */

		i__2 = i__ + 1 + i__ * a_dim1;
		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
		i__2 = *n - i__;
/*              Computing MIN */
		i__3 = i__ + 2;
		zlarfg_(&i__2, &alpha, &a[MIN(i__3, *n)+ i__ * a_dim1], &
			c__1, &tau[i__]);
		i__2 = i__;
		e[i__2] = alpha.r;
		i__2 = i__ + 1 + i__ * a_dim1;
		a[i__2].r = 1., a[i__2].i = 0.;

/*              Compute W(i+1:n,i) */

		i__2 = *n - i__;
		zhemv_("Lower", &i__2, &c_b2, &a[i__ + 1 + (i__ + 1) * a_dim1]
			, lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b1, &w[
			i__ + 1 + i__ * w_dim1], &c__1);
		i__2 = *n - i__;
		i__3 = i__ - 1;
		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &w[i__ + 1
			+ w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &
			c_b1, &w[i__ * w_dim1 + 1], &c__1);
		i__2 = *n - i__;
		i__3 = i__ - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 +
			a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b2, &w[
			i__ + 1 + i__ * w_dim1], &c__1);
		i__2 = *n - i__;
		i__3 = i__ - 1;
		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[i__ + 1
			+ a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
			c_b1, &w[i__ * w_dim1 + 1], &c__1);
		i__2 = *n - i__;
		i__3 = i__ - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + 1 +
			w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b2, &w[
			i__ + 1 + i__ * w_dim1], &c__1);
		i__2 = *n - i__;
		zscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1);

		/* alpha = -(1/2) * tau(i) * w' * a(:,i); then */
		/* w := w + alpha*a(:,i) restores W's defining relation. */
		z__3.r = -.5, z__3.i = -0.;
		i__2 = i__;
		z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i =
			 z__3.r * tau[i__2].i + z__3.i * tau[i__2].r;
		i__3 = *n - i__;
		zdotc_(&z__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[
			i__ + 1 + i__ * a_dim1], &c__1);
		z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r *
			z__4.i + z__2.i * z__4.r;
		alpha.r = z__1.r, alpha.i = z__1.i;
		i__2 = *n - i__;
		zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[
			i__ + 1 + i__ * w_dim1], &c__1);
	    }
/* L20: */
	}
    }

    return 0;

/*     End of ZLATRD */

} /* zlatrd_ */
/* Subroutine */ int ztgevc_(char *side, char *howmny, logical *select,
	integer *n, doublecomplex *s, integer *lds, doublecomplex *p, integer
	*ldp, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *
	ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork,
	integer *info)
{
    /* System generated locals */
    integer p_dim1, p_offset, s_dim1, s_offset, vl_dim1, vl_offset, vr_dim1,
	    vr_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3, d__4, d__5, d__6;
    doublecomplex z__1, z__2, z__3, z__4;

    /* Local variables */
    doublecomplex d__;
    integer i__, j;
    doublecomplex ca, cb;
    integer je, im, jr;
    doublereal big;
    logical lsa, lsb;
    doublereal ulp;
    doublecomplex sum;
    integer ibeg, ieig, iend;
    doublereal dmin__;
    integer isrc;
    doublereal temp;
    doublecomplex suma, sumb;
    doublereal xmax, scale;
    logical ilall;
    integer iside;
    doublereal sbeta;
    doublereal small;
    logical compl;
    doublereal anorm, bnorm;
    logical compr;
    logical ilbbad;
    doublereal acoefa, bcoefa, acoeff;
    doublecomplex bcoeff;
    logical ilback;
    doublereal ascale, bscale;
    doublecomplex salpha;
    doublereal safmin;
    doublereal bignum;
    logical ilcomp;
    integer ihwmny;

/*  -- LAPACK routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  ZTGEVC computes some or all of the right and/or left eigenvectors of */
/*  a pair of complex matrices (S,P), where S and P are upper triangular. */
/*  Matrix pairs of this type are produced by the generalized Schur */
/*  factorization of a complex matrix pair (A,B): */

/*     A = Q*S*Z**H,  B = Q*P*Z**H */

/*  as computed by ZGGHRD + ZHGEQZ. */

/*  The right eigenvector x and the left eigenvector y of (S,P) */
/*  corresponding to an eigenvalue w are defined by: */

/*     S*x = w*P*x,  (y**H)*S = w*(y**H)*P, */

/*  where y**H denotes the conjugate transpose of y. */
/*  The eigenvalues are not input to this routine, but are computed */
/*  directly from the diagonal elements of S and P. */

/*  This routine returns the matrices X and/or Y of right and left */
/*  eigenvectors of (S,P), or the products Z*X and/or Q*Y, */
/*  where Z and Q are input matrices. */
/*  If Q and Z are the unitary factors from the generalized Schur */
/*  factorization of a matrix pair (A,B), then Z*X and Q*Y */
/*  are the matrices of right and left eigenvectors of (A,B). */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'R': compute right eigenvectors only; */
/*          = 'L': compute left eigenvectors only; */
/*          = 'B': compute both right and left eigenvectors. */

/*  HOWMNY  (input) CHARACTER*1 */
/*          = 'A': compute all right and/or left eigenvectors; */
/*          = 'B': compute all right and/or left eigenvectors, */
/*                 backtransformed by the matrices in VR and/or VL; */
/*          = 'S': compute selected right and/or left eigenvectors, */
/*                 specified by the logical array SELECT. */

/*  SELECT  (input) LOGICAL array, dimension (N) */
/*          If HOWMNY='S', SELECT specifies the eigenvectors to be */
/*          computed.  The eigenvector corresponding to the j-th */
/*          eigenvalue is computed if SELECT(j) = .TRUE.. */
/*          Not referenced if HOWMNY = 'A' or 'B'. */

/*  N       (input) INTEGER */
/*          The order of the matrices S and P.  N >= 0. */

/*  S       (input) COMPLEX*16 array, dimension (LDS,N) */
/*          The upper triangular matrix S from a generalized Schur */
/*          factorization, as computed by ZHGEQZ. */

/*  LDS     (input) INTEGER */
/*          The leading dimension of array S.  LDS >= max(1,N). */

/*  P       (input) COMPLEX*16 array, dimension (LDP,N) */
/*          The upper triangular matrix P from a generalized Schur */
/*          factorization, as computed by ZHGEQZ.  P must have real */
/*          diagonal elements. */

/*  LDP     (input) INTEGER */
/*          The leading dimension of array P.  LDP >= max(1,N). */

/*  VL      (input/output) COMPLEX*16 array, dimension (LDVL,MM) */
/*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must */
/*          contain an N-by-N matrix Q (usually the unitary matrix Q */
/*          of left Schur vectors returned by ZHGEQZ). */
/*          On exit, if SIDE = 'L' or 'B', VL contains: */
/*          if HOWMNY = 'A', the matrix Y of left eigenvectors of (S,P); */
/*          if HOWMNY = 'B', the matrix Q*Y; */
/*          if HOWMNY = 'S', the left eigenvectors of (S,P) specified by */
/*                      SELECT, stored consecutively in the columns of */
/*                      VL, in the same order as their eigenvalues. */
/*          Not referenced if SIDE = 'R'. */

/*  LDVL    (input) INTEGER */
/*          The leading dimension of array VL.  LDVL >= 1, and if */
/*          SIDE = 'L' or 'l' or 'B' or 'b', LDVL >= N. */

/*  VR      (input/output) COMPLEX*16 array, dimension (LDVR,MM) */
/*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must */
/*          contain an N-by-N matrix Z (usually the unitary matrix Z */
/*          of right Schur vectors returned by ZHGEQZ). */
/*          On exit, if SIDE = 'R' or 'B', VR contains: */
/*          if HOWMNY = 'A', the matrix X of right eigenvectors of (S,P); */
/*          if HOWMNY = 'B', the matrix Z*X; */
/*          if HOWMNY = 'S', the right eigenvectors of (S,P) specified by */
/*                      SELECT, stored consecutively in the columns of */
/*                      VR, in the same order as their eigenvalues. */
/*          Not referenced if SIDE = 'L'. */

/*  LDVR    (input) INTEGER */
/*          The leading dimension of the array VR.  LDVR >= 1, and if */
/*          SIDE = 'R' or 'B', LDVR >= N. */

/*  MM      (input) INTEGER */
/*          The number of columns in the arrays VL and/or VR. MM >= M. */

/*  M       (output) INTEGER */
/*          The number of columns in the arrays VL and/or VR actually */
/*          used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M */
/*          is set to N.  Each selected eigenvector occupies one column. */

/*  WORK    (workspace) COMPLEX*16 array, dimension (2*N) */

/*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  ===================================================================== */

/*     Decode and Test the input parameters */

    /* Parameter adjustments: shift the f2c 0-based C pointers so the */
    /* arrays can be indexed with the 1-based Fortran subscripts used below. */
    --select;
    s_dim1 = *lds;
    s_offset = 1 + s_dim1;
    s -= s_offset;
    p_dim1 = *ldp;
    p_offset = 1 + p_dim1;
    p -= p_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1;
    vr -= vr_offset;
    --work;
    --rwork;

    /* Function Body */
    if (lsame_(howmny, "A")) {
	ihwmny = 1;
	ilall = TRUE_;
	ilback = FALSE_;
    } else if (lsame_(howmny, "S")) {
	ihwmny = 2;
	ilall = FALSE_;
	ilback = FALSE_;
    } else if (lsame_(howmny, "B")) {
	ihwmny = 3;
	ilall = TRUE_;
	ilback = TRUE_;
    } else {
	ihwmny = -1;
    }

    if (lsame_(side, "R")) {
	iside = 1;
	compl = FALSE_;
	compr = TRUE_;
    } else if (lsame_(side, "L")) {
	iside = 2;
	compl = TRUE_;
	compr = FALSE_;
    } else if (lsame_(side, "B")) {
	iside = 3;
	compl = TRUE_;
	compr = TRUE_;
    } else {
	iside = -1;
    }

    *info = 0;
    if (iside < 0) {
	*info = -1;
    } else if (ihwmny < 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lds < max(1,*n)) {
	*info = -6;
    } else if (*ldp < max(1,*n)) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTGEVC", &i__1);
	return 0;
    }

/*     Count the number of eigenvectors */

    if (! ilall) {
	im = 0;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    if (select[j]) {
		++im;
	    }
	}
    } else {
	im = *n;
    }

/*     Check diagonal of B: P must have real diagonal entries */

    ilbbad = FALSE_;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	if (d_imag(&p[j + j * p_dim1]) != 0.) {
	    ilbbad = TRUE_;
	}
    }

    if (ilbbad) {
	*info = -7;
    } else if (compl && *ldvl < *n || *ldvl < 1) {
	*info = -10;
    } else if (compr && *ldvr < *n || *ldvr < 1) {
	*info = -12;
    } else if (*mm < im) {
	*info = -13;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTGEVC", &i__1);
	return 0;
    }

/*     Quick return if possible */

    *m = im;
    if (*n == 0) {
	return 0;
    }

/*     Machine Constants */

    safmin = dlamch_("Safe minimum");
    big = 1. / safmin;
    dlabad_(&safmin, &big);
    ulp = dlamch_("Epsilon") * dlamch_("Base");
    small = safmin * *n / ulp;
    big = 1. / small;
    bignum = 1. / (safmin * *n);

/*     Compute the 1-norm of each column of the strictly upper triangular */
/*     part of A and B to check for possible overflow in the triangular */
/*     solver. */

    i__1 = s_dim1 + 1;
    anorm = (d__1 = s[i__1].r, abs(d__1)) + (d__2 = d_imag(&s[s_dim1 + 1]),
	    abs(d__2));
    i__1 = p_dim1 + 1;
    bnorm = (d__1 = p[i__1].r, abs(d__1)) + (d__2 = d_imag(&p[p_dim1 + 1]),
	    abs(d__2));
    rwork[1] = 0.;
    rwork[*n + 1] = 0.;
    i__1 = *n;
    for (j = 2; j <= i__1; ++j) {
	/* rwork[j] and rwork[n+j] accumulate the strict-upper 1-norms of */
	/* column j of S and P respectively. */
	rwork[j] = 0.;
	rwork[*n + j] = 0.;
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * s_dim1;
	    rwork[j] += (d__1 = s[i__3].r, abs(d__1)) + (d__2 = d_imag(&s[i__
		    + j * s_dim1]), abs(d__2));
	    i__3 = i__ + j * p_dim1;
	    rwork[*n + j] += (d__1 = p[i__3].r, abs(d__1)) + (d__2 = d_imag(&
		    p[i__ + j * p_dim1]), abs(d__2));
	}
/* Computing MAX */
	i__2 = j + j * s_dim1;
	d__3 = anorm, d__4 = rwork[j] + ((d__1 = s[i__2].r, abs(d__1)) + (
		d__2 = d_imag(&s[j + j * s_dim1]), abs(d__2)));
	anorm = max(d__3,d__4);
/* Computing MAX */
	i__2 = j + j * p_dim1;
	d__3 = bnorm, d__4 = rwork[*n + j] + ((d__1 = p[i__2].r, abs(d__1)) +
		(d__2 = d_imag(&p[j + j * p_dim1]), abs(d__2)));
	bnorm = max(d__3,d__4);
    }

    ascale = 1. / max(anorm,safmin);
    bscale = 1. / max(bnorm,safmin);

/*     Left eigenvectors */

    if (compl) {
	ieig = 0;

/*        Main loop over eigenvalues */

	i__1 = *n;
	for (je = 1; je <= i__1; ++je) {
	    if (ilall) {
		ilcomp = TRUE_;
	    } else {
		ilcomp = select[je];
	    }
	    if (ilcomp) {
		++ieig;

		i__2 = je + je * s_dim1;
		i__3 = je + je * p_dim1;
		if ((d__2 = s[i__2].r, abs(d__2)) + (d__3 = d_imag(&s[je +
			je * s_dim1]), abs(d__3)) <= safmin && (d__1 = p[
			i__3].r, abs(d__1)) <= safmin) {

/*                 Singular matrix pencil -- return unit eigenvector */

		    i__2 = *n;
		    for (jr = 1; jr <= i__2; ++jr) {
			i__3 = jr + ieig * vl_dim1;
			vl[i__3].r = 0., vl[i__3].i = 0.;
		    }
		    i__2 = ieig + ieig * vl_dim1;
		    vl[i__2].r = 1., vl[i__2].i = 0.;
		    goto L140;
		}

/*              Non-singular eigenvalue: */
/*              Compute coefficients  a  and  b  in */
/*                   H */
/*                  y  ( a A - b B ) = 0 */

/* Computing MAX */
		i__2 = je + je * s_dim1;
		i__3 = je + je * p_dim1;
		d__4 = ((d__2 = s[i__2].r, abs(d__2)) + (d__3 = d_imag(&s[je
			+ je * s_dim1]), abs(d__3))) * ascale, d__5 = (d__1 =
			p[i__3].r, abs(d__1)) * bscale, d__4 = max(d__4,d__5);
		temp = 1. / max(d__4,safmin);
		i__2 = je + je * s_dim1;
		z__2.r = temp * s[i__2].r, z__2.i = temp * s[i__2].i;
		z__1.r = ascale * z__2.r, z__1.i = ascale * z__2.i;
		salpha.r = z__1.r, salpha.i = z__1.i;
		i__2 = je + je * p_dim1;
		sbeta = temp * p[i__2].r * bscale;
		acoeff = sbeta * ascale;
		z__1.r = bscale * salpha.r, z__1.i = bscale * salpha.i;
		bcoeff.r = z__1.r, bcoeff.i = z__1.i;

/*              Scale to avoid underflow */

		lsa = abs(sbeta) >= safmin && abs(acoeff) < small;
		lsb = (d__1 = salpha.r, abs(d__1)) + (d__2 = d_imag(&salpha),
			abs(d__2)) >= safmin && (d__3 = bcoeff.r, abs(d__3))
			+ (d__4 = d_imag(&bcoeff), abs(d__4)) < small;

		scale = 1.;
		if (lsa) {
		    scale = small / abs(sbeta) * min(anorm,big);
		}
		if (lsb) {
/* Computing MAX */
		    d__3 = scale, d__4 = small / ((d__1 = salpha.r, abs(d__1)
			    ) + (d__2 = d_imag(&salpha), abs(d__2))) * min(
			    bnorm,big);
		    scale = max(d__3,d__4);
		}
		if (lsa || lsb) {
/* Computing MIN */
/* Computing MAX */
		    d__5 = 1., d__6 = abs(acoeff), d__5 = max(d__5,d__6),
			    d__6 = (d__1 = bcoeff.r, abs(d__1)) + (d__2 =
			    d_imag(&bcoeff), abs(d__2));
		    d__3 = scale, d__4 = 1. / (safmin * max(d__5,d__6));
		    scale = min(d__3,d__4);
		    if (lsa) {
			acoeff = ascale * (scale * sbeta);
		    } else {
			acoeff = scale * acoeff;
		    }
		    if (lsb) {
			z__2.r = scale * salpha.r, z__2.i = scale * salpha.i;
			z__1.r = bscale * z__2.r, z__1.i = bscale * z__2.i;
			bcoeff.r = z__1.r, bcoeff.i = z__1.i;
		    } else {
			z__1.r = scale * bcoeff.r, z__1.i = scale * bcoeff.i;
			bcoeff.r = z__1.r, bcoeff.i = z__1.i;
		    }
		}

		acoefa = abs(acoeff);
		bcoefa = (d__1 = bcoeff.r, abs(d__1)) + (d__2 = d_imag(&
			bcoeff), abs(d__2));
		xmax = 1.;
		i__2 = *n;
		for (jr = 1; jr <= i__2; ++jr) {
		    i__3 = jr;
		    work[i__3].r = 0., work[i__3].i = 0.;
		}
		i__2 = je;
		work[i__2].r = 1., work[i__2].i = 0.;
/* Computing MAX */
		d__1 = ulp * acoefa * anorm, d__2 = ulp * bcoefa * bnorm,
			d__1 = max(d__1,d__2);
		dmin__ = max(d__1,safmin);

/*                                              H */
/*              Triangular solve of  (a A - b B)  y = 0 */

/*                                      H */
/*              (rowwise in  (a A - b B) , or columnwise in a A - b B) */

		i__2 = *n;
		for (j = je + 1; j <= i__2; ++j) {

/*                 Compute */
/*                       j-1 */
/*                 SUM = sum  conjg( a*S(k,j) - b*P(k,j) )*x(k) */
/*                       k=je */
/*                 (Scale if necessary) */

		    temp = 1. / xmax;
		    if (acoefa * rwork[j] + bcoefa * rwork[*n + j] > bignum *
			    temp) {
			i__3 = j - 1;
			for (jr = je; jr <= i__3; ++jr) {
			    i__4 = jr;
			    i__5 = jr;
			    z__1.r = temp * work[i__5].r, z__1.i = temp *
				    work[i__5].i;
			    work[i__4].r = z__1.r, work[i__4].i = z__1.i;
			}
			xmax = 1.;
		    }
		    suma.r = 0., suma.i = 0.;
		    sumb.r = 0., sumb.i = 0.;

		    i__3 = j - 1;
		    for (jr = je; jr <= i__3; ++jr) {
			d_cnjg(&z__3, &s[jr + j * s_dim1]);
			i__4 = jr;
			z__2.r = z__3.r * work[i__4].r - z__3.i * work[i__4]
				.i, z__2.i = z__3.r * work[i__4].i + z__3.i *
				work[i__4].r;
			z__1.r = suma.r + z__2.r, z__1.i = suma.i + z__2.i;
			suma.r = z__1.r, suma.i = z__1.i;
			d_cnjg(&z__3, &p[jr + j * p_dim1]);
			i__4 = jr;
			z__2.r = z__3.r * work[i__4].r - z__3.i * work[i__4]
				.i, z__2.i = z__3.r * work[i__4].i + z__3.i *
				work[i__4].r;
			z__1.r = sumb.r + z__2.r, z__1.i = sumb.i + z__2.i;
			sumb.r = z__1.r, sumb.i = z__1.i;
		    }
		    z__2.r = acoeff * suma.r, z__2.i = acoeff * suma.i;
		    d_cnjg(&z__4, &bcoeff);
		    z__3.r = z__4.r * sumb.r - z__4.i * sumb.i, z__3.i =
			    z__4.r * sumb.i + z__4.i * sumb.r;
		    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
		    sum.r = z__1.r, sum.i = z__1.i;

/*                 Form x(j) = - SUM / conjg( a*S(j,j) - b*P(j,j) ) */
/*                 with scaling and perturbation of the denominator */

		    i__3 = j + j * s_dim1;
		    z__3.r = acoeff * s[i__3].r, z__3.i = acoeff * s[i__3].i;
		    i__4 = j + j * p_dim1;
		    z__4.r = bcoeff.r * p[i__4].r - bcoeff.i * p[i__4].i,
			    z__4.i = bcoeff.r * p[i__4].i + bcoeff.i * p[
			    i__4].r;
		    z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i;
		    d_cnjg(&z__1, &z__2);
		    d__.r = z__1.r, d__.i = z__1.i;
		    if ((d__1 = d__.r, abs(d__1)) + (d__2 = d_imag(&d__),
			    abs(d__2)) <= dmin__) {
			z__1.r = dmin__, z__1.i = 0.;
			d__.r = z__1.r, d__.i = z__1.i;
		    }

		    if ((d__1 = d__.r, abs(d__1)) + (d__2 = d_imag(&d__),
			    abs(d__2)) < 1.) {
			if ((d__1 = sum.r, abs(d__1)) + (d__2 = d_imag(&sum),
				abs(d__2)) >= bignum * ((d__3 = d__.r, abs(
				d__3)) + (d__4 = d_imag(&d__), abs(d__4)))) {
			    temp = 1. / ((d__1 = sum.r, abs(d__1)) + (d__2 =
				    d_imag(&sum), abs(d__2)));
			    i__3 = j - 1;
			    for (jr = je; jr <= i__3; ++jr) {
				i__4 = jr;
				i__5 = jr;
				z__1.r = temp * work[i__5].r, z__1.i = temp *
					 work[i__5].i;
				work[i__4].r = z__1.r, work[i__4].i = z__1.i;
			    }
			    xmax = temp * xmax;
			    z__1.r = temp * sum.r, z__1.i = temp * sum.i;
			    sum.r = z__1.r, sum.i = z__1.i;
			}
		    }
		    i__3 = j;
		    z__2.r = -sum.r, z__2.i = -sum.i;
		    zladiv_(&z__1, &z__2, &d__);
		    work[i__3].r = z__1.r, work[i__3].i = z__1.i;
/* Computing MAX */
		    i__3 = j;
		    d__3 = xmax, d__4 = (d__1 = work[i__3].r, abs(d__1)) + (
			    d__2 = d_imag(&work[j]), abs(d__2));
		    xmax = max(d__3,d__4);
		}

/*              Back transform eigenvector if HOWMNY='B'. */

		if (ilback) {
		    i__2 = *n + 1 - je;
		    zgemv_("N", n, &i__2, &c_b2, &vl[je * vl_dim1 + 1], ldvl,
			     &work[je], &c__1, &c_b1, &work[*n + 1], &c__1);
		    isrc = 2;
		    ibeg = 1;
		} else {
		    isrc = 1;
		    ibeg = je;
		}

/*              Copy and scale eigenvector into column of VL */

		xmax = 0.;
		i__2 = *n;
		for (jr = ibeg; jr <= i__2; ++jr) {
/* Computing MAX */
		    i__3 = (isrc - 1) * *n + jr;
		    d__3 = xmax, d__4 = (d__1 = work[i__3].r, abs(d__1)) + (
			    d__2 = d_imag(&work[(isrc - 1) * *n + jr]), abs(
			    d__2));
		    xmax = max(d__3,d__4);
		}

		if (xmax > safmin) {
		    temp = 1. / xmax;
		    i__2 = *n;
		    for (jr = ibeg; jr <= i__2; ++jr) {
			i__3 = jr + ieig * vl_dim1;
			i__4 = (isrc - 1) * *n + jr;
			z__1.r = temp * work[i__4].r, z__1.i = temp * work[
				i__4].i;
			vl[i__3].r = z__1.r, vl[i__3].i = z__1.i;
		    }
		} else {
		    ibeg = *n + 1;
		}

		/* Zero out the leading entries that were never written. */
		i__2 = ibeg - 1;
		for (jr = 1; jr <= i__2; ++jr) {
		    i__3 = jr + ieig * vl_dim1;
		    vl[i__3].r = 0., vl[i__3].i = 0.;
		}

	    }
L140:
	    ;
	}
    }

/*     Right eigenvectors */

    if (compr) {
	ieig = im + 1;

/*        Main loop over eigenvalues */

	for (je = *n; je >= 1; --je) {
	    if (ilall) {
		ilcomp = TRUE_;
	    } else {
		ilcomp = select[je];
	    }
	    if (ilcomp) {
		--ieig;

		i__1 = je + je * s_dim1;
		i__2 = je + je * p_dim1;
		if ((d__2 = s[i__1].r, abs(d__2)) + (d__3 = d_imag(&s[je +
			je * s_dim1]), abs(d__3)) <= safmin && (d__1 = p[
			i__2].r, abs(d__1)) <= safmin) {

/*                 Singular matrix pencil -- return unit eigenvector */

		    i__1 = *n;
		    for (jr = 1; jr <= i__1; ++jr) {
			i__2 = jr + ieig * vr_dim1;
			vr[i__2].r = 0., vr[i__2].i = 0.;
		    }
		    i__1 = ieig + ieig * vr_dim1;
		    vr[i__1].r = 1., vr[i__1].i = 0.;
		    goto L250;
		}

/*              Non-singular eigenvalue: */
/*              Compute coefficients  a  and  b  in */
/*                  ( a A - b B ) x = 0 */

/* Computing MAX */
		i__1 = je + je * s_dim1;
		i__2 = je + je * p_dim1;
		d__4 = ((d__2 = s[i__1].r, abs(d__2)) + (d__3 = d_imag(&s[je
			+ je * s_dim1]), abs(d__3))) * ascale, d__5 = (d__1 =
			p[i__2].r, abs(d__1)) * bscale, d__4 = max(d__4,d__5);
		temp = 1. / max(d__4,safmin);
		i__1 = je + je * s_dim1;
		z__2.r = temp * s[i__1].r, z__2.i = temp * s[i__1].i;
		z__1.r = ascale * z__2.r, z__1.i = ascale * z__2.i;
		salpha.r = z__1.r, salpha.i = z__1.i;
		i__1 = je + je * p_dim1;
		sbeta = temp * p[i__1].r * bscale;
		acoeff = sbeta * ascale;
		z__1.r = bscale * salpha.r, z__1.i = bscale * salpha.i;
		bcoeff.r = z__1.r, bcoeff.i = z__1.i;

/*              Scale to avoid underflow */

		lsa = abs(sbeta) >= safmin && abs(acoeff) < small;
		lsb = (d__1 = salpha.r, abs(d__1)) + (d__2 = d_imag(&salpha),
			abs(d__2)) >= safmin && (d__3 = bcoeff.r, abs(d__3))
			+ (d__4 = d_imag(&bcoeff), abs(d__4)) < small;

		scale = 1.;
		if (lsa) {
		    scale = small / abs(sbeta) * min(anorm,big);
		}
		if (lsb) {
/* Computing MAX */
		    d__3 = scale, d__4 = small / ((d__1 = salpha.r, abs(d__1)
			    ) + (d__2 = d_imag(&salpha), abs(d__2))) * min(
			    bnorm,big);
		    scale = max(d__3,d__4);
		}
		if (lsa || lsb) {
/* Computing MIN */
/* Computing MAX */
		    d__5 = 1., d__6 = abs(acoeff), d__5 = max(d__5,d__6),
			    d__6 = (d__1 = bcoeff.r, abs(d__1)) + (d__2 =
			    d_imag(&bcoeff), abs(d__2));
		    d__3 = scale, d__4 = 1. / (safmin * max(d__5,d__6));
		    scale = min(d__3,d__4);
		    if (lsa) {
			acoeff = ascale * (scale * sbeta);
		    } else {
			acoeff = scale * acoeff;
		    }
		    if (lsb) {
			z__2.r = scale * salpha.r, z__2.i = scale * salpha.i;
			z__1.r = bscale * z__2.r, z__1.i = bscale * z__2.i;
			bcoeff.r = z__1.r, bcoeff.i = z__1.i;
		    } else {
			z__1.r = scale * bcoeff.r, z__1.i = scale * bcoeff.i;
			bcoeff.r = z__1.r, bcoeff.i = z__1.i;
		    }
		}

		acoefa = abs(acoeff);
		bcoefa = (d__1 = bcoeff.r, abs(d__1)) + (d__2 = d_imag(&
			bcoeff), abs(d__2));
		xmax = 1.;
		i__1 = *n;
		for (jr = 1; jr <= i__1; ++jr) {
		    i__2 = jr;
		    work[i__2].r = 0., work[i__2].i = 0.;
		}
		i__1 = je;
		work[i__1].r = 1., work[i__1].i = 0.;
/* Computing MAX */
		d__1 = ulp * acoefa * anorm, d__2 = ulp * bcoefa * bnorm,
			d__1 = max(d__1,d__2);
		dmin__ = max(d__1,safmin);

/*              Triangular solve of  (a A - b B) x = 0  (columnwise) */

/*              WORK(1:j-1) contains sums w, */
/*              WORK(j+1:JE) contains x */

		i__1 = je - 1;
		for (jr = 1; jr <= i__1; ++jr) {
		    i__2 = jr;
		    i__3 = jr + je * s_dim1;
		    z__2.r = acoeff * s[i__3].r, z__2.i = acoeff * s[i__3].i;
		    i__4 = jr + je * p_dim1;
		    z__3.r = bcoeff.r * p[i__4].r - bcoeff.i * p[i__4].i,
			    z__3.i = bcoeff.r * p[i__4].i + bcoeff.i * p[
			    i__4].r;
		    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
		    work[i__2].r = z__1.r, work[i__2].i = z__1.i;
		}
		i__1 = je;
		work[i__1].r = 1., work[i__1].i = 0.;

		for (j = je - 1; j >= 1; --j) {

/*                 Form x(j) := - w(j) / d */
/*                 with scaling and perturbation of the denominator */

		    i__1 = j + j * s_dim1;
		    z__2.r = acoeff * s[i__1].r, z__2.i = acoeff * s[i__1].i;
		    i__2 = j + j * p_dim1;
		    z__3.r = bcoeff.r * p[i__2].r - bcoeff.i * p[i__2].i,
			    z__3.i = bcoeff.r * p[i__2].i + bcoeff.i * p[
			    i__2].r;
		    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
		    d__.r = z__1.r, d__.i = z__1.i;
		    if ((d__1 = d__.r, abs(d__1)) + (d__2 = d_imag(&d__),
			    abs(d__2)) <= dmin__) {
			z__1.r = dmin__, z__1.i = 0.;
			d__.r = z__1.r, d__.i = z__1.i;
		    }

		    if ((d__1 = d__.r, abs(d__1)) + (d__2 = d_imag(&d__),
			    abs(d__2)) < 1.) {
			i__1 = j;
			if ((d__1 = work[i__1].r, abs(d__1)) + (d__2 =
				d_imag(&work[j]), abs(d__2)) >= bignum * ((
				d__3 = d__.r, abs(d__3)) + (d__4 = d_imag(&
				d__), abs(d__4)))) {
			    i__1 = j;
			    temp = 1. / ((d__1 = work[i__1].r, abs(d__1)) + (
				    d__2 = d_imag(&work[j]), abs(d__2)));
			    i__1 = je;
			    for (jr = 1; jr <= i__1; ++jr) {
				i__2 = jr;
				i__3 = jr;
				z__1.r = temp * work[i__3].r, z__1.i = temp *
					 work[i__3].i;
				work[i__2].r = z__1.r, work[i__2].i = z__1.i;
			    }
			}
		    }

		    i__1 = j;
		    i__2 = j;
		    z__2.r = -work[i__2].r, z__2.i = -work[i__2].i;
		    zladiv_(&z__1, &z__2, &d__);
		    work[i__1].r = z__1.r, work[i__1].i = z__1.i;

		    if (j > 1) {

/*                    w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling */

			i__1 = j;
			if ((d__1 = work[i__1].r, abs(d__1)) + (d__2 =
				d_imag(&work[j]), abs(d__2)) > 1.) {
			    i__1 = j;
			    temp = 1. / ((d__1 = work[i__1].r, abs(d__1)) + (
				    d__2 = d_imag(&work[j]), abs(d__2)));
			    if (acoefa * rwork[j] + bcoefa * rwork[*n + j] >=
				    bignum * temp) {
				i__1 = je;
				for (jr = 1; jr <= i__1; ++jr) {
				    i__2 = jr;
				    i__3 = jr;
				    z__1.r = temp * work[i__3].r, z__1.i =
					    temp * work[i__3].i;
				    work[i__2].r = z__1.r, work[i__2].i =
					    z__1.i;
				}
			    }
			}

			i__1 = j;
			z__1.r = acoeff * work[i__1].r, z__1.i = acoeff *
				work[i__1].i;
			ca.r = z__1.r, ca.i = z__1.i;
			i__1 = j;
			z__1.r = bcoeff.r * work[i__1].r - bcoeff.i * work[
				i__1].i, z__1.i = bcoeff.r * work[i__1].i +
				bcoeff.i * work[i__1].r;
			cb.r = z__1.r, cb.i = z__1.i;
			i__1 = j - 1;
			for (jr = 1; jr <= i__1; ++jr) {
			    i__2 = jr;
			    i__3 = jr;
			    i__4 = jr + j * s_dim1;
			    z__3.r = ca.r * s[i__4].r - ca.i * s[i__4].i,
				    z__3.i = ca.r * s[i__4].i + ca.i * s[
				    i__4].r;
			    z__2.r = work[i__3].r + z__3.r, z__2.i = work[
				    i__3].i + z__3.i;
			    i__5 = jr + j * p_dim1;
			    z__4.r = cb.r * p[i__5].r - cb.i * p[i__5].i,
				    z__4.i = cb.r * p[i__5].i + cb.i * p[
				    i__5].r;
			    z__1.r = z__2.r - z__4.r, z__1.i = z__2.i -
				    z__4.i;
			    work[i__2].r = z__1.r, work[i__2].i = z__1.i;
			}
		    }
		}

/*              Back transform eigenvector if HOWMNY='B'. */

		if (ilback) {
		    zgemv_("N", n, &je, &c_b2, &vr[vr_offset], ldvr, &work[1]
			    , &c__1, &c_b1, &work[*n + 1], &c__1);
		    isrc = 2;
		    iend = *n;
		} else {
		    isrc = 1;
		    iend = je;
		}

/*              Copy and scale eigenvector into column of VR */

		xmax = 0.;
		i__1 = iend;
		for (jr = 1; jr <= i__1; ++jr) {
/* Computing MAX */
		    i__2 = (isrc - 1) * *n + jr;
		    d__3 = xmax, d__4 = (d__1 = work[i__2].r, abs(d__1)) + (
			    d__2 = d_imag(&work[(isrc - 1) * *n + jr]), abs(
			    d__2));
		    xmax = max(d__3,d__4);
		}

		if (xmax > safmin) {
		    temp = 1. / xmax;
		    i__1 = iend;
		    for (jr = 1; jr <= i__1; ++jr) {
			i__2 = jr + ieig * vr_dim1;
			i__3 = (isrc - 1) * *n + jr;
			z__1.r = temp * work[i__3].r, z__1.i = temp * work[
				i__3].i;
			vr[i__2].r = z__1.r, vr[i__2].i = z__1.i;
		    }
		} else {
		    iend = 0;
		}

		/* Zero out the trailing entries that were never written. */
		i__1 = *n;
		for (jr = iend + 1; jr <= i__1; ++jr) {
		    i__2 = jr + ieig * vr_dim1;
		    vr[i__2].r = 0., vr[i__2].i = 0.;
		}

	    }
L250:
	    ;
	}
    }

    return 0;

/*     End of ZTGEVC */

} /* ztgevc_ */
int
pzgstrf_column_bmod(
		    const int  pnum,   /* process number */
		    const int  jcol,   /* current column in the panel */
		    const int  fpanelc,/* first column in the panel */
		    const int  nseg,   /* number of s-nodes to update jcol */
		    int        *segrep,/* in */
		    int        *repfnz,/* in */
		    doublecomplex *dense,  /* modified */
		    doublecomplex *tempv,  /* working array */
		    pxgstrf_shared_t *pxgstrf_shared, /* modified */
		    Gstat_t   *Gstat   /* modified */
		    )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose:
 * ========
 *    Performs numeric block updates (sup-col) in topological order.
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
 *    Special processing on the supernodal portion of L\U[*,j].
 *
 * Return value:
 * =============
 *      0 - successful return
 *    > 0 - number of bytes allocated when run out of space
 *
 * Review fixes:
 *    - first debug guard used a misspelled macro (DEBUGlvel) and could
 *      never be enabled; now consistent with the other DEBUGlevel guards.
 *    - the #ifdef DEBUG printf passed a doublecomplex struct to %e
 *      (undefined behavior); it now prints the real part.
 */
#if ( MACH==CRAY_PVP )
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
#ifdef USE_VENDOR_BLAS
    int          incx = 1, incy = 1;
    doublecomplex alpha, beta;
#endif

    GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */

    /* krep = representative of current k-th supernode
     * fsupc = first supernodal column
     * nsupc = no of columns in supernode
     * nsupr = no of rows in supernode (used as leading dimension)
     * luptr = location of supernodal LU-block in storage
     * kfnz = first nonz in the k-th supernodal segment
     * no_zeros = no of leading zeros in a supernodal U-segment
     */
    doublecomplex ukj, ukj1, ukj2;
    register int lptr, kfnz, isub, irow, i, no_zeros;
    register int luptr, luptr1, luptr2;
    int          fsupc, nsupc, nsupr, segsze;
    int          nrow;    /* No of rows in the matrix of matrix-vector */
    int          jsupno, k, ksub, krep, krep_ind, ksupno;
    int          ufirst, nextlu;
    int          fst_col; /* First column within small LU update */
    int          d_fsupc; /* Distance between the first column of the current
			     panel and the first column of the current snode.*/
    int          *xsup, *supno;
    int          *lsub, *xlsub, *xlsub_end;
    doublecomplex *lusup;
    int          *xlusup, *xlusup_end;
    doublecomplex *tempv1;
    int          mem_error;
    register float flopcnt;

    doublecomplex zero = {0.0, 0.0};
    doublecomplex one  = {1.0, 0.0};
    doublecomplex none = {-1.0, 0.0};
    doublecomplex comp_temp, comp_temp1;

    xsup       = Glu->xsup;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    xlusup_end = Glu->xlusup_end;
    jsupno     = supno[jcol];

    /*
     * For each nonz supernode segment of U[*,j] in topological order
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {

	krep = segrep[k];
	k--;
	ksupno = supno[krep];
	/* Fixed: was "#if ( DEBUGlvel>=2 )" -- misspelled macro meant this
	   block was never compiled in. */
#if ( DEBUGlevel>=2 )
	if (jcol==BADCOL)
	    printf("(%d) pzgstrf_column_bmod[1]: %d, nseg %d, krep %d, jsupno %d, ksupno %d\n",
		   pnum, jcol, nseg, krep, jsupno, ksupno);
#endif
	if ( jsupno != ksupno ) { /* Outside the rectangular supernode */

	    fsupc = xsup[ksupno];
	    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

	    /* Distance from the current supernode to the current panel;
	       d_fsupc=0 if fsupc >= fpanelc. */
	    d_fsupc = fst_col - fsupc;

	    luptr = xlusup[fst_col] + d_fsupc;
	    lptr = xlsub[fsupc] + d_fsupc;
	    kfnz = repfnz[krep];
	    kfnz = SUPERLU_MAX ( kfnz, fpanelc );
	    segsze = krep - kfnz + 1;
	    nsupc = krep - fst_col + 1;
	    nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	    nrow = nsupr - d_fsupc - nsupc;
	    krep_ind = lptr + nsupc - 1;

	    /* Complex flops: 4 per mul-free op pair in the solve, 8 per
	       complex multiply-add in the matvec. */
	    flopcnt = 4 * segsze * (segsze - 1) + 8 * nrow * segsze;
	    Gstat->procstat[pnum].fcops += flopcnt;

#if ( DEBUGlevel>=2 )
	    if (jcol==BADCOL)
		printf("(%d) pzgstrf_column_bmod[2]: %d, krep %d, kfnz %d, segsze %d, d_fsupc %d,\
 fsupc %d, nsupr %d, nsupc %d\n",
		       pnum, jcol, krep, kfnz, segsze, d_fsupc, fsupc, nsupr, nsupc);
#endif

	    /*
	     * Case 1: Update U-segment of size 1 -- col-col update
	     */
	    if ( segsze == 1 ) {
		ukj = dense[lsub[krep_ind]];
		luptr += nsupr*(nsupc-1) + nsupc;

		for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
		    irow = lsub[i];
		    zz_mult(&comp_temp, &ukj, &lusup[luptr]);
		    z_sub(&dense[irow], &dense[irow], &comp_temp);
		    luptr++;
		}
	    } else if ( segsze <= 3 ) {
		ukj = dense[lsub[krep_ind]];
		luptr += nsupr*(nsupc-1) + nsupc-1;
		ukj1 = dense[lsub[krep_ind - 1]];
		luptr1 = luptr - nsupr;

		if ( segsze == 2 ) { /* Case 2: 2cols-col update */
		    zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
		    z_sub(&ukj, &ukj, &comp_temp);
		    dense[lsub[krep_ind]] = ukj;
		    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
			irow = lsub[i];
			luptr++; luptr1++;
			zz_mult(&comp_temp, &ukj, &lusup[luptr]);
			zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
			z_add(&comp_temp, &comp_temp, &comp_temp1);
			z_sub(&dense[irow], &dense[irow], &comp_temp);
		    }
		} else { /* Case 3: 3cols-col update */
		    ukj2 = dense[lsub[krep_ind - 2]];
		    luptr2 = luptr1 - nsupr;
		    zz_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
		    z_sub(&ukj1, &ukj1, &comp_temp);

		    zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
		    zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
		    z_add(&comp_temp, &comp_temp, &comp_temp1);
		    z_sub(&ukj, &ukj, &comp_temp);
		    dense[lsub[krep_ind]] = ukj;
		    dense[lsub[krep_ind-1]] = ukj1;
		    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
			irow = lsub[i];
			luptr++; luptr1++; luptr2++;
			zz_mult(&comp_temp, &ukj, &lusup[luptr]);
			zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
			z_add(&comp_temp, &comp_temp, &comp_temp1);
			zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
			z_add(&comp_temp, &comp_temp, &comp_temp1);
			z_sub(&dense[irow], &dense[irow], &comp_temp);
		    }
		}
	    } else {
		/*
		 * Case: sup-col update
		 * Perform a triangular solve and block update,
		 * then scatter the result of sup-col update to dense
		 */
		no_zeros = kfnz - fst_col;

		/* Copy U[*,j] segment from dense[*] to tempv[*] */
		isub = lptr + no_zeros;
		for (i = 0; i < segsze; i++) {
		    irow = lsub[isub];
		    tempv[i] = dense[irow];
		    ++isub;
		}

		/* Dense triangular solve -- start effective triangle */
		luptr += nsupr * no_zeros + no_zeros;

#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
		CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
		       &nsupr, tempv, &incx );
#else
		ztrsv_( "L", "N", "U", &segsze, &lusup[luptr],
		       &nsupr, tempv, &incx );
#endif
		luptr += segsze;	/* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		alpha = one;
		beta = zero;
#if ( MACH==CRAY_PVP )
		CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
		zgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
		zlsolve ( nsupr, segsze, &lusup[luptr], tempv );

		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		zmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
#endif

		/* Scatter tempv[] into SPA dense[*] */
		isub = lptr + no_zeros;
		for (i = 0; i < segsze; i++) {
		    irow = lsub[isub];
		    dense[irow] = tempv[i]; /* Scatter */
		    tempv[i] = zero;
		    isub++;
		}

		/* Scatter tempv1[] into SPA dense[*] */
		for (i = 0; i < nrow; i++) {
		    irow = lsub[isub];
		    z_sub(&dense[irow], &dense[irow], &tempv1[i]);
		    tempv1[i] = zero;
		    ++isub;
		}
	    } /* else segsze >= 4 */

	} /* if jsupno ... */

    } /* for each segment... */

    /* ------------------------------------------
       Process the supernodal portion of L\U[*,j]
       ------------------------------------------ */

    fsupc = SUPER_FSUPC (jsupno);
    nsupr = xlsub_end[fsupc] - xlsub[fsupc];
    if ( (mem_error = Glu_alloc(pnum, jcol, nsupr, LUSUP, &nextlu,
			       pxgstrf_shared)) )
	return mem_error;
    xlusup[jcol] = nextlu;
    lusup = Glu->lusup;

    /* Gather the nonzeros from SPA dense[*,j] into L\U[*,j] */
    for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; ++isub) {
	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
	dense[irow] = zero;
#ifdef DEBUG
	if (jcol == -1)
	    /* Fixed: pass the real part, not the whole struct, to %e. */
	    printf("(%d) pzgstrf_column_bmod[lusup] jcol %d, irow %d, lusup %.10e\n",
		   pnum, jcol, irow, lusup[nextlu].r);
#endif
	++nextlu;
    }
    xlusup_end[jcol] = nextlu; /* close L\U[*,jcol] */

#if ( DEBUGlevel>=2 )
    if (jcol == -1) {
	nrow = xlusup_end[jcol] - xlusup[jcol];
	print_double_vec("before sup-col update", nrow, &lsub[xlsub[fsupc]],
			 &lusup[xlusup[jcol]]);
    }
#endif

    /*
     * For more updates within the panel (also within the current supernode),
     * should start from the first column of the panel, or the first column
     * of the supernode, whichever is bigger. There are 2 cases:
     *    (1) fsupc < fpanelc, then fst_col := fpanelc
     *    (2) fsupc >= fpanelc, then fst_col := fsupc
     */
    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

    if ( fst_col < jcol ) {

	/* distance between the current supernode and the current panel;
	   d_fsupc=0 if fsupc >= fpanelc. */
	d_fsupc = fst_col - fsupc;

	lptr = xlsub[fsupc] + d_fsupc;
	luptr = xlusup[fst_col] + d_fsupc;
	nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	nsupc = jcol - fst_col;	/* Excluding jcol */
	nrow = nsupr - d_fsupc - nsupc;

	/* points to the beginning of jcol in supernode L\U[*,jsupno] */
	ufirst = xlusup[jcol] + d_fsupc;

#if ( DEBUGlevel>=2 )
	if (jcol==BADCOL)
	    printf("(%d) pzgstrf_column_bmod[3] jcol %d, fsupc %d, nsupr %d, nsupc %d, nrow %d\n",
		   pnum, jcol, fsupc, nsupr, nsupc, nrow);
#endif

	flopcnt = 4 * nsupc * (nsupc - 1) + 8 * nrow * nsupc;
	Gstat->procstat[pnum].fcops += flopcnt;

/*	ops[TRSV] += nsupc * (nsupc - 1);
	ops[GEMV] += 2 * nrow * nsupc;    */

#ifdef USE_VENDOR_BLAS
	alpha = none; beta = one; /* y := beta*y + alpha*A*x */
#if ( MACH==CRAY_PVP )
	CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr],
	       &nsupr, &lusup[ufirst], &incx );
	CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	ztrsv_( "L", "N", "U", &nsupc, &lusup[luptr],
	       &nsupr, &lusup[ufirst], &incx );
	zgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	zlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );

	zmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
		 &lusup[ufirst], tempv );

	/* Copy updates from tempv[*] into lusup[*] */
	isub = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
	    z_sub(&lusup[isub], &lusup[isub], &tempv[i]);
	    tempv[i] = zero;
	    ++isub;
	}
#endif

    } /* if fst_col < jcol ... */

    return 0;
}
/* Thin C wrapper over the Fortran-77 BLAS routine zgemv_.
 *
 * Fortran passes every argument by reference, so each scalar that this
 * wrapper receives by value is copied into a local and its address is
 * forwarded.  Per the BLAS contract of zgemv_, this performs
 *     y := (*alpha) * op(A) * x + (*beta) * y
 * where op() is chosen by transa ('N' = A, 'T' = A**T, 'C' = A**H).
 *
 * transa : operation selector character (passed by value, unlike BLAS)
 * m, n   : number of rows / columns of A
 * alpha  : scalar multiplier for op(A)*x
 * a, lda : matrix A (column-major) and its leading dimension
 * x,incx : input vector and its stride
 * beta   : scalar multiplier for the initial y
 * y,incy : in/out vector and its stride
 */
void zgemv(char transa, int m, int n, doublecomplex *alpha,
           doublecomplex *a, int lda, doublecomplex *x, int incx,
           doublecomplex *beta, doublecomplex *y, int incy)
{
    /* Local copies so each by-value scalar has an address to pass. */
    char trans_arg = transa;
    int  m_arg     = m;
    int  n_arg     = n;
    int  lda_arg   = lda;
    int  incx_arg  = incx;
    int  incy_arg  = incy;

    zgemv_(&trans_arg, &m_arg, &n_arg, alpha, a, &lda_arg,
           x, &incx_arg, beta, y, &incy_arg);
}
int zggglm_(int *n, int *m, int *p, doublecomplex *a, int *lda,
	doublecomplex *b, int *ldb, doublecomplex *d__, doublecomplex *x,
	doublecomplex *y, doublecomplex *work, int *lwork, int *info)
{
    /* System generated locals */
    int a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    doublecomplex z__1;

    /* Local variables */
    int i__, nb, np, nb1, nb2, nb3, nb4, lopt;
    extern int zgemv_(char *, int *, int *, doublecomplex *,
	    doublecomplex *, int *, doublecomplex *, int *, doublecomplex *,
	    doublecomplex *, int *), zcopy_(int *, doublecomplex *, int *,
	    doublecomplex *, int *), xerbla_(char *, int *);
    extern int ilaenv_(int *, char *, char *, int *, int *, int *, int *);
    extern int zggqrf_(int *, int *, int *, doublecomplex *, int *,
	    doublecomplex *, doublecomplex *, int *, doublecomplex *,
	    doublecomplex *, int *, int *);
    int lwkmin, lwkopt;
    int lquery;
    extern int zunmqr_(char *, char *, int *, int *, int *, doublecomplex *,
	    int *, doublecomplex *, doublecomplex *, int *, doublecomplex *,
	    int *, int *), zunmrq_(char *, char *, int *, int *, int *,
	    doublecomplex *, int *, doublecomplex *, doublecomplex *, int *,
	    doublecomplex *, int *, int *), ztrtrs_(char *, char *, char *,
	    int *, int *, doublecomplex *, int *, doublecomplex *, int *,
	    int *);

/*  -- LAPACK driver routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  ZGGGLM solves a general Gauss-Markov linear model (GLM) problem: */

/*          minimize || y ||_2   subject to   d = A*x + B*y */
/*              x */

/*  where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */
/*  given N-vector. It is assumed that M <= N <= M+P, and */

/*             rank(A) = M    and    rank( A B ) = N. */

/*  Under these assumptions, the constrained equation is always */
/*  consistent, and there is a unique solution x and a minimal 2-norm */
/*  solution y, which is obtained using a generalized QR factorization */
/*  of the matrices (A, B) given by */

/*     A = Q*(R),   B = Q*T*Z. */
/*           (0) */

/*  In particular, if matrix B is square nonsingular, then the problem */
/*  GLM is equivalent to the following weighted linear least squares */
/*  problem */

/*               minimize || inv(B)*(d-A*x) ||_2 */
/*                   x */

/*  where inv(B) denotes the inverse of B. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The number of rows of the matrices A and B.  N >= 0. */

/*  M       (input) INTEGER */
/*          The number of columns of the matrix A.  0 <= M <= N. */

/*  P       (input) INTEGER */
/*          The number of columns of the matrix B.  P >= N-M. */

/*  A       (input/output) COMPLEX*16 array, dimension (LDA,M) */
/*          On entry, the N-by-M matrix A. */
/*          On exit, the upper triangular part of the array A contains */
/*          the M-by-M upper triangular matrix R. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= MAX(1,N). */

/*  B       (input/output) COMPLEX*16 array, dimension (LDB,P) */
/*          On entry, the N-by-P matrix B. */
/*          On exit, if N <= P, the upper triangle of the subarray */
/*          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
/*          if N > P, the elements on and above the (N-P)th subdiagonal */
/*          contain the N-by-P upper trapezoidal matrix T. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. LDB >= MAX(1,N). */

/*  D       (input/output) COMPLEX*16 array, dimension (N) */
/*          On entry, D is the left hand side of the GLM equation. */
/*          On exit, D is destroyed. */

/*  X       (output) COMPLEX*16 array, dimension (M) */
/*  Y       (output) COMPLEX*16 array, dimension (P) */
/*          On exit, X and Y are the solutions of the GLM problem. */

/*  WORK    (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. LWORK >= MAX(1,N+M+P). */
/*          For optimum performance, LWORK >= M+MIN(N,P)+MAX(N,P)*NB, */
/*          where NB is an upper bound for the optimal blocksizes for */
/*          ZGEQRF, ZGERQF, ZUNMQR and ZUNMRQ. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          = 1:  the upper triangular factor R associated with A in the */
/*                generalized QR factorization of the pair (A, B) is */
/*                singular, so that rank(A) < M; the least squares */
/*                solution could not be computed. */
/*          = 2:  the bottom (N-M) by (N-M) part of the upper trapezoidal */
/*                factor T associated with B in the generalized QR */
/*                factorization of the pair (A, B) is singular, so that */
/*                rank( A B ) < N; the least squares solution could not */
/*                be computed. */

/*  =================================================================== */

/*     Test the input parameters */

    /* Parameter adjustments: shift pointers so 1-based Fortran-style
       indexing (a[i + j*a_dim1], d__[i], work[i], ...) works in C. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --d__;
    --x;
    --y;
    --work;

    /* Function Body */
    *info = 0;
    np = MIN(*n,*p);
    lquery = *lwork == -1;
    if (*n < 0) {
	*info = -1;
    } else if (*m < 0 || *m > *n) {
	*info = -2;
    } else if (*p < 0 || *p < *n - *m) {
	*info = -3;
    } else if (*lda < MAX(1,*n)) {
	*info = -5;
    } else if (*ldb < MAX(1,*n)) {
	*info = -7;
    }

/*     Calculate workspace */

    if (*info == 0) {
	if (*n == 0) {
	    lwkmin = 1;
	    lwkopt = 1;
	} else {
	    /* NB = largest optimal blocksize among the four kernels used. */
	    nb1 = ilaenv_(&c__1, "ZGEQRF", " ", n, m, &c_n1, &c_n1);
	    nb2 = ilaenv_(&c__1, "ZGERQF", " ", n, m, &c_n1, &c_n1);
	    nb3 = ilaenv_(&c__1, "ZUNMQR", " ", n, m, p, &c_n1);
	    nb4 = ilaenv_(&c__1, "ZUNMRQ", " ", n, m, p, &c_n1);
/* Computing MAX */
	    i__1 = MAX(nb1,nb2), i__1 = MAX(i__1,nb3);
	    nb = MAX(i__1,nb4);
	    lwkmin = *m + *n + *p;
	    lwkopt = *m + np + MAX(*n,*p) * nb;
	}
	/* Report optimal workspace in WORK(1), as documented above. */
	work[1].r = (double) lwkopt, work[1].i = 0.;

	if (*lwork < lwkmin && ! lquery) {
	    *info = -12;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZGGGLM", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Compute the GQR factorization of matrices A and B: */

/*            Q'*A = ( R11 ) M,    Q'*B*Z' = ( T11   T12 ) M */
/*                   (  0  ) N-M             (  0    T22 ) N-M */
/*                      M                     M+P-N  N-M */

/*     where R11 and T22 are upper triangular, and Q and Z are */
/*     unitary. */

    i__1 = *lwork - *m - np;
    zggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m
	    + 1], &work[*m + np + 1], &i__1, info);
    /* lopt tracks the largest optimal workspace reported by the kernels. */
    i__1 = *m + np + 1;
    lopt = (int) work[i__1].r;

/*     Update left-hand-side vector d = Q'*d = ( d1 ) M */
/*                                             ( d2 ) N-M */

    i__1 = MAX(1,*n);
    i__2 = *lwork - *m - np;
    zunmqr_("Left", "Conjugate transpose", n, &c__1, m, &a[a_offset], lda, &
	    work[1], &d__[1], &i__1, &work[*m + np + 1], &i__2, info);
/* Computing MAX */
    i__3 = *m + np + 1;
    i__1 = lopt, i__2 = (int) work[i__3].r;
    lopt = MAX(i__1,i__2);

/*     Solve T22*y2 = d2 for y2 */

    if (*n > *m) {
	i__1 = *n - *m;
	i__2 = *n - *m;
	ztrtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1
		+ (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2,
		info);

	if (*info > 0) {
	    /* T22 singular => rank( A B ) < N; see INFO = 2... actually
	       INFO = 1 here flags singular R/T per the contract above. */
	    *info = 1;
	    return 0;
	}

	i__1 = *n - *m;
	zcopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1);
    }

/*     Set y1 = 0 */

    i__1 = *m + *p - *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__;
	y[i__2].r = 0., y[i__2].i = 0.;
/* L10: */
    }

/*     Update d1 = d1 - T12*y2 */

    i__1 = *n - *m;
    z__1.r = -1., z__1.i = -0.;
    zgemv_("No transpose", m, &i__1, &z__1, &b[(*m + *p - *n + 1) * b_dim1 +
	    1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b2, &d__[1], &c__1);

/*     Solve triangular system: R11*x = d1 */

    if (*m > 0) {
	ztrtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset],
		lda, &d__[1], m, info);

	if (*info > 0) {
	    /* R11 singular => rank(A) < M (INFO = 2 per contract above). */
	    *info = 2;
	    return 0;
	}

/*        Copy D to X */

	zcopy_(m, &d__[1], &c__1, &x[1], &c__1);
    }

/*     Backward transformation y = Z'*y */

/* Computing MAX */
    i__1 = 1, i__2 = *n - *p + 1;
    i__3 = MAX(1,*p);
    i__4 = *lwork - *m - np;
    zunmrq_("Left", "Conjugate transpose", p, &c__1, &np, &b[MAX(i__1,i__2) +
	    b_dim1], ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &
	    i__4, info);
/* Computing MAX */
    i__4 = *m + np + 1;
    i__2 = lopt, i__3 = (int) work[i__4].r;
    i__1 = *m + np + MAX(i__2,i__3);
    /* Return the actually-achieved optimal workspace in WORK(1). */
    work[1].r = (double) i__1, work[1].i = 0.;

    return 0;

/*     End of ZGGGLM */

} /* zggglm_ */
/*  ZTGEVC computes some or all of the left and/or right eigenvectors of
    a pair of complex upper triangular matrices (S,P) (as produced, e.g.,
    by a generalized Schur factorization).  Each eigenvector is found by
    back-substitution in the triangular system (a*S - b*P)x = 0 (right)
    or y**H (a*S - b*P) = 0 (left), with careful scaling to avoid over-
    and underflow.  SIDE selects left ('L'), right ('R') or both ('B');
    HOWMNY selects all ('A'), all back-transformed by the input VL/VR
    ('B'), or the SELECTed subset ('S').  MM is the number of columns in
    VL/VR; M returns the number actually used.  WORK needs 2*N complex,
    RWORK 2*N real entries.  INFO < 0 flags the offending argument. */
/* Subroutine */ int ztgevc_(char *side, char *howmny, logical *select,
	integer *n, doublecomplex *s, integer *lds, doublecomplex *p, integer
	*ldp, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *
	ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork,
	 integer *info)
{
    /* System generated locals */
    integer p_dim1, p_offset, s_dim1, s_offset, vl_dim1, vl_offset, vr_dim1,
	    vr_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3, d__4, d__5, d__6;
    doublecomplex z__1, z__2, z__3, z__4;

    /* Builtin functions */
    double d_imag(doublecomplex *);
    void d_cnjg(doublecomplex *, doublecomplex *);

    /* Local variables */
    doublecomplex d__;
    integer i__, j;
    doublecomplex ca, cb;
    integer je, im, jr;
    doublereal big;
    logical lsa, lsb;
    doublereal ulp;
    doublecomplex sum;
    integer ibeg, ieig, iend;
    doublereal dmin__;
    integer isrc;
    doublereal temp;
    doublecomplex suma, sumb;
    doublereal xmax, scale;
    logical ilall;
    integer iside;
    doublereal sbeta;
    extern logical lsame_(char *, char *);
    doublereal small;
    logical compl;
    doublereal anorm, bnorm;
    logical compr;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *),
	    dlabad_(doublereal *, doublereal *);
    logical ilbbad;
    doublereal acoefa, bcoefa, acoeff;
    doublecomplex bcoeff;
    logical ilback;
    doublereal ascale, bscale;
    extern doublereal dlamch_(char *);
    doublecomplex salpha;
    doublereal safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    doublereal bignum;
    logical ilcomp;
    extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex
	    *, doublecomplex *);
    integer ihwmny;

/*  -- LAPACK computational routine (version 3.4.0) -- */
/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
/*     November 2011 */

/*  ===================================================================== */

/*     Decode and Test the input parameters */

    /* Parameter adjustments: shift pointers so 1-based Fortran-style
       indexing (s[i + j*s_dim1], work[i], ...) works in C. */
    --select;
    s_dim1 = *lds;
    s_offset = 1 + s_dim1;
    s -= s_offset;
    p_dim1 = *ldp;
    p_offset = 1 + p_dim1;
    p -= p_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1;
    vr -= vr_offset;
    --work;
    --rwork;

    /* Function Body */
    /* Decode HOWMNY: ilall = compute all eigenvectors, ilback = apply
       the back-transformation with the matrices supplied in VL/VR. */
    if (lsame_(howmny, "A")) {
	ihwmny = 1;
	ilall = TRUE_;
	ilback = FALSE_;
    } else if (lsame_(howmny, "S")) {
	ihwmny = 2;
	ilall = FALSE_;
	ilback = FALSE_;
    } else if (lsame_(howmny, "B")) {
	ihwmny = 3;
	ilall = TRUE_;
	ilback = TRUE_;
    } else {
	ihwmny = -1;
    }

    /* Decode SIDE: compl = compute left, compr = compute right vectors. */
    if (lsame_(side, "R")) {
	iside = 1;
	compl = FALSE_;
	compr = TRUE_;
    } else if (lsame_(side, "L")) {
	iside = 2;
	compl = TRUE_;
	compr = FALSE_;
    } else if (lsame_(side, "B")) {
	iside = 3;
	compl = TRUE_;
	compr = TRUE_;
    } else {
	iside = -1;
    }

    *info = 0;
    if (iside < 0) {
	*info = -1;
    } else if (ihwmny < 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lds < max(1,*n)) {
	*info = -6;
    } else if (*ldp < max(1,*n)) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTGEVC", &i__1);
	return 0;
    }

/*     Count the number of eigenvectors */

    if (! ilall) {
	im = 0;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    if (select[j]) {
		++im;
	    }
/* L10: */
	}
    } else {
	im = *n;
    }

/*     Check diagonal of B */

    /* The diagonal of P must be real (non-zero imaginary part is an
       invalid input for this routine). */
    ilbbad = FALSE_;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	if (d_imag(&p[j + j * p_dim1]) != 0.) {
	    ilbbad = TRUE_;
	}
/* L20: */
    }

    if (ilbbad) {
	*info = -7;
    } else if (compl && *ldvl < *n || *ldvl < 1) {
	*info = -10;
    } else if (compr && *ldvr < *n || *ldvr < 1) {
	*info = -12;
    } else if (*mm < im) {
	*info = -13;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTGEVC", &i__1);
	return 0;
    }

/*     Quick return if possible */

    *m = im;
    if (*n == 0) {
	return 0;
    }

/*     Machine Constants */

    safmin = dlamch_("Safe minimum");
    big = 1. / safmin;
    dlabad_(&safmin, &big);
    ulp = dlamch_("Epsilon") * dlamch_("Base");
    small = safmin * *n / ulp;
    big = 1. / small;
    bignum = 1. / (safmin * *n);

/*     Compute the 1-norm of each column of the strictly upper triangular */
/*     part of A and B to check for possible overflow in the triangular */
/*     solver. */

    /* rwork[1..n] holds the column sums for S, rwork[n+1..2n] for P;
       anorm/bnorm accumulate the matrix norms including the diagonal. */
    i__1 = s_dim1 + 1;
    anorm = (d__1 = s[i__1].r, abs(d__1)) + (d__2 = d_imag(&s[s_dim1 + 1]),
	    abs(d__2));
    i__1 = p_dim1 + 1;
    bnorm = (d__1 = p[i__1].r, abs(d__1)) + (d__2 = d_imag(&p[p_dim1 + 1]),
	    abs(d__2));
    rwork[1] = 0.;
    rwork[*n + 1] = 0.;
    i__1 = *n;
    for (j = 2; j <= i__1; ++j) {
	rwork[j] = 0.;
	rwork[*n + j] = 0.;
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * s_dim1;
	    rwork[j] += (d__1 = s[i__3].r, abs(d__1)) + (d__2 = d_imag(&s[i__
		    + j * s_dim1]), abs(d__2));
	    i__3 = i__ + j * p_dim1;
	    rwork[*n + j] += (d__1 = p[i__3].r, abs(d__1)) + (d__2 = d_imag(&
		    p[i__ + j * p_dim1]), abs(d__2));
/* L30: */
	}
/* Computing MAX */
	i__2 = j + j * s_dim1;
	d__3 = anorm;
	d__4 = rwork[j] + ((d__1 = s[i__2].r, abs(d__1)) + (
		d__2 = d_imag(&s[j + j * s_dim1]), abs(d__2)));
	anorm = max(d__3,d__4);
/* Computing MAX */
	i__2 = j + j * p_dim1;
	d__3 = bnorm;
	d__4 = rwork[*n + j] + ((d__1 = p[i__2].r, abs(d__1)) + (d__2 =
		d_imag(&p[j + j * p_dim1]), abs(d__2)));
	bnorm = max(d__3,d__4);
/* L40: */
    }

    ascale = 1. / max(anorm,safmin);
    bscale = 1. / max(bnorm,safmin);

/*     Left eigenvectors */

    if (compl) {
	ieig = 0;

/*        Main loop over eigenvalues */

	i__1 = *n;
	for (je = 1; je <= i__1; ++je) {
	    if (ilall) {
		ilcomp = TRUE_;
	    } else {
		ilcomp = select[je];
	    }
	    if (ilcomp) {
		++ieig;

		i__2 = je + je * s_dim1;
		i__3 = je + je * p_dim1;
		if ((d__2 = s[i__2].r, abs(d__2)) + (d__3 = d_imag(&s[je +
			je * s_dim1]), abs(d__3)) <= safmin && (d__1 = p[
			i__3].r, abs(d__1)) <= safmin) {

/*                 Singular matrix pencil -- return unit eigenvector */

		    i__2 = *n;
		    for (jr = 1; jr <= i__2; ++jr) {
			i__3 = jr + ieig * vl_dim1;
			vl[i__3].r = 0.;
			vl[i__3].i = 0.;
/* L50: */
		    }
		    i__2 = ieig + ieig * vl_dim1;
		    vl[i__2].r = 1.;
		    vl[i__2].i = 0.;
		    goto L140;
		}

/*              Non-singular eigenvalue: */
/*              Compute coefficients  a  and  b  in */
/*                   H */
/*                 y  ( a A - b B ) = 0 */

/* Computing MAX */
		i__2 = je + je * s_dim1;
		i__3 = je + je * p_dim1;
		d__4 = ((d__2 = s[i__2].r, abs(d__2)) + (d__3 = d_imag(&s[je
			+ je * s_dim1]), abs(d__3))) * ascale;
		d__5 = (d__1 = p[i__3].r, abs(d__1)) * bscale;
		d__4 = max(d__4,d__5);
		temp = 1. / max(d__4,safmin);
		i__2 = je + je * s_dim1;
		z__2.r = temp * s[i__2].r;
		z__2.i = temp * s[i__2].i;
		z__1.r = ascale * z__2.r;
		z__1.i = ascale * z__2.i;
		salpha.r = z__1.r;
		salpha.i = z__1.i;
		i__2 = je + je * p_dim1;
		sbeta = temp * p[i__2].r * bscale;
		acoeff = sbeta * ascale;
		z__1.r = bscale * salpha.r;
		z__1.i = bscale * salpha.i;
		bcoeff.r = z__1.r;
		bcoeff.i = z__1.i;

/*              Scale to avoid underflow */

		lsa = abs(sbeta) >= safmin && abs(acoeff) < small;
		lsb = (d__1 = salpha.r, abs(d__1)) + (d__2 = d_imag(&salpha),
			abs(d__2)) >= safmin && (d__3 = bcoeff.r, abs(d__3))
			+ (d__4 = d_imag(&bcoeff), abs(d__4)) < small;
		scale = 1.;
		if (lsa) {
		    scale = small / abs(sbeta) * min(anorm,big);
		}
		if (lsb) {
/* Computing MAX */
		    d__3 = scale;
		    d__4 = small / ((d__1 = salpha.r, abs(d__1)) + (d__2 =
			    d_imag(&salpha), abs(d__2))) * min(bnorm,big);
		    scale = max(d__3,d__4);
		}
		if (lsa || lsb) {
/* Computing MIN */
/* Computing MAX */
		    d__5 = 1., d__6 = abs(acoeff);
		    d__5 = max(d__5,d__6);
		    d__6 = (d__1 = bcoeff.r, abs(d__1)) + (d__2 = d_imag(&
			    bcoeff), abs(d__2));
		    d__3 = scale;
		    d__4 = 1. / (safmin * max(d__5,d__6));
		    scale = min(d__3,d__4);
		    if (lsa) {
			acoeff = ascale * (scale * sbeta);
		    } else {
			acoeff = scale * acoeff;
		    }
		    if (lsb) {
			z__2.r = scale * salpha.r;
			z__2.i = scale * salpha.i;
			z__1.r = bscale * z__2.r;
			z__1.i = bscale * z__2.i;
			bcoeff.r = z__1.r;
			bcoeff.i = z__1.i;
		    } else {
			z__1.r = scale * bcoeff.r;
			z__1.i = scale * bcoeff.i;
			bcoeff.r = z__1.r;
			bcoeff.i = z__1.i;
		    }
		}

		acoefa = abs(acoeff);
		bcoefa = (d__1 = bcoeff.r, abs(d__1)) + (d__2 = d_imag(&
			bcoeff), abs(d__2));
		/* Start from the unit vector e_je in WORK(1:n). */
		xmax = 1.;
		i__2 = *n;
		for (jr = 1; jr <= i__2; ++jr) {
		    i__3 = jr;
		    work[i__3].r = 0.;
		    work[i__3].i = 0.;
/* L60: */
		}
		i__2 = je;
		work[i__2].r = 1.;
		work[i__2].i = 0.;
/* Computing MAX */
		d__1 = ulp * acoefa * anorm;
		d__2 = ulp * bcoefa * bnorm;
		d__1 = max(d__1,d__2);
		dmin__ = max(d__1,safmin);

/*                                              H */
/*              Triangular solve of  (a A - b B)  y = 0 */

/*                                        H */
/*              (rowwise in  (a A - b B) , or columnwise in a A - b B) */

		i__2 = *n;
		for (j = je + 1; j <= i__2; ++j) {

/*                 Compute */
/*                       j-1 */
/*                 SUM = sum  conjg( a*S(k,j) - b*P(k,j) )*x(k) */
/*                       k=je */
/*                 (Scale if necessary) */

		    temp = 1. / xmax;
		    if (acoefa * rwork[j] + bcoefa * rwork[*n + j] > bignum *
			    temp) {
			i__3 = j - 1;
			for (jr = je; jr <= i__3; ++jr) {
			    i__4 = jr;
			    i__5 = jr;
			    z__1.r = temp * work[i__5].r;
			    z__1.i = temp * work[i__5].i;
			    work[i__4].r = z__1.r;
			    work[i__4].i = z__1.i;
/* L70: */
			}
			xmax = 1.;
		    }
		    suma.r = 0.;
		    suma.i = 0.;
		    sumb.r = 0.;
		    sumb.i = 0.;

		    i__3 = j - 1;
		    for (jr = je; jr <= i__3; ++jr) {
			d_cnjg(&z__3, &s[jr + j * s_dim1]);
			i__4 = jr;
			z__2.r = z__3.r * work[i__4].r - z__3.i * work[i__4]
				.i;
			z__2.i = z__3.r * work[i__4].i + z__3.i * work[i__4]
				.r;
			z__1.r = suma.r + z__2.r;
			z__1.i = suma.i + z__2.i;
			suma.r = z__1.r;
			suma.i = z__1.i;
			d_cnjg(&z__3, &p[jr + j * p_dim1]);
			i__4 = jr;
			z__2.r = z__3.r * work[i__4].r - z__3.i * work[i__4]
				.i;
			z__2.i = z__3.r * work[i__4].i + z__3.i * work[i__4]
				.r;
			z__1.r = sumb.r + z__2.r;
			z__1.i = sumb.i + z__2.i;
			sumb.r = z__1.r;
			sumb.i = z__1.i;
/* L80: */
		    }
		    z__2.r = acoeff * suma.r;
		    z__2.i = acoeff * suma.i;
		    d_cnjg(&z__4, &bcoeff);
		    z__3.r = z__4.r * sumb.r - z__4.i * sumb.i;
		    z__3.i = z__4.r * sumb.i + z__4.i * sumb.r;
		    z__1.r = z__2.r - z__3.r;
		    z__1.i = z__2.i - z__3.i;
		    sum.r = z__1.r;
		    sum.i = z__1.i;

/*                 Form x(j) = - SUM / conjg( a*S(j,j) - b*P(j,j) ) */

/*                 with scaling and perturbation of the denominator */

		    i__3 = j + j * s_dim1;
		    z__3.r = acoeff * s[i__3].r;
		    z__3.i = acoeff * s[i__3].i;
		    i__4 = j + j * p_dim1;
		    z__4.r = bcoeff.r * p[i__4].r - bcoeff.i * p[i__4].i;
		    z__4.i = bcoeff.r * p[i__4].i + bcoeff.i * p[i__4].r;
		    z__2.r = z__3.r - z__4.r;
		    z__2.i = z__3.i - z__4.i;
		    d_cnjg(&z__1, &z__2);
		    d__.r = z__1.r;
		    d__.i = z__1.i;
		    /* Perturb a tiny denominator up to dmin__ to avoid
		       division blow-up. */
		    if ((d__1 = d__.r, abs(d__1)) + (d__2 = d_imag(&d__),
			    abs(d__2)) <= dmin__) {
			z__1.r = dmin__;
			z__1.i = 0.;
			d__.r = z__1.r;
			d__.i = z__1.i;
		    }

		    if ((d__1 = d__.r, abs(d__1)) + (d__2 = d_imag(&d__),
			    abs(d__2)) < 1.) {
			if ((d__1 = sum.r, abs(d__1)) + (d__2 = d_imag(&sum),
				abs(d__2)) >= bignum * ((d__3 = d__.r, abs(
				d__3)) + (d__4 = d_imag(&d__), abs(d__4)))) {
			    temp = 1. / ((d__1 = sum.r, abs(d__1)) + (d__2 =
				    d_imag(&sum), abs(d__2)));
			    i__3 = j - 1;
			    for (jr = je; jr <= i__3; ++jr) {
				i__4 = jr;
				i__5 = jr;
				z__1.r = temp * work[i__5].r;
				z__1.i = temp * work[i__5].i;
				work[i__4].r = z__1.r;
				work[i__4].i = z__1.i;
/* L90: */
			    }
			    xmax = temp * xmax;
			    z__1.r = temp * sum.r;
			    z__1.i = temp * sum.i;
			    sum.r = z__1.r;
			    sum.i = z__1.i;
			}
		    }
		    i__3 = j;
		    z__2.r = -sum.r;
		    z__2.i = -sum.i;
		    zladiv_(&z__1, &z__2, &d__);
		    work[i__3].r = z__1.r;
		    work[i__3].i = z__1.i;
/* Computing MAX */
		    i__3 = j;
		    d__3 = xmax;
		    d__4 = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = d_imag(
			    &work[j]), abs(d__2));
		    xmax = max(d__3,d__4);
/* L100: */
		}

/*              Back transform eigenvector if HOWMNY='B'. */

		if (ilback) {
		    /* Result goes into WORK(n+1:2n); isrc selects which
		       half of WORK the copy-out below reads. */
		    i__2 = *n + 1 - je;
		    zgemv_("N", n, &i__2, &c_b2, &vl[je * vl_dim1 + 1], ldvl,
			    &work[je], &c__1, &c_b1, &work[*n + 1], &c__1);
		    isrc = 2;
		    ibeg = 1;
		} else {
		    isrc = 1;
		    ibeg = je;
		}

/*              Copy and scale eigenvector into column of VL */

		xmax = 0.;
		i__2 = *n;
		for (jr = ibeg; jr <= i__2; ++jr) {
/* Computing MAX */
		    i__3 = (isrc - 1) * *n + jr;
		    d__3 = xmax;
		    d__4 = (d__1 = work[i__3].r, abs(d__1)) + (d__2 = d_imag(
			    &work[(isrc - 1) * *n + jr]), abs(d__2));
		    xmax = max(d__3,d__4);
/* L110: */
		}

		if (xmax > safmin) {
		    /* Normalize so the largest |component| is 1. */
		    temp = 1. / xmax;
		    i__2 = *n;
		    for (jr = ibeg; jr <= i__2; ++jr) {
			i__3 = jr + ieig * vl_dim1;
			i__4 = (isrc - 1) * *n + jr;
			z__1.r = temp * work[i__4].r;
			z__1.i = temp * work[i__4].i;
			vl[i__3].r = z__1.r;
			vl[i__3].i = z__1.i;
/* L120: */
		    }
		} else {
		    ibeg = *n + 1;
		}

		i__2 = ibeg - 1;
		for (jr = 1; jr <= i__2; ++jr) {
		    i__3 = jr + ieig * vl_dim1;
		    vl[i__3].r = 0.;
		    vl[i__3].i = 0.;
/* L130: */
		}

	    }
L140:
	    ;
	}
    }

/*     Right eigenvectors */

    if (compr) {
	ieig = im + 1;

/*        Main loop over eigenvalues */

	for (je = *n; je >= 1; --je) {
	    if (ilall) {
		ilcomp = TRUE_;
	    } else {
		ilcomp = select[je];
	    }
	    if (ilcomp) {
		--ieig;

		i__1 = je + je * s_dim1;
		i__2 = je + je * p_dim1;
		if ((d__2 = s[i__1].r, abs(d__2)) + (d__3 = d_imag(&s[je +
			je * s_dim1]), abs(d__3)) <= safmin && (d__1 = p[
			i__2].r, abs(d__1)) <= safmin) {

/*                 Singular matrix pencil -- return unit eigenvector */

		    i__1 = *n;
		    for (jr = 1; jr <= i__1; ++jr) {
			i__2 = jr + ieig * vr_dim1;
			vr[i__2].r = 0.;
			vr[i__2].i = 0.;
/* L150: */
		    }
		    i__1 = ieig + ieig * vr_dim1;
		    vr[i__1].r = 1.;
		    vr[i__1].i = 0.;
		    goto L250;
		}

/*              Non-singular eigenvalue: */
/*              Compute coefficients  a  and  b  in */

/*                 ( a A - b B ) x  = 0 */

/* Computing MAX */
		i__1 = je + je * s_dim1;
		i__2 = je + je * p_dim1;
		d__4 = ((d__2 = s[i__1].r, abs(d__2)) + (d__3 = d_imag(&s[je
			+ je * s_dim1]), abs(d__3))) * ascale;
		d__5 = (d__1 = p[i__2].r, abs(d__1)) * bscale;
		d__4 = max(d__4,d__5);
		temp = 1. / max(d__4,safmin);
		i__1 = je + je * s_dim1;
		z__2.r = temp * s[i__1].r;
		z__2.i = temp * s[i__1].i;
		z__1.r = ascale * z__2.r;
		z__1.i = ascale * z__2.i;
		salpha.r = z__1.r;
		salpha.i = z__1.i;
		i__1 = je + je * p_dim1;
		sbeta = temp * p[i__1].r * bscale;
		acoeff = sbeta * ascale;
		z__1.r = bscale * salpha.r;
		z__1.i = bscale * salpha.i;
		bcoeff.r = z__1.r;
		bcoeff.i = z__1.i;

/*              Scale to avoid underflow */

		lsa = abs(sbeta) >= safmin && abs(acoeff) < small;
		lsb = (d__1 = salpha.r, abs(d__1)) + (d__2 = d_imag(&salpha),
			abs(d__2)) >= safmin && (d__3 = bcoeff.r, abs(d__3))
			+ (d__4 = d_imag(&bcoeff), abs(d__4)) < small;
		scale = 1.;
		if (lsa) {
		    scale = small / abs(sbeta) * min(anorm,big);
		}
		if (lsb) {
/* Computing MAX */
		    d__3 = scale;
		    d__4 = small / ((d__1 = salpha.r, abs(d__1)) + (d__2 =
			    d_imag(&salpha), abs(d__2))) * min(bnorm,big);
		    scale = max(d__3,d__4);
		}
		if (lsa || lsb) {
/* Computing MIN */
/* Computing MAX */
		    d__5 = 1., d__6 = abs(acoeff);
		    d__5 = max(d__5,d__6);
		    d__6 = (d__1 = bcoeff.r, abs(d__1)) + (d__2 = d_imag(&
			    bcoeff), abs(d__2));
		    d__3 = scale;
		    d__4 = 1. / (safmin * max(d__5,d__6));
		    scale = min(d__3,d__4);
		    if (lsa) {
			acoeff = ascale * (scale * sbeta);
		    } else {
			acoeff = scale * acoeff;
		    }
		    if (lsb) {
			z__2.r = scale * salpha.r;
			z__2.i = scale * salpha.i;
			z__1.r = bscale * z__2.r;
			z__1.i = bscale * z__2.i;
			bcoeff.r = z__1.r;
			bcoeff.i = z__1.i;
		    } else {
			z__1.r = scale * bcoeff.r;
			z__1.i = scale * bcoeff.i;
			bcoeff.r = z__1.r;
			bcoeff.i = z__1.i;
		    }
		}

		acoefa = abs(acoeff);
		bcoefa = (d__1 = bcoeff.r, abs(d__1)) + (d__2 = d_imag(&
			bcoeff), abs(d__2));
		/* Start from the unit vector e_je in WORK(1:n). */
		xmax = 1.;
		i__1 = *n;
		for (jr = 1; jr <= i__1; ++jr) {
		    i__2 = jr;
		    work[i__2].r = 0.;
		    work[i__2].i = 0.;
/* L160: */
		}
		i__1 = je;
		work[i__1].r = 1.;
		work[i__1].i = 0.;
/* Computing MAX */
		d__1 = ulp * acoefa * anorm;
		d__2 = ulp * bcoefa * bnorm;
		d__1 = max(d__1,d__2);
		dmin__ = max(d__1,safmin);

/*              Triangular solve of  (a A - b B) x = 0  (columnwise) */

/*              WORK(1:j-1) contains sums w, */
/*              WORK(j+1:JE) contains x */

		i__1 = je - 1;
		for (jr = 1; jr <= i__1; ++jr) {
		    i__2 = jr;
		    i__3 = jr + je * s_dim1;
		    z__2.r = acoeff * s[i__3].r;
		    z__2.i = acoeff * s[i__3].i;
		    i__4 = jr + je * p_dim1;
		    z__3.r = bcoeff.r * p[i__4].r - bcoeff.i * p[i__4].i;
		    z__3.i = bcoeff.r * p[i__4].i + bcoeff.i * p[i__4].r;
		    z__1.r = z__2.r - z__3.r;
		    z__1.i = z__2.i - z__3.i;
		    work[i__2].r = z__1.r;
		    work[i__2].i = z__1.i;
/* L170: */
		}
		i__1 = je;
		work[i__1].r = 1.;
		work[i__1].i = 0.;

		for (j = je - 1; j >= 1; --j) {

/*                 Form x(j) := - w(j) / d */
/*                 with scaling and perturbation of the denominator */

		    i__1 = j + j * s_dim1;
		    z__2.r = acoeff * s[i__1].r;
		    z__2.i = acoeff * s[i__1].i;
		    i__2 = j + j * p_dim1;
		    z__3.r = bcoeff.r * p[i__2].r - bcoeff.i * p[i__2].i;
		    z__3.i = bcoeff.r * p[i__2].i + bcoeff.i * p[i__2].r;
		    z__1.r = z__2.r - z__3.r;
		    z__1.i = z__2.i - z__3.i;
		    d__.r = z__1.r;
		    d__.i = z__1.i;
		    /* Perturb a tiny denominator up to dmin__ to avoid
		       division blow-up. */
		    if ((d__1 = d__.r, abs(d__1)) + (d__2 = d_imag(&d__),
			    abs(d__2)) <= dmin__) {
			z__1.r = dmin__;
			z__1.i = 0.;
			d__.r = z__1.r;
			d__.i = z__1.i;
		    }

		    if ((d__1 = d__.r, abs(d__1)) + (d__2 = d_imag(&d__),
			    abs(d__2)) < 1.) {
			i__1 = j;
			if ((d__1 = work[i__1].r, abs(d__1)) + (d__2 =
				d_imag(&work[j]), abs(d__2)) >= bignum * ((
				d__3 = d__.r, abs(d__3)) + (d__4 = d_imag(&
				d__), abs(d__4)))) {
			    i__1 = j;
			    temp = 1. / ((d__1 = work[i__1].r, abs(d__1)) + (
				    d__2 = d_imag(&work[j]), abs(d__2)));
			    i__1 = je;
			    for (jr = 1; jr <= i__1; ++jr) {
				i__2 = jr;
				i__3 = jr;
				z__1.r = temp * work[i__3].r;
				z__1.i = temp * work[i__3].i;
				work[i__2].r = z__1.r;
				work[i__2].i = z__1.i;
/* L180: */
			    }
			}
		    }

		    i__1 = j;
		    i__2 = j;
		    z__2.r = -work[i__2].r;
		    z__2.i = -work[i__2].i;
		    zladiv_(&z__1, &z__2, &d__);
		    work[i__1].r = z__1.r;
		    work[i__1].i = z__1.i;

		    if (j > 1) {

/*                    w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling */

			i__1 = j;
			if ((d__1 = work[i__1].r, abs(d__1)) + (d__2 =
				d_imag(&work[j]), abs(d__2)) > 1.) {
			    i__1 = j;
			    temp = 1. / ((d__1 = work[i__1].r, abs(d__1)) + (
				    d__2 = d_imag(&work[j]), abs(d__2)));
			    if (acoefa * rwork[j] + bcoefa * rwork[*n + j] >=
				    bignum * temp) {
				i__1 = je;
				for (jr = 1; jr <= i__1; ++jr) {
				    i__2 = jr;
				    i__3 = jr;
				    z__1.r = temp * work[i__3].r;
				    z__1.i = temp * work[i__3].i;
				    work[i__2].r = z__1.r;
				    work[i__2].i = z__1.i;
/* L190: */
				}
			    }
			}
			i__1 = j;
			z__1.r = acoeff * work[i__1].r;
			z__1.i = acoeff * work[i__1].i;
			ca.r = z__1.r;
			ca.i = z__1.i;
			i__1 = j;
			z__1.r = bcoeff.r * work[i__1].r - bcoeff.i * work[
				i__1].i;
			z__1.i = bcoeff.r * work[i__1].i + bcoeff.i * work[
				i__1].r;
			cb.r = z__1.r;
			cb.i = z__1.i;
			i__1 = j - 1;
			for (jr = 1; jr <= i__1; ++jr) {
			    i__2 = jr;
			    i__3 = jr;
			    i__4 = jr + j * s_dim1;
			    z__3.r = ca.r * s[i__4].r - ca.i * s[i__4].i;
			    z__3.i = ca.r * s[i__4].i + ca.i * s[i__4].r;
			    z__2.r = work[i__3].r + z__3.r;
			    z__2.i = work[i__3].i + z__3.i;
			    i__5 = jr + j * p_dim1;
			    z__4.r = cb.r * p[i__5].r - cb.i * p[i__5].i;
			    z__4.i = cb.r * p[i__5].i + cb.i * p[i__5].r;
			    z__1.r = z__2.r - z__4.r;
			    z__1.i = z__2.i - z__4.i;
			    work[i__2].r = z__1.r;
			    work[i__2].i = z__1.i;
/* L200: */
			}
		    }
/* L210: */
		}

/*              Back transform eigenvector if HOWMNY='B'. */

		if (ilback) {
		    /* Result goes into WORK(n+1:2n); isrc selects which
		       half of WORK the copy-out below reads. */
		    zgemv_("N", n, &je, &c_b2, &vr[vr_offset], ldvr, &work[1],
			     &c__1, &c_b1, &work[*n + 1], &c__1);
		    isrc = 2;
		    iend = *n;
		} else {
		    isrc = 1;
		    iend = je;
		}

/*              Copy and scale eigenvector into column of VR */

		xmax = 0.;
		i__1 = iend;
		for (jr = 1; jr <= i__1; ++jr) {
/* Computing MAX */
		    i__2 = (isrc - 1) * *n + jr;
		    d__3 = xmax;
		    d__4 = (d__1 = work[i__2].r, abs(d__1)) + (d__2 = d_imag(
			    &work[(isrc - 1) * *n + jr]), abs(d__2));
		    xmax = max(d__3,d__4);
/* L220: */
		}

		if (xmax > safmin) {
		    /* Normalize so the largest |component| is 1. */
		    temp = 1. / xmax;
		    i__1 = iend;
		    for (jr = 1; jr <= i__1; ++jr) {
			i__2 = jr + ieig * vr_dim1;
			i__3 = (isrc - 1) * *n + jr;
			z__1.r = temp * work[i__3].r;
			z__1.i = temp * work[i__3].i;
			vr[i__2].r = z__1.r;
			vr[i__2].i = z__1.i;
/* L230: */
		    }
		} else {
		    iend = 0;
		}

		i__1 = *n;
		for (jr = iend + 1; jr <= i__1; ++jr) {
		    i__2 = jr + ieig * vr_dim1;
		    vr[i__2].r = 0.;
		    vr[i__2].i = 0.;
/* L240: */
		}

	    }
L250:
	    ;
	}
    }

    return 0;

/*     End of ZTGEVC */

}
/* Subroutine */ int zhptrs_(char *uplo, integer *n, integer *nrhs,
	doublecomplex *ap, integer *ipiv, doublecomplex *b, integer *ldb,
	integer *info)
{
    /* f2c-translated LAPACK routine: solves A*X = B for a Hermitian matrix A
       held in packed storage, using the U*D*U**H or L*D*L**H factorization
       produced by ZHPTRF.  All array arguments use 1-based Fortran indexing
       after the pointer adjustments below. */

    /* System generated locals */
    integer b_dim1, b_offset, i__1, i__2;
    doublecomplex z__1, z__2, z__3;

    /* Builtin functions */
    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
	    doublecomplex *, doublecomplex *);

    /* Local variables */
    integer j, k;
    doublereal s;
    doublecomplex ak, bk;
    integer kc, kp;   /* kc: 1-based offset in AP of the current column's data */
    doublecomplex akm1, bkm1, akm1k;
    extern logical lsame_(char *, char *);
    doublecomplex denom;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *);
    logical upper;
    extern /* Subroutine */ int zgeru_(integer *, integer *, doublecomplex *,
	    doublecomplex *, integer *, doublecomplex *, integer *,
	    doublecomplex *, integer *), zswap_(integer *, doublecomplex *,
	    integer *, doublecomplex *, integer *), xerbla_(char *, integer *),
	    zdscal_(integer *, doublereal *, doublecomplex *, integer *),
	    zlacgv_(integer *, doublecomplex *, integer *);

/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  ZHPTRS solves a system of linear equations A*X = B with a complex */
/*  Hermitian matrix A stored in packed format using the factorization */
/*  A = U*D*U**H or A = L*D*L**H computed by ZHPTRF. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the details of the factorization are stored */
/*          as an upper or lower triangular matrix. */
/*          = 'U':  Upper triangular, form is A = U*D*U**H; */
/*          = 'L':  Lower triangular, form is A = L*D*L**H. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of columns */
/*          of the matrix B.  NRHS >= 0. */

/*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2) */
/*          The block diagonal matrix D and the multipliers used to */
/*          obtain the factor U or L as computed by ZHPTRF, stored as a */
/*          packed triangular matrix. */

/*  IPIV    (input) INTEGER array, dimension (N) */
/*          Details of the interchanges and the block structure of D */
/*          as determined by ZHPTRF. */

/*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS) */
/*          On entry, the right hand side matrix B. */
/*          On exit, the solution matrix X. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments: shift pointers so 1-based Fortran indexing works */
    --ap;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZHPTRS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	return 0;
    }

    if (upper) {

/*        Solve A*X = B, where A = U*D*U'. */

/*        First solve U*D*X = B, overwriting B with X. */

/*        K is the main loop index, decreasing from N to 1 in steps of */
/*        1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
	kc = *n * (*n + 1) / 2 + 1;
L10:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L30;
	}

	kc -= k;   /* kc now points at the start of packed column k */
	if (ipiv[k] > 0) {

/*           1 x 1 diagonal block */

/*           Interchange rows K and IPIV(K). */

	    kp = ipiv[k];
	    if (kp != k) {
		zswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }

/*           Multiply by inv(U(K)), where U(K) is the transformation */
/*           stored in column K of A. */

	    i__1 = k - 1;
	    z__1.r = -1., z__1.i = -0.;
	    zgeru_(&i__1, nrhs, &z__1, &ap[kc], &c__1, &b[k + b_dim1], ldb, &
		    b[b_dim1 + 1], ldb);

/*           Multiply by the inverse of the diagonal block. */
/*           Diagonal of D is real for a Hermitian factorization, so only
	     the real part is used. */

	    i__1 = kc + k - 1;
	    s = 1. / ap[i__1].r;
	    zdscal_(nrhs, &s, &b[k + b_dim1], ldb);
	    --k;
	} else {

/*           2 x 2 diagonal block */

/*           Interchange rows K-1 and -IPIV(K). */

	    kp = -ipiv[k];
	    if (kp != k - 1) {
		zswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }

/*           Multiply by inv(U(K)), where U(K) is the transformation */
/*           stored in columns K-1 and K of A. */

	    i__1 = k - 2;
	    z__1.r = -1., z__1.i = -0.;
	    zgeru_(&i__1, nrhs, &z__1, &ap[kc], &c__1, &b[k + b_dim1], ldb, &
		    b[b_dim1 + 1], ldb);
	    i__1 = k - 2;
	    z__1.r = -1., z__1.i = -0.;
	    zgeru_(&i__1, nrhs, &z__1, &ap[kc - (k - 1)], &c__1, &b[k - 1 +
		    b_dim1], ldb, &b[b_dim1 + 1], ldb);

/*           Multiply by the inverse of the diagonal block. */
/*           Explicit 2x2 inverse via the scaled determinant `denom`. */

	    i__1 = kc + k - 2;
	    akm1k.r = ap[i__1].r, akm1k.i = ap[i__1].i;
	    z_div(&z__1, &ap[kc - 1], &akm1k);
	    akm1.r = z__1.r, akm1.i = z__1.i;
	    d_cnjg(&z__2, &akm1k);
	    z_div(&z__1, &ap[kc + k - 1], &z__2);
	    ak.r = z__1.r, ak.i = z__1.i;
	    z__2.r = akm1.r * ak.r - akm1.i * ak.i, z__2.i = akm1.r * ak.i +
		    akm1.i * ak.r;
	    z__1.r = z__2.r - 1., z__1.i = z__2.i - 0.;
	    denom.r = z__1.r, denom.i = z__1.i;
	    i__1 = *nrhs;
	    for (j = 1; j <= i__1; ++j) {
		z_div(&z__1, &b[k - 1 + j * b_dim1], &akm1k);
		bkm1.r = z__1.r, bkm1.i = z__1.i;
		d_cnjg(&z__2, &akm1k);
		z_div(&z__1, &b[k + j * b_dim1], &z__2);
		bk.r = z__1.r, bk.i = z__1.i;
		i__2 = k - 1 + j * b_dim1;
		z__3.r = ak.r * bkm1.r - ak.i * bkm1.i, z__3.i = ak.r *
			bkm1.i + ak.i * bkm1.r;
		z__2.r = z__3.r - bk.r, z__2.i = z__3.i - bk.i;
		z_div(&z__1, &z__2, &denom);
		b[i__2].r = z__1.r, b[i__2].i = z__1.i;
		i__2 = k + j * b_dim1;
		z__3.r = akm1.r * bk.r - akm1.i * bk.i, z__3.i = akm1.r *
			bk.i + akm1.i * bk.r;
		z__2.r = z__3.r - bkm1.r, z__2.i = z__3.i - bkm1.i;
		z_div(&z__1, &z__2, &denom);
		b[i__2].r = z__1.r, b[i__2].i = z__1.i;
/* L20: */
	    }
	    kc = kc - k + 1;
	    k += -2;
	}

	goto L10;
L30:

/*        Next solve U'*X = B, overwriting B with X. */

/*        K is the main loop index, increasing from 1 to N in steps of */
/*        1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
	kc = 1;
L40:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L50;
	}

	if (ipiv[k] > 0) {

/*           1 x 1 diagonal block */

/*           Multiply by inv(U'(K)), where U(K) is the transformation */
/*           stored in column K of A. */
/*           zlacgv_ conjugates row k of B before/after so the gemv
	     computes the conjugate-transposed update in-place. */

	    if (k > 1) {
		zlacgv_(nrhs, &b[k + b_dim1], ldb);
		i__1 = k - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("Conjugate transpose", &i__1, nrhs, &z__1, &b[b_offset]
			, ldb, &ap[kc], &c__1, &c_b1, &b[k + b_dim1], ldb);
		zlacgv_(nrhs, &b[k + b_dim1], ldb);
	    }

/*           Interchange rows K and IPIV(K). */

	    kp = ipiv[k];
	    if (kp != k) {
		zswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }
	    kc += k;
	    ++k;
	} else {

/*           2 x 2 diagonal block */

/*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation */
/*           stored in columns K and K+1 of A. */

	    if (k > 1) {
		zlacgv_(nrhs, &b[k + b_dim1], ldb);
		i__1 = k - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("Conjugate transpose", &i__1, nrhs, &z__1, &b[b_offset]
			, ldb, &ap[kc], &c__1, &c_b1, &b[k + b_dim1], ldb);
		zlacgv_(nrhs, &b[k + b_dim1], ldb);

		zlacgv_(nrhs, &b[k + 1 + b_dim1], ldb);
		i__1 = k - 1;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("Conjugate transpose", &i__1, nrhs, &z__1, &b[b_offset]
			, ldb, &ap[kc + k], &c__1, &c_b1, &b[k + 1 + b_dim1],
			ldb);
		zlacgv_(nrhs, &b[k + 1 + b_dim1], ldb);
	    }

/*           Interchange rows K and -IPIV(K). */

	    kp = -ipiv[k];
	    if (kp != k) {
		zswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }
	    kc = kc + (k << 1) + 1;
	    k += 2;
	}

	goto L40;
L50:

	;
    } else {

/*        Solve A*X = B, where A = L*D*L'. */

/*        First solve L*D*X = B, overwriting B with X. */

/*        K is the main loop index, increasing from 1 to N in steps of */
/*        1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
	kc = 1;
L60:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L80;
	}

	if (ipiv[k] > 0) {

/*           1 x 1 diagonal block */

/*           Interchange rows K and IPIV(K). */

	    kp = ipiv[k];
	    if (kp != k) {
		zswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }

/*           Multiply by inv(L(K)), where L(K) is the transformation */
/*           stored in column K of A. */

	    if (k < *n) {
		i__1 = *n - k;
		z__1.r = -1., z__1.i = -0.;
		zgeru_(&i__1, nrhs, &z__1, &ap[kc + 1], &c__1, &b[k + b_dim1],
			 ldb, &b[k + 1 + b_dim1], ldb);
	    }

/*           Multiply by the inverse of the diagonal block. */

	    i__1 = kc;
	    s = 1. / ap[i__1].r;
	    zdscal_(nrhs, &s, &b[k + b_dim1], ldb);
	    kc = kc + *n - k + 1;
	    ++k;
	} else {

/*           2 x 2 diagonal block */

/*           Interchange rows K+1 and -IPIV(K). */

	    kp = -ipiv[k];
	    if (kp != k + 1) {
		zswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }

/*           Multiply by inv(L(K)), where L(K) is the transformation */
/*           stored in columns K and K+1 of A. */

	    if (k < *n - 1) {
		i__1 = *n - k - 1;
		z__1.r = -1., z__1.i = -0.;
		zgeru_(&i__1, nrhs, &z__1, &ap[kc + 2], &c__1, &b[k + b_dim1],
			 ldb, &b[k + 2 + b_dim1], ldb);
		i__1 = *n - k - 1;
		z__1.r = -1., z__1.i = -0.;
		zgeru_(&i__1, nrhs, &z__1, &ap[kc + *n - k + 2], &c__1, &b[k
			+ 1 + b_dim1], ldb, &b[k + 2 + b_dim1], ldb);
	    }

/*           Multiply by the inverse of the diagonal block. */

	    i__1 = kc + 1;
	    akm1k.r = ap[i__1].r, akm1k.i = ap[i__1].i;
	    d_cnjg(&z__2, &akm1k);
	    z_div(&z__1, &ap[kc], &z__2);
	    akm1.r = z__1.r, akm1.i = z__1.i;
	    z_div(&z__1, &ap[kc + *n - k + 1], &akm1k);
	    ak.r = z__1.r, ak.i = z__1.i;
	    z__2.r = akm1.r * ak.r - akm1.i * ak.i, z__2.i = akm1.r * ak.i +
		    akm1.i * ak.r;
	    z__1.r = z__2.r - 1., z__1.i = z__2.i - 0.;
	    denom.r = z__1.r, denom.i = z__1.i;
	    i__1 = *nrhs;
	    for (j = 1; j <= i__1; ++j) {
		d_cnjg(&z__2, &akm1k);
		z_div(&z__1, &b[k + j * b_dim1], &z__2);
		bkm1.r = z__1.r, bkm1.i = z__1.i;
		z_div(&z__1, &b[k + 1 + j * b_dim1], &akm1k);
		bk.r = z__1.r, bk.i = z__1.i;
		i__2 = k + j * b_dim1;
		z__3.r = ak.r * bkm1.r - ak.i * bkm1.i, z__3.i = ak.r *
			bkm1.i + ak.i * bkm1.r;
		z__2.r = z__3.r - bk.r, z__2.i = z__3.i - bk.i;
		z_div(&z__1, &z__2, &denom);
		b[i__2].r = z__1.r, b[i__2].i = z__1.i;
		i__2 = k + 1 + j * b_dim1;
		z__3.r = akm1.r * bk.r - akm1.i * bk.i, z__3.i = akm1.r *
			bk.i + akm1.i * bk.r;
		z__2.r = z__3.r - bkm1.r, z__2.i = z__3.i - bkm1.i;
		z_div(&z__1, &z__2, &denom);
		b[i__2].r = z__1.r, b[i__2].i = z__1.i;
/* L70: */
	    }
	    kc = kc + (*n - k << 1) + 1;
	    k += 2;
	}

	goto L60;
L80:

/*        Next solve L'*X = B, overwriting B with X. */

/*        K is the main loop index, decreasing from N to 1 in steps of */
/*        1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
	kc = *n * (*n + 1) / 2 + 1;
L90:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L100;
	}

	kc -= *n - k + 1;
	if (ipiv[k] > 0) {

/*           1 x 1 diagonal block */

/*           Multiply by inv(L'(K)), where L(K) is the transformation */
/*           stored in column K of A. */

	    if (k < *n) {
		zlacgv_(nrhs, &b[k + b_dim1], ldb);
		i__1 = *n - k;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("Conjugate transpose", &i__1, nrhs, &z__1, &b[k + 1 +
			b_dim1], ldb, &ap[kc + 1], &c__1, &c_b1, &b[k +
			b_dim1], ldb);
		zlacgv_(nrhs, &b[k + b_dim1], ldb);
	    }

/*           Interchange rows K and IPIV(K). */

	    kp = ipiv[k];
	    if (kp != k) {
		zswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }
	    --k;
	} else {

/*           2 x 2 diagonal block */

/*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation */
/*           stored in columns K-1 and K of A. */

	    if (k < *n) {
		zlacgv_(nrhs, &b[k + b_dim1], ldb);
		i__1 = *n - k;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("Conjugate transpose", &i__1, nrhs, &z__1, &b[k + 1 +
			b_dim1], ldb, &ap[kc + 1], &c__1, &c_b1, &b[k +
			b_dim1], ldb);
		zlacgv_(nrhs, &b[k + b_dim1], ldb);

		zlacgv_(nrhs, &b[k - 1 + b_dim1], ldb);
		i__1 = *n - k;
		z__1.r = -1., z__1.i = -0.;
		zgemv_("Conjugate transpose", &i__1, nrhs, &z__1, &b[k + 1 +
			b_dim1], ldb, &ap[kc - (*n - k)], &c__1, &c_b1, &b[k
			- 1 + b_dim1], ldb);
		zlacgv_(nrhs, &b[k - 1 + b_dim1], ldb);
	    }

/*           Interchange rows K and -IPIV(K). */

	    kp = -ipiv[k];
	    if (kp != k) {
		zswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }
	    kc -= *n - k + 2;
	    k += -2;
	}

	goto L90;
L100:
	;
    }

    return 0;

/*     End of ZHPTRS */

} /* zhptrs_ */
int
sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
         SuperMatrix *U, doublecomplex *x, int *info)
{
/*
 *   Purpose
 *   =======
 *
 *   sp_ztrsv() solves one of the systems of equations
 *       A*x = b,   or   A'*x = b,
 *   where b and x are n element vectors and A is a sparse unit , or
 *   non-unit, upper or lower triangular matrix.
 *   No test for singularity or near-singularity is included in this
 *   routine. Such tests must be performed before calling this routine.
 *
 *   Parameters
 *   ==========
 *
 *   uplo   - (input) char*
 *            On entry, uplo specifies whether the matrix is an upper or
 *             lower triangular matrix as follows:
 *                uplo = 'U' or 'u'   A is an upper triangular matrix.
 *                uplo = 'L' or 'l'   A is a lower triangular matrix.
 *
 *   trans  - (input) char*
 *             On entry, trans specifies the equations to be solved as
 *             follows:
 *                trans = 'N' or 'n'   A*x = b.
 *                trans = 'T' or 't'   A'*x = b.
 *                trans = 'C' or 'c'   A'*x = b.
 *            NOTE(review): 'C' is documented here but the argument check
 *            below rejects anything other than 'N'/'T'; callers wanting a
 *            (conjugate) transpose solve must pass 'T'.  Confirm intended.
 *
 *   diag   - (input) char*
 *             On entry, diag specifies whether or not A is unit
 *             triangular as follows:
 *                diag = 'U' or 'u'   A is assumed to be unit triangular.
 *                diag = 'N' or 'n'   A is not assumed to be unit
 *                                    triangular.
 *
 *   L       - (input) SuperMatrix*
 *             The factor L from the factorization Pr*A*Pc=L*U. Use
 *             compressed row subscripts storage for supernodes,
 *             i.e., L has types: Stype = SC, Dtype = SLU_Z, Mtype = TRLU.
 *
 *   U       - (input) SuperMatrix*
 *              The factor U from the factorization Pr*A*Pc=L*U.
 *              U has types: Stype = NC, Dtype = SLU_Z, Mtype = TRU.
 *
 *   x       - (input/output) doublecomplex*
 *             Before entry, the incremented array X must contain the n
 *             element right-hand side vector b. On exit, X is overwritten
 *             with the solution vector x.
 *
 *   info    - (output) int*
 *             If *info = -i, the i-th argument had an illegal value.
 *
 */
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
	 ftcs2 = _cptofcd("N", strlen("N")),
	 ftcs3 = _cptofcd("U", strlen("U"));
#endif
    SCformat *Lstore;
    NCformat *Ustore;
    doublecomplex   *Lval, *Uval;
    int incx = 1, incy = 1;
    doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
    doublecomplex comp_zero = {0.0, 0.0};
    int nrow;
    int fsupc, nsupr, nsupc, luptr, istart, irow;
    int i, k, iptr, jcol;
    doublecomplex *work;       /* dense scratch vector for supernodal GEMV results */
    flops_t solve_ops;
    extern SuperLUStat_t SuperLUStat;

    /* Test the input parameters */
    *info = 0;
    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
    if ( *info ) {
	i = -(*info);
	xerbla_("sp_ztrsv", &i);
	return 0;
    }

    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    solve_ops = 0;

    if ( !(work = doublecomplexCalloc(L->nrow)) )
	ABORT("Malloc fails for work in sp_ztrsv().");

    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */

	if ( lsame_(uplo, "L") ) {
	    /* Form x := inv(L)*x */
	    /* BUG FIX: free the work buffer on the quick-return path
	       (previously leaked on every zero-dimension call). */
	    if ( L->nrow == 0 ) { SUPERLU_FREE(work); return 0; } /* Quick return */

	    for (k = 0; k <= Lstore->nsuper; k++) {
		fsupc = L_FST_SUPC(k);
		istart = L_SUB_START(fsupc);
		nsupr = L_SUB_START(fsupc+1) - istart;
		nsupc = L_FST_SUPC(k+1) - fsupc;
		luptr = L_NZ_START(fsupc);
		nrow = nsupr - nsupc;

	        solve_ops += 4 * nsupc * (nsupc - 1);
	        solve_ops += 8 * nrow * nsupc;

		if ( nsupc == 1 ) {
		    /* Singleton supernode: scatter-update directly, no BLAS */
		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
			irow = L_SUB(iptr);
			++luptr;
			zz_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
			z_sub(&x[irow], &x[irow], &comp_zero);
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);

		    CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#else
		    ztrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);

		    zgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#endif
#else
		    zlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);

		    zmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
			&x[fsupc], &work[0] );
#endif
		    /* Scatter the dense GEMV result back into x and reset work */
		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; ++i, ++iptr) {
			irow = L_SUB(iptr);
			z_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
			work[i] = comp_zero;
		    }
	 	}
	    } /* for k ... */

	} else {
	    /* Form x := inv(U)*x */
	    if ( U->nrow == 0 ) { SUPERLU_FREE(work); return 0; } /* Quick return */

	    for (k = Lstore->nsuper; k >= 0; k--) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

    	        solve_ops += 4 * nsupc * (nsupc + 1);

		if ( nsupc == 1 ) {
		    z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
			irow = U_SUB(i);
			zz_mult(&comp_zero, &x[fsupc], &Uval[i]);
			z_sub(&x[irow], &x[irow], &comp_zero);
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
		       &x[fsupc], &incx);
#else
		    ztrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
			   &x[fsupc], &incx);
#endif
#else
		    zusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
#endif

		    /* Off-diagonal columns of U: scatter-update the rest of x */
		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		        solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
				i++) {
			    irow = U_SUB(i);
			    zz_mult(&comp_zero, &x[jcol], &Uval[i]);
			    z_sub(&x[irow], &x[irow], &comp_zero);
		    	}
                    }
		}
	    } /* for k ... */

	}
    } else { /* Form x := inv(A')*x */

	if ( lsame_(uplo, "L") ) {
	    /* Form x := inv(L')*x */
	    if ( L->nrow == 0 ) { SUPERLU_FREE(work); return 0; } /* Quick return */

	    for (k = Lstore->nsuper; k >= 0; --k) {
	    	fsupc = L_FST_SUPC(k);
	    	istart = L_SUB_START(fsupc);
	    	nsupr = L_SUB_START(fsupc+1) - istart;
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		solve_ops += 8 * (nsupr - nsupc) * nsupc;

		/* Dot products of x with the below-diagonal rows of each
		   column of this supernode */
		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    iptr = istart + nsupc;
		    for (i = L_NZ_START(jcol) + nsupc;
				i < L_NZ_START(jcol+1); i++) {
			irow = L_SUB(iptr);
			zz_mult(&comp_zero, &x[irow], &Lval[i]);
		    	z_sub(&x[jcol], &x[jcol], &comp_zero);
			iptr++;
		    }
		}

		if ( nsupc > 1 ) {
		    solve_ops += 4 * nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#else
		    ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#endif
		}
	    }
	} else {
	    /* Form x := inv(U')*x */
	    if ( U->nrow == 0 ) { SUPERLU_FREE(work); return 0; } /* Quick return */

	    for (k = 0; k <= Lstore->nsuper; k++) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
			irow = U_SUB(i);
			zz_mult(&comp_zero, &x[irow], &Uval[i]);
		    	z_sub(&x[jcol], &x[jcol], &comp_zero);
		    }
		}

                solve_ops += 4 * nsupc * (nsupc + 1);

		if ( nsupc == 1 ) {
		    z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
		} else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#else
		    ztrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#endif
		}
	    } /* for k ... */
	}
    }

    SuperLUStat.ops[SOLVE] += solve_ops;
    SUPERLU_FREE(work);
    return 0;
}
/* Subroutine */ int zlaror_slu(char *side, char *init, integer *m, integer *n,
	doublecomplex *a, integer *lda, integer *iseed, doublecomplex *x,
	integer *info)
{
    /* f2c-translated LAPACK test routine: multiplies A by a random unitary
       matrix built from a product of Householder reflectors plus a random
       diagonal scaling.  The workspace X holds, in order: the current
       Householder vector (1..nxfrm), the accumulated diagonal D
       (nxfrm+1..2*nxfrm), and GEMV scratch (2*nxfrm+1..). */

    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublecomplex z__1, z__2;

    /* Builtin functions */
    double z_abs(doublecomplex *);
    void d_cnjg(doublecomplex *, doublecomplex *);

    /* Local variables */
    static integer kbeg, jcol;
    static doublereal xabs;
    static integer irow, j;
    static doublecomplex csign;
    extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *,
	    doublecomplex *, integer *, doublecomplex *, integer *,
	    doublecomplex *, integer *), zscal_(integer *, doublecomplex *,
	    doublecomplex *, integer *);
    static integer ixfrm;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *);
    static integer itype, nxfrm;
    static doublereal xnorm;
    extern doublereal dznrm2_(integer *, doublecomplex *, integer *);
    extern int input_error(char *, int *);
    static doublereal factor;
    extern /* Subroutine */ int zlacgv_slu(integer *, doublecomplex *, integer *)
	    ;
    extern /* Double Complex */ VOID zlarnd_slu(doublecomplex *, integer *,
	    integer *);
    extern /* Subroutine */ int zlaset_slu(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, doublecomplex *, integer *);
    static doublecomplex xnorms;


/*  -- LAPACK auxiliary test routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994


    Purpose
    =======

       ZLAROR pre- or post-multiplies an M by N matrix A by a random
       unitary matrix U, overwriting A. A may optionally be
       initialized to the identity matrix before multiplying by U.
       U is generated using the method of G.W. Stewart
       ( SIAM J. Numer. Anal. 17, 1980, pp. 403-409 ).
       (BLAS-2 version)

    Arguments
    =========

    SIDE   - CHARACTER*1
             SIDE specifies whether A is multiplied on the left or right

             by U.
         SIDE = 'L'   Multiply A on the left (premultiply) by U
         SIDE = 'R'   Multiply A on the right (postmultiply) by U*
         SIDE = 'C'   Multiply A on the left by U and the right by U*
         SIDE = 'T'   Multiply A on the left by U and the right by U'
             Not modified.

    INIT   - CHARACTER*1
             INIT specifies whether or not A should be initialized to
             the identity matrix.
                INIT = 'I'   Initialize A to (a section of) the
                             identity matrix before applying U.
                INIT = 'N'   No initialization.  Apply U to the
                             input matrix A.

             INIT = 'I' may be used to generate square (i.e., unitary)
             or rectangular orthogonal matrices (orthogonality being
             in the sense of ZDOTC):

             For square matrices, M=N, and SIDE many be either 'L' or
             'R'; the rows will be orthogonal to each other, as will the

             columns.
             For rectangular matrices where M < N, SIDE = 'R' will
             produce a dense matrix whose rows will be orthogonal and
             whose columns will not, while SIDE = 'L' will produce a
             matrix whose rows will be orthogonal, and whose first M
             columns will be orthogonal, the remaining columns being
             zero.
             For matrices where M > N, just use the previous
             explaination, interchanging 'L' and 'R' and "rows" and
             "columns".

             Not modified.

    M      - INTEGER
             Number of rows of A. Not modified.

    N      - INTEGER
             Number of columns of A. Not modified.

    A      - COMPLEX*16 array, dimension ( LDA, N )
             Input and output array. Overwritten by U A ( if SIDE = 'L' )

             or by A U ( if SIDE = 'R' )
             or by U A U* ( if SIDE = 'C')
             or by U A U' ( if SIDE = 'T') on exit.

    LDA    - INTEGER
             Leading dimension of A. Must be at least MAX ( 1, M ).
             Not modified.

    ISEED  - INTEGER array, dimension ( 4 )
             On entry ISEED specifies the seed of the random number
             generator. The array elements should be between 0 and 4095;

             if not they will be reduced mod 4096.  Also, ISEED(4) must
             be odd.  The random number generator uses a linear
             congruential sequence limited to small integers, and so
             should produce machine independent random numbers. The
             values of ISEED are changed on exit, and can be used in the

             next call to ZLAROR to continue the same random number
             sequence.
             Modified.

    X      - COMPLEX*16 array, dimension ( 3*MAX( M, N ) )
             Workspace. Of length:
                 2*M + N if SIDE = 'L',
                 2*N + M if SIDE = 'R',
                 3*N     if SIDE = 'C' or 'T'.
             Modified.

    INFO   - INTEGER
             An error flag.  It is set to:
              0  if no error.
              1  if ZLARND returned a bad random number (installation
                 problem)
             -1  if SIDE is not L, R, C, or T.
             -3  if M is negative.
             -4  if N is negative or if SIDE is C or T and N is not
                 equal to M.
             -6  if LDA is less than M.

    =====================================================================



       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = a_dim1 + 1;
    a -= a_offset;
    --iseed;
    --x;

    /* Function Body */
    if (*n == 0 || *m == 0) {
	return 0;
    }

    /* itype encodes SIDE: 1='L', 2='R', 3='C', 4='T'; 0 means invalid. */
    itype = 0;
    if (strncmp(side, "L", 1)==0) {
	itype = 1;
    } else if (strncmp(side, "R", 1)==0) {
	itype = 2;
    } else if (strncmp(side, "C", 1)==0) {
	itype = 3;
    } else if (strncmp(side, "T", 1)==0) {
	itype = 4;
    }

/*     Check for argument errors. */

    *info = 0;
    if (itype == 0) {
	*info = -1;
    } else if (*m < 0) {
	*info = -3;
    } else if (*n < 0 || itype == 3 && *n != *m) {
	*info = -4;
    } else if (*lda < *m) {
	*info = -6;
    }
    if (*info != 0) {
	i__1 = -(*info);
	input_error("ZLAROR", &i__1);
	return 0;
    }

    /* nxfrm = order of the random unitary factor U */
    if (itype == 1) {
	nxfrm = *m;
    } else {
	nxfrm = *n;
    }

/*     Initialize A to the identity matrix if desired */

    if (strncmp(init, "I", 1)==0) {
	zlaset_slu("Full", m, n, &c_b1, &c_b2, &a[a_offset], lda);
    }

/*     If no rotation possible, still multiply by
       a random complex number from the circle |x| = 1

        2)      Compute Rotation by computing Householder
                Transformations H(2), H(3), ..., H(n).  Note that the
                order in which they are computed is irrelevant. */

    i__1 = nxfrm;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j;
	x[i__2].r = 0., x[i__2].i = 0.;
/* L10: */
    }

    i__1 = nxfrm;
    for (ixfrm = 2; ixfrm <= i__1; ++ixfrm) {
	kbeg = nxfrm - ixfrm + 1;

/*        Generate independent normal( 0, 1 ) random numbers */

	i__2 = nxfrm;
	for (j = kbeg; j <= i__2; ++j) {
	    i__3 = j;
	    zlarnd_slu(&z__1, &c__3, &iseed[1]);
	    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
/* L20: */
	}

/*        Generate a Householder transformation from the random vector X */

	xnorm = dznrm2_(&ixfrm, &x[kbeg], &c__1);
	xabs = z_abs(&x[kbeg]);
	/* csign = x[kbeg]/|x[kbeg]|: phase used so the reflector is
	   continuous; defaults to 1 when x[kbeg] == 0. */
	if (xabs != 0.) {
	    i__2 = kbeg;
	    z__1.r = x[i__2].r / xabs, z__1.i = x[i__2].i / xabs;
	    csign.r = z__1.r, csign.i = z__1.i;
	} else {
	    csign.r = 1., csign.i = 0.;
	}
	z__1.r = xnorm * csign.r, z__1.i = xnorm * csign.i;
	xnorms.r = z__1.r, xnorms.i = z__1.i;
	/* Record -csign as the diagonal entry D(kbeg) in the second third
	   of the workspace. */
	i__2 = nxfrm + kbeg;
	z__1.r = -csign.r, z__1.i = -csign.i;
	x[i__2].r = z__1.r, x[i__2].i = z__1.i;
	factor = xnorm * (xnorm + xabs);
	if (abs(factor) < 1e-20) {
	    *info = 1;
	    i__2 = -(*info);
	    input_error("ZLAROR", &i__2);
	    return 0;
	} else {
	    factor = 1. / factor;
	}
	i__2 = kbeg;
	i__3 = kbeg;
	z__1.r = x[i__3].r + xnorms.r, z__1.i = x[i__3].i + xnorms.i;
	x[i__2].r = z__1.r, x[i__2].i = z__1.i;

/*        Apply Householder transformation to A */

	if (itype == 1 || itype == 3 || itype == 4) {

/*           Apply H(k) on the left of A */

	    zgemv_("C", &ixfrm, n, &c_b2, &a[kbeg + a_dim1], lda, &x[kbeg], &
		    c__1, &c_b1, &x[(nxfrm << 1) + 1], &c__1);
	    z__2.r = factor, z__2.i = 0.;
	    z__1.r = -z__2.r, z__1.i = -z__2.i;
	    zgerc_(&ixfrm, n, &z__1, &x[kbeg], &c__1, &x[(nxfrm << 1) + 1], &
		    c__1, &a[kbeg + a_dim1], lda);

	}

	if (itype >= 2 && itype <= 4) {

/*           Apply H(k)* (or H(k)') on the right of A */

	    if (itype == 4) {
		zlacgv_slu(&ixfrm, &x[kbeg], &c__1);
	    }

	    zgemv_("N", m, &ixfrm, &c_b2, &a[kbeg * a_dim1 + 1], lda, &x[kbeg]
		    , &c__1, &c_b1, &x[(nxfrm << 1) + 1], &c__1);
	    z__2.r = factor, z__2.i = 0.;
	    z__1.r = -z__2.r, z__1.i = -z__2.i;
	    zgerc_(m, &ixfrm, &z__1, &x[(nxfrm << 1) + 1], &c__1, &x[kbeg], &
		    c__1, &a[kbeg * a_dim1 + 1], lda);

	}
/* L30: */
    }

    /* Final random phase for the first coordinate (the ixfrm==1 case). */
    zlarnd_slu(&z__1, &c__3, &iseed[1]);
    x[1].r = z__1.r, x[1].i = z__1.i;
    xabs = z_abs(&x[1]);
    if (xabs != 0.) {
	z__1.r = x[1].r / xabs, z__1.i = x[1].i / xabs;
	csign.r = z__1.r, csign.i = z__1.i;
    } else {
	csign.r = 1., csign.i = 0.;
    }
    i__1 = nxfrm << 1;
    x[i__1].r = csign.r, x[i__1].i = csign.i;

/*     Scale the matrix A by D. */

    if (itype == 1 || itype == 3 || itype == 4) {
	i__1 = *m;
	for (irow = 1; irow <= i__1; ++irow) {
	    d_cnjg(&z__1, &x[nxfrm + irow]);
	    zscal_(n, &z__1, &a[irow + a_dim1], lda);
/* L40: */
	}
    }

    if (itype == 2 || itype == 3) {
	i__1 = *n;
	for (jcol = 1; jcol <= i__1; ++jcol) {
	    zscal_(m, &x[nxfrm + jcol], &a[jcol * a_dim1 + 1], &c__1);
/* L50: */
	}
    }

    if (itype == 4) {
	i__1 = *n;
	for (jcol = 1; jcol <= i__1; ++jcol) {
	    d_cnjg(&z__1, &x[nxfrm + jcol]);
	    zscal_(m, &z__1, &a[jcol * a_dim1 + 1], &c__1);
/* L60: */
	}
    }
    return 0;

/*     End of ZLAROR */

} /* zlaror_slu */
/* Subroutine */ int zlarft_(char *direct, char *storev, integer *n, integer *
	k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex *
	t, integer *ldt)
{
/*  -- LAPACK auxiliary routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994


    Purpose
    =======

    ZLARFT forms the triangular factor T of a complex block reflector H
    of order n, which is defined as a product of k elementary reflectors.

    If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;

    If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.

    If STOREV = 'C', the vector which defines the elementary reflector
    H(i) is stored in the i-th column of the array V, and

       H  =  I - V * T * V'

    If STOREV = 'R', the vector which defines the elementary reflector
    H(i) is stored in the i-th row of the array V, and

       H  =  I - V' * T * V

    Arguments
    =========

    DIRECT  (input) CHARACTER*1
            Specifies the order in which the elementary reflectors are
            multiplied to form the block reflector:
            = 'F': H = H(1) H(2) . . . H(k) (Forward)
            = 'B': H = H(k) . . . H(2) H(1) (Backward)

    STOREV  (input) CHARACTER*1
            Specifies how the vectors which define the elementary
            reflectors are stored (see also Further Details):
            = 'C': columnwise
            = 'R': rowwise

    N       (input) INTEGER
            The order of the block reflector H. N >= 0.

    K       (input) INTEGER
            The order of the triangular factor T (= the number of
            elementary reflectors). K >= 1.

    V       (input/output) COMPLEX*16 array, dimension
                                 (LDV,K) if STOREV = 'C'
                                 (LDV,N) if STOREV = 'R'
            The matrix V. See further details.

    LDV     (input) INTEGER
            The leading dimension of the array V.
            If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.

    TAU     (input) COMPLEX*16 array, dimension (K)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i).

    T       (output) COMPLEX*16 array, dimension (LDT,K)
            The k by k triangular factor T of the block reflector.
            If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is

            lower triangular. The rest of the array is not used.

    LDT     (input) INTEGER
            The leading dimension of the array T. LDT >= K.

    Further Details
    ===============

    The shape of the matrix V and the storage of the vectors which define

    the H(i) is best illustrated by the following example with n = 5 and

    k = 3. The elements equal to 1 are not stored; the corresponding
    array elements are modified but restored on exit. The rest of the
    array is not used.

    DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':

                 V = (  1       )                 V = (  1 v1 v1 v1 v1 )
                     ( v1  1    )                     (     1 v2 v2 v2 )
                     ( v1 v2  1 )                     (        1 v3 v3 )
                     ( v1 v2 v3 )
                     ( v1 v2 v3 )

    DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':

                 V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
                     ( v1 v2 v3 )                     ( v2 v2 v2  1    )
                     (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
                     (     1 v3 )
                     (        1 )

    =====================================================================



       Quick return if possible

       Parameter adjustments
       Function Body */
    /* Table of constant values */
    static doublecomplex c_b2 = {0.,0.};
    static integer c__1 = 1;

    /* System generated locals */
    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4;
    doublecomplex z__1;
    /* Local variables */
    static integer i, j;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
	    integer *, doublecomplex *, doublecomplex *, integer *), ztrmv_(
	    char *, char *, char *, integer *, doublecomplex *, integer *,
	    doublecomplex *, integer *), zlacgv_(integer *, doublecomplex *,
	    integer *);
    static doublecomplex vii;   /* saves V(i,i) while it is temporarily set to 1 */

/* 1-based Fortran-style accessors over the raw arrays (f2c translation). */
#define TAU(I) tau[(I)-1]
#define V(I,J) v[(I)-1 + ((J)-1)* ( *ldv)]
#define T(I,J) t[(I)-1 + ((J)-1)* ( *ldt)]

    if (*n == 0) {
	return 0;
    }

    if (lsame_(direct, "F")) {
	/* Forward: build T column by column; T is upper triangular. */
	i__1 = *k;
	for (i = 1; i <= *k; ++i) {
	    i__2 = i;
	    if (TAU(i).r == 0. && TAU(i).i == 0.) {

/*              H(i)  =  I */

		i__2 = i;
		for (j = 1; j <= i; ++j) {
		    i__3 = j + i * t_dim1;
		    T(j,i).r = 0., T(j,i).i = 0.;
/* L10: */
		}
	    } else {

/*              general case */

		/* Temporarily set V(i,i) = 1 (the implicit unit entry of
		   the reflector) so the GEMV below can use the full column;
		   restored from vii afterwards. */
		i__2 = i + i * v_dim1;
		vii.r = V(i,i).r, vii.i = V(i,i).i;
		i__2 = i + i * v_dim1;
		V(i,i).r = 1., V(i,i).i = 0.;
		if (lsame_(storev, "C")) {

/*                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */

		    i__2 = *n - i + 1;
		    i__3 = i - 1;
		    i__4 = i;
		    z__1.r = -TAU(i).r, z__1.i = -TAU(i).i;
		    zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &V(i,1),
			     ldv, &V(i,i), &c__1, &c_b2, &
			    T(1,i), &c__1);
		} else {

/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */

		    if (i < *n) {
			i__2 = *n - i;
			zlacgv_(&i__2, &V(i,i+1), ldv);
		    }
		    i__2 = i - 1;
		    i__3 = *n - i + 1;
		    i__4 = i;
		    z__1.r = -TAU(i).r, z__1.i = -TAU(i).i;
		    zgemv_("No transpose", &i__2, &i__3, &z__1, &V(1,i),
			     ldv, &V(i,i), ldv, &c_b2, &T(1,i), &c__1);
		    if (i < *n) {
			i__2 = *n - i;
			zlacgv_(&i__2, &V(i,i+1), ldv);
		    }
		}
		i__2 = i + i * v_dim1;
		V(i,i).r = vii.r, V(i,i).i = vii.i;

/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */

		i__2 = i - 1;
		ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &T(1,1),
			ldt, &T(1,i), &c__1);
		i__2 = i + i * t_dim1;
		i__3 = i;
		T(i,i).r = TAU(i).r, T(i,i).i = TAU(i).i;
	    }
/* L20: */
	}
    } else {
	/* Backward: build T column by column from the last reflector;
	   T is lower triangular. */
	for (i = *k; i >= 1; --i) {
	    i__1 = i;
	    if (TAU(i).r == 0. && TAU(i).i == 0.) {

/*              H(i)  =  I */

		i__1 = *k;
		for (j = i; j <= *k; ++j) {
		    i__2 = j + i * t_dim1;
		    T(j,i).r = 0., T(j,i).i = 0.;
/* L30: */
		}
	    } else {

/*              general case */

		if (i < *k) {
		    if (lsame_(storev, "C")) {
			/* Unit entry lives at V(n-k+i, i) for backward,
			   columnwise storage. */
			i__1 = *n - *k + i + i * v_dim1;
			vii.r = V(*n-*k+i,i).r, vii.i = V(*n-*k+i,i).i;
			i__1 = *n - *k + i + i * v_dim1;
			V(*n-*k+i,i).r = 1., V(*n-*k+i,i).i = 0.;

/*                    T(i+1:k,i) :=
                              - tau(i) * V(1:n-k+i,i+1 :k)' * V(1:n-k+i,i) */

			i__1 = *n - *k + i;
			i__2 = *k - i;
			i__3 = i;
			z__1.r = -TAU(i).r, z__1.i = -TAU(i).i;
			zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &
				V(1,i+1), ldv, &V(1,i)
				, &c__1, &c_b2, &T(i+1,i), &c__1);
			i__1 = *n - *k + i + i * v_dim1;
			V(*n-*k+i,i).r = vii.r, V(*n-*k+i,i).i = vii.i;
		    } else {
			i__1 = i + (*n - *k + i) * v_dim1;
			vii.r = V(i,*n-*k+i).r, vii.i = V(i,*n-*k+i).i;
			i__1 = i + (*n - *k + i) * v_dim1;
			V(i,*n-*k+i).r = 1., V(i,*n-*k+i).i = 0.;

/*                    T(i+1:k,i) :=
                              - tau(i) * V(i+1:k,1:n-k +i) * V(i,1:n-k+i)' */

			i__1 = *n - *k + i - 1;
			zlacgv_(&i__1, &V(i,1), ldv);
			i__1 = *k - i;
			i__2 = *n - *k + i;
			i__3 = i;
			z__1.r = -TAU(i).r, z__1.i = -TAU(i).i;
			zgemv_("No transpose", &i__1, &i__2, &z__1, &V(i+1,1),
				 ldv, &V(i,1), ldv, &c_b2, &
				T(i+1,i), &c__1);
			i__1 = *n - *k + i - 1;
			zlacgv_(&i__1, &V(i,1), ldv);
			i__1 = i + (*n - *k + i) * v_dim1;
			V(i,*n-*k+i).r = vii.r, V(i,*n-*k+i).i = vii.i;
		    }

/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k, i) */

		    i__1 = *k - i;
		    ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &T(i+1,i+1),
			     ldt, &T(i+1,i), &c__1);
		}
		i__1 = i + i * t_dim1;
		i__2 = i;
		T(i,i).r = TAU(i).r, T(i,i).i = TAU(i).i;
	    }
/* L40: */
	}
    }
    return 0;

/*     End of ZLARFT */

} /* zlarft_ */