void QuaternionDemo::paintGL()
{
  glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();
  gluPerspective(60.0, (double)width() / height(), 0.1, 10);

  glMatrixMode(GL_MODELVIEW);
  glLoadIdentity();
  glTranslatef(0, 0, -5);
  glRotatef(-90, 1, 0, 0);
  glRotatef(90, 0, 0, 1);

  Eigen::Transform<float, 3, Eigen::Affine> t_ref(ref.conjugate());
  glMultMatrixf(t_ref.data());

  Eigen::Transform<float, 3, Eigen::Affine> t_q(q);
  glMultMatrixf(t_q.data());

  glEnable(GL_DEPTH_TEST);

  glBegin(GL_QUADS);
  glColor3f(1, 1, 1);
  glVertex3f(-1, -1, -1);
  glVertex3f(1, -1, -1);
  glVertex3f(1, 1, -1);
  glVertex3f(-1, 1, -1);

  glColor3f(1, 0, 0);
  glVertex3f(-1, -1, 1);
  glVertex3f(1, -1, 1);
  glVertex3f(1, 1, 1);
  glVertex3f(-1, 1, 1);

  glColor3f(0, 1, 0);
  glVertex3f(-1, -1, 1);
  glVertex3f(-1, -1, -1);
  glVertex3f(-1, 1, -1);
  glVertex3f(-1, 1, 1);

  glColor3f(1, 0.5f, 0);
  glVertex3f(1, -1, 1);
  glVertex3f(1, -1, -1);
  glVertex3f(1, 1, -1);
  glVertex3f(1, 1, 1);

  glColor3f(0, 0, 1);
  glVertex3f(-1, -1, 1);
  glVertex3f(-1, -1, -1);
  glVertex3f(1, -1, -1);
  glVertex3f(1, -1, 1);

  glColor3f(1, 0, 0.5f);
  glVertex3f(-1, 1, 1);
  glVertex3f(-1, 1, -1);
  glVertex3f(1, 1, -1);
  glVertex3f(1, 1, 1);
  glEnd();
}
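/* A minimal standalone sketch of the Eigen-to-OpenGL handoff used above.
   glMultMatrixf(t.data()) is only valid because Eigen::Transform stores its
   4x4 matrix column-major by default, which is the layout the fixed-function
   pipeline expects. The check below is illustrative (the function name is
   ours) and needs only Eigen, no GL context. */
#include <Eigen/Geometry>
#include <cassert>

static void quaternion_to_gl_matrix(const Eigen::Quaternionf& q, float out[16])
{
    Eigen::Transform<float, 3, Eigen::Affine> t(q);
    // t.data() points at 16 floats in column-major order: rotation columns at
    // out[0..2], out[4..6], out[8..10]; translation at out[12..14].
    for (int i = 0; i < 16; ++i)
        out[i] = t.data()[i];
}

int main()
{
    Eigen::Quaternionf q(Eigen::AngleAxisf(0.5f, Eigen::Vector3f::UnitZ()));
    float m[16];
    quaternion_to_gl_matrix(q.normalized(), m);
    // Last column of an Affine transform built from a pure rotation is (0,0,0,1).
    assert(m[12] == 0.f && m[13] == 0.f && m[14] == 0.f && m[15] == 1.f);
    return 0;
}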
void ColumnMajorMatrixTest::setUp()
{
  // Define commonly used matrices for testing
  a = new ColumnMajorMatrix(3, 3);
  ColumnMajorMatrix & a_ref = *a;

  a_ref(0, 0) = 1; a_ref(1, 0) = 2; a_ref(2, 0) = 3;
  a_ref(0, 1) = 4; a_ref(1, 1) = 5; a_ref(2, 1) = 6;
  a_ref(0, 2) = 7; a_ref(1, 2) = 8; a_ref(2, 2) = 9;

  t = new ColumnMajorMatrix(3, 2);
  ColumnMajorMatrix & t_ref = *t;

  t_ref(0, 0) = 1; t_ref(1, 0) = 2; t_ref(2, 0) = 3;
  t_ref(0, 1) = 4; t_ref(1, 1) = 5; t_ref(2, 1) = 6;

  two_mat = new ColumnMajorMatrix(2, 2);
  ColumnMajorMatrix & mat = *two_mat;

  mat(0, 0) = 1; mat(1, 0) = 2;
  mat(0, 1) = 3; mat(1, 1) = 4;

  add = new ColumnMajorMatrix(3, 2);
  add_solution = new ColumnMajorMatrix(3, 2);
  ColumnMajorMatrix & add_ref = *add;
  ColumnMajorMatrix & a_sol_ref = *add_solution;

  add_ref(0, 0) = 6; a_sol_ref(0, 0) = 7;
  add_ref(1, 0) = 5; a_sol_ref(1, 0) = 7;
  add_ref(2, 0) = 4; a_sol_ref(2, 0) = 7;
  add_ref(0, 1) = 1; a_sol_ref(0, 1) = 5;
  add_ref(1, 1) = 1; a_sol_ref(1, 1) = 6;
  add_ref(2, 1) = 1; a_sol_ref(2, 1) = 7;

  sub = new ColumnMajorMatrix(3, 2);
  sub_solution = new ColumnMajorMatrix(3, 2);
  ColumnMajorMatrix & sub_ref = *sub;
  ColumnMajorMatrix & s_sol_ref = *sub_solution;

  sub_ref(0, 0) = 0; s_sol_ref(0, 0) = 1;
  sub_ref(1, 0) = 1; s_sol_ref(1, 0) = 1;
  sub_ref(2, 0) = 2; s_sol_ref(2, 0) = 1;
  sub_ref(0, 1) = 1; s_sol_ref(0, 1) = 3;
  sub_ref(1, 1) = 1; s_sol_ref(1, 1) = 4;
  sub_ref(2, 1) = 1; s_sol_ref(2, 1) = 5;
}
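/* A sketch of how the fixtures above would typically be consumed: the values
   are chosen so that *t + *add == *add_solution and *t - *sub == *sub_solution
   element-wise. Hypothetical test method, not part of the original suite; it
   assumes the CppUnit-style harness this setUp() suggests and checks entries
   directly rather than relying on any ColumnMajorMatrix operators. */
void ColumnMajorMatrixTest::addTest()
{
    ColumnMajorMatrix result(3, 2);
    for (unsigned int j = 0; j < 2; ++j)
        for (unsigned int i = 0; i < 3; ++i)
            result(i, j) = (*t)(i, j) + (*add)(i, j);
    // Expected: every entry matches the precomputed add_solution fixture.
    for (unsigned int j = 0; j < 2; ++j)
        for (unsigned int i = 0; i < 3; ++i)
            CPPUNIT_ASSERT(result(i, j) == (*add_solution)(i, j));
}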
baseType escapeNoiseNeuron::callBack(unsigned int eventSignature)
{
  if (eventSignature == _fire_)
  {
    this->fire();
    return this->time + t_ref()
        + gslNoise::getExponential(exp(-escapeNoise_a() * pot + escapeNoise_b()));
  }
  else // if (eventSignature == _exciteRandomly_)
  {
    this->excite(pcoBase::noiseIntensity());
    return this->time + gslNoise::getExponential(pcoBase::noiseFrequency());
  }
}
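/* Standalone sketch of the firing-time rule above using only <random>.
   Whether gslNoise::getExponential() takes a mean or a rate is not visible
   here; GSL's gsl_ran_exponential() takes the mean, and this sketch assumes
   the same convention (so a larger potential shortens the expected wait).
   Function and parameter names are ours, for illustration. */
#include <cmath>
#include <random>

double next_fire_time(double now, double t_refractory, double a, double b,
                      double pot, std::mt19937& rng)
{
    double mean = std::exp(-a * pot + b);                        // waiting-time scale
    std::exponential_distribution<double> exp_dist(1.0 / mean);  // <random> takes a rate
    return now + t_refractory + exp_dist(rng);
}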
void CFDDataPack::reinitViscous()
{
  grad_rho = duh[0];
  grad_mom = RealTensor(duh[1], duh[2], duh[3]);
  for (int a = 0; a < 3; ++a)
    for (int b = 0; b < 3; ++b)
      grad_vel(a, b) = (grad_mom(a, b) - vel(a) * grad_rho(b)) / r;

  tau = grad_vel + grad_vel.transpose();
  vel_div = grad_vel.tr();
  tau(0, 0) -= 2. / 3 * vel_div;
  tau(1, 1) -= 2. / 3 * vel_div;
  tau(2, 2) -= 2. / 3 * vel_div;

  if (_cfd_problem._vis_type == 0)
    vis = 0.0;
  else if (_cfd_problem._vis_type == 1)
    vis = 1.0;
  else if (_cfd_problem._vis_type == 2)
  {
    // Sutherland's law
    Real t_ref(288), t_s(110.4);
    vis = pow(t / t_ref, 1.5) * (t_ref + t_s) / (t + t_s);
  }
  else
    mooseError("Unknown viscosity model");

  tau *= vis / _reynolds;

  grad_enthalpy = (duh[4] - uh[4] / uh[0] * duh[0]) / r - grad_vel.transpose() * vel;
  grad_enthalpy *= (vis / _reynolds) * (_gamma / _prandtl);

  invis_flux[0] = r * vel;
  invis_flux[1] = mom(0) * vel;
  invis_flux[1](0) += p;
  invis_flux[2] = mom(1) * vel;
  invis_flux[2](1) += p;
  invis_flux[3] = mom(2) * vel;
  invis_flux[3](2) += p;
  invis_flux[4] = r * h * vel;

  vis_flux[0].zero();
  vis_flux[1] = tau.row(0);
  vis_flux[2] = tau.row(1);
  vis_flux[3] = tau.row(2);
  vis_flux[4] = tau * vel + grad_enthalpy;
}
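/* The _vis_type == 2 branch above is Sutherland's law in nondimensional form:
   mu/mu_ref = (T/T_ref)^(3/2) * (T_ref + S) / (T + S), with T_ref = 288 K and
   S = 110.4 K for air. A freestanding sketch (function name is ours): */
#include <cmath>

double sutherland_viscosity_ratio(double t /* temperature, K */)
{
    const double t_ref = 288.0;   // reference temperature, K
    const double t_s   = 110.4;   // Sutherland constant for air, K
    return std::pow(t / t_ref, 1.5) * (t_ref + t_s) / (t + t_s);
}
// e.g. sutherland_viscosity_ratio(288.0) == 1.0, and the ratio grows
// sublinearly with temperature.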
void pulseCoupledExcitatoryNeuron::excite(baseType c) // returns whether the neuron fires immediately.
{
  // numberOfExcitations++;
  if (nextFiring() != numeric_limits<baseType>::max())
    return;

  if (!refractory)
  {
    baseType phase = dynNode::time - lastFiring;
    baseType pot = exp(-leakage() * phase);
    if (pot + c > 1)
      eventHandler::decreaseKey(_fire_, dynNode::time + timeDelay());
    else
      lastFiring = dynNode::time + 1.0 / leakage() * log(pot + c);
  }

  if (refractory && dynNode::time - lastFiring > t_ref())
  {
    refractory = false;
    lastFiring = dynNode::time + 1.0 / leakage() * log(c);
  }
}
baseType pulseCoupledExcitatoryNeuron::getState()
{
  if (nextFiring() == numeric_limits<baseType>::max())
  {
    if (refractory)
    {
      if (dynNode::time - lastFiring < t_ref())
        return params<baseType>::getParams(0);
      else
        return 0;
    }
    else
      return exp(leakage() * (-dynNode::time + lastFiring));
  }
  else
    return 1.0;
  // baseType res = numberOfExcitations;
  // numberOfExcitations = 0;
  // return res;
}
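/* excite()/getState() above never store the potential directly: the state is
   pot(t) = exp(-leakage * (t - lastFiring)), and an excitation by c is folded
   in by moving lastFiring so that the same formula yields pot + c. A minimal
   self-contained check of that invariant (values are arbitrary): */
#include <cassert>
#include <cmath>

int main()
{
    const double leakage = 2.0, t = 5.0, lastFiring = 4.0, c = 0.1;
    double pot = std::exp(-leakage * (t - lastFiring));
    // Fold the excitation into the reference time, as excite() does:
    double lastFiring2 = t + 1.0 / leakage * std::log(pot + c);
    double pot2 = std::exp(-leakage * (t - lastFiring2));
    assert(std::fabs(pot2 - (pot + c)) < 1e-12);   // same state, new reference time
    return 0;
}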
/* Subroutine */ int ctrsen_(char *job, char *compq, logical *select, integer *n, complex *t, integer *ldt, complex *q, integer *ldq, complex *w, integer *m, real *s, real *sep, complex *work, integer *lwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CTRSEN reorders the Schur factorization of a complex matrix A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in the leading positions on the diagonal of the upper triangular matrix T, and the leading columns of Q form an orthonormal basis of the corresponding right invariant subspace. Optionally the routine computes the reciprocal condition numbers of the cluster of eigenvalues and/or the invariant subspace. Arguments ========= JOB (input) CHARACTER*1 Specifies whether condition numbers are required for the cluster of eigenvalues (S) or the invariant subspace (SEP): = 'N': none; = 'E': for eigenvalues only (S); = 'V': for invariant subspace only (SEP); = 'B': for both eigenvalues and invariant subspace (S and SEP). COMPQ (input) CHARACTER*1 = 'V': update the matrix Q of Schur vectors; = 'N': do not update Q. SELECT (input) LOGICAL array, dimension (N) SELECT specifies the eigenvalues in the selected cluster. To select the j-th eigenvalue, SELECT(j) must be set to .TRUE.. N (input) INTEGER The order of the matrix T. N >= 0. T (input/output) COMPLEX array, dimension (LDT,N) On entry, the upper triangular matrix T. On exit, T is overwritten by the reordered matrix T, with the selected eigenvalues as the leading diagonal elements. LDT (input) INTEGER The leading dimension of the array T. LDT >= max(1,N). Q (input/output) COMPLEX array, dimension (LDQ,N) On entry, if COMPQ = 'V', the matrix Q of Schur vectors. On exit, if COMPQ = 'V', Q has been postmultiplied by the unitary transformation matrix which reorders T; the leading M columns of Q form an orthonormal basis for the specified invariant subspace. If COMPQ = 'N', Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= 1; and if COMPQ = 'V', LDQ >= N. W (output) COMPLEX array, dimension (N) The reordered eigenvalues of T, in the same order as they appear on the diagonal of T. M (output) INTEGER The dimension of the specified invariant subspace. 0 <= M <= N. S (output) REAL If JOB = 'E' or 'B', S is a lower bound on the reciprocal condition number for the selected cluster of eigenvalues. S cannot underestimate the true reciprocal condition number by more than a factor of sqrt(N). If M = 0 or N, S = 1. If JOB = 'N' or 'V', S is not referenced. SEP (output) REAL If JOB = 'V' or 'B', SEP is the estimated reciprocal condition number of the specified invariant subspace. If M = 0 or N, SEP = norm(T). If JOB = 'N' or 'E', SEP is not referenced. WORK (workspace/output) COMPLEX array, dimension (LWORK) If JOB = 'N', WORK is not referenced. Otherwise, on exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If JOB = 'N', LWORK >= 1; if JOB = 'E', LWORK = M*(N-M); if JOB = 'V' or 'B', LWORK >= 2*M*(N-M). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. 
INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== CTRSEN first collects the selected eigenvalues by computing a unitary transformation Z to move them to the top left corner of T. In other words, the selected eigenvalues are the eigenvalues of T11 in: Z'*T*Z = ( T11 T12 ) n1 ( 0 T22 ) n2 n1 n2 where N = n1+n2 and Z' means the conjugate transpose of Z. The first n1 columns of Z span the specified invariant subspace of T. If T has been obtained from the Schur factorization of a matrix A = Q*T*Q', then the reordered Schur factorization of A is given by A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span the corresponding invariant subspace of A. The reciprocal condition number of the average of the eigenvalues of T11 may be returned in S. S lies between 0 (very badly conditioned) and 1 (very well conditioned). It is computed as follows. First we compute R so that P = ( I R ) n1 ( 0 0 ) n2 n1 n2 is the projector on the invariant subspace associated with T11. R is the solution of the Sylvester equation: T11*R - R*T22 = T12. Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote the two-norm of M. Then S is computed as the lower bound (1 + F-norm(R)**2)**(-1/2) on the reciprocal of 2-norm(P), the true reciprocal condition number. S cannot underestimate 1 / 2-norm(P) by more than a factor of sqrt(N). An approximate error bound for the computed average of the eigenvalues of T11 is EPS * norm(T) / S where EPS is the machine precision. The reciprocal condition number of the right invariant subspace spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. SEP is defined as the separation of T11 and T22: sep( T11, T22 ) = sigma-min( C ) where sigma-min(C) is the smallest singular value of the n1*n2-by-n1*n2 matrix C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) I(m) is an m by m identity matrix, and kprod denotes the Kronecker product. We estimate sigma-min(C) by the reciprocal of an estimate of the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). When SEP is small, small changes in T can cause large changes in the invariant subspace. An approximate bound on the maximum angular error in the computed right invariant subspace is EPS * norm(T) / SEP ===================================================================== Decode and test the input parameters. 
Parameter adjustments */ /* Table of constant values */ static integer c_n1 = -1; /* System generated locals */ integer q_dim1, q_offset, t_dim1, t_offset, i__1, i__2, i__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer kase, ierr, k; static real scale; extern logical lsame_(char *, char *); static integer lwmin; static logical wantq, wants; static real rnorm; static integer n1, n2; static real rwork[1]; extern doublereal clange_(char *, integer *, integer *, complex *, integer *, real *); static integer nn, ks; extern /* Subroutine */ int clacon_(integer *, complex *, complex *, real *, integer *), clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), xerbla_(char *, integer *); static logical wantbh; extern /* Subroutine */ int ctrexc_(char *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, integer *); static logical wantsp; extern /* Subroutine */ int ctrsyl_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, real *, integer *); static logical lquery; static real est; #define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1 #define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)] --select; t_dim1 = *ldt; t_offset = 1 + t_dim1 * 1; t -= t_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --w; --work; /* Function Body */ wantbh = lsame_(job, "B"); wants = lsame_(job, "E") || wantbh; wantsp = lsame_(job, "V") || wantbh; wantq = lsame_(compq, "V"); /* Set M to the number of selected eigenvalues. */ *m = 0; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (select[k]) { ++(*m); } /* L10: */ } n1 = *m; n2 = *n - *m; nn = n1 * n2; *info = 0; lquery = *lwork == -1; if (wantsp) { /* Computing MAX */ i__1 = 1, i__2 = nn << 1; lwmin = max(i__1,i__2); } else if (lsame_(job, "N")) { lwmin = 1; } else if (lsame_(job, "E")) { lwmin = max(1,nn); } if (! lsame_(job, "N") && ! wants && ! wantsp) { *info = -1; } else if (! lsame_(compq, "N") && ! wantq) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*ldt < max(1,*n)) { *info = -6; } else if (*ldq < 1 || wantq && *ldq < *n) { *info = -8; } else if (*lwork < lwmin && ! lquery) { *info = -14; } if (*info == 0) { work[1].r = (real) lwmin, work[1].i = 0.f; } if (*info != 0) { i__1 = -(*info); xerbla_("CTRSEN", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*m == *n || *m == 0) { if (wants) { *s = 1.f; } if (wantsp) { *sep = clange_("1", n, n, &t[t_offset], ldt, rwork); } goto L40; } /* Collect the selected eigenvalues at the top left corner of T. */ ks = 0; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (select[k]) { ++ks; /* Swap the K-th eigenvalue to position KS. */ if (k != ks) { ctrexc_(compq, n, &t[t_offset], ldt, &q[q_offset], ldq, &k, & ks, &ierr); } } /* L20: */ } if (wants) { /* Solve the Sylvester equation for R: T11*R - R*T22 = scale*T12 */ clacpy_("F", &n1, &n2, &t_ref(1, n1 + 1), ldt, &work[1], &n1); ctrsyl_("N", "N", &c_n1, &n1, &n2, &t[t_offset], ldt, &t_ref(n1 + 1, n1 + 1), ldt, &work[1], &n1, &scale, &ierr); /* Estimate the reciprocal of the condition number of the cluster of eigenvalues. */ rnorm = clange_("F", &n1, &n2, &work[1], &n1, rwork); if (rnorm == 0.f) { *s = 1.f; } else { *s = scale / (sqrt(scale * scale / rnorm + rnorm) * sqrt(rnorm)); } } if (wantsp) { /* Estimate sep(T11,T22). */ est = 0.f; kase = 0; L30: clacon_(&nn, &work[nn + 1], &work[1], &est, &kase); if (kase != 0) { if (kase == 1) { /* Solve T11*R - R*T22 = scale*X. 
*/ ctrsyl_("N", "N", &c_n1, &n1, &n2, &t[t_offset], ldt, &t_ref( n1 + 1, n1 + 1), ldt, &work[1], &n1, &scale, &ierr); } else { /* Solve T11'*R - R*T22' = scale*X. */ ctrsyl_("C", "C", &c_n1, &n1, &n2, &t[t_offset], ldt, &t_ref( n1 + 1, n1 + 1), ldt, &work[1], &n1, &scale, &ierr); } goto L30; } *sep = scale / est; } L40: /* Copy reordered eigenvalues to W. */ i__1 = *n; for (k = 1; k <= i__1; ++k) { i__2 = k; i__3 = t_subscr(k, k); w[i__2].r = t[i__3].r, w[i__2].i = t[i__3].i; /* L50: */ } work[1].r = (real) lwmin, work[1].i = 0.f; return 0; /* End of CTRSEN */ } /* ctrsen_ */
/* Subroutine */ int stgex2_(logical *wantq, logical *wantz, integer *n, real *a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real * z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, integer *lwork, integer *info) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair (A, B) by an orthogonal equivalence transformation. (A, B) must be in generalized real Schur canonical form (as returned by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper triangular. Optionally, the matrices Q and Z of generalized Schur vectors are updated. Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)' Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)' Arguments ========= WANTQ (input) LOGICAL .TRUE. : update the left transformation matrix Q; .FALSE.: do not update Q. WANTZ (input) LOGICAL .TRUE. : update the right transformation matrix Z; .FALSE.: do not update Z. N (input) INTEGER The order of the matrices A and B. N >= 0. A (input/output) REAL arrays, dimensions (LDA,N) On entry, the matrix A in the pair (A, B). On exit, the updated matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) REAL arrays, dimensions (LDB,N) On entry, the matrix B in the pair (A, B). On exit, the updated matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). Q (input/output) REAL array, dimension (LDZ,N) On entry, if WANTQ = .TRUE., the orthogonal matrix Q. On exit, the updated matrix Q. Not referenced if WANTQ = .FALSE.. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= 1. If WANTQ = .TRUE., LDQ >= N. Z (input/output) REAL array, dimension (LDZ,N) On entry, if WANTZ =.TRUE., the orthogonal matrix Z. On exit, the updated matrix Z. Not referenced if WANTZ = .FALSE.. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= 1. If WANTZ = .TRUE., LDZ >= N. J1 (input) INTEGER The index to the first block (A11, B11). 1 <= J1 <= N. N1 (input) INTEGER The order of the first block (A11, B11). N1 = 0, 1 or 2. N2 (input) INTEGER The order of the second block (A22, B22). N2 = 0, 1 or 2. WORK (workspace) REAL array, dimension (LWORK). LWORK (input) INTEGER The dimension of the array WORK. LWORK >= MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 ) INFO (output) INTEGER =0: Successful exit >0: If INFO = 1, the transformed matrix (A, B) would be too far from generalized Schur form; the blocks are not swapped and (A, B) and (Q, Z) are unchanged. The problem of swapping is too ill-conditioned. <0: If INFO = -16: LWORK is too small. Appropriate value for LWORK is returned in WORK(1). Further Details =============== Based on contributions by Bo Kagstrom and Peter Poromaa, Department of Computing Science, Umea University, S-901 87 Umea, Sweden. In the current code both weak and strong stability tests are performed. The user can omit the strong stability test by changing the internal logical parameter WANDS to .FALSE.. See ref. [2] for details. [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the Generalized Real Schur Form of a Regular Matrix Pair (A, B), in M.S. Moonen et al (eds), Linear Algebra for Large Scale and Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. [2] B. Kagstrom and P. 
Poromaa; Computing Eigenspaces with Specified Eigenvalues of a Regular Matrix Pair (A, B) and Condition Estimation: Theory, Algorithms and Software, Report UMINF - 94.04, Department of Computing Science, Umea University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. To appear in Numerical Algorithms, 1996. ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__16 = 16; static real c_b3 = 0.f; static integer c__0 = 0; static integer c__1 = 1; static integer c__4 = 4; static integer c__2 = 2; static real c_b38 = 1.f; static real c_b44 = -1.f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, z_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static logical weak; static real ddum; static integer idum; static real taul[4], dsum, taur[4], scpy[16] /* was [4][4] */, tcpy[16] /* was [4][4] */; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); static real f, g; static integer i__, m; static real s[16] /* was [4][4] */, t[16] /* was [4][4] */, scale, bqra21, brqa21; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static real licop[16] /* was [4][4] */; static integer linfo; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static real ircop[16] /* was [4][4] */, dnorm; static integer iwork[4]; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), slagv2_(real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *), sgeqr2_(integer * , integer *, real *, integer *, real *, real *, integer *), sgerq2_(integer *, integer *, real *, integer *, real *, real *, integer *); static real be[2], ai[2]; extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *), sorgr2_(integer *, integer *, integer *, real *, integer *, real *, real *, integer * ); static real ar[2], sa, sb, li[16] /* was [4][4] */; extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *), sormr2_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer * , real *, integer *); static real dscale, ir[16] /* was [4][4] */; extern /* Subroutine */ int stgsy2_(char *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, integer *, real * , integer *, real *, integer *, real *, integer *, real *, real *, real *, integer *, integer *, integer *); static real ss; extern doublereal slamch_(char *); static real ws; extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slartg_(real *, real *, real *, real *, real *); static real thresh; extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, real *); static real smlnum; static logical strong; static real eps; #define scpy_ref(a_1,a_2) scpy[(a_2)*4 + a_1 - 5] #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define s_ref(a_1,a_2) s[(a_2)*4 + a_1 - 5] #define t_ref(a_1,a_2) t[(a_2)*4 + a_1 - 5] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define li_ref(a_1,a_2) li[(a_2)*4 + a_1 - 5] 
#define ir_ref(a_1,a_2) ir[(a_2)*4 + a_1 - 5] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --work; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n <= 1 || *n1 <= 0 || *n2 <= 0) { return 0; } if (*n1 > *n || *j1 + *n1 > *n) { return 0; } m = *n1 + *n2; /* Computing MAX */ i__1 = *n * m, i__2 = m * m << 1; if (*lwork < max(i__1,i__2)) { *info = -16; /* Computing MAX */ i__1 = *n * m, i__2 = m * m << 1; work[1] = (real) max(i__1,i__2); return 0; } weak = FALSE_; strong = FALSE_; /* Make a local copy of selected block */ scopy_(&c__16, &c_b3, &c__0, li, &c__1); scopy_(&c__16, &c_b3, &c__0, ir, &c__1); slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, s, &c__4); slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, t, &c__4); /* Compute threshold for testing acceptance of swapping. */ eps = slamch_("P"); smlnum = slamch_("S") / eps; dscale = 0.f; dsum = 1.f; slacpy_("Full", &m, &m, s, &c__4, &work[1], &m); i__1 = m * m; slassq_(&i__1, &work[1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, t, &c__4, &work[1], &m); i__1 = m * m; slassq_(&i__1, &work[1], &c__1, &dscale, &dsum); dnorm = dscale * sqrt(dsum); /* Computing MAX */ r__1 = eps * 10.f * dnorm; thresh = dmax(r__1,smlnum); if (m == 2) { /* CASE 1: Swap 1-by-1 and 1-by-1 blocks. Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks using Givens rotations and perform the swap tentatively. */ f = s_ref(2, 2) * t_ref(1, 1) - t_ref(2, 2) * s_ref(1, 1); g = s_ref(2, 2) * t_ref(1, 2) - t_ref(2, 2) * s_ref(1, 2); sb = (r__1 = t_ref(2, 2), dabs(r__1)); sa = (r__1 = s_ref(2, 2), dabs(r__1)); slartg_(&f, &g, &ir_ref(1, 2), &ir_ref(1, 1), &ddum); ir_ref(2, 1) = -ir_ref(1, 2); ir_ref(2, 2) = ir_ref(1, 1); srot_(&c__2, &s_ref(1, 1), &c__1, &s_ref(1, 2), &c__1, &ir_ref(1, 1), &ir_ref(2, 1)); srot_(&c__2, &t_ref(1, 1), &c__1, &t_ref(1, 2), &c__1, &ir_ref(1, 1), &ir_ref(2, 1)); if (sa >= sb) { slartg_(&s_ref(1, 1), &s_ref(2, 1), &li_ref(1, 1), &li_ref(2, 1), &ddum); } else { slartg_(&t_ref(1, 1), &t_ref(2, 1), &li_ref(1, 1), &li_ref(2, 1), &ddum); } srot_(&c__2, &s_ref(1, 1), &c__4, &s_ref(2, 1), &c__4, &li_ref(1, 1), &li_ref(2, 1)); srot_(&c__2, &t_ref(1, 1), &c__4, &t_ref(2, 1), &c__4, &li_ref(1, 1), &li_ref(2, 1)); li_ref(2, 2) = li_ref(1, 1); li_ref(1, 2) = -li_ref(2, 1); /* Weak stability test: |S21| + |T21| <= O(EPS * F-norm((S, T))) */ ws = (r__1 = s_ref(2, 1), dabs(r__1)) + (r__2 = t_ref(2, 1), dabs( r__2)); weak = ws <= thresh; if (! weak) { goto L70; } if (TRUE_) { /* Strong stability test: F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) */ slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); dscale = 0.f; dsum = 1.f; i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); ss = dscale * sqrt(dsum); strong = ss <= thresh; if (! 
strong) { goto L70; } } /* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */ i__1 = *j1 + 1; srot_(&i__1, &a_ref(1, *j1), &c__1, &a_ref(1, *j1 + 1), &c__1, & ir_ref(1, 1), &ir_ref(2, 1)); i__1 = *j1 + 1; srot_(&i__1, &b_ref(1, *j1), &c__1, &b_ref(1, *j1 + 1), &c__1, & ir_ref(1, 1), &ir_ref(2, 1)); i__1 = *n - *j1 + 1; srot_(&i__1, &a_ref(*j1, *j1), lda, &a_ref(*j1 + 1, *j1), lda, & li_ref(1, 1), &li_ref(2, 1)); i__1 = *n - *j1 + 1; srot_(&i__1, &b_ref(*j1, *j1), ldb, &b_ref(*j1 + 1, *j1), ldb, & li_ref(1, 1), &li_ref(2, 1)); /* Set N1-by-N2 (2,1) - blocks to ZERO. */ a_ref(*j1 + 1, *j1) = 0.f; b_ref(*j1 + 1, *j1) = 0.f; /* Accumulate transformations into Q and Z if requested. */ if (*wantz) { srot_(n, &z___ref(1, *j1), &c__1, &z___ref(1, *j1 + 1), &c__1, & ir_ref(1, 1), &ir_ref(2, 1)); } if (*wantq) { srot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, *j1 + 1), &c__1, & li_ref(1, 1), &li_ref(2, 1)); } /* Exit with INFO = 0 if swap was successfully performed. */ return 0; } else { /* CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 and 2-by-2 blocks. Solve the generalized Sylvester equation S11 * R - L * S22 = SCALE * S12 T11 * R - L * T22 = SCALE * T12 for R and L. Solutions in LI and IR. */ slacpy_("Full", n1, n2, &t_ref(1, *n1 + 1), &c__4, li, &c__4); slacpy_("Full", n1, n2, &s_ref(1, *n1 + 1), &c__4, &ir_ref(*n2 + 1, * n1 + 1), &c__4); stgsy2_("N", &c__0, n1, n2, s, &c__4, &s_ref(*n1 + 1, *n1 + 1), &c__4, &ir_ref(*n2 + 1, *n1 + 1), &c__4, t, &c__4, &t_ref(*n1 + 1, * n1 + 1), &c__4, li, &c__4, &scale, &dsum, &dscale, iwork, & idum, &linfo); /* Compute orthogonal matrix QL: QL' * LI = [ TL ] [ 0 ] where LI = [ -L ] [ SCALE * identity(N2) ] */ i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { sscal_(n1, &c_b44, &li_ref(1, i__), &c__1); li_ref(*n1 + i__, i__) = scale; /* L10: */ } sgeqr2_(&m, n2, li, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } sorg2r_(&m, &m, n2, li, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } /* Compute orthogonal matrix RQ: IR * RQ' = [ 0 TR], where IR = [ SCALE * identity(N1), R ] */ i__1 = *n1; for (i__ = 1; i__ <= i__1; ++i__) { ir_ref(*n2 + i__, i__) = scale; /* L20: */ } sgerq2_(n1, &m, &ir_ref(*n2 + 1, 1), &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } sorgr2_(&m, &m, n1, ir, &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } /* Perform the swapping tentatively: */ sgemm_("T", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b38, &work[1], &m, ir, &c__4, &c_b3, s, &c__4); sgemm_("T", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, & work[1], &m); sgemm_("N", "T", &m, &m, &m, &c_b38, &work[1], &m, ir, &c__4, &c_b3, t, &c__4); slacpy_("F", &m, &m, s, &c__4, scpy, &c__4); slacpy_("F", &m, &m, t, &c__4, tcpy, &c__4); slacpy_("F", &m, &m, ir, &c__4, ircop, &c__4); slacpy_("F", &m, &m, li, &c__4, licop, &c__4); /* Triangularize the B-part by an RQ factorization. Apply transformation (from left) to A-part, giving S. */ sgerq2_(&m, &m, t, &c__4, taur, &work[1], &linfo); if (linfo != 0) { goto L70; } sormr2_("R", "T", &m, &m, &m, t, &c__4, taur, s, &c__4, &work[1], & linfo); if (linfo != 0) { goto L70; } sormr2_("L", "N", &m, &m, &m, t, &c__4, taur, ir, &c__4, &work[1], & linfo); if (linfo != 0) { goto L70; } /* Compute F-norm(S21) in BRQA21. (T21 is 0.) 
*/ dscale = 0.f; dsum = 1.f; i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { slassq_(n1, &s_ref(*n2 + 1, i__), &c__1, &dscale, &dsum); /* L30: */ } brqa21 = dscale * sqrt(dsum); /* Triangularize the B-part by a QR factorization. Apply transformation (from right) to A-part, giving S. */ sgeqr2_(&m, &m, tcpy, &c__4, taul, &work[1], &linfo); if (linfo != 0) { goto L70; } sorm2r_("L", "T", &m, &m, &m, tcpy, &c__4, taul, scpy, &c__4, &work[1] , info); sorm2r_("R", "N", &m, &m, &m, tcpy, &c__4, taul, licop, &c__4, &work[ 1], info); if (linfo != 0) { goto L70; } /* Compute F-norm(S21) in BQRA21. (T21 is 0.) */ dscale = 0.f; dsum = 1.f; i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { slassq_(n1, &scpy_ref(*n2 + 1, i__), &c__1, &dscale, &dsum); /* L40: */ } bqra21 = dscale * sqrt(dsum); /* Decide which method to use. Weak stability test: F-norm(S21) <= O(EPS * F-norm((S, T))) */ if (bqra21 <= brqa21 && bqra21 <= thresh) { slacpy_("F", &m, &m, scpy, &c__4, s, &c__4); slacpy_("F", &m, &m, tcpy, &c__4, t, &c__4); slacpy_("F", &m, &m, ircop, &c__4, ir, &c__4); slacpy_("F", &m, &m, licop, &c__4, li, &c__4); } else if (brqa21 >= thresh) { goto L70; } /* Set lower triangle of B-part to zero */ i__1 = m; for (i__ = 2; i__ <= i__1; ++i__) { i__2 = m - i__ + 1; scopy_(&i__2, &c_b3, &c__0, &t_ref(i__, i__ - 1), &c__1); /* L50: */ } if (TRUE_) { /* Strong stability test: F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) */ slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, & work[1], &m); sgemm_("N", "N", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); dscale = 0.f; dsum = 1.f; i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, &work[m * m + 1], & m); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, & work[1], &m); sgemm_("N", "N", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, & c_b38, &work[m * m + 1], &m); i__1 = m * m; slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum); ss = dscale * sqrt(dsum); strong = ss <= thresh; if (! strong) { goto L70; } } /* If the swap is accepted ("weakly" and "strongly"), apply the transformations and set N1-by-N2 (2,1)-block to zero. */ i__1 = *n2; for (i__ = 1; i__ <= i__1; ++i__) { scopy_(n1, &c_b3, &c__0, &s_ref(*n2 + 1, i__), &c__1); /* L60: */ } /* copy back M-by-M diagonal block starting at index J1 of (A, B) */ slacpy_("F", &m, &m, s, &c__4, &a_ref(*j1, *j1), lda); slacpy_("F", &m, &m, t, &c__4, &b_ref(*j1, *j1), ldb); scopy_(&c__16, &c_b3, &c__0, t, &c__1); /* Standardize existing 2-by-2 blocks. 
*/ i__1 = m * m; scopy_(&i__1, &c_b3, &c__0, &work[1], &c__1); work[1] = 1.f; t_ref(1, 1) = 1.f; idum = *lwork - m * m - 2; if (*n2 > 1) { slagv2_(&a_ref(*j1, *j1), lda, &b_ref(*j1, *j1), ldb, ar, ai, be, &work[1], &work[2], &t_ref(1, 1), &t_ref(2, 1)); work[m + 1] = -work[2]; work[m + 2] = work[1]; t_ref(*n2, *n2) = t_ref(1, 1); t_ref(1, 2) = -t_ref(2, 1); } work[m * m] = 1.f; t_ref(m, m) = 1.f; if (*n1 > 1) { slagv2_(&a_ref(*j1 + *n2, *j1 + *n2), lda, &b_ref(*j1 + *n2, *j1 + *n2), ldb, taur, taul, &work[m * m + 1], &work[*n2 * m + *n2 + 1], &work[*n2 * m + *n2 + 2], &t_ref(*n2 + 1, *n2 + 1), &t_ref(m, m - 1)); work[m * m] = work[*n2 * m + *n2 + 1]; work[m * m - 1] = -work[*n2 * m + *n2 + 2]; t_ref(m, m) = t_ref(*n2 + 1, *n2 + 1); t_ref(m - 1, m) = -t_ref(m, m - 1); } sgemm_("T", "N", n2, n1, n2, &c_b38, &work[1], &m, &a_ref(*j1, *j1 + * n2), lda, &c_b3, &work[m * m + 1], n2); slacpy_("Full", n2, n1, &work[m * m + 1], n2, &a_ref(*j1, *j1 + *n2), lda); sgemm_("T", "N", n2, n1, n2, &c_b38, &work[1], &m, &b_ref(*j1, *j1 + * n2), ldb, &c_b3, &work[m * m + 1], n2); slacpy_("Full", n2, n1, &work[m * m + 1], n2, &b_ref(*j1, *j1 + *n2), ldb); sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, &work[1], &m, &c_b3, & work[m * m + 1], &m); slacpy_("Full", &m, &m, &work[m * m + 1], &m, li, &c__4); sgemm_("N", "N", n2, n1, n1, &c_b38, &a_ref(*j1, *j1 + *n2), lda, & t_ref(*n2 + 1, *n2 + 1), &c__4, &c_b3, &work[1], n2); slacpy_("Full", n2, n1, &work[1], n2, &a_ref(*j1, *j1 + *n2), lda); sgemm_("N", "N", n2, n1, n1, &c_b38, &b_ref(*j1, *j1 + *n2), lda, & t_ref(*n2 + 1, *n2 + 1), &c__4, &c_b3, &work[1], n2); slacpy_("Full", n2, n1, &work[1], n2, &b_ref(*j1, *j1 + *n2), ldb); sgemm_("T", "N", &m, &m, &m, &c_b38, ir, &c__4, t, &c__4, &c_b3, & work[1], &m); slacpy_("Full", &m, &m, &work[1], &m, ir, &c__4); /* Accumulate transformations into Q and Z if requested. */ if (*wantq) { sgemm_("N", "N", n, &m, &m, &c_b38, &q_ref(1, *j1), ldq, li, & c__4, &c_b3, &work[1], n); slacpy_("Full", n, &m, &work[1], n, &q_ref(1, *j1), ldq); } if (*wantz) { sgemm_("N", "N", n, &m, &m, &c_b38, &z___ref(1, *j1), ldz, ir, & c__4, &c_b3, &work[1], n); slacpy_("Full", n, &m, &work[1], n, &z___ref(1, *j1), ldz); } /* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */ i__ = *j1 + m; if (i__ <= *n) { i__1 = *n - i__ + 1; sgemm_("T", "N", &m, &i__1, &m, &c_b38, li, &c__4, &a_ref(*j1, i__), lda, &c_b3, &work[1], &m); i__1 = *n - i__ + 1; slacpy_("Full", &m, &i__1, &work[1], &m, &a_ref(*j1, i__), lda); i__1 = *n - i__ + 1; sgemm_("T", "N", &m, &i__1, &m, &c_b38, li, &c__4, &b_ref(*j1, i__), lda, &c_b3, &work[1], &m); i__1 = *n - i__ + 1; slacpy_("Full", &m, &i__1, &work[1], &m, &b_ref(*j1, i__), lda); } i__ = *j1 - 1; if (i__ > 0) { sgemm_("N", "N", &i__, &m, &m, &c_b38, &a_ref(1, *j1), lda, ir, & c__4, &c_b3, &work[1], &i__); slacpy_("Full", &i__, &m, &work[1], &i__, &a_ref(1, *j1), lda); sgemm_("N", "N", &i__, &m, &m, &c_b38, &b_ref(1, *j1), ldb, ir, & c__4, &c_b3, &work[1], &i__); slacpy_("Full", &i__, &m, &work[1], &i__, &b_ref(1, *j1), ldb); } /* Exit with INFO = 0 if swap was successfully performed. */ return 0; } /* Exit with INFO = 1 if swap was rejected. */ L70: *info = 1; return 0; /* End of STGEX2 */ } /* stgex2_ */
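/* STGEX2 above accepts a tentative swap only when the residual passes
   thresh = max(10 * eps * F-norm((S,T)), smlnum), and it accumulates that
   Frobenius norm the way SLASSQ does: as scale * sqrt(sumsq), to avoid
   overflow and underflow. A self-contained sketch of the accumulation
   (hypothetical helper mirroring SLASSQ): */
#include <cmath>
#include <cstddef>

void lassq_update(const float* x, std::size_t n, float& scale, float& sumsq)
{
    for (std::size_t i = 0; i < n; ++i) {
        float absxi = std::fabs(x[i]);
        if (absxi == 0.0f) continue;
        if (scale < absxi) {
            // Rescale so the running sum stays O(1) relative to the new max.
            float r = scale / absxi;
            sumsq = 1.0f + sumsq * r * r;
            scale = absxi;
        } else {
            float r = absxi / scale;
            sumsq += r * r;
        }
    }
}
// F-norm of the pair (S, T): seed scale = 0, sumsq = 1 as the routine does,
// call lassq_update on both blocks, then norm = scale * sqrt(sumsq).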
/* Subroutine */ int clahrd_(integer *n, integer *k, integer *nb, complex *a, integer *lda, complex *tau, complex *t, integer *ldt, complex *y, integer *ldy) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) matrix A so that elements below the k-th subdiagonal are zero. The reduction is performed by a unitary similarity transformation Q' * A * Q. The routine returns the matrices V and T which determine Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. This is an auxiliary routine called by CGEHRD. Arguments ========= N (input) INTEGER The order of the matrix A. K (input) INTEGER The offset for the reduction. Elements below the k-th subdiagonal in the first NB columns are reduced to zero. NB (input) INTEGER The number of columns to be reduced. A (input/output) COMPLEX array, dimension (LDA,N-K+1) On entry, the n-by-(n-k+1) general matrix A. On exit, the elements on and above the k-th subdiagonal in the first NB columns are overwritten with the corresponding elements of the reduced matrix; the elements below the k-th subdiagonal, with the array TAU, represent the matrix Q as a product of elementary reflectors. The other columns of A are unchanged. See Further Details. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). TAU (output) COMPLEX array, dimension (NB) The scalar factors of the elementary reflectors. See Further Details. T (output) COMPLEX array, dimension (LDT,NB) The upper triangular matrix T. LDT (input) INTEGER The leading dimension of the array T. LDT >= NB. Y (output) COMPLEX array, dimension (LDY,NB) The n-by-nb matrix Y. LDY (input) INTEGER The leading dimension of the array Y. LDY >= max(1,N). Further Details =============== The matrix Q is represented as a product of nb elementary reflectors Q = H(1) H(2) . . . H(nb). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in A(i+k+1:n,i), and tau in TAU(i). The elements of the vectors v together form the (n-k+1)-by-nb matrix V which is needed, with T and Y, to apply the transformation to the unreduced part of the matrix, using an update of the form: A := (I - V*T*V') * (A - Y*V'). The contents of A on exit are illustrated by the following example with n = 7, k = 3 and nb = 2: ( a h a a a ) ( a h a a a ) ( a h a a a ) ( h h a a a ) ( v1 h a a a ) ( v1 v2 a a a ) ( v1 v2 a a a ) where a denotes an element of the original matrix A, h denotes a modified element of the upper Hessenberg matrix H, and vi denotes an element of the vector defining H(i). 
===================================================================== Quick return if possible Parameter adjustments */ /* Table of constant values */ static complex c_b1 = {0.f,0.f}; static complex c_b2 = {1.f,0.f}; static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, i__3; complex q__1; /* Local variables */ static integer i__; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, integer *), cgemv_(char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *), ccopy_(integer *, complex *, integer *, complex *, integer *), caxpy_(integer *, complex *, complex *, integer *, complex *, integer *), ctrmv_(char *, char *, char *, integer *, complex *, integer *, complex *, integer *); static complex ei; extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, integer *, complex *), clacgv_(integer *, complex *, integer *); #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1 #define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)] #define y_subscr(a_1,a_2) (a_2)*y_dim1 + a_1 #define y_ref(a_1,a_2) y[y_subscr(a_1,a_2)] --tau; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1 * 1; t -= t_offset; y_dim1 = *ldy; y_offset = 1 + y_dim1 * 1; y -= y_offset; /* Function Body */ if (*n <= 1) { return 0; } i__1 = *nb; for (i__ = 1; i__ <= i__1; ++i__) { if (i__ > 1) { /* Update A(1:n,i) Compute i-th column of A - Y * V' */ i__2 = i__ - 1; clacgv_(&i__2, &a_ref(*k + i__ - 1, 1), lda); i__2 = i__ - 1; q__1.r = -1.f, q__1.i = 0.f; cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &a_ref( *k + i__ - 1, 1), lda, &c_b2, &a_ref(1, i__), &c__1); i__2 = i__ - 1; clacgv_(&i__2, &a_ref(*k + i__ - 1, 1), lda); /* Apply I - V * T' * V' to this column (call it b) from the left, using the last column of T as workspace Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) ( V2 ) ( b2 ) where V1 is unit lower triangular w := V1' * b1 */ i__2 = i__ - 1; ccopy_(&i__2, &a_ref(*k + 1, i__), &c__1, &t_ref(1, *nb), &c__1); i__2 = i__ - 1; ctrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a_ref(*k + 1, 1), lda, &t_ref(1, *nb), &c__1); /* w := w + V2'*b2 */ i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a_ref(*k + i__, 1), lda, &a_ref(*k + i__, i__), &c__1, &c_b2, &t_ref( 1, *nb), &c__1); /* w := T'*w */ i__2 = i__ - 1; ctrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[ t_offset], ldt, &t_ref(1, *nb), &c__1); /* b2 := b2 - V2*w */ i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = 0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &a_ref(*k + i__, 1), lda, &t_ref(1, *nb), &c__1, &c_b2, &a_ref(*k + i__, i__), &c__1); /* b1 := b1 - V1*w */ i__2 = i__ - 1; ctrmv_("Lower", "No transpose", "Unit", &i__2, &a_ref(*k + 1, 1), lda, &t_ref(1, *nb), &c__1); i__2 = i__ - 1; q__1.r = -1.f, q__1.i = 0.f; caxpy_(&i__2, &q__1, &t_ref(1, *nb), &c__1, &a_ref(*k + 1, i__), & c__1); i__2 = a_subscr(*k + i__ - 1, i__ - 1); a[i__2].r = ei.r, a[i__2].i = ei.i; } /* Generate the elementary reflector H(i) to annihilate A(k+i+1:n,i) */ i__2 = a_subscr(*k + i__, i__); ei.r = a[i__2].r, ei.i = a[i__2].i; /* Computing MIN */ i__2 = *k + i__ + 1; i__3 = *n - *k - i__ + 1; clarfg_(&i__3, &ei, &a_ref(min(i__2,*n), i__), &c__1, &tau[i__]); i__2 = a_subscr(*k + i__, i__); a[i__2].r = 1.f, a[i__2].i = 0.f; /* Compute 
Y(1:n,i) */ i__2 = *n - *k - i__ + 1; cgemv_("No transpose", n, &i__2, &c_b2, &a_ref(1, i__ + 1), lda, & a_ref(*k + i__, i__), &c__1, &c_b1, &y_ref(1, i__), &c__1); i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a_ref(*k + i__, 1) , lda, &a_ref(*k + i__, i__), &c__1, &c_b1, &t_ref(1, i__), & c__1); i__2 = i__ - 1; q__1.r = -1.f, q__1.i = 0.f; cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &t_ref(1, i__), &c__1, &c_b2, &y_ref(1, i__), &c__1); cscal_(n, &tau[i__], &y_ref(1, i__), &c__1); /* Compute T(1:i,i) */ i__2 = i__ - 1; i__3 = i__; q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; cscal_(&i__2, &q__1, &t_ref(1, i__), &c__1); i__2 = i__ - 1; ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, &t_ref(1, i__), &c__1); i__2 = t_subscr(i__, i__); i__3 = i__; t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; /* L10: */ } i__1 = a_subscr(*k + *nb, *nb); a[i__1].r = ei.r, a[i__1].i = ei.i; return 0; /* End of CLAHRD */ } /* clahrd_ */
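/* CLAHRD represents Q = H(1)...H(nb) as the block reflector I - V*T*V'.
   The columns of T are built by the recurrence visible in the
   "Compute T(1:i,i)" step above: T(1:i-1,i) = -tau_i * T11 * (V1' * v_i),
   T(i,i) = tau_i. A small Eigen sketch checking that identity against the
   explicit Householder product; it uses real doubles and a random V instead
   of the complex, structured V of the routine, purely for illustration: */
#include <Eigen/Dense>
#include <iostream>

int main()
{
    const int n = 6, nb = 3;
    Eigen::MatrixXd V = Eigen::MatrixXd::Random(n, nb);
    Eigen::VectorXd tau(nb);
    Eigen::MatrixXd Qprod = Eigen::MatrixXd::Identity(n, n);
    Eigen::MatrixXd T = Eigen::MatrixXd::Zero(nb, nb);
    for (int i = 0; i < nb; ++i) {
        tau(i) = 2.0 / V.col(i).squaredNorm();     // makes each H(i) orthogonal
        Qprod *= Eigen::MatrixXd::Identity(n, n)
               - tau(i) * V.col(i) * V.col(i).transpose();
        if (i > 0)   // forward, columnwise T recurrence (as in xLARFT/CLAHRD)
            T.block(0, i, i, 1) = -tau(i) * T.topLeftCorner(i, i)
                                * (V.leftCols(i).transpose() * V.col(i));
        T(i, i) = tau(i);
    }
    Eigen::MatrixXd Qblock = Eigen::MatrixXd::Identity(n, n)
                           - V * T * V.transpose();
    std::cout << "max diff: " << (Qprod - Qblock).cwiseAbs().maxCoeff() << "\n";
    return 0;
}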
/* Subroutine */ int ctrexc_(char *compq, integer *n, complex *t, integer *ldt,
	complex *q, integer *ldq, integer *ifst, integer *ilst, integer *info)
{
/*  -- LAPACK routine (version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       March 31, 1993

    Purpose
    =======

    CTREXC reorders the Schur factorization of a complex matrix
    A = Q*T*Q**H, so that the diagonal element of T with row index IFST
    is moved to row ILST.

    The Schur form T is reordered by a unitary similarity transformation
    Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by
    postmultiplying it with Z.

    Arguments
    =========

    COMPQ   (input) CHARACTER*1
            = 'V':  update the matrix Q of Schur vectors;
            = 'N':  do not update Q.

    N       (input) INTEGER
            The order of the matrix T. N >= 0.

    T       (input/output) COMPLEX array, dimension (LDT,N)
            On entry, the upper triangular matrix T.
            On exit, the reordered upper triangular matrix.

    LDT     (input) INTEGER
            The leading dimension of the array T. LDT >= max(1,N).

    Q       (input/output) COMPLEX array, dimension (LDQ,N)
            On entry, if COMPQ = 'V', the matrix Q of Schur vectors.
            On exit, if COMPQ = 'V', Q has been postmultiplied by the
            unitary transformation matrix Z which reorders T.
            If COMPQ = 'N', Q is not referenced.

    LDQ     (input) INTEGER
            The leading dimension of the array Q. LDQ >= max(1,N).

    IFST    (input) INTEGER
    ILST    (input) INTEGER
            Specify the reordering of the diagonal elements of T:
            The element with row index IFST is moved to row ILST by a
            sequence of transpositions between adjacent elements.
            1 <= IFST <= N; 1 <= ILST <= N.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value

    =====================================================================

       Decode and test the input parameters.

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;

    /* System generated locals */
    integer q_dim1, q_offset, t_dim1, t_offset, i__1, i__2, i__3;
    complex q__1;
    /* Builtin functions */
    void r_cnjg(complex *, complex *);
    /* Local variables */
    static complex temp;
    extern /* Subroutine */ int crot_(integer *, complex *, integer *,
	    complex *, integer *, real *, complex *);
    static integer k;
    extern logical lsame_(char *, char *);
    static logical wantq;
    static integer m1, m2, m3;
    static real cs;
    static complex t11, t22, sn;
    extern /* Subroutine */ int clartg_(complex *, complex *, real *,
	    complex *, complex *), xerbla_(char *, integer *);
#define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1
#define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)]
#define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1
#define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)]

    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;

    /* Function Body */
    *info = 0;
    wantq = lsame_(compq, "V");
    if (! lsame_(compq, "N") && ! wantq) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*ldt < max(1,*n)) {
	*info = -4;
    } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) {
	*info = -6;
    } else if (*ifst < 1 || *ifst > *n) {
	*info = -7;
    } else if (*ilst < 1 || *ilst > *n) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CTREXC", &i__1);
	return 0;
    }

    /* Quick return if possible */
    if (*n == 1 || *ifst == *ilst) {
	return 0;
    }

    if (*ifst < *ilst) {
	/* Move the IFST-th diagonal element forward down the diagonal. */
	m1 = 0;
	m2 = -1;
	m3 = 1;
    } else {
	/* Move the IFST-th diagonal element backward up the diagonal. */
	m1 = -1;
	m2 = 0;
	m3 = -1;
    }

    i__1 = *ilst + m2;
    i__2 = m3;
    for (k = *ifst + m1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {

	/* Interchange the k-th and (k+1)-th diagonal elements. */
	i__3 = t_subscr(k, k);
	t11.r = t[i__3].r, t11.i = t[i__3].i;
	i__3 = t_subscr(k + 1, k + 1);
	t22.r = t[i__3].r, t22.i = t[i__3].i;

	/* Determine the transformation to perform the interchange. */
	q__1.r = t22.r - t11.r, q__1.i = t22.i - t11.i;
	clartg_(&t_ref(k, k + 1), &q__1, &cs, &sn, &temp);

	/* Apply transformation to the matrix T. */
	if (k + 2 <= *n) {
	    i__3 = *n - k - 1;
	    crot_(&i__3, &t_ref(k, k + 2), ldt, &t_ref(k + 1, k + 2), ldt,
		    &cs, &sn);
	}
	i__3 = k - 1;
	r_cnjg(&q__1, &sn);
	crot_(&i__3, &t_ref(1, k), &c__1, &t_ref(1, k + 1), &c__1, &cs, &q__1);
	i__3 = t_subscr(k, k);
	t[i__3].r = t22.r, t[i__3].i = t22.i;
	i__3 = t_subscr(k + 1, k + 1);
	t[i__3].r = t11.r, t[i__3].i = t11.i;

	if (wantq) {
	    /* Accumulate transformation in the matrix Q. */
	    r_cnjg(&q__1, &sn);
	    crot_(n, &q_ref(1, k), &c__1, &q_ref(1, k + 1), &c__1, &cs, &q__1);
	}
/* L10: */
    }

    return 0;

/*     End of CTREXC */

} /* ctrexc_ */
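/* The same reordering driven through LAPACKE. For the complex routine,
   IFST/ILST are input-only, so the wrapper takes them by value (for the real
   STREXC they are in/out pointers). Assumes a standard LAPACKE installation;
   the matrix is illustrative. */
#define LAPACK_COMPLEX_CPP
#include <lapacke.h>
#include <complex>

int move_diagonal_entry(void)
{
    std::complex<float> t[9] = {      // 3x3 upper triangular, column-major
        {1.f, 0.f}, {0.f, 0.f}, {0.f, 0.f},
        {.5f, 0.f}, {2.f, 0.f}, {0.f, 0.f},
        {.2f, 0.f}, {.3f, 0.f}, {3.f, 0.f}
    };
    std::complex<float> q[9];         // not referenced when compq == 'N'
    // Move the eigenvalue in row 3 up to row 1; no Schur vectors kept.
    return LAPACKE_ctrexc(LAPACK_COL_MAJOR, 'N', 3, t, 3, q, 3, 3, 1);
}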
extern "C" magma_int_t magma_zunmqr_gpu( magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, magmaDoubleComplex_ptr dA, size_t dA_offset, magma_int_t ldda, magmaDoubleComplex *tau, magmaDoubleComplex_ptr dC, size_t dC_offset, magma_int_t lddc, magmaDoubleComplex *hwork, magma_int_t lwork, magmaDoubleComplex_ptr dT, size_t dT_offset, magma_int_t nb, magma_queue_t queue, magma_int_t *info) { /* -- clMAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date November 2014 Purpose ======= ZUNMQR_GPU overwrites the general complex M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q TRANS = 'T': Q**H * C C * Q**H where Q is a complex orthogonal matrix defined as the product of k elementary reflectors Q = H(1) H(2) . . . H(k) as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. Arguments ========= SIDE (input) CHARACTER*1 = 'L': apply Q or Q**H from the Left; = 'R': apply Q or Q**H from the Right. TRANS (input) CHARACTER*1 = 'N': No transpose, apply Q; = 'T': Transpose, apply Q**H. M (input) INTEGER The number of rows of the matrix C. M >= 0. N (input) INTEGER The number of columns of the matrix C. N >= 0. K (input) INTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. DA (input) COMPLEX_16 array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by ZGEQRF in the first k columns of its array argument DA. DA is modified by the routine but restored on exit. LDDA (input) INTEGER The leading dimension of the array DA. If SIDE = 'L', LDDA >= max(1,M); if SIDE = 'R', LDDA >= max(1,N). TAU (input) COMPLEX_16 array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGEQRF. DC (input/output) COMPLEX_16 array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q. LDDC (input) INTEGER The leading dimension of the array DC. LDDC >= max(1,M). HWORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, HWORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array HWORK. LWORK >= (M-K+NB)*(N+2*NB) if SIDE = 'L', and LWORK >= (N-K+NB)*(M+2*NB) if SIDE = 'R', where NB is the optimal blocksize. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the HWORK array, returns this value as the first entry of the HWORK array, and no error message related to LWORK is issued by XERBLA. DT (input) COMPLEX_16 array on the GPU that is the output (the 9th argument) of magma_zgeqrf_gpu. NB (input) INTEGER This is the blocking size that was used in pre-computing DT, e.g., the blocking size used in magma_zgeqrf_gpu. 
INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== */ #define a_ref(a_1,a_2) dA, (dA_offset+(a_1)+(a_2)*(ldda)) #define c_ref(a_1,a_2) dC, (dC_offset+(a_1)+(a_2)*(lddc)) #define t_ref(a_1) dT, (dT_offset+(a_1)*nb) magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex_ptr dwork; magma_int_t i, lddwork; magma_int_t i1, i2, i3, ib, ic, jc, mi, ni, nq, nw, ret; int left, notran, lquery; magma_int_t lwkopt; *info = 0; left = (side == MagmaLeft); notran = (trans == MagmaNoTrans); lquery = (lwork == -1); if (!left || notran) printf("zunmqr_gpu called with arguments not yet supported\n"); /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = m; nw = n; } else { nq = n; nw = m; } if ( (!left) && (side != MagmaRight) ) { *info = -1; } else if ( (!notran) && (trans != MagmaConjTrans) ) { *info = -2; } else if (m < 0) { *info = -3; } else if (n < 0) { *info = -4; } else if (k < 0 || k > nq) { *info = -5; } else if (ldda < max(1,nq)) { *info = -7; } else if (lddc < max(1,m)) { *info = -10; } else if (lwork < max(1,nw) && ! lquery) { *info = -12; } lwkopt = (m-k+nb)*(n+2*nb); hwork[0] = MAGMA_Z_MAKE( lwkopt, 0 ); if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (m == 0 || n == 0 || k == 0) { hwork[0] = c_one; return *info; } lddwork= k; dwork = dT; size_t dwork_offset = 2*lddwork*nb; if ( (left && (! notran)) || ( (!left) && notran ) ) { i1 = 0; i2 = k-nb; i3 = nb; } else { i1 = (k - 1 - nb) / nb * nb; i2 = 0; i3 = -nb; } if (left) { ni = n; jc = 0; } else { mi = m; ic = 0; } if (nb < k) { for (i=i1; i3<0 ? i>i2 : i<i2; i+=i3) { ib = min(nb, k - i); if (left){ mi = m - i; ic = i; } else { ni = n - i; jc = i; } ret = magma_zlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, mi, ni, ib, a_ref(i, i ), ldda, t_ref(i), nb, c_ref(ic, jc), lddc, dwork, dwork_offset, nw, queue); if ( ret != MAGMA_SUCCESS ) return ret; } } else { i = i1; } /* Use unblocked code to multiply the last or only block. */ if (i < k) { ib = k-i; if (left){ mi = m - i; ic = i; } else { ni = n - i; jc = i; } magma_zgetmatrix(mi, ib, a_ref(i, i), ldda, hwork, mi, queue); magma_zgetmatrix(mi, ni, c_ref(ic, jc), lddc, hwork+mi*ib, mi, queue); magma_int_t lhwork = lwork - mi*(ib + ni); lapackf77_zunmqr( MagmaLeftStr, MagmaConjTransStr, &mi, &ni, &ib, hwork, &mi, tau+i, hwork+mi*ib, &mi, hwork+mi*(ib+ni), &lhwork, info); // send the updated part of c back to the GPU magma_zsetmatrix(mi, ni, hwork+mi*ib, mi, c_ref(ic, jc), lddc, queue); } return *info; /* End of MAGMA_ZUNMQR_GPU */ }
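/* A sketch of the panel sweep implied by (i1, i2, i3) above: for the one
   case this routine supports (side = Left, trans = ConjTrans), blocks of Q
   are applied forward in steps of nb, and the remainder is handled by the
   unblocked LAPACK code, exactly as in the tail of the function. Sizes are
   made up for illustration: */
#include <algorithm>
#include <cstdio>

int main()
{
    const int k = 10, nb = 3;               // 10 reflectors, blocking size 3
    int i = 0;
    if (nb < k) {
        for (i = 0; i < k - nb; i += nb)    // blocked panels: i = 0, 3, 6
            std::printf("blocked panel: reflectors %d..%d\n",
                        i, i + std::min(nb, k - i) - 1);
    }
    std::printf("unblocked tail: reflectors %d..%d\n", i, k - 1);
    return 0;
}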
extern "C" magma_int_t magma_dorgqr( magma_int_t m, magma_int_t n, magma_int_t k, double *a, magma_int_t lda, double *tau, magmaDouble_ptr dT, size_t dT_offset, magma_int_t nb, magma_queue_t queue, magma_int_t *info ) { /* -- clMAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date November 2014 Purpose ======= DORGQR generates an M-by-N DOUBLE_PRECISION matrix Q with orthonormal columns, which is defined as the first N columns of a product of K elementary reflectors of order M Q = H(1) H(2) . . . H(k) as returned by DGEQRF. Arguments ========= M (input) INTEGER The number of rows of the matrix Q. M >= 0. N (input) INTEGER The number of columns of the matrix Q. M >= N >= 0. K (input) INTEGER The number of elementary reflectors whose product defines the matrix Q. N >= K >= 0. A (input/output) DOUBLE_PRECISION array A, dimension (LDDA,N). On entry, the i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by DGEQRF_GPU in the first k columns of its array argument A. On exit, the M-by-N matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). TAU (input) DOUBLE_PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by DGEQRF_GPU. DT (input) DOUBLE_PRECISION array on the GPU device. DT contains the T matrices used in blocking the elementary reflectors H(i), e.g., this can be the 6th argument of magma_dgeqrf_gpu. NB (input) INTEGER This is the block size used in DGEQRF_GPU, and correspondingly the size of the T matrices, used in the factorization, and stored in DT. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument has an illegal value ===================================================================== */ #define a_ref(i,j) ( a + (j)*lda + (i)) #define da_ref(i,j) da, (da_offset + (j)*ldda + (i)) #define t_ref(a_1) dT, (dT_offset + (a_1)*nb) double c_zero = MAGMA_D_ZERO; magma_int_t i__1, i__2, i__3; magma_int_t lwork, ldda; magma_int_t i, ib, ki, kk, iinfo; magma_int_t lddwork = min(m, n); double *work; magmaDouble_ptr da, dwork; size_t da_offset, dwork_offset; magma_event_t event = NULL; *info = 0; if (m < 0) { *info = -1; } else if ((n < 0) || (n > m)) { *info = -2; } else if ((k < 0) || (k > n)) { *info = -3; } else if (lda < max(1,m)) { *info = -5; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } if (n <= 0) return *info; /* Allocate GPU work space */ ldda = ((m+31)/32)*32; lddwork = ((lddwork+31)/32)*32; if (MAGMA_SUCCESS != magma_dmalloc( &da, ((n)*ldda + nb*lddwork ) )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } da_offset = 0; dwork = da; dwork_offset = da_offset + (n)*ldda; /* Allocate CPU work space */ lwork = n * nb; magma_dmalloc_cpu( &work, lwork ); if( work == NULL ) { magma_free( da ); *info = MAGMA_ERR_HOST_ALLOC; return *info; } if ( (nb > 1) && (nb < k) ) { /* Use blocked code after the last block. The first kk columns are handled by the block method. */ ki = (k - nb - 1) / nb * nb; kk = min(k, ki + nb); /* Set A(1:kk,kk+1:n) to zero. */ magmablas_dlaset(MagmaFull, kk, n-kk, c_zero, c_zero, da_ref(0,kk), ldda, queue); } else kk = 0; /* Use unblocked code for the last or only block. 
*/ if (kk < n) { i__1 = m - kk; i__2 = n - kk; i__3 = k - kk; lapackf77_dorgqr(&i__1, &i__2, &i__3, a_ref(kk, kk), &lda, &tau[kk], work, &lwork, &iinfo); magma_dsetmatrix(i__1, i__2, a_ref(kk, kk), lda, da_ref(kk, kk), ldda, queue); } if (kk > 0) { /* Use blocked code */ for (i = ki; i >= 0; i-=nb) { ib = min(nb, k - i); /* Send the current panel to the GPU */ i__2 = m - i; dpanel_to_q(MagmaUpper, ib, a_ref(i,i), lda, work); magma_dsetmatrix(i__2, ib, a_ref(i, i), lda, da_ref(i, i), ldda, queue); if (i + ib < n) { /* Apply H to A(i:m,i+ib:n) from the left */ i__3 = n - i - ib; magma_dlarfb_gpu( MagmaLeft, MagmaNoTrans, MagmaForward, MagmaColumnwise, i__2, i__3, ib, da_ref(i, i ), ldda, t_ref(i), nb, da_ref(i, i+ib), ldda, dwork, dwork_offset, lddwork, queue); } /* Apply H to rows i:m of current block on the CPU */ lapackf77_dorgqr(&i__2, &ib, &ib, a_ref(i, i), &lda, &tau[i], work, &lwork, &iinfo); magma_dsetmatrix_async( i__2, ib, a_ref(i,i), lda, da_ref(i,i), ldda, queue, &event ); /* Set rows 1:i-1 of current block to zero */ i__2 = i + ib; magmablas_dlaset(MagmaFull, i, i__2 - i, c_zero, c_zero, da_ref(0,i), ldda, queue); } } magma_dgetmatrix(m, n, da_ref(0, 0), ldda, a_ref(0, 0), lda, queue); //cudaStreamDestroy(stream); magma_free( da ); magma_free_cpu(work); return *info; } /* magma_dorgqr */
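/* CPU analogue of the routine above: factor with DGEQRF, then form the
   explicit Q with DORGQR; this is the same reflector/TAU convention the
   MAGMA code consumes. Assumes a standard LAPACKE installation; the helper
   name is ours. */
#include <lapacke.h>
#include <algorithm>
#include <vector>

int explicit_q(int m, int n, std::vector<double>& a /* m*n, column-major */)
{
    std::vector<double> tau(std::min(m, n));
    int info = LAPACKE_dgeqrf(LAPACK_COL_MAJOR, m, n, a.data(), m, tau.data());
    if (info != 0) return info;
    // Overwrite the reflectors in 'a' with the first n columns of Q.
    return LAPACKE_dorgqr(LAPACK_COL_MAJOR, m, n, (int)tau.size(),
                          a.data(), m, tau.data());
}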
extern "C" magma_int_t magma_cgeqrf_gpu( magma_int_t m, magma_int_t n, magmaFloatComplex *dA, magma_int_t ldda, magmaFloatComplex *tau, magmaFloatComplex *dT, magma_int_t *info ) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= CGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R. This version stores the triangular dT matrices used in the block QR factorization so that they can be applied directly (i.e., without being recomputed) later. As a result, the application of Q is much faster. Also, the upper triangular matrices for V have 0s in them. The corresponding parts of the upper triangular R are inverted and stored separately in dT. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. dA (input/output) COMPLEX array on the GPU, dimension (LDDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). LDDA (input) INTEGER The leading dimension of the array dA. LDDA >= max(1,M). To benefit from coalescent memory accesses LDDA must be dividable by 16. TAU (output) COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). dT (workspace/output) COMPLEX array on the GPU, dimension (2*MIN(M, N) + (N+31)/32*32 )*NB, where NB can be obtained through magma_get_cgeqrf_nb(M). It starts with MIN(M,N)*NB block that store the triangular T matrices, followed by the MIN(M,N)*NB block of the diagonal inverses for the R matrix. The rest of the array is used as workspace. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value or another error occured, such as memory allocation failed. Further Details =============== The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i). 
===================================================================== */ #define a_ref(a_1,a_2) (dA+(a_2)*(ldda) + (a_1)) #define t_ref(a_1) (dT+(a_1)*nb) #define d_ref(a_1) (dT+(minmn+(a_1))*nb) #define dd_ref(a_1) (dT+(2*minmn+(a_1))*nb) #define work_ref(a_1) ( work + (a_1)) #define hwork ( work + (nb)*(m)) magma_int_t i, k, minmn, old_i, old_ib, rows, cols; magma_int_t ib, nb; magma_int_t ldwork, lddwork, lwork, lhwork; magmaFloatComplex *work, *ut; /* check arguments */ *info = 0; if (m < 0) { *info = -1; } else if (n < 0) { *info = -2; } else if (ldda < max(1,m)) { *info = -4; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } k = minmn = min(m,n); if (k == 0) return *info; nb = magma_get_cgeqrf_nb(m); lwork = (m + n + nb)*nb; lhwork = lwork - m*nb; if (MAGMA_SUCCESS != magma_cmalloc_pinned( &work, lwork )) { *info = MAGMA_ERR_HOST_ALLOC; return *info; } ut = hwork+nb*(n); memset( ut, 0, nb*nb*sizeof(magmaFloatComplex)); magma_queue_t stream[2]; magma_queue_create( &stream[0] ); magma_queue_create( &stream[1] ); ldwork = m; lddwork= n; if ( (nb > 1) && (nb < k) ) { /* Use blocked code initially */ old_i = 0; old_ib = nb; for (i = 0; i < k-nb; i += nb) { ib = min(k-i, nb); rows = m -i; magma_cgetmatrix_async( rows, ib, a_ref(i,i), ldda, work_ref(i), ldwork, stream[1] ); if (i>0){ /* Apply H' to A(i:m,i+2*ib:n) from the left */ cols = n-old_i-2*old_ib; magma_clarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, m-old_i, cols, old_ib, a_ref(old_i, old_i ), ldda, t_ref(old_i), nb, a_ref(old_i, old_i+2*old_ib), ldda, dd_ref(0), lddwork); /* store the diagonal */ magma_csetmatrix_async( old_ib, old_ib, ut, old_ib, d_ref(old_i), old_ib, stream[0] ); } magma_queue_sync( stream[1] ); lapackf77_cgeqrf(&rows, &ib, work_ref(i), &ldwork, tau+i, hwork, &lhwork, info); /* Form the triangular factor of the block reflector H = H(i) H(i+1) . . . H(i+ib-1) */ lapackf77_clarft( MagmaForwardStr, MagmaColumnwiseStr, &rows, &ib, work_ref(i), &ldwork, tau+i, hwork, &ib); /* Put 0s in the upper triangular part of a panel (and 1s on the diagonal); copy the upper triangular in ut and invert it. */ magma_queue_sync( stream[0] ); csplit_diag_block(ib, work_ref(i), ldwork, ut); magma_csetmatrix( rows, ib, work_ref(i), ldwork, a_ref(i,i), ldda ); if (i + ib < n) { /* Send the triangular factor T to the GPU */ magma_csetmatrix( ib, ib, hwork, ib, t_ref(i), nb ); if (i+nb < k-nb){ /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */ magma_clarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, rows, ib, ib, a_ref(i, i ), ldda, t_ref(i), nb, a_ref(i, i+ib), ldda, dd_ref(0), lddwork); } else { cols = n-i-ib; magma_clarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise, rows, cols, ib, a_ref(i, i ), ldda, t_ref(i), nb, a_ref(i, i+ib), ldda, dd_ref(0), lddwork); /* Fix the diagonal block */ magma_csetmatrix( ib, ib, ut, ib, d_ref(i), ib ); } old_i = i; old_ib = ib; } } } else { i = 0; } /* Use unblocked code to factor the last or only block. */ if (i < k) { ib = n-i; rows = m-i; magma_cgetmatrix( rows, ib, a_ref(i, i), ldda, work, rows ); lhwork = lwork - rows*ib; lapackf77_cgeqrf(&rows, &ib, work, &rows, tau+i, work+ib*rows, &lhwork, info); magma_csetmatrix( rows, ib, work, rows, a_ref(i, i), ldda ); } magma_queue_destroy( stream[0] ); magma_queue_destroy( stream[1] ); magma_free_pinned( work ); return *info; /* End of MAGMA_CGEQRF */ } /* magma_cgeqrf */
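/*
 * A minimal driver sketch, not from the MAGMA sources: allocating dT with
 * the size documented above and running the factorization. The wrapper name
 * is this sketch's own, and dA is assumed to already hold A on the device
 * with ldda padded as the LDDA documentation requires.
 */
static magma_int_t qr_on_gpu_sketch( magma_int_t m, magma_int_t n,
                                     magmaFloatComplex *dA, magma_int_t ldda,
                                     magmaFloatComplex *tau )
{
    magma_int_t info  = 0;
    magma_int_t nb    = magma_get_cgeqrf_nb( m );  /* block size, per the docs */
    magma_int_t minmn = min( m, n );
    magmaFloatComplex *dT;
    /* dT must hold (2*MIN(M,N) + (N+31)/32*32)*NB elements, as documented. */
    if ( MAGMA_SUCCESS != magma_cmalloc( &dT, (2*minmn + ((n+31)/32)*32) * nb ) ) {
        return MAGMA_ERR_DEVICE_ALLOC;
    }
    magma_cgeqrf_gpu( m, n, dA, ldda, tau, dT, &info );
    magma_free( dT );
    return info;
}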
/* Subroutine */ int ddrges_(integer *nsizes, integer *nn, integer *ntypes, logical *dotype, integer *iseed, doublereal *thresh, integer *nounit, doublereal *a, integer *lda, doublereal *b, doublereal *s, doublereal *t, doublereal *q, integer *ldq, doublereal *z__, doublereal *alphar, doublereal *alphai, doublereal *beta, doublereal *work, integer * lwork, doublereal *result, logical *bwork, integer *info) { /* Initialized data */ static integer kclass[26] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2, 2,2,2,3 }; static integer kbmagn[26] = { 1,1,1,1,1,1,1,1,3,2,3,2,2,3,1,1,1,1,1,1,1,3, 2,3,2,1 }; static integer ktrian[26] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, 1,1,1,1 }; static integer iasign[26] = { 0,0,0,0,0,0,2,0,2,2,0,0,2,2,2,0,2,0,0,0,2,2, 2,2,2,0 }; static integer ibsign[26] = { 0,0,0,0,0,0,0,2,0,0,2,2,0,0,2,0,2,0,0,0,0,0, 0,0,0,0 }; static integer kz1[6] = { 0,1,2,1,3,3 }; static integer kz2[6] = { 0,0,1,2,1,1 }; static integer kadd[6] = { 0,0,0,0,3,2 }; static integer katype[26] = { 0,1,0,1,2,3,4,1,4,4,1,1,4,4,4,2,4,5,8,7,9,4, 4,4,4,0 }; static integer kbtype[26] = { 0,0,1,1,2,-3,1,4,1,1,4,4,1,1,-4,2,-4,8,8,8, 8,8,8,8,8,0 }; static integer kazero[26] = { 1,1,1,1,1,1,2,1,2,2,1,1,2,2,3,1,3,5,5,5,5,3, 3,3,3,1 }; static integer kbzero[26] = { 1,1,1,1,1,1,1,2,1,1,2,2,1,1,4,1,4,6,6,6,6,4, 4,4,4,1 }; static integer kamagn[26] = { 1,1,1,1,1,1,1,1,2,3,2,3,2,3,1,1,1,1,1,1,1,2, 3,3,2,1 }; /* Format strings */ static char fmt_9999[] = "(\002 DDRGES: \002,a,\002 returned INFO=\002,i" "6,\002.\002,/9x,\002N=\002,i6,\002, JTYPE=\002,i6,\002, ISEED=" "(\002,4(i4,\002,\002),i5,\002)\002)"; static char fmt_9998[] = "(\002 DDRGES: DGET53 returned INFO=\002,i1," "\002 for eigenvalue \002,i6,\002.\002,/9x,\002N=\002,i6,\002, JT" "YPE=\002,i6,\002, ISEED=(\002,4(i4,\002,\002),i5,\002)\002)"; static char fmt_9997[] = "(\002 DDRGES: S not in Schur form at eigenvalu" "e \002,i6,\002.\002,/9x,\002N=\002,i6,\002, JTYPE=\002,i6,\002, " "ISEED=(\002,3(i5,\002,\002),i5,\002)\002)"; static char fmt_9996[] = "(/1x,a3,\002 -- Real Generalized Schur form dr" "iver\002)"; static char fmt_9995[] = "(\002 Matrix types (see DDRGES for details):" " \002)"; static char fmt_9994[] = "(\002 Special Matrices:\002,23x,\002(J'=transp" "osed Jordan block)\002,/\002 1=(0,0) 2=(I,0) 3=(0,I) 4=(I,I" ") 5=(J',J') \002,\0026=(diag(J',I), diag(I,J'))\002,/\002 Diag" "onal Matrices: ( \002,\002D=diag(0,1,2,...) )\002,/\002 7=(D," "I) 9=(large*D, small*I\002,\002) 11=(large*I, small*D) 13=(l" "arge*D, large*I)\002,/\002 8=(I,D) 10=(small*D, large*I) 12=" "(small*I, large*D) \002,\002 14=(small*D, small*I)\002,/\002 15" "=(D, reversed D)\002)"; static char fmt_9993[] = "(\002 Matrices Rotated by Random \002,a,\002 M" "atrices U, V:\002,/\002 16=Transposed Jordan Blocks " " 19=geometric \002,\002alpha, beta=0,1\002,/\002 17=arithm. 
alp" "ha&beta \002,\002 20=arithmetic alpha, beta=0," "1\002,/\002 18=clustered \002,\002alpha, beta=0,1 21" "=random alpha, beta=0,1\002,/\002 Large & Small Matrices:\002," "/\002 22=(large, small) \002,\00223=(small,large) 24=(smal" "l,small) 25=(large,large)\002,/\002 26=random O(1) matrices" ".\002)"; static char fmt_9992[] = "(/\002 Tests performed: (S is Schur, T is tri" "angular, \002,\002Q and Z are \002,a,\002,\002,/19x,\002l and r " "are the appropriate left and right\002,/19x,\002eigenvectors, re" "sp., a is alpha, b is beta, and\002,/19x,a,\002 means \002,a," "\002.)\002,/\002 Without ordering: \002,/\002 1 = | A - Q S " "Z\002,a,\002 | / ( |A| n ulp ) 2 = | B - Q T Z\002,a,\002 |" " / ( |B| n ulp )\002,/\002 3 = | I - QQ\002,a,\002 | / ( n ulp " ") 4 = | I - ZZ\002,a,\002 | / ( n ulp )\002,/\002 5" " = A is in Schur form S\002,/\002 6 = difference between (alpha" ",beta)\002,\002 and diagonals of (S,T)\002,/\002 With ordering:" " \002,/\002 7 = | (A,B) - Q (S,T) Z\002,a,\002 | / ( |(A,B)| n " "ulp ) \002,/\002 8 = | I - QQ\002,a,\002 | / ( n ulp ) " " 9 = | I - ZZ\002,a,\002 | / ( n ulp )\002,/\002 10 = A is in" " Schur form S\002,/\002 11 = difference between (alpha,beta) and" " diagonals\002,\002 of (S,T)\002,/\002 12 = SDIM is the correct " "number of \002,\002selected eigenvalues\002,/)"; static char fmt_9991[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2" ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i2,\002 is\002" ",0p,f8.2)"; static char fmt_9990[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2" ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i2,\002 is\002" ",1p,d10.3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, s_dim1, s_offset, t_dim1, t_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8, d__9, d__10; /* Builtin functions */ double d_sign(doublereal *, doublereal *); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ static integer iadd, sdim, ierr, nmax, rsub; static char sort[1]; static doublereal temp1, temp2; static integer i__, j, n; static logical badnn; extern /* Subroutine */ int dget51_(integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), dget53_( doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), dget54_( integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), dgges_(char *, char *, char *, L_fp, integer *, doublereal *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, logical *, integer *); static integer iinfo; static doublereal rmagn[4]; static integer nmats, jsize, nerrs, i1, jtype, ntest, n1, isort; extern /* Subroutine */ int dlatm4_(integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *), dorm2r_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dlabad_(doublereal *, doublereal *); static logical ilabad; static integer jc, nb, in; extern doublereal dlamch_(char *); static integer jr; extern /* Subroutine 
*/ int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); extern doublereal dlarnd_(integer *, integer *); extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal safmin; static integer ioldsd[4]; static doublereal safmax; static integer knteig; extern logical dlctes_(doublereal *, doublereal *, doublereal *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int alasvm_(char *, integer *, integer *, integer *, integer *), dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); static integer minwrk, maxwrk; static doublereal ulpinv; static integer mtypes, ntestt; static doublereal ulp; /* Fortran I/O blocks */ static cilist io___40 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___46 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___56 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___58 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___59 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___60 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___61 = { 0, 0, 0, fmt_9990, 0 }; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define s_ref(a_1,a_2) s[(a_2)*s_dim1 + a_1] #define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DDRGES checks the nonsymmetric generalized eigenvalue (Schur form) problem driver DGGES. DGGES factors A and B as Q S Z' and Q T Z' , where ' means transpose, T is upper triangular, S is in generalized Schur form (block upper triangular, with 1x1 and 2x2 blocks on the diagonal, the 2x2 blocks corresponding to complex conjugate pairs of generalized eigenvalues), and Q and Z are orthogonal. It also computes the generalized eigenvalues (alpha(j),beta(j)), j=1,...,n. Thus, w(j) = alpha(j)/beta(j) is a root of the characteristic equation det( A - w(j) B ) = 0 Optionally it also reorders the eigenvalues so that a selected cluster of eigenvalues appears in the leading diagonal block of the Schur forms. When DDRGES is called, a number of matrix "sizes" ("N's") and a number of matrix "TYPES" are specified. For each size ("N") and each TYPE of matrix, a pair of matrices (A, B) will be generated and used for testing. For each matrix pair, the following 13 tests will be performed and compared with the threshold THRESH except the tests (5), (11) and (13). (1) | A - Q S Z' | / ( |A| n ulp ) (no sorting of eigenvalues) (2) | B - Q T Z' | / ( |B| n ulp ) (no sorting of eigenvalues) (3) | I - QQ' | / ( n ulp ) (no sorting of eigenvalues) (4) | I - ZZ' | / ( n ulp ) (no sorting of eigenvalues) (5) if A is in Schur form (i.e. 
quasi-triangular form) (no sorting of eigenvalues) (6) if eigenvalues = diagonal blocks of the Schur form (S, T), i.e., test the maximum over j of D(j) where: if alpha(j) is real: |alpha(j) - S(j,j)| |beta(j) - T(j,j)| D(j) = ------------------------ + ----------------------- max(|alpha(j)|,|S(j,j)|) max(|beta(j)|,|T(j,j)|) if alpha(j) is complex: | det( s S - w T ) | D(j) = --------------------------------------------------- ulp max( s norm(S), |w| norm(T) )*norm( s S - w T ) and S and T are here the 2 x 2 diagonal blocks of S and T corresponding to the j-th and j+1-th eigenvalues. (no sorting of eigenvalues) (7) | (A,B) - Q (S,T) Z' | / ( | (A,B) | n ulp ) (with sorting of eigenvalues). (8) | I - QQ' | / ( n ulp ) (with sorting of eigenvalues). (9) | I - ZZ' | / ( n ulp ) (with sorting of eigenvalues). (10) if A is in Schur form (i.e. quasi-triangular form) (with sorting of eigenvalues). (11) if eigenvalues = diagonal blocks of the Schur form (S, T), i.e. test the maximum over j of D(j) where: if alpha(j) is real: |alpha(j) - S(j,j)| |beta(j) - T(j,j)| D(j) = ------------------------ + ----------------------- max(|alpha(j)|,|S(j,j)|) max(|beta(j)|,|T(j,j)|) if alpha(j) is complex: | det( s S - w T ) | D(j) = --------------------------------------------------- ulp max( s norm(S), |w| norm(T) )*norm( s S - w T ) and S and T are here the 2 x 2 diagonal blocks of S and T corresponding to the j-th and j+1-th eigenvalues. (with sorting of eigenvalues). (12) if sorting worked and SDIM is the number of eigenvalues which were SELECTed. Test Matrices ============= The sizes of the test matrices are specified by an array NN(1:NSIZES); the value of each element NN(j) specifies one size. The "types" are specified by a logical array DOTYPE( 1:NTYPES ); if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. Currently, the list of possible types is: (1) ( 0, 0 ) (a pair of zero matrices) (2) ( I, 0 ) (an identity and a zero matrix) (3) ( 0, I ) (an identity and a zero matrix) (4) ( I, I ) (a pair of identity matrices) t t (5) ( J , J ) (a pair of transposed Jordan blocks) t ( I 0 ) (6) ( X, Y ) where X = ( J 0 ) and Y = ( t ) ( 0 I ) ( 0 J ) and I is a k x k identity and J a (k+1)x(k+1) Jordan block; k=(N-1)/2 (7) ( D, I ) where D is diag( 0, 1,..., N-1 ) (a diagonal matrix with those diagonal entries.) (8) ( I, D ) (9) ( big*D, small*I ) where "big" is near overflow and small=1/big (10) ( small*D, big*I ) (11) ( big*I, small*D ) (12) ( small*I, big*D ) (13) ( big*D, big*I ) (14) ( small*D, small*I ) (15) ( D1, D2 ) where D1 is diag( 0, 0, 1, ..., N-3, 0 ) and D2 is diag( 0, N-3, N-4,..., 1, 0, 0 ) t t (16) Q ( J , J ) Z where Q and Z are random orthogonal matrices. (17) Q ( T1, T2 ) Z where T1 and T2 are upper triangular matrices with random O(1) entries above the diagonal and diagonal entries diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) and diag(T2) = ( 0, N-3, N-4,..., 1, 0, 0 ) (18) Q ( T1, T2 ) Z diag(T1) = ( 0, 0, 1, 1, s, ..., s, 0 ) diag(T2) = ( 0, 1, 0, 1,..., 1, 0 ) s = machine precision. (19) Q ( T1, T2 ) Z diag(T1)=( 0,0,1,1, 1-d, ..., 1-(N-5)*d=s, 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0 ) N-5 (20) Q ( T1, T2 ) Z diag(T1)=( 0, 0, 1, 1, a, ..., a =s, 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 ) (21) Q ( T1, T2 ) Z diag(T1)=( 0, 0, 1, r1, r2, ..., r(N-4), 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 ) where r1,..., r(N-4) are random. 
(22) Q ( big*T1, small*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (23) Q ( small*T1, big*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (24) Q ( small*T1, small*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (25) Q ( big*T1, big*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (26) Q ( T1, T2 ) Z where T1 and T2 are random upper-triangular matrices. Arguments ========= NSIZES (input) INTEGER The number of sizes of matrices to use. If it is zero, DDRGES does nothing. NSIZES >= 0. NN (input) INTEGER array, dimension (NSIZES) An array containing the sizes to be used for the matrices. Zero values will be skipped. NN >= 0. NTYPES (input) INTEGER The number of elements in DOTYPE. If it is zero, DDRGES does nothing. It must be at least zero. If it is MAXTYP+1 and NSIZES is 1, then an additional type, MAXTYP+1 is defined, which is to use whatever matrix is in A on input. This is only useful if DOTYPE(1:MAXTYP) is .FALSE. and DOTYPE(MAXTYP+1) is .TRUE. . DOTYPE (input) LOGICAL array, dimension (NTYPES) If DOTYPE(j) is .TRUE., then for each size in NN a matrix of that size and of type j will be generated. If NTYPES is smaller than the maximum number of types defined (PARAMETER MAXTYP), then types NTYPES+1 through MAXTYP will not be generated. If NTYPES is larger than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) will be ignored. ISEED (input/output) INTEGER array, dimension (4) On entry ISEED specifies the seed of the random number generator. The array elements should be between 0 and 4095; if not they will be reduced mod 4096. Also, ISEED(4) must be odd. The random number generator uses a linear congruential sequence limited to small integers, and so should produce machine independent random numbers. The values of ISEED are changed on exit, and can be used in the next call to DDRGES to continue the same random number sequence. THRESH (input) DOUBLE PRECISION A test will count as "failed" if the "error", computed as described above, exceeds THRESH. Note that the error is scaled to be O(1), so THRESH should be a reasonably small multiple of 1, e.g., 10 or 100. In particular, it should not depend on the precision (single vs. double) or the size of the matrix. THRESH >= 0. NOUNIT (input) INTEGER The FORTRAN unit number for printing out error messages (e.g., if a routine returns IINFO not equal to 0.) A (input/workspace) DOUBLE PRECISION array, dimension(LDA, max(NN)) Used to hold the original A matrix. Used as input only if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and DOTYPE(MAXTYP+1)=.TRUE. LDA (input) INTEGER The leading dimension of A, B, S, and T. It must be at least 1 and at least max( NN ). B (input/workspace) DOUBLE PRECISION array, dimension(LDA, max(NN)) Used to hold the original B matrix. Used as input only if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and DOTYPE(MAXTYP+1)=.TRUE. S (workspace) DOUBLE PRECISION array, dimension (LDA, max(NN)) The Schur form matrix computed from A by DGGES. On exit, S contains the Schur form matrix corresponding to the matrix in A. T (workspace) DOUBLE PRECISION array, dimension (LDA, max(NN)) The upper triangular matrix computed from B by DGGES. Q (workspace) DOUBLE PRECISION array, dimension (LDQ, max(NN)) The (left) orthogonal matrix computed by DGGES. LDQ (input) INTEGER The leading dimension of Q and Z. It must be at least 1 and at least max( NN ). 
Z (workspace) DOUBLE PRECISION array, dimension( LDQ, max(NN) ) The (right) orthogonal matrix computed by DGGES. ALPHAR (workspace) DOUBLE PRECISION array, dimension (max(NN)) ALPHAI (workspace) DOUBLE PRECISION array, dimension (max(NN)) BETA (workspace) DOUBLE PRECISION array, dimension (max(NN)) The generalized eigenvalues of (A,B) computed by DGGES. ( ALPHAR(k)+ALPHAI(k)*i ) / BETA(k) is the k-th generalized eigenvalue of A and B. WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) LWORK (input) INTEGER The dimension of the array WORK. LWORK >= MAX( 10*(N+1), 3*N*N ), where N is the largest matrix dimension. RESULT (output) DOUBLE PRECISION array, dimension (15) The values computed by the tests described above. The values are currently limited to 1/ulp, to avoid overflow. BWORK (workspace) LOGICAL array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: A routine returned an error code. INFO is the absolute value of the INFO value returned. ===================================================================== Parameter adjustments */ --nn; --dotype; --iseed; t_dim1 = *lda; t_offset = 1 + t_dim1 * 1; t -= t_offset; s_dim1 = *lda; s_offset = 1 + s_dim1 * 1; s -= s_offset; b_dim1 = *lda; b_offset = 1 + b_dim1 * 1; b -= b_offset; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; z_dim1 = *ldq; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --alphar; --alphai; --beta; --work; --result; --bwork; /* Function Body Check for errors */ *info = 0; badnn = FALSE_; nmax = 1; i__1 = *nsizes; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = nmax, i__3 = nn[j]; nmax = max(i__2,i__3); if (nn[j] < 0) { badnn = TRUE_; } /* L10: */ } if (*nsizes < 0) { *info = -1; } else if (badnn) { *info = -2; } else if (*ntypes < 0) { *info = -3; } else if (*thresh < 0.) { *info = -6; } else if (*lda <= 1 || *lda < nmax) { *info = -9; } else if (*ldq <= 1 || *ldq < nmax) { *info = -14; } /* Compute workspace (Note: Comments in the code beginning "Workspace:" describe the minimal amount of workspace needed at that point in the code, as well as the preferred amount for good performance. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV. */ minwrk = 1; if (*info == 0 && *lwork >= 1) { /* Computing MAX */ i__1 = (nmax + 1) * 10, i__2 = nmax * 3 * nmax; minwrk = max(i__1,i__2); /* Computing MAX */ i__1 = 1, i__2 = ilaenv_(&c__1, "DGEQRF", " ", &nmax, &nmax, &c_n1, & c_n1, (ftnlen)6, (ftnlen)1), i__1 = max(i__1,i__2), i__2 = ilaenv_(&c__1, "DORMQR", "LT", &nmax, &nmax, &nmax, &c_n1, ( ftnlen)6, (ftnlen)2), i__1 = max(i__1,i__2), i__2 = ilaenv_(& c__1, "DORGQR", " ", &nmax, &nmax, &nmax, &c_n1, (ftnlen)6, ( ftnlen)1); nb = max(i__1,i__2); /* Computing MAX */ i__1 = (nmax + 1) * 10, i__2 = (nmax << 1) + nmax * nb, i__1 = max( i__1,i__2), i__2 = nmax * 3 * nmax; maxwrk = max(i__1,i__2); work[1] = (doublereal) maxwrk; } if (*lwork < minwrk) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("DDRGES", &i__1); return 0; } /* Quick return if possible */ if (*nsizes == 0 || *ntypes == 0) { return 0; } safmin = dlamch_("Safe minimum"); ulp = dlamch_("Epsilon") * dlamch_("Base"); safmin /= ulp; safmax = 1. / safmin; dlabad_(&safmin, &safmax); ulpinv = 1. / ulp; /* The values RMAGN(2:3) depend on N, see below. 
*/ rmagn[0] = 0.; rmagn[1] = 1.; /* Loop over matrix sizes */ ntestt = 0; nerrs = 0; nmats = 0; i__1 = *nsizes; for (jsize = 1; jsize <= i__1; ++jsize) { n = nn[jsize]; n1 = max(1,n); rmagn[2] = safmax * ulp / (doublereal) n1; rmagn[3] = safmin * ulpinv * (doublereal) n1; if (*nsizes != 1) { mtypes = min(26,*ntypes); } else { mtypes = min(27,*ntypes); } /* Loop over matrix types */ i__2 = mtypes; for (jtype = 1; jtype <= i__2; ++jtype) { if (! dotype[jtype]) { goto L180; } ++nmats; ntest = 0; /* Save ISEED in case of an error. */ for (j = 1; j <= 4; ++j) { ioldsd[j - 1] = iseed[j]; /* L20: */ } /* Initialize RESULT */ for (j = 1; j <= 13; ++j) { result[j] = 0.; /* L30: */ } /* Generate test matrices A and B Description of control parameters: KZLASS: =1 means w/o rotation, =2 means w/ rotation, =3 means random. KATYPE: the "type" to be passed to DLATM4 for computing A. KAZERO: the pattern of zeros on the diagonal for A: =1: ( xxx ), =2: (0, xxx ) =3: ( 0, 0, xxx, 0 ), =4: ( 0, xxx, 0, 0 ), =5: ( 0, 0, 1, xxx, 0 ), =6: ( 0, 1, 0, xxx, 0 ). (xxx means a string of non-zero entries.) KAMAGN: the magnitude of the matrix: =0: zero, =1: O(1), =2: large, =3: small. IASIGN: 1 if the diagonal elements of A are to be multiplied by a random magnitude 1 number, =2 if randomly chosen diagonal blocks are to be rotated to form 2x2 blocks. KBTYPE, KBZERO, KBMAGN, IBSIGN: the same, but for B. KTRIAN: =0: don't fill in the upper triangle, =1: do. KZ1, KZ2, KADD: used to implement KAZERO and KBZERO. RMAGN: used to implement KAMAGN and KBMAGN. */ if (mtypes > 26) { goto L110; } iinfo = 0; if (kclass[jtype - 1] < 3) { /* Generate A (w/o rotation) */ if ((i__3 = katype[jtype - 1], abs(i__3)) == 3) { in = ((n - 1) / 2 << 1) + 1; if (in != n) { dlaset_("Full", &n, &n, &c_b26, &c_b26, &a[a_offset], lda); } } else { in = n; } dlatm4_(&katype[jtype - 1], &in, &kz1[kazero[jtype - 1] - 1], &kz2[kazero[jtype - 1] - 1], &iasign[jtype - 1], & rmagn[kamagn[jtype - 1]], &ulp, &rmagn[ktrian[jtype - 1] * kamagn[jtype - 1]], &c__2, &iseed[1], &a[ a_offset], lda); iadd = kadd[kazero[jtype - 1] - 1]; if (iadd > 0 && iadd <= n) { a_ref(iadd, iadd) = 1.; } /* Generate B (w/o rotation) */ if ((i__3 = kbtype[jtype - 1], abs(i__3)) == 3) { in = ((n - 1) / 2 << 1) + 1; if (in != n) { dlaset_("Full", &n, &n, &c_b26, &c_b26, &b[b_offset], lda); } } else { in = n; } dlatm4_(&kbtype[jtype - 1], &in, &kz1[kbzero[jtype - 1] - 1], &kz2[kbzero[jtype - 1] - 1], &ibsign[jtype - 1], & rmagn[kbmagn[jtype - 1]], &c_b32, &rmagn[ktrian[jtype - 1] * kbmagn[jtype - 1]], &c__2, &iseed[1], &b[ b_offset], lda); iadd = kadd[kbzero[jtype - 1] - 1]; if (iadd != 0 && iadd <= n) { b_ref(iadd, iadd) = 1.; } if (kclass[jtype - 1] == 2 && n > 0) { /* Include rotations Generate Q, Z as Householder transformations times a diagonal matrix. 
*/ i__3 = n - 1; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = jc; jr <= i__4; ++jr) { q_ref(jr, jc) = dlarnd_(&c__3, &iseed[1]); z___ref(jr, jc) = dlarnd_(&c__3, &iseed[1]); /* L40: */ } i__4 = n + 1 - jc; dlarfg_(&i__4, &q_ref(jc, jc), &q_ref(jc + 1, jc), & c__1, &work[jc]); work[(n << 1) + jc] = d_sign(&c_b32, &q_ref(jc, jc)); q_ref(jc, jc) = 1.; i__4 = n + 1 - jc; dlarfg_(&i__4, &z___ref(jc, jc), &z___ref(jc + 1, jc), &c__1, &work[n + jc]); work[n * 3 + jc] = d_sign(&c_b32, &z___ref(jc, jc)); z___ref(jc, jc) = 1.; /* L50: */ } q_ref(n, n) = 1.; work[n] = 0.; d__1 = dlarnd_(&c__2, &iseed[1]); work[n * 3] = d_sign(&c_b32, &d__1); z___ref(n, n) = 1.; work[n * 2] = 0.; d__1 = dlarnd_(&c__2, &iseed[1]); work[n * 4] = d_sign(&c_b32, &d__1); /* Apply the diagonal matrices */ i__3 = n; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = 1; jr <= i__4; ++jr) { a_ref(jr, jc) = work[(n << 1) + jr] * work[n * 3 + jc] * a_ref(jr, jc); b_ref(jr, jc) = work[(n << 1) + jr] * work[n * 3 + jc] * b_ref(jr, jc); /* L60: */ } /* L70: */ } i__3 = n - 1; dorm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[ 1], &a[a_offset], lda, &work[(n << 1) + 1], & iinfo); if (iinfo != 0) { goto L100; } i__3 = n - 1; dorm2r_("R", "T", &n, &n, &i__3, &z__[z_offset], ldq, & work[n + 1], &a[a_offset], lda, &work[(n << 1) + 1], &iinfo); if (iinfo != 0) { goto L100; } i__3 = n - 1; dorm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[ 1], &b[b_offset], lda, &work[(n << 1) + 1], & iinfo); if (iinfo != 0) { goto L100; } i__3 = n - 1; dorm2r_("R", "T", &n, &n, &i__3, &z__[z_offset], ldq, & work[n + 1], &b[b_offset], lda, &work[(n << 1) + 1], &iinfo); if (iinfo != 0) { goto L100; } } } else { /* Random matrices */ i__3 = n; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = 1; jr <= i__4; ++jr) { a_ref(jr, jc) = rmagn[kamagn[jtype - 1]] * dlarnd_(& c__2, &iseed[1]); b_ref(jr, jc) = rmagn[kbmagn[jtype - 1]] * dlarnd_(& c__2, &iseed[1]); /* L80: */ } /* L90: */ } } L100: if (iinfo != 0) { io___40.ciunit = *nounit; s_wsfe(&io___40); do_fio(&c__1, "Generator", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); return 0; } L110: for (i__ = 1; i__ <= 13; ++i__) { result[i__] = -1.; /* L120: */ } /* Test with and without sorting of eigenvalues */ for (isort = 0; isort <= 1; ++isort) { if (isort == 0) { *(unsigned char *)sort = 'N'; rsub = 0; } else { *(unsigned char *)sort = 'S'; rsub = 5; } /* Call DGGES to compute H, T, Q, Z, alpha, and beta. 
*/ dlacpy_("Full", &n, &n, &a[a_offset], lda, &s[s_offset], lda); dlacpy_("Full", &n, &n, &b[b_offset], lda, &t[t_offset], lda); ntest = rsub + 1 + isort; result[rsub + 1 + isort] = ulpinv; dgges_("V", "V", sort, (L_fp)dlctes_, &n, &s[s_offset], lda, & t[t_offset], lda, &sdim, &alphar[1], &alphai[1], & beta[1], &q[q_offset], ldq, &z__[z_offset], ldq, & work[1], lwork, &bwork[1], &iinfo); if (iinfo != 0 && iinfo != n + 2) { result[rsub + 1 + isort] = ulpinv; io___46.ciunit = *nounit; s_wsfe(&io___46); do_fio(&c__1, "DGGES", (ftnlen)5); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)) ; e_wsfe(); *info = abs(iinfo); goto L160; } ntest = rsub + 4; /* Do tests 1--4 (or tests 7--9 when reordering ) */ if (isort == 0) { dget51_(&c__1, &n, &a[a_offset], lda, &s[s_offset], lda, & q[q_offset], ldq, &z__[z_offset], ldq, &work[1], & result[1]); dget51_(&c__1, &n, &b[b_offset], lda, &t[t_offset], lda, & q[q_offset], ldq, &z__[z_offset], ldq, &work[1], & result[2]); } else { dget54_(&n, &a[a_offset], lda, &b[b_offset], lda, &s[ s_offset], lda, &t[t_offset], lda, &q[q_offset], ldq, &z__[z_offset], ldq, &work[1], &result[7]); } dget51_(&c__3, &n, &a[a_offset], lda, &t[t_offset], lda, &q[ q_offset], ldq, &q[q_offset], ldq, &work[1], &result[ rsub + 3]); dget51_(&c__3, &n, &b[b_offset], lda, &t[t_offset], lda, &z__[ z_offset], ldq, &z__[z_offset], ldq, &work[1], & result[rsub + 4]); /* Do test 5 and 6 (or Tests 10 and 11 when reordering): check Schur form of A and compare eigenvalues with diagonals. */ ntest = rsub + 6; temp1 = 0.; i__3 = n; for (j = 1; j <= i__3; ++j) { ilabad = FALSE_; if (alphai[j] == 0.) { /* Computing MAX */ d__7 = safmin, d__8 = (d__2 = alphar[j], abs(d__2)), d__7 = max(d__7,d__8), d__8 = (d__3 = s_ref(j, j), abs(d__3)); /* Computing MAX */ d__9 = safmin, d__10 = (d__5 = beta[j], abs(d__5)), d__9 = max(d__9,d__10), d__10 = (d__6 = t_ref( j, j), abs(d__6)); temp2 = ((d__1 = alphar[j] - s_ref(j, j), abs(d__1)) / max(d__7,d__8) + (d__4 = beta[j] - t_ref(j, j), abs(d__4)) / max(d__9,d__10)) / ulp; if (j < n) { if (s_ref(j + 1, j) != 0.) { ilabad = TRUE_; result[rsub + 5] = ulpinv; } } if (j > 1) { if (s_ref(j, j - 1) != 0.) { ilabad = TRUE_; result[rsub + 5] = ulpinv; } } } else { if (alphai[j] > 0.) { i1 = j; } else { i1 = j - 1; } if (i1 <= 0 || i1 >= n) { ilabad = TRUE_; } else if (i1 < n - 1) { if (s_ref(i1 + 2, i1 + 1) != 0.) { ilabad = TRUE_; result[rsub + 5] = ulpinv; } } else if (i1 > 1) { if (s_ref(i1, i1 - 1) != 0.) { ilabad = TRUE_; result[rsub + 5] = ulpinv; } } if (! 
ilabad) { dget53_(&s_ref(i1, i1), lda, &t_ref(i1, i1), lda, &beta[j], &alphar[j], &alphai[j], &temp2, &ierr); if (ierr >= 3) { io___52.ciunit = *nounit; s_wsfe(&io___52); do_fio(&c__1, (char *)&ierr, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&j, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof( integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen) sizeof(integer)); e_wsfe(); *info = abs(ierr); } } else { temp2 = ulpinv; } } temp1 = max(temp1,temp2); if (ilabad) { io___53.ciunit = *nounit; s_wsfe(&io___53); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)) ; do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof( integer)); e_wsfe(); } /* L130: */ } result[rsub + 6] = temp1; if (isort >= 1) { /* Do test 12 */ ntest = 12; result[12] = 0.; knteig = 0; i__3 = n; for (i__ = 1; i__ <= i__3; ++i__) { d__1 = -alphai[i__]; if (dlctes_(&alphar[i__], &alphai[i__], &beta[i__]) || dlctes_(&alphar[i__], &d__1, &beta[i__])) { ++knteig; } if (i__ < n) { d__1 = -alphai[i__ + 1]; d__2 = -alphai[i__]; if ((dlctes_(&alphar[i__ + 1], &alphai[i__ + 1], & beta[i__ + 1]) || dlctes_(&alphar[i__ + 1] , &d__1, &beta[i__ + 1])) && ! (dlctes_(& alphar[i__], &alphai[i__], &beta[i__]) || dlctes_(&alphar[i__], &d__2, &beta[i__])) && iinfo != n + 2) { result[12] = ulpinv; } } /* L140: */ } if (sdim != knteig) { result[12] = ulpinv; } } /* L150: */ } /* End of Loop -- Check for RESULT(j) > THRESH */ L160: ntestt += ntest; /* Print out tests which fail. */ i__3 = ntest; for (jr = 1; jr <= i__3; ++jr) { if (result[jr] >= *thresh) { /* If this is the first test to fail, print a header to the data file. */ if (nerrs == 0) { io___55.ciunit = *nounit; s_wsfe(&io___55); do_fio(&c__1, "DGS", (ftnlen)3); e_wsfe(); /* Matrix types */ io___56.ciunit = *nounit; s_wsfe(&io___56); e_wsfe(); io___57.ciunit = *nounit; s_wsfe(&io___57); e_wsfe(); io___58.ciunit = *nounit; s_wsfe(&io___58); do_fio(&c__1, "Orthogonal", (ftnlen)10); e_wsfe(); /* Tests performed */ io___59.ciunit = *nounit; s_wsfe(&io___59); do_fio(&c__1, "orthogonal", (ftnlen)10); do_fio(&c__1, "'", (ftnlen)1); do_fio(&c__1, "transpose", (ftnlen)9); for (j = 1; j <= 8; ++j) { do_fio(&c__1, "'", (ftnlen)1); } e_wsfe(); } ++nerrs; if (result[jr] < 1e4) { io___60.ciunit = *nounit; s_wsfe(&io___60); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)) ; do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof( doublereal)); e_wsfe(); } else { io___61.ciunit = *nounit; s_wsfe(&io___61); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)) ; do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof( doublereal)); e_wsfe(); } } /* L170: */ } L180: ; } /* L190: */ } /* Summary */ alasvm_("DGS", nounit, &nerrs, &ntestt, &c__0); work[1] = (doublereal) maxwrk; return 0; /* End of DDRGES */ } /* ddrges_ */
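/*
 * A worked illustration, not part of the test suite: the orthogonality
 * residual used in tests (3)/(4) and (8)/(9) above, | I - Q Q' | / ( n ulp ),
 * computed naively for a small column-major n-by-n Q, with the
 * max-absolute-entry norm standing in for the test's matrix norm.
 */
#include <math.h>
static double orth_residual_sketch( const double *q, int ldq, int n, double ulp )
{
    double resid = 0.;
    for ( int i = 0; i < n; ++i ) {
        for ( int j = 0; j < n; ++j ) {
            double s = ( i == j ) ? 1. : 0.;           /* entry of I         */
            for ( int k = 0; k < n; ++k )              /* minus (Q Q')(i,j)  */
                s -= q[i + k*ldq] * q[j + k*ldq];
            if ( fabs( s ) > resid )
                resid = fabs( s );
        }
    }
    return resid / ( (double) n * ulp );  /* O(1) for an orthogonal Q */
}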
/* Subroutine */ int ztrsna_(char *job, char *howmny, logical *select, integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, doublereal *s, doublereal *sep, integer *mm, integer *m, doublecomplex *work, integer *ldwork, doublereal *rwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZTRSNA estimates reciprocal condition numbers for specified eigenvalues and/or right eigenvectors of a complex upper triangular matrix T (or of any matrix Q*T*Q**H with Q unitary). Arguments ========= JOB (input) CHARACTER*1 Specifies whether condition numbers are required for eigenvalues (S) or eigenvectors (SEP): = 'E': for eigenvalues only (S); = 'V': for eigenvectors only (SEP); = 'B': for both eigenvalues and eigenvectors (S and SEP). HOWMNY (input) CHARACTER*1 = 'A': compute condition numbers for all eigenpairs; = 'S': compute condition numbers for selected eigenpairs specified by the array SELECT. SELECT (input) LOGICAL array, dimension (N) If HOWMNY = 'S', SELECT specifies the eigenpairs for which condition numbers are required. To select condition numbers for the j-th eigenpair, SELECT(j) must be set to .TRUE.. If HOWMNY = 'A', SELECT is not referenced. N (input) INTEGER The order of the matrix T. N >= 0. T (input) COMPLEX*16 array, dimension (LDT,N) The upper triangular matrix T. LDT (input) INTEGER The leading dimension of the array T. LDT >= max(1,N). VL (input) COMPLEX*16 array, dimension (LDVL,M) If JOB = 'E' or 'B', VL must contain left eigenvectors of T (or of any Q*T*Q**H with Q unitary), corresponding to the eigenpairs specified by HOWMNY and SELECT. The eigenvectors must be stored in consecutive columns of VL, as returned by ZHSEIN or ZTREVC. If JOB = 'V', VL is not referenced. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. VR (input) COMPLEX*16 array, dimension (LDVR,M) If JOB = 'E' or 'B', VR must contain right eigenvectors of T (or of any Q*T*Q**H with Q unitary), corresponding to the eigenpairs specified by HOWMNY and SELECT. The eigenvectors must be stored in consecutive columns of VR, as returned by ZHSEIN or ZTREVC. If JOB = 'V', VR is not referenced. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. S (output) DOUBLE PRECISION array, dimension (MM) If JOB = 'E' or 'B', the reciprocal condition numbers of the selected eigenvalues, stored in consecutive elements of the array. Thus S(j), SEP(j), and the j-th columns of VL and VR all correspond to the same eigenpair (but not in general the j-th eigenpair, unless all eigenpairs are selected). If JOB = 'V', S is not referenced. SEP (output) DOUBLE PRECISION array, dimension (MM) If JOB = 'V' or 'B', the estimated reciprocal condition numbers of the selected eigenvectors, stored in consecutive elements of the array. If JOB = 'E', SEP is not referenced. MM (input) INTEGER The number of elements in the arrays S (if JOB = 'E' or 'B') and/or SEP (if JOB = 'V' or 'B'). MM >= M. M (output) INTEGER The number of elements of the arrays S and/or SEP actually used to store the estimated condition numbers. If HOWMNY = 'A', M is set to N. WORK (workspace) COMPLEX*16 array, dimension (LDWORK,N+1) If JOB = 'E', WORK is not referenced. LDWORK (input) INTEGER The leading dimension of the array WORK. 
LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. RWORK (workspace) DOUBLE PRECISION array, dimension (N) If JOB = 'E', RWORK is not referenced. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== The reciprocal of the condition number of an eigenvalue lambda is defined as S(lambda) = |v'*u| / (norm(u)*norm(v)) where u and v are the right and left eigenvectors of T corresponding to lambda; v' denotes the conjugate transpose of v, and norm(u) denotes the Euclidean norm. These reciprocal condition numbers always lie between zero (very badly conditioned) and one (very well conditioned). If n = 1, S(lambda) is defined to be 1. An approximate error bound for a computed eigenvalue W(i) is given by EPS * norm(T) / S(i) where EPS is the machine precision. The reciprocal of the condition number of the right eigenvector u corresponding to lambda is defined as follows. Suppose T = ( lambda c ) ( 0 T22 ) Then the reciprocal condition number is SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) where sigma-min denotes the smallest singular value. We approximate the smallest singular value by the reciprocal of an estimate of the one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is defined to be abs(T(1,1)). An approximate error bound for a computed right eigenvector VR(i) is given by EPS * norm(T) / SEP(i) ===================================================================== Decode and test the input parameters Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, work_dim1, work_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2; doublecomplex z__1; /* Builtin functions */ double z_abs(doublecomplex *), d_imag(doublecomplex *); /* Local variables */ static integer kase, ierr; static doublecomplex prod; static doublereal lnrm, rnrm; static integer i__, j, k; static doublereal scale; extern logical lsame_(char *, char *); extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static doublecomplex dummy[1]; static logical wants; static doublereal xnorm; extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); extern doublereal dznrm2_(integer *, doublecomplex *, integer *), dlamch_( char *); static integer ks, ix; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum; static logical wantbh; extern /* Subroutine */ int zlacon_(integer *, doublecomplex *, doublecomplex *, doublereal *, integer *); extern integer izamax_(integer *, doublecomplex *, integer *); static logical somcon; extern /* Subroutine */ int zdrscl_(integer *, doublereal *, doublecomplex *, integer *); static char normin[1]; extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static doublereal smlnum; static logical wantsp; extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublereal *, integer *), ztrexc_(char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, integer *); static doublereal eps, est; #define work_subscr(a_1,a_2) (a_2)*work_dim1 + a_1 #define work_ref(a_1,a_2) work[work_subscr(a_1,a_2)] #define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1 #define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)] #define vl_subscr(a_1,a_2) 
(a_2)*vl_dim1 + a_1 #define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)] #define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1 #define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)] --select; t_dim1 = *ldt; t_offset = 1 + t_dim1 * 1; t -= t_offset; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1 * 1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1 * 1; vr -= vr_offset; --s; --sep; work_dim1 = *ldwork; work_offset = 1 + work_dim1 * 1; work -= work_offset; --rwork; /* Function Body */ wantbh = lsame_(job, "B"); wants = lsame_(job, "E") || wantbh; wantsp = lsame_(job, "V") || wantbh; somcon = lsame_(howmny, "S"); /* Set M to the number of eigenpairs for which condition numbers are to be computed. */ if (somcon) { *m = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (select[j]) { ++(*m); } /* L10: */ } } else { *m = *n; } *info = 0; if (! wants && ! wantsp) { *info = -1; } else if (! lsame_(howmny, "A") && ! somcon) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*ldt < max(1,*n)) { *info = -6; } else if (*ldvl < 1 || wants && *ldvl < *n) { *info = -8; } else if (*ldvr < 1 || wants && *ldvr < *n) { *info = -10; } else if (*mm < *m) { *info = -13; } else if (*ldwork < 1 || wantsp && *ldwork < *n) { *info = -16; } if (*info != 0) { i__1 = -(*info); xerbla_("ZTRSNA", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } if (*n == 1) { if (somcon) { if (! select[1]) { return 0; } } if (wants) { s[1] = 1.; } if (wantsp) { sep[1] = z_abs(&t_ref(1, 1)); } return 0; } /* Get machine constants */ eps = dlamch_("P"); smlnum = dlamch_("S") / eps; bignum = 1. / smlnum; dlabad_(&smlnum, &bignum); ks = 1; i__1 = *n; for (k = 1; k <= i__1; ++k) { if (somcon) { if (! select[k]) { goto L50; } } if (wants) { /* Compute the reciprocal condition number of the k-th eigenvalue. */ zdotc_(&z__1, n, &vr_ref(1, ks), &c__1, &vl_ref(1, ks), &c__1); prod.r = z__1.r, prod.i = z__1.i; rnrm = dznrm2_(n, &vr_ref(1, ks), &c__1); lnrm = dznrm2_(n, &vl_ref(1, ks), &c__1); s[ks] = z_abs(&prod) / (rnrm * lnrm); } if (wantsp) { /* Estimate the reciprocal condition number of the k-th eigenvector. Copy the matrix T to the array WORK and swap the k-th diagonal element to the (1,1) position. */ zlacpy_("Full", n, n, &t[t_offset], ldt, &work[work_offset], ldwork); ztrexc_("No Q", n, &work[work_offset], ldwork, dummy, &c__1, &k, & c__1, &ierr); /* Form C = T22 - lambda*I in WORK(2:N,2:N). */ i__2 = *n; for (i__ = 2; i__ <= i__2; ++i__) { i__3 = work_subscr(i__, i__); i__4 = work_subscr(i__, i__); i__5 = work_subscr(1, 1); z__1.r = work[i__4].r - work[i__5].r, z__1.i = work[i__4].i - work[i__5].i; work[i__3].r = z__1.r, work[i__3].i = z__1.i; /* L20: */ } /* Estimate a lower bound for the 1-norm of inv(C'). The 1st and (N+1)th columns of WORK are used to store work vectors. */ sep[ks] = 0.; est = 0.; kase = 0; *(unsigned char *)normin = 'N'; L30: i__2 = *n - 1; zlacon_(&i__2, &work_ref(1, *n + 1), &work[work_offset], &est, & kase); if (kase != 0) { if (kase == 1) { /* Solve C'*x = scale*b */ i__2 = *n - 1; zlatrs_("Upper", "Conjugate transpose", "Nonunit", normin, &i__2, &work_ref(2, 2), ldwork, &work[ work_offset], &scale, &rwork[1], &ierr); } else { /* Solve C*x = scale*b */ i__2 = *n - 1; zlatrs_("Upper", "No transpose", "Nonunit", normin, &i__2, &work_ref(2, 2), ldwork, &work[work_offset], & scale, &rwork[1], &ierr); } *(unsigned char *)normin = 'Y'; if (scale != 1.) { /* Multiply by 1/SCALE if doing so will not cause overflow. 
*/ i__2 = *n - 1; ix = izamax_(&i__2, &work[work_offset], &c__1); i__2 = work_subscr(ix, 1); xnorm = (d__1 = work[i__2].r, abs(d__1)) + (d__2 = d_imag( &work_ref(ix, 1)), abs(d__2)); if (scale < xnorm * smlnum || scale == 0.) { goto L40; } zdrscl_(n, &scale, &work[work_offset], &c__1); } goto L30; } sep[ks] = 1. / max(est,smlnum); } L40: ++ks; L50: ; } return 0; /* End of ZTRSNA */ } /* ztrsna_ */
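/*
 * A worked illustration, not library code: the Further Details formula
 * S(lambda) = |v'*u| / (norm(u)*norm(v)) evaluated for a single eigenpair.
 * Real vectors are used for brevity; ZTRSNA itself performs the complex
 * analogue of this loop with zdotc_ and dznrm2_.
 */
#include <math.h>
static double eig_rcond_sketch( const double *u, const double *v, int n )
{
    double dot = 0., uu = 0., vv = 0.;
    for ( int i = 0; i < n; ++i ) {
        dot += v[i] * u[i];   /* v'*u    */
        uu  += u[i] * u[i];   /* ||u||^2 */
        vv  += v[i] * v[i];   /* ||v||^2 */
    }
    /* Near 1: well conditioned eigenvalue; near 0: badly conditioned. */
    return fabs( dot ) / ( sqrt( uu ) * sqrt( vv ) );
}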
/* Subroutine */ int dlaqtr_(logical *ltran, logical *lreal, integer *n, doublereal *t, integer *ldt, doublereal *b, doublereal *w, doublereal *scale, doublereal *x, doublereal *work, integer *info) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DLAQTR solves the real quasi-triangular system op(T)*p = scale*c, if LREAL = .TRUE. or the complex quasi-triangular systems op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. in real arithmetic, where T is upper quasi-triangular. If LREAL = .FALSE., then the first diagonal block of T must be 1 by 1, B is the specially structured matrix B = [ b(1) b(2) ... b(n) ] [ w ] [ w ] [ . ] [ w ] op(A) = A or A', where A' denotes the conjugate transpose of matrix A. On input, X = [ c ]. On output, X = [ p ]. [ d ] [ q ] This subroutine is designed for the condition number estimation in routine DTRSNA. Arguments ========= LTRAN (input) LOGICAL On entry, LTRAN specifies the option of conjugate transpose: = .FALSE., op(T+i*B) = T+i*B, = .TRUE., op(T+i*B) = (T+i*B)'. LREAL (input) LOGICAL On entry, LREAL specifies the input matrix structure: = .FALSE., the input is complex = .TRUE., the input is real N (input) INTEGER On entry, N specifies the order of T+i*B. N >= 0. T (input) DOUBLE PRECISION array, dimension (LDT,N) On entry, T contains a matrix in Schur canonical form. If LREAL = .FALSE., then the first diagonal block of T must be 1 by 1. LDT (input) INTEGER The leading dimension of the matrix T. LDT >= max(1,N). B (input) DOUBLE PRECISION array, dimension (N) On entry, B contains the elements to form the matrix B as described above. If LREAL = .TRUE., B is not referenced. W (input) DOUBLE PRECISION On entry, W is the diagonal element of the matrix B. If LREAL = .TRUE., W is not referenced. SCALE (output) DOUBLE PRECISION On exit, SCALE is the scale factor. X (input/output) DOUBLE PRECISION array, dimension (2*N) On entry, X contains the right hand side of the system. On exit, X is overwritten by the solution. WORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER On exit, INFO is set to 0: successful exit. 1: some diagonal 1 by 1 block has been perturbed by a small number SMIN to keep nonsingularity. 2: some diagonal 2 by 2 block has been perturbed by a small number in DLALN2 to keep nonsingularity. NOTE: In the interests of speed, this routine does not check the inputs for errors. 
===================================================================== Do not test the input parameters for errors Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static logical c_false = FALSE_; static integer c__2 = 2; static doublereal c_b21 = 1.; static doublereal c_b25 = 0.; static logical c_true = TRUE_; /* System generated locals */ integer t_dim1, t_offset, i__1, i__2; doublereal d__1, d__2, d__3, d__4, d__5, d__6; /* Local variables */ extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); static integer ierr; static doublereal smin, xmax, d__[4] /* was [2][2] */; static integer i__, j, k; static doublereal v[4] /* was [2][2] */, z__; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern doublereal dasum_(integer *, doublereal *, integer *); extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer jnext, j1, j2; static doublereal sminw; static integer n1, n2; static doublereal xnorm; extern /* Subroutine */ int dlaln2_(logical *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal * , doublereal *, integer *, doublereal *, doublereal *, integer *); extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); static doublereal si, xj; extern integer idamax_(integer *, doublereal *, integer *); static doublereal scaloc, sr; extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); static doublereal bignum; static logical notran; static doublereal smlnum, rec, eps, tjj, tmp; #define d___ref(a_1,a_2) d__[(a_2)*2 + a_1 - 3] #define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1] #define v_ref(a_1,a_2) v[(a_2)*2 + a_1 - 3] t_dim1 = *ldt; t_offset = 1 + t_dim1 * 1; t -= t_offset; --b; --x; --work; /* Function Body */ notran = ! (*ltran); *info = 0; /* Quick return if possible */ if (*n == 0) { return 0; } /* Set constants to control overflow */ eps = dlamch_("P"); smlnum = dlamch_("S") / eps; bignum = 1. / smlnum; xnorm = dlange_("M", n, n, &t[t_offset], ldt, d__); if (! (*lreal)) { /* Computing MAX */ d__1 = xnorm, d__2 = abs(*w), d__1 = max(d__1,d__2), d__2 = dlange_( "M", n, &c__1, &b[1], n, d__); xnorm = max(d__1,d__2); } /* Computing MAX */ d__1 = smlnum, d__2 = eps * xnorm; smin = max(d__1,d__2); /* Compute 1-norm of each column of strictly upper triangular part of T to control overflow in triangular solver. */ work[1] = 0.; i__1 = *n; for (j = 2; j <= i__1; ++j) { i__2 = j - 1; work[j] = dasum_(&i__2, &t_ref(1, j), &c__1); /* L10: */ } if (! (*lreal)) { i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { work[i__] += (d__1 = b[i__], abs(d__1)); /* L20: */ } } n2 = *n << 1; n1 = *n; if (! (*lreal)) { n1 = n2; } k = idamax_(&n1, &x[1], &c__1); xmax = (d__1 = x[k], abs(d__1)); *scale = 1.; if (xmax > bignum) { *scale = bignum / xmax; dscal_(&n1, scale, &x[1], &c__1); xmax = bignum; } if (*lreal) { if (notran) { /* Solve T*p = scale*c */ jnext = *n; for (j = *n; j >= 1; --j) { if (j > jnext) { goto L30; } j1 = j; j2 = j; jnext = j - 1; if (j > 1) { if (t_ref(j, j - 1) != 0.) 
{ j1 = j - 1; jnext = j - 2; } } if (j1 == j2) { /* Meet 1 by 1 diagonal block Scale to avoid overflow when computing x(j) = b(j)/T(j,j) */ xj = (d__1 = x[j1], abs(d__1)); tjj = (d__1 = t_ref(j1, j1), abs(d__1)); tmp = t_ref(j1, j1); if (tjj < smin) { tmp = smin; tjj = smin; *info = 1; } if (xj == 0.) { goto L30; } if (tjj < 1.) { if (xj > bignum * tjj) { rec = 1. / xj; dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j1] /= tmp; xj = (d__1 = x[j1], abs(d__1)); /* Scale x if necessary to avoid overflow when adding a multiple of column j1 of T. */ if (xj > 1.) { rec = 1. / xj; if (work[j1] > (bignum - xmax) * rec) { dscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } if (j1 > 1) { i__1 = j1 - 1; d__1 = -x[j1]; daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[1], & c__1); i__1 = j1 - 1; k = idamax_(&i__1, &x[1], &c__1); xmax = (d__1 = x[k], abs(d__1)); } } else { /* Meet 2 by 2 diagonal block Call 2 by 2 linear system solve, to take care of possible overflow by scaling factor. */ d___ref(1, 1) = x[j1]; d___ref(2, 1) = x[j2]; dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b21, &t_ref(j1, j1), ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, & c_b25, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.) { dscal_(n, &scaloc, &x[1], &c__1); *scale *= scaloc; } x[j1] = v_ref(1, 1); x[j2] = v_ref(2, 1); /* Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2)) to avoid overflow in updating right-hand side. Computing MAX */ d__3 = (d__1 = v_ref(1, 1), abs(d__1)), d__4 = (d__2 = v_ref(2, 1), abs(d__2)); xj = max(d__3,d__4); if (xj > 1.) { rec = 1. / xj; /* Computing MAX */ d__1 = work[j1], d__2 = work[j2]; if (max(d__1,d__2) > (bignum - xmax) * rec) { dscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } /* Update right-hand side */ if (j1 > 1) { i__1 = j1 - 1; d__1 = -x[j1]; daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[1], & c__1); i__1 = j1 - 1; d__1 = -x[j2]; daxpy_(&i__1, &d__1, &t_ref(1, j2), &c__1, &x[1], & c__1); i__1 = j1 - 1; k = idamax_(&i__1, &x[1], &c__1); xmax = (d__1 = x[k], abs(d__1)); } } L30: ; } } else { /* Solve T'*p = scale*c */ jnext = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (j < jnext) { goto L40; } j1 = j; j2 = j; jnext = j + 1; if (j < *n) { if (t_ref(j + 1, j) != 0.) { j2 = j + 1; jnext = j + 2; } } if (j1 == j2) { /* 1 by 1 diagonal block Scale if necessary to avoid overflow in forming the right-hand side element by inner product. */ xj = (d__1 = x[j1], abs(d__1)); if (xmax > 1.) { rec = 1. / xmax; if (work[j1] > (bignum - xj) * rec) { dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; x[j1] -= ddot_(&i__2, &t_ref(1, j1), &c__1, &x[1], &c__1); xj = (d__1 = x[j1], abs(d__1)); tjj = (d__1 = t_ref(j1, j1), abs(d__1)); tmp = t_ref(j1, j1); if (tjj < smin) { tmp = smin; tjj = smin; *info = 1; } if (tjj < 1.) { if (xj > bignum * tjj) { rec = 1. / xj; dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j1] /= tmp; /* Computing MAX */ d__2 = xmax, d__3 = (d__1 = x[j1], abs(d__1)); xmax = max(d__2,d__3); } else { /* 2 by 2 diagonal block Scale if necessary to avoid overflow in forming the right-hand side elements by inner product. Computing MAX */ d__3 = (d__1 = x[j1], abs(d__1)), d__4 = (d__2 = x[j2], abs(d__2)); xj = max(d__3,d__4); if (xmax > 1.) { rec = 1. 
/ xmax; /* Computing MAX */ d__1 = work[j2], d__2 = work[j1]; if (max(d__1,d__2) > (bignum - xj) * rec) { dscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; d___ref(1, 1) = x[j1] - ddot_(&i__2, &t_ref(1, j1), &c__1, &x[1], &c__1); i__2 = j1 - 1; d___ref(2, 1) = x[j2] - ddot_(&i__2, &t_ref(1, j2), &c__1, &x[1], &c__1); dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b21, &t_ref(j1, j1), ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, & c_b25, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.) { dscal_(n, &scaloc, &x[1], &c__1); *scale *= scaloc; } x[j1] = v_ref(1, 1); x[j2] = v_ref(2, 1); /* Computing MAX */ d__3 = (d__1 = x[j1], abs(d__1)), d__4 = (d__2 = x[j2], abs(d__2)), d__3 = max(d__3,d__4); xmax = max(d__3,xmax); } L40: ; } } } else { /* Computing MAX */ d__1 = eps * abs(*w); sminw = max(d__1,smin); if (notran) { /* Solve (T + iB)*(p+iq) = c+id */ jnext = *n; for (j = *n; j >= 1; --j) { if (j > jnext) { goto L70; } j1 = j; j2 = j; jnext = j - 1; if (j > 1) { if (t_ref(j, j - 1) != 0.) { j1 = j - 1; jnext = j - 2; } } if (j1 == j2) { /* 1 by 1 diagonal block Scale if necessary to avoid overflow in division */ z__ = *w; if (j1 == 1) { z__ = b[1]; } xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs( d__2)); tjj = (d__1 = t_ref(j1, j1), abs(d__1)) + abs(z__); tmp = t_ref(j1, j1); if (tjj < sminw) { tmp = sminw; tjj = sminw; *info = 1; } if (xj == 0.) { goto L70; } if (tjj < 1.) { if (xj > bignum * tjj) { rec = 1. / xj; dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } dladiv_(&x[j1], &x[*n + j1], &tmp, &z__, &sr, &si); x[j1] = sr; x[*n + j1] = si; xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs( d__2)); /* Scale x if necessary to avoid overflow when adding a multiple of column j1 of T. */ if (xj > 1.) { rec = 1. / xj; if (work[j1] > (bignum - xmax) * rec) { dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; } } if (j1 > 1) { i__1 = j1 - 1; d__1 = -x[j1]; daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[1], & c__1); i__1 = j1 - 1; d__1 = -x[*n + j1]; daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[*n + 1], &c__1); x[1] += b[j1] * x[*n + j1]; x[*n + 1] -= b[j1] * x[j1]; xmax = 0.; i__1 = j1 - 1; for (k = 1; k <= i__1; ++k) { /* Computing MAX */ d__3 = xmax, d__4 = (d__1 = x[k], abs(d__1)) + ( d__2 = x[k + *n], abs(d__2)); xmax = max(d__3,d__4); /* L50: */ } } } else { /* Meet 2 by 2 diagonal block */ d___ref(1, 1) = x[j1]; d___ref(2, 1) = x[j2]; d___ref(1, 2) = x[*n + j1]; d___ref(2, 2) = x[*n + j2]; d__1 = -(*w); dlaln2_(&c_false, &c__2, &c__2, &sminw, &c_b21, &t_ref(j1, j1), ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, & d__1, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.) { i__1 = *n << 1; dscal_(&i__1, &scaloc, &x[1], &c__1); *scale = scaloc * *scale; } x[j1] = v_ref(1, 1); x[j2] = v_ref(2, 1); x[*n + j1] = v_ref(1, 2); x[*n + j2] = v_ref(2, 2); /* Scale X(J1), .... to avoid overflow in updating right hand side. Computing MAX */ d__5 = (d__1 = v_ref(1, 1), abs(d__1)) + (d__2 = v_ref(1, 2), abs(d__2)), d__6 = (d__3 = v_ref(2, 1), abs( d__3)) + (d__4 = v_ref(2, 2), abs(d__4)); xj = max(d__5,d__6); if (xj > 1.) { rec = 1. / xj; /* Computing MAX */ d__1 = work[j1], d__2 = work[j2]; if (max(d__1,d__2) > (bignum - xmax) * rec) { dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; } } /* Update the right-hand side. 
*/ if (j1 > 1) { i__1 = j1 - 1; d__1 = -x[j1]; daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[1], & c__1); i__1 = j1 - 1; d__1 = -x[j2]; daxpy_(&i__1, &d__1, &t_ref(1, j2), &c__1, &x[1], & c__1); i__1 = j1 - 1; d__1 = -x[*n + j1]; daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[*n + 1], &c__1); i__1 = j1 - 1; d__1 = -x[*n + j2]; daxpy_(&i__1, &d__1, &t_ref(1, j2), &c__1, &x[*n + 1], &c__1); x[1] = x[1] + b[j1] * x[*n + j1] + b[j2] * x[*n + j2]; x[*n + 1] = x[*n + 1] - b[j1] * x[j1] - b[j2] * x[j2]; xmax = 0.; i__1 = j1 - 1; for (k = 1; k <= i__1; ++k) { /* Computing MAX */ d__3 = (d__1 = x[k], abs(d__1)) + (d__2 = x[k + * n], abs(d__2)); xmax = max(d__3,xmax); /* L60: */ } } } L70: ; } } else { /* Solve (T + iB)'*(p+iq) = c+id */ jnext = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (j < jnext) { goto L80; } j1 = j; j2 = j; jnext = j + 1; if (j < *n) { if (t_ref(j + 1, j) != 0.) { j2 = j + 1; jnext = j + 2; } } if (j1 == j2) { /* 1 by 1 diagonal block Scale if necessary to avoid overflow in forming the right-hand side element by inner product. */ xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], abs( d__2)); if (xmax > 1.) { rec = 1. / xmax; if (work[j1] > (bignum - xj) * rec) { dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; x[j1] -= ddot_(&i__2, &t_ref(1, j1), &c__1, &x[1], &c__1); i__2 = j1 - 1; x[*n + j1] -= ddot_(&i__2, &t_ref(1, j1), &c__1, &x[*n + 1], &c__1); if (j1 > 1) { x[j1] -= b[j1] * x[*n + 1]; x[*n + j1] += b[j1] * x[1]; } xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], abs( d__2)); z__ = *w; if (j1 == 1) { z__ = b[1]; } /* Scale if necessary to avoid overflow in complex division */ tjj = (d__1 = t_ref(j1, j1), abs(d__1)) + abs(z__); tmp = t_ref(j1, j1); if (tjj < sminw) { tmp = sminw; tjj = sminw; *info = 1; } if (tjj < 1.) { if (xj > bignum * tjj) { rec = 1. / xj; dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } d__1 = -z__; dladiv_(&x[j1], &x[*n + j1], &tmp, &d__1, &sr, &si); x[j1] = sr; x[j1 + *n] = si; /* Computing MAX */ d__3 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], abs(d__2)); xmax = max(d__3,xmax); } else { /* 2 by 2 diagonal block Scale if necessary to avoid overflow in forming the right-hand side element by inner product. Computing MAX */ d__5 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs(d__2)), d__6 = (d__3 = x[j2], abs(d__3)) + ( d__4 = x[*n + j2], abs(d__4)); xj = max(d__5,d__6); if (xmax > 1.) { rec = 1. / xmax; /* Computing MAX */ d__1 = work[j1], d__2 = work[j2]; if (max(d__1,d__2) > (bignum - xj) / xmax) { dscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; d___ref(1, 1) = x[j1] - ddot_(&i__2, &t_ref(1, j1), &c__1, &x[1], &c__1); i__2 = j1 - 1; d___ref(2, 1) = x[j2] - ddot_(&i__2, &t_ref(1, j2), &c__1, &x[1], &c__1); i__2 = j1 - 1; d___ref(1, 2) = x[*n + j1] - ddot_(&i__2, &t_ref(1, j1), & c__1, &x[*n + 1], &c__1); i__2 = j1 - 1; d___ref(2, 2) = x[*n + j2] - ddot_(&i__2, &t_ref(1, j2), & c__1, &x[*n + 1], &c__1); d___ref(1, 1) = d___ref(1, 1) - b[j1] * x[*n + 1]; d___ref(2, 1) = d___ref(2, 1) - b[j2] * x[*n + 1]; d___ref(1, 2) = d___ref(1, 2) + b[j1] * x[1]; d___ref(2, 2) = d___ref(2, 2) + b[j2] * x[1]; dlaln2_(&c_true, &c__2, &c__2, &sminw, &c_b21, &t_ref(j1, j1), ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, w, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.) 
{ dscal_(&n2, &scaloc, &x[1], &c__1); *scale = scaloc * *scale; } x[j1] = v_ref(1, 1); x[j2] = v_ref(2, 1); x[*n + j1] = v_ref(1, 2); x[*n + j2] = v_ref(2, 2); /* Computing MAX */ d__5 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs(d__2)), d__6 = (d__3 = x[j2], abs(d__3)) + ( d__4 = x[*n + j2], abs(d__4)), d__5 = max(d__5, d__6); xmax = max(d__5,xmax); } L80: ; } } } return 0; /* End of DLAQTR */ } /* dlaqtr_ */
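/* A minimal standalone sketch (not the routine above) of the overflow guarding
   that DLAQTR applies throughout: back-substitution on an upper triangular
   system T*x = scale*b, where the whole solution is shrunk and the shrink
   factor accumulated in 'scale' whenever a division could overflow. The
   'bignum' threshold and the 2x2 demo data are illustrative assumptions; the
   real routine additionally guards the column updates. */
#include <cmath>
#include <cstdio>
#include <limits>
#include <vector>

/* Solve T*x = scale*b (T is n-by-n upper triangular, column-major).
   On entry x holds b; on exit x holds the solution and 0 < scale <= 1. */
static void trisolve_scaled(int n, const std::vector<double>& T,
                            std::vector<double>& x, double& scale)
{
    const double bignum = 1.0 / std::numeric_limits<double>::min();
    scale = 1.0;
    for (int j = n - 1; j >= 0; --j) {
        double tjj = T[j + j * n];
        /* If x[j]/tjj would overflow, rescale the entire right-hand side
           first, accumulating the factor in scale as DLAQTR does. */
        if (std::fabs(tjj) < 1.0 && std::fabs(x[j]) > bignum * std::fabs(tjj)) {
            double rec = 1.0 / std::fabs(x[j]);
            for (int i = 0; i < n; ++i) x[i] *= rec;
            scale *= rec;
        }
        x[j] /= tjj;
        for (int i = 0; i < j; ++i)      /* update remaining right-hand side */
            x[i] -= T[i + j * n] * x[j];
    }
}

int main()
{
    /* T = [1e-308 1; 0 1], b = (101, 1): the tiny pivot forces a rescale. */
    std::vector<double> T = {1e-308, 0.0, 1.0, 1.0};
    std::vector<double> x = {101.0, 1.0};
    double scale;
    trisolve_scaled(2, T, x, scale);
    std::printf("x = (%g, %g), scale = %g\n", x[0], x[1], scale);  /* scale = 0.01 */
}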
/* Subroutine */ int cdrvgg_(integer *nsizes, integer *nn, integer *ntypes, logical *dotype, integer *iseed, real *thresh, real *thrshn, integer * nounit, complex *a, integer *lda, complex *b, complex *s, complex *t, complex *s2, complex *t2, complex *q, integer *ldq, complex *z__, complex *alpha1, complex *beta1, complex *alpha2, complex *beta2, complex *vl, complex *vr, complex *work, integer *lwork, real *rwork, real *result, integer *info) { /* Initialized data */ static integer kclass[26] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2, 2,2,2,3 }; static integer kbmagn[26] = { 1,1,1,1,1,1,1,1,3,2,3,2,2,3,1,1,1,1,1,1,1,3, 2,3,2,1 }; static integer ktrian[26] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1, 1,1,1,1 }; static logical lasign[26] = { FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_, TRUE_,FALSE_,TRUE_,TRUE_,FALSE_,FALSE_,TRUE_,TRUE_,TRUE_,FALSE_, TRUE_,FALSE_,FALSE_,FALSE_,TRUE_,TRUE_,TRUE_,TRUE_,TRUE_,FALSE_ }; static logical lbsign[26] = { FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_, FALSE_,TRUE_,FALSE_,FALSE_,TRUE_,TRUE_,FALSE_,FALSE_,TRUE_,FALSE_, TRUE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_, FALSE_ }; static integer kz1[6] = { 0,1,2,1,3,3 }; static integer kz2[6] = { 0,0,1,2,1,1 }; static integer kadd[6] = { 0,0,0,0,3,2 }; static integer katype[26] = { 0,1,0,1,2,3,4,1,4,4,1,1,4,4,4,2,4,5,8,7,9,4, 4,4,4,0 }; static integer kbtype[26] = { 0,0,1,1,2,-3,1,4,1,1,4,4,1,1,-4,2,-4,8,8,8, 8,8,8,8,8,0 }; static integer kazero[26] = { 1,1,1,1,1,1,2,1,2,2,1,1,2,2,3,1,3,5,5,5,5,3, 3,3,3,1 }; static integer kbzero[26] = { 1,1,1,1,1,1,1,2,1,1,2,2,1,1,4,1,4,6,6,6,6,4, 4,4,4,1 }; static integer kamagn[26] = { 1,1,1,1,1,1,1,1,2,3,2,3,2,3,1,1,1,1,1,1,1,2, 3,3,2,1 }; /* Format strings */ static char fmt_9999[] = "(\002 CDRVGG: \002,a,\002 returned INFO=\002,i" "6,\002.\002,/9x,\002N=\002,i6,\002, JTYPE=\002,i6,\002, ISEED=" "(\002,3(i5,\002,\002),i5,\002)\002)"; static char fmt_9998[] = "(\002 CDRVGG: \002,a,\002 Eigenvectors from" " \002,a,\002 incorrectly \002,\002normalized.\002,/\002 Bits of " "error=\002,0p,g10.3,\002,\002,9x,\002N=\002,i6,\002, JTYPE=\002," "i6,\002, ISEED=(\002,3(i5,\002,\002),i5,\002)\002)"; static char fmt_9997[] = "(/1x,a3,\002 -- Complex Generalized eigenvalue" " problem driver\002)"; static char fmt_9996[] = "(\002 Matrix types (see CDRVGG for details):" " \002)"; static char fmt_9995[] = "(\002 Special Matrices:\002,23x,\002(J'=transp" "osed Jordan block)\002,/\002 1=(0,0) 2=(I,0) 3=(0,I) 4=(I,I" ") 5=(J',J') \002,\0026=(diag(J',I), diag(I,J'))\002,/\002 Diag" "onal Matrices: ( \002,\002D=diag(0,1,2,...) )\002,/\002 7=(D," "I) 9=(large*D, small*I\002,\002) 11=(large*I, small*D) 13=(l" "arge*D, large*I)\002,/\002 8=(I,D) 10=(small*D, large*I) 12=" "(small*I, large*D) \002,\002 14=(small*D, small*I)\002,/\002 15" "=(D, reversed D)\002)"; static char fmt_9994[] = "(\002 Matrices Rotated by Random \002,a,\002 M" "atrices U, V:\002,/\002 16=Transposed Jordan Blocks " " 19=geometric \002,\002alpha, beta=0,1\002,/\002 17=arithm. 
alp" "ha&beta \002,\002 20=arithmetic alpha, beta=0," "1\002,/\002 18=clustered \002,\002alpha, beta=0,1 21" "=random alpha, beta=0,1\002,/\002 Large & Small Matrices:\002," "/\002 22=(large, small) \002,\00223=(small,large) 24=(smal" "l,small) 25=(large,large)\002,/\002 26=random O(1) matrices" ".\002)"; static char fmt_9993[] = "(/\002 Tests performed: (S is Schur, T is tri" "angular, \002,\002Q and Z are \002,a,\002,\002,/20x,\002l and r " "are the appropriate left and right\002,/19x,\002eigenvectors, re" "sp., a is alpha, b is beta, and\002,/19x,a,\002 means \002,a," "\002.)\002,/\002 1 = | A - Q S Z\002,a,\002 | / ( |A| n ulp ) " " 2 = | B - Q T Z\002,a,\002 | / ( |B| n ulp )\002,/\002 3 = | " "I - QQ\002,a,\002 | / ( n ulp ) 4 = | I - ZZ\002,a" ",\002 | / ( n ulp )\002,/\002 5 = difference between (alpha,beta" ") and diagonals of\002,\002 (S,T)\002,/\002 6 = max | ( b A - a " "B )\002,a,\002 l | / const. 7 = max | ( b A - a B ) r | / cons" "t.\002,/1x)"; static char fmt_9992[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2" ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i3,\002 is\002" ",0p,f8.2)"; static char fmt_9991[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2" ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i3,\002 is\002" ",1p,e10.3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, s_dim1, s_offset, s2_dim1, s2_offset, t_dim1, t_offset, t2_dim1, t2_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11; real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8, r__9, r__10, r__11, r__12, r__13, r__14, r__15, r__16; complex q__1, q__2, q__3, q__4; /* Builtin functions */ double r_sign(real *, real *), c_abs(complex *); void r_cnjg(complex *, complex *); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); double r_imag(complex *); /* Local variables */ static integer iadd, nmax; static real temp1, temp2; static integer j, n; static logical badnn; extern /* Subroutine */ int cgegs_(char *, char *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, integer *, complex *, integer *, complex *, integer *, real *, integer *), cgegv_(char *, char *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, integer *, complex *, integer *, complex *, integer *, real *, integer *), cget51_(integer *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, real *, real *), cget52_(logical *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, real *, real *); static real dumma[4]; static integer iinfo; static real rmagn[4]; static complex ctemp; static integer nmats, jsize, nerrs, i1, jtype, ntest, n1; extern /* Subroutine */ int clatm4_(integer *, integer *, integer *, integer *, logical *, real *, real *, real *, integer *, integer * , complex *, integer *), cunm2r_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *); static integer jc, nb; extern /* Subroutine */ int slabad_(real *, real *); static integer in, jr; extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, integer *, complex *); static integer ns; extern /* Complex */ VOID clarnd_(complex *, integer *, integer *); extern doublereal slamch_(char *); extern /* Subroutine */ int clacpy_(char *, integer *, integer 
*, complex *, integer *, complex *, integer *); static real safmin, safmax; static integer ioldsd[4]; extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int alasvm_(char *, integer *, integer *, integer *, integer *), claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); static real ulpinv; static integer lwkopt, mtypes, ntestt, nbz; static real ulp; /* Fortran I/O blocks */ static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___47 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___54 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___56 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9991, 0 }; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1 #define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)] #define s_subscr(a_1,a_2) (a_2)*s_dim1 + a_1 #define s_ref(a_1,a_2) s[s_subscr(a_1,a_2)] #define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1 #define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)] #define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1 #define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CDRVGG checks the nonsymmetric generalized eigenvalue driver routines. T T T CGEGS factors A and B as Q S Z and Q T Z , where means transpose, T is upper triangular, S is in generalized Schur form (upper triangular), and Q and Z are unitary. It also computes the generalized eigenvalues (alpha(1),beta(1)), ..., (alpha(n),beta(n)), where alpha(j)=S(j,j) and beta(j)=T(j,j) -- thus, w(j) = alpha(j)/beta(j) is a root of the generalized eigenvalue problem det( A - w(j) B ) = 0 and m(j) = beta(j)/alpha(j) is a root of the essentially equivalent problem det( m(j) A - B ) = 0 CGEGV computes the generalized eigenvalues (alpha(1),beta(1)), ..., (alpha(n),beta(n)), the matrix L whose columns contain the generalized left eigenvectors l, and the matrix R whose columns contain the generalized right eigenvectors r for the pair (A,B). When CDRVGG is called, a number of matrix "sizes" ("n's") and a number of matrix "types" are specified. For each size ("n") and each type of matrix, one matrix will be generated and used to test the nonsymmetric eigenroutines. For each matrix, 7 tests will be performed and compared with the threshhold THRESH: Results from CGEGS: H (1) | A - Q S Z | / ( |A| n ulp ) H (2) | B - Q T Z | / ( |B| n ulp ) H (3) | I - QQ | / ( n ulp ) H (4) | I - ZZ | / ( n ulp ) (5) maximum over j of D(j) where: |alpha(j) - S(j,j)| |beta(j) - T(j,j)| D(j) = ------------------------ + ----------------------- max(|alpha(j)|,|S(j,j)|) max(|beta(j)|,|T(j,j)|) Results from CGEGV: (6) max over all left eigenvalue/-vector pairs (beta/alpha,l) of | l**H * (beta A - alpha B) | / ( ulp max( |beta A|, |alpha B| ) ) where l**H is the conjugate tranpose of l. 
(7) max over all right eigenvalue/-vector pairs (beta/alpha,r) of | (beta A - alpha B) r | / ( ulp max( |beta A|, |alpha B| ) ) Test Matrices ---- -------- The sizes of the test matrices are specified by an array NN(1:NSIZES); the value of each element NN(j) specifies one size. The "types" are specified by a logical array DOTYPE( 1:NTYPES ); if DOTYPE(j) is .TRUE., then matrix type "j" will be generated. Currently, the list of possible types is: (1) ( 0, 0 ) (a pair of zero matrices) (2) ( I, 0 ) (an identity and a zero matrix) (3) ( 0, I ) (an identity and a zero matrix) (4) ( I, I ) (a pair of identity matrices) t t (5) ( J , J ) (a pair of transposed Jordan blocks) t ( I 0 ) (6) ( X, Y ) where X = ( J 0 ) and Y = ( t ) ( 0 I ) ( 0 J ) and I is a k x k identity and J a (k+1)x(k+1) Jordan block; k=(N-1)/2 (7) ( D, I ) where D is diag( 0, 1,..., N-1 ) (a diagonal matrix with those diagonal entries.) (8) ( I, D ) (9) ( big*D, small*I ) where "big" is near overflow and small=1/big (10) ( small*D, big*I ) (11) ( big*I, small*D ) (12) ( small*I, big*D ) (13) ( big*D, big*I ) (14) ( small*D, small*I ) (15) ( D1, D2 ) where D1 is diag( 0, 0, 1, ..., N-3, 0 ) and D2 is diag( 0, N-3, N-4,..., 1, 0, 0 ) t t (16) Q ( J , J ) Z where Q and Z are random unitary matrices. (17) Q ( T1, T2 ) Z where T1 and T2 are upper triangular matrices with random O(1) entries above the diagonal and diagonal entries diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) and diag(T2) = ( 0, N-3, N-4,..., 1, 0, 0 ) (18) Q ( T1, T2 ) Z diag(T1) = ( 0, 0, 1, 1, s, ..., s, 0 ) diag(T2) = ( 0, 1, 0, 1,..., 1, 0 ) s = machine precision. (19) Q ( T1, T2 ) Z diag(T1)=( 0,0,1,1, 1-d, ..., 1-(N-5)*d=s, 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0 ) N-5 (20) Q ( T1, T2 ) Z diag(T1)=( 0, 0, 1, 1, a, ..., a =s, 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 ) (21) Q ( T1, T2 ) Z diag(T1)=( 0, 0, 1, r1, r2, ..., r(N-4), 0 ) diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 ) where r1,..., r(N-4) are random. (22) Q ( big*T1, small*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (23) Q ( small*T1, big*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (24) Q ( small*T1, small*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (25) Q ( big*T1, big*T2 ) Z diag(T1) = ( 0, 0, 1, ..., N-3, 0 ) diag(T2) = ( 0, 1, ..., 1, 0, 0 ) (26) Q ( T1, T2 ) Z where T1 and T2 are random upper-triangular matrices. Arguments ========= NSIZES (input) INTEGER The number of sizes of matrices to use. If it is zero, CDRVGG does nothing. It must be at least zero. NN (input) INTEGER array, dimension (NSIZES) An array containing the sizes to be used for the matrices. Zero values will be skipped. The values must be at least zero. NTYPES (input) INTEGER The number of elements in DOTYPE. If it is zero, CDRVGG does nothing. It must be at least zero. If it is MAXTYP+1 and NSIZES is 1, then an additional type, MAXTYP+1 is defined, which is to use whatever matrix is in A. This is only useful if DOTYPE(1:MAXTYP) is .FALSE. and DOTYPE(MAXTYP+1) is .TRUE. . DOTYPE (input) LOGICAL array, dimension (NTYPES) If DOTYPE(j) is .TRUE., then for each size in NN a matrix of that size and of type j will be generated. If NTYPES is smaller than the maximum number of types defined (PARAMETER MAXTYP), then types NTYPES+1 through MAXTYP will not be generated. If NTYPES is larger than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES) will be ignored. 
ISEED (input/output) INTEGER array, dimension (4) On entry ISEED specifies the seed of the random number generator. The array elements should be between 0 and 4095; if not they will be reduced mod 4096. Also, ISEED(4) must be odd. The random number generator uses a linear congruential sequence limited to small integers, and so should produce machine independent random numbers. The values of ISEED are changed on exit, and can be used in the next call to CDRVGG to continue the same random number sequence. THRESH (input) REAL A test will count as "failed" if the "error", computed as described above, exceeds THRESH. Note that the error is scaled to be O(1), so THRESH should be a reasonably small multiple of 1, e.g., 10 or 100. In particular, it should not depend on the precision (single vs. double) or the size of the matrix. It must be at least zero. THRSHN (input) REAL Threshhold for reporting eigenvector normalization error. If the normalization of any eigenvector differs from 1 by more than THRSHN*ulp, then a special error message will be printed. (This is handled separately from the other tests, since only a compiler or programming error should cause an error message, at least if THRSHN is at least 5--10.) NOUNIT (input) INTEGER The FORTRAN unit number for printing out error messages (e.g., if a routine returns IINFO not equal to 0.) A (input/workspace) COMPLEX array, dimension (LDA, max(NN)) Used to hold the original A matrix. Used as input only if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and DOTYPE(MAXTYP+1)=.TRUE. LDA (input) INTEGER The leading dimension of A, B, S, T, S2, and T2. It must be at least 1 and at least max( NN ). B (input/workspace) COMPLEX array, dimension (LDA, max(NN)) Used to hold the original B matrix. Used as input only if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and DOTYPE(MAXTYP+1)=.TRUE. S (workspace) COMPLEX array, dimension (LDA, max(NN)) The upper triangular matrix computed from A by CGEGS. T (workspace) COMPLEX array, dimension (LDA, max(NN)) The upper triangular matrix computed from B by CGEGS. S2 (workspace) COMPLEX array, dimension (LDA, max(NN)) The matrix computed from A by CGEGV. This will be the Schur (upper triangular) form of some matrix related to A, but will not, in general, be the same as S. T2 (workspace) COMPLEX array, dimension (LDA, max(NN)) The matrix computed from B by CGEGV. This will be the Schur form of some matrix related to B, but will not, in general, be the same as T. Q (workspace) COMPLEX array, dimension (LDQ, max(NN)) The (left) unitary matrix computed by CGEGS. LDQ (input) INTEGER The leading dimension of Q, Z, VL, and VR. It must be at least 1 and at least max( NN ). Z (workspace) COMPLEX array, dimension (LDQ, max(NN)) The (right) unitary matrix computed by CGEGS. ALPHA1 (workspace) COMPLEX array, dimension (max(NN)) BETA1 (workspace) COMPLEX array, dimension (max(NN)) The generalized eigenvalues of (A,B) computed by CGEGS. ALPHA1(k) / BETA1(k) is the k-th generalized eigenvalue of the matrices in A and B. ALPHA2 (workspace) COMPLEX array, dimension (max(NN)) BETA2 (workspace) COMPLEX array, dimension (max(NN)) The generalized eigenvalues of (A,B) computed by CGEGV. ALPHA2(k) / BETA2(k) is the k-th generalized eigenvalue of the matrices in A and B. VL (workspace) COMPLEX array, dimension (LDQ, max(NN)) The (lower triangular) left eigenvector matrix for the matrices in A and B. VR (workspace) COMPLEX array, dimension (LDQ, max(NN)) The (upper triangular) right eigenvector matrix for the matrices in A and B. 
WORK (workspace) COMPLEX array, dimension (LWORK) LWORK (input) INTEGER The number of entries in WORK. This must be at least MAX( 2*N, N*(NB+1), (k+1)*(2*k+N+1) ), where "k" is the sum of the blocksize and number-of-shifts for CHGEQZ, and NB is the greatest of the blocksizes for CGEQRF, CUNMQR, and CUNGQR. (The blocksizes and the number-of-shifts are retrieved through calls to ILAENV.) RWORK (workspace) REAL array, dimension (8*N) RESULT (output) REAL array, dimension (7) The values computed by the tests described above. The values are currently limited to 1/ulp, to avoid overflow. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: A routine returned an error code. INFO is the absolute value of the INFO value returned. ===================================================================== Parameter adjustments */ --nn; --dotype; --iseed; t2_dim1 = *lda; t2_offset = 1 + t2_dim1 * 1; t2 -= t2_offset; s2_dim1 = *lda; s2_offset = 1 + s2_dim1 * 1; s2 -= s2_offset; t_dim1 = *lda; t_offset = 1 + t_dim1 * 1; t -= t_offset; s_dim1 = *lda; s_offset = 1 + s_dim1 * 1; s -= s_offset; b_dim1 = *lda; b_offset = 1 + b_dim1 * 1; b -= b_offset; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; vr_dim1 = *ldq; vr_offset = 1 + vr_dim1 * 1; vr -= vr_offset; vl_dim1 = *ldq; vl_offset = 1 + vl_dim1 * 1; vl -= vl_offset; z_dim1 = *ldq; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --alpha1; --beta1; --alpha2; --beta2; --work; --rwork; --result; /* Function Body Check for errors */ *info = 0; badnn = FALSE_; nmax = 1; i__1 = *nsizes; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = nmax, i__3 = nn[j]; nmax = max(i__2,i__3); if (nn[j] < 0) { badnn = TRUE_; } /* L10: */ } /* Maximum blocksize and shift -- we assume that blocksize and number of shifts are monotone increasing functions of N. Computing MAX */ i__1 = 1, i__2 = ilaenv_(&c__1, "CGEQRF", " ", &nmax, &nmax, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1), i__1 = max(i__1,i__2), i__2 = ilaenv_(& c__1, "CUNMQR", "LC", &nmax, &nmax, &nmax, &c_n1, (ftnlen)6, ( ftnlen)2), i__1 = max(i__1,i__2), i__2 = ilaenv_(&c__1, "CUNGQR", " ", &nmax, &nmax, &nmax, &c_n1, (ftnlen)6, (ftnlen)1); nb = max(i__1,i__2); nbz = ilaenv_(&c__1, "CHGEQZ", "SII", &nmax, &c__1, &nmax, &c__0, (ftnlen) 6, (ftnlen)3); ns = ilaenv_(&c__4, "CHGEQZ", "SII", &nmax, &c__1, &nmax, &c__0, (ftnlen) 6, (ftnlen)3); i1 = nbz + ns; /* Computing MAX */ i__1 = nmax << 1, i__2 = nmax * (nb + 1), i__1 = max(i__1,i__2), i__2 = (( i1 << 1) + nmax + 1) * (i1 + 1); lwkopt = max(i__1,i__2); /* Check for errors */ if (*nsizes < 0) { *info = -1; } else if (badnn) { *info = -2; } else if (*ntypes < 0) { *info = -3; } else if (*thresh < 0.f) { *info = -6; } else if (*lda <= 1 || *lda < nmax) { *info = -10; } else if (*ldq <= 1 || *ldq < nmax) { *info = -19; } else if (lwkopt > *lwork) { *info = -30; } if (*info != 0) { i__1 = -(*info); xerbla_("CDRVGG", &i__1); return 0; } /* Quick return if possible */ if (*nsizes == 0 || *ntypes == 0) { return 0; } ulp = slamch_("Precision"); safmin = slamch_("Safe minimum"); safmin /= ulp; safmax = 1.f / safmin; slabad_(&safmin, &safmax); ulpinv = 1.f / ulp; /* The values RMAGN(2:3) depend on N, see below. 
*/ rmagn[0] = 0.f; rmagn[1] = 1.f; /* Loop over sizes, types */ ntestt = 0; nerrs = 0; nmats = 0; i__1 = *nsizes; for (jsize = 1; jsize <= i__1; ++jsize) { n = nn[jsize]; n1 = max(1,n); rmagn[2] = safmax * ulp / (real) n1; rmagn[3] = safmin * ulpinv * n1; if (*nsizes != 1) { mtypes = min(26,*ntypes); } else { mtypes = min(27,*ntypes); } i__2 = mtypes; for (jtype = 1; jtype <= i__2; ++jtype) { if (! dotype[jtype]) { goto L150; } ++nmats; ntest = 0; /* Save ISEED in case of an error. */ for (j = 1; j <= 4; ++j) { ioldsd[j - 1] = iseed[j]; /* L20: */ } /* Initialize RESULT */ for (j = 1; j <= 7; ++j) { result[j] = 0.f; /* L30: */ } /* Compute A and B Description of control parameters: KCLASS: =1 means w/o rotation, =2 means w/ rotation, =3 means random. KATYPE: the "type" to be passed to CLATM4 for computing A. KAZERO: the pattern of zeros on the diagonal for A: =1: ( xxx ), =2: (0, xxx ) =3: ( 0, 0, xxx, 0 ), =4: ( 0, xxx, 0, 0 ), =5: ( 0, 0, 1, xxx, 0 ), =6: ( 0, 1, 0, xxx, 0 ). (xxx means a string of non-zero entries.) KAMAGN: the magnitude of the matrix: =0: zero, =1: O(1), =2: large, =3: small. LASIGN: .TRUE. if the diagonal elements of A are to be multiplied by a random magnitude 1 number. KBTYPE, KBZERO, KBMAGN, IBSIGN: the same, but for B. KTRIAN: =0: don't fill in the upper triangle, =1: do. KZ1, KZ2, KADD: used to implement KAZERO and KBZERO. RMAGN: used to implement KAMAGN and KBMAGN. */ if (mtypes > 26) { goto L110; } iinfo = 0; if (kclass[jtype - 1] < 3) { /* Generate A (w/o rotation) */ if ((i__3 = katype[jtype - 1], abs(i__3)) == 3) { in = ((n - 1) / 2 << 1) + 1; if (in != n) { claset_("Full", &n, &n, &c_b1, &c_b1, &a[a_offset], lda); } } else { in = n; } clatm4_(&katype[jtype - 1], &in, &kz1[kazero[jtype - 1] - 1], &kz2[kazero[jtype - 1] - 1], &lasign[jtype - 1], & rmagn[kamagn[jtype - 1]], &ulp, &rmagn[ktrian[jtype - 1] * kamagn[jtype - 1]], &c__2, &iseed[1], &a[ a_offset], lda); iadd = kadd[kazero[jtype - 1] - 1]; if (iadd > 0 && iadd <= n) { i__3 = a_subscr(iadd, iadd); i__4 = kamagn[jtype - 1]; a[i__3].r = rmagn[i__4], a[i__3].i = 0.f; } /* Generate B (w/o rotation) */ if ((i__3 = kbtype[jtype - 1], abs(i__3)) == 3) { in = ((n - 1) / 2 << 1) + 1; if (in != n) { claset_("Full", &n, &n, &c_b1, &c_b1, &b[b_offset], lda); } } else { in = n; } clatm4_(&kbtype[jtype - 1], &in, &kz1[kbzero[jtype - 1] - 1], &kz2[kbzero[jtype - 1] - 1], &lbsign[jtype - 1], & rmagn[kbmagn[jtype - 1]], &c_b39, &rmagn[ktrian[jtype - 1] * kbmagn[jtype - 1]], &c__2, &iseed[1], &b[ b_offset], lda); iadd = kadd[kbzero[jtype - 1] - 1]; if (iadd != 0 && iadd <= n) { i__3 = b_subscr(iadd, iadd); i__4 = kbmagn[jtype - 1]; b[i__3].r = rmagn[i__4], b[i__3].i = 0.f; } if (kclass[jtype - 1] == 2 && n > 0) { /* Include rotations Generate Q, Z as Householder transformations times a diagonal matrix. 
*/ i__3 = n - 1; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = jc; jr <= i__4; ++jr) { i__5 = q_subscr(jr, jc); clarnd_(&q__1, &c__3, &iseed[1]); q[i__5].r = q__1.r, q[i__5].i = q__1.i; i__5 = z___subscr(jr, jc); clarnd_(&q__1, &c__3, &iseed[1]); z__[i__5].r = q__1.r, z__[i__5].i = q__1.i; /* L40: */ } i__4 = n + 1 - jc; clarfg_(&i__4, &q_ref(jc, jc), &q_ref(jc + 1, jc), & c__1, &work[jc]); i__4 = (n << 1) + jc; i__5 = q_subscr(jc, jc); r__2 = q[i__5].r; r__1 = r_sign(&c_b39, &r__2); work[i__4].r = r__1, work[i__4].i = 0.f; i__4 = q_subscr(jc, jc); q[i__4].r = 1.f, q[i__4].i = 0.f; i__4 = n + 1 - jc; clarfg_(&i__4, &z___ref(jc, jc), &z___ref(jc + 1, jc), &c__1, &work[n + jc]); i__4 = n * 3 + jc; i__5 = z___subscr(jc, jc); r__2 = z__[i__5].r; r__1 = r_sign(&c_b39, &r__2); work[i__4].r = r__1, work[i__4].i = 0.f; i__4 = z___subscr(jc, jc); z__[i__4].r = 1.f, z__[i__4].i = 0.f; /* L50: */ } clarnd_(&q__1, &c__3, &iseed[1]); ctemp.r = q__1.r, ctemp.i = q__1.i; i__3 = q_subscr(n, n); q[i__3].r = 1.f, q[i__3].i = 0.f; i__3 = n; work[i__3].r = 0.f, work[i__3].i = 0.f; i__3 = n * 3; r__1 = c_abs(&ctemp); q__1.r = ctemp.r / r__1, q__1.i = ctemp.i / r__1; work[i__3].r = q__1.r, work[i__3].i = q__1.i; clarnd_(&q__1, &c__3, &iseed[1]); ctemp.r = q__1.r, ctemp.i = q__1.i; i__3 = z___subscr(n, n); z__[i__3].r = 1.f, z__[i__3].i = 0.f; i__3 = n << 1; work[i__3].r = 0.f, work[i__3].i = 0.f; i__3 = n << 2; r__1 = c_abs(&ctemp); q__1.r = ctemp.r / r__1, q__1.i = ctemp.i / r__1; work[i__3].r = q__1.r, work[i__3].i = q__1.i; /* Apply the diagonal matrices */ i__3 = n; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = 1; jr <= i__4; ++jr) { i__5 = a_subscr(jr, jc); i__6 = (n << 1) + jr; r_cnjg(&q__3, &work[n * 3 + jc]); q__2.r = work[i__6].r * q__3.r - work[i__6].i * q__3.i, q__2.i = work[i__6].r * q__3.i + work[i__6].i * q__3.r; i__7 = a_subscr(jr, jc); q__1.r = q__2.r * a[i__7].r - q__2.i * a[i__7].i, q__1.i = q__2.r * a[i__7].i + q__2.i * a[ i__7].r; a[i__5].r = q__1.r, a[i__5].i = q__1.i; i__5 = b_subscr(jr, jc); i__6 = (n << 1) + jr; r_cnjg(&q__3, &work[n * 3 + jc]); q__2.r = work[i__6].r * q__3.r - work[i__6].i * q__3.i, q__2.i = work[i__6].r * q__3.i + work[i__6].i * q__3.r; i__7 = b_subscr(jr, jc); q__1.r = q__2.r * b[i__7].r - q__2.i * b[i__7].i, q__1.i = q__2.r * b[i__7].i + q__2.i * b[ i__7].r; b[i__5].r = q__1.r, b[i__5].i = q__1.i; /* L60: */ } /* L70: */ } i__3 = n - 1; cunm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[ 1], &a[a_offset], lda, &work[(n << 1) + 1], & iinfo); if (iinfo != 0) { goto L100; } i__3 = n - 1; cunm2r_("R", "C", &n, &n, &i__3, &z__[z_offset], ldq, & work[n + 1], &a[a_offset], lda, &work[(n << 1) + 1], &iinfo); if (iinfo != 0) { goto L100; } i__3 = n - 1; cunm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[ 1], &b[b_offset], lda, &work[(n << 1) + 1], & iinfo); if (iinfo != 0) { goto L100; } i__3 = n - 1; cunm2r_("R", "C", &n, &n, &i__3, &z__[z_offset], ldq, & work[n + 1], &b[b_offset], lda, &work[(n << 1) + 1], &iinfo); if (iinfo != 0) { goto L100; } } } else { /* Random matrices */ i__3 = n; for (jc = 1; jc <= i__3; ++jc) { i__4 = n; for (jr = 1; jr <= i__4; ++jr) { i__5 = a_subscr(jr, jc); i__6 = kamagn[jtype - 1]; clarnd_(&q__2, &c__4, &iseed[1]); q__1.r = rmagn[i__6] * q__2.r, q__1.i = rmagn[i__6] * q__2.i; a[i__5].r = q__1.r, a[i__5].i = q__1.i; i__5 = b_subscr(jr, jc); i__6 = kbmagn[jtype - 1]; clarnd_(&q__2, &c__4, &iseed[1]); q__1.r = rmagn[i__6] * q__2.r, q__1.i = rmagn[i__6] * q__2.i; b[i__5].r = q__1.r, b[i__5].i = q__1.i; /* L80: */ } /* 
L90: */ } } L100: if (iinfo != 0) { io___43.ciunit = *nounit; s_wsfe(&io___43); do_fio(&c__1, "Generator", (ftnlen)9); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); return 0; } L110: /* Call CGEGS to compute H, T, Q, Z, alpha, and beta. */ clacpy_(" ", &n, &n, &a[a_offset], lda, &s[s_offset], lda); clacpy_(" ", &n, &n, &b[b_offset], lda, &t[t_offset], lda); ntest = 1; result[1] = ulpinv; cgegs_("V", "V", &n, &s[s_offset], lda, &t[t_offset], lda, & alpha1[1], &beta1[1], &q[q_offset], ldq, &z__[z_offset], ldq, &work[1], lwork, &rwork[1], &iinfo); if (iinfo != 0) { io___44.ciunit = *nounit; s_wsfe(&io___44); do_fio(&c__1, "CGEGS", (ftnlen)5); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L130; } ntest = 4; /* Do tests 1--4 */ cget51_(&c__1, &n, &a[a_offset], lda, &s[s_offset], lda, &q[ q_offset], ldq, &z__[z_offset], ldq, &work[1], &rwork[1], &result[1]); cget51_(&c__1, &n, &b[b_offset], lda, &t[t_offset], lda, &q[ q_offset], ldq, &z__[z_offset], ldq, &work[1], &rwork[1], &result[2]); cget51_(&c__3, &n, &b[b_offset], lda, &t[t_offset], lda, &q[ q_offset], ldq, &q[q_offset], ldq, &work[1], &rwork[1], & result[3]); cget51_(&c__3, &n, &b[b_offset], lda, &t[t_offset], lda, &z__[ z_offset], ldq, &z__[z_offset], ldq, &work[1], &rwork[1], &result[4]); /* Do test 5: compare eigenvalues with diagonals. */ temp1 = 0.f; i__3 = n; for (j = 1; j <= i__3; ++j) { i__4 = j; i__5 = s_subscr(j, j); q__2.r = alpha1[i__4].r - s[i__5].r, q__2.i = alpha1[i__4].i - s[i__5].i; q__1.r = q__2.r, q__1.i = q__2.i; i__6 = j; i__7 = t_subscr(j, j); q__4.r = beta1[i__6].r - t[i__7].r, q__4.i = beta1[i__6].i - t[i__7].i; q__3.r = q__4.r, q__3.i = q__4.i; /* Computing MAX */ i__8 = j; i__9 = s_subscr(j, j); r__13 = safmin, r__14 = (r__1 = alpha1[i__8].r, dabs(r__1)) + (r__2 = r_imag(&alpha1[j]), dabs(r__2)), r__13 = max( r__13,r__14), r__14 = (r__3 = s[i__9].r, dabs(r__3)) + (r__4 = r_imag(&s_ref(j, j)), dabs(r__4)); /* Computing MAX */ i__10 = j; i__11 = t_subscr(j, j); r__15 = safmin, r__16 = (r__5 = beta1[i__10].r, dabs(r__5)) + (r__6 = r_imag(&beta1[j]), dabs(r__6)), r__15 = max( r__15,r__16), r__16 = (r__7 = t[i__11].r, dabs(r__7)) + (r__8 = r_imag(&t_ref(j, j)), dabs(r__8)); temp2 = (((r__9 = q__1.r, dabs(r__9)) + (r__10 = r_imag(&q__1) , dabs(r__10))) / dmax(r__13,r__14) + ((r__11 = q__3.r, dabs(r__11)) + (r__12 = r_imag(&q__3), dabs( r__12))) / dmax(r__15,r__16)) / ulp; temp1 = dmax(temp1,temp2); /* L120: */ } result[5] = temp1; /* Call CGEGV to compute S2, T2, VL, and VR, do tests. 
Eigenvalues and Eigenvectors */ clacpy_(" ", &n, &n, &a[a_offset], lda, &s2[s2_offset], lda); clacpy_(" ", &n, &n, &b[b_offset], lda, &t2[t2_offset], lda); ntest = 6; result[6] = ulpinv; cgegv_("V", "V", &n, &s2[s2_offset], lda, &t2[t2_offset], lda, & alpha2[1], &beta2[1], &vl[vl_offset], ldq, &vr[vr_offset], ldq, &work[1], lwork, &rwork[1], &iinfo); if (iinfo != 0) { io___47.ciunit = *nounit; s_wsfe(&io___47); do_fio(&c__1, "CGEGV", (ftnlen)5); do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); *info = abs(iinfo); goto L130; } ntest = 7; /* Do Tests 6 and 7 */ cget52_(&c_true, &n, &a[a_offset], lda, &b[b_offset], lda, &vl[ vl_offset], ldq, &alpha2[1], &beta2[1], &work[1], &rwork[ 1], dumma); result[6] = dumma[0]; if (dumma[1] > *thrshn) { io___49.ciunit = *nounit; s_wsfe(&io___49); do_fio(&c__1, "Left", (ftnlen)4); do_fio(&c__1, "CGEGV", (ftnlen)5); do_fio(&c__1, (char *)&dumma[1], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); } cget52_(&c_false, &n, &a[a_offset], lda, &b[b_offset], lda, &vr[ vr_offset], ldq, &alpha2[1], &beta2[1], &work[1], &rwork[ 1], dumma); result[7] = dumma[0]; if (dumma[1] > *thresh) { io___50.ciunit = *nounit; s_wsfe(&io___50); do_fio(&c__1, "Right", (ftnlen)5); do_fio(&c__1, "CGEGV", (ftnlen)5); do_fio(&c__1, (char *)&dumma[1], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)); do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer)); e_wsfe(); } /* End of Loop -- Check for RESULT(j) > THRESH */ L130: ntestt += ntest; /* Print out tests which fail. */ i__3 = ntest; for (jr = 1; jr <= i__3; ++jr) { if (result[jr] >= *thresh) { /* If this is the first test to fail, print a header to the data file. */ if (nerrs == 0) { io___51.ciunit = *nounit; s_wsfe(&io___51); do_fio(&c__1, "CGG", (ftnlen)3); e_wsfe(); /* Matrix types */ io___52.ciunit = *nounit; s_wsfe(&io___52); e_wsfe(); io___53.ciunit = *nounit; s_wsfe(&io___53); e_wsfe(); io___54.ciunit = *nounit; s_wsfe(&io___54); do_fio(&c__1, "Unitary", (ftnlen)7); e_wsfe(); /* Tests performed */ io___55.ciunit = *nounit; s_wsfe(&io___55); do_fio(&c__1, "unitary", (ftnlen)7); do_fio(&c__1, "*", (ftnlen)1); do_fio(&c__1, "conjugate transpose", (ftnlen)19); for (j = 1; j <= 5; ++j) { do_fio(&c__1, "*", (ftnlen)1); } e_wsfe(); } ++nerrs; if (result[jr] < 1e4f) { io___56.ciunit = *nounit; s_wsfe(&io___56); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)) ; do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof( real)); e_wsfe(); } else { io___57.ciunit = *nounit; s_wsfe(&io___57); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer)) ; do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof( real)); e_wsfe(); } } /* L140: */ } L150: ; } /* L160: */ } /* Summary */ alasvm_("CGG", nounit, &nerrs, &ntestt, &c__0); return 0; /* End of CDRVGG */ } /* cdrvgg_ */
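/* A minimal standalone sketch of the "test 5" ratio computed in the loop
   above: the generalized eigenvalues (alpha(j), beta(j)) from CGEGS are
   compared against the diagonals of the Schur pair (S, T), scaled by ulp.
   The driver measures complex magnitudes as |Re| + |Im|; this sketch uses the
   modulus instead, and hard-codes the usual IEEE single-precision safmin/ulp
   values in place of SLAMCH queries. */
#include <algorithm>
#include <complex>
#include <cstdio>
#include <vector>

static float test5_ratio(const std::vector<std::complex<float> >& alpha,
                         const std::vector<std::complex<float> >& beta,
                         const std::vector<std::complex<float> >& sdiag,
                         const std::vector<std::complex<float> >& tdiag)
{
    const float safmin = 1.17549435e-38f;   /* smallest normal float */
    const float ulp    = 1.19209290e-7f;    /* single-precision ulp  */
    float temp1 = 0.0f;
    for (std::size_t j = 0; j < alpha.size(); ++j) {
        /* |alpha - S(j,j)| / max(safmin, |alpha|, |S(j,j)|), same for beta */
        float da = std::abs(alpha[j] - sdiag[j]) /
                   std::max({safmin, std::abs(alpha[j]), std::abs(sdiag[j])});
        float db = std::abs(beta[j] - tdiag[j]) /
                   std::max({safmin, std::abs(beta[j]), std::abs(tdiag[j])});
        temp1 = std::max(temp1, (da + db) / ulp);
    }
    return temp1;   /* O(1) when the eigenvalues match the diagonals */
}

int main()
{
    std::vector<std::complex<float> > a(1, std::complex<float>(2.f, 1.f));
    std::vector<std::complex<float> > b(1, std::complex<float>(1.f, 0.f));
    std::printf("ratio = %g\n", test5_ratio(a, b, a, b));  /* exact match -> 0 */
}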
/* Subroutine */ int dtrexc_(char *compq, integer *n, doublereal *t, integer * ldt, doublereal *q, integer *ldq, integer *ifst, integer *ilst, doublereal *work, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= DTREXC reorders the real Schur factorization of a real matrix A = Q*T*Q**T, so that the diagonal block of T with row index IFST is moved to row ILST. The real Schur form T is reordered by an orthogonal similarity transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors is updated by postmultiplying it with Z. T must be in Schur canonical form (as returned by DHSEQR), that is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block has its diagonal elements equal and its off-diagonal elements of opposite sign. Arguments ========= COMPQ (input) CHARACTER*1 = 'V': update the matrix Q of Schur vectors; = 'N': do not update Q. N (input) INTEGER The order of the matrix T. N >= 0. T (input/output) DOUBLE PRECISION array, dimension (LDT,N) On entry, the upper quasi-triangular matrix T, in Schur Schur canonical form. On exit, the reordered upper quasi-triangular matrix, again in Schur canonical form. LDT (input) INTEGER The leading dimension of the array T. LDT >= max(1,N). Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) On entry, if COMPQ = 'V', the matrix Q of Schur vectors. On exit, if COMPQ = 'V', Q has been postmultiplied by the orthogonal transformation matrix Z which reorders T. If COMPQ = 'N', Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,N). IFST (input/output) INTEGER ILST (input/output) INTEGER Specify the reordering of the diagonal blocks of T. The block with row index IFST is moved to row ILST, by a sequence of transpositions between adjacent blocks. On exit, if IFST pointed on entry to the second row of a 2-by-2 block, it is changed to point to the first row; ILST always points to the first row of the block in its final position (which may differ from its input value by +1 or -1). 1 <= IFST <= N; 1 <= ILST <= N. WORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value = 1: two adjacent blocks were too close to swap (the problem is very ill-conditioned); T may have been partially reordered, and ILST points to the first row of the current position of the block being moved. ===================================================================== Decode and test the input arguments. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c__2 = 2; /* System generated locals */ integer q_dim1, q_offset, t_dim1, t_offset, i__1; /* Local variables */ static integer here; extern logical lsame_(char *, char *); static logical wantq; extern /* Subroutine */ int dlaexc_(logical *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, integer *), xerbla_(char *, integer *); static integer nbnext, nbf, nbl; #define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1] t_dim1 = *ldt; t_offset = 1 + t_dim1 * 1; t -= t_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --work; /* Function Body */ *info = 0; wantq = lsame_(compq, "V"); if (! wantq && ! 
lsame_(compq, "N")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldt < max(1,*n)) { *info = -4; } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) { *info = -6; } else if (*ifst < 1 || *ifst > *n) { *info = -7; } else if (*ilst < 1 || *ilst > *n) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("DTREXC", &i__1); return 0; } /* Quick return if possible */ if (*n <= 1) { return 0; } /* Determine the first row of specified block and find out it is 1 by 1 or 2 by 2. */ if (*ifst > 1) { if (t_ref(*ifst, *ifst - 1) != 0.) { --(*ifst); } } nbf = 1; if (*ifst < *n) { if (t_ref(*ifst + 1, *ifst) != 0.) { nbf = 2; } } /* Determine the first row of the final block and find out it is 1 by 1 or 2 by 2. */ if (*ilst > 1) { if (t_ref(*ilst, *ilst - 1) != 0.) { --(*ilst); } } nbl = 1; if (*ilst < *n) { if (t_ref(*ilst + 1, *ilst) != 0.) { nbl = 2; } } if (*ifst == *ilst) { return 0; } if (*ifst < *ilst) { /* Update ILST */ if (nbf == 2 && nbl == 1) { --(*ilst); } if (nbf == 1 && nbl == 2) { ++(*ilst); } here = *ifst; L10: /* Swap block with next one below */ if (nbf == 1 || nbf == 2) { /* Current block either 1 by 1 or 2 by 2 */ nbnext = 1; if (here + nbf + 1 <= *n) { if (t_ref(here + nbf + 1, here + nbf) != 0.) { nbnext = 2; } } dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, & nbf, &nbnext, &work[1], info); if (*info != 0) { *ilst = here; return 0; } here += nbnext; /* Test if 2 by 2 block breaks into two 1 by 1 blocks */ if (nbf == 2) { if (t_ref(here + 1, here) == 0.) { nbf = 3; } } } else { /* Current block consists of two 1 by 1 blocks each of which must be swapped individually */ nbnext = 1; if (here + 3 <= *n) { if (t_ref(here + 3, here + 2) != 0.) { nbnext = 2; } } i__1 = here + 1; dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & c__1, &nbnext, &work[1], info); if (*info != 0) { *ilst = here; return 0; } if (nbnext == 1) { /* Swap two 1 by 1 blocks, no problems possible */ dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & here, &c__1, &nbnext, &work[1], info); ++here; } else { /* Recompute NBNEXT in case 2 by 2 split */ if (t_ref(here + 2, here + 1) == 0.) { nbnext = 1; } if (nbnext == 2) { /* 2 by 2 Block did not split */ dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & here, &c__1, &nbnext, &work[1], info); if (*info != 0) { *ilst = here; return 0; } here += 2; } else { /* 2 by 2 Block did split */ dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & here, &c__1, &c__1, &work[1], info); i__1 = here + 1; dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & i__1, &c__1, &c__1, &work[1], info); here += 2; } } } if (here < *ilst) { goto L10; } } else { here = *ifst; L20: /* Swap block with next one above */ if (nbf == 1 || nbf == 2) { /* Current block either 1 by 1 or 2 by 2 */ nbnext = 1; if (here >= 3) { if (t_ref(here - 1, here - 2) != 0.) { nbnext = 2; } } i__1 = here - nbnext; dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & nbnext, &nbf, &work[1], info); if (*info != 0) { *ilst = here; return 0; } here -= nbnext; /* Test if 2 by 2 block breaks into two 1 by 1 blocks */ if (nbf == 2) { if (t_ref(here + 1, here) == 0.) { nbf = 3; } } } else { /* Current block consists of two 1 by 1 blocks each of which must be swapped individually */ nbnext = 1; if (here >= 3) { if (t_ref(here - 1, here - 2) != 0.) 
{ nbnext = 2; } } i__1 = here - nbnext; dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & nbnext, &c__1, &work[1], info); if (*info != 0) { *ilst = here; return 0; } if (nbnext == 1) { /* Swap two 1 by 1 blocks, no problems possible */ dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & here, &nbnext, &c__1, &work[1], info); --here; } else { /* Recompute NBNEXT in case 2 by 2 split */ if (t_ref(here, here - 1) == 0.) { nbnext = 1; } if (nbnext == 2) { /* 2 by 2 Block did not split */ i__1 = here - 1; dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & i__1, &c__2, &c__1, &work[1], info); if (*info != 0) { *ilst = here; return 0; } here += -2; } else { /* 2 by 2 Block did split */ dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & here, &c__1, &c__1, &work[1], info); i__1 = here - 1; dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & i__1, &c__1, &c__1, &work[1], info); here += -2; } } } if (here > *ilst) { goto L20; } } *ilst = here; return 0; /* End of DTREXC */ } /* dtrexc_ */
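/* A minimal usage sketch for dtrexc_ above: move the 1-by-1 block in row 3 of
   a 3-by-3 real Schur form to row 1, updating the Schur vectors. The f2c
   typedefs are assumptions standing in for f2c.h, and the extern "C"
   declaration assumes the translated routine is compiled as C; storage is
   column-major, as in the routine. */
#include <cstdio>

typedef long int integer;      /* assumed f2c conventions */
typedef double doublereal;
extern "C" int dtrexc_(char *compq, integer *n, doublereal *t, integer *ldt,
                       doublereal *q, integer *ldq, integer *ifst,
                       integer *ilst, doublereal *work, integer *info);

int main()
{
    /* T is already in Schur form: upper triangular, eigenvalues 1, 2, 3. */
    doublereal t[9] = {1, 0, 0,  4, 2, 0,  5, 6, 3};   /* column-major 3x3 */
    doublereal q[9] = {1, 0, 0,  0, 1, 0,  0, 0, 1};   /* Q starts as I   */
    doublereal work[3];
    integer n = 3, ldt = 3, ldq = 3, ifst = 3, ilst = 1, info;
    char compq[] = "V";
    dtrexc_(compq, &n, t, &ldt, q, &ldq, &ifst, &ilst, work, &info);
    /* Q*T*Q^T is unchanged by the reordering; eigenvalue 3 now leads. */
    std::printf("info = %ld, new T(1,1) = %g\n", (long)info, t[0]);
}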
/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer * k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, integer *ldt) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DLARFT forms the triangular factor T of a real block reflector H of order n, which is defined as a product of k elementary reflectors. If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. If STOREV = 'C', the vector which defines the elementary reflector H(i) is stored in the i-th column of the array V, and H = I - V * T * V' If STOREV = 'R', the vector which defines the elementary reflector H(i) is stored in the i-th row of the array V, and H = I - V' * T * V Arguments ========= DIRECT (input) CHARACTER*1 Specifies the order in which the elementary reflectors are multiplied to form the block reflector: = 'F': H = H(1) H(2) . . . H(k) (Forward) = 'B': H = H(k) . . . H(2) H(1) (Backward) STOREV (input) CHARACTER*1 Specifies how the vectors which define the elementary reflectors are stored (see also Further Details): = 'C': columnwise = 'R': rowwise N (input) INTEGER The order of the block reflector H. N >= 0. K (input) INTEGER The order of the triangular factor T (= the number of elementary reflectors). K >= 1. V (input/output) DOUBLE PRECISION array, dimension (LDV,K) if STOREV = 'C' (LDV,N) if STOREV = 'R' The matrix V. See further details. LDV (input) INTEGER The leading dimension of the array V. If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i). T (output) DOUBLE PRECISION array, dimension (LDT,K) The k by k triangular factor T of the block reflector. If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is lower triangular. The rest of the array is not used. LDT (input) INTEGER The leading dimension of the array T. LDT >= K. Further Details =============== The shape of the matrix V and the storage of the vectors which define the H(i) is best illustrated by the following example with n = 5 and k = 3. The elements equal to 1 are not stored; the corresponding array elements are modified but restored on exit. The rest of the array is not used. 
DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) ( v1 1 ) ( 1 v2 v2 v2 ) ( v1 v2 1 ) ( 1 v3 v3 ) ( v1 v2 v3 ) ( v1 v2 v3 ) DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': V = ( v1 v2 v3 ) V = ( v1 v1 1 ) ( v1 v2 v3 ) ( v2 v2 v2 1 ) ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) ( 1 v3 ) ( 1 ) ===================================================================== Quick return if possible Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b8 = 0.; /* System generated locals */ integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; doublereal d__1; /* Local variables */ static integer i__, j; extern logical lsame_(char *, char *); extern /* Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal vii; #define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1] #define v_ref(a_1,a_2) v[(a_2)*v_dim1 + a_1] v_dim1 = *ldv; v_offset = 1 + v_dim1 * 1; v -= v_offset; --tau; t_dim1 = *ldt; t_offset = 1 + t_dim1 * 1; t -= t_offset; /* Function Body */ if (*n == 0) { return 0; } if (lsame_(direct, "F")) { i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { if (tau[i__] == 0.) { /* H(i) = I */ i__2 = i__; for (j = 1; j <= i__2; ++j) { t_ref(j, i__) = 0.; /* L10: */ } } else { /* general case */ vii = v_ref(i__, i__); v_ref(i__, i__) = 1.; if (lsame_(storev, "C")) { /* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ i__2 = *n - i__ + 1; i__3 = i__ - 1; d__1 = -tau[i__]; dgemv_("Transpose", &i__2, &i__3, &d__1, &v_ref(i__, 1), ldv, &v_ref(i__, i__), &c__1, &c_b8, &t_ref(1, i__), &c__1); } else { /* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ i__2 = i__ - 1; i__3 = *n - i__ + 1; d__1 = -tau[i__]; dgemv_("No transpose", &i__2, &i__3, &d__1, &v_ref(1, i__) , ldv, &v_ref(i__, i__), ldv, &c_b8, &t_ref(1, i__), &c__1); } v_ref(i__, i__) = vii; /* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ i__2 = i__ - 1; dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ t_offset], ldt, &t_ref(1, i__), &c__1); t_ref(i__, i__) = tau[i__]; } /* L20: */ } } else { for (i__ = *k; i__ >= 1; --i__) { if (tau[i__] == 0.) { /* H(i) = I */ i__1 = *k; for (j = i__; j <= i__1; ++j) { t_ref(j, i__) = 0.; /* L30: */ } } else { /* general case */ if (i__ < *k) { if (lsame_(storev, "C")) { vii = v_ref(*n - *k + i__, i__); v_ref(*n - *k + i__, i__) = 1.; /* T(i+1:k,i) := - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) */ i__1 = *n - *k + i__; i__2 = *k - i__; d__1 = -tau[i__]; dgemv_("Transpose", &i__1, &i__2, &d__1, &v_ref(1, i__ + 1), ldv, &v_ref(1, i__), &c__1, &c_b8, & t_ref(i__ + 1, i__), &c__1); v_ref(*n - *k + i__, i__) = vii; } else { vii = v_ref(i__, *n - *k + i__); v_ref(i__, *n - *k + i__) = 1.; /* T(i+1:k,i) := - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' */ i__1 = *k - i__; i__2 = *n - *k + i__; d__1 = -tau[i__]; dgemv_("No transpose", &i__1, &i__2, &d__1, &v_ref( i__ + 1, 1), ldv, &v_ref(i__, 1), ldv, &c_b8, &t_ref(i__ + 1, i__), &c__1); v_ref(i__, *n - *k + i__) = vii; } /* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ i__1 = *k - i__; dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t_ref( i__ + 1, i__ + 1), ldt, &t_ref(i__ + 1, i__), & c__1); } t_ref(i__, i__) = tau[i__]; } /* L40: */ } } return 0; /* End of DLARFT */ } /* dlarft_ */
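/* A minimal dense sketch of the DIRECT = 'F', STOREV = 'C' recurrence above:
   column i of the triangular factor is built as w = -tau(i) * V' * v_i,
   followed by T(1:i-1,i) = T(1:i-1,1:i-1) * w, with T(i,i) = tau(i). Plain
   loops stand in for DGEMV/DTRMV; V is given zeros above its unit diagonal,
   so multiplying with full columns matches the routine's V(i:n,...) ranges.
   The n, k, tau values are illustrative. */
#include <cstdio>
#include <vector>

int main()
{
    const int n = 4, k = 2;
    /* V column-major, n x k: column i holds the i-th reflector vector. */
    double V[n * k] = {1, 0.5, 0.25, 0.125,    /* v1, V(1,1) = 1 */
                       0, 1,   0.5,  0.25};    /* v2, V(2,2) = 1 */
    double tau[k] = {1.2, 0.7};
    double T[k * k] = {0};
    for (int i = 0; i < k; ++i) {
        /* w = -tau(i) * V(:,1:i-1)' * V(:,i) */
        std::vector<double> w(i, 0.0);
        for (int j = 0; j < i; ++j)
            for (int r = 0; r < n; ++r)
                w[j] -= tau[i] * V[r + j * n] * V[r + i * n];
        /* T(1:i-1,i) = T(1:i-1,1:i-1) * w  (upper triangular times vector) */
        for (int j = 0; j < i; ++j) {
            double s = 0.0;
            for (int l = j; l < i; ++l) s += T[j + l * k] * w[l];
            T[j + i * k] = s;
        }
        T[i + i * k] = tau[i];
    }
    std::printf("T = [ %g %g ; %g %g ]\n", T[0], T[2], T[1], T[3]);
}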
void escapeNoiseNeuron::excite ( baseType c )
{
    // baseType phase = 1.0 + pcoBase::exactTime - this->x[0];
    if ( ( pcoBase::nextFiring() - this->time >= timeDelay() )
         && ( this->time - lastFiring > t_ref() - timeDelay() ) ) // no longer refractory and not yet firing
    {
        pot = pot + c;
        baseType newKey = this->time + timeDelay()
            + gslNoise::getExponential ( exp ( - escapeNoise_a() * pot + escapeNoise_b() ) );
        // pcoBase::decreaseKey();
        eventHandler::updateKey ( _fire_, newKey );
    }
}
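/* A minimal sketch of the escape-noise timing rule used in callBack() and
   excite() above: the waiting time to the next spike is exponential with a
   potential-dependent parameter exp(-a*pot + b), so a higher potential means
   an earlier expected spike. Whether that parameter is the mean or the rate
   depends on gslNoise::getExponential (GSL's gsl_ran_exponential takes the
   mean); this sketch assumes the mean, and a, b and the refractory time are
   illustrative values. */
#include <cmath>
#include <cstdio>
#include <random>

int main()
{
    std::mt19937 rng(42);
    const double a = 2.0, b = 1.0, t_refractory = 0.5;
    for (double pot = 0.0; pot <= 1.01; pot += 0.5) {
        double mean_wait = std::exp(-a * pot + b);
        /* std::exponential_distribution is parameterized by the rate 1/mean */
        std::exponential_distribution<double> wait(1.0 / mean_wait);
        std::printf("pot = %.1f -> next spike in %.3f\n",
                    pot, t_refractory + wait(rng));
    }
}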
template<typename Ty> magma_int_t magma_unmqr_gpu( magma_side_t side, magma_trans_t trans, magma_int_t m, magma_int_t n, magma_int_t k, cl_mem dA, size_t dA_offset, magma_int_t ldda, Ty *tau, cl_mem dC, size_t dC_offset, magma_int_t lddc, Ty *hwork, magma_int_t lwork, cl_mem dT, size_t dT_offset, magma_int_t nb, magma_queue_t queue, magma_int_t *info) { /* -- clMAGMA (version 0.1) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date Purpose ======= ZUNMQR_GPU overwrites the general complex M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q TRANS = 'T': Q**H * C C * Q**H where Q is a complex orthogonal matrix defined as the product of k elementary reflectors Q = H(1) H(2) . . . H(k) as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. Arguments ========= SIDE (input) CHARACTER*1 = 'L': apply Q or Q**H from the Left; = 'R': apply Q or Q**H from the Right. TRANS (input) CHARACTER*1 = 'N': No transpose, apply Q; = 'T': Transpose, apply Q**H. M (input) INTEGER The number of rows of the matrix C. M >= 0. N (input) INTEGER The number of columns of the matrix C. N >= 0. K (input) INTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. DA (input) COMPLEX_16 array on the GPU, dimension (LDDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by ZGEQRF in the first k columns of its array argument DA. DA is modified by the routine but restored on exit. LDDA (input) INTEGER The leading dimension of the array DA. If SIDE = 'L', LDDA >= max(1,M); if SIDE = 'R', LDDA >= max(1,N). TAU (input) COMPLEX_16 array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGEQRF. DC (input/output) COMPLEX_16 array on the GPU, dimension (LDDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q. LDDC (input) INTEGER The leading dimension of the array DC. LDDC >= max(1,M). HWORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, HWORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array HWORK. LWORK >= (M-K+NB)*(N+2*NB) if SIDE = 'L', and LWORK >= (N-K+NB)*(M+2*NB) if SIDE = 'R', where NB is the optimal blocksize. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the HWORK array, returns this value as the first entry of the HWORK array, and no error message related to LWORK is issued by XERBLA. DT (input) COMPLEX_16 array on the GPU that is the output (the 9th argument) of magma_zgeqrf_gpu. NB (input) INTEGER This is the blocking size that was used in pre-computing DT, e.g., the blocking size used in magma_zgeqrf_gpu. 
INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== */ #define a_ref(a_1,a_2) dA, (dA_offset+(a_1)+(a_2)*(ldda)) #define c_ref(a_1,a_2) dC, (dC_offset+(a_1)+(a_2)*(lddc)) #define t_ref(a_1) dT, (dT_offset+(a_1)*nb) static const Ty c_one = magma_one<Ty>(); cl_mem dwork; magma_int_t i; magma_int_t i1, i2, step, ib, ic, jc, ma, mi, ni, nq, nw, ret; int left, notran, lquery; magma_int_t lwkopt; *info = 0; left = (side == MagmaLeft); notran = (trans == MagmaNoTrans); lquery = (lwork == -1); /* NQ is the order of Q and NW is the minimum dimension of WORK */ if (left) { nq = m; nw = n; } else { nq = n; nw = m; } if ( (!left) && (side != MagmaRight) ) { *info = -1; } else if ( (!notran) && (trans != MagmaConjTrans) ) { *info = -2; } else if (m < 0) { *info = -3; } else if (n < 0) { *info = -4; } else if (k < 0 || k > nq) { *info = -5; } else if (ldda < std::max(1,nq)) { *info = -7; } else if (lddc < std::max(1,m)) { *info = -10; } else if (lwork < std::max(1,nw) && ! lquery) { *info = -12; } lwkopt = (m-k+nb)*(n+2*nb); hwork[0] = magma_scalar<Ty>(lwkopt); if (*info != 0) { //magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (m == 0 || n == 0 || k == 0) { hwork[0] = c_one; return *info; } magma_malloc<Ty>(&dwork, (((n+31)/32)*32)*nb); cpu_lapack_unmqr_work_func<Ty> cpu_lapack_unmqr; if ( (left && (! notran)) || ( (!left) && notran ) ) { i1 = 0; i2 = k-nb; step = nb; } else { i1 = (k - 1 - nb) / nb * nb; i2 = 0; step = -nb; } mi = 0; ni = 0; if (left) { ni = n; jc = 0; } else { mi = m; ic = 0; } static const bool is_real = magma_is_real<Ty>(); /* Use unblocked code to multiply last or only block (cases Q*C or C*Q^T). */ // workspace left: A(mi*nb) + C(mi*ni) + work(ni*nb_la) = (m-k-nb)*nb + (m-k-nb)*n + n*nb // workspace right: A(ni*nb) + C(mi*ni) + work(mi*nb_la) = (n-k-nb)*nb + m*(n-k-nb) + m*nb if ( step < 0 ) { // i is beginning of last block i = i1 - step; if ( i >= k ) { i = i1; } ib = k - i; if (left) { // ni=n, jc=0, H or H^T is applied to C(i:m-1,0:n-1) mi = m - i; ma = mi; ic = i; } else { // mi=m, ic=0, H or H^T is applied to C(0:m-1,i:n-1) ni = n - i; ma = ni; jc = i; } Ty* hA = hwork; Ty* hC = hwork + ma*ib; Ty* hW = hwork + ma*ib + mi*ni; magma_int_t lhwork = lwork - (ma*ib + mi*ni); magma_getmatrix<Ty>(ma, ib, a_ref(i, i ), ldda, hA, ma, queue); magma_getmatrix<Ty>(mi, ni, c_ref(ic, jc), lddc, hC, mi, queue); LAPACKE_CHECK(cpu_lapack_unmqr( side == MagmaRight ? 'R' : 'L', notran ? 'N' : (is_real ? 'T' : 'C'), mi, ni, ib, hA, ma, tau+i, hC, mi, hW, lhwork)); // send the updated part of C back to the GPU magma_setmatrix<Ty>( mi, ni, hC, mi, c_ref(ic, jc), lddc, queue); } if (nb < k) { for (i=i1; step<0 ? i>i2 : i<i2; i+=step) { ib = std::min(nb, k - i); if (left){ mi = m - i; ic = i; } else { ni = n - i; jc = i; } if (mi == 0 || ni == 0) break; ret = magma_larfb_gpu<Ty>(MagmaLeft, is_real ? MagmaTrans : MagmaConjTrans, MagmaForward, MagmaColumnwise, mi, ni, ib, a_ref(i, i ), ldda, t_ref(i), nb, c_ref(ic, jc), lddc, dwork, 0, nw, queue); if ( ret != MAGMA_SUCCESS ) return ret; } } else { i = i1; } /* Use unblocked code to multiply the last or only block (cases Q^T*C or C*Q). 
*/ if ( step > 0 ) { ib = k-i; if (left) { // ni=n, jc=0, H or H^T is applied to C(i:m-1,0:n-1) mi = m - i; ma = mi; ic = i; } else { // mi=m, ic=0, H or H^T is applied to C(0:m-1,i:n-1) ni = n - i; ma = ni; jc = i; } Ty* hA = hwork; Ty* hC = hwork + ma*ib; Ty* hW = hwork + ma*ib + mi*ni; magma_int_t lhwork = lwork - (ma*ib + mi*ni); magma_getmatrix<Ty>(ma, ib, a_ref(i, i ), ldda, hA, ma, queue); magma_getmatrix<Ty>(mi, ni, c_ref(ic, jc), lddc, hC, mi, queue); LAPACKE_CHECK(cpu_lapack_unmqr( side == MagmaRight ? 'R' : 'L', notran ? 'N' : (is_real ? 'T' : 'C'), mi, ni, ib, hA, ma, tau+i, hC, mi, hW, lhwork)); // send the updated part of C back to the GPU magma_setmatrix<Ty>(mi, ni, hC, mi, c_ref(ic, jc), lddc, queue); } magma_free(dwork); return *info; /* End of MAGMA_ZUNMQR_GPU */ }
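/*
 * The same operation on the CPU, as a hedged sketch assuming a LAPACKE
 * installation (link with -llapacke): factor A = Q*R with dgeqrf, then
 * apply Q**T from the left with dormqr (the real counterpart of ZUNMQR).
 */
#include <lapacke.h>
#include <cstdio>

int main() {
    double A[12] = {1, 2, 3, 4,  2, 1, 0, 1,  0, 3, 1, 2};  // 4x3, column-major, lda = 4
    double C[8]  = {1, 0, 0, 0,  0, 1, 0, 0};               // 4x2, lda = 4
    double tau[3];
    if (LAPACKE_dgeqrf(LAPACK_COL_MAJOR, 4, 3, A, 4, tau) != 0) return 1;
    // SIDE = 'L', TRANS = 'T':  C := Q**T * C
    if (LAPACKE_dormqr(LAPACK_COL_MAJOR, 'L', 'T', 4, 2, 3, A, 4, tau, C, 4) != 0) return 1;
    for (int i = 0; i < 4; ++i)
        std::printf("%10.6f %10.6f\n", C[i], C[i + 4]);
    return 0;
}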
extern "C" magma_int_t magma_ssytrd_sy2sb( char uplo, magma_int_t n, magma_int_t nb, float *a, magma_int_t lda, float *tau, float *work, magma_int_t lwork, float *dT, magma_int_t threads, magma_int_t *info) { /* -- MAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver November 2012 Purpose ======= SSYTRD_HE2HB reduces a real symmetric matrix A to real symmetric band-diagonal form T by an orthogonal similarity transformation: Q**T * A * Q = T. This version stores the triangular matrices T used in the accumulated Householder transformations (I - V T V'). Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if UPLO = 'U', the Upper band-diagonal of A is overwritten by the corresponding elements of the band-diagonal matrix T, and the elements above the band diagonal, with the array TAU, represent the orthogonal matrix Q as a product of elementary reflectors; if UPLO = 'L', the the Lower band-diagonal of A is overwritten by the corresponding elements of the band-diagonal matrix T, and the elements below the band-diagonal, with the array TAU, represent the orthogonal matrix Q as a product of elementary reflectors. See Further Details. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). TAU (output) REAL array, dimension (N-1) The scalar factors of the elementary reflectors (see Further Details). WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= 1. For optimum performance LWORK >= N*NB, where NB is the optimal blocksize. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. dT (output) REAL array on the GPU, dimension N*NB, where NB is the optimal blocksize. On exit dT holds the upper triangular matrices T from the accumulated Householder transformations (I - V T V') used in the factorization. The nb x nb matrices T are ordered consecutively in memory one after another. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== If UPLO = 'U', the matrix Q is represented as a product of elementary reflectors Q = H(n-1) . . . H(2) H(1). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in A(1:i-1,i+1), and tau in TAU(i). If UPLO = 'L', the matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(n-1). Each H(i) has the form H(i) = I - tau * v * v' where tau is a real scalar, and v is a real vector with v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), and tau in TAU(i). 
The contents of A on exit are illustrated by the following examples with n = 5: if UPLO = 'U': if UPLO = 'L': ( d e v2 v3 v4 ) ( d ) ( d e v3 v4 ) ( e d ) ( d e v4 ) ( v1 e d ) ( d e ) ( v1 v2 e d ) ( d ) ( v1 v2 v3 e d ) where d and e denote diagonal and off-diagonal elements of T, and vi denotes an element of the vector defining H(i). ===================================================================== */ #define a_ref(a_1,a_2) ( a + ((a_2)-1)*( lda) + (a_1)-1) #define da_ref(a_1,a_2) (da + ((a_2)-1)*(ldda) + (a_1)-1) #define tau_ref(a_1) (tau + (a_1)-1) #define t_ref(a_1) (dT + ((a_1)-1)*(lddt)) char uplo_[2] = {uplo, 0}; int ldda = ((n+31)/32)*32; int lddt = nb; float c_neg_one = MAGMA_S_NEG_ONE; float c_neg_half = MAGMA_S_NEG_HALF; float c_one = MAGMA_S_ONE ; float c_zero = MAGMA_S_ZERO; float d_one = MAGMA_D_ONE; magma_int_t pm, pn, indi, indj, pk; magma_int_t pm_old=0, pn_old=0, indi_old=0, indj_old=0; int i; int lwkopt; int lquery; *info = 0; int upper = lapackf77_lsame(uplo_, "U"); lquery = lwork == -1; if (! upper && ! lapackf77_lsame(uplo_, "L")) { *info = -1; } else if (n < 0) { *info = -2; } else if (lda < max(1,n)) { *info = -4; } else if (lwork < 1 && ! lquery) { *info = -9; } if (*info == 0) { /* Determine the block size. */ lwkopt = n * nb; MAGMA_S_SET2REAL( work[0], lwkopt ); } if (*info != 0) return *info; else if (lquery) return *info; /* Quick return if possible */ if (n == 0) { work[0] = c_one; return *info; } float *da; if (MAGMA_SUCCESS != magma_smalloc( &da, (n + 2*nb)*ldda )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } magma_int_t mklth = min(threads,12); #if defined(USEMKL) mkl_set_num_threads(mklth); #endif #if defined(USEACML) omp_set_num_threads(mklth); #endif /* Use the first panel of da as work space */ float *dwork = da+n*ldda; float *dW = dwork + nb*ldda; #ifdef TRACING char buf[80]; #endif cudaStream_t stream[3]; magma_queue_create( &stream[0] ); magma_queue_create( &stream[1] ); stream[2] = 0; // default stream trace_init( 1, 1, 3, stream ); float *hT = work + lwork - nb*nb; lwork -= nb*nb; memset( hT, 0, nb*nb*sizeof(float)); magmablasSetKernelStream( stream[0] ); cudaEvent_t Pupdate_event; cudaEventCreateWithFlags(&Pupdate_event,cudaEventDisableTiming); //cudaEventCreate(&Pupdate_event); if (upper) { printf("SSYTRD_HE2HB is not yet implemented for upper matrix storage. Exit.\n"); exit(1); }else { /* Copy the matrix to the GPU */ if (1 <= n-nb){ trace_gpu_start( 0, 0, "set", "set A" ); magma_ssetmatrix_async( (n-nb), (n-nb), a_ref(nb+1, nb+1), lda, da_ref(nb+1, nb+1), ldda, stream[0] ); trace_gpu_end( 0, 0 ); } /* Reduce the lower triangle of A */ for (i = 1; i <= n-nb; i += nb) { indi = i+nb; indj = i; pm = n - i - nb + 1; //pn = min(i+nb-1, n-nb) -i + 1; pn = nb; /* Get the current panel (no need for the 1st iteration) */ if (i > 1 ){ // spanel_to_q copy the upper oof diagonal part of // the matrix to work to be restored later. acctually // the zero's and one's putted are not used this is only // because we don't have a function that copy only the // upper part of A to be restored after copying the // lookahead panel that has been computted from GPU to CPU. 
spanel_to_q(MagmaUpper, pn-1, a_ref(i, i+1), lda, work); trace_gpu_start( 0, 1, "get", "get panel" ); //magma_queue_sync( stream[0] ); cudaStreamWaitEvent(stream[1], Pupdate_event, 0); magma_sgetmatrix_async( (pm+pn), pn, da_ref( i, i), ldda, a_ref ( i, i), lda, stream[1] ); trace_gpu_end( 0, 1 ); trace_gpu_start( 0, 2, "syr2k", "syr2k" ); magma_ssyr2k(MagmaLower, MagmaNoTrans, pm_old-pn_old, pn_old, c_neg_one, da_ref(indi_old+pn_old, indj_old), ldda, dW + pn_old , pm_old, d_one, da_ref(indi_old+pn_old, indi_old+pn_old), ldda); trace_gpu_end( 0, 2 ); trace_cpu_start( 0, "sync", "sync on 1" ); magma_queue_sync( stream[1] ); trace_cpu_end( 0 ); sq_to_panel(MagmaUpper, pn-1, a_ref(i, i+1), lda, work); } /* ========================================================== QR factorization on a panel starting nb off of the diagonal. Prepare the V and T matrices. ========================================================== */ #ifdef TRACING snprintf( buf, sizeof(buf), "panel %d", i ); #endif trace_cpu_start( 0, "geqrf", buf ); lapackf77_sgeqrf(&pm, &pn, a_ref(indi, indj), &lda, tau_ref(i), work, &lwork, info); /* Form the matrix T */ pk=min(pm,pn); lapackf77_slarft( MagmaForwardStr, MagmaColumnwiseStr, &pm, &pk, a_ref(indi, indj), &lda, tau_ref(i), hT, &nb); /* Prepare V - put 0s in the upper triangular part of the panel (and 1s on the diagonal), temporaly storing the original in work */ spanel_to_q(MagmaUpper, pk, a_ref(indi, indj), lda, work); trace_cpu_end( 0 ); /* Send V from the CPU to the GPU */ trace_gpu_start( 0, 0, "set", "set V and T" ); magma_ssetmatrix_async( pm, pk, a_ref(indi, indj), lda, da_ref(indi, indj), ldda, stream[0] ); /* Send the triangular factor T to the GPU */ magma_ssetmatrix_async( pk, pk, hT, nb, t_ref(i), lddt, stream[0] ); trace_gpu_end( 0, 0 ); /* ========================================================== Compute W: 1. X = A (V T) 2. W = X - 0.5* V * (T' * (V' * X)) ========================================================== */ /* dwork = V T */ trace_cpu_start( 0, "sync", "sync on 0" ); // this sync is done here to be sure that the copy has been finished // because below we made a restore sq_to_panel and this restore need // to ensure that the copy has been finished. we did it here to allow // overlapp of restore with next gemm and symm. magma_queue_sync( stream[0] ); trace_cpu_end( 0 ); trace_gpu_start( 0, 2, "gemm", "work = V*T" ); magma_sgemm(MagmaNoTrans, MagmaNoTrans, pm, pk, pk, c_one, da_ref(indi, indj), ldda, t_ref(i), lddt, c_zero, dwork, pm); trace_gpu_end( 0, 2 ); /* dW = X = A*V*T. 
dW = A*dwork */ trace_gpu_start( 0, 2, "symm", "X = A*work" ); magma_ssymm(MagmaLeft, uplo, pm, pk, c_one, da_ref(indi, indi), ldda, dwork, pm, c_zero, dW, pm); trace_gpu_end( 0, 2 ); /* restore the panel */ sq_to_panel(MagmaUpper, pk, a_ref(indi, indj), lda, work); /* dwork = V*T already ==> dwork' = T'*V' * compute T'*V'*X ==> dwork'*W ==> * dwork + pm*nb = ((T' * V') * X) = dwork' * X = dwork' * W */ trace_gpu_start( 0, 2, "gemm", "work = T'*V'*X" ); magma_sgemm(MagmaTrans, MagmaNoTrans, pk, pk, pm, c_one, dwork, pm, dW, pm, c_zero, dwork + pm*nb, nb); trace_gpu_end( 0, 2 ); /* W = X - 0.5 * V * T'*V'*X * = X - 0.5 * V * (dwork + pm*nb) = W - 0.5 * V * (dwork + pm*nb) */ trace_gpu_start( 0, 2, "gemm", "W = X - 0.5*V*(T'*V'*X)" ); magma_sgemm(MagmaNoTrans, MagmaNoTrans, pm, pk, pk, c_neg_half, da_ref(indi, indj), ldda, dwork + pm*nb, nb, c_one, dW, pm); trace_gpu_end( 0, 2 ); /* ========================================================== Update the unreduced submatrix A(i+ib:n,i+ib:n), using an update of the form: A := A - V*W' - W*V' ========================================================== */ if (i + nb <= n-nb){ /* There would be next iteration; do lookahead - update the next panel */ trace_gpu_start( 0, 2, "gemm", "gemm 4 next panel left" ); magma_sgemm(MagmaNoTrans, MagmaTrans, pm, pn, pn, c_neg_one, da_ref(indi, indj), ldda, dW , pm, c_one, da_ref(indi, indi), ldda); trace_gpu_end( 0, 2 ); trace_gpu_start( 0, 2, "gemm", "gemm 5 next panel right" ); magma_sgemm(MagmaNoTrans, MagmaTrans, pm, pn, pn, c_neg_one, dW , pm, da_ref(indi, indj), ldda, c_one, da_ref(indi, indi), ldda); trace_gpu_end( 0, 2 ); cudaEventRecord(Pupdate_event, stream[0]); } else { /* no look-ahead as this is last iteration */ trace_gpu_start( 0, 2, "syr2k", "syr2k last iteration" ); magma_ssyr2k(MagmaLower, MagmaNoTrans, pk, pk, c_neg_one, da_ref(indi, indj), ldda, dW , pm, d_one, da_ref(indi, indi), ldda); trace_gpu_end( 0, 2 ); } indi_old = indi; indj_old = indj; pm_old = pm; pn_old = pn; } // end loop for(i) /* Send the last block to the CPU */ pk = min(pm,pn); if (1 <= n-nb){ spanel_to_q(MagmaUpper, pk-1, a_ref(n-pk+1, n-pk+2), lda, work); trace_gpu_start( 0, 2, "get", "get last block" ); magma_sgetmatrix( pk, pk, da_ref(n-pk+1, n-pk+1), ldda, a_ref(n-pk+1, n-pk+1), lda ); trace_gpu_end( 0, 2 ); sq_to_panel(MagmaUpper, pk-1, a_ref(n-pk+1, n-pk+2), lda, work); } }// end of LOWER trace_finalize( "ssytrd_sy2sb.svg", "trace.css" ); cudaEventDestroy(Pupdate_event); magma_queue_destroy( stream[0] ); magma_queue_destroy( stream[1] ); magma_free( da ); MAGMA_S_SET2REAL( work[0], lwkopt ); magmablasSetKernelStream( 0 ); #if defined(USEMKL) mkl_set_num_threads(1); #endif #if defined(USEACML) omp_set_num_threads(1); #endif return *info; } /* ssytrd_sy2sb_ */
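/*
 * A small sketch (illustrative, not from MAGMA) of the 1-based, column-major
 * indexing that the a_ref/da_ref/t_ref macros above encode, rewritten as
 * inline helpers.
 */
#include <cstdio>

// Element (i,j), 1-based, of a column-major matrix with leading dimension ld.
inline double& at(double* a, int i, int j, int ld) {
    return a[(j - 1) * ld + (i - 1)];
}

// Start of the triangular factor for the panel beginning at column i; the
// nb x nb blocks sit back to back in dT with leading dimension lddt == nb.
inline double* t_block(double* dT, int i, int lddt) {
    return dT + (i - 1) * lddt;   // mirrors #define t_ref(a_1)
}

int main() {
    double a[9] = {0};
    at(a, 2, 3, 3) = 5.0;                  // row 2, column 3 of a 3x3 matrix
    std::printf("a[7] = %g\n", a[7]);      // offset (3-1)*3 + (2-1) = 7
    return 0;
}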
/* Subroutine */ int sgqrts_(integer *n, integer *m, integer *p, real *a, real *af, real *q, real *r__, integer *lda, real *taua, real *b, real *bf, real *z__, real *t, real *bwk, integer *ldb, real *taub, real * work, integer *lwork, real *rwork, real *result) { /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, r_dim1, r_offset, q_dim1, q_offset, b_dim1, b_offset, bf_dim1, bf_offset, t_dim1, t_offset, z_dim1, z_offset, bwk_dim1, bwk_offset, i__1, i__2; real r__1; /* Local variables */ static integer info; static real unfl, resid; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static real anorm, bnorm; extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer *); extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); extern /* Subroutine */ int sggqrf_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, real *, integer * , integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); extern doublereal slansy_(char *, char *, integer *, real *, integer *, real *); extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real *, integer *, real *, real *, integer *, integer *), sorgrq_( integer *, integer *, integer *, real *, integer *, real *, real * , integer *, integer *); static real ulp; #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1] #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] #define af_ref(a_1,a_2) af[(a_2)*af_dim1 + a_1] #define bf_ref(a_1,a_2) bf[(a_2)*bf_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SGQRTS tests SGGQRF, which computes the GQR factorization of an N-by-M matrix A and a N-by-P matrix B: A = Q*R and B = Q*T*Z. Arguments ========= N (input) INTEGER The number of rows of the matrices A and B. N >= 0. M (input) INTEGER The number of columns of the matrix A. M >= 0. P (input) INTEGER The number of columns of the matrix B. P >= 0. A (input) REAL array, dimension (LDA,M) The N-by-M matrix A. AF (output) REAL array, dimension (LDA,N) Details of the GQR factorization of A and B, as returned by SGGQRF, see SGGQRF for further details. Q (output) REAL array, dimension (LDA,N) The M-by-M orthogonal matrix Q. R (workspace) REAL array, dimension (LDA,MAX(M,N)) LDA (input) INTEGER The leading dimension of the arrays A, AF, R and Q. LDA >= max(M,N). TAUA (output) REAL array, dimension (min(M,N)) The scalar factors of the elementary reflectors, as returned by SGGQRF. B (input) REAL array, dimension (LDB,P) On entry, the N-by-P matrix A. BF (output) REAL array, dimension (LDB,N) Details of the GQR factorization of A and B, as returned by SGGQRF, see SGGQRF for further details. Z (output) REAL array, dimension (LDB,P) The P-by-P orthogonal matrix Z. T (workspace) REAL array, dimension (LDB,max(P,N)) BWK (workspace) REAL array, dimension (LDB,N) LDB (input) INTEGER The leading dimension of the arrays B, BF, Z and T. LDB >= max(P,N). TAUB (output) REAL array, dimension (min(P,N)) The scalar factors of the elementary reflectors, as returned by SGGRQF. 
WORK (workspace) REAL array, dimension (LWORK) LWORK (input) INTEGER The dimension of the array WORK, LWORK >= max(N,M,P)**2. RWORK (workspace) REAL array, dimension (max(N,M,P)) RESULT (output) REAL array, dimension (4) The test ratios: RESULT(1) = norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP) RESULT(2) = norm( T*Z - Q'*B ) / (MAX(P,N)*norm(B)*ULP) RESULT(3) = norm( I - Q'*Q ) / ( M*ULP ) RESULT(4) = norm( I - Z'*Z ) / ( P*ULP ) ===================================================================== Parameter adjustments */ r_dim1 = *lda; r_offset = 1 + r_dim1 * 1; r__ -= r_offset; q_dim1 = *lda; q_offset = 1 + q_dim1 * 1; q -= q_offset; af_dim1 = *lda; af_offset = 1 + af_dim1 * 1; af -= af_offset; a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --taua; bwk_dim1 = *ldb; bwk_offset = 1 + bwk_dim1 * 1; bwk -= bwk_offset; t_dim1 = *ldb; t_offset = 1 + t_dim1 * 1; t -= t_offset; z_dim1 = *ldb; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; bf_dim1 = *ldb; bf_offset = 1 + bf_dim1 * 1; bf -= bf_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --taub; --work; --rwork; --result; /* Function Body */ ulp = slamch_("Precision"); unfl = slamch_("Safe minimum"); /* Copy the matrix A to the array AF. */ slacpy_("Full", n, m, &a[a_offset], lda, &af[af_offset], lda); slacpy_("Full", n, p, &b[b_offset], ldb, &bf[bf_offset], ldb); /* Computing MAX */ r__1 = slange_("1", n, m, &a[a_offset], lda, &rwork[1]); anorm = dmax(r__1,unfl); /* Computing MAX */ r__1 = slange_("1", n, p, &b[b_offset], ldb, &rwork[1]); bnorm = dmax(r__1,unfl); /* Factorize the matrices A and B in the arrays AF and BF. */ sggqrf_(n, m, p, &af[af_offset], lda, &taua[1], &bf[bf_offset], ldb, & taub[1], &work[1], lwork, &info); /* Generate the N-by-N matrix Q */ slaset_("Full", n, n, &c_b9, &c_b9, &q[q_offset], lda); i__1 = *n - 1; slacpy_("Lower", &i__1, m, &af_ref(2, 1), lda, &q_ref(2, 1), lda); i__1 = min(*n,*m); sorgqr_(n, n, &i__1, &q[q_offset], lda, &taua[1], &work[1], lwork, &info); /* Generate the P-by-P matrix Z */ slaset_("Full", p, p, &c_b9, &c_b9, &z__[z_offset], ldb); if (*n <= *p) { if (*n > 0 && *n < *p) { i__1 = *p - *n; slacpy_("Full", n, &i__1, &bf[bf_offset], ldb, &z___ref(*p - *n + 1, 1), ldb); } if (*n > 1) { i__1 = *n - 1; i__2 = *n - 1; slacpy_("Lower", &i__1, &i__2, &bf_ref(2, *p - *n + 1), ldb, & z___ref(*p - *n + 2, *p - *n + 1), ldb); } } else { if (*p > 1) { i__1 = *p - 1; i__2 = *p - 1; slacpy_("Lower", &i__1, &i__2, &bf_ref(*n - *p + 2, 1), ldb, & z___ref(2, 1), ldb); } } i__1 = min(*n,*p); sorgrq_(p, p, &i__1, &z__[z_offset], ldb, &taub[1], &work[1], lwork, & info); /* Copy R */ slaset_("Full", n, m, &c_b19, &c_b19, &r__[r_offset], lda); slacpy_("Upper", n, m, &af[af_offset], lda, &r__[r_offset], lda); /* Copy T */ slaset_("Full", n, p, &c_b19, &c_b19, &t[t_offset], ldb); if (*n <= *p) { slacpy_("Upper", n, n, &bf_ref(1, *p - *n + 1), ldb, &t_ref(1, *p - * n + 1), ldb); } else { i__1 = *n - *p; slacpy_("Full", &i__1, p, &bf[bf_offset], ldb, &t[t_offset], ldb); slacpy_("Upper", p, p, &bf_ref(*n - *p + 1, 1), ldb, &t_ref(*n - *p + 1, 1), ldb); } /* Compute R - Q'*A */ sgemm_("Transpose", "No transpose", n, m, n, &c_b30, &q[q_offset], lda, & a[a_offset], lda, &c_b31, &r__[r_offset], lda); /* Compute norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP ) . 
*/ resid = slange_("1", n, m, &r__[r_offset], lda, &rwork[1]); if (anorm > 0.f) { /* Computing MAX */ i__1 = max(1,*m); result[1] = resid / (real) max(i__1,*n) / anorm / ulp; } else { result[1] = 0.f; } /* Compute T*Z - Q'*B */ sgemm_("No Transpose", "No transpose", n, p, p, &c_b31, &t[t_offset], ldb, &z__[z_offset], ldb, &c_b19, &bwk[bwk_offset], ldb); sgemm_("Transpose", "No transpose", n, p, n, &c_b30, &q[q_offset], lda, & b[b_offset], ldb, &c_b31, &bwk[bwk_offset], ldb); /* Compute norm( T*Z - Q'*B ) / ( MAX(P,N)*norm(A)*ULP ) . */ resid = slange_("1", n, p, &bwk[bwk_offset], ldb, &rwork[1]); if (bnorm > 0.f) { /* Computing MAX */ i__1 = max(1,*p); result[2] = resid / (real) max(i__1,*n) / bnorm / ulp; } else { result[2] = 0.f; } /* Compute I - Q'*Q */ slaset_("Full", n, n, &c_b19, &c_b31, &r__[r_offset], lda); ssyrk_("Upper", "Transpose", n, n, &c_b30, &q[q_offset], lda, &c_b31, & r__[r_offset], lda); /* Compute norm( I - Q'*Q ) / ( N * ULP ) . */ resid = slansy_("1", "Upper", n, &r__[r_offset], lda, &rwork[1]); result[3] = resid / (real) max(1,*n) / ulp; /* Compute I - Z'*Z */ slaset_("Full", p, p, &c_b19, &c_b31, &t[t_offset], ldb); ssyrk_("Upper", "Transpose", p, p, &c_b30, &z__[z_offset], ldb, &c_b31, & t[t_offset], ldb); /* Compute norm( I - Z'*Z ) / ( P*ULP ) . */ resid = slansy_("1", "Upper", p, &t[t_offset], ldb, &rwork[1]); result[4] = resid / (real) max(1,*p) / ulp; return 0; /* End of SGQRTS */ } /* sgqrts_ */
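/*
 * A double-precision stand-in (illustrative, not the LAPACK test itself) for
 * the orthogonality ratio computed above: build an exactly orthogonal Q from
 * a rotation, form I - Q'*Q, and scale by n*ulp; a result of O(1) passes.
 */
#include <cfloat>
#include <cmath>
#include <cstdio>

int main() {
    const int n = 2;
    double c = std::cos(0.3), s = std::sin(0.3);
    double Q[2][2] = {{c, -s}, {s, c}};
    double resid = 0.0;                         // max-abs norm of I - Q'*Q
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            double qtq = 0.0;
            for (int k = 0; k < n; ++k) qtq += Q[k][i] * Q[k][j];
            resid = std::fmax(resid, std::fabs((i == j) - qtq));
        }
    double ulp = DBL_EPSILON;                   // stand-in for slamch("Precision")
    std::printf("norm(I - Q'Q)/(n*ulp) = %g\n", resid / n / ulp);
    return 0;
}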
magma_int_t magma_cungqr_2stage_gpu(magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex *da, magma_int_t ldda, magmaFloatComplex *tau, magmaFloatComplex *dT, magma_int_t nb, magma_int_t *info) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= CUNGQR generates an M-by-N COMPLEX matrix Q with orthonormal columns, which is defined as the first N columns of a product of K elementary reflectors of order M Q = H(1) H(2) . . . H(k) as returned by CGEQRF_GPU. Arguments ========= M (input) INTEGER The number of rows of the matrix Q. M >= 0. N (input) INTEGER The number of columns of the matrix Q. M >= N >= 0. K (input) INTEGER The number of elementary reflectors whose product defines the matrix Q. N >= K >= 0. DA (input/output) COMPLEX array A on the GPU device, dimension (LDDA,N). On entry, the i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CGEQRF_GPU in the first k columns of its array argument A. On exit, the M-by-N matrix Q. LDDA (input) INTEGER The first dimension of the array A. LDDA >= max(1,M). TAU (input) COMPLEX array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEQRF_GPU. DT (input) COMPLEX work space array on the GPU device, dimension (MIN(M, N) )*NB. This must be the 6th argument of magma_cgeqrf_gpu [ note that if N here is bigger than N in magma_cgeqrf_gpu, the workspace requirement DT in magma_cgeqrf_gpu must be as specified in this routine ]. NB (input) INTEGER This is the block size used in CGEQRF_GPU, and correspondingly the size of the T matrices, used in the factorization, and stored in DT. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument has an illegal value ===================================================================== */ #define da_ref(a_1,a_2) (da+(a_2)*(ldda) + (a_1)) #define t_ref(a_1) (dT+(a_1)*nb) magma_int_t i__1, i__2, i__3; //magma_int_t lwork; magma_int_t i, ib, ki, kk; //, iinfo; //magma_int_t lddwork = min(m, n); //magmaFloatComplex *work, *panel; magmaFloatComplex *dwork; //magma_queue_t stream[2]; magma_int_t ldt=nb; // need to be an input parameter *info = 0; if (m < 0) { *info = -1; } else if ((n < 0) || (n > m)) { *info = -2; } else if ((k < 0) || (k > n)) { *info = -3; } else if (ldda < max(1,m)) { *info = -5; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } if (n <= 0) return *info; if(MAGMA_SUCCESS != magma_cmalloc( &dwork, n*nb )) { printf ("!!!! cungqr_2stage magma_alloc failed for: dwork\n" ); exit(-1); } if ( (nb > 1) && (nb < k) ) { /* Use blocked code after the last block. The first kk columns are handled by the block method. ki is start of 2nd-to-last block. */ ki = (k - nb - 1) / nb * nb; kk = min(k, ki + nb); /* Set A(1:kk,kk+1:n) to zero. */ magmablas_claset(MagmaUpperLower, kk, n-kk, da_ref(0,kk), ldda); /* A(kk+1:m, kk+1:n) = I */ magmablas_claset_identity(m-kk, n-kk, da_ref(kk,kk), ldda); } else { ki = 0; kk = 0; } /* Allocate work space on CPU in pinned memory */ //lwork = (n+m) * nb; //if (kk < n) // lwork = max(lwork, n * nb + (m-kk)*(n-kk)); //if (MAGMA_SUCCESS != magma_cmalloc_pinned( &work, (lwork) )) { // *info = MAGMA_ERR_HOST_ALLOC; // return *info; //} //panel = work + n * nb; //magma_queue_create( &stream[0] ); //magma_queue_create( &stream[1] ); /* Use unblocked code for the last or only block. 
*/ if (kk < n) { i__1 = m - kk; i__2 = n - kk; i__3 = k - kk; //cublasGetMatrix(i__1, i__2, sizeof(magmaFloatComplex), // da_ref(kk, kk), ldda, panel, i__1); //lapackf77_cungqr(&i__1, &i__2, &i__3, panel, &i__1, &tau[kk], // work, &lwork, &iinfo); // //cublasSetMatrix(i__1, i__2, sizeof(magmaFloatComplex), // panel, i__1, da_ref(kk, kk), ldda); magma_clarfb_gpu( MagmaLeft, MagmaNoTrans, MagmaForward, MagmaColumnwise, i__1, i__2, i__3, da_ref(kk, kk-nb), ldda, t_ref(kk-nb), ldt, da_ref(kk, kk), ldda, dwork, i__2); //magmablas_claset(MagmaUpperLower, kk-nb, nb, da_ref(0,kk-nb), ldda); //magmablas_claset_identity(m-(kk-nb), nb, da_ref(kk-nb,kk-nb), ldda); } if (kk > 0) { /* Use blocked code */ for (i = ki; i >= nb; i-=nb) { ib = min(nb, k - i); /* Send current panel to the CPU for update */ i__2 = m - i; //cudaMemcpy2DAsync(panel, i__2 * sizeof(magmaFloatComplex), // da_ref(i,i), ldda * sizeof(magmaFloatComplex), // sizeof(magmaFloatComplex)*i__2, ib, // cudaMemcpyDeviceToHost,stream[0]); if (i + ib < n) { /* Apply H to A(i:m,i+ib:n) from the left */ i__3 = n - i; magmablas_claset(MagmaUpperLower, i, ib, da_ref(0,i), ldda); magmablas_claset_identity(m-i, ib, da_ref(i,i), ldda); magma_clarfb_gpu( MagmaLeft, MagmaNoTrans, MagmaForward, MagmaColumnwise, i__2, i__3, ib, da_ref(i, i-nb), ldda, t_ref(i-nb), ldt, da_ref(i, i), ldda, dwork, i__3); } /* Apply H to rows i:m of current block on the CPU */ //magma_queue_sync( stream[0] ); //lapackf77_cungqr(&i__2, &ib, &ib, panel, &i__2, &tau[i], // work, &lwork, &iinfo); //cudaMemcpy2DAsync(da_ref(i,i), ldda * sizeof(magmaFloatComplex), // panel, i__2 * sizeof(magmaFloatComplex), // sizeof(magmaFloatComplex)*i__2, ib, // cudaMemcpyHostToDevice,stream[1]); /* Set rows 1:i-1 of current block to zero */ i__2 = i + ib; //magmablas_claset(MagmaUpperLower, i-ib, ib, da_ref(0,i-ib), ldda); //magmablas_claset_identity(m-(i-ib), ib, da_ref(i-ib,i-ib), ldda); } } magmablas_claset_identity(m, nb, da_ref(0,0), ldda); magma_free( dwork ); //magma_free_pinned( work ); //magma_queue_destroy( stream[0] ); //magma_queue_destroy( stream[1] ); return *info; } /* magma_cungqr_gpu */
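/*
 * The CPU analogue as a hedged sketch (assumes LAPACKE): after dgeqrf, dorgqr
 * overwrites the reflectors with the explicit M-by-N Q, just as CUNGQR does;
 * printing Q'*Q shows the columns are orthonormal.
 */
#include <lapacke.h>
#include <cstdio>

int main() {
    double A[12] = {1, 1, 0, 0,  0, 1, 1, 0,  0, 0, 1, 1};  // 4x3, column-major
    double tau[3];
    if (LAPACKE_dgeqrf(LAPACK_COL_MAJOR, 4, 3, A, 4, tau) != 0) return 1;
    if (LAPACKE_dorgqr(LAPACK_COL_MAJOR, 4, 3, 3, A, 4, tau) != 0) return 1;
    for (int i = 0; i < 3; ++i) {               // Q'*Q should be ~identity
        for (int j = 0; j < 3; ++j) {
            double d = 0.0;
            for (int k = 0; k < 4; ++k) d += A[k + 4 * i] * A[k + 4 * j];
            std::printf("%9.1e ", d);
        }
        std::printf("\n");
    }
    return 0;
}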
/* Subroutine */ int dlaexc_(logical *wantq, integer *n, doublereal *t, integer *ldt, doublereal *q, integer *ldq, integer *j1, integer *n1, integer *n2, doublereal *work, integer *info) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in an upper quasi-triangular matrix T by an orthogonal similarity transformation. T must be in Schur canonical form, that is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block has its diagonal elemnts equal and its off-diagonal elements of opposite sign. Arguments ========= WANTQ (input) LOGICAL = .TRUE. : accumulate the transformation in the matrix Q; = .FALSE.: do not accumulate the transformation. N (input) INTEGER The order of the matrix T. N >= 0. T (input/output) DOUBLE PRECISION array, dimension (LDT,N) On entry, the upper quasi-triangular matrix T, in Schur canonical form. On exit, the updated matrix T, again in Schur canonical form. LDT (input) INTEGER The leading dimension of the array T. LDT >= max(1,N). Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) On entry, if WANTQ is .TRUE., the orthogonal matrix Q. On exit, if WANTQ is .TRUE., the updated matrix Q. If WANTQ is .FALSE., Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. J1 (input) INTEGER The index of the first row of the first block T11. N1 (input) INTEGER The order of the first block T11. N1 = 0, 1 or 2. N2 (input) INTEGER The order of the second block T22. N2 = 0, 1 or 2. WORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER = 0: successful exit = 1: the transformed matrix T would be too far from Schur form; the blocks are not swapped and T and Q are unchanged. 
===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c__4 = 4; static logical c_false = FALSE_; static integer c_n1 = -1; static integer c__2 = 2; static integer c__3 = 3; /* System generated locals */ integer q_dim1, q_offset, t_dim1, t_offset, i__1; doublereal d__1, d__2, d__3, d__4, d__5, d__6; /* Local variables */ static integer ierr; static doublereal temp; extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); static doublereal d__[16] /* was [4][4] */; static integer k; static doublereal u[3], scale, x[4] /* was [2][2] */, dnorm; static integer j2, j3, j4; static doublereal xnorm, u1[3], u2[3]; extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), dlasy2_( logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer nd; static doublereal cs, t11, t22; extern doublereal dlamch_(char *); static doublereal t33; extern doublereal dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); static doublereal sn; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *), dlarfx_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *); static doublereal thresh, smlnum, wi1, wi2, wr1, wr2, eps, tau, tau1, tau2; #define d___ref(a_1,a_2) d__[(a_2)*4 + a_1 - 5] #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*2 + a_1 - 3] t_dim1 = *ldt; t_offset = 1 + t_dim1 * 1; t -= t_offset; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --work; /* Function Body */ *info = 0; /* Quick return if possible */ if (*n == 0 || *n1 == 0 || *n2 == 0) { return 0; } if (*j1 + *n1 > *n) { return 0; } j2 = *j1 + 1; j3 = *j1 + 2; j4 = *j1 + 3; if (*n1 == 1 && *n2 == 1) { /* Swap two 1-by-1 blocks. */ t11 = t_ref(*j1, *j1); t22 = t_ref(j2, j2); /* Determine the transformation to perform the interchange. */ d__1 = t22 - t11; dlartg_(&t_ref(*j1, j2), &d__1, &cs, &sn, &temp); /* Apply transformation to the matrix T. */ if (j3 <= *n) { i__1 = *n - *j1 - 1; drot_(&i__1, &t_ref(*j1, j3), ldt, &t_ref(j2, j3), ldt, &cs, &sn); } i__1 = *j1 - 1; drot_(&i__1, &t_ref(1, *j1), &c__1, &t_ref(1, j2), &c__1, &cs, &sn); t_ref(*j1, *j1) = t22; t_ref(j2, j2) = t11; if (*wantq) { /* Accumulate transformation in the matrix Q. */ drot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, j2), &c__1, &cs, &sn); } } else { /* Swapping involves at least one 2-by-2 block. Copy the diagonal block of order N1+N2 to the local array D and compute its norm. */ nd = *n1 + *n2; dlacpy_("Full", &nd, &nd, &t_ref(*j1, *j1), ldt, d__, &c__4); dnorm = dlange_("Max", &nd, &nd, d__, &c__4, &work[1]); /* Compute machine-dependent threshold for test for accepting swap. */ eps = dlamch_("P"); smlnum = dlamch_("S") / eps; /* Computing MAX */ d__1 = eps * 10. * dnorm; thresh = max(d__1,smlnum); /* Solve T11*X - X*T22 = scale*T12 for X. 
*/ dlasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d___ref(*n1 + 1, *n1 + 1), &c__4, &d___ref(1, *n1 + 1), &c__4, &scale, x, & c__2, &xnorm, &ierr); /* Swap the adjacent diagonal blocks. */ k = *n1 + *n1 + *n2 - 3; switch (k) { case 1: goto L10; case 2: goto L20; case 3: goto L30; } L10: /* N1 = 1, N2 = 2: generate elementary reflector H so that: ( scale, X11, X12 ) H = ( 0, 0, * ) */ u[0] = scale; u[1] = x_ref(1, 1); u[2] = x_ref(1, 2); dlarfg_(&c__3, &u[2], u, &c__1, &tau); u[2] = 1.; t11 = t_ref(*j1, *j1); /* Perform swap provisionally on diagonal block in D. */ dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); /* Test whether to reject swap. Computing MAX */ d__4 = (d__1 = d___ref(3, 1), abs(d__1)), d__5 = (d__2 = d___ref(3, 2) , abs(d__2)), d__4 = max(d__4,d__5), d__5 = (d__3 = d___ref(3, 3) - t11, abs(d__3)); if (max(d__4,d__5) > thresh) { goto L50; } /* Accept swap: apply transformation to the entire matrix T. */ i__1 = *n - *j1 + 1; dlarfx_("L", &c__3, &i__1, u, &tau, &t_ref(*j1, *j1), ldt, &work[1]); dlarfx_("R", &j2, &c__3, u, &tau, &t_ref(1, *j1), ldt, &work[1]); t_ref(j3, *j1) = 0.; t_ref(j3, j2) = 0.; t_ref(j3, j3) = t11; if (*wantq) { /* Accumulate transformation in the matrix Q. */ dlarfx_("R", n, &c__3, u, &tau, &q_ref(1, *j1), ldq, &work[1]); } goto L40; L20: /* N1 = 2, N2 = 1: generate elementary reflector H so that: H ( -X11 ) = ( * ) ( -X21 ) = ( 0 ) ( scale ) = ( 0 ) */ u[0] = -x_ref(1, 1); u[1] = -x_ref(2, 1); u[2] = scale; dlarfg_(&c__3, u, &u[1], &c__1, &tau); u[0] = 1.; t33 = t_ref(j3, j3); /* Perform swap provisionally on diagonal block in D. */ dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); /* Test whether to reject swap. Computing MAX */ d__4 = (d__1 = d___ref(2, 1), abs(d__1)), d__5 = (d__2 = d___ref(3, 1) , abs(d__2)), d__4 = max(d__4,d__5), d__5 = (d__3 = d___ref(1, 1) - t33, abs(d__3)); if (max(d__4,d__5) > thresh) { goto L50; } /* Accept swap: apply transformation to the entire matrix T. */ dlarfx_("R", &j3, &c__3, u, &tau, &t_ref(1, *j1), ldt, &work[1]); i__1 = *n - *j1; dlarfx_("L", &c__3, &i__1, u, &tau, &t_ref(*j1, j2), ldt, &work[1]); t_ref(*j1, *j1) = t33; t_ref(j2, *j1) = 0.; t_ref(j3, *j1) = 0.; if (*wantq) { /* Accumulate transformation in the matrix Q. */ dlarfx_("R", n, &c__3, u, &tau, &q_ref(1, *j1), ldq, &work[1]); } goto L40; L30: /* N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so that: H(2) H(1) ( -X11 -X12 ) = ( * * ) ( -X21 -X22 ) ( 0 * ) ( scale 0 ) ( 0 0 ) ( 0 scale ) ( 0 0 ) */ u1[0] = -x_ref(1, 1); u1[1] = -x_ref(2, 1); u1[2] = scale; dlarfg_(&c__3, u1, &u1[1], &c__1, &tau1); u1[0] = 1.; temp = -tau1 * (x_ref(1, 2) + u1[1] * x_ref(2, 2)); u2[0] = -temp * u1[1] - x_ref(2, 2); u2[1] = -temp * u1[2]; u2[2] = scale; dlarfg_(&c__3, u2, &u2[1], &c__1, &tau2); u2[0] = 1.; /* Perform swap provisionally on diagonal block in D. */ dlarfx_("L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1]) ; dlarfx_("R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1]) ; dlarfx_("L", &c__3, &c__4, u2, &tau2, &d___ref(2, 1), &c__4, &work[1]); dlarfx_("R", &c__4, &c__3, u2, &tau2, &d___ref(1, 2), &c__4, &work[1]); /* Test whether to reject swap. 
Computing MAX */ d__5 = (d__1 = d___ref(3, 1), abs(d__1)), d__6 = (d__2 = d___ref(3, 2) , abs(d__2)), d__5 = max(d__5,d__6), d__6 = (d__3 = d___ref(4, 1), abs(d__3)), d__5 = max(d__5,d__6), d__6 = (d__4 = d___ref(4, 2), abs(d__4)); if (max(d__5,d__6) > thresh) { goto L50; } /* Accept swap: apply transformation to the entire matrix T. */ i__1 = *n - *j1 + 1; dlarfx_("L", &c__3, &i__1, u1, &tau1, &t_ref(*j1, *j1), ldt, &work[1]); dlarfx_("R", &j4, &c__3, u1, &tau1, &t_ref(1, *j1), ldt, &work[1]); i__1 = *n - *j1 + 1; dlarfx_("L", &c__3, &i__1, u2, &tau2, &t_ref(j2, *j1), ldt, &work[1]); dlarfx_("R", &j4, &c__3, u2, &tau2, &t_ref(1, j2), ldt, &work[1]); t_ref(j3, *j1) = 0.; t_ref(j3, j2) = 0.; t_ref(j4, *j1) = 0.; t_ref(j4, j2) = 0.; if (*wantq) { /* Accumulate transformation in the matrix Q. */ dlarfx_("R", n, &c__3, u1, &tau1, &q_ref(1, *j1), ldq, &work[1]); dlarfx_("R", n, &c__3, u2, &tau2, &q_ref(1, j2), ldq, &work[1]); } L40: if (*n2 == 2) { /* Standardize new 2-by-2 block T11 */ dlanv2_(&t_ref(*j1, *j1), &t_ref(*j1, j2), &t_ref(j2, *j1), & t_ref(j2, j2), &wr1, &wi1, &wr2, &wi2, &cs, &sn); i__1 = *n - *j1 - 1; drot_(&i__1, &t_ref(*j1, *j1 + 2), ldt, &t_ref(j2, *j1 + 2), ldt, &cs, &sn); i__1 = *j1 - 1; drot_(&i__1, &t_ref(1, *j1), &c__1, &t_ref(1, j2), &c__1, &cs, & sn); if (*wantq) { drot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, j2), &c__1, &cs, & sn); } } if (*n1 == 2) { /* Standardize new 2-by-2 block T22 */ j3 = *j1 + *n2; j4 = j3 + 1; dlanv2_(&t_ref(j3, j3), &t_ref(j3, j4), &t_ref(j4, j3), &t_ref(j4, j4), &wr1, &wi1, &wr2, &wi2, &cs, &sn); if (j3 + 2 <= *n) { i__1 = *n - j3 - 1; drot_(&i__1, &t_ref(j3, j3 + 2), ldt, &t_ref(j4, j3 + 2), ldt, &cs, &sn); } i__1 = j3 - 1; drot_(&i__1, &t_ref(1, j3), &c__1, &t_ref(1, j4), &c__1, &cs, &sn) ; if (*wantq) { drot_(n, &q_ref(1, j3), &c__1, &q_ref(1, j4), &c__1, &cs, &sn) ; } } } return 0; /* Exit with INFO = 1 if swap was rejected. */ L50: *info = 1; return 0; /* End of DLAEXC */ } /* dlaexc_ */
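/*
 * The 1-by-1 / 1-by-1 swap above, as a standalone sketch (illustrative
 * values): the rotation produced by dlartg(t12, t22 - t11) similarity-
 * transforms the block so the eigenvalues trade places while T stays
 * upper triangular.
 */
#include <cmath>
#include <cstdio>

int main() {
    double t11 = 1.0, t22 = 3.0, t12 = 0.5;     // distinct eigenvalues required
    double f = t12, g = t22 - t11;
    double r = std::hypot(f, g), cs = f / r, sn = g / r;
    double T[2][2] = {{t11, t12}, {0.0, t22}}, GT[2][2], Tp[2][2];
    for (int j = 0; j < 2; ++j) {               // GT = G*T, G = [cs sn; -sn cs]
        GT[0][j] =  cs * T[0][j] + sn * T[1][j];
        GT[1][j] = -sn * T[0][j] + cs * T[1][j];
    }
    for (int i = 0; i < 2; ++i) {               // Tp = GT*G'
        Tp[i][0] =  cs * GT[i][0] + sn * GT[i][1];
        Tp[i][1] = -sn * GT[i][0] + cs * GT[i][1];
    }
    // Expect diagonal (t22, t11) and a (2,1) entry of roughly zero.
    std::printf("[% .3f % .3f]\n[% .3f % .3f]\n",
                Tp[0][0], Tp[0][1], Tp[1][0], Tp[1][1]);
    return 0;
}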
extern "C" int magma_ztsqrt_gpu(int *m, int *n, magmaDoubleComplex *a1, magmaDoubleComplex *a2, int *lda, magmaDoubleComplex *tau, magmaDoubleComplex *work, int *lwork, magmaDoubleComplex *dwork, int *info ) { /* -- MAGMA (version 1.4.1) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver December 2013 Purpose ======= ZGEQRF computes a QR factorization of a complex M-by-N matrix A: A = Q * R. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) COMPLEX_16 array on the GPU, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). TAU (output) COMPLEX_16 array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). WORK (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. Higher performance is achieved if WORK is in pinned memory, e.g. allocated using magma_malloc_pinned. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= (M+N+NB)*NB, where NB can be obtained through magma_get_zgeqrf_nb(M). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued. DWORK (workspace/output) COMPLEX_16 array on the GPU, dimension 2*N*NB, where NB can be obtained through magma_get_zgeqrf_nb(M). It starts with NB*NB blocks that store the triangular T matrices, followed by the NB*NB blocks of the diagonal inverses for the R matrix. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in TAU(i). ===================================================================== */ #define a1_ref(a_1,a_2) ( a1+(a_2)*(*lda) + (a_1)) #define a2_ref(a_1,a_2) ( a2+(a_2)*(*lda) + (a_1)) #define t_ref(a_1) (dwork+(a_1)) #define d_ref(a_1) (dwork+(lddwork+(a_1))*nb) #define dd_ref(a_1) (dwork+(2*lddwork+(a_1))*nb) #define work_a1 ( work ) #define work_a2 ( work + nb ) #define hwork ( work + (nb)*(*m)) int i, k, ldwork, lddwork, old_i, old_ib, rows, cols; int nbmin, ib, ldda; /* Function Body */ *info = 0; int nb = magma_get_zgeqrf_nb(*m); int lwkopt = (*n+*m) * nb; work[0] = (magmaDoubleComplex) lwkopt; int lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } else if (*lwork < max(1,*n) && ! 
lquery) { *info = -7; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) return *info; k = min(*m,*n); if (k == 0) { work[0] = 1.f; return *info; } int lhwork = *lwork - (*m)*nb; magma_queue_t stream[2]; magma_queue_create( &stream[0] ); magma_queue_create( &stream[1] ); ldda = *m; nbmin = 2; ldwork = *m; lddwork= k; // This is only blocked code for now for (i = 0; i < k; i += nb) { ib = min(k-i, nb); rows = *m -i; rows = *m; // Send the next panel (diagonal block of A1 & block column of A2) // to the CPU (in work_a1 and work_a2) magma_zgetmatrix_async( rows, ib, a2_ref(0,i), (*lda), work_a2, ldwork, stream[1] ); // a1_ref(i,i), (*lda)*sizeof(magmaDoubleComplex), // the diagonal of a1 is in d_ref generated and // passed from magma_zgeqrf_gpu magma_zgetmatrix_async( ib, ib, d_ref(i), ib, work_a1, ldwork, stream[1] ); if (i>0) { /* Apply H' to A(i:m,i+2*ib:n) from the left */ // update T2 cols = *n-old_i-2*old_ib; magma_zssrfb(*m, cols, &old_ib, a2_ref( 0, old_i), lda, t_ref(old_i), &lddwork, a1_ref(old_i, old_i+2*old_ib), lda, a2_ref( 0, old_i+2*old_ib), lda, dd_ref(0), &lddwork); } magma_queue_sync( stream[1] ); // TTT - here goes the CPU PLASMA code // Matrix T has to be put in hwork with lda = ib and 0s // in the parts that are not used - copied on GPU in t_ref(i) // Now diag of A1 is updated, send it back asynchronously to the GPU. // We have to play interchaning these copies to see which is faster magma_zsetmatrix_async( ib, ib, work_a1, ib, d_ref(i), ib, stream[0] ); // Send the panel from A2 back to the GPU magma_zsetmatrix( *m, ib, work_a2, ldwork, a2_ref(0,i), *lda ); if (i + ib < *n) { // Send the triangular factor T from hwork to the GPU in t_ref(i) magma_zsetmatrix( ib, ib, hwork, ib, t_ref(i), lddwork ); if (i+nb < k){ /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */ // if we can do one more step, first update T1 magma_zssrfb(*m, ib, &ib, a2_ref(0, i), lda, t_ref(i), &lddwork, a1_ref(i, i+ib), lda, a2_ref(0, i+ib), lda, dd_ref(0), &lddwork); } else { cols = *n-i-ib; // otherwise, update until the end and fix the panel magma_zssrfb(*m, cols, &ib, a2_ref(0, i), lda, t_ref(i), &lddwork, a1_ref(i, i+ib), lda, a2_ref(0, i+ib), lda, dd_ref(0), &lddwork); } old_i = i; old_ib = ib; } } return *info; } /* magma_ztsqrt_gpu */
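/*
 * The LWORK = -1 workspace-query convention used throughout these routines,
 * sketched against the Fortran dgeqrf symbol directly (the trailing
 * underscore is the common but compiler-dependent name mangling).
 */
#include <cstdio>
#include <vector>

extern "C" void dgeqrf_(const int* m, const int* n, double* a, const int* lda,
                        double* tau, double* work, const int* lwork, int* info);

int main() {
    int m = 100, n = 40, lda = m, info = 0, lwork = -1;
    std::vector<double> a(lda * n, 1.0), tau(n);
    double wkopt = 0.0;
    dgeqrf_(&m, &n, a.data(), &lda, tau.data(), &wkopt, &lwork, &info);  // query only
    lwork = (int)wkopt;                     // optimal size comes back in WORK(1)
    std::vector<double> work(lwork);
    dgeqrf_(&m, &n, a.data(), &lda, tau.data(), work.data(), &lwork, &info);
    std::printf("optimal lwork = %d, info = %d\n", lwork, info);
    return 0;
}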
/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select, integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= ZTREVC computes some or all of the right and/or left eigenvectors of a complex upper triangular matrix T. The right eigenvector x and the left eigenvector y of T corresponding to an eigenvalue w are defined by: T*x = w*x, y'*T = w*y' where y' denotes the conjugate transpose of the vector y. If all eigenvectors are requested, the routine may either return the matrices X and/or Y of right or left eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an input unitary matrix. If T was obtained from the Schur factorization of an original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of right or left eigenvectors of A. Arguments ========= SIDE (input) CHARACTER*1 = 'R': compute right eigenvectors only; = 'L': compute left eigenvectors only; = 'B': compute both right and left eigenvectors. HOWMNY (input) CHARACTER*1 = 'A': compute all right and/or left eigenvectors; = 'B': compute all right and/or left eigenvectors, and backtransform them using the input matrices supplied in VR and/or VL; = 'S': compute selected right and/or left eigenvectors, specified by the logical array SELECT. SELECT (input) LOGICAL array, dimension (N) If HOWMNY = 'S', SELECT specifies the eigenvectors to be computed. If HOWMNY = 'A' or 'B', SELECT is not referenced. To select the eigenvector corresponding to the j-th eigenvalue, SELECT(j) must be set to .TRUE.. N (input) INTEGER The order of the matrix T. N >= 0. T (input/output) COMPLEX*16 array, dimension (LDT,N) The upper triangular matrix T. T is modified, but restored on exit. LDT (input) INTEGER The leading dimension of the array T. LDT >= max(1,N). VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must contain an N-by-N matrix Q (usually the unitary matrix Q of Schur vectors returned by ZHSEQR). On exit, if SIDE = 'L' or 'B', VL contains: if HOWMNY = 'A', the matrix Y of left eigenvectors of T; VL is lower triangular. The i-th column VL(i) of VL is the eigenvector corresponding to T(i,i). if HOWMNY = 'B', the matrix Q*Y; if HOWMNY = 'S', the left eigenvectors of T specified by SELECT, stored consecutively in the columns of VL, in the same order as their eigenvalues. If SIDE = 'R', VL is not referenced. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must contain an N-by-N matrix Q (usually the unitary matrix Q of Schur vectors returned by ZHSEQR). On exit, if SIDE = 'R' or 'B', VR contains: if HOWMNY = 'A', the matrix X of right eigenvectors of T; VR is upper triangular. The i-th column VR(i) of VR is the eigenvector corresponding to T(i,i). if HOWMNY = 'B', the matrix Q*X; if HOWMNY = 'S', the right eigenvectors of T specified by SELECT, stored consecutively in the columns of VR, in the same order as their eigenvalues. If SIDE = 'L', VR is not referenced. LDVR (input) INTEGER The leading dimension of the array VR. 
LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. MM (input) INTEGER The number of columns in the arrays VL and/or VR. MM >= M. M (output) INTEGER The number of columns in the arrays VL and/or VR actually used to store the eigenvectors. If HOWMNY = 'A' or 'B', M is set to N. Each selected eigenvector occupies one column. WORK (workspace) COMPLEX*16 array, dimension (2*N) RWORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== The algorithm used in this program is basically backward (forward) substitution, with scaling to make the the code robust against possible overflow. Each eigenvector is normalized so that the element of largest magnitude has magnitude 1; here the magnitude of a complex number (x,y) is taken to be |x| + |y|. ===================================================================== Decode and test the input parameters Parameter adjustments */ /* Table of constant values */ static doublecomplex c_b2 = {1.,0.}; static integer c__1 = 1; /* System generated locals */ integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3, i__4, i__5; doublereal d__1, d__2, d__3; doublecomplex z__1, z__2; /* Builtin functions */ double d_imag(doublecomplex *); void d_cnjg(doublecomplex *, doublecomplex *); /* Local variables */ static logical allv; static doublereal unfl, ovfl, smin; static logical over; static integer i__, j, k; static doublereal scale; extern logical lsame_(char *, char *); static doublereal remax; static logical leftv, bothv; extern /* Subroutine */ int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *); static logical somev; extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); static integer ii, ki; extern doublereal dlamch_(char *); static integer is; extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( integer *, doublereal *, doublecomplex *, integer *); extern integer izamax_(integer *, doublecomplex *, integer *); static logical rightv; extern doublereal dzasum_(integer *, doublecomplex *, integer *); static doublereal smlnum; extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublereal *, integer *); static doublereal ulp; #define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1 #define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)] #define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1 #define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)] #define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1 #define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)] --select; t_dim1 = *ldt; t_offset = 1 + t_dim1 * 1; t -= t_offset; vl_dim1 = *ldvl; vl_offset = 1 + vl_dim1 * 1; vl -= vl_offset; vr_dim1 = *ldvr; vr_offset = 1 + vr_dim1 * 1; vr -= vr_offset; --work; --rwork; /* Function Body */ bothv = lsame_(side, "B"); rightv = lsame_(side, "R") || bothv; leftv = lsame_(side, "L") || bothv; allv = lsame_(howmny, "A"); over = lsame_(howmny, "B"); somev = lsame_(howmny, "S"); /* Set M to the number of columns required to store the selected eigenvectors. */ if (somev) { *m = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (select[j]) { ++(*m); } /* L10: */ } } else { *m = *n; } *info = 0; if (! rightv && ! leftv) { *info = -1; } else if (! allv && ! over && ! 
somev) { *info = -2; } else if (*n < 0) { *info = -4; } else if (*ldt < max(1,*n)) { *info = -6; } else if (*ldvl < 1 || leftv && *ldvl < *n) { *info = -8; } else if (*ldvr < 1 || rightv && *ldvr < *n) { *info = -10; } else if (*mm < *m) { *info = -11; } if (*info != 0) { i__1 = -(*info); xerbla_("ZTREVC", &i__1); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } /* Set the constants to control overflow. */ unfl = dlamch_("Safe minimum"); ovfl = 1. / unfl; dlabad_(&unfl, &ovfl); ulp = dlamch_("Precision"); smlnum = unfl * (*n / ulp); /* Store the diagonal elements of T in working array WORK. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ + *n; i__3 = t_subscr(i__, i__); work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; /* L20: */ } /* Compute 1-norm of each column of strictly upper triangular part of T to control overflow in triangular solver. */ rwork[1] = 0.; i__1 = *n; for (j = 2; j <= i__1; ++j) { i__2 = j - 1; rwork[j] = dzasum_(&i__2, &t_ref(1, j), &c__1); /* L30: */ } if (rightv) { /* Compute right eigenvectors. */ is = *m; for (ki = *n; ki >= 1; --ki) { if (somev) { if (! select[ki]) { goto L80; } } /* Computing MAX */ i__1 = t_subscr(ki, ki); d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(& t_ref(ki, ki)), abs(d__2))); smin = max(d__3,smlnum); work[1].r = 1., work[1].i = 0.; /* Form right-hand side. */ i__1 = ki - 1; for (k = 1; k <= i__1; ++k) { i__2 = k; i__3 = t_subscr(k, ki); z__1.r = -t[i__3].r, z__1.i = -t[i__3].i; work[i__2].r = z__1.r, work[i__2].i = z__1.i; /* L40: */ } /* Solve the triangular system: (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. */ i__1 = ki - 1; for (k = 1; k <= i__1; ++k) { i__2 = t_subscr(k, k); i__3 = t_subscr(k, k); i__4 = t_subscr(ki, ki); z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4] .i; t[i__2].r = z__1.r, t[i__2].i = z__1.i; i__2 = t_subscr(k, k); if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t_ref(k, k)), abs(d__2)) < smin) { i__3 = t_subscr(k, k); t[i__3].r = smin, t[i__3].i = 0.; } /* L50: */ } if (ki > 1) { i__1 = ki - 1; zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ t_offset], ldt, &work[1], &scale, &rwork[1], info); i__1 = ki; work[i__1].r = scale, work[i__1].i = 0.; } /* Copy the vector x or Q*x to VR and normalize. */ if (! over) { zcopy_(&ki, &work[1], &c__1, &vr_ref(1, is), &c__1); ii = izamax_(&ki, &vr_ref(1, is), &c__1); i__1 = vr_subscr(ii, is); remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( &vr_ref(ii, is)), abs(d__2))); zdscal_(&ki, &remax, &vr_ref(1, is), &c__1); i__1 = *n; for (k = ki + 1; k <= i__1; ++k) { i__2 = vr_subscr(k, is); vr[i__2].r = 0., vr[i__2].i = 0.; /* L60: */ } } else { if (ki > 1) { i__1 = ki - 1; z__1.r = scale, z__1.i = 0.; zgemv_("N", n, &i__1, &c_b2, &vr[vr_offset], ldvr, &work[ 1], &c__1, &z__1, &vr_ref(1, ki), &c__1); } ii = izamax_(n, &vr_ref(1, ki), &c__1); i__1 = vr_subscr(ii, ki); remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( &vr_ref(ii, ki)), abs(d__2))); zdscal_(n, &remax, &vr_ref(1, ki), &c__1); } /* Set back the original diagonal elements of T. */ i__1 = ki - 1; for (k = 1; k <= i__1; ++k) { i__2 = t_subscr(k, k); i__3 = k + *n; t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; /* L70: */ } --is; L80: ; } } if (leftv) { /* Compute left eigenvectors. */ is = 1; i__1 = *n; for (ki = 1; ki <= i__1; ++ki) { if (somev) { if (! 
select[ki]) { goto L130; } } /* Computing MAX */ i__2 = t_subscr(ki, ki); d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(& t_ref(ki, ki)), abs(d__2))); smin = max(d__3,smlnum); i__2 = *n; work[i__2].r = 1., work[i__2].i = 0.; /* Form right-hand side. */ i__2 = *n; for (k = ki + 1; k <= i__2; ++k) { i__3 = k; d_cnjg(&z__2, &t_ref(ki, k)); z__1.r = -z__2.r, z__1.i = -z__2.i; work[i__3].r = z__1.r, work[i__3].i = z__1.i; /* L90: */ } /* Solve the triangular system: (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. */ i__2 = *n; for (k = ki + 1; k <= i__2; ++k) { i__3 = t_subscr(k, k); i__4 = t_subscr(k, k); i__5 = t_subscr(ki, ki); z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5] .i; t[i__3].r = z__1.r, t[i__3].i = z__1.i; i__3 = t_subscr(k, k); if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t_ref(k, k)), abs(d__2)) < smin) { i__4 = t_subscr(k, k); t[i__4].r = smin, t[i__4].i = 0.; } /* L100: */ } if (ki < *n) { i__2 = *n - ki; zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & i__2, &t_ref(ki + 1, ki + 1), ldt, &work[ki + 1], & scale, &rwork[1], info); i__2 = ki; work[i__2].r = scale, work[i__2].i = 0.; } /* Copy the vector x or Q*x to VL and normalize. */ if (! over) { i__2 = *n - ki + 1; zcopy_(&i__2, &work[ki], &c__1, &vl_ref(ki, is), &c__1); i__2 = *n - ki + 1; ii = izamax_(&i__2, &vl_ref(ki, is), &c__1) + ki - 1; i__2 = vl_subscr(ii, is); remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( &vl_ref(ii, is)), abs(d__2))); i__2 = *n - ki + 1; zdscal_(&i__2, &remax, &vl_ref(ki, is), &c__1); i__2 = ki - 1; for (k = 1; k <= i__2; ++k) { i__3 = vl_subscr(k, is); vl[i__3].r = 0., vl[i__3].i = 0.; /* L110: */ } } else { if (ki < *n) { i__2 = *n - ki; z__1.r = scale, z__1.i = 0.; zgemv_("N", n, &i__2, &c_b2, &vl_ref(1, ki + 1), ldvl, & work[ki + 1], &c__1, &z__1, &vl_ref(1, ki), &c__1); } ii = izamax_(n, &vl_ref(1, ki), &c__1); i__2 = vl_subscr(ii, ki); remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( &vl_ref(ii, ki)), abs(d__2))); zdscal_(n, &remax, &vl_ref(1, ki), &c__1); } /* Set back the original diagonal elements of T. */ i__2 = *n; for (k = ki + 1; k <= i__2; ++k) { i__3 = t_subscr(k, k); i__4 = k + *n; t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; /* L120: */ } ++is; L130: ; } } return 0; /* End of ZTREVC */ } /* ztrevc_ */
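/*
 * The core of ZTREVC in miniature (real data, no overflow scaling; values are
 * illustrative): a right eigenvector of an upper-triangular T for eigenvalue
 * T(k,k) by back substitution, with x(k) = 1.
 */
#include <cstdio>

int main() {
    const int n = 3, k = 2;                     // 0-based: eigenvalue T(2,2) = 6
    double T[3][3] = {{1, 2, 3}, {0, 4, 5}, {0, 0, 6}};
    double lam = T[k][k], x[3] = {0, 0, 0};
    x[k] = 1.0;
    for (int i = k - 1; i >= 0; --i) {
        double s = 0.0;
        for (int j = i + 1; j <= k; ++j) s += T[i][j] * x[j];
        x[i] = -s / (T[i][i] - lam);            // needs a distinct diagonal
    }
    for (int i = 0; i < n; ++i) {               // verify T*x = lam*x, row by row
        double tx = 0.0;
        for (int j = 0; j < n; ++j) tx += T[i][j] * x[j];
        std::printf("row %d: (T*x)_i = %g, lam*x_i = %g\n", i, tx, lam * x[i]);
    }
    return 0;
}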
/* Subroutine */ int dget37_(doublereal *rmax, integer *lmax, integer *ninfo, integer *knt, integer *nin) { /* System generated locals */ integer i__1, i__2; doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void); /* Local variables */ static integer ifnd, icmp, iscl, info, lcmp[3], kmin; static doublereal wiin[20], vmax, tnrm, wrin[20], work[1200], vmul, stmp[ 20]; static integer i__, j, m, n; static doublereal s[20], t[400] /* was [20][20] */, v; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static doublereal sepin[20]; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); static doublereal vimin, tolin, vrmin; static integer iwork[40]; static doublereal witmp[20], wrtmp[20]; extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); static doublereal le[400] /* was [20][20] */, re[400] /* was [20][ 20] */; extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); static doublereal wi[20], wr[20]; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static logical select[20]; static doublereal bignum; extern /* Subroutine */ int dhseqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, integer *), dtrsna_(char *, char *, logical *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *, integer *); static doublereal septmp[20], smlnum, val[3], dum[1], eps, sep[20], sin__[ 20], tol, tmp[400] /* was [20][20] */; /* Fortran I/O blocks */ static cilist io___5 = { 0, 0, 0, 0, 0 }; static cilist io___8 = { 0, 0, 0, 0, 0 }; static cilist io___11 = { 0, 0, 0, 0, 0 }; #define t_ref(a_1,a_2) t[(a_2)*20 + a_1 - 21] #define le_ref(a_1,a_2) le[(a_2)*20 + a_1 - 21] #define re_ref(a_1,a_2) re[(a_2)*20 + a_1 - 21] #define tmp_ref(a_1,a_2) tmp[(a_2)*20 + a_1 - 21] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DGET37 tests DTRSNA, a routine for estimating condition numbers of eigenvalues and/or right eigenvectors of a matrix. The test matrices are read from a file with logical unit number NIN. Arguments ========== RMAX (output) DOUBLE PRECISION array, dimension (3) Value of the largest test ratio. RMAX(1) = largest ratio comparing different calls to DTRSNA RMAX(2) = largest error in reciprocal condition numbers taking their conditioning into account RMAX(3) = largest error in reciprocal condition numbers not taking their conditioning into account (may be larger than RMAX(2)) LMAX (output) INTEGER array, dimension (3) LMAX(i) is example number where largest test ratio RMAX(i) is achieved. 
Also: If DGEHRD returns INFO nonzero on example i, LMAX(1)=i If DHSEQR returns INFO nonzero on example i, LMAX(2)=i If DTRSNA returns INFO nonzero on example i, LMAX(3)=i NINFO (output) INTEGER array, dimension (3) NINFO(1) = No. of times DGEHRD returned INFO nonzero NINFO(2) = No. of times DHSEQR returned INFO nonzero NINFO(3) = No. of times DTRSNA returned INFO nonzero KNT (output) INTEGER Total number of examples tested. NIN (input) INTEGER Input logical unit number ===================================================================== Parameter adjustments */ --ninfo; --lmax; --rmax; /* Function Body */ eps = dlamch_("P"); smlnum = dlamch_("S") / eps; bignum = 1. / smlnum; dlabad_(&smlnum, &bignum); /* EPSIN = 2**(-24) = precision to which input data computed */ eps = max(eps,5.9605e-8); rmax[1] = 0.; rmax[2] = 0.; rmax[3] = 0.; lmax[1] = 0; lmax[2] = 0; lmax[3] = 0; *knt = 0; ninfo[1] = 0; ninfo[2] = 0; ninfo[3] = 0; val[0] = sqrt(smlnum); val[1] = 1.; val[2] = sqrt(bignum); /* Read input data until N=0. Assume input eigenvalues are sorted lexicographically (increasing by real part, then decreasing by imaginary part) */ L10: io___5.ciunit = *nin; s_rsle(&io___5); do_lio(&c__3, &c__1, (char *)&n, (ftnlen)sizeof(integer)); e_rsle(); if (n == 0) { return 0; } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { io___8.ciunit = *nin; s_rsle(&io___8); i__2 = n; for (j = 1; j <= i__2; ++j) { do_lio(&c__5, &c__1, (char *)&tmp_ref(i__, j), (ftnlen)sizeof( doublereal)); } e_rsle(); /* L20: */ } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { io___11.ciunit = *nin; s_rsle(&io___11); do_lio(&c__5, &c__1, (char *)&wrin[i__ - 1], (ftnlen)sizeof( doublereal)); do_lio(&c__5, &c__1, (char *)&wiin[i__ - 1], (ftnlen)sizeof( doublereal)); do_lio(&c__5, &c__1, (char *)&sin__[i__ - 1], (ftnlen)sizeof( doublereal)); do_lio(&c__5, &c__1, (char *)&sepin[i__ - 1], (ftnlen)sizeof( doublereal)); e_rsle(); /* L30: */ } tnrm = dlange_("M", &n, &n, tmp, &c__20, work); /* Begin test */ for (iscl = 1; iscl <= 3; ++iscl) { /* Scale input matrix */ ++(*knt); dlacpy_("F", &n, &n, tmp, &c__20, t, &c__20); vmul = val[iscl - 1]; i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { dscal_(&n, &vmul, &t_ref(1, i__), &c__1); /* L40: */ } if (tnrm == 0.) { vmul = 1.; } /* Compute eigenvalues and eigenvectors */ i__1 = 1200 - n; dgehrd_(&n, &c__1, &n, t, &c__20, work, &work[n], &i__1, &info); if (info != 0) { lmax[1] = *knt; ++ninfo[1]; goto L240; } i__1 = n - 2; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = j + 2; i__ <= i__2; ++i__) { t_ref(i__, j) = 0.; /* L50: */ } /* L60: */ } /* Compute Schur form */ dhseqr_("S", "N", &n, &c__1, &n, t, &c__20, wr, wi, dum, &c__1, work, &c__1200, &info); if (info != 0) { lmax[2] = *knt; ++ninfo[2]; goto L240; } /* Compute eigenvectors */ dtrevc_("Both", "All", select, &n, t, &c__20, le, &c__20, re, &c__20, &n, &m, work, &info); /* Compute condition numbers */ dtrsna_("Both", "All", select, &n, t, &c__20, le, &c__20, re, &c__20, s, sep, &n, &m, work, &n, iwork, &info); if (info != 0) { lmax[3] = *knt; ++ninfo[3]; goto L240; } /* Sort eigenvalues and condition numbers lexicographically to compare with inputs */ dcopy_(&n, wr, &c__1, wrtmp, &c__1); dcopy_(&n, wi, &c__1, witmp, &c__1); dcopy_(&n, s, &c__1, stmp, &c__1); dcopy_(&n, sep, &c__1, septmp, &c__1); d__1 = 1. 
/ vmul; dscal_(&n, &d__1, septmp, &c__1); i__1 = n - 1; for (i__ = 1; i__ <= i__1; ++i__) { kmin = i__; vrmin = wrtmp[i__ - 1]; vimin = witmp[i__ - 1]; i__2 = n; for (j = i__ + 1; j <= i__2; ++j) { if (wrtmp[j - 1] < vrmin) { kmin = j; vrmin = wrtmp[j - 1]; vimin = witmp[j - 1]; } /* L70: */ } wrtmp[kmin - 1] = wrtmp[i__ - 1]; witmp[kmin - 1] = witmp[i__ - 1]; wrtmp[i__ - 1] = vrmin; witmp[i__ - 1] = vimin; vrmin = stmp[kmin - 1]; stmp[kmin - 1] = stmp[i__ - 1]; stmp[i__ - 1] = vrmin; vrmin = septmp[kmin - 1]; septmp[kmin - 1] = septmp[i__ - 1]; septmp[i__ - 1] = vrmin; /* L80: */ } /* Compare condition numbers for eigenvalues taking their condition numbers into account Computing MAX */ d__1 = (doublereal) n * 2. * eps * tnrm; v = max(d__1,smlnum); if (tnrm == 0.) { v = 1.; } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if (v > septmp[i__ - 1]) { tol = 1.; } else { tol = v / septmp[i__ - 1]; } if (v > sepin[i__ - 1]) { tolin = 1.; } else { tolin = v / sepin[i__ - 1]; } /* Computing MAX */ d__1 = tol, d__2 = smlnum / eps; tol = max(d__1,d__2); /* Computing MAX */ d__1 = tolin, d__2 = smlnum / eps; tolin = max(d__1,d__2); if (eps * (sin__[i__ - 1] - tolin) > stmp[i__ - 1] + tol) { vmax = 1. / eps; } else if (sin__[i__ - 1] - tolin > stmp[i__ - 1] + tol) { vmax = (sin__[i__ - 1] - tolin) / (stmp[i__ - 1] + tol); } else if (sin__[i__ - 1] + tolin < eps * (stmp[i__ - 1] - tol)) { vmax = 1. / eps; } else if (sin__[i__ - 1] + tolin < stmp[i__ - 1] - tol) { vmax = (stmp[i__ - 1] - tol) / (sin__[i__ - 1] + tolin); } else { vmax = 1.; } if (vmax > rmax[2]) { rmax[2] = vmax; if (ninfo[2] == 0) { lmax[2] = *knt; } } /* L90: */ } /* Compare condition numbers for eigenvectors taking their condition numbers into account */ i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if (v > septmp[i__ - 1] * stmp[i__ - 1]) { tol = septmp[i__ - 1]; } else { tol = v / stmp[i__ - 1]; } if (v > sepin[i__ - 1] * sin__[i__ - 1]) { tolin = sepin[i__ - 1]; } else { tolin = v / sin__[i__ - 1]; } /* Computing MAX */ d__1 = tol, d__2 = smlnum / eps; tol = max(d__1,d__2); /* Computing MAX */ d__1 = tolin, d__2 = smlnum / eps; tolin = max(d__1,d__2); if (eps * (sepin[i__ - 1] - tolin) > septmp[i__ - 1] + tol) { vmax = 1. / eps; } else if (sepin[i__ - 1] - tolin > septmp[i__ - 1] + tol) { vmax = (sepin[i__ - 1] - tolin) / (septmp[i__ - 1] + tol); } else if (sepin[i__ - 1] + tolin < eps * (septmp[i__ - 1] - tol)) { vmax = 1. / eps; } else if (sepin[i__ - 1] + tolin < septmp[i__ - 1] - tol) { vmax = (septmp[i__ - 1] - tol) / (sepin[i__ - 1] + tolin); } else { vmax = 1.; } if (vmax > rmax[2]) { rmax[2] = vmax; if (ninfo[2] == 0) { lmax[2] = *knt; } } /* L100: */ } /* Compare condition numbers for eigenvalues without taking their condition numbers into account */ i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if (sin__[i__ - 1] <= (doublereal) (n << 1) * eps && stmp[i__ - 1] <= (doublereal) (n << 1) * eps) { vmax = 1.; } else if (eps * sin__[i__ - 1] > stmp[i__ - 1]) { vmax = 1. / eps; } else if (sin__[i__ - 1] > stmp[i__ - 1]) { vmax = sin__[i__ - 1] / stmp[i__ - 1]; } else if (sin__[i__ - 1] < eps * stmp[i__ - 1]) { vmax = 1. 
/ eps; } else if (sin__[i__ - 1] < stmp[i__ - 1]) { vmax = stmp[i__ - 1] / sin__[i__ - 1]; } else { vmax = 1.; } if (vmax > rmax[3]) { rmax[3] = vmax; if (ninfo[3] == 0) { lmax[3] = *knt; } } /* L110: */ } /* Compare condition numbers for eigenvectors without taking their condition numbers into account */ i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if (sepin[i__ - 1] <= v && septmp[i__ - 1] <= v) { vmax = 1.; } else if (eps * sepin[i__ - 1] > septmp[i__ - 1]) { vmax = 1. / eps; } else if (sepin[i__ - 1] > septmp[i__ - 1]) { vmax = sepin[i__ - 1] / septmp[i__ - 1]; } else if (sepin[i__ - 1] < eps * septmp[i__ - 1]) { vmax = 1. / eps; } else if (sepin[i__ - 1] < septmp[i__ - 1]) { vmax = septmp[i__ - 1] / sepin[i__ - 1]; } else { vmax = 1.; } if (vmax > rmax[3]) { rmax[3] = vmax; if (ninfo[3] == 0) { lmax[3] = *knt; } } /* L120: */ } /* Compute eigenvalue condition numbers only and compare */ vmax = 0.; dum[0] = -1.; dcopy_(&n, dum, &c__0, stmp, &c__1); dcopy_(&n, dum, &c__0, septmp, &c__1); dtrsna_("Eigcond", "All", select, &n, t, &c__20, le, &c__20, re, & c__20, stmp, septmp, &n, &m, work, &n, iwork, &info); if (info != 0) { lmax[3] = *knt; ++ninfo[3]; goto L240; } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if (stmp[i__ - 1] != s[i__ - 1]) { vmax = 1. / eps; } if (septmp[i__ - 1] != dum[0]) { vmax = 1. / eps; } /* L130: */ } /* Compute eigenvector condition numbers only and compare */ dcopy_(&n, dum, &c__0, stmp, &c__1); dcopy_(&n, dum, &c__0, septmp, &c__1); dtrsna_("Veccond", "All", select, &n, t, &c__20, le, &c__20, re, & c__20, stmp, septmp, &n, &m, work, &n, iwork, &info); if (info != 0) { lmax[3] = *knt; ++ninfo[3]; goto L240; } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if (stmp[i__ - 1] != dum[0]) { vmax = 1. / eps; } if (septmp[i__ - 1] != sep[i__ - 1]) { vmax = 1. / eps; } /* L140: */ } /* Compute all condition numbers using SELECT and compare */ i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { select[i__ - 1] = TRUE_; /* L150: */ } dcopy_(&n, dum, &c__0, stmp, &c__1); dcopy_(&n, dum, &c__0, septmp, &c__1); dtrsna_("Bothcond", "Some", select, &n, t, &c__20, le, &c__20, re, & c__20, stmp, septmp, &n, &m, work, &n, iwork, &info); if (info != 0) { lmax[3] = *knt; ++ninfo[3]; goto L240; } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if (septmp[i__ - 1] != sep[i__ - 1]) { vmax = 1. / eps; } if (stmp[i__ - 1] != s[i__ - 1]) { vmax = 1. / eps; } /* L160: */ } /* Compute eigenvalue condition numbers using SELECT and compare */ dcopy_(&n, dum, &c__0, stmp, &c__1); dcopy_(&n, dum, &c__0, septmp, &c__1); dtrsna_("Eigcond", "Some", select, &n, t, &c__20, le, &c__20, re, & c__20, stmp, septmp, &n, &m, work, &n, iwork, &info); if (info != 0) { lmax[3] = *knt; ++ninfo[3]; goto L240; } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if (stmp[i__ - 1] != s[i__ - 1]) { vmax = 1. / eps; } if (septmp[i__ - 1] != dum[0]) { vmax = 1. / eps; } /* L170: */ } /* Compute eigenvector condition numbers using SELECT and compare */ dcopy_(&n, dum, &c__0, stmp, &c__1); dcopy_(&n, dum, &c__0, septmp, &c__1); dtrsna_("Veccond", "Some", select, &n, t, &c__20, le, &c__20, re, & c__20, stmp, septmp, &n, &m, work, &n, iwork, &info); if (info != 0) { lmax[3] = *knt; ++ninfo[3]; goto L240; } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { if (stmp[i__ - 1] != dum[0]) { vmax = 1. / eps; } if (septmp[i__ - 1] != sep[i__ - 1]) { vmax = 1. / eps; } /* L180: */ } if (vmax > rmax[1]) { rmax[1] = vmax; if (ninfo[1] == 0) { lmax[1] = *knt; } } /* Select first real and first complex eigenvalue */ if (wi[0] == 0.) 
{ lcmp[0] = 1; ifnd = 0; i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { if (ifnd == 1 || wi[i__ - 1] == 0.) { select[i__ - 1] = FALSE_; } else { ifnd = 1; lcmp[1] = i__; lcmp[2] = i__ + 1; dcopy_(&n, &re_ref(1, i__), &c__1, &re_ref(1, 2), &c__1); dcopy_(&n, &re_ref(1, i__ + 1), &c__1, &re_ref(1, 3), & c__1); dcopy_(&n, &le_ref(1, i__), &c__1, &le_ref(1, 2), &c__1); dcopy_(&n, &le_ref(1, i__ + 1), &c__1, &le_ref(1, 3), & c__1); } /* L190: */ } if (ifnd == 0) { icmp = 1; } else { icmp = 3; } } else { lcmp[0] = 1; lcmp[1] = 2; ifnd = 0; i__1 = n; for (i__ = 3; i__ <= i__1; ++i__) { if (ifnd == 1 || wi[i__ - 1] != 0.) { select[i__ - 1] = FALSE_; } else { lcmp[2] = i__; ifnd = 1; dcopy_(&n, &re_ref(1, i__), &c__1, &re_ref(1, 3), &c__1); dcopy_(&n, &le_ref(1, i__), &c__1, &le_ref(1, 3), &c__1); } /* L200: */ } if (ifnd == 0) { icmp = 2; } else { icmp = 3; } } /* Compute all selected condition numbers */ dcopy_(&icmp, dum, &c__0, stmp, &c__1); dcopy_(&icmp, dum, &c__0, septmp, &c__1); dtrsna_("Bothcond", "Some", select, &n, t, &c__20, le, &c__20, re, & c__20, stmp, septmp, &n, &m, work, &n, iwork, &info); if (info != 0) { lmax[3] = *knt; ++ninfo[3]; goto L240; } i__1 = icmp; for (i__ = 1; i__ <= i__1; ++i__) { j = lcmp[i__ - 1]; if (septmp[i__ - 1] != sep[j - 1]) { vmax = 1. / eps; } if (stmp[i__ - 1] != s[j - 1]) { vmax = 1. / eps; } /* L210: */ } /* Compute selected eigenvalue condition numbers */ dcopy_(&icmp, dum, &c__0, stmp, &c__1); dcopy_(&icmp, dum, &c__0, septmp, &c__1); dtrsna_("Eigcond", "Some", select, &n, t, &c__20, le, &c__20, re, & c__20, stmp, septmp, &n, &m, work, &n, iwork, &info); if (info != 0) { lmax[3] = *knt; ++ninfo[3]; goto L240; } i__1 = icmp; for (i__ = 1; i__ <= i__1; ++i__) { j = lcmp[i__ - 1]; if (stmp[i__ - 1] != s[j - 1]) { vmax = 1. / eps; } if (septmp[i__ - 1] != dum[0]) { vmax = 1. / eps; } /* L220: */ } /* Compute selected eigenvector condition numbers */ dcopy_(&icmp, dum, &c__0, stmp, &c__1); dcopy_(&icmp, dum, &c__0, septmp, &c__1); dtrsna_("Veccond", "Some", select, &n, t, &c__20, le, &c__20, re, & c__20, stmp, septmp, &n, &m, work, &n, iwork, &info); if (info != 0) { lmax[3] = *knt; ++ninfo[3]; goto L240; } i__1 = icmp; for (i__ = 1; i__ <= i__1; ++i__) { j = lcmp[i__ - 1]; if (stmp[i__ - 1] != dum[0]) { vmax = 1. / eps; } if (septmp[i__ - 1] != sep[j - 1]) { vmax = 1. / eps; } /* L230: */ } if (vmax > rmax[1]) { rmax[1] = vmax; if (ninfo[1] == 0) { lmax[1] = *knt; } } L240: ; } goto L10; /* End of DGET37 */ } /* dget37_ */
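/* DGET37 compares the quantities S(i) and SEP(i) returned by DTRSNA against reference values read from the input file. S(i) is the reciprocal condition number of the i-th eigenvalue: |v'*u| / (norm(u)*norm(v)) for right and left eigenvectors u and v, i.e. the cosine of the angle between them. A minimal stand-alone sketch of that definition for a real eigenpair follows; eig_rcond is a hypothetical helper name, not part of the test suite. */
#include <math.h>
#include <stdio.h>

static double eig_rcond(int n, const double *u, const double *v)
{
    double dot = 0.0, nu = 0.0, nv = 0.0;
    int i;
    for (i = 0; i < n; ++i) {
        dot += v[i] * u[i];   /* v'*u */
        nu  += u[i] * u[i];
        nv  += v[i] * v[i];
    }
    /* Near-orthogonal left/right eigenvectors give a small s, meaning an
       ill-conditioned eigenvalue; s = 1 means perfectly conditioned. */
    return fabs(dot) / (sqrt(nu) * sqrt(nv));
}

int main(void)
{
    /* For a symmetric matrix the left and right eigenvectors coincide,
       so every eigenvalue is perfectly conditioned: s = 1. */
    double u[2] = { 1.0, 1.0 }, v[2] = { 1.0, 1.0 };
    printf("s = %g\n", eig_rcond(2, u, v));
    return 0;
}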