void QuaternionDemo::paintGL()
{
	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	gluPerspective(60.0, (double)width() / height(), 0.1, 10);

	glMatrixMode(GL_MODELVIEW);
	glLoadIdentity();
	glTranslatef(0, 0, -5);

	glRotatef(-90, 1, 0, 0);
	glRotatef(90, 0, 0, 1);

	Eigen::Transform<float, 3, Eigen::Affine> t_ref(ref.conjugate());
	glMultMatrixf(t_ref.data());
	Eigen::Transform<float, 3, Eigen::Affine> t_q(q);
	glMultMatrixf(t_q.data());

	glEnable(GL_DEPTH_TEST);
	glBegin(GL_QUADS);
		glColor3f(1, 1, 1);
		glVertex3f(-1, -1, -1);
		glVertex3f(1, -1, -1);
		glVertex3f(1, 1, -1);
		glVertex3f(-1, 1, -1);

		glColor3f(1, 0, 0);
		glVertex3f(-1, -1, 1);
		glVertex3f(1, -1, 1);
		glVertex3f(1, 1, 1);
		glVertex3f(-1, 1, 1);

		glColor3f(0, 1, 0);
		glVertex3f(-1, -1, 1);
		glVertex3f(-1, -1, -1);
		glVertex3f(-1, 1, -1);
		glVertex3f(-1, 1, 1);

		glColor3f(1, 0.5f, 0);
		glVertex3f(1, -1, 1);
		glVertex3f(1, -1, -1);
		glVertex3f(1, 1, -1);
		glVertex3f(1, 1, 1);

		glColor3f(0, 0, 1);
		glVertex3f(-1, -1, 1);
		glVertex3f(-1, -1, -1);
		glVertex3f(1, -1, -1);
		glVertex3f(1, -1, 1);

		glColor3f(1, 0, 0.5f);
		glVertex3f(-1, 1, 1);
		glVertex3f(-1, 1, -1);
		glVertex3f(1, 1, -1);
		glVertex3f(1, 1, 1);
	glEnd();
}
void
ColumnMajorMatrixTest::setUp()
{
  // Define commonly used matrices for testing
  a = new ColumnMajorMatrix(3, 3);
  ColumnMajorMatrix & a_ref = *a;

  a_ref(0, 0) = 1;
  a_ref(1, 0) = 2;
  a_ref(2, 0) = 3;
  a_ref(0, 1) = 4;
  a_ref(1, 1) = 5;
  a_ref(2, 1) = 6;
  a_ref(0, 2) = 7;
  a_ref(1, 2) = 8;
  a_ref(2, 2) = 9;

  t = new ColumnMajorMatrix(3, 2);
  ColumnMajorMatrix & t_ref = *t;

  t_ref(0, 0) = 1;
  t_ref(1, 0) = 2;
  t_ref(2, 0) = 3;
  t_ref(0, 1) = 4;
  t_ref(1, 1) = 5;
  t_ref(2, 1) = 6;

  two_mat = new ColumnMajorMatrix(2, 2);
  ColumnMajorMatrix & mat = *two_mat;

  mat(0, 0) = 1;
  mat(1, 0) = 2;
  mat(0, 1) = 3;
  mat(1, 1) = 4;

  add = new ColumnMajorMatrix(3, 2);
  add_solution = new ColumnMajorMatrix(3, 2);
  ColumnMajorMatrix & add_ref = *add;
  ColumnMajorMatrix & a_sol_ref = *add_solution;

  add_ref(0, 0) = 6; a_sol_ref(0, 0) = 7;
  add_ref(1, 0) = 5; a_sol_ref(1, 0) = 7;
  add_ref(2, 0) = 4; a_sol_ref(2, 0) = 7;
  add_ref(0, 1) = 1; a_sol_ref(0, 1) = 5;
  add_ref(1, 1) = 1; a_sol_ref(1, 1) = 6;
  add_ref(2, 1) = 1; a_sol_ref(2, 1) = 7;

  sub = new ColumnMajorMatrix(3, 2);
  sub_solution = new ColumnMajorMatrix(3, 2);
  ColumnMajorMatrix & sub_ref = *sub;
  ColumnMajorMatrix & s_sol_ref = *sub_solution;

  sub_ref(0, 0) = 0; s_sol_ref(0, 0) = 1;
  sub_ref(1, 0) = 1; s_sol_ref(1, 0) = 1;
  sub_ref(2, 0) = 2; s_sol_ref(2, 0) = 1;
  sub_ref(0, 1) = 1; s_sol_ref(0, 1) = 3;
  sub_ref(1, 1) = 1; s_sol_ref(1, 1) = 4;
  sub_ref(2, 1) = 1; s_sol_ref(2, 1) = 5;
}
	baseType escapeNoiseNeuron::callBack ( unsigned int eventSignature )
	{

		if ( eventSignature == _fire_ )
		{
			this->fire();
			return this->time + t_ref() + gslNoise::getExponential ( exp ( -   escapeNoise_a() * pot + escapeNoise_b() ) );
		}

		else // if (eventSignature == _exciteRandomly_)
		{
			this->excite ( pcoBase::noiseIntensity() );
			return this->time + gslNoise::getExponential ( pcoBase::noiseFrequency() );

		}

	}
Exemple #4
0
void CFDDataPack::reinitViscous()
{
	grad_rho = duh[0];
	grad_mom = RealTensor(duh[1], duh[2], duh[3]);

	for (int a = 0; a < 3; ++a)
		for (int b = 0; b < 3; ++b)
			grad_vel(a, b) = (grad_mom(a, b) - vel(a)*grad_rho(b))/r;

	tau = grad_vel + grad_vel.transpose();
	vel_div =  grad_vel.tr();
	tau(0, 0) -= 2./3*vel_div; tau(1, 1) -= 2./3*vel_div; tau(2, 2) -= 2./3*vel_div;

	if(_cfd_problem._vis_type == 0) vis = 0.0;
	else if(_cfd_problem._vis_type == 1) vis = 1.0;
	else if(_cfd_problem._vis_type == 2)
	{
		Real t_ref(288), t_s(110.4);
		vis = pow(t/t_ref, 1.5)*(t_ref+t_s)/(t+t_s);
	}
	else mooseError("不可知的粘性模型");
	tau *= vis/_reynolds;

	grad_enthalpy = (duh[4]-uh[4]/uh[0] * duh[0])/r - grad_vel.transpose() * vel;
	grad_enthalpy *= (vis/_reynolds)*(_gamma/_prandtl);

	invis_flux[0] = r*vel;

	invis_flux[1] = mom(0)*vel;
	invis_flux[1](0) += p;

	invis_flux[2] = mom(1)*vel;
	invis_flux[2](1) += p;

	invis_flux[3] = mom(2)*vel;
	invis_flux[3](2) += p;

	invis_flux[4] = r*h*vel;

	vis_flux[0].zero();
	vis_flux[1] = tau.row(0);
	vis_flux[2] = tau.row(1);
	vis_flux[3] = tau.row(2);
	vis_flux[4] = tau * vel + grad_enthalpy;
}
			void pulseCoupledExcitatoryNeuron::excite ( baseType c )           // gibt zurück, ob das Neuron sofort feuert.
			{
//				numberOfExcitations++;
				if ( nextFiring() != numeric_limits<baseType>::max() )
					return;
				if (!refractory)
				{

					baseType phase = dynNode::time - lastFiring;
					baseType pot = exp ( -  leakage() * phase );
					if ( pot + c > 1 )
						eventHandler::decreaseKey(_fire_, dynNode::time + timeDelay());

					else
						lastFiring = dynNode::time + 1.0/ leakage() * log (pot  + c );
				}
				if ( refractory && dynNode::time - lastFiring > t_ref())
				{
					refractory = false;
					lastFiring = dynNode::time +  1.0 / leakage() * log ( c );
				}
			}
			baseType pulseCoupledExcitatoryNeuron::getState() 
			{ 	
				if (nextFiring() == numeric_limits<baseType>::max())
				{
					if (refractory)
					{
					       if (dynNode::time - lastFiring < t_ref())
						       return params<baseType>::getParams(0);
					       else
						       return 0;
					}
					else
						return ( exp ( leakage() * ( -dynNode::time + lastFiring )) );	


				}
				else	
					return 1.0;
//				baseType res = numberOfExcitations;
//				numberOfExcitations = 0;
//				return res;
			}
Exemple #7
0
/* Subroutine */ int ctrsen_(char *job, char *compq, logical *select, integer 
	*n, complex *t, integer *ldt, complex *q, integer *ldq, complex *w, 
	integer *m, real *s, real *sep, complex *work, integer *lwork, 
	integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CTRSEN reorders the Schur factorization of a complex matrix   
    A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in   
    the leading positions on the diagonal of the upper triangular matrix   
    T, and the leading columns of Q form an orthonormal basis of the   
    corresponding right invariant subspace.   

    Optionally the routine computes the reciprocal condition numbers of   
    the cluster of eigenvalues and/or the invariant subspace.   

    Arguments   
    =========   

    JOB     (input) CHARACTER*1   
            Specifies whether condition numbers are required for the   
            cluster of eigenvalues (S) or the invariant subspace (SEP):   
            = 'N': none;   
            = 'E': for eigenvalues only (S);   
            = 'V': for invariant subspace only (SEP);   
            = 'B': for both eigenvalues and invariant subspace (S and   
                   SEP).   

    COMPQ   (input) CHARACTER*1   
            = 'V': update the matrix Q of Schur vectors;   
            = 'N': do not update Q.   

    SELECT  (input) LOGICAL array, dimension (N)   
            SELECT specifies the eigenvalues in the selected cluster. To   
            select the j-th eigenvalue, SELECT(j) must be set to .TRUE..   

    N       (input) INTEGER   
            The order of the matrix T. N >= 0.   

    T       (input/output) COMPLEX array, dimension (LDT,N)   
            On entry, the upper triangular matrix T.   
            On exit, T is overwritten by the reordered matrix T, with the   
            selected eigenvalues as the leading diagonal elements.   

    LDT     (input) INTEGER   
            The leading dimension of the array T. LDT >= max(1,N).   

    Q       (input/output) COMPLEX array, dimension (LDQ,N)   
            On entry, if COMPQ = 'V', the matrix Q of Schur vectors.   
            On exit, if COMPQ = 'V', Q has been postmultiplied by the   
            unitary transformation matrix which reorders T; the leading M   
            columns of Q form an orthonormal basis for the specified   
            invariant subspace.   
            If COMPQ = 'N', Q is not referenced.   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.   
            LDQ >= 1; and if COMPQ = 'V', LDQ >= N.   

    W       (output) COMPLEX array, dimension (N)   
            The reordered eigenvalues of T, in the same order as they   
            appear on the diagonal of T.   

    M       (output) INTEGER   
            The dimension of the specified invariant subspace.   
            0 <= M <= N.   

    S       (output) REAL   
            If JOB = 'E' or 'B', S is a lower bound on the reciprocal   
            condition number for the selected cluster of eigenvalues.   
            S cannot underestimate the true reciprocal condition number   
            by more than a factor of sqrt(N). If M = 0 or N, S = 1.   
            If JOB = 'N' or 'V', S is not referenced.   

    SEP     (output) REAL   
            If JOB = 'V' or 'B', SEP is the estimated reciprocal   
            condition number of the specified invariant subspace. If   
            M = 0 or N, SEP = norm(T).   
            If JOB = 'N' or 'E', SEP is not referenced.   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            If JOB = 'N', WORK is not referenced.  Otherwise,   
            on exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.   
            If JOB = 'N', LWORK >= 1;   
            if JOB = 'E', LWORK = M*(N-M);   
            if JOB = 'V' or 'B', LWORK >= 2*M*(N-M).   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    CTRSEN first collects the selected eigenvalues by computing a unitary   
    transformation Z to move them to the top left corner of T. In other   
    words, the selected eigenvalues are the eigenvalues of T11 in:   

                  Z'*T*Z = ( T11 T12 ) n1   
                           (  0  T22 ) n2   
                              n1  n2   

    where N = n1+n2 and Z' means the conjugate transpose of Z. The first   
    n1 columns of Z span the specified invariant subspace of T.   

    If T has been obtained from the Schur factorization of a matrix   
    A = Q*T*Q', then the reordered Schur factorization of A is given by   
    A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span the   
    corresponding invariant subspace of A.   

    The reciprocal condition number of the average of the eigenvalues of   
    T11 may be returned in S. S lies between 0 (very badly conditioned)   
    and 1 (very well conditioned). It is computed as follows. First we   
    compute R so that   

                           P = ( I  R ) n1   
                               ( 0  0 ) n2   
                                 n1 n2   

    is the projector on the invariant subspace associated with T11.   
    R is the solution of the Sylvester equation:   

                          T11*R - R*T22 = T12.   

    Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote   
    the two-norm of M. Then S is computed as the lower bound   

                        (1 + F-norm(R)**2)**(-1/2)   

    on the reciprocal of 2-norm(P), the true reciprocal condition number.   
    S cannot underestimate 1 / 2-norm(P) by more than a factor of   
    sqrt(N).   

    An approximate error bound for the computed average of the   
    eigenvalues of T11 is   

                           EPS * norm(T) / S   

    where EPS is the machine precision.   

    The reciprocal condition number of the right invariant subspace   
    spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP.   
    SEP is defined as the separation of T11 and T22:   

                       sep( T11, T22 ) = sigma-min( C )   

    where sigma-min(C) is the smallest singular value of the   
    n1*n2-by-n1*n2 matrix   

       C  = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) )   

    I(m) is an m by m identity matrix, and kprod denotes the Kronecker   
    product. We estimate sigma-min(C) by the reciprocal of an estimate of   
    the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C)   
    cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2).   

    When SEP is small, small changes in T can cause large changes in   
    the invariant subspace. An approximate bound on the maximum angular   
    error in the computed right invariant subspace is   

                        EPS * norm(T) / SEP   

    =====================================================================   


       Decode and test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c_n1 = -1;
    
    /* System generated locals */
    integer q_dim1, q_offset, t_dim1, t_offset, i__1, i__2, i__3;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer kase, ierr, k;
    static real scale;
    extern logical lsame_(char *, char *);
    static integer lwmin;
    static logical wantq, wants;
    static real rnorm;
    static integer n1, n2;
    static real rwork[1];
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *);
    static integer nn, ks;
    extern /* Subroutine */ int clacon_(integer *, complex *, complex *, real 
	    *, integer *), clacpy_(char *, integer *, integer *, complex *, 
	    integer *, complex *, integer *), xerbla_(char *, integer 
	    *);
    static logical wantbh;
    extern /* Subroutine */ int ctrexc_(char *, integer *, complex *, integer 
	    *, complex *, integer *, integer *, integer *, integer *);
    static logical wantsp;
    extern /* Subroutine */ int ctrsyl_(char *, char *, integer *, integer *, 
	    integer *, complex *, integer *, complex *, integer *, complex *, 
	    integer *, real *, integer *);
    static logical lquery;
    static real est;
#define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1
#define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)]


    --select;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --w;
    --work;

    /* Function Body */
    wantbh = lsame_(job, "B");
    wants = lsame_(job, "E") || wantbh;
    wantsp = lsame_(job, "V") || wantbh;
    wantq = lsame_(compq, "V");

/*     Set M to the number of selected eigenvalues. */

    *m = 0;
    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {
	if (select[k]) {
	    ++(*m);
	}
/* L10: */
    }

    n1 = *m;
    n2 = *n - *m;
    nn = n1 * n2;

    *info = 0;
    lquery = *lwork == -1;

    if (wantsp) {
/* Computing MAX */
	i__1 = 1, i__2 = nn << 1;
	lwmin = max(i__1,i__2);
    } else if (lsame_(job, "N")) {
	lwmin = 1;
    } else if (lsame_(job, "E")) {
	lwmin = max(1,nn);
    }

    if (! lsame_(job, "N") && ! wants && ! wantsp) {
	*info = -1;
    } else if (! lsame_(compq, "N") && ! wantq) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*ldt < max(1,*n)) {
	*info = -6;
    } else if (*ldq < 1 || wantq && *ldq < *n) {
	*info = -8;
    } else if (*lwork < lwmin && ! lquery) {
	*info = -14;
    }

    if (*info == 0) {
	work[1].r = (real) lwmin, work[1].i = 0.f;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CTRSEN", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*m == *n || *m == 0) {
	if (wants) {
	    *s = 1.f;
	}
	if (wantsp) {
	    *sep = clange_("1", n, n, &t[t_offset], ldt, rwork);
	}
	goto L40;
    }

/*     Collect the selected eigenvalues at the top left corner of T. */

    ks = 0;
    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {
	if (select[k]) {
	    ++ks;

/*           Swap the K-th eigenvalue to position KS. */

	    if (k != ks) {
		ctrexc_(compq, n, &t[t_offset], ldt, &q[q_offset], ldq, &k, &
			ks, &ierr);
	    }
	}
/* L20: */
    }

    if (wants) {

/*        Solve the Sylvester equation for R:   

             T11*R - R*T22 = scale*T12 */

	clacpy_("F", &n1, &n2, &t_ref(1, n1 + 1), ldt, &work[1], &n1);
	ctrsyl_("N", "N", &c_n1, &n1, &n2, &t[t_offset], ldt, &t_ref(n1 + 1, 
		n1 + 1), ldt, &work[1], &n1, &scale, &ierr);

/*        Estimate the reciprocal of the condition number of the cluster   
          of eigenvalues. */

	rnorm = clange_("F", &n1, &n2, &work[1], &n1, rwork);
	if (rnorm == 0.f) {
	    *s = 1.f;
	} else {
	    *s = scale / (sqrt(scale * scale / rnorm + rnorm) * sqrt(rnorm));
	}
    }

    if (wantsp) {

/*        Estimate sep(T11,T22). */

	est = 0.f;
	kase = 0;
L30:
	clacon_(&nn, &work[nn + 1], &work[1], &est, &kase);
	if (kase != 0) {
	    if (kase == 1) {

/*              Solve T11*R - R*T22 = scale*X. */

		ctrsyl_("N", "N", &c_n1, &n1, &n2, &t[t_offset], ldt, &t_ref(
			n1 + 1, n1 + 1), ldt, &work[1], &n1, &scale, &ierr);
	    } else {

/*              Solve T11'*R - R*T22' = scale*X. */

		ctrsyl_("C", "C", &c_n1, &n1, &n2, &t[t_offset], ldt, &t_ref(
			n1 + 1, n1 + 1), ldt, &work[1], &n1, &scale, &ierr);
	    }
	    goto L30;
	}

	*sep = scale / est;
    }

L40:

/*     Copy reordered eigenvalues to W. */

    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {
	i__2 = k;
	i__3 = t_subscr(k, k);
	w[i__2].r = t[i__3].r, w[i__2].i = t[i__3].i;
/* L50: */
    }

    work[1].r = (real) lwmin, work[1].i = 0.f;

    return 0;

/*     End of CTRSEN */

} /* ctrsen_ */
/* Subroutine */ int stgex2_(logical *wantq, logical *wantz, integer *n, real 
	*a, integer *lda, real *b, integer *ldb, real *q, integer *ldq, real *
	z__, integer *ldz, integer *j1, integer *n1, integer *n2, real *work, 
	integer *lwork, integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22)   
    of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair   
    (A, B) by an orthogonal equivalence transformation.   

    (A, B) must be in generalized real Schur canonical form (as returned   
    by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2   
    diagonal blocks. B is upper triangular.   

    Optionally, the matrices Q and Z of generalized Schur vectors are   
    updated.   

           Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)'   
           Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)'   


    Arguments   
    =========   

    WANTQ   (input) LOGICAL   
            .TRUE. : update the left transformation matrix Q;   
            .FALSE.: do not update Q.   

    WANTZ   (input) LOGICAL   
            .TRUE. : update the right transformation matrix Z;   
            .FALSE.: do not update Z.   

    N       (input) INTEGER   
            The order of the matrices A and B. N >= 0.   

    A      (input/output) REAL arrays, dimensions (LDA,N)   
            On entry, the matrix A in the pair (A, B).   
            On exit, the updated matrix A.   

    LDA     (input)  INTEGER   
            The leading dimension of the array A. LDA >= max(1,N).   

    B      (input/output) REAL arrays, dimensions (LDB,N)   
            On entry, the matrix B in the pair (A, B).   
            On exit, the updated matrix B.   

    LDB     (input)  INTEGER   
            The leading dimension of the array B. LDB >= max(1,N).   

    Q       (input/output) REAL array, dimension (LDZ,N)   
            On entry, if WANTQ = .TRUE., the orthogonal matrix Q.   
            On exit, the updated matrix Q.   
            Not referenced if WANTQ = .FALSE..   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q. LDQ >= 1.   
            If WANTQ = .TRUE., LDQ >= N.   

    Z       (input/output) REAL array, dimension (LDZ,N)   
            On entry, if WANTZ =.TRUE., the orthogonal matrix Z.   
            On exit, the updated matrix Z.   
            Not referenced if WANTZ = .FALSE..   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z. LDZ >= 1.   
            If WANTZ = .TRUE., LDZ >= N.   

    J1      (input) INTEGER   
            The index to the first block (A11, B11). 1 <= J1 <= N.   

    N1      (input) INTEGER   
            The order of the first block (A11, B11). N1 = 0, 1 or 2.   

    N2      (input) INTEGER   
            The order of the second block (A22, B22). N2 = 0, 1 or 2.   

    WORK    (workspace) REAL array, dimension (LWORK).   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.   
            LWORK >=  MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 )   

    INFO    (output) INTEGER   
              =0: Successful exit   
              >0: If INFO = 1, the transformed matrix (A, B) would be   
                  too far from generalized Schur form; the blocks are   
                  not swapped and (A, B) and (Q, Z) are unchanged.   
                  The problem of swapping is too ill-conditioned.   
              <0: If INFO = -16: LWORK is too small. Appropriate value   
                  for LWORK is returned in WORK(1).   

    Further Details   
    ===============   

    Based on contributions by   
       Bo Kagstrom and Peter Poromaa, Department of Computing Science,   
       Umea University, S-901 87 Umea, Sweden.   

    In the current code both weak and strong stability tests are   
    performed. The user can omit the strong stability test by changing   
    the internal logical parameter WANDS to .FALSE.. See ref. [2] for   
    details.   

    [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the   
        Generalized Real Schur Form of a Regular Matrix Pair (A, B), in   
        M.S. Moonen et al (eds), Linear Algebra for Large Scale and   
        Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.   

    [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified   
        Eigenvalues of a Regular Matrix Pair (A, B) and Condition   
        Estimation: Theory, Algorithms and Software,   
        Report UMINF - 94.04, Department of Computing Science, Umea   
        University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working   
        Note 87. To appear in Numerical Algorithms, 1996.   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static integer c__16 = 16;
    static real c_b3 = 0.f;
    static integer c__0 = 0;
    static integer c__1 = 1;
    static integer c__4 = 4;
    static integer c__2 = 2;
    static real c_b38 = 1.f;
    static real c_b44 = -1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, z_dim1, 
	    z_offset, i__1, i__2;
    real r__1, r__2;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static logical weak;
    static real ddum;
    static integer idum;
    static real taul[4], dsum, taur[4], scpy[16]	/* was [4][4] */, 
	    tcpy[16]	/* was [4][4] */;
    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, 
	    integer *, real *, real *);
    static real f, g;
    static integer i__, m;
    static real s[16]	/* was [4][4] */, t[16]	/* was [4][4] */, scale, 
	    bqra21, brqa21;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real licop[16]	/* was [4][4] */;
    static integer linfo;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static real ircop[16]	/* was [4][4] */, dnorm;
    static integer iwork[4];
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), slagv2_(real *, integer *, real *, integer *, real *, 
	    real *, real *, real *, real *, real *, real *), sgeqr2_(integer *
	    , integer *, real *, integer *, real *, real *, integer *), 
	    sgerq2_(integer *, integer *, real *, integer *, real *, real *, 
	    integer *);
    static real be[2], ai[2];
    extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *), sorgr2_(integer *, 
	    integer *, integer *, real *, integer *, real *, real *, integer *
	    );
    static real ar[2], sa, sb, li[16]	/* was [4][4] */;
    extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *), sormr2_(char *, char *, integer *, 
	    integer *, integer *, real *, integer *, real *, real *, integer *
	    , real *, integer *);
    static real dscale, ir[16]	/* was [4][4] */;
    extern /* Subroutine */ int stgsy2_(char *, integer *, integer *, integer 
	    *, real *, integer *, real *, integer *, real *, integer *, real *
	    , integer *, real *, integer *, real *, integer *, real *, real *,
	     real *, integer *, integer *, integer *);
    static real ss;
    extern doublereal slamch_(char *);
    static real ws;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slartg_(real *, real *, 
	    real *, real *, real *);
    static real thresh;
    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, 
	    real *);
    static real smlnum;
    static logical strong;
    static real eps;
#define scpy_ref(a_1,a_2) scpy[(a_2)*4 + a_1 - 5]
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define s_ref(a_1,a_2) s[(a_2)*4 + a_1 - 5]
#define t_ref(a_1,a_2) t[(a_2)*4 + a_1 - 5]
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
#define li_ref(a_1,a_2) li[(a_2)*4 + a_1 - 5]
#define ir_ref(a_1,a_2) ir[(a_2)*4 + a_1 - 5]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;

    /* Function Body */
    *info = 0;

/*     Quick return if possible */

    if (*n <= 1 || *n1 <= 0 || *n2 <= 0) {
	return 0;
    }
    if (*n1 > *n || *j1 + *n1 > *n) {
	return 0;
    }
    m = *n1 + *n2;
/* Computing MAX */
    i__1 = *n * m, i__2 = m * m << 1;
    if (*lwork < max(i__1,i__2)) {
	*info = -16;
/* Computing MAX */
	i__1 = *n * m, i__2 = m * m << 1;
	work[1] = (real) max(i__1,i__2);
	return 0;
    }

    weak = FALSE_;
    strong = FALSE_;

/*     Make a local copy of selected block */

    scopy_(&c__16, &c_b3, &c__0, li, &c__1);
    scopy_(&c__16, &c_b3, &c__0, ir, &c__1);
    slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, s, &c__4);
    slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, t, &c__4);

/*     Compute threshold for testing acceptance of swapping. */

    eps = slamch_("P");
    smlnum = slamch_("S") / eps;
    dscale = 0.f;
    dsum = 1.f;
    slacpy_("Full", &m, &m, s, &c__4, &work[1], &m);
    i__1 = m * m;
    slassq_(&i__1, &work[1], &c__1, &dscale, &dsum);
    slacpy_("Full", &m, &m, t, &c__4, &work[1], &m);
    i__1 = m * m;
    slassq_(&i__1, &work[1], &c__1, &dscale, &dsum);
    dnorm = dscale * sqrt(dsum);
/* Computing MAX */
    r__1 = eps * 10.f * dnorm;
    thresh = dmax(r__1,smlnum);

    if (m == 2) {

/*        CASE 1: Swap 1-by-1 and 1-by-1 blocks.   

          Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks   
          using Givens rotations and perform the swap tentatively. */

	f = s_ref(2, 2) * t_ref(1, 1) - t_ref(2, 2) * s_ref(1, 1);
	g = s_ref(2, 2) * t_ref(1, 2) - t_ref(2, 2) * s_ref(1, 2);
	sb = (r__1 = t_ref(2, 2), dabs(r__1));
	sa = (r__1 = s_ref(2, 2), dabs(r__1));
	slartg_(&f, &g, &ir_ref(1, 2), &ir_ref(1, 1), &ddum);
	ir_ref(2, 1) = -ir_ref(1, 2);
	ir_ref(2, 2) = ir_ref(1, 1);
	srot_(&c__2, &s_ref(1, 1), &c__1, &s_ref(1, 2), &c__1, &ir_ref(1, 1), 
		&ir_ref(2, 1));
	srot_(&c__2, &t_ref(1, 1), &c__1, &t_ref(1, 2), &c__1, &ir_ref(1, 1), 
		&ir_ref(2, 1));
	if (sa >= sb) {
	    slartg_(&s_ref(1, 1), &s_ref(2, 1), &li_ref(1, 1), &li_ref(2, 1), 
		    &ddum);
	} else {
	    slartg_(&t_ref(1, 1), &t_ref(2, 1), &li_ref(1, 1), &li_ref(2, 1), 
		    &ddum);
	}
	srot_(&c__2, &s_ref(1, 1), &c__4, &s_ref(2, 1), &c__4, &li_ref(1, 1), 
		&li_ref(2, 1));
	srot_(&c__2, &t_ref(1, 1), &c__4, &t_ref(2, 1), &c__4, &li_ref(1, 1), 
		&li_ref(2, 1));
	li_ref(2, 2) = li_ref(1, 1);
	li_ref(1, 2) = -li_ref(2, 1);

/*        Weak stability test:   
             |S21| + |T21| <= O(EPS * F-norm((S, T))) */

	ws = (r__1 = s_ref(2, 1), dabs(r__1)) + (r__2 = t_ref(2, 1), dabs(
		r__2));
	weak = ws <= thresh;
	if (! weak) {
	    goto L70;
	}

	if (TRUE_) {

/*           Strong stability test:   
               F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B))) */

	    slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, &work[m * m + 1], &
		    m);
	    sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, &
		    work[1], &m);
	    sgemm_("N", "T", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, &
		    c_b38, &work[m * m + 1], &m);
	    dscale = 0.f;
	    dsum = 1.f;
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);

	    slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, &work[m * m + 1], &
		    m);
	    sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, &
		    work[1], &m);
	    sgemm_("N", "T", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, &
		    c_b38, &work[m * m + 1], &m);
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);
	    ss = dscale * sqrt(dsum);
	    strong = ss <= thresh;
	    if (! strong) {
		goto L70;
	    }
	}

/*        Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and   
                 (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */

	i__1 = *j1 + 1;
	srot_(&i__1, &a_ref(1, *j1), &c__1, &a_ref(1, *j1 + 1), &c__1, &
		ir_ref(1, 1), &ir_ref(2, 1));
	i__1 = *j1 + 1;
	srot_(&i__1, &b_ref(1, *j1), &c__1, &b_ref(1, *j1 + 1), &c__1, &
		ir_ref(1, 1), &ir_ref(2, 1));
	i__1 = *n - *j1 + 1;
	srot_(&i__1, &a_ref(*j1, *j1), lda, &a_ref(*j1 + 1, *j1), lda, &
		li_ref(1, 1), &li_ref(2, 1));
	i__1 = *n - *j1 + 1;
	srot_(&i__1, &b_ref(*j1, *j1), ldb, &b_ref(*j1 + 1, *j1), ldb, &
		li_ref(1, 1), &li_ref(2, 1));

/*        Set  N1-by-N2 (2,1) - blocks to ZERO. */

	a_ref(*j1 + 1, *j1) = 0.f;
	b_ref(*j1 + 1, *j1) = 0.f;

/*        Accumulate transformations into Q and Z if requested. */

	if (*wantz) {
	    srot_(n, &z___ref(1, *j1), &c__1, &z___ref(1, *j1 + 1), &c__1, &
		    ir_ref(1, 1), &ir_ref(2, 1));
	}
	if (*wantq) {
	    srot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, *j1 + 1), &c__1, &
		    li_ref(1, 1), &li_ref(2, 1));
	}

/*        Exit with INFO = 0 if swap was successfully performed. */

	return 0;

    } else {

/*        CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2   
                  and 2-by-2 blocks.   

          Solve the generalized Sylvester equation   
                   S11 * R - L * S22 = SCALE * S12   
                   T11 * R - L * T22 = SCALE * T12   
          for R and L. Solutions in LI and IR. */

	slacpy_("Full", n1, n2, &t_ref(1, *n1 + 1), &c__4, li, &c__4);
	slacpy_("Full", n1, n2, &s_ref(1, *n1 + 1), &c__4, &ir_ref(*n2 + 1, *
		n1 + 1), &c__4);
	stgsy2_("N", &c__0, n1, n2, s, &c__4, &s_ref(*n1 + 1, *n1 + 1), &c__4,
		 &ir_ref(*n2 + 1, *n1 + 1), &c__4, t, &c__4, &t_ref(*n1 + 1, *
		n1 + 1), &c__4, li, &c__4, &scale, &dsum, &dscale, iwork, &
		idum, &linfo);

/*        Compute orthogonal matrix QL:   

                      QL' * LI = [ TL ]   
                                 [ 0  ]   
          where   
                      LI =  [      -L              ]   
                            [ SCALE * identity(N2) ] */

	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    sscal_(n1, &c_b44, &li_ref(1, i__), &c__1);
	    li_ref(*n1 + i__, i__) = scale;
/* L10: */
	}
	sgeqr2_(&m, n2, li, &c__4, taul, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sorg2r_(&m, &m, n2, li, &c__4, taul, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}

/*        Compute orthogonal matrix RQ:   

                      IR * RQ' =   [ 0  TR],   

           where IR = [ SCALE * identity(N1), R ] */

	i__1 = *n1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    ir_ref(*n2 + i__, i__) = scale;
/* L20: */
	}
	sgerq2_(n1, &m, &ir_ref(*n2 + 1, 1), &c__4, taur, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sorgr2_(&m, &m, n1, ir, &c__4, taur, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}

/*        Perform the swapping tentatively: */

	sgemm_("T", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, &
		work[1], &m);
	sgemm_("N", "T", &m, &m, &m, &c_b38, &work[1], &m, ir, &c__4, &c_b3, 
		s, &c__4);
	sgemm_("T", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, &
		work[1], &m);
	sgemm_("N", "T", &m, &m, &m, &c_b38, &work[1], &m, ir, &c__4, &c_b3, 
		t, &c__4);
	slacpy_("F", &m, &m, s, &c__4, scpy, &c__4);
	slacpy_("F", &m, &m, t, &c__4, tcpy, &c__4);
	slacpy_("F", &m, &m, ir, &c__4, ircop, &c__4);
	slacpy_("F", &m, &m, li, &c__4, licop, &c__4);

/*        Triangularize the B-part by an RQ factorization.   
          Apply transformation (from left) to A-part, giving S. */

	sgerq2_(&m, &m, t, &c__4, taur, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sormr2_("R", "T", &m, &m, &m, t, &c__4, taur, s, &c__4, &work[1], &
		linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sormr2_("L", "N", &m, &m, &m, t, &c__4, taur, ir, &c__4, &work[1], &
		linfo);
	if (linfo != 0) {
	    goto L70;
	}

/*        Compute F-norm(S21) in BRQA21. (T21 is 0.) */

	dscale = 0.f;
	dsum = 1.f;
	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slassq_(n1, &s_ref(*n2 + 1, i__), &c__1, &dscale, &dsum);
/* L30: */
	}
	brqa21 = dscale * sqrt(dsum);

/*        Triangularize the B-part by a QR factorization.   
          Apply transformation (from right) to A-part, giving S. */

	sgeqr2_(&m, &m, tcpy, &c__4, taul, &work[1], &linfo);
	if (linfo != 0) {
	    goto L70;
	}
	sorm2r_("L", "T", &m, &m, &m, tcpy, &c__4, taul, scpy, &c__4, &work[1]
		, info);
	sorm2r_("R", "N", &m, &m, &m, tcpy, &c__4, taul, licop, &c__4, &work[
		1], info);
	if (linfo != 0) {
	    goto L70;
	}

/*        Compute F-norm(S21) in BQRA21. (T21 is 0.) */

	dscale = 0.f;
	dsum = 1.f;
	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    slassq_(n1, &scpy_ref(*n2 + 1, i__), &c__1, &dscale, &dsum);
/* L40: */
	}
	bqra21 = dscale * sqrt(dsum);

/*        Decide which method to use.   
            Weak stability test:   
               F-norm(S21) <= O(EPS * F-norm((S, T))) */

	if (bqra21 <= brqa21 && bqra21 <= thresh) {
	    slacpy_("F", &m, &m, scpy, &c__4, s, &c__4);
	    slacpy_("F", &m, &m, tcpy, &c__4, t, &c__4);
	    slacpy_("F", &m, &m, ircop, &c__4, ir, &c__4);
	    slacpy_("F", &m, &m, licop, &c__4, li, &c__4);
	} else if (brqa21 >= thresh) {
	    goto L70;
	}

/*        Set lower triangle of B-part to zero */

	i__1 = m;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    i__2 = m - i__ + 1;
	    scopy_(&i__2, &c_b3, &c__0, &t_ref(i__, i__ - 1), &c__1);
/* L50: */
	}

	if (TRUE_) {

/*           Strong stability test:   
                F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B))) */

	    slacpy_("Full", &m, &m, &a_ref(*j1, *j1), lda, &work[m * m + 1], &
		    m);
	    sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, s, &c__4, &c_b3, &
		    work[1], &m);
	    sgemm_("N", "N", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, &
		    c_b38, &work[m * m + 1], &m);
	    dscale = 0.f;
	    dsum = 1.f;
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);

	    slacpy_("Full", &m, &m, &b_ref(*j1, *j1), ldb, &work[m * m + 1], &
		    m);
	    sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, t, &c__4, &c_b3, &
		    work[1], &m);
	    sgemm_("N", "N", &m, &m, &m, &c_b44, &work[1], &m, ir, &c__4, &
		    c_b38, &work[m * m + 1], &m);
	    i__1 = m * m;
	    slassq_(&i__1, &work[m * m + 1], &c__1, &dscale, &dsum);
	    ss = dscale * sqrt(dsum);
	    strong = ss <= thresh;
	    if (! strong) {
		goto L70;
	    }

	}

/*        If the swap is accepted ("weakly" and "strongly"), apply the   
          transformations and set N1-by-N2 (2,1)-block to zero. */

	i__1 = *n2;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    scopy_(n1, &c_b3, &c__0, &s_ref(*n2 + 1, i__), &c__1);
/* L60: */
	}

/*        copy back M-by-M diagonal block starting at index J1 of (A, B) */

	slacpy_("F", &m, &m, s, &c__4, &a_ref(*j1, *j1), lda);
	slacpy_("F", &m, &m, t, &c__4, &b_ref(*j1, *j1), ldb);
	scopy_(&c__16, &c_b3, &c__0, t, &c__1);

/*        Standardize existing 2-by-2 blocks. */

	i__1 = m * m;
	scopy_(&i__1, &c_b3, &c__0, &work[1], &c__1);
	work[1] = 1.f;
	t_ref(1, 1) = 1.f;
	idum = *lwork - m * m - 2;
	if (*n2 > 1) {
	    slagv2_(&a_ref(*j1, *j1), lda, &b_ref(*j1, *j1), ldb, ar, ai, be, 
		    &work[1], &work[2], &t_ref(1, 1), &t_ref(2, 1));
	    work[m + 1] = -work[2];
	    work[m + 2] = work[1];
	    t_ref(*n2, *n2) = t_ref(1, 1);
	    t_ref(1, 2) = -t_ref(2, 1);
	}
	work[m * m] = 1.f;
	t_ref(m, m) = 1.f;

	if (*n1 > 1) {
	    slagv2_(&a_ref(*j1 + *n2, *j1 + *n2), lda, &b_ref(*j1 + *n2, *j1 
		    + *n2), ldb, taur, taul, &work[m * m + 1], &work[*n2 * m 
		    + *n2 + 1], &work[*n2 * m + *n2 + 2], &t_ref(*n2 + 1, *n2 
		    + 1), &t_ref(m, m - 1));
	    work[m * m] = work[*n2 * m + *n2 + 1];
	    work[m * m - 1] = -work[*n2 * m + *n2 + 2];
	    t_ref(m, m) = t_ref(*n2 + 1, *n2 + 1);
	    t_ref(m - 1, m) = -t_ref(m, m - 1);
	}
	sgemm_("T", "N", n2, n1, n2, &c_b38, &work[1], &m, &a_ref(*j1, *j1 + *
		n2), lda, &c_b3, &work[m * m + 1], n2);
	slacpy_("Full", n2, n1, &work[m * m + 1], n2, &a_ref(*j1, *j1 + *n2), 
		lda);
	sgemm_("T", "N", n2, n1, n2, &c_b38, &work[1], &m, &b_ref(*j1, *j1 + *
		n2), ldb, &c_b3, &work[m * m + 1], n2);
	slacpy_("Full", n2, n1, &work[m * m + 1], n2, &b_ref(*j1, *j1 + *n2), 
		ldb);
	sgemm_("N", "N", &m, &m, &m, &c_b38, li, &c__4, &work[1], &m, &c_b3, &
		work[m * m + 1], &m);
	slacpy_("Full", &m, &m, &work[m * m + 1], &m, li, &c__4);
	sgemm_("N", "N", n2, n1, n1, &c_b38, &a_ref(*j1, *j1 + *n2), lda, &
		t_ref(*n2 + 1, *n2 + 1), &c__4, &c_b3, &work[1], n2);
	slacpy_("Full", n2, n1, &work[1], n2, &a_ref(*j1, *j1 + *n2), lda);
	sgemm_("N", "N", n2, n1, n1, &c_b38, &b_ref(*j1, *j1 + *n2), lda, &
		t_ref(*n2 + 1, *n2 + 1), &c__4, &c_b3, &work[1], n2);
	slacpy_("Full", n2, n1, &work[1], n2, &b_ref(*j1, *j1 + *n2), ldb);
	sgemm_("T", "N", &m, &m, &m, &c_b38, ir, &c__4, t, &c__4, &c_b3, &
		work[1], &m);
	slacpy_("Full", &m, &m, &work[1], &m, ir, &c__4);

/*        Accumulate transformations into Q and Z if requested. */

	if (*wantq) {
	    sgemm_("N", "N", n, &m, &m, &c_b38, &q_ref(1, *j1), ldq, li, &
		    c__4, &c_b3, &work[1], n);
	    slacpy_("Full", n, &m, &work[1], n, &q_ref(1, *j1), ldq);

	}

	if (*wantz) {
	    sgemm_("N", "N", n, &m, &m, &c_b38, &z___ref(1, *j1), ldz, ir, &
		    c__4, &c_b3, &work[1], n);
	    slacpy_("Full", n, &m, &work[1], n, &z___ref(1, *j1), ldz);

	}

/*        Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and   
                  (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). */

	i__ = *j1 + m;
	if (i__ <= *n) {
	    i__1 = *n - i__ + 1;
	    sgemm_("T", "N", &m, &i__1, &m, &c_b38, li, &c__4, &a_ref(*j1, 
		    i__), lda, &c_b3, &work[1], &m);
	    i__1 = *n - i__ + 1;
	    slacpy_("Full", &m, &i__1, &work[1], &m, &a_ref(*j1, i__), lda);
	    i__1 = *n - i__ + 1;
	    sgemm_("T", "N", &m, &i__1, &m, &c_b38, li, &c__4, &b_ref(*j1, 
		    i__), lda, &c_b3, &work[1], &m);
	    i__1 = *n - i__ + 1;
	    slacpy_("Full", &m, &i__1, &work[1], &m, &b_ref(*j1, i__), lda);
	}
	i__ = *j1 - 1;
	if (i__ > 0) {
	    sgemm_("N", "N", &i__, &m, &m, &c_b38, &a_ref(1, *j1), lda, ir, &
		    c__4, &c_b3, &work[1], &i__);
	    slacpy_("Full", &i__, &m, &work[1], &i__, &a_ref(1, *j1), lda);
	    sgemm_("N", "N", &i__, &m, &m, &c_b38, &b_ref(1, *j1), ldb, ir, &
		    c__4, &c_b3, &work[1], &i__);
	    slacpy_("Full", &i__, &m, &work[1], &i__, &b_ref(1, *j1), ldb);
	}

/*        Exit with INFO = 0 if swap was successfully performed. */

	return 0;

    }

/*     Exit with INFO = 1 if swap was rejected. */

L70:

    *info = 1;
    return 0;

/*     End of STGEX2 */

} /* stgex2_ */
/* Subroutine */ int clahrd_(integer *n, integer *k, integer *nb, complex *a, 
	integer *lda, complex *tau, complex *t, integer *ldt, complex *y, 
	integer *ldy)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1)   
    matrix A so that elements below the k-th subdiagonal are zero. The   
    reduction is performed by a unitary similarity transformation   
    Q' * A * Q. The routine returns the matrices V and T which determine   
    Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T.   

    This is an auxiliary routine called by CGEHRD.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The order of the matrix A.   

    K       (input) INTEGER   
            The offset for the reduction. Elements below the k-th   
            subdiagonal in the first NB columns are reduced to zero.   

    NB      (input) INTEGER   
            The number of columns to be reduced.   

    A       (input/output) COMPLEX array, dimension (LDA,N-K+1)   
            On entry, the n-by-(n-k+1) general matrix A.   
            On exit, the elements on and above the k-th subdiagonal in   
            the first NB columns are overwritten with the corresponding   
            elements of the reduced matrix; the elements below the k-th   
            subdiagonal, with the array TAU, represent the matrix Q as a   
            product of elementary reflectors. The other columns of A are   
            unchanged. See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    TAU     (output) COMPLEX array, dimension (NB)   
            The scalar factors of the elementary reflectors. See Further   
            Details.   

    T       (output) COMPLEX array, dimension (LDT,NB)   
            The upper triangular matrix T.   

    LDT     (input) INTEGER   
            The leading dimension of the array T.  LDT >= NB.   

    Y       (output) COMPLEX array, dimension (LDY,NB)   
            The n-by-nb matrix Y.   

    LDY     (input) INTEGER   
            The leading dimension of the array Y. LDY >= max(1,N).   

    Further Details   
    ===============   

    The matrix Q is represented as a product of nb elementary reflectors   

       Q = H(1) H(2) . . . H(nb).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a complex scalar, and v is a complex vector with   
    v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in   
    A(i+k+1:n,i), and tau in TAU(i).   

    The elements of the vectors v together form the (n-k+1)-by-nb matrix   
    V which is needed, with T and Y, to apply the transformation to the   
    unreduced part of the matrix, using an update of the form:   
    A := (I - V*T*V') * (A - Y*V').   

    The contents of A on exit are illustrated by the following example   
    with n = 7, k = 3 and nb = 2:   

       ( a   h   a   a   a )   
       ( a   h   a   a   a )   
       ( a   h   a   a   a )   
       ( h   h   a   a   a )   
       ( v1  h   a   a   a )   
       ( v1  v2  a   a   a )   
       ( v1  v2  a   a   a )   

    where a denotes an element of the original matrix A, h denotes a   
    modified element of the upper Hessenberg matrix H, and vi denotes an   
    element of the vector defining H(i).   

    =====================================================================   


       Quick return if possible   

       Parameter adjustments */
    /* Table of constant values */
    static complex c_b1 = {0.f,0.f};
    static complex c_b2 = {1.f,0.f};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, 
	    i__3;
    complex q__1;
    /* Local variables */
    static integer i__;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *), cgemv_(char *, integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, complex *, 
	    integer *), ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *), ctrmv_(char *, char *, char *, 
	    integer *, complex *, integer *, complex *, integer *);
    static complex ei;
    extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, 
	    integer *, complex *), clacgv_(integer *, complex *, integer *);
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1
#define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)]
#define y_subscr(a_1,a_2) (a_2)*y_dim1 + a_1
#define y_ref(a_1,a_2) y[y_subscr(a_1,a_2)]


    --tau;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1 * 1;
    y -= y_offset;

    /* Function Body */
    if (*n <= 1) {
	return 0;
    }

    i__1 = *nb;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (i__ > 1) {

/*           Update A(1:n,i)   

             Compute i-th column of A - Y * V' */

	    i__2 = i__ - 1;
	    clacgv_(&i__2, &a_ref(*k + i__ - 1, 1), lda);
	    i__2 = i__ - 1;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &a_ref(
		    *k + i__ - 1, 1), lda, &c_b2, &a_ref(1, i__), &c__1);
	    i__2 = i__ - 1;
	    clacgv_(&i__2, &a_ref(*k + i__ - 1, 1), lda);

/*           Apply I - V * T' * V' to this column (call it b) from the   
             left, using the last column of T as workspace   

             Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)   
                      ( V2 )             ( b2 )   

             where V1 is unit lower triangular   

             w := V1' * b1 */

	    i__2 = i__ - 1;
	    ccopy_(&i__2, &a_ref(*k + 1, i__), &c__1, &t_ref(1, *nb), &c__1);
	    i__2 = i__ - 1;
	    ctrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a_ref(*k + 
		    1, 1), lda, &t_ref(1, *nb), &c__1);

/*           w := w + V2'*b2 */

	    i__2 = *n - *k - i__ + 1;
	    i__3 = i__ - 1;
	    cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a_ref(*k + 
		    i__, 1), lda, &a_ref(*k + i__, i__), &c__1, &c_b2, &t_ref(
		    1, *nb), &c__1);

/*           w := T'*w */

	    i__2 = i__ - 1;
	    ctrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[
		    t_offset], ldt, &t_ref(1, *nb), &c__1);

/*           b2 := b2 - V2*w */

	    i__2 = *n - *k - i__ + 1;
	    i__3 = i__ - 1;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgemv_("No transpose", &i__2, &i__3, &q__1, &a_ref(*k + i__, 1), 
		    lda, &t_ref(1, *nb), &c__1, &c_b2, &a_ref(*k + i__, i__), 
		    &c__1);

/*           b1 := b1 - V1*w */

	    i__2 = i__ - 1;
	    ctrmv_("Lower", "No transpose", "Unit", &i__2, &a_ref(*k + 1, 1), 
		    lda, &t_ref(1, *nb), &c__1);
	    i__2 = i__ - 1;
	    q__1.r = -1.f, q__1.i = 0.f;
	    caxpy_(&i__2, &q__1, &t_ref(1, *nb), &c__1, &a_ref(*k + 1, i__), &
		    c__1);

	    i__2 = a_subscr(*k + i__ - 1, i__ - 1);
	    a[i__2].r = ei.r, a[i__2].i = ei.i;
	}

/*        Generate the elementary reflector H(i) to annihilate   
          A(k+i+1:n,i) */

	i__2 = a_subscr(*k + i__, i__);
	ei.r = a[i__2].r, ei.i = a[i__2].i;
/* Computing MIN */
	i__2 = *k + i__ + 1;
	i__3 = *n - *k - i__ + 1;
	clarfg_(&i__3, &ei, &a_ref(min(i__2,*n), i__), &c__1, &tau[i__]);
	i__2 = a_subscr(*k + i__, i__);
	a[i__2].r = 1.f, a[i__2].i = 0.f;

/*        Compute  Y(1:n,i) */

	i__2 = *n - *k - i__ + 1;
	cgemv_("No transpose", n, &i__2, &c_b2, &a_ref(1, i__ + 1), lda, &
		a_ref(*k + i__, i__), &c__1, &c_b1, &y_ref(1, i__), &c__1);
	i__2 = *n - *k - i__ + 1;
	i__3 = i__ - 1;
	cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a_ref(*k + i__, 1)
		, lda, &a_ref(*k + i__, i__), &c__1, &c_b1, &t_ref(1, i__), &
		c__1);
	i__2 = i__ - 1;
	q__1.r = -1.f, q__1.i = 0.f;
	cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &t_ref(1, 
		i__), &c__1, &c_b2, &y_ref(1, i__), &c__1);
	cscal_(n, &tau[i__], &y_ref(1, i__), &c__1);

/*        Compute T(1:i,i) */

	i__2 = i__ - 1;
	i__3 = i__;
	q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
	cscal_(&i__2, &q__1, &t_ref(1, i__), &c__1);
	i__2 = i__ - 1;
	ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, 
		&t_ref(1, i__), &c__1);
	i__2 = t_subscr(i__, i__);
	i__3 = i__;
	t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;

/* L10: */
    }
    i__1 = a_subscr(*k + *nb, *nb);
    a[i__1].r = ei.r, a[i__1].i = ei.i;

    return 0;

/*     End of CLAHRD */

} /* clahrd_ */
/* Subroutine */ int ctrexc_(char *compq, integer *n, complex *t, integer *
	ldt, complex *q, integer *ldq, integer *ifst, integer *ilst, integer *
	info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    CTREXC reorders the Schur factorization of a complex matrix   
    A = Q*T*Q**H, so that the diagonal element of T with row index IFST   
    is moved to row ILST.   

    The Schur form T is reordered by a unitary similarity transformation   
    Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by   
    postmultplying it with Z.   

    Arguments   
    =========   

    COMPQ   (input) CHARACTER*1   
            = 'V':  update the matrix Q of Schur vectors;   
            = 'N':  do not update Q.   

    N       (input) INTEGER   
            The order of the matrix T. N >= 0.   

    T       (input/output) COMPLEX array, dimension (LDT,N)   
            On entry, the upper triangular matrix T.   
            On exit, the reordered upper triangular matrix.   

    LDT     (input) INTEGER   
            The leading dimension of the array T. LDT >= max(1,N).   

    Q       (input/output) COMPLEX array, dimension (LDQ,N)   
            On entry, if COMPQ = 'V', the matrix Q of Schur vectors.   
            On exit, if COMPQ = 'V', Q has been postmultiplied by the   
            unitary transformation matrix Z which reorders T.   
            If COMPQ = 'N', Q is not referenced.   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.  LDQ >= max(1,N).   

    IFST    (input) INTEGER   
    ILST    (input) INTEGER   
            Specify the reordering of the diagonal elements of T:   
            The element with row index IFST is moved to row ILST by a   
            sequence of transpositions between adjacent elements.   
            1 <= IFST <= N; 1 <= ILST <= N.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    =====================================================================   


       Decode and test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    
    /* System generated locals */
    integer q_dim1, q_offset, t_dim1, t_offset, i__1, i__2, i__3;
    complex q__1;
    /* Builtin functions */
    void r_cnjg(complex *, complex *);
    /* Local variables */
    static complex temp;
    extern /* Subroutine */ int crot_(integer *, complex *, integer *, 
	    complex *, integer *, real *, complex *);
    static integer k;
    extern logical lsame_(char *, char *);
    static logical wantq;
    static integer m1, m2, m3;
    static real cs;
    static complex t11, t22, sn;
    extern /* Subroutine */ int clartg_(complex *, complex *, real *, complex 
	    *, complex *), xerbla_(char *, integer *);
#define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1
#define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)]
#define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1
#define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)]


    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;

    /* Function Body */
    *info = 0;
    wantq = lsame_(compq, "V");
    if (! lsame_(compq, "N") && ! wantq) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*ldt < max(1,*n)) {
	*info = -4;
    } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) {
	*info = -6;
    } else if (*ifst < 1 || *ifst > *n) {
	*info = -7;
    } else if (*ilst < 1 || *ilst > *n) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CTREXC", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 1 || *ifst == *ilst) {
	return 0;
    }

    if (*ifst < *ilst) {

/*        Move the IFST-th diagonal element forward down the diagonal. */

	m1 = 0;
	m2 = -1;
	m3 = 1;
    } else {

/*        Move the IFST-th diagonal element backward up the diagonal. */

	m1 = -1;
	m2 = 0;
	m3 = -1;
    }

    i__1 = *ilst + m2;
    i__2 = m3;
    for (k = *ifst + m1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {

/*        Interchange the k-th and (k+1)-th diagonal elements. */

	i__3 = t_subscr(k, k);
	t11.r = t[i__3].r, t11.i = t[i__3].i;
	i__3 = t_subscr(k + 1, k + 1);
	t22.r = t[i__3].r, t22.i = t[i__3].i;

/*        Determine the transformation to perform the interchange. */

	q__1.r = t22.r - t11.r, q__1.i = t22.i - t11.i;
	clartg_(&t_ref(k, k + 1), &q__1, &cs, &sn, &temp);

/*        Apply transformation to the matrix T. */

	if (k + 2 <= *n) {
	    i__3 = *n - k - 1;
	    crot_(&i__3, &t_ref(k, k + 2), ldt, &t_ref(k + 1, k + 2), ldt, &
		    cs, &sn);
	}
	i__3 = k - 1;
	r_cnjg(&q__1, &sn);
	crot_(&i__3, &t_ref(1, k), &c__1, &t_ref(1, k + 1), &c__1, &cs, &q__1)
		;

	i__3 = t_subscr(k, k);
	t[i__3].r = t22.r, t[i__3].i = t22.i;
	i__3 = t_subscr(k + 1, k + 1);
	t[i__3].r = t11.r, t[i__3].i = t11.i;

	if (wantq) {

/*           Accumulate transformation in the matrix Q. */

	    r_cnjg(&q__1, &sn);
	    crot_(n, &q_ref(1, k), &c__1, &q_ref(1, k + 1), &c__1, &cs, &q__1)
		    ;
	}

/* L10: */
    }

    return 0;

/*     End of CTREXC */

} /* ctrexc_ */
Exemple #11
0
extern "C" magma_int_t
magma_zunmqr_gpu(
    magma_side_t side, magma_trans_t trans,
    magma_int_t m, magma_int_t n, magma_int_t k,
    magmaDoubleComplex_ptr dA, size_t dA_offset, magma_int_t ldda,
    magmaDoubleComplex *tau,
    magmaDoubleComplex_ptr dC, size_t dC_offset, magma_int_t lddc,
    magmaDoubleComplex *hwork, magma_int_t lwork,
    magmaDoubleComplex_ptr dT, size_t dT_offset, magma_int_t nb,
    magma_queue_t queue,
    magma_int_t *info)
{
/*  -- clMAGMA (version 1.3.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date November 2014

    Purpose
    =======
    ZUNMQR_GPU overwrites the general complex M-by-N matrix C with

                    SIDE = 'L'     SIDE = 'R'
    TRANS = 'N':      Q * C          C * Q
    TRANS = 'T':      Q**H * C       C * Q**H

    where Q is a complex orthogonal matrix defined as the product of k
    elementary reflectors

          Q = H(1) H(2) . . . H(k)

    as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N
    if SIDE = 'R'.

    Arguments
    =========
    SIDE    (input) CHARACTER*1
            = 'L': apply Q or Q**H from the Left;
            = 'R': apply Q or Q**H from the Right.

    TRANS   (input) CHARACTER*1
            = 'N':  No transpose, apply Q;
            = 'T':  Transpose, apply Q**H.

    M       (input) INTEGER
            The number of rows of the matrix C. M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix C. N >= 0.

    K       (input) INTEGER
            The number of elementary reflectors whose product defines
            the matrix Q.
            If SIDE = 'L', M >= K >= 0;
            if SIDE = 'R', N >= K >= 0.

    DA      (input) COMPLEX_16 array on the GPU, dimension (LDDA,K)
            The i-th column must contain the vector which defines the
            elementary reflector H(i), for i = 1,2,...,k, as returned by
            ZGEQRF in the first k columns of its array argument DA.
            DA is modified by the routine but restored on exit.

    LDDA    (input) INTEGER
            The leading dimension of the array DA.
            If SIDE = 'L', LDDA >= max(1,M);
            if SIDE = 'R', LDDA >= max(1,N).

    TAU     (input) COMPLEX_16 array, dimension (K)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by ZGEQRF.

    DC      (input/output) COMPLEX_16 array on the GPU, dimension (LDDC,N)
            On entry, the M-by-N matrix C.
            On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q.

    LDDC     (input) INTEGER
            The leading dimension of the array DC. LDDC >= max(1,M).

    HWORK    (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, HWORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array HWORK.
            LWORK >= (M-K+NB)*(N+2*NB) if SIDE = 'L',
            and LWORK >= (N-K+NB)*(M+2*NB) if SIDE = 'R', where NB is the
            optimal blocksize.

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the HWORK array, returns
            this value as the first entry of the HWORK array, and no error
            message related to LWORK is issued by XERBLA.

    DT      (input) COMPLEX_16 array on the GPU that is the output
            (the 9th argument) of magma_zgeqrf_gpu.

    NB      (input) INTEGER
            This is the blocking size that was used in pre-computing DT, e.g.,
            the blocking size used in magma_zgeqrf_gpu.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
    =====================================================================   */

    #define a_ref(a_1,a_2) dA, (dA_offset+(a_1)+(a_2)*(ldda))
    #define c_ref(a_1,a_2) dC, (dC_offset+(a_1)+(a_2)*(lddc))
    #define t_ref(a_1)     dT, (dT_offset+(a_1)*nb)

    magmaDoubleComplex c_one = MAGMA_Z_ONE;

    magmaDoubleComplex_ptr dwork;
    magma_int_t i, lddwork;

    magma_int_t i1, i2, i3, ib, ic, jc, mi, ni, nq, nw, ret;
    int left, notran, lquery;
    magma_int_t lwkopt;

    *info = 0;
    left   = (side == MagmaLeft);
    notran = (trans == MagmaNoTrans);
    lquery = (lwork == -1);

    if (!left || notran)
      printf("zunmqr_gpu called with arguments not yet supported\n");

    /* NQ is the order of Q and NW is the minimum dimension of WORK */
    if (left) {
        nq = m;
        nw = n;
    } else {
        nq = n;
        nw = m;
    }
    if ( (!left) && (side != MagmaRight) ) {
        *info = -1;
    } else if ( (!notran) && (trans != MagmaConjTrans) ) {
        *info = -2;
    } else if (m < 0) {
        *info = -3;
    } else if (n < 0) {
        *info = -4;
    } else if (k < 0 || k > nq) {
        *info = -5;
    } else if (ldda < max(1,nq)) {
        *info = -7;
    } else if (lddc < max(1,m)) {
        *info = -10;
    } else if (lwork < max(1,nw) && ! lquery) {
        *info = -12;
    }

    lwkopt = (m-k+nb)*(n+2*nb);
    hwork[0] = MAGMA_Z_MAKE( lwkopt, 0 );

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (m == 0 || n == 0 || k == 0) {
        hwork[0] = c_one;
        return *info;
    }

    lddwork= k;
    dwork  = dT;
    size_t dwork_offset = 2*lddwork*nb;

    if ( (left && (! notran)) || ( (!left) && notran ) ) {
        i1 = 0;
        i2 = k-nb;
        i3 = nb;
    } else {
        i1 = (k - 1 - nb) / nb * nb;
        i2 = 0;
        i3 = -nb;
    }

    if (left) {
        ni = n;
        jc = 0;
    } else {
        mi = m;
        ic = 0;
    }

    if (nb < k)
    {
        for (i=i1; i3<0 ? i>i2 : i<i2; i+=i3)
        {
            ib = min(nb, k - i);
            if (left){
                mi = m - i;
                ic = i;
            }
            else {
                ni = n - i;
                jc = i;
            }
            ret = magma_zlarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
                                    mi, ni, ib,
                                    a_ref(i,  i ), ldda, t_ref(i), nb,
                                    c_ref(ic, jc), lddc, dwork, dwork_offset, nw, queue);
            if ( ret != MAGMA_SUCCESS )
              return ret;
        }
    }
    else
    {
        i = i1;
    }

    /* Use unblocked code to multiply the last or only block. */
    if (i < k) {
        ib   = k-i;
        if (left){
            mi = m - i;
            ic = i;
        }
        else {
            ni = n - i;
            jc = i;
        }

        magma_zgetmatrix(mi, ib, a_ref(i, i), ldda, hwork, mi, queue);
        magma_zgetmatrix(mi, ni, c_ref(ic, jc), lddc, hwork+mi*ib, mi, queue);

        magma_int_t lhwork = lwork - mi*(ib + ni);
        lapackf77_zunmqr( MagmaLeftStr, MagmaConjTransStr,
                          &mi, &ni, &ib,
                          hwork,       &mi, tau+i,
                          hwork+mi*ib, &mi,
                          hwork+mi*(ib+ni), &lhwork, info);

        // send the updated part of c back to the GPU
        magma_zsetmatrix(mi, ni, hwork+mi*ib, mi, c_ref(ic, jc), lddc, queue);
    }

    return *info;
    /* End of MAGMA_ZUNMQR_GPU */
}
Exemple #12
0
extern "C" magma_int_t
magma_dorgqr(
    magma_int_t m, magma_int_t n, magma_int_t k,
    double *a, magma_int_t lda,
    double *tau, magmaDouble_ptr dT, size_t dT_offset,
    magma_int_t nb,
    magma_queue_t queue,
    magma_int_t *info )
{
/*  -- clMAGMA (version 1.3.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date November 2014

    Purpose
    =======
    DORGQR generates an M-by-N DOUBLE_PRECISION matrix Q with orthonormal columns,
    which is defined as the first N columns of a product of K elementary
    reflectors of order M

          Q  =  H(1) H(2) . . . H(k)

    as returned by DGEQRF.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix Q. M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix Q. M >= N >= 0.

    K       (input) INTEGER
            The number of elementary reflectors whose product defines the
            matrix Q. N >= K >= 0.

    A       (input/output) DOUBLE_PRECISION array A, dimension (LDDA,N).
            On entry, the i-th column must contain the vector
            which defines the elementary reflector H(i), for
            i = 1,2,...,k, as returned by DGEQRF_GPU in the
            first k columns of its array argument A.
            On exit, the M-by-N matrix Q.

    LDA     (input) INTEGER
            The first dimension of the array A. LDA >= max(1,M).

    TAU     (input) DOUBLE_PRECISION array, dimension (K)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by DGEQRF_GPU.

    DT      (input) DOUBLE_PRECISION array on the GPU device.
            DT contains the T matrices used in blocking the elementary
            reflectors H(i), e.g., this can be the 6th argument of
            magma_dgeqrf_gpu.

    NB      (input) INTEGER
            This is the block size used in DGEQRF_GPU, and correspondingly
            the size of the T matrices, used in the factorization, and
            stored in DT.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument has an illegal value
    =====================================================================    */

    #define  a_ref(i,j)     ( a + (j)*lda  + (i))
    #define da_ref(i,j)     da, (da_offset + (j)*ldda + (i))
    #define t_ref(a_1)      dT, (dT_offset + (a_1)*nb)

    double c_zero = MAGMA_D_ZERO;
    
    magma_int_t  i__1, i__2, i__3;
    magma_int_t lwork, ldda;
    magma_int_t i, ib, ki, kk, iinfo;
    magma_int_t lddwork = min(m, n);
    double *work;
    magmaDouble_ptr da, dwork;
    size_t da_offset, dwork_offset;
    magma_event_t event = NULL;

    *info = 0;
    if (m < 0) {
        *info = -1;
    } else if ((n < 0) || (n > m)) {
        *info = -2;
    } else if ((k < 0) || (k > n)) {
        *info = -3;
    } else if (lda < max(1,m)) {
        *info = -5;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    if (n <= 0)
      return *info;

    /* Allocate GPU work space */
    ldda = ((m+31)/32)*32;
    lddwork = ((lddwork+31)/32)*32;
    if (MAGMA_SUCCESS != magma_dmalloc( &da, ((n)*ldda + nb*lddwork ) )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    da_offset = 0;
    dwork = da;
    dwork_offset = da_offset + (n)*ldda;

    /* Allocate CPU work space */
    lwork = n * nb;
    magma_dmalloc_cpu( &work, lwork );
    if( work == NULL ) {
        magma_free( da );
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }

    if ( (nb > 1) && (nb < k) )
      {
        /*  Use blocked code after the last block.
            The first kk columns are handled by the block method. */
        ki = (k - nb - 1) / nb * nb;
        kk = min(k, ki + nb);

        /* Set A(1:kk,kk+1:n) to zero. */
        magmablas_dlaset(MagmaFull, kk, n-kk, c_zero, c_zero, da_ref(0,kk), ldda, queue);
      }
    else
      kk = 0;

    /* Use unblocked code for the last or only block. */
    if (kk < n)
      {
        i__1 = m - kk;
        i__2 = n - kk;
        i__3 = k - kk;
        lapackf77_dorgqr(&i__1, &i__2, &i__3,
                         a_ref(kk, kk), &lda,
                         &tau[kk], work, &lwork, &iinfo);
        
        magma_dsetmatrix(i__1, i__2, a_ref(kk, kk), lda, da_ref(kk, kk), ldda, queue);
      }

    if (kk > 0)
      {
        /* Use blocked code */
        for (i = ki; i >= 0; i-=nb)
          {
            ib = min(nb, k - i);

            /* Send the current panel to the GPU */
            i__2 = m - i;
            dpanel_to_q(MagmaUpper, ib, a_ref(i,i), lda, work);
            magma_dsetmatrix(i__2, ib, a_ref(i, i), lda, da_ref(i, i), ldda, queue);
                             
            if (i + ib < n)
              {
                /* Apply H to A(i:m,i+ib:n) from the left */
                i__3 = n - i - ib;
                magma_dlarfb_gpu( MagmaLeft, MagmaNoTrans, MagmaForward, MagmaColumnwise,
                                  i__2, i__3, ib,
                                  da_ref(i, i   ), ldda, t_ref(i),      nb,
                                  da_ref(i, i+ib), ldda,    dwork, dwork_offset, lddwork, queue);
              }

            /* Apply H to rows i:m of current block on the CPU */
            lapackf77_dorgqr(&i__2, &ib, &ib,
                             a_ref(i, i), &lda,
                             &tau[i], work, &lwork, &iinfo);
            magma_dsetmatrix_async( i__2, ib,
                                    a_ref(i,i), lda,
                                    da_ref(i,i), ldda, queue, &event );

            /* Set rows 1:i-1 of current block to zero */
            i__2 = i + ib;
            magmablas_dlaset(MagmaFull, i, i__2 - i, c_zero, c_zero, da_ref(0,i), ldda, queue);
          }
      }
    
    magma_dgetmatrix(m, n, da_ref(0, 0), ldda, a_ref(0, 0), lda, queue);
    
    //cudaStreamDestroy(stream);
    magma_free( da );
    magma_free_cpu(work);

    return *info;
} /* magma_dorgqr */
Exemple #13
0
extern "C" magma_int_t
magma_cgeqrf_gpu( magma_int_t m, magma_int_t n,
                  magmaFloatComplex *dA,   magma_int_t ldda,
                  magmaFloatComplex *tau, magmaFloatComplex *dT,
                  magma_int_t *info )
{
/*  -- MAGMA (version 1.4.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       August 2013

    Purpose
    =======
    CGEQRF computes a QR factorization of a complex M-by-N matrix A:
    A = Q * R.
    
    This version stores the triangular dT matrices used in
    the block QR factorization so that they can be applied directly (i.e.,
    without being recomputed) later. As a result, the application
    of Q is much faster. Also, the upper triangular matrices for V have 0s
    in them. The corresponding parts of the upper triangular R are inverted
    and stored separately in dT.
    
    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix A.  M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A.  N >= 0.

    dA      (input/output) COMPLEX array on the GPU, dimension (LDDA,N)
            On entry, the M-by-N matrix A.
            On exit, the elements on and above the diagonal of the array
            contain the min(M,N)-by-N upper trapezoidal matrix R (R is
            upper triangular if m >= n); the elements below the diagonal,
            with the array TAU, represent the orthogonal matrix Q as a
            product of min(m,n) elementary reflectors (see Further
            Details).

    LDDA     (input) INTEGER
            The leading dimension of the array dA.  LDDA >= max(1,M).
            To benefit from coalescent memory accesses LDDA must be
            dividable by 16.

    TAU     (output) COMPLEX array, dimension (min(M,N))
            The scalar factors of the elementary reflectors (see Further
            Details).

    dT      (workspace/output)  COMPLEX array on the GPU,
            dimension (2*MIN(M, N) + (N+31)/32*32 )*NB,
            where NB can be obtained through magma_get_cgeqrf_nb(M).
            It starts with MIN(M,N)*NB block that store the triangular T
            matrices, followed by the MIN(M,N)*NB block of the diagonal
            inverses for the R matrix. The rest of the array is used as workspace.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
                  or another error occured, such as memory allocation failed.

    Further Details
    ===============
    The matrix Q is represented as a product of elementary reflectors

       Q = H(1) H(2) . . . H(k), where k = min(m,n).

    Each H(i) has the form

       H(i) = I - tau * v * v'

    where tau is a complex scalar, and v is a complex vector with
    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
    and tau in TAU(i).
    =====================================================================    */

    #define a_ref(a_1,a_2) (dA+(a_2)*(ldda) + (a_1))
    #define t_ref(a_1)     (dT+(a_1)*nb)
    #define d_ref(a_1)     (dT+(minmn+(a_1))*nb)
    #define dd_ref(a_1)    (dT+(2*minmn+(a_1))*nb)
    #define work_ref(a_1)  ( work + (a_1))
    #define hwork          ( work + (nb)*(m))

    magma_int_t i, k, minmn, old_i, old_ib, rows, cols;
    magma_int_t ib, nb;
    magma_int_t ldwork, lddwork, lwork, lhwork;
    magmaFloatComplex *work, *ut;

    /* check arguments */
    *info = 0;
    if (m < 0) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (ldda < max(1,m)) {
        *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    k = minmn = min(m,n);
    if (k == 0)
        return *info;

    nb = magma_get_cgeqrf_nb(m);

    lwork  = (m + n + nb)*nb;
    lhwork = lwork - m*nb;

    if (MAGMA_SUCCESS != magma_cmalloc_pinned( &work, lwork )) {
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    
    ut = hwork+nb*(n);
    memset( ut, 0, nb*nb*sizeof(magmaFloatComplex));

    magma_queue_t stream[2];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );

    ldwork = m;
    lddwork= n;

    if ( (nb > 1) && (nb < k) ) {
        /* Use blocked code initially */
        old_i = 0; old_ib = nb;
        for (i = 0; i < k-nb; i += nb) {
            ib = min(k-i, nb);
            rows = m -i;
            magma_cgetmatrix_async( rows, ib,
                                    a_ref(i,i),  ldda,
                                    work_ref(i), ldwork, stream[1] );
            if (i>0){
                /* Apply H' to A(i:m,i+2*ib:n) from the left */
                cols = n-old_i-2*old_ib;
                magma_clarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
                                  m-old_i, cols, old_ib,
                                  a_ref(old_i, old_i         ), ldda, t_ref(old_i), nb,
                                  a_ref(old_i, old_i+2*old_ib), ldda, dd_ref(0),    lddwork);
                
                /* store the diagonal */
                magma_csetmatrix_async( old_ib, old_ib,
                                        ut,           old_ib,
                                        d_ref(old_i), old_ib, stream[0] );
            }

            magma_queue_sync( stream[1] );
            lapackf77_cgeqrf(&rows, &ib, work_ref(i), &ldwork, tau+i, hwork, &lhwork, info);
            /* Form the triangular factor of the block reflector
               H = H(i) H(i+1) . . . H(i+ib-1) */
            lapackf77_clarft( MagmaForwardStr, MagmaColumnwiseStr,
                              &rows, &ib,
                              work_ref(i), &ldwork, tau+i, hwork, &ib);

            /* Put 0s in the upper triangular part of a panel (and 1s on the
               diagonal); copy the upper triangular in ut and invert it. */
            magma_queue_sync( stream[0] );
            csplit_diag_block(ib, work_ref(i), ldwork, ut);
            magma_csetmatrix( rows, ib, work_ref(i), ldwork, a_ref(i,i), ldda );

            if (i + ib < n) {
                /* Send the triangular factor T to the GPU */
                magma_csetmatrix( ib, ib, hwork, ib, t_ref(i), nb );

                if (i+nb < k-nb){
                    /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */
                    magma_clarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
                                      rows, ib, ib,
                                      a_ref(i, i   ), ldda, t_ref(i),  nb,
                                      a_ref(i, i+ib), ldda, dd_ref(0), lddwork);
                }
                else {
                    cols = n-i-ib;
                    magma_clarfb_gpu( MagmaLeft, MagmaConjTrans, MagmaForward, MagmaColumnwise,
                                      rows, cols, ib,
                                      a_ref(i, i   ), ldda, t_ref(i),  nb,
                                      a_ref(i, i+ib), ldda, dd_ref(0), lddwork);
                    /* Fix the diagonal block */
                    magma_csetmatrix( ib, ib, ut, ib, d_ref(i), ib );
                }
                old_i  = i;
                old_ib = ib;
            }
        }
    } else {
        i = 0;
    }

    /* Use unblocked code to factor the last or only block. */
    if (i < k) {
        ib   = n-i;
        rows = m-i;
        magma_cgetmatrix( rows, ib, a_ref(i, i), ldda, work, rows );
        lhwork = lwork - rows*ib;
        lapackf77_cgeqrf(&rows, &ib, work, &rows, tau+i, work+ib*rows, &lhwork, info);
        
        magma_csetmatrix( rows, ib, work, rows, a_ref(i, i), ldda );
    }

    magma_queue_destroy( stream[0] );
    magma_queue_destroy( stream[1] );
    magma_free_pinned( work );
    return *info;

/*     End of MAGMA_CGEQRF */

} /* magma_cgeqrf */
Exemple #14
0
/* Subroutine */ int ddrges_(integer *nsizes, integer *nn, integer *ntypes, 
	logical *dotype, integer *iseed, doublereal *thresh, integer *nounit, 
	doublereal *a, integer *lda, doublereal *b, doublereal *s, doublereal 
	*t, doublereal *q, integer *ldq, doublereal *z__, doublereal *alphar, 
	doublereal *alphai, doublereal *beta, doublereal *work, integer *
	lwork, doublereal *result, logical *bwork, integer *info)
{
    /* Initialized data */

    static integer kclass[26] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,
	    2,2,2,3 };
    static integer kbmagn[26] = { 1,1,1,1,1,1,1,1,3,2,3,2,2,3,1,1,1,1,1,1,1,3,
	    2,3,2,1 };
    static integer ktrian[26] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,
	    1,1,1,1 };
    static integer iasign[26] = { 0,0,0,0,0,0,2,0,2,2,0,0,2,2,2,0,2,0,0,0,2,2,
	    2,2,2,0 };
    static integer ibsign[26] = { 0,0,0,0,0,0,0,2,0,0,2,2,0,0,2,0,2,0,0,0,0,0,
	    0,0,0,0 };
    static integer kz1[6] = { 0,1,2,1,3,3 };
    static integer kz2[6] = { 0,0,1,2,1,1 };
    static integer kadd[6] = { 0,0,0,0,3,2 };
    static integer katype[26] = { 0,1,0,1,2,3,4,1,4,4,1,1,4,4,4,2,4,5,8,7,9,4,
	    4,4,4,0 };
    static integer kbtype[26] = { 0,0,1,1,2,-3,1,4,1,1,4,4,1,1,-4,2,-4,8,8,8,
	    8,8,8,8,8,0 };
    static integer kazero[26] = { 1,1,1,1,1,1,2,1,2,2,1,1,2,2,3,1,3,5,5,5,5,3,
	    3,3,3,1 };
    static integer kbzero[26] = { 1,1,1,1,1,1,1,2,1,1,2,2,1,1,4,1,4,6,6,6,6,4,
	    4,4,4,1 };
    static integer kamagn[26] = { 1,1,1,1,1,1,1,1,2,3,2,3,2,3,1,1,1,1,1,1,1,2,
	    3,3,2,1 };

    /* Format strings */
    static char fmt_9999[] = "(\002 DDRGES: \002,a,\002 returned INFO=\002,i"
	    "6,\002.\002,/9x,\002N=\002,i6,\002, JTYPE=\002,i6,\002, ISEED="
	    "(\002,4(i4,\002,\002),i5,\002)\002)";
    static char fmt_9998[] = "(\002 DDRGES: DGET53 returned INFO=\002,i1,"
	    "\002 for eigenvalue \002,i6,\002.\002,/9x,\002N=\002,i6,\002, JT"
	    "YPE=\002,i6,\002, ISEED=(\002,4(i4,\002,\002),i5,\002)\002)";
    static char fmt_9997[] = "(\002 DDRGES: S not in Schur form at eigenvalu"
	    "e \002,i6,\002.\002,/9x,\002N=\002,i6,\002, JTYPE=\002,i6,\002, "
	    "ISEED=(\002,3(i5,\002,\002),i5,\002)\002)";
    static char fmt_9996[] = "(/1x,a3,\002 -- Real Generalized Schur form dr"
	    "iver\002)";
    static char fmt_9995[] = "(\002 Matrix types (see DDRGES for details):"
	    " \002)";
    static char fmt_9994[] = "(\002 Special Matrices:\002,23x,\002(J'=transp"
	    "osed Jordan block)\002,/\002   1=(0,0)  2=(I,0)  3=(0,I)  4=(I,I"
	    ")  5=(J',J')  \002,\0026=(diag(J',I), diag(I,J'))\002,/\002 Diag"
	    "onal Matrices:  ( \002,\002D=diag(0,1,2,...) )\002,/\002   7=(D,"
	    "I)   9=(large*D, small*I\002,\002)  11=(large*I, small*D)  13=(l"
	    "arge*D, large*I)\002,/\002   8=(I,D)  10=(small*D, large*I)  12="
	    "(small*I, large*D) \002,\002 14=(small*D, small*I)\002,/\002  15"
	    "=(D, reversed D)\002)";
    static char fmt_9993[] = "(\002 Matrices Rotated by Random \002,a,\002 M"
	    "atrices U, V:\002,/\002  16=Transposed Jordan Blocks            "
	    " 19=geometric \002,\002alpha, beta=0,1\002,/\002  17=arithm. alp"
	    "ha&beta             \002,\002      20=arithmetic alpha, beta=0,"
	    "1\002,/\002  18=clustered \002,\002alpha, beta=0,1            21"
	    "=random alpha, beta=0,1\002,/\002 Large & Small Matrices:\002,"
	    "/\002  22=(large, small)   \002,\00223=(small,large)    24=(smal"
	    "l,small)    25=(large,large)\002,/\002  26=random O(1) matrices"
	    ".\002)";
    static char fmt_9992[] = "(/\002 Tests performed:  (S is Schur, T is tri"
	    "angular, \002,\002Q and Z are \002,a,\002,\002,/19x,\002l and r "
	    "are the appropriate left and right\002,/19x,\002eigenvectors, re"
	    "sp., a is alpha, b is beta, and\002,/19x,a,\002 means \002,a,"
	    "\002.)\002,/\002 Without ordering: \002,/\002  1 = | A - Q S "
	    "Z\002,a,\002 | / ( |A| n ulp )      2 = | B - Q T Z\002,a,\002 |"
	    " / ( |B| n ulp )\002,/\002  3 = | I - QQ\002,a,\002 | / ( n ulp "
	    ")             4 = | I - ZZ\002,a,\002 | / ( n ulp )\002,/\002  5"
	    " = A is in Schur form S\002,/\002  6 = difference between (alpha"
	    ",beta)\002,\002 and diagonals of (S,T)\002,/\002 With ordering:"
	    " \002,/\002  7 = | (A,B) - Q (S,T) Z\002,a,\002 | / ( |(A,B)| n "
	    "ulp )  \002,/\002  8 = | I - QQ\002,a,\002 | / ( n ulp )        "
	    "    9 = | I - ZZ\002,a,\002 | / ( n ulp )\002,/\002 10 = A is in"
	    " Schur form S\002,/\002 11 = difference between (alpha,beta) and"
	    " diagonals\002,\002 of (S,T)\002,/\002 12 = SDIM is the correct "
	    "number of \002,\002selected eigenvalues\002,/)";
    static char fmt_9991[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2"
	    ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i2,\002 is\002"
	    ",0p,f8.2)";
    static char fmt_9990[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2"
	    ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i2,\002 is\002"
	    ",1p,d10.3)";

    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, s_dim1, 
	    s_offset, t_dim1, t_offset, z_dim1, z_offset, i__1, i__2, i__3, 
	    i__4;
    doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8, d__9, d__10;

    /* Builtin functions */
    double d_sign(doublereal *, doublereal *);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Local variables */
    static integer iadd, sdim, ierr, nmax, rsub;
    static char sort[1];
    static doublereal temp1, temp2;
    static integer i__, j, n;
    static logical badnn;
    extern /* Subroutine */ int dget51_(integer *, integer *, doublereal *, 
	    integer *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *), dget53_(
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    doublereal *, doublereal *, doublereal *, integer *), dget54_(
	    integer *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    integer *, doublereal *, integer *, doublereal *, doublereal *), 
	    dgges_(char *, char *, char *, L_fp, integer *, doublereal *, 
	    integer *, doublereal *, integer *, integer *, doublereal *, 
	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
	     integer *, doublereal *, integer *, logical *, integer *);
    static integer iinfo;
    static doublereal rmagn[4];
    static integer nmats, jsize, nerrs, i1, jtype, ntest, n1, isort;
    extern /* Subroutine */ int dlatm4_(integer *, integer *, integer *, 
	    integer *, integer *, doublereal *, doublereal *, doublereal *, 
	    integer *, integer *, doublereal *, integer *), dorm2r_(char *, 
	    char *, integer *, integer *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *), dlabad_(doublereal *, doublereal *);
    static logical ilabad;
    static integer jc, nb, in;
    extern doublereal dlamch_(char *);
    static integer jr;
    extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *,
	     integer *, doublereal *);
    extern doublereal dlarnd_(integer *, integer *);
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, integer *);
    static doublereal safmin;
    static integer ioldsd[4];
    static doublereal safmax;
    static integer knteig;
    extern logical dlctes_(doublereal *, doublereal *, doublereal *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int alasvm_(char *, integer *, integer *, integer 
	    *, integer *), dlaset_(char *, integer *, integer *, 
	    doublereal *, doublereal *, doublereal *, integer *), 
	    xerbla_(char *, integer *);
    static integer minwrk, maxwrk;
    static doublereal ulpinv;
    static integer mtypes, ntestt;
    static doublereal ulp;

    /* Fortran I/O blocks */
    static cilist io___40 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___46 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___52 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___53 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___55 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___56 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___57 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___58 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___59 = { 0, 0, 0, fmt_9992, 0 };
    static cilist io___60 = { 0, 0, 0, fmt_9991, 0 };
    static cilist io___61 = { 0, 0, 0, fmt_9990, 0 };



#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define s_ref(a_1,a_2) s[(a_2)*s_dim1 + a_1]
#define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1]
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    DDRGES checks the nonsymmetric generalized eigenvalue (Schur form)   
    problem driver DGGES.   

    DGGES factors A and B as Q S Z'  and Q T Z' , where ' means   
    transpose, T is upper triangular, S is in generalized Schur form   
    (block upper triangular, with 1x1 and 2x2 blocks on the diagonal,   
    the 2x2 blocks corresponding to complex conjugate pairs of   
    generalized eigenvalues), and Q and Z are orthogonal. It also   
    computes the generalized eigenvalues (alpha(j),beta(j)), j=1,...,n,   
    Thus, w(j) = alpha(j)/beta(j) is a root of the characteristic   
    equation   
                    det( A - w(j) B ) = 0   
    Optionally it also reorder the eigenvalues so that a selected   
    cluster of eigenvalues appears in the leading diagonal block of the   
    Schur forms.   

    When DDRGES is called, a number of matrix "sizes" ("N's") and a   
    number of matrix "TYPES" are specified.  For each size ("N")   
    and each TYPE of matrix, a pair of matrices (A, B) will be generated   
    and used for testing. For each matrix pair, the following 13 tests   
    will be performed and compared with the threshhold THRESH except   
    the tests (5), (11) and (13).   


    (1)   | A - Q S Z' | / ( |A| n ulp ) (no sorting of eigenvalues)   


    (2)   | B - Q T Z' | / ( |B| n ulp ) (no sorting of eigenvalues)   


    (3)   | I - QQ' | / ( n ulp ) (no sorting of eigenvalues)   


    (4)   | I - ZZ' | / ( n ulp ) (no sorting of eigenvalues)   

    (5)   if A is in Schur form (i.e. quasi-triangular form)   
          (no sorting of eigenvalues)   

    (6)   if eigenvalues = diagonal blocks of the Schur form (S, T),   
          i.e., test the maximum over j of D(j)  where:   

          if alpha(j) is real:   
                        |alpha(j) - S(j,j)|        |beta(j) - T(j,j)|   
              D(j) = ------------------------ + -----------------------   
                     max(|alpha(j)|,|S(j,j)|)   max(|beta(j)|,|T(j,j)|)   

          if alpha(j) is complex:   
                                    | det( s S - w T ) |   
              D(j) = ---------------------------------------------------   
                     ulp max( s norm(S), |w| norm(T) )*norm( s S - w T )   

          and S and T are here the 2 x 2 diagonal blocks of S and T   
          corresponding to the j-th and j+1-th eigenvalues.   
          (no sorting of eigenvalues)   

    (7)   | (A,B) - Q (S,T) Z' | / ( | (A,B) | n ulp )   
               (with sorting of eigenvalues).   

    (8)   | I - QQ' | / ( n ulp ) (with sorting of eigenvalues).   

    (9)   | I - ZZ' | / ( n ulp ) (with sorting of eigenvalues).   

    (10)  if A is in Schur form (i.e. quasi-triangular form)   
          (with sorting of eigenvalues).   

    (11)  if eigenvalues = diagonal blocks of the Schur form (S, T),   
          i.e. test the maximum over j of D(j)  where:   

          if alpha(j) is real:   
                        |alpha(j) - S(j,j)|        |beta(j) - T(j,j)|   
              D(j) = ------------------------ + -----------------------   
                     max(|alpha(j)|,|S(j,j)|)   max(|beta(j)|,|T(j,j)|)   

          if alpha(j) is complex:   
                                    | det( s S - w T ) |   
              D(j) = ---------------------------------------------------   
                     ulp max( s norm(S), |w| norm(T) )*norm( s S - w T )   

          and S and T are here the 2 x 2 diagonal blocks of S and T   
          corresponding to the j-th and j+1-th eigenvalues.   
          (with sorting of eigenvalues).   

    (12)  if sorting worked and SDIM is the number of eigenvalues   
          which were SELECTed.   

    Test Matrices   
    =============   

    The sizes of the test matrices are specified by an array   
    NN(1:NSIZES); the value of each element NN(j) specifies one size.   
    The "types" are specified by a logical array DOTYPE( 1:NTYPES ); if   
    DOTYPE(j) is .TRUE., then matrix type "j" will be generated.   
    Currently, the list of possible types is:   

    (1)  ( 0, 0 )         (a pair of zero matrices)   

    (2)  ( I, 0 )         (an identity and a zero matrix)   

    (3)  ( 0, I )         (an identity and a zero matrix)   

    (4)  ( I, I )         (a pair of identity matrices)   

            t   t   
    (5)  ( J , J  )       (a pair of transposed Jordan blocks)   

                                        t                ( I   0  )   
    (6)  ( X, Y )         where  X = ( J   0  )  and Y = (      t )   
                                     ( 0   I  )          ( 0   J  )   
                          and I is a k x k identity and J a (k+1)x(k+1)   
                          Jordan block; k=(N-1)/2   

    (7)  ( D, I )         where D is diag( 0, 1,..., N-1 ) (a diagonal   
                          matrix with those diagonal entries.)   
    (8)  ( I, D )   

    (9)  ( big*D, small*I ) where "big" is near overflow and small=1/big   

    (10) ( small*D, big*I )   

    (11) ( big*I, small*D )   

    (12) ( small*I, big*D )   

    (13) ( big*D, big*I )   

    (14) ( small*D, small*I )   

    (15) ( D1, D2 )        where D1 is diag( 0, 0, 1, ..., N-3, 0 ) and   
                           D2 is diag( 0, N-3, N-4,..., 1, 0, 0 )   
              t   t   
    (16) Q ( J , J ) Z     where Q and Z are random orthogonal matrices.   

    (17) Q ( T1, T2 ) Z    where T1 and T2 are upper triangular matrices   
                           with random O(1) entries above the diagonal   
                           and diagonal entries diag(T1) =   
                           ( 0, 0, 1, ..., N-3, 0 ) and diag(T2) =   
                           ( 0, N-3, N-4,..., 1, 0, 0 )   

    (18) Q ( T1, T2 ) Z    diag(T1) = ( 0, 0, 1, 1, s, ..., s, 0 )   
                           diag(T2) = ( 0, 1, 0, 1,..., 1, 0 )   
                           s = machine precision.   

    (19) Q ( T1, T2 ) Z    diag(T1)=( 0,0,1,1, 1-d, ..., 1-(N-5)*d=s, 0 )   
                           diag(T2) = ( 0, 1, 0, 1, ..., 1, 0 )   

                                                           N-5   
    (20) Q ( T1, T2 ) Z    diag(T1)=( 0, 0, 1, 1, a, ..., a   =s, 0 )   
                           diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 )   

    (21) Q ( T1, T2 ) Z    diag(T1)=( 0, 0, 1, r1, r2, ..., r(N-4), 0 )   
                           diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 )   
                           where r1,..., r(N-4) are random.   

    (22) Q ( big*T1, small*T2 ) Z    diag(T1) = ( 0, 0, 1, ..., N-3, 0 )   
                                     diag(T2) = ( 0, 1, ..., 1, 0, 0 )   

    (23) Q ( small*T1, big*T2 ) Z    diag(T1) = ( 0, 0, 1, ..., N-3, 0 )   
                                     diag(T2) = ( 0, 1, ..., 1, 0, 0 )   

    (24) Q ( small*T1, small*T2 ) Z  diag(T1) = ( 0, 0, 1, ..., N-3, 0 )   
                                     diag(T2) = ( 0, 1, ..., 1, 0, 0 )   

    (25) Q ( big*T1, big*T2 ) Z      diag(T1) = ( 0, 0, 1, ..., N-3, 0 )   
                                     diag(T2) = ( 0, 1, ..., 1, 0, 0 )   

    (26) Q ( T1, T2 ) Z     where T1 and T2 are random upper-triangular   
                            matrices.   


    Arguments   
    =========   

    NSIZES  (input) INTEGER   
            The number of sizes of matrices to use.  If it is zero,   
            DDRGES does nothing.  NSIZES >= 0.   

    NN      (input) INTEGER array, dimension (NSIZES)   
            An array containing the sizes to be used for the matrices.   
            Zero values will be skipped.  NN >= 0.   

    NTYPES  (input) INTEGER   
            The number of elements in DOTYPE.   If it is zero, DDRGES   
            does nothing.  It must be at least zero.  If it is MAXTYP+1   
            and NSIZES is 1, then an additional type, MAXTYP+1 is   
            defined, which is to use whatever matrix is in A on input.   
            This is only useful if DOTYPE(1:MAXTYP) is .FALSE. and   
            DOTYPE(MAXTYP+1) is .TRUE. .   

    DOTYPE  (input) LOGICAL array, dimension (NTYPES)   
            If DOTYPE(j) is .TRUE., then for each size in NN a   
            matrix of that size and of type j will be generated.   
            If NTYPES is smaller than the maximum number of types   
            defined (PARAMETER MAXTYP), then types NTYPES+1 through   
            MAXTYP will not be generated. If NTYPES is larger   
            than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES)   
            will be ignored.   

    ISEED   (input/output) INTEGER array, dimension (4)   
            On entry ISEED specifies the seed of the random number   
            generator. The array elements should be between 0 and 4095;   
            if not they will be reduced mod 4096. Also, ISEED(4) must   
            be odd.  The random number generator uses a linear   
            congruential sequence limited to small integers, and so   
            should produce machine independent random numbers. The   
            values of ISEED are changed on exit, and can be used in the   
            next call to DDRGES to continue the same random number   
            sequence.   

    THRESH  (input) DOUBLE PRECISION   
            A test will count as "failed" if the "error", computed as   
            described above, exceeds THRESH.  Note that the error is   
            scaled to be O(1), so THRESH should be a reasonably small   
            multiple of 1, e.g., 10 or 100.  In particular, it should   
            not depend on the precision (single vs. double) or the size   
            of the matrix.  THRESH >= 0.   

    NOUNIT  (input) INTEGER   
            The FORTRAN unit number for printing out error messages   
            (e.g., if a routine returns IINFO not equal to 0.)   

    A       (input/workspace) DOUBLE PRECISION array,   
                                         dimension(LDA, max(NN))   
            Used to hold the original A matrix.  Used as input only   
            if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and   
            DOTYPE(MAXTYP+1)=.TRUE.   

    LDA     (input) INTEGER   
            The leading dimension of A, B, S, and T.   
            It must be at least 1 and at least max( NN ).   

    B       (input/workspace) DOUBLE PRECISION array,   
                                         dimension(LDA, max(NN))   
            Used to hold the original B matrix.  Used as input only   
            if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and   
            DOTYPE(MAXTYP+1)=.TRUE.   

    S       (workspace) DOUBLE PRECISION array, dimension (LDA, max(NN))   
            The Schur form matrix computed from A by DGGES.  On exit, S   
            contains the Schur form matrix corresponding to the matrix   
            in A.   

    T       (workspace) DOUBLE PRECISION array, dimension (LDA, max(NN))   
            The upper triangular matrix computed from B by DGGES.   

    Q       (workspace) DOUBLE PRECISION array, dimension (LDQ, max(NN))   
            The (left) orthogonal matrix computed by DGGES.   

    LDQ     (input) INTEGER   
            The leading dimension of Q and Z. It must   
            be at least 1 and at least max( NN ).   

    Z       (workspace) DOUBLE PRECISION array, dimension( LDQ, max(NN) )   
            The (right) orthogonal matrix computed by DGGES.   

    ALPHAR  (workspace) DOUBLE PRECISION array, dimension (max(NN))   
    ALPHAI  (workspace) DOUBLE PRECISION array, dimension (max(NN))   
    BETA    (workspace) DOUBLE PRECISION array, dimension (max(NN))   
            The generalized eigenvalues of (A,B) computed by DGGES.   
            ( ALPHAR(k)+ALPHAI(k)*i ) / BETA(k) is the k-th   
            generalized eigenvalue of A and B.   

    WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.   
            LWORK >= MAX( 10*(N+1), 3*N*N ), where N is the largest   
            matrix dimension.   

    RESULT  (output) DOUBLE PRECISION array, dimension (15)   
            The values computed by the tests described above.   
            The values are currently limited to 1/ulp, to avoid overflow.   

    BWORK   (workspace) LOGICAL array, dimension (N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  A routine returned an error code.  INFO is the   
                  absolute value of the INFO value returned.   

    =====================================================================   

       Parameter adjustments */
    --nn;
    --dotype;
    --iseed;
    t_dim1 = *lda;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    s_dim1 = *lda;
    s_offset = 1 + s_dim1 * 1;
    s -= s_offset;
    b_dim1 = *lda;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    z_dim1 = *ldq;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --alphar;
    --alphai;
    --beta;
    --work;
    --result;
    --bwork;

    /* Function Body   

       Check for errors */

    *info = 0;

    badnn = FALSE_;
    nmax = 1;
    i__1 = *nsizes;
    for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	i__2 = nmax, i__3 = nn[j];
	nmax = max(i__2,i__3);
	if (nn[j] < 0) {
	    badnn = TRUE_;
	}
/* L10: */
    }

    if (*nsizes < 0) {
	*info = -1;
    } else if (badnn) {
	*info = -2;
    } else if (*ntypes < 0) {
	*info = -3;
    } else if (*thresh < 0.) {
	*info = -6;
    } else if (*lda <= 1 || *lda < nmax) {
	*info = -9;
    } else if (*ldq <= 1 || *ldq < nmax) {
	*info = -14;
    }

/*     Compute workspace   
        (Note: Comments in the code beginning "Workspace:" describe the   
         minimal amount of workspace needed at that point in the code,   
         as well as the preferred amount for good performance.   
         NB refers to the optimal block size for the immediately   
         following subroutine, as returned by ILAENV. */

    minwrk = 1;
    if (*info == 0 && *lwork >= 1) {
/* Computing MAX */
	i__1 = (nmax + 1) * 10, i__2 = nmax * 3 * nmax;
	minwrk = max(i__1,i__2);
/* Computing MAX */
	i__1 = 1, i__2 = ilaenv_(&c__1, "DGEQRF", " ", &nmax, &nmax, &c_n1, &
		c_n1, (ftnlen)6, (ftnlen)1), i__1 = max(i__1,i__2), i__2 = 
		ilaenv_(&c__1, "DORMQR", "LT", &nmax, &nmax, &nmax, &c_n1, (
		ftnlen)6, (ftnlen)2), i__1 = max(i__1,i__2), i__2 = ilaenv_(&
		c__1, "DORGQR", " ", &nmax, &nmax, &nmax, &c_n1, (ftnlen)6, (
		ftnlen)1);
	nb = max(i__1,i__2);
/* Computing MAX */
	i__1 = (nmax + 1) * 10, i__2 = (nmax << 1) + nmax * nb, i__1 = max(
		i__1,i__2), i__2 = nmax * 3 * nmax;
	maxwrk = max(i__1,i__2);
	work[1] = (doublereal) maxwrk;
    }

    if (*lwork < minwrk) {
	*info = -20;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DDRGES", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*nsizes == 0 || *ntypes == 0) {
	return 0;
    }

    safmin = dlamch_("Safe minimum");
    ulp = dlamch_("Epsilon") * dlamch_("Base");
    safmin /= ulp;
    safmax = 1. / safmin;
    dlabad_(&safmin, &safmax);
    ulpinv = 1. / ulp;

/*     The values RMAGN(2:3) depend on N, see below. */

    rmagn[0] = 0.;
    rmagn[1] = 1.;

/*     Loop over matrix sizes */

    ntestt = 0;
    nerrs = 0;
    nmats = 0;

    i__1 = *nsizes;
    for (jsize = 1; jsize <= i__1; ++jsize) {
	n = nn[jsize];
	n1 = max(1,n);
	rmagn[2] = safmax * ulp / (doublereal) n1;
	rmagn[3] = safmin * ulpinv * (doublereal) n1;

	if (*nsizes != 1) {
	    mtypes = min(26,*ntypes);
	} else {
	    mtypes = min(27,*ntypes);
	}

/*        Loop over matrix types */

	i__2 = mtypes;
	for (jtype = 1; jtype <= i__2; ++jtype) {
	    if (! dotype[jtype]) {
		goto L180;
	    }
	    ++nmats;
	    ntest = 0;

/*           Save ISEED in case of an error. */

	    for (j = 1; j <= 4; ++j) {
		ioldsd[j - 1] = iseed[j];
/* L20: */
	    }

/*           Initialize RESULT */

	    for (j = 1; j <= 13; ++j) {
		result[j] = 0.;
/* L30: */
	    }

/*           Generate test matrices A and B   

             Description of control parameters:   

             KZLASS: =1 means w/o rotation, =2 means w/ rotation,   
                     =3 means random.   
             KATYPE: the "type" to be passed to DLATM4 for computing A.   
             KAZERO: the pattern of zeros on the diagonal for A:   
                     =1: ( xxx ), =2: (0, xxx ) =3: ( 0, 0, xxx, 0 ),   
                     =4: ( 0, xxx, 0, 0 ), =5: ( 0, 0, 1, xxx, 0 ),   
                     =6: ( 0, 1, 0, xxx, 0 ).  (xxx means a string of   
                     non-zero entries.)   
             KAMAGN: the magnitude of the matrix: =0: zero, =1: O(1),   
                     =2: large, =3: small.   
             IASIGN: 1 if the diagonal elements of A are to be   
                     multiplied by a random magnitude 1 number, =2 if   
                     randomly chosen diagonal blocks are to be rotated   
                     to form 2x2 blocks.   
             KBTYPE, KBZERO, KBMAGN, IBSIGN: the same, but for B.   
             KTRIAN: =0: don't fill in the upper triangle, =1: do.   
             KZ1, KZ2, KADD: used to implement KAZERO and KBZERO.   
             RMAGN: used to implement KAMAGN and KBMAGN. */

	    if (mtypes > 26) {
		goto L110;
	    }
	    iinfo = 0;
	    if (kclass[jtype - 1] < 3) {

/*              Generate A (w/o rotation) */

		if ((i__3 = katype[jtype - 1], abs(i__3)) == 3) {
		    in = ((n - 1) / 2 << 1) + 1;
		    if (in != n) {
			dlaset_("Full", &n, &n, &c_b26, &c_b26, &a[a_offset], 
				lda);
		    }
		} else {
		    in = n;
		}
		dlatm4_(&katype[jtype - 1], &in, &kz1[kazero[jtype - 1] - 1], 
			&kz2[kazero[jtype - 1] - 1], &iasign[jtype - 1], &
			rmagn[kamagn[jtype - 1]], &ulp, &rmagn[ktrian[jtype - 
			1] * kamagn[jtype - 1]], &c__2, &iseed[1], &a[
			a_offset], lda);
		iadd = kadd[kazero[jtype - 1] - 1];
		if (iadd > 0 && iadd <= n) {
		    a_ref(iadd, iadd) = 1.;
		}

/*              Generate B (w/o rotation) */

		if ((i__3 = kbtype[jtype - 1], abs(i__3)) == 3) {
		    in = ((n - 1) / 2 << 1) + 1;
		    if (in != n) {
			dlaset_("Full", &n, &n, &c_b26, &c_b26, &b[b_offset], 
				lda);
		    }
		} else {
		    in = n;
		}
		dlatm4_(&kbtype[jtype - 1], &in, &kz1[kbzero[jtype - 1] - 1], 
			&kz2[kbzero[jtype - 1] - 1], &ibsign[jtype - 1], &
			rmagn[kbmagn[jtype - 1]], &c_b32, &rmagn[ktrian[jtype 
			- 1] * kbmagn[jtype - 1]], &c__2, &iseed[1], &b[
			b_offset], lda);
		iadd = kadd[kbzero[jtype - 1] - 1];
		if (iadd != 0 && iadd <= n) {
		    b_ref(iadd, iadd) = 1.;
		}

		if (kclass[jtype - 1] == 2 && n > 0) {

/*                 Include rotations   

                   Generate Q, Z as Householder transformations times   
                   a diagonal matrix. */

		    i__3 = n - 1;
		    for (jc = 1; jc <= i__3; ++jc) {
			i__4 = n;
			for (jr = jc; jr <= i__4; ++jr) {
			    q_ref(jr, jc) = dlarnd_(&c__3, &iseed[1]);
			    z___ref(jr, jc) = dlarnd_(&c__3, &iseed[1]);
/* L40: */
			}
			i__4 = n + 1 - jc;
			dlarfg_(&i__4, &q_ref(jc, jc), &q_ref(jc + 1, jc), &
				c__1, &work[jc]);
			work[(n << 1) + jc] = d_sign(&c_b32, &q_ref(jc, jc));
			q_ref(jc, jc) = 1.;
			i__4 = n + 1 - jc;
			dlarfg_(&i__4, &z___ref(jc, jc), &z___ref(jc + 1, jc),
				 &c__1, &work[n + jc]);
			work[n * 3 + jc] = d_sign(&c_b32, &z___ref(jc, jc));
			z___ref(jc, jc) = 1.;
/* L50: */
		    }
		    q_ref(n, n) = 1.;
		    work[n] = 0.;
		    d__1 = dlarnd_(&c__2, &iseed[1]);
		    work[n * 3] = d_sign(&c_b32, &d__1);
		    z___ref(n, n) = 1.;
		    work[n * 2] = 0.;
		    d__1 = dlarnd_(&c__2, &iseed[1]);
		    work[n * 4] = d_sign(&c_b32, &d__1);

/*                 Apply the diagonal matrices */

		    i__3 = n;
		    for (jc = 1; jc <= i__3; ++jc) {
			i__4 = n;
			for (jr = 1; jr <= i__4; ++jr) {
			    a_ref(jr, jc) = work[(n << 1) + jr] * work[n * 3 
				    + jc] * a_ref(jr, jc);
			    b_ref(jr, jc) = work[(n << 1) + jr] * work[n * 3 
				    + jc] * b_ref(jr, jc);
/* L60: */
			}
/* L70: */
		    }
		    i__3 = n - 1;
		    dorm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[
			    1], &a[a_offset], lda, &work[(n << 1) + 1], &
			    iinfo);
		    if (iinfo != 0) {
			goto L100;
		    }
		    i__3 = n - 1;
		    dorm2r_("R", "T", &n, &n, &i__3, &z__[z_offset], ldq, &
			    work[n + 1], &a[a_offset], lda, &work[(n << 1) + 
			    1], &iinfo);
		    if (iinfo != 0) {
			goto L100;
		    }
		    i__3 = n - 1;
		    dorm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[
			    1], &b[b_offset], lda, &work[(n << 1) + 1], &
			    iinfo);
		    if (iinfo != 0) {
			goto L100;
		    }
		    i__3 = n - 1;
		    dorm2r_("R", "T", &n, &n, &i__3, &z__[z_offset], ldq, &
			    work[n + 1], &b[b_offset], lda, &work[(n << 1) + 
			    1], &iinfo);
		    if (iinfo != 0) {
			goto L100;
		    }
		}
	    } else {

/*              Random matrices */

		i__3 = n;
		for (jc = 1; jc <= i__3; ++jc) {
		    i__4 = n;
		    for (jr = 1; jr <= i__4; ++jr) {
			a_ref(jr, jc) = rmagn[kamagn[jtype - 1]] * dlarnd_(&
				c__2, &iseed[1]);
			b_ref(jr, jc) = rmagn[kbmagn[jtype - 1]] * dlarnd_(&
				c__2, &iseed[1]);
/* L80: */
		    }
/* L90: */
		}
	    }

L100:

	    if (iinfo != 0) {
		io___40.ciunit = *nounit;
		s_wsfe(&io___40);
		do_fio(&c__1, "Generator", (ftnlen)9);
		do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer));
		e_wsfe();
		*info = abs(iinfo);
		return 0;
	    }

L110:

	    for (i__ = 1; i__ <= 13; ++i__) {
		result[i__] = -1.;
/* L120: */
	    }

/*           Test with and without sorting of eigenvalues */

	    for (isort = 0; isort <= 1; ++isort) {
		if (isort == 0) {
		    *(unsigned char *)sort = 'N';
		    rsub = 0;
		} else {
		    *(unsigned char *)sort = 'S';
		    rsub = 5;
		}

/*              Call DGGES to compute H, T, Q, Z, alpha, and beta. */

		dlacpy_("Full", &n, &n, &a[a_offset], lda, &s[s_offset], lda);
		dlacpy_("Full", &n, &n, &b[b_offset], lda, &t[t_offset], lda);
		ntest = rsub + 1 + isort;
		result[rsub + 1 + isort] = ulpinv;
		dgges_("V", "V", sort, (L_fp)dlctes_, &n, &s[s_offset], lda, &
			t[t_offset], lda, &sdim, &alphar[1], &alphai[1], &
			beta[1], &q[q_offset], ldq, &z__[z_offset], ldq, &
			work[1], lwork, &bwork[1], &iinfo);
		if (iinfo != 0 && iinfo != n + 2) {
		    result[rsub + 1 + isort] = ulpinv;
		    io___46.ciunit = *nounit;
		    s_wsfe(&io___46);
		    do_fio(&c__1, "DGGES", (ftnlen)5);
		    do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		    do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer))
			    ;
		    e_wsfe();
		    *info = abs(iinfo);
		    goto L160;
		}

		ntest = rsub + 4;

/*              Do tests 1--4 (or tests 7--9 when reordering ) */

		if (isort == 0) {
		    dget51_(&c__1, &n, &a[a_offset], lda, &s[s_offset], lda, &
			    q[q_offset], ldq, &z__[z_offset], ldq, &work[1], &
			    result[1]);
		    dget51_(&c__1, &n, &b[b_offset], lda, &t[t_offset], lda, &
			    q[q_offset], ldq, &z__[z_offset], ldq, &work[1], &
			    result[2]);
		} else {
		    dget54_(&n, &a[a_offset], lda, &b[b_offset], lda, &s[
			    s_offset], lda, &t[t_offset], lda, &q[q_offset], 
			    ldq, &z__[z_offset], ldq, &work[1], &result[7]);
		}
		dget51_(&c__3, &n, &a[a_offset], lda, &t[t_offset], lda, &q[
			q_offset], ldq, &q[q_offset], ldq, &work[1], &result[
			rsub + 3]);
		dget51_(&c__3, &n, &b[b_offset], lda, &t[t_offset], lda, &z__[
			z_offset], ldq, &z__[z_offset], ldq, &work[1], &
			result[rsub + 4]);

/*              Do test 5 and 6 (or Tests 10 and 11 when reordering):   
                check Schur form of A and compare eigenvalues with   
                diagonals. */

		ntest = rsub + 6;
		temp1 = 0.;

		i__3 = n;
		for (j = 1; j <= i__3; ++j) {
		    ilabad = FALSE_;
		    if (alphai[j] == 0.) {
/* Computing MAX */
			d__7 = safmin, d__8 = (d__2 = alphar[j], abs(d__2)), 
				d__7 = max(d__7,d__8), d__8 = (d__3 = s_ref(j,
				 j), abs(d__3));
/* Computing MAX */
			d__9 = safmin, d__10 = (d__5 = beta[j], abs(d__5)), 
				d__9 = max(d__9,d__10), d__10 = (d__6 = t_ref(
				j, j), abs(d__6));
			temp2 = ((d__1 = alphar[j] - s_ref(j, j), abs(d__1)) /
				 max(d__7,d__8) + (d__4 = beta[j] - t_ref(j, 
				j), abs(d__4)) / max(d__9,d__10)) / ulp;

			if (j < n) {
			    if (s_ref(j + 1, j) != 0.) {
				ilabad = TRUE_;
				result[rsub + 5] = ulpinv;
			    }
			}
			if (j > 1) {
			    if (s_ref(j, j - 1) != 0.) {
				ilabad = TRUE_;
				result[rsub + 5] = ulpinv;
			    }
			}

		    } else {
			if (alphai[j] > 0.) {
			    i1 = j;
			} else {
			    i1 = j - 1;
			}
			if (i1 <= 0 || i1 >= n) {
			    ilabad = TRUE_;
			} else if (i1 < n - 1) {
			    if (s_ref(i1 + 2, i1 + 1) != 0.) {
				ilabad = TRUE_;
				result[rsub + 5] = ulpinv;
			    }
			} else if (i1 > 1) {
			    if (s_ref(i1, i1 - 1) != 0.) {
				ilabad = TRUE_;
				result[rsub + 5] = ulpinv;
			    }
			}
			if (! ilabad) {
			    dget53_(&s_ref(i1, i1), lda, &t_ref(i1, i1), lda, 
				    &beta[j], &alphar[j], &alphai[j], &temp2, 
				    &ierr);
			    if (ierr >= 3) {
				io___52.ciunit = *nounit;
				s_wsfe(&io___52);
				do_fio(&c__1, (char *)&ierr, (ftnlen)sizeof(
					integer));
				do_fio(&c__1, (char *)&j, (ftnlen)sizeof(
					integer));
				do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
					integer));
				do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(
					integer));
				do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)
					sizeof(integer));
				e_wsfe();
				*info = abs(ierr);
			    }
			} else {
			    temp2 = ulpinv;
			}

		    }
		    temp1 = max(temp1,temp2);
		    if (ilabad) {
			io___53.ciunit = *nounit;
			s_wsfe(&io___53);
			do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer))
				;
			do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(
				integer));
			e_wsfe();
		    }
/* L130: */
		}
		result[rsub + 6] = temp1;

		if (isort >= 1) {

/*                 Do test 12 */

		    ntest = 12;
		    result[12] = 0.;
		    knteig = 0;
		    i__3 = n;
		    for (i__ = 1; i__ <= i__3; ++i__) {
			d__1 = -alphai[i__];
			if (dlctes_(&alphar[i__], &alphai[i__], &beta[i__]) ||
				 dlctes_(&alphar[i__], &d__1, &beta[i__])) {
			    ++knteig;
			}
			if (i__ < n) {
			    d__1 = -alphai[i__ + 1];
			    d__2 = -alphai[i__];
			    if ((dlctes_(&alphar[i__ + 1], &alphai[i__ + 1], &
				    beta[i__ + 1]) || dlctes_(&alphar[i__ + 1]
				    , &d__1, &beta[i__ + 1])) && ! (dlctes_(&
				    alphar[i__], &alphai[i__], &beta[i__]) || 
				    dlctes_(&alphar[i__], &d__2, &beta[i__])) 
				    && iinfo != n + 2) {
				result[12] = ulpinv;
			    }
			}
/* L140: */
		    }
		    if (sdim != knteig) {
			result[12] = ulpinv;
		    }
		}

/* L150: */
	    }

/*           End of Loop -- Check for RESULT(j) > THRESH */

L160:

	    ntestt += ntest;

/*           Print out tests which fail. */

	    i__3 = ntest;
	    for (jr = 1; jr <= i__3; ++jr) {
		if (result[jr] >= *thresh) {

/*                 If this is the first test to fail,   
                   print a header to the data file. */

		    if (nerrs == 0) {
			io___55.ciunit = *nounit;
			s_wsfe(&io___55);
			do_fio(&c__1, "DGS", (ftnlen)3);
			e_wsfe();

/*                    Matrix types */

			io___56.ciunit = *nounit;
			s_wsfe(&io___56);
			e_wsfe();
			io___57.ciunit = *nounit;
			s_wsfe(&io___57);
			e_wsfe();
			io___58.ciunit = *nounit;
			s_wsfe(&io___58);
			do_fio(&c__1, "Orthogonal", (ftnlen)10);
			e_wsfe();

/*                    Tests performed */

			io___59.ciunit = *nounit;
			s_wsfe(&io___59);
			do_fio(&c__1, "orthogonal", (ftnlen)10);
			do_fio(&c__1, "'", (ftnlen)1);
			do_fio(&c__1, "transpose", (ftnlen)9);
			for (j = 1; j <= 8; ++j) {
			    do_fio(&c__1, "'", (ftnlen)1);
			}
			e_wsfe();

		    }
		    ++nerrs;
		    if (result[jr] < 1e4) {
			io___60.ciunit = *nounit;
			s_wsfe(&io___60);
			do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer))
				;
			do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(
				integer));
			do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof(
				doublereal));
			e_wsfe();
		    } else {
			io___61.ciunit = *nounit;
			s_wsfe(&io___61);
			do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer))
				;
			do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(
				integer));
			do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof(
				doublereal));
			e_wsfe();
		    }
		}
/* L170: */
	    }

L180:
	    ;
	}
/* L190: */
    }

/*     Summary */

    alasvm_("DGS", nounit, &nerrs, &ntestt, &c__0);

    work[1] = (doublereal) maxwrk;

    return 0;








/*     End of DDRGES */

} /* ddrges_ */
/* Subroutine */ int ztrsna_(char *job, char *howmny, logical *select, 
	integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, 
	integer *ldvl, doublecomplex *vr, integer *ldvr, doublereal *s, 
	doublereal *sep, integer *mm, integer *m, doublecomplex *work, 
	integer *ldwork, doublereal *rwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    ZTRSNA estimates reciprocal condition numbers for specified   
    eigenvalues and/or right eigenvectors of a complex upper triangular   
    matrix T (or of any matrix Q*T*Q**H with Q unitary).   

    Arguments   
    =========   

    JOB     (input) CHARACTER*1   
            Specifies whether condition numbers are required for   
            eigenvalues (S) or eigenvectors (SEP):   
            = 'E': for eigenvalues only (S);   
            = 'V': for eigenvectors only (SEP);   
            = 'B': for both eigenvalues and eigenvectors (S and SEP).   

    HOWMNY  (input) CHARACTER*1   
            = 'A': compute condition numbers for all eigenpairs;   
            = 'S': compute condition numbers for selected eigenpairs   
                   specified by the array SELECT.   

    SELECT  (input) LOGICAL array, dimension (N)   
            If HOWMNY = 'S', SELECT specifies the eigenpairs for which   
            condition numbers are required. To select condition numbers   
            for the j-th eigenpair, SELECT(j) must be set to .TRUE..   
            If HOWMNY = 'A', SELECT is not referenced.   

    N       (input) INTEGER   
            The order of the matrix T. N >= 0.   

    T       (input) COMPLEX*16 array, dimension (LDT,N)   
            The upper triangular matrix T.   

    LDT     (input) INTEGER   
            The leading dimension of the array T. LDT >= max(1,N).   

    VL      (input) COMPLEX*16 array, dimension (LDVL,M)   
            If JOB = 'E' or 'B', VL must contain left eigenvectors of T   
            (or of any Q*T*Q**H with Q unitary), corresponding to the   
            eigenpairs specified by HOWMNY and SELECT. The eigenvectors   
            must be stored in consecutive columns of VL, as returned by   
            ZHSEIN or ZTREVC.   
            If JOB = 'V', VL is not referenced.   

    LDVL    (input) INTEGER   
            The leading dimension of the array VL.   
            LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N.   

    VR      (input) COMPLEX*16 array, dimension (LDVR,M)   
            If JOB = 'E' or 'B', VR must contain right eigenvectors of T   
            (or of any Q*T*Q**H with Q unitary), corresponding to the   
            eigenpairs specified by HOWMNY and SELECT. The eigenvectors   
            must be stored in consecutive columns of VR, as returned by   
            ZHSEIN or ZTREVC.   
            If JOB = 'V', VR is not referenced.   

    LDVR    (input) INTEGER   
            The leading dimension of the array VR.   
            LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N.   

    S       (output) DOUBLE PRECISION array, dimension (MM)   
            If JOB = 'E' or 'B', the reciprocal condition numbers of the   
            selected eigenvalues, stored in consecutive elements of the   
            array. Thus S(j), SEP(j), and the j-th columns of VL and VR   
            all correspond to the same eigenpair (but not in general the   
            j-th eigenpair, unless all eigenpairs are selected).   
            If JOB = 'V', S is not referenced.   

    SEP     (output) DOUBLE PRECISION array, dimension (MM)   
            If JOB = 'V' or 'B', the estimated reciprocal condition   
            numbers of the selected eigenvectors, stored in consecutive   
            elements of the array.   
            If JOB = 'E', SEP is not referenced.   

    MM      (input) INTEGER   
            The number of elements in the arrays S (if JOB = 'E' or 'B')   
             and/or SEP (if JOB = 'V' or 'B'). MM >= M.   

    M       (output) INTEGER   
            The number of elements of the arrays S and/or SEP actually   
            used to store the estimated condition numbers.   
            If HOWMNY = 'A', M is set to N.   

    WORK    (workspace) COMPLEX*16 array, dimension (LDWORK,N+1)   
            If JOB = 'E', WORK is not referenced.   

    LDWORK  (input) INTEGER   
            The leading dimension of the array WORK.   
            LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N.   

    RWORK   (workspace) DOUBLE PRECISION array, dimension (N)   
            If JOB = 'E', RWORK is not referenced.   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The reciprocal of the condition number of an eigenvalue lambda is   
    defined as   

            S(lambda) = |v'*u| / (norm(u)*norm(v))   

    where u and v are the right and left eigenvectors of T corresponding   
    to lambda; v' denotes the conjugate transpose of v, and norm(u)   
    denotes the Euclidean norm. These reciprocal condition numbers always   
    lie between zero (very badly conditioned) and one (very well   
    conditioned). If n = 1, S(lambda) is defined to be 1.   

    An approximate error bound for a computed eigenvalue W(i) is given by   

                        EPS * norm(T) / S(i)   

    where EPS is the machine precision.   

    The reciprocal of the condition number of the right eigenvector u   
    corresponding to lambda is defined as follows. Suppose   

                T = ( lambda  c  )   
                    (   0    T22 )   

    Then the reciprocal condition number is   

            SEP( lambda, T22 ) = sigma-min( T22 - lambda*I )   

    where sigma-min denotes the smallest singular value. We approximate   
    the smallest singular value by the reciprocal of an estimate of the   
    one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is   
    defined to be abs(T(1,1)).   

    An approximate error bound for a computed right eigenvector VR(i)   
    is given by   

                        EPS * norm(T) / SEP(i)   

    =====================================================================   


       Decode and test the input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    
    /* System generated locals */
    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, 
	    work_dim1, work_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2;
    doublecomplex z__1;
    /* Builtin functions */
    double z_abs(doublecomplex *), d_imag(doublecomplex *);
    /* Local variables */
    static integer kase, ierr;
    static doublecomplex prod;
    static doublereal lnrm, rnrm;
    static integer i__, j, k;
    static doublereal scale;
    extern logical lsame_(char *, char *);
    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *);
    static doublecomplex dummy[1];
    static logical wants;
    static doublereal xnorm;
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    extern doublereal dznrm2_(integer *, doublecomplex *, integer *), dlamch_(
	    char *);
    static integer ks, ix;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal bignum;
    static logical wantbh;
    extern /* Subroutine */ int zlacon_(integer *, doublecomplex *, 
	    doublecomplex *, doublereal *, integer *);
    extern integer izamax_(integer *, doublecomplex *, integer *);
    static logical somcon;
    extern /* Subroutine */ int zdrscl_(integer *, doublereal *, 
	    doublecomplex *, integer *);
    static char normin[1];
    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *);
    static doublereal smlnum;
    static logical wantsp;
    extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, 
	    integer *, doublecomplex *, integer *, doublecomplex *, 
	    doublereal *, doublereal *, integer *), ztrexc_(char *, integer *, doublecomplex *, integer *, 
	    doublecomplex *, integer *, integer *, integer *, integer *);
    static doublereal eps, est;
#define work_subscr(a_1,a_2) (a_2)*work_dim1 + a_1
#define work_ref(a_1,a_2) work[work_subscr(a_1,a_2)]
#define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1
#define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)]
#define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1
#define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)]
#define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1
#define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)]


    --select;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1 * 1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1 * 1;
    vr -= vr_offset;
    --s;
    --sep;
    work_dim1 = *ldwork;
    work_offset = 1 + work_dim1 * 1;
    work -= work_offset;
    --rwork;

    /* Function Body */
    wantbh = lsame_(job, "B");
    wants = lsame_(job, "E") || wantbh;
    wantsp = lsame_(job, "V") || wantbh;

    somcon = lsame_(howmny, "S");

/*     Set M to the number of eigenpairs for which condition numbers are   
       to be computed. */

    if (somcon) {
	*m = 0;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    if (select[j]) {
		++(*m);
	    }
/* L10: */
	}
    } else {
	*m = *n;
    }

    *info = 0;
    if (! wants && ! wantsp) {
	*info = -1;
    } else if (! lsame_(howmny, "A") && ! somcon) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*ldt < max(1,*n)) {
	*info = -6;
    } else if (*ldvl < 1 || wants && *ldvl < *n) {
	*info = -8;
    } else if (*ldvr < 1 || wants && *ldvr < *n) {
	*info = -10;
    } else if (*mm < *m) {
	*info = -13;
    } else if (*ldwork < 1 || wantsp && *ldwork < *n) {
	*info = -16;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTRSNA", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    if (*n == 1) {
	if (somcon) {
	    if (! select[1]) {
		return 0;
	    }
	}
	if (wants) {
	    s[1] = 1.;
	}
	if (wantsp) {
	    sep[1] = z_abs(&t_ref(1, 1));
	}
	return 0;
    }

/*     Get machine constants */

    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);

    ks = 1;
    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {

	if (somcon) {
	    if (! select[k]) {
		goto L50;
	    }
	}

	if (wants) {

/*           Compute the reciprocal condition number of the k-th   
             eigenvalue. */

	    zdotc_(&z__1, n, &vr_ref(1, ks), &c__1, &vl_ref(1, ks), &c__1);
	    prod.r = z__1.r, prod.i = z__1.i;
	    rnrm = dznrm2_(n, &vr_ref(1, ks), &c__1);
	    lnrm = dznrm2_(n, &vl_ref(1, ks), &c__1);
	    s[ks] = z_abs(&prod) / (rnrm * lnrm);

	}

	if (wantsp) {

/*           Estimate the reciprocal condition number of the k-th   
             eigenvector.   

             Copy the matrix T to the array WORK and swap the k-th   
             diagonal element to the (1,1) position. */

	    zlacpy_("Full", n, n, &t[t_offset], ldt, &work[work_offset], 
		    ldwork);
	    ztrexc_("No Q", n, &work[work_offset], ldwork, dummy, &c__1, &k, &
		    c__1, &ierr);

/*           Form  C = T22 - lambda*I in WORK(2:N,2:N). */

	    i__2 = *n;
	    for (i__ = 2; i__ <= i__2; ++i__) {
		i__3 = work_subscr(i__, i__);
		i__4 = work_subscr(i__, i__);
		i__5 = work_subscr(1, 1);
		z__1.r = work[i__4].r - work[i__5].r, z__1.i = work[i__4].i - 
			work[i__5].i;
		work[i__3].r = z__1.r, work[i__3].i = z__1.i;
/* L20: */
	    }

/*           Estimate a lower bound for the 1-norm of inv(C'). The 1st   
             and (N+1)th columns of WORK are used to store work vectors. */

	    sep[ks] = 0.;
	    est = 0.;
	    kase = 0;
	    *(unsigned char *)normin = 'N';
L30:
	    i__2 = *n - 1;
	    zlacon_(&i__2, &work_ref(1, *n + 1), &work[work_offset], &est, &
		    kase);

	    if (kase != 0) {
		if (kase == 1) {

/*                 Solve C'*x = scale*b */

		    i__2 = *n - 1;
		    zlatrs_("Upper", "Conjugate transpose", "Nonunit", normin,
			     &i__2, &work_ref(2, 2), ldwork, &work[
			    work_offset], &scale, &rwork[1], &ierr);
		} else {

/*                 Solve C*x = scale*b */

		    i__2 = *n - 1;
		    zlatrs_("Upper", "No transpose", "Nonunit", normin, &i__2,
			     &work_ref(2, 2), ldwork, &work[work_offset], &
			    scale, &rwork[1], &ierr);
		}
		*(unsigned char *)normin = 'Y';
		if (scale != 1.) {

/*                 Multiply by 1/SCALE if doing so will not cause   
                   overflow. */

		    i__2 = *n - 1;
		    ix = izamax_(&i__2, &work[work_offset], &c__1);
		    i__2 = work_subscr(ix, 1);
		    xnorm = (d__1 = work[i__2].r, abs(d__1)) + (d__2 = d_imag(
			    &work_ref(ix, 1)), abs(d__2));
		    if (scale < xnorm * smlnum || scale == 0.) {
			goto L40;
		    }
		    zdrscl_(n, &scale, &work[work_offset], &c__1);
		}
		goto L30;
	    }

	    sep[ks] = 1. / max(est,smlnum);
	}

L40:
	++ks;
L50:
	;
    }
    return 0;

/*     End of ZTRSNA */

} /* ztrsna_ */
Exemple #16
0
/* Subroutine */ int dlaqtr_(logical *ltran, logical *lreal, integer *n, 
	doublereal *t, integer *ldt, doublereal *b, doublereal *w, doublereal 
	*scale, doublereal *x, doublereal *work, integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    DLAQTR solves the real quasi-triangular system   

                 op(T)*p = scale*c,               if LREAL = .TRUE.   

    or the complex quasi-triangular systems   

               op(T + iB)*(p+iq) = scale*(c+id),  if LREAL = .FALSE.   

    in real arithmetic, where T is upper quasi-triangular.   
    If LREAL = .FALSE., then the first diagonal block of T must be   
    1 by 1, B is the specially structured matrix   

                   B = [ b(1) b(2) ... b(n) ]   
                       [       w            ]   
                       [           w        ]   
                       [              .     ]   
                       [                 w  ]   

    op(A) = A or A', A' denotes the conjugate transpose of   
    matrix A.   

    On input, X = [ c ].  On output, X = [ p ].   
                  [ d ]                  [ q ]   

    This subroutine is designed for the condition number estimation   
    in routine DTRSNA.   

    Arguments   
    =========   

    LTRAN   (input) LOGICAL   
            On entry, LTRAN specifies the option of conjugate transpose:   
               = .FALSE.,    op(T+i*B) = T+i*B,   
               = .TRUE.,     op(T+i*B) = (T+i*B)'.   

    LREAL   (input) LOGICAL   
            On entry, LREAL specifies the input matrix structure:   
               = .FALSE.,    the input is complex   
               = .TRUE.,     the input is real   

    N       (input) INTEGER   
            On entry, N specifies the order of T+i*B. N >= 0.   

    T       (input) DOUBLE PRECISION array, dimension (LDT,N)   
            On entry, T contains a matrix in Schur canonical form.   
            If LREAL = .FALSE., then the first diagonal block of T mu   
            be 1 by 1.   

    LDT     (input) INTEGER   
            The leading dimension of the matrix T. LDT >= max(1,N).   

    B       (input) DOUBLE PRECISION array, dimension (N)   
            On entry, B contains the elements to form the matrix   
            B as described above.   
            If LREAL = .TRUE., B is not referenced.   

    W       (input) DOUBLE PRECISION   
            On entry, W is the diagonal element of the matrix B.   
            If LREAL = .TRUE., W is not referenced.   

    SCALE   (output) DOUBLE PRECISION   
            On exit, SCALE is the scale factor.   

    X       (input/output) DOUBLE PRECISION array, dimension (2*N)   
            On entry, X contains the right hand side of the system.   
            On exit, X is overwritten by the solution.   

    WORK    (workspace) DOUBLE PRECISION array, dimension (N)   

    INFO    (output) INTEGER   
            On exit, INFO is set to   
               0: successful exit.   
                 1: the some diagonal 1 by 1 block has been perturbed by   
                    a small number SMIN to keep nonsingularity.   
                 2: the some diagonal 2 by 2 block has been perturbed by   
                    a small number in DLALN2 to keep nonsingularity.   
            NOTE: In the interests of speed, this routine does not   
                  check the inputs for errors.   

   =====================================================================   


       Do not test the input parameters for errors   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static logical c_false = FALSE_;
    static integer c__2 = 2;
    static doublereal c_b21 = 1.;
    static doublereal c_b25 = 0.;
    static logical c_true = TRUE_;
    
    /* System generated locals */
    integer t_dim1, t_offset, i__1, i__2;
    doublereal d__1, d__2, d__3, d__4, d__5, d__6;
    /* Local variables */
    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, 
	    integer *);
    static integer ierr;
    static doublereal smin, xmax, d__[4]	/* was [2][2] */;
    static integer i__, j, k;
    static doublereal v[4]	/* was [2][2] */, z__;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *);
    extern doublereal dasum_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, 
	    integer *, doublereal *, integer *);
    static integer jnext, j1, j2;
    static doublereal sminw;
    static integer n1, n2;
    static doublereal xnorm;
    extern /* Subroutine */ int dlaln2_(logical *, integer *, integer *, 
	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
	     doublereal *, doublereal *, integer *, doublereal *, doublereal *
	    , doublereal *, integer *, doublereal *, doublereal *, integer *);
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
	    integer *, doublereal *, integer *, doublereal *);
    static doublereal si, xj;
    extern integer idamax_(integer *, doublereal *, integer *);
    static doublereal scaloc, sr;
    extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *, doublereal *);
    static doublereal bignum;
    static logical notran;
    static doublereal smlnum, rec, eps, tjj, tmp;
#define d___ref(a_1,a_2) d__[(a_2)*2 + a_1 - 3]
#define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1]
#define v_ref(a_1,a_2) v[(a_2)*2 + a_1 - 3]


    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    --b;
    --x;
    --work;

    /* Function Body */
    notran = ! (*ltran);
    *info = 0;

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Set constants to control overflow */

    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;

    xnorm = dlange_("M", n, n, &t[t_offset], ldt, d__);
    if (! (*lreal)) {
/* Computing MAX */
	d__1 = xnorm, d__2 = abs(*w), d__1 = max(d__1,d__2), d__2 = dlange_(
		"M", n, &c__1, &b[1], n, d__);
	xnorm = max(d__1,d__2);
    }
/* Computing MAX */
    d__1 = smlnum, d__2 = eps * xnorm;
    smin = max(d__1,d__2);

/*     Compute 1-norm of each column of strictly upper triangular   
       part of T to control overflow in triangular solver. */

    work[1] = 0.;
    i__1 = *n;
    for (j = 2; j <= i__1; ++j) {
	i__2 = j - 1;
	work[j] = dasum_(&i__2, &t_ref(1, j), &c__1);
/* L10: */
    }

    if (! (*lreal)) {
	i__1 = *n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    work[i__] += (d__1 = b[i__], abs(d__1));
/* L20: */
	}
    }

    n2 = *n << 1;
    n1 = *n;
    if (! (*lreal)) {
	n1 = n2;
    }
    k = idamax_(&n1, &x[1], &c__1);
    xmax = (d__1 = x[k], abs(d__1));
    *scale = 1.;

    if (xmax > bignum) {
	*scale = bignum / xmax;
	dscal_(&n1, scale, &x[1], &c__1);
	xmax = bignum;
    }

    if (*lreal) {

	if (notran) {

/*           Solve T*p = scale*c */

	    jnext = *n;
	    for (j = *n; j >= 1; --j) {
		if (j > jnext) {
		    goto L30;
		}
		j1 = j;
		j2 = j;
		jnext = j - 1;
		if (j > 1) {
		    if (t_ref(j, j - 1) != 0.) {
			j1 = j - 1;
			jnext = j - 2;
		    }
		}

		if (j1 == j2) {

/*                 Meet 1 by 1 diagonal block   

                   Scale to avoid overflow when computing   
                       x(j) = b(j)/T(j,j) */

		    xj = (d__1 = x[j1], abs(d__1));
		    tjj = (d__1 = t_ref(j1, j1), abs(d__1));
		    tmp = t_ref(j1, j1);
		    if (tjj < smin) {
			tmp = smin;
			tjj = smin;
			*info = 1;
		    }

		    if (xj == 0.) {
			goto L30;
		    }

		    if (tjj < 1.) {
			if (xj > bignum * tjj) {
			    rec = 1. / xj;
			    dscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    x[j1] /= tmp;
		    xj = (d__1 = x[j1], abs(d__1));

/*                 Scale x if necessary to avoid overflow when adding a   
                   multiple of column j1 of T. */

		    if (xj > 1.) {
			rec = 1. / xj;
			if (work[j1] > (bignum - xmax) * rec) {
			    dscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			}
		    }
		    if (j1 > 1) {
			i__1 = j1 - 1;
			d__1 = -x[j1];
			daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[1], &
				c__1);
			i__1 = j1 - 1;
			k = idamax_(&i__1, &x[1], &c__1);
			xmax = (d__1 = x[k], abs(d__1));
		    }

		} else {

/*                 Meet 2 by 2 diagonal block   

                   Call 2 by 2 linear system solve, to take   
                   care of possible overflow by scaling factor. */

		    d___ref(1, 1) = x[j1];
		    d___ref(2, 1) = x[j2];
		    dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b21, &t_ref(j1, 
			    j1), ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, &
			    c_b25, v, &c__2, &scaloc, &xnorm, &ierr);
		    if (ierr != 0) {
			*info = 2;
		    }

		    if (scaloc != 1.) {
			dscal_(n, &scaloc, &x[1], &c__1);
			*scale *= scaloc;
		    }
		    x[j1] = v_ref(1, 1);
		    x[j2] = v_ref(2, 1);

/*                 Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2))   
                   to avoid overflow in updating right-hand side.   

   Computing MAX */
		    d__3 = (d__1 = v_ref(1, 1), abs(d__1)), d__4 = (d__2 = 
			    v_ref(2, 1), abs(d__2));
		    xj = max(d__3,d__4);
		    if (xj > 1.) {
			rec = 1. / xj;
/* Computing MAX */
			d__1 = work[j1], d__2 = work[j2];
			if (max(d__1,d__2) > (bignum - xmax) * rec) {
			    dscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			}
		    }

/*                 Update right-hand side */

		    if (j1 > 1) {
			i__1 = j1 - 1;
			d__1 = -x[j1];
			daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[1], &
				c__1);
			i__1 = j1 - 1;
			d__1 = -x[j2];
			daxpy_(&i__1, &d__1, &t_ref(1, j2), &c__1, &x[1], &
				c__1);
			i__1 = j1 - 1;
			k = idamax_(&i__1, &x[1], &c__1);
			xmax = (d__1 = x[k], abs(d__1));
		    }

		}

L30:
		;
	    }

	} else {

/*           Solve T'*p = scale*c */

	    jnext = 1;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		if (j < jnext) {
		    goto L40;
		}
		j1 = j;
		j2 = j;
		jnext = j + 1;
		if (j < *n) {
		    if (t_ref(j + 1, j) != 0.) {
			j2 = j + 1;
			jnext = j + 2;
		    }
		}

		if (j1 == j2) {

/*                 1 by 1 diagonal block   

                   Scale if necessary to avoid overflow in forming the   
                   right-hand side element by inner product. */

		    xj = (d__1 = x[j1], abs(d__1));
		    if (xmax > 1.) {
			rec = 1. / xmax;
			if (work[j1] > (bignum - xj) * rec) {
			    dscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }

		    i__2 = j1 - 1;
		    x[j1] -= ddot_(&i__2, &t_ref(1, j1), &c__1, &x[1], &c__1);

		    xj = (d__1 = x[j1], abs(d__1));
		    tjj = (d__1 = t_ref(j1, j1), abs(d__1));
		    tmp = t_ref(j1, j1);
		    if (tjj < smin) {
			tmp = smin;
			tjj = smin;
			*info = 1;
		    }

		    if (tjj < 1.) {
			if (xj > bignum * tjj) {
			    rec = 1. / xj;
			    dscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    x[j1] /= tmp;
/* Computing MAX */
		    d__2 = xmax, d__3 = (d__1 = x[j1], abs(d__1));
		    xmax = max(d__2,d__3);

		} else {

/*                 2 by 2 diagonal block   

                   Scale if necessary to avoid overflow in forming the   
                   right-hand side elements by inner product.   

   Computing MAX */
		    d__3 = (d__1 = x[j1], abs(d__1)), d__4 = (d__2 = x[j2], 
			    abs(d__2));
		    xj = max(d__3,d__4);
		    if (xmax > 1.) {
			rec = 1. / xmax;
/* Computing MAX */
			d__1 = work[j2], d__2 = work[j1];
			if (max(d__1,d__2) > (bignum - xj) * rec) {
			    dscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }

		    i__2 = j1 - 1;
		    d___ref(1, 1) = x[j1] - ddot_(&i__2, &t_ref(1, j1), &c__1,
			     &x[1], &c__1);
		    i__2 = j1 - 1;
		    d___ref(2, 1) = x[j2] - ddot_(&i__2, &t_ref(1, j2), &c__1,
			     &x[1], &c__1);

		    dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b21, &t_ref(j1, 
			    j1), ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, &
			    c_b25, v, &c__2, &scaloc, &xnorm, &ierr);
		    if (ierr != 0) {
			*info = 2;
		    }

		    if (scaloc != 1.) {
			dscal_(n, &scaloc, &x[1], &c__1);
			*scale *= scaloc;
		    }
		    x[j1] = v_ref(1, 1);
		    x[j2] = v_ref(2, 1);
/* Computing MAX */
		    d__3 = (d__1 = x[j1], abs(d__1)), d__4 = (d__2 = x[j2], 
			    abs(d__2)), d__3 = max(d__3,d__4);
		    xmax = max(d__3,xmax);

		}
L40:
		;
	    }
	}

    } else {

/* Computing MAX */
	d__1 = eps * abs(*w);
	sminw = max(d__1,smin);
	if (notran) {

/*           Solve (T + iB)*(p+iq) = c+id */

	    jnext = *n;
	    for (j = *n; j >= 1; --j) {
		if (j > jnext) {
		    goto L70;
		}
		j1 = j;
		j2 = j;
		jnext = j - 1;
		if (j > 1) {
		    if (t_ref(j, j - 1) != 0.) {
			j1 = j - 1;
			jnext = j - 2;
		    }
		}

		if (j1 == j2) {

/*                 1 by 1 diagonal block   

                   Scale if necessary to avoid overflow in division */

		    z__ = *w;
		    if (j1 == 1) {
			z__ = b[1];
		    }
		    xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs(
			    d__2));
		    tjj = (d__1 = t_ref(j1, j1), abs(d__1)) + abs(z__);
		    tmp = t_ref(j1, j1);
		    if (tjj < sminw) {
			tmp = sminw;
			tjj = sminw;
			*info = 1;
		    }

		    if (xj == 0.) {
			goto L70;
		    }

		    if (tjj < 1.) {
			if (xj > bignum * tjj) {
			    rec = 1. / xj;
			    dscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    dladiv_(&x[j1], &x[*n + j1], &tmp, &z__, &sr, &si);
		    x[j1] = sr;
		    x[*n + j1] = si;
		    xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], abs(
			    d__2));

/*                 Scale x if necessary to avoid overflow when adding a   
                   multiple of column j1 of T. */

		    if (xj > 1.) {
			rec = 1. / xj;
			if (work[j1] > (bignum - xmax) * rec) {
			    dscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			}
		    }

		    if (j1 > 1) {
			i__1 = j1 - 1;
			d__1 = -x[j1];
			daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[1], &
				c__1);
			i__1 = j1 - 1;
			d__1 = -x[*n + j1];
			daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[*n + 1],
				 &c__1);

			x[1] += b[j1] * x[*n + j1];
			x[*n + 1] -= b[j1] * x[j1];

			xmax = 0.;
			i__1 = j1 - 1;
			for (k = 1; k <= i__1; ++k) {
/* Computing MAX */
			    d__3 = xmax, d__4 = (d__1 = x[k], abs(d__1)) + (
				    d__2 = x[k + *n], abs(d__2));
			    xmax = max(d__3,d__4);
/* L50: */
			}
		    }

		} else {

/*                 Meet 2 by 2 diagonal block */

		    d___ref(1, 1) = x[j1];
		    d___ref(2, 1) = x[j2];
		    d___ref(1, 2) = x[*n + j1];
		    d___ref(2, 2) = x[*n + j2];
		    d__1 = -(*w);
		    dlaln2_(&c_false, &c__2, &c__2, &sminw, &c_b21, &t_ref(j1,
			     j1), ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, &
			    d__1, v, &c__2, &scaloc, &xnorm, &ierr);
		    if (ierr != 0) {
			*info = 2;
		    }

		    if (scaloc != 1.) {
			i__1 = *n << 1;
			dscal_(&i__1, &scaloc, &x[1], &c__1);
			*scale = scaloc * *scale;
		    }
		    x[j1] = v_ref(1, 1);
		    x[j2] = v_ref(2, 1);
		    x[*n + j1] = v_ref(1, 2);
		    x[*n + j2] = v_ref(2, 2);

/*                 Scale X(J1), .... to avoid overflow in   
                   updating right hand side.   

   Computing MAX */
		    d__5 = (d__1 = v_ref(1, 1), abs(d__1)) + (d__2 = v_ref(1, 
			    2), abs(d__2)), d__6 = (d__3 = v_ref(2, 1), abs(
			    d__3)) + (d__4 = v_ref(2, 2), abs(d__4));
		    xj = max(d__5,d__6);
		    if (xj > 1.) {
			rec = 1. / xj;
/* Computing MAX */
			d__1 = work[j1], d__2 = work[j2];
			if (max(d__1,d__2) > (bignum - xmax) * rec) {
			    dscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			}
		    }

/*                 Update the right-hand side. */

		    if (j1 > 1) {
			i__1 = j1 - 1;
			d__1 = -x[j1];
			daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[1], &
				c__1);
			i__1 = j1 - 1;
			d__1 = -x[j2];
			daxpy_(&i__1, &d__1, &t_ref(1, j2), &c__1, &x[1], &
				c__1);

			i__1 = j1 - 1;
			d__1 = -x[*n + j1];
			daxpy_(&i__1, &d__1, &t_ref(1, j1), &c__1, &x[*n + 1],
				 &c__1);
			i__1 = j1 - 1;
			d__1 = -x[*n + j2];
			daxpy_(&i__1, &d__1, &t_ref(1, j2), &c__1, &x[*n + 1],
				 &c__1);

			x[1] = x[1] + b[j1] * x[*n + j1] + b[j2] * x[*n + j2];
			x[*n + 1] = x[*n + 1] - b[j1] * x[j1] - b[j2] * x[j2];

			xmax = 0.;
			i__1 = j1 - 1;
			for (k = 1; k <= i__1; ++k) {
/* Computing MAX */
			    d__3 = (d__1 = x[k], abs(d__1)) + (d__2 = x[k + *
				    n], abs(d__2));
			    xmax = max(d__3,xmax);
/* L60: */
			}
		    }

		}
L70:
		;
	    }

	} else {

/*           Solve (T + iB)'*(p+iq) = c+id */

	    jnext = 1;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		if (j < jnext) {
		    goto L80;
		}
		j1 = j;
		j2 = j;
		jnext = j + 1;
		if (j < *n) {
		    if (t_ref(j + 1, j) != 0.) {
			j2 = j + 1;
			jnext = j + 2;
		    }
		}

		if (j1 == j2) {

/*                 1 by 1 diagonal block   

                   Scale if necessary to avoid overflow in forming the   
                   right-hand side element by inner product. */

		    xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], abs(
			    d__2));
		    if (xmax > 1.) {
			rec = 1. / xmax;
			if (work[j1] > (bignum - xj) * rec) {
			    dscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }

		    i__2 = j1 - 1;
		    x[j1] -= ddot_(&i__2, &t_ref(1, j1), &c__1, &x[1], &c__1);
		    i__2 = j1 - 1;
		    x[*n + j1] -= ddot_(&i__2, &t_ref(1, j1), &c__1, &x[*n + 
			    1], &c__1);
		    if (j1 > 1) {
			x[j1] -= b[j1] * x[*n + 1];
			x[*n + j1] += b[j1] * x[1];
		    }
		    xj = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], abs(
			    d__2));

		    z__ = *w;
		    if (j1 == 1) {
			z__ = b[1];
		    }

/*                 Scale if necessary to avoid overflow in   
                   complex division */

		    tjj = (d__1 = t_ref(j1, j1), abs(d__1)) + abs(z__);
		    tmp = t_ref(j1, j1);
		    if (tjj < sminw) {
			tmp = sminw;
			tjj = sminw;
			*info = 1;
		    }

		    if (tjj < 1.) {
			if (xj > bignum * tjj) {
			    rec = 1. / xj;
			    dscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    d__1 = -z__;
		    dladiv_(&x[j1], &x[*n + j1], &tmp, &d__1, &sr, &si);
		    x[j1] = sr;
		    x[j1 + *n] = si;
/* Computing MAX */
		    d__3 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[j1 + *n], 
			    abs(d__2));
		    xmax = max(d__3,xmax);

		} else {

/*                 2 by 2 diagonal block   

                   Scale if necessary to avoid overflow in forming the   
                   right-hand side element by inner product.   

   Computing MAX */
		    d__5 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], 
			    abs(d__2)), d__6 = (d__3 = x[j2], abs(d__3)) + (
			    d__4 = x[*n + j2], abs(d__4));
		    xj = max(d__5,d__6);
		    if (xmax > 1.) {
			rec = 1. / xmax;
/* Computing MAX */
			d__1 = work[j1], d__2 = work[j2];
			if (max(d__1,d__2) > (bignum - xj) / xmax) {
			    dscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }

		    i__2 = j1 - 1;
		    d___ref(1, 1) = x[j1] - ddot_(&i__2, &t_ref(1, j1), &c__1,
			     &x[1], &c__1);
		    i__2 = j1 - 1;
		    d___ref(2, 1) = x[j2] - ddot_(&i__2, &t_ref(1, j2), &c__1,
			     &x[1], &c__1);
		    i__2 = j1 - 1;
		    d___ref(1, 2) = x[*n + j1] - ddot_(&i__2, &t_ref(1, j1), &
			    c__1, &x[*n + 1], &c__1);
		    i__2 = j1 - 1;
		    d___ref(2, 2) = x[*n + j2] - ddot_(&i__2, &t_ref(1, j2), &
			    c__1, &x[*n + 1], &c__1);
		    d___ref(1, 1) = d___ref(1, 1) - b[j1] * x[*n + 1];
		    d___ref(2, 1) = d___ref(2, 1) - b[j2] * x[*n + 1];
		    d___ref(1, 2) = d___ref(1, 2) + b[j1] * x[1];
		    d___ref(2, 2) = d___ref(2, 2) + b[j2] * x[1];

		    dlaln2_(&c_true, &c__2, &c__2, &sminw, &c_b21, &t_ref(j1, 
			    j1), ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, w, 
			    v, &c__2, &scaloc, &xnorm, &ierr);
		    if (ierr != 0) {
			*info = 2;
		    }

		    if (scaloc != 1.) {
			dscal_(&n2, &scaloc, &x[1], &c__1);
			*scale = scaloc * *scale;
		    }
		    x[j1] = v_ref(1, 1);
		    x[j2] = v_ref(2, 1);
		    x[*n + j1] = v_ref(1, 2);
		    x[*n + j2] = v_ref(2, 2);
/* Computing MAX */
		    d__5 = (d__1 = x[j1], abs(d__1)) + (d__2 = x[*n + j1], 
			    abs(d__2)), d__6 = (d__3 = x[j2], abs(d__3)) + (
			    d__4 = x[*n + j2], abs(d__4)), d__5 = max(d__5,
			    d__6);
		    xmax = max(d__5,xmax);

		}

L80:
		;
	    }

	}

    }

    return 0;

/*     End of DLAQTR */

} /* dlaqtr_ */
Exemple #17
0
/* Subroutine */ int cdrvgg_(integer *nsizes, integer *nn, integer *ntypes, 
	logical *dotype, integer *iseed, real *thresh, real *thrshn, integer *
	nounit, complex *a, integer *lda, complex *b, complex *s, complex *t, 
	complex *s2, complex *t2, complex *q, integer *ldq, complex *z__, 
	complex *alpha1, complex *beta1, complex *alpha2, complex *beta2, 
	complex *vl, complex *vr, complex *work, integer *lwork, real *rwork, 
	real *result, integer *info)
{
    /* Initialized data */

    static integer kclass[26] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,
	    2,2,2,3 };
    static integer kbmagn[26] = { 1,1,1,1,1,1,1,1,3,2,3,2,2,3,1,1,1,1,1,1,1,3,
	    2,3,2,1 };
    static integer ktrian[26] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,
	    1,1,1,1 };
    static logical lasign[26] = { FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,
	    TRUE_,FALSE_,TRUE_,TRUE_,FALSE_,FALSE_,TRUE_,TRUE_,TRUE_,FALSE_,
	    TRUE_,FALSE_,FALSE_,FALSE_,TRUE_,TRUE_,TRUE_,TRUE_,TRUE_,FALSE_ };
    static logical lbsign[26] = { FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,
	    FALSE_,TRUE_,FALSE_,FALSE_,TRUE_,TRUE_,FALSE_,FALSE_,TRUE_,FALSE_,
	    TRUE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,FALSE_,
	    FALSE_ };
    static integer kz1[6] = { 0,1,2,1,3,3 };
    static integer kz2[6] = { 0,0,1,2,1,1 };
    static integer kadd[6] = { 0,0,0,0,3,2 };
    static integer katype[26] = { 0,1,0,1,2,3,4,1,4,4,1,1,4,4,4,2,4,5,8,7,9,4,
	    4,4,4,0 };
    static integer kbtype[26] = { 0,0,1,1,2,-3,1,4,1,1,4,4,1,1,-4,2,-4,8,8,8,
	    8,8,8,8,8,0 };
    static integer kazero[26] = { 1,1,1,1,1,1,2,1,2,2,1,1,2,2,3,1,3,5,5,5,5,3,
	    3,3,3,1 };
    static integer kbzero[26] = { 1,1,1,1,1,1,1,2,1,1,2,2,1,1,4,1,4,6,6,6,6,4,
	    4,4,4,1 };
    static integer kamagn[26] = { 1,1,1,1,1,1,1,1,2,3,2,3,2,3,1,1,1,1,1,1,1,2,
	    3,3,2,1 };

    /* Format strings */
    static char fmt_9999[] = "(\002 CDRVGG: \002,a,\002 returned INFO=\002,i"
	    "6,\002.\002,/9x,\002N=\002,i6,\002, JTYPE=\002,i6,\002, ISEED="
	    "(\002,3(i5,\002,\002),i5,\002)\002)";
    static char fmt_9998[] = "(\002 CDRVGG: \002,a,\002 Eigenvectors from"
	    " \002,a,\002 incorrectly \002,\002normalized.\002,/\002 Bits of "
	    "error=\002,0p,g10.3,\002,\002,9x,\002N=\002,i6,\002, JTYPE=\002,"
	    "i6,\002, ISEED=(\002,3(i5,\002,\002),i5,\002)\002)";
    static char fmt_9997[] = "(/1x,a3,\002 -- Complex Generalized eigenvalue"
	    " problem driver\002)";
    static char fmt_9996[] = "(\002 Matrix types (see CDRVGG for details):"
	    " \002)";
    static char fmt_9995[] = "(\002 Special Matrices:\002,23x,\002(J'=transp"
	    "osed Jordan block)\002,/\002   1=(0,0)  2=(I,0)  3=(0,I)  4=(I,I"
	    ")  5=(J',J')  \002,\0026=(diag(J',I), diag(I,J'))\002,/\002 Diag"
	    "onal Matrices:  ( \002,\002D=diag(0,1,2,...) )\002,/\002   7=(D,"
	    "I)   9=(large*D, small*I\002,\002)  11=(large*I, small*D)  13=(l"
	    "arge*D, large*I)\002,/\002   8=(I,D)  10=(small*D, large*I)  12="
	    "(small*I, large*D) \002,\002 14=(small*D, small*I)\002,/\002  15"
	    "=(D, reversed D)\002)";
    static char fmt_9994[] = "(\002 Matrices Rotated by Random \002,a,\002 M"
	    "atrices U, V:\002,/\002  16=Transposed Jordan Blocks            "
	    " 19=geometric \002,\002alpha, beta=0,1\002,/\002  17=arithm. alp"
	    "ha&beta             \002,\002      20=arithmetic alpha, beta=0,"
	    "1\002,/\002  18=clustered \002,\002alpha, beta=0,1            21"
	    "=random alpha, beta=0,1\002,/\002 Large & Small Matrices:\002,"
	    "/\002  22=(large, small)   \002,\00223=(small,large)    24=(smal"
	    "l,small)    25=(large,large)\002,/\002  26=random O(1) matrices"
	    ".\002)";
    static char fmt_9993[] = "(/\002 Tests performed:  (S is Schur, T is tri"
	    "angular, \002,\002Q and Z are \002,a,\002,\002,/20x,\002l and r "
	    "are the appropriate left and right\002,/19x,\002eigenvectors, re"
	    "sp., a is alpha, b is beta, and\002,/19x,a,\002 means \002,a,"
	    "\002.)\002,/\002 1 = | A - Q S Z\002,a,\002 | / ( |A| n ulp )   "
	    "   2 = | B - Q T Z\002,a,\002 | / ( |B| n ulp )\002,/\002 3 = | "
	    "I - QQ\002,a,\002 | / ( n ulp )             4 = | I - ZZ\002,a"
	    ",\002 | / ( n ulp )\002,/\002 5 = difference between (alpha,beta"
	    ") and diagonals of\002,\002 (S,T)\002,/\002 6 = max | ( b A - a "
	    "B )\002,a,\002 l | / const.   7 = max | ( b A - a B ) r | / cons"
	    "t.\002,/1x)";
    static char fmt_9992[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2"
	    ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i3,\002 is\002"
	    ",0p,f8.2)";
    static char fmt_9991[] = "(\002 Matrix order=\002,i5,\002, type=\002,i2"
	    ",\002, seed=\002,4(i4,\002,\002),\002 result \002,i3,\002 is\002"
	    ",1p,e10.3)";

    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, s_dim1, 
	    s_offset, s2_dim1, s2_offset, t_dim1, t_offset, t2_dim1, 
	    t2_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, z_dim1, 
	    z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9, 
	    i__10, i__11;
    real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8, r__9, r__10, r__11, 
	    r__12, r__13, r__14, r__15, r__16;
    complex q__1, q__2, q__3, q__4;

    /* Builtin functions */
    double r_sign(real *, real *), c_abs(complex *);
    void r_cnjg(complex *, complex *);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);
    double r_imag(complex *);

    /* Local variables */
    static integer iadd, nmax;
    static real temp1, temp2;
    static integer j, n;
    static logical badnn;
    extern /* Subroutine */ int cgegs_(char *, char *, integer *, complex *, 
	    integer *, complex *, integer *, complex *, complex *, complex *, 
	    integer *, complex *, integer *, complex *, integer *, real *, 
	    integer *), cgegv_(char *, char *, integer *, 
	    complex *, integer *, complex *, integer *, complex *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, integer *, 
	    real *, integer *), cget51_(integer *, integer *, 
	    complex *, integer *, complex *, integer *, complex *, integer *, 
	    complex *, integer *, complex *, real *, real *), cget52_(logical 
	    *, integer *, complex *, integer *, complex *, integer *, complex 
	    *, integer *, complex *, complex *, complex *, real *, real *);
    static real dumma[4];
    static integer iinfo;
    static real rmagn[4];
    static complex ctemp;
    static integer nmats, jsize, nerrs, i1, jtype, ntest, n1;
    extern /* Subroutine */ int clatm4_(integer *, integer *, integer *, 
	    integer *, logical *, real *, real *, real *, integer *, integer *
	    , complex *, integer *), cunm2r_(char *, char *, integer *, 
	    integer *, integer *, complex *, integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    static integer jc, nb;
    extern /* Subroutine */ int slabad_(real *, real *);
    static integer in, jr;
    extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, 
	    integer *, complex *);
    static integer ns;
    extern /* Complex */ VOID clarnd_(complex *, integer *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *);
    static real safmin, safmax;
    static integer ioldsd[4];
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int alasvm_(char *, integer *, integer *, integer 
	    *, integer *), claset_(char *, integer *, integer *, 
	    complex *, complex *, complex *, integer *), xerbla_(char 
	    *, integer *);
    static real ulpinv;
    static integer lwkopt, mtypes, ntestt, nbz;
    static real ulp;

    /* Fortran I/O blocks */
    static cilist io___43 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___44 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___47 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___49 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___50 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___51 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___52 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___53 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___54 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___55 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___56 = { 0, 0, 0, fmt_9992, 0 };
    static cilist io___57 = { 0, 0, 0, fmt_9991, 0 };



#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1
#define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)]
#define s_subscr(a_1,a_2) (a_2)*s_dim1 + a_1
#define s_ref(a_1,a_2) s[s_subscr(a_1,a_2)]
#define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1
#define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)]
#define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1
#define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CDRVGG  checks the nonsymmetric generalized eigenvalue driver   
    routines.   
                                  T          T        T   
    CGEGS factors A and B as Q S Z  and Q T Z , where   means   
    transpose, T is upper triangular, S is in generalized Schur form   
    (upper triangular), and Q and Z are unitary.  It also   
    computes the generalized eigenvalues (alpha(1),beta(1)), ...,   
    (alpha(n),beta(n)), where alpha(j)=S(j,j) and beta(j)=T(j,j) --   
    thus, w(j) = alpha(j)/beta(j) is a root of the generalized   
    eigenvalue problem   

        det( A - w(j) B ) = 0   

    and m(j) = beta(j)/alpha(j) is a root of the essentially equivalent   
    problem   

        det( m(j) A - B ) = 0   

    CGEGV computes the generalized eigenvalues (alpha(1),beta(1)), ...,   
    (alpha(n),beta(n)), the matrix L whose columns contain the   
    generalized left eigenvectors l, and the matrix R whose columns   
    contain the generalized right eigenvectors r for the pair (A,B).   

    When CDRVGG is called, a number of matrix "sizes" ("n's") and a   
    number of matrix "types" are specified.  For each size ("n")   
    and each type of matrix, one matrix will be generated and used   
    to test the nonsymmetric eigenroutines.  For each matrix, 7   
    tests will be performed and compared with the threshhold THRESH:   

    Results from CGEGS:   

                     H   
    (1)   | A - Q S Z  | / ( |A| n ulp )   

                     H   
    (2)   | B - Q T Z  | / ( |B| n ulp )   

                  H   
    (3)   | I - QQ  | / ( n ulp )   

                  H   
    (4)   | I - ZZ  | / ( n ulp )   

    (5)   maximum over j of D(j)  where:   

                        |alpha(j) - S(j,j)|        |beta(j) - T(j,j)|   
              D(j) = ------------------------ + -----------------------   
                     max(|alpha(j)|,|S(j,j)|)   max(|beta(j)|,|T(j,j)|)   

    Results from CGEGV:   

    (6)   max over all left eigenvalue/-vector pairs (beta/alpha,l) of   

       | l**H * (beta A - alpha B) | / ( ulp max( |beta A|, |alpha B| ) )   

          where l**H is the conjugate tranpose of l.   

    (7)   max over all right eigenvalue/-vector pairs (beta/alpha,r) of   

          | (beta A - alpha B) r | / ( ulp max( |beta A|, |alpha B| ) )   

    Test Matrices   
    ---- --------   

    The sizes of the test matrices are specified by an array   
    NN(1:NSIZES); the value of each element NN(j) specifies one size.   
    The "types" are specified by a logical array DOTYPE( 1:NTYPES ); if   
    DOTYPE(j) is .TRUE., then matrix type "j" will be generated.   
    Currently, the list of possible types is:   

    (1)  ( 0, 0 )         (a pair of zero matrices)   

    (2)  ( I, 0 )         (an identity and a zero matrix)   

    (3)  ( 0, I )         (an identity and a zero matrix)   

    (4)  ( I, I )         (a pair of identity matrices)   

            t   t   
    (5)  ( J , J  )       (a pair of transposed Jordan blocks)   

                                        t                ( I   0  )   
    (6)  ( X, Y )         where  X = ( J   0  )  and Y = (      t )   
                                     ( 0   I  )          ( 0   J  )   
                          and I is a k x k identity and J a (k+1)x(k+1)   
                          Jordan block; k=(N-1)/2   

    (7)  ( D, I )         where D is diag( 0, 1,..., N-1 ) (a diagonal   
                          matrix with those diagonal entries.)   
    (8)  ( I, D )   

    (9)  ( big*D, small*I ) where "big" is near overflow and small=1/big   

    (10) ( small*D, big*I )   

    (11) ( big*I, small*D )   

    (12) ( small*I, big*D )   

    (13) ( big*D, big*I )   

    (14) ( small*D, small*I )   

    (15) ( D1, D2 )        where D1 is diag( 0, 0, 1, ..., N-3, 0 ) and   
                           D2 is diag( 0, N-3, N-4,..., 1, 0, 0 )   
              t   t   
    (16) Q ( J , J ) Z     where Q and Z are random unitary matrices.   

    (17) Q ( T1, T2 ) Z    where T1 and T2 are upper triangular matrices   
                           with random O(1) entries above the diagonal   
                           and diagonal entries diag(T1) =   
                           ( 0, 0, 1, ..., N-3, 0 ) and diag(T2) =   
                           ( 0, N-3, N-4,..., 1, 0, 0 )   

    (18) Q ( T1, T2 ) Z    diag(T1) = ( 0, 0, 1, 1, s, ..., s, 0 )   
                           diag(T2) = ( 0, 1, 0, 1,..., 1, 0 )   
                           s = machine precision.   

    (19) Q ( T1, T2 ) Z    diag(T1)=( 0,0,1,1, 1-d, ..., 1-(N-5)*d=s, 0 )   
                           diag(T2) = ( 0, 1, 0, 1, ..., 1, 0 )   

                                                           N-5   
    (20) Q ( T1, T2 ) Z    diag(T1)=( 0, 0, 1, 1, a, ..., a   =s, 0 )   
                           diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 )   

    (21) Q ( T1, T2 ) Z    diag(T1)=( 0, 0, 1, r1, r2, ..., r(N-4), 0 )   
                           diag(T2) = ( 0, 1, 0, 1, ..., 1, 0, 0 )   
                           where r1,..., r(N-4) are random.   

    (22) Q ( big*T1, small*T2 ) Z    diag(T1) = ( 0, 0, 1, ..., N-3, 0 )   
                                     diag(T2) = ( 0, 1, ..., 1, 0, 0 )   

    (23) Q ( small*T1, big*T2 ) Z    diag(T1) = ( 0, 0, 1, ..., N-3, 0 )   
                                     diag(T2) = ( 0, 1, ..., 1, 0, 0 )   

    (24) Q ( small*T1, small*T2 ) Z  diag(T1) = ( 0, 0, 1, ..., N-3, 0 )   
                                     diag(T2) = ( 0, 1, ..., 1, 0, 0 )   

    (25) Q ( big*T1, big*T2 ) Z      diag(T1) = ( 0, 0, 1, ..., N-3, 0 )   
                                     diag(T2) = ( 0, 1, ..., 1, 0, 0 )   

    (26) Q ( T1, T2 ) Z     where T1 and T2 are random upper-triangular   
                            matrices.   

    Arguments   
    =========   

    NSIZES  (input) INTEGER   
            The number of sizes of matrices to use.  If it is zero,   
            CDRVGG does nothing.  It must be at least zero.   

    NN      (input) INTEGER array, dimension (NSIZES)   
            An array containing the sizes to be used for the matrices.   
            Zero values will be skipped.  The values must be at least   
            zero.   

    NTYPES  (input) INTEGER   
            The number of elements in DOTYPE.   If it is zero, CDRVGG   
            does nothing.  It must be at least zero.  If it is MAXTYP+1   
            and NSIZES is 1, then an additional type, MAXTYP+1 is   
            defined, which is to use whatever matrix is in A.  This   
            is only useful if DOTYPE(1:MAXTYP) is .FALSE. and   
            DOTYPE(MAXTYP+1) is .TRUE. .   

    DOTYPE  (input) LOGICAL array, dimension (NTYPES)   
            If DOTYPE(j) is .TRUE., then for each size in NN a   
            matrix of that size and of type j will be generated.   
            If NTYPES is smaller than the maximum number of types   
            defined (PARAMETER MAXTYP), then types NTYPES+1 through   
            MAXTYP will not be generated.  If NTYPES is larger   
            than MAXTYP, DOTYPE(MAXTYP+1) through DOTYPE(NTYPES)   
            will be ignored.   

    ISEED   (input/output) INTEGER array, dimension (4)   
            On entry ISEED specifies the seed of the random number   
            generator. The array elements should be between 0 and 4095;   
            if not they will be reduced mod 4096.  Also, ISEED(4) must   
            be odd.  The random number generator uses a linear   
            congruential sequence limited to small integers, and so   
            should produce machine independent random numbers. The   
            values of ISEED are changed on exit, and can be used in the   
            next call to CDRVGG to continue the same random number   
            sequence.   

    THRESH  (input) REAL   
            A test will count as "failed" if the "error", computed as   
            described above, exceeds THRESH.  Note that the error is   
            scaled to be O(1), so THRESH should be a reasonably small   
            multiple of 1, e.g., 10 or 100.  In particular, it should   
            not depend on the precision (single vs. double) or the size   
            of the matrix.  It must be at least zero.   

    THRSHN  (input) REAL   
            Threshhold for reporting eigenvector normalization error.   
            If the normalization of any eigenvector differs from 1 by   
            more than THRSHN*ulp, then a special error message will be   
            printed.  (This is handled separately from the other tests,   
            since only a compiler or programming error should cause an   
            error message, at least if THRSHN is at least 5--10.)   

    NOUNIT  (input) INTEGER   
            The FORTRAN unit number for printing out error messages   
            (e.g., if a routine returns IINFO not equal to 0.)   

    A       (input/workspace) COMPLEX array, dimension (LDA, max(NN))   
            Used to hold the original A matrix.  Used as input only   
            if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and   
            DOTYPE(MAXTYP+1)=.TRUE.   

    LDA     (input) INTEGER   
            The leading dimension of A, B, S, T, S2, and T2.   
            It must be at least 1 and at least max( NN ).   

    B       (input/workspace) COMPLEX array, dimension (LDA, max(NN))   
            Used to hold the original B matrix.  Used as input only   
            if NTYPES=MAXTYP+1, DOTYPE(1:MAXTYP)=.FALSE., and   
            DOTYPE(MAXTYP+1)=.TRUE.   

    S       (workspace) COMPLEX array, dimension (LDA, max(NN))   
            The upper triangular matrix computed from A by CGEGS.   

    T       (workspace) COMPLEX array, dimension (LDA, max(NN))   
            The upper triangular matrix computed from B by CGEGS.   

    S2      (workspace) COMPLEX array, dimension (LDA, max(NN))   
            The matrix computed from A by CGEGV.  This will be the   
            Schur (upper triangular) form of some matrix related to A,   
            but will not, in general, be the same as S.   

    T2      (workspace) COMPLEX array, dimension (LDA, max(NN))   
            The matrix computed from B by CGEGV.  This will be the   
            Schur form of some matrix related to B, but will not, in   
            general, be the same as T.   

    Q       (workspace) COMPLEX array, dimension (LDQ, max(NN))   
            The (left) unitary matrix computed by CGEGS.   

    LDQ     (input) INTEGER   
            The leading dimension of Q, Z, VL, and VR.  It must   
            be at least 1 and at least max( NN ).   

    Z       (workspace) COMPLEX array, dimension (LDQ, max(NN))   
            The (right) unitary matrix computed by CGEGS.   

    ALPHA1  (workspace) COMPLEX array, dimension (max(NN))   
    BETA1   (workspace) COMPLEX array, dimension (max(NN))   
            The generalized eigenvalues of (A,B) computed by CGEGS.   
            ALPHA1(k) / BETA1(k)  is the k-th generalized eigenvalue of   
            the matrices in A and B.   

    ALPHA2  (workspace) COMPLEX array, dimension (max(NN))   
    BETA2   (workspace) COMPLEX array, dimension (max(NN))   
            The generalized eigenvalues of (A,B) computed by CGEGV.   
            ALPHA2(k) / BETA2(k)  is the k-th generalized eigenvalue of   
            the matrices in A and B.   

    VL      (workspace) COMPLEX array, dimension (LDQ, max(NN))   
            The (lower triangular) left eigenvector matrix for the   
            matrices in A and B.   

    VR      (workspace) COMPLEX array, dimension (LDQ, max(NN))   
            The (upper triangular) right eigenvector matrix for the   
            matrices in A and B.   

    WORK    (workspace) COMPLEX array, dimension (LWORK)   

    LWORK   (input) INTEGER   
            The number of entries in WORK.  This must be at least   
            MAX( 2*N, N*(NB+1), (k+1)*(2*k+N+1) ), where "k" is the   
            sum of the blocksize and number-of-shifts for CHGEQZ, and   
            NB is the greatest of the blocksizes for CGEQRF, CUNMQR,   
            and CUNGQR.  (The blocksizes and the number-of-shifts are   
            retrieved through calls to ILAENV.)   

    RWORK   (workspace) REAL array, dimension (8*N)   

    RESULT  (output) REAL array, dimension (7)   
            The values computed by the tests described above.   
            The values are currently limited to 1/ulp, to avoid   
            overflow.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  A routine returned an error code.  INFO is the   
                  absolute value of the INFO value returned.   

    =====================================================================   

       Parameter adjustments */
    --nn;
    --dotype;
    --iseed;
    t2_dim1 = *lda;
    t2_offset = 1 + t2_dim1 * 1;
    t2 -= t2_offset;
    s2_dim1 = *lda;
    s2_offset = 1 + s2_dim1 * 1;
    s2 -= s2_offset;
    t_dim1 = *lda;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    s_dim1 = *lda;
    s_offset = 1 + s_dim1 * 1;
    s -= s_offset;
    b_dim1 = *lda;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    vr_dim1 = *ldq;
    vr_offset = 1 + vr_dim1 * 1;
    vr -= vr_offset;
    vl_dim1 = *ldq;
    vl_offset = 1 + vl_dim1 * 1;
    vl -= vl_offset;
    z_dim1 = *ldq;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --alpha1;
    --beta1;
    --alpha2;
    --beta2;
    --work;
    --rwork;
    --result;

    /* Function Body   

       Check for errors */

    *info = 0;

    badnn = FALSE_;
    nmax = 1;
    i__1 = *nsizes;
    for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	i__2 = nmax, i__3 = nn[j];
	nmax = max(i__2,i__3);
	if (nn[j] < 0) {
	    badnn = TRUE_;
	}
/* L10: */
    }

/*     Maximum blocksize and shift -- we assume that blocksize and number   
       of shifts are monotone increasing functions of N.   

   Computing MAX */
    i__1 = 1, i__2 = ilaenv_(&c__1, "CGEQRF", " ", &nmax, &nmax, &c_n1, &c_n1,
	     (ftnlen)6, (ftnlen)1), i__1 = max(i__1,i__2), i__2 = ilaenv_(&
	    c__1, "CUNMQR", "LC", &nmax, &nmax, &nmax, &c_n1, (ftnlen)6, (
	    ftnlen)2), i__1 = max(i__1,i__2), i__2 = ilaenv_(&c__1, "CUNGQR", 
	    " ", &nmax, &nmax, &nmax, &c_n1, (ftnlen)6, (ftnlen)1);
    nb = max(i__1,i__2);
    nbz = ilaenv_(&c__1, "CHGEQZ", "SII", &nmax, &c__1, &nmax, &c__0, (ftnlen)
	    6, (ftnlen)3);
    ns = ilaenv_(&c__4, "CHGEQZ", "SII", &nmax, &c__1, &nmax, &c__0, (ftnlen)
	    6, (ftnlen)3);
    i1 = nbz + ns;
/* Computing MAX */
    i__1 = nmax << 1, i__2 = nmax * (nb + 1), i__1 = max(i__1,i__2), i__2 = ((
	    i1 << 1) + nmax + 1) * (i1 + 1);
    lwkopt = max(i__1,i__2);

/*     Check for errors */

    if (*nsizes < 0) {
	*info = -1;
    } else if (badnn) {
	*info = -2;
    } else if (*ntypes < 0) {
	*info = -3;
    } else if (*thresh < 0.f) {
	*info = -6;
    } else if (*lda <= 1 || *lda < nmax) {
	*info = -10;
    } else if (*ldq <= 1 || *ldq < nmax) {
	*info = -19;
    } else if (lwkopt > *lwork) {
	*info = -30;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CDRVGG", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*nsizes == 0 || *ntypes == 0) {
	return 0;
    }

    ulp = slamch_("Precision");
    safmin = slamch_("Safe minimum");
    safmin /= ulp;
    safmax = 1.f / safmin;
    slabad_(&safmin, &safmax);
    ulpinv = 1.f / ulp;

/*     The values RMAGN(2:3) depend on N, see below. */

    rmagn[0] = 0.f;
    rmagn[1] = 1.f;

/*     Loop over sizes, types */

    ntestt = 0;
    nerrs = 0;
    nmats = 0;

    i__1 = *nsizes;
    for (jsize = 1; jsize <= i__1; ++jsize) {
	n = nn[jsize];
	n1 = max(1,n);
	rmagn[2] = safmax * ulp / (real) n1;
	rmagn[3] = safmin * ulpinv * n1;

	if (*nsizes != 1) {
	    mtypes = min(26,*ntypes);
	} else {
	    mtypes = min(27,*ntypes);
	}

	i__2 = mtypes;
	for (jtype = 1; jtype <= i__2; ++jtype) {
	    if (! dotype[jtype]) {
		goto L150;
	    }
	    ++nmats;
	    ntest = 0;

/*           Save ISEED in case of an error. */

	    for (j = 1; j <= 4; ++j) {
		ioldsd[j - 1] = iseed[j];
/* L20: */
	    }

/*           Initialize RESULT */

	    for (j = 1; j <= 7; ++j) {
		result[j] = 0.f;
/* L30: */
	    }

/*           Compute A and B   

             Description of control parameters:   

             KCLASS: =1 means w/o rotation, =2 means w/ rotation,   
                     =3 means random.   
             KATYPE: the "type" to be passed to CLATM4 for computing A.   
             KAZERO: the pattern of zeros on the diagonal for A:   
                     =1: ( xxx ), =2: (0, xxx ) =3: ( 0, 0, xxx, 0 ),   
                     =4: ( 0, xxx, 0, 0 ), =5: ( 0, 0, 1, xxx, 0 ),   
                     =6: ( 0, 1, 0, xxx, 0 ).  (xxx means a string of   
                     non-zero entries.)   
             KAMAGN: the magnitude of the matrix: =0: zero, =1: O(1),   
                     =2: large, =3: small.   
             LASIGN: .TRUE. if the diagonal elements of A are to be   
                     multiplied by a random magnitude 1 number.   
             KBTYPE, KBZERO, KBMAGN, IBSIGN: the same, but for B.   
             KTRIAN: =0: don't fill in the upper triangle, =1: do.   
             KZ1, KZ2, KADD: used to implement KAZERO and KBZERO.   
             RMAGN:  used to implement KAMAGN and KBMAGN. */

	    if (mtypes > 26) {
		goto L110;
	    }
	    iinfo = 0;
	    if (kclass[jtype - 1] < 3) {

/*              Generate A (w/o rotation) */

		if ((i__3 = katype[jtype - 1], abs(i__3)) == 3) {
		    in = ((n - 1) / 2 << 1) + 1;
		    if (in != n) {
			claset_("Full", &n, &n, &c_b1, &c_b1, &a[a_offset], 
				lda);
		    }
		} else {
		    in = n;
		}
		clatm4_(&katype[jtype - 1], &in, &kz1[kazero[jtype - 1] - 1], 
			&kz2[kazero[jtype - 1] - 1], &lasign[jtype - 1], &
			rmagn[kamagn[jtype - 1]], &ulp, &rmagn[ktrian[jtype - 
			1] * kamagn[jtype - 1]], &c__2, &iseed[1], &a[
			a_offset], lda);
		iadd = kadd[kazero[jtype - 1] - 1];
		if (iadd > 0 && iadd <= n) {
		    i__3 = a_subscr(iadd, iadd);
		    i__4 = kamagn[jtype - 1];
		    a[i__3].r = rmagn[i__4], a[i__3].i = 0.f;
		}

/*              Generate B (w/o rotation) */

		if ((i__3 = kbtype[jtype - 1], abs(i__3)) == 3) {
		    in = ((n - 1) / 2 << 1) + 1;
		    if (in != n) {
			claset_("Full", &n, &n, &c_b1, &c_b1, &b[b_offset], 
				lda);
		    }
		} else {
		    in = n;
		}
		clatm4_(&kbtype[jtype - 1], &in, &kz1[kbzero[jtype - 1] - 1], 
			&kz2[kbzero[jtype - 1] - 1], &lbsign[jtype - 1], &
			rmagn[kbmagn[jtype - 1]], &c_b39, &rmagn[ktrian[jtype 
			- 1] * kbmagn[jtype - 1]], &c__2, &iseed[1], &b[
			b_offset], lda);
		iadd = kadd[kbzero[jtype - 1] - 1];
		if (iadd != 0 && iadd <= n) {
		    i__3 = b_subscr(iadd, iadd);
		    i__4 = kbmagn[jtype - 1];
		    b[i__3].r = rmagn[i__4], b[i__3].i = 0.f;
		}

		if (kclass[jtype - 1] == 2 && n > 0) {

/*                 Include rotations   

                   Generate Q, Z as Householder transformations times   
                   a diagonal matrix. */

		    i__3 = n - 1;
		    for (jc = 1; jc <= i__3; ++jc) {
			i__4 = n;
			for (jr = jc; jr <= i__4; ++jr) {
			    i__5 = q_subscr(jr, jc);
			    clarnd_(&q__1, &c__3, &iseed[1]);
			    q[i__5].r = q__1.r, q[i__5].i = q__1.i;
			    i__5 = z___subscr(jr, jc);
			    clarnd_(&q__1, &c__3, &iseed[1]);
			    z__[i__5].r = q__1.r, z__[i__5].i = q__1.i;
/* L40: */
			}
			i__4 = n + 1 - jc;
			clarfg_(&i__4, &q_ref(jc, jc), &q_ref(jc + 1, jc), &
				c__1, &work[jc]);
			i__4 = (n << 1) + jc;
			i__5 = q_subscr(jc, jc);
			r__2 = q[i__5].r;
			r__1 = r_sign(&c_b39, &r__2);
			work[i__4].r = r__1, work[i__4].i = 0.f;
			i__4 = q_subscr(jc, jc);
			q[i__4].r = 1.f, q[i__4].i = 0.f;
			i__4 = n + 1 - jc;
			clarfg_(&i__4, &z___ref(jc, jc), &z___ref(jc + 1, jc),
				 &c__1, &work[n + jc]);
			i__4 = n * 3 + jc;
			i__5 = z___subscr(jc, jc);
			r__2 = z__[i__5].r;
			r__1 = r_sign(&c_b39, &r__2);
			work[i__4].r = r__1, work[i__4].i = 0.f;
			i__4 = z___subscr(jc, jc);
			z__[i__4].r = 1.f, z__[i__4].i = 0.f;
/* L50: */
		    }
		    clarnd_(&q__1, &c__3, &iseed[1]);
		    ctemp.r = q__1.r, ctemp.i = q__1.i;
		    i__3 = q_subscr(n, n);
		    q[i__3].r = 1.f, q[i__3].i = 0.f;
		    i__3 = n;
		    work[i__3].r = 0.f, work[i__3].i = 0.f;
		    i__3 = n * 3;
		    r__1 = c_abs(&ctemp);
		    q__1.r = ctemp.r / r__1, q__1.i = ctemp.i / r__1;
		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
		    clarnd_(&q__1, &c__3, &iseed[1]);
		    ctemp.r = q__1.r, ctemp.i = q__1.i;
		    i__3 = z___subscr(n, n);
		    z__[i__3].r = 1.f, z__[i__3].i = 0.f;
		    i__3 = n << 1;
		    work[i__3].r = 0.f, work[i__3].i = 0.f;
		    i__3 = n << 2;
		    r__1 = c_abs(&ctemp);
		    q__1.r = ctemp.r / r__1, q__1.i = ctemp.i / r__1;
		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;

/*                 Apply the diagonal matrices */

		    i__3 = n;
		    for (jc = 1; jc <= i__3; ++jc) {
			i__4 = n;
			for (jr = 1; jr <= i__4; ++jr) {
			    i__5 = a_subscr(jr, jc);
			    i__6 = (n << 1) + jr;
			    r_cnjg(&q__3, &work[n * 3 + jc]);
			    q__2.r = work[i__6].r * q__3.r - work[i__6].i * 
				    q__3.i, q__2.i = work[i__6].r * q__3.i + 
				    work[i__6].i * q__3.r;
			    i__7 = a_subscr(jr, jc);
			    q__1.r = q__2.r * a[i__7].r - q__2.i * a[i__7].i, 
				    q__1.i = q__2.r * a[i__7].i + q__2.i * a[
				    i__7].r;
			    a[i__5].r = q__1.r, a[i__5].i = q__1.i;
			    i__5 = b_subscr(jr, jc);
			    i__6 = (n << 1) + jr;
			    r_cnjg(&q__3, &work[n * 3 + jc]);
			    q__2.r = work[i__6].r * q__3.r - work[i__6].i * 
				    q__3.i, q__2.i = work[i__6].r * q__3.i + 
				    work[i__6].i * q__3.r;
			    i__7 = b_subscr(jr, jc);
			    q__1.r = q__2.r * b[i__7].r - q__2.i * b[i__7].i, 
				    q__1.i = q__2.r * b[i__7].i + q__2.i * b[
				    i__7].r;
			    b[i__5].r = q__1.r, b[i__5].i = q__1.i;
/* L60: */
			}
/* L70: */
		    }
		    i__3 = n - 1;
		    cunm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[
			    1], &a[a_offset], lda, &work[(n << 1) + 1], &
			    iinfo);
		    if (iinfo != 0) {
			goto L100;
		    }
		    i__3 = n - 1;
		    cunm2r_("R", "C", &n, &n, &i__3, &z__[z_offset], ldq, &
			    work[n + 1], &a[a_offset], lda, &work[(n << 1) + 
			    1], &iinfo);
		    if (iinfo != 0) {
			goto L100;
		    }
		    i__3 = n - 1;
		    cunm2r_("L", "N", &n, &n, &i__3, &q[q_offset], ldq, &work[
			    1], &b[b_offset], lda, &work[(n << 1) + 1], &
			    iinfo);
		    if (iinfo != 0) {
			goto L100;
		    }
		    i__3 = n - 1;
		    cunm2r_("R", "C", &n, &n, &i__3, &z__[z_offset], ldq, &
			    work[n + 1], &b[b_offset], lda, &work[(n << 1) + 
			    1], &iinfo);
		    if (iinfo != 0) {
			goto L100;
		    }
		}
	    } else {

/*              Random matrices */

		i__3 = n;
		for (jc = 1; jc <= i__3; ++jc) {
		    i__4 = n;
		    for (jr = 1; jr <= i__4; ++jr) {
			i__5 = a_subscr(jr, jc);
			i__6 = kamagn[jtype - 1];
			clarnd_(&q__2, &c__4, &iseed[1]);
			q__1.r = rmagn[i__6] * q__2.r, q__1.i = rmagn[i__6] * 
				q__2.i;
			a[i__5].r = q__1.r, a[i__5].i = q__1.i;
			i__5 = b_subscr(jr, jc);
			i__6 = kbmagn[jtype - 1];
			clarnd_(&q__2, &c__4, &iseed[1]);
			q__1.r = rmagn[i__6] * q__2.r, q__1.i = rmagn[i__6] * 
				q__2.i;
			b[i__5].r = q__1.r, b[i__5].i = q__1.i;
/* L80: */
		    }
/* L90: */
		}
	    }

L100:

	    if (iinfo != 0) {
		io___43.ciunit = *nounit;
		s_wsfe(&io___43);
		do_fio(&c__1, "Generator", (ftnlen)9);
		do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer));
		e_wsfe();
		*info = abs(iinfo);
		return 0;
	    }

L110:

/*           Call CGEGS to compute H, T, Q, Z, alpha, and beta. */

	    clacpy_(" ", &n, &n, &a[a_offset], lda, &s[s_offset], lda);
	    clacpy_(" ", &n, &n, &b[b_offset], lda, &t[t_offset], lda);
	    ntest = 1;
	    result[1] = ulpinv;

	    cgegs_("V", "V", &n, &s[s_offset], lda, &t[t_offset], lda, &
		    alpha1[1], &beta1[1], &q[q_offset], ldq, &z__[z_offset], 
		    ldq, &work[1], lwork, &rwork[1], &iinfo);
	    if (iinfo != 0) {
		io___44.ciunit = *nounit;
		s_wsfe(&io___44);
		do_fio(&c__1, "CGEGS", (ftnlen)5);
		do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer));
		e_wsfe();
		*info = abs(iinfo);
		goto L130;
	    }

	    ntest = 4;

/*           Do tests 1--4 */

	    cget51_(&c__1, &n, &a[a_offset], lda, &s[s_offset], lda, &q[
		    q_offset], ldq, &z__[z_offset], ldq, &work[1], &rwork[1], 
		    &result[1]);
	    cget51_(&c__1, &n, &b[b_offset], lda, &t[t_offset], lda, &q[
		    q_offset], ldq, &z__[z_offset], ldq, &work[1], &rwork[1], 
		    &result[2]);
	    cget51_(&c__3, &n, &b[b_offset], lda, &t[t_offset], lda, &q[
		    q_offset], ldq, &q[q_offset], ldq, &work[1], &rwork[1], &
		    result[3]);
	    cget51_(&c__3, &n, &b[b_offset], lda, &t[t_offset], lda, &z__[
		    z_offset], ldq, &z__[z_offset], ldq, &work[1], &rwork[1], 
		    &result[4]);

/*           Do test 5: compare eigenvalues with diagonals. */

	    temp1 = 0.f;

	    i__3 = n;
	    for (j = 1; j <= i__3; ++j) {
		i__4 = j;
		i__5 = s_subscr(j, j);
		q__2.r = alpha1[i__4].r - s[i__5].r, q__2.i = alpha1[i__4].i 
			- s[i__5].i;
		q__1.r = q__2.r, q__1.i = q__2.i;
		i__6 = j;
		i__7 = t_subscr(j, j);
		q__4.r = beta1[i__6].r - t[i__7].r, q__4.i = beta1[i__6].i - 
			t[i__7].i;
		q__3.r = q__4.r, q__3.i = q__4.i;
/* Computing MAX */
		i__8 = j;
		i__9 = s_subscr(j, j);
		r__13 = safmin, r__14 = (r__1 = alpha1[i__8].r, dabs(r__1)) + 
			(r__2 = r_imag(&alpha1[j]), dabs(r__2)), r__13 = max(
			r__13,r__14), r__14 = (r__3 = s[i__9].r, dabs(r__3)) 
			+ (r__4 = r_imag(&s_ref(j, j)), dabs(r__4));
/* Computing MAX */
		i__10 = j;
		i__11 = t_subscr(j, j);
		r__15 = safmin, r__16 = (r__5 = beta1[i__10].r, dabs(r__5)) + 
			(r__6 = r_imag(&beta1[j]), dabs(r__6)), r__15 = max(
			r__15,r__16), r__16 = (r__7 = t[i__11].r, dabs(r__7)) 
			+ (r__8 = r_imag(&t_ref(j, j)), dabs(r__8));
		temp2 = (((r__9 = q__1.r, dabs(r__9)) + (r__10 = r_imag(&q__1)
			, dabs(r__10))) / dmax(r__13,r__14) + ((r__11 = 
			q__3.r, dabs(r__11)) + (r__12 = r_imag(&q__3), dabs(
			r__12))) / dmax(r__15,r__16)) / ulp;
		temp1 = dmax(temp1,temp2);
/* L120: */
	    }
	    result[5] = temp1;

/*           Call CGEGV to compute S2, T2, VL, and VR, do tests.   

             Eigenvalues and Eigenvectors */

	    clacpy_(" ", &n, &n, &a[a_offset], lda, &s2[s2_offset], lda);
	    clacpy_(" ", &n, &n, &b[b_offset], lda, &t2[t2_offset], lda);
	    ntest = 6;
	    result[6] = ulpinv;

	    cgegv_("V", "V", &n, &s2[s2_offset], lda, &t2[t2_offset], lda, &
		    alpha2[1], &beta2[1], &vl[vl_offset], ldq, &vr[vr_offset],
		     ldq, &work[1], lwork, &rwork[1], &iinfo);
	    if (iinfo != 0) {
		io___47.ciunit = *nounit;
		s_wsfe(&io___47);
		do_fio(&c__1, "CGEGV", (ftnlen)5);
		do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer));
		e_wsfe();
		*info = abs(iinfo);
		goto L130;
	    }

	    ntest = 7;

/*           Do Tests 6 and 7 */

	    cget52_(&c_true, &n, &a[a_offset], lda, &b[b_offset], lda, &vl[
		    vl_offset], ldq, &alpha2[1], &beta2[1], &work[1], &rwork[
		    1], dumma);
	    result[6] = dumma[0];
	    if (dumma[1] > *thrshn) {
		io___49.ciunit = *nounit;
		s_wsfe(&io___49);
		do_fio(&c__1, "Left", (ftnlen)4);
		do_fio(&c__1, "CGEGV", (ftnlen)5);
		do_fio(&c__1, (char *)&dumma[1], (ftnlen)sizeof(real));
		do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer));
		e_wsfe();
	    }

	    cget52_(&c_false, &n, &a[a_offset], lda, &b[b_offset], lda, &vr[
		    vr_offset], ldq, &alpha2[1], &beta2[1], &work[1], &rwork[
		    1], dumma);
	    result[7] = dumma[0];
	    if (dumma[1] > *thresh) {
		io___50.ciunit = *nounit;
		s_wsfe(&io___50);
		do_fio(&c__1, "Right", (ftnlen)5);
		do_fio(&c__1, "CGEGV", (ftnlen)5);
		do_fio(&c__1, (char *)&dumma[1], (ftnlen)sizeof(real));
		do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer));
		e_wsfe();
	    }

/*           End of Loop -- Check for RESULT(j) > THRESH */

L130:

	    ntestt += ntest;

/*           Print out tests which fail. */

	    i__3 = ntest;
	    for (jr = 1; jr <= i__3; ++jr) {
		if (result[jr] >= *thresh) {

/*                 If this is the first test to fail,   
                   print a header to the data file. */

		    if (nerrs == 0) {
			io___51.ciunit = *nounit;
			s_wsfe(&io___51);
			do_fio(&c__1, "CGG", (ftnlen)3);
			e_wsfe();

/*                    Matrix types */

			io___52.ciunit = *nounit;
			s_wsfe(&io___52);
			e_wsfe();
			io___53.ciunit = *nounit;
			s_wsfe(&io___53);
			e_wsfe();
			io___54.ciunit = *nounit;
			s_wsfe(&io___54);
			do_fio(&c__1, "Unitary", (ftnlen)7);
			e_wsfe();

/*                    Tests performed */

			io___55.ciunit = *nounit;
			s_wsfe(&io___55);
			do_fio(&c__1, "unitary", (ftnlen)7);
			do_fio(&c__1, "*", (ftnlen)1);
			do_fio(&c__1, "conjugate transpose", (ftnlen)19);
			for (j = 1; j <= 5; ++j) {
			    do_fio(&c__1, "*", (ftnlen)1);
			}
			e_wsfe();

		    }
		    ++nerrs;
		    if (result[jr] < 1e4f) {
			io___56.ciunit = *nounit;
			s_wsfe(&io___56);
			do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer))
				;
			do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(
				integer));
			do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof(
				real));
			e_wsfe();
		    } else {
			io___57.ciunit = *nounit;
			s_wsfe(&io___57);
			do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer))
				;
			do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(
				integer));
			do_fio(&c__1, (char *)&jr, (ftnlen)sizeof(integer));
			do_fio(&c__1, (char *)&result[jr], (ftnlen)sizeof(
				real));
			e_wsfe();
		    }
		}
/* L140: */
	    }

L150:
	    ;
	}
/* L160: */
    }

/*     Summary */

    alasvm_("CGG", nounit, &nerrs, &ntestt, &c__0);
    return 0;







/*     End of CDRVGG */

} /* cdrvgg_ */
/* Subroutine */ int dtrexc_(char *compq, integer *n, doublereal *t, integer *
	ldt, doublereal *q, integer *ldq, integer *ifst, integer *ilst, 
	doublereal *work, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    DTREXC reorders the real Schur factorization of a real matrix   
    A = Q*T*Q**T, so that the diagonal block of T with row index IFST is   
    moved to row ILST.   

    The real Schur form T is reordered by an orthogonal similarity   
    transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors   
    is updated by postmultiplying it with Z.   

    T must be in Schur canonical form (as returned by DHSEQR), that is,   
    block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each   
    2-by-2 diagonal block has its diagonal elements equal and its   
    off-diagonal elements of opposite sign.   

    Arguments   
    =========   

    COMPQ   (input) CHARACTER*1   
            = 'V':  update the matrix Q of Schur vectors;   
            = 'N':  do not update Q.   

    N       (input) INTEGER   
            The order of the matrix T. N >= 0.   

    T       (input/output) DOUBLE PRECISION array, dimension (LDT,N)   
            On entry, the upper quasi-triangular matrix T, in Schur   
            Schur canonical form.   
            On exit, the reordered upper quasi-triangular matrix, again   
            in Schur canonical form.   

    LDT     (input) INTEGER   
            The leading dimension of the array T. LDT >= max(1,N).   

    Q       (input/output) DOUBLE PRECISION array, dimension (LDQ,N)   
            On entry, if COMPQ = 'V', the matrix Q of Schur vectors.   
            On exit, if COMPQ = 'V', Q has been postmultiplied by the   
            orthogonal transformation matrix Z which reorders T.   
            If COMPQ = 'N', Q is not referenced.   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.  LDQ >= max(1,N).   

    IFST    (input/output) INTEGER   
    ILST    (input/output) INTEGER   
            Specify the reordering of the diagonal blocks of T.   
            The block with row index IFST is moved to row ILST, by a   
            sequence of transpositions between adjacent blocks.   
            On exit, if IFST pointed on entry to the second row of a   
            2-by-2 block, it is changed to point to the first row; ILST   
            always points to the first row of the block in its final   
            position (which may differ from its input value by +1 or -1).   
            1 <= IFST <= N; 1 <= ILST <= N.   

    WORK    (workspace) DOUBLE PRECISION array, dimension (N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            = 1:  two adjacent blocks were too close to swap (the problem   
                  is very ill-conditioned); T may have been partially   
                  reordered, and ILST points to the first row of the   
                  current position of the block being moved.   

    =====================================================================   


       Decode and test the input arguments.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c__2 = 2;
    
    /* System generated locals */
    integer q_dim1, q_offset, t_dim1, t_offset, i__1;
    /* Local variables */
    static integer here;
    extern logical lsame_(char *, char *);
    static logical wantq;
    extern /* Subroutine */ int dlaexc_(logical *, integer *, doublereal *, 
	    integer *, doublereal *, integer *, integer *, integer *, integer 
	    *, doublereal *, integer *), xerbla_(char *, integer *);
    static integer nbnext, nbf, nbl;
#define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1]


    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --work;

    /* Function Body */
    *info = 0;
    wantq = lsame_(compq, "V");
    if (! wantq && ! lsame_(compq, "N")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*ldt < max(1,*n)) {
	*info = -4;
    } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) {
	*info = -6;
    } else if (*ifst < 1 || *ifst > *n) {
	*info = -7;
    } else if (*ilst < 1 || *ilst > *n) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DTREXC", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n <= 1) {
	return 0;
    }

/*     Determine the first row of specified block   
       and find out it is 1 by 1 or 2 by 2. */

    if (*ifst > 1) {
	if (t_ref(*ifst, *ifst - 1) != 0.) {
	    --(*ifst);
	}
    }
    nbf = 1;
    if (*ifst < *n) {
	if (t_ref(*ifst + 1, *ifst) != 0.) {
	    nbf = 2;
	}
    }

/*     Determine the first row of the final block   
       and find out it is 1 by 1 or 2 by 2. */

    if (*ilst > 1) {
	if (t_ref(*ilst, *ilst - 1) != 0.) {
	    --(*ilst);
	}
    }
    nbl = 1;
    if (*ilst < *n) {
	if (t_ref(*ilst + 1, *ilst) != 0.) {
	    nbl = 2;
	}
    }

    if (*ifst == *ilst) {
	return 0;
    }

    if (*ifst < *ilst) {

/*        Update ILST */

	if (nbf == 2 && nbl == 1) {
	    --(*ilst);
	}
	if (nbf == 1 && nbl == 2) {
	    ++(*ilst);
	}

	here = *ifst;

L10:

/*        Swap block with next one below */

	if (nbf == 1 || nbf == 2) {

/*           Current block either 1 by 1 or 2 by 2 */

	    nbnext = 1;
	    if (here + nbf + 1 <= *n) {
		if (t_ref(here + nbf + 1, here + nbf) != 0.) {
		    nbnext = 2;
		}
	    }
	    dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, &
		    nbf, &nbnext, &work[1], info);
	    if (*info != 0) {
		*ilst = here;
		return 0;
	    }
	    here += nbnext;

/*           Test if 2 by 2 block breaks into two 1 by 1 blocks */

	    if (nbf == 2) {
		if (t_ref(here + 1, here) == 0.) {
		    nbf = 3;
		}
	    }

	} else {

/*           Current block consists of two 1 by 1 blocks each of which   
             must be swapped individually */

	    nbnext = 1;
	    if (here + 3 <= *n) {
		if (t_ref(here + 3, here + 2) != 0.) {
		    nbnext = 2;
		}
	    }
	    i__1 = here + 1;
	    dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &
		    c__1, &nbnext, &work[1], info);
	    if (*info != 0) {
		*ilst = here;
		return 0;
	    }
	    if (nbnext == 1) {

/*              Swap two 1 by 1 blocks, no problems possible */

		dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &
			here, &c__1, &nbnext, &work[1], info);
		++here;
	    } else {

/*              Recompute NBNEXT in case 2 by 2 split */

		if (t_ref(here + 2, here + 1) == 0.) {
		    nbnext = 1;
		}
		if (nbnext == 2) {

/*                 2 by 2 Block did not split */

		    dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &
			    here, &c__1, &nbnext, &work[1], info);
		    if (*info != 0) {
			*ilst = here;
			return 0;
		    }
		    here += 2;
		} else {

/*                 2 by 2 Block did split */

		    dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &
			    here, &c__1, &c__1, &work[1], info);
		    i__1 = here + 1;
		    dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &
			    i__1, &c__1, &c__1, &work[1], info);
		    here += 2;
		}
	    }
	}
	if (here < *ilst) {
	    goto L10;
	}

    } else {

	here = *ifst;
L20:

/*        Swap block with next one above */

	if (nbf == 1 || nbf == 2) {

/*           Current block either 1 by 1 or 2 by 2 */

	    nbnext = 1;
	    if (here >= 3) {
		if (t_ref(here - 1, here - 2) != 0.) {
		    nbnext = 2;
		}
	    }
	    i__1 = here - nbnext;
	    dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &
		    nbnext, &nbf, &work[1], info);
	    if (*info != 0) {
		*ilst = here;
		return 0;
	    }
	    here -= nbnext;

/*           Test if 2 by 2 block breaks into two 1 by 1 blocks */

	    if (nbf == 2) {
		if (t_ref(here + 1, here) == 0.) {
		    nbf = 3;
		}
	    }

	} else {

/*           Current block consists of two 1 by 1 blocks each of which   
             must be swapped individually */

	    nbnext = 1;
	    if (here >= 3) {
		if (t_ref(here - 1, here - 2) != 0.) {
		    nbnext = 2;
		}
	    }
	    i__1 = here - nbnext;
	    dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, &
		    nbnext, &c__1, &work[1], info);
	    if (*info != 0) {
		*ilst = here;
		return 0;
	    }
	    if (nbnext == 1) {

/*              Swap two 1 by 1 blocks, no problems possible */

		dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &
			here, &nbnext, &c__1, &work[1], info);
		--here;
	    } else {

/*              Recompute NBNEXT in case 2 by 2 split */

		if (t_ref(here, here - 1) == 0.) {
		    nbnext = 1;
		}
		if (nbnext == 2) {

/*                 2 by 2 Block did not split */

		    i__1 = here - 1;
		    dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &
			    i__1, &c__2, &c__1, &work[1], info);
		    if (*info != 0) {
			*ilst = here;
			return 0;
		    }
		    here += -2;
		} else {

/*                 2 by 2 Block did split */

		    dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &
			    here, &c__1, &c__1, &work[1], info);
		    i__1 = here - 1;
		    dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &
			    i__1, &c__1, &c__1, &work[1], info);
		    here += -2;
		}
	    }
	}
	if (here > *ilst) {
	    goto L20;
	}
    }
    *ilst = here;

    return 0;

/*     End of DTREXC */

} /* dtrexc_ */
Exemple #19
0
/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer *
	k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, 
	integer *ldt)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    DLARFT forms the triangular factor T of a real block reflector H   
    of order n, which is defined as a product of k elementary reflectors.   

    If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;   

    If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.   

    If STOREV = 'C', the vector which defines the elementary reflector   
    H(i) is stored in the i-th column of the array V, and   

       H  =  I - V * T * V'   

    If STOREV = 'R', the vector which defines the elementary reflector   
    H(i) is stored in the i-th row of the array V, and   

       H  =  I - V' * T * V   

    Arguments   
    =========   

    DIRECT  (input) CHARACTER*1   
            Specifies the order in which the elementary reflectors are   
            multiplied to form the block reflector:   
            = 'F': H = H(1) H(2) . . . H(k) (Forward)   
            = 'B': H = H(k) . . . H(2) H(1) (Backward)   

    STOREV  (input) CHARACTER*1   
            Specifies how the vectors which define the elementary   
            reflectors are stored (see also Further Details):   
            = 'C': columnwise   
            = 'R': rowwise   

    N       (input) INTEGER   
            The order of the block reflector H. N >= 0.   

    K       (input) INTEGER   
            The order of the triangular factor T (= the number of   
            elementary reflectors). K >= 1.   

    V       (input/output) DOUBLE PRECISION array, dimension   
                                 (LDV,K) if STOREV = 'C'   
                                 (LDV,N) if STOREV = 'R'   
            The matrix V. See further details.   

    LDV     (input) INTEGER   
            The leading dimension of the array V.   
            If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.   

    TAU     (input) DOUBLE PRECISION array, dimension (K)   
            TAU(i) must contain the scalar factor of the elementary   
            reflector H(i).   

    T       (output) DOUBLE PRECISION array, dimension (LDT,K)   
            The k by k triangular factor T of the block reflector.   
            If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is   
            lower triangular. The rest of the array is not used.   

    LDT     (input) INTEGER   
            The leading dimension of the array T. LDT >= K.   

    Further Details   
    ===============   

    The shape of the matrix V and the storage of the vectors which define   
    the H(i) is best illustrated by the following example with n = 5 and   
    k = 3. The elements equal to 1 are not stored; the corresponding   
    array elements are modified but restored on exit. The rest of the   
    array is not used.   

    DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':   

                 V = (  1       )                 V = (  1 v1 v1 v1 v1 )   
                     ( v1  1    )                     (     1 v2 v2 v2 )   
                     ( v1 v2  1 )                     (        1 v3 v3 )   
                     ( v1 v2 v3 )   
                     ( v1 v2 v3 )   

    DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':   

                 V = ( v1 v2 v3 )                 V = ( v1 v1  1       )   
                     ( v1 v2 v3 )                     ( v2 v2 v2  1    )   
                     (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )   
                     (     1 v3 )   
                     (        1 )   

    =====================================================================   


       Quick return if possible   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static doublereal c_b8 = 0.;
    
    /* System generated locals */
    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
    doublereal d__1;
    /* Local variables */
    static integer i__, j;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *), dtrmv_(char *, 
	    char *, char *, integer *, doublereal *, integer *, doublereal *, 
	    integer *);
    static doublereal vii;
#define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1]
#define v_ref(a_1,a_2) v[(a_2)*v_dim1 + a_1]


    v_dim1 = *ldv;
    v_offset = 1 + v_dim1 * 1;
    v -= v_offset;
    --tau;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;

    /* Function Body */
    if (*n == 0) {
	return 0;
    }

    if (lsame_(direct, "F")) {
	i__1 = *k;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (tau[i__] == 0.) {

/*              H(i)  =  I */

		i__2 = i__;
		for (j = 1; j <= i__2; ++j) {
		    t_ref(j, i__) = 0.;
/* L10: */
		}
	    } else {

/*              general case */

		vii = v_ref(i__, i__);
		v_ref(i__, i__) = 1.;
		if (lsame_(storev, "C")) {

/*                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */

		    i__2 = *n - i__ + 1;
		    i__3 = i__ - 1;
		    d__1 = -tau[i__];
		    dgemv_("Transpose", &i__2, &i__3, &d__1, &v_ref(i__, 1), 
			    ldv, &v_ref(i__, i__), &c__1, &c_b8, &t_ref(1, 
			    i__), &c__1);
		} else {

/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */

		    i__2 = i__ - 1;
		    i__3 = *n - i__ + 1;
		    d__1 = -tau[i__];
		    dgemv_("No transpose", &i__2, &i__3, &d__1, &v_ref(1, i__)
			    , ldv, &v_ref(i__, i__), ldv, &c_b8, &t_ref(1, 
			    i__), &c__1);
		}
		v_ref(i__, i__) = vii;

/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */

		i__2 = i__ - 1;
		dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
			t_offset], ldt, &t_ref(1, i__), &c__1);
		t_ref(i__, i__) = tau[i__];
	    }
/* L20: */
	}
    } else {
	for (i__ = *k; i__ >= 1; --i__) {
	    if (tau[i__] == 0.) {

/*              H(i)  =  I */

		i__1 = *k;
		for (j = i__; j <= i__1; ++j) {
		    t_ref(j, i__) = 0.;
/* L30: */
		}
	    } else {

/*              general case */

		if (i__ < *k) {
		    if (lsame_(storev, "C")) {
			vii = v_ref(*n - *k + i__, i__);
			v_ref(*n - *k + i__, i__) = 1.;

/*                    T(i+1:k,i) :=   
                              - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) */

			i__1 = *n - *k + i__;
			i__2 = *k - i__;
			d__1 = -tau[i__];
			dgemv_("Transpose", &i__1, &i__2, &d__1, &v_ref(1, 
				i__ + 1), ldv, &v_ref(1, i__), &c__1, &c_b8, &
				t_ref(i__ + 1, i__), &c__1);
			v_ref(*n - *k + i__, i__) = vii;
		    } else {
			vii = v_ref(i__, *n - *k + i__);
			v_ref(i__, *n - *k + i__) = 1.;

/*                    T(i+1:k,i) :=   
                              - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' */

			i__1 = *k - i__;
			i__2 = *n - *k + i__;
			d__1 = -tau[i__];
			dgemv_("No transpose", &i__1, &i__2, &d__1, &v_ref(
				i__ + 1, 1), ldv, &v_ref(i__, 1), ldv, &c_b8, 
				&t_ref(i__ + 1, i__), &c__1);
			v_ref(i__, *n - *k + i__) = vii;
		    }

/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */

		    i__1 = *k - i__;
		    dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t_ref(
			    i__ + 1, i__ + 1), ldt, &t_ref(i__ + 1, i__), &
			    c__1);
		}
		t_ref(i__, i__) = tau[i__];
	    }
/* L40: */
	}
    }
    return 0;

/*     End of DLARFT */

} /* dlarft_ */
	void escapeNoiseNeuron::excite ( baseType c )
	{
		//				baseType phase = 1.0 + pcoBase::exactTime - this->x[0];
		if ( ( pcoBase::nextFiring() - this->time >= timeDelay() ) && ( this->time - lastFiring >t_ref() -timeDelay() ) ) // nicht mehr refraktär und noch nicht feuernd
		{
			pot = pot + c;


			baseType newKey = this->time + timeDelay() +gslNoise::getExponential ( exp ( - escapeNoise_a() * pot+ escapeNoise_b() ) );
			//					pcoBase::decreaseKey();
			eventHandler::updateKey ( _fire_, newKey );


		}
	}
template<typename Ty>  magma_int_t
magma_unmqr_gpu(
    magma_side_t side, magma_trans_t trans,
    magma_int_t m, magma_int_t n, magma_int_t k,
    cl_mem dA, size_t dA_offset, magma_int_t ldda,
    Ty *tau,
    cl_mem dC, size_t dC_offset, magma_int_t lddc,
    Ty *hwork, magma_int_t lwork,
    cl_mem dT, size_t dT_offset, magma_int_t nb,
    magma_queue_t queue,
    magma_int_t *info)
{
/*  -- clMAGMA (version 0.1) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date

    Purpose
    =======
    ZUNMQR_GPU overwrites the general complex M-by-N matrix C with

                    SIDE = 'L'     SIDE = 'R'
    TRANS = 'N':      Q * C          C * Q
    TRANS = 'T':      Q**H * C       C * Q**H

    where Q is a complex orthogonal matrix defined as the product of k
    elementary reflectors

          Q = H(1) H(2) . . . H(k)

    as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N
    if SIDE = 'R'.

    Arguments
    =========
    SIDE    (input) CHARACTER*1
            = 'L': apply Q or Q**H from the Left;
            = 'R': apply Q or Q**H from the Right.

    TRANS   (input) CHARACTER*1
            = 'N':  No transpose, apply Q;
            = 'T':  Transpose, apply Q**H.

    M       (input) INTEGER
            The number of rows of the matrix C. M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix C. N >= 0.

    K       (input) INTEGER
            The number of elementary reflectors whose product defines
            the matrix Q.
            If SIDE = 'L', M >= K >= 0;
            if SIDE = 'R', N >= K >= 0.

    DA      (input) COMPLEX_16 array on the GPU, dimension (LDDA,K)
            The i-th column must contain the vector which defines the
            elementary reflector H(i), for i = 1,2,...,k, as returned by
            ZGEQRF in the first k columns of its array argument DA.
            DA is modified by the routine but restored on exit.

    LDDA    (input) INTEGER
            The leading dimension of the array DA.
            If SIDE = 'L', LDDA >= max(1,M);
            if SIDE = 'R', LDDA >= max(1,N).

    TAU     (input) COMPLEX_16 array, dimension (K)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by ZGEQRF.

    DC      (input/output) COMPLEX_16 array on the GPU, dimension (LDDC,N)
            On entry, the M-by-N matrix C.
            On exit, C is overwritten by Q*C or Q**H * C or C * Q**H or C*Q.

    LDDC     (input) INTEGER
            The leading dimension of the array DC. LDDC >= max(1,M).

    HWORK    (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, HWORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array HWORK.
            LWORK >= (M-K+NB)*(N+2*NB) if SIDE = 'L',
            and LWORK >= (N-K+NB)*(M+2*NB) if SIDE = 'R', where NB is the
            optimal blocksize.

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the HWORK array, returns
            this value as the first entry of the HWORK array, and no error
            message related to LWORK is issued by XERBLA.

    DT      (input) COMPLEX_16 array on the GPU that is the output
            (the 9th argument) of magma_zgeqrf_gpu.

    NB      (input) INTEGER
            This is the blocking size that was used in pre-computing DT, e.g.,
            the blocking size used in magma_zgeqrf_gpu.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
    =====================================================================   */

    #define a_ref(a_1,a_2) dA, (dA_offset+(a_1)+(a_2)*(ldda))
    #define c_ref(a_1,a_2) dC, (dC_offset+(a_1)+(a_2)*(lddc))
    #define t_ref(a_1)     dT, (dT_offset+(a_1)*nb)

    static const Ty c_one = magma_one<Ty>();

    cl_mem dwork;
    magma_int_t i;

    magma_int_t i1, i2, step, ib, ic, jc, ma, mi, ni, nq, nw, ret;
    int left, notran, lquery;
    magma_int_t lwkopt;

    *info = 0;
    left   = (side == MagmaLeft);
    notran = (trans == MagmaNoTrans);
    lquery = (lwork == -1);

    /* NQ is the order of Q and NW is the minimum dimension of WORK */
    if (left) {
        nq = m;
        nw = n;
    } else {
        nq = n;
        nw = m;
    }
    if ( (!left) && (side != MagmaRight) ) {
        *info = -1;
    } else if ( (!notran) && (trans != MagmaConjTrans) ) {
        *info = -2;
    } else if (m < 0) {
        *info = -3;
    } else if (n < 0) {
        *info = -4;
    } else if (k < 0 || k > nq) {
        *info = -5;
    } else if (ldda < std::max(1,nq)) {
        *info = -7;
    } else if (lddc < std::max(1,m)) {
        *info = -10;
    } else if (lwork < std::max(1,nw) && ! lquery) {
        *info = -12;
    }

    lwkopt = (m-k+nb)*(n+2*nb);
    hwork[0] = magma_scalar<Ty>(lwkopt);

    if (*info != 0) {
        //magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (m == 0 || n == 0 || k == 0) {
        hwork[0] = c_one;
        return *info;
    }

    magma_malloc<Ty>(&dwork, (((n+31)/32)*32)*nb);

    cpu_lapack_unmqr_work_func<Ty> cpu_lapack_unmqr;

    if ( (left && (! notran)) || ( (!left) && notran ) ) {
        i1 = 0;
        i2 = k-nb;
        step = nb;
    } else {
        i1 = (k - 1 - nb) / nb * nb;
        i2 = 0;
        step = -nb;
    }

    mi = 0;
    ni = 0;

    if (left) {
        ni = n;
        jc = 0;
    } else {
        mi = m;
        ic = 0;
    }

    static const bool is_real = magma_is_real<Ty>();

    /* Use unblocked code to multiply last or only block (cases Q*C or C*Q^T). */
    // workspace left:  A(mi*nb) + C(mi*ni) + work(ni*nb_la) = (m-k-nb)*nb + (m-k-nb)*n + n*nb
    // workspace right: A(ni*nb) + C(mi*ni) + work(mi*nb_la) = (n-k-nb)*nb + m*(n-k-nb) + m*nb
    if ( step < 0 ) {
        // i is beginning of last block
        i = i1 - step;
        if ( i >= k ) {
            i = i1;
        }
        ib = k - i;
        if (left) {
            // ni=n, jc=0, H or H^T is applied to C(i:m-1,0:n-1)
            mi = m - i;
            ma = mi;
            ic = i;
        }
        else {
            // mi=m, ic=0, H or H^T is applied to C(0:m-1,i:n-1)
            ni = n - i;
            ma = ni;
            jc = i;
        }

        Ty* hA = hwork;
        Ty* hC = hwork + ma*ib;
        Ty* hW = hwork + ma*ib + mi*ni;
        magma_int_t lhwork = lwork - (ma*ib + mi*ni);

        magma_getmatrix<Ty>(ma, ib, a_ref(i,  i ), ldda, hA, ma, queue);
        magma_getmatrix<Ty>(mi, ni, c_ref(ic, jc), lddc, hC, mi, queue);

        LAPACKE_CHECK(cpu_lapack_unmqr(
                          side == MagmaRight ? 'R' : 'L',
                          notran ? 'N' : (is_real ? 'T' : 'C'),
                          mi, ni, ib,
                          hA, ma, tau+i,
                          hC, mi,
                          hW, lhwork));

        // send the updated part of C back to the GPU
        magma_setmatrix<Ty>( mi, ni, hC, mi, c_ref(ic, jc), lddc, queue);
    }


    if (nb < k)
    {
        for (i=i1; step<0 ? i>i2 : i<i2; i+=step)
        {
            ib = std::min(nb, k - i);
            if (left){
                mi = m - i;
                ic = i;
            }
            else {
                ni = n - i;
                jc = i;
            }

            if (mi == 0 || ni == 0) break;

            ret = magma_larfb_gpu<Ty>(MagmaLeft,
                                      is_real ? MagmaTrans : MagmaConjTrans,
                                      MagmaForward, MagmaColumnwise,
                                      mi, ni, ib,
                                      a_ref(i,  i ), ldda, t_ref(i), nb,
                                      c_ref(ic, jc), lddc, dwork, 0, nw, queue);
            if ( ret != MAGMA_SUCCESS )
              return ret;
        }
    }
    else
    {
        i = i1;
    }

    /* Use unblocked code to multiply the last or only block (cases Q^T*C or C*Q). */
    if ( step > 0 ) {
        ib = k-i;
        if (left) {
            // ni=n, jc=0, H or H^T is applied to C(i:m-1,0:n-1)
            mi = m - i;
            ma = mi;
            ic = i;
        }
        else {
            // mi=m, ic=0, H or H^T is applied to C(0:m-1,i:n-1)
            ni = n - i;
            ma = ni;
            jc = i;
        }

        Ty* hA = hwork;
        Ty* hC = hwork + ma*ib;
        Ty* hW = hwork + ma*ib + mi*ni;
        magma_int_t lhwork = lwork - (ma*ib + mi*ni);

        magma_getmatrix<Ty>(ma, ib, a_ref(i,  i ), ldda, hA, ma, queue);
        magma_getmatrix<Ty>(mi, ni, c_ref(ic, jc), lddc, hC, mi, queue);

        LAPACKE_CHECK(cpu_lapack_unmqr(
                          side == MagmaRight ? 'R' : 'L',
                          notran ? 'N' : (is_real ? 'T' : 'C'),
                          mi, ni, ib,
                          hA, ma, tau+i,
                          hC, mi,
                          hW, lhwork));

        // send the updated part of C back to the GPU
        magma_setmatrix<Ty>(mi, ni, hC, mi, c_ref(ic, jc), lddc, queue);
    }

    magma_free(dwork);

    return *info;
    /* End of MAGMA_ZUNMQR_GPU */
}
Exemple #22
0
extern "C" magma_int_t
magma_ssytrd_sy2sb( char uplo, magma_int_t n, magma_int_t nb,
                    float *a, magma_int_t lda, 
                    float *tau,
                    float *work, magma_int_t lwork,
                    float *dT,
                    magma_int_t threads, magma_int_t *info)
{
/*  -- MAGMA (version 1.3.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       November 2012

    Purpose   
    =======   
    SSYTRD_HE2HB reduces a real symmetric matrix A to real symmetric   
    band-diagonal form T by an orthogonal similarity transformation:   
    Q**T * A * Q = T.   
    This version stores the triangular matrices T used in the accumulated
    Householder transformations (I - V T V').

    Arguments   
    =========   
    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower   
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   
            On exit, if UPLO = 'U', the Upper band-diagonal of A is 
            overwritten by the corresponding elements of the   
            band-diagonal matrix T, and the elements above the band   
            diagonal, with the array TAU, represent the orthogonal   
            matrix Q as a product of elementary reflectors; if UPLO   
            = 'L', the the Lower band-diagonal of A is overwritten by 
            the corresponding elements of the band-diagonal   
            matrix T, and the elements below the band-diagonal, with   
            the array TAU, represent the orthogonal matrix Q as a product   
            of elementary reflectors. See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    TAU     (output) REAL array, dimension (N-1)   
            The scalar factors of the elementary reflectors (see Further   
            Details).   

    WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK))   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.  LWORK >= 1.   
            For optimum performance LWORK >= N*NB, where NB is the   
            optimal blocksize.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    dT      (output) REAL array on the GPU, dimension N*NB, 
            where NB is the optimal blocksize.
            On exit dT holds the upper triangular matrices T from the 
            accumulated Householder transformations (I - V T V') used
            in the factorization. The nb x nb matrices T are ordered 
            consecutively in memory one after another.

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   
    If UPLO = 'U', the matrix Q is represented as a product of elementary   
    reflectors   

       Q = H(n-1) . . . H(2) H(1).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'

    where tau is a real scalar, and v is a real vector with   
    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in   
    A(1:i-1,i+1), and tau in TAU(i).   

    If UPLO = 'L', the matrix Q is represented as a product of elementary   
    reflectors   

       Q = H(1) H(2) . . . H(n-1).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),   
    and tau in TAU(i).

    The contents of A on exit are illustrated by the following examples   
    with n = 5:   

    if UPLO = 'U':                       if UPLO = 'L':   

      (  d   e   v2  v3  v4 )              (  d                  )   
      (      d   e   v3  v4 )              (  e   d              )   
      (          d   e   v4 )              (  v1  e   d          )   
      (              d   e  )              (  v1  v2  e   d      )   
      (                  d  )              (  v1  v2  v3  e   d  )   

    where d and e denote diagonal and off-diagonal elements of T, and vi   
    denotes an element of the vector defining H(i).   
    =====================================================================    */

    #define a_ref(a_1,a_2)  ( a  + ((a_2)-1)*( lda) + (a_1)-1)
    #define da_ref(a_1,a_2) (da  + ((a_2)-1)*(ldda) + (a_1)-1)
    #define tau_ref(a_1)    (tau + (a_1)-1)
    #define t_ref(a_1)      (dT  + ((a_1)-1)*(lddt))

    char uplo_[2] = {uplo, 0};

    int ldda = ((n+31)/32)*32;
    int lddt = nb;
   
    float c_neg_one  = MAGMA_S_NEG_ONE;
    float c_neg_half = MAGMA_S_NEG_HALF;
    float c_one  = MAGMA_S_ONE ;
    float c_zero = MAGMA_S_ZERO;
    float  d_one = MAGMA_D_ONE;

    magma_int_t pm, pn, indi, indj, pk;
    magma_int_t pm_old=0, pn_old=0, indi_old=0, indj_old=0;

    int i;
    int lwkopt;
    int lquery;

    *info = 0;
    int upper = lapackf77_lsame(uplo_, "U");
    lquery = lwork == -1;
    if (! upper && ! lapackf77_lsame(uplo_, "L")) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,n)) {
        *info = -4;
    } else if (lwork < 1 && ! lquery) {
        *info = -9;
    }

    if (*info == 0) {
      /* Determine the block size. */
      lwkopt = n * nb;
      MAGMA_S_SET2REAL( work[0], lwkopt );
    }

    if (*info != 0)
      return *info;
    else if (lquery)
      return *info;

    /* Quick return if possible */
    if (n == 0) {
        work[0] = c_one;
        return *info;
    }

    float *da;
    if (MAGMA_SUCCESS != magma_smalloc( &da, (n + 2*nb)*ldda )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    magma_int_t mklth = min(threads,12);
#if defined(USEMKL)
    mkl_set_num_threads(mklth);
#endif
#if defined(USEACML)
    omp_set_num_threads(mklth);
#endif


    /* Use the first panel of da as work space */
    float *dwork = da+n*ldda;
    float *dW    = dwork + nb*ldda;

    #ifdef TRACING
    char buf[80];
    #endif
    cudaStream_t stream[3];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );
    stream[2] = 0;  // default stream
    
    trace_init( 1, 1, 3, stream );

    float *hT = work + lwork - nb*nb;
    lwork -= nb*nb;
    memset( hT, 0, nb*nb*sizeof(float));

    magmablasSetKernelStream( stream[0] );
    cudaEvent_t Pupdate_event;
    cudaEventCreateWithFlags(&Pupdate_event,cudaEventDisableTiming);
    //cudaEventCreate(&Pupdate_event);


    if (upper) {
      printf("SSYTRD_HE2HB is not yet implemented for upper matrix storage. Exit.\n");
      exit(1);

    }else {
        /* Copy the matrix to the GPU */
        if (1 <= n-nb){
            trace_gpu_start( 0, 0, "set", "set A" );
            magma_ssetmatrix_async( (n-nb), (n-nb),
                                    a_ref(nb+1, nb+1),  lda,
                                    da_ref(nb+1, nb+1), ldda, stream[0] );
            trace_gpu_end( 0, 0 );
        }

        /* Reduce the lower triangle of A */
        for (i = 1; i <= n-nb; i += nb) 
        {
             indi = i+nb;
             indj = i;
             pm   = n - i - nb + 1;
             //pn   = min(i+nb-1, n-nb) -i + 1;
             pn   = nb;
             
             /*   Get the current panel (no need for the 1st iteration) */
             if (i > 1 ){
                 // spanel_to_q copy the upper oof diagonal part of 
                 // the matrix to work to be restored later. acctually
                 //  the zero's and one's putted are not used this is only
                 //   because we don't have a function that copy only the
                 //    upper part of A to be restored after copying the 
                 //    lookahead panel that has been computted from GPU to CPU. 
                 spanel_to_q(MagmaUpper, pn-1, a_ref(i, i+1), lda, work);

                 trace_gpu_start( 0, 1, "get", "get panel" );
                 //magma_queue_sync( stream[0] );
                 cudaStreamWaitEvent(stream[1], Pupdate_event, 0);
                 magma_sgetmatrix_async( (pm+pn), pn,
                                         da_ref( i, i), ldda,
                                         a_ref ( i, i), lda, stream[1] );
                 trace_gpu_end( 0, 1 );

                 trace_gpu_start( 0, 2, "syr2k", "syr2k" );
                 magma_ssyr2k(MagmaLower, MagmaNoTrans, pm_old-pn_old, pn_old, c_neg_one,
                      da_ref(indi_old+pn_old, indj_old), ldda,
                      dW + pn_old           , pm_old, d_one,
                      da_ref(indi_old+pn_old, indi_old+pn_old), ldda);
                 trace_gpu_end( 0, 2 );

                 trace_cpu_start( 0, "sync", "sync on 1" );
                 magma_queue_sync( stream[1] );
                 trace_cpu_end( 0 );
                 sq_to_panel(MagmaUpper, pn-1, a_ref(i, i+1), lda, work);
             }

             /* ==========================================================
                QR factorization on a panel starting nb off of the diagonal.
                Prepare the V and T matrices. 
                ==========================================================  */
             #ifdef TRACING
             snprintf( buf, sizeof(buf), "panel %d", i );
             #endif
             trace_cpu_start( 0, "geqrf", buf );
             lapackf77_sgeqrf(&pm, &pn, a_ref(indi, indj), &lda, 
                        tau_ref(i), work, &lwork, info);
             
             /* Form the matrix T */
                         pk=min(pm,pn);
             lapackf77_slarft( MagmaForwardStr, MagmaColumnwiseStr,
                           &pm, &pk, a_ref(indi, indj), &lda,
                           tau_ref(i), hT, &nb);

             /* Prepare V - put 0s in the upper triangular part of the panel
                (and 1s on the diagonal), temporaly storing the original in work */
             spanel_to_q(MagmaUpper, pk, a_ref(indi, indj), lda, work);
             trace_cpu_end( 0 );

             /* Send V from the CPU to the GPU */
             trace_gpu_start( 0, 0, "set", "set V and T" );
             magma_ssetmatrix_async( pm, pk,
                                     a_ref(indi, indj),  lda,
                                     da_ref(indi, indj), ldda, stream[0] );

             /* Send the triangular factor T to the GPU */
             magma_ssetmatrix_async( pk, pk,
                                     hT,       nb,
                                     t_ref(i), lddt, stream[0] );
             trace_gpu_end( 0, 0 );
             
             /* ==========================================================
                Compute W:
                1. X = A (V T)
                2. W = X - 0.5* V * (T' * (V' * X)) 
                ==========================================================  */
             /* dwork = V T */
             trace_cpu_start( 0, "sync", "sync on 0" );
             // this sync is done here to be sure that the copy has been finished
             // because below we made a restore sq_to_panel and this restore need
             // to ensure that the copy has been finished. we did it here to allow
             // overlapp of restore with next gemm and symm.
             magma_queue_sync( stream[0] );
             trace_cpu_end( 0 );
             
             trace_gpu_start( 0, 2, "gemm", "work = V*T" );
             magma_sgemm(MagmaNoTrans, MagmaNoTrans, pm, pk, pk,
                         c_one, da_ref(indi, indj), ldda, 
                         t_ref(i), lddt,
                         c_zero, dwork, pm);
             trace_gpu_end( 0, 2 );
             
             /* dW = X = A*V*T. dW = A*dwork */ 
             trace_gpu_start( 0, 2, "symm", "X = A*work" );
             magma_ssymm(MagmaLeft, uplo, pm, pk,
                         c_one, da_ref(indi, indi), ldda,
                         dwork, pm,
                         c_zero, dW, pm);
             trace_gpu_end( 0, 2 );
             /* restore the panel */
             sq_to_panel(MagmaUpper, pk, a_ref(indi, indj), lda, work);
             
             /* dwork = V*T already ==> dwork' = T'*V'
              * compute T'*V'*X ==> dwork'*W ==>
              * dwork + pm*nb = ((T' * V') * X) = dwork' * X = dwork' * W */
             trace_gpu_start( 0, 2, "gemm", "work = T'*V'*X" );
             magma_sgemm(MagmaTrans, MagmaNoTrans, pk, pk, pm,
                         c_one, dwork, pm, 
                         dW, pm,
                         c_zero, dwork + pm*nb, nb);
             trace_gpu_end( 0, 2 );
             
             /* W = X - 0.5 * V * T'*V'*X
              *   = X - 0.5 * V * (dwork + pm*nb) = W - 0.5 * V * (dwork + pm*nb) */
             trace_gpu_start( 0, 2, "gemm", "W = X - 0.5*V*(T'*V'*X)" );
             magma_sgemm(MagmaNoTrans, MagmaNoTrans, pm, pk, pk,
                         c_neg_half, da_ref(indi, indj), ldda,
                         dwork + pm*nb, nb, 
                         c_one,     dW, pm);
             trace_gpu_end( 0, 2 );

             /* ==========================================================
                Update the unreduced submatrix A(i+ib:n,i+ib:n), using   
                an update of the form:  A := A - V*W' - W*V' 
                ==========================================================  */
             if (i + nb <= n-nb){
                 /* There would be next iteration;
                    do lookahead - update the next panel */
                 trace_gpu_start( 0, 2, "gemm", "gemm 4 next panel left" );
                 magma_sgemm(MagmaNoTrans, MagmaTrans, pm, pn, pn, c_neg_one,
                             da_ref(indi, indj), ldda,
                             dW                , pm, c_one,
                             da_ref(indi, indi), ldda);
                 trace_gpu_end( 0, 2 );
             
                 trace_gpu_start( 0, 2, "gemm", "gemm 5 next panel right" );
                 magma_sgemm(MagmaNoTrans, MagmaTrans, pm, pn, pn, c_neg_one,
                             dW                , pm,
                             da_ref(indi, indj), ldda, c_one,
                             da_ref(indi, indi), ldda);
                 trace_gpu_end( 0, 2 );
                 cudaEventRecord(Pupdate_event, stream[0]);
             }
             else {
                 /* no look-ahead as this is last iteration */
                 trace_gpu_start( 0, 2, "syr2k", "syr2k last iteration" );
                 magma_ssyr2k(MagmaLower, MagmaNoTrans, pk, pk, c_neg_one,
                              da_ref(indi, indj), ldda,
                              dW                , pm, d_one,
                              da_ref(indi, indi), ldda);
                 trace_gpu_end( 0, 2 );
             }
             
             indi_old = indi;
             indj_old = indj;
             pm_old   = pm;
             pn_old   = pn;
        }  // end loop for(i)

        /* Send the last block to the CPU */
        pk = min(pm,pn);
        if (1 <= n-nb){
            spanel_to_q(MagmaUpper, pk-1, a_ref(n-pk+1, n-pk+2), lda, work);
            trace_gpu_start( 0, 2, "get", "get last block" );
            magma_sgetmatrix( pk, pk,
                              da_ref(n-pk+1, n-pk+1), ldda,
                              a_ref(n-pk+1, n-pk+1),  lda );
            trace_gpu_end( 0, 2 );
            sq_to_panel(MagmaUpper, pk-1, a_ref(n-pk+1, n-pk+2), lda, work);
        }
    }// end of LOWER
    
    trace_finalize( "ssytrd_sy2sb.svg", "trace.css" );

    cudaEventDestroy(Pupdate_event);
    magma_queue_destroy( stream[0] );
    magma_queue_destroy( stream[1] );
    magma_free( da );
    MAGMA_S_SET2REAL( work[0], lwkopt );
    magmablasSetKernelStream( 0 );
    
#if defined(USEMKL)
    mkl_set_num_threads(1);
#endif
#if defined(USEACML)
    omp_set_num_threads(1);
#endif
    

    return *info;
} /* ssytrd_sy2sb_ */
Exemple #23
0
/* Subroutine */ int sgqrts_(integer *n, integer *m, integer *p, real *a, 
	real *af, real *q, real *r__, integer *lda, real *taua, real *b, real 
	*bf, real *z__, real *t, real *bwk, integer *ldb, real *taub, real *
	work, integer *lwork, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, r_dim1, r_offset, q_dim1, 
	    q_offset, b_dim1, b_offset, bf_dim1, bf_offset, t_dim1, t_offset, 
	    z_dim1, z_offset, bwk_dim1, bwk_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    static integer info;
    static real unfl, resid;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static real anorm, bnorm;
    extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, 
	    real *, real *, integer *, real *, real *, integer *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int sggqrf_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
	    , integer *), slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slaset_(char *, integer *, 
	    integer *, real *, real *, real *, integer *);
    extern doublereal slansy_(char *, char *, integer *, real *, integer *, 
	    real *);
    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *, integer *), sorgrq_(
	    integer *, integer *, integer *, real *, integer *, real *, real *
	    , integer *, integer *);
    static real ulp;


#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1]
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
#define af_ref(a_1,a_2) af[(a_2)*af_dim1 + a_1]
#define bf_ref(a_1,a_2) bf[(a_2)*bf_dim1 + a_1]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SGQRTS tests SGGQRF, which computes the GQR factorization of an   
    N-by-M matrix A and a N-by-P matrix B: A = Q*R and B = Q*T*Z.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The number of rows of the matrices A and B.  N >= 0.   

    M       (input) INTEGER   
            The number of columns of the matrix A.  M >= 0.   

    P       (input) INTEGER   
            The number of columns of the matrix B.  P >= 0.   

    A       (input) REAL array, dimension (LDA,M)   
            The N-by-M matrix A.   

    AF      (output) REAL array, dimension (LDA,N)   
            Details of the GQR factorization of A and B, as returned   
            by SGGQRF, see SGGQRF for further details.   

    Q       (output) REAL array, dimension (LDA,N)   
            The M-by-M orthogonal matrix Q.   

    R       (workspace) REAL array, dimension (LDA,MAX(M,N))   

    LDA     (input) INTEGER   
            The leading dimension of the arrays A, AF, R and Q.   
            LDA >= max(M,N).   

    TAUA    (output) REAL array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors, as returned   
            by SGGQRF.   

    B       (input) REAL array, dimension (LDB,P)   
            On entry, the N-by-P matrix A.   

    BF      (output) REAL array, dimension (LDB,N)   
            Details of the GQR factorization of A and B, as returned   
            by SGGQRF, see SGGQRF for further details.   

    Z       (output) REAL array, dimension (LDB,P)   
            The P-by-P orthogonal matrix Z.   

    T       (workspace) REAL array, dimension (LDB,max(P,N))   

    BWK     (workspace) REAL array, dimension (LDB,N)   

    LDB     (input) INTEGER   
            The leading dimension of the arrays B, BF, Z and T.   
            LDB >= max(P,N).   

    TAUB    (output) REAL array, dimension (min(P,N))   
            The scalar factors of the elementary reflectors, as returned   
            by SGGRQF.   

    WORK    (workspace) REAL array, dimension (LWORK)   

    LWORK   (input) INTEGER   
            The dimension of the array WORK, LWORK >= max(N,M,P)**2.   

    RWORK   (workspace) REAL array, dimension (max(N,M,P))   

    RESULT  (output) REAL array, dimension (4)   
            The test ratios:   
              RESULT(1) = norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP)   
              RESULT(2) = norm( T*Z - Q'*B ) / (MAX(P,N)*norm(B)*ULP)   
              RESULT(3) = norm( I - Q'*Q ) / ( M*ULP )   
              RESULT(4) = norm( I - Z'*Z ) / ( P*ULP )   

    =====================================================================   


       Parameter adjustments */
    r_dim1 = *lda;
    r_offset = 1 + r_dim1 * 1;
    r__ -= r_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --taua;
    bwk_dim1 = *ldb;
    bwk_offset = 1 + bwk_dim1 * 1;
    bwk -= bwk_offset;
    t_dim1 = *ldb;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    z_dim1 = *ldb;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    bf_dim1 = *ldb;
    bf_offset = 1 + bf_dim1 * 1;
    bf -= bf_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --taub;
    --work;
    --rwork;
    --result;

    /* Function Body */
    ulp = slamch_("Precision");
    unfl = slamch_("Safe minimum");

/*     Copy the matrix A to the array AF. */

    slacpy_("Full", n, m, &a[a_offset], lda, &af[af_offset], lda);
    slacpy_("Full", n, p, &b[b_offset], ldb, &bf[bf_offset], ldb);

/* Computing MAX */
    r__1 = slange_("1", n, m, &a[a_offset], lda, &rwork[1]);
    anorm = dmax(r__1,unfl);
/* Computing MAX */
    r__1 = slange_("1", n, p, &b[b_offset], ldb, &rwork[1]);
    bnorm = dmax(r__1,unfl);

/*     Factorize the matrices A and B in the arrays AF and BF. */

    sggqrf_(n, m, p, &af[af_offset], lda, &taua[1], &bf[bf_offset], ldb, &
	    taub[1], &work[1], lwork, &info);

/*     Generate the N-by-N matrix Q */

    slaset_("Full", n, n, &c_b9, &c_b9, &q[q_offset], lda);
    i__1 = *n - 1;
    slacpy_("Lower", &i__1, m, &af_ref(2, 1), lda, &q_ref(2, 1), lda);
    i__1 = min(*n,*m);
    sorgqr_(n, n, &i__1, &q[q_offset], lda, &taua[1], &work[1], lwork, &info);

/*     Generate the P-by-P matrix Z */

    slaset_("Full", p, p, &c_b9, &c_b9, &z__[z_offset], ldb);
    if (*n <= *p) {
	if (*n > 0 && *n < *p) {
	    i__1 = *p - *n;
	    slacpy_("Full", n, &i__1, &bf[bf_offset], ldb, &z___ref(*p - *n + 
		    1, 1), ldb);
	}
	if (*n > 1) {
	    i__1 = *n - 1;
	    i__2 = *n - 1;
	    slacpy_("Lower", &i__1, &i__2, &bf_ref(2, *p - *n + 1), ldb, &
		    z___ref(*p - *n + 2, *p - *n + 1), ldb);
	}
    } else {
	if (*p > 1) {
	    i__1 = *p - 1;
	    i__2 = *p - 1;
	    slacpy_("Lower", &i__1, &i__2, &bf_ref(*n - *p + 2, 1), ldb, &
		    z___ref(2, 1), ldb);
	}
    }
    i__1 = min(*n,*p);
    sorgrq_(p, p, &i__1, &z__[z_offset], ldb, &taub[1], &work[1], lwork, &
	    info);

/*     Copy R */

    slaset_("Full", n, m, &c_b19, &c_b19, &r__[r_offset], lda);
    slacpy_("Upper", n, m, &af[af_offset], lda, &r__[r_offset], lda);

/*     Copy T */

    slaset_("Full", n, p, &c_b19, &c_b19, &t[t_offset], ldb);
    if (*n <= *p) {
	slacpy_("Upper", n, n, &bf_ref(1, *p - *n + 1), ldb, &t_ref(1, *p - *
		n + 1), ldb);
    } else {
	i__1 = *n - *p;
	slacpy_("Full", &i__1, p, &bf[bf_offset], ldb, &t[t_offset], ldb);
	slacpy_("Upper", p, p, &bf_ref(*n - *p + 1, 1), ldb, &t_ref(*n - *p + 
		1, 1), ldb);
    }

/*     Compute R - Q'*A */

    sgemm_("Transpose", "No transpose", n, m, n, &c_b30, &q[q_offset], lda, &
	    a[a_offset], lda, &c_b31, &r__[r_offset], lda);

/*     Compute norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP ) . */

    resid = slange_("1", n, m, &r__[r_offset], lda, &rwork[1]);
    if (anorm > 0.f) {
/* Computing MAX */
	i__1 = max(1,*m);
	result[1] = resid / (real) max(i__1,*n) / anorm / ulp;
    } else {
	result[1] = 0.f;
    }

/*     Compute T*Z - Q'*B */

    sgemm_("No Transpose", "No transpose", n, p, p, &c_b31, &t[t_offset], ldb,
	     &z__[z_offset], ldb, &c_b19, &bwk[bwk_offset], ldb);
    sgemm_("Transpose", "No transpose", n, p, n, &c_b30, &q[q_offset], lda, &
	    b[b_offset], ldb, &c_b31, &bwk[bwk_offset], ldb);

/*     Compute norm( T*Z - Q'*B ) / ( MAX(P,N)*norm(A)*ULP ) . */

    resid = slange_("1", n, p, &bwk[bwk_offset], ldb, &rwork[1]);
    if (bnorm > 0.f) {
/* Computing MAX */
	i__1 = max(1,*p);
	result[2] = resid / (real) max(i__1,*n) / bnorm / ulp;
    } else {
	result[2] = 0.f;
    }

/*     Compute I - Q'*Q */

    slaset_("Full", n, n, &c_b19, &c_b31, &r__[r_offset], lda);
    ssyrk_("Upper", "Transpose", n, n, &c_b30, &q[q_offset], lda, &c_b31, &
	    r__[r_offset], lda);

/*     Compute norm( I - Q'*Q ) / ( N * ULP ) . */

    resid = slansy_("1", "Upper", n, &r__[r_offset], lda, &rwork[1]);
    result[3] = resid / (real) max(1,*n) / ulp;

/*     Compute I - Z'*Z */

    slaset_("Full", p, p, &c_b19, &c_b31, &t[t_offset], ldb);
    ssyrk_("Upper", "Transpose", p, p, &c_b30, &z__[z_offset], ldb, &c_b31, &
	    t[t_offset], ldb);

/*     Compute norm( I - Z'*Z ) / ( P*ULP ) . */

    resid = slansy_("1", "Upper", p, &t[t_offset], ldb, &rwork[1]);
    result[4] = resid / (real) max(1,*p) / ulp;

    return 0;

/*     End of SGQRTS */

} /* sgqrts_ */
Exemple #24
0
magma_int_t magma_cungqr_2stage_gpu(magma_int_t m, magma_int_t n, magma_int_t k,
                 magmaFloatComplex *da, magma_int_t ldda,
                 magmaFloatComplex *tau, magmaFloatComplex *dT,
                 magma_int_t nb, magma_int_t *info)
{
/*  -- MAGMA (version 1.4.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       August 2013

    Purpose
    =======
    CUNGQR generates an M-by-N COMPLEX matrix Q with orthonormal columns,
    which is defined as the first N columns of a product of K elementary
    reflectors of order M

          Q  =  H(1) H(2) . . . H(k)

    as returned by CGEQRF_GPU.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix Q. M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix Q. M >= N >= 0.

    K       (input) INTEGER
            The number of elementary reflectors whose product defines the
            matrix Q. N >= K >= 0.

    DA      (input/output) COMPLEX array A on the GPU device,
            dimension (LDDA,N). On entry, the i-th column must contain
            the vector which defines the elementary reflector H(i), for
            i = 1,2,...,k, as returned by CGEQRF_GPU in the first k
            columns of its array argument A.
            On exit, the M-by-N matrix Q.

    LDDA    (input) INTEGER
            The first dimension of the array A. LDDA >= max(1,M).

    TAU     (input) COMPLEX array, dimension (K)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by CGEQRF_GPU.

    DT      (input) COMPLEX work space array on the GPU device,
            dimension (MIN(M, N) )*NB.
            This must be the 6th argument of magma_cgeqrf_gpu
            [ note that if N here is bigger than N in magma_cgeqrf_gpu,
              the workspace requirement DT in magma_cgeqrf_gpu must be
              as specified in this routine ].

    NB      (input) INTEGER
            This is the block size used in CGEQRF_GPU, and correspondingly
            the size of the T matrices, used in the factorization, and
            stored in DT.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument has an illegal value
    =====================================================================    */

    #define da_ref(a_1,a_2) (da+(a_2)*(ldda) + (a_1))
    #define t_ref(a_1)      (dT+(a_1)*nb)

    magma_int_t  i__1, i__2, i__3;
    //magma_int_t lwork;
    magma_int_t i, ib, ki, kk;  //, iinfo;
    //magma_int_t lddwork = min(m, n);
    //magmaFloatComplex *work, *panel;
    magmaFloatComplex *dwork;
    //magma_queue_t stream[2];
    magma_int_t ldt=nb; // need to be an input parameter

    *info = 0;
    if (m < 0) {
        *info = -1;
    } else if ((n < 0) || (n > m)) {
        *info = -2;
    } else if ((k < 0) || (k > n)) {
        *info = -3;
    } else if (ldda < max(1,m)) {
        *info = -5;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    if (n <= 0)
        return *info;

    if(MAGMA_SUCCESS != magma_cmalloc( &dwork, n*nb )) {
        printf ("!!!! cungqr_2stage magma_alloc failed for: dwork\n" );
        exit(-1);
    }

    if ( (nb > 1) && (nb < k) ) {
        /*  Use blocked code after the last block.
            The first kk columns are handled by the block method.
            ki is start of 2nd-to-last block. */
        ki = (k - nb - 1) / nb * nb;
        kk = min(k, ki + nb);

        /* Set A(1:kk,kk+1:n) to zero. */
        magmablas_claset(MagmaUpperLower, kk, n-kk, da_ref(0,kk), ldda);
        /* A(kk+1:m, kk+1:n) = I */
        magmablas_claset_identity(m-kk, n-kk, da_ref(kk,kk), ldda);
    }
    else {
        ki = 0;
        kk = 0;
    }
    
    /* Allocate work space on CPU in pinned memory */
    //lwork = (n+m) * nb;
    //if (kk < n)
    //  lwork = max(lwork, n * nb + (m-kk)*(n-kk));

    //if (MAGMA_SUCCESS != magma_cmalloc_pinned( &work, (lwork) )) {
    //    *info = MAGMA_ERR_HOST_ALLOC;
    //    return *info;
    //}
    //panel = work + n * nb;

    //magma_queue_create( &stream[0] );
    //magma_queue_create( &stream[1] );
    /* Use unblocked code for the last or only block. */
    if (kk < n) {
        i__1 = m - kk;
        i__2 = n - kk;
        i__3 = k - kk;
        //cublasGetMatrix(i__1, i__2, sizeof(magmaFloatComplex),
        //                da_ref(kk, kk), ldda, panel, i__1);
        //lapackf77_cungqr(&i__1, &i__2, &i__3, panel, &i__1, &tau[kk],
        //                 work, &lwork, &iinfo);
        //
        //cublasSetMatrix(i__1, i__2, sizeof(magmaFloatComplex),
        //              panel, i__1, da_ref(kk, kk), ldda);
        
        magma_clarfb_gpu( MagmaLeft, MagmaNoTrans, MagmaForward, MagmaColumnwise,
                          i__1, i__2, i__3,
                          da_ref(kk, kk-nb), ldda, t_ref(kk-nb),          ldt,
                                  da_ref(kk, kk), ldda, dwork, i__2);
        
        //magmablas_claset(MagmaUpperLower, kk-nb, nb, da_ref(0,kk-nb), ldda);
        //magmablas_claset_identity(m-(kk-nb), nb, da_ref(kk-nb,kk-nb), ldda);
    }

    if (kk > 0) {
        /* Use blocked code */
        for (i = ki; i >= nb; i-=nb) {
            ib = min(nb, k - i);
            /* Send current panel to the CPU for update */
            i__2 = m - i;
            //cudaMemcpy2DAsync(panel,       i__2 * sizeof(magmaFloatComplex),
            //                  da_ref(i,i), ldda * sizeof(magmaFloatComplex),
            //                  sizeof(magmaFloatComplex)*i__2, ib,
            //                  cudaMemcpyDeviceToHost,stream[0]);
            if (i + ib < n) {
                /* Apply H to A(i:m,i+ib:n) from the left */
                i__3 = n - i;

                magmablas_claset(MagmaUpperLower, i, ib, da_ref(0,i), ldda);
                magmablas_claset_identity(m-i, ib, da_ref(i,i), ldda);

                magma_clarfb_gpu( MagmaLeft, MagmaNoTrans, MagmaForward, MagmaColumnwise,
                                  i__2, i__3, ib,
                                  da_ref(i, i-nb), ldda, t_ref(i-nb),             ldt,
                                  da_ref(i, i), ldda, dwork, i__3);
            }

            /* Apply H to rows i:m of current block on the CPU */
            //magma_queue_sync( stream[0] );
            //lapackf77_cungqr(&i__2, &ib, &ib, panel, &i__2, &tau[i],
            //                 work, &lwork, &iinfo);
            //cudaMemcpy2DAsync(da_ref(i,i), ldda * sizeof(magmaFloatComplex),
            //                  panel,       i__2 * sizeof(magmaFloatComplex),
            //                  sizeof(magmaFloatComplex)*i__2, ib,
            //                  cudaMemcpyHostToDevice,stream[1]);

            /* Set rows 1:i-1 of current block to zero */
            i__2 = i + ib;
            //magmablas_claset(MagmaUpperLower, i-ib, ib, da_ref(0,i-ib), ldda);
            //magmablas_claset_identity(m-(i-ib), ib, da_ref(i-ib,i-ib), ldda);
        }
    }

    magmablas_claset_identity(m, nb, da_ref(0,0), ldda);

    magma_free( dwork );
    //magma_free_pinned( work );
    //magma_queue_destroy( stream[0] );
    //magma_queue_destroy( stream[1] );

    return *info;
} /* magma_cungqr_gpu */
/* Subroutine */ int dlaexc_(logical *wantq, integer *n, doublereal *t, 
	integer *ldt, doublereal *q, integer *ldq, integer *j1, integer *n1, 
	integer *n2, doublereal *work, integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    DLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in   
    an upper quasi-triangular matrix T by an orthogonal similarity   
    transformation.   

    T must be in Schur canonical form, that is, block upper triangular   
    with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block   
    has its diagonal elemnts equal and its off-diagonal elements of   
    opposite sign.   

    Arguments   
    =========   

    WANTQ   (input) LOGICAL   
            = .TRUE. : accumulate the transformation in the matrix Q;   
            = .FALSE.: do not accumulate the transformation.   

    N       (input) INTEGER   
            The order of the matrix T. N >= 0.   

    T       (input/output) DOUBLE PRECISION array, dimension (LDT,N)   
            On entry, the upper quasi-triangular matrix T, in Schur   
            canonical form.   
            On exit, the updated matrix T, again in Schur canonical form.   

    LDT     (input)  INTEGER   
            The leading dimension of the array T. LDT >= max(1,N).   

    Q       (input/output) DOUBLE PRECISION array, dimension (LDQ,N)   
            On entry, if WANTQ is .TRUE., the orthogonal matrix Q.   
            On exit, if WANTQ is .TRUE., the updated matrix Q.   
            If WANTQ is .FALSE., Q is not referenced.   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.   
            LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N.   

    J1      (input) INTEGER   
            The index of the first row of the first block T11.   

    N1      (input) INTEGER   
            The order of the first block T11. N1 = 0, 1 or 2.   

    N2      (input) INTEGER   
            The order of the second block T22. N2 = 0, 1 or 2.   

    WORK    (workspace) DOUBLE PRECISION array, dimension (N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            = 1: the transformed matrix T would be too far from Schur   
                 form; the blocks are not swapped and T and Q are   
                 unchanged.   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c__4 = 4;
    static logical c_false = FALSE_;
    static integer c_n1 = -1;
    static integer c__2 = 2;
    static integer c__3 = 3;
    
    /* System generated locals */
    integer q_dim1, q_offset, t_dim1, t_offset, i__1;
    doublereal d__1, d__2, d__3, d__4, d__5, d__6;
    /* Local variables */
    static integer ierr;
    static doublereal temp;
    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *);
    static doublereal d__[16]	/* was [4][4] */;
    static integer k;
    static doublereal u[3], scale, x[4]	/* was [2][2] */, dnorm;
    static integer j2, j3, j4;
    static doublereal xnorm, u1[3], u2[3];
    extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *, doublereal *, 
	    doublereal *, doublereal *, doublereal *, doublereal *), dlasy2_(
	    logical *, logical *, integer *, integer *, integer *, doublereal 
	    *, integer *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *);
    static integer nd;
    static doublereal cs, t11, t22;
    extern doublereal dlamch_(char *);
    static doublereal t33;
    extern doublereal dlange_(char *, integer *, integer *, doublereal *, 
	    integer *, doublereal *);
    extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *,
	     integer *, doublereal *);
    static doublereal sn;
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, integer *), 
	    dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, 
	    doublereal *), dlarfx_(char *, integer *, integer *, doublereal *,
	     doublereal *, doublereal *, integer *, doublereal *);
    static doublereal thresh, smlnum, wi1, wi2, wr1, wr2, eps, tau, tau1, 
	    tau2;
#define d___ref(a_1,a_2) d__[(a_2)*4 + a_1 - 5]
#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1]
#define x_ref(a_1,a_2) x[(a_2)*2 + a_1 - 3]


    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --work;

    /* Function Body */
    *info = 0;

/*     Quick return if possible */

    if (*n == 0 || *n1 == 0 || *n2 == 0) {
	return 0;
    }
    if (*j1 + *n1 > *n) {
	return 0;
    }

    j2 = *j1 + 1;
    j3 = *j1 + 2;
    j4 = *j1 + 3;

    if (*n1 == 1 && *n2 == 1) {

/*        Swap two 1-by-1 blocks. */

	t11 = t_ref(*j1, *j1);
	t22 = t_ref(j2, j2);

/*        Determine the transformation to perform the interchange. */

	d__1 = t22 - t11;
	dlartg_(&t_ref(*j1, j2), &d__1, &cs, &sn, &temp);

/*        Apply transformation to the matrix T. */

	if (j3 <= *n) {
	    i__1 = *n - *j1 - 1;
	    drot_(&i__1, &t_ref(*j1, j3), ldt, &t_ref(j2, j3), ldt, &cs, &sn);
	}
	i__1 = *j1 - 1;
	drot_(&i__1, &t_ref(1, *j1), &c__1, &t_ref(1, j2), &c__1, &cs, &sn);

	t_ref(*j1, *j1) = t22;
	t_ref(j2, j2) = t11;

	if (*wantq) {

/*           Accumulate transformation in the matrix Q. */

	    drot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, j2), &c__1, &cs, &sn);
	}

    } else {

/*        Swapping involves at least one 2-by-2 block.   

          Copy the diagonal block of order N1+N2 to the local array D   
          and compute its norm. */

	nd = *n1 + *n2;
	dlacpy_("Full", &nd, &nd, &t_ref(*j1, *j1), ldt, d__, &c__4);
	dnorm = dlange_("Max", &nd, &nd, d__, &c__4, &work[1]);

/*        Compute machine-dependent threshold for test for accepting   
          swap. */

	eps = dlamch_("P");
	smlnum = dlamch_("S") / eps;
/* Computing MAX */
	d__1 = eps * 10. * dnorm;
	thresh = max(d__1,smlnum);

/*        Solve T11*X - X*T22 = scale*T12 for X. */

	dlasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d___ref(*n1 + 
		1, *n1 + 1), &c__4, &d___ref(1, *n1 + 1), &c__4, &scale, x, &
		c__2, &xnorm, &ierr);

/*        Swap the adjacent diagonal blocks. */

	k = *n1 + *n1 + *n2 - 3;
	switch (k) {
	    case 1:  goto L10;
	    case 2:  goto L20;
	    case 3:  goto L30;
	}

L10:

/*        N1 = 1, N2 = 2: generate elementary reflector H so that:   

          ( scale, X11, X12 ) H = ( 0, 0, * ) */

	u[0] = scale;
	u[1] = x_ref(1, 1);
	u[2] = x_ref(1, 2);
	dlarfg_(&c__3, &u[2], u, &c__1, &tau);
	u[2] = 1.;
	t11 = t_ref(*j1, *j1);

/*        Perform swap provisionally on diagonal block in D. */

	dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]);
	dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]);

/*        Test whether to reject swap.   

   Computing MAX */
	d__4 = (d__1 = d___ref(3, 1), abs(d__1)), d__5 = (d__2 = d___ref(3, 2)
		, abs(d__2)), d__4 = max(d__4,d__5), d__5 = (d__3 = d___ref(3,
		 3) - t11, abs(d__3));
	if (max(d__4,d__5) > thresh) {
	    goto L50;
	}

/*        Accept swap: apply transformation to the entire matrix T. */

	i__1 = *n - *j1 + 1;
	dlarfx_("L", &c__3, &i__1, u, &tau, &t_ref(*j1, *j1), ldt, &work[1]);
	dlarfx_("R", &j2, &c__3, u, &tau, &t_ref(1, *j1), ldt, &work[1]);

	t_ref(j3, *j1) = 0.;
	t_ref(j3, j2) = 0.;
	t_ref(j3, j3) = t11;

	if (*wantq) {

/*           Accumulate transformation in the matrix Q. */

	    dlarfx_("R", n, &c__3, u, &tau, &q_ref(1, *j1), ldq, &work[1]);
	}
	goto L40;

L20:

/*        N1 = 2, N2 = 1: generate elementary reflector H so that:   

          H (  -X11 ) = ( * )   
            (  -X21 ) = ( 0 )   
            ( scale ) = ( 0 ) */

	u[0] = -x_ref(1, 1);
	u[1] = -x_ref(2, 1);
	u[2] = scale;
	dlarfg_(&c__3, u, &u[1], &c__1, &tau);
	u[0] = 1.;
	t33 = t_ref(j3, j3);

/*        Perform swap provisionally on diagonal block in D. */

	dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]);
	dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]);

/*        Test whether to reject swap.   

   Computing MAX */
	d__4 = (d__1 = d___ref(2, 1), abs(d__1)), d__5 = (d__2 = d___ref(3, 1)
		, abs(d__2)), d__4 = max(d__4,d__5), d__5 = (d__3 = d___ref(1,
		 1) - t33, abs(d__3));
	if (max(d__4,d__5) > thresh) {
	    goto L50;
	}

/*        Accept swap: apply transformation to the entire matrix T. */

	dlarfx_("R", &j3, &c__3, u, &tau, &t_ref(1, *j1), ldt, &work[1]);
	i__1 = *n - *j1;
	dlarfx_("L", &c__3, &i__1, u, &tau, &t_ref(*j1, j2), ldt, &work[1]);

	t_ref(*j1, *j1) = t33;
	t_ref(j2, *j1) = 0.;
	t_ref(j3, *j1) = 0.;

	if (*wantq) {

/*           Accumulate transformation in the matrix Q. */

	    dlarfx_("R", n, &c__3, u, &tau, &q_ref(1, *j1), ldq, &work[1]);
	}
	goto L40;

L30:

/*        N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so   
          that:   

          H(2) H(1) (  -X11  -X12 ) = (  *  * )   
                    (  -X21  -X22 )   (  0  * )   
                    ( scale    0  )   (  0  0 )   
                    (    0  scale )   (  0  0 ) */

	u1[0] = -x_ref(1, 1);
	u1[1] = -x_ref(2, 1);
	u1[2] = scale;
	dlarfg_(&c__3, u1, &u1[1], &c__1, &tau1);
	u1[0] = 1.;

	temp = -tau1 * (x_ref(1, 2) + u1[1] * x_ref(2, 2));
	u2[0] = -temp * u1[1] - x_ref(2, 2);
	u2[1] = -temp * u1[2];
	u2[2] = scale;
	dlarfg_(&c__3, u2, &u2[1], &c__1, &tau2);
	u2[0] = 1.;

/*        Perform swap provisionally on diagonal block in D. */

	dlarfx_("L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1])
		;
	dlarfx_("R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1])
		;
	dlarfx_("L", &c__3, &c__4, u2, &tau2, &d___ref(2, 1), &c__4, &work[1]);
	dlarfx_("R", &c__4, &c__3, u2, &tau2, &d___ref(1, 2), &c__4, &work[1]);

/*        Test whether to reject swap.   

   Computing MAX */
	d__5 = (d__1 = d___ref(3, 1), abs(d__1)), d__6 = (d__2 = d___ref(3, 2)
		, abs(d__2)), d__5 = max(d__5,d__6), d__6 = (d__3 = d___ref(4,
		 1), abs(d__3)), d__5 = max(d__5,d__6), d__6 = (d__4 = 
		d___ref(4, 2), abs(d__4));
	if (max(d__5,d__6) > thresh) {
	    goto L50;
	}

/*        Accept swap: apply transformation to the entire matrix T. */

	i__1 = *n - *j1 + 1;
	dlarfx_("L", &c__3, &i__1, u1, &tau1, &t_ref(*j1, *j1), ldt, &work[1]);
	dlarfx_("R", &j4, &c__3, u1, &tau1, &t_ref(1, *j1), ldt, &work[1]);
	i__1 = *n - *j1 + 1;
	dlarfx_("L", &c__3, &i__1, u2, &tau2, &t_ref(j2, *j1), ldt, &work[1]);
	dlarfx_("R", &j4, &c__3, u2, &tau2, &t_ref(1, j2), ldt, &work[1]);

	t_ref(j3, *j1) = 0.;
	t_ref(j3, j2) = 0.;
	t_ref(j4, *j1) = 0.;
	t_ref(j4, j2) = 0.;

	if (*wantq) {

/*           Accumulate transformation in the matrix Q. */

	    dlarfx_("R", n, &c__3, u1, &tau1, &q_ref(1, *j1), ldq, &work[1]);
	    dlarfx_("R", n, &c__3, u2, &tau2, &q_ref(1, j2), ldq, &work[1]);
	}

L40:

	if (*n2 == 2) {

/*           Standardize new 2-by-2 block T11 */

	    dlanv2_(&t_ref(*j1, *j1), &t_ref(*j1, j2), &t_ref(j2, *j1), &
		    t_ref(j2, j2), &wr1, &wi1, &wr2, &wi2, &cs, &sn);
	    i__1 = *n - *j1 - 1;
	    drot_(&i__1, &t_ref(*j1, *j1 + 2), ldt, &t_ref(j2, *j1 + 2), ldt, 
		    &cs, &sn);
	    i__1 = *j1 - 1;
	    drot_(&i__1, &t_ref(1, *j1), &c__1, &t_ref(1, j2), &c__1, &cs, &
		    sn);
	    if (*wantq) {
		drot_(n, &q_ref(1, *j1), &c__1, &q_ref(1, j2), &c__1, &cs, &
			sn);
	    }
	}

	if (*n1 == 2) {

/*           Standardize new 2-by-2 block T22 */

	    j3 = *j1 + *n2;
	    j4 = j3 + 1;
	    dlanv2_(&t_ref(j3, j3), &t_ref(j3, j4), &t_ref(j4, j3), &t_ref(j4,
		     j4), &wr1, &wi1, &wr2, &wi2, &cs, &sn);
	    if (j3 + 2 <= *n) {
		i__1 = *n - j3 - 1;
		drot_(&i__1, &t_ref(j3, j3 + 2), ldt, &t_ref(j4, j3 + 2), ldt,
			 &cs, &sn);
	    }
	    i__1 = j3 - 1;
	    drot_(&i__1, &t_ref(1, j3), &c__1, &t_ref(1, j4), &c__1, &cs, &sn)
		    ;
	    if (*wantq) {
		drot_(n, &q_ref(1, j3), &c__1, &q_ref(1, j4), &c__1, &cs, &sn)
			;
	    }
	}

    }
    return 0;

/*     Exit with INFO = 1 if swap was rejected. */

L50:
    *info = 1;
    return 0;

/*     End of DLAEXC */

} /* dlaexc_ */
Exemple #26
0
extern "C" int
magma_ztsqrt_gpu(int *m, int *n,
                 magmaDoubleComplex *a1, magmaDoubleComplex *a2, int  *lda,
                 magmaDoubleComplex  *tau, magmaDoubleComplex *work,
                 int *lwork, magmaDoubleComplex *dwork, int *info )
{
/*  -- MAGMA (version 1.4.1) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       December 2013

    Purpose
    =======
    ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
    A = Q * R.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix A.  M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A.  N >= 0.

    A       (input/output) COMPLEX_16 array on the GPU, dimension (LDA,N)
            On entry, the M-by-N matrix A.
            On exit, the elements on and above the diagonal of the array
            contain the min(M,N)-by-N upper trapezoidal matrix R (R is
            upper triangular if m >= n); the elements below the diagonal,
            with the array TAU, represent the orthogonal matrix Q as a
            product of min(m,n) elementary reflectors (see Further
            Details).

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,M).

    TAU     (output) COMPLEX_16 array, dimension (min(M,N))
            The scalar factors of the elementary reflectors (see Further
            Details).

    WORK    (workspace/output) COMPLEX_16 array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

            Higher performance is achieved if WORK is in pinned memory, e.g.
            allocated using magma_malloc_pinned.

    LWORK   (input) INTEGER
            The dimension of the array WORK.  LWORK >= (M+N+NB)*NB,
            where NB can be obtained through magma_get_zgeqrf_nb(M).

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued.

    DWORK   (workspace/output)  COMPLEX_16 array on the GPU, dimension 2*N*NB,
            where NB can be obtained through magma_get_zgeqrf_nb(M).
            It starts with NB*NB blocks that store the triangular T
            matrices, followed by the NB*NB blocks of the diagonal
            inverses for the R matrix.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value

    Further Details
    ===============
    The matrix Q is represented as a product of elementary reflectors

        Q = H(1) H(2) . . . H(k), where k = min(m,n).

    Each H(i) has the form

        H(i) = I - tau * v * v'

    where tau is a complex scalar, and v is a complex vector with
    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
    and tau in TAU(i).
    =====================================================================    */

    #define a1_ref(a_1,a_2) ( a1+(a_2)*(*lda) + (a_1))
    #define a2_ref(a_1,a_2) ( a2+(a_2)*(*lda) + (a_1))
    #define t_ref(a_1)     (dwork+(a_1))
    #define d_ref(a_1)     (dwork+(lddwork+(a_1))*nb)
    #define dd_ref(a_1)    (dwork+(2*lddwork+(a_1))*nb)
    #define work_a1        ( work )
    #define work_a2        ( work + nb )
    #define hwork          ( work + (nb)*(*m))
    
    int i, k, ldwork, lddwork, old_i, old_ib, rows, cols;
    int nbmin, ib, ldda;
    
    /* Function Body */
    *info = 0;
    int nb = magma_get_zgeqrf_nb(*m);
    
    int lwkopt = (*n+*m) * nb;
    work[0] = (magmaDoubleComplex) lwkopt;
    int lquery = *lwork == -1;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1,*m)) {
        *info = -4;
    } else if (*lwork < max(1,*n) && ! lquery) {
        *info = -7;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery)
        return *info;
    
    k = min(*m,*n);
    if (k == 0) {
        work[0] = 1.f;
        return *info;
    }
    
    int lhwork = *lwork - (*m)*nb;
    
    magma_queue_t stream[2];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );
    
    ldda = *m;
    nbmin = 2;
    ldwork = *m;
    lddwork= k;
    
    // This is only blocked code for now
    for (i = 0; i < k; i += nb) {
        ib = min(k-i, nb);
        rows = *m -i;
        rows = *m;
        // Send the next panel (diagonal block of A1 & block column of A2)
        // to the CPU (in work_a1 and work_a2)
        magma_zgetmatrix_async( rows, ib,
                                a2_ref(0,i), (*lda),
                                work_a2,     ldwork, stream[1] );
        
                            // a1_ref(i,i), (*lda)*sizeof(magmaDoubleComplex),
                            // the diagonal of a1 is in d_ref generated and
                            // passed from magma_zgeqrf_gpu
        magma_zgetmatrix_async( ib, ib,
                                d_ref(i), ib,
                                work_a1,  ldwork, stream[1] );
        
        if (i>0) {
            /* Apply H' to A(i:m,i+2*ib:n) from the left */
            // update T2
            cols = *n-old_i-2*old_ib;
            magma_zssrfb(*m, cols, &old_ib,
                         a2_ref(    0, old_i), lda, t_ref(old_i), &lddwork,
                         a1_ref(old_i, old_i+2*old_ib), lda,
                         a2_ref(    0, old_i+2*old_ib), lda,
                         dd_ref(0), &lddwork);
        }
        
        magma_queue_sync( stream[1] );
        
        // TTT - here goes the CPU PLASMA code
        //       Matrix T has to be put in hwork with lda = ib and 0s
        //       in the parts that are not used - copied on GPU in t_ref(i)
        
        // Now diag of A1 is updated, send it back asynchronously to the GPU.
        // We have to play interchaning these copies to see which is faster
        magma_zsetmatrix_async( ib, ib,
                                work_a1,  ib,
                                d_ref(i), ib, stream[0] );
        // Send the panel from A2 back to the GPU
        magma_zsetmatrix( *m, ib, work_a2, ldwork, a2_ref(0,i), *lda );
        
        if (i + ib < *n) {
            // Send the triangular factor T from hwork to the GPU in t_ref(i)
            magma_zsetmatrix( ib, ib, hwork, ib, t_ref(i), lddwork );
            
            if (i+nb < k){
                /* Apply H' to A(i:m,i+ib:i+2*ib) from the left */
                // if we can do one more step, first update T1
                magma_zssrfb(*m, ib, &ib,
                             a2_ref(0, i),    lda, t_ref(i), &lddwork,
                             a1_ref(i, i+ib), lda,
                             a2_ref(0, i+ib), lda,
                             dd_ref(0), &lddwork);
            }
            else {
                cols = *n-i-ib;
                // otherwise, update until the end and fix the panel
                magma_zssrfb(*m, cols, &ib,
                             a2_ref(0, i),    lda, t_ref(i), &lddwork,
                             a1_ref(i, i+ib), lda,
                             a2_ref(0, i+ib), lda,
                             dd_ref(0), &lddwork);
            }
            old_i = i;
            old_ib = ib;
        }
    }
    
    return *info;
} /* magma_ztsqrt_gpu */
/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select, 
	integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, 
	integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer 
	*m, doublecomplex *work, doublereal *rwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    ZTREVC computes some or all of the right and/or left eigenvectors of   
    a complex upper triangular matrix T.   

    The right eigenvector x and the left eigenvector y of T corresponding   
    to an eigenvalue w are defined by:   

                 T*x = w*x,     y'*T = w*y'   

    where y' denotes the conjugate transpose of the vector y.   

    If all eigenvectors are requested, the routine may either return the   
    matrices X and/or Y of right or left eigenvectors of T, or the   
    products Q*X and/or Q*Y, where Q is an input unitary   
    matrix. If T was obtained from the Schur factorization of an   
    original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of   
    right or left eigenvectors of A.   

    Arguments   
    =========   

    SIDE    (input) CHARACTER*1   
            = 'R':  compute right eigenvectors only;   
            = 'L':  compute left eigenvectors only;   
            = 'B':  compute both right and left eigenvectors.   

    HOWMNY  (input) CHARACTER*1   
            = 'A':  compute all right and/or left eigenvectors;   
            = 'B':  compute all right and/or left eigenvectors,   
                    and backtransform them using the input matrices   
                    supplied in VR and/or VL;   
            = 'S':  compute selected right and/or left eigenvectors,   
                    specified by the logical array SELECT.   

    SELECT  (input) LOGICAL array, dimension (N)   
            If HOWMNY = 'S', SELECT specifies the eigenvectors to be   
            computed.   
            If HOWMNY = 'A' or 'B', SELECT is not referenced.   
            To select the eigenvector corresponding to the j-th   
            eigenvalue, SELECT(j) must be set to .TRUE..   

    N       (input) INTEGER   
            The order of the matrix T. N >= 0.   

    T       (input/output) COMPLEX*16 array, dimension (LDT,N)   
            The upper triangular matrix T.  T is modified, but restored   
            on exit.   

    LDT     (input) INTEGER   
            The leading dimension of the array T. LDT >= max(1,N).   

    VL      (input/output) COMPLEX*16 array, dimension (LDVL,MM)   
            On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must   
            contain an N-by-N matrix Q (usually the unitary matrix Q of   
            Schur vectors returned by ZHSEQR).   
            On exit, if SIDE = 'L' or 'B', VL contains:   
            if HOWMNY = 'A', the matrix Y of left eigenvectors of T;   
                             VL is lower triangular. The i-th column   
                             VL(i) of VL is the eigenvector corresponding   
                             to T(i,i).   
            if HOWMNY = 'B', the matrix Q*Y;   
            if HOWMNY = 'S', the left eigenvectors of T specified by   
                             SELECT, stored consecutively in the columns   
                             of VL, in the same order as their   
                             eigenvalues.   
            If SIDE = 'R', VL is not referenced.   

    LDVL    (input) INTEGER   
            The leading dimension of the array VL.  LDVL >= max(1,N) if   
            SIDE = 'L' or 'B'; LDVL >= 1 otherwise.   

    VR      (input/output) COMPLEX*16 array, dimension (LDVR,MM)   
            On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must   
            contain an N-by-N matrix Q (usually the unitary matrix Q of   
            Schur vectors returned by ZHSEQR).   
            On exit, if SIDE = 'R' or 'B', VR contains:   
            if HOWMNY = 'A', the matrix X of right eigenvectors of T;   
                             VR is upper triangular. The i-th column   
                             VR(i) of VR is the eigenvector corresponding   
                             to T(i,i).   
            if HOWMNY = 'B', the matrix Q*X;   
            if HOWMNY = 'S', the right eigenvectors of T specified by   
                             SELECT, stored consecutively in the columns   
                             of VR, in the same order as their   
                             eigenvalues.   
            If SIDE = 'L', VR is not referenced.   

    LDVR    (input) INTEGER   
            The leading dimension of the array VR.  LDVR >= max(1,N) if   
             SIDE = 'R' or 'B'; LDVR >= 1 otherwise.   

    MM      (input) INTEGER   
            The number of columns in the arrays VL and/or VR. MM >= M.   

    M       (output) INTEGER   
            The number of columns in the arrays VL and/or VR actually   
            used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M   
            is set to N.  Each selected eigenvector occupies one   
            column.   

    WORK    (workspace) COMPLEX*16 array, dimension (2*N)   

    RWORK   (workspace) DOUBLE PRECISION array, dimension (N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The algorithm used in this program is basically backward (forward)   
    substitution, with scaling to make the the code robust against   
    possible overflow.   

    Each eigenvector is normalized so that the element of largest   
    magnitude has magnitude 1; here the magnitude of a complex number   
    (x,y) is taken to be |x| + |y|.   

    =====================================================================   


       Decode and test the input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static doublecomplex c_b2 = {1.,0.};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
	    i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3;
    doublecomplex z__1, z__2;
    /* Builtin functions */
    double d_imag(doublecomplex *);
    void d_cnjg(doublecomplex *, doublecomplex *);
    /* Local variables */
    static logical allv;
    static doublereal unfl, ovfl, smin;
    static logical over;
    static integer i__, j, k;
    static doublereal scale;
    extern logical lsame_(char *, char *);
    static doublereal remax;
    static logical leftv, bothv;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *, 
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
	    integer *, doublecomplex *, doublecomplex *, integer *);
    static logical somev;
    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, 
	    doublecomplex *, integer *), dlabad_(doublereal *, doublereal *);
    static integer ii, ki;
    extern doublereal dlamch_(char *);
    static integer is;
    extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_(
	    integer *, doublereal *, doublecomplex *, integer *);
    extern integer izamax_(integer *, doublecomplex *, integer *);
    static logical rightv;
    extern doublereal dzasum_(integer *, doublecomplex *, integer *);
    static doublereal smlnum;
    extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, 
	    integer *, doublecomplex *, integer *, doublecomplex *, 
	    doublereal *, doublereal *, integer *);
    static doublereal ulp;
#define t_subscr(a_1,a_2) (a_2)*t_dim1 + a_1
#define t_ref(a_1,a_2) t[t_subscr(a_1,a_2)]
#define vl_subscr(a_1,a_2) (a_2)*vl_dim1 + a_1
#define vl_ref(a_1,a_2) vl[vl_subscr(a_1,a_2)]
#define vr_subscr(a_1,a_2) (a_2)*vr_dim1 + a_1
#define vr_ref(a_1,a_2) vr[vr_subscr(a_1,a_2)]


    --select;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1 * 1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1 * 1;
    vr -= vr_offset;
    --work;
    --rwork;

    /* Function Body */
    bothv = lsame_(side, "B");
    rightv = lsame_(side, "R") || bothv;
    leftv = lsame_(side, "L") || bothv;

    allv = lsame_(howmny, "A");
    over = lsame_(howmny, "B");
    somev = lsame_(howmny, "S");

/*     Set M to the number of columns required to store the selected   
       eigenvectors. */

    if (somev) {
	*m = 0;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    if (select[j]) {
		++(*m);
	    }
/* L10: */
	}
    } else {
	*m = *n;
    }

    *info = 0;
    if (! rightv && ! leftv) {
	*info = -1;
    } else if (! allv && ! over && ! somev) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*ldt < max(1,*n)) {
	*info = -6;
    } else if (*ldvl < 1 || leftv && *ldvl < *n) {
	*info = -8;
    } else if (*ldvr < 1 || rightv && *ldvr < *n) {
	*info = -10;
    } else if (*mm < *m) {
	*info = -11;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTREVC", &i__1);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0) {
	return 0;
    }

/*     Set the constants to control overflow. */

    unfl = dlamch_("Safe minimum");
    ovfl = 1. / unfl;
    dlabad_(&unfl, &ovfl);
    ulp = dlamch_("Precision");
    smlnum = unfl * (*n / ulp);

/*     Store the diagonal elements of T in working array WORK. */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__ + *n;
	i__3 = t_subscr(i__, i__);
	work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i;
/* L20: */
    }

/*     Compute 1-norm of each column of strictly upper triangular   
       part of T to control overflow in triangular solver. */

    rwork[1] = 0.;
    i__1 = *n;
    for (j = 2; j <= i__1; ++j) {
	i__2 = j - 1;
	rwork[j] = dzasum_(&i__2, &t_ref(1, j), &c__1);
/* L30: */
    }

    if (rightv) {

/*        Compute right eigenvectors. */

	is = *m;
	for (ki = *n; ki >= 1; --ki) {

	    if (somev) {
		if (! select[ki]) {
		    goto L80;
		}
	    }
/* Computing MAX */
	    i__1 = t_subscr(ki, ki);
	    d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(&
		    t_ref(ki, ki)), abs(d__2)));
	    smin = max(d__3,smlnum);

	    work[1].r = 1., work[1].i = 0.;

/*           Form right-hand side. */

	    i__1 = ki - 1;
	    for (k = 1; k <= i__1; ++k) {
		i__2 = k;
		i__3 = t_subscr(k, ki);
		z__1.r = -t[i__3].r, z__1.i = -t[i__3].i;
		work[i__2].r = z__1.r, work[i__2].i = z__1.i;
/* L40: */
	    }

/*           Solve the triangular system:   
                (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. */

	    i__1 = ki - 1;
	    for (k = 1; k <= i__1; ++k) {
		i__2 = t_subscr(k, k);
		i__3 = t_subscr(k, k);
		i__4 = t_subscr(ki, ki);
		z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4]
			.i;
		t[i__2].r = z__1.r, t[i__2].i = z__1.i;
		i__2 = t_subscr(k, k);
		if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t_ref(k, 
			k)), abs(d__2)) < smin) {
		    i__3 = t_subscr(k, k);
		    t[i__3].r = smin, t[i__3].i = 0.;
		}
/* L50: */
	    }

	    if (ki > 1) {
		i__1 = ki - 1;
		zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[
			t_offset], ldt, &work[1], &scale, &rwork[1], info);
		i__1 = ki;
		work[i__1].r = scale, work[i__1].i = 0.;
	    }

/*           Copy the vector x or Q*x to VR and normalize. */

	    if (! over) {
		zcopy_(&ki, &work[1], &c__1, &vr_ref(1, is), &c__1);

		ii = izamax_(&ki, &vr_ref(1, is), &c__1);
		i__1 = vr_subscr(ii, is);
		remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag(
			&vr_ref(ii, is)), abs(d__2)));
		zdscal_(&ki, &remax, &vr_ref(1, is), &c__1);

		i__1 = *n;
		for (k = ki + 1; k <= i__1; ++k) {
		    i__2 = vr_subscr(k, is);
		    vr[i__2].r = 0., vr[i__2].i = 0.;
/* L60: */
		}
	    } else {
		if (ki > 1) {
		    i__1 = ki - 1;
		    z__1.r = scale, z__1.i = 0.;
		    zgemv_("N", n, &i__1, &c_b2, &vr[vr_offset], ldvr, &work[
			    1], &c__1, &z__1, &vr_ref(1, ki), &c__1);
		}

		ii = izamax_(n, &vr_ref(1, ki), &c__1);
		i__1 = vr_subscr(ii, ki);
		remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag(
			&vr_ref(ii, ki)), abs(d__2)));
		zdscal_(n, &remax, &vr_ref(1, ki), &c__1);
	    }

/*           Set back the original diagonal elements of T. */

	    i__1 = ki - 1;
	    for (k = 1; k <= i__1; ++k) {
		i__2 = t_subscr(k, k);
		i__3 = k + *n;
		t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i;
/* L70: */
	    }

	    --is;
L80:
	    ;
	}
    }

    if (leftv) {

/*        Compute left eigenvectors. */

	is = 1;
	i__1 = *n;
	for (ki = 1; ki <= i__1; ++ki) {

	    if (somev) {
		if (! select[ki]) {
		    goto L130;
		}
	    }
/* Computing MAX */
	    i__2 = t_subscr(ki, ki);
	    d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&
		    t_ref(ki, ki)), abs(d__2)));
	    smin = max(d__3,smlnum);

	    i__2 = *n;
	    work[i__2].r = 1., work[i__2].i = 0.;

/*           Form right-hand side. */

	    i__2 = *n;
	    for (k = ki + 1; k <= i__2; ++k) {
		i__3 = k;
		d_cnjg(&z__2, &t_ref(ki, k));
		z__1.r = -z__2.r, z__1.i = -z__2.i;
		work[i__3].r = z__1.r, work[i__3].i = z__1.i;
/* L90: */
	    }

/*           Solve the triangular system:   
                (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. */

	    i__2 = *n;
	    for (k = ki + 1; k <= i__2; ++k) {
		i__3 = t_subscr(k, k);
		i__4 = t_subscr(k, k);
		i__5 = t_subscr(ki, ki);
		z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5]
			.i;
		t[i__3].r = z__1.r, t[i__3].i = z__1.i;
		i__3 = t_subscr(k, k);
		if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t_ref(k, 
			k)), abs(d__2)) < smin) {
		    i__4 = t_subscr(k, k);
		    t[i__4].r = smin, t[i__4].i = 0.;
		}
/* L100: */
	    }

	    if (ki < *n) {
		i__2 = *n - ki;
		zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", &
			i__2, &t_ref(ki + 1, ki + 1), ldt, &work[ki + 1], &
			scale, &rwork[1], info);
		i__2 = ki;
		work[i__2].r = scale, work[i__2].i = 0.;
	    }

/*           Copy the vector x or Q*x to VL and normalize. */

	    if (! over) {
		i__2 = *n - ki + 1;
		zcopy_(&i__2, &work[ki], &c__1, &vl_ref(ki, is), &c__1);

		i__2 = *n - ki + 1;
		ii = izamax_(&i__2, &vl_ref(ki, is), &c__1) + ki - 1;
		i__2 = vl_subscr(ii, is);
		remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag(
			&vl_ref(ii, is)), abs(d__2)));
		i__2 = *n - ki + 1;
		zdscal_(&i__2, &remax, &vl_ref(ki, is), &c__1);

		i__2 = ki - 1;
		for (k = 1; k <= i__2; ++k) {
		    i__3 = vl_subscr(k, is);
		    vl[i__3].r = 0., vl[i__3].i = 0.;
/* L110: */
		}
	    } else {
		if (ki < *n) {
		    i__2 = *n - ki;
		    z__1.r = scale, z__1.i = 0.;
		    zgemv_("N", n, &i__2, &c_b2, &vl_ref(1, ki + 1), ldvl, &
			    work[ki + 1], &c__1, &z__1, &vl_ref(1, ki), &c__1);
		}

		ii = izamax_(n, &vl_ref(1, ki), &c__1);
		i__2 = vl_subscr(ii, ki);
		remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag(
			&vl_ref(ii, ki)), abs(d__2)));
		zdscal_(n, &remax, &vl_ref(1, ki), &c__1);
	    }

/*           Set back the original diagonal elements of T. */

	    i__2 = *n;
	    for (k = ki + 1; k <= i__2; ++k) {
		i__3 = t_subscr(k, k);
		i__4 = k + *n;
		t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i;
/* L120: */
	    }

	    ++is;
L130:
	    ;
	}
    }

    return 0;

/*     End of ZTREVC */

} /* ztrevc_ */
Exemple #28
0
/* Subroutine */ int dget37_(doublereal *rmax, integer *lmax, integer *ninfo, 
	integer *knt, integer *nin)
{
    /* System generated locals */
    integer i__1, i__2;
    doublereal d__1, d__2;

    /* Builtin functions */
    double sqrt(doublereal);
    integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), 
	    e_rsle(void);

    /* Local variables */
    static integer ifnd, icmp, iscl, info, lcmp[3], kmin;
    static doublereal wiin[20], vmax, tnrm, wrin[20], work[1200], vmul, stmp[
	    20];
    static integer i__, j, m, n;
    static doublereal s[20], t[400]	/* was [20][20] */, v;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *);
    static doublereal sepin[20];
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
	    doublereal *, integer *);
    static doublereal vimin, tolin, vrmin;
    static integer iwork[40];
    static doublereal witmp[20], wrtmp[20];
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    static doublereal le[400]	/* was [20][20] */, re[400]	/* was [20][
	    20] */;
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
	    integer *, doublereal *, integer *, doublereal *);
    extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
	    integer *);
    static doublereal wi[20], wr[20];
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, integer *);
    static logical select[20];
    static doublereal bignum;
    extern /* Subroutine */ int dhseqr_(char *, char *, integer *, integer *, 
	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, 
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    integer *, integer *, integer *, doublereal *, integer *), dtrsna_(char *, char *, logical *, integer *, doublereal 
	    *, integer *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
	    integer *, integer *, integer *);
    static doublereal septmp[20], smlnum, val[3], dum[1], eps, sep[20], sin__[
	    20], tol, tmp[400]	/* was [20][20] */;

    /* Fortran I/O blocks */
    static cilist io___5 = { 0, 0, 0, 0, 0 };
    static cilist io___8 = { 0, 0, 0, 0, 0 };
    static cilist io___11 = { 0, 0, 0, 0, 0 };



#define t_ref(a_1,a_2) t[(a_2)*20 + a_1 - 21]
#define le_ref(a_1,a_2) le[(a_2)*20 + a_1 - 21]
#define re_ref(a_1,a_2) re[(a_2)*20 + a_1 - 21]
#define tmp_ref(a_1,a_2) tmp[(a_2)*20 + a_1 - 21]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    DGET37 tests DTRSNA, a routine for estimating condition numbers of   
    eigenvalues and/or right eigenvectors of a matrix.   

    The test matrices are read from a file with logical unit number NIN.   

    Arguments   
    ==========   

    RMAX    (output) DOUBLE PRECISION array, dimension (3)   
            Value of the largest test ratio.   
            RMAX(1) = largest ratio comparing different calls to DTRSNA   
            RMAX(2) = largest error in reciprocal condition   
                      numbers taking their conditioning into account   
            RMAX(3) = largest error in reciprocal condition   
                      numbers not taking their conditioning into   
                      account (may be larger than RMAX(2))   

    LMAX    (output) INTEGER array, dimension (3)   
            LMAX(i) is example number where largest test ratio   
            RMAX(i) is achieved. Also:   
            If DGEHRD returns INFO nonzero on example i, LMAX(1)=i   
            If DHSEQR returns INFO nonzero on example i, LMAX(2)=i   
            If DTRSNA returns INFO nonzero on example i, LMAX(3)=i   

    NINFO   (output) INTEGER array, dimension (3)   
            NINFO(1) = No. of times DGEHRD returned INFO nonzero   
            NINFO(2) = No. of times DHSEQR returned INFO nonzero   
            NINFO(3) = No. of times DTRSNA returned INFO nonzero   

    KNT     (output) INTEGER   
            Total number of examples tested.   

    NIN     (input) INTEGER   
            Input logical unit number   

    =====================================================================   


       Parameter adjustments */
    --ninfo;
    --lmax;
    --rmax;

    /* Function Body */
    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);

/*     EPSIN = 2**(-24) = precision to which input data computed */

    eps = max(eps,5.9605e-8);
    rmax[1] = 0.;
    rmax[2] = 0.;
    rmax[3] = 0.;
    lmax[1] = 0;
    lmax[2] = 0;
    lmax[3] = 0;
    *knt = 0;
    ninfo[1] = 0;
    ninfo[2] = 0;
    ninfo[3] = 0;

    val[0] = sqrt(smlnum);
    val[1] = 1.;
    val[2] = sqrt(bignum);

/*     Read input data until N=0.  Assume input eigenvalues are sorted   
       lexicographically (increasing by real part, then decreasing by   
       imaginary part) */

L10:
    io___5.ciunit = *nin;
    s_rsle(&io___5);
    do_lio(&c__3, &c__1, (char *)&n, (ftnlen)sizeof(integer));
    e_rsle();
    if (n == 0) {
	return 0;
    }
    i__1 = n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	io___8.ciunit = *nin;
	s_rsle(&io___8);
	i__2 = n;
	for (j = 1; j <= i__2; ++j) {
	    do_lio(&c__5, &c__1, (char *)&tmp_ref(i__, j), (ftnlen)sizeof(
		    doublereal));
	}
	e_rsle();
/* L20: */
    }
    i__1 = n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	io___11.ciunit = *nin;
	s_rsle(&io___11);
	do_lio(&c__5, &c__1, (char *)&wrin[i__ - 1], (ftnlen)sizeof(
		doublereal));
	do_lio(&c__5, &c__1, (char *)&wiin[i__ - 1], (ftnlen)sizeof(
		doublereal));
	do_lio(&c__5, &c__1, (char *)&sin__[i__ - 1], (ftnlen)sizeof(
		doublereal));
	do_lio(&c__5, &c__1, (char *)&sepin[i__ - 1], (ftnlen)sizeof(
		doublereal));
	e_rsle();
/* L30: */
    }
    tnrm = dlange_("M", &n, &n, tmp, &c__20, work);

/*     Begin test */

    for (iscl = 1; iscl <= 3; ++iscl) {

/*        Scale input matrix */

	++(*knt);
	dlacpy_("F", &n, &n, tmp, &c__20, t, &c__20);
	vmul = val[iscl - 1];
	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    dscal_(&n, &vmul, &t_ref(1, i__), &c__1);
/* L40: */
	}
	if (tnrm == 0.) {
	    vmul = 1.;
	}

/*        Compute eigenvalues and eigenvectors */

	i__1 = 1200 - n;
	dgehrd_(&n, &c__1, &n, t, &c__20, work, &work[n], &i__1, &info);
	if (info != 0) {
	    lmax[1] = *knt;
	    ++ninfo[1];
	    goto L240;
	}
	i__1 = n - 2;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = n;
	    for (i__ = j + 2; i__ <= i__2; ++i__) {
		t_ref(i__, j) = 0.;
/* L50: */
	    }
/* L60: */
	}

/*        Compute Schur form */

	dhseqr_("S", "N", &n, &c__1, &n, t, &c__20, wr, wi, dum, &c__1, work, 
		&c__1200, &info);
	if (info != 0) {
	    lmax[2] = *knt;
	    ++ninfo[2];
	    goto L240;
	}

/*        Compute eigenvectors */

	dtrevc_("Both", "All", select, &n, t, &c__20, le, &c__20, re, &c__20, 
		&n, &m, work, &info);

/*        Compute condition numbers */

	dtrsna_("Both", "All", select, &n, t, &c__20, le, &c__20, re, &c__20, 
		s, sep, &n, &m, work, &n, iwork, &info);
	if (info != 0) {
	    lmax[3] = *knt;
	    ++ninfo[3];
	    goto L240;
	}

/*        Sort eigenvalues and condition numbers lexicographically   
          to compare with inputs */

	dcopy_(&n, wr, &c__1, wrtmp, &c__1);
	dcopy_(&n, wi, &c__1, witmp, &c__1);
	dcopy_(&n, s, &c__1, stmp, &c__1);
	dcopy_(&n, sep, &c__1, septmp, &c__1);
	d__1 = 1. / vmul;
	dscal_(&n, &d__1, septmp, &c__1);
	i__1 = n - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    kmin = i__;
	    vrmin = wrtmp[i__ - 1];
	    vimin = witmp[i__ - 1];
	    i__2 = n;
	    for (j = i__ + 1; j <= i__2; ++j) {
		if (wrtmp[j - 1] < vrmin) {
		    kmin = j;
		    vrmin = wrtmp[j - 1];
		    vimin = witmp[j - 1];
		}
/* L70: */
	    }
	    wrtmp[kmin - 1] = wrtmp[i__ - 1];
	    witmp[kmin - 1] = witmp[i__ - 1];
	    wrtmp[i__ - 1] = vrmin;
	    witmp[i__ - 1] = vimin;
	    vrmin = stmp[kmin - 1];
	    stmp[kmin - 1] = stmp[i__ - 1];
	    stmp[i__ - 1] = vrmin;
	    vrmin = septmp[kmin - 1];
	    septmp[kmin - 1] = septmp[i__ - 1];
	    septmp[i__ - 1] = vrmin;
/* L80: */
	}

/*        Compare condition numbers for eigenvalues   
          taking their condition numbers into account   

   Computing MAX */
	d__1 = (doublereal) n * 2. * eps * tnrm;
	v = max(d__1,smlnum);
	if (tnrm == 0.) {
	    v = 1.;
	}
	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (v > septmp[i__ - 1]) {
		tol = 1.;
	    } else {
		tol = v / septmp[i__ - 1];
	    }
	    if (v > sepin[i__ - 1]) {
		tolin = 1.;
	    } else {
		tolin = v / sepin[i__ - 1];
	    }
/* Computing MAX */
	    d__1 = tol, d__2 = smlnum / eps;
	    tol = max(d__1,d__2);
/* Computing MAX */
	    d__1 = tolin, d__2 = smlnum / eps;
	    tolin = max(d__1,d__2);
	    if (eps * (sin__[i__ - 1] - tolin) > stmp[i__ - 1] + tol) {
		vmax = 1. / eps;
	    } else if (sin__[i__ - 1] - tolin > stmp[i__ - 1] + tol) {
		vmax = (sin__[i__ - 1] - tolin) / (stmp[i__ - 1] + tol);
	    } else if (sin__[i__ - 1] + tolin < eps * (stmp[i__ - 1] - tol)) {
		vmax = 1. / eps;
	    } else if (sin__[i__ - 1] + tolin < stmp[i__ - 1] - tol) {
		vmax = (stmp[i__ - 1] - tol) / (sin__[i__ - 1] + tolin);
	    } else {
		vmax = 1.;
	    }
	    if (vmax > rmax[2]) {
		rmax[2] = vmax;
		if (ninfo[2] == 0) {
		    lmax[2] = *knt;
		}
	    }
/* L90: */
	}

/*        Compare condition numbers for eigenvectors   
          taking their condition numbers into account */

	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (v > septmp[i__ - 1] * stmp[i__ - 1]) {
		tol = septmp[i__ - 1];
	    } else {
		tol = v / stmp[i__ - 1];
	    }
	    if (v > sepin[i__ - 1] * sin__[i__ - 1]) {
		tolin = sepin[i__ - 1];
	    } else {
		tolin = v / sin__[i__ - 1];
	    }
/* Computing MAX */
	    d__1 = tol, d__2 = smlnum / eps;
	    tol = max(d__1,d__2);
/* Computing MAX */
	    d__1 = tolin, d__2 = smlnum / eps;
	    tolin = max(d__1,d__2);
	    if (eps * (sepin[i__ - 1] - tolin) > septmp[i__ - 1] + tol) {
		vmax = 1. / eps;
	    } else if (sepin[i__ - 1] - tolin > septmp[i__ - 1] + tol) {
		vmax = (sepin[i__ - 1] - tolin) / (septmp[i__ - 1] + tol);
	    } else if (sepin[i__ - 1] + tolin < eps * (septmp[i__ - 1] - tol))
		     {
		vmax = 1. / eps;
	    } else if (sepin[i__ - 1] + tolin < septmp[i__ - 1] - tol) {
		vmax = (septmp[i__ - 1] - tol) / (sepin[i__ - 1] + tolin);
	    } else {
		vmax = 1.;
	    }
	    if (vmax > rmax[2]) {
		rmax[2] = vmax;
		if (ninfo[2] == 0) {
		    lmax[2] = *knt;
		}
	    }
/* L100: */
	}

/*        Compare condition numbers for eigenvalues   
          without taking their condition numbers into account */

	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (sin__[i__ - 1] <= (doublereal) (n << 1) * eps && stmp[i__ - 1]
		     <= (doublereal) (n << 1) * eps) {
		vmax = 1.;
	    } else if (eps * sin__[i__ - 1] > stmp[i__ - 1]) {
		vmax = 1. / eps;
	    } else if (sin__[i__ - 1] > stmp[i__ - 1]) {
		vmax = sin__[i__ - 1] / stmp[i__ - 1];
	    } else if (sin__[i__ - 1] < eps * stmp[i__ - 1]) {
		vmax = 1. / eps;
	    } else if (sin__[i__ - 1] < stmp[i__ - 1]) {
		vmax = stmp[i__ - 1] / sin__[i__ - 1];
	    } else {
		vmax = 1.;
	    }
	    if (vmax > rmax[3]) {
		rmax[3] = vmax;
		if (ninfo[3] == 0) {
		    lmax[3] = *knt;
		}
	    }
/* L110: */
	}

/*        Compare condition numbers for eigenvectors   
          without taking their condition numbers into account */

	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (sepin[i__ - 1] <= v && septmp[i__ - 1] <= v) {
		vmax = 1.;
	    } else if (eps * sepin[i__ - 1] > septmp[i__ - 1]) {
		vmax = 1. / eps;
	    } else if (sepin[i__ - 1] > septmp[i__ - 1]) {
		vmax = sepin[i__ - 1] / septmp[i__ - 1];
	    } else if (sepin[i__ - 1] < eps * septmp[i__ - 1]) {
		vmax = 1. / eps;
	    } else if (sepin[i__ - 1] < septmp[i__ - 1]) {
		vmax = septmp[i__ - 1] / sepin[i__ - 1];
	    } else {
		vmax = 1.;
	    }
	    if (vmax > rmax[3]) {
		rmax[3] = vmax;
		if (ninfo[3] == 0) {
		    lmax[3] = *knt;
		}
	    }
/* L120: */
	}

/*        Compute eigenvalue condition numbers only and compare */

	vmax = 0.;
	dum[0] = -1.;
	dcopy_(&n, dum, &c__0, stmp, &c__1);
	dcopy_(&n, dum, &c__0, septmp, &c__1);
	dtrsna_("Eigcond", "All", select, &n, t, &c__20, le, &c__20, re, &
		c__20, stmp, septmp, &n, &m, work, &n, iwork, &info);
	if (info != 0) {
	    lmax[3] = *knt;
	    ++ninfo[3];
	    goto L240;
	}
	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (stmp[i__ - 1] != s[i__ - 1]) {
		vmax = 1. / eps;
	    }
	    if (septmp[i__ - 1] != dum[0]) {
		vmax = 1. / eps;
	    }
/* L130: */
	}

/*        Compute eigenvector condition numbers only and compare */

	dcopy_(&n, dum, &c__0, stmp, &c__1);
	dcopy_(&n, dum, &c__0, septmp, &c__1);
	dtrsna_("Veccond", "All", select, &n, t, &c__20, le, &c__20, re, &
		c__20, stmp, septmp, &n, &m, work, &n, iwork, &info);
	if (info != 0) {
	    lmax[3] = *knt;
	    ++ninfo[3];
	    goto L240;
	}
	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (stmp[i__ - 1] != dum[0]) {
		vmax = 1. / eps;
	    }
	    if (septmp[i__ - 1] != sep[i__ - 1]) {
		vmax = 1. / eps;
	    }
/* L140: */
	}

/*        Compute all condition numbers using SELECT and compare */

	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    select[i__ - 1] = TRUE_;
/* L150: */
	}
	dcopy_(&n, dum, &c__0, stmp, &c__1);
	dcopy_(&n, dum, &c__0, septmp, &c__1);
	dtrsna_("Bothcond", "Some", select, &n, t, &c__20, le, &c__20, re, &
		c__20, stmp, septmp, &n, &m, work, &n, iwork, &info);
	if (info != 0) {
	    lmax[3] = *knt;
	    ++ninfo[3];
	    goto L240;
	}
	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (septmp[i__ - 1] != sep[i__ - 1]) {
		vmax = 1. / eps;
	    }
	    if (stmp[i__ - 1] != s[i__ - 1]) {
		vmax = 1. / eps;
	    }
/* L160: */
	}

/*        Compute eigenvalue condition numbers using SELECT and compare */

	dcopy_(&n, dum, &c__0, stmp, &c__1);
	dcopy_(&n, dum, &c__0, septmp, &c__1);
	dtrsna_("Eigcond", "Some", select, &n, t, &c__20, le, &c__20, re, &
		c__20, stmp, septmp, &n, &m, work, &n, iwork, &info);
	if (info != 0) {
	    lmax[3] = *knt;
	    ++ninfo[3];
	    goto L240;
	}
	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (stmp[i__ - 1] != s[i__ - 1]) {
		vmax = 1. / eps;
	    }
	    if (septmp[i__ - 1] != dum[0]) {
		vmax = 1. / eps;
	    }
/* L170: */
	}

/*        Compute eigenvector condition numbers using SELECT and compare */

	dcopy_(&n, dum, &c__0, stmp, &c__1);
	dcopy_(&n, dum, &c__0, septmp, &c__1);
	dtrsna_("Veccond", "Some", select, &n, t, &c__20, le, &c__20, re, &
		c__20, stmp, septmp, &n, &m, work, &n, iwork, &info);
	if (info != 0) {
	    lmax[3] = *knt;
	    ++ninfo[3];
	    goto L240;
	}
	i__1 = n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    if (stmp[i__ - 1] != dum[0]) {
		vmax = 1. / eps;
	    }
	    if (septmp[i__ - 1] != sep[i__ - 1]) {
		vmax = 1. / eps;
	    }
/* L180: */
	}
	if (vmax > rmax[1]) {
	    rmax[1] = vmax;
	    if (ninfo[1] == 0) {
		lmax[1] = *knt;
	    }
	}

/*        Select first real and first complex eigenvalue */

	if (wi[0] == 0.) {
	    lcmp[0] = 1;
	    ifnd = 0;
	    i__1 = n;
	    for (i__ = 2; i__ <= i__1; ++i__) {
		if (ifnd == 1 || wi[i__ - 1] == 0.) {
		    select[i__ - 1] = FALSE_;
		} else {
		    ifnd = 1;
		    lcmp[1] = i__;
		    lcmp[2] = i__ + 1;
		    dcopy_(&n, &re_ref(1, i__), &c__1, &re_ref(1, 2), &c__1);
		    dcopy_(&n, &re_ref(1, i__ + 1), &c__1, &re_ref(1, 3), &
			    c__1);
		    dcopy_(&n, &le_ref(1, i__), &c__1, &le_ref(1, 2), &c__1);
		    dcopy_(&n, &le_ref(1, i__ + 1), &c__1, &le_ref(1, 3), &
			    c__1);
		}
/* L190: */
	    }
	    if (ifnd == 0) {
		icmp = 1;
	    } else {
		icmp = 3;
	    }
	} else {
	    lcmp[0] = 1;
	    lcmp[1] = 2;
	    ifnd = 0;
	    i__1 = n;
	    for (i__ = 3; i__ <= i__1; ++i__) {
		if (ifnd == 1 || wi[i__ - 1] != 0.) {
		    select[i__ - 1] = FALSE_;
		} else {
		    lcmp[2] = i__;
		    ifnd = 1;
		    dcopy_(&n, &re_ref(1, i__), &c__1, &re_ref(1, 3), &c__1);
		    dcopy_(&n, &le_ref(1, i__), &c__1, &le_ref(1, 3), &c__1);
		}
/* L200: */
	    }
	    if (ifnd == 0) {
		icmp = 2;
	    } else {
		icmp = 3;
	    }
	}

/*        Compute all selected condition numbers */

	dcopy_(&icmp, dum, &c__0, stmp, &c__1);
	dcopy_(&icmp, dum, &c__0, septmp, &c__1);
	dtrsna_("Bothcond", "Some", select, &n, t, &c__20, le, &c__20, re, &
		c__20, stmp, septmp, &n, &m, work, &n, iwork, &info);
	if (info != 0) {
	    lmax[3] = *knt;
	    ++ninfo[3];
	    goto L240;
	}
	i__1 = icmp;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    j = lcmp[i__ - 1];
	    if (septmp[i__ - 1] != sep[j - 1]) {
		vmax = 1. / eps;
	    }
	    if (stmp[i__ - 1] != s[j - 1]) {
		vmax = 1. / eps;
	    }
/* L210: */
	}

/*        Compute selected eigenvalue condition numbers */

	dcopy_(&icmp, dum, &c__0, stmp, &c__1);
	dcopy_(&icmp, dum, &c__0, septmp, &c__1);
	dtrsna_("Eigcond", "Some", select, &n, t, &c__20, le, &c__20, re, &
		c__20, stmp, septmp, &n, &m, work, &n, iwork, &info);
	if (info != 0) {
	    lmax[3] = *knt;
	    ++ninfo[3];
	    goto L240;
	}
	i__1 = icmp;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    j = lcmp[i__ - 1];
	    if (stmp[i__ - 1] != s[j - 1]) {
		vmax = 1. / eps;
	    }
	    if (septmp[i__ - 1] != dum[0]) {
		vmax = 1. / eps;
	    }
/* L220: */
	}

/*        Compute selected eigenvector condition numbers */

	dcopy_(&icmp, dum, &c__0, stmp, &c__1);
	dcopy_(&icmp, dum, &c__0, septmp, &c__1);
	dtrsna_("Veccond", "Some", select, &n, t, &c__20, le, &c__20, re, &
		c__20, stmp, septmp, &n, &m, work, &n, iwork, &info);
	if (info != 0) {
	    lmax[3] = *knt;
	    ++ninfo[3];
	    goto L240;
	}
	i__1 = icmp;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    j = lcmp[i__ - 1];
	    if (stmp[i__ - 1] != dum[0]) {
		vmax = 1. / eps;
	    }
	    if (septmp[i__ - 1] != sep[j - 1]) {
		vmax = 1. / eps;
	    }
/* L230: */
	}
	if (vmax > rmax[1]) {
	    rmax[1] = vmax;
	    if (ninfo[1] == 0) {
		lmax[1] = *knt;
	    }
	}
L240:
	;
    }
    goto L10;

/*     End of DGET37 */

} /* dget37_ */