Пример #1
0
//---------------------------------------------------------
void eig(const DMat& A, DVec& Re, DMat& VR)
//---------------------------------------------------------
{
  // Compute eigenvalues and RIGHT eigenvectors of a real 
  // general matrix.  NOT returning imaginary components.
  DMat VL("VL");
  eig(A, Re, VL, VR, false, true);
}
Пример #2
0
//---------------------------------------------------------
void eig(const DMat& A, DVec& Re)
//---------------------------------------------------------
{
  // Compute eigenvalues of a real general matrix
  // Currently NOT returning imaginary components
  DMat VL("VL"), VR("VR");
  eig(A, Re, VL, VR, false, false);
}
Пример #3
0
/*! calculate left eigenvalues and left eigenvectors\n
  All of the arguments need not to be initialized. 
  wr, wi, vrr, vri are overwitten and become 
  real and imaginary part of left eigenvalues and left eigenvectors, 
  respectively. 
  This matrix is also overwritten. 
*/
inline long dgematrix::dgeev(std::vector<double>& wr, std::vector<double>& wi, 
                             std::vector<drovector>& vlr, 
                             std::vector<drovector>& vli)
{
#ifdef  CPPL_VERBOSE
  std::cerr << "# [MARK] dgematrix::dgeev(std::vector<double>&, std::vector<double>&, std::vector<drovector>&, std::vector<drovector>&)"
            << std::endl;
#endif//CPPL_VERBOSE
  
#ifdef  CPPL_DEBUG
  if(M!=N){
    std::cerr << "[ERROR] dgematrix::dgeev"
              << "(vector<double>&, vector<double>&, "
              << "vector<drovector>&, vector<drovector>&) "
              << std::endl
              << "This matrix is not a square matrix." << std::endl
              << "This matrix is (" << M << "x" << N << ")." << std::endl;
    exit(1);
  }
#endif//CPPL_DEBUG
  
  wr.resize(N); wi.resize(N); vlr.resize(N); vli.resize(N);
  for(long i=0; i<N; i++){ vlr[i].resize(N); vli[i].resize(N); }
  dgematrix VL(N,N);
  char JOBVL('V'), JOBVR('N');
  long LDA(N), LDVL(N), LDVR(1), LWORK(4*N), INFO(1);
  double *VR(NULL), *WORK(new double[LWORK]);
  dgeev_(JOBVL, JOBVR, N, Array, LDA, &wr[0], &wi[0], 
         VL.Array, LDVL, VR, LDVR, WORK, LWORK, INFO);
  delete [] WORK; delete [] VR;

  //// forming ////
  for(long j=0; j<N; j++){
    if(fabs(wi[j])<1e-10){
      for(long i=0; i<N; i++){
        vlr[j](i) = VL(i,j);  vli[j](i) = 0.0;
      }
    }
    else{
      for(long i=0; i<N; i++){
        vlr[j](i)   = VL(i,j);  vli[j](i)   =-VL(i,j+1);
        vlr[j+1](i) = VL(i,j);  vli[j+1](i) = VL(i,j+1);
      }
      j++;
    }
  }
  

  if(INFO!=0){
    std::cerr << "[WARNING] dgematrix::dgeev"
              << "(vector<double>&, vector<double>&, "
              << "vector<drovector>&, vector<drovector>&) "
              << std::endl
              << "Serious trouble happend. INFO = " << INFO << "."
              << std::endl;
  }
  return INFO;
}
Пример #4
0
void tree_build_js(struct env *env) //struct particle **ps, struct tree *head)
{
    env->tree = NULL;
    if (env->tree == NULL)
    {
        env->tree = alloc_node();
        assert(env->tree != NULL);
        env->tree->iLower = 0;
        env->tree->iUpper = env->N - 1;
        env->tree->left = env->tree->right = NULL;
    }

    comparisonCount = 0;
    nodeCount = 1;

    crop(env->p, env->tree, 0, env->N - 1);

    sort(env, MAX_PIC);
    //populate(ps, head);

    VL(2) fprintf(err, "nodeCount = %i\n", nodeCount);
    VL(2) fprintf(err, "head->mass = %f\n", env->tree->mass);
    VL(2) fprintf(err, "head->r = %f %f %f\n", env->tree->r[0], env->tree->r[1], env->tree->r[2]);
}
Пример #5
0
void tree_free_jpc(struct env *env)
{
    VL(1) fprintf(err, "tree_free_jps: Deallocating tree.\n");
    if (node_pool != NULL)
    {
        int i;
        for (i=0; i < nodePool_seg; i++)
            if (node_pool[i] != NULL) free(node_pool[i]);
        free(node_pool);
        node_pool = NULL;
    }

    nodePool_seg = 0;
    nodePool_off = NODE_POOL_SEGMENT_SIZE;

    env->tree = NULL;
}
Пример #6
0
void h_boialg ( h_hms *m )
{
  int i, status;

  int repeat = pow( m->p->rr, m->g->l );

  H_DBL t = m->g->t;
  H_DBL dt = m->g->dt;

  h_hms *m_c = h_alloc_hms( );

  
  do {
      repeat--;
      
      status = _h_step ( t, t+dt, dt, m->g->u, m ); 

      if ( status != GSL_SUCCESS ) {
          _STAT_MSG("BO integration algorithm",
                    "step status != GSL_SUCCESS",
                    H_WA, 0);
          break;
      }

      VL(("stepping for l=%d, m=%d, repeat=%d, dt=%e\n",
          m->g->l,
          m->g->m,
          repeat, dt));
      /* sleep ( 2 ); */
      
      for (i = 0; i < m->g->Nchildren; i++) {
          m_c->g = (h_grid*) m->g->children[i];
          m_c->p = m->p;
          m_c->f = m->f;
          h_boialg ( m_c );
      }
      m->g->t = t+dt;
      
      h_update ( m->g, m_c->g );
      
  } while ( repeat > 0 );
  
}
Пример #7
0
// construct identity matrix
vector diaggen(matrix& a)
{
	int N=a.Rows;
	char JOBVL='V';
	char JOBVR='V';
	int INFO=0;
	int LDA=N;
	vector WR(N);
	vector WI(N);
	int LDVL=N;
	int LDVR=N;
	matrix VL(LDVL,N);
	matrix VR(LDVR,N);
	int LWORK=4*N;
	vector WORK(LWORK);
	vector W(N);
	FORTRAN(dgeev)(&JOBVL,&JOBVR,&N,a.TheMatrix,&LDA,WR.TheVector, 
	WI.TheVector,VL.TheMatrix,&LDVL,VR.TheMatrix,&LDVR,WORK.TheVector, 
	&LWORK,&INFO);
	if (INFO != 0) cerr<<"diagonalization failed"<<endl; 
	return WR; 
}
Пример #8
0
/* Subroutine */ int ztgevc_(char *side, char *howmny, logical *select, 
	integer *n, doublecomplex *a, integer *lda, doublecomplex *b, integer 
	*ldb, doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *
	ldvr, integer *mm, integer *m, doublecomplex *work, doublereal *rwork,
	 integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   



    Purpose   
    =======   

    ZTGEVC computes some or all of the right and/or left generalized   
    eigenvectors of a pair of complex upper triangular matrices (A,B).   

    The right generalized eigenvector x and the left generalized   
    eigenvector y of (A,B) corresponding to a generalized eigenvalue   
    w are defined by:   

            (A - wB) * x = 0  and  y**H * (A - wB) = 0   

    where y**H denotes the conjugate tranpose of y.   

    If an eigenvalue w is determined by zero diagonal elements of both A 
  
    and B, a unit vector is returned as the corresponding eigenvector.   

    If all eigenvectors are requested, the routine may either return   
    the matrices X and/or Y of right or left eigenvectors of (A,B), or   
    the products Z*X and/or Q*Y, where Z and Q are input unitary   
    matrices.  If (A,B) was obtained from the generalized Schur   
    factorization of an original pair of matrices   
       (A0,B0) = (Q*A*Z**H,Q*B*Z**H),   
    then Z*X and Q*Y are the matrices of right or left eigenvectors of   
    A.   

    Arguments   
    =========   

    SIDE    (input) CHARACTER*1   
            = 'R': compute right eigenvectors only;   
            = 'L': compute left eigenvectors only;   
            = 'B': compute both right and left eigenvectors.   

    HOWMNY  (input) CHARACTER*1   
            = 'A': compute all right and/or left eigenvectors;   
            = 'B': compute all right and/or left eigenvectors, and   
                   backtransform them using the input matrices supplied   
                   in VR and/or VL;   
            = 'S': compute selected right and/or left eigenvectors,   
                   specified by the logical array SELECT.   

    SELECT  (input) LOGICAL array, dimension (N)   
            If HOWMNY='S', SELECT specifies the eigenvectors to be   
            computed.   
            If HOWMNY='A' or 'B', SELECT is not referenced.   
            To select the eigenvector corresponding to the j-th   
            eigenvalue, SELECT(j) must be set to .TRUE..   

    N       (input) INTEGER   
            The order of the matrices A and B.  N >= 0.   

    A       (input) COMPLEX*16 array, dimension (LDA,N)   
            The upper triangular matrix A.   

    LDA     (input) INTEGER   
            The leading dimension of array A.  LDA >= max(1,N).   

    B       (input) COMPLEX*16 array, dimension (LDB,N)   
            The upper triangular matrix B.  B must have real diagonal   
            elements.   

    LDB     (input) INTEGER   
            The leading dimension of array B.  LDB >= max(1,N).   

    VL      (input/output) COMPLEX*16 array, dimension (LDVL,MM)   
            On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must   
            contain an N-by-N matrix Q (usually the unitary matrix Q   
            of left Schur vectors returned by ZHGEQZ).   
            On exit, if SIDE = 'L' or 'B', VL contains:   
            if HOWMNY = 'A', the matrix Y of left eigenvectors of (A,B); 
  
            if HOWMNY = 'B', the matrix Q*Y;   
            if HOWMNY = 'S', the left eigenvectors of (A,B) specified by 
  
                        SELECT, stored consecutively in the columns of   
                        VL, in the same order as their eigenvalues.   
            If SIDE = 'R', VL is not referenced.   

    LDVL    (input) INTEGER   
            The leading dimension of array VL.   
            LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise.   

    VR      (input/output) COMPLEX*16 array, dimension (LDVR,MM)   
            On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must   
            contain an N-by-N matrix Q (usually the unitary matrix Z   
            of right Schur vectors returned by ZHGEQZ).   
            On exit, if SIDE = 'R' or 'B', VR contains:   
            if HOWMNY = 'A', the matrix X of right eigenvectors of (A,B); 
  
            if HOWMNY = 'B', the matrix Z*X;   
            if HOWMNY = 'S', the right eigenvectors of (A,B) specified by 
  
                        SELECT, stored consecutively in the columns of   
                        VR, in the same order as their eigenvalues.   
            If SIDE = 'L', VR is not referenced.   

    LDVR    (input) INTEGER   
            The leading dimension of the array VR.   
            LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise.   

    MM      (input) INTEGER   
            The leading dimension of the array VR.   
            LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise.   

    MM      (input) INTEGER   
            The number of columns in the arrays VL and/or VR. MM >= M.   

    M       (output) INTEGER   
            The number of columns in the arrays VL and/or VR actually   
            used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M   
            is set to N.  Each selected eigenvector occupies one column. 
  

    WORK    (workspace) COMPLEX*16 array, dimension (2*N)   

    RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    ===================================================================== 
  


       Decode and Test the input parameters   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static doublecomplex c_b1 = {0.,0.};
    static doublecomplex c_b2 = {1.,0.};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1, 
	    vr_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3, d__4, d__5, d__6;
    doublecomplex z__1, z__2, z__3, z__4;
    /* Builtin functions */
    double d_imag(doublecomplex *);
    void d_cnjg(doublecomplex *, doublecomplex *), z_div(doublecomplex *, 
	    doublecomplex *, doublecomplex *);
    /* Local variables */
    static integer ibeg, ieig, iend;
    static doublereal dmin__;
    static integer isrc;
    static doublereal temp;
    static doublecomplex suma, sumb;
    static doublereal xmax;
    static doublecomplex d;
    static integer i, j;
    static doublereal scale;
    static logical ilall;
    static integer iside;
    static doublereal sbeta;
    extern logical lsame_(char *, char *);
    static doublereal small;
    static logical compl;
    static doublereal anorm, bnorm;
    static logical compr;
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *, 
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
	    integer *, doublecomplex *, doublecomplex *, integer *);
    static doublecomplex ca, cb;
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    static logical ilbbad;
    static doublereal acoefa;
    static integer je;
    static doublereal bcoefa, acoeff;
    static doublecomplex bcoeff;
    static logical ilback;
    static integer im;
    static doublereal ascale, bscale;
    extern doublereal dlamch_(char *);
    static integer jr;
    static doublecomplex salpha;
    static doublereal safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal bignum;
    static logical ilcomp;
    static integer ihwmny;
    static doublereal big;
    static logical lsa, lsb;
    static doublereal ulp;
    static doublecomplex sum;



#define SELECT(I) select[(I)-1]
#define WORK(I) work[(I)-1]
#define RWORK(I) rwork[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]
#define VL(I,J) vl[(I)-1 + ((J)-1)* ( *ldvl)]
#define VR(I,J) vr[(I)-1 + ((J)-1)* ( *ldvr)]

    if (lsame_(howmny, "A")) {
	ihwmny = 1;
	ilall = TRUE_;
	ilback = FALSE_;
    } else if (lsame_(howmny, "S")) {
	ihwmny = 2;
	ilall = FALSE_;
	ilback = FALSE_;
    } else if (lsame_(howmny, "B") || lsame_(howmny, "T")) {
	ihwmny = 3;
	ilall = TRUE_;
	ilback = TRUE_;
    } else {
	ihwmny = -1;
    }

    if (lsame_(side, "R")) {
	iside = 1;
	compl = FALSE_;
	compr = TRUE_;
    } else if (lsame_(side, "L")) {
	iside = 2;
	compl = TRUE_;
	compr = FALSE_;
    } else if (lsame_(side, "B")) {
	iside = 3;
	compl = TRUE_;
	compr = TRUE_;
    } else {
	iside = -1;
    }

/*     Count the number of eigenvectors */

    if (! ilall) {
	im = 0;
	i__1 = *n;
	for (j = 1; j <= *n; ++j) {
	    if (SELECT(j)) {
		++im;
	    }
/* L10: */
	}
    } else {
	im = *n;
    }

/*     Check diagonal of B */

    ilbbad = FALSE_;
    i__1 = *n;
    for (j = 1; j <= *n; ++j) {
	if (d_imag(&B(j,j)) != 0.) {
	    ilbbad = TRUE_;
	}
/* L20: */
    }

    *info = 0;
    if (iside < 0) {
	*info = -1;
    } else if (ihwmny < 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lda < max(1,*n)) {
	*info = -6;
    } else if (ilbbad) {
	*info = -7;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else if (compl && *ldvl < *n || *ldvl < 1) {
	*info = -10;
    } else if (compr && *ldvr < *n || *ldvr < 1) {
	*info = -12;
    } else if (*mm < im) {
	*info = -13;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTGEVC", &i__1);
	return 0;
    }

/*     Quick return if possible */

    *m = im;
    if (*n == 0) {
	return 0;
    }

/*     Machine Constants */

    safmin = dlamch_("Safe minimum");
    big = 1. / safmin;
    dlabad_(&safmin, &big);
    ulp = dlamch_("Epsilon") * dlamch_("Base");
    small = safmin * *n / ulp;
    big = 1. / small;
    bignum = 1. / (safmin * *n);

/*     Compute the 1-norm of each column of the strictly upper triangular 
  
       part of A and B to check for possible overflow in the triangular   
       solver. */

    i__1 = a_dim1 + 1;
    anorm = (d__1 = A(1,1).r, abs(d__1)) + (d__2 = d_imag(&A(1,1)), 
	    abs(d__2));
    i__1 = b_dim1 + 1;
    bnorm = (d__1 = B(1,1).r, abs(d__1)) + (d__2 = d_imag(&B(1,1)), 
	    abs(d__2));
    RWORK(1) = 0.;
    RWORK(*n + 1) = 0.;
    i__1 = *n;
    for (j = 2; j <= *n; ++j) {
	RWORK(j) = 0.;
	RWORK(*n + j) = 0.;
	i__2 = j - 1;
	for (i = 1; i <= j-1; ++i) {
	    i__3 = i + j * a_dim1;
	    RWORK(j) += (d__1 = A(i,j).r, abs(d__1)) + (d__2 = d_imag(&A(i,j)), abs(d__2));
	    i__3 = i + j * b_dim1;
	    RWORK(*n + j) += (d__1 = B(i,j).r, abs(d__1)) + (d__2 = d_imag(&
		    B(i,j)), abs(d__2));
/* L30: */
	}
/* Computing MAX */
	i__2 = j + j * a_dim1;
	d__3 = anorm, d__4 = RWORK(j) + ((d__1 = A(j,j).r, abs(d__1)) + (
		d__2 = d_imag(&A(j,j)), abs(d__2)));
	anorm = max(d__3,d__4);
/* Computing MAX */
	i__2 = j + j * b_dim1;
	d__3 = bnorm, d__4 = RWORK(*n + j) + ((d__1 = B(j,j).r, abs(d__1)) + 
		(d__2 = d_imag(&B(j,j)), abs(d__2)));
	bnorm = max(d__3,d__4);
/* L40: */
    }

    ascale = 1. / max(anorm,safmin);
    bscale = 1. / max(bnorm,safmin);

/*     Left eigenvectors */

    if (compl) {
	ieig = 0;

/*        Main loop over eigenvalues */

	i__1 = *n;
	for (je = 1; je <= *n; ++je) {
	    if (ilall) {
		ilcomp = TRUE_;
	    } else {
		ilcomp = SELECT(je);
	    }
	    if (ilcomp) {
		++ieig;

		i__2 = je + je * a_dim1;
		i__3 = je + je * b_dim1;
		if ((d__1 = A(je,je).r, abs(d__1)) + (d__2 = d_imag(&A(je,je)), abs(d__2)) <= safmin && (d__3 = B(je,je).r,
			 abs(d__3)) <= safmin) {

/*                 Singular matrix pencil -- return unit e
igenvector */

		    i__2 = *n;
		    for (jr = 1; jr <= *n; ++jr) {
			i__3 = jr + ieig * vl_dim1;
			VL(jr,ieig).r = 0., VL(jr,ieig).i = 0.;
/* L50: */
		    }
		    i__2 = ieig + ieig * vl_dim1;
		    VL(ieig,ieig).r = 1., VL(ieig,ieig).i = 0.;
		    goto L140;
		}

/*              Non-singular eigenvalue:   
                Compute coefficients  a  and  b  in   
                     H   
                   y  ( a A - b B ) = 0   

   Computing MAX */
		i__2 = je + je * a_dim1;
		i__3 = je + je * b_dim1;
		d__4 = ((d__1 = A(je,je).r, abs(d__1)) + (d__2 = d_imag(&A(je,je)), abs(d__2))) * ascale, d__5 = (d__3 = 
			B(je,je).r, abs(d__3)) * bscale, d__4 = max(d__4,d__5);
		temp = 1. / max(d__4,safmin);
		i__2 = je + je * a_dim1;
		z__2.r = temp * A(je,je).r, z__2.i = temp * A(je,je).i;
		z__1.r = ascale * z__2.r, z__1.i = ascale * z__2.i;
		salpha.r = z__1.r, salpha.i = z__1.i;
		i__2 = je + je * b_dim1;
		sbeta = temp * B(je,je).r * bscale;
		acoeff = sbeta * ascale;
		z__1.r = bscale * salpha.r, z__1.i = bscale * salpha.i;
		bcoeff.r = z__1.r, bcoeff.i = z__1.i;

/*              Scale to avoid underflow */

		lsa = abs(sbeta) >= safmin && abs(acoeff) < small;
		lsb = (d__1 = salpha.r, abs(d__1)) + (d__2 = d_imag(&salpha), 
			abs(d__2)) >= safmin && (d__3 = bcoeff.r, abs(d__3)) 
			+ (d__4 = d_imag(&bcoeff), abs(d__4)) < small;

		scale = 1.;
		if (lsa) {
		    scale = small / abs(sbeta) * min(anorm,big);
		}
		if (lsb) {
/* Computing MAX */
		    d__3 = scale, d__4 = small / ((d__1 = salpha.r, abs(d__1))
			     + (d__2 = d_imag(&salpha), abs(d__2))) * min(
			    bnorm,big);
		    scale = max(d__3,d__4);
		}
		if (lsa || lsb) {
/* Computing MIN   
   Computing MAX */
		    d__5 = 1., d__6 = abs(acoeff), d__5 = max(d__5,d__6), 
			    d__6 = (d__1 = bcoeff.r, abs(d__1)) + (d__2 = 
			    d_imag(&bcoeff), abs(d__2));
		    d__3 = scale, d__4 = 1. / (safmin * max(d__5,d__6));
		    scale = min(d__3,d__4);
		    if (lsa) {
			acoeff = ascale * (scale * sbeta);
		    } else {
			acoeff = scale * acoeff;
		    }
		    if (lsb) {
			z__2.r = scale * salpha.r, z__2.i = scale * salpha.i;
			z__1.r = bscale * z__2.r, z__1.i = bscale * z__2.i;
			bcoeff.r = z__1.r, bcoeff.i = z__1.i;
		    } else {
			z__1.r = scale * bcoeff.r, z__1.i = scale * bcoeff.i;
			bcoeff.r = z__1.r, bcoeff.i = z__1.i;
		    }
		}

		acoefa = abs(acoeff);
		bcoefa = (d__1 = bcoeff.r, abs(d__1)) + (d__2 = d_imag(&
			bcoeff), abs(d__2));
		xmax = 1.;
		i__2 = *n;
		for (jr = 1; jr <= *n; ++jr) {
		    i__3 = jr;
		    WORK(jr).r = 0., WORK(jr).i = 0.;
/* L60: */
		}
		i__2 = je;
		WORK(je).r = 1., WORK(je).i = 0.;
/* Computing MAX */
		d__1 = ulp * acoefa * anorm, d__2 = ulp * bcoefa * bnorm, 
			d__1 = max(d__1,d__2);
		dmin__ = max(d__1,safmin);

/*                                              H   
                Triangular solve of  (a A - b B)  y = 0   

                                        H   
                (rowwise in  (a A - b B) , or columnwise in a 
A - b B) */

		i__2 = *n;
		for (j = je + 1; j <= *n; ++j) {

/*                 Compute   
                         j-1   
                   SUM = sum  conjg( a*A(k,j) - b*B(k,j) )
*x(k)   
                         k=je   
                   (Scale if necessary) */

		    temp = 1. / xmax;
		    if (acoefa * RWORK(j) + bcoefa * RWORK(*n + j) > bignum * 
			    temp) {
			i__3 = j - 1;
			for (jr = je; jr <= j-1; ++jr) {
			    i__4 = jr;
			    i__5 = jr;
			    z__1.r = temp * WORK(jr).r, z__1.i = temp * 
				    WORK(jr).i;
			    WORK(jr).r = z__1.r, WORK(jr).i = z__1.i;
/* L70: */
			}
			xmax = 1.;
		    }
		    suma.r = 0., suma.i = 0.;
		    sumb.r = 0., sumb.i = 0.;

		    i__3 = j - 1;
		    for (jr = je; jr <= j-1; ++jr) {
			d_cnjg(&z__3, &A(jr,j));
			i__4 = jr;
			z__2.r = z__3.r * WORK(jr).r - z__3.i * WORK(jr)
				.i, z__2.i = z__3.r * WORK(jr).i + z__3.i * 
				WORK(jr).r;
			z__1.r = suma.r + z__2.r, z__1.i = suma.i + z__2.i;
			suma.r = z__1.r, suma.i = z__1.i;
			d_cnjg(&z__3, &B(jr,j));
			i__4 = jr;
			z__2.r = z__3.r * WORK(jr).r - z__3.i * WORK(jr)
				.i, z__2.i = z__3.r * WORK(jr).i + z__3.i * 
				WORK(jr).r;
			z__1.r = sumb.r + z__2.r, z__1.i = sumb.i + z__2.i;
			sumb.r = z__1.r, sumb.i = z__1.i;
/* L80: */
		    }
		    z__2.r = acoeff * suma.r, z__2.i = acoeff * suma.i;
		    d_cnjg(&z__4, &bcoeff);
		    z__3.r = z__4.r * sumb.r - z__4.i * sumb.i, z__3.i = 
			    z__4.r * sumb.i + z__4.i * sumb.r;
		    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
		    sum.r = z__1.r, sum.i = z__1.i;

/*                 Form x(j) = - SUM / conjg( a*A(j,j) - b
*B(j,j) )   

                   with scaling and perturbation of the de
nominator */

		    i__3 = j + j * a_dim1;
		    z__3.r = acoeff * A(j,j).r, z__3.i = acoeff * A(j,j).i;
		    i__4 = j + j * b_dim1;
		    z__4.r = bcoeff.r * B(j,j).r - bcoeff.i * B(j,j).i, 
			    z__4.i = bcoeff.r * B(j,j).i + bcoeff.i * B(j,j)
			    .r;
		    z__2.r = z__3.r - z__4.r, z__2.i = z__3.i - z__4.i;
		    d_cnjg(&z__1, &z__2);
		    d.r = z__1.r, d.i = z__1.i;
		    if ((d__1 = d.r, abs(d__1)) + (d__2 = d_imag(&d), abs(
			    d__2)) <= dmin__) {
			z__1.r = dmin__, z__1.i = 0.;
			d.r = z__1.r, d.i = z__1.i;
		    }

		    if ((d__1 = d.r, abs(d__1)) + (d__2 = d_imag(&d), abs(
			    d__2)) < 1.) {
			if ((d__1 = sum.r, abs(d__1)) + (d__2 = d_imag(&sum), 
				abs(d__2)) >= bignum * ((d__3 = d.r, abs(d__3)
				) + (d__4 = d_imag(&d), abs(d__4)))) {
			    temp = 1. / ((d__1 = sum.r, abs(d__1)) + (d__2 = 
				    d_imag(&sum), abs(d__2)));
			    i__3 = j - 1;
			    for (jr = je; jr <= j-1; ++jr) {
				i__4 = jr;
				i__5 = jr;
				z__1.r = temp * WORK(jr).r, z__1.i = temp * 
					WORK(jr).i;
				WORK(jr).r = z__1.r, WORK(jr).i = z__1.i;
/* L90: */
			    }
			    xmax = temp * xmax;
			    z__1.r = temp * sum.r, z__1.i = temp * sum.i;
			    sum.r = z__1.r, sum.i = z__1.i;
			}
		    }
		    i__3 = j;
		    z__2.r = -sum.r, z__2.i = -sum.i;
		    z_div(&z__1, &z__2, &d);
		    WORK(j).r = z__1.r, WORK(j).i = z__1.i;
/* Computing MAX */
		    i__3 = j;
		    d__3 = xmax, d__4 = (d__1 = WORK(j).r, abs(d__1)) + (
			    d__2 = d_imag(&WORK(j)), abs(d__2));
		    xmax = max(d__3,d__4);
/* L100: */
		}

/*              Back transform eigenvector if HOWMNY='B'. */

		if (ilback) {
		    i__2 = *n + 1 - je;
		    zgemv_("N", n, &i__2, &c_b2, &VL(1,je), ldvl, 
			    &WORK(je), &c__1, &c_b1, &WORK(*n + 1), &c__1)
			    ;
		    isrc = 2;
		    ibeg = 1;
		} else {
		    isrc = 1;
		    ibeg = je;
		}

/*              Copy and scale eigenvector into column of VL 
*/

		xmax = 0.;
		i__2 = *n;
		for (jr = ibeg; jr <= *n; ++jr) {
/* Computing MAX */
		    i__3 = (isrc - 1) * *n + jr;
		    d__3 = xmax, d__4 = (d__1 = WORK((isrc-1)**n+jr).r, abs(d__1)) + (
			    d__2 = d_imag(&WORK((isrc - 1) * *n + jr)), abs(
			    d__2));
		    xmax = max(d__3,d__4);
/* L110: */
		}

		if (xmax > safmin) {
		    temp = 1. / xmax;
		    i__2 = *n;
		    for (jr = ibeg; jr <= *n; ++jr) {
			i__3 = jr + ieig * vl_dim1;
			i__4 = (isrc - 1) * *n + jr;
			z__1.r = temp * WORK((isrc-1)**n+jr).r, z__1.i = temp * WORK(
				(isrc-1)**n+jr).i;
			VL(jr,ieig).r = z__1.r, VL(jr,ieig).i = z__1.i;
/* L120: */
		    }
		} else {
		    ibeg = *n + 1;
		}

		i__2 = ibeg - 1;
		for (jr = 1; jr <= ibeg-1; ++jr) {
		    i__3 = jr + ieig * vl_dim1;
		    VL(jr,ieig).r = 0., VL(jr,ieig).i = 0.;
/* L130: */
		}

	    }
L140:
	    ;
	}
    }

/*     Right eigenvectors */

    if (compr) {
	ieig = im + 1;

/*        Main loop over eigenvalues */

	for (je = *n; je >= 1; --je) {
	    if (ilall) {
		ilcomp = TRUE_;
	    } else {
		ilcomp = SELECT(je);
	    }
	    if (ilcomp) {
		--ieig;

		i__1 = je + je * a_dim1;
		i__2 = je + je * b_dim1;
		if ((d__1 = A(je,je).r, abs(d__1)) + (d__2 = d_imag(&A(je,je)), abs(d__2)) <= safmin && (d__3 = B(je,je).r,
			 abs(d__3)) <= safmin) {

/*                 Singular matrix pencil -- return unit e
igenvector */

		    i__1 = *n;
		    for (jr = 1; jr <= *n; ++jr) {
			i__2 = jr + ieig * vr_dim1;
			VR(jr,ieig).r = 0., VR(jr,ieig).i = 0.;
/* L150: */
		    }
		    i__1 = ieig + ieig * vr_dim1;
		    VR(ieig,ieig).r = 1., VR(ieig,ieig).i = 0.;
		    goto L250;
		}

/*              Non-singular eigenvalue:   
                Compute coefficients  a  and  b  in   

                ( a A - b B ) x  = 0   

   Computing MAX */
		i__1 = je + je * a_dim1;
		i__2 = je + je * b_dim1;
		d__4 = ((d__1 = A(je,je).r, abs(d__1)) + (d__2 = d_imag(&A(je,je)), abs(d__2))) * ascale, d__5 = (d__3 = 
			B(je,je).r, abs(d__3)) * bscale, d__4 = max(d__4,d__5);
		temp = 1. / max(d__4,safmin);
		i__1 = je + je * a_dim1;
		z__2.r = temp * A(je,je).r, z__2.i = temp * A(je,je).i;
		z__1.r = ascale * z__2.r, z__1.i = ascale * z__2.i;
		salpha.r = z__1.r, salpha.i = z__1.i;
		i__1 = je + je * b_dim1;
		sbeta = temp * B(je,je).r * bscale;
		acoeff = sbeta * ascale;
		z__1.r = bscale * salpha.r, z__1.i = bscale * salpha.i;
		bcoeff.r = z__1.r, bcoeff.i = z__1.i;

/*              Scale to avoid underflow */

		lsa = abs(sbeta) >= safmin && abs(acoeff) < small;
		lsb = (d__1 = salpha.r, abs(d__1)) + (d__2 = d_imag(&salpha), 
			abs(d__2)) >= safmin && (d__3 = bcoeff.r, abs(d__3)) 
			+ (d__4 = d_imag(&bcoeff), abs(d__4)) < small;

		scale = 1.;
		if (lsa) {
		    scale = small / abs(sbeta) * min(anorm,big);
		}
		if (lsb) {
/* Computing MAX */
		    d__3 = scale, d__4 = small / ((d__1 = salpha.r, abs(d__1))
			     + (d__2 = d_imag(&salpha), abs(d__2))) * min(
			    bnorm,big);
		    scale = max(d__3,d__4);
		}
		if (lsa || lsb) {
/* Computing MIN   
   Computing MAX */
		    d__5 = 1., d__6 = abs(acoeff), d__5 = max(d__5,d__6), 
			    d__6 = (d__1 = bcoeff.r, abs(d__1)) + (d__2 = 
			    d_imag(&bcoeff), abs(d__2));
		    d__3 = scale, d__4 = 1. / (safmin * max(d__5,d__6));
		    scale = min(d__3,d__4);
		    if (lsa) {
			acoeff = ascale * (scale * sbeta);
		    } else {
			acoeff = scale * acoeff;
		    }
		    if (lsb) {
			z__2.r = scale * salpha.r, z__2.i = scale * salpha.i;
			z__1.r = bscale * z__2.r, z__1.i = bscale * z__2.i;
			bcoeff.r = z__1.r, bcoeff.i = z__1.i;
		    } else {
			z__1.r = scale * bcoeff.r, z__1.i = scale * bcoeff.i;
			bcoeff.r = z__1.r, bcoeff.i = z__1.i;
		    }
		}

		acoefa = abs(acoeff);
		bcoefa = (d__1 = bcoeff.r, abs(d__1)) + (d__2 = d_imag(&
			bcoeff), abs(d__2));
		xmax = 1.;
		i__1 = *n;
		for (jr = 1; jr <= *n; ++jr) {
		    i__2 = jr;
		    WORK(jr).r = 0., WORK(jr).i = 0.;
/* L160: */
		}
		i__1 = je;
		WORK(je).r = 1., WORK(je).i = 0.;
/* Computing MAX */
		d__1 = ulp * acoefa * anorm, d__2 = ulp * bcoefa * bnorm, 
			d__1 = max(d__1,d__2);
		dmin__ = max(d__1,safmin);

/*              Triangular solve of  (a A - b B) x = 0  (colum
nwise)   

                WORK(1:j-1) contains sums w,   
                WORK(j+1:JE) contains x */

		i__1 = je - 1;
		for (jr = 1; jr <= je-1; ++jr) {
		    i__2 = jr;
		    i__3 = jr + je * a_dim1;
		    z__2.r = acoeff * A(jr,je).r, z__2.i = acoeff * A(jr,je).i;
		    i__4 = jr + je * b_dim1;
		    z__3.r = bcoeff.r * B(jr,je).r - bcoeff.i * B(jr,je).i, 
			    z__3.i = bcoeff.r * B(jr,je).i + bcoeff.i * B(jr,je)
			    .r;
		    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
		    WORK(jr).r = z__1.r, WORK(jr).i = z__1.i;
/* L170: */
		}
		i__1 = je;
		WORK(je).r = 1., WORK(je).i = 0.;

		for (j = je - 1; j >= 1; --j) {

/*                 Form x(j) := - w(j) / d   
                   with scaling and perturbation of the de
nominator */

		    i__1 = j + j * a_dim1;
		    z__2.r = acoeff * A(j,j).r, z__2.i = acoeff * A(j,j).i;
		    i__2 = j + j * b_dim1;
		    z__3.r = bcoeff.r * B(j,j).r - bcoeff.i * B(j,j).i, 
			    z__3.i = bcoeff.r * B(j,j).i + bcoeff.i * B(j,j)
			    .r;
		    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
		    d.r = z__1.r, d.i = z__1.i;
		    if ((d__1 = d.r, abs(d__1)) + (d__2 = d_imag(&d), abs(
			    d__2)) <= dmin__) {
			z__1.r = dmin__, z__1.i = 0.;
			d.r = z__1.r, d.i = z__1.i;
		    }

		    if ((d__1 = d.r, abs(d__1)) + (d__2 = d_imag(&d), abs(
			    d__2)) < 1.) {
			i__1 = j;
			if ((d__1 = WORK(j).r, abs(d__1)) + (d__2 = d_imag(
				&WORK(j)), abs(d__2)) >= bignum * ((d__3 = 
				d.r, abs(d__3)) + (d__4 = d_imag(&d), abs(
				d__4)))) {
			    i__1 = j;
			    temp = 1. / ((d__1 = WORK(j).r, abs(d__1)) + (
				    d__2 = d_imag(&WORK(j)), abs(d__2)));
			    i__1 = je;
			    for (jr = 1; jr <= je; ++jr) {
				i__2 = jr;
				i__3 = jr;
				z__1.r = temp * WORK(jr).r, z__1.i = temp * 
					WORK(jr).i;
				WORK(jr).r = z__1.r, WORK(jr).i = z__1.i;
/* L180: */
			    }
			}
		    }

		    i__1 = j;
		    i__2 = j;
		    z__2.r = -WORK(j).r, z__2.i = -WORK(j).i;
		    z_div(&z__1, &z__2, &d);
		    WORK(j).r = z__1.r, WORK(j).i = z__1.i;

		    if (j > 1) {

/*                    w = w + x(j)*(a A(*,j) - b B(*,j
) ) with scaling */

			i__1 = j;
			if ((d__1 = WORK(j).r, abs(d__1)) + (d__2 = d_imag(
				&WORK(j)), abs(d__2)) > 1.) {
			    i__1 = j;
			    temp = 1. / ((d__1 = WORK(j).r, abs(d__1)) + (
				    d__2 = d_imag(&WORK(j)), abs(d__2)));
			    if (acoefa * RWORK(j) + bcoefa * RWORK(*n + j) >= 
				    bignum * temp) {
				i__1 = je;
				for (jr = 1; jr <= je; ++jr) {
				    i__2 = jr;
				    i__3 = jr;
				    z__1.r = temp * WORK(jr).r, z__1.i = 
					    temp * WORK(jr).i;
				    WORK(jr).r = z__1.r, WORK(jr).i = 
					    z__1.i;
/* L190: */
				}
			    }
			}

			i__1 = j;
			z__1.r = acoeff * WORK(j).r, z__1.i = acoeff * 
				WORK(j).i;
			ca.r = z__1.r, ca.i = z__1.i;
			i__1 = j;
			z__1.r = bcoeff.r * WORK(j).r - bcoeff.i * WORK(
				j).i, z__1.i = bcoeff.r * WORK(j).i + 
				bcoeff.i * WORK(j).r;
			cb.r = z__1.r, cb.i = z__1.i;
			i__1 = j - 1;
			for (jr = 1; jr <= j-1; ++jr) {
			    i__2 = jr;
			    i__3 = jr;
			    i__4 = jr + j * a_dim1;
			    z__3.r = ca.r * A(jr,j).r - ca.i * A(jr,j).i, 
				    z__3.i = ca.r * A(jr,j).i + ca.i * A(jr,j)
				    .r;
			    z__2.r = WORK(jr).r + z__3.r, z__2.i = WORK(
				    jr).i + z__3.i;
			    i__5 = jr + j * b_dim1;
			    z__4.r = cb.r * B(jr,j).r - cb.i * B(jr,j).i, 
				    z__4.i = cb.r * B(jr,j).i + cb.i * B(jr,j)
				    .r;
			    z__1.r = z__2.r - z__4.r, z__1.i = z__2.i - 
				    z__4.i;
			    WORK(jr).r = z__1.r, WORK(jr).i = z__1.i;
/* L200: */
			}
		    }
/* L210: */
		}

/*              Back transform eigenvector if HOWMNY='B'. */

		if (ilback) {
		    zgemv_("N", n, &je, &c_b2, &VR(1,1), ldvr, &WORK(1),
			     &c__1, &c_b1, &WORK(*n + 1), &c__1);
		    isrc = 2;
		    iend = *n;
		} else {
		    isrc = 1;
		    iend = je;
		}

/*              Copy and scale eigenvector into column of VR 
*/

		xmax = 0.;
		i__1 = iend;
		for (jr = 1; jr <= iend; ++jr) {
/* Computing MAX */
		    i__2 = (isrc - 1) * *n + jr;
		    d__3 = xmax, d__4 = (d__1 = WORK((isrc-1)**n+jr).r, abs(d__1)) + (
			    d__2 = d_imag(&WORK((isrc - 1) * *n + jr)), abs(
			    d__2));
		    xmax = max(d__3,d__4);
/* L220: */
		}

		if (xmax > safmin) {
		    temp = 1. / xmax;
		    i__1 = iend;
		    for (jr = 1; jr <= iend; ++jr) {
			i__2 = jr + ieig * vr_dim1;
			i__3 = (isrc - 1) * *n + jr;
			z__1.r = temp * WORK((isrc-1)**n+jr).r, z__1.i = temp * WORK(
				(isrc-1)**n+jr).i;
			VR(jr,ieig).r = z__1.r, VR(jr,ieig).i = z__1.i;
/* L230: */
		    }
		} else {
		    iend = 0;
		}

		i__1 = *n;
		for (jr = iend + 1; jr <= *n; ++jr) {
		    i__2 = jr + ieig * vr_dim1;
		    VR(jr,ieig).r = 0., VR(jr,ieig).i = 0.;
/* L240: */
		}

	    }
L250:
	    ;
	}
    }

    return 0;

/*     End of ZTGEVC */

} /* ztgevc_ */
Пример #9
0
/**
    Purpose
    -------
    SGEEV computes for an N-by-N real nonsymmetric matrix A, the
    eigenvalues and, optionally, the left and/or right eigenvectors.

    The right eigenvector v(j) of A satisfies
                     A * v(j) = lambda(j) * v(j)
    where lambda(j) is its eigenvalue.
    The left eigenvector u(j) of A satisfies
                  u(j)**T * A = lambda(j) * u(j)**T
    where u(j)**T denotes the transpose of u(j).

    The computed eigenvectors are normalized to have Euclidean norm
    equal to 1 and largest component real.

    Arguments
    ---------
    @param[in]
    jobvl   magma_vec_t
      -     = MagmaNoVec: left eigenvectors of A are not computed;
      -     = MagmaVec:   left eigenvectors of are computed.

    @param[in]
    jobvr   magma_vec_t
      -     = MagmaNoVec: right eigenvectors of A are not computed;
      -     = MagmaVec:   right eigenvectors of A are computed.

    @param[in]
    n       INTEGER
            The order of the matrix A. N >= 0.

    @param[in,out]
    A       REAL array, dimension (LDA,N)
            On entry, the N-by-N matrix A.
            On exit, A has been overwritten.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[out]
    wr      REAL array, dimension (N)
    @param[out]
    wi      REAL array, dimension (N)
            WR and WI contain the real and imaginary parts,
            respectively, of the computed eigenvalues.  Complex
            conjugate pairs of eigenvalues appear consecutively
            with the eigenvalue having the positive imaginary part
            first.

    @param[out]
    VL      REAL array, dimension (LDVL,N)
            If JOBVL = MagmaVec, the left eigenvectors u(j) are stored one
            after another in the columns of VL, in the same order
            as their eigenvalues.
            If JOBVL = MagmaNoVec, VL is not referenced.
            u(j) = VL(:,j), the j-th column of VL.

    @param[in]
    ldvl    INTEGER
            The leading dimension of the array VL.  LDVL >= 1; if
            JOBVL = MagmaVec, LDVL >= N.

    @param[out]
    VR      REAL array, dimension (LDVR,N)
            If JOBVR = MagmaVec, the right eigenvectors v(j) are stored one
            after another in the columns of VR, in the same order
            as their eigenvalues.
            If JOBVR = MagmaNoVec, VR is not referenced.
            v(j) = VR(:,j), the j-th column of VR.

    @param[in]
    ldvr    INTEGER
            The leading dimension of the array VR.  LDVR >= 1; if
            JOBVR = MagmaVec, LDVR >= N.

    @param[out]
    work    (workspace) REAL array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The dimension of the array WORK.  LWORK >= (2 +   nb + nb*ngpu)*N.
            For optimal performance,          LWORK >= (2 + 2*nb + nb*ngpu)*N.
    \n
            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value.
      -     > 0:  if INFO = i, the QR algorithm failed to compute all the
                  eigenvalues, and no eigenvectors have been computed;
                  elements and i+1:N of W contain eigenvalues which have
                  converged.

    @ingroup magma_sgeev_driver
    ********************************************************************/
extern "C" magma_int_t
magma_sgeev_m(
    magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n,
    float *A, magma_int_t lda,
    #ifdef COMPLEX
    float *w,
    #else
    float *wr, float *wi,
    #endif
    float *VL, magma_int_t ldvl,
    float *VR, magma_int_t ldvr,
    float *work, magma_int_t lwork,
    #ifdef COMPLEX
    float *rwork,
    #endif
    magma_int_t *info )
{
    #define VL(i,j)  (VL + (i) + (j)*ldvl)
    #define VR(i,j)  (VR + (i) + (j)*ldvr)
    
    const magma_int_t ione  = 1;
    const magma_int_t izero = 0;
    
    float d__1, d__2;
    float r, cs, sn, scl;
    float dum[1], eps;
    float anrm, cscale, bignum, smlnum;
    magma_int_t i, k, ilo, ihi;
    magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, nb;
    magma_int_t scalea, minwrk, optwrk, lquery, wantvl, wantvr, select[1];
    
    magma_side_t side = MagmaRight;
    magma_int_t ngpu = magma_num_gpus();
    
    magma_timer_t time_total=0, time_gehrd=0, time_unghr=0, time_hseqr=0, time_trevc=0, time_sum=0;
    magma_flops_t flop_total=0, flop_gehrd=0, flop_unghr=0, flop_hseqr=0, flop_trevc=0, flop_sum=0;
    timer_start( time_total );
    flops_start( flop_total );
    
    *info = 0;
    lquery = (lwork == -1);
    wantvl = (jobvl == MagmaVec);
    wantvr = (jobvr == MagmaVec);
    if (! wantvl && jobvl != MagmaNoVec) {
        *info = -1;
    } else if (! wantvr && jobvr != MagmaNoVec) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -9;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -11;
    }

    /* Compute workspace */
    nb = magma_get_sgehrd_nb( n );
    if (*info == 0) {
        minwrk = (2 +   nb + nb*ngpu)*n;
        optwrk = (2 + 2*nb + nb*ngpu)*n;
        work[0] = magma_smake_lwork( optwrk );
        
        if (lwork < minwrk && ! lquery) {
            *info = -13;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }
   
    #if defined(Version3)
    float *dT;
    if (MAGMA_SUCCESS != magma_smalloc( &dT, nb*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    #endif
    #if defined(Version5)
    float *T;
    if (MAGMA_SUCCESS != magma_smalloc_cpu( &T, nb*n )) {
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    #endif

    /* Get machine constants */
    eps    = lapackf77_slamch( "P" );
    smlnum = lapackf77_slamch( "S" );
    bignum = 1. / smlnum;
    lapackf77_slabad( &smlnum, &bignum );
    smlnum = magma_ssqrt( smlnum ) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_slange( "M", &n, &n, A, &lda, dum );
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_slascl( "G", &izero, &izero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
    }

    /* Balance the matrix
     * (Workspace: need N)
     *  - this space is reserved until after gebak */
    ibal = 0;
    lapackf77_sgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr );

    /* Reduce to upper Hessenberg form
     * (Workspace: need 3*N, prefer 2*N + N*NB + NB*NGPU)
     *  - added NB*NGPU needed for multi-GPU magma_sgehrd_m
     *  - including N reserved for gebal/gebak, unused by sgehrd */
    itau = ibal + n;
    iwrk = itau + n;
    liwrk = lwork - iwrk;

    timer_start( time_gehrd );
    flops_start( flop_gehrd );
    #if defined(Version1)
        // Version 1 - LAPACK
        lapackf77_sgehrd( &n, &ilo, &ihi, A, &lda,
                          &work[itau], &work[iwrk], &liwrk, &ierr );
    #elif defined(Version2)
        // Version 2 - LAPACK consistent HRD
        magma_sgehrd2( n, ilo, ihi, A, lda,
                       &work[itau], &work[iwrk], liwrk, &ierr );
    #elif defined(Version3)
        // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored,
        magma_sgehrd( n, ilo, ihi, A, lda,
                      &work[itau], &work[iwrk], liwrk, dT, &ierr );
    #elif defined(Version5)
        // Version 4 - Multi-GPU, T on host
        magma_sgehrd_m( n, ilo, ihi, A, lda,
                        &work[itau], &work[iwrk], liwrk, T, &ierr );
    #endif
    time_sum += timer_stop( time_gehrd );
    flop_sum += flops_stop( flop_gehrd );

    if (wantvl) {
        /* Want left eigenvectors
         * Copy Householder vectors to VL */
        side = MagmaLeft;
        lapackf77_slacpy( MagmaLowerStr, &n, &n, A, &lda, VL, &ldvl );

        /* Generate orthogonal matrix in VL
         * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB)
         *  - including N reserved for gebal/gebak, unused by sorghr */
        timer_start( time_unghr );
        flops_start( flop_unghr );
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_sorghr( &n, &ilo, &ihi, VL, &ldvl, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_sorghr( n, ilo, ihi, VL, ldvl, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_sorghr_m( n, ilo, ihi, VL, ldvl, &work[itau], T, nb, &ierr );
        #endif
        time_sum += timer_stop( time_unghr );
        flop_sum += flops_stop( flop_unghr );

        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        /* Perform QR iteration, accumulating Schur vectors in VL
         * (Workspace: need N+1, prefer N+HSWORK (see comments) )
         *  - including N reserved for gebal/gebak, unused by shseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_shseqr( "S", "V", &n, &ilo, &ihi, A, &lda, wr, wi,
                          VL, &ldvl, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );

        if (wantvr) {
            /* Want left and right eigenvectors
             * Copy Schur vectors to VR */
            side = MagmaBothSides;
            lapackf77_slacpy( "F", &n, &n, VL, &ldvl, VR, &ldvr );
        }
    }
    else if (wantvr) {
        /* Want right eigenvectors
         * Copy Householder vectors to VR */
        side = MagmaRight;
        lapackf77_slacpy( "L", &n, &n, A, &lda, VR, &ldvr );

        /* Generate orthogonal matrix in VR
         * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB)
         *  - including N reserved for gebal/gebak, unused by sorghr */
        timer_start( time_unghr );
        flops_start( flop_unghr );
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_sorghr( &n, &ilo, &ihi, VR, &ldvr, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_sorghr( n, ilo, ihi, VR, ldvr, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_sorghr_m( n, ilo, ihi, VR, ldvr, &work[itau], T, nb, &ierr );
        #endif
        time_sum += timer_stop( time_unghr );
        flop_sum += flops_stop( flop_unghr );

        /* Perform QR iteration, accumulating Schur vectors in VR
         * (Workspace: need N+1, prefer N+HSWORK (see comments) )
         *  - including N reserved for gebal/gebak, unused by shseqr */
        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_shseqr( "S", "V", &n, &ilo, &ihi, A, &lda, wr, wi,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );
    }
    else {
        /* Compute eigenvalues only
         * (Workspace: need N+1, prefer N+HSWORK (see comments) )
         *  - including N reserved for gebal/gebak, unused by shseqr */
        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_shseqr( "E", "N", &n, &ilo, &ihi, A, &lda, wr, wi,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );
    }

    /* If INFO > 0 from SHSEQR, then quit */
    if (*info > 0) {
        goto CLEANUP;
    }

    timer_start( time_trevc );
    flops_start( flop_trevc );
    if (wantvl || wantvr) {
        /* Compute left and/or right eigenvectors
         * (Workspace: need 4*N, prefer (2 + 2*nb)*N)
         *  - including N reserved for gebal/gebak, unused by strevc */
        liwrk = lwork - iwrk;
        #if TREVC_VERSION == 1
        lapackf77_strevc( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                          VR, &ldvr, &n, &nout, &work[iwrk], &ierr );
        #elif TREVC_VERSION == 2
        lapackf77_strevc3( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                           VR, &ldvr, &n, &nout, &work[iwrk], &liwrk, &ierr );
        #elif TREVC_VERSION == 3
        magma_strevc3( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                       VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr );
        #elif TREVC_VERSION == 4
        magma_strevc3_mt( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                          VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr );
        #elif TREVC_VERSION == 5
        magma_strevc3_mt_gpu( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                              VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr );
        #else
        #error Unknown TREVC_VERSION
        #endif
    }
    time_sum += timer_stop( time_trevc );
    flop_sum += flops_stop( flop_trevc );

    if (wantvl) {
        /* Undo balancing of left eigenvectors
         * (Workspace: need N) */
        lapackf77_sgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n,
                          VL, &ldvl, &ierr );

        /* Normalize left eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            if ( wi[i] == 0. ) {
                scl = 1. / magma_cblas_snrm2( n, VL(0,i), 1 );
                blasf77_sscal( &n, &scl, VL(0,i), &ione );
            }
            else if ( wi[i] > 0. ) {
                d__1 = magma_cblas_snrm2( n, VL(0,i), 1 );
                d__2 = magma_cblas_snrm2( n, VL(0,i+1), 1 );
                scl = 1. / lapackf77_slapy2( &d__1, &d__2 );
                blasf77_sscal( &n, &scl, VL(0,i), &ione );
                blasf77_sscal( &n, &scl, VL(0,i+1), &ione );
                for (k = 0; k < n; ++k) {
                    /* Computing 2nd power */
                    d__1 = *VL(k,i);
                    d__2 = *VL(k,i+1);
                    work[iwrk + k] = d__1*d__1 + d__2*d__2;
                }
                k = blasf77_isamax( &n, &work[iwrk], &ione ) - 1;  // subtract 1; k is 0-based
                lapackf77_slartg( VL(k,i), VL(k,i+1), &cs, &sn, &r );
                blasf77_srot( &n, VL(0,i), &ione, VL(0,i+1), &ione, &cs, &sn );
                *VL(k,i+1) = 0.;
            }
        }
    }

    if (wantvr) {
        /* Undo balancing of right eigenvectors
         * (Workspace: need N) */
        lapackf77_sgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n,
                          VR, &ldvr, &ierr );

        /* Normalize right eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            if ( wi[i] == 0. ) {
                scl = 1. / magma_cblas_snrm2( n, VR(0,i), 1 );
                blasf77_sscal( &n, &scl, VR(0,i), &ione );
            }
            else if ( wi[i] > 0. ) {
                d__1 = magma_cblas_snrm2( n, VR(0,i), 1 );
                d__2 = magma_cblas_snrm2( n, VR(0,i+1), 1 );
                scl = 1. / lapackf77_slapy2( &d__1, &d__2 );
                blasf77_sscal( &n, &scl, VR(0,i), &ione );
                blasf77_sscal( &n, &scl, VR(0,i+1), &ione );
                for (k = 0; k < n; ++k) {
                    /* Computing 2nd power */
                    d__1 = *VR(k,i);
                    d__2 = *VR(k,i+1);
                    work[iwrk + k] = d__1*d__1 + d__2*d__2;
                }
                k = blasf77_isamax( &n, &work[iwrk], &ione ) - 1;  // subtract 1; k is 0-based
                lapackf77_slartg( VR(k,i), VR(k,i+1), &cs, &sn, &r );
                blasf77_srot( &n, VR(0,i), &ione, VR(0,i+1), &ione, &cs, &sn );
                *VR(k,i+1) = 0.;
            }
        }
    }

CLEANUP:
    /* Undo scaling if necessary */
    if (scalea) {
        // converged eigenvalues, stored in wr[i+1:n] and wi[i+1:n] for i = INFO
        magma_int_t nval = n - (*info);
        magma_int_t ld = max( nval, 1 );
        lapackf77_slascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wr + (*info), &ld, &ierr );
        lapackf77_slascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wi + (*info), &ld, &ierr );
        if (*info > 0) {
            // first ilo columns were already upper triangular,
            // so the corresponding eigenvalues are also valid.
            nval = ilo - 1;
            lapackf77_slascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wr, &n, &ierr );
            lapackf77_slascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wi, &n, &ierr );
        }
    }

    #if defined(Version3)
    magma_free( dT );
    #endif
    #if defined(Version5)
    magma_free_cpu( T );
    #endif
    
    timer_stop( time_total );
    flops_stop( flop_total );
    timer_printf( "sgeev times n %5d, gehrd %7.3f, unghr %7.3f, hseqr %7.3f, trevc %7.3f, total %7.3f, sum %7.3f\n",
                  (int) n, time_gehrd, time_unghr, time_hseqr, time_trevc, time_total, time_sum );
    timer_printf( "sgeev flops n %5d, gehrd %7lld, unghr %7lld, hseqr %7lld, trevc %7lld, total %7lld, sum %7lld\n",
                  (int) n, flop_gehrd, flop_unghr, flop_hseqr, flop_trevc, flop_total, flop_sum );
    
    work[0] = magma_smake_lwork( optwrk );
    
    return *info;
} /* magma_sgeev */
Пример #10
0
int main(int argc, char **argv)
{
    int i;

    struct env env;

    in = stdin;
    out = stdout;
    err = stderr;
    logfp = NULL;

    static struct option long_options[] = {
        {"file", required_argument, 0, 'f'},
        {"help", no_argument, 0, 'h'},
        {0, 0, 0, 0}
    };

    infile = NULL;
    outfilebase = NULL;
    verbosity = 0;
    memset(&env, 0, sizeof(env));

    /*========================================================================
     * Process the command line flags
     *======================================================================*/
    while (1)
    {
        int option_index = 0;
        int c = getopt_long(argc, argv, "f:vlo:",
                            long_options, &option_index);

        if (c == -1) break;

        switch (c)
        {
            case 0:
                break;

            case 'f': infile      = optarg;       break;
            case 'o': outfilebase = optarg;       break;
            case 'v': verbosity++;                break;
            case 'l': loglevel++;                 break;

            case 'h': help(); break;
            case '?': break;
        }
    }

    if (loglevel != 0)
    {
        int i;
        char logfile[256];

        snprintf(logfile, 256, "%s.log.%i", "pint", getpid());
        for (i=1; i <= 1000 && (!access(logfile, R_OK) || !access(logfile, W_OK)); i++)
        {
            snprintf(logfile, 256, "%s.log.%i-%i", "pint", getpid(), i);
        }

        logfp = fopen(logfile, "w");
        if (logfp == NULL) 
        {
            fprintf(err, "WARNING: Can't create log file %s. No log will be kept.", logfile);
        }

        VL(1) fprintf(out, "Logfile: %s\n", logfile);
    }

    VL(1) fprintf(out, "Verbosity level %i\n", verbosity);

    sleep(5);

    /*========================================================================
     * Load the initial conditions
     *======================================================================*/
    if (infile != NULL)
    {
        LOG(1) fprintf(logfp, "Loading %s.\n", infile);
        if (judge_and_load_file(infile, &env))
        {
            fprintf(err, "Unable to load file.\n");
            exit(1);
        }
    }
    else
    {
        ic_threebody(&env);
    }

    if (outfilebase != NULL)
    {
    }


    for (i=0; i < 15; i++)
    {

#if USE_TREE_JS
        tree_build_js(&env);
        tree_free_js(&env);
#endif

#if USE_TREE_JPC
        tree_build_jpc(&env);
        tree_free_jpc(&env);
#endif
    }

    free(env.ps);
    env.ps = NULL;
    free(env.p);
    env.p = NULL;

    if (logfp != NULL && logfp != stdin && logfp != in && logfp != out && logfp != err)
        fclose(logfp);

    return 0;
}
Пример #11
0
/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal *
	a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, 
	integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, 
	integer *lwork, integer *info)
{
/*  -- LAPACK driver routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    DGEEV computes for an N-by-N real nonsymmetric matrix A, the   
    eigenvalues and, optionally, the left and/or right eigenvectors.   

    The right eigenvector v(j) of A satisfies   
                     A * v(j) = lambda(j) * v(j)   
    where lambda(j) is its eigenvalue.   
    The left eigenvector u(j) of A satisfies   
                  u(j)**H * A = lambda(j) * u(j)**H   
    where u(j)**H denotes the conjugate transpose of u(j).   

    The computed eigenvectors are normalized to have Euclidean norm   
    equal to 1 and largest component real.   

    Arguments   
    =========   

    JOBVL   (input) CHARACTER*1   
            = 'N': left eigenvectors of A are not computed;   
            = 'V': left eigenvectors of A are computed.   

    JOBVR   (input) CHARACTER*1   
            = 'N': right eigenvectors of A are not computed;   
            = 'V': right eigenvectors of A are computed.   

    N       (input) INTEGER   
            The order of the matrix A. N >= 0.   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
            On entry, the N-by-N matrix A.   
            On exit, A has been overwritten.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    WR      (output) DOUBLE PRECISION array, dimension (N)   
    WI      (output) DOUBLE PRECISION array, dimension (N)   
            WR and WI contain the real and imaginary parts,   
            respectively, of the computed eigenvalues.  Complex   
            conjugate pairs of eigenvalues appear consecutively   
            with the eigenvalue having the positive imaginary part   
            first.   

    VL      (output) DOUBLE PRECISION array, dimension (LDVL,N)   
            If JOBVL = 'V', the left eigenvectors u(j) are stored one   
            after another in the columns of VL, in the same order   
            as their eigenvalues.   
            If JOBVL = 'N', VL is not referenced.   
            If the j-th eigenvalue is real, then u(j) = VL(:,j),   
            the j-th column of VL.   
            If the j-th and (j+1)-st eigenvalues form a complex   
            conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and   
            u(j+1) = VL(:,j) - i*VL(:,j+1).   

    LDVL    (input) INTEGER   
            The leading dimension of the array VL.  LDVL >= 1; if   
            JOBVL = 'V', LDVL >= N.   

    VR      (output) DOUBLE PRECISION array, dimension (LDVR,N)   
            If JOBVR = 'V', the right eigenvectors v(j) are stored one   
            after another in the columns of VR, in the same order   
            as their eigenvalues.   
            If JOBVR = 'N', VR is not referenced.   
            If the j-th eigenvalue is real, then v(j) = VR(:,j),   
            the j-th column of VR.   
            If the j-th and (j+1)-st eigenvalues form a complex   
            conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and   
            v(j+1) = VR(:,j) - i*VR(:,j+1).   

    LDVR    (input) INTEGER   
            The leading dimension of the array VR.  LDVR >= 1; if   
            JOBVR = 'V', LDVR >= N.   

    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK) 
  
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.  LWORK >= max(1,3*N), and   
            if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N.  For good   
            performance, LWORK must generally be larger.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if INFO = i, the QR algorithm failed to compute all the 
  
                  eigenvalues, and no eigenvectors have been computed;   
                  elements i+1:N of WR and WI contain eigenvalues which   
                  have converged.   

    ===================================================================== 
  


       Test the input arguments   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c__0 = 0;
    static integer c__8 = 8;
    static integer c_n1 = -1;
    static integer c__4 = 4;
    
    /* System generated locals */
    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
	    i__2, i__3, i__4;
    doublereal d__1, d__2;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer ibal;
    static char side[1];
    static integer maxb;
    static doublereal anrm;
    static integer ierr, itau;
    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *);
    static integer iwrk, nout;
    extern doublereal dnrm2_(integer *, doublereal *, integer *);
    static integer i, k;
    static doublereal r;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *);
    extern logical lsame_(char *, char *);
    extern doublereal dlapy2_(doublereal *, doublereal *);
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_(
	    char *, char *, integer *, integer *, integer *, doublereal *, 
	    integer *, doublereal *, integer *, integer *), 
	    dgebal_(char *, integer *, doublereal *, integer *, integer *, 
	    integer *, doublereal *, integer *);
    static doublereal cs;
    static logical scalea;
    extern doublereal dlamch_(char *);
    static doublereal cscale;
    extern doublereal dlange_(char *, integer *, integer *, doublereal *, 
	    integer *, doublereal *);
    extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
	    integer *);
    static doublereal sn;
    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, integer *, doublereal *, 
	    integer *, integer *);
    extern integer idamax_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, integer *), 
	    dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, 
	    doublereal *), xerbla_(char *, integer *);
    static logical select[1];
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static doublereal bignum;
    extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
	    integer *), dhseqr_(char *, char *, integer *, integer *, integer 
	    *, doublereal *, integer *, doublereal *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, 
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    integer *, integer *, integer *, doublereal *, integer *);
    static integer minwrk, maxwrk;
    static logical wantvl;
    static doublereal smlnum;
    static integer hswork;
    static logical wantvr;
    static integer ihi;
    static doublereal scl;
    static integer ilo;
    static doublereal dum[1], eps;



#define DUM(I) dum[(I)]
#define WR(I) wr[(I)-1]
#define WI(I) wi[(I)-1]
#define WORK(I) work[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define VL(I,J) vl[(I)-1 + ((J)-1)* ( *ldvl)]
#define VR(I,J) vr[(I)-1 + ((J)-1)* ( *ldvr)]

    *info = 0;
    wantvl = lsame_(jobvl, "V");
    wantvr = lsame_(jobvr, "V");
    if (! wantvl && ! lsame_(jobvl, "N")) {
	*info = -1;
    } else if (! wantvr && ! lsame_(jobvr, "N")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldvl < 1 || wantvl && *ldvl < *n) {
	*info = -9;
    } else if (*ldvr < 1 || wantvr && *ldvr < *n) {
	*info = -11;
    }

/*     Compute workspace   
        (Note: Comments in the code beginning "Workspace:" describe the   
         minimal amount of workspace needed at that point in the code,   
         as well as the preferred amount for good performance.   
         NB refers to the optimal block size for the immediately   
         following subroutine, as returned by ILAENV.   
         HSWORK refers to the workspace preferred by DHSEQR, as   
         calculated below. HSWORK is computed assuming ILO=1 and IHI=N,   
         the worst case.) */

    minwrk = 1;
    if (*info == 0 && *lwork >= 1) {
	maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, &
		c__0, 6L, 1L);
	if (! wantvl && ! wantvr) {
/* Computing MAX */
	    i__1 = 1, i__2 = *n * 3;
	    minwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = ilaenv_(&c__8, "DHSEQR", "EN", n, &c__1, n, &c_n1, 6L, 2L);
	    maxb = max(i__1,2);
/* Computing MIN   
   Computing MAX */
	    i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "EN", n, &c__1, n, &
		    c_n1, 6L, 2L);
	    i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
	    k = min(i__1,i__2);
/* Computing MAX */
	    i__1 = k * (k + 2), i__2 = *n << 1;
	    hswork = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + 
		    hswork;
	    maxwrk = max(i__1,i__2);
	} else {
/* Computing MAX */
	    i__1 = 1, i__2 = *n << 2;
	    minwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "DOR"
		    "GHR", " ", n, &c__1, n, &c_n1, 6L, 1L);
	    maxwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = ilaenv_(&c__8, "DHSEQR", "SV", n, &c__1, n, &c_n1, 6L, 2L);
	    maxb = max(i__1,2);
/* Computing MIN   
   Computing MAX */
	    i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "SV", n, &c__1, n, &
		    c_n1, 6L, 2L);
	    i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
	    k = min(i__1,i__2);
/* Computing MAX */
	    i__1 = k * (k + 2), i__2 = *n << 1;
	    hswork = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + 
		    hswork;
	    maxwrk = max(i__1,i__2);
/* Computing MAX */
	    i__1 = maxwrk, i__2 = *n << 2;
	    maxwrk = max(i__1,i__2);
	}
	WORK(1) = (doublereal) maxwrk;
    }
    if (*lwork < minwrk) {
	*info = -13;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DGEEV ", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Get machine constants */

    eps = dlamch_("P");
    smlnum = dlamch_("S");
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
    smlnum = sqrt(smlnum) / eps;
    bignum = 1. / smlnum;

/*     Scale A if max element outside range [SMLNUM,BIGNUM] */

    anrm = dlange_("M", n, n, &A(1,1), lda, dum);
    scalea = FALSE_;
    if (anrm > 0. && anrm < smlnum) {
	scalea = TRUE_;
	cscale = smlnum;
    } else if (anrm > bignum) {
	scalea = TRUE_;
	cscale = bignum;
    }
    if (scalea) {
	dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &A(1,1), lda, &
		ierr);
    }

/*     Balance the matrix   
       (Workspace: need N) */

    ibal = 1;
    dgebal_("B", n, &A(1,1), lda, &ilo, &ihi, &WORK(ibal), &ierr);

/*     Reduce to upper Hessenberg form   
       (Workspace: need 3*N, prefer 2*N+N*NB) */

    itau = ibal + *n;
    iwrk = itau + *n;
    i__1 = *lwork - iwrk + 1;
    dgehrd_(n, &ilo, &ihi, &A(1,1), lda, &WORK(itau), &WORK(iwrk), &i__1,
	     &ierr);

    if (wantvl) {

/*        Want left eigenvectors   
          Copy Householder vectors to VL */

	*(unsigned char *)side = 'L';
	dlacpy_("L", n, n, &A(1,1), lda, &VL(1,1), ldvl);

/*        Generate orthogonal matrix in VL   
          (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */

	i__1 = *lwork - iwrk + 1;
	dorghr_(n, &ilo, &ihi, &VL(1,1), ldvl, &WORK(itau), &WORK(iwrk),
		 &i__1, &ierr);

/*        Perform QR iteration, accumulating Schur vectors in VL   
          (Workspace: need N+1, prefer N+HSWORK (see comments) ) */

	iwrk = itau;
	i__1 = *lwork - iwrk + 1;
	dhseqr_("S", "V", n, &ilo, &ihi, &A(1,1), lda, &WR(1), &WI(1), &
		VL(1,1), ldvl, &WORK(iwrk), &i__1, info);

	if (wantvr) {

/*           Want left and right eigenvectors   
             Copy Schur vectors to VR */

	    *(unsigned char *)side = 'B';
	    dlacpy_("F", n, n, &VL(1,1), ldvl, &VR(1,1), ldvr)
		    ;
	}

    } else if (wantvr) {

/*        Want right eigenvectors   
          Copy Householder vectors to VR */

	*(unsigned char *)side = 'R';
	dlacpy_("L", n, n, &A(1,1), lda, &VR(1,1), ldvr);

/*        Generate orthogonal matrix in VR   
          (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */

	i__1 = *lwork - iwrk + 1;
	dorghr_(n, &ilo, &ihi, &VR(1,1), ldvr, &WORK(itau), &WORK(iwrk),
		 &i__1, &ierr);

/*        Perform QR iteration, accumulating Schur vectors in VR   
          (Workspace: need N+1, prefer N+HSWORK (see comments) ) */

	iwrk = itau;
	i__1 = *lwork - iwrk + 1;
	dhseqr_("S", "V", n, &ilo, &ihi, &A(1,1), lda, &WR(1), &WI(1), &
		VR(1,1), ldvr, &WORK(iwrk), &i__1, info);

    } else {

/*        Compute eigenvalues only   
          (Workspace: need N+1, prefer N+HSWORK (see comments) ) */

	iwrk = itau;
	i__1 = *lwork - iwrk + 1;
	dhseqr_("E", "N", n, &ilo, &ihi, &A(1,1), lda, &WR(1), &WI(1), &
		VR(1,1), ldvr, &WORK(iwrk), &i__1, info);
    }

/*     If INFO > 0 from DHSEQR, then quit */

    if (*info > 0) {
	goto L50;
    }

    if (wantvl || wantvr) {

/*        Compute left and/or right eigenvectors   
          (Workspace: need 4*N) */

	dtrevc_(side, "B", select, n, &A(1,1), lda, &VL(1,1), ldvl,
		 &VR(1,1), ldvr, n, &nout, &WORK(iwrk), &ierr);
    }

    if (wantvl) {

/*        Undo balancing of left eigenvectors   
          (Workspace: need N) */

	dgebak_("B", "L", n, &ilo, &ihi, &WORK(ibal), n, &VL(1,1), ldvl,
		 &ierr);

/*        Normalize left eigenvectors and make largest component real 
*/

	i__1 = *n;
	for (i = 1; i <= *n; ++i) {
	    if (WI(i) == 0.) {
		scl = 1. / dnrm2_(n, &VL(1,i), &c__1);
		dscal_(n, &scl, &VL(1,i), &c__1);
	    } else if (WI(i) > 0.) {
		d__1 = dnrm2_(n, &VL(1,i), &c__1);
		d__2 = dnrm2_(n, &VL(1,i+1), &c__1);
		scl = 1. / dlapy2_(&d__1, &d__2);
		dscal_(n, &scl, &VL(1,i), &c__1);
		dscal_(n, &scl, &VL(1,i+1), &c__1);
		i__2 = *n;
		for (k = 1; k <= *n; ++k) {
/* Computing 2nd power */
		    d__1 = VL(k,i);
/* Computing 2nd power */
		    d__2 = VL(k,i+1);
		    WORK(iwrk + k - 1) = d__1 * d__1 + d__2 * d__2;
/* L10: */
		}
		k = idamax_(n, &WORK(iwrk), &c__1);
		dlartg_(&VL(k,i), &VL(k,i+1), &cs,
			 &sn, &r);
		drot_(n, &VL(1,i), &c__1, &VL(1,i+1), &c__1, &cs, &sn);
		VL(k,i+1) = 0.;
	    }
/* L20: */
	}
    }

    if (wantvr) {

/*        Undo balancing of right eigenvectors   
          (Workspace: need N) */

	dgebak_("B", "R", n, &ilo, &ihi, &WORK(ibal), n, &VR(1,1), ldvr,
		 &ierr);

/*        Normalize right eigenvectors and make largest component real
 */

	i__1 = *n;
	for (i = 1; i <= *n; ++i) {
	    if (WI(i) == 0.) {
		scl = 1. / dnrm2_(n, &VR(1,i), &c__1);
		dscal_(n, &scl, &VR(1,i), &c__1);
	    } else if (WI(i) > 0.) {
		d__1 = dnrm2_(n, &VR(1,i), &c__1);
		d__2 = dnrm2_(n, &VR(1,i+1), &c__1);
		scl = 1. / dlapy2_(&d__1, &d__2);
		dscal_(n, &scl, &VR(1,i), &c__1);
		dscal_(n, &scl, &VR(1,i+1), &c__1);
		i__2 = *n;
		for (k = 1; k <= *n; ++k) {
/* Computing 2nd power */
		    d__1 = VR(k,i);
/* Computing 2nd power */
		    d__2 = VR(k,i+1);
		    WORK(iwrk + k - 1) = d__1 * d__1 + d__2 * d__2;
/* L30: */
		}
		k = idamax_(n, &WORK(iwrk), &c__1);
		dlartg_(&VR(k,i), &VR(k,i+1), &cs,
			 &sn, &r);
		drot_(n, &VR(1,i), &c__1, &VR(1,i+1), &c__1, &cs, &sn);
		VR(k,i+1) = 0.;
	    }
/* L40: */
	}
    }

/*     Undo scaling if necessary */

L50:
    if (scalea) {
	i__1 = *n - *info;
/* Computing MAX */
	i__3 = *n - *info;
	i__2 = max(i__3,1);
	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &WR(*info + 
		1), &i__2, &ierr);
	i__1 = *n - *info;
/* Computing MAX */
	i__3 = *n - *info;
	i__2 = max(i__3,1);
	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &WI(*info + 
		1), &i__2, &ierr);
	if (*info > 0) {
	    i__1 = ilo - 1;
	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &WR(1), 
		    n, &ierr);
	    i__1 = ilo - 1;
	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &WI(1), 
		    n, &ierr);
	}
    }

    WORK(1) = (doublereal) maxwrk;
    return 0;

/*     End of DGEEV */

} /* dgeev_ */
Пример #12
0
/***************************************************************************//**
    Purpose
    -------
    CGEEV computes for an N-by-N complex nonsymmetric matrix A, the
    eigenvalues and, optionally, the left and/or right eigenvectors.

    The right eigenvector v(j) of A satisfies
                     A * v(j) = lambda(j) * v(j)
    where lambda(j) is its eigenvalue.
    The left eigenvector u(j) of A satisfies
                  u(j)**H * A = lambda(j) * u(j)**H
    where u(j)**H denotes the conjugate transpose of u(j).

    The computed eigenvectors are normalized to have Euclidean norm
    equal to 1 and largest component real.

    Arguments
    ---------
    @param[in]
    jobvl   magma_vec_t
      -     = MagmaNoVec: left eigenvectors of A are not computed;
      -     = MagmaVec:   left eigenvectors of are computed.

    @param[in]
    jobvr   magma_vec_t
      -     = MagmaNoVec: right eigenvectors of A are not computed;
      -     = MagmaVec:   right eigenvectors of A are computed.

    @param[in]
    n       INTEGER
            The order of the matrix A. N >= 0.

    @param[in,out]
    A       COMPLEX array, dimension (LDA,N)
            On entry, the N-by-N matrix A.
            On exit, A has been overwritten.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[out]
    w       COMPLEX array, dimension (N)
            W contains the computed eigenvalues.

    @param[out]
    VL      COMPLEX array, dimension (LDVL,N)
            If JOBVL = MagmaVec, the left eigenvectors u(j) are stored one
            after another in the columns of VL, in the same order
            as their eigenvalues.
            If JOBVL = MagmaNoVec, VL is not referenced.
            u(j) = VL(:,j), the j-th column of VL.

    @param[in]
    ldvl    INTEGER
            The leading dimension of the array VL.  LDVL >= 1; if
            JOBVL = MagmaVec, LDVL >= N.

    @param[out]
    VR      COMPLEX array, dimension (LDVR,N)
            If JOBVR = MagmaVec, the right eigenvectors v(j) are stored one
            after another in the columns of VR, in the same order
            as their eigenvalues.
            If JOBVR = MagmaNoVec, VR is not referenced.
            v(j) = VR(:,j), the j-th column of VR.

    @param[in]
    ldvr    INTEGER
            The leading dimension of the array VR.  LDVR >= 1; if
            JOBVR = MagmaVec, LDVR >= N.

    @param[out]
    work    (workspace) COMPLEX array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The dimension of the array WORK.  LWORK >= (1 +   nb + nb*ngpu)*N.
            For optimal performance,          LWORK >= (1 + 2*nb + nb*ngpu)*N.
    \n
            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    @param
    rwork   (workspace) REAL array, dimension (2*N)

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value.
      -     > 0:  if INFO = i, the QR algorithm failed to compute all the
                  eigenvalues, and no eigenvectors have been computed;
                  elements and i+1:N of W contain eigenvalues which have
                  converged.

    @ingroup magma_geev
*******************************************************************************/
extern "C" magma_int_t
magma_cgeev_m(
    magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n,
    magmaFloatComplex *A, magma_int_t lda,
    #ifdef COMPLEX
    magmaFloatComplex *w,
    #else
    float *wr, float *wi,
    #endif
    magmaFloatComplex *VL, magma_int_t ldvl,
    magmaFloatComplex *VR, magma_int_t ldvr,
    magmaFloatComplex *work, magma_int_t lwork,
    #ifdef COMPLEX
    float *rwork,
    #endif
    magma_int_t *info )
{
    #define VL(i,j)  (VL + (i) + (j)*ldvl)
    #define VR(i,j)  (VR + (i) + (j)*ldvr)
    
    const magma_int_t ione  = 1;
    const magma_int_t izero = 0;
    
    float d__1, d__2;
    magmaFloatComplex tmp;
    float scl;
    float dum[1], eps;
    float anrm, cscale, bignum, smlnum;
    magma_int_t i, k, ilo, ihi;
    magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, nb;
    magma_int_t scalea, minwrk, optwrk, irwork, lquery, wantvl, wantvr, select[1];

    magma_side_t side = MagmaRight;
    magma_int_t ngpu = magma_num_gpus();

    irwork = 0;
    *info = 0;
    lquery = (lwork == -1);
    wantvl = (jobvl == MagmaVec);
    wantvr = (jobvr == MagmaVec);
    if (! wantvl && jobvl != MagmaNoVec) {
        *info = -1;
    } else if (! wantvr && jobvr != MagmaNoVec) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -8;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -10;
    }

    /* Compute workspace */
    nb = magma_get_cgehrd_nb( n );
    if (*info == 0) {
        minwrk = (1 +   nb + nb*ngpu)*n;
        optwrk = (1 + 2*nb + nb*ngpu)*n;
        work[0] = magma_cmake_lwork( optwrk );

        if (lwork < minwrk && ! lquery) {
            *info = -12;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }
    
    #if defined(Version3)
    magmaFloatComplex *dT;
    if (MAGMA_SUCCESS != magma_cmalloc( &dT, nb*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    #endif
    #if defined(Version5)
    magmaFloatComplex *T;
    if (MAGMA_SUCCESS != magma_cmalloc_cpu( &T, nb*n )) {
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    #endif

    /* Get machine constants */
    eps    = lapackf77_slamch( "P" );
    smlnum = lapackf77_slamch( "S" );
    bignum = 1. / smlnum;
    lapackf77_slabad( &smlnum, &bignum );
    smlnum = magma_ssqrt( smlnum ) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_clange( "M", &n, &n, A, &lda, dum );
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_clascl( "G", &izero, &izero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
    }

    /* Balance the matrix
     * (CWorkspace: none)
     * (RWorkspace: need N)
     *  - this space is reserved until after gebak */
    ibal = 0;
    lapackf77_cgebal( "B", &n, A, &lda, &ilo, &ihi, &rwork[ibal], &ierr );

    /* Reduce to upper Hessenberg form
     * (CWorkspace: need 2*N, prefer N + N*NB + NB*NGPU)
     * (RWorkspace: N)
     *  - added NB*NGPU needed for multi-GPU magma_cgehrd_m
     *  - including N reserved for gebal/gebak, unused by cgehrd */
    itau = 0;
    iwrk = itau + n;
    liwrk = lwork - iwrk;

    #if defined(Version1)
        // Version 1 - LAPACK
        lapackf77_cgehrd( &n, &ilo, &ihi, A, &lda,
                          &work[itau], &work[iwrk], &liwrk, &ierr );
    #elif defined(Version2)
        // Version 2 - LAPACK consistent HRD
        magma_cgehrd2( n, ilo, ihi, A, lda,
                       &work[itau], &work[iwrk], liwrk, &ierr );
    #elif defined(Version3)
        // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored,
        magma_cgehrd( n, ilo, ihi, A, lda,
                      &work[itau], &work[iwrk], liwrk, dT, &ierr );
    #elif defined(Version5)
        // Version 4 - Multi-GPU, T on host
        magma_cgehrd_m( n, ilo, ihi, A, lda,
                        &work[itau], &work[iwrk], liwrk, T, &ierr );
    #endif

    if (wantvl) {
        /* Want left eigenvectors
         * Copy Householder vectors to VL */
        side = MagmaLeft;
        lapackf77_clacpy( MagmaLowerStr, &n, &n, A, &lda, VL, &ldvl );

        /* Generate unitary matrix in VL
         * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by cunghr */
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_cunghr( &n, &ilo, &ihi, VL, &ldvl, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_cunghr( n, ilo, ihi, VL, ldvl, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_cunghr_m( n, ilo, ihi, VL, ldvl, &work[itau], T, nb, &ierr );
        #endif

        /* Perform QR iteration, accumulating Schur vectors in VL
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by chseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_chseqr( "S", "V", &n, &ilo, &ihi, A, &lda, w,
                          VL, &ldvl, &work[iwrk], &liwrk, info );

        if (wantvr) {
            /* Want left and right eigenvectors
             * Copy Schur vectors to VR */
            side = MagmaBothSides;
            lapackf77_clacpy( "F", &n, &n, VL, &ldvl, VR, &ldvr );
        }
    }
    else if (wantvr) {
        /* Want right eigenvectors
         * Copy Householder vectors to VR */
        side = MagmaRight;
        lapackf77_clacpy( "L", &n, &n, A, &lda, VR, &ldvr );

        /* Generate unitary matrix in VR
         * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by cunghr */
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_cunghr( &n, &ilo, &ihi, VR, &ldvr, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_cunghr( n, ilo, ihi, VR, ldvr, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_cunghr_m( n, ilo, ihi, VR, ldvr, &work[itau], T, nb, &ierr );
        #endif

        /* Perform QR iteration, accumulating Schur vectors in VR
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by chseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_chseqr( "S", "V", &n, &ilo, &ihi, A, &lda, w,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
    }
    else {
        /* Compute eigenvalues only
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by chseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_chseqr( "E", "N", &n, &ilo, &ihi, A, &lda, w,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
    }

    /* If INFO > 0 from CHSEQR, then quit */
    if (*info > 0) {
        goto CLEANUP;
    }

    if (wantvl || wantvr) {
        /* Compute left and/or right eigenvectors
         * (CWorkspace: need 2*N)
         * (RWorkspace: need 2*N)
         *  - including N reserved for gebal/gebak, unused by ctrevc */
        irwork = ibal + n;
        #if TREVC_VERSION == 1
        lapackf77_ctrevc( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                          VR, &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 2
        liwrk = lwork - iwrk;
        lapackf77_ctrevc3( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                           VR, &ldvr, &n, &nout, &work[iwrk], &liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 3
        magma_ctrevc3( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                       VR, ldvr, n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 4
        magma_ctrevc3_mt( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                          VR, ldvr, n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 5
        magma_ctrevc3_mt_gpu( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                              VR, ldvr, n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #else
        #error Unknown TREVC_VERSION
        #endif
    }

    if (wantvl) {
        /* Undo balancing of left eigenvectors
         * (CWorkspace: none)
         * (RWorkspace: need N) */
        lapackf77_cgebak( "B", "L", &n, &ilo, &ihi, &rwork[ibal], &n,
                          VL, &ldvl, &ierr );

        /* Normalize left eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            scl = 1. / magma_cblas_scnrm2( n, VL(0,i), 1 );
            blasf77_csscal( &n, &scl, VL(0,i), &ione );
            for (k = 0; k < n; ++k) {
                /* Computing 2nd power */
                d__1 = MAGMA_C_REAL( *VL(k,i) );
                d__2 = MAGMA_C_IMAG( *VL(k,i) );
                rwork[irwork + k] = d__1*d__1 + d__2*d__2;
            }
            k = blasf77_isamax( &n, &rwork[irwork], &ione ) - 1;  // subtract 1; k is 0-based
            tmp = MAGMA_C_CONJ( *VL(k,i) ) / magma_ssqrt( rwork[irwork + k] );
            blasf77_cscal( &n, &tmp, VL(0,i), &ione );
            *VL(k,i) = MAGMA_C_MAKE( MAGMA_C_REAL( *VL(k,i) ), 0 );
        }
    }

    if (wantvr) {
        /* Undo balancing of right eigenvectors
         * (CWorkspace: none)
         * (RWorkspace: need N) */
        lapackf77_cgebak( "B", "R", &n, &ilo, &ihi, &rwork[ibal], &n,
                          VR, &ldvr, &ierr );

        /* Normalize right eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            scl = 1. / magma_cblas_scnrm2( n, VR(0,i), 1 );
            blasf77_csscal( &n, &scl, VR(0,i), &ione );
            for (k = 0; k < n; ++k) {
                /* Computing 2nd power */
                d__1 = MAGMA_C_REAL( *VR(k,i) );
                d__2 = MAGMA_C_IMAG( *VR(k,i) );
                rwork[irwork + k] = d__1*d__1 + d__2*d__2;
            }
            k = blasf77_isamax( &n, &rwork[irwork], &ione ) - 1;  // subtract 1; k is 0-based
            tmp = MAGMA_C_CONJ( *VR(k,i) ) / magma_ssqrt( rwork[irwork + k] );
            blasf77_cscal( &n, &tmp, VR(0,i), &ione );
            *VR(k,i) = MAGMA_C_MAKE( MAGMA_C_REAL( *VR(k,i) ), 0 );
        }
    }

CLEANUP:
    /* Undo scaling if necessary */
    if (scalea) {
        // converged eigenvalues, stored in WR[i+1:n] and WI[i+1:n] for i = INFO
        magma_int_t nval = n - (*info);
        magma_int_t ld   = max( nval, 1 );
        lapackf77_clascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, w + (*info), &ld, &ierr );
        if (*info > 0) {
            // first ilo columns were already upper triangular,
            // so the corresponding eigenvalues are also valid.
            nval = ilo - 1;
            lapackf77_clascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, w, &n, &ierr );
        }
    }

    #if defined(Version3)
    magma_free( dT );
    #endif
    #if defined(Version5)
    magma_free_cpu( T );
    #endif
    
    work[0] = magma_cmake_lwork( minwrk );  // TODO use optwrk as in dgeev

    return *info;
} /* magma_cgeev */
Пример #13
0
void setup (int N, const Parameter &param, Array<double, 1> &WR, Array<double,2> &ev, Array<double,2> &evInv)
{
   int Nm1 = N;
   int i;
   Array<double, 1> x;
   Array<double, 2> D;
   Array<double, 1> r;
   Array<double, 2> Dsec;
   Array<double, 1> XX;
   Array<double, 1> YY;
   Array<double, 2> A(N,N);
   Array<double, 2> B(N,N);
   Array<int, 1> IPIV(Nm1);
    char BALANC[1];
    char JOBVL[1];
    char JOBVR[1];
    char SENSE[1];
    int LDA;
    int LDVL;
    int LDVR;
    int NRHS;
    int LDB;
    int INFO;

	//resize output arrays
	WR.resize(N);
	ev.resize(N, N);
	evInv.resize(N, N);


// parameters for DGEEVX
    Array<double, 1>  WI(Nm1); // WR(Nm1),
           // The real and imaginary part of the eig.values
	Array<double, 2> VL(N, N);
    Array<double, 2> VR(Nm1,Nm1); //VR(Nm1,Nm1); 
           // The left and rigth eigenvectors
    int ILO, IHI;        // Info on the balanced output matrix
    Array<double, 1> SCALE(Nm1);     // Scaling factors applied for balancing
    double ABNRM;        // 1-Norm of the balanced matrix
    Array<double, 1> RCONDE(Nm1);  
           // the reciprocal cond. numb of the respective eig.val
    Array<double, 1> RCONDV(Nm1); 
           // the reciprocal cond. numb of the respective eig.vec
    int LWORK = (N+1)*(N+7); // Depending on SENSE            
    Array<double, 1> WORK(LWORK);
    Array<int, 1> IWORK(2*(N+1)-2);


// Compute the Chebyshev differensiation matrix and D*D

//   cheb(N, x, D);
   cheb(N, x, D);
   Dsec.resize(D.shape());
   MatrixMatrixMultiply(D, D, Dsec);


// Compute the 1. and 2. derivatives of the transformations

   XYmat(N, param, XX, YY, r);

   // Set up the full timepropagation matrix A
   // dy/dt = - i A y
   Range range(1, N); //Dsec and D have range 0, N+1. 
   					  //We don't want the edge points in A
   A = XX(tensor::i) * Dsec(range, range) + YY(tensor::i) * D(range, range);
   //Transpose A
   for (int i=0; i<A.extent(0); i++)
   {
		for (int j=0; j<i; j++)
		{
			double t = A(i,j);
			A(i,j) = A(j, i);
			A(j,i) = t;
		}
   }


// Add radialpart of non-time dependent potential here
/* 2D radial
	for (int i=0; i<A.extent(0); i++)
   {
		A(i, i) += 0.25 / (r(i)*r(i));
   }
*/

// Compute eigen decomposition

   BALANC[0] ='B';
   JOBVL[0]  ='V';
   JOBVR[0]  ='V';
   SENSE[0]  ='B';
   LDA = Nm1;
   LDVL = Nm1;
   LDVR = Nm1;

   FORTRAN_NAME(dgeevx)(BALANC, JOBVL, JOBVR, SENSE, &Nm1,
            A.data(), &LDA, WR.data(), WI.data(),
            VL.data(), &LDVL, VR.data(), &LDVR, &ILO, &IHI,
            SCALE.data(), &ABNRM,
            RCONDE.data(), RCONDV.data(), WORK.data(), &LWORK,
            IWORK.data(), &INFO);

// Compute the inverse of the eigen vector matrix

    NRHS = Nm1;
	
    evInv = VR ;// VL;
    LDB = LDA;
    B = 0.0;
    for (i=0; i<Nm1; i++) B(i,i) = 1.0; 

    FORTRAN_NAME(dgesv)(&Nm1, &NRHS, evInv.data(), &LDA, IPIV.data(), B.data(), &LDB, &INFO);

	ev = VR(tensor::j, tensor::i);   //Transpose
	evInv = B(tensor::j, tensor::i); //Transpose
	
	//cout << "Eigenvectors (right): " << ev << endl;
	//cout << "Eigenvectors (inv): " << evInv << endl;
    //printf(" Done  inverse, INFO = %d \n", INFO);
} // done
Пример #14
0
//A is an antisymmetric matrix and B is the output rotation matrix
void make_rotation_matrix_notworking(const Array2 <doublevar> & A,
                                     Array2 <doublevar> & B) {
    int n=A.GetDim(0);
    assert(A.GetDim(1)==n);
    B.Resize(n,n);

    Array2 <dcomplex> skew(n,n),VL(n,n),VR(n,n);
    Array1 <dcomplex> evals(n);
    for(int i=0; i< n; i++) {
        for(int j=0; j < n; j++) {
            skew(i,j)=A(i,j);
        }
    }
    GeneralizedEigenSystemSolverComplexGeneralMatrices(skew,evals,VL,VR);

    cout << "evals " << endl;
    for(int i=0; i< n; i++) cout << evals(i) << " ";
    cout << endl;
    cout << "VR " << endl;
    for(int i=0; i< n; i++) {
        for(int j=0; j< n; j++) {
            cout << VR(i,j) << " ";
        }
        cout << endl;
    }
    cout << "VL " << endl;
    for(int i=0; i< n; i++) {
        for(int j=0; j< n; j++) {
            cout << VL(i,j) << " ";
        }
        cout << endl;
    }
    //this is horribly inefficient,most likely

    skew=dcomplex(0.0,0.); //we don't need that any more so we reuse it
    Array2 <dcomplex> work(n,n);
    work=dcomplex(0.0,0.);
    for(int i=0; i< n; i++) {
        skew(i,i)=exp(evals(i));
    }
    for(int i=0; i< n; i++) {
        for(int j=0; j<n; j++) {
            for(int k=0; k< n; k++) {
                work(i,k)+=skew(i,j)*VR(j,k);
            }
        }
    }
    skew=dcomplex(0.,0.);
    for(int i=0; i< n; i++) {
        for(int j=0; j<n; j++) {
            for(int k=0; k< n; k++) {
//skew(i,k)+=conj(VL(i,j))*work(j,k);
                skew(i,k)=conj(VR(j,i))*work(j,k);
            }
        }
    }


    cout << "rotation " << endl;
    for(int i=0; i< n; i++) {
        for(int j=0; j< n; j++) {
            cout << skew(i,j) << " ";
        }
        cout << endl;
    }


}
Пример #15
0
/* Subroutine */ int dtrsna_(char *job, char *howmny, logical *select, 
	integer *n, doublereal *t, integer *ldt, doublereal *vl, integer *
	ldvl, doublereal *vr, integer *ldvr, doublereal *s, doublereal *sep, 
	integer *mm, integer *m, doublereal *work, integer *ldwork, integer *
	iwork, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    DTRSNA estimates reciprocal condition numbers for specified   
    eigenvalues and/or right eigenvectors of a real upper   
    quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q   
    orthogonal).   

    T must be in Schur canonical form (as returned by DHSEQR), that is,   
    block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each   
    2-by-2 diagonal block has its diagonal elements equal and its   
    off-diagonal elements of opposite sign.   

    Arguments   
    =========   

    JOB     (input) CHARACTER*1   
            Specifies whether condition numbers are required for   
            eigenvalues (S) or eigenvectors (SEP):   
            = 'E': for eigenvalues only (S);   
            = 'V': for eigenvectors only (SEP);   
            = 'B': for both eigenvalues and eigenvectors (S and SEP).   

    HOWMNY  (input) CHARACTER*1   
            = 'A': compute condition numbers for all eigenpairs;   
            = 'S': compute condition numbers for selected eigenpairs   
                   specified by the array SELECT.   

    SELECT  (input) LOGICAL array, dimension (N)   
            If HOWMNY = 'S', SELECT specifies the eigenpairs for which   
            condition numbers are required. To select condition numbers   
            for the eigenpair corresponding to a real eigenvalue w(j),   
            SELECT(j) must be set to .TRUE.. To select condition numbers 
  
            corresponding to a complex conjugate pair of eigenvalues w(j) 
  
            and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be 
  
            set to .TRUE..   
            If HOWMNY = 'A', SELECT is not referenced.   

    N       (input) INTEGER   
            The order of the matrix T. N >= 0.   

    T       (input) DOUBLE PRECISION array, dimension (LDT,N)   
            The upper quasi-triangular matrix T, in Schur canonical form. 
  

    LDT     (input) INTEGER   
            The leading dimension of the array T. LDT >= max(1,N).   

    VL      (input) DOUBLE PRECISION array, dimension (LDVL,M)   
            If JOB = 'E' or 'B', VL must contain left eigenvectors of T   
            (or of any Q*T*Q**T with Q orthogonal), corresponding to the 
  
            eigenpairs specified by HOWMNY and SELECT. The eigenvectors   
            must be stored in consecutive columns of VL, as returned by   
            DHSEIN or DTREVC.   
            If JOB = 'V', VL is not referenced.   

    LDVL    (input) INTEGER   
            The leading dimension of the array VL.   
            LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N.   

    VR      (input) DOUBLE PRECISION array, dimension (LDVR,M)   
            If JOB = 'E' or 'B', VR must contain right eigenvectors of T 
  
            (or of any Q*T*Q**T with Q orthogonal), corresponding to the 
  
            eigenpairs specified by HOWMNY and SELECT. The eigenvectors   
            must be stored in consecutive columns of VR, as returned by   
            DHSEIN or DTREVC.   
            If JOB = 'V', VR is not referenced.   

    LDVR    (input) INTEGER   
            The leading dimension of the array VR.   
            LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N.   

    S       (output) DOUBLE PRECISION array, dimension (MM)   
            If JOB = 'E' or 'B', the reciprocal condition numbers of the 
  
            selected eigenvalues, stored in consecutive elements of the   
            array. For a complex conjugate pair of eigenvalues two   
            consecutive elements of S are set to the same value. Thus   
            S(j), SEP(j), and the j-th columns of VL and VR all   
            correspond to the same eigenpair (but not in general the   
            j-th eigenpair, unless all eigenpairs are selected).   
            If JOB = 'V', S is not referenced.   

    SEP     (output) DOUBLE PRECISION array, dimension (MM)   
            If JOB = 'V' or 'B', the estimated reciprocal condition   
            numbers of the selected eigenvectors, stored in consecutive   
            elements of the array. For a complex eigenvector two   
            consecutive elements of SEP are set to the same value. If   
            the eigenvalues cannot be reordered to compute SEP(j), SEP(j) 
  
            is set to 0; this can only occur when the true value would be 
  
            very small anyway.   
            If JOB = 'E', SEP is not referenced.   

    MM      (input) INTEGER   
            The number of elements in the arrays S (if JOB = 'E' or 'B') 
  
             and/or SEP (if JOB = 'V' or 'B'). MM >= M.   

    M       (output) INTEGER   
            The number of elements of the arrays S and/or SEP actually   
            used to store the estimated condition numbers.   
            If HOWMNY = 'A', M is set to N.   

    WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,N+1)   
            If JOB = 'E', WORK is not referenced.   

    LDWORK  (input) INTEGER   
            The leading dimension of the array WORK.   
            LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N.   

    IWORK   (workspace) INTEGER array, dimension (N)   
            If JOB = 'E', IWORK is not referenced.   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The reciprocal of the condition number of an eigenvalue lambda is   
    defined as   

            S(lambda) = |v'*u| / (norm(u)*norm(v))   

    where u and v are the right and left eigenvectors of T corresponding 
  
    to lambda; v' denotes the conjugate-transpose of v, and norm(u)   
    denotes the Euclidean norm. These reciprocal condition numbers always 
  
    lie between zero (very badly conditioned) and one (very well   
    conditioned). If n = 1, S(lambda) is defined to be 1.   

    An approximate error bound for a computed eigenvalue W(i) is given by 
  

                        EPS * norm(T) / S(i)   

    where EPS is the machine precision.   

    The reciprocal of the condition number of the right eigenvector u   
    corresponding to lambda is defined as follows. Suppose   

                T = ( lambda  c  )   
                    (   0    T22 )   

    Then the reciprocal condition number is   

            SEP( lambda, T22 ) = sigma-min( T22 - lambda*I )   

    where sigma-min denotes the smallest singular value. We approximate   
    the smallest singular value by the reciprocal of an estimate of the   
    one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is   
    defined to be abs(T(1,1)).   

    An approximate error bound for a computed right eigenvector VR(i)   
    is given by   

                        EPS * norm(T) / SEP(i)   

    ===================================================================== 
  


       Decode and test the input parameters   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static logical c_true = TRUE_;
    static logical c_false = FALSE_;
    
    /* System generated locals */
    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, 
	    work_dim1, work_offset, i__1, i__2;
    doublereal d__1, d__2;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer kase;
    static doublereal cond;
    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, 
	    integer *);
    static logical pair;
    static integer ierr;
    static doublereal dumm, prod;
    static integer ifst;
    static doublereal lnrm;
    static integer ilst;
    static doublereal rnrm;
    extern doublereal dnrm2_(integer *, doublereal *, integer *);
    static doublereal prod1, prod2;
    static integer i, j, k;
    static doublereal scale, delta;
    extern logical lsame_(char *, char *);
    static logical wants;
    static doublereal dummy[1];
    static integer n2;
    extern doublereal dlapy2_(doublereal *, doublereal *);
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    static doublereal cs;
    extern doublereal dlamch_(char *);
    static integer nn, ks;
    extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *,
	     integer *, doublereal *, integer *);
    static doublereal sn, mu;
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, integer *), 
	    xerbla_(char *, integer *);
    static doublereal bignum;
    static logical wantbh;
    extern /* Subroutine */ int dlaqtr_(logical *, logical *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
	     doublereal *, doublereal *, integer *), dtrexc_(char *, integer *
	    , doublereal *, integer *, doublereal *, integer *, integer *, 
	    integer *, doublereal *, integer *);
    static logical somcon;
    static doublereal smlnum;
    static logical wantsp;
    static doublereal eps, est;



#define DUMMY(I) dummy[(I)]
#define SELECT(I) select[(I)-1]
#define S(I) s[(I)-1]
#define SEP(I) sep[(I)-1]
#define IWORK(I) iwork[(I)-1]

#define T(I,J) t[(I)-1 + ((J)-1)* ( *ldt)]
#define VL(I,J) vl[(I)-1 + ((J)-1)* ( *ldvl)]
#define VR(I,J) vr[(I)-1 + ((J)-1)* ( *ldvr)]
#define WORK(I,J) work[(I)-1 + ((J)-1)* ( *ldwork)]

    wantbh = lsame_(job, "B");
    wants = lsame_(job, "E") || wantbh;
    wantsp = lsame_(job, "V") || wantbh;

    somcon = lsame_(howmny, "S");

    *info = 0;
    if (! wants && ! wantsp) {
	*info = -1;
    } else if (! lsame_(howmny, "A") && ! somcon) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*ldt < max(1,*n)) {
	*info = -6;
    } else if (*ldvl < 1 || wants && *ldvl < *n) {
	*info = -8;
    } else if (*ldvr < 1 || wants && *ldvr < *n) {
	*info = -10;
    } else {

/*        Set M to the number of eigenpairs for which condition number
s   
          are required, and test MM. */

	if (somcon) {
	    *m = 0;
	    pair = FALSE_;
	    i__1 = *n;
	    for (k = 1; k <= *n; ++k) {
		if (pair) {
		    pair = FALSE_;
		} else {
		    if (k < *n) {
			if (T(k+1,k) == 0.) {
			    if (SELECT(k)) {
				++(*m);
			    }
			} else {
			    pair = TRUE_;
			    if (SELECT(k) || SELECT(k + 1)) {
				*m += 2;
			    }
			}
		    } else {
			if (SELECT(*n)) {
			    ++(*m);
			}
		    }
		}
/* L10: */
	    }
	} else {
	    *m = *n;
	}

	if (*mm < *m) {
	    *info = -13;
	} else if (*ldwork < 1 || wantsp && *ldwork < *n) {
	    *info = -16;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DTRSNA", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    if (*n == 1) {
	if (somcon) {
	    if (! SELECT(1)) {
		return 0;
	    }
	}
	if (wants) {
	    S(1) = 1.;
	}
	if (wantsp) {
	    SEP(1) = (d__1 = T(1,1), abs(d__1));
	}
	return 0;
    }

/*     Get machine constants */

    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);

    ks = 0;
    pair = FALSE_;
    i__1 = *n;
    for (k = 1; k <= *n; ++k) {

/*        Determine whether T(k,k) begins a 1-by-1 or 2-by-2 block. */

	if (pair) {
	    pair = FALSE_;
	    goto L60;
	} else {
	    if (k < *n) {
		pair = T(k+1,k) != 0.;
	    }
	}

/*        Determine whether condition numbers are required for the k-t
h   
          eigenpair. */

	if (somcon) {
	    if (pair) {
		if (! SELECT(k) && ! SELECT(k + 1)) {
		    goto L60;
		}
	    } else {
		if (! SELECT(k)) {
		    goto L60;
		}
	    }
	}

	++ks;

	if (wants) {

/*           Compute the reciprocal condition number of the k-th 
  
             eigenvalue. */

	    if (! pair) {

/*              Real eigenvalue. */

		prod = ddot_(n, &VR(1,ks), &c__1, &VL(1,ks), &c__1);
		rnrm = dnrm2_(n, &VR(1,ks), &c__1);
		lnrm = dnrm2_(n, &VL(1,ks), &c__1);
		S(ks) = abs(prod) / (rnrm * lnrm);
	    } else {

/*              Complex eigenvalue. */

		prod1 = ddot_(n, &VR(1,ks), &c__1, &VL(1,ks), &c__1);
		prod1 += ddot_(n, &VR(1,ks+1), &c__1, &VL(1,ks+1), &c__1);
		prod2 = ddot_(n, &VL(1,ks), &c__1, &VR(1,ks+1), &c__1);
		prod2 -= ddot_(n, &VL(1,ks+1), &c__1, &VR(1,ks), &c__1);
		d__1 = dnrm2_(n, &VR(1,ks), &c__1);
		d__2 = dnrm2_(n, &VR(1,ks+1), &c__1);
		rnrm = dlapy2_(&d__1, &d__2);
		d__1 = dnrm2_(n, &VL(1,ks), &c__1);
		d__2 = dnrm2_(n, &VL(1,ks+1), &c__1);
		lnrm = dlapy2_(&d__1, &d__2);
		cond = dlapy2_(&prod1, &prod2) / (rnrm * lnrm);
		S(ks) = cond;
		S(ks + 1) = cond;
	    }
	}

	if (wantsp) {

/*           Estimate the reciprocal condition number of the k-th 
  
             eigenvector.   

             Copy the matrix T to the array WORK and swap the diag
onal   
             block beginning at T(k,k) to the (1,1) position. */

	    dlacpy_("Full", n, n, &T(1,1), ldt, &WORK(1,1), 
		    ldwork);
	    ifst = k;
	    ilst = 1;
	    dtrexc_("No Q", n, &WORK(1,1), ldwork, dummy, &c__1, &
		    ifst, &ilst, &WORK(1,*n+1), &ierr);

	    if (ierr == 1 || ierr == 2) {

/*              Could not swap because blocks not well separat
ed */

		scale = 1.;
		est = bignum;
	    } else {

/*              Reordering successful */

		if (WORK(2,1) == 0.) {

/*                 Form C = T22 - lambda*I in WORK(2:N,2:N
). */

		    i__2 = *n;
		    for (i = 2; i <= *n; ++i) {
			WORK(i,i) -= WORK(1,1);
/* L20: */
		    }
		    n2 = 1;
		    nn = *n - 1;
		} else {

/*                 Triangularize the 2 by 2 block by unita
ry   
                   transformation U = [  cs   i*ss ]   
                                      [ i*ss   cs  ].   
                   such that the (1,1) position of WORK is
 complex   
                   eigenvalue lambda with positive imagina
ry part. (2,2)   
                   position of WORK is the complex eigenva
lue lambda   
                   with negative imaginary  part. */

		    mu = sqrt((d__1 = WORK(1,2), abs(d__1))) 
			    * sqrt((d__2 = WORK(2,1), abs(d__2)));
		    delta = dlapy2_(&mu, &WORK(2,1));
		    cs = mu / delta;
		    sn = -WORK(2,1) / delta;

/*                 Form   

                   C' = WORK(2:N,2:N) + i*[rwork(1) ..... 
rwork(n-1) ]   
                                          [   mu          
           ]   
                                          [         ..    
           ]   
                                          [             ..
           ]   
                                          [               
   mu      ]   
                   where C' is conjugate transpose of comp
lex matrix C,   
                   and RWORK is stored starting in the N+1
-st column of   
                   WORK. */

		    i__2 = *n;
		    for (j = 3; j <= *n; ++j) {
			WORK(2,j) = cs * WORK(2,j)
				;
			WORK(j,j) -= WORK(1,1);
/* L30: */
		    }
		    WORK(2,2) = 0.;

		    WORK(1,*n+1) = mu * 2.;
		    i__2 = *n - 1;
		    for (i = 2; i <= *n-1; ++i) {
			WORK(i,*n+1) = sn * WORK(1,i+1);
/* L40: */
		    }
		    n2 = 2;
		    nn = *n - 1 << 1;
		}

/*              Estimate norm(inv(C')) */

		est = 0.;
		kase = 0;
L50:
		dlacon_(&nn, &WORK(1,*n+2), &WORK(1,*n+4), &IWORK(1), &est, &kase);
		if (kase != 0) {
		    if (kase == 1) {
			if (n2 == 1) {

/*                       Real eigenvalue: solve C'
*x = scale*c. */

			    i__2 = *n - 1;
			    dlaqtr_(&c_true, &c_true, &i__2, &WORK(2,2), ldwork, dummy, &dumm, &scale, 
				    &WORK(1,*n+4), &WORK(1,*n+6), &ierr);
			} else {

/*                       Complex eigenvalue: solve
   
                         C'*(p+iq) = scale*(c+id) 
in real arithmetic. */

			    i__2 = *n - 1;
			    dlaqtr_(&c_true, &c_false, &i__2, &WORK(2,2), ldwork, &WORK(1,*n+1), &mu, &scale, &WORK(1,*n+4), &WORK(1,*n+6), &ierr);
			}
		    } else {
			if (n2 == 1) {

/*                       Real eigenvalue: solve C*
x = scale*c. */

			    i__2 = *n - 1;
			    dlaqtr_(&c_false, &c_true, &i__2, &WORK(2,2), ldwork, dummy, &
				    dumm, &scale, &WORK(1,*n+4), &WORK(1,*n+6), &
				    ierr);
			} else {

/*                       Complex eigenvalue: solve
   
                         C*(p+iq) = scale*(c+id) i
n real arithmetic. */

			    i__2 = *n - 1;
			    dlaqtr_(&c_false, &c_false, &i__2, &WORK(2,2), ldwork, &WORK(1,*n+1), &mu, &scale, &WORK(1,*n+4), &WORK(1,*n+6), &ierr);

			}
		    }

		    goto L50;
		}
	    }

	    SEP(ks) = scale / max(est,smlnum);
	    if (pair) {
		SEP(ks + 1) = SEP(ks);
	    }
	}

	if (pair) {
	    ++ks;
	}

L60:
	;
    }
    return 0;

/*     End of DTRSNA */

} /* dtrsna_ */
Пример #16
0
magma_int_t magma_ztrevc3(
    magma_side_t side, magma_vec_t howmany,
    magma_int_t *select,  // logical in Fortran
    magma_int_t n,
    magmaDoubleComplex *T,  magma_int_t ldt,
    magmaDoubleComplex *VL, magma_int_t ldvl,
    magmaDoubleComplex *VR, magma_int_t ldvr,
    magma_int_t mm, magma_int_t *mout,
    magmaDoubleComplex *work, magma_int_t lwork,
    double *rwork, magma_int_t *info )
{
    #define  T(i,j)  ( T + (i) + (j)*ldt )
    #define VL(i,j)  (VL + (i) + (j)*ldvl)
    #define VR(i,j)  (VR + (i) + (j)*ldvr)
    #define work(i,j) (work + (i) + (j)*n)

    // .. Parameters ..
    const magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
    const magmaDoubleComplex c_one  = MAGMA_Z_ONE;
    const magma_int_t  nbmin = 16, nbmax = 128;
    const magma_int_t  ione = 1;
    
    // .. Local Scalars ..
    magma_int_t            allv, bothv, leftv, over, rightv, somev;
    magma_int_t            i, ii, is, j, k, ki, iv, n2, nb, nb2, version;
    double                 ovfl, remax, scale, smin, smlnum, ulp, unfl;
    
    // Decode and test the input parameters
    bothv  = (side == MagmaBothSides);
    rightv = (side == MagmaRight) || bothv;
    leftv  = (side == MagmaLeft ) || bothv;

    allv  = (howmany == MagmaAllVec);
    over  = (howmany == MagmaBacktransVec);
    somev = (howmany == MagmaSomeVec);

    // Set mout to the number of columns required to store the selected
    // eigenvectors.
    if ( somev ) {
        *mout = 0;
        for( j=0; j < n; ++j ) {
            if ( select[j] ) {
                *mout += 1;
            }
        }
    }
    else {
        *mout = n;
    }

    *info = 0;
    if ( ! rightv && ! leftv )
        *info = -1;
    else if ( ! allv && ! over && ! somev )
        *info = -2;
    else if ( n < 0 )
        *info = -4;
    else if ( ldt < max( 1, n ) )
        *info = -6;
    else if ( ldvl < 1 || ( leftv && ldvl < n ) )
        *info = -8;
    else if ( ldvr < 1 || ( rightv && ldvr < n ) )
        *info = -10;
    else if ( mm < *mout )
        *info = -11;
    else if ( lwork < max( 1, 2*n ) )
        *info = -14;
    
    if ( *info != 0 ) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    // Quick return if possible.
    if ( n == 0 ) {
        return *info;
    }
    
    // Use blocked version (2) if sufficient workspace.
    // Requires 1 vector to save diagonal elements, and 2*nb vectors for x and Q*x.
    // (Compared to dtrevc3, rwork stores 1-norms.)
    // Zero-out the workspace to avoid potential NaN propagation.
    nb = 2;
    if ( lwork >= n + 2*n*nbmin ) {
        version = 2;
        nb = (lwork - n) / (2*n);
        nb = min( nb, nbmax );
        nb2 = 1 + 2*nb;
        lapackf77_zlaset( "F", &n, &nb2, &c_zero, &c_zero, work, &n );
    }
    else {
        version = 1;
    }

    // Set the constants to control overflow.
    unfl = lapackf77_dlamch( "Safe minimum" );
    ovfl = 1. / unfl;
    lapackf77_dlabad( &unfl, &ovfl );
    ulp = lapackf77_dlamch( "Precision" );
    smlnum = unfl*( n / ulp );

    // Store the diagonal elements of T in working array work.
    for( i=0; i < n; ++i ) {
        *work(i,0) = *T(i,i);
    }

    // Compute 1-norm of each column of strictly upper triangular
    // part of T to control overflow in triangular solver.
    rwork[0] = 0.;
    for( j=1; j < n; ++j ) {
        rwork[j] = cblas_dzasum( j, T(0,j), ione );
    }

    magma_timer_t time_total=0, time_trsv=0, time_gemm=0, time_gemv=0, time_trsv_sum=0, time_gemm_sum=0, time_gemv_sum=0;
    timer_start( time_total );

    if ( rightv ) {
        // ============================================================
        // Compute right eigenvectors.
        // iv is index of column in current block.
        // Non-blocked version always uses iv=1;
        // blocked     version starts with iv=nb, goes down to 1.
        // (Note the "0-th" column is used to store the original diagonal.)
        iv = 1;
        if ( version == 2 ) {
            iv = nb;
        }
        
        timer_start( time_trsv );
        is = *mout - 1;
        for( ki=n-1; ki >= 0; --ki ) {
            if ( somev ) {
                if ( ! select[ki] ) {
                    continue;
                }
            }
            smin = max( ulp*( MAGMA_Z_ABS1( *T(ki,ki) ) ), smlnum );

            // --------------------------------------------------------
            // Complex right eigenvector
            *work(ki,iv) = c_one;

            // Form right-hand side.
            for( k=0; k < ki; ++k ) {
                *work(k,iv) = -(*T(k,ki));
            }

            // Solve upper triangular system:
            // [ T(1:ki-1,1:ki-1) - T(ki,ki) ]*X = scale*work.
            for( k=0; k < ki; ++k ) {
                *T(k,k) -= *T(ki,ki);
                if ( MAGMA_Z_ABS1( *T(k,k) ) < smin ) {
                    *T(k,k) = MAGMA_Z_MAKE( smin, 0. );
                }
            }

            if ( ki > 0 ) {
                lapackf77_zlatrs( "Upper", "No transpose", "Non-unit", "Y",
                                  &ki, T, &ldt,
                                  work(0,iv), &scale, rwork, info );
                *work(ki,iv) = MAGMA_Z_MAKE( scale, 0. );
            }

            // Copy the vector x or Q*x to VR and normalize.
            if ( ! over ) {
                // ------------------------------
                // no back-transform: copy x to VR and normalize
                n2 = ki+1;
                blasf77_zcopy( &n2, work(0,iv), &ione, VR(0,is), &ione );

                ii = blasf77_izamax( &n2, VR(0,is), &ione ) - 1;
                remax = 1. / MAGMA_Z_ABS1( *VR(ii,is) );
                blasf77_zdscal( &n2, &remax, VR(0,is), &ione );

                for( k=ki+1; k < n; ++k ) {
                    *VR(k,is) = c_zero;
                }
            }
            else if ( version == 1 ) {
                // ------------------------------
                // version 1: back-transform each vector with GEMV, Q*x.
                time_trsv_sum += timer_stop( time_trsv );
                timer_start( time_gemv );
                if ( ki > 0 ) {
                    blasf77_zgemv( "n", &n, &ki, &c_one,
                                   VR, &ldvr,
                                   work(0, iv), &ione,
                                   work(ki,iv), VR(0,ki), &ione );
                }
                time_gemv_sum += timer_stop( time_gemv );
                ii = blasf77_izamax( &n, VR(0,ki), &ione ) - 1;
                remax = 1. / MAGMA_Z_ABS1( *VR(ii,ki) );
                blasf77_zdscal( &n, &remax, VR(0,ki), &ione );
                timer_start( time_trsv );
            }
            else if ( version == 2 ) {
                // ------------------------------
                // version 2: back-transform block of vectors with GEMM
                // zero out below vector
                for( k=ki+1; k < n; ++k ) {
                    *work(k,iv) = c_zero;
                }

                // Columns iv:nb of work are valid vectors.
                // When the number of vectors stored reaches nb,
                // or if this was last vector, do the GEMM
                if ( (iv == 1) || (ki == 0) ) {
                    time_trsv_sum += timer_stop( time_trsv );
                    timer_start( time_gemm );
                    nb2 = nb-iv+1;
                    n2  = ki+nb-iv+1;
                    blasf77_zgemm( "n", "n", &n, &nb2, &n2, &c_one,
                                   VR, &ldvr,
                                   work(0,iv   ), &n, &c_zero,
                                   work(0,nb+iv), &n );
                    time_gemm_sum += timer_stop( time_gemm );
                    
                    // normalize vectors
                    // TODO if somev, should copy vectors individually to correct location.
                    for( k = iv; k <= nb; ++k ) {
                        ii = blasf77_izamax( &n, work(0,nb+k), &ione ) - 1;
                        remax = 1. / MAGMA_Z_ABS1( *work(ii,nb+k) );
                        blasf77_zdscal( &n, &remax, work(0,nb+k), &ione );
                    }
                    lapackf77_zlacpy( "F", &n, &nb2, work(0,nb+iv), &n, VR(0,ki), &ldvr );
                    iv = nb;
                    timer_start( time_trsv );
                }
                else {
                    iv -= 1;
                }
            } // blocked back-transform

            // Restore the original diagonal elements of T.
            for( k=0; k <= ki - 1; ++k ) {
                *T(k,k) = *work(k,0);
            }

            is -= 1;
        }
    }
    timer_stop( time_trsv );

    timer_stop( time_total );
    timer_printf( "trevc trsv %.4f, gemm %.4f, gemv %.4f, total %.4f\n",
                  time_trsv_sum, time_gemm_sum, time_gemv_sum, time_total );

    if ( leftv ) {
        // ============================================================
        // Compute left eigenvectors.
        // iv is index of column in current block.
        // Non-blocked version always uses iv=1;
        // blocked     version starts with iv=1, goes up to nb.
        // (Note the "0-th" column is used to store the original diagonal.)
        iv = 1;
        is = 0;
        for( ki=0; ki < n; ++ki ) {
            if ( somev ) {
                if ( ! select[ki] ) {
                    continue;
                }
            }
            smin = max( ulp*MAGMA_Z_ABS1( *T(ki,ki) ), smlnum );

            // --------------------------------------------------------
            // Complex left eigenvector
            *work(ki,iv) = c_one;

            // Form right-hand side.
            for( k = ki + 1; k < n; ++k ) {
                *work(k,iv) = -MAGMA_Z_CNJG( *T(ki,k) );
            }

            // Solve conjugate-transposed triangular system:
            // [ T(ki+1:n,ki+1:n) - T(ki,ki) ]**H * X = scale*work.
            for( k = ki + 1; k < n; ++k ) {
                *T(k,k) -= *T(ki,ki);
                if ( MAGMA_Z_ABS1( *T(k,k) ) < smin ) {
                    *T(k,k) = MAGMA_Z_MAKE( smin, 0. );
                }
            }

            if ( ki < n-1 ) {
                n2 = n-ki-1;
                lapackf77_zlatrs( "Upper", "Conjugate transpose", "Non-unit", "Y",
                                  &n2, T(ki+1,ki+1), &ldt,
                                  work(ki+1,iv), &scale, rwork, info );
                *work(ki,iv) = MAGMA_Z_MAKE( scale, 0. );
            }

            // Copy the vector x or Q*x to VL and normalize.
            if ( ! over ) {
                // ------------------------------
                // no back-transform: copy x to VL and normalize
                n2 = n-ki;
                blasf77_zcopy( &n2, work(ki,iv), &ione, VL(ki,is), &ione );

                ii = blasf77_izamax( &n2, VL(ki,is), &ione ) + ki - 1;
                remax = 1. / MAGMA_Z_ABS1( *VL(ii,is) );
                blasf77_zdscal( &n2, &remax, VL(ki,is), &ione );

                for( k=0; k < ki; ++k ) {
                    *VL(k,is) = c_zero;
                }
            }
            else if ( version == 1 ) {
                // ------------------------------
                // version 1: back-transform each vector with GEMV, Q*x.
                if ( ki < n-1 ) {
                    n2 = n-ki-1;
                    blasf77_zgemv( "n", &n, &n2, &c_one,
                                   VL(0,ki+1), &ldvl,
                                   work(ki+1,iv), &ione,
                                   work(ki,  iv), VL(0,ki), &ione );
                }
                ii = blasf77_izamax( &n, VL(0,ki), &ione ) - 1;
                remax = 1. / MAGMA_Z_ABS1( *VL(ii,ki) );
                blasf77_zdscal( &n, &remax, VL(0,ki), &ione );
            }
            else if ( version == 2 ) {
                // ------------------------------
                // version 2: back-transform block of vectors with GEMM
                // zero out above vector
                // could go from (ki+1)-NV+1 to ki
                for( k=0; k < ki; ++k ) {
                    *work(k,iv) = c_zero;
                }

                // Columns 1:iv of work are valid vectors.
                // When the number of vectors stored reaches nb,
                // or if this was last vector, do the GEMM
                if ( (iv == nb) || (ki == n-1) ) {
                    n2 = n-(ki+1)+iv;
                    blasf77_zgemm( "n", "n", &n, &iv, &n2, &c_one,
                                   VL(0,ki-iv+1), &ldvl,
                                   work(ki-iv+1,1   ), &n, &c_zero,
                                   work(0,      nb+1), &n );
                    // normalize vectors
                    for( k=1; k <= iv; ++k ) {
                        ii = blasf77_izamax( &n, work(0,nb+k), &ione ) - 1;
                        remax = 1. / MAGMA_Z_ABS1( *work(ii,nb+k) );
                        blasf77_zdscal( &n, &remax, work(0,nb+k), &ione );
                    }
                    lapackf77_zlacpy( "F", &n, &iv, work(0,nb+1), &n, VL(0,ki-iv+1), &ldvl );
                    iv = 1;
                }
                else {
                    iv += 1;
                }
            } // blocked back-transform

            // Restore the original diagonal elements of T.
            for( k = ki + 1; k < n; ++k ) {
                *T(k,k) = *work(k,0);
            }

            is += 1;
        }
    }
    
    return *info;
}  // End of ZTREVC
Пример #17
0
/* Subroutine */ int ztrsna_(char *job, char *howmny, logical *select, 
	integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, 
	integer *ldvl, doublecomplex *vr, integer *ldvr, doublereal *s, 
	doublereal *sep, integer *mm, integer *m, doublecomplex *work, 
	integer *ldwork, doublereal *rwork, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    ZTRSNA estimates reciprocal condition numbers for specified   
    eigenvalues and/or right eigenvectors of a complex upper triangular   
    matrix T (or of any matrix Q*T*Q**H with Q unitary).   

    Arguments   
    =========   

    JOB     (input) CHARACTER*1   
            Specifies whether condition numbers are required for   
            eigenvalues (S) or eigenvectors (SEP):   
            = 'E': for eigenvalues only (S);   
            = 'V': for eigenvectors only (SEP);   
            = 'B': for both eigenvalues and eigenvectors (S and SEP).   

    HOWMNY  (input) CHARACTER*1   
            = 'A': compute condition numbers for all eigenpairs;   
            = 'S': compute condition numbers for selected eigenpairs   
                   specified by the array SELECT.   

    SELECT  (input) LOGICAL array, dimension (N)   
            If HOWMNY = 'S', SELECT specifies the eigenpairs for which   
            condition numbers are required. To select condition numbers   
            for the j-th eigenpair, SELECT(j) must be set to .TRUE..   
            If HOWMNY = 'A', SELECT is not referenced.   

    N       (input) INTEGER   
            The order of the matrix T. N >= 0.   

    T       (input) COMPLEX*16 array, dimension (LDT,N)   
            The upper triangular matrix T.   

    LDT     (input) INTEGER   
            The leading dimension of the array T. LDT >= max(1,N).   

    VL      (input) COMPLEX*16 array, dimension (LDVL,M)   
            If JOB = 'E' or 'B', VL must contain left eigenvectors of T   
            (or of any Q*T*Q**H with Q unitary), corresponding to the   
            eigenpairs specified by HOWMNY and SELECT. The eigenvectors   
            must be stored in consecutive columns of VL, as returned by   
            ZHSEIN or ZTREVC.   
            If JOB = 'V', VL is not referenced.   

    LDVL    (input) INTEGER   
            The leading dimension of the array VL.   
            LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N.   

    VR      (input) COMPLEX*16 array, dimension (LDVR,M)   
            If JOB = 'E' or 'B', VR must contain right eigenvectors of T 
  
            (or of any Q*T*Q**H with Q unitary), corresponding to the   
            eigenpairs specified by HOWMNY and SELECT. The eigenvectors   
            must be stored in consecutive columns of VR, as returned by   
            ZHSEIN or ZTREVC.   
            If JOB = 'V', VR is not referenced.   

    LDVR    (input) INTEGER   
            The leading dimension of the array VR.   
            LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N.   

    S       (output) DOUBLE PRECISION array, dimension (MM)   
            If JOB = 'E' or 'B', the reciprocal condition numbers of the 
  
            selected eigenvalues, stored in consecutive elements of the   
            array. Thus S(j), SEP(j), and the j-th columns of VL and VR   
            all correspond to the same eigenpair (but not in general the 
  
            j-th eigenpair, unless all eigenpairs are selected).   
            If JOB = 'V', S is not referenced.   

    SEP     (output) DOUBLE PRECISION array, dimension (MM)   
            If JOB = 'V' or 'B', the estimated reciprocal condition   
            numbers of the selected eigenvectors, stored in consecutive   
            elements of the array.   
            If JOB = 'E', SEP is not referenced.   

    MM      (input) INTEGER   
            The number of elements in the arrays S (if JOB = 'E' or 'B') 
  
             and/or SEP (if JOB = 'V' or 'B'). MM >= M.   

    M       (output) INTEGER   
            The number of elements of the arrays S and/or SEP actually   
            used to store the estimated condition numbers.   
            If HOWMNY = 'A', M is set to N.   

    WORK    (workspace) COMPLEX*16 array, dimension (LDWORK,N+1)   
            If JOB = 'E', WORK is not referenced.   

    LDWORK  (input) INTEGER   
            The leading dimension of the array WORK.   
            LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N.   

    RWORK   (workspace) DOUBLE PRECISION array, dimension (N)   
            If JOB = 'E', RWORK is not referenced.   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    Further Details   
    ===============   

    The reciprocal of the condition number of an eigenvalue lambda is   
    defined as   

            S(lambda) = |v'*u| / (norm(u)*norm(v))   

    where u and v are the right and left eigenvectors of T corresponding 
  
    to lambda; v' denotes the conjugate transpose of v, and norm(u)   
    denotes the Euclidean norm. These reciprocal condition numbers always 
  
    lie between zero (very badly conditioned) and one (very well   
    conditioned). If n = 1, S(lambda) is defined to be 1.   

    An approximate error bound for a computed eigenvalue W(i) is given by 
  

                        EPS * norm(T) / S(i)   

    where EPS is the machine precision.   

    The reciprocal of the condition number of the right eigenvector u   
    corresponding to lambda is defined as follows. Suppose   

                T = ( lambda  c  )   
                    (   0    T22 )   

    Then the reciprocal condition number is   

            SEP( lambda, T22 ) = sigma-min( T22 - lambda*I )   

    where sigma-min denotes the smallest singular value. We approximate   
    the smallest singular value by the reciprocal of an estimate of the   
    one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is   
    defined to be abs(T(1,1)).   

    An approximate error bound for a computed right eigenvector VR(i)   
    is given by   

                        EPS * norm(T) / SEP(i)   

    ===================================================================== 
  


       Decode and test the input parameters   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    
    /* System generated locals */
    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, 
	    work_dim1, work_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2;
    doublecomplex z__1;
    /* Builtin functions */
    double z_abs(doublecomplex *), d_imag(doublecomplex *);
    /* Local variables */
    static integer kase, ierr;
    static doublecomplex prod;
    static doublereal lnrm, rnrm;
    static integer i, j, k;
    static doublereal scale;
    extern logical lsame_(char *, char *);
    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *);
    static doublecomplex dummy[1];
    static logical wants;
    static doublereal xnorm;
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    extern doublereal dznrm2_(integer *, doublecomplex *, integer *), dlamch_(
	    char *);
    static integer ks, ix;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal bignum;
    static logical wantbh;
    extern /* Subroutine */ int zlacon_(integer *, doublecomplex *, 
	    doublecomplex *, doublereal *, integer *);
    extern integer izamax_(integer *, doublecomplex *, integer *);
    static logical somcon;
    extern /* Subroutine */ int zdrscl_(integer *, doublereal *, 
	    doublecomplex *, integer *);
    static char normin[1];
    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *);
    static doublereal smlnum;
    static logical wantsp;
    extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, 
	    integer *, doublecomplex *, integer *, doublecomplex *, 
	    doublereal *, doublereal *, integer *), ztrexc_(char *, integer *, doublecomplex *, integer *, 
	    doublecomplex *, integer *, integer *, integer *, integer *);
    static doublereal eps, est;



#define DUMMY(I) dummy[(I)]
#define SELECT(I) select[(I)-1]
#define S(I) s[(I)-1]
#define SEP(I) sep[(I)-1]
#define RWORK(I) rwork[(I)-1]

#define T(I,J) t[(I)-1 + ((J)-1)* ( *ldt)]
#define VL(I,J) vl[(I)-1 + ((J)-1)* ( *ldvl)]
#define VR(I,J) vr[(I)-1 + ((J)-1)* ( *ldvr)]
#define WORK(I,J) work[(I)-1 + ((J)-1)* ( *ldwork)]

    wantbh = lsame_(job, "B");
    wants = lsame_(job, "E") || wantbh;
    wantsp = lsame_(job, "V") || wantbh;

    somcon = lsame_(howmny, "S");

/*     Set M to the number of eigenpairs for which condition numbers are 
  
       to be computed. */

    if (somcon) {
	*m = 0;
	i__1 = *n;
	for (j = 1; j <= *n; ++j) {
	    if (SELECT(j)) {
		++(*m);
	    }
/* L10: */
	}
    } else {
	*m = *n;
    }

    *info = 0;
    if (! wants && ! wantsp) {
	*info = -1;
    } else if (! lsame_(howmny, "A") && ! somcon) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*ldt < max(1,*n)) {
	*info = -6;
    } else if (*ldvl < 1 || wants && *ldvl < *n) {
	*info = -8;
    } else if (*ldvr < 1 || wants && *ldvr < *n) {
	*info = -10;
    } else if (*mm < *m) {
	*info = -13;
    } else if (*ldwork < 1 || wantsp && *ldwork < *n) {
	*info = -16;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZTRSNA", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    if (*n == 1) {
	if (somcon) {
	    if (! SELECT(1)) {
		return 0;
	    }
	}
	if (wants) {
	    S(1) = 1.;
	}
	if (wantsp) {
	    SEP(1) = z_abs(&T(1,1));
	}
	return 0;
    }

/*     Get machine constants */

    eps = dlamch_("P");
    smlnum = dlamch_("S") / eps;
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);

    ks = 1;
    i__1 = *n;
    for (k = 1; k <= *n; ++k) {

	if (somcon) {
	    if (! SELECT(k)) {
		goto L50;
	    }
	}

	if (wants) {

/*           Compute the reciprocal condition number of the k-th 
  
             eigenvalue. */

	    zdotc_(&z__1, n, &VR(1,ks), &c__1, &VL(1,ks), &c__1);
	    prod.r = z__1.r, prod.i = z__1.i;
	    rnrm = dznrm2_(n, &VR(1,ks), &c__1);
	    lnrm = dznrm2_(n, &VL(1,ks), &c__1);
	    S(ks) = z_abs(&prod) / (rnrm * lnrm);

	}

	if (wantsp) {

/*           Estimate the reciprocal condition number of the k-th 
  
             eigenvector.   

             Copy the matrix T to the array WORK and swap the k-th
   
             diagonal element to the (1,1) position. */

	    zlacpy_("Full", n, n, &T(1,1), ldt, &WORK(1,1), 
		    ldwork);
	    ztrexc_("No Q", n, &WORK(1,1), ldwork, dummy, &c__1, &k, &
		    c__1, &ierr);

/*           Form  C = T22 - lambda*I in WORK(2:N,2:N). */

	    i__2 = *n;
	    for (i = 2; i <= *n; ++i) {
		i__3 = i + i * work_dim1;
		i__4 = i + i * work_dim1;
		i__5 = work_dim1 + 1;
		z__1.r = WORK(i,i).r - WORK(1,1).r, z__1.i = WORK(i,i).i - 
			WORK(1,1).i;
		WORK(i,i).r = z__1.r, WORK(i,i).i = z__1.i;
/* L20: */
	    }

/*           Estimate a lower bound for the 1-norm of inv(C'). The
 1st   
             and (N+1)th columns of WORK are used to store work ve
ctors. */

	    SEP(ks) = 0.;
	    est = 0.;
	    kase = 0;
	    *(unsigned char *)normin = 'N';
L30:
	    i__2 = *n - 1;
	    zlacon_(&i__2, &WORK(1,*n+1), &WORK(1,1)
		    , &est, &kase);

	    if (kase != 0) {
		if (kase == 1) {

/*                 Solve C'*x = scale*b */

		    i__2 = *n - 1;
		    zlatrs_("Upper", "Conjugate transpose", "Nonunit", normin,
			     &i__2, &WORK(2,2), ldwork, &
			    WORK(1,1), &scale, &RWORK(1), &ierr);
		} else {

/*                 Solve C*x = scale*b */

		    i__2 = *n - 1;
		    zlatrs_("Upper", "No transpose", "Nonunit", normin, &i__2,
			     &WORK(2,2), ldwork, &WORK(1,1), &scale, &RWORK(1), &ierr);
		}
		*(unsigned char *)normin = 'Y';
		if (scale != 1.) {

/*                 Multiply by 1/SCALE if doing so will no
t cause   
                   overflow. */

		    i__2 = *n - 1;
		    ix = izamax_(&i__2, &WORK(1,1), &c__1);
		    i__2 = ix + work_dim1;
		    xnorm = (d__1 = WORK(ix,1).r, abs(d__1)) + (d__2 = d_imag(
			    &WORK(ix,1)), abs(d__2));
		    if (scale < xnorm * smlnum || scale == 0.) {
			goto L40;
		    }
		    zdrscl_(n, &scale, &WORK(1,1), &c__1);
		}
		goto L30;
	    }

	    SEP(ks) = 1. / max(est,smlnum);
	}

L40:
	++ks;
L50:
	;
    }
    return 0;

/*     End of ZTRSNA */

} /* ztrsna_ */
Пример #18
0
Vector SchemeRoe(const Cell& Cell1,const Cell& Cell2,const Cell& Cell3,const Cell& Cell4, int AxisNo)
{

		// Local variables

		Vector  V1, V2, V3, V4;                 // Velocities
		real 		rho1, rho2, rho3, rho4;					// Densities
		real		p1, p2, p3, p4;									// Pressures
		real		rhoE1, rhoE2, rhoE3, rhoE4;	    // Energies
		Vector  Result(QuantityNb);

		Vector	F1(QuantityNb);                 // Fluxes
		Vector  F2(QuantityNb);
		Vector  F3(QuantityNb);
		Vector  F4(QuantityNb);

		Vector Q1, Q2, Q3, Q4;									// Conservative quantities

		Vector  FL(QuantityNb),FR(QuantityNb);  // Left and right fluxex
		Vector  QL(QuantityNb),QR(QuantityNb);  // Left and right conservative quantities
		real    rhoL, rhoR;              				// Left and right densities
		real		pL, pR;                         // Left and right pressures
		real		rhoEL, rhoER;                   // Left and right energies
 		Vector	VL(Dimension), VR(Dimension);   // Left and right velocities

		Vector  One(QuantityNb);

		real		rho;														 // central density with Roe's average
		Vector  V(Dimension);										 // central velocity with Roe's average
    		real    H;                               // central enthalpy with Roe's average
    		real    c;                               // central speed of sound with Roe's average
		real		Roe;														 // Coefficient for Roe's average

		Matrix L, R;                              // left and right eigenmatrix
		Matrix Lambda(QuantityNb);								// diagonal matrix containing the eigenvalues
		Matrix A;																	// absolute value of the jacobian matrix

		Vector	 Lim;															// limiter (Van Leer)

	  int i;        														// coutner


		// vector one.

		for(i=1; i<=QuantityNb; i++ )
			One.setValue(i,1.);

 		// --- Get conservative quantities ---

		Q1 = Cell1.average();
		Q2 = Cell2.average();
		Q3 = Cell3.average();
		Q4 = Cell4.average();

		// --- Get primitive variables ---

		// density

    		rho1   = Cell1.density();
    		rho2   = Cell2.density();
    		rho3   = Cell3.density();
    		rho4   = Cell4.density();

		// velocity

 		V1 = Cell1.velocity();
 		V2 = Cell2.velocity();
 		V3 = Cell3.velocity();
 		V4 = Cell4.velocity();

		// energy

		rhoE1 = Cell1.energy();
		rhoE2 = Cell2.energy();
		rhoE3 = Cell3.energy();
		rhoE4 = Cell4.energy();

		// pressure

		p1 = Cell1.pressure();
		p2 = Cell2.pressure();
		p3 = Cell3.pressure();
		p4 = Cell4.pressure();

    // --- Compute Euler fluxes ---

		F1.setValue(1,rho1*V1.value(AxisNo));
		F2.setValue(1,rho2*V2.value(AxisNo));
 		F3.setValue(1,rho3*V3.value(AxisNo));
		F4.setValue(1,rho4*V4.value(AxisNo));

		for(i=1; i<=Dimension; i++)
		{
			F1.setValue(i+1, rho1*V1.value(AxisNo)*V1.value(i) + ((AxisNo == i)? p1 : 0.));
			F2.setValue(i+1, rho2*V2.value(AxisNo)*V2.value(i) + ((AxisNo == i)? p2 : 0.));
 			F3.setValue(i+1, rho3*V3.value(AxisNo)*V3.value(i) + ((AxisNo == i)? p3 : 0.));
			F4.setValue(i+1, rho4*V4.value(AxisNo)*V4.value(i) + ((AxisNo == i)? p4 : 0.));
		}

		F1.setValue(QuantityNb,(rhoE1+p1)*V1.value(AxisNo));
		F2.setValue(QuantityNb,(rhoE2+p2)*V2.value(AxisNo));
 		F3.setValue(QuantityNb,(rhoE3+p3)*V3.value(AxisNo));
		F4.setValue(QuantityNb,(rhoE4+p4)*V4.value(AxisNo));

		// --- Van Leer limiter ---

	 	// Left

	 	Lim = Limiter(Q3-Q2, Q2-Q1);
		FL  = F2 + 0.5*(Lim|(F2-F1)) + 0.5*((One-Lim)|(F3-F2));
		QL  = Q2 + 0.5*(Lim|(Q2-Q1)) + 0.5*((One-Lim)|(Q3-Q2));

		// Right

		Lim = Limiter(Q3-Q2, Q4-Q3);
		FR  = F3 - 0.5*(Lim|(F4-F3)) - 0.5*((One-Lim)|(F3-F2));
		QR  = Q3 - 0.5*(Lim|(Q4-Q3)) - 0.5*((One-Lim)|(Q3-Q2));

/*
		FL = F2;
		FR = F3;
		QL = Q2;
		QR = Q3;
*/

		// --- Extract left and right primitive variables ---

		rhoL = QL.value(1);
		rhoR = QR.value(1);

		for (i=1; i<= Dimension; i++)
		{
			 VL.setValue(i,QL.value(i+1)/rhoL);
			 VR.setValue(i,QR.value(i+1)/rhoR);
		}

		rhoEL=QL.value(QuantityNb);
		rhoER=QR.value(QuantityNb);

		pL = (Gamma-1)*(rhoEL - .5*rhoL*(VL*VL));
		pR = (Gamma-1)*(rhoER - .5*rhoR*(VR*VR));

    		// --- Compute Roe's averages ---

		Roe = sqrt(rhoR/rhoL);

		rho = Roe*rhoL;
		V   = 1./(1.+Roe)*( Roe*VR + VL );
		H   = 1./(1.+Roe)*( Roe*(rhoER+pR)/rhoR + (rhoEL+pL)/rhoL );

		c   = sqrt ( (Gamma-1)*( H - 0.5*(V*V) ) );

    		// --- Compute diagonal matrix containing the absolute value of the eigenvalues ---

		for (i=1;i<=Dimension;i++)
			Lambda.setValue(i,i, fabs(V.value(AxisNo)));

		Lambda.setValue(Dimension+1, Dimension+1, fabs(V.value(AxisNo)+c));
		Lambda.setValue(Dimension+2, Dimension+2, fabs(V.value(AxisNo)-c));

		// --- Set left and right eigenmatrices ---

		L.setEigenMatrix(true, AxisNo, V, c);
		R.setEigenMatrix(false, AxisNo, V, c, H);

		// --- Compute absolute Jacobian matrix ---

  		A = R*Lambda*L;
	
    		// --- Compute Euler Flux ---

		Result = 0.5*(FL+FR) - 0.5*(A*(QR-QL));

		return Result;
}
Пример #19
0
magma_int_t magma_strevc3(
    magma_side_t side, magma_vec_t howmany,
    magma_int_t *select,  // logical in fortran
    magma_int_t n,
    float *T,  magma_int_t ldt,
    float *VL, magma_int_t ldvl,
    float *VR, magma_int_t ldvr,
    magma_int_t mm, magma_int_t *mout,
    float *work, magma_int_t lwork,
    #ifdef COMPLEX
    float *rwork,
    #endif
    magma_int_t *info )
{
#define T(i,j)  (T  + (i) + (j)*ldt)
#define VL(i,j) (VL + (i) + (j)*ldvl)
#define VR(i,j) (VR + (i) + (j)*ldvr)
#define X(i,j)  (X  + (i)-1 + ((j)-1)*2)  // still as 1-based indices
#define work(i,j) (work + (i) + (j)*n)

    // constants
    const magma_int_t ione = 1;
    const float c_zero = 0;
    const float c_one  = 1;
    const magma_int_t nbmin = 16, nbmax = 256;

    // .. Local Scalars ..
    magma_int_t allv, bothv, leftv, over, pair, rightv, somev;
    magma_int_t i, ierr, ii, ip, is, j, k, ki, ki2,
                iv, n2, nb, nb2, version;
    float emax, remax;
    
    // .. Local Arrays ..
    // since iv is a 1-based index, allocate one extra here
    magma_int_t iscomplex[ nbmax+1 ];

    // Decode and test the input parameters
    bothv  = (side == MagmaBothSides);
    rightv = (side == MagmaRight) || bothv;
    leftv  = (side == MagmaLeft ) || bothv;

    allv  = (howmany == MagmaAllVec);
    over  = (howmany == MagmaBacktransVec);
    somev = (howmany == MagmaSomeVec);

    *info = 0;
    if ( ! rightv && ! leftv )
        *info = -1;
    else if ( ! allv && ! over && ! somev )
        *info = -2;
    else if ( n < 0 )
        *info = -4;
    else if ( ldt < max( 1, n ) )
        *info = -6;
    else if ( ldvl < 1 || ( leftv && ldvl < n ) )
        *info = -8;
    else if ( ldvr < 1 || ( rightv && ldvr < n ) )
        *info = -10;
    else if ( lwork < max( 1, 3*n ) )
        *info = -14;
    else {
        // Set mout to the number of columns required to store the selected
        // eigenvectors, standardize the array select if necessary, and
        // test mm.
        if ( somev ) {
            *mout = 0;
            pair = false;
            for( j=0; j < n; ++j ) {
                if ( pair ) {
                    pair = false;
                    select[j] = false;
                }
                else {
                    if ( j < n-1 ) {
                        if ( *T(j+1,j) == c_zero ) {
                            if ( select[j] ) {
                                *mout += 1;
                            }
                        }
                        else {
                            pair = true;
                            if ( select[j] || select[j+1] ) {
                                select[j] = true;
                                *mout += 2;
                            }
                        }
                    }
                    else if ( select[n-1] ) {
                        *mout += 1;
                    }
                }
            }
        }
        else {
            *mout = n;
        }
        if ( mm < *mout ) {
            *info = -11;
        }
    }
    
    if ( *info != 0 ) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    // Quick return if possible.
    if ( n == 0 ) {
        return *info;
    }
    
    // Use blocked version (2) if sufficient workspace.
    // Requires 1 vector for 1-norms, and 2*nb vectors for x and Q*x.
    // Zero-out the workspace to avoid potential NaN propagation.
    nb = 2;
    if ( lwork >= n + 2*n*nbmin ) {
        version = 2;
        nb = (lwork - n) / (2*n);
        nb = min( nb, nbmax );
        nb2 = 1 + 2*nb;
        lapackf77_slaset( "F", &n, &nb2, &c_zero, &c_zero, work, &n );
    }
    else {
        version = 1;
    }

    // Compute 1-norm of each column of strictly upper triangular
    // part of T to control overflow in triangular solver.
    *work(0,0) = c_zero;
    for( j=1; j < n; ++j ) {
        *work(j,0) = c_zero;
        for( i=0; i < j; ++i ) {
            *work(j,0) += fabsf( *T(i,j) );
        }
    }

    magma_timer_t time_total=0, time_trsv=0, time_gemm=0, time_gemv=0, time_trsv_sum=0, time_gemm_sum=0, time_gemv_sum=0;
    timer_start( time_total );

    // Index ip is used to specify the real or complex eigenvalue:
    // ip =  0, real eigenvalue (wr),
    //    =  1, first  of conjugate complex pair: (wr,wi)
    //    = -1, second of conjugate complex pair: (wr,wi)
    // iscomplex array stores ip for each column in current block.
    if ( rightv ) {
        // ============================================================
        // Compute right eigenvectors.
        // iv is index of column in current block (1-based).
        // For complex right vector, uses iv-1 for real part and iv for complex part.
        // Non-blocked version always uses iv=2;
        // blocked     version starts with iv=nb, goes down to 1 or 2.
        // (Note the "0-th" column is used for 1-norms computed above.)
        iv = 2;
        if ( version == 2 ) {
            iv = nb;
        }

        timer_start( time_trsv );
        ip = 0;
        is = *mout - 1;
        for( ki=n-1; ki >= 0; --ki ) {
            if ( ip == -1 ) {
                // previous iteration (ki+1) was second of conjugate pair,
                // so this ki is first of conjugate pair; skip to end of loop
                ip = 1;
                continue;
            }
            else if ( ki == 0 ) {
                // last column, so this ki must be real eigenvalue
                ip = 0;
            }
            else if ( *T(ki,ki-1) == c_zero ) {
                // zero on sub-diagonal, so this ki is real eigenvalue
                ip = 0;
            }
            else {
                // non-zero on sub-diagonal, so this ki is second of conjugate pair
                ip = -1;
            }

            if ( somev ) {
                if ( ip == 0 ) {
                    if ( ! select[ki] ) {
                        continue;
                    }
                }
                else {
                    if ( ! select[ki-1] ) {
                        continue;
                    }
                }
            }

            if ( ip == 0 ) {
                // ------------------------------------------------------------
                // Real right eigenvector
                // Solve upper quasi-triangular system:
                // [ T(0:ki-1,0:ki-1) - wr ]*X = -T(0:ki-1,ki)
                magma_slaqtrsd( MagmaNoTrans, ki+1, T(0,0), ldt,
                                work(0,iv), n, work(0,0), &ierr );
                
                // Copy the vector x or Q*x to VR and normalize.
                if ( ! over ) {
                    // ------------------------------
                    // no back-transform: copy x to VR and normalize.
                    n2 = ki+1;
                    blasf77_scopy( &n2, work(0,iv), &ione, VR(0,is), &ione );

                    ii = blasf77_isamax( &n2, VR(0,is), &ione ) - 1;  // subtract 1; ii is 0-based
                    remax = c_one / fabsf( *VR(ii,is) );
                    blasf77_sscal( &n2, &remax, VR(0,is), &ione );

                    for( k=ki + 1; k < n; ++k ) {
                        *VR(k,is) = c_zero;
                    }
                }
                else if ( version == 1 ) {
                    // ------------------------------
                    // version 1: back-transform each vector with GEMV, Q*x.
                    time_trsv_sum += timer_stop( time_trsv );
                    timer_start( time_gemv );
                    if ( ki > 0 ) {
                        n2 = ki;
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VR, &ldvr,
                                       work(0, iv), &ione,
                                       work(ki,iv), VR(0,ki), &ione );
                    }
                    time_gemv_sum += timer_stop( time_gemv );
                    ii = blasf77_isamax( &n, VR(0,ki), &ione ) - 1;  // subtract 1; ii is 0-based
                    remax = c_one / fabsf( *VR(ii,ki) );
                    blasf77_sscal( &n, &remax, VR(0,ki), &ione );
                    timer_start( time_trsv );
                }
                else if ( version == 2 ) {
                    // ------------------------------
                    // version 2: back-transform block of vectors with GEMM
                    // zero out below vector
                    for( k=ki + 1; k < n; ++k ) {
                        *work(k,iv) = c_zero;
                    }
                    iscomplex[ iv ] = ip;
                    // back-transform and normalization is done below
                }
            }  // end real eigenvector
            else {
                // ------------------------------------------------------------
                // Complex right eigenvector
                // Solve upper quasi-triangular system:
                // [ T(0:ki-2,0:ki-2) - (wr+i*wi) ]*x = u
                magma_slaqtrsd( MagmaNoTrans, ki+1, T(0,0), ldt,
                                work(0,iv-1), n, work(0,0), &ierr );

                // Copy the vector x or Q*x to VR and normalize.
                if ( ! over ) {
                    // ------------------------------
                    // no back-transform: copy x to VR and normalize.
                    n2 = ki+1;
                    blasf77_scopy( &n2, work(0,iv-1), &ione, VR(0,is-1), &ione );
                    blasf77_scopy( &n2, work(0,iv  ), &ione, VR(0,is  ), &ione );

                    emax = c_zero;
                    for( k=0; k <= ki; ++k ) {
                        emax = max( emax, fabsf(*VR(k,is-1)) + fabsf(*VR(k,is)) );
                    }
                    remax = c_one / emax;
                    blasf77_sscal( &n2, &remax, VR(0,is-1), &ione );
                    blasf77_sscal( &n2, &remax, VR(0,is  ), &ione );

                    for( k=ki + 1; k < n; ++k ) {
                        *VR(k,is-1) = c_zero;
                        *VR(k,is  ) = c_zero;
                    }
                }
                else if ( version == 1 ) {
                    // ------------------------------
                    // version 1: back-transform each vector with GEMV, Q*x.
                    time_trsv_sum += timer_stop( time_trsv );
                    timer_start( time_gemv );
                    if ( ki > 1 ) {
                        n2 = ki-1;
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VR, &ldvr,
                                       work(0,   iv-1), &ione,
                                       work(ki-1,iv-1), VR(0,ki-1), &ione );
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VR, &ldvr,
                                       work(0, iv), &ione,
                                       work(ki,iv), VR(0,ki), &ione );
                    }
                    else {
                        blasf77_sscal( &n, work(ki-1,iv-1), VR(0,ki-1), &ione );
                        blasf77_sscal( &n, work(ki,  iv  ), VR(0,ki  ), &ione );
                    }
                    time_gemv_sum += timer_stop( time_gemv );

                    emax = c_zero;
                    for( k=0; k < n; ++k ) {
                        emax = max( emax, fabsf(*VR(k,ki-1)) + fabsf(*VR(k,ki)) );
                    }
                    remax = c_one / emax;
                    blasf77_sscal( &n, &remax, VR(0,ki-1), &ione );
                    blasf77_sscal( &n, &remax, VR(0,ki  ), &ione );
                    timer_start( time_trsv );
                }
                else if ( version == 2 ) {
                    // ------------------------------
                    // version 2: back-transform block of vectors with GEMM
                    // zero out below vector
                    for( k=ki + 1; k < n; ++k ) {
                        *work(k,iv-1) = c_zero;
                        *work(k,iv  ) = c_zero;
                    }
                    iscomplex[ iv-1 ] = -ip;
                    iscomplex[ iv   ] =  ip;
                    iv -= 1;
                    // back-transform and normalization is done below
                }
            }  // end real or complex vector

            if ( version == 2 ) {
                // ------------------------------------------------------------
                // Blocked version of back-transform
                // For complex case, ki2 includes both vectors (ki-1 and ki)
                if ( ip == 0 ) {
                    ki2 = ki;
                }
                else {
                    ki2 = ki - 1;
                }

                // Columns iv:nb of work are valid vectors.
                // When the number of vectors stored reaches nb-1 or nb,
                // or if this was last vector, do the GEMM
                if ( (iv <= 2) || (ki2 == 0) ) {
                    time_trsv_sum += timer_stop( time_trsv );
                    timer_start( time_gemm );
                    nb2 = nb-iv+1;
                    n2  = ki2+nb-iv+1;
                    blasf77_sgemm( "n", "n", &n, &nb2, &n2, &c_one,
                                   VR, &ldvr,
                                   work(0,iv), &n,
                                   &c_zero,
                                   work(0,nb+iv), &n );
                    time_gemm_sum += timer_stop( time_gemm );
                    
                    // normalize vectors
                    // TODO if somev, should copy vectors individually to correct location.
                    for( k=iv; k <= nb; ++k ) {
                        if ( iscomplex[k] == 0 ) {
                            // real eigenvector
                            ii = blasf77_isamax( &n, work(0,nb+k), &ione ) - 1;  // subtract 1; ii is 0-based
                            remax = c_one / fabsf( *work(ii,nb+k) );
                        }
                        else if ( iscomplex[k] == 1 ) {
                            // first eigenvector of conjugate pair
                            emax = c_zero;
                            for( ii=0; ii < n; ++ii ) {
                                emax = max( emax, fabsf( *work(ii,nb+k  ) )
                                                + fabsf( *work(ii,nb+k+1) ) );
                            }
                            remax = c_one / emax;
                        // else if iscomplex[k] == -1
                        //     second eigenvector of conjugate pair
                        //     reuse same remax as previous k
                        }
                        blasf77_sscal( &n, &remax, work(0,nb+k), &ione );
                    }
                    nb2 = nb-iv+1;
                    lapackf77_slacpy( "F", &n, &nb2,
                                      work(0,nb+iv), &n,
                                      VR(0,ki2), &ldvr );
                    iv = nb;
                    timer_start( time_trsv );
                }
                else {
                    iv -= 1;
                }
            }  // end blocked back-transform

            is -= 1;
            if ( ip != 0 ) {
                is -= 1;
            }
        }
    }
    timer_stop( time_trsv );

    timer_stop( time_total );
    timer_printf( "trevc trsv %.4f, gemm %.4f, gemv %.4f, total %.4f\n",
                  time_trsv_sum, time_gemm_sum, time_gemv_sum, time_total );

    if ( leftv ) {
        // ============================================================
        // Compute left eigenvectors.
        // iv is index of column in current block (1-based).
        // For complex left vector, uses iv for real part and iv+1 for complex part.
        // Non-blocked version always uses iv=1;
        // blocked     version starts with iv=1, goes up to nb-1 or nb.
        // (Note the "0-th" column is used for 1-norms computed above.)
        iv = 1;
        ip = 0;
        is = 0;
        for( ki=0; ki < n; ++ki ) {
            if ( ip == 1 ) {
                // previous iteration (ki-1) was first of conjugate pair,
                // so this ki is second of conjugate pair; skip to end of loop
                ip = -1;
                continue;
            }
            else if ( ki == n-1 ) {
                // last column, so this ki must be real eigenvalue
                ip = 0;
            }
            else if ( *T(ki+1,ki) == c_zero ) {
                // zero on sub-diagonal, so this ki is real eigenvalue
                ip = 0;
            }
            else {
                // non-zero on sub-diagonal, so this ki is first of conjugate pair
                ip = 1;
            }

            if ( somev ) {
                if ( ! select[ki] ) {
                    continue;
                }
            }

            if ( ip == 0 ) {
                // ------------------------------------------------------------
                // Real left eigenvector
                // Solve transposed quasi-triangular system:
                // [ T(ki+1:n,ki+1:n) - wr ]**T * X = -T(ki+1:n,ki)
                magma_slaqtrsd( MagmaTrans, n-ki, T(ki,ki), ldt,
                                work(ki,iv), n, work(ki,0), &ierr );

                // Copy the vector x or Q*x to VL and normalize.
                if ( ! over ) {
                    // ------------------------------
                    // no back-transform: copy x to VL and normalize.
                    n2 = n-ki;
                    blasf77_scopy( &n2, work(ki,iv), &ione, VL(ki,is), &ione );

                    ii = blasf77_isamax( &n2, VL(ki,is), &ione ) + ki - 1;  // subtract 1; ii is 0-based
                    remax = c_one / fabsf( *VL(ii,is) );
                    blasf77_sscal( &n2, &remax, VL(ki,is), &ione );

                    for( k=0; k < ki; ++k ) {
                        *VL(k,is) = c_zero;
                    }
                }
                else if ( version == 1 ) {
                    // ------------------------------
                    // version 1: back-transform each vector with GEMV, Q*x.
                    if ( ki < n-1 ) {
                        n2 = n-ki-1;
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VL(0,ki+1), &ldvl,
                                       work(ki+1,iv), &ione,
                                       work(ki,  iv), VL(0,ki), &ione );
                    }
                    ii = blasf77_isamax( &n, VL(0,ki), &ione ) - 1;  // subtract 1; ii is 0-based
                    remax = c_one / fabsf( *VL(ii,ki) );
                    blasf77_sscal( &n, &remax, VL(0,ki), &ione );
                }
                else if ( version == 2 ) {
                    // ------------------------------
                    // version 2: back-transform block of vectors with GEMM
                    // zero out above vector
                    // could go from (ki+1)-NV+1 to ki
                    for( k=0; k < ki; ++k ) {
                        *work(k,iv) = c_zero;
                    }
                    iscomplex[ iv ] = ip;
                    // back-transform and normalization is done below
                }
            }  // end real eigenvector
            else {
                // ------------------------------------------------------------
                // Complex left eigenvector
                // Solve transposed quasi-triangular system:
                // [ T(ki+2:n,ki+2:n)**T - (wr-i*wi) ]*X = V
                magma_slaqtrsd( MagmaTrans, n-ki, T(ki,ki), ldt,
                                work(ki,iv), n, work(ki,0), &ierr );

                // Copy the vector x or Q*x to VL and normalize.
                if ( ! over ) {
                    // ------------------------------
                    // no back-transform: copy x to VL and normalize.
                    n2 = n-ki;
                    blasf77_scopy( &n2, work(ki,iv  ), &ione, VL(ki,is  ), &ione );
                    blasf77_scopy( &n2, work(ki,iv+1), &ione, VL(ki,is+1), &ione );

                    emax = c_zero;
                    for( k=ki; k < n; ++k ) {
                        emax = max( emax, fabsf(*VL(k,is))+ fabsf(*VL(k,is+1)) );
                    }
                    remax = c_one / emax;
                    blasf77_sscal( &n2, &remax, VL(ki,is  ), &ione );
                    blasf77_sscal( &n2, &remax, VL(ki,is+1), &ione );

                    for( k=0; k < ki; ++k ) {
                        *VL(k,is  ) = c_zero;
                        *VL(k,is+1) = c_zero;
                    }
                }
                else if ( version == 1 ) {
                    // ------------------------------
                    // version 1: back-transform each vector with GEMV, Q*x.
                    if ( ki < n-2 ) {
                        n2 = n-ki-2;
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VL(0,ki+2), &ldvl,
                                       work(ki+2,iv), &ione,
                                       work(ki,  iv), VL(0,ki), &ione );
                        blasf77_sgemv( "n", &n, &n2, &c_one,
                                       VL(0,ki+2), &ldvl,
                                       work(ki+2,iv+1), &ione,
                                       work(ki+1,iv+1), VL(0,ki+1), &ione );
                    }
                    else {
                        blasf77_sscal( &n, work(ki,  iv  ), VL(0, ki  ), &ione );
                        blasf77_sscal( &n, work(ki+1,iv+1), VL(0, ki+1), &ione );
                    }

                    emax = c_zero;
                    for( k=0; k < n; ++k ) {
                        emax = max( emax, fabsf(*VL(k,ki))+ fabsf(*VL(k,ki+1)) );
                    }
                    remax = c_one / emax;
                    blasf77_sscal( &n, &remax, VL(0,ki  ), &ione );
                    blasf77_sscal( &n, &remax, VL(0,ki+1), &ione );
                }
                else if ( version == 2 ) {
                    // ------------------------------
                    // version 2: back-transform block of vectors with GEMM
                    // zero out above vector
                    // could go from (ki+1)-NV+1 to ki
                    for( k=0; k < ki; ++k ) {
                        *work(k,iv  ) = c_zero;
                        *work(k,iv+1) = c_zero;
                    }
                    iscomplex[ iv   ] =  ip;
                    iscomplex[ iv+1 ] = -ip;
                    iv += 1;
                    // back-transform and normalization is done below
                }
            }  // end real or complex eigenvector

            if ( version == 2 ) {
                // -------------------------------------------------
                // Blocked version of back-transform
                // For complex case, (ki2+1) includes both vectors (ki+1) and (ki+2)
                if ( ip == 0 ) {
                    ki2 = ki;
                }
                else {
                    ki2 = ki + 1;
                }

                // Columns 1:iv of work are valid vectors.
                // When the number of vectors stored reaches nb-1 or nb,
                // or if this was last vector, do the GEMM
                if ( (iv >= nb-1) || (ki2 == n-1) ) {
                    n2 = n-(ki2+1)+iv;
                    blasf77_sgemm( "n", "n", &n, &iv, &n2, &c_one,
                                   VL(0,ki2-iv+1), &ldvl,
                                   work(ki2-iv+1,1), &n,
                                   &c_zero,
                                   work(0,nb+1), &n );
                    // normalize vectors
                    for( k=1; k <= iv; ++k ) {
                        if ( iscomplex[k] == 0 ) {
                            // real eigenvector
                            ii = blasf77_isamax( &n, work(0,nb+k), &ione ) - 1;  // subtract 1; ii is 0-based
                            remax = c_one / fabsf( *work(ii,nb+k) );
                        }
                        else if ( iscomplex[k] == 1) {
                            // first eigenvector of conjugate pair
                            emax = c_zero;
                            for( ii=0; ii < n; ++ii ) {
                                emax = max( emax, fabsf( *work(ii,nb+k  ) )
                                                + fabsf( *work(ii,nb+k+1) ) );
                            }
                            remax = c_one / emax;
                        // else if iscomplex[k] == -1
                        //     second eigenvector of conjugate pair
                        //     reuse same remax as previous k
                        }
                        blasf77_sscal( &n, &remax, work(0,nb+k), &ione );
                    }
                    lapackf77_slacpy( "F", &n, &iv,
                                      work(0,nb+1), &n,
                                      VL(0,ki2-iv+1), &ldvl );
                    iv = 1;
                }
                else {
                    iv += 1;
                }
            } // blocked back-transform

            is += 1;
            if ( ip != 0 ) {
                is += 1;
            }
        }
    }

    return *info;
}  // end of STREVC3
Пример #20
0
void tree_build(struct env *env)
{
    VL(1) fprintf(err, "Finding leaf...\n");
    int i;

    struct particle *p = env->ps;
    for (i=env->N - 1; i >= 0; i--, p++)
    {
        struct tree *node = env->tree;

        while (node->left != NULL) // && node->right != NULL)
        {
            register float t = p->r[node->d] - node->split;
            int q0 = 0-signbit(t);
            int q1 = signbit(t)-1;

            long r0 = (long)(node->left);
            long r1 = (long)(node->right);
            long q2 = r0 & q0;
            long q3 = r1 & q1;
            node = (struct tree *)(q2 + q3);

            //node = (struct tree *)(((unsigned long)(node->left)  &  q1)
            //                    + ((unsigned long)(node->right) & ~q1));
        }

        while (node->count == MAX_PIC)
        {
            int d, j;
            real split;

            assign_split(node, &d, &split);

            CREATE_BRANCH(node, d, left, -);
            CREATE_BRANCH(node, d, right, +);

            struct tree *top = node;
            struct tree *left = node->left;
            struct tree *right = node->right;
            struct particle **tlist = top->list;

            int nl=0, nr=0;
            //for ( ; tlist != NULL; tlist++)

#define SELECT_D(d) \
switch (d) { \
    case 0: SELECT_COUNT(0); break; \
    case 1: SELECT_COUNT(1); break; \
    case 2: SELECT_COUNT(2); break; }

#define SELECT_COUNT(y) \
switch (top->count) { \
    case 8: GO_LEFT_OR_RIGHT(7, y); /* no break */ \
    case 7: GO_LEFT_OR_RIGHT(6, y); /* no break */ \
    case 6: GO_LEFT_OR_RIGHT(5, y); /* no break */ \
    case 5: GO_LEFT_OR_RIGHT(4, y); /* no break */ \
    case 4: GO_LEFT_OR_RIGHT(3, y); /* no break */ \
    case 3: GO_LEFT_OR_RIGHT(2, y); /* no break */ \
    case 2: GO_LEFT_OR_RIGHT(1, y); /* no break */ \
    case 1: GO_LEFT_OR_RIGHT(0, y); /* no break */ }

#define GO_LEFT_OR_RIGHT(x,y) { \
if (tlist[ x ]->r[ y ] < split) \
    left->list[nl++] = tlist[ x ]; \
else \
    right->list[nr++] = tlist[ x ]; }

            j = top->count;
            SELECT_D(d);

            left->count  = nl;
            right->count = nr;

            top->count = 0;
            top->list[0] = NULL;

            register unsigned long q1 = -(p->r[d] < split);

            node = (struct tree *)(((unsigned long)(node->left)  &  q1)
                                   + ((unsigned long)(node->right) & ~q1));

        }

        node->list[node->count++] = p;
    }

}
Пример #21
0
Vector SchemeAUSMDV(const Cell& Cell1,const Cell& Cell2,const Cell& Cell3,const Cell& Cell4, int AxisNo)
{

	if(Dimension<3){
           cout<<"Flux AUSMDV is just implement to 3D problems. Program exit."<<endl;
           exit(1);
        }


	// --- Local variables ---------------------------------------------------------------

	// General variables

	Vector	LeftAverage(QuantityNb);	//
	Vector	RightAverage(QuantityNb);	// Conservative quantities
	Vector	Result(QuantityNb);		// Euler flux

	// Variables for the AUSMDV scheme

	Vector	VL(Dimension), VR(Dimension);	// Left and right velocities
	real	rhoL=0., rhoR=0.;		// Left and right densities
	real	eL=0., eR=0.;			// Left and right energies per unit of mass
	real	HL=0., HR=0.;			// Left and right enthalpies
	//real    YL=0., YR=0.;     		// Left and right partial masses

	real	pL =0., pR =0.;			// Left, right  pressures


	// Variables for the limiter

	real   r, Limiter, LeftSlope = 0., RightSlope = 0.; // Left and right slopes
  	//real   DefaultLimiter = (LimiterNo >= 3)? 2.:1.;
	int i;

//	--- Limiter function ---------------------------------------------------------

for (i=1; i<=QuantityNb; i++)
	{
		// --- Compute left cell-average value ---
			
		if (Cell2.average(i) != Cell1.average(i))
		{
			RightSlope 	= Cell3.average(i)-Cell2.average(i);
			LeftSlope	= Cell2.average(i)-Cell1.average(i);
			r		= RightSlope/LeftSlope;
			Limiter 	=  (r > 0) ? (r*r+r)/(1+r*r) : 0.; // same as AUSM
                                             //max (0.0,min(1.0,r));
				            //( (r>=1.0) ? 1.0 : ( (r>=0.0)? r : 0.0) ) ;
			LeftAverage.setValue(i, Cell2.average(i) + double(0.5)*Limiter*LeftSlope);
		}
		else
			LeftAverage.setValue(i, Cell2.average(i));
				
		// --- Compute right cell-average value ---
			
		if (Cell3.average(i) != Cell2.average(i))
		{
			RightSlope = Cell4.average(i)-Cell3.average(i);
			LeftSlope    = Cell3.average(i)-Cell2.average(i);
			r		= RightSlope/LeftSlope;
			Limiter 	=  (r > 0) ? (r*r+r)/(1+r*r) : 0.; //same as AUSM
 						// max(0.0,min(1.0,r)) ;
				            ( (r>=1.0) ? 1.0 : ( (r>=0.0)? r : 0.0) ) ;
			RightAverage.setValue(i, Cell3.average(i) - 0.5*Limiter*LeftSlope);
		}
		else
			RightAverage.setValue(i, Cell3.average(i));
	}
	



	// --- Scheme -------------------------------------------------------------

	// --- Extract left and right natural variables ---

	// Left and right densities
	rhoL = LeftAverage.value(1);
	rhoR = RightAverage.value(1);

	// Left and right velocities
	for (i=1;i<=Dimension;i++)
	{
		VL.setValue( i, LeftAverage.value(i+1)/rhoL );
		VR.setValue( i, RightAverage.value(i+1)/rhoR );
	}

	// Left and right energies per unit of mass
	eL = LeftAverage.value(Dimension+2)/rhoL;
	eR = RightAverage.value(Dimension+2)/rhoR;
	
	
        pL = (Gamma -1.)*rhoL* ( eL - double(0.5)*(VL*VL) );
        pR = (Gamma -1.)*rhoR*( eR - double(0.5)*(VR*VR) );

	// Left and right enthalpies per unit of mass
	HL = eL + pL/rhoL;
	HR = eR + pR/rhoR;

	//Set mu to point to  the component of the system that corresponds to momentum in the direction of this slice, mv and mw to the orthogonal momentum:

	int mu,mv,mw;

//AxisNo=1 dimension=3, velocity positions 2,3,4

	switch (AxisNo){
	case 1:
       		mu = 1;  mv = 2;  mw = 3;
       		break;
	case 2:
		mu = 2 ; mv = 3;  mw = 1;
                break;
      	default:
          	mu = 3;	 mv = 1;  mw = 2;
 }

real uL,uR, vR,vL,wR,wL;
  
uL=VL.value(mu); 	uR=VR.value(mu);
vL=VL.value(mv); 	vR=VR.value(mv);
wL=VL.value(mw);	wR=VR.value(mw);


// -------------------------------------------------------------Compute momentum AUSMD pages 639-640,  eq 31

// ... Auxiliar variables

real	cL =0., cR =0., cMax;			// Left, right  speeds of sound


real aux=0., 
	pLrhoL = pL/rhoL, 
	pRrhoR = pR/rhoR;

// ....... Compute max sound speed  [ Eq. 26 c_m = max(c_R,c_L)]  based on cL, cR, left and right speeds of sound
cL = sqrt(Gamma*pL/rhoL);
cR = sqrt(Gamma*pR/rhoR);
cMax= ( (cL>=cR)?cL:cR ); 


real   alphaL, alphaR,
       uLplus, uRminus, 
       pLplus, pRminus;


// ....... Compute alpha_L and alpha_R Eq 25

aux= pLrhoL+pRrhoR;   
alphaL= double(2.0)* (pLrhoL) / aux;
alphaR= double(2.0)* (pRrhoR) / aux;

// Left-plus u and p. Here we are using aL instead of cm (for us aM).

// ....... Compute uL+, pL+, to avoid the if we do it in steps, first the otherwise

uLplus= double(0.5) * (uL+ fabs(uL));  // Eq 23, otherwise case
pLplus= pL * uLplus/uL;        // Eq 28, otherwise case
	 
if(fabs(uL)<=cMax)
{
    aux=  double(0.25)* (uL+cL)*(uL+cL) /cMax; //ralf use cL  instead of cMax
    uLplus= alphaL* aux + (double(1.0)-alphaL) * uLplus; //Eq. 23 WL
    pLplus= pL* aux/cMax * (double(2.0)-uL/cMax); //Eq. 28 WL
}
	//right-minus u and p. Here we are using aR instead of cm (for us aM).

uRminus= double(0.5) * (uR - fabs(uR)); // Eq 24, otherwise case 
pRminus= pR * uRminus/uR;// Eq 29, otherwise case

        if(fabs(uR)<=cMax)
	{
           aux= (uR-cR)*(uR-cR)/(double(4.0)*cMax);
           uRminus= -alphaR* aux + (double(1.0)-alphaR) * uRminus; //Eq 24 WL
           pRminus=  pR* aux/cMax * (double(2.0)+uR/cMax); //Eq. 29 WL
	}


real auxRhoR, auxRhoL, auxP12;

// MassFlux_Eq22, MassFlux_Eq_31_AUSMD, MassFlux_Eq_30_AUSMV, MassFlux_Eq_33_AUSMDV,Abs_MassFlux_Eq_33_AUSMDV ;

auxP12=pLplus +pRminus;

//MassFlux_Eq22= uLplus*rhoL + uRminus*rhoR;
//MassFlux_Eq_31_AUSMD= 0.5 * (MassFlux_Eq22 * (uL+uR) - fabs(MassFlux_Eq22) * (uR-uL));
//MassFlux_Eq_30_AUSMV= uLplus*rhoL*uL + uRminus*rhoR*uR;

	// -------- Blending between AUSMV and AUSMD  section 2.3 end of page 640, ref eq 30 and 31 -------------
	// sf=1 gives AUSMV, sf= -1 gives AUSMD

real s, Kfactor= double(10.0); //page 642 eq. 34, Kfactor is a forced parameter

aux= (pL<pR)?pL:pR; 
s = min(double(1.0), Kfactor*fabs(pR-pL)/aux); // 0<= sf<= 1/2

//MassFlux_Eq_33_AUSMDV= double(0.5) *( (1+s) * MassFlux_Eq_30_AUSMV + (1-s) * MassFlux_Eq_31_AUSMD);
//Abs_MassFlux_Eq_33_AUSMDV = fabs(MassFlux_Eq_33_AUSMDV);



auxRhoR= double(0.5)* (uRminus*rhoR - fabs(uRminus*rhoR)); 
auxRhoL= double(0.5)*  (uLplus*rhoL  + fabs(uLplus*rhoL)); 

aux = (auxRhoL+auxRhoR);
Result.setValue(1, aux); // for rho

aux = (auxRhoL*HL + auxRhoR*HR);
Result.setValue(Dimension+2, aux);//for rho*H


aux = double(0.5) *( (double(1.0)+s)*uLplus*rhoL*uL  
								+ (double(1.0)-s) *auxRhoL*uL ) 
           + auxP12 + 
           double(0.5) *( (double(1.0)+s)*uRminus*rhoR*uR 
                                                      + (double(1.0)-s) *auxRhoR*uR ) ;
Result.setValue(mu+1,aux); //for velocity component of the axis

aux =  (auxRhoL*vL+ auxRhoR*vR); // for rho
Result.setValue(mv+1,aux);//for velocity component perpendicular to the axis

aux = (auxRhoL*wL + auxRhoR*wR); // for rho
Result.setValue(mw+1,aux);//for velocity component perpendicular to the axis

// --- Return Euler flux ---------------------------------------------------------------

return Result;
}
Пример #22
0
/**
    Purpose
    -------
    ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the
    eigenvalues and, optionally, the left and/or right eigenvectors.

    The right eigenvector v(j) of A satisfies
        A * v(j) = lambda(j) * v(j)
    where lambda(j) is its eigenvalue.
    The left eigenvector u(j) of A satisfies
        u(j)**H * A = lambda(j) * u(j)**H
    where u(j)**H denotes the conjugate transpose of u(j).

    The computed eigenvectors are normalized to have Euclidean norm
    equal to 1 and largest component real.

    Arguments
    ---------
    @param[in]
    jobvl   magma_vec_t
      -     = MagmaNoVec:        left eigenvectors of A are not computed;
      -     = MagmaVec:          left eigenvectors of are computed.

    @param[in]
    jobvr   magma_vec_t
      -     = MagmaNoVec:        right eigenvectors of A are not computed;
      -     = MagmaVec:          right eigenvectors of A are computed.

    @param[in]
    n       INTEGER
            The order of the matrix A. N >= 0.

    @param[in,out]
    A       COMPLEX_16 array, dimension (LDA,N)
            On entry, the N-by-N matrix A.
            On exit, A has been overwritten.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[out]
    w       COMPLEX_16 array, dimension (N)
            w contains the computed eigenvalues.

    @param[out]
    VL      COMPLEX_16 array, dimension (LDVL,N)
            If JOBVL = MagmaVec, the left eigenvectors u(j) are stored one
            after another in the columns of VL, in the same order
            as their eigenvalues.
            If JOBVL = MagmaNoVec, VL is not referenced.
            u(j) = VL(:,j), the j-th column of VL.

    @param[in]
    ldvl    INTEGER
            The leading dimension of the array VL.  LDVL >= 1; if
            JOBVL = MagmaVec, LDVL >= N.

    @param[out]
    VR      COMPLEX_16 array, dimension (LDVR,N)
            If JOBVR = MagmaVec, the right eigenvectors v(j) are stored one
            after another in the columns of VR, in the same order
            as their eigenvalues.
            If JOBVR = MagmaNoVec, VR is not referenced.
            v(j) = VR(:,j), the j-th column of VR.

    @param[in]
    ldvr    INTEGER
            The leading dimension of the array VR.  LDVR >= 1; if
            JOBVR = MagmaVec, LDVR >= N.

    @param[out]
    work    (workspace) COMPLEX_16 array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The dimension of the array WORK.  LWORK >= (1+nb)*N.
            For optimal performance, LWORK >= (1+2*nb)*N.
    \n
            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    @param
    rwork   (workspace) DOUBLE PRECISION array, dimension (2*N)

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value.
      -     > 0:  if INFO = i, the QR algorithm failed to compute all the
                  eigenvalues, and no eigenvectors have been computed;
                  elements and i+1:N of w contain eigenvalues which have
                  converged.

    @ingroup magma_zgeev_driver
    ********************************************************************/
extern "C" magma_int_t
magma_zgeev(
    magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n,
    magmaDoubleComplex *A, magma_int_t lda,
    #ifdef COMPLEX
    magmaDoubleComplex *w,
    #else
    double *wr, double *wi,
    #endif
    magmaDoubleComplex *VL, magma_int_t ldvl,
    magmaDoubleComplex *VR, magma_int_t ldvr,
    magmaDoubleComplex *work, magma_int_t lwork,
    #ifdef COMPLEX
    double *rwork,
    #endif
    magma_int_t *info )
{
    #define VL(i,j)  (VL + (i) + (j)*ldvl)
    #define VR(i,j)  (VR + (i) + (j)*ldvr)
    
    const magma_int_t ione  = 1;
    const magma_int_t izero = 0;
    
    double d__1, d__2;
    magmaDoubleComplex tmp;
    double scl;
    double dum[1], eps;
    double anrm, cscale, bignum, smlnum;
    magma_int_t i, k, ilo, ihi;
    magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, nb;
    magma_int_t scalea, minwrk, optwrk, irwork, lquery, wantvl, wantvr, select[1];

    magma_side_t side = MagmaRight;

    magma_timer_t time_total=0, time_gehrd=0, time_unghr=0, time_hseqr=0, time_trevc=0, time_sum=0;
    magma_flops_t flop_total=0, flop_gehrd=0, flop_unghr=0, flop_hseqr=0, flop_trevc=0, flop_sum=0;
    timer_start( time_total );
    flops_start( flop_total );
    
    irwork = 0;
    *info = 0;
    lquery = (lwork == -1);
    wantvl = (jobvl == MagmaVec);
    wantvr = (jobvr == MagmaVec);
    if (! wantvl && jobvl != MagmaNoVec) {
        *info = -1;
    } else if (! wantvr && jobvr != MagmaNoVec) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -8;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -10;
    }

    /* Compute workspace */
    nb = magma_get_zgehrd_nb( n );
    if (*info == 0) {
        minwrk = (1+  nb)*n;
        optwrk = (1+2*nb)*n;
        work[0] = MAGMA_Z_MAKE( optwrk, 0 );

        if (lwork < minwrk && ! lquery) {
            *info = -12;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }
    
    #if defined(VERSION3)
    magmaDoubleComplex_ptr dT;
    if (MAGMA_SUCCESS != magma_zmalloc( &dT, nb*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    #endif

    /* Get machine constants */
    eps    = lapackf77_dlamch( "P" );
    smlnum = lapackf77_dlamch( "S" );
    bignum = 1. / smlnum;
    lapackf77_dlabad( &smlnum, &bignum );
    smlnum = magma_dsqrt( smlnum ) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_zlange( "M", &n, &n, A, &lda, dum );
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_zlascl( "G", &izero, &izero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
    }

    /* Balance the matrix
     * (CWorkspace: none)
     * (RWorkspace: need N)
     *  - this space is reserved until after gebak */
    ibal = 0;
    lapackf77_zgebal( "B", &n, A, &lda, &ilo, &ihi, &rwork[ibal], &ierr );

    /* Reduce to upper Hessenberg form
     * (CWorkspace: need 2*N, prefer N + N*NB)
     * (RWorkspace: N)
     *  - including N reserved for gebal/gebak, unused by zgehrd */
    itau = 0;
    iwrk = itau + n;
    liwrk = lwork - iwrk;

    timer_start( time_gehrd );
    flops_start( flop_gehrd );
    #if defined(VERSION1)
        // Version 1 - LAPACK
        lapackf77_zgehrd( &n, &ilo, &ihi, A, &lda,
                          &work[itau], &work[iwrk], &liwrk, &ierr );
    #elif defined(VERSION2)
        // Version 2 - LAPACK consistent HRD
        magma_zgehrd2( n, ilo, ihi, A, lda,
                       &work[itau], &work[iwrk], liwrk, &ierr );
    #elif defined(VERSION3)
        // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored,
        magma_zgehrd( n, ilo, ihi, A, lda,
                      &work[itau], &work[iwrk], liwrk, dT, &ierr );
    #endif
    time_sum += timer_stop( time_gehrd );
    flop_sum += flops_stop( flop_gehrd );

    if (wantvl) {
        /* Want left eigenvectors
         * Copy Householder vectors to VL */
        side = MagmaLeft;
        lapackf77_zlacpy( MagmaLowerStr, &n, &n, A, &lda, VL, &ldvl );

        /* Generate unitary matrix in VL
         * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by zunghr */
        timer_start( time_unghr );
        flops_start( flop_unghr );
        #if defined(VERSION1) || defined(VERSION2)
            // Version 1 & 2 - LAPACK
            lapackf77_zunghr( &n, &ilo, &ihi, VL, &ldvl, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(VERSION3)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_zunghr( n, ilo, ihi, VL, ldvl, &work[itau], dT, nb, &ierr );
        #endif
        time_sum += timer_stop( time_unghr );
        flop_sum += flops_stop( flop_unghr );
        
        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        /* Perform QR iteration, accumulating Schur vectors in VL
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by zhseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, w,
                          VL, &ldvl, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );

        if (wantvr) {
            /* Want left and right eigenvectors
             * Copy Schur vectors to VR */
            side = MagmaBothSides;
            lapackf77_zlacpy( "F", &n, &n, VL, &ldvl, VR, &ldvr );
        }
    }
    else if (wantvr) {
        /* Want right eigenvectors
         * Copy Householder vectors to VR */
        side = MagmaRight;
        lapackf77_zlacpy( "L", &n, &n, A, &lda, VR, &ldvr );

        /* Generate unitary matrix in VR
         * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by zunghr */
        timer_start( time_unghr );
        flops_start( flop_unghr );
        #if defined(VERSION1) || defined(VERSION2)
            // Version 1 & 2 - LAPACK
            lapackf77_zunghr( &n, &ilo, &ihi, VR, &ldvr, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(VERSION3)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_zunghr( n, ilo, ihi, VR, ldvr, &work[itau], dT, nb, &ierr );
        #endif
        time_sum += timer_stop( time_unghr );
        flop_sum += flops_stop( flop_unghr );

        /* Perform QR iteration, accumulating Schur vectors in VR
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by zhseqr */
        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, w,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );
    }
    else {
        /* Compute eigenvalues only
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: N)
         *  - including N reserved for gebal/gebak, unused by zhseqr */
        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_zhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, w,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );
    }

    /* If INFO > 0 from ZHSEQR, then quit */
    if (*info > 0) {
        goto CLEANUP;
    }

    timer_start( time_trevc );
    flops_start( flop_trevc );
    if (wantvl || wantvr) {
        /* Compute left and/or right eigenvectors
         * (CWorkspace: need 2*N)
         * (RWorkspace: need 2*N)
         *  - including N reserved for gebal/gebak, unused by ztrevc */
        irwork = ibal + n;
        #if TREVC_VERSION == 1
        lapackf77_ztrevc( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                          VR, &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 2
        liwrk = lwork - iwrk;
        lapackf77_ztrevc3( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                           VR, &ldvr, &n, &nout, &work[iwrk], &liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 3
        magma_ztrevc3( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                       VR, ldvr, n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 4
        magma_ztrevc3_mt( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                          VR, ldvr, n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #elif TREVC_VERSION == 5
        magma_ztrevc3_mt_gpu( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                              VR, ldvr, n, &nout, &work[iwrk], liwrk, &rwork[irwork], &ierr );
        #else
        #error Unknown TREVC_VERSION
        #endif
    }
    time_sum += timer_stop( time_trevc );
    flop_sum += flops_stop( flop_trevc );

    if (wantvl) {
        /* Undo balancing of left eigenvectors
         * (CWorkspace: none)
         * (RWorkspace: need N) */
        lapackf77_zgebak( "B", "L", &n, &ilo, &ihi, &rwork[ibal], &n,
                          VL, &ldvl, &ierr );

        /* Normalize left eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            scl = 1. / magma_cblas_dznrm2( n, VL(0,i), 1 );
            blasf77_zdscal( &n, &scl, VL(0,i), &ione );
            for (k = 0; k < n; ++k) {
                /* Computing 2nd power */
                d__1 = MAGMA_Z_REAL( *VL(k,i) );
                d__2 = MAGMA_Z_IMAG( *VL(k,i) );
                rwork[irwork + k] = d__1*d__1 + d__2*d__2;
            }
            k = blasf77_idamax( &n, &rwork[irwork], &ione ) - 1;  // subtract 1; k is 0-based
            tmp = MAGMA_Z_CNJG( *VL(k,i) ) / magma_dsqrt( rwork[irwork + k] );
            blasf77_zscal( &n, &tmp, VL(0,i), &ione );
            *VL(k,i) = MAGMA_Z_MAKE( MAGMA_Z_REAL( *VL(k,i) ), 0 );
        }
    }

    if (wantvr) {
        /* Undo balancing of right eigenvectors
         * (CWorkspace: none)
         * (RWorkspace: need N) */
        lapackf77_zgebak( "B", "R", &n, &ilo, &ihi, &rwork[ibal], &n,
                          VR, &ldvr, &ierr );

        /* Normalize right eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            scl = 1. / magma_cblas_dznrm2( n, VR(0,i), 1 );
            blasf77_zdscal( &n, &scl, VR(0,i), &ione );
            for (k = 0; k < n; ++k) {
                /* Computing 2nd power */
                d__1 = MAGMA_Z_REAL( *VR(k,i) );
                d__2 = MAGMA_Z_IMAG( *VR(k,i) );
                rwork[irwork + k] = d__1*d__1 + d__2*d__2;
            }
            k = blasf77_idamax( &n, &rwork[irwork], &ione ) - 1;  // subtract 1; k is 0-based
            tmp = MAGMA_Z_CNJG( *VR(k,i) ) / magma_dsqrt( rwork[irwork + k] );
            blasf77_zscal( &n, &tmp, VR(0,i), &ione );
            *VR(k,i) = MAGMA_Z_MAKE( MAGMA_Z_REAL( *VR(k,i) ), 0 );
        }
    }

CLEANUP:
    /* Undo scaling if necessary */
    if (scalea) {
        // converged eigenvalues, stored in WR[i+1:n] and WI[i+1:n] for i = INFO
        magma_int_t nval = n - (*info);
        magma_int_t ld   = max( nval, 1 );
        lapackf77_zlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, w + (*info), &ld, &ierr );
        if (*info > 0) {
            // first ilo columns were already upper triangular,
            // so the corresponding eigenvalues are also valid.
            nval = ilo - 1;
            lapackf77_zlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, w, &n, &ierr );
        }
    }

    #if defined(VERSION3)
    magma_free( dT );
    #endif
    
    timer_stop( time_total );
    flops_stop( flop_total );
    timer_printf( "dgeev times n %5d, gehrd %7.3f, unghr %7.3f, hseqr %7.3f, trevc %7.3f, total %7.3f, sum %7.3f\n",
                  (int) n, time_gehrd, time_unghr, time_hseqr, time_trevc, time_total, time_sum );
    timer_printf( "dgeev flops n %5d, gehrd %7lld, unghr %7lld, hseqr %7lld, trevc %7lld, total %7lld, sum %7lld\n",
                  (int) n, flop_gehrd, flop_unghr, flop_hseqr, flop_trevc, flop_total, flop_sum );

    work[0] = MAGMA_Z_MAKE( (double) optwrk, 0. );

    return *info;
} /* magma_zgeev */
Пример #23
0
magma_int_t magma_ztrevc3_mt(
    magma_side_t side, magma_vec_t howmany,
    magma_int_t *select,  // logical in Fortran
    magma_int_t n,
    magmaDoubleComplex *T,  magma_int_t ldt,
    magmaDoubleComplex *VL, magma_int_t ldvl,
    magmaDoubleComplex *VR, magma_int_t ldvr,
    magma_int_t mm, magma_int_t *mout,
    magmaDoubleComplex *work, magma_int_t lwork,
    #ifdef COMPLEX
    double *rwork,
    #endif
    magma_int_t *info )
{
    #define  T(i,j)  ( T + (i) + (j)*ldt )
    #define VL(i,j)  (VL + (i) + (j)*ldvl)
    #define VR(i,j)  (VR + (i) + (j)*ldvr)
    #define work(i,j) (work + (i) + (j)*n)

    // .. Parameters ..
    const magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
    const magmaDoubleComplex c_one  = MAGMA_Z_ONE;
    const magma_int_t  nbmin = 16, nbmax = 128;
    const magma_int_t  ione = 1;
    
    // .. Local Scalars ..
    magma_int_t            allv, bothv, leftv, over, rightv, somev;
    magma_int_t            i, ii, is, j, k, ki, iv, n2, nb, nb2, version;
    double                 ovfl, remax, unfl;  //smlnum, smin, ulp
    
    // Decode and test the input parameters
    bothv  = (side == MagmaBothSides);
    rightv = (side == MagmaRight) || bothv;
    leftv  = (side == MagmaLeft ) || bothv;

    allv  = (howmany == MagmaAllVec);
    over  = (howmany == MagmaBacktransVec);
    somev = (howmany == MagmaSomeVec);

    // Set mout to the number of columns required to store the selected
    // eigenvectors.
    if ( somev ) {
        *mout = 0;
        for( j=0; j < n; ++j ) {
            if ( select[j] ) {
                *mout += 1;
            }
        }
    }
    else {
        *mout = n;
    }

    *info = 0;
    if ( ! rightv && ! leftv )
        *info = -1;
    else if ( ! allv && ! over && ! somev )
        *info = -2;
    else if ( n < 0 )
        *info = -4;
    else if ( ldt < max( 1, n ) )
        *info = -6;
    else if ( ldvl < 1 || ( leftv && ldvl < n ) )
        *info = -8;
    else if ( ldvr < 1 || ( rightv && ldvr < n ) )
        *info = -10;
    else if ( mm < *mout )
        *info = -11;
    else if ( lwork < max( 1, 2*n ) )
        *info = -14;
    
    if ( *info != 0 ) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    // Quick return if possible.
    if ( n == 0 ) {
        return *info;
    }
    
    // Use blocked version (2) if sufficient workspace.
    // Requires 1 vector to save diagonal elements, and 2*nb vectors for x and Q*x.
    // (Compared to dtrevc3, rwork stores 1-norms.)
    // Zero-out the workspace to avoid potential NaN propagation.
    nb = 2;
    if ( lwork >= n + 2*n*nbmin ) {
        version = 2;
        nb = (lwork - n) / (2*n);
        nb = min( nb, nbmax );
        nb2 = 1 + 2*nb;
        lapackf77_zlaset( "F", &n, &nb2, &c_zero, &c_zero, work, &n );
    }
    else {
        version = 1;
    }

    // Set the constants to control overflow.
    unfl = lapackf77_dlamch( "Safe minimum" );
    ovfl = 1. / unfl;
    lapackf77_dlabad( &unfl, &ovfl );
    //ulp = lapackf77_dlamch( "Precision" );
    //smlnum = unfl*( n / ulp );

    // Store the diagonal elements of T in working array work.
    for( i=0; i < n; ++i ) {
        *work(i,0) = *T(i,i);
    }

    // Compute 1-norm of each column of strictly upper triangular
    // part of T to control overflow in triangular solver.
    rwork[0] = 0.;
    for( j=1; j < n; ++j ) {
        rwork[j] = magma_cblas_dzasum( j, T(0,j), ione );
    }

    // launch threads -- each single-threaded MKL
    magma_int_t nthread = magma_get_parallel_numthreads();
    magma_int_t lapack_nthread = magma_get_lapack_numthreads();
    magma_set_lapack_numthreads( 1 );
    magma_thread_queue queue;
    queue.launch( nthread );
    //printf( "nthread %d, %d\n", nthread, lapack_nthread );
    
    // gemm_nb = N/thread, rounded up to multiple of 16,
    // but avoid multiples of page size, e.g., 512*8 bytes = 4096.
    magma_int_t gemm_nb = magma_int_t( ceil( ceil( ((double)n) / nthread ) / 16. ) * 16. );
    if ( gemm_nb % 512 == 0 ) {
        gemm_nb += 32;
    }
    
    magma_timer_t time_total=0, time_trsv=0, time_gemm=0, time_gemv=0, time_trsv_sum=0, time_gemm_sum=0, time_gemv_sum=0;
    timer_start( time_total );

    if ( rightv ) {
        // ============================================================
        // Compute right eigenvectors.
        // iv is index of column in current block.
        // Non-blocked version always uses iv=1;
        // blocked     version starts with iv=nb, goes down to 1.
        // (Note the "0-th" column is used to store the original diagonal.)
        iv = 1;
        if ( version == 2 ) {
            iv = nb;
        }
        
        timer_start( time_trsv );
        is = *mout - 1;
        for( ki=n-1; ki >= 0; --ki ) {
            if ( somev ) {
                if ( ! select[ki] ) {
                    continue;
                }
            }
            //smin = max( ulp*MAGMA_Z_ABS1( *T(ki,ki) ), smlnum );

            // --------------------------------------------------------
            // Complex right eigenvector
            *work(ki,iv) = c_one;

            // Form right-hand side.
            for( k=0; k < ki; ++k ) {
                *work(k,iv) = -(*T(k,ki));
            }

            // Solve upper triangular system:
            // [ T(1:ki-1,1:ki-1) - T(ki,ki) ]*X = scale*work.
            if ( ki > 0 ) {
                queue.push_task( new magma_zlatrsd_task(
                    MagmaUpper, MagmaNoTrans, MagmaNonUnit, MagmaTrue,
                    ki, T, ldt, *T(ki,ki),
                    work(0,iv), work(ki,iv), rwork ));
            }

            // Copy the vector x or Q*x to VR and normalize.
            if ( ! over ) {
                // ------------------------------
                // no back-transform: copy x to VR and normalize
                queue.sync();
                n2 = ki+1;
                blasf77_zcopy( &n2, work(0,iv), &ione, VR(0,is), &ione );

                ii = blasf77_izamax( &n2, VR(0,is), &ione ) - 1;
                remax = 1. / MAGMA_Z_ABS1( *VR(ii,is) );
                blasf77_zdscal( &n2, &remax, VR(0,is), &ione );

                for( k=ki+1; k < n; ++k ) {
                    *VR(k,is) = c_zero;
                }
            }
            else if ( version == 1 ) {
                // ------------------------------
                // version 1: back-transform each vector with GEMV, Q*x.
                queue.sync();
                time_trsv_sum += timer_stop( time_trsv );
                timer_start( time_gemv );
                if ( ki > 0 ) {
                    blasf77_zgemv( "n", &n, &ki, &c_one,
                                   VR, &ldvr,
                                   work(0, iv), &ione,
                                   work(ki,iv), VR(0,ki), &ione );
                }
                time_gemv_sum += timer_stop( time_gemv );
                ii = blasf77_izamax( &n, VR(0,ki), &ione ) - 1;
                remax = 1. / MAGMA_Z_ABS1( *VR(ii,ki) );
                blasf77_zdscal( &n, &remax, VR(0,ki), &ione );
                timer_start( time_trsv );
            }
            else if ( version == 2 ) {
                // ------------------------------
                // version 2: back-transform block of vectors with GEMM
                // zero out below vector
                for( k=ki+1; k < n; ++k ) {
                    *work(k,iv) = c_zero;
                }

                // Columns iv:nb of work are valid vectors.
                // When the number of vectors stored reaches nb,
                // or if this was last vector, do the GEMM
                if ( (iv == 1) || (ki == 0) ) {
                    queue.sync();
                    time_trsv_sum += timer_stop( time_trsv );
                    timer_start( time_gemm );
                    nb2 = nb-iv+1;
                    n2  = ki+nb-iv+1;
                    
                    // split gemm into multiple tasks, each doing one block row
                    for( i=0; i < n; i += gemm_nb ) {
                        magma_int_t ib = min( gemm_nb, n-i );
                        queue.push_task( new zgemm_task(
                            MagmaNoTrans, MagmaNoTrans, ib, nb2, n2, c_one,
                            VR(i,0), ldvr,
                            work(0,iv   ), n, c_zero,
                            work(i,nb+iv), n ));
                    }
                    queue.sync();
                    time_gemm_sum += timer_stop( time_gemm );
                    
                    // normalize vectors
                    // TODO if somev, should copy vectors individually to correct location.
                    for( k = iv; k <= nb; ++k ) {
                        ii = blasf77_izamax( &n, work(0,nb+k), &ione ) - 1;
                        remax = 1. / MAGMA_Z_ABS1( *work(ii,nb+k) );
                        blasf77_zdscal( &n, &remax, work(0,nb+k), &ione );
                    }
                    lapackf77_zlacpy( "F", &n, &nb2, work(0,nb+iv), &n, VR(0,ki), &ldvr );
                    iv = nb;
                    timer_start( time_trsv );
                }
                else {
                    iv -= 1;
                }
            } // blocked back-transform

            is -= 1;
        }
    }
    timer_stop( time_trsv );
    
    timer_stop( time_total );
    timer_printf( "trevc trsv %.4f, gemm %.4f, gemv %.4f, total %.4f\n",
                  time_trsv_sum, time_gemm_sum, time_gemv_sum, time_total );

    if ( leftv ) {
        // ============================================================
        // Compute left eigenvectors.
        // iv is index of column in current block.
        // Non-blocked version always uses iv=1;
        // blocked     version starts with iv=1, goes up to nb.
        // (Note the "0-th" column is used to store the original diagonal.)
        iv = 1;
        is = 0;
        for( ki=0; ki < n; ++ki ) {
            if ( somev ) {
                if ( ! select[ki] ) {
                    continue;
                }
            }
            //smin = max( ulp*MAGMA_Z_ABS1( *T(ki,ki) ), smlnum );
        
            // --------------------------------------------------------
            // Complex left eigenvector
            *work(ki,iv) = c_one;
        
            // Form right-hand side.
            for( k = ki + 1; k < n; ++k ) {
                *work(k,iv) = -MAGMA_Z_CONJ( *T(ki,k) );
            }
            
            // Solve conjugate-transposed triangular system:
            // [ T(ki+1:n,ki+1:n) - T(ki,ki) ]**H * X = scale*work.
            // TODO what happens with T(k,k) - lambda is small? Used to have < smin test.
            if ( ki < n-1 ) {
                n2 = n-ki-1;
                queue.push_task( new magma_zlatrsd_task(
                    MagmaUpper, MagmaConjTrans, MagmaNonUnit, MagmaTrue,
                    n2, T(ki+1,ki+1), ldt, *T(ki,ki),
                    work(ki+1,iv), work(ki,iv), rwork ));
            }
            
            // Copy the vector x or Q*x to VL and normalize.
            if ( ! over ) {
                // ------------------------------
                // no back-transform: copy x to VL and normalize
                queue.sync();
                n2 = n-ki;
                blasf77_zcopy( &n2, work(ki,iv), &ione, VL(ki,is), &ione );
        
                ii = blasf77_izamax( &n2, VL(ki,is), &ione ) + ki - 1;
                remax = 1. / MAGMA_Z_ABS1( *VL(ii,is) );
                blasf77_zdscal( &n2, &remax, VL(ki,is), &ione );
        
                for( k=0; k < ki; ++k ) {
                    *VL(k,is) = c_zero;
                }
            }
            else if ( version == 1 ) {
                // ------------------------------
                // version 1: back-transform each vector with GEMV, Q*x.
                queue.sync();
                if ( ki < n-1 ) {
                    n2 = n-ki-1;
                    blasf77_zgemv( "n", &n, &n2, &c_one,
                                   VL(0,ki+1), &ldvl,
                                   work(ki+1,iv), &ione,
                                   work(ki,  iv), VL(0,ki), &ione );
                }
                ii = blasf77_izamax( &n, VL(0,ki), &ione ) - 1;
                remax = 1. / MAGMA_Z_ABS1( *VL(ii,ki) );
                blasf77_zdscal( &n, &remax, VL(0,ki), &ione );
            }
            else if ( version == 2 ) {
                // ------------------------------
                // version 2: back-transform block of vectors with GEMM
                // zero out above vector
                // could go from (ki+1)-NV+1 to ki
                for( k=0; k < ki; ++k ) {
                    *work(k,iv) = c_zero;
                }
        
                // Columns 1:iv of work are valid vectors.
                // When the number of vectors stored reaches nb,
                // or if this was last vector, do the GEMM
                if ( (iv == nb) || (ki == n-1) ) {
                    queue.sync();
                    n2 = n-(ki+1)+iv;
                    
                    // split gemm into multiple tasks, each doing one block row
                    for( i=0; i < n; i += gemm_nb ) {
                        magma_int_t ib = min( gemm_nb, n-i );
                        queue.push_task( new zgemm_task(
                            MagmaNoTrans, MagmaNoTrans, ib, iv, n2, c_one,
                            VL(i,ki-iv+1), ldvl,
                            work(ki-iv+1,1), n, c_zero,
                            work(i,nb+1), n ));
                    }
                    queue.sync();
                    // normalize vectors
                    for( k=1; k <= iv; ++k ) {
                        ii = blasf77_izamax( &n, work(0,nb+k), &ione ) - 1;
                        remax = 1. / MAGMA_Z_ABS1( *work(ii,nb+k) );
                        blasf77_zdscal( &n, &remax, work(0,nb+k), &ione );
                    }
                    lapackf77_zlacpy( "F", &n, &iv, work(0,nb+1), &n, VL(0,ki-iv+1), &ldvl );
                    iv = 1;
                }
                else {
                    iv += 1;
                }
            } // blocked back-transform
        
            is += 1;
        }
    }
    
    // close down threads
    queue.quit();
    magma_set_lapack_numthreads( lapack_nthread );
    
    return *info;
}  // End of ZTREVC
Пример #24
0
/* Subroutine */ int zgeevx_(char *balanc, char *jobvl, char *jobvr, char *
                             sense, integer *n, doublecomplex *a, integer *lda, doublecomplex *w,
                             doublecomplex *vl, integer *ldvl, doublecomplex *vr, integer *ldvr,
                             integer *ilo, integer *ihi, doublereal *scale, doublereal *abnrm,
                             doublereal *rconde, doublereal *rcondv, doublecomplex *work, integer *
                             lwork, doublereal *rwork, integer *info)
{
    /*  -- LAPACK driver routine (version 2.0) --
           Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
           Courant Institute, Argonne National Lab, and Rice University
           September 30, 1994


        Purpose
        =======

        ZGEEVX computes for an N-by-N complex nonsymmetric matrix A, the
        eigenvalues and, optionally, the left and/or right eigenvectors.

        Optionally also, it computes a balancing transformation to improve
        the conditioning of the eigenvalues and eigenvectors (ILO, IHI,
        SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues
        (RCONDE), and reciprocal condition numbers for the right
        eigenvectors (RCONDV).

        The right eigenvector v(j) of A satisfies
                         A * v(j) = lambda(j) * v(j)
        where lambda(j) is its eigenvalue.
        The left eigenvector u(j) of A satisfies
                      u(j)**H * A = lambda(j) * u(j)**H
        where u(j)**H denotes the conjugate transpose of u(j).

        The computed eigenvectors are normalized to have Euclidean norm
        equal to 1 and largest component real.

        Balancing a matrix means permuting the rows and columns to make it
        more nearly upper triangular, and applying a diagonal similarity
        transformation D * A * D**(-1), where D is a diagonal matrix, to
        make its rows and columns closer in norm and the condition numbers
        of its eigenvalues and eigenvectors smaller.  The computed
        reciprocal condition numbers correspond to the balanced matrix.
        Permuting rows and columns will not change the condition numbers
        (in exact arithmetic) but diagonal scaling will.  For further
        explanation of balancing, see section 4.10.2 of the LAPACK
        Users' Guide.

        Arguments
        =========

        BALANC  (input) CHARACTER*1
                Indicates how the input matrix should be diagonally scaled
                and/or permuted to improve the conditioning of its
                eigenvalues.
                = 'N': Do not diagonally scale or permute;
                = 'P': Perform permutations to make the matrix more nearly
                       upper triangular. Do not diagonally scale;
                = 'S': Diagonally scale the matrix, ie. replace A by
                       D*A*D**(-1), where D is a diagonal matrix chosen
                       to make the rows and columns of A more equal in
                       norm. Do not permute;
                = 'B': Both diagonally scale and permute A.

                Computed reciprocal condition numbers will be for the matrix

                after balancing and/or permuting. Permuting does not change
                condition numbers (in exact arithmetic), but balancing does.


        JOBVL   (input) CHARACTER*1
                = 'N': left eigenvectors of A are not computed;
                = 'V': left eigenvectors of A are computed.
                If SENSE = 'E' or 'B', JOBVL must = 'V'.

        JOBVR   (input) CHARACTER*1
                = 'N': right eigenvectors of A are not computed;
                = 'V': right eigenvectors of A are computed.
                If SENSE = 'E' or 'B', JOBVR must = 'V'.

        SENSE   (input) CHARACTER*1
                Determines which reciprocal condition numbers are computed.
                = 'N': None are computed;
                = 'E': Computed for eigenvalues only;
                = 'V': Computed for right eigenvectors only;
                = 'B': Computed for eigenvalues and right eigenvectors.

                If SENSE = 'E' or 'B', both left and right eigenvectors
                must also be computed (JOBVL = 'V' and JOBVR = 'V').

        N       (input) INTEGER
                The order of the matrix A. N >= 0.

        A       (input/output) COMPLEX*16 array, dimension (LDA,N)
                On entry, the N-by-N matrix A.
                On exit, A has been overwritten.  If JOBVL = 'V' or
                JOBVR = 'V', A contains the Schur form of the balanced
                version of the matrix A.

        LDA     (input) INTEGER
                The leading dimension of the array A.  LDA >= max(1,N).

        W       (output) COMPLEX*16 array, dimension (N)
                W contains the computed eigenvalues.

        VL      (output) COMPLEX*16 array, dimension (LDVL,N)
                If JOBVL = 'V', the left eigenvectors u(j) are stored one
                after another in the columns of VL, in the same order
                as their eigenvalues.
                If JOBVL = 'N', VL is not referenced.
                u(j) = VL(:,j), the j-th column of VL.

        LDVL    (input) INTEGER
                The leading dimension of the array VL.  LDVL >= 1; if
                JOBVL = 'V', LDVL >= N.

        VR      (output) COMPLEX*16 array, dimension (LDVR,N)
                If JOBVR = 'V', the right eigenvectors v(j) are stored one
                after another in the columns of VR, in the same order
                as their eigenvalues.
                If JOBVR = 'N', VR is not referenced.
                v(j) = VR(:,j), the j-th column of VR.

        LDVR    (input) INTEGER
                The leading dimension of the array VR.  LDVR >= 1; if
                JOBVR = 'V', LDVR >= N.

        ILO,IHI (output) INTEGER
                ILO and IHI are integer values determined when A was
                balanced.  The balanced A(i,j) = 0 if I > J and
                J = 1,...,ILO-1 or I = IHI+1,...,N.

        SCALE   (output) DOUBLE PRECISION array, dimension (N)
                Details of the permutations and scaling factors applied
                when balancing A.  If P(j) is the index of the row and column

                interchanged with row and column j, and D(j) is the scaling
                factor applied to row and column j, then
                SCALE(J) = P(J),    for J = 1,...,ILO-1
                         = D(J),    for J = ILO,...,IHI
                         = P(J)     for J = IHI+1,...,N.
                The order in which the interchanges are made is N to IHI+1,
                then 1 to ILO-1.

        ABNRM   (output) DOUBLE PRECISION
                The one-norm of the balanced matrix (the maximum
                of the sum of absolute values of elements of any column).

        RCONDE  (output) DOUBLE PRECISION array, dimension (N)
                RCONDE(j) is the reciprocal condition number of the j-th
                eigenvalue.

        RCONDV  (output) DOUBLE PRECISION array, dimension (N)
                RCONDV(j) is the reciprocal condition number of the j-th
                right eigenvector.

        WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
                On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

        LWORK   (input) INTEGER
                The dimension of the array WORK.  If SENSE = 'N' or 'E',
                LWORK >= max(1,2*N), and if SENSE = 'V' or 'B',
                LWORK >= N*N+2*N.
                For good performance, LWORK must generally be larger.

        RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)

        INFO    (output) INTEGER
                = 0:  successful exit
                < 0:  if INFO = -i, the i-th argument had an illegal value.
                > 0:  if INFO = i, the QR algorithm failed to compute all the

                      eigenvalues, and no eigenvectors or condition numbers
                      have been computed; elements 1:ILO-1 and i+1:N of W
                      contain eigenvalues which have converged.

        =====================================================================



           Test the input arguments


       Parameter adjustments
           Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c__0 = 0;
    static integer c__8 = 8;
    static integer c_n1 = -1;
    static integer c__4 = 4;

    /* System generated locals */
    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
            i__2, i__3, i__4;
    doublereal d__1, d__2;
    doublecomplex z__1, z__2;
    /* Builtin functions */
    double sqrt(doublereal), d_imag(doublecomplex *);
    void d_cnjg(doublecomplex *, doublecomplex *);
    /* Local variables */
    static char side[1];
    static integer maxb;
    static doublereal anrm;
    static integer ierr, itau, iwrk, nout, i, k, icond;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
                                       doublecomplex *, integer *), dlabad_(doublereal *, doublereal *);
    extern doublereal dznrm2_(integer *, doublecomplex *, integer *);
    static logical scalea;
    extern doublereal dlamch_(char *);
    static doublereal cscale;
    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
                                        doublereal *, doublereal *, integer *, integer *, doublereal *,
                                        integer *, integer *), zgebak_(char *, char *, integer *,
                                                integer *, integer *, doublereal *, integer *, doublecomplex *,
                                                integer *, integer *), zgebal_(char *, integer *,
                                                        doublecomplex *, integer *, integer *, integer *, doublereal *,
                                                        integer *);
    extern integer idamax_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
                           integer *, integer *, ftnlen, ftnlen);
    static logical select[1];
    extern /* Subroutine */ int zdscal_(integer *, doublereal *,
                                        doublecomplex *, integer *);
    static doublereal bignum;
    extern doublereal zlange_(char *, integer *, integer *, doublecomplex *,
                              integer *, doublereal *);
    extern /* Subroutine */ int zgehrd_(integer *, integer *, integer *,
                                        doublecomplex *, integer *, doublecomplex *, doublecomplex *,
                                        integer *, integer *), zlascl_(char *, integer *, integer *,
                                                doublereal *, doublereal *, integer *, integer *, doublecomplex *,
                                                integer *, integer *), zlacpy_(char *, integer *,
                                                        integer *, doublecomplex *, integer *, doublecomplex *, integer *);
    static integer minwrk, maxwrk;
    static logical wantvl, wntsnb;
    static integer hswork;
    static logical wntsne;
    static doublereal smlnum;
    extern /* Subroutine */ int zhseqr_(char *, char *, integer *, integer *,
                                        integer *, doublecomplex *, integer *, doublecomplex *,
                                        doublecomplex *, integer *, doublecomplex *, integer *, integer *);
    static logical wantvr;
    extern /* Subroutine */ int ztrevc_(char *, char *, logical *, integer *,
                                        doublecomplex *, integer *, doublecomplex *, integer *,
                                        doublecomplex *, integer *, integer *, integer *, doublecomplex *,
                                        doublereal *, integer *), ztrsna_(char *, char *,
                                                logical *, integer *, doublecomplex *, integer *, doublecomplex *
                                                , integer *, doublecomplex *, integer *, doublereal *, doublereal
                                                *, integer *, integer *, doublecomplex *, integer *, doublereal *,
                                                integer *), zunghr_(integer *, integer *,
                                                        integer *, doublecomplex *, integer *, doublecomplex *,
                                                        doublecomplex *, integer *, integer *);
    static logical wntsnn, wntsnv;
    static char job[1];
    static doublereal scl, dum[1], eps;
    static doublecomplex tmp;



#define DUM(I) dum[(I)]
#define W(I) w[(I)-1]
#define SCALE(I) scale[(I)-1]
#define RCONDE(I) rconde[(I)-1]
#define RCONDV(I) rcondv[(I)-1]
#define WORK(I) work[(I)-1]
#define RWORK(I) rwork[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define VL(I,J) vl[(I)-1 + ((J)-1)* ( *ldvl)]
#define VR(I,J) vr[(I)-1 + ((J)-1)* ( *ldvr)]

    *info = 0;
    wantvl = lsame_(jobvl, "V");
    wantvr = lsame_(jobvr, "V");
    wntsnn = lsame_(sense, "N");
    wntsne = lsame_(sense, "E");
    wntsnv = lsame_(sense, "V");
    wntsnb = lsame_(sense, "B");
    if (! (lsame_(balanc, "N") || lsame_(balanc, "S") ||
            lsame_(balanc, "P") || lsame_(balanc, "B"))) {
        *info = -1;
    } else if (! wantvl && ! lsame_(jobvl, "N")) {
        *info = -2;
    } else if (! wantvr && ! lsame_(jobvr, "N")) {
        *info = -3;
    } else if (! (wntsnn || wntsne || wntsnb || wntsnv) || (wntsne || wntsnb)
               && ! (wantvl && wantvr)) {
        *info = -4;
    } else if (*n < 0) {
        *info = -5;
    } else if (*lda < max(1,*n)) {
        *info = -7;
    } else if (*ldvl < 1 || wantvl && *ldvl < *n) {
        *info = -10;
    } else if (*ldvr < 1 || wantvr && *ldvr < *n) {
        *info = -12;
    }

    /*     Compute workspace
            (Note: Comments in the code beginning "Workspace:" describe the
             minimal amount of workspace needed at that point in the code,
             as well as the preferred amount for good performance.
             CWorkspace refers to complex workspace, and RWorkspace to real
             workspace. NB refers to the optimal block size for the
             immediately following subroutine, as returned by ILAENV.
             HSWORK refers to the workspace preferred by ZHSEQR, as
             calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
             the worst case.) */

    minwrk = 1;
    if (*info == 0 && *lwork >= 1) {
        maxwrk = *n + *n * ilaenv_(&c__1, "ZGEHRD", " ", n, &c__1, n, &c__0,
                                   6L, 1L);
        if (! wantvl && ! wantvr) {
            /* Computing MAX */
            i__1 = 1, i__2 = *n << 1;
            minwrk = max(i__1,i__2);
            if (! (wntsnn || wntsne)) {
                /* Computing MAX */
                i__1 = minwrk, i__2 = *n * *n + (*n << 1);
                minwrk = max(i__1,i__2);
            }
            /* Computing MAX */
            i__1 = ilaenv_(&c__8, "ZHSEQR", "SN", n, &c__1, n, &c_n1, 6L, 2L);
            maxb = max(i__1,2);
            if (wntsnn) {
                /* Computing MIN
                   Computing MAX */
                i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "EN", n, &c__1, n, &
                                         c_n1, 6L, 2L);
                i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
                k = min(i__1,i__2);
            } else {
                /* Computing MIN
                   Computing MAX */
                i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "SN", n, &c__1, n, &
                                         c_n1, 6L, 2L);
                i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
                k = min(i__1,i__2);
            }
            /* Computing MAX */
            i__1 = k * (k + 2), i__2 = *n << 1;
            hswork = max(i__1,i__2);
            /* Computing MAX */
            i__1 = max(maxwrk,1);
            maxwrk = max(i__1,hswork);
            if (! (wntsnn || wntsne)) {
                /* Computing MAX */
                i__1 = maxwrk, i__2 = *n * *n + (*n << 1);
                maxwrk = max(i__1,i__2);
            }
        } else {
            /* Computing MAX */
            i__1 = 1, i__2 = *n << 1;
            minwrk = max(i__1,i__2);
            if (! (wntsnn || wntsne)) {
                /* Computing MAX */
                i__1 = minwrk, i__2 = *n * *n + (*n << 1);
                minwrk = max(i__1,i__2);
            }
            /* Computing MAX */
            i__1 = ilaenv_(&c__8, "ZHSEQR", "SN", n, &c__1, n, &c_n1, 6L, 2L);
            maxb = max(i__1,2);
            /* Computing MIN
               Computing MAX */
            i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "EN", n, &c__1, n, &
                                     c_n1, 6L, 2L);
            i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
            k = min(i__1,i__2);
            /* Computing MAX */
            i__1 = k * (k + 2), i__2 = *n << 1;
            hswork = max(i__1,i__2);
            /* Computing MAX */
            i__1 = max(maxwrk,1);
            maxwrk = max(i__1,hswork);
            /* Computing MAX */
            i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR",
                                  " ", n, &c__1, n, &c_n1, 6L, 1L);
            maxwrk = max(i__1,i__2);
            if (! (wntsnn || wntsne)) {
                /* Computing MAX */
                i__1 = maxwrk, i__2 = *n * *n + (*n << 1);
                maxwrk = max(i__1,i__2);
            }
            /* Computing MAX */
            i__1 = maxwrk, i__2 = *n << 1, i__1 = max(i__1,i__2);
            maxwrk = max(i__1,1);
        }
        WORK(1).r = (doublereal) maxwrk, WORK(1).i = 0.;
    }
    if (*lwork < minwrk) {
        *info = -20;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("ZGEEVX", &i__1);
        return 0;
    }

    /*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

    /*     Get machine constants */

    eps = dlamch_("P");
    smlnum = dlamch_("S");
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
    smlnum = sqrt(smlnum) / eps;
    bignum = 1. / smlnum;

    /*     Scale A if max element outside range [SMLNUM,BIGNUM] */

    icond = 0;
    anrm = zlange_("M", n, n, &A(1,1), lda, dum);
    scalea = FALSE_;
    if (anrm > 0. && anrm < smlnum) {
        scalea = TRUE_;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = TRUE_;
        cscale = bignum;
    }
    if (scalea) {
        zlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &A(1,1), lda, &
                ierr);
    }

    /*     Balance the matrix and compute ABNRM */

    zgebal_(balanc, n, &A(1,1), lda, ilo, ihi, &SCALE(1), &ierr);
    *abnrm = zlange_("1", n, n, &A(1,1), lda, dum);
    if (scalea) {
        DUM(0) = *abnrm;
        dlascl_("G", &c__0, &c__0, &cscale, &anrm, &c__1, &c__1, dum, &c__1, &
                ierr);
        *abnrm = DUM(0);
    }

    /*     Reduce to upper Hessenberg form
           (CWorkspace: need 2*N, prefer N+N*NB)
           (RWorkspace: none) */

    itau = 1;
    iwrk = itau + *n;
    i__1 = *lwork - iwrk + 1;
    zgehrd_(n, ilo, ihi, &A(1,1), lda, &WORK(itau), &WORK(iwrk), &i__1, &
            ierr);

    if (wantvl) {

        /*        Want left eigenvectors
                  Copy Householder vectors to VL */

        *(unsigned char *)side = 'L';
        zlacpy_("L", n, n, &A(1,1), lda, &VL(1,1), ldvl);

        /*        Generate unitary matrix in VL
                  (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
                  (RWorkspace: none) */

        i__1 = *lwork - iwrk + 1;
        zunghr_(n, ilo, ihi, &VL(1,1), ldvl, &WORK(itau), &WORK(iwrk), &
                i__1, &ierr);

        /*        Perform QR iteration, accumulating Schur vectors in VL
                  (CWorkspace: need 1, prefer HSWORK (see comments) )
                  (RWorkspace: none) */

        iwrk = itau;
        i__1 = *lwork - iwrk + 1;
        zhseqr_("S", "V", n, ilo, ihi, &A(1,1), lda, &W(1), &VL(1,1), ldvl, &WORK(iwrk), &i__1, info);

        if (wantvr) {

            /*           Want left and right eigenvectors
                         Copy Schur vectors to VR */

            *(unsigned char *)side = 'B';
            zlacpy_("F", n, n, &VL(1,1), ldvl, &VR(1,1), ldvr)
            ;
        }

    } else if (wantvr) {

        /*        Want right eigenvectors
                  Copy Householder vectors to VR */

        *(unsigned char *)side = 'R';
        zlacpy_("L", n, n, &A(1,1), lda, &VR(1,1), ldvr);

        /*        Generate unitary matrix in VR
                  (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
                  (RWorkspace: none) */

        i__1 = *lwork - iwrk + 1;
        zunghr_(n, ilo, ihi, &VR(1,1), ldvr, &WORK(itau), &WORK(iwrk), &
                i__1, &ierr);

        /*        Perform QR iteration, accumulating Schur vectors in VR
                  (CWorkspace: need 1, prefer HSWORK (see comments) )
                  (RWorkspace: none) */

        iwrk = itau;
        i__1 = *lwork - iwrk + 1;
        zhseqr_("S", "V", n, ilo, ihi, &A(1,1), lda, &W(1), &VR(1,1), ldvr, &WORK(iwrk), &i__1, info);

    } else {

        /*        Compute eigenvalues only
                  If condition numbers desired, compute Schur form */

        if (wntsnn) {
            *(unsigned char *)job = 'E';
        } else {
            *(unsigned char *)job = 'S';
        }

        /*        (CWorkspace: need 1, prefer HSWORK (see comments) )
                  (RWorkspace: none) */

        iwrk = itau;
        i__1 = *lwork - iwrk + 1;
        zhseqr_(job, "N", n, ilo, ihi, &A(1,1), lda, &W(1), &VR(1,1), ldvr, &WORK(iwrk), &i__1, info);
    }

    /*     If INFO > 0 from ZHSEQR, then quit */

    if (*info > 0) {
        goto L50;
    }

    if (wantvl || wantvr) {

        /*        Compute left and/or right eigenvectors
                  (CWorkspace: need 2*N)
                  (RWorkspace: need N) */

        ztrevc_(side, "B", select, n, &A(1,1), lda, &VL(1,1), ldvl,
                &VR(1,1), ldvr, n, &nout, &WORK(iwrk), &RWORK(1), &
                ierr);
    }

    /*     Compute condition numbers if desired
           (CWorkspace: need N*N+2*N unless SENSE = 'E')
           (RWorkspace: need 2*N unless SENSE = 'E') */

    if (! wntsnn) {
        ztrsna_(sense, "A", select, n, &A(1,1), lda, &VL(1,1),
                ldvl, &VR(1,1), ldvr, &RCONDE(1), &RCONDV(1), n, &nout,
                &WORK(iwrk), n, &RWORK(1), &icond);
    }

    if (wantvl) {

        /*        Undo balancing of left eigenvectors */

        zgebak_(balanc, "L", n, ilo, ihi, &SCALE(1), n, &VL(1,1), ldvl,
                &ierr);

        /*        Normalize left eigenvectors and make largest component real
        */

        i__1 = *n;
        for (i = 1; i <= *n; ++i) {
            scl = 1. / dznrm2_(n, &VL(1,i), &c__1);
            zdscal_(n, &scl, &VL(1,i), &c__1);
            i__2 = *n;
            for (k = 1; k <= *n; ++k) {
                i__3 = k + i * vl_dim1;
                /* Computing 2nd power */
                d__1 = VL(k,i).r;
                /* Computing 2nd power */
                d__2 = d_imag(&VL(k,i));
                RWORK(k) = d__1 * d__1 + d__2 * d__2;
                /* L10: */
            }
            k = idamax_(n, &RWORK(1), &c__1);
            d_cnjg(&z__2, &VL(k,i));
            d__1 = sqrt(RWORK(k));
            z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1;
            tmp.r = z__1.r, tmp.i = z__1.i;
            zscal_(n, &tmp, &VL(1,i), &c__1);
            i__2 = k + i * vl_dim1;
            i__3 = k + i * vl_dim1;
            d__1 = VL(k,i).r;
            z__1.r = d__1, z__1.i = 0.;
            VL(k,i).r = z__1.r, VL(k,i).i = z__1.i;
            /* L20: */
        }
    }

    if (wantvr) {

        /*        Undo balancing of right eigenvectors */

        zgebak_(balanc, "R", n, ilo, ihi, &SCALE(1), n, &VR(1,1), ldvr,
                &ierr);

        /*        Normalize right eigenvectors and make largest component real
         */

        i__1 = *n;
        for (i = 1; i <= *n; ++i) {
            scl = 1. / dznrm2_(n, &VR(1,i), &c__1);
            zdscal_(n, &scl, &VR(1,i), &c__1);
            i__2 = *n;
            for (k = 1; k <= *n; ++k) {
                i__3 = k + i * vr_dim1;
                /* Computing 2nd power */
                d__1 = VR(k,i).r;
                /* Computing 2nd power */
                d__2 = d_imag(&VR(k,i));
                RWORK(k) = d__1 * d__1 + d__2 * d__2;
                /* L30: */
            }
            k = idamax_(n, &RWORK(1), &c__1);
            d_cnjg(&z__2, &VR(k,i));
            d__1 = sqrt(RWORK(k));
            z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1;
            tmp.r = z__1.r, tmp.i = z__1.i;
            zscal_(n, &tmp, &VR(1,i), &c__1);
            i__2 = k + i * vr_dim1;
            i__3 = k + i * vr_dim1;
            d__1 = VR(k,i).r;
            z__1.r = d__1, z__1.i = 0.;
            VR(k,i).r = z__1.r, VR(k,i).i = z__1.i;
            /* L40: */
        }
    }

    /*     Undo scaling if necessary */

L50:
    if (scalea) {
        i__1 = *n - *info;
        /* Computing MAX */
        i__3 = *n - *info;
        i__2 = max(i__3,1);
        zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &W(*info + 1)
                , &i__2, &ierr);
        if (*info == 0) {
            if ((wntsnv || wntsnb) && icond == 0) {
                dlascl_("G", &c__0, &c__0, &cscale, &anrm, n, &c__1, &RCONDV(
                            1), n, &ierr);
            }
        } else {
            i__1 = *ilo - 1;
            zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &W(1), n,
                    &ierr);
        }
    }

    WORK(1).r = (doublereal) maxwrk, WORK(1).i = 0.;
    return 0;

    /*     End of ZGEEVX */

} /* zgeevx_ */