コード例 #1
0
ファイル: wave3d_check.cpp プロジェクト: arbenson/ddfmm
//---------------------------------------------------------------------
int Wave3d::check(ParVec<int, cpx, PtPrtn>& den, ParVec<int, cpx, PtPrtn>& val,
                  IntNumVec& chkkeys, double& relerr) {
#ifndef RELEASE
    CallStackEntry entry("Wave3d::check");
#endif
    SAFE_FUNC_EVAL( MPI_Barrier(MPI_COMM_WORLD) );
  
    _self = this;
    int mpirank = getMPIRank();
    ParVec<int, Point3, PtPrtn>& pos = (*_posptr);
  
    //1. get pos
    std::vector<int> all(1,1);
    std::vector<int> chkkeyvec;
    for (int i = 0; i < chkkeys.m(); ++i) {
        chkkeyvec.push_back( chkkeys(i) );
    }
    pos.getBegin(chkkeyvec, all);
    pos.getEnd(all);
    std::vector<Point3> tmpsrcpos;
    for (std::map<int,Point3>::iterator mi = pos.lclmap().begin();
        mi != pos.lclmap().end(); ++mi) {
        if(pos.prtn().owner(mi->first) == mpirank) {
            tmpsrcpos.push_back(mi->second);
        }
    }
    std::vector<cpx> tmpsrcden;
    for (std::map<int,cpx>::iterator mi = den.lclmap().begin();
        mi != den.lclmap().end(); ++mi) {
        if(den.prtn().owner(mi->first) == mpirank) {
            tmpsrcden.push_back(mi->second);
        }
    }
  
    std::vector<Point3> tmptrgpos;
    for (int i = 0; i < chkkeyvec.size(); ++i) {
        tmptrgpos.push_back( pos.access(chkkeyvec[i]) );
    }

    DblNumMat srcpos(3, tmpsrcpos.size(), false, (double*)&(tmpsrcpos[0]));
    CpxNumVec srcden(tmpsrcden.size(), false, (cpx*)&(tmpsrcden[0]));
    DblNumMat trgpos(3, tmptrgpos.size(), false, (double*)&(tmptrgpos[0]));
    CpxNumVec trgval(tmptrgpos.size());

    CpxNumMat inter;
    SAFE_FUNC_EVAL( _kernel.kernel(trgpos, srcpos, srcpos, inter) );
    // If no points were assigned to this processor, then the trgval
    // should be zero.
    if (inter.n() != 0) {
	SAFE_FUNC_EVAL( zgemv(1.0, inter, srcden, 0.0, trgval) );
    } else {
	for (int i = 0; i < trgval.m(); ++i) {
	    trgval(i) = 0;
	}
    }

    CpxNumVec allval(trgval.m());
    SAFE_FUNC_EVAL( MPI_Barrier(MPI_COMM_WORLD) );
    // Note: 2 doubles per complex number
    SAFE_FUNC_EVAL( MPI_Allreduce(trgval.data(), allval.data(), trgval.m() * 2, MPI_DOUBLE,
                                  MPI_SUM, MPI_COMM_WORLD) );
  
    //2. get val
    val.getBegin(chkkeyvec, all);  val.getEnd(all);
    CpxNumVec truval(chkkeyvec.size());
    for(int i = 0; i < chkkeyvec.size(); ++i)
        truval(i) = val.access(chkkeyvec[i]);
  
    CpxNumVec errval(chkkeyvec.size());
    for(int i = 0; i < chkkeyvec.size(); ++i)
        errval(i) = allval(i) - truval(i);
  
    double tn = sqrt( energy(truval) );
    double en = sqrt( energy(errval) );
    relerr = en / tn;
  
    SAFE_FUNC_EVAL( MPI_Barrier(MPI_COMM_WORLD) );
    return 0;
}
コード例 #2
0
ファイル: pjdher_bi.c プロジェクト: annube/tmLQCD
void pjdher_bi(int n, int lda, double tau, double tol, 
	       int kmax, int jmax, int jmin, int itmax,
	       int blksize, int blkwise, 
	       int V0dim, complex *V0, 
	       int solver_flag, 
	       int linitmax, double eps_tr, double toldecay,
	       int verbosity,
	       int *k_conv, complex *Q, double *lambda, int *it,
	       int maxmin, const int shift_mode,
	       matrix_mult_bi A_psi){
  
  /****************************************************************************
  *                                                                          *
  * Local variables                                                          *
  *                                                                          *
  ****************************************************************************/
  
  /* constants */

  /* allocatables: 
   * initialize with NULL, so we can free even unallocated ptrs */
  double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL;

  complex *V_ = NULL, *V, *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL,
    *Res_ = NULL, *Res,
    *eigwork = NULL, *temp1_ = NULL, *temp1;

  int *idx1 = NULL, *idx2 = NULL, 
    *convind = NULL, *keepind = NULL, *solvestep = NULL, 
    *actcorrits = NULL;

  /* non-allocated ptrs */
  complex *q, *v, *u, *r = NULL;  
/*   complex *matdummy, *vecdummy; */
  
  /* scalar vars */
  double theta, alpha, it_tol, dum;


  int i, k, j, actblksize, eigworklen, found, conv, keep, n2, N = n*sizeof(complex)/sizeof(bispinor);
  int act, cnt, idummy, info, CntCorrIts=0, endflag=0;

  /* variables for random number generator */
  int IDIST = 1;
  int ISEED[4] = {2, 3, 5, 7};
  ISEED[0] = g_proc_id;

 /****************************************************************************
  *                                                                          *
  * Of course on the CRAY everything is different :( !!                      *
  * that's why we need something more.
  *                                                                          *
  ****************************************************************************/

#ifdef CRAY
  fupl_u = _cptofcd(cupl_u, strlen(cupl_u));
  fupl_c = _cptofcd(cupl_c, strlen(cupl_c));
  fupl_n = _cptofcd(cupl_n, strlen(cupl_n));
  fupl_a = _cptofcd(cupl_a, strlen(cupl_a));
  fupl_v = _cptofcd(cupl_v, strlen(cupl_v));
  filaenv = _cptofcd(cilaenv, strlen(cilaenv));
  fvu = _cptofcd(cvu, strlen(cvu));
#endif

 /****************************************************************************
  *                                                                          *
  * Execution starts here...                                                 *
  *                                                                          *
  ****************************************************************************/

  /*   NEW PART FOR GAUGE_COPY */
#ifdef _GAUGE_COPY
  update_backward_gauge();
#endif
  /* END NEW PART */

  /* print info header */
  if (verbosity >= 2 && g_proc_id == g_stdio_proc) {
    printf("Jacobi-Davidson method for hermitian Matrices\n");
    printf("Solving  A*x = lambda*x \n\n");
    printf("  N=      %10d  ITMAX=%4d\n", n, itmax);
    printf("  KMAX=%3d  JMIN=%3d  JMAX=%3d  V0DIM=%3d\n", 
	   kmax, jmin, jmax, V0dim);
    printf("  BLKSIZE=        %2d  BLKWISE=      %5s\n", 
	   blksize, blkwise ? "TRUE" : "FALSE");
    printf("  TOL=  %11.4e TAU=  %11.4e\n", 
	   tol, tau);
    printf("  LINITMAX=    %5d  EPS_TR=  %10.3e  TOLDECAY=%9.2e\n", 
	   linitmax, eps_tr, toldecay);
    printf("\n Computing %s eigenvalues\n",
	   maxmin ? "maximal" : "minimal");
    printf("\n");
    fflush( stdout );
  }

  /* validate input parameters */
  if(tol <= 0) jderrorhandler(401,"");
  if(kmax <= 0 || kmax > n) jderrorhandler(402,"");
  if(jmax <= 0 || jmax > n) jderrorhandler(403,"");
  if(jmin <= 0 || jmin > jmax) jderrorhandler(404,"");
  if(itmax < 0) jderrorhandler(405,"");
  if(blksize > jmin || blksize > (jmax - jmin)) jderrorhandler(406,"");
  if(blksize <= 0 || blksize > kmax) jderrorhandler(406,"");
  if(blkwise < 0 || blkwise > 1) jderrorhandler(407,"");
  if(V0dim < 0 || V0dim >= jmax) jderrorhandler(408,"");
  if(linitmax < 0) jderrorhandler(409,"");
  if(eps_tr < 0.) jderrorhandler(500,"");
  if(toldecay <= 1.0) jderrorhandler(501,"");

#ifdef ESSL
  _CONE._data._re=1.; _CONE._data._im=0.;
  _CMONE._data._re=-1.; _CMONE._data._im=0.;
  _CZERO._data._re=0.; _CZERO._data._im=0.;
#endif
  CONE.re=1.; CONE.im=0.;
  CZERO.re=0.; CZERO.im=0.;
  CMONE.re=-1.; CMONE.im=0.;

  /* Get hardware-dependent values:
   * Opt size of workspace for ZHEEV is (NB+1)*j, where NB is the opt.
   * block size... */
#ifdef ESSL
  eigworklen = 8*jmax;
#else
  eigworklen = (2 + _FT(ilaenv)(&ONE, filaenv, fvu, &jmax, &MONE, &MONE, &MONE, 6, 2)) * jmax;
#endif


  /* Allocating memory for matrices & vectors */ 
#if (defined SSE || defined SSE2 || defined SSE3)
  V_ = (complex *)malloc((lda * jmax + 4) * sizeof(complex));
  V = (complex*)(((unsigned long int)(V_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  V_ = (complex *)malloc(lda * jmax * sizeof(complex));
  V = V_;
#endif
  if(errno == ENOMEM) jderrorhandler(300,"V in pjdher");
  U = (complex *)malloc(jmax * jmax * sizeof(complex));
  if(errno == ENOMEM) jderrorhandler(300,"U in pjdher");
  s = (double *)malloc(jmax * sizeof(double));
  if(errno == ENOMEM) jderrorhandler(300,"s in pjdher");
#if (defined SSE || defined SSE2 || defined SSE3)
  Res_ = (complex *)malloc((lda * blksize+4) * sizeof(complex));
  Res = (complex*)(((unsigned long int)(Res_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  Res_ = (complex *)malloc(lda * blksize * sizeof(complex));
  Res = Res_;
#endif
  if(errno == ENOMEM) jderrorhandler(300,"Res in pjdher");
  resnrm = (double *)malloc(blksize * sizeof(double));
  if(errno == ENOMEM) jderrorhandler(300,"resnrm in pjdher");
  resnrm_old = (double *)calloc(blksize,sizeof(double));
  if(errno == ENOMEM) jderrorhandler(300,"resnrm_old in pjdher");
  M = (complex *)malloc(jmax * jmax * sizeof(complex));
  if(errno == ENOMEM) jderrorhandler(300,"M in pjdher");
#ifdef ESSL
  Z = (complex *)malloc(jmax * jmax * sizeof(complex)); 
  if(errno == ENOMEM) jderrorhandler(300,"Z in pjdher"); 
#endif
  Vtmp = (complex *)malloc(jmax * jmax * sizeof(complex));
  if(errno == ENOMEM) jderrorhandler(300,"Vtmp in pjdher");
  p_work = (complex *)malloc(n * sizeof(complex));
  if(errno == ENOMEM) jderrorhandler(300,"p_work in pjdher");

  /* ... */
  idx1 = (int *)malloc(jmax * sizeof(int));
  if(errno == ENOMEM) jderrorhandler(300,"idx1 in pjdher");
  idx2 = (int *)malloc(jmax * sizeof(int));
  if(errno == ENOMEM) jderrorhandler(300,"idx2 in pjdher");

  /* Indices for (non-)converged approximations */
  convind = (int *)malloc(blksize * sizeof(int));
  if(errno == ENOMEM) jderrorhandler(300,"convind in pjdher");
  keepind = (int *)malloc(blksize * sizeof(int));
  if(errno == ENOMEM) jderrorhandler(300,"keepind in pjdher");
  solvestep = (int *)malloc(blksize * sizeof(int));
  if(errno == ENOMEM) jderrorhandler(300,"solvestep in pjdher");
  actcorrits = (int *)malloc(blksize * sizeof(int));
  if(errno == ENOMEM) jderrorhandler(300,"actcorrits in pjdher");

  rwork = (double *)malloc(3*jmax * sizeof(double));
  if(errno == ENOMEM) jderrorhandler(300,"rwork in pjdher");

  eigwork = (complex *)malloc(eigworklen * sizeof(complex));
  if(errno == ENOMEM) jderrorhandler(300,"eigwork in pjdher");

#if (defined SSE || defined SSE2 || defined SSE3)
  temp1_ = (complex *)malloc((lda+4) * sizeof(complex));
  temp1 = (complex*)(((unsigned long int)(temp1_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  temp1_ = (complex *)malloc(lda * sizeof(complex));
  temp1 = temp1_;
#endif
  if(errno == ENOMEM) jderrorhandler(300,"temp1 in pjdher");
  dtemp = (double *)malloc(n * sizeof(complex));
  if(errno == ENOMEM) jderrorhandler(300,"dtemp in pjdher");

  /* Set variables for Projection routines */
  n2 = 2*n;
  p_n = n;
  p_n2 = n2;
  p_Q = Q;
  p_A_psi = A_psi;
  p_lda = lda;


  /**************************************************************************
   *                                                                        *
   * Generate initial search subspace V. Vectors are taken from V0 and if   *
   * necessary randomly generated.                                          *
   *                                                                        *
   **************************************************************************/

  /* copy V0 to V */
  _FT(zlacpy)(fupl_a, &n, &V0dim, V0, &lda, V, &lda, 1);
  j = V0dim;
  /* if V0dim < blksize: generate additional random vectors */
  if (V0dim < blksize) {
    idummy = (blksize - V0dim)*n; /* nof random numbers */
    _FT(zlarnv)(&IDIST, ISEED, &idummy, V + V0dim*lda);
    j = blksize;
  }
  for (cnt = 0; cnt < j; cnt ++) {
    pModifiedGS_bi(V + cnt*lda, n, cnt, V, lda);
    alpha = sqrt(square_norm_bi((bispinor*)(V+cnt*lda), N));
    alpha = 1.0 / alpha;
#ifdef ESSL
    dscal(n2, alpha, (double *)(V + cnt*lda), 1);
#else
    _FT(dscal)(&n2, &alpha, (double *)(V + cnt*lda), &ONE);
#endif
  }

  /* Generate interaction matrix M = V^dagger*A*V. Only the upper triangle
     is computed. */
  /* Store M at the moment locally */
  for (cnt = 0; cnt < j; cnt++){
    A_psi((bispinor*) temp1, (bispinor*) (V+cnt*lda));
    idummy = cnt+1;
    for(i = 0; i < idummy; i++){
      M[cnt*jmax+i] = scalar_prod_bi((bispinor*) (V+i*lda), (bispinor*) temp1, N);
    }
  }

  /* Other initializations */
  k = 0; (*it) = 0; 
  /* if we are in a restart situation */
  if((*k_conv) > 0) {
    k = (*k_conv);
  }

  actblksize = blksize; 
  for(act = 0; act < blksize; act ++){
    solvestep[act] = 1;
  }


 /****************************************************************************
  *                                                                          *
  * Main JD-iteration loop                                                   *
  *                                                                          *
  ****************************************************************************/

  while((*it) < itmax) {
    /* Iteration loop */
  
 /****************************************************************************
  *                                                                          *
  * Solving the projected eigenproblem                                       *
  *                                                                          *
  * M*u = V^dagger*A*V*u = s*u                                               *
  * M is hermitian, only the upper triangle is stored                        *
  *                                                                          *
  ****************************************************************************/

#ifdef ESSL
    info = 0;
    for(act = 0; act < j; act++) {
      for(cnt = 0; cnt <= act; cnt++) {
	Z[info] = M[act*jmax+cnt];
	info++;
      }
    }
/*     _FT(zlacpy)(fupl_u, &j, &j, M, &jmax, Z, &j, 1); */
#else
    _FT(zlacpy)(fupl_u, &j, &j, M, &jmax, U, &jmax, 1);
#endif
#ifdef ESSL
    zhpev(21, (dcmplx*) Z, s, U, jmax, j, rwork, eigworklen);
    info = 0;
#else
    _FT(zheev)(fupl_v, fupl_u, &j, U, &jmax, s, eigwork, &eigworklen, rwork, &info, 1, 1);
#endif

    if (info != 0) {
      printf("error solving the projected eigenproblem.");
      printf(" zheev: info = %d\n", info);
    }
    if(info != 0) jderrorhandler(502,"problem in zheev");

    /* Reverse order of eigenvalues if maximal value is needed */
    if(maxmin == 1){
      sorteig(j, s, U, jmax, s[j-1], dtemp, idx1, idx2, 0); 
    }
    else{
      sorteig(j, s, U, jmax, 0., dtemp, idx1, idx2, 0); 
    }

 /****************************************************************************
  *                                                                          *
  * Convergence/Restart Check                                                *
  *                                                                          *
  * In case of convergence, strip off a whole block or just the converged    *
  * ones and put 'em into Q.  Update the matrices Q, V, U, s                 *
  *                                                                          *
  * In case of a restart update the V, U and M matrices and recompute the    *
  * Eigenvectors                                                             *
  *                                                                          *
  ****************************************************************************/

    found = 1;
    while(found) {

      /* conv/keep = Number of converged/non-converged Approximations */
      conv = 0; keep = 0;

      for(act=0; act < actblksize; act++){

	/* Setting pointers for single vectors */
	q = Q + (act+k)*lda; 
	u = U + act*jmax; 
	r = Res + act*lda; 
	
	/* Compute Ritz-Vector Q[:,k+cnt1]=V*U[:,cnt1] */
	theta = s[act];
#ifdef ESSL
	zgemv(fupl_n, n, j, _CONE, V, lda, (dcmplx*)u, 1, _CZERO, (dcmplx*)q, 1);
#else
	_FT(zgemv)(fupl_n, &n, &j, &CONE, V, &lda, u, &ONE, &CZERO, q, &ONE, 1);
#endif

	/* Compute the residual */
	A_psi((bispinor*) r, (bispinor*) q); 

	theta = -theta;
#ifdef ESSL
	daxpy(n2, theta, (double*) q, 1, (double*) r, 1);
#else
	_FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);
#endif
	/* Compute norm of the residual and update arrays convind/keepind*/
	resnrm_old[act] = resnrm[act];
	resnrm[act] = sqrt(square_norm_bi((bispinor*) r, N));
	if (resnrm[act] < tol){
	  convind[conv] = act; 
	  conv = conv + 1; 
	}
	else{
	  keepind[keep] = act; 
	  keep = keep + 1; 
	}
	
      }  /* for(act = 0; act < actblksize; act ++) */

      /* Check whether the blkwise-mode is chosen and ALL the
	 approximations converged, or whether the strip-off mode is
	 active and SOME of the approximations converged */

      found = ((blkwise==1 && conv==actblksize) || (blkwise==0 && conv!=0)) 
	&& (j > actblksize || k == kmax - actblksize);
      
  /***************************************************************************
   *                                                                        *
   * Convergence Case                                                       *
   *                                                                        *
   * In case of convergence, strip off a whole block or just the converged  *
   * ones and put 'em into Q.  Update the matrices Q, V, U, s               *
   *                                                                        *
   **************************************************************************/

      if (found) {

	/* Store Eigenvalues */
	for(act = 0; act < conv; act++)
	  lambda[k+act] = s[convind[act]];
	 
	/* Re-use non approximated Ritz-Values */
	for(act = 0; act < keep; act++)
	  s[act] = s[keepind[act]];

	/* Shift the others in the right position */
	for(act = 0; act < (j-actblksize); act ++)
	  s[act+keep] = s[act+actblksize];

	/* Update V. Re-use the V-Vectors not looked at yet. */
	idummy = j - actblksize;
	for (act = 0; act < n; act = act + jmax) {
	  cnt = act + jmax > n ? n-act : jmax;
	  _FT(zlacpy)(fupl_a, &cnt, &j, V+act, &lda, Vtmp, &jmax, 1);
#ifdef ESSL
	  zgemm(fupl_n, fupl_n, cnt, idummy, j, _CONE, Vtmp, 
		jmax,  U+actblksize*jmax, jmax, _CZERO,  V+act+keep*lda, lda);
#else
	  _FT(zgemm)(fupl_n, fupl_n, &cnt, &idummy, &j, &CONE, Vtmp, 
		     &jmax, U+actblksize*jmax, &jmax, &CZERO, V+act+keep*lda, &lda, 1, 1);
#endif
	}

	/* Insert the not converged approximations as first columns in V */
	for(act = 0; act < keep; act++){
	  _FT(zlacpy)(fupl_a, &n, &ONE, Q+(k+keepind[act])*lda, &lda, V+act*lda, &lda, 1);
	}

	/* Store Eigenvectors */
	for(act = 0; act < conv; act++){
	  _FT(zlacpy)(fupl_a, &n, &ONE, Q+(k+convind[act])*lda, &lda, Q+(k+act)*lda, &lda, 1);
	}

	/* Update SearchSpaceSize j */
	j = j - conv;

	/* Let M become a diagonalmatrix with the Ritzvalues as entries ... */ 
	_FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
	for (act = 0; act < j; act++){
	  M[act*jmax + act].re = s[act];
	}
	
	/* ... and U the Identity(jnew,jnew) */
	_FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);

	if(shift_mode == 1){
	  if(maxmin == 0){
	    for(act = 0; act < conv; act ++){
	      if (lambda[k+act] > tau){
		tau = lambda[k+act];
	      }
	    }
	  }
	  else{
	    for(act = 0; act < conv; act ++){
	      if (lambda[k+act] < tau){
		tau = lambda[k+act];
	      }
	    } 
	  }
	}

	/* Update Converged-Eigenpair-counter */
	k = k + conv;

	/* Update the new blocksize */
	actblksize=min(blksize, kmax-k);

	/* Exit main iteration loop when kmax eigenpairs have been
           approximated */
	if (k == kmax){
	  endflag = 1;
	  break;
	}
	/* Counter for the linear-solver-accuracy */
	for(act = 0; act < keep; act++)
	  solvestep[act] = solvestep[keepind[act]];
	/* Now we expect to have the next eigenvalues */
	/* allready with some accuracy                */
	/* So we do not need to start from scratch... */
	for(act = keep; act < blksize; act++)
	  solvestep[act] = 1;

      } /* if(found) */
      if(endflag == 1){
	break;
      }
  /**************************************************************************
   *                                                                        *
   * Restart                                                                *
   *                                                                        *
   * The Eigenvector-Aproximations corresponding to the first jmin          *
   * Ritz-Vectors are kept.                                                 *
   *                                                                        *
   **************************************************************************/
      if (j+actblksize > jmax) {

	idummy = j; j = jmin;

	for (act = 0; act < n; act = act + jmax) { /* V = V * U(:,1:j) */
	  cnt = act+jmax > n ? n-act : jmax;
	  _FT(zlacpy)(fupl_a, &cnt, &idummy, V+act, &lda, Vtmp, &jmax, 1);
#ifdef ESSL
	  zgemm(fupl_n, fupl_n, cnt, j, idummy, _CONE, Vtmp,
		jmax, U, jmax, _CZERO, V+act, lda);
#else
	  _FT(zgemm)(fupl_n, fupl_n, &cnt, &j, &idummy, &CONE, Vtmp, 
		     &jmax, U, &jmax, &CZERO, V+act, &lda, 1, 1);
#endif
	}
	  
	_FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);
	_FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
	for (act = 0; act < j; act++)
	  M[act*jmax + act].re = s[act];
      }

    } /* while(found) */    

    if(endflag == 1){
      break;
    }

 /****************************************************************************
  *                                                                          *
  * Solving the correction equations                                         *
  *                                                                          *
  *                                                                          *
  ****************************************************************************/

    /* Solve actblksize times the correction equation ... */
    for (act = 0; act < actblksize; act ++) {      

      /* Setting start-value for vector v as zeros(n,1). Guarantees
         orthogonality */
      v = V + j*lda;
      for (cnt = 0; cnt < n; cnt ++){ 
	v[cnt].re = 0.;
	v[cnt].im = 0.;
      }

      /* Adaptive accuracy and shift for the lin.solver. In case the
	 residual is big, we don't need a too precise solution for the
	 correction equation, since even in exact arithmetic the
	 solution wouldn't be too usefull for the Eigenproblem. */
      r = Res + act*lda;

      if (resnrm[act] < eps_tr && resnrm[act] < s[act] && resnrm_old[act] > resnrm[act]){
	p_theta = s[act];
      }
      else{
	p_theta = tau;
      }
      p_k = k + actblksize;

      /* compute the new accuracy to used in the solver */
      /* if we are in blockwise mode, we do not want to */
      /* iterate solutions much more, if they have      */
      /* allready the desired precision                 */
      if(blkwise == 1 && resnrm[act] < tol) {
	it_tol = pow(toldecay, (double)(-5));
      }
      else {
	it_tol = pow(toldecay, (double)(-solvestep[act]));
      }
      solvestep[act] = solvestep[act] + 1;

      /* equation and project if necessary */
      pModifiedGS_bi(r, n, k + actblksize, Q, lda);

      i = g_sloppy_precision_flag;
      g_sloppy_precision = 1;
      g_sloppy_precision_flag = 1;
      /* Solve the correction equation ... */
      if (solver_flag == BICGSTAB){
	info = bicgstab_complex_bi((bispinor*) v, (bispinor*) r, linitmax, it_tol*it_tol, g_relative_precision_flag, VOLUME/2, &pProj_A_psi_bi);
      }
      else if(solver_flag == CG){ 
	info = cg_her_bi((bispinor*) v, (bispinor*) r, linitmax, it_tol*it_tol, g_relative_precision_flag, VOLUME/2, &pProj_A_psi_bi, 0, 0); 
      } 
      else{
	info = cg_her_bi((bispinor*) v, (bispinor*) r, linitmax, it_tol*it_tol, g_relative_precision_flag, VOLUME/2, &pProj_A_psi_bi, 0, 0); 
      }
      
      g_sloppy_precision = 0;
      g_sloppy_precision_flag = i;
      /* Actualizing profiling data */
      if (info == -1){
	CntCorrIts += linitmax;
      }
      else{
	CntCorrIts += info;
      }
      actcorrits[act] = info;

      /* orthonormalize v to Q, cause the implicit
	 orthogonalization in the solvers may be too inaccurate. Then
	 apply "IteratedCGS" to prevent numerical breakdown 
         in order to orthogonalize v to V */

      pModifiedGS_bi(v, n, k+actblksize, Q, lda);

      pIteratedClassicalGS_bi(v, &alpha, n, j, V, temp1, lda);

      alpha = 1.0 / alpha;
#ifdef ESSL
      dscal(n2, alpha, (double*) v, 1);
#else
      _FT(dscal)(&n2, &alpha, (double*) v, &ONE);
#endif      
      /* update interaction matrix M */
      A_psi((bispinor*) temp1, (bispinor*) v);
      idummy = j+1;
      for(i = 0; i < idummy; i++){
	M[j*jmax+i] = scalar_prod_bi((bispinor*) (V+i*lda), (bispinor*) temp1, N);
      }

      /* Increasing SearchSpaceSize j */
      j ++;
    }   /* for (act = 0;act < actblksize; act ++) */    

    /* Print information line */
    if(g_proc_id == g_stdio_proc){
      print_status(verbosity, *it, k, j - blksize, kmax, blksize, actblksize, 
		   s, resnrm, actcorrits);    
    }

    /* Increase iteration-counter for outer loop  */
    (*it) = (*it) + 1;

  } /* Main iteration loop */
  
  /******************************************************************
   *                                                                *
   * Eigensolutions converged or iteration limit reached            *
   *                                                                *
   * Print statistics. Free memory. Return.                         *
   *                                                                *
   ******************************************************************/

  (*k_conv) = k;
  if (verbosity >= 1) {
    if(g_proc_id == g_stdio_proc){
      printf("\nJDHER execution statistics\n\n");
      printf("IT_OUTER=%d   IT_INNER_TOT=%d   IT_INNER_AVG=%8.2f\n",
	     (*it), CntCorrIts, (double)CntCorrIts/(*it));
      printf("\nConverged eigensolutions in order of convergence:\n");
      printf("\n  I              LAMBDA(I)      RES(I)\n");
      printf("---------------------------------------\n");
    }
    for (act = 0; act < *k_conv; act ++) {
      /* Compute the residual for solution act */
      q = Q + act*lda;
      theta = -lambda[act];
      A_psi((bispinor*) r, (bispinor*) q);
#ifdef ESSL
      daxpy(n2, theta, (double*) q, 1, (double*) r, 1);
#else
      _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);
#endif
      alpha = sqrt(square_norm_bi((bispinor*) r, N));
      if(g_proc_id == g_stdio_proc){ 
	printf("%3d %22.15e %12.5e\n", act+1, lambda[act],
	       alpha);
/* 	       _FT(dnrm2)(&n2, (double*) r, &ONE)); */
      } 
    }
    if(g_proc_id == g_stdio_proc){ 
      printf("\n");
      fflush( stdout );
    }
  }

  free(V_); free(Vtmp); free(U); 
  free(s); free(Res_); free(resnrm); 
  free(M); free(Z);
  free(eigwork); free(temp1_);
  free(dtemp); free(rwork);
  free(p_work);
  free(idx1); free(idx2); 
  free(convind); free(keepind); free(solvestep); free(actcorrits);
  
} /* jdher(.....) */