Beispiel #1
0
void ComputeFinalEvecs_Z
    ( int nvecs, int n, Complex_Z *evecsl, int ldvl, Complex_Z *evecsr, int ldvr, Complex_Z *H, int ldh, char SRT_OPT, 
      double epsi, Complex_Z *evals, double *ernorms, double *xlnorms, double *xrnorms, Complex_Z *angles, 
      void (*matvec)(void *, void *, void *), void *params, Complex_Z *work, int worksize)
{

     Complex_Z *tmpH,*COEFL,*COEFR,*Res;
     int i,allelems,infoCG,ONE=1;
     

     tmpH = (Complex_Z *) calloc(ldh*nvecs,sizeof(Complex_Z));
     if(tmpH == NULL){
        printf("ERROR: not able to allocate tmpH in ComputeFinalEvecs\n");
        exit(2);}


     COEFL = (Complex_Z *) calloc(nvecs*nvecs,sizeof(Complex_Z));
     if(COEFL == NULL){
        printf("ERROR: not able to allocate COEFL in ComputeFinalEvecs\n");
        exit(2);}

     COEFR = (Complex_Z *) calloc(nvecs*nvecs,sizeof(Complex_Z));
     if(COEFR == NULL){
        printf("ERROR: not able to allocate COEFR in ComputeFinalEvecs\n");
        exit(2);} 

     Res = (Complex_Z *) calloc(n,sizeof(Complex_Z));
     if(Res == NULL){
         printf("ERROR: not able to allocate Res in ComputeFinalEvecs\n");
         exit(2);}


     allelems=ldh*nvecs;
     BLAS_ZCOPY(&allelems,H,&ONE,tmpH,&ONE);

     //void CG_eval(Complex_C *A, int N, int LDA, Complex_C *W, char SRT_OPT, 
     //        Complex_C *VL, int LDVL, Complex_C *VR, int LDVR, int *info);

     ZG_eval(tmpH,nvecs,ldh,evals,SRT_OPT,epsi,COEFL,nvecs,COEFR,nvecs,&infoCG);

     if(infoCG != 0){
         printf("ERROR: CG_eval inside ComputeFinalEvecs returned with flag %d\n",infoCG);
         exit(3);}
        
      
     //void restart_X(Complex_C *X, int ldx, Complex_C *hVecs, int nLocal, 
     //          int basisSize, int restartSize, Complex_C *rwork, int rworkSize) 


     Zrestart_X(evecsl, ldvl, COEFL, n, nvecs, nvecs, work, worksize);
     Zrestart_X(evecsr, ldvr, COEFR, n, nvecs, nvecs, work, worksize); 


    //No need to biorthogonalize because evecsl and evecsr were orginally biorthogonal 
    //and COEFL and COEFR comes out of CG_eval as biorthogonal.

    //Compute Ritz values, residual norms, etc.
    for(i=0; i < nvecs; i++){
       //void computeResNorm( Complex_C *xr, Complex_C *xl, Complex_C *lambda, float *rnorm, int n, Complex_C *Res, 
       //float *xlnorm, float *xrnorm, Complex_C *cangle, void (*matvec)(void *, void *, void *), void *params)
       ZcomputeResNorm(&evecsr[i*ldvr], &evecsl[i*ldvl],&evals[i],&ernorms[i],n,Res,&xlnorms[i],&xrnorms[i],
                      &angles[i],matvec,params);}

 

return;

}
Beispiel #2
0
void eigcg(int n, int lde, spinor * const x, spinor * const b, double *normb, 
           const double eps_sq, double restart_eps_sq, const int rel_prec, int maxit, int *iter, 
           double *reshist, int *flag, spinor **work, matrix_mult f, 
           int nev, int v_max, spinor *V, int esize, _Complex double *ework)
{
  double tolb;        
  double alpha, beta; /* CG scalars */
  double rho, rhoprev;
  double pAp;
  int it;   /* current iteration number */
  int i, j; /* loop variables */
  int zs,ds,tmpsize;
  spinor *r, *p, *Ap;   /* ptrs in work for CG vectors */
  _Complex double tempz;        /* double precision complex temp var */
  double tempd;         /* double temp var */
  int tempi;            /* int temp var */
  int ONE = 1;          /* var for passing 1 into BLAS routines */
  /*----------------------------------------------------------------------
         Eigen variables and setup    
    ----------------------------------------------------------------------*/
  /* Some constants */
  char cR = 'R'; char cL = 'L'; char cN ='N'; 
  char cV = 'V'; char cU = 'U'; char cC ='C';
  double betaprev, alphaprev;     /* remember the previous iterations scalars */
  int v_size;                     /* tracks the size of V */
  int lwork = 3*v_max;            /* the size of zwork */
  spinor *Ap_prev;
  void *_h;     
  _Complex double *H;         /* the V'AV projection matrix */
  void *_hevecs;
  _Complex double *Hevecs;    /* the eigenvectors of H */
  void *_hevecsold;
  _Complex double *Hevecsold; /* the eigenvectors of H(v_max-1,v_max-1) */
  void *_hevals;
  double    *Hevals;    /* the eigenvalues of H */
  void *_hevalsold;
  double    *Hevalsold; /* the eigenvalues of H(m-1,m-1) */
  void *_tau;
  _Complex double *TAU;	         
  void *_zwork;
  _Complex double *zwork;        /* double complex work array needed by zheev */
  void *_rwork;
  double *rwork;        /* double work array needed by zheev */

  int parallel;
  
  double tmpd;
  _Complex double tmpz;

  zs = sizeof(_Complex double);  
  ds = sizeof(double);

  int info, allelems = v_max*v_max;
  
#ifdef MPI
  parallel=1;
#else
  parallel=0;
#endif

  if(nev > 0)   /*allocate memory only if eigenvalues will be used */
  {
    #if (defined SSE || defined SSE2 || defined SSE3)
    if ((_h = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
      {fprintf(stderr,"ERROR Could not allocate H\n"); exit(1);}  
    }
    else
      H = (_Complex double *)(((unsigned long int)(_h)+ALIGN_BASE)&~ALIGN_BASE);
  
  
    if ((_hevecs = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
      {fprintf(stderr, "ERROR Could not allocate Hevecs\n"); exit(1);}
    }else
      Hevecs = (_Complex double *)(((unsigned long int)(_hevecs)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_hevecsold = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
        {fprintf(stderr, "ERROR Could not allocate Hevecsold\n"); exit(1);}  
    }else
      Hevecsold = (_Complex double *)(((unsigned long int)(_hevecsold)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_hevals = calloc(v_max+ALIGN_BASE,ds)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
        {fprintf(stderr, "ERROR Could not allocate Hevals\n"); exit(1);}
    
    }else
      Hevals = (double *)(((unsigned long int)(_hevals)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_hevalsold = calloc(v_max+ALIGN_BASE,ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
        {fprintf(stderr, "ERROR Could not allocate Hevalsold\n"); exit(1); }
    
    }else
      Hevalsold = (double *)(((unsigned long int)(_hevalsold)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_tau = calloc(2*nev+ALIGN_BASE,zs)) == NULL)  
    {
      if( g_proc_id == g_stdio_proc ) 
        {fprintf(stderr, "ERROR Could not allocate TAU\n"); exit(1); }
    
    }else
      TAU = (_Complex double *)(((unsigned long int)(_tau)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_zwork = calloc(lwork+ALIGN_BASE,zs)) == NULL)   
    {
      if( g_proc_id == g_stdio_proc)
      {fprintf(stderr, "ERROR Could not allocate zwork\n"); exit(1);}
    
    }else
      zwork = (_Complex double *)(((unsigned long int)(_zwork)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_rwork = calloc(3*v_max+ALIGN_BASE,ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
        {fprintf(stderr, "ERROR Could not allocate rwork\n"); exit(1);}
    
    }else
      rwork = (double *)(((unsigned long int)(_rwork)+ALIGN_BASE)&~ALIGN_BASE);
  
    #else
  
    if ((H = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
        {fprintf(stderr, "ERROR Could not allocate H\n"); exit(1);}
    }

    if ((Hevecs = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
        {fprintf(stderr, "ERROR Could not allocate Hevecs\n"); exit(1);}
    }

    if ((Hevecsold = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
      {fprintf(stderr, "ERROR Could not allocate Hevecsold\n"); exit(1);}
    }

    if ((Hevals = (double *) calloc(v_max, ds)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
        {fprintf(stderr, "ERROR Could not allocate Hevals\n"); exit(1);}
    }
     

    if ((Hevalsold = (double *) calloc(v_max, ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
        {fprintf(stderr, "ERROR Could not allocate Hevalsold\n"); exit(1); }
    }


    if ((TAU = (_Complex double *) calloc(2*nev, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
       {fprintf(stderr, "ERROR Could not allocate TAU\n"); exit(1); }
    
    }
  
  
    if ((zwork = (_Complex double *) calloc(lwork, zs)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
      {fprintf(stderr, "ERROR Could not allocate zwork\n"); exit(1);}
    
    }
  
    if ((rwork = (double *) calloc(3*v_max, ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
      {fprintf(stderr, "ERROR Could not allocate rwork\n"); exit(1);}
    
    }

    #endif 
  } /* end if (nev > 0) */  

  /*----------------------------------------------------------------------*/

  /* setup pointers into work */
  r = work[0];
  p = work[1];
  Ap = work[2];
  Ap_prev = work[3];
  


  /*--------------------------------------------------------------------
     Initialization phase 
    --------------------------------------------------------------------*/
  
  if (*flag != 3) 
  {
    
    /* If flag == 3, the eigCG is called after restart with the same b 
     * whose norm is already known in normb, so no need for these    */
    
    tempd = square_norm(b,n,parallel); /* Norm of rhs, b */
    *normb = sqrt(tempd);

    /* If right hand side is zero return zero solution. ITER stays the same */
    if (*normb == 0.0) 
    {
      for (i=0; i<n; i++) 
      {
	_vector_null(x[i].s0);
        _vector_null(x[i].s1);
        _vector_null(x[i].s2);
        _vector_null(x[i].s3);
      }       
    
      *flag = 0;		
      *reshist = 0.0;
      if( g_debug_level > 0 && g_proc_id == g_stdio_proc)
        displayInfo(eps_sq,maxit,*flag,*iter,*reshist);
      return;
     }
     
  }
  
  /* Set up for the method */
  *flag = 1;
  tolb = eps_sq * (*normb)*(*normb);	/* Relative to b tolerance */

  /* Zero-th residual: r = b - A*x  */
  f(r,x);
  diff(r,b,r,n);
  
  rho = 0.0;
  alpha = 1.0;
  beta = 0.0;
  v_size = 0;

  double reshist_init=square_norm(r,n,parallel);

  //if( g_proc_id == g_stdio_proc )
    //fprintf(stdout, "reshist init %f\n", reshist_init);
  
  /*--------------------------------------------------------------------
     main CG loop
    --------------------------------------------------------------------*/
  for (it = 0; it < maxit; it++) {
   
    rhoprev = rho;
    rho=square_norm(r,n,parallel);
    *reshist = rho;
    if ( (g_debug_level > 2) && (g_proc_id == g_stdio_proc) )
    { fprintf(stdout, " Linsys res( %d ): %g\n",*iter+it,*reshist); fflush(stdout); }

    /* Convergence test */
    if ( ( (*reshist < eps_sq) && (rel_prec==0) ) || ( (*reshist < eps_sq*(*normb)*(*normb)) && (rel_prec ==1 ) )   ) 
    { 
       *flag = 0;
       break;  /* break do not return */
    }
    
    /* Restart test */
    if(nev==0)
    {
       if (*reshist < (restart_eps_sq*reshist_init) ) 
       {  
           *flag = 3;
            break;  /* break do not return */
       }
    }

    if (it == 0)
      assign(p,r,n);
    else {
      betaprev = beta;
      beta = rho / rhoprev;
      if (beta == 0.0) {
	       *flag = 2;
	       break;
      }
      assign_mul_add_r(p,beta,r,n); /* p = beta*p + r */
    }

    /*----- eigCG specific code -------------------------------------------*/
    /* Remember Ap from previous iteration to be used at restart */
    if (nev > 0 && v_size == v_max)
      assign(Ap_prev,Ap,n); 
    /*---------------------------------------------------------------------*/

    f(Ap,p);

    /*----- eigCG specific code -------------------------------------------*/
    if (nev > 0) {
      /* record the diagonal vAv for the previous vector */
      if (it > 0) {
	H[(v_size-1)*v_max+v_size-1]= 1.0/alpha + betaprev/alphaprev;
	//H[(v_size-1)*v_max+v_size-1].im = 0.0;
      }
      
      /* Restarting V */
      if (v_size == v_max) {
	/* Solve (v_max) and (v_max-1) eigenproblems */
	tempi = v_max;
	allelems=v_max*v_max;
	_FT(zcopy)(&allelems, H, &ONE, Hevecs, &ONE);
	_FT(zheev)(&cV,&cU,&tempi,Hevecs,&v_max,Hevals,zwork,&lwork,rwork,&info,1,1);
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZHEEV in eigcg at v_max step, info %d\n",info); exit(1);}
	
	tempi = v_max-1;
	_FT(zcopy)(&allelems, H, &ONE, Hevecsold, &ONE);
	_FT(zheev)(&cV,&cU,&tempi,Hevecsold,&v_max,Hevalsold,zwork,&lwork,rwork,&info,1,1);
	       
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZHEEV in eigcg at (v_max-1) step, info %d\n",info); exit(1);}
	       
	
	/* fill 0s in vmax-th elem of oldevecs to match Hevecs */
	for(i=1; i <= v_max ; i++)
	{Hevecsold[i*v_max-1] = 0.0 ;}

	/* Attach the first nev oldevecs at the end of the nev latest ones */
	tempi = nev*v_max;
	_FT(zcopy)(&tempi,Hevecsold,&ONE,&Hevecs[tempi],&ONE);

        /* Orthogonalize the 2*nev (new+old) vectors Hevecs=QR */
	v_size = 2*nev; 
	_FT(zgeqrf)(&v_max,&v_size,Hevecs,&v_max,TAU,zwork,&lwork,&info) ;
 
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZGEQRF in eigcg info %d\n",info); exit(1);}
	
	/* use as a temp space Hevecsold = Q^THQ */
	_FT(zcopy)(&allelems,H,&ONE,Hevecsold,&ONE); 
	_FT(zunmqr)(&cR,&cN,&v_max,&v_max,&v_size,Hevecs,&v_max,
		               TAU,Hevecsold,&v_max,zwork,&lwork,&info);
	
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZGEQRF call 1 in eigcg info %d\n",info); exit(1);}
	
	_FT(zunmqr)(&cL,&cC,&v_max,&v_size,&v_size,Hevecs,&v_max,
		               TAU,Hevecsold,&v_max,zwork,&lwork,&info);
	
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZGEQRF call 2 in eigcg info %d\n",info); exit(1);}

        /* solve the small Hevecsold v_size x v_size eigenproblem */
	_FT(zheev)(&cV,&cU,&v_size,Hevecsold,&v_max,Hevals, zwork,&lwork,rwork,&info,1,1);
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZHEEV in eigcg info %d\n",info); exit(1);}



	/* zero out unused part of eigenectors in Hevecsold */
	tempi = 0;
	for(i = 0; i < v_size; i++ ) 
	{
	  for(j = v_size; j < v_max; j++)
	  {Hevecsold[tempi + j]=0.0;}
	  tempi += v_max;
	  
	}


	/* Compute the Hevecsold = Hevecs*Hevecsold */
	_FT(zunmqr)(&cL,&cN,&v_max,&v_size,&v_size,Hevecs,&v_max,
		               TAU,Hevecsold,&v_max,zwork,&lwork,&info);

	           
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZUNMQR, info %d\n",info); exit(1);}   
	      
	  
	/* Restart V = V(n,v_max)*Hevecsold(v_max,v_size) */
	Zrestart_X((_Complex double *) V, 12*lde, Hevecsold, 12*n, v_max, v_size, ework, esize); 
	
	/* Restart H = diag(Hevals) plus a column and a row */
	for (i = 0; i < allelems; i++ )  {H[i] = 0.0; }
    	for (i = 0; i < v_size; i++) H[i*(v_max+1)]= Hevals[i];

	 
	  
        /* The next residual to be added (v = r/sqrt(rho)) 
     	 * needs the (nev+1)-th column and row, through V(:,1:vs)'*A*v. 
	 * Instead of a matvec, we use the Ap and Ap_prev to obtain this:
	 * V(:,1:vs)'*A*V(:,vs+1) = V(:,1:vs)'*A*r/sqrt(rho) = 
	 * V'(A(p-beta*p_prev))/sqrt(rho) = V'(Ap - beta*Ap_prev)/sqrt(rho)*/
	  
	tmpd=-beta;
	assign_mul_add_r(Ap_prev,tmpd,Ap,n);   /* Ap_prev=Ap-beta*Ap_prev */
	  
	tempi=v_size*v_max;
	for (i=0; i<v_size; i++){
	  tmpz=scalar_prod(&V[i*lde],Ap_prev,n,parallel);
	  H[v_size+i*v_max]=tmpz/sqrt(rho);
	  H[i+tempi]=conj(tmpz)/sqrt(rho);
	}
	
      } /* end of if v_size == v_max */
      else 
      {
	/* update (vs+1,vs),(vs,vs+1) elements of tridigonal which are real*/
        if ( it > 0) 
	{
	  H[(v_size-1)*v_max + v_size]= -sqrt(beta)/alpha;
	  H[v_size*v_max + v_size-1] = creal(H[(v_size-1)*v_max + v_size]);
	}
	
      } /* of else */
      /* Augment V with the current CG residual r normalized by sqrt(rho) */

      tmpd=1.0/sqrt(rho);
      mul_r(&V[v_size*lde],tmpd,r,n);
      v_size++;
    } /* end of if nev >0 , ie., the eigCG specific code */
    /*---------------------------------------------------------------------*/

    /* pAp = p' * Ap */
    tempz=scalar_prod(p,Ap,n,parallel);
    pAp = creal(tempz);
    if (pAp == 0.0) {
      *flag = 2;
      break;
    } 

    alphaprev = alpha;
    alpha = rho / pAp;
    
    assign_add_mul_r(x,p,alpha,n);  /*update x*/
    tmpd=-alpha;
    assign_add_mul_r(r,Ap,tmpd,n);   /*update r*/
    
    //next line useful for debugging
    //printf("%d beta, alpha, rho, pAp %le %le %le %le\n",it,beta,alpha,rho,pAp);
  } /* for it = 0 : maxit-1 */
  
  *iter = *iter + it+1; /* record the number of CG iterations plus any older */
  if( g_proc_id == g_stdio_proc && g_debug_level > 0)
    displayInfo(eps_sq,maxit,*flag,*iter-1,*reshist);

  
  if(nev > 0 )
  {
    #if (defined SSE || defined SSE2 || defined SSE3)
    H= NULL;
    free(_h);
    Hevecs=NULL;
    free(_hevecs);
    Hevecsold=NULL;
    free(_hevecsold);
    Hevals=NULL;
    free(_hevals);
    Hevalsold=NULL;
    free(_hevalsold);
    TAU=NULL;
    free(_tau);
    zwork=NULL;
    free(_zwork);
    rwork=NULL;
    free(_rwork);
    #else
    free(H);
    free(Hevecs);
    free(Hevecsold);
    free(Hevals);
    free(Hevalsold);
    free(TAU);
    free(zwork);
    free(rwork);
    #endif
  }

 return;
}