void ComputeFinalEvecs_Z ( int nvecs, int n, Complex_Z *evecsl, int ldvl, Complex_Z *evecsr, int ldvr, Complex_Z *H, int ldh, char SRT_OPT, double epsi, Complex_Z *evals, double *ernorms, double *xlnorms, double *xrnorms, Complex_Z *angles, void (*matvec)(void *, void *, void *), void *params, Complex_Z *work, int worksize) { Complex_Z *tmpH,*COEFL,*COEFR,*Res; int i,allelems,infoCG,ONE=1; tmpH = (Complex_Z *) calloc(ldh*nvecs,sizeof(Complex_Z)); if(tmpH == NULL){ printf("ERROR: not able to allocate tmpH in ComputeFinalEvecs\n"); exit(2);} COEFL = (Complex_Z *) calloc(nvecs*nvecs,sizeof(Complex_Z)); if(COEFL == NULL){ printf("ERROR: not able to allocate COEFL in ComputeFinalEvecs\n"); exit(2);} COEFR = (Complex_Z *) calloc(nvecs*nvecs,sizeof(Complex_Z)); if(COEFR == NULL){ printf("ERROR: not able to allocate COEFR in ComputeFinalEvecs\n"); exit(2);} Res = (Complex_Z *) calloc(n,sizeof(Complex_Z)); if(Res == NULL){ printf("ERROR: not able to allocate Res in ComputeFinalEvecs\n"); exit(2);} allelems=ldh*nvecs; BLAS_ZCOPY(&allelems,H,&ONE,tmpH,&ONE); //void CG_eval(Complex_C *A, int N, int LDA, Complex_C *W, char SRT_OPT, // Complex_C *VL, int LDVL, Complex_C *VR, int LDVR, int *info); ZG_eval(tmpH,nvecs,ldh,evals,SRT_OPT,epsi,COEFL,nvecs,COEFR,nvecs,&infoCG); if(infoCG != 0){ printf("ERROR: CG_eval inside ComputeFinalEvecs returned with flag %d\n",infoCG); exit(3);} //void restart_X(Complex_C *X, int ldx, Complex_C *hVecs, int nLocal, // int basisSize, int restartSize, Complex_C *rwork, int rworkSize) Zrestart_X(evecsl, ldvl, COEFL, n, nvecs, nvecs, work, worksize); Zrestart_X(evecsr, ldvr, COEFR, n, nvecs, nvecs, work, worksize); //No need to biorthogonalize because evecsl and evecsr were orginally biorthogonal //and COEFL and COEFR comes out of CG_eval as biorthogonal. //Compute Ritz values, residual norms, etc. for(i=0; i < nvecs; i++){ //void computeResNorm( Complex_C *xr, Complex_C *xl, Complex_C *lambda, float *rnorm, int n, Complex_C *Res, //float *xlnorm, float *xrnorm, Complex_C *cangle, void (*matvec)(void *, void *, void *), void *params) ZcomputeResNorm(&evecsr[i*ldvr], &evecsl[i*ldvl],&evals[i],&ernorms[i],n,Res,&xlnorms[i],&xrnorms[i], &angles[i],matvec,params);} return; }
void eigcg(int n, int lde, spinor * const x, spinor * const b, double *normb, const double eps_sq, double restart_eps_sq, const int rel_prec, int maxit, int *iter, double *reshist, int *flag, spinor **work, matrix_mult f, int nev, int v_max, spinor *V, int esize, _Complex double *ework) { double tolb; double alpha, beta; /* CG scalars */ double rho, rhoprev; double pAp; int it; /* current iteration number */ int i, j; /* loop variables */ int zs,ds,tmpsize; spinor *r, *p, *Ap; /* ptrs in work for CG vectors */ _Complex double tempz; /* double precision complex temp var */ double tempd; /* double temp var */ int tempi; /* int temp var */ int ONE = 1; /* var for passing 1 into BLAS routines */ /*---------------------------------------------------------------------- Eigen variables and setup ----------------------------------------------------------------------*/ /* Some constants */ char cR = 'R'; char cL = 'L'; char cN ='N'; char cV = 'V'; char cU = 'U'; char cC ='C'; double betaprev, alphaprev; /* remember the previous iterations scalars */ int v_size; /* tracks the size of V */ int lwork = 3*v_max; /* the size of zwork */ spinor *Ap_prev; void *_h; _Complex double *H; /* the V'AV projection matrix */ void *_hevecs; _Complex double *Hevecs; /* the eigenvectors of H */ void *_hevecsold; _Complex double *Hevecsold; /* the eigenvectors of H(v_max-1,v_max-1) */ void *_hevals; double *Hevals; /* the eigenvalues of H */ void *_hevalsold; double *Hevalsold; /* the eigenvalues of H(m-1,m-1) */ void *_tau; _Complex double *TAU; void *_zwork; _Complex double *zwork; /* double complex work array needed by zheev */ void *_rwork; double *rwork; /* double work array needed by zheev */ int parallel; double tmpd; _Complex double tmpz; zs = sizeof(_Complex double); ds = sizeof(double); int info, allelems = v_max*v_max; #ifdef MPI parallel=1; #else parallel=0; #endif if(nev > 0) /*allocate memory only if eigenvalues will be used */ { #if (defined SSE || defined SSE2 || defined SSE3) if ((_h = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL) { if( g_proc_id == g_stdio_proc) {fprintf(stderr,"ERROR Could not allocate H\n"); exit(1);} } else H = (_Complex double *)(((unsigned long int)(_h)+ALIGN_BASE)&~ALIGN_BASE); if ((_hevecs = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL) { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate Hevecs\n"); exit(1);} }else Hevecs = (_Complex double *)(((unsigned long int)(_hevecs)+ALIGN_BASE)&~ALIGN_BASE); if ((_hevecsold = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL) { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate Hevecsold\n"); exit(1);} }else Hevecsold = (_Complex double *)(((unsigned long int)(_hevecsold)+ALIGN_BASE)&~ALIGN_BASE); if ((_hevals = calloc(v_max+ALIGN_BASE,ds)) == NULL) { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate Hevals\n"); exit(1);} }else Hevals = (double *)(((unsigned long int)(_hevals)+ALIGN_BASE)&~ALIGN_BASE); if ((_hevalsold = calloc(v_max+ALIGN_BASE,ds)) == NULL) { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate Hevalsold\n"); exit(1); } }else Hevalsold = (double *)(((unsigned long int)(_hevalsold)+ALIGN_BASE)&~ALIGN_BASE); if ((_tau = calloc(2*nev+ALIGN_BASE,zs)) == NULL) { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate TAU\n"); exit(1); } }else TAU = (_Complex double *)(((unsigned long int)(_tau)+ALIGN_BASE)&~ALIGN_BASE); if ((_zwork = calloc(lwork+ALIGN_BASE,zs)) == NULL) { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate zwork\n"); exit(1);} }else zwork = (_Complex double *)(((unsigned long int)(_zwork)+ALIGN_BASE)&~ALIGN_BASE); if ((_rwork = calloc(3*v_max+ALIGN_BASE,ds)) == NULL) { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate rwork\n"); exit(1);} }else rwork = (double *)(((unsigned long int)(_rwork)+ALIGN_BASE)&~ALIGN_BASE); #else if ((H = (_Complex double *) calloc(v_max*v_max, zs)) == NULL) { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate H\n"); exit(1);} } if ((Hevecs = (_Complex double *) calloc(v_max*v_max, zs)) == NULL) { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate Hevecs\n"); exit(1);} } if ((Hevecsold = (_Complex double *) calloc(v_max*v_max, zs)) == NULL) { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate Hevecsold\n"); exit(1);} } if ((Hevals = (double *) calloc(v_max, ds)) == NULL) { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate Hevals\n"); exit(1);} } if ((Hevalsold = (double *) calloc(v_max, ds)) == NULL) { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate Hevalsold\n"); exit(1); } } if ((TAU = (_Complex double *) calloc(2*nev, zs)) == NULL) { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate TAU\n"); exit(1); } } if ((zwork = (_Complex double *) calloc(lwork, zs)) == NULL) { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate zwork\n"); exit(1);} } if ((rwork = (double *) calloc(3*v_max, ds)) == NULL) { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate rwork\n"); exit(1);} } #endif } /* end if (nev > 0) */ /*----------------------------------------------------------------------*/ /* setup pointers into work */ r = work[0]; p = work[1]; Ap = work[2]; Ap_prev = work[3]; /*-------------------------------------------------------------------- Initialization phase --------------------------------------------------------------------*/ if (*flag != 3) { /* If flag == 3, the eigCG is called after restart with the same b * whose norm is already known in normb, so no need for these */ tempd = square_norm(b,n,parallel); /* Norm of rhs, b */ *normb = sqrt(tempd); /* If right hand side is zero return zero solution. ITER stays the same */ if (*normb == 0.0) { for (i=0; i<n; i++) { _vector_null(x[i].s0); _vector_null(x[i].s1); _vector_null(x[i].s2); _vector_null(x[i].s3); } *flag = 0; *reshist = 0.0; if( g_debug_level > 0 && g_proc_id == g_stdio_proc) displayInfo(eps_sq,maxit,*flag,*iter,*reshist); return; } } /* Set up for the method */ *flag = 1; tolb = eps_sq * (*normb)*(*normb); /* Relative to b tolerance */ /* Zero-th residual: r = b - A*x */ f(r,x); diff(r,b,r,n); rho = 0.0; alpha = 1.0; beta = 0.0; v_size = 0; double reshist_init=square_norm(r,n,parallel); //if( g_proc_id == g_stdio_proc ) //fprintf(stdout, "reshist init %f\n", reshist_init); /*-------------------------------------------------------------------- main CG loop --------------------------------------------------------------------*/ for (it = 0; it < maxit; it++) { rhoprev = rho; rho=square_norm(r,n,parallel); *reshist = rho; if ( (g_debug_level > 2) && (g_proc_id == g_stdio_proc) ) { fprintf(stdout, " Linsys res( %d ): %g\n",*iter+it,*reshist); fflush(stdout); } /* Convergence test */ if ( ( (*reshist < eps_sq) && (rel_prec==0) ) || ( (*reshist < eps_sq*(*normb)*(*normb)) && (rel_prec ==1 ) ) ) { *flag = 0; break; /* break do not return */ } /* Restart test */ if(nev==0) { if (*reshist < (restart_eps_sq*reshist_init) ) { *flag = 3; break; /* break do not return */ } } if (it == 0) assign(p,r,n); else { betaprev = beta; beta = rho / rhoprev; if (beta == 0.0) { *flag = 2; break; } assign_mul_add_r(p,beta,r,n); /* p = beta*p + r */ } /*----- eigCG specific code -------------------------------------------*/ /* Remember Ap from previous iteration to be used at restart */ if (nev > 0 && v_size == v_max) assign(Ap_prev,Ap,n); /*---------------------------------------------------------------------*/ f(Ap,p); /*----- eigCG specific code -------------------------------------------*/ if (nev > 0) { /* record the diagonal vAv for the previous vector */ if (it > 0) { H[(v_size-1)*v_max+v_size-1]= 1.0/alpha + betaprev/alphaprev; //H[(v_size-1)*v_max+v_size-1].im = 0.0; } /* Restarting V */ if (v_size == v_max) { /* Solve (v_max) and (v_max-1) eigenproblems */ tempi = v_max; allelems=v_max*v_max; _FT(zcopy)(&allelems, H, &ONE, Hevecs, &ONE); _FT(zheev)(&cV,&cU,&tempi,Hevecs,&v_max,Hevals,zwork,&lwork,rwork,&info,1,1); if( (info != 0 ) && (g_proc_id==g_stdio_proc)) {fprintf(stderr, "Error: ZHEEV in eigcg at v_max step, info %d\n",info); exit(1);} tempi = v_max-1; _FT(zcopy)(&allelems, H, &ONE, Hevecsold, &ONE); _FT(zheev)(&cV,&cU,&tempi,Hevecsold,&v_max,Hevalsold,zwork,&lwork,rwork,&info,1,1); if( (info != 0 ) && (g_proc_id==g_stdio_proc)) {fprintf(stderr, "Error: ZHEEV in eigcg at (v_max-1) step, info %d\n",info); exit(1);} /* fill 0s in vmax-th elem of oldevecs to match Hevecs */ for(i=1; i <= v_max ; i++) {Hevecsold[i*v_max-1] = 0.0 ;} /* Attach the first nev oldevecs at the end of the nev latest ones */ tempi = nev*v_max; _FT(zcopy)(&tempi,Hevecsold,&ONE,&Hevecs[tempi],&ONE); /* Orthogonalize the 2*nev (new+old) vectors Hevecs=QR */ v_size = 2*nev; _FT(zgeqrf)(&v_max,&v_size,Hevecs,&v_max,TAU,zwork,&lwork,&info) ; if( (info != 0 ) && (g_proc_id==g_stdio_proc)) {fprintf(stderr, "Error: ZGEQRF in eigcg info %d\n",info); exit(1);} /* use as a temp space Hevecsold = Q^THQ */ _FT(zcopy)(&allelems,H,&ONE,Hevecsold,&ONE); _FT(zunmqr)(&cR,&cN,&v_max,&v_max,&v_size,Hevecs,&v_max, TAU,Hevecsold,&v_max,zwork,&lwork,&info); if( (info != 0 ) && (g_proc_id==g_stdio_proc)) {fprintf(stderr, "Error: ZGEQRF call 1 in eigcg info %d\n",info); exit(1);} _FT(zunmqr)(&cL,&cC,&v_max,&v_size,&v_size,Hevecs,&v_max, TAU,Hevecsold,&v_max,zwork,&lwork,&info); if( (info != 0 ) && (g_proc_id==g_stdio_proc)) {fprintf(stderr, "Error: ZGEQRF call 2 in eigcg info %d\n",info); exit(1);} /* solve the small Hevecsold v_size x v_size eigenproblem */ _FT(zheev)(&cV,&cU,&v_size,Hevecsold,&v_max,Hevals, zwork,&lwork,rwork,&info,1,1); if( (info != 0 ) && (g_proc_id==g_stdio_proc)) {fprintf(stderr, "Error: ZHEEV in eigcg info %d\n",info); exit(1);} /* zero out unused part of eigenectors in Hevecsold */ tempi = 0; for(i = 0; i < v_size; i++ ) { for(j = v_size; j < v_max; j++) {Hevecsold[tempi + j]=0.0;} tempi += v_max; } /* Compute the Hevecsold = Hevecs*Hevecsold */ _FT(zunmqr)(&cL,&cN,&v_max,&v_size,&v_size,Hevecs,&v_max, TAU,Hevecsold,&v_max,zwork,&lwork,&info); if( (info != 0 ) && (g_proc_id==g_stdio_proc)) {fprintf(stderr, "Error: ZUNMQR, info %d\n",info); exit(1);} /* Restart V = V(n,v_max)*Hevecsold(v_max,v_size) */ Zrestart_X((_Complex double *) V, 12*lde, Hevecsold, 12*n, v_max, v_size, ework, esize); /* Restart H = diag(Hevals) plus a column and a row */ for (i = 0; i < allelems; i++ ) {H[i] = 0.0; } for (i = 0; i < v_size; i++) H[i*(v_max+1)]= Hevals[i]; /* The next residual to be added (v = r/sqrt(rho)) * needs the (nev+1)-th column and row, through V(:,1:vs)'*A*v. * Instead of a matvec, we use the Ap and Ap_prev to obtain this: * V(:,1:vs)'*A*V(:,vs+1) = V(:,1:vs)'*A*r/sqrt(rho) = * V'(A(p-beta*p_prev))/sqrt(rho) = V'(Ap - beta*Ap_prev)/sqrt(rho)*/ tmpd=-beta; assign_mul_add_r(Ap_prev,tmpd,Ap,n); /* Ap_prev=Ap-beta*Ap_prev */ tempi=v_size*v_max; for (i=0; i<v_size; i++){ tmpz=scalar_prod(&V[i*lde],Ap_prev,n,parallel); H[v_size+i*v_max]=tmpz/sqrt(rho); H[i+tempi]=conj(tmpz)/sqrt(rho); } } /* end of if v_size == v_max */ else { /* update (vs+1,vs),(vs,vs+1) elements of tridigonal which are real*/ if ( it > 0) { H[(v_size-1)*v_max + v_size]= -sqrt(beta)/alpha; H[v_size*v_max + v_size-1] = creal(H[(v_size-1)*v_max + v_size]); } } /* of else */ /* Augment V with the current CG residual r normalized by sqrt(rho) */ tmpd=1.0/sqrt(rho); mul_r(&V[v_size*lde],tmpd,r,n); v_size++; } /* end of if nev >0 , ie., the eigCG specific code */ /*---------------------------------------------------------------------*/ /* pAp = p' * Ap */ tempz=scalar_prod(p,Ap,n,parallel); pAp = creal(tempz); if (pAp == 0.0) { *flag = 2; break; } alphaprev = alpha; alpha = rho / pAp; assign_add_mul_r(x,p,alpha,n); /*update x*/ tmpd=-alpha; assign_add_mul_r(r,Ap,tmpd,n); /*update r*/ //next line useful for debugging //printf("%d beta, alpha, rho, pAp %le %le %le %le\n",it,beta,alpha,rho,pAp); } /* for it = 0 : maxit-1 */ *iter = *iter + it+1; /* record the number of CG iterations plus any older */ if( g_proc_id == g_stdio_proc && g_debug_level > 0) displayInfo(eps_sq,maxit,*flag,*iter-1,*reshist); if(nev > 0 ) { #if (defined SSE || defined SSE2 || defined SSE3) H= NULL; free(_h); Hevecs=NULL; free(_hevecs); Hevecsold=NULL; free(_hevecsold); Hevals=NULL; free(_hevals); Hevalsold=NULL; free(_hevalsold); TAU=NULL; free(_tau); zwork=NULL; free(_zwork); rwork=NULL; free(_rwork); #else free(H); free(Hevecs); free(Hevecsold); free(Hevals); free(Hevalsold); free(TAU); free(zwork); free(rwork); #endif } return; }