double det_acc(const int id, hamiltonian_field_t * const hf) { monomial * mnl = &monomial_list[id]; int save_iter = ITER_MAX_BCG; int save_sloppy = g_sloppy_precision_flag; g_mu = mnl->mu; boundary(mnl->kappa); if(mnl->even_odd_flag) { if(mnl->solver == CG) { ITER_MAX_BCG = 0; } chrono_guess(g_spinor_field[2], mnl->pf, mnl->csg_field, mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_plus_psi); g_sloppy_precision_flag = 0; mnl->iter0 = bicg(g_spinor_field[2], mnl->pf, mnl->accprec, g_relative_precision_flag); g_sloppy_precision_flag = save_sloppy; /* Compute the energy contr. from first field */ mnl->energy1 = square_norm(g_spinor_field[2], VOLUME/2, 1); } else { if(mnl->solver == CG) { chrono_guess(g_spinor_field[DUM_DERI+5], mnl->pf, mnl->csg_field, mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_pm_psi); mnl->iter0 = cg_her(g_spinor_field[DUM_DERI+5], mnl->pf, mnl->maxiter, mnl->accprec, g_relative_precision_flag, VOLUME, Q_pm_psi); Q_minus_psi(g_spinor_field[2], g_spinor_field[DUM_DERI+5]); /* Compute the energy contr. from first field */ mnl->energy1 = square_norm(g_spinor_field[2], VOLUME, 1); } else { chrono_guess(g_spinor_field[2], mnl->pf, mnl->csg_field, mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_plus_psi); mnl->iter0 += bicgstab_complex(g_spinor_field[2], mnl->pf, mnl->maxiter, mnl->forceprec, g_relative_precision_flag, VOLUME, Q_plus_psi); mnl->energy1 = square_norm(g_spinor_field[2], VOLUME, 1); } } g_mu = g_mu1; boundary(g_kappa); if(g_proc_id == 0 && g_debug_level > 3) { printf("called det_acc for id %d %d dH = %1.4e\n", id, mnl->even_odd_flag, mnl->energy1 - mnl->energy0); } ITER_MAX_BCG = save_iter; return(mnl->energy1 - mnl->energy0); }
void det_derivative(const int id, hamiltonian_field_t * const hf) { monomial * mnl = &monomial_list[id]; /* This factor 2 a missing factor 2 in trace_lambda */ (*mnl).forcefactor = 2.; if(mnl->even_odd_flag) { /********************************************************************* * * even/odd version * * This a term is det(\hat Q^2(\mu)) * *********************************************************************/ g_mu = mnl->mu; boundary(mnl->kappa); if(mnl->solver != CG) { fprintf(stderr, "Bicgstab currently not implemented, using CG instead! (det_monomial.c)\n"); } /* Invert Q_{+} Q_{-} */ /* X_o -> DUM_DERI+1 */ chrono_guess(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->csg_field, mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_pm_psi); mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->maxiter, mnl->forceprec, g_relative_precision_flag, VOLUME/2, &Qtm_pm_psi); chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array, mnl->csg_N, &mnl->csg_n, VOLUME/2); /* Y_o -> DUM_DERI */ Qtm_minus_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]); /* apply Hopping Matrix M_{eo} */ /* to get the even sites of X_e */ H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EO, -1.); /* \delta Q sandwitched by Y_o^\dagger and X_e */ deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf); /* to get the even sites of Y_e */ H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EO, +1); /* \delta Q sandwitched by Y_e^\dagger and X_o */ deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf); } else { /********************************************************************* * non even/odd version * * This term is det(Q^2 + \mu_1^2) * *********************************************************************/ g_mu = mnl->mu; boundary(mnl->kappa); if(mnl->solver == CG) { /* Invert Q_{+} Q_{-} */ /* X -> DUM_DERI+1 */ chrono_guess(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->csg_field, mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_pm_psi); mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->maxiter, mnl->forceprec, g_relative_precision_flag, VOLUME, &Q_pm_psi); chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array, mnl->csg_N, &mnl->csg_n, VOLUME/2); /* Y -> DUM_DERI */ Q_minus_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]); } else { /* Invert first Q_+ */ /* Y -> DUM_DERI */ chrono_guess(g_spinor_field[DUM_DERI], mnl->pf, mnl->csg_field, mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_plus_psi); mnl->iter1 += bicgstab_complex(g_spinor_field[DUM_DERI], mnl->pf, mnl->maxiter, mnl->forceprec, g_relative_precision_flag, VOLUME, Q_plus_psi); chrono_add_solution(g_spinor_field[DUM_DERI], mnl->csg_field, mnl->csg_index_array, mnl->csg_N, &mnl->csg_n, VOLUME/2); /* Now Q_- */ /* X -> DUM_DERI+1 */ g_mu = -g_mu; chrono_guess(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], mnl->csg_field2, mnl->csg_index_array2, mnl->csg_N2, mnl->csg_n2, VOLUME/2, &Q_minus_psi); mnl->iter1 += bicgstab_complex(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], mnl->maxiter, mnl->forceprec, g_relative_precision_flag, VOLUME, Q_minus_psi); chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field2, mnl->csg_index_array2, mnl->csg_N2, &mnl->csg_n2, VOLUME/2); g_mu = -g_mu; } /* \delta Q sandwitched by Y^\dagger and X */ deriv_Sb_D_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], hf); } g_mu = g_mu1; boundary(g_kappa); return; }
void jdher(int n, int lda, double tau, double tol, int kmax, int jmax, int jmin, int itmax, int blksize, int blkwise, int V0dim, _Complex double *V0, int solver_flag, int linitmax, double eps_tr, double toldecay, int verbosity, int *k_conv, _Complex double *Q, double *lambda, int *it, int maxmin, int shift_mode, matrix_mult A_psi) { /**************************************************************************** * * * Local variables * * * ****************************************************************************/ /* constants */ /* allocatables: * initialize with NULL, so we can free even unallocated ptrs */ double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL; _Complex double *V_ = NULL, *V, *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL, *Res_ = NULL, *Res, *eigwork = NULL, *temp1_ = NULL, *temp1; int *idx1 = NULL, *idx2 = NULL, *convind = NULL, *keepind = NULL, *solvestep = NULL, *actcorrits = NULL; /* non-allocated ptrs */ _Complex double *q, *v, *u, *r = NULL; /* _Complex double *matdummy, *vecdummy; */ /* scalar vars */ double theta, alpha, it_tol; int i, k, j, actblksize, eigworklen, found, conv, keep, n2; int act, cnt, idummy, info, CntCorrIts=0, endflag=0; int N = n*sizeof(_Complex double)/sizeof(spinor); /* variables for random number generator */ int IDIST = 1; int ISEED[4] = {2, 3, 5, 7}; ISEED[0] = g_proc_id+2; /**************************************************************************** * * * Execution starts here... * * * ****************************************************************************/ /* print info header */ if ((verbosity > 2) && (g_proc_id == 0)){ printf("Jacobi-Davidson method for hermitian Matrices\n"); printf("Solving A*x = lambda*x \n\n"); printf(" N= %10d ITMAX=%4d\n", n, itmax); printf(" KMAX=%3d JMIN=%3d JMAX=%3d V0DIM=%3d\n", kmax, jmin, jmax, V0dim); printf(" BLKSIZE= %2d BLKWISE= %5s\n", blksize, blkwise ? "TRUE" : "FALSE"); printf(" TOL= %11.4e TAU= %11.4e\n", tol, tau); printf(" LINITMAX= %5d EPS_TR= %10.3e TOLDECAY=%9.2e\n", linitmax, eps_tr, toldecay); printf("\n Computing %s eigenvalues\n", maxmin ? "maximal" : "minimal"); printf("\n"); fflush( stdout ); } /* validate input parameters */ if(tol <= 0) jderrorhandler(401,""); if(kmax <= 0 || kmax > n) jderrorhandler(402,""); if(jmax <= 0 || jmax > n) jderrorhandler(403,""); if(jmin <= 0 || jmin > jmax) jderrorhandler(404,""); if(itmax < 0) jderrorhandler(405,""); if(blksize > jmin || blksize > (jmax - jmin)) jderrorhandler(406,""); if(blksize <= 0 || blksize > kmax) jderrorhandler(406,""); if(blkwise < 0 || blkwise > 1) jderrorhandler(407,""); if(V0dim < 0 || V0dim >= jmax) jderrorhandler(408,""); if(linitmax < 0) jderrorhandler(409,""); if(eps_tr < 0.) jderrorhandler(500,""); if(toldecay <= 1.0) jderrorhandler(501,""); CONE = 1.; CZERO = 0.; CMONE = -1.; /* Get hardware-dependent values: * Opt size of workspace for ZHEEV is (NB+1)*j, where NB is the opt. * block size... */ eigworklen = (2 + _FT(ilaenv)(&ONE, filaenv, fvu, &jmax, &MONE, &MONE, &MONE, 6, 2)) * jmax; /* Allocating memory for matrices & vectors */ if((void*)(V_ = (_Complex double *)malloc((lda * jmax + 4) * sizeof(_Complex double))) == NULL) { errno = 0; jderrorhandler(300,"V in jdher"); } #if (defined SSE || defined SSE2 || defined SSE3) V = (_Complex double*)(((unsigned long int)(V_)+ALIGN_BASE)&~ALIGN_BASE); #else V = V_; #endif if((void*)(U = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"U in jdher"); } if((void*)(s = (double *)malloc(jmax * sizeof(double))) == NULL) { jderrorhandler(300,"s in jdher"); } if((void*)(Res_ = (_Complex double *)malloc((lda * blksize+4) * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"Res in jdher"); } #if (defined SSE || defined SSE2 || defined SSE3) Res = (_Complex double*)(((unsigned long int)(Res_)+ALIGN_BASE)&~ALIGN_BASE); #else Res = Res_; #endif if((void*)(resnrm = (double *)malloc(blksize * sizeof(double))) == NULL) { jderrorhandler(300,"resnrm in jdher"); } if((void*)(resnrm_old = (double *)calloc(blksize,sizeof(double))) == NULL) { jderrorhandler(300,"resnrm_old in jdher"); } if((void*)(M = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"M in jdher"); } if((void*)(Vtmp = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"Vtmp in jdher"); } if((void*)(p_work = (_Complex double *)malloc(lda * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"p_work in jdher"); } /* ... */ if((void*)(idx1 = (int *)malloc(jmax * sizeof(int))) == NULL) { jderrorhandler(300,"idx1 in jdher"); } if((void*)(idx2 = (int *)malloc(jmax * sizeof(int))) == NULL) { jderrorhandler(300,"idx2 in jdher"); } /* Indices for (non-)converged approximations */ if((void*)(convind = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"convind in jdher"); } if((void*)(keepind = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"keepind in jdher"); } if((void*)(solvestep = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"solvestep in jdher"); } if((void*)(actcorrits = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"actcorrits in jdher"); } if((void*)(eigwork = (_Complex double *)malloc(eigworklen * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"eigwork in jdher"); } if((void*)(rwork = (double *)malloc(3*jmax * sizeof(double))) == NULL) { jderrorhandler(300,"rwork in jdher"); } if((void*)(temp1_ = (_Complex double *)malloc((lda+4) * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"temp1 in jdher"); } #if (defined SSE || defined SSE2 || defined SSE3) temp1 = (_Complex double*)(((unsigned long int)(temp1_)+ALIGN_BASE)&~ALIGN_BASE); #else temp1 = temp1_; #endif if((void*)(dtemp = (double *)malloc(lda * sizeof(_Complex double))) == NULL) { jderrorhandler(300,"dtemp in jdher"); } /* Set variables for Projection routines */ n2 = 2*n; p_n = n; p_n2 = n2; p_Q = Q; p_A_psi = A_psi; p_lda = lda; /************************************************************************** * * * Generate initial search subspace V. Vectors are taken from V0 and if * * necessary randomly generated. * * * **************************************************************************/ /* copy V0 to V */ _FT(zlacpy)(fupl_a, &n, &V0dim, V0, &lda, V, &lda, 1); j = V0dim; /* if V0dim < blksize: generate additional random vectors */ if (V0dim < blksize) { idummy = (blksize - V0dim)*n; /* nof random numbers */ _FT(zlarnv)(&IDIST, ISEED, &idummy, V + V0dim*lda); j = blksize; } for (cnt = 0; cnt < j; cnt ++) { ModifiedGS(V + cnt*lda, n, cnt, V, lda); alpha = sqrt(square_norm((spinor*)(V+cnt*lda), N, 1)); alpha = 1.0 / alpha; _FT(dscal)(&n2, &alpha, (double *)(V + cnt*lda), &ONE); } /* Generate interaction matrix M = V^dagger*A*V. Only the upper triangle is computed. */ for (cnt = 0; cnt < j; cnt++){ A_psi((spinor*) temp1, (spinor*)(V+cnt*lda)); idummy = cnt+1; for(i = 0; i < idummy; i++){ M[cnt*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, N, 1); } } /* Other initializations */ k = 0; (*it) = 0; if((*k_conv) > 0) { k = *k_conv; } actblksize = blksize; for(act = 0; act < blksize; act ++){ solvestep[act] = 1; } /**************************************************************************** * * * Main JD-iteration loop * * * ****************************************************************************/ while((*it) < itmax) { /**************************************************************************** * * * Solving the projected eigenproblem * * * * M*u = V^dagger*A*V*u = s*u * * M is hermitian, only the upper triangle is stored * * * ****************************************************************************/ _FT(zlacpy)(fupl_u, &j, &j, M, &jmax, U, &jmax, 1); _FT(zheev)(fupl_v, fupl_u, &j, U, &jmax, s, eigwork, &eigworklen, rwork, &info, 1, 1); if (info != 0) { printf("error solving the projected eigenproblem."); printf(" zheev: info = %d\n", info); } if(info != 0) jderrorhandler(502,"proble in zheev"); /* Reverse order of eigenvalues if maximal value is needed */ if(maxmin == 1){ sorteig(j, s, U, jmax, s[j-1], dtemp, idx1, idx2, 0); } else{ sorteig(j, s, U, jmax, 0., dtemp, idx1, idx2, 0); } /**************************************************************************** * * * Convergence/Restart Check * * * * In case of convergence, strip off a whole block or just the converged * * ones and put 'em into Q. Update the matrices Q, V, U, s * * * * In case of a restart update the V, U and M matrices and recompute the * * Eigenvectors * * * ****************************************************************************/ found = 1; while(found) { /* conv/keep = Number of converged/non-converged Approximations */ conv = 0; keep = 0; for(act=0; act < actblksize; act++){ /* Setting pointers for single vectors */ q = Q + (act+k)*lda; u = U + act*jmax; r = Res + act*lda; /* Compute Ritz-Vector Q[:,k+cnt1]=V*U[:,cnt1] */ theta = s[act]; _FT(zgemv)(fupl_n, &n, &j, &CONE, V, &lda, u, &ONE, &CZERO, q, &ONE, 1); /* Compute the residual */ A_psi((spinor*) r, (spinor*) q); theta = -theta; _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE); /* Compute norm of the residual and update arrays convind/keepind*/ resnrm_old[act] = resnrm[act]; resnrm[act] = sqrt(square_norm((spinor*) r, N, 1)); if (resnrm[act] < tol){ convind[conv] = act; conv = conv + 1; } else{ keepind[keep] = act; keep = keep + 1; } } /* for(act = 0; act < actblksize; act ++) */ /* Check whether the blkwise-mode is chosen and ALL the approximations converged, or whether the strip-off mode is active and SOME of the approximations converged */ found = ((blkwise==1 && conv==actblksize) || (blkwise==0 && conv!=0)) && (j > actblksize || k == kmax - actblksize); /*************************************************************************** * * * Convergence Case * * * * In case of convergence, strip off a whole block or just the converged * * ones and put 'em into Q. Update the matrices Q, V, U, s * * * **************************************************************************/ if (found) { /* Store Eigenvalues */ for(act = 0; act < conv; act++) lambda[k+act] = s[convind[act]]; /* Re-use non approximated Ritz-Values */ for(act = 0; act < keep; act++) s[act] = s[keepind[act]]; /* Shift the others in the right position */ for(act = 0; act < (j-actblksize); act ++) s[act+keep] = s[act+actblksize]; /* Update V. Re-use the V-Vectors not looked at yet. */ idummy = j - actblksize; for (act = 0; act < n; act = act + jmax) { cnt = act + jmax > n ? n-act : jmax; _FT(zlacpy)(fupl_a, &cnt, &j, V+act, &lda, Vtmp, &jmax, 1); _FT(zgemm)(fupl_n, fupl_n, &cnt, &idummy, &j, &CONE, Vtmp, &jmax, U+actblksize*jmax, &jmax, &CZERO, V+act+keep*lda, &lda, 1, 1); } /* Insert the not converged approximations as first columns in V */ for(act = 0; act < keep; act++){ _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+keepind[act])*lda,&lda,V+act*lda,&lda,1); } /* Store Eigenvectors */ for(act = 0; act < conv; act++){ _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+convind[act])*lda,&lda,Q+(k+act)*lda,&lda,1); } /* Update SearchSpaceSize j */ j = j - conv; /* Let M become a diagonalmatrix with the Ritzvalues as entries ... */ _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1); for (act = 0; act < j; act++) M[act*jmax + act] = s[act]; /* ... and U the Identity(jnew,jnew) */ _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1); if(shift_mode == 1){ if(maxmin == 0){ for(act = 0; act < conv; act ++){ if (lambda[k+act] > tau){ tau = lambda[k+act]; } } } else{ for(act = 0; act < conv; act ++){ if (lambda[k+act] < tau){ tau = lambda[k+act]; } } } } /* Update Converged-Eigenpair-counter and Pro_k */ k = k + conv; /* Update the new blocksize */ actblksize=min(blksize, kmax-k); /* Exit main iteration loop when kmax eigenpairs have been approximated */ if (k == kmax){ endflag = 1; break; } /* Counter for the linear-solver-accuracy */ for(act = 0; act < keep; act++) solvestep[act] = solvestep[keepind[act]]; /* Now we expect to have the next eigenvalues */ /* allready with some accuracy */ /* So we do not need to start from scratch... */ for(act = keep; act < blksize; act++) solvestep[act] = 1; } /* if(found) */ if(endflag == 1){ break; } /************************************************************************** * * * Restart * * * * The Eigenvector-Aproximations corresponding to the first jmin * * Petrov-Vectors are kept. if (j+actblksize > jmax) { * * * **************************************************************************/ if (j+actblksize > jmax) { idummy = j; j = jmin; for (act = 0; act < n; act = act + jmax) { /* V = V * U(:,1:j) */ cnt = act+jmax > n ? n-act : jmax; _FT(zlacpy)(fupl_a, &cnt, &idummy, V+act, &lda, Vtmp, &jmax, 1); _FT(zgemm)(fupl_n, fupl_n, &cnt, &j, &idummy, &CONE, Vtmp, &jmax, U, &jmax, &CZERO, V+act, &lda, 1, 1); } _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1); _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1); for (act = 0; act < j; act++) M[act*jmax + act] = s[act]; } } /* while(found) */ if(endflag == 1){ break; } /**************************************************************************** * * * Solving the correction equations * * * * * ****************************************************************************/ /* Solve actblksize times the correction equation ... */ for (act = 0; act < actblksize; act ++) { /* Setting start-value for vector v as zeros(n,1). Guarantees orthogonality */ v = V + j*lda; for (cnt = 0; cnt < n; cnt ++){ v[cnt] = 0.; } /* Adaptive accuracy and shift for the lin.solver. In case the residual is big, we don't need a too precise solution for the correction equation, since even in exact arithmetic the solution wouldn't be too usefull for the Eigenproblem. */ r = Res + act*lda; if (resnrm[act] < eps_tr && resnrm[act] < s[act] && resnrm_old[act] > resnrm[act]){ p_theta = s[act]; } else{ p_theta = tau; } p_k = k + actblksize; /* if we are in blockwise mode, we do not want to */ /* iterate solutions much more, if they have */ /* allready the desired precision */ if(blkwise == 1 && resnrm[act] < tol) { it_tol = pow(toldecay, (double)(-5)); } else { it_tol = pow(toldecay, (double)(-solvestep[act])); } solvestep[act] = solvestep[act] + 1; /* equation and project if necessary */ ModifiedGS(r, n, k + actblksize, Q, lda); /* Solve the correction equation ... */ g_sloppy_precision = 1; if(solver_flag == GMRES){ /* info = gmres((spinor*) v, (spinor*) r, 10, linitmax/10, it_tol*it_tol, &Proj_A_psi, &Proj_A_psi); */ info = gmres((spinor*) v, (spinor*) r, 10, linitmax/10, it_tol*it_tol, 0, n*sizeof(_Complex double)/sizeof(spinor), 1, &Proj_A_psi); } if(solver_flag == CGS){ info = cgs_real((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0, n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi); } else if (solver_flag == BICGSTAB){ info = bicgstab_complex((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0, n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi); } else if (solver_flag == CG){ info = cg_her((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0, n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi); } else{ info = gmres((spinor*) v, (spinor*) r, 10, linitmax, it_tol*it_tol, 0, n*sizeof(_Complex double)/sizeof(spinor), 1, &Proj_A_psi); } g_sloppy_precision = 0; /* Actualizing profiling data */ if (info == -1){ CntCorrIts += linitmax; } else{ CntCorrIts += info; } actcorrits[act] = info; /* orthonormalize v to Q, cause the implicit orthogonalization in the solvers may be too inaccurate. Then apply "IteratedCGS" to prevent numerical breakdown in order to orthogonalize v to V */ ModifiedGS(v, n, k+actblksize, Q, lda); IteratedClassicalGS(v, &alpha, n, j, V, temp1, lda); alpha = 1.0 / alpha; _FT(dscal)(&n2, &alpha, (double*) v, &ONE); /* update interaction matrix M */ A_psi((spinor*) temp1, (spinor*) v); idummy = j+1; for(i = 0; i < idummy; i++) { M[j*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, N, 1); } /* Increasing SearchSpaceSize j */ j ++; } /* for (act = 0;act < actblksize; act ++) */ /* Print information line */ if(g_proc_id == 0) { print_status(verbosity, *it, k, j - blksize, kmax, blksize, actblksize, s, resnrm, actcorrits); } /* Increase iteration-counter for outer loop */ (*it) = (*it) + 1; } /* Main iteration loop */ /****************************************************************** * * * Eigensolutions converged or iteration limit reached * * * * Print statistics. Free memory. Return. * * * ******************************************************************/ (*k_conv) = k; if (g_proc_id == 0 && verbosity > 0) { printf("\nJDHER execution statistics\n\n"); printf("IT_OUTER=%d IT_INNER_TOT=%d IT_INNER_AVG=%8.2f\n", (*it), CntCorrIts, (double)CntCorrIts/(*it)); printf("\nConverged eigensolutions in order of convergence:\n"); printf("\n I LAMBDA(I) RES(I)\n"); printf("---------------------------------------\n"); } for (act = 0; act < *k_conv; act ++) { /* Compute the residual for solution act */ q = Q + act*lda; theta = -lambda[act]; A_psi((spinor*) r, (spinor*) q); _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE); alpha = sqrt(square_norm((spinor*) r, N, 1)); if(g_proc_id == 0 && verbosity > 0) { printf("%3d %22.15e %12.5e\n", act+1, lambda[act], alpha); } } if(g_proc_id == 0 && verbosity > 0) { printf("\n"); fflush( stdout ); } free(V_); free(Vtmp); free(U); free(s); free(Res_); free(resnrm); free(resnrm_old); free(M); free(Z); free(eigwork); free(temp1_); free(dtemp); free(rwork); free(p_work); free(idx1); free(idx2); free(convind); free(keepind); free(solvestep); free(actcorrits); } /* jdher(.....) */