/******************************************************************* Subroutine to compute the inverse matrix and determinant matrix *cov: the pointer to the covariance matrix matrix *inv_cov: the pointer to the inverse covariance matrix matrix *cov_mat: the pointer to the approximate covariance matrix when singular. If unsingular, it equals to cov double *det_cov: the pointer to determinant return value: '1' - successfully exit '0' - exit with waring/error *******************************************************************/ int veCov(matrix *cov, matrix *inv_cov, matrix *cov_mat, double *det_cov) { int i, j; matrix eigvec_re; matrix eigvec_im; vector eigval_re; vector eigval_im; int *eig_order; int eig_info; int num_v; // the number of eigenvalue int rank_c; double sum_v; double factor = 0.02; double ass_value; double min_real; mnew(&eigvec_re, cov->m, cov->n); mnew(&eigvec_im, cov->m, cov->n); vnew(&eigval_re, cov->n); vnew(&eigval_im, cov->n); eig_order = new int[cov->n]; // the eigenvector and eigenvalue of covariance matrix eig_info = eig(cov, &eigvec_re, &eigvec_im, &eigval_re, &eigval_im); //vprint(&eigval_re); //vprint(&eigval_im); if (!eig_info) { printf(" The eigenvalue computation failed! \n"); return 0; //.... } // the rank of covariance matrix num_v = cov->n; /*rank_c = num_v; for (i=0; i<num_v; i++) { if ((fabs(*(eigval_re.pr+i)) < ZEROTHRESH) && (fabs(*(eigval_im.pr+i)) < ZEROTHRESH)) { rank_c--; } } printf("rank = %d", rank_c);*/ rank_c = rank(cov, TOLERANCE); // compute the inverse and determinate if (rank_c == num_v) { // nonsingular inv(cov, inv_cov); mcopy(cov, cov_mat); *det_cov = det(cov); } else { // singular min_real = pow(10, (((double)-250) / ((double) cov->m))); /*for (i=0; i<num_v; i++) { if ((*(eigval_re.pr+i) < ZEROTHRESH) || (*(eigval_im.pr+i) != 0)) { *(eigval_re.pr+i) = 0; // ???? keep the real part of complex or not *(eigval_im.pr+i) = 0; } } sort(&eigval_re, eig_order, 'd'); */ for (i=0; i<num_v; i++) { // when negtive real eigenvalue, change to absolute value // to ensure all the real eigenvalues are positive if ((eigval_re.pr[i] < 0) && (eigval_im.pr[i] == 0)) { eigval_re.pr[i] *= -1; // the i-th column of eigenvector should also be changed the sign for (j=0; j<(eigvec_re.m); j++) { eigvec_re.pr[j*(eigvec_re.n)+i] *= -1; } } } //vprint(&eigval_re); //vprint(&eigval_im); // sort real eigenvalues descendingly, put complex ones at the end sorteig(&eigval_re, &eigval_im, eig_order); for (i=rank_c; i<num_v; i++) { *(eigval_re.pr+i) = 0; *(eigval_im.pr+i) = 0; } //vprint(&eigval_re); //vprint(&eigval_im); sum_v = vsum(&eigval_re); ass_value = factor * sum_v / (num_v - rank_c); if (ass_value < (0.5 * (*(eigval_re.pr+rank_c)) * (1 - factor))) { if (ass_value > min_real) { for (i=rank_c; i<num_v; i++) { *(eigval_re.pr+i) = ass_value; } for (i=0; i<rank_c; i++) { *(eigval_re.pr+i) *= 1 - factor; } } else { for (i=rank_c; i<num_v; i++) { *(eigval_re.pr+i) = min_real; } } } else { ass_value = 0.5 * (*(eigval_re.pr+rank_c)) * (1 - factor); if (ass_value > min_real) { for (i=rank_c; i<num_v; i++) { *(eigval_re.pr+i) = ass_value; } for (i=0; i<rank_c; i++) { *(eigval_re.pr+i) = *(eigval_re.pr+i) - ass_value * (num_v - rank_c) * (*(eigval_re.pr+i)) / sum_v; } } else { for (i=rank_c; i<num_v; i++) { *(eigval_re.pr+i) = min_real; } } } //vprint(&eigval_re); //vprint(&eigval_im); matrix eigvec_re_sorted; matrix eigvec_re_sorted_t; mnew(&eigvec_re_sorted, num_v, num_v); mnew(&eigvec_re_sorted_t, num_v, num_v); sortcols(eig_order, &eigvec_re, &eigvec_re_sorted); transpose(&eigvec_re_sorted, &eigvec_re_sorted_t); matrix inv_eig_vl_s; mnew(&inv_eig_vl_s, num_v, num_v); for (i=1; i<num_v; i++) { *(inv_eig_vl_s.pr + i*num_v + i) = 1 / (*(eigval_re.pr+i)); } matrix tmp; mnew(&tmp, num_v, num_v); mmMul(&eigvec_re_sorted, &inv_eig_vl_s, &tmp); mmMul(&tmp, &eigvec_re_sorted_t, inv_cov); matrix diag_eigval; mnew(&diag_eigval, num_v, num_v); for (i=0; i<num_v; i++) { *(diag_eigval.pr + i*num_v + i) = *(eigval_re.pr+i); } mmMul(&eigvec_re_sorted, &diag_eigval, &tmp); mmMul(&tmp, &eigvec_re_sorted_t, cov_mat); *det_cov = 1; for (i=0; i<num_v; i++) { *det_cov = (*det_cov) * (*(eigval_re.pr+i)); } mdelete(&inv_eig_vl_s); mdelete(&eigvec_re_sorted); mdelete(&eigvec_re_sorted_t); mdelete(&tmp); mdelete(&diag_eigval); } #ifdef _DEBUG printf("rank = %d \n", rank_c); printf("\n det_cov = %e \n", *det_cov); printf("inv_cov = \n"); mprint(inv_cov); printf("cov_mat = \n"); mprint(cov_mat); #endif mdelete(&eigvec_re); mdelete(&eigvec_im); vdelete(&eigval_re); vdelete(&eigval_im); delete []eig_order; return 1; }
void jdher_bi(int n, int lda, double tau, double tol, int kmax, int jmax, int jmin, int itmax, int blksize, int blkwise, int V0dim, complex *V0, int solver_flag, int linitmax, double eps_tr, double toldecay, int verbosity, int *k_conv, complex *Q, double *lambda, int *it, int maxmin, const int shift_mode, matrix_mult_bi A_psi){ /**************************************************************************** * * * Local variables * * * ****************************************************************************/ /* constants */ /* allocatables: * initialize with NULL, so we can free even unallocated ptrs */ double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL; complex *V_ = NULL, *V, *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL, *Res_ = NULL, *Res, *eigwork = NULL, *temp1_ = NULL, *temp1; int *idx1 = NULL, *idx2 = NULL, *convind = NULL, *keepind = NULL, *solvestep = NULL, *actcorrits = NULL; /* non-allocated ptrs */ complex *q, *v, *u, *r = NULL; /* complex *matdummy, *vecdummy; */ /* scalar vars */ double theta, alpha, it_tol; int i, k, j, actblksize, eigworklen, found, conv, keep, n2, N = n*sizeof(complex)/sizeof(bispinor); int act, cnt, idummy, info, CntCorrIts=0, endflag=0; /* variables for random number generator */ int IDIST = 1; int ISEED[4] = {2, 3, 5, 7}; ISEED[0] = g_proc_id; /**************************************************************************** * * * Of course on the CRAY everything is different :( !! * * that's why we need something more. * * ****************************************************************************/ #ifdef CRAY fupl_u = _cptofcd(cupl_u, strlen(cupl_u)); fupl_c = _cptofcd(cupl_c, strlen(cupl_c)); fupl_n = _cptofcd(cupl_n, strlen(cupl_n)); fupl_a = _cptofcd(cupl_a, strlen(cupl_a)); fupl_v = _cptofcd(cupl_v, strlen(cupl_v)); filaenv = _cptofcd(cilaenv, strlen(cilaenv)); fvu = _cptofcd(cvu, strlen(cvu)); #endif /**************************************************************************** * * * Execution starts here... * * * ****************************************************************************/ /* NEW PART FOR GAUGE_COPY */ #ifdef _GAUGE_COPY update_backward_gauge(); #endif /* END NEW PART */ /* print info header */ if (verbosity > 1 && g_proc_id == 0) { printf("Jacobi-Davidson method for hermitian Matrices\n"); printf("Solving A*x = lambda*x \n\n"); printf(" N= %10d ITMAX=%4d\n", n, itmax); printf(" KMAX=%3d JMIN=%3d JMAX=%3d V0DIM=%3d\n", kmax, jmin, jmax, V0dim); printf(" BLKSIZE= %2d BLKWISE= %5s\n", blksize, blkwise ? "TRUE" : "FALSE"); printf(" TOL= %11.4e TAU= %11.4e\n", tol, tau); printf(" LINITMAX= %5d EPS_TR= %10.3e TOLDECAY=%9.2e\n", linitmax, eps_tr, toldecay); printf("\n Computing %s eigenvalues\n", maxmin ? "maximal" : "minimal"); printf("\n"); fflush( stdout ); } /* validate input parameters */ if(tol <= 0) jderrorhandler(401,""); if(kmax <= 0 || kmax > n) jderrorhandler(402,""); if(jmax <= 0 || jmax > n) jderrorhandler(403,""); if(jmin <= 0 || jmin > jmax) jderrorhandler(404,""); if(itmax < 0) jderrorhandler(405,""); if(blksize > jmin || blksize > (jmax - jmin)) jderrorhandler(406,""); if(blksize <= 0 || blksize > kmax) jderrorhandler(406,""); if(blkwise < 0 || blkwise > 1) jderrorhandler(407,""); if(V0dim < 0 || V0dim >= jmax) jderrorhandler(408,""); if(linitmax < 0) jderrorhandler(409,""); if(eps_tr < 0.) jderrorhandler(500,""); if(toldecay <= 1.0) jderrorhandler(501,""); CONE.re=1.; CONE.im=0.; CZERO.re=0.; CZERO.im=0.; CMONE.re=-1.; CMONE.im=0.; /* Get hardware-dependent values: * Opt size of workspace for ZHEEV is (NB+1)*j, where NB is the opt. * block size... */ eigworklen = (2 + _FT(ilaenv)(&ONE, filaenv, fvu, &jmax, &MONE, &MONE, &MONE, 6, 2)) * jmax; /* Allocating memory for matrices & vectors */ if((void*)(V_ = (complex *)malloc((lda * jmax + 4) * sizeof(complex))) == NULL) { errno = 0; jderrorhandler(300,"V in jdher_bi"); } #if (defined SSE || defined SSE2 || defined SSE3) V = (complex*)(((unsigned long int)(V_)+ALIGN_BASE)&~ALIGN_BASE); #else V = V_; #endif if((void*)(U = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL) { jderrorhandler(300,"U in jdher_bi"); } if((void*)(s = (double *)malloc(jmax * sizeof(double))) == NULL) { jderrorhandler(300,"s in jdher_bi"); } if((void*)(Res_ = (complex *)malloc((lda * blksize+4) * sizeof(complex))) == NULL) { jderrorhandler(300,"Res in jdher_bi"); } #if (defined SSE || defined SSE2 || defined SSE3) Res = (complex*)(((unsigned long int)(Res_)+ALIGN_BASE)&~ALIGN_BASE); #else Res = Res_; #endif if((void*)(resnrm = (double *)malloc(blksize * sizeof(double))) == NULL) { jderrorhandler(300,"resnrm in jdher_bi"); } if((void*)(resnrm_old = (double *)calloc(blksize,sizeof(double))) == NULL) { jderrorhandler(300,"resnrm_old in jdher_bi"); } if((void*)(M = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL) { jderrorhandler(300,"M in jdher_bi"); } if((void*)(Vtmp = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL) { jderrorhandler(300,"Vtmp in jdher_bi"); } if((void*)(p_work_bi = (complex *)malloc(lda * sizeof(complex))) == NULL) { jderrorhandler(300,"p_work_bi in jdher_bi"); } /* ... */ if((void*)(idx1 = (int *)malloc(jmax * sizeof(int))) == NULL) { jderrorhandler(300,"idx1 in jdher_bi"); } if((void*)(idx2 = (int *)malloc(jmax * sizeof(int))) == NULL) { jderrorhandler(300,"idx2 in jdher_bi"); } /* Indices for (non-)converged approximations */ if((void*)(convind = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"convind in jdher_bi"); } if((void*)(keepind = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"keepind in jdher_bi"); } if((void*)(solvestep = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"solvestep in jdher_bi"); } if((void*)(actcorrits = (int *)malloc(blksize * sizeof(int))) == NULL) { jderrorhandler(300,"actcorrits in jdher_bi"); } if((void*)(eigwork = (complex *)malloc(eigworklen * sizeof(complex))) == NULL) { jderrorhandler(300,"eigwork in jdher_bi"); } if((void*)(rwork = (double *)malloc(3*jmax * sizeof(double))) == NULL) { jderrorhandler(300,"rwork in jdher_bi"); } if((void*)(temp1_ = (complex *)malloc((lda+4) * sizeof(complex))) == NULL) { jderrorhandler(300,"temp1 in jdher_bi"); } #if (defined SSE || defined SSE2 || defined SSE3) temp1 = (complex*)(((unsigned long int)(temp1_)+ALIGN_BASE)&~ALIGN_BASE); #else temp1 = temp1_; #endif if((void*)(dtemp = (double *)malloc(lda * sizeof(complex))) == NULL) { jderrorhandler(300,"dtemp in jdher_bi"); } /* Set variables for Projection routines */ n2 = 2*n; p_n = n; p_n2 = n2; p_Q_bi = Q; p_A_psi_bi = A_psi; p_lda = lda; /************************************************************************** * * * Generate initial search subspace V. Vectors are taken from V0 and if * * necessary randomly generated. * * * **************************************************************************/ /* copy V0 to V */ _FT(zlacpy)(fupl_a, &n, &V0dim, V0, &lda, V, &lda, 1); j = V0dim; /* if V0dim < blksize: generate additional random vectors */ if (V0dim < blksize) { idummy = (blksize - V0dim)*n; /* nof random numbers */ _FT(zlarnv)(&IDIST, ISEED, &idummy, V + V0dim*lda); j = blksize; } for (cnt = 0; cnt < j; cnt ++) { ModifiedGS_bi(V + cnt*lda, n, cnt, V, lda); alpha = sqrt(square_norm_bi((bispinor*)(V+cnt*lda), N)); alpha = 1.0 / alpha; _FT(dscal)(&n2, &alpha, (double *)(V + cnt*lda), &ONE); } /* Generate interaction matrix M = V^dagger*A*V. Only the upper triangle is computed. */ for (cnt = 0; cnt < j; cnt++){ A_psi((bispinor*) temp1, (bispinor*) (V+cnt*lda)); idummy = cnt+1; for(i = 0; i < idummy; i++) { M[cnt*jmax+i] = scalar_prod_bi((bispinor*)(V+i*lda), (bispinor*) temp1, N); } } /* Other initializations */ k = 0; (*it) = 0; if((*k_conv) > 0) { k = (*k_conv); } actblksize = blksize; for(act = 0; act < blksize; act ++){ solvestep[act] = 1; } /**************************************************************************** * * * Main JD-iteration loop * * * ****************************************************************************/ while((*it) < itmax) { /**************************************************************************** * * * Solving the projected eigenproblem * * * * M*u = V^dagger*A*V*u = s*u * * M is hermitian, only the upper triangle is stored * * * ****************************************************************************/ _FT(zlacpy)(fupl_u, &j, &j, M, &jmax, U, &jmax, 1); _FT(zheev)(fupl_v, fupl_u, &j, U, &jmax, s, eigwork, &eigworklen, rwork, &info, 1, 1); if (info != 0) { printf("error solving the projected eigenproblem."); printf(" zheev: info = %d\n", info); } if(info != 0) jderrorhandler(502,"problem in zheev for jdher_bi"); /* Reverse order of eigenvalues if maximal value is needed */ if(maxmin == 1){ sorteig(j, s, U, jmax, s[j-1], dtemp, idx1, idx2, 0); } else{ sorteig(j, s, U, jmax, 0., dtemp, idx1, idx2, 0); } /**************************************************************************** * * * Convergence/Restart Check * * * * In case of convergence, strip off a whole block or just the converged * * ones and put 'em into Q. Update the matrices Q, V, U, s * * * * In case of a restart update the V, U and M matrices and recompute the * * Eigenvectors * * * ****************************************************************************/ found = 1; while(found) { /* conv/keep = Number of converged/non-converged Approximations */ conv = 0; keep = 0; for(act=0; act < actblksize; act++){ /* Setting pointers for single vectors */ q = Q + (act+k)*lda; u = U + act*jmax; r = Res + act*lda; /* Compute Ritz-Vector Q[:,k+cnt1]=V*U[:,cnt1] */ theta = s[act]; _FT(zgemv)(fupl_n, &n, &j, &CONE, V, &lda, u, &ONE, &CZERO, q, &ONE, 1); /* Compute the residual */ A_psi((bispinor*) r, (bispinor*) q); theta = -theta; _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE); /* Compute norm of the residual and update arrays convind/keepind*/ resnrm_old[act] = resnrm[act]; resnrm[act] = sqrt(square_norm_bi((bispinor*) r, N)); if (resnrm[act] < tol){ convind[conv] = act; conv = conv + 1; } else{ keepind[keep] = act; keep = keep + 1; } } /* for(act = 0; act < actblksize; act ++) */ /* Check whether the blkwise-mode is chosen and ALL the approximations converged, or whether the strip-off mode is active and SOME of the approximations converged */ found = ((blkwise==1 && conv==actblksize) || (blkwise==0 && conv!=0)) && (j > actblksize || k == kmax - actblksize); /*************************************************************************** * * * Convergence Case * * * * In case of convergence, strip off a whole block or just the converged * * ones and put 'em into Q. Update the matrices Q, V, U, s * * * **************************************************************************/ if (found) { /* Store Eigenvalues */ for(act = 0; act < conv; act++) lambda[k+act] = s[convind[act]]; /* Re-use non approximated Ritz-Values */ for(act = 0; act < keep; act++) s[act] = s[keepind[act]]; /* Shift the others in the right position */ for(act = 0; act < (j-actblksize); act ++) s[act+keep] = s[act+actblksize]; /* Update V. Re-use the V-Vectors not looked at yet. */ idummy = j - actblksize; for (act = 0; act < n; act = act + jmax) { cnt = act + jmax > n ? n-act : jmax; _FT(zlacpy)(fupl_a, &cnt, &j, V+act, &lda, Vtmp, &jmax, 1); _FT(zgemm)(fupl_n, fupl_n, &cnt, &idummy, &j, &CONE, Vtmp, &jmax, U+actblksize*jmax, &jmax, &CZERO, V+act+keep*lda, &lda, 1, 1); } /* Insert the not converged approximations as first columns in V */ for(act = 0; act < keep; act++){ _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+keepind[act])*lda,&lda,V+act*lda,&lda,1); } /* Store Eigenvectors */ for(act = 0; act < conv; act++){ _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+convind[act])*lda,&lda,Q+(k+act)*lda,&lda,1); } /* Update SearchSpaceSize j */ j = j - conv; /* Let M become a diagonalmatrix with the Ritzvalues as entries ... */ _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1); for (act = 0; act < j; act++){ M[act*jmax + act].re = s[act]; } /* ... and U the Identity(jnew,jnew) */ _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1); if(shift_mode == 1){ if(maxmin == 0){ for(act = 0; act < conv; act ++){ if (lambda[k+act] > tau){ tau = lambda[k+act]; } } } else{ for(act = 0; act < conv; act ++){ if (lambda[k+act] < tau){ tau = lambda[k+act]; } } } } /* Update Converged-Eigenpair-counter and Pro_k */ k = k + conv; /* Update the new blocksize */ actblksize=min(blksize, kmax-k); /* Exit main iteration loop when kmax eigenpairs have been approximated */ if (k == kmax){ endflag = 1; break; } /* Counter for the linear-solver-accuracy */ for(act = 0; act < keep; act++) solvestep[act] = solvestep[keepind[act]]; /* Now we expect to have the next eigenvalues */ /* allready with some accuracy */ /* So we do not need to start from scratch... */ for(act = keep; act < blksize; act++) solvestep[act] = 1; } /* if(found) */ if(endflag == 1){ break; } /************************************************************************** * * * Restart * * * * The Eigenvector-Aproximations corresponding to the first jmin * * Petrov-Vectors are kept. if (j+actblksize > jmax) { * * * **************************************************************************/ if (j+actblksize > jmax) { idummy = j; j = jmin; for (act = 0; act < n; act = act + jmax) { /* V = V * U(:,1:j) */ cnt = act+jmax > n ? n-act : jmax; _FT(zlacpy)(fupl_a, &cnt, &idummy, V+act, &lda, Vtmp, &jmax, 1); _FT(zgemm)(fupl_n, fupl_n, &cnt, &j, &idummy, &CONE, Vtmp, &jmax, U, &jmax, &CZERO, V+act, &lda, 1, 1); } _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1); _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1); for (act = 0; act < j; act++) M[act*jmax + act].re = s[act]; } } /* while(found) */ if(endflag == 1){ break; } /**************************************************************************** * * * Solving the correction equations * * * * * ****************************************************************************/ /* Solve actblksize times the correction equation ... */ for (act = 0; act < actblksize; act ++) { /* Setting start-value for vector v as zeros(n,1). Guarantees orthogonality */ v = V + j*lda; for (cnt = 0; cnt < n; cnt ++){ v[cnt].re = 0.; v[cnt].im = 0.; } /* Adaptive accuracy and shift for the lin.solver. In case the residual is big, we don't need a too precise solution for the correction equation, since even in exact arithmetic the solution wouldn't be too usefull for the Eigenproblem. */ r = Res + act*lda; if (resnrm[act] < eps_tr && resnrm[act] < s[act] && resnrm_old[act] > resnrm[act]){ p_theta = s[act]; } else{ p_theta = tau; } p_k = k + actblksize; /* if we are in blockwise mode, we do not want to */ /* iterate solutions much more, if they have */ /* allready the desired precision */ if(blkwise == 1 && resnrm[act] < tol) { it_tol = pow(toldecay, (double)(-5)); } else { it_tol = pow(toldecay, (double)(-solvestep[act])); } solvestep[act] = solvestep[act] + 1; /* equation and project if necessary */ ModifiedGS_bi(r, n, k + actblksize, Q, lda); /* for(i=0;i<n;i++){ */ /* r[i].re*=-1.; */ /* r[i].im*=-1.; */ /* } */ g_sloppy_precision = 1; /* Solve the correction equation ... */ if (solver_flag == BICGSTAB){ info = bicgstab_complex_bi((bispinor*) v, (bispinor*) r, linitmax, it_tol*it_tol, g_relative_precision_flag, VOLUME/2, &Proj_A_psi_bi); } else if(solver_flag == CG){ info = cg_her_bi((bispinor*) v, (bispinor*) r, linitmax, it_tol*it_tol, g_relative_precision_flag, VOLUME/2, &Proj_A_psi_bi); } else{ info = bicgstab_complex_bi((bispinor*) v, (bispinor*) r, linitmax, it_tol*it_tol, g_relative_precision_flag, VOLUME/2, &Proj_A_psi_bi); } g_sloppy_precision = 0; /* Actualizing profiling data */ if (info == -1){ CntCorrIts += linitmax; } else{ CntCorrIts += info; } actcorrits[act] = info; /* orthonormalize v to Q, cause the implicit orthogonalization in the solvers may be too inaccurate. Then apply "IteratedCGS" to prevent numerical breakdown in order to orthogonalize v to V */ ModifiedGS_bi(v, n, k+actblksize, Q, lda); IteratedClassicalGS_bi(v, &alpha, n, j, V, temp1, lda); alpha = 1.0 / alpha; _FT(dscal)(&n2, &alpha, (double*) v, &ONE); /* update interaction matrix M */ A_psi((bispinor*) temp1, (bispinor*) v); idummy = j+1; for(i = 0; i < idummy; i++){ M[j*jmax+i] = scalar_prod_bi((bispinor*) (V+i*lda), (bispinor*) temp1, N); } /* Increasing SearchSpaceSize j */ j ++; } /* for (act = 0;act < actblksize; act ++) */ /* Print information line */ if(g_proc_id == 0) { print_status(verbosity, *it, k, j - blksize, kmax, blksize, actblksize, s, resnrm, actcorrits); } /* Increase iteration-counter for outer loop */ (*it) = (*it) + 1; } /* Main iteration loop */ /****************************************************************** * * * Eigensolutions converged or iteration limit reached * * * * Print statistics. Free memory. Return. * * * ******************************************************************/ *k_conv = k; if (verbosity >= 1) { if(g_proc_id == 0) { printf("\nJDHER execution statistics\n\n"); printf("IT_OUTER=%d IT_INNER_TOT=%d IT_INNER_AVG=%8.2f\n", (*it), CntCorrIts, (double)CntCorrIts/(*it)); printf("\nConverged eigensolutions in order of convergence:\n"); printf("\n I LAMBDA(I) RES(I)\n"); printf("---------------------------------------\n"); } for (act = 0; act < *k_conv; act ++) { /* Compute the residual for solution act */ q = Q + act*lda; theta = -lambda[act]; A_psi((bispinor*) r, (bispinor*) q); _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE); alpha = sqrt(square_norm_bi((bispinor*) r, N)); if(g_proc_id == 0) { printf("%3d %22.15e %12.5e\n", act+1, lambda[act], alpha); } } if(g_proc_id == 0) { printf("\n"); fflush( stdout ); } } free(V_); free(Vtmp); free(U); free(s); free(Res_); free(resnrm); free(resnrm_old); free(M); free(Z); free(eigwork); free(temp1_); free(dtemp); free(rwork); free(p_work_bi); free(idx1); free(idx2); free(convind); free(keepind); free(solvestep); free(actcorrits); } /* jdher(.....) */