/*
 * arpack_cg: CG solver for a sequence of right-hand sides that uses
 * approximate eigenvectors (computed by ARPACK or read from disk) to build
 * a deflated initial guess ("init-CG") before each call to cg_her.
 *
 * State is kept in static variables across calls:
 *  - On the first call (ncurRHS == 0) the work spinors and the eigenvector
 *    storage are allocated, the eigenvectors are either read from file
 *    (arpackcg_read_ev == 1) or computed with evals_arpack (and optionally
 *    written to file), the projected matrix H is built and, if
 *    arpackcg_comp_evecs != 0, Ritz values and their residuals are printed.
 *  - On every call the initial guess x is corrected with the eigenvectors
 *    and cg_her is run, restarting with a tightened tolerance until the
 *    requested tolerance has been reached.
 *  - On the call where ncurRHS reaches arpackcg_nrhs all static buffers are
 *    freed. The RHS counter is not reset, so calling the function again
 *    after that point would use the freed buffers.
 *
 * Return value:
 *   >= 0  total number of CG iterations used for this right-hand side
 *   -1    ARPACK did not converge
 *   -2    partfile eigenvector file could not be opened for reading
 *   -3    partfile eigenvector file held less data than expected
 *   -4    partfile eigenvector file could not be opened for writing
 *   -5    fewer items than expected were written to the partfile
 *   -41   no memory for the single precision write buffer
 *   -42   no memory for the single precision read buffer
 * The process exits (exit(1) / exit(2)) on allocation failure of the work
 * arrays, on a LAPACK error (ZHEEV / ZGESV) and if cg_her reports that it
 * did not converge.
 */
int arpack_cg(
  /* solver params */
  const int N,                   /* (IN) Number of lattice sites for this process */
  solver_params_t solver_params, /* (IN) parameters for solver */
  spinor * const x,              /* (IN/OUT) initial guess on input, solution on output for this RHS */
  spinor * const b,              /* (IN) right-hand side */
  matrix_mult f,                 /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in double precision */
  matrix_mult f32,               /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in single precision */
  const double eps_sq,           /* (IN) squared tolerance of convergence of the linear system for systems nrhs1+1 till nrhs */
  const int rel_prec,            /* (IN) 0 for using absolute error for convergence
                                         1 for using relative error for convergence */
  const int maxit,               /* (IN) Maximum allowed number of iterations to solution for the linear system */
  matrix_mult f_final,           /* (IN) final operator application during projection of type 1 */
  matrix_mult f_initial          /* (IN) initial operator application during projection of type 1 */
)
{
  /* Static variables and arrays. */
  static int ncurRHS=0;                 /* current number of the system being solved */
  static void *_ax,*_r,*_tmps1,*_tmps2; /* unaligned base pointers (SSE builds only) */
  static spinor *ax,*r,*tmps1,*tmps2;
  static _Complex double *evecs,*evals,*H,*HU,*Hinv,*initwork,*tmpv1;
  static _Complex double *zheev_work;
  static double *hevals,*zheev_rwork;
  static int *IPIV;
  static int info_arpack=0;
  static int nconv=0;                   /* number of converged eigenvectors as returned by arpack */

  int i,j,tmpsize;
  char cV='V',cN='N', cU='U';
  int ONE=1;
  int zheev_lwork,zheev_info;
  _Complex double c1, c2, c3, tpone=1.0,tzero=0.0;
  double d1,d2,d3;
  double et1,et2;                       /* timing variables */
  char evecs_filename[500];
  char howmny = 'P';
  FILE *evecs_fs=NULL;
  size_t evecs_count;
  WRITER *evecs_writer=NULL;
  spinor *evecs_ptr0 = NULL, *evecs_ptr1 = NULL;
  paramsPropagatorFormat *evecs_propagatorFormat = NULL;
  void *evecs_io_buffer = NULL;

  int parallel;                         /* for parallel processing of the scalar products */
#ifdef TM_USE_MPI
  parallel=1;
#else
  parallel=0;
#endif

  /* leading dimension for spinor vectors */
  int LDN;
  if(N==VOLUME)
    LDN = VOLUMEPLUSRAND;
  else
    LDN = VOLUMEPLUSRAND/2;

  /*(IN) Number of right-hand sides to be solved*/
  const int nrhs = solver_params.arpackcg_nrhs;
  /*(IN) First number of right-hand sides to be solved using tolerance eps_sq1*/
  const int nrhs1 = solver_params.arpackcg_nrhs1;
  /*(IN) squared tolerance of convergence of the linear system for systems 1 till nrhs1*/
  const double eps_sq1 = solver_params.arpackcg_eps_sq1;
  /*(IN) squared tolerance for restarting cg */
  const double res_eps_sq = solver_params.arpackcg_res_eps_sq;

  /* parameters for arpack */

  /*(IN) number of eigenvectors to be computed by arpack*/
  const int nev = solver_params.arpackcg_nev;
  /*(IN) size of the subspace used by arpack with the condition (nev+1) =< ncv*/
  const int ncv = solver_params.arpackcg_ncv;
  /*(IN) tolerance for computing eigenvalues with arpack */
  double arpack_eig_tol = solver_params.arpackcg_eig_tol;
  /*(IN) maximum number of iterations to be used by arpack*/
  int arpack_eig_maxiter = solver_params.arpackcg_eig_maxiter;
  /*(IN) 0 for eigenvalues with smallest real part "SR"
         1 for eigenvalues with largest real part "LR"
         2 for eigenvalues with smallest absolute value "SM"
         3 for eigenvalues with largest absolute value "LM"
         4 for eigenvalues with smallest imaginary part "SI"
         5 for eigenvalues with largest imaginary part "LI"*/
  int kind = solver_params.arpackcg_evals_kind;
  /*(IN) 0 don't compute the eigenvalues and their residuals of the original system
         1 compute the eigenvalues and the residuals for the original system
           (the orthonormal basis is still used in deflation and is not overwritten).*/
  int comp_evecs = solver_params.arpackcg_comp_evecs;
  /*(IN) 0 no polynomial acceleration; 1 use polynomial acceleration*/
  int acc = solver_params.use_acc;
  /*(IN) degree of the Chebyshev polynomial (irrelevant if acc=0)*/
  int cheb_k = solver_params.cheb_k;
  /*(IN) lower end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emin = solver_params.op_evmin;
  /*(IN) upper end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emax = solver_params.op_evmax;
  /*(IN) file name to be used for printing out debugging information from arpack*/
  char *arpack_logfile = solver_params.arpack_logfile;
  /*(IN) read eigenvectors in Schur basis from file */
  int arpack_read_ev = solver_params.arpackcg_read_ev;
  /*(IN) write eigenvectors in Schur basis to file */
  int arpack_write_ev = solver_params.arpackcg_write_ev;
  /*(IN) file name to be used for reading and writing evecs from and to disc */
  char *arpack_evecs_filename = solver_params.arpack_evecs_filename;
  /*(IN) precision used for writing eigenvectors */
  int arpack_evecs_writeprec = solver_params.arpack_evecs_writeprec;
  /* how to project with approximate eigenvectors */
  int projection_type = solver_params.projection_type;
  /* file format for evecs used by arpack */
  char *arpack_evecs_fileformat = solver_params.arpack_evecs_fileformat;

  /*-------------------------------------------------------------
    if this is the first right hand side, allocate memory,
    call arpack, and compute residuals of eigenvectors if needed
    -------------------------------------------------------------*/
  if(ncurRHS==0){
#if (defined SSE || defined SSE2 || defined SSE3)
    _ax = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_ax==NULL)
    {
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] insufficient memory for _ax inside arpack_cg.\n");
      exit(1);
    }
    else
    {ax = (spinor *) ( ((unsigned long int)(_ax)+ALIGN_BASE)&~ALIGN_BASE);}

    _r = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_r==NULL)
    {
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] insufficient memory for _r inside arpack_cg.\n");
      exit(1);
    }
    else
    {r = (spinor *) ( ((unsigned long int)(_r)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps1 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps1==NULL)
    {
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] insufficient memory for _tmps1 inside arpack_cg.\n");
      exit(1);
    }
    else
    {tmps1 = (spinor *) ( ((unsigned long int)(_tmps1)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps2 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps2==NULL)
    {
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] insufficient memory for _tmps2 inside arpack_cg.\n");
      exit(1);
    }
    else
    {tmps2 = (spinor *) ( ((unsigned long int)(_tmps2)+ALIGN_BASE)&~ALIGN_BASE);}
#else
    ax = (spinor *) malloc(LDN*sizeof(spinor));
    r = (spinor *) malloc(LDN*sizeof(spinor));
    tmps1 = (spinor *) malloc(LDN*sizeof(spinor));
    tmps2 = (spinor *) malloc(LDN*sizeof(spinor));

    if( (ax == NULL) || (r==NULL) || (tmps1==NULL) || (tmps2==NULL) )
    {
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] insufficient memory for ax,r,tmps1,tmps2 inside arpack_cg.\n");
      exit(1);
    }
#endif

    evecs = (_Complex double *) malloc(ncv*12*N*sizeof(_Complex double)); /* note: no extra buffer */
    evals = (_Complex double *) malloc(ncv*sizeof(_Complex double));
    tmpv1 = (_Complex double *) malloc(12*N*sizeof(_Complex double));

    if((evecs == NULL) || (evals==NULL) || (tmpv1==NULL))
    {
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] insufficient memory for evecs and evals inside arpack_cg.\n");
      exit(1);
    }

    if ( arpack_read_ev == 1) {

      if (strcmp(arpack_evecs_fileformat, "partfile") == 0) {
        /* set evec filename; one file per process, tagged by process coordinates */
        snprintf(evecs_filename, sizeof(evecs_filename), "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d",
                 arpack_evecs_filename, nev, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);
        evecs_fs = fopen(evecs_filename, "r");
        if (evecs_fs == NULL) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for reading\n", g_cart_id, evecs_filename);
          return(-2);
        }
        fprintf(stdout, "# [arpack_cg] reading eigenvectors from file %s\n", evecs_filename);

        if(arpack_evecs_writeprec == 64) {

          /* double precision on disk: read straight into evecs */
          evecs_io_buffer = (void*)evecs;

          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double), (size_t)nev*12*N, evecs_fs);
          et2=gettime();

        } else {
          /* single precision on disk: read into a temporary buffer and convert */
          evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nev*12*N );
          if( evecs_io_buffer == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
            fclose(evecs_fs);
            return(-42);
          }

          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nev*12*N, evecs_fs);
          et2=gettime();

          single2double(evecs, evecs_io_buffer, nev*24*N);

          free( evecs_io_buffer );
          evecs_io_buffer = NULL;
        }

        fclose(evecs_fs);
        evecs_fs = NULL;

        if( evecs_count != ((size_t)nev*12*N) ) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not proper amount of data from file %s\n", g_cart_id, evecs_filename);
          return(-3);
        }

        if(g_proc_id == g_stdio_proc) {
          fprintf(stdout,"# [arpack_cg] ARPACK time for reading %d eigenvectors: %+e seconds\n", nev, et2-et1);
        }
      } else if(strcmp(arpack_evecs_fileformat, "single") == 0) {

        if(N==VOLUME) {
          for(i=0; i<nev; i++) {
            snprintf(evecs_filename, sizeof(evecs_filename), "%s.ev%.5d", arpack_evecs_filename, i);
            evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
            evecs_ptr1 = NULL;
            read_spinor(evecs_ptr0, evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        } else if(N==VOLUME/2) {
          /* two half-volume eigenvectors are stored per file */
          for(i=0; i<nev/2; i++) {
            snprintf(evecs_filename, sizeof(evecs_filename), "%s.ev%.5d", arpack_evecs_filename, 2*i);
            evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
            evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
            read_spinor(evecs_ptr0, evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        }
      } /* of if arpack_evecs_fileformat */

      /* set info_arpack pro forma to SUCCESS */
      nconv = nev;
      info_arpack = 0;

    } else {
      et1=gettime();
      evals_arpack(N,nev,ncv,kind,howmny,acc,cheb_k,emin,emax,evals,evecs,arpack_eig_tol,arpack_eig_maxiter,f,&info_arpack,&nconv,arpack_logfile);
      et2=gettime();

      if(info_arpack != 0){ /* arpack didn't converge */
        if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] WARNING: ARPACK didn't converge. exiting..\n");
        return -1;
      }

      if(g_proc_id == g_stdio_proc)
      {
        fprintf(stdout,"# [arpack_cg] ARPACK has computed %d eigenvectors\n",nconv);
        fprintf(stdout,"# [arpack_cg] ARPACK time: %+e\n",et2-et1);
      }

      if ( arpack_write_ev == 1) {

        if(strcmp(arpack_evecs_fileformat, "partfile") == 0 ) {

          if( g_cart_id == 0 ) fprintf(stdout, "# [arpack_cg] writing evecs in partfile format\n");
          /* set evec filename */
          snprintf(evecs_filename, sizeof(evecs_filename), "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d",
                   arpack_evecs_filename, nconv, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);

          evecs_fs = fopen(evecs_filename, "w");
          if (evecs_fs == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for writing\n", g_cart_id, evecs_filename);
            return(-4);
          }

          if(arpack_evecs_writeprec == 64) {

            evecs_io_buffer = (void*)evecs;

            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double), (size_t)nconv*12*N, evecs_fs);
            et2=gettime();

          } else {
            evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nconv*12*N );
            if( evecs_io_buffer == NULL) {
              fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
              fclose(evecs_fs);
              return(-41);
            }
            double2single(evecs_io_buffer, evecs, nconv*24*N);

            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nconv*12*N, evecs_fs);
            et2=gettime();
            free(evecs_io_buffer);
            evecs_io_buffer = NULL;
          }

          fclose(evecs_fs);
          evecs_fs = NULL;

          if( evecs_count != ((size_t)nconv*12*N) ) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not write proper amount of data to file %s\n", g_cart_id, evecs_filename);
            return(-5);
          }

          if(g_proc_id == g_stdio_proc) {
            fprintf(stdout,"[arpack_cg] (%.4d) ARPACK time for writing %d eigenvectors: %+e seconds\n", g_cart_id, nconv, et2-et1);
          }

        } else if (strcmp(arpack_evecs_fileformat, "single") == 0) {

          if(N==VOLUME) {
            for(i=0; i<nconv; i++) {
              snprintf(evecs_filename, sizeof(evecs_filename), "%s.ev%.5d", arpack_evecs_filename, i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
              evecs_ptr1 = NULL;
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1, 1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            } /* end of loop on converged eigenvectors */
          } else if(N==VOLUME/2) {
            for(i=0; i<nconv/2; i++) {
              snprintf(evecs_filename, sizeof(evecs_filename), "%s.ev%.5d", arpack_evecs_filename, 2*i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
              evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1,1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            } /* end of loop on converged eigenvectors */
          } /* end of if N == VOLUME */

        } /* of if arpack_evecs_fileformat */

      } /* end of if arpack_write_ev == 1 */

    } /* end of if arpack_read_ev == 1 */

    H           = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double));
    Hinv        = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double));
    initwork    = (_Complex double *) malloc(nconv*sizeof(_Complex double));
    IPIV        = (int *) malloc(nconv*sizeof(int));
    zheev_lwork = 3*nconv;
    zheev_work  = (_Complex double *) malloc(zheev_lwork*sizeof(_Complex double));
    zheev_rwork = (double *) malloc(3*nconv*sizeof(double));
    hevals      = (double *) malloc(nconv*sizeof(double));

    /* check the workspace pointer zheev_work (not the integer size zheev_lwork) */
    if((H==NULL) || (Hinv==NULL) || (initwork==NULL) || (IPIV==NULL) || (zheev_work==NULL) || (zheev_rwork==NULL) || (hevals==NULL))
    {
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] insufficient memory for H, Hinv, initwork, IPIV, zheev_work, zheev_rwork, hevals inside arpack_cg.\n");
      exit(1);
    }

    et1=gettime();
    /* compute the elements of the hermitian matrix H
       leading dimension is nconv and active dimension is nconv */

    if( projection_type == 0) {

      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(r,&evecs[i*12*N],12*N);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        H[i+nconv*i] = creal(c1);  /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(r,&evecs[j*12*N],12*N);
          c1 = scalar_prod(r,ax,N,parallel);
          H[j+nconv*i] = c1;
          H[i+nconv*j] = conj(c1); /* enforce hermiticity */
        }
      }

    } else if ( projection_type == 1 ) {

      /* the vectors f_final(evec) are used instead of evec and the matrix
         elements are normalised by the norms of these vectors */
      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
        f_final(r, tmps1);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        c2 = scalar_prod(r,r,N,parallel);
        H[i+nconv*i] = creal(c1) / creal(c2);   /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(tmps1, &evecs[j*12*N], 12*N);
          f_final(r, tmps1);
          c1 = scalar_prod(r,ax,N,parallel);
          c3 = scalar_prod(r, r, N, parallel);

          H[j+nconv*i] = c1 / sqrt( creal(c2) * creal(c3) );
          H[i+nconv*j] = conj(c1) / sqrt( creal(c2) * creal(c3) ); /* enforce hermiticity */
        }
      }
    }

    et2=gettime();
    if(g_proc_id == g_stdio_proc) {
      fprintf(stdout,"[arpack_cg] time to compute H: %+e\n",et2-et1);
    }

    et1=gettime();
    /* compute Ritz values and Ritz vectors if needed */
    if( (nconv>0) && (comp_evecs !=0))
    {
      HU = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double));
      if( HU==NULL ) {
        if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for HU inside arpack_cg\n");
        exit(2);
      }

      /* copy H into HU */
      tmpsize=nconv*nconv;
      _FT(zcopy)(&tmpsize,H,&ONE,HU,&ONE);

      /* compute eigenvalues and eigenvectors of HU*/
      /* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,INFO ) */
      _FT(zheev)(&cV,&cU,&nconv,HU,&nconv,hevals,zheev_work,&zheev_lwork,zheev_rwork,&zheev_info,1,1);

      if(zheev_info != 0)
      {
        if(g_proc_id == g_stdio_proc)
        {
          fprintf(stderr,"[arpack_cg] Error in ZHEEV:, info =  %d\n",zheev_info);
          fflush(stderr);
        }
        exit(1);
      }

      /* If you want to replace the schur (orthonormal) basis by eigen basis
         use something like this. It is better to use the schur basis because
         they are better conditioned. Use this part only to get the eigenvalues
         and their residuals for the operator (D^\daggerD)
         esize=(ncv-nconv)*12*N;
         Zrestart_X(evecs,12*N,HU,12*N,nconv,nconv,&evecs[nconv*N],esize); */

      /* compute residuals and print out results */

      if(g_proc_id == g_stdio_proc)
      {fprintf(stdout,"# [arpack_cg] Ritz values of A and their residulas (||A*x-lambda*x||/||x||\n");
        fprintf(stdout,"# [arpack_cg] =============================================================\n");
        fflush(stdout);}

      for(i=0; i<nconv; i++)
      {
        /* Ritz vector i = evecs * (i-th eigenvector of H) */
        tmpsize=12*N;
        _FT(zgemv)(&cN,&tmpsize,&nconv,&tpone,evecs,&tmpsize,
                   &HU[i*nconv],&ONE,&tzero,tmpv1,&ONE,1);

        assign_complex_to_spinor(r,tmpv1,12*N);

        d1=square_norm(r,N,parallel);

        f(ax,r);

        mul_r(tmps1,hevals[i],r,N);
        diff(tmps2,ax,tmps1,N);

        d2= square_norm(tmps2,N,parallel);

        d3= sqrt(d2/d1);

        if(g_proc_id == g_stdio_proc)
        {fprintf(stdout,"Eval[%06d]: %22.15E rnorm: %22.15E\n", i, hevals[i], d3); fflush(stdout);}
      }

      free( HU ); HU = NULL;
    } /* if( (nconv_arpack>0) && (comp_evecs !=0)) */
    et2=gettime();
    if(g_proc_id == g_stdio_proc) {
      fprintf(stdout,"[arpack_cg] time to compute eigenvectors: %+e\n",et2-et1);
    }

  } /* if(ncurRHS==0) */

  double eps_sq_used,restart_eps_sq_used;  /* tolerance squared for the linear system */

  /*increment the RHS counter*/
  ncurRHS = ncurRHS +1;

  /* set the tolerance to be used for this right-hand side */
  if(ncurRHS > nrhs1){
    eps_sq_used = eps_sq;
  }
  else{
    eps_sq_used = eps_sq1;
  }

  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    fprintf(stdout, "# [arpack_cg] System %d, eps_sq %e, projection type %d\n",ncurRHS,eps_sq_used, projection_type);
    fflush(stdout);
  }

  /*---------------------------------------------------------------*/
  /* Call init-CG until this right-hand side converges             */
  /*---------------------------------------------------------------*/
  double wt1,wt2,wE,wI;
  double normsq,tol_sq;
  int flag,maxit_remain,numIts,its;
  int info_lapack;

  wE = 0.0; wI = 0.0;     /* Start accumulator timers */
  flag = -1;              /* System has not converged yet */
  maxit_remain = maxit;   /* Initialize Max and current # of iters */
  numIts = 0;
  restart_eps_sq_used=res_eps_sq;

  while( flag == -1 )
  {

    if(nconv > 0)
    {

      /* --------------------------------------------------------- */
      /* Perform init-CG with evecs vectors                        */
      /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0)                  */
      /* --------------------------------------------------------- */
      wt1 = gettime();

      /*r0=b-Ax0*/
      f(ax,x);        /*ax = A*x */
      diff(r,b,ax,N); /* r=b-A*x */

      if( projection_type == 0) {

        /* x = x + evecs*inv(H)*evecs'*r */
        for(i=0; i < nconv; i++)
        {
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
          initwork[i]= scalar_prod(tmps1,r,N,parallel);
        }

        /* solve the linear system H y = c */
        tmpsize=nconv*nconv;
        _FT(zcopy) (&tmpsize,H,&ONE,Hinv,&ONE); /* copy H into Hinv */
        /* SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
        _FT(zgesv) (&nconv,&ONE,Hinv,&nconv,IPIV,initwork,&nconv,&info_lapack);

        if(info_lapack != 0)
        {
          if(g_proc_id == g_stdio_proc) {
            fprintf(stderr, "[arpack_cg] Error in ZGESV:, info =  %d\n",info_lapack);
            fflush(stderr);
          }
          exit(1);
        }

        /* x = x + evecs*inv(H)*evecs'*r */
        for(i=0; i<nconv; i++)
        {
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
          assign_add_mul(x,tmps1,initwork[i],N);
        }

      } else if ( projection_type == 1 ) {
        /* x = x + evecs*inv(H)*evecs'*r */

        /* tmps2 = Q^+ r */
        f_initial(tmps2, r);

        for(i=0; i < nconv; i++) {
          /* tmps1 = v_i */
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);

          /* initwork_i = v_i^+ Q^+ r / lambda_i^2 */
          initwork[i]= scalar_prod(tmps1, tmps2, N, parallel) / ( H[i*nconv+i] * H[i*nconv+i] );
        }

        memset(tmps2, 0, N*sizeof(spinor) );
        for(i=0; i<nconv; i++) {
          assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
          assign_add_mul(tmps2, tmps1, initwork[i], N);
        }

        /* apply final operator */
        f_final(tmps1, tmps2);
        assign_add_mul(x, tmps1, 1., N);

      } /* end of if projection type */

      /* compute elapsed time and add to accumulator */

      wt2 = gettime();
      wI = wI + wt2-wt1;

    }/* if(nconv > 0) */

    /* which tolerance to use */
    if(eps_sq_used > restart_eps_sq_used)
    {
      tol_sq = eps_sq_used;
      flag = 1; /* shouldn't restart again */
    }
    else
    {
      tol_sq = restart_eps_sq_used;
    }

    wt1 = gettime();
    its = cg_her(x,b,maxit_remain,tol_sq,rel_prec,N,f);

    wt2 = gettime();

    wE = wE + wt2-wt1;

    /* check convergence */
    if(its == -1)
    {
      /* cg didn't converge: report on the stdio process, but terminate on
         every process (as for the other fatal errors in this function) */
      if(g_proc_id == g_stdio_proc) {
        fprintf(stderr, "[arpack_cg] CG didn't converge within the maximum number of iterations in arpack_cg. Exiting...\n");
        fflush(stderr);
      }
      exit(1);
    }
    else
    {
      numIts += its;
      maxit_remain = maxit - numIts; /* remaining number of iterations */
      restart_eps_sq_used = restart_eps_sq_used*res_eps_sq; /* prepare for the next restart */
    }

  }
  /* end while (flag ==-1)               */

  /* ---------- */
  /* Reporting  */
  /* ---------- */
  /* compute the exact residual */
  f(ax,x);        /* ax= A*x */
  diff(r,b,ax,N); /* r=b-A*x */
  normsq=square_norm(r,N,parallel);
  if(g_debug_level > 0 && g_proc_id == g_stdio_proc)
  {
    fprintf(stdout, "# [arpack_cg] For this rhs:\n");
    fprintf(stdout, "# [arpack_cg] Total initCG Wallclock : %+e\n", wI);
    fprintf(stdout, "# [arpack_cg] Total cg Wallclock : %+e\n", wE);
    fprintf(stdout, "# [arpack_cg] Iterations: %-d\n", numIts);
    fprintf(stdout, "# [arpack_cg] Actual Resid of LinSys  : %+e\n",normsq);
  }

  /* free memory if this was your last system to solve */
  if(ncurRHS == nrhs){
#if ( (defined SSE) || (defined SSE2) || (defined SSE3))
    free(_ax);  free(_r);  free(_tmps1); free(_tmps2);
#else
    free(ax); free(r); free(tmps1); free(tmps2);
#endif
    free(evecs); free(evals); free(H); free(Hinv);
    free(initwork); free(tmpv1); free(zheev_work);
    free(hevals); free(zheev_rwork); free(IPIV);
  }

  return numIts;
}
/*
 * eigenvalues: compute (or read back) eigenvalues and eigenvectors of the
 * operator selected by even_odd_flag (Qtm_pm_psi if set, Q_pm_psi otherwise)
 * with the Jacobi-Davidson routine jdher.
 *
 * nr_of_eigenvalues  (IN/OUT) number of eigenvalues requested; overwritten
 *                    with the value of `converged` on exit
 * max_iterations     (IN) iteration limit passed to jdher
 * precision          (IN) requested precision; values below 1.e-14 are
 *                    raised to 1.e-14
 * maxmin             (IN) non-zero: maximal eigenvalues, zero: minimal ones
 * readwrite          (IN) 0: compute only
 *                         1: try to read eigenvectors as start vectors,
 *                            (re-)compute, then write the eigenvectors
 *                         2: read eigenvectors and eigenvalues from file,
 *                            do not compute
 * nstore             (IN) number used in the file names
 * even_odd_flag      (IN) selects operator and vector length (see above)
 *
 * Side effects: fills the global arrays eigenvectors, eigenvls,
 * inv_eigenvls and sets no_eigenvalues, evlength, max_eigenvalue,
 * ev_minev, ev_qnorm and eigenvalues_for_cg_computed. The eigenvector and
 * eigenvalue arrays are allocated on the first call only.
 *
 * Returns eigenvls[0]. Without HAVE_LAPACK nothing is computed, an error
 * message is printed and 0.0 is returned.
 */
double eigenvalues(int * nr_of_eigenvalues, const int max_iterations,
                   const double precision, const int maxmin,
                   const int readwrite, const int nstore,
                   const int even_odd_flag) {
  double returnvalue = 0.0;
  complex norm2;
#ifdef HAVE_LAPACK
  static spinor * eigenvectors_ = NULL;
  static int allocated = 0;
  char filename[200];
  FILE * ofs;
#ifdef MPI
  double atime, etime;
#endif

  /**********************
   * For Jacobi-Davidson
   **********************/
  int verbosity = g_debug_level, converged = 0, blocksize = 1, blockwise = 0;
  int solver_it_max = 50, j_max, j_min, ii, jj;
  double decay_min = 1.7, decay_max = 1.5, prec,
    threshold_min = 1.e-3, threshold_max = 5.e-2;

  int v0dim = 0;
  matrix_mult f;
  int N = (VOLUME)/2, N2 = (VOLUMEPLUSRAND)/2;
  spinor * max_eigenvector_ = NULL, * max_eigenvector;

  /**********************
   * General variables
   **********************/
  int returncode=0;
  int returncode2=0;

  /* printf-style templates; the doubled %% leaves the conversions in place
     for the later sprintf calls that build the actual file names */
  char eigenvector_prefix[512];
  char eigenvalue_prefix[512];

  no_eigenvalues = *nr_of_eigenvalues;

  sprintf(eigenvector_prefix,"eigenvector.%%s.%%.2d.%%.4d");
  sprintf(eigenvalue_prefix,"eigenvalues.%%s.%%.4d");

  if(!even_odd_flag) {
    N = (VOLUME);
    N2 = (VOLUMEPLUSRAND);
    f = &Q_pm_psi;
  }
  else {
    f = &Qtm_pm_psi;
  }
  evlength = N2;
  if(g_proc_id == g_stdio_proc && g_debug_level >0) {
    printf("Number of %s eigenvalues to compute = %d\n",
           maxmin ? "maximal" : "minimal",(*nr_of_eigenvalues));
    printf("Using Jacobi-Davidson method! \n");
  }

  /* search space dimensions passed to jdher */
  if((*nr_of_eigenvalues) < 8){
    j_max = 15;
    j_min = 8;
  }
  else{
    j_max = 2*(*nr_of_eigenvalues);
    j_min = (*nr_of_eigenvalues);
  }
  if(precision < 1.e-14){
    prec = 1.e-14;
  }
  else{
    prec = precision;
  }

#if (defined SSE || defined SSE2 || defined SSE3)
  max_eigenvector_ = calloc(N2+1, sizeof(spinor));
#else
  max_eigenvector_ = calloc(N2, sizeof(spinor));
#endif
  if(max_eigenvector_ == NULL) {
    if(g_proc_id == g_stdio_proc)
      fprintf(stderr, "[eigenvalues] insufficient memory for max_eigenvector\n");
    exit(1);
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  max_eigenvector = (spinor *)(((unsigned long int)(max_eigenvector_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  max_eigenvector = max_eigenvector_;
#endif

  if(allocated == 0) {
    allocated = 1;
#if (defined SSE || defined SSE2 || defined SSE3)
    eigenvectors_ = calloc(N2*(*nr_of_eigenvalues)+1, sizeof(spinor));
#else
    eigenvectors_ = calloc(N2*(*nr_of_eigenvalues), sizeof(spinor));
#endif
    eigenvls = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
    inv_eigenvls = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
    if((eigenvectors_ == NULL) || (eigenvls == NULL) || (inv_eigenvls == NULL)) {
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr, "[eigenvalues] insufficient memory for eigenvectors, eigenvls, inv_eigenvls\n");
      exit(1);
    }
#if (defined SSE || defined SSE2 || defined SSE3)
    eigenvectors = (spinor *)(((unsigned long int)(eigenvectors_)+ALIGN_BASE)&~ALIGN_BASE);
#else
    eigenvectors = eigenvectors_;
#endif
  }

  solver_it_max = 50;
  /* compute the maximal one first */
  jdher(N*sizeof(spinor)/sizeof(complex), N2*sizeof(spinor)/sizeof(complex),
        50., 1.e-12,
        1, 15, 8, max_iterations, 1, 0, 0, NULL,
        CG, solver_it_max,
        threshold_max, decay_max, verbosity,
        &converged, (complex*) max_eigenvector, (double*) &max_eigenvalue,
        &returncode2, JD_MAXIMAL, 1,
        f);

  /* read stored eigenvectors; v0dim ends up as the number of vectors read
     and is passed to jdher below together with the eigenvectors array */
  if(readwrite) {
    if(even_odd_flag){
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        sprintf(filename, eigenvector_prefix , maxmin ? "max" : "min", v0dim, nstore);
        if((read_eospinor(&eigenvectors[v0dim*N2], filename)) != 0) {
          break;
        }
      }
    } else {
      FILE *testfile;
      spinor *s;
      double sqnorm;
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        sprintf(filename, eigenvector_prefix, maxmin ? "max" : "min", v0dim, nstore);

        printf("reading eigenvectors ... ");
        /* probe for existence first; read_spinor opens the file itself */
        testfile=fopen(filename,"r");
        if( testfile != NULL){
          fclose(testfile);
          s=(spinor*)&eigenvectors[v0dim*N2];
          read_spinor(s,NULL, filename,0);
          sqnorm=square_norm(s,VOLUME,1);
          printf(" has | |^2 = %e \n",sqnorm);
        } else {
          printf(" no more eigenvectors \n");
          break;
        }
      }
    }
  }

  if(readwrite != 2) {
#ifdef MPI
    atime = MPI_Wtime();
#endif
    /* (re-) compute minimal eigenvalues */
    converged = 0;
    solver_it_max = 200;

    if(maxmin)
      jdher(N*sizeof(spinor)/sizeof(complex), N2*sizeof(spinor)/sizeof(complex),
            50., prec,
            (*nr_of_eigenvalues), j_max, j_min,
            max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors,
            CG, solver_it_max,
            threshold_max, decay_max, verbosity,
            &converged, (complex*) eigenvectors, eigenvls,
            &returncode, JD_MAXIMAL, 1,
            f);
    else
      jdher(N*sizeof(spinor)/sizeof(complex), N2*sizeof(spinor)/sizeof(complex),
            0., prec,
            (*nr_of_eigenvalues), j_max, j_min,
            max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors,
            CG, solver_it_max,
            threshold_min, decay_min, verbosity,
            &converged, (complex*) eigenvectors, eigenvls,
            &returncode, JD_MINIMAL, 1,
            f);

#ifdef MPI
    etime = MPI_Wtime();
    if(g_proc_id == 0) {
      printf("Eigenvalues computed in %e sec. (MPI_Wtime)\n", etime-atime);
    }
#endif
  }
  else {
    sprintf(filename, eigenvalue_prefix, maxmin ? "max" : "min", nstore);
    ofs = fopen(filename, "r");
    if(ofs != (FILE*) NULL) {
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        /* stop on a malformed line instead of spinning on it */
        if(fscanf(ofs, "%d %lf\n", &v0dim, &eigenvls[v0dim]) != 2) break;
        if(feof(ofs)) break;
        /* NOTE(review): this stores the last index read before EOF rather
           than a count, so the final line(s) are not counted; kept as in the
           original, confirm the intended meaning of `converged` here */
        converged = v0dim;
      }
      /* only close the stream if it was actually opened */
      fclose(ofs);
    }
  }

  (*nr_of_eigenvalues) = converged;
  no_eigenvalues = converged;
  /* guard against indexing eigenvls[-1] when nothing converged */
  if(converged > 0) {
    ev_minev = eigenvls[(*nr_of_eigenvalues)-1];
  }
  eigenvalues_for_cg_computed = converged;

  /* print all scalar products < ii | jj > of the eigenvectors (the original
     threshold tests were disabled, every product is printed) */
  for (ii = 0; ii < (*nr_of_eigenvalues); ii++){
    for (jj = 0; jj <= ii; jj++){
      norm2 = scalar_prod(&(eigenvectors[ii*N2]),&(eigenvectors[jj*N2]), VOLUME, 1);
      if(g_proc_id == g_stdio_proc){
        printf("< %d | %d>  =\t %e  +i * %e \n", ii+1, jj+1, norm2.re, norm2.im);
        fflush(stdout);
      }
    }
  }

  if(readwrite == 1 ) {
    if(even_odd_flag) {
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        sprintf(filename, eigenvector_prefix, maxmin ? "max" : "min", v0dim, nstore);
        if((write_eospinor(&eigenvectors[v0dim*N2], filename, eigenvls[v0dim], prec, nstore)) != 0) {
          break;
        }
      }
    }
    else{
      WRITER *writer=NULL;
      spinor *s;
      double sqnorm;
      paramsPropagatorFormat *propagatorFormat = NULL;

      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        sprintf(filename, eigenvector_prefix, maxmin ? "max" : "min", v0dim, nstore);

        construct_writer(&writer, filename, 0);
        /* todo write propagator format */
        propagatorFormat = construct_paramsPropagatorFormat(64, 1);
        write_propagator_format(writer, propagatorFormat);
        free(propagatorFormat);

        s=(spinor*)&eigenvectors[v0dim*N2];
        write_spinor(writer, &s,NULL, 1, 64);
        destruct_writer(writer);
        writer=NULL;
        sqnorm=square_norm(s,VOLUME,1);
        printf(" wrote eigenvector | |^2 = %e \n",sqnorm);
      }
    }
  }
  if(g_proc_id == 0 && readwrite != 2) {
    sprintf(filename, eigenvalue_prefix , maxmin ? "max" : "min", nstore);
    ofs = fopen(filename, "w");
    if(ofs != NULL) {
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        fprintf(ofs, "%d %e\n", v0dim, eigenvls[v0dim]);
      }
      fclose(ofs);
    }
    else {
      fprintf(stderr, "[eigenvalues] could not open file %s for writing\n", filename);
    }
  }
  for(v0dim = 0; v0dim < converged; v0dim++) {
    inv_eigenvls[v0dim] = 1./eigenvls[v0dim];
  }

  ev_qnorm=1.0/(sqrt(max_eigenvalue)+0.1);
  ev_minev*=ev_qnorm*ev_qnorm;
  /* ov_n_cheby is initialized in Dov_psi.c */
  returnvalue=eigenvls[0];
  free(max_eigenvector_);
#else
  fprintf(stderr, "lapack not available, so JD method for EV computation not available \n");
#endif
  return(returnvalue);
}
void prepare_source(const int nstore, const int isample, const int ix, const int op_id, const int read_source_flag, const int source_location) { FILE * ifs = NULL; int is = ix / 3, ic = ix %3, err = 0, rstat=0, t = 0; operator * optr = &operator_list[op_id]; char source_filename[100]; int source_type = SourceInfo.type; static int nstore_ = -1; static int isample_ = -1; static int ix_ = -1; static int op_id_ = -1; SourceInfo.nstore = nstore; SourceInfo.sample = isample; SourceInfo.ix = ix; if(optr->type != DBTMWILSON && optr->type != DBCLOVER && optr->type != BSM && optr->type != BSM2b && optr->type != BSM2m ) { SourceInfo.no_flavours = 1; /* no volume sources */ if(source_type != 1) { /* either "Don't read inversion source from file" or */ /* "Don't read inversion source from file, but save the one generated" */ if (read_source_flag == 0 || read_source_flag == 2) { if (source_location == 0) { source_spinor_field(g_spinor_field[0], g_spinor_field[1], is, ic); } else { source_spinor_field_point_from_file(g_spinor_field[0], g_spinor_field[1], is, ic, source_location); } } /* "Read inversion source from file" */ else { if (SourceInfo.splitted) { /* timeslice needs to be put into filename */ if(SourceInfo.automaticTS) { /* automatic timeslice detection */ if(g_proc_id == 0) { for(t = 0; t < g_nproc_t*T; t++) { if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d", SourceInfo.basename, nstore, t, ix); else sprintf(source_filename, "%s.%.4d.%.2d.%.2d", SourceInfo.basename, nstore, t, ix); if( (ifs = fopen(source_filename, "r")) != NULL) { fclose(ifs); break; } } } #ifdef MPI MPI_Bcast(&t, 1, MPI_INT, 0, MPI_COMM_WORLD); #endif SourceInfo.t = t; } if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix); else sprintf(source_filename, "%s.%.4d.%.2d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix); if (g_cart_id == 0) { printf("# Trying to read source from %s\n", source_filename); } rstat = 
read_spinor(g_spinor_field[0], g_spinor_field[1], source_filename, 0); } else { sprintf(source_filename, "%s", SourceInfo.basename); if (g_cart_id == 0) { printf("# Trying to read source no %d from %s\n", ix, source_filename); } rstat = read_spinor(g_spinor_field[0], g_spinor_field[1], source_filename, ix); } if(rstat) { fprintf(stderr, "Error reading file %s in prepare_source.c\nUnable to proceed, aborting....\n", source_filename); exit(-1); } } if (PropInfo.splitted) { if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d.inverted", PropInfo.basename, nstore, SourceInfo.t, ix); else sprintf(source_filename, "%s.%.4d.%.2d.%.2d.inverted", PropInfo.basename, nstore, SourceInfo.t, ix); } else { if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.inverted", PropInfo.basename, nstore, SourceInfo.t); else sprintf(source_filename, "%s.%.4d.%.2d.inverted", PropInfo.basename, nstore, SourceInfo.t); } } else if(source_type == 1) { /* Volume sources */ if(read_source_flag == 0 || read_source_flag == 2) { if(g_proc_id == 0 && g_debug_level > 0) { printf("# Preparing 1 flavour volume source\n"); } gaussian_volume_source(g_spinor_field[0], g_spinor_field[1], isample, nstore, 0); } else { sprintf(source_filename, "%s.%.4d.%.5d", SourceInfo.basename, nstore, isample); if (g_cart_id == 0) { printf("# Trying to read source from %s\n", source_filename); } rstat = read_spinor(g_spinor_field[0], g_spinor_field[1], source_filename, 0); if(rstat) { fprintf(stderr, "Error reading file %s in prepare_source.c.\nUnable to proceed, aborting....\n", source_filename); exit(-1); } } sprintf(source_filename, "%s.%.4d.%.5d.inverted", PropInfo.basename, nstore, isample); } optr->sr0 = g_spinor_field[0]; optr->sr1 = g_spinor_field[1]; optr->prop0 = g_spinor_field[2]; optr->prop1 = g_spinor_field[3]; /* If the solver is _not_ CG we might read in */ /* here some better guess */ /* This also works for re-iteration */ if (optr->solver != CG && optr->solver != PCG && optr->solver != MIXEDCG 
&& optr->solver != RGMIXEDCG) { ifs = fopen(source_filename, "r"); if (ifs != NULL) { if (g_cart_id == 0) { printf("# Trying to read guess from file %s\n", source_filename); fflush(stdout); } fclose(ifs); err = 0; /* iter = get_propagator_type(source_filename); */ rstat = read_spinor(optr->prop0, optr->prop1, source_filename, (PropInfo.splitted ? 0 : ix)); if(rstat) { fprintf(stderr, "Error reading file %s in prepare_source.c, rstat = %d\n", source_filename, rstat); exit(-1); } if (g_kappa != 0.) { mul_r(optr->prop1, 1. / (2*optr->kappa), optr->prop1, VOLUME / 2); mul_r(optr->prop0, 1. / (2*optr->kappa), optr->prop0, VOLUME / 2); } if (err != 0) { zero_spinor_field(optr->prop0, VOLUME / 2); zero_spinor_field(optr->prop1, VOLUME / 2); } } else { zero_spinor_field(optr->prop0, VOLUME / 2); zero_spinor_field(optr->prop1, VOLUME / 2); } } else { zero_spinor_field(optr->prop0, VOLUME / 2); zero_spinor_field(optr->prop1, VOLUME / 2); } /* if(optr->even_odd_flag) { */ /* assign(optr->sr0, g_spinor_field[0], VOLUME/2); */ /* assign(optr->sr1, g_spinor_field[1], VOLUME/2); */ /* } */ /* else { */ /* convert_eo_to_lexic(optr->sr0, g_spinor_field[0], g_spinor_field[1]); */ /* } */ } else { /* for the ND 2 flavour twisted operator and BSM(2) */ SourceInfo.no_flavours = 2; zero_spinor_field(g_spinor_field[0], VOLUME/2); zero_spinor_field(g_spinor_field[1], VOLUME/2); if(source_type != 1) { if(read_source_flag == 0 || read_source_flag == 2) { if(source_location == 0) { source_spinor_field(g_spinor_field[2], g_spinor_field[3], is, ic); } else { source_spinor_field_point_from_file(g_spinor_field[2], g_spinor_field[3], is, ic, source_location); } } else { if(SourceInfo.splitted) { if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix); else sprintf(source_filename, "%s.%.4d.%.2d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix); } else { sprintf(source_filename,"%s", SourceInfo.basename); } if(g_proc_id == 0) { printf("# 
Trying to read source from %s\n", source_filename); } if(read_spinor(g_spinor_field[2], g_spinor_field[3], source_filename, 0) != 0) { fprintf(stderr, "Error reading source! Aborting...\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(-1); } } } else if(source_type == 1) { /* Volume sources */ if(g_proc_id == 0 && g_debug_level > 0) { printf("# Preparing 2 flavour volume source\n"); } gaussian_volume_source(g_spinor_field[0], g_spinor_field[1], isample, nstore, 1); gaussian_volume_source(g_spinor_field[2], g_spinor_field[3], isample, nstore, 2); } if( optr->type != BSM && optr->type != BSM2b && optr->type != BSM2m ) { mul_one_pm_itau2(g_spinor_field[4], g_spinor_field[6], g_spinor_field[0], g_spinor_field[2], +1., VOLUME/2); mul_one_pm_itau2(g_spinor_field[5], g_spinor_field[7], g_spinor_field[1], g_spinor_field[3], +1., VOLUME/2); assign(g_spinor_field[0], g_spinor_field[4], VOLUME/2); assign(g_spinor_field[1], g_spinor_field[5], VOLUME/2); assign(g_spinor_field[2], g_spinor_field[6], VOLUME/2); assign(g_spinor_field[3], g_spinor_field[7], VOLUME/2); } optr->sr0 = g_spinor_field[0]; optr->sr1 = g_spinor_field[1]; optr->sr2 = g_spinor_field[2]; optr->sr3 = g_spinor_field[3]; optr->prop0 = g_spinor_field[4]; optr->prop1 = g_spinor_field[5]; optr->prop2 = g_spinor_field[6]; optr->prop3 = g_spinor_field[7]; } nstore_ = nstore; isample_ = isample; ix_ = ix; op_id_ = op_id; return; }