int tmLQCD_invert(double * const propagator, double * const source, const int op_id, const int write_prop) { unsigned int index_start = 0; g_mu = 0.; if(!tmLQCD_invert_initialised) { fprintf(stderr, "tmLQCD_invert: tmLQCD_inver_init must be called first. Aborting...\n"); return(-1); } if(op_id < 0 || op_id >= no_operators) { fprintf(stderr, "tmLQCD_invert: op_id=%d not in valid range. Aborting...\n", op_id); return(-1); } operator_list[op_id].sr0 = g_spinor_field[0]; operator_list[op_id].sr1 = g_spinor_field[1]; operator_list[op_id].prop0 = g_spinor_field[2]; operator_list[op_id].prop1 = g_spinor_field[3]; zero_spinor_field(operator_list[op_id].prop0, VOLUME / 2); zero_spinor_field(operator_list[op_id].prop1, VOLUME / 2); // convert to even/odd order convert_lexic_to_eo(operator_list[op_id].sr0, operator_list[op_id].sr1, (spinor*) source); // invert operator_list[op_id].inverter(op_id, index_start, write_prop); // convert back to lexicographic order convert_eo_to_lexic((spinor*) propagator, operator_list[op_id].prop0, operator_list[op_id].prop1); return(0); }
/*
 * Point source in the first element of P.
 *
 * P and Q are zeroed over VOLUME/2 elements.  On the process whose
 * g_proc_coords are all zero, the real part of spin component `is`
 * (0..3), colour component `ic` (0..2) of P[0] is then set to 1.0.
 * Any other (is, ic) combination leaves both fields zero; Q is never
 * touched beyond the zeroing.
 */
void source_spinor_field(spinor * const P, spinor * const Q, int is, int ic) {
  zero_spinor_field(P, VOLUME/2);
  zero_spinor_field(Q, VOLUME/2);

  /* only the process at position (0,0,0,0) of the process grid sets the entry */
  if (g_proc_coords[0] != 0 || g_proc_coords[1] != 0 ||
      g_proc_coords[2] != 0 || g_proc_coords[3] != 0) {
    return;
  }

  /* outside the valid spin/colour range nothing is written */
  if (is < 0 || is > 3 || ic < 0 || ic > 2) {
    return;
  }

  /* put source to 1.0 */
  switch (3*is + ic) {
  case  0: P->s0.c0.re = 1.0; break;
  case  1: P->s0.c1.re = 1.0; break;
  case  2: P->s0.c2.re = 1.0; break;
  case  3: P->s1.c0.re = 1.0; break;
  case  4: P->s1.c1.re = 1.0; break;
  case  5: P->s1.c2.re = 1.0; break;
  case  6: P->s2.c0.re = 1.0; break;
  case  7: P->s2.c1.re = 1.0; break;
  case  8: P->s2.c2.re = 1.0; break;
  case  9: P->s3.c0.re = 1.0; break;
  case 10: P->s3.c1.re = 1.0; break;
  case 11: P->s3.c2.re = 1.0; break;
  default: break;
  }
}
double reweighting_factor_nd(const int N, const int repro) { int i, n_iter; double sq_norm, corr, sum=0., sq_sum = 0., temp1; double mu1, mu2; _Complex double temp2; mu1 = g_mu1; mu2 = g_mu1; /* Use spinor_field 2,3,5 */ /* in order not to conflict with anything else... */ for(i = 0; i < N; ++i) { random_spinor_field_eo(g_chi_up_spinor_field[2], repro, RN_GAUSS); random_spinor_field_eo(g_chi_dn_spinor_field[2], repro, RN_GAUSS); zero_spinor_field(g_chi_up_spinor_field[3], VOLUME/2); zero_spinor_field(g_chi_dn_spinor_field[3], VOLUME/2); temp1 = phmc_ptilde_cheby_coef[0]; phmc_ptilde_cheby_coef[0] = temp1 - 1; Ptilde_ndpsi(g_chi_up_spinor_field[3], g_chi_dn_spinor_field[3], phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, g_chi_up_spinor_field[2], g_chi_dn_spinor_field[2], &Qtm_pm_ndpsi); phmc_ptilde_cheby_coef[0] = temp1; temp2 = scalar_prod(g_chi_up_spinor_field[2], g_chi_up_spinor_field[3], VOLUME / 2, 1); if(cimag(temp2) > 1.0e-8) { printf("!!! WARNING Immaginary part of CORR-UP LARGER than 10^-8 !!! \n"); printf(" CORR-UP: Re=%12.10e Im=%12.10e \n", creal(temp2), cimag(temp2)); } corr = temp2; printf(" CORR-UP: Re=%12.10e \n", corr); temp2 = scalar_prod(g_chi_dn_spinor_field[2], g_chi_dn_spinor_field[3], VOLUME / 2, 1); if(cimag(temp2) > 1.0e-8) { printf("!!! WARNING Immaginary part of CORR_DN LARGER than 10^-8 !!! \n"); printf(" CORR-DN: Re=%12.10e Im=%12.10e \n", creal(temp2), cimag(temp2)); } corr += temp2; printf(" CORR-DN: Re=%12.10e \n", cimag(temp2)); temp1 = -corr; sum += temp1; sq_sum += temp1 * temp1; printf("rew: n_iter = %d, sq_norm = %e, corr = %e\n", n_iter, sq_norm, corr); } sum /= N; sq_sum /= N; printf("rew: factor = %e, err = %e\n", sum, sqrt(sum * sum - sq_sum) / (N - 1)); return(sum); }
/*
 * Build an extended source on a single global time slice.
 *
 * P, Q  output fields, zeroed over VOLUME/2 elements first
 * R, S  input fields read at the same even/odd sub-index as P, Q
 * t0    time offset; the slice used is ((g_nproc_t*T)/2 + t0) mod (g_nproc_t*T)
 * px, py, pz  momentum components entering the phase
 *
 * For every spatial site (x,y,z) of that slice owned by this process the
 * entry is computed as _spinor_mul_complex(dst, phase, _gamma5(src)) with
 * phase = cos(p.x) - i sin(p.x), p.x = px*x + py*y + pz*z in global
 * coordinates.  Sites with even coordinate sum read R and write P, the
 * others read S and write Q.  (_gamma5 and _spinor_mul_complex are macros
 * defined elsewhere.)
 */
void extended_pion_source(spinor * const P, spinor * const Q,
                          spinor * const R, spinor * const S,
                          const int t0,
                          const double px, const double py, const double pz) {
  int node[4];
  int owner = 0;
  int t_glob, lt, parity_shift;
  int gx, gy, gz;
  spinor * dst, * src, g5src;
  complex phase_fac;

  zero_spinor_field(P, VOLUME/2);
  zero_spinor_field(Q, VOLUME/2);

  t_glob = ((g_nproc_t*T)/2 + t0) % (g_nproc_t*T);
  lt = t_glob - g_proc_coords[0]*T;
  node[0] = t_glob / T;

  /* offset of this process' local origin, needed for the global site parity */
  parity_shift = g_proc_coords[3]*LZ + g_proc_coords[2]*LY
               + g_proc_coords[0]*T + g_proc_coords[1]*LX;

  for(gx = 0; gx < LX*g_nproc_x; gx++) {
    const int lx = gx - g_proc_coords[1]*LX;
    node[1] = gx / LX;

    for(gy = 0; gy < LY*g_nproc_y; gy++) {
      const int ly = gy - g_proc_coords[2]*LY;
      node[2] = gy / LY;

      for(gz = 0; gz < LZ*g_nproc_z; gz++) {
        const int lz = gz - g_proc_coords[3]*LZ;
        int eo;

        node[3] = gz / LZ;
#ifdef MPI
        MPI_Cart_rank(g_cart_grid, node, &owner);
#endif
        /* skip sites that live on another process */
        if(g_cart_id != owner) {
          continue;
        }

        phase_fac.re =  cos(px*gx + py*gy + pz*gz);
        phase_fac.im = -sin(px*gx + py*gy + pz*gz);

        eo = g_lexic2eosub[ g_ipt[lt][lx][ly][lz] ];
        if((lt + lx + ly + lz + parity_shift) % 2 == 0) {
          dst = P + eo;
          src = R + eo;
        }
        else {
          dst = Q + eo;
          src = S + eo;
        }

        _gamma5(g5src, (*src));
        _spinor_mul_complex((*dst), phase_fac, g5src);
      }
    }
  }
  return;
}
/*
 * Minimal-residual iteration for f(P) = Q, starting from P = 0.
 *
 * P         solution field; zeroed on entry and updated in place
 * Q         right-hand side
 * max_iter  maximal number of iterations
 * eps_sq    requested squared residual
 * rel_prec  1: stop when |r|^2 <= eps_sq * |Q|^2 (relative precision),
 *           otherwise stop when |r|^2 <= eps_sq (absolute precision).
 *           This is the same convention fgmres uses for its rel_prec flag.
 * N         number of sites the linear-algebra routines act on; also selects
 *           the size of the work fields (full volume if N == VOLUME, half
 *           volume otherwise)
 * parallel  forwarded to square_norm / scalar_prod
 * f         operator to be inverted, called as f(out, in)
 *
 * Returns the number of iterations performed, or -1 if the requested
 * precision was not reached (iteration limit hit, or the iteration broke
 * down because f(r) vanished).
 *
 * Every 50th iteration the residual is recomputed from f(P) instead of
 * being updated recursively.
 */
int mr(spinor * const P, spinor * const Q,
       const int max_iter, const double eps_sq,
       const int rel_prec, const int N, const int parallel,
       matrix_mult f){
  int i = 0;
  double norm_r, beta;
  double target_sq = eps_sq;
  _Complex double alpha;
  spinor * r;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r = solver_field[0];

  /* honour the relative-precision request instead of silently ignoring it */
  if(rel_prec == 1) {
    target_sq = eps_sq * square_norm(Q, N, parallel);
  }

  /* r = Q - f(P) with P = 0; solver_field[2] always holds f(P) */
  zero_spinor_field(P, N);
  f(solver_field[2], P);
  diff(r, Q, solver_field[2], N);
  norm_r = square_norm(solver_field[0], N, parallel);
  if(g_proc_id == g_stdio_proc && g_debug_level > 2) {
    printf("MR iteration number: %d, |res|^2 = %e\n", i, norm_r);
    fflush( stdout );
  }

  while((norm_r > target_sq) && (i < max_iter)){
    /* alpha = <f(r), r> / <f(r), f(r)> */
    f(solver_field[1], r);
    alpha = scalar_prod(solver_field[1], r, N, parallel);
    beta = square_norm(solver_field[1], N, parallel);
    if(beta == 0.) {
      /* f(r) vanished: no step can be taken and alpha/beta would be NaN */
      break;
    }
    i++;
    alpha /= beta;

    assign_add_mul(P, r, alpha, N);
    if(i%50 == 0){
      /* recompute f(P) from scratch to limit the accumulated rounding error */
      f(solver_field[2], P);
    }
    else{
      assign_add_mul(solver_field[2], solver_field[1], alpha, N);
    }

    diff(r, Q, solver_field[2], N);
    norm_r = square_norm(solver_field[0], N, parallel);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2) {
      printf("# MR iteration= %d  |res|^2= %g\n", i, norm_r);
      fflush(stdout);
    }
  }

  finalize_solver(solver_field, nr_sf);
  if(norm_r > target_sq){
    return(-1);
  }
  return(i);
}
/*
 * Iterative solve for k with right-hand side l, falling back to solve_cg.
 *
 * k       solution field, used as start vector and updated in place
 * l       right-hand side; gamma5 applied to it is kept in
 *         g_spinor_field[DUM_SOLVER+1]
 * eps_sq  squared residual at which the iteration stops
 *
 * Up to ITER_MAX_BCG correction steps are applied.  If none of them reports
 * a residual <= eps_sq, k is reset to zero, solve_cg is run and Q_psi is
 * applied to its result.
 *
 * Return value: the iteration at which the first stage converged; in the
 * fallback case the iteration counter plus the solve_cg count, reduced by
 * 1000000 when ITER_MAX_BCG != 0 (so a fallback is recognisable by the
 * caller), or just the solve_cg count when ITER_MAX_BCG == 0.
 *
 * Process 0 appends one line per stage to the file named by solvout.  If
 * that file cannot be opened a warning is printed and the line is skipped.
 * Work fields used: g_spinor_field[DUM_SOLVER .. DUM_SOLVER+2].
 */
int bicg(spinor * const k, spinor * const l, double eps_sq) {
  int iteration;
  int converged = 0;
  double xxx = 0.0;

  gamma5(g_spinor_field[DUM_SOLVER+1], l, VOLUME/2);

  /* main loop */
  for(iteration = 1; iteration <= ITER_MAX_BCG; iteration++) {
    /* compute the residual */
    M_psi(DUM_SOLVER, k, q_off);
    xxx = diff_and_square_norm(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+1], VOLUME/2);

    /* apply the solver step for the residual */
    M_psi(DUM_SOLVER+2, DUM_SOLVER, q_off-(2.+2.*q_off));
    assign_add_mul_r(k, -1./((1.+q_off)*(1.+q_off)), g_spinor_field[DUM_SOLVER+2], VOLUME/2);

    if(xxx <= eps_sq) {
      /* remember success explicitly: converging on the very last allowed
         iteration must not be mistaken for a failure */
      converged = 1;
      break;
    }
  }

  if(g_proc_id == 0) {
    sout = fopen(solvout, "a");
    if(sout != NULL) {
      fprintf(sout, "%d %e %f\n", iteration, xxx, g_mu);
      fclose(sout);
    }
    else {
      fprintf(stderr, "bicg: could not open solver log file for appending\n");
    }
  }

  /* if the geometric series fails, redo with conjugate gradient */
  if(!converged) {
    if(ITER_MAX_BCG == 0) {
      iteration = 0;
    }
    zero_spinor_field(k, VOLUME/2);
    iteration += solve_cg(k, l, q_off, eps_sq);
    Q_psi(k, k, q_off);
    if(ITER_MAX_BCG != 0) {
      iteration -= 1000000;
    }
    if(g_proc_id == 0) {
      sout = fopen(solvout, "a");
      if(sout != NULL) {
        fprintf(sout, "%d %e\n", iteration, g_mu);
        fclose(sout);
      }
      else {
        fprintf(stderr, "bicg: could not open solver log file for appending\n");
      }
    }
  }

  return iteration;
}
/*
 * Heatbath step for monomial number `id` of monomial_list.
 *
 * id  index into monomial_list
 * hf  Hamiltonian field; only hf->gaugefield is read here
 *
 * Sequence, as implemented below:
 *  - g_mu, g_c_sw and the boundary conditions are set from the monomial,
 *    its chronological and iteration counters are reset;
 *  - the clover term is built (init_sw_fields, sw_term, sw_invert);
 *  - a random field is drawn into g_spinor_field[4] and its square norm is
 *    stored in energy0;
 *  - with g_mu3 = rho the monomial's Qp is applied (result in
 *    g_spinor_field[3]); with g_mu3 = rho2 cg_her solves with Qsq into the
 *    pseudo-fermion field pf, the solution is handed to chrono_add_solution
 *    and Qm is applied to pf in place;
 *  - finally g_mu3 = 0, g_mu = g_mu1 and boundary(g_kappa) are restored.
 */
void cloverdetratio_heatbath(const int id, hamiltonian_field_t * const hf) {
  monomial * const mono = monomial_list + id;

  /* load this monomial's parameters into the global state */
  g_mu = mono->mu;
  g_c_sw = mono->c_sw;
  boundary(mono->kappa);

  /* fresh counters for the chronological guess and the solver statistics */
  mono->iter1 = 0;
  mono->iter0 = 0;
  mono->csg_n2 = 0;
  mono->csg_n = 0;

  /* clover term for the current gauge field */
  init_sw_fields();
  sw_term( (const su3**) hf->gaugefield, mono->kappa, mono->c_sw);
  sw_invert(EE, mono->mu);

  /* random field and its contribution to the energy */
  random_spinor_field(g_spinor_field[4], VOLUME/2, mono->rngrepro);
  mono->energy0 = square_norm(g_spinor_field[4], VOLUME/2, 1);

  /* apply Qp with the first shift ... */
  g_mu3 = mono->rho;
  mono->Qp(g_spinor_field[3], g_spinor_field[4]);

  /* ... and solve with Qsq using the second shift */
  g_mu3 = mono->rho2;
  zero_spinor_field(mono->pf, VOLUME/2);
  mono->iter0 = cg_her(mono->pf, g_spinor_field[3], mono->maxiter, mono->accprec,
                       g_relative_precision_flag, VOLUME/2, mono->Qsq);
  chrono_add_solution(mono->pf, mono->csg_field, mono->csg_index_array,
                      mono->csg_N, &mono->csg_n, VOLUME/2);
  mono->Qm(mono->pf, mono->pf);

  if(g_debug_level > 3 && g_proc_id == 0) {
    printf("called cloverdetratio_heatbath for id %d \n", id);
  }

  /* restore the global parameters */
  g_mu3 = 0.;
  g_mu = g_mu1;
  boundary(g_kappa);
  return;
}
/*
 * Point source at a site given by a global lexicographic index.
 *
 * P, Q         even and odd output fields, zeroed over VOLUME/2 elements
 * is, ic       spin (0..3) and colour (0..2) component set to 1.0 (real
 *              part); other values leave the fields zero
 * source_indx  global site index with z running fastest, then y, x, t;
 *              must lie in [0, global volume - 1], otherwise the program
 *              prints an error and exits with status 1
 *
 * The index is decomposed into global coordinates, then into the
 * coordinates of the owning process and the local coordinates on it.
 * Only the owning process writes the entry.
 */
void source_spinor_field_point_from_file(spinor * const P, spinor * const Q, int is, int ic, int source_indx) {
  int local_ext[4], global_ext[4];
  int glob[4], node[4], loc[4];
  int mu, rest, owner, site;
  spinor * dst;

  /* set fields to zero */
  zero_spinor_field(P, VOLUME/2);
  zero_spinor_field(Q, VOLUME/2);

  /* Check if source_indx is valid */
  if((source_indx < 0) || (source_indx >= (g_nproc_t*g_nproc_x*g_nproc_y*g_nproc_z*T*LX*LY*LZ))) {
    printf("Error in the input parameter file, SourceLocation must be in [0,VOLUME-1]! Exiting...!\n");
    exit(1);
  }

  /* local and global lattice extents in the order t, x, y, z */
  local_ext[0] = T;
  local_ext[1] = LX;
  local_ext[2] = LY;
  local_ext[3] = LZ;
  global_ext[0] = g_nproc_t * T;
  global_ext[1] = g_nproc_x * LX;
  global_ext[2] = g_nproc_y * LY;
  global_ext[3] = g_nproc_z * LZ;

  /* translate the index into global coordinates: z runs fastest, t slowest */
  rest = source_indx;
  for(mu = 3; mu > 0; mu--) {
    glob[mu] = rest % global_ext[mu];
    rest = rest / global_ext[mu];
  }
  glob[0] = rest;

  if(3*is+ic == index_start && g_proc_id == g_stdio_proc)
    printf("# The source site number is %i which corresponds to (t,x,y,z) = (%i,%i,%i,%i)\n",source_indx,glob[0],glob[1],glob[2],glob[3]);

  /* compute the coordinates and the index of the node */
  /* be careful!!! nodes indices have different convention (see io.c) */
  for(mu = 0; mu < 4; mu++) {
    node[mu] = glob[mu] / local_ext[mu];
  }
#ifdef MPI
  MPI_Cart_rank(g_cart_grid, node, &owner);
#else
  owner = 0;
#endif

  /* compute the local (inside the node) coordinates and index */
  for(mu = 0; mu < 4; mu++) {
    loc[mu] = glob[mu] - node[mu] * local_ext[mu];
  }
  site = g_ipt[loc[0]][loc[1]][loc[2]][loc[3]];

  /* Try g_proc_id instead of g_cart_id */
  if(owner != g_cart_id) {
    return;
  }

  if(3*is + ic == index_start && g_debug_level > 1) {
    printf("g_cart_id =%i\n",g_cart_id);
    printf("source_loc_coord[0] = %i\n",loc[0]);
    printf("source_loc_coord[1] = %i\n",loc[1]);
    printf("source_loc_coord[2] = %i\n",loc[2]);
    printf("source_loc_coord[3] = %i\n",loc[3]);
    printf("source_loc_indx = %i\n",site);
  }

  /* Check which spinor field (even or odd) needs to be initialized */
  dst = (g_lexic2eo[site] < VOLUME/2) ? (P + g_lexic2eo[site])
                                      : (Q + g_lexic2eosub[site]);

  /* outside the valid spin/colour range nothing is written */
  if(is < 0 || is > 3 || ic < 0 || ic > 2) {
    return;
  }

  /* put source to 1.0 */
  switch(3*is + ic) {
  case  0: dst->s0.c0.re = 1.0; break;
  case  1: dst->s0.c1.re = 1.0; break;
  case  2: dst->s0.c2.re = 1.0; break;
  case  3: dst->s1.c0.re = 1.0; break;
  case  4: dst->s1.c1.re = 1.0; break;
  case  5: dst->s1.c2.re = 1.0; break;
  case  6: dst->s2.c0.re = 1.0; break;
  case  7: dst->s2.c1.re = 1.0; break;
  case  8: dst->s2.c2.re = 1.0; break;
  case  9: dst->s3.c0.re = 1.0; break;
  case 10: dst->s3.c1.re = 1.0; break;
  case 11: dst->s3.c2.re = 1.0; break;
  default: break;
  }
}
/*
 * Restarted GMRES with an optional (flexible) right preconditioner for
 * f(x) = Q.
 *
 * P             start vector on entry, solution (or last iterate) on exit
 * Q             right-hand side
 * m             number of Krylov vectors built per restart cycle
 * max_restarts  maximal number of restart cycles
 * eps_sq        requested squared residual; its square root is compared
 *               against the iterated residual norm
 * rel_prec      0: absolute criterion (residual <= eps),
 *               1: relative criterion (residual <= eps * |Q|);
 *               for any other value the inner convergence test never fires
 * N             number of sites the linear-algebra routines act on; also
 *               selects full- or half-volume work fields
 * precon        0: no preconditioner (Z[j] = V[j]); otherwise Msap is applied
 * f             operator to be inverted, called as f(out, in)
 *
 * Returns restart*m + j when the iterated residual satisfies the criterion,
 * restart*m when the true residual at the start of a cycle is exactly zero,
 * and -1 when max_restarts cycles were used up (P then holds the last
 * iterate).
 *
 * alpha, V, Z, H, s and c are not declared in this function; they are work
 * arrays defined elsewhere, presumably set up by init_gmres(m, ...) --
 * confirm their sizes there.
 */
int fgmres(spinor * const P,spinor * const Q, const int m, const int max_restarts, const double eps_sq, const int rel_prec, const int N, const int precon, matrix_mult f){
  int restart, i, j, k;
  double beta, eps, norm;
  complex tmp1, tmp2;
  spinor * r0;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  /* work fields: [0] residual / operator output, [1] orthogonalised vector,
     [2] current solution */
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  eps=sqrt(eps_sq);
  init_gmres(m, VOLUMEPLUSRAND);
  r0 = solver_field[0];
  /* norm of the right-hand side, used for the relative criterion */
  norm = sqrt(square_norm(Q, N, 1));
  assign(solver_field[2], P, N);

  for(restart = 0; restart < max_restarts; restart++){
    /* r_0=Q-AP  (b=Q, x+0=P) */
    f(r0, solver_field[2]);
    diff(r0, Q, r0, N);
    /* v_0=r_0/||r_0|| */
    alpha[0].re=sqrt(square_norm(r0, N, 1));
    if(g_proc_id == g_stdio_proc && g_debug_level > 0){
      printf("FGMRES %d\t%g true residue\n", restart*m, alpha[0].re*alpha[0].re);
      fflush(stdout);
    }
    /* an exactly vanishing residual means the current iterate is the solution */
    if(alpha[0].re==0.){
      assign(P, solver_field[2], N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m);
    }
    mul_r(V[0], 1./alpha[0].re, r0, N);

    for(j = 0; j < m; j++){
      /* solver_field[0]=A*M^-1*v_j */
      if(precon == 0) {
        assign(Z[j], V[j], N);
      }
      else {
        zero_spinor_field(Z[j], N);
        /* poly_nonherm_precon(Z[j], V[j], 0.3, 1.1, 80, N); */
        Msap(Z[j], V[j], 8);
      }
      /* r0 aliases solver_field[0], so this stores A*Z[j] there */
      f(r0, Z[j]);
      /* Set h_ij and omega_j */
      /* solver_field[1] <- omega_j */
      assign(solver_field[1], solver_field[0], N);
      /* orthogonalise against V[0..j], storing the coefficients in column j of H */
      for(i = 0; i <= j; i++){
        H[i][j] = scalar_prod(V[i], solver_field[1], N, 1);
        assign_diff_mul(solver_field[1], V[i], H[i][j], N);
      }

      /* sub-diagonal entry: norm of what is left after orthogonalisation */
      _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, 1)), 0.);
      /* apply the rotations (c[i], s[i]) stored in earlier steps to the new column */
      for(i = 0; i < j; i++){
        tmp1 = H[i][j];
        tmp2 = H[i+1][j];
        _mult_real(H[i][j], tmp2, s[i]);
        _add_assign_complex_conj(H[i][j], c[i], tmp1);
        _mult_real(H[i+1][j], tmp1, s[i]);
        _diff_assign_complex(H[i+1][j], c[i], tmp2);
      }

      /* Set beta, s, c, alpha[j],[j+1] */
      /* NOTE(review): beta == 0 would divide by zero below; not guarded here */
      beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j]));
      s[j] = H[j+1][j].re / beta;
      _mult_real(c[j], H[j][j], 1./beta);
      _complex_set(H[j][j], beta, 0.);
      /* alpha[j+1].re is used below as the iterated residual norm */
      _mult_real(alpha[j+1], alpha[j], s[j]);
      tmp1 = alpha[j];
      _mult_assign_complex_conj(alpha[j], c[j], tmp1);

      /* precision reached? */
      if(g_proc_id == g_stdio_proc && g_debug_level > 0){
        printf("FGMRES\t%d\t%g iterated residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re);
        fflush(stdout);
      }
      if(((alpha[j+1].re <= eps) && (rel_prec == 0)) || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){
        /* back substitution through the upper triangular part of H for
           rows j..0; each coefficient is applied to the solution at once */
        _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
        assign_add_mul(solver_field[2], Z[j], alpha[j], N);
        for(i = j-1; i >= 0; i--){
          for(k = i+1; k <= j; k++){
            _mult_assign_complex(tmp1, H[i][k], alpha[k]);
            _diff_complex(alpha[i], tmp1);
          }
          _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
          assign_add_mul(solver_field[2], Z[i], alpha[i], N);
        }
        /* clear the imaginary parts of the shared alpha array before leaving */
        for(i = 0; i < m; i++){
          alpha[i].im = 0.;
        }
        assign(P, solver_field[2], N);
        finalize_solver(solver_field, nr_sf);
        return(restart*m+j);
      }
      /* if not */
      else{
        /* next basis vector: the normalised remainder; not needed after the
           last step of a cycle */
        if(j != m-1){
          mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N);
        }
      }

    }
    j=m-1;
    /* prepare for restart */
    /* same back substitution as above, now over the full cycle of m steps */
    _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
    assign_add_mul(solver_field[2], Z[j], alpha[j], N);
    for(i = j-1; i >= 0; i--){
      for(k = i+1; k <= j; k++){
        _mult_assign_complex(tmp1, H[i][k], alpha[k]);
        _diff_complex(alpha[i], tmp1);
      }
      _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
      assign_add_mul(solver_field[2], Z[i], alpha[i], N);
    }
    for(i = 0; i < m; i++){
      alpha[i].im = 0.;
    }
  }

  /* If maximal number of restarts is reached */
  assign(P, solver_field[2], N);
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
/*
 * Random-phase source on a sub-lattice, one spin/colour component only.
 *
 * P, Q     even and odd output fields, zeroed over VOLUME/2 elements
 * is, ic   spin and colour component that receives the random phase
 * t        first global time slice; further slices follow in steps of nt
 * nt, nx   strides in time and in each of the three spatial directions
 * sample, nstore  enter the seed together with t
 * meson    non-zero: phase drawn from (+-1 +- i)/sqrt(2);
 *          zero: phase drawn from the three cubic roots of unity
 *
 * The generator is re-seeded with |1 + sample + t*10*97 + nstore*100*53|;
 * if it had been initialised before (ranlxd_init == 1) its state is saved
 * first and restored on exit.  One random number is drawn for every visited
 * site on every process, but only the owning process stores the entry.
 */
void source_generation_nucleon(spinor * const P, spinor * const Q,
                               const int is, const int ic,
                               const int t, const int nt, const int nx,
                               const int sample, const int nstore,
                               const int meson) {
  /* imaginary and real parts of exp(2 pi i k / 3), k = 0, 1, 2 */
  const double z3_im[3] = { 0., sin(2.*M_PI/3.), sin(4.*M_PI/3.) };
  const double z3_re[3] = { 1., cos(2.*M_PI/3.), cos(4.*M_PI/3.) };
  const double inv_sqrt2 = 1./sqrt(2.);
  /* offset of this process' local origin, needed for the global site parity */
  const int parity_shift = g_proc_coords[3]*LZ + g_proc_coords[2]*LY
                         + g_proc_coords[0]*T + g_proc_coords[1]*LX;
  int saved_state[105];
  int restore = 0;
  int owner = 0;
  int node[4];
  int gt, gx, gy, gz;
  double rnd;

  zero_spinor_field(P, VOLUME/2);
  zero_spinor_field(Q, VOLUME/2);

  /* save the ranlxd_state if neccessary */
  if(ranlxd_init == 1) {
    rlxd_get(saved_state);
    restore = 1;
  }

  /* Compute the seed */
  rlxd_init(1, (int) abs(1 + sample + t*10*97 + nstore*100*53));

  for(gt = t; gt < T*g_nproc_t; gt += nt) {
    const int lt = gt - g_proc_coords[0]*T;
    node[0] = gt / T;

    for(gx = 0; gx < LX*g_nproc_x; gx += nx) {
      const int lx = gx - g_proc_coords[1]*LX;
      node[1] = gx / LX;

      for(gy = 0; gy < LY*g_nproc_y; gy += nx) {
        const int ly = gy - g_proc_coords[2]*LY;
        node[2] = gy / LY;

        for(gz = 0; gz < LZ*g_nproc_z; gz += nx) {
          const int lz = gz - g_proc_coords[3]*LZ;
          double re = 0., im = 0.;
          complex * site;
          int eo;

          node[3] = gz / LZ;
#ifdef MPI
          MPI_Cart_rank(g_cart_grid, node, &owner);
#endif
          /* every process draws the number so that all stay in step */
          ranlxd(&rnd, 1);
          if(g_cart_id != owner) {
            continue;
          }

          if(meson) {
            switch((int)floor(4.*rnd)) {
            case 0:  im =  inv_sqrt2; re =  inv_sqrt2; break;
            case 1:  im = -inv_sqrt2; re =  inv_sqrt2; break;
            case 2:  im =  inv_sqrt2; re = -inv_sqrt2; break;
            default: im = -inv_sqrt2; re = -inv_sqrt2; break;
            }
          }
          else {
            int root = (int)floor(3.*rnd);
            /* everything that is neither 0 nor 1 selects the third root */
            if(root != 0 && root != 1) {
              root = 2;
            }
            im = z3_im[root];
            re = z3_re[root];
          }

          eo = g_lexic2eosub[ g_ipt[lt][lx][ly][lz] ];
          if((lt + lx + ly + lz + parity_shift) % 2 == 0) {
            site = (complex*)(P + eo);
          }
          else {
            site = (complex*)(Q + eo);
          }

          /* the spinor is addressed as 12 consecutive complex numbers */
          site[3*is + ic].re = re;
          site[3*is + ic].im = im;
        }
      }
    }
  }

  /* reset the ranlxd if neccessary */
  if(restore) {
    rlxd_reset(saved_state);
  }
  return;
}
/* Florian Burger 4.11.2009 */
/*
 * Random-phase source on the plane of fixed global z coordinate.
 *
 * P, Q            even and odd output fields, zeroed over VOLUME/2 elements
 * z               global z coordinate of the plane
 * sample, nstore  enter the seed together with z and g_cart_id
 *
 * Every spin/colour component of every site (t, x, y, z) receives a phase
 * drawn from (+-1 +- i)/sqrt(2).  The generator is re-seeded with
 * |1 + sample + z*10*97 + nstore*100*53 + g_cart_id*13|; if it had been
 * initialised before (ranlxd_init == 1) its state is saved first and
 * restored on exit.  Twelve random numbers are drawn per site on every
 * process, but only the owning process stores them.
 */
void source_generation_pion_zdir(spinor * const P, spinor * const Q,
                                 const int z,
                                 const int sample, const int nstore) {
  const double inv_sqrt2 = 1./sqrt(2.);
  /* offset of this process' local origin, needed for the global site parity */
  const int parity_shift = g_proc_coords[3]*LZ + g_proc_coords[2]*LY
                         + g_proc_coords[0]*T + g_proc_coords[1]*LX;
  const int lz = z - g_proc_coords[3]*LZ;
  int saved_state[105];
  int restore = 0;
  int owner = 0;
  int node[4];
  int gt, gx, gy, spin, col;
  double rnd;

  zero_spinor_field(P, VOLUME/2);
  zero_spinor_field(Q, VOLUME/2);

  /* save the ranlxd_state if neccessary */
  if(ranlxd_init == 1) {
    rlxd_get(saved_state);
    restore = 1;
  }

  /* Compute the seed */
  rlxd_init(1, (int) abs(1 + sample + z*10*97 + nstore*100*53 + g_cart_id*13));

  node[3] = z / LZ;

  for(gt = 0; gt < T*g_nproc_t; gt++) {
    const int lt = gt - g_proc_coords[0]*T;
    node[0] = gt / T;

    for(gx = 0; gx < LX*g_nproc_x; gx++) {
      const int lx = gx - g_proc_coords[1]*LX;
      node[1] = gx / LX;

      for(gy = 0; gy < LY*g_nproc_y; gy++) {
        const int ly = gy - g_proc_coords[2]*LY;
        complex * site = NULL;
        int mine;

        node[2] = gy / LY;
#ifdef MPI
        MPI_Cart_rank(g_cart_grid, node, &owner);
#endif
        mine = (g_cart_id == owner);
        if(mine) {
          /* the local indices are only valid on the owning process */
          const int eo = g_lexic2eosub[ g_ipt[lt][lx][ly][lz] ];
          if((lt + lx + ly + lz + parity_shift) % 2 == 0) {
            site = (complex*)(P + eo);
          }
          else {
            site = (complex*)(Q + eo);
          }
        }

        for(spin = 0; spin < 4; spin++) {
          for(col = 0; col < 3; col++) {
            double re, im;

            /* drawn on every process, stored only by the owner */
            ranlxd(&rnd, 1);
            if(!mine) {
              continue;
            }

            switch((int)floor(4.*rnd)) {
            case 0:  im =  inv_sqrt2; re =  inv_sqrt2; break;
            case 1:  im = -inv_sqrt2; re =  inv_sqrt2; break;
            case 2:  im =  inv_sqrt2; re = -inv_sqrt2; break;
            default: im = -inv_sqrt2; re = -inv_sqrt2; break;
            }

            /* the spinor is addressed as 12 consecutive complex numbers */
            site[3*spin + col].re = re;
            site[3*spin + col].im = im;
          }
        }
      }
    }
  }

  /* reset the ranlxd if neccessary */
  if(restore) {
    rlxd_reset(saved_state);
  }
  return;
}
void prepare_source(const int nstore, const int isample, const int ix, const int op_id, const int read_source_flag, const int source_location) { FILE * ifs = NULL; int is = ix / 3, ic = ix %3, err = 0, rstat=0, t = 0; operator * optr = &operator_list[op_id]; char source_filename[100]; int source_type = SourceInfo.type; static int nstore_ = -1; static int isample_ = -1; static int ix_ = -1; static int op_id_ = -1; SourceInfo.nstore = nstore; SourceInfo.sample = isample; SourceInfo.ix = ix; if(optr->type != DBTMWILSON && optr->type != DBCLOVER && optr->type != BSM && optr->type != BSM2b && optr->type != BSM2m ) { SourceInfo.no_flavours = 1; /* no volume sources */ if(source_type != 1) { /* either "Don't read inversion source from file" or */ /* "Don't read inversion source from file, but save the one generated" */ if (read_source_flag == 0 || read_source_flag == 2) { if (source_location == 0) { source_spinor_field(g_spinor_field[0], g_spinor_field[1], is, ic); } else { source_spinor_field_point_from_file(g_spinor_field[0], g_spinor_field[1], is, ic, source_location); } } /* "Read inversion source from file" */ else { if (SourceInfo.splitted) { /* timeslice needs to be put into filename */ if(SourceInfo.automaticTS) { /* automatic timeslice detection */ if(g_proc_id == 0) { for(t = 0; t < g_nproc_t*T; t++) { if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d", SourceInfo.basename, nstore, t, ix); else sprintf(source_filename, "%s.%.4d.%.2d.%.2d", SourceInfo.basename, nstore, t, ix); if( (ifs = fopen(source_filename, "r")) != NULL) { fclose(ifs); break; } } } #ifdef MPI MPI_Bcast(&t, 1, MPI_INT, 0, MPI_COMM_WORLD); #endif SourceInfo.t = t; } if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix); else sprintf(source_filename, "%s.%.4d.%.2d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix); if (g_cart_id == 0) { printf("# Trying to read source from %s\n", source_filename); } rstat = 
read_spinor(g_spinor_field[0], g_spinor_field[1], source_filename, 0); } else { sprintf(source_filename, "%s", SourceInfo.basename); if (g_cart_id == 0) { printf("# Trying to read source no %d from %s\n", ix, source_filename); } rstat = read_spinor(g_spinor_field[0], g_spinor_field[1], source_filename, ix); } if(rstat) { fprintf(stderr, "Error reading file %s in prepare_source.c\nUnable to proceed, aborting....\n", source_filename); exit(-1); } } if (PropInfo.splitted) { if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d.inverted", PropInfo.basename, nstore, SourceInfo.t, ix); else sprintf(source_filename, "%s.%.4d.%.2d.%.2d.inverted", PropInfo.basename, nstore, SourceInfo.t, ix); } else { if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.inverted", PropInfo.basename, nstore, SourceInfo.t); else sprintf(source_filename, "%s.%.4d.%.2d.inverted", PropInfo.basename, nstore, SourceInfo.t); } } else if(source_type == 1) { /* Volume sources */ if(read_source_flag == 0 || read_source_flag == 2) { if(g_proc_id == 0 && g_debug_level > 0) { printf("# Preparing 1 flavour volume source\n"); } gaussian_volume_source(g_spinor_field[0], g_spinor_field[1], isample, nstore, 0); } else { sprintf(source_filename, "%s.%.4d.%.5d", SourceInfo.basename, nstore, isample); if (g_cart_id == 0) { printf("# Trying to read source from %s\n", source_filename); } rstat = read_spinor(g_spinor_field[0], g_spinor_field[1], source_filename, 0); if(rstat) { fprintf(stderr, "Error reading file %s in prepare_source.c.\nUnable to proceed, aborting....\n", source_filename); exit(-1); } } sprintf(source_filename, "%s.%.4d.%.5d.inverted", PropInfo.basename, nstore, isample); } optr->sr0 = g_spinor_field[0]; optr->sr1 = g_spinor_field[1]; optr->prop0 = g_spinor_field[2]; optr->prop1 = g_spinor_field[3]; /* If the solver is _not_ CG we might read in */ /* here some better guess */ /* This also works for re-iteration */ if (optr->solver != CG && optr->solver != PCG && optr->solver != MIXEDCG 
&& optr->solver != RGMIXEDCG) { ifs = fopen(source_filename, "r"); if (ifs != NULL) { if (g_cart_id == 0) { printf("# Trying to read guess from file %s\n", source_filename); fflush(stdout); } fclose(ifs); err = 0; /* iter = get_propagator_type(source_filename); */ rstat = read_spinor(optr->prop0, optr->prop1, source_filename, (PropInfo.splitted ? 0 : ix)); if(rstat) { fprintf(stderr, "Error reading file %s in prepare_source.c, rstat = %d\n", source_filename, rstat); exit(-1); } if (g_kappa != 0.) { mul_r(optr->prop1, 1. / (2*optr->kappa), optr->prop1, VOLUME / 2); mul_r(optr->prop0, 1. / (2*optr->kappa), optr->prop0, VOLUME / 2); } if (err != 0) { zero_spinor_field(optr->prop0, VOLUME / 2); zero_spinor_field(optr->prop1, VOLUME / 2); } } else { zero_spinor_field(optr->prop0, VOLUME / 2); zero_spinor_field(optr->prop1, VOLUME / 2); } } else { zero_spinor_field(optr->prop0, VOLUME / 2); zero_spinor_field(optr->prop1, VOLUME / 2); } /* if(optr->even_odd_flag) { */ /* assign(optr->sr0, g_spinor_field[0], VOLUME/2); */ /* assign(optr->sr1, g_spinor_field[1], VOLUME/2); */ /* } */ /* else { */ /* convert_eo_to_lexic(optr->sr0, g_spinor_field[0], g_spinor_field[1]); */ /* } */ } else { /* for the ND 2 flavour twisted operator and BSM(2) */ SourceInfo.no_flavours = 2; zero_spinor_field(g_spinor_field[0], VOLUME/2); zero_spinor_field(g_spinor_field[1], VOLUME/2); if(source_type != 1) { if(read_source_flag == 0 || read_source_flag == 2) { if(source_location == 0) { source_spinor_field(g_spinor_field[2], g_spinor_field[3], is, ic); } else { source_spinor_field_point_from_file(g_spinor_field[2], g_spinor_field[3], is, ic, source_location); } } else { if(SourceInfo.splitted) { if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix); else sprintf(source_filename, "%s.%.4d.%.2d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix); } else { sprintf(source_filename,"%s", SourceInfo.basename); } if(g_proc_id == 0) { printf("# 
Trying to read source from %s\n", source_filename); } if(read_spinor(g_spinor_field[2], g_spinor_field[3], source_filename, 0) != 0) { fprintf(stderr, "Error reading source! Aborting...\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(-1); } } } else if(source_type == 1) { /* Volume sources */ if(g_proc_id == 0 && g_debug_level > 0) { printf("# Preparing 2 flavour volume source\n"); } gaussian_volume_source(g_spinor_field[0], g_spinor_field[1], isample, nstore, 1); gaussian_volume_source(g_spinor_field[2], g_spinor_field[3], isample, nstore, 2); } if( optr->type != BSM && optr->type != BSM2b && optr->type != BSM2m ) { mul_one_pm_itau2(g_spinor_field[4], g_spinor_field[6], g_spinor_field[0], g_spinor_field[2], +1., VOLUME/2); mul_one_pm_itau2(g_spinor_field[5], g_spinor_field[7], g_spinor_field[1], g_spinor_field[3], +1., VOLUME/2); assign(g_spinor_field[0], g_spinor_field[4], VOLUME/2); assign(g_spinor_field[1], g_spinor_field[5], VOLUME/2); assign(g_spinor_field[2], g_spinor_field[6], VOLUME/2); assign(g_spinor_field[3], g_spinor_field[7], VOLUME/2); } optr->sr0 = g_spinor_field[0]; optr->sr1 = g_spinor_field[1]; optr->sr2 = g_spinor_field[2]; optr->sr3 = g_spinor_field[3]; optr->prop0 = g_spinor_field[4]; optr->prop1 = g_spinor_field[5]; optr->prop2 = g_spinor_field[6]; optr->prop3 = g_spinor_field[7]; } nstore_ = nstore; isample_ = isample; ix_ = ix; op_id_ = op_id; return; }
/*
 * Apply a Chebyshev polynomial in the normalised squared operator to S,
 * followed by one application of norm_Q_n_psi, and add the contribution of
 * the projected eigenvector subspace.
 *
 * R      result field
 * c      Chebyshev coefficients c[0] .. c[n-1]
 * n      number of coefficients
 * S      input field
 * rnorm  normalisation forwarded to norm_Q_sqr_psi / norm_Q_n_psi
 * minev  lower end of the approximation interval; the upper end is fixed
 *        to 1.0
 *
 * Work space: the Dov work spinors 2..6 are locked on entry and unlocked on
 * exit.  eigenvalues_for_cg_computed is set to max(no_eigenvalues - 1, 0).
 *
 * Two compile-time switches are kept from the original implementation:
 * PROJECT_LOW_MODES removes the lowest eigenvectors before and after the
 * recursion, and USE_CLENSHAW selects the backward (Clenshaw) recursion
 * over the forward recursion with early termination.  Both are on.
 */
void Q_over_sqrt_Q_sqr(spinor * const R, double * const c,
                       const int n, spinor * const S,
                       const double rnorm, const double minev) {
  enum { PROJECT_LOW_MODES = 1, USE_CLENSHAW = 1 };
  const double maxev = 1.0;
  /* linear map of [minev, maxev] onto [-1, 1], in the form used by the recursions */
  const double scale = 4/(maxev-minev);
  const double shift = -2*(maxev+minev)/(maxev-minev);
  /* threshold of the early-termination test in the forward recursion */
  const double ap_eps_sq = 0.;
  double term_norm;
  int k;
  spinor * saved, * b1, * b2, * work, * src;

  saved = lock_Dov_WS_spinor(2);
  b1    = lock_Dov_WS_spinor(3);
  b2    = lock_Dov_WS_spinor(4);
  work  = lock_Dov_WS_spinor(5);
  src   = lock_Dov_WS_spinor(6);

  eigenvalues_for_cg_computed = no_eigenvalues - 1;
  if(eigenvalues_for_cg_computed < 0) {
    eigenvalues_for_cg_computed = 0;
  }

  zero_spinor_field(b1, VOLUME);
  zero_spinor_field(b2, VOLUME);

  if(PROJECT_LOW_MODES) {
    assign_sub_lowest_eigenvalues(src, S, no_eigenvalues-1, VOLUME);
  }
  else {
    assign(src, S, VOLUME);
  }

  /* Check whether switch for adaptive precision is on */
  /* this might be implemented again in the future */
  if(USE_CLENSHAW) {
    /* Use the 'old' version using Clenshaw's recursion for the
       Chebysheff polynomial */
    for(k = n-1; k >= 1; k--) {
      assign(saved, b1, VOLUME);

      /* every tenth step the lowest eigenvectors are projected out again */
      if((k % 10) == 0) {
        assign_sub_lowest_eigenvalues(work, b1, no_eigenvalues-1, VOLUME);
      }
      else {
        assign(work, b1, VOLUME);
      }

      norm_Q_sqr_psi(R, work, rnorm);
      assign_mul_add_mul_add_mul_add_mul_r(b1, R, b2, src, shift, scale, -1.0, c[k], VOLUME);
      assign(b2, saved, VOLUME);
    }

    if(PROJECT_LOW_MODES) {
      assign_sub_lowest_eigenvalues(R, b1, no_eigenvalues-1, VOLUME);
    }
    else {
      assign(R, b1, VOLUME);
    }

    /* last step of the recursion enters with half weights */
    norm_Q_sqr_psi(work, R, rnorm);
    assign_mul_add_mul_add_mul_add_mul_r(work, b1, b2, src, scale/2., shift/2., -1.0, c[0]/2., VOLUME);
    norm_Q_n_psi(R, work, 1, rnorm);
  }
  else {
    /* Use the adaptive precision version using the forward recursion
       for the Chebysheff polynomial */

    /* b1 = T_0(Q^2) */
    assign(b1, src, VOLUME);
    /* b2 = T_1(Q^2) */
    norm_Q_sqr_psi(b2, b1, rnorm);
    assign_mul_add_mul_r(b2, b1, scale/2., shift/2., VOLUME);
    /* r = c_1 T_1(Q^2) + 1./2 c_0 */
    mul_add_mul_r(R, b2, b1, c[1], c[0]/2., VOLUME);

    for(k = 2; k <= n-1; k++) {
      /* work = T_k(Q^2) = 2 Q^2 T_{k-1}(Q^2) - T_{k-2}(Q^2) */
      norm_Q_sqr_psi(work, b2, rnorm);
      assign_mul_add_mul_add_mul_r(work, b2, b1, scale, shift, -1.0, VOLUME);
      /* r = r + c_k T_k(Q^2) */
      assign_add_mul_r(R, work, c[k], VOLUME);

      /* stopping criterion: |c_k T_k(Q^2)|^2 */
      term_norm = square_norm(work, VOLUME, 1);
      term_norm *= (c[k]*c[k]);
      if(term_norm < ap_eps_sq) {
        break;
      }

      /* shift the recursion by one order */
      assign(b1, b2, VOLUME);
      assign(b2, work, VOLUME);
    }
    if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
      printf("Order of Chebysheff approximation = %d\n",k);
      fflush( stdout);
    }

    /* r = Q r */
    assign(work, R, VOLUME);
    norm_Q_n_psi(R, work, 1, rnorm);
  }

  /* add in piece from projected subspace */
  addproj_q_invsqrt(R, S, no_eigenvalues-1, VOLUME);

  unlock_Dov_WS_spinor(2);
  unlock_Dov_WS_spinor(3);
  unlock_Dov_WS_spinor(4);
  unlock_Dov_WS_spinor(5);
  unlock_Dov_WS_spinor(6);
  return;
}
/*
 * Allocate one zero-initialised work field of VOLUMEPLUSRAND+1 spinors.
 * The address returned by calloc is stored in *raw; the return value is the
 * pointer to be used for the field (aligned with ALIGN_BASE when one of the
 * SSE variants is compiled in).  Returns NULL when the allocation failed.
 */
static spinor * poly_precon_new_field(spinor ** const raw) {
  *raw = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
  if(*raw == NULL) {
    return NULL;
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  return (spinor *)(((unsigned long int)(*raw)+ALIGN_BASE)&~ALIGN_BASE);
#else
  return *raw;
#endif
}

/*
 * poly_precon
 *
 * Applies a Chebysheff polynomial in the operator Q_pm_psi to S and stores
 * the result in R.  The polynomial is built with the forward three-term
 * recursion and is truncated as soon as |c_j T_j|^2 drops below prec or
 * after n terms, whichever comes first.
 *
 *   R     output field
 *   S     input field
 *   prec  threshold for |c_j T_j|^2 that stops the recursion
 *   n     maximal number of terms
 *
 * The coefficient table and the work fields are allocated on the first call
 * and kept for the lifetime of the program.  The approximation interval is
 * fixed to [0.1, 4].
 *
 * Changes with respect to the previous version:
 *  - every allocation is checked; on failure the routine reports the problem
 *    and terminates instead of dereferencing a NULL pointer,
 *  - the recursion never reads beyond the allocated coefficient table when
 *    n exceeds its size,
 *  - the work field "sv", which was allocated but never used, is gone,
 *  - stdout is flushed once per message instead of twice.
 */
void poly_precon(spinor * const R, spinor * const S, const double prec, const int n) {
  /* size of the coefficient table and number of coefficients get_c fills in;
     the remaining entries stay zero from calloc */
  enum { COEF_ALLOC = 1000, COEF_COMPUTED = 100 };
  static spinor *d_, *d, *dd_, *dd, *aux_, *aux, *aux3_, *aux3;
  static int initp = 0;
  static double * c;
  const int N = VOLUME;
  const double maxev = 4.0;
  const double minev = 0.1;
  /* never index the coefficient table past its end */
  const int n_terms = (n > COEF_ALLOC) ? COEF_ALLOC : n;
  double fact1, fact2, temp1, temp2, temp3, temp4, tnorm, auxnorm;
  int j;

  if(initp == 0) {
    c = (double*)calloc(COEF_ALLOC, sizeof(double));
    d = poly_precon_new_field(&d_);
    dd = poly_precon_new_field(&dd_);
    aux = poly_precon_new_field(&aux_);
    aux3 = poly_precon_new_field(&aux3_);
    if(c == NULL || d == NULL || dd == NULL || aux == NULL || aux3 == NULL) {
      fprintf(stderr, "poly_precon: could not allocate work space. Aborting...\n");
      exit(EXIT_FAILURE);
    }
    get_c(minev, maxev, c, COEF_COMPUTED);
    initp = 1;
  }

  /* linear map of [minev, maxev] onto [-1, 1], times two */
  fact1 = 4. / (maxev - minev);
  fact2 = -2 * (maxev + minev) / (maxev - minev);

  zero_spinor_field(&d[0], N);
  zero_spinor_field(&dd[0], N);
  assign(&aux3[0], &S[0], N);

  /* Forward recursion for the Chebysheff polynomial */
  /* d = T_0(Q^2) */
  assign(&d[0], &aux3[0], N);
  /* dd = T_1(Q^2) */
  Q_pm_psi(&dd[0], &d[0]);
  temp3 = fact1/2;
  temp4 = fact2/2;
  assign_mul_add_mul_r(&dd[0], &d[0], temp3, temp4, N);
  /* r = c_1 T_1(Q^2) + 1/2 c_0 */
  temp1 = c[1];
  temp2 = c[0]/2;
  mul_add_mul_r(&R[0], &dd[0], &d[0], temp1, temp2, N);

  temp1 = -1.0;
  for(j = 2; j <= n_terms-1; j++) {
    /* aux = T_j(Q^2) = 2 Q^2 T_{j-1}(Q^2) - T_{j-2}(Q^2) */
    Q_pm_psi(&aux[0], &dd[0]);
    assign_mul_add_mul_add_mul_r(&aux[0],&dd[0],&d[0],fact1,fact2,temp1, N);
    /* r = r + c_j T_j(Q^2) */
    temp2 = c[j];
    assign_add_mul_r(&R[0],&aux[0],temp2, N);
    /* The stopping criterion tnorm = |c_j T_j(Q^2)|^2 */
    tnorm = square_norm(aux, N, 1);
    tnorm *= (temp2*temp2);

    auxnorm = square_norm(R, N, 1);
    if(g_proc_id == g_stdio_proc) {
      printf("j= %d\t|c T|^2= %g\t%g\t c_j= %g\t|r|^2= %g\n",j,tnorm,prec, temp2,auxnorm);
      fflush(stdout);
    }

    if(tnorm < prec) {
      break;
    }
    /* d = T_{j-1}(Q^2) */
    assign(&d[0], &dd[0], N);
    /* dd = T_{j}(Q^2) */
    assign(&dd[0], &aux[0], N);
  }
  if(g_proc_id == g_stdio_proc) {
    printf("Order of Chebysheff approximation = %d\n",j);
    fflush( stdout);
  }
  return;
}
/* P output = solution , Q input = source */ int cg_mms_tm(spinor * const P, spinor * const Q, const int max_iter, double eps_sq, const int rel_prec, const int N, matrix_mult f) { static double normsq, pro, err, alpha_cg = 1., beta_cg = 0., squarenorm; int iteration, im, append = 0; char filename[100]; static double gamma, alpham1; int const cg_mms_default_precision = 32; double tmp_mu = g_mu; WRITER * writer = NULL; paramsInverterInfo *inverterInfo = NULL; paramsPropagatorFormat *propagatorFormat = NULL; spinor * temp_save; //used to save all the masses spinor ** solver_field = NULL; const int nr_sf = 5; init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf); init_mms_tm(g_no_extra_masses); /* currently only implemented for P=0 */ zero_spinor_field(P, N); /* Value of the bare MMS-masses (\mu^2 - \mu_0^2) */ for(im = 0; im < g_no_extra_masses; im++) { sigma[im] = g_extra_masses[im]*g_extra_masses[im] - g_mu*g_mu; assign(xs_mms_solver[im], P, N); assign(ps_mms_solver[im], Q, N); zitam1[im] = 1.0; zita[im] = 1.0; alphas[im] = 1.0; betas[im] = 0.0; } squarenorm = square_norm(Q, N, 1); assign(solver_field[0], P, N); /* normsp = square_norm(P, N, 1); */ /* initialize residue r and search vector p */ /* if(normsp == 0){ */ /* currently only implemented for P=0 */ if(1) { /* if a starting solution vector equal to zero is chosen */ assign(solver_field[1], Q, N); assign(solver_field[2], Q, N); normsq = square_norm(Q, N, 1); } else{ /* if a starting solution vector different from zero is chosen */ f(solver_field[3], solver_field[0]); diff(solver_field[1], Q, solver_field[3], N); assign(solver_field[2], solver_field[1], N); normsq = square_norm(solver_field[2], N, 1); } /* main loop */ for(iteration = 0; iteration < max_iter; iteration++) { /* Q^2*p and then (p,Q^2*p) */ f(solver_field[4], solver_field[2]); pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1); /* For the update of the coeff. of the shifted pol. we need alpha_cg(i-1) and alpha_cg(i). 
This is the reason why we need this double definition of alpha */ alpham1 = alpha_cg; /* Compute alpha_cg(i+1) */ alpha_cg = normsq/pro; for(im = 0; im < g_no_extra_masses; im++) { /* Now gamma is a temp variable that corresponds to zita(i+1) */ gamma = zita[im]*alpham1/(alpha_cg*beta_cg*(1.-zita[im]/zitam1[im]) + alpham1*(1.+sigma[im]*alpha_cg)); /* Now zita(i-1) is put equal to the old zita(i) */ zitam1[im] = zita[im]; /* Now zita(i+1) is updated */ zita[im] = gamma; /* Update of alphas(i) = alpha_cg(i)*zita(i+1)/zita(i) */ alphas[im] = alpha_cg*zita[im]/zitam1[im]; /* Compute xs(i+1) = xs(i) + alphas(i)*ps(i) */ assign_add_mul_r(xs_mms_solver[im], ps_mms_solver[im], alphas[im], N); } /* Compute x_(i+1) = x_i + alpha_cg(i+1) p_i */ assign_add_mul_r(solver_field[0], solver_field[2], alpha_cg, N); /* Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i */ assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N); /* Check whether the precision eps_sq is reached */ err = square_norm(solver_field[1], N, 1); if(g_debug_level > 2 && g_proc_id == g_stdio_proc) { printf("CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout ); } if( ((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1)) ) { assign(P, solver_field[0], N); f(solver_field[2], P); diff(solver_field[3], solver_field[2], Q, N); err = square_norm(solver_field[3], N, 1); if(g_debug_level > 0 && g_proc_id == g_stdio_proc) { printf("# CG MMS true residue at final iteration (%d) was %g.\n", iteration, err); fflush( stdout); } g_sloppy_precision = 0; g_mu = tmp_mu; /* save all the results of (Q^dagger Q)^(-1) \gamma_5 \phi */ /* here ... 
*/ /* when im == -1 save the base mass*/ for(im = -1; im < g_no_extra_masses; im++) { if(im==-1) { temp_save=solver_field[0]; } else { temp_save=xs_mms_solver[im]; } if(SourceInfo.type != 1) { if (PropInfo.splitted) { sprintf(filename, "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, im+1); } else { sprintf(filename, "%s.%.4d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, im+1); } } else { sprintf(filename, "%s.%.4d.%.5d.cgmms.%.2d.0", SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, im+1); } if(g_kappa != 0) { mul_r(temp_save, (2*g_kappa)*(2*g_kappa), temp_save, N); } append = !PropInfo.splitted; construct_writer(&writer, filename, append); if (PropInfo.splitted || SourceInfo.ix == index_start) { //Create the inverter info NOTE: always set to TWILSON=12 and 1 flavour (to be adjusted) inverterInfo = construct_paramsInverterInfo(err, iteration+1, 12, 1); if (im == -1) { inverterInfo->cgmms_mass = inverterInfo->mu; } else { inverterInfo->cgmms_mass = g_extra_masses[im]/(2 * inverterInfo->kappa); } write_spinor_info(writer, PropInfo.format, inverterInfo, append); //Create the propagatorFormat NOTE: always set to 1 flavour (to be adjusted) propagatorFormat = construct_paramsPropagatorFormat(cg_mms_default_precision, 1); write_propagator_format(writer, propagatorFormat); free(inverterInfo); free(propagatorFormat); } convert_lexic_to_eo(solver_field[2], solver_field[1], temp_save); write_spinor(writer, &solver_field[2], &solver_field[1], 1, 32); destruct_writer(writer); } finalize_solver(solver_field, nr_sf); return(iteration+1); } /* Compute beta_cg(i+1) = (r(i+1),r(i+1))/(r(i),r(i)) Compute p(i+1) = r(i+1) + beta(i+1)*p(i) */ beta_cg = err/normsq; assign_mul_add_r(solver_field[2], beta_cg, solver_field[1], N); normsq = err; /* Compute betas(i+1) = beta_cg(i)*(zita(i+1)*alphas(i))/(zita(i)*alpha_cg(i)) Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i) */ for(im = 
0; im < g_no_extra_masses; im++) { betas[im] = beta_cg*zita[im]*alphas[im]/(zitam1[im]*alpha_cg); assign_mul_add_mul_r(ps_mms_solver[im], solver_field[1], betas[im], zita[im], N); } } assign(P, solver_field[0], N); g_sloppy_precision = 0; finalize_solver(solver_field, nr_sf); return(-1); }
/*
 * Stopping criterion used throughout solve_cg: res_sq is compared with
 * eps_sq directly (rel_prec == 0) or with eps_sq*src_sq (rel_prec == 1).
 * For any other value of rel_prec the criterion is never fulfilled.
 */
static int solve_cg_done(const double res_sq, const double eps_sq,
                         const double src_sq, const int rel_prec) {
  if((res_sq <= eps_sq) && (rel_prec == 0)) {
    return 1;
  }
  return ((res_sq <= eps_sq*src_sq) && (rel_prec == 1));
}

/*
 * solve_cg -- CG solver for Qtm_pm_psi on VOLUME/2 sites
 *
 *   k         output: solution, also used as starting guess
 *   l         input:  source
 *   eps_sq    squared target precision
 *   rel_prec  0: absolute, 1: relative to |l|^2
 *
 * With g_sloppy_precision_flag == 1 a defect-correction scheme is used: up
 * to 20 inner CG solves run with g_sloppy_precision = 1 and the residue is
 * recomputed with g_sloppy_precision = 0 after each of them.  Otherwise a
 * plain CG with at most ITER_MAX_CG iterations is performed.
 *
 * Returns the iteration counter (0 if the starting guess already fulfils the
 * criterion in the sloppy variant).  g_sloppy_precision is restored to its
 * value on entry before the regular return.  Uses the work fields
 * g_spinor_field[DUM_SOLVER .. DUM_SOLVER+5].
 */
int solve_cg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec) {

  static double rr, pAp, rr_new, step, mix, src_sq, true_rr, inner_rr;
  const int sloppy_on_entry = g_sloppy_precision;
  int iteration = 0, outer, inner;
  double t_start, t_stop, flops;
  /* w receives A*p and afterwards the updated residue, r is the residue of
     the previous step and p the search direction */
  spinor * const w = g_spinor_field[DUM_SOLVER];
  spinor * const r = g_spinor_field[DUM_SOLVER+1];
  spinor * const p = g_spinor_field[DUM_SOLVER+2];
  spinor *corr, *defect, *Ax;

#ifdef MPI
  t_start = MPI_Wtime();
#else
  t_start = ((double)clock())/((double)(CLOCKS_PER_SEC));
#endif
  src_sq = square_norm(l, VOLUME/2, 1);

  if(g_sloppy_precision_flag == 1) {
    defect = g_spinor_field[DUM_SOLVER+3];
    corr = g_spinor_field[DUM_SOLVER+4];
    Ax = g_spinor_field[DUM_SOLVER+5];

    /* defect = l - A k */
    assign(defect, l, VOLUME/2);
    Qtm_pm_psi(Ax, k);
    diff(defect, l, Ax, VOLUME/2);
    true_rr = square_norm(defect, VOLUME/2, 1);
    if(solve_cg_done(true_rr, eps_sq, src_sq, rel_prec)) {
      return(0);
    }

    for(outer = 0; outer < 20; outer++) {
      /* solve A corr = defect in lower precision, starting from zero */
      g_sloppy_precision = 1;
      zero_spinor_field(corr, VOLUME/2);
      assign(r, defect, VOLUME/2);
      assign(p, defect, VOLUME/2);
      inner_rr = true_rr;

      for(inner = 0; inner <= ITER_MAX_CG; inner++) {
        Qtm_pm_psi(w, p);
        pAp = scalar_prod_r(p, w, VOLUME/2, 1);
        step = inner_rr / pAp;
        assign_add_mul_r(corr, p, step, VOLUME/2);
        assign_mul_add_r(w, -step, r, VOLUME/2);
        rr_new = square_norm(w, VOLUME/2, 1);

        if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
          printf("inner CG: %d res^2 %g\n", iteration+inner+1, rr_new);
          fflush(stdout);
        }

        if(solve_cg_done(rr_new, eps_sq, src_sq, rel_prec)) {
          break;
        }
        mix = rr_new / inner_rr;
        assign_mul_add_r(p, mix, w, VOLUME/2);
        assign(r, w, VOLUME/2);
        inner_rr = rr_new;
      }
      iteration += inner;

      /* back to full precision: k += corr, defect -= A corr */
      g_sloppy_precision = 0;
      add(k, k, corr, VOLUME/2);
      Qtm_pm_psi(Ax, corr);
      diff(defect, defect, Ax, VOLUME/2);
      true_rr = square_norm(defect, VOLUME/2, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("mixed CG(linsolve): true residue %d\t%g\t\n",iteration, true_rr);
        fflush( stdout);
      }
      if(solve_cg_done(true_rr, eps_sq, src_sq, rel_prec)) {
        break;
      }
      iteration++;
    }
  }
  else {
    /* r = p = l - A k */
    Qtm_pm_psi(w, k);
    diff(r, l, w, VOLUME/2);
    assign(p, r, VOLUME/2);
    rr = square_norm(r, VOLUME/2, 1);

    for(iteration = 1; iteration <= ITER_MAX_CG; iteration++) {
      Qtm_pm_psi(w, p);
      pAp = scalar_prod_r(p, w, VOLUME/2, 1);
      step = rr/pAp;
      assign_add_mul_r(k, p, step, VOLUME/2);
      assign_mul_add_r(w, -step, r, VOLUME/2);
      rr_new = square_norm(w, VOLUME/2, 1);

      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
        printf("CG (linsolve): iterations: %d res^2 %e\n", iteration, rr_new);
        fflush(stdout);
      }

      if(solve_cg_done(rr_new, eps_sq, src_sq, rel_prec)) {
        break;
      }
      mix = rr_new/rr;
      assign_mul_add_r(p, mix, w, VOLUME/2);
      assign(r, w, VOLUME/2);
      rr = rr_new;
    }
  }

#ifdef MPI
  t_stop = MPI_Wtime();
#else
  t_stop = ((double)clock())/((double)(CLOCKS_PER_SEC));
#endif

  /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
  /* 2*1320.0 because the linalg is over VOLUME/2 */
  flops = (2*(2*1320.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1320.0+2*3*4) + 10*3*4))*VOLUME/2/1.0e6f;
  if(g_proc_id==0 && g_debug_level > 0) {
    printf("CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, t_stop-t_start);
    printf("CG: flopcount: t/s: %1.4e mflops_local: %.1f mflops: %.1f\n",
           t_stop-t_start, flops/(t_stop-t_start), g_nproc*flops/(t_stop-t_start));
  }

  g_sloppy_precision = sloppy_on_entry;
  return(iteration);
}
/*
 * bicgstabell -- BiCGstab(l) solver
 *
 *   x0        in/out: starting guess on entry, solution on exit
 *   b         source
 *   max_iter  iteration limit (compared with the counter k, see below)
 *   eps_sq    squared target precision
 *   rel_prec  0: absolute precision, 1: relative to |b|^2
 *   _l        degree l of the minimal residual polynomial, 1 <= _l <= 4
 *   N         number of sites; decides whether full or half-volume work
 *             fields are allocated
 *   f         operator to invert
 *
 * The counter k starts at 0 for the first sweep and grows by l per sweep.
 *
 * Returns k (>= 0) when the residue fulfils the stopping criterion, -1 when
 * the iteration limit was hit without convergence or when _l is outside the
 * supported range.
 *
 * Changes with respect to the previous version:
 *  - _l is validated: the local arrays hold at most l = 4, and l <= 0 made
 *    the counter stand still (endless loop) or run backwards,
 *  - failure is decided from the residue instead of "k == max_iter", which
 *    was never true when max_iter is not a multiple of l and wrongly
 *    signalled failure when convergence was reached exactly at k == max_iter,
 *  - a starting guess that already fulfils the criterion yields 0 instead of
 *    -l (which for l == 1 was indistinguishable from the failure code).
 */
int bicgstabell(spinor * const x0, spinor * const b, const int max_iter,
                double eps_sq, const int rel_prec, const int _l, const int N, matrix_mult f) {
  double err;
  int i, j, k, l;
  int unconverged;
  double rho0, rho1, beta, alpha, omega, gamma0 = 0., squarenorm;
  spinor * r[5], * u[5], * r0_tilde, * x;
  double tau[5][5], gamma[25], gammap[25], gammapp[25], sigma[25];
  spinor ** solver_field = NULL;
  const int nr_sf = 2*(_l+1)+2;

  /* r[], u[] and tau[][] are indexed up to l */
  if(_l < 1 || _l > 4) {
    fprintf(stderr, "bicgstabell: l=%d not in valid range [1,4]. Aborting...\n", _l);
    return(-1);
  }

  l = _l;
  k = -l;
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r0_tilde = solver_field[0];
  for(i = 0; i <= l; i++){
    r[i] = solver_field[2+2*i];
    u[i] = solver_field[3+2*i];
  }

  /* r_0 = b - f(x), shadow residue r0_tilde = r_0 */
  x = x0;
  assign(u[0], b, N);
  f(r0_tilde, x);
  diff(r[0], u[0], r0_tilde, N);
  zero_spinor_field(solver_field[1], N);
  assign(r0_tilde, r[0], N);
  squarenorm = square_norm(b, N, 1);

  rho0 = 1.;
  alpha = 0.;
  omega = 1.;
  err = square_norm(r0_tilde, N, 1);
  while( k < max_iter && (((err > eps_sq) && (rel_prec == 0))
                          || ((err > eps_sq*squarenorm) && (rel_prec == 1))
                          )) {
    k+=l;

    /* The BiCG part */
    rho0 *= -omega;
    for(j = 0; j < l; j++) {
      rho1 = scalar_prod_r(r[j], r0_tilde, N, 1);
      beta = (rho1/rho0);
      beta *= alpha;
      rho0 = rho1;
      for(i = 0; i <= j; i++) {
        /* u_i = r_i - \beta u_i */
        assign_mul_add_r(u[i], -beta, r[i], N);
      }
      f(u[j+1], u[j]);
      gamma0 = scalar_prod_r(u[j+1], r0_tilde, N, 1);
      alpha = rho0/gamma0;
      /* r_i = r_i - \alpha u_{i+1} */
      for(i = 0; i <= j; i++) {
        assign_add_mul_r(r[i], u[i+1], -alpha, N);
      }
      f(r[j+1], r[j]);
      /* x = x + \alpha u_0 */
      assign_add_mul_r(x, u[0], alpha, N);
      err = square_norm(r[j+1], N, 1);
      if(g_proc_id == 0 && g_debug_level > 1) {printf("%d %d err = %e\n", k, j, err);fflush(stdout);}
    }

    /* The MR part */
    /* orthogonalise r_1..r_l and project r_0 onto them */
    for(j = 1; j <= l; j++){
      for(i = 1; i < j; i++){
        tau[i][j] = scalar_prod_r(r[j], r[i], N, 1)/sigma[i];
        assign_add_mul_r(r[j], r[i], -tau[i][j], N);
      }
      sigma[j] = scalar_prod_r(r[j], r[j], N, 1);
      gammap[j] = scalar_prod_r(r[0], r[j], N, 1)/sigma[j];
    }
    gamma[l] = gammap[l];
    omega = gamma[l];
    for(j = l-1; j > 0; j--) {
      gamma[j] = gammap[j];
      for(i = j+1; i <= l; i++) {
        gamma[j] -= (tau[j][i]*gamma[i]);
      }
    }
    for(j = 1; j < l; j++) {
      gammapp[j] = gamma[j+1];
      for(i = j+1; i < l; i++){
        gammapp[j] += (tau[j][i]*gamma[i+1]);
      }
    }

    /* update of x, r_0 and u_0 */
    assign_add_mul_r(x, r[0], gamma[1], N);
    assign_add_mul_r(r[0], r[l], -gammap[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(x, r[j], gammapp[j], N);
      assign_add_mul_r(r[0], r[j], -gammap[j], N);
    }
    assign_add_mul_r(u[0], u[l], -gamma[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(u[0], u[j], -gamma[j], N);
    }
    err = square_norm(r[0], N, 1);
    if(g_proc_id == 0 && g_debug_level > 0){
      printf(" BiCGstabell iterated %d %d, %e rho0 = %e, alpha = %e, gamma0= %e\n", l, k, err, rho0, alpha, gamma0);
      fflush( stdout );
    }
  }
  finalize_solver(solver_field, nr_sf);

  /* same test as in the loop condition: still true means the loop ended
     because of the iteration limit */
  unconverged = (((err > eps_sq) && (rel_prec == 0))
                 || ((err > eps_sq*squarenorm) && (rel_prec == 1)));
  if(unconverged) {
    return(-1);
  }
  /* k is still -l if the loop body never ran */
  return((k < 0) ? 0 : k);
}
void ndpoly_heatbath(const int id) { int j; double temp; monomial * mnl = &monomial_list[id]; (*mnl).energy0 = 0.; random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, (*mnl).rngrepro); (*mnl).energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1); if(g_epsbar!=0.0 || phmc_exact_poly == 0){ random_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2, (*mnl).rngrepro); (*mnl).energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1); } else { zero_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2); } if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) { printf("PHMC: Here comes the computation of H_old with \n \n"); printf("PHMC: First: random spinors and their norm \n "); printf("PHMC: OLD Ennergy UP %e \n", (*mnl).energy0); printf("PHMC: OLD Energy DN + UP %e \n\n", (*mnl).energy0); } if(phmc_exact_poly==0){ QNon_degenerate(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]); for(j = 1; j < (phmc_dop_n_cheby); j++){ assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2); assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2); Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], phmc_root[phmc_dop_n_cheby-2+j]); } Poly_tilde_ND(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]); } else if( phmc_exact_poly==1 && g_epsbar!=0.0) { /* Attention this is Q * tau1, up/dn are exchanged in the input spinor */ /* this is used as an preconditioner */ QNon_degenerate(g_chi_up_spinor_field[1],g_chi_dn_spinor_field[1], g_chi_dn_spinor_field[0],g_chi_up_spinor_field[0]); assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2); assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2); /* solve Q*tau1*P(Q^2) *x=y */ cg_her_nd(g_chi_up_spinor_field[1],g_chi_dn_spinor_field[1], 
g_chi_up_spinor_field[0],g_chi_dn_spinor_field[0], 1000,1.e-16,0,VOLUME/2, Qtau1_P_ND); /* phi= Bdagger phi */ for(j = 1; j < (phmc_dop_n_cheby); j++){ assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2); assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2); Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], phmc_root[phmc_dop_n_cheby-2+j]); } assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2); assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2); } else if(phmc_exact_poly==1 && g_epsbar==0.0) { Qtm_pm_psi(g_chi_up_spinor_field[1], g_chi_up_spinor_field[0]); assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2); /* solve (Q+)*(Q-)*P((Q+)*(Q-)) *x=y */ cg_her(g_chi_up_spinor_field[1], g_chi_up_spinor_field[0], 1000,1.e-16,0,VOLUME/2, Qtm_pm_Ptm_pm_psi); /* phi= Bdagger phi */ for(j = 1; j < (phmc_dop_n_cheby); j++){ assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2); Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[1], g_chi_up_spinor_field[0], phmc_root[phmc_dop_n_cheby-2+j]); } assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2); } assign(mnl->pf, g_chi_up_spinor_field[0], VOLUME/2); assign(mnl->pf2, g_chi_dn_spinor_field[0], VOLUME/2); temp = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1); if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) { printf("PHMC: Then: evaluate Norm of pseudofermion heatbath BHB \n "); printf("PHMC: Norm of BHB up squared %e \n", temp); } if(g_epsbar!=0.0 || phmc_exact_poly==0) temp += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1); if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)){ printf("PHMC: Norm of BHB up + BHB dn squared %e \n\n", temp); } if(g_proc_id == 0 && g_debug_level > 3) { printf("called ndpoly_heatbath for id %d with g_running_phmc = %d\n", id, g_running_phmc); } return; }
double reweighting_factor_nd(const int N) { int i, n_iter; double sq_norm, corr, sum=0., sq_sum = 0., temp1; double mu1, mu2; complex temp2; mu1 = g_mu1; mu2 = g_mu1; /* Use spinor_field 2,3,5 */ /* in order not to conflict with anything else... */ for(i = 0; i < N; i++) { random_spinor_field(g_chi_up_spinor_field[2],VOLUME/2, 1); random_spinor_field(g_chi_dn_spinor_field[2],VOLUME/2, 1); zero_spinor_field(g_chi_up_spinor_field[3],VOLUME/2); zero_spinor_field(g_chi_dn_spinor_field[3],VOLUME/2); temp1 = phmc_ptilde_cheby_coef[0]; phmc_ptilde_cheby_coef[0] = temp1 - 1; Poly_tilde_ND(g_chi_up_spinor_field[3], g_chi_dn_spinor_field[3], phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, g_chi_up_spinor_field[2], g_chi_dn_spinor_field[2]); phmc_ptilde_cheby_coef[0] = temp1; temp2 = scalar_prod(g_chi_up_spinor_field[2], g_chi_up_spinor_field[3], VOLUME/2, 1); if(temp2.im > 1.0e-8) { printf("!!! WARNING Immaginary part of CORR-UP LARGER than 10^-8 !!! \n"); printf(" CORR-UP: Re=%12.10e Im=%12.10e \n", temp2.re, temp2.im); } corr = temp2.re; printf(" CORR-UP: Re=%12.10e \n", corr); temp2 = scalar_prod(g_chi_dn_spinor_field[2], g_chi_dn_spinor_field[3], VOLUME/2, 1); if(temp2.im > 1.0e-8) { printf("!!! WARNING Immaginary part of CORR_DN LARGER than 10^-8 !!! 
\n"); printf(" CORR-DN: Re=%12.10e Im=%12.10e \n", temp2.re, temp2.im); } corr += temp2.re; printf(" CORR-DN: Re=%12.10e \n", temp2.im); temp1 = -corr; sum += temp1; sq_sum += temp1*temp1; printf("rew: n_iter = %d, sq_norm = %e, corr = %e\n", n_iter, sq_norm, corr); /* random_spinor_field(g_spinor_field[2],VOLUME/2, 1); g_mu = mu2; zero_spinor_field(g_spinor_field[3],VOLUME/2); n_iter = solve_cg(3, 2, 0., 1.e-15, 1); g_mu = mu1; Qtm_pm_psi(g_spinor_field[5] , g_spinor_field[3]); sq_norm = square_norm(g_spinor_field[2], VOLUME/2, 1); corr = scalar_prod_r(g_spinor_field[2], g_spinor_field[5], VOLUME/2, 1); sq_norm -= corr; temp1 = sq_norm; sum += temp1; sq_sum += temp1*temp1; printf("rew: n_iter = %d, sq_norm = %e, corr = %e\n", n_iter, sq_norm, corr); */ } sum/=(double)N; sq_sum/=(double)N; printf("rew: factor = %e, err = %e\n", sum, sqrt(sum*sum-sq_sum)/((double)N-1)); return(sum); }
/*
 * bicgstab2 -- BiCGstab(2) with the "convex" choice of the polynomial
 * coefficients and reliable updates of residue and solution
 *
 *   x0        in/out: starting guess on entry, solution on exit
 *   b         source
 *   max_iter  iteration limit (compared with the counter k, see below)
 *   eps_sq    squared target precision
 *   rel_prec  0: absolute precision, 1: relative to |b|^2
 *   N         number of sites; decides whether full or half-volume work
 *             fields are allocated
 *   f         operator to invert
 *
 * The counter k is 0 for the first sweep and grows by 2 per sweep.
 *
 * Returns k (>= 0) when the residue estimate fulfils the stopping criterion
 * and -1 when the iteration limit was reached without convergence.
 *
 * Changes with respect to the previous version:
 *  - the solver fields are released with finalize_solver(); they used to be
 *    leaked on every call,
 *  - failure is decided from the residue instead of "k == max_iter", which
 *    could never be true for odd max_iter and wrongly signalled failure when
 *    convergence was reached exactly at k == max_iter,
 *  - a starting guess that already fulfils the criterion yields 0 instead
 *    of -2.
 */
int bicgstab2(spinor * const x0, spinor * const b, const int max_iter,
              double eps_sq, const int rel_prec, const int N, matrix_mult f) {

  const int l = 2;
  double err;
  int i, j, k;
  int update_app = 0, update_res = 0;
  int unconverged;
  double rho0, rho1, beta, alpha, omega, gamma_hat,
    sigma, kappa0, kappal, rho, zeta0;
  double squarenorm, Mx=0., Mr=0.;
  spinor * r[5], * u[5], * r0_tilde, * u0, * x, * xp, * bp;
  double Z[3][3], y0[3], yl[3], yp[3], ypp[3];
  spinor ** solver_field = NULL;
  const int nr_sf = 10;

  k = -l;
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r0_tilde = solver_field[0];
  u0 = solver_field[1];
  r[0] = solver_field[2];
  u[0] = solver_field[3];
  r[1] = solver_field[4];
  u[1] = solver_field[5];
  r[2] = solver_field[6];
  u[2] = solver_field[7];
  bp = solver_field[8];
  /* xp accumulates the solution in place, x holds the correction
     collected since the last reliable update */
  xp = x0;
  x = solver_field[9];

  zero_spinor_field(x, N);
  assign(u[0], b, N);
  f(r0_tilde, xp);
  diff(r[0], u[0], r0_tilde, N);
  zero_spinor_field(u0, N);
  assign(r0_tilde, r[0], N);
  assign(bp, r[0], N);
  squarenorm = square_norm(b, N, 1);

  rho0 = 1.;
  alpha = rho0;
  omega = rho0;
  err = square_norm(r[0], N, 1);
  Mr = err;
  Mx = err;
  zeta0 = err;
  while( k < max_iter && (((err > eps_sq) && (rel_prec == 0))
                          || ((err > eps_sq*squarenorm) && (rel_prec == 1))
                          )) {
    k+=l;

    /* The BiCG part */
    rho0 *= -omega;
    for(j = 0; j < l; j++) {
      rho1 = scalar_prod_r(r[j], r0_tilde, N, 1);
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for(i = 0; i <= j; i++) {
        /* u_i = r_i - \beta u_i */
        assign_mul_add_r(u[i], -beta, r[i], N);
      }
      f(u[j+1], u[j]);
      sigma = scalar_prod_r(u[j+1], r0_tilde, N, 1);
      alpha = rho1/sigma;
      /* x = x + \alpha u_0 */
      assign_add_mul_r(x, u[0], alpha, N);
      /* r_i = r_i - \alpha u_{i+1} */
      for(i = 0; i <= j; i++) {
        assign_add_mul_r(r[i], u[i+1], -alpha, N);
      }
      f(r[j+1], r[j]);
      err = square_norm(r[j+1], N, 1);
      if(g_proc_id == 0 && g_debug_level > 1) {printf("%d %d err = %e\n", k, j, err);fflush(stdout);}
      if(err > Mr) Mr = err;
      if(err > Mx) Mx = err;
    }

    /* The polynomial part */

    /* Z = R* R */
    for(i = 0; i <= l; i++){
      for(j = 0; j <= i; j++){
        Z[i][j] = scalar_prod_r(r[j], r[i], N, 1);
        Z[j][i] = Z[i][j];
      }
    }

    /* r0tilde and rl_tilde */
    y0[0] = -1;
    y0[2] = 0.;
    y0[1] = Z[1][0]/Z[1][1];

    yl[0] = 0.;
    yl[2] = -1.;
    yl[1] = Z[1][2]/Z[1][1];

    /* Convex combination */
    for(i = 0; i < l+1; i++){
      yp[i] = 0.;
      ypp[i] = 0.;
      for(j = 0; j < l+1; j++) {
        yp[i] +=Z[i][j]*y0[j];
        ypp[i] +=Z[i][j]*yl[j];
      }
    }
    kappa0 = sqrt( y0[0]*yp[0] + y0[1]*yp[1] + y0[2]*yp[2] );
    kappal = sqrt( yl[0]*ypp[0] + yl[1]*ypp[1] + yl[2]*ypp[2] );
    rho = (yl[0]*yp[0] + yl[1]*yp[1] + yl[2]*yp[2])/kappa0/kappal;
    /* gamma_hat keeps the sign of rho with a magnitude of at least 0.7 */
    if(fabs(rho) > 0.7) {
      gamma_hat = rho;
    }
    else {
      gamma_hat = rho*0.7/fabs(rho);
    }
    for(i = 0; i <= l; i++) {
      y0[i] -= gamma_hat*kappa0*yl[i]/kappal;
    }

    /* Update */
    omega = y0[l];
    for(i = 1; i < l+1; i++) {
      assign_add_mul_r(u[0], u[i], -y0[i], N);
      assign_add_mul_r(x, r[i-1], y0[i], N);
      assign_add_mul_r(r[0], r[i], -y0[i], N);
    }
    err = kappa0*kappa0;

    /* Reliable update part */
    if(err > Mr) Mr = err;
    if(err > Mx) Mx = err;
    update_app = (err < 1.e-4*zeta0 && zeta0 <= Mx);
    update_res = ((err < 1.e-4*Mr && zeta0 <= Mr) || update_app);
    if(update_res) {
      if(g_proc_id == 0 && g_debug_level > 1) printf("Update res\n");
      /* recompute the residue from the current correction */
      f(r[0], x);
      diff(r[0], bp, r[0], N);
      Mr = err;
      if(update_app) {
        if(g_proc_id == 0 && g_debug_level > 1) printf("Update app\n");
        /* fold the correction into the solution and restart from there */
        Mx = err;
        assign_add_mul_r(xp, x, 1., N);
        zero_spinor_field(x, N);
        assign(bp, r[0], N);
      }
    }
    update_app = 0;
    update_res = 0;
    if(g_proc_id == 0 && g_debug_level > 0){
      printf(" BiCGstab(2)convex iterated %d %d, %e rho0 = %e, alpha = %e, gamma_hat= %e\n",
             l, k, err, rho0, alpha, gamma_hat);
      fflush( stdout );
    }
  }

  /* x0 = xp + x; x lives in solver_field, so copy before releasing it */
  assign_add_mul_r(x, xp, 1., N);
  assign(x0, x, N);
  finalize_solver(solver_field, nr_sf);

  /* same test as in the loop condition: still true means the loop ended
     because of the iteration limit */
  unconverged = (((err > eps_sq) && (rel_prec == 0))
                 || ((err > eps_sq*squarenorm) && (rel_prec == 1)));
  if(unconverged) {
    return(-1);
  }
  /* k is still -l if the loop body never ran */
  return((k < 0) ? 0 : k);
}
int mixed_cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn, spinor * const Qup, spinor * const Qdn, solver_pm_t * solver_pm) { double eps_sq = solver_pm->squared_solver_prec; int noshifts = solver_pm->no_shifts; int rel_prec = solver_pm->rel_prec; int max_iter = solver_pm->max_iter; int check_abs, check_rel; double * shifts = solver_pm->shifts; int Nshift = noshifts; // algorithm double rr_up, rr_dn, rr, rr_old, r0r0, dAd_up, dAd_dn, dAd; if(rel_prec){ check_rel = 1; check_abs = 0; } else{ check_rel = 0; check_abs = 1; } int use_eo=1, eofactor=2; //not even-odd? if(solver_pm->sdim == VOLUME) { eofactor = 1; use_eo = 0; } int N = VOLUME/eofactor; int Vol = VOLUMEPLUSRAND/eofactor; // norm of source rr_up = square_norm(Qup, N, 1); rr_dn = square_norm(Qdn, N, 1); rr = rr_up + rr_dn; if( (g_cart_id == 0 && g_debug_level > 2)) printf("# CGMMSND_mixed: Initial mms residue: %.6e\n", rr); if(rr < 1.0e-4){ if( (g_cart_id == 0 && g_debug_level > 2)) printf("# CGMMSND_mixed: norm of source too low: falling back to double mms solver %.6e\n", rr); return(cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm)); } r0r0 = rr; // for relative precision rr_old = rr; // for the first iteration //allocate an auxiliary solver fields spinor ** sf = NULL; const int nr_sf = 6; init_solver_field(&sf, Vol, nr_sf); spinor32 ** sf32 = NULL; const int nr_sf32 = 8; init_solver_field_32(&sf32, Vol, nr_sf32); //spinor fields //we need one less than shifts, since one field is cared of by the usual cg fields init_mms_tm_nd_32(noshifts-1, Vol); // Pup/dn can be used as auxiliary field to work on, as it is not later used (could be used as initial guess at the very start) // Q_up/dn can be used as feedback, or if not, also as auxiliary field //allocate cg constants double * sigma; double * zitam1, * zita; double * alphas, * betas; double gamma; double alpham1; sigma = (double*)calloc((noshifts), sizeof(double)); zitam1 = (double*)calloc((noshifts), sizeof(double)); zita = (double*)calloc((noshifts), 
sizeof(double)); alphas = (double*)calloc((noshifts), sizeof(double)); betas = (double*)calloc((noshifts), sizeof(double)); spinor32 * r_up, * r_dn, * Ad_up, * Ad_dn, * x_up, * x_dn, * d_up, * d_dn; spinor * r_up_d, * r_dn_d, * x_up_d, * x_dn_d, * Ax_up_d, * Ax_dn_d; // iteration counter int j; //reliable update flag int rel_update = 0; //no of reliable updates done int no_rel_update = 0; //use reliable update flag int use_reliable = 1; double rel_delta = 1.0e-10; int trigger_shift = -1; double * res; double * res0; double * maxres; res = (double*)calloc((noshifts), sizeof(double)); res0 = (double*)calloc((noshifts), sizeof(double)); maxres = (double*)calloc((noshifts), sizeof(double)); ///////////////// // ASSIGNMENTS // ///////////////// x_up = sf32[0]; x_dn = sf32[1]; r_up = sf32[2]; r_dn = sf32[3]; d_up = sf32[4]; d_dn = sf32[5]; Ad_up = sf32[6]; Ad_dn = sf32[7]; x_up_d = sf[0]; x_dn_d = sf[1]; r_up_d = sf[2]; r_dn_d = sf[3]; Ax_up_d = sf[4]; Ax_dn_d = sf[5]; /* //matrix test spinor32 * help_low_up = sf32[0]; spinor32 * help_low_dn = sf32[1]; spinor * help_high_up = sf[0]; spinor * help_high_dn = sf[1]; assign_to_32(help_low_up, Qup, N); assign_to_32(help_low_dn, Qdn, N); assign(help_high_up, Qup, N); assign(help_high_dn, Qdn, N); double sqn_high = square_norm(help_high_up,N,1) + square_norm(help_high_dn,N,1); printf("square_norm(Q_high) = %e\n", sqn_high); float sqn_low = square_norm_32(help_low_up,N,1) + square_norm_32(help_low_dn,N,1); printf("square_norm(Q_low) = %e\n", sqn_low); solver_pm->M_ndpsi32(sf32[2], sf32[3], help_low_up, help_low_dn); solver_pm->M_ndpsi(sf[2], sf[3], help_high_up, help_high_dn); assign_to_64(sf[4], sf32[2], N); assign_to_64(sf[5], sf32[3], N); diff(sf[0], sf[4], sf[2], N); diff(sf[1], sf[5], sf[3], N); double sqnrm = square_norm(sf[0], N, 1) + square_norm(sf[1], N, 1); printf("Operator 32 test: (square_norm) / (spinor component) = %.8e\n", sqnrm/24.0/N); exit(1); */ // r(0) = b assign_to_32(r_up, Qup, N); assign_to_32(r_dn, Qdn, 
N); // d(0) = b assign_to_32(d_up, Qup, N); assign_to_32(d_dn, Qdn, N); maxres[0] = rr; res[0] = rr; res0[0] = rr; alphas[0] = 1.0; betas[0] = 0.0; sigma[0] = shifts[0]*shifts[0]; if(g_cart_id == 0 && g_debug_level > 2) printf("# CGMMSND_mixed: shift %d is %e\n", 0, sigma[0]); // currently only implemented for P=0 for(int im = 1; im < noshifts; im++) { maxres[im] = rr; res[im] = rr; res0[im] = rr; sigma[im] = shifts[im]*shifts[im] - sigma[0]; if(g_cart_id == 0 && g_debug_level > 2) printf("# CGMMSND_mixed: shift %d is %e\n", im, sigma[im]); // these will be the result spinor fields zero_spinor_field_32(mms_x_up[im-1], N); zero_spinor_field_32(mms_x_dn[im-1], N); assign_to_32(mms_d_up[im-1], Qup, N); assign_to_32(mms_d_dn[im-1], Qdn, N); zitam1[im] = 1.0; zita[im] = 1.0; alphas[im] = 1.0; betas[im] = 0.0; } //zero fields for solution Pup, Pdn for(int im = 0; im < noshifts; im++){ zero_spinor_field(Pup[im], N); zero_spinor_field(Pdn[im], N); } ////////// // LOOP // ////////// for (j = 0; j < max_iter; j++) { // A*d(k) solver_pm->M_ndpsi32(Ad_up, Ad_dn, d_up, d_dn); //add zero'th shift assign_add_mul_r_32(Ad_up, d_up, (float) sigma[0], N); assign_add_mul_r_32(Ad_dn, d_dn, (float) sigma[0], N); // alpha = r(k)*r(k) / d(k)*A*d(k) dAd_up = scalar_prod_r_32(d_up, Ad_up, N, 1); dAd_dn = scalar_prod_r_32(d_dn, Ad_dn, N, 1); dAd = dAd_up + dAd_dn; alpham1 = alphas[0]; alphas[0] = rr_old / dAd; // rr_old is taken from the last iteration respectively // r(k+1) assign_add_mul_r_32(r_up, Ad_up, (float) -alphas[0],N); assign_add_mul_r_32(r_dn, Ad_dn, (float) -alphas[0],N); // r(k+1)*r(k+1) rr_up = square_norm_32(r_up, N, 1); rr_dn = square_norm_32(r_dn, N, 1); rr = rr_up + rr_dn; if((g_cart_id == 0) && (g_debug_level > 2)) printf("# CGMMSND_mixed: mms iteration j = %i: rr = %.6e\n", j, rr); // aborting ?? // check wether precision is reached ... 
if ( ((check_abs)&&(rr <= eps_sq)) || ((check_rel)&&(rr <= eps_sq*r0r0)) ) { if ((check_rel)&&(rr <= eps_sq*r0r0)) { if((g_cart_id == 0) && (g_debug_level > 3)) printf("# CGMMSND_mixed: Reached relative solver precision of eps_rel = %.2e\n", eps_sq); } break; } // update alphas and zitas // used later for(int im = 1; im < noshifts; im++) { gamma = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im]) + alpham1*(1.+sigma[im]*alphas[0])); zitam1[im] = zita[im]; zita[im] = gamma; alphas[im] = alphas[0]*zita[im]/zitam1[im]; } //check for reliable update res[0] = rr; for(int im=1; im<noshifts; im++) res[im] = rr * zita[im]; rel_update = 0; for(int im = (noshifts-1); im >= 0; im--) { if( res[im] > maxres[im] ) maxres[im] = res[im]; if( (res[im] < rel_delta*res0[im]) && (res0[im]<=maxres[im]) && (use_reliable) ) rel_update=1; if( rel_update && ( trigger_shift == -1) ) trigger_shift = im; } if(!rel_update) { // x_j(k+1) = x_j(k) + alpha_j*d_j(k) // alphas are set above assign_add_mul_r_32(x_up, d_up, (float) alphas[0], N); assign_add_mul_r_32(x_dn, d_dn, (float) alphas[0], N); for(int im = 1; im < noshifts; im++) { assign_add_mul_r_32(mms_x_up[im-1], mms_d_up[im-1], (float) alphas[im], N); assign_add_mul_r_32(mms_x_dn[im-1], mms_d_dn[im-1], (float) alphas[im], N); } // beta = r(k+1)*r(k+1) / r(k)*r(k) betas[0] = rr / rr_old; rr_old = rr; // for next iteration // d_0(k+1) = r(k+1) + beta*d_0(k) assign_mul_add_r_32(d_up, (float) betas[0], r_up, N); assign_mul_add_r_32(d_dn, (float) betas[0], r_dn, N); // d_j(k+1) = zita*r(k+1) + beta*d_j(k) for(int im = 1; im < noshifts; im++) { betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]); assign_mul_add_mul_r_32(mms_d_up[im-1], r_up, (float) betas[im], (float) zita[im], N); assign_mul_add_mul_r_32(mms_d_dn[im-1], r_dn, (float) betas[im], (float) zita[im], N); } } else{ //reliable update if( (g_cart_id == 0) && (g_debug_level > 3) ){ printf("# CGMMSND_mixed: Shift %d with offset squared %e triggered a reliable 
update\n", trigger_shift, sigma[trigger_shift]); } //add low prec solutions assign_add_mul_r_32(x_up, d_up, (float) alphas[0], N); assign_add_mul_r_32(x_dn, d_dn, (float) alphas[0], N); addto_32(Pup[0], x_up, N); addto_32(Pdn[0], x_dn, N); for(int im = 1; im < noshifts; im++) { assign_add_mul_r_32(mms_x_up[im-1], mms_d_up[im-1], alphas[im], N); assign_add_mul_r_32(mms_x_dn[im-1], mms_d_dn[im-1], alphas[im], N); addto_32(Pup[im], mms_x_up[im-1], N); addto_32(Pdn[im], mms_x_dn[im-1], N); } //add low precision for shift 0 only addto_32(x_up_d, x_up, N); addto_32(x_dn_d, x_dn, N); solver_pm->M_ndpsi(Ax_up_d, Ax_dn_d, x_up_d, x_dn_d); //add zero'th shift assign_add_mul_r(Ax_up_d, x_up_d, sigma[0], N); assign_add_mul_r(Ax_dn_d, x_dn_d, sigma[0], N); diff(r_up_d, Qup, Ax_up_d, N); diff(r_dn_d, Qdn, Ax_dn_d, N); rr_up = square_norm(r_up_d, N, 1); rr_dn = square_norm(r_dn_d, N, 1); rr = rr_up + rr_dn; if ((g_cart_id == 0) && (g_debug_level > 3) ) printf("# CGMMSND_mixed: New residue after reliable update: %.6e\n", rr); //update res[im] res[0] = rr; if(res[trigger_shift] > res0[trigger_shift]){ if(g_cart_id == 0) printf("# CGMMSND_mixed: Warning: residue of shift no %d got larger after rel. 
update\n", trigger_shift); //if this is the zero'th shift not getting better -> no further convergence, break if(trigger_shift == 0) break; } //zero float fields zero_spinor_field_32(x_up, N); zero_spinor_field_32(x_dn, N); for(int im = 1; im < noshifts; im++) { zero_spinor_field_32(mms_x_up[im-1], N); zero_spinor_field_32(mms_x_dn[im-1], N); } //update the source assign_to_32(r_up, r_up_d, N); assign_to_32(r_dn, r_dn_d, N); betas[0] = res[0]/rr_old; rr_old = rr; // d_0(k+1) = r(k+1) + beta*d_0(k) assign_mul_add_r_32(d_up, betas[0], r_up, N); assign_mul_add_r_32(d_dn, betas[0], r_dn, N); // d_j(k+1) = r(k+1) + beta*d_j(k) for(int im = 1; im < noshifts; im++) { betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]); assign_mul_add_mul_r_32(mms_d_up[im-1], r_up, (float) betas[im], (float) zita[im], N); assign_mul_add_mul_r_32(mms_d_dn[im-1], r_dn, (float) betas[im], (float) zita[im], N); } //new maxres for the shift that initiated the reliable update res[trigger_shift] = res[0]*zita[trigger_shift]*zita[trigger_shift]; res0[trigger_shift] = res[trigger_shift]; maxres[trigger_shift] = res[trigger_shift]; trigger_shift = -1; no_rel_update ++; } //reliable update //check if some shift is converged for(int im = 1; im < noshifts; im++) { if(j > 0 && (j % 10 == 0) && (im == noshifts-1)) { double sn = square_norm_32(mms_d_up[im-1], N, 1); sn += square_norm_32(mms_d_dn[im-1], N, 1); if(alphas[noshifts-1]*alphas[noshifts-1]*sn <= eps_sq) { noshifts--; if( (g_debug_level > 1) && (g_cart_id == 0) ) { printf("# CGMMSND_mixed: at iteration %d removed one shift, %d remaining\n", j, noshifts); } //if removed we add the latest solution vector for this shift addto_32(Pup[im], mms_x_up[im-1], N); addto_32(Pdn[im], mms_x_dn[im-1], N); } } } }//LOOP if( (g_cart_id == 0) && (g_debug_level > 1) ) printf("Final mms residue: %.6e\n", rr); //add the latest solutions for(int im = 0; im < noshifts; im++) { if(im == 0){ addto_32(Pup[0], x_up, N); addto_32(Pdn[0], x_dn, N); } else{ 
addto_32(Pup[im], mms_x_up[im-1], N); addto_32(Pdn[im], mms_x_dn[im-1], N); } } if(g_debug_level > 4){ if(g_cart_id == 0) printf("# CGMMSND_mixed: Checking mms result:\n"); //loop over all shifts (-> Nshift) for(int im = 0; im < Nshift; im++){ solver_pm->M_ndpsi(sf[0], sf[1], Pup[im], Pdn[im]); assign_add_mul_r(sf[0], Pup[im] , shifts[im]*shifts[im], N); assign_add_mul_r(sf[1], Pdn[im] , shifts[im]*shifts[im], N); diff(sf[2], sf[0], Qup, N); diff(sf[3], sf[1], Qdn, N); rr_up = square_norm(sf[2], N, 1); rr_dn = square_norm(sf[3], N, 1); rr = rr_up + rr_dn; if(g_cart_id == 0) printf("# CGMMSND_mixed: Shift[%d] squared residue: %e\n", im, rr); } } finalize_solver(sf, nr_sf); finalize_solver_32(sf32, nr_sf32); //free cg constants free(sigma); free(zitam1); free(zita); free(alphas); free(betas); //free reliable update stuff free(res); free(res0); free(maxres); //if not converged -> return(-1) if(j<max_iter){ return(j); } else{ return(-1); } }//
/* P output = solution , Q input = source */ int mixed_cg_her(spinor * const P, spinor * const Q, solver_params_t solver_params, const int max_iter, double eps_sq, const int rel_prec, const int N, matrix_mult f, matrix_mult32 f32) { int i = 0, iter = 0, j = 0; float sqnrm = 0., sqnrm2, squarenorm; float pro, err, alpha_cg, beta_cg; double sourcesquarenorm, sqnrm_d, squarenorm_d; spinor *delta, *y, *xhigh; spinor32 *x, *stmp; spinor ** solver_field = NULL; spinor32 ** solver_field32 = NULL; const int nr_sf = 3; const int nr_sf32 = 4; int max_inner_it = mixcg_maxinnersolverit; int N_outer = max_iter/max_inner_it; //to be on the save side we allow at least 10 outer iterations if(N_outer < 10) N_outer = 10; int save_sloppy = g_sloppy_precision_flag; double atime, etime, flops; if(N == VOLUME) { init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf); init_solver_field_32(&solver_field32, VOLUMEPLUSRAND, nr_sf32); } else { init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf); init_solver_field_32(&solver_field32, VOLUMEPLUSRAND/2, nr_sf32); } squarenorm_d = square_norm(Q, N, 1); sourcesquarenorm = squarenorm_d; sqnrm_d = squarenorm_d; delta = solver_field[0]; y = solver_field[1]; xhigh = solver_field[2]; x = solver_field32[3]; assign(delta, Q, N); //set solution to zero zero_spinor_field(P, N); atime = gettime(); for(i = 0; i < N_outer; i++) { /* main CG loop in lower precision */ zero_spinor_field_32(x, N); zero_spinor_field_32(solver_field32[0], N); assign_to_32(solver_field32[1], delta, N); assign_to_32(solver_field32[2], delta, N); sqnrm = (float) sqnrm_d; sqnrm2 = sqnrm; /*inner CG loop */ for(j = 0; j <= max_inner_it; j++) { f32(solver_field32[0], solver_field32[2]); pro = scalar_prod_r_32(solver_field32[2], solver_field32[0], N, 1); alpha_cg = sqnrm2 / pro; assign_add_mul_r_32(x, solver_field32[2], alpha_cg, N); assign_mul_add_r_32(solver_field32[0], -alpha_cg, solver_field32[1], N); err = square_norm_32(solver_field32[0], N, 1); if(g_proc_id == g_stdio_proc 
&& g_debug_level > 2) { printf("inner CG: %d res^2 %g\n", iter+j, err); fflush(stdout); } //if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){ if((err <= mixcg_innereps*sqnrm)|| (j==max_inner_it) || ((1.3*err <= eps_sq) && (rel_prec == 0)) || ((1.3*err <= eps_sq*sourcesquarenorm) && (rel_prec == 1))) { break; } beta_cg = err / sqnrm2; assign_mul_add_r_32(solver_field32[2], beta_cg, solver_field32[0], N); stmp = solver_field32[0]; solver_field32[0] = solver_field32[1]; solver_field32[1] = stmp; sqnrm2 = err; } /* end inner CG loop */ iter += j; /* we want to apply a true double matrix with f(y,P) -> set sloppy off here*/ g_sloppy_precision_flag = 0; /* calculate defect in double precision */ assign_to_64(xhigh, x, N); add(P, P, xhigh, N); f(y, P); diff(delta, Q, y, N); sqnrm_d = square_norm(delta, N, 1); if(g_debug_level > 2 && g_proc_id == 0) { printf("mixed CG: last inner residue: %g\t\n", err); printf("mixed CG: true residue %d %g\t\n",iter, sqnrm_d); fflush(stdout); } /* here we can reset it to its initial value*/ g_sloppy_precision_flag = save_sloppy; if(((sqnrm_d <= eps_sq) && (rel_prec == 0)) || ((sqnrm_d <= eps_sq*sourcesquarenorm) && (rel_prec == 1))) { etime = gettime(); if(g_debug_level > 0 && g_proc_id == 0) { if(N != VOLUME){ /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */ /* 2*1608.0 because the linalg is over VOLUME/2 */ flops = (2*(2*1608.0+2*3*4) + 2*3*4 + iter*(2.*(2*1608.0+2*3*4) + 10*3*4))*N/1.0e6f; printf("# mixed CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iter, eps_sq, etime-atime); printf("# mixed CG: flopcount (for e/o tmWilson only): t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime)); } else{ /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */ flops = (2*(1608.0+2*3*4) + 2*3*4 + iter*(2.*(1608.0+2*3*4) + 10*3*4))*N/1.0e6f; printf("# mixed CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iter, eps_sq, etime-atime); printf("# mixed CG: flopcount (for 
non-e/o tmWilson only): t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime)); } } finalize_solver(solver_field, nr_sf); finalize_solver_32(solver_field32, nr_sf32); return(iter+i); } iter++; } finalize_solver(solver_field, nr_sf); finalize_solver_32(solver_field32, nr_sf32); return(-1); }
/*
 * Restarted GCR(m), optionally preconditioned with Msap_eo.
 *
 *   P             in/out: start vector on entry, updated solution on exit
 *   Q             source
 *   m             number of basis vectors built per restart cycle
 *   max_restarts  number of restart cycles
 *   eps_sq        target squared residual (relative to |Q|^2 if rel_prec == 1)
 *   precon        0: no preconditioner, otherwise Msap_eo(., ., 6)
 *   f             operator
 *
 * The true residual Q - f(P) is tested at the start of every cycle. Returns
 * the number of inner iterations done when that test succeeds, -1 when all
 * cycles are used up. Uses the file-level work arrays xi, chi, a, b, c.
 */
int gcr(spinor * const P, spinor * const Q,
        const int m, const int max_restarts,
        const double eps_sq, const int rel_prec,
        const int N, const int precon, matrix_mult f) {

  const int nr_sf = 2;
  spinor ** solver_field = NULL;
  spinor * rho, * tmp;
  complex ctmp;
  double norm_sq, err;
  int k, prev, col, cycle, iter = 0;

  init_solver_field(&solver_field, (N == VOLUME) ? VOLUMEPLUSRAND : VOLUMEPLUSRAND/2, nr_sf);
  rho = solver_field[0];
  tmp = solver_field[1];

  init_gcr(m, N+RAND);

  /* a (numerically) vanishing source is treated as having unit norm */
  norm_sq = square_norm(Q, N, 1);
  if(norm_sq < 1.e-32) {
    norm_sq = 1.;
  }

  /* threshold shown in the log and, for rel_prec 0 or 1, the one tested */
  const double target = (rel_prec == 1) ? eps_sq*norm_sq : eps_sq;
  /* any other rel_prec value never satisfies the precision test */
  const int mode_valid = (rel_prec == 0) || (rel_prec == 1);

  for(cycle = 0; cycle < max_restarts; cycle++) {
    /* true residual with the exact operator */
    dfl_sloppy_prec = 0;
    f(tmp, P);
    diff(rho, Q, tmp, N);
    err = square_norm(rho, N, 1);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2){
      printf("GCR: iteration number: %d, true residue: %g\n", iter, err);
      fflush(stdout);
    }
    if(mode_valid && (err <= target)) {
      finalize_solver(solver_field, nr_sf);
      return(iter);
    }

    for(k = 0; k < m; k++) {
      if(precon != 0) {
        zero_spinor_field(xi[k], N);
        Msap_eo(xi[k], rho, 6);
        /* Msap(xi[k], rho, 8); */
      }
      else {
        assign(xi[k], rho, N);
      }

      dfl_sloppy_prec = 1;
      dfl_little_D_prec = 1.e-12;
      f(tmp, xi[k]);

      /* orthogonalise tmp against chi[0..k-1]; it then becomes chi[k] */
      for(prev = 0; prev < k; prev++) {
        a[prev][k] = scalar_prod(chi[prev], tmp, N, 1);
        assign_diff_mul(tmp, chi[prev], a[prev][k], N);
      }
      b[k] = sqrt(square_norm(tmp, N, 1));
      mul_r(chi[k], 1./b[k], tmp, N);

      /* project the residual onto the new direction */
      c[k] = scalar_prod(chi[k], rho, N, 1);
      assign_diff_mul(rho, chi[k], c[k], N);
      err = square_norm(rho, N, 1);
      iter ++;
      if(g_proc_id == g_stdio_proc && g_debug_level > 0){
        if(rel_prec == 1) printf("# GCR: %d\t%g >= %g iterated residue\n", iter, err, target);
        else printf("# GCR: %d\t%g >= %giterated residue\n", iter, err, target);
        fflush(stdout);
      }

      /* stop on the last basis vector or once the precision is reached,
         keeping k at the index of the last vector built */
      if(k == m-1) break;
      if(mode_valid && (err <= target)) break;
    }

    /* prepare for restart: back-substitute for the coefficients of
       xi[k], ..., xi[0] and add the correction to P */
    _mult_real(c[k], c[k], 1./b[k]);
    assign_add_mul(P, xi[k], c[k], N);
    prev = k-1;
    while(prev >= 0) {
      for(col = prev+1; col <= k; col++) {
        _mult_assign_complex(ctmp, a[prev][col], c[col]);
        /* c[prev] -= ctmp */
        _diff_complex(c[prev], ctmp);
      }
      _mult_real(c[prev], c[prev], 1./b[prev]);
      assign_add_mul(P, xi[prev], c[prev], N);
      prev--;
    }
  }

  finalize_solver(solver_field, nr_sf);
  return(-1);
}
/*lambda: largest eigenvalue, k eigenvector */ int evamax(double *rz, int k, double q_off, double eps_sq) { static double ritz,norm0,normg,normg0,beta_cg; static double costh,sinth,cosd,sind,aaa,normp,xxx; static double xs1,xs2,xs3; int iteration; /* Initialize k to be gaussian */ random_spinor_field(g_spinor_field[k], VOLUME/2); norm0=square_norm(g_spinor_field[k], VOLUME/2, 1); /*normalize k */ assign_mul_bra_add_mul_r( g_spinor_field[k], 1./sqrt(norm0),0., g_spinor_field[k], VOLUME/2); Q_psi(DUM_SOLVER,k,q_off); Q_psi(DUM_SOLVER,DUM_SOLVER,q_off); /*compute the ritz functional */ /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/ ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1); zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2); assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 1., -ritz, VOLUME/2); assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2); normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1); /* main loop */ for(iteration=1;iteration<=ITER_MAX_BCG;iteration++) { if(normg0 <= eps_sq) break; Q_psi(DUM_SOLVER+2,DUM_SOLVER+1,q_off); Q_psi(DUM_SOLVER+2,DUM_SOLVER+2,q_off); /* compute costh and sinth */ normp=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1); xxx=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1); xs1=0.5*(ritz+xxx/normp); xs2=0.5*(ritz-xxx/normp); normp=sqrt(normp); xs3=normg0/normp; aaa=sqrt(xs2*xs2+xs3*xs3); cosd=xs2/aaa; sind=xs3/aaa; if(cosd>=0.) 
{ costh=sqrt(0.5*(1.+cosd)); sinth=0.5*sind/costh; } else { sinth=sqrt(0.5*(1.-cosd)); costh=0.5*sind/sinth; } ritz=xs1+aaa; assign_add_mul_r_add_mul(g_spinor_field[k], g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], costh-1., sinth/normp, VOLUME/2); assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2], costh-1., sinth/normp, VOLUME/2); /* compute g */ zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2); assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 1., -ritz, VOLUME/2); /* calculate the norm of g' and beta_cg=costh g'^2/g^2 */ normg=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1); beta_cg=costh*normg/normg0; if(beta_cg*costh*normp>20.*sqrt(normg)) beta_cg=0.; normg0=normg; /* compute the new value of p */ assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2), VOLUME/2, 1); assign_mul_add_r(g_spinor_field[DUM_SOLVER+1],beta_cg, g_spinor_field[DUM_SOLVER+2], VOLUME/2); /* restore the state of the iteration */ if(iteration%20==0) { /* readjust x */ xxx=sqrt(square_norm(g_spinor_field[k], VOLUME/2), 1); assign_mul_bra_add_mul_r( g_spinor_field[k], 1./xxx,0., g_spinor_field[k], VOLUME/2); Q_psi(DUM_SOLVER,k,q_off); Q_psi(DUM_SOLVER,DUM_SOLVER,q_off); /*compute the ritz functional */ ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1); /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/ zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2); assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 1., -ritz, VOLUME/2); normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1); /*subtract a linear combination of x and g from p to insure (x,p)=0 and (p,g)=(g,g) */ cosd=scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1); assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], 
g_spinor_field[k], -cosd, VOLUME/2); cosd=scalar_prod_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1)-normg0; assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], -cosd/sqrt(normg0), VOLUME/2); } } *rz=ritz; return iteration; }
/*
 * Multi-shift CG.
 *
 *   P                   output: P[im] is the solution for shift im (starts at 0)
 *   Q                   input : source
 *   solver_params       sdim, no_shifts, shifts[], max_iter, rel_prec,
 *                       squared_solver_prec and the operator M_psi
 *   cgmms_reached_prec  output: squared residual of shift 0 when the loop
 *                       was left (not written if max_iter <= 0)
 *
 * Shift 0 is solved as (M_psi + shifts[0]^2) P[0] = Q by plain CG; the other
 * shifts are advanced alongside it through the zita/alphas/betas recurrences
 * with sigma[im] = shifts[im]^2 - shifts[0]^2. The last active shift is
 * dropped from the update every 20 iterations once its correction is below
 * the requested precision.
 *
 * Returns the number of iterations on convergence and -1 if the precision
 * was not reached within max_iter iterations.
 */
int cg_mms_tm(spinor ** const P, spinor * const Q,
              solver_params_t * solver_params, double * cgmms_reached_prec) {

  /* every scalar is written before it is read in each call; no static state */
  double normsq, pro, err, squarenorm;
  double zita_new, alpham1;
  int iteration, N = solver_params->sdim, no_shifts = solver_params->no_shifts;
  int converged = 0;
  spinor ** solver_field = NULL;
  double atime, etime;
  const int nr_sf = 3;

  atime = gettime();
  if(solver_params->sdim == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
    init_mms_tm(no_shifts, VOLUMEPLUSRAND);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
    init_mms_tm(no_shifts, VOLUMEPLUSRAND/2);
  }

  zero_spinor_field(P[0], N);
  alphas[0] = 1.0;
  betas[0] = 0.0;
  sigma[0] = solver_params->shifts[0]*solver_params->shifts[0];
  if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", 0, sigma[0]);

  for(int im = 1; im < no_shifts; im++) {
    sigma[im] = solver_params->shifts[im]*solver_params->shifts[im] - sigma[0];
    if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", im, sigma[im]);
    // these will be the result spinor fields
    zero_spinor_field(P[im], N);
    // these are intermediate fields
    assign(ps_mms_solver[im-1], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  /* currently only implemented for P=0 */
  squarenorm = square_norm(Q, N, 1);
  /* if a starting solution vector equal to zero is chosen */
  /* solver_field[0] = residual r, [1] = search direction p, [2] = A p */
  assign(solver_field[0], Q, N);
  assign(solver_field[1], Q, N);
  normsq = squarenorm;

  /* main loop */
  for(iteration = 0; iteration < solver_params->max_iter; iteration++) {

    /* Q^2*p and then (p,Q^2*p) */
    solver_params->M_psi(solver_field[2], solver_field[1]);
    // add the zero's shift
    assign_add_mul_r(solver_field[2], solver_field[1], sigma[0], N);
    pro = scalar_prod_r(solver_field[1], solver_field[2], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alphas[0](i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alphas[0];

    /* Compute alphas[0](i+1) */
    alphas[0] = normsq/pro;
    for(int im = 1; im < no_shifts; im++) {

      /* zita_new is a temp variable that corresponds to zita(i+1) */
      zita_new = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im])
                                   + alpham1*(1.+sigma[im]*alphas[0]));

      // Now zita(i-1) is put equal to the old zita(i)
      zitam1[im] = zita[im];
      // Now zita(i+1) is updated
      zita[im] = zita_new;
      // Update of alphas(i) = alphas[0](i)*zita(i+1)/zita(i)
      alphas[im] = alphas[0]*zita[im]/zitam1[im];

      // Compute xs(i+1) = xs(i) + alphas(i)*ps(i)
      assign_add_mul_r(P[im], ps_mms_solver[im-1], alphas[im], N);

      // in the CG the corrections are decreasing with the iteration number increasing
      // therefore, we can remove shifts when the norm of the correction vector
      // falls below a threshold
      // this is useful for computing time and needed, because otherwise
      // zita might get smaller than DOUBLE_EPS and, hence, zero
      if(iteration > 0 && (iteration % 20 == 0) && (im == no_shifts-1)) {
        double sn = square_norm(ps_mms_solver[im-1], N, 1);
        if(alphas[no_shifts-1]*alphas[no_shifts-1]*sn <= solver_params->squared_solver_prec) {
          no_shifts--;
          if(g_debug_level > 2 && g_proc_id == 0) {
            printf("# CGMMS: at iteration %d removed one shift, %d remaining\n", iteration, no_shifts);
          }
        }
      }
    }

    /* Compute x_(i+1) = x_i + alphas[0](i+1) p_i */
    assign_add_mul_r(P[0], solver_field[1], alphas[0], N);
    /* Compute r_(i+1) = r_i - alphas[0](i+1) Qp_i */
    assign_add_mul_r(solver_field[0], solver_field[2], -alphas[0], N);

    /* Check whether the precision eps_sq is reached */
    err = square_norm(solver_field[0], N, 1);

    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("# CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    const int reached =
      ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
      ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0));

    if(reached || (iteration == solver_params->max_iter -1)) {
      /* FIXME temporary output of precision until a better solution can be found */
      *cgmms_reached_prec = err;
      /* remember WHY the loop ends: hitting the precision on the very
         last allowed iteration is still a success */
      converged = reached;
      break;
    }

    /* Compute betas[0](i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    betas[0] = err/normsq;
    assign_mul_add_r(solver_field[1], betas[0], solver_field[0], N);
    normsq = err;

    /* Compute betas(i+1) = betas[0](i+1)*(zita(i+1)*alphas(i))/(zita(i)*alphas[0](i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(int im = 1; im < no_shifts; im++) {
      betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
      assign_mul_add_mul_r(ps_mms_solver[im-1], solver_field[0], betas[im], zita[im], N);
    }
  }
  etime = gettime();
  g_sloppy_precision = 0;

  /* iteration is the zero-based index of the last step; report a count
     on success and -1 otherwise (including max_iter <= 0) */
  if(converged) iteration++;
  else iteration = -1;

  if(g_debug_level > 0 && g_proc_id == 0) {
    printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_params->no_shifts, iteration, solver_params->squared_solver_prec, etime - atime);
  }

  finalize_solver(solver_field, nr_sf);
  return(iteration);
}
/*
 * Chronological trial vector: builds a guess for the solution of
 * f * x = phi from the n previously stored vectors v[index_array[0..n-1]].
 *
 *   trial        output: the guess (also used as scratch for f * v_j)
 *   phi          right-hand side
 *   v            stored vectors; those addressed by index_array are
 *                modified in place by the orthogonalisation step
 *   index_array  maps position 0..n-1 to a slot of v
 *   _N           leading dimension of the small matrix G; _N <= 0 disables
 *                the guess
 *   _n           number of stored vectors in use
 *   V            number of sites per vector
 *   f            operator, assumed hermitian
 *
 * The coefficients solve G y = b with G = v^dagger f v and b = v^dagger phi
 * (via LUSolve); trial = sum_i y_i v_i. With nothing to build on
 * (_N <= 0 or _n <= 0) trial is set to zero.
 *
 * Returns 0, or -1 if the coefficient buffers could not be allocated (trial
 * is zero in that case).
 */
int chrono_guess(spinor * const trial, spinor * const phi, spinor ** const v, int index_array[],
                 const int _N, const int _n, const int V, matrix_mult f) {
  int info = 0;
  int i, j, N=_N, n=_n;
  _Complex double s;
  /* bn and G persist between calls; csg_cap is the N they were sized for */
  static int csg_cap = 0;
  static _Complex double *bn = NULL;
  static _Complex double *G = NULL;
  const int max_N = 20;  /* minimum capacity */

  /* n <= 0 would index bn[-1] and index_array[-1] below */
  if(N > 0 && n > 0) {
    if(g_proc_id == 0 && g_debug_level > 1) {
      printf("CSG: preparing trial vector \n");
      fflush(stdout);
    }
    /* G is addressed as G[i*N + j], so it needs N*N entries; grow the
       buffers whenever a call comes in with a larger N */
    if(N > csg_cap) {
      const int new_cap = (N > max_N) ? N : max_N;
      free(bn);
      free(G);
      bn = malloc((size_t)new_cap * sizeof *bn);
      G = malloc((size_t)new_cap * (size_t)new_cap * sizeof *G);
      if(bn == NULL || G == NULL) {
        fprintf(stderr, "CSG: not enough memory for the coefficient buffers, using zero trial vector\n");
        free(bn);
        free(G);
        bn = NULL;
        G = NULL;
        csg_cap = 0;
        zero_spinor_field(trial, V);
        return(-1);
      }
      csg_cap = new_cap;
    }

    /* Construct an orthogonal basis */
    /* only the newest vector (position n-1) is projected out of the others */
    for(j = n-1; j > n-2; j--) {
      for(i = j-1; i > -1; i--) {
        s = scalar_prod(v[index_array[j]], v[index_array[i]], V, 1);
        assign_diff_mul(v[index_array[i]], v[index_array[j]], s, V);
        if(g_debug_level > 2) {
          s = scalar_prod(v[index_array[i]], v[index_array[j]], V, 1);
          if(g_proc_id == 0) {
            printf("CSG: <%d,%d> = %e +i %e \n", i, j, creal(s), cimag(s));fflush(stdout);
          }
        }
      }
    }

    /* Generate "interaction matrix" V^\dagger f V */
    /* We assume that f is hermitian               */
    /* Generate also the right hand side           */

    for (j = 0; j < n; j++){
      f(trial, v[index_array[j]]);

      /* Only the upper triangle is computed; the lower one is its conjugate */
      for(i = 0; i < j+1; i++){
        G[i*N + j] = scalar_prod(v[index_array[i]], trial, V, 1);
        if(j != i) {
          (G[j*N + i]) = conj(G[i*N + j]);
        }
        if(g_proc_id == 0 && g_debug_level > 2) {
          printf("CSG: G[%d*N + %d]= %e + i %e  \n", i, j, creal(G[i*N + j]), cimag(G[i*N + j]));
          fflush(stdout);
        }
      }
      /* The right hand side */
      bn[j] = scalar_prod(v[index_array[j]], phi, V, 1);
    }

    /* Solver G y = bn for y and store it in bn */
    LUSolve(n, G, N, bn);

    /* Construct the new guess vector */
    if(info == 0) {
      mul(trial, bn[n-1], v[index_array[n-1]], V);
      if(g_proc_id == 0 && g_debug_level > 2) {
        /* bn is indexed by position, not by the slot in index_array */
        printf("CSG: bn[%d] = %f %f\n", n-1, creal(bn[n-1]), cimag(bn[n-1]));
      }
      for(i = n-2; i > -1; i--) {
        assign_add_mul(trial, v[index_array[i]], bn[i], V);
        if(g_proc_id == 0 && g_debug_level > 2) {
          printf("CSG: bn[%d] = %f %f\n", i, creal(bn[i]), cimag(bn[i]));
        }
      }
    }
    else {
      assign(trial, phi, V);
    }

    if(g_proc_id == 0 && g_debug_level > 1) {
      printf("CSG: done! n= %d N=%d \n", n, N);fflush(stdout);
    }
  }
  else {
    if(g_proc_id == 0 && g_debug_level > 1) {
      printf("CSG: using zero trial vector \n");
      fflush(stdout);
    }
    zero_spinor_field(trial, V);
  }

  return(info);
}
/*
 * Minimal-residual iteration for one block, starting from P = 0.
 *
 *   P         output: approximate solution of f(P) = Q on block blk
 *   Q         source
 *   max_iter  iteration limit
 *   eps_sq    absolute target for the squared residual
 *   rel_prec  not used by this routine
 *   N         number of sites in the block
 *   f, blk    block operator and block index passed to it
 *
 * All norms and scalar products are taken with the parallel flag 0.
 * Returns the number of iterations, or -1 if the residual is still above
 * eps_sq when the loop ends. The three work vectors live in a static buffer
 * that is kept between calls and reallocated when N changes.
 */
int mrblk(spinor * const P, spinor * const Q,
          const int max_iter, const double eps_sq,
          const int rel_prec, const int N,
          matrix_mult_blk f, const int blk) {
  static int mr_init=0;
  static spinor *s_=NULL;
  static int N_;
  const int parallel = 0;
  spinor * work[3];
  spinor * resid;
  _Complex double alpha;
  double res_sq, Ar_sq;
  int it = 0;

  /* (re)allocate the work space on first use or when N changed */
  if(mr_init == 0 || N != N_) {
    /* an already initialised buffer only gets here with a different N */
    if(mr_init != 0) {
      free(s_);
    }
    N_ = N;
    s_ = calloc(3*(N+1)+1, sizeof(spinor));
    mr_init = 1;
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  work[0] = (spinor *)(((unsigned long int)(s_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  work[0] = s_;
#endif
  work[1] = work[0] + N + 1;
  work[2] = work[1] + N + 1;

  /* work[0]: residual, work[1]: f(residual), work[2]: running f(P) */
  resid = work[0];
  res_sq = square_norm(Q, N, parallel);

  zero_spinor_field(P, N);
  f(work[2], P, blk);
  diff(resid, Q, work[2], N);
  res_sq = square_norm(resid, N, parallel);
  if(g_proc_id == g_stdio_proc && g_debug_level > 2 && blk == 0) {
    printf("MRblk iteration= %d |res|^2= %e\n", it, res_sq);
    fflush( stdout );
  }

  for(;;) {
    if(!(res_sq > eps_sq) || !(it < max_iter)) break;
    it++;

    /* step length alpha = (A r, r) / (A r, A r) */
    f(work[1], resid, blk);
    alpha = scalar_prod(work[1], resid, N, parallel);
    Ar_sq = square_norm(work[1], N, parallel);
    alpha = alpha / Ar_sq;
    assign_add_mul(P, resid, alpha, N);

    /* keep f(P) up to date cheaply, recomputing it every 50th step */
    if(it%50 != 0) {
      assign_add_mul(work[2], work[1], alpha, N);
    }
    else {
      f(work[2], P,blk);
    }

    diff(resid, Q, work[2], N);
    res_sq = square_norm(resid, N, parallel);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2 && blk == 0) {
      printf("MRblk iteration= %d |res|^2= %g\n", it, res_sq);
      fflush(stdout);
    }
  }

  /* the buffer is deliberately kept for the next call */
  return (res_sq > eps_sq) ? -1 : it;
}
int main(int argc,char *argv[]) { FILE *parameterfile=NULL,*rlxdfile=NULL, *countfile=NULL; char * filename = NULL; char datafilename[50]; char parameterfilename[50]; char gauge_filename[50]; char * nstore_filename = ".nstore_counter"; char * input_filename = NULL; int rlxd_state[105]; int j,ix,mu; int k; struct timeval t1; int g_nev, max_iter_ev; double stop_prec_ev; /* Energy corresponding to the Gauge part */ double eneg = 0., plaquette_energy = 0., rectangle_energy = 0.; /* Acceptance rate */ int Rate=0; /* Do we want to perform reversibility checks */ /* See also return_check_flag in read_input.h */ int return_check = 0; /* For getopt */ int c; /* For the Polyakov loop: */ int dir = 2; _Complex double pl, pl4; verbose = 0; g_use_clover_flag = 0; g_nr_of_psf = 1; #ifndef XLC signal(SIGUSR1,&catch_del_sig); signal(SIGUSR2,&catch_del_sig); signal(SIGTERM,&catch_del_sig); signal(SIGXCPU,&catch_del_sig); #endif while ((c = getopt(argc, argv, "h?f:o:")) != -1) { switch (c) { case 'f': input_filename = calloc(200, sizeof(char)); strcpy(input_filename,optarg); break; case 'o': filename = calloc(200, sizeof(char)); strcpy(filename,optarg); break; case 'h': case '?': default: usage(); break; } } if(input_filename == NULL){ input_filename = "hmc.input"; } if(filename == NULL){ filename = "output"; } /* Read the input file */ read_input(input_filename); mpi_init(argc, argv); if(Nsave == 0){ Nsave = 1; } if(nstore == -1) { countfile = fopen(nstore_filename, "r"); if(countfile != NULL) { fscanf(countfile, "%d\n", &nstore); fclose(countfile); } else { nstore = 0; } } if(g_rgi_C1 == 0.) { g_dbw2rand = 0; } #ifndef TM_USE_MPI g_dbw2rand = 0; #endif /* Reorder the mu parameter and the number of iterations */ if(g_mu3 > 0.) { g_mu = g_mu1; g_mu1 = g_mu3; g_mu3 = g_mu; j = int_n[1]; int_n[1] = int_n[3]; int_n[3] = j; j = g_csg_N[0]; g_csg_N[0] = g_csg_N[4]; g_csg_N[4] = j; g_csg_N[6] = j; if(fabs(g_mu3) > 0) { g_csg_N[6] = 0; } g_nr_of_psf = 3; } else if(g_mu2 > 0.) 
{ g_mu = g_mu1; g_mu1 = g_mu2; g_mu2 = g_mu; int_n[3] = int_n[1]; int_n[1] = int_n[2]; int_n[2] = int_n[3]; /* For chronological inverter */ g_csg_N[4] = g_csg_N[0]; g_csg_N[0] = g_csg_N[2]; g_csg_N[2] = g_csg_N[4]; if(fabs(g_mu2) > 0) { g_csg_N[4] = 0; } g_csg_N[6] = 0; g_nr_of_psf = 2; } else { g_csg_N[2] = g_csg_N[0]; if(fabs(g_mu2) > 0) { g_csg_N[2] = 0; } g_csg_N[4] = 0; g_csg_N[6] = 0; } for(j = 0; j < g_nr_of_psf+1; j++) { if(int_n[j] == 0) int_n[j] = 1; } if(g_nr_of_psf == 3) { g_eps_sq_force = g_eps_sq_force1; g_eps_sq_force1 = g_eps_sq_force3; g_eps_sq_force3 = g_eps_sq_force; g_eps_sq_acc = g_eps_sq_acc1; g_eps_sq_acc1 = g_eps_sq_acc3; g_eps_sq_acc3 = g_eps_sq_acc; } if(g_nr_of_psf == 2) { g_eps_sq_force = g_eps_sq_force1; g_eps_sq_force1 = g_eps_sq_force2; g_eps_sq_force2 = g_eps_sq_force; g_eps_sq_acc = g_eps_sq_acc1; g_eps_sq_acc1 = g_eps_sq_acc2; g_eps_sq_acc2 = g_eps_sq_acc; } g_mu = g_mu1; g_eps_sq_acc = g_eps_sq_acc1; g_eps_sq_force = g_eps_sq_force1; #ifdef _GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif if ( j!= 0) { fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); exit(0); } j = init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if ( j!= 0) { fprintf(stderr, "Not enough memory for geometry_indices! Aborting...\n"); exit(0); } j = init_spinor_field(VOLUMEPLUSRAND/2, NO_OF_SPINORFIELDS); if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } j = init_bispinor_field(VOLUME/2, NO_OF_SPINORFIELDS); j = init_csg_field(VOLUMEPLUSRAND/2, g_csg_N); if ( j!= 0) { fprintf(stderr, "Not enough memory for csg fields! Aborting...\n"); exit(0); } j = init_moment_field(VOLUME, VOLUMEPLUSRAND); if ( j!= 0) { fprintf(stderr, "Not enough memory for moment fields! 
Aborting...\n"); exit(0); } zero_spinor_field(g_spinor_field[DUM_DERI+4],VOLUME/2); zero_spinor_field(g_spinor_field[DUM_DERI+5],VOLUME/2); zero_spinor_field(g_spinor_field[DUM_DERI+6],VOLUME/2); if(g_proc_id == 0){ /* fscanf(fp6,"%s",filename); */ /*construct the filenames for the observables and the parameters*/ strcpy(datafilename,filename); strcat(datafilename,".data"); strcpy(parameterfilename,filename); strcat(parameterfilename,".para"); parameterfile=fopen(parameterfilename, "w"); printf("# This is the hmc code for twisted Mass Wilson QCD\n\nVersion %s\n", Version); #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _NEW_GEOMETRY printf("# The code was compiled with -D_NEW_GEOMETRY\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with -D_GAUGE_COPY\n"); #endif printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY), (int)(LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); printf("# beta = %f , kappa= %f\n", g_beta, g_kappa); printf("# mus = %f, %f, %f\n", g_mu1, g_mu2, g_mu3); printf("# int_n_gauge = %d, int_n_ferm1 = %d, int_n_ferm2 = %d, int_n_ferm3 = %d\n", int_n[0], int_n[1], int_n[2], int_n[3]); printf("# g_rgi_C0 = %f, g_rgi_C1 = %f\n", g_rgi_C0, g_rgi_C1); printf("# Number of pseudo-fermion fields: %d\n", g_nr_of_psf); printf("# g_eps_sq_force = %e, g_eps_sq_acc = %e\n", g_eps_sq_force, g_eps_sq_acc); printf("# Integration scheme: "); if(integtyp == 1) printf("leap-frog (single time scale)\n"); if(integtyp == 2) printf("Sexton-Weingarten (single time scale)\n"); if(integtyp == 3) printf("leap-frog (multiple 
time scales)\n"); if(integtyp == 4) printf("Sexton-Weingarten (multiple time scales)\n"); if(integtyp == 5) printf("higher order and leap-frog (multiple time scales)\n"); printf("# Using %s precision for the inversions!\n", g_relative_precision_flag ? "relative" : "absolute"); printf("# Using in chronological inverter for spinor_field 1,2,3 a history of %d, %d, %d, respectively\n", g_csg_N[0], g_csg_N[2], g_csg_N[4]); fprintf(parameterfile, "The lattice size is %d x %d x %d x %d\n", (int)(g_nproc_t*T), (int)(g_nproc_x*LX), (int)(LY), (int)(LZ)); fprintf(parameterfile, "The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY), (int)(LZ)); fprintf(parameterfile, "g_beta = %f , g_kappa= %f, c_sw = %f \n",g_beta,g_kappa,g_c_sw); fprintf(parameterfile, "boundary of fermion fields (t,x,y,z): %f %f %f %f \n",X0,X1,X2,X3); fprintf(parameterfile, "EPS_SQ0=%e, EPS_SQ1=%e EPS_SQ2=%e, EPS_SQ3=%e \n" ,EPS_SQ0,EPS_SQ1,EPS_SQ2,EPS_SQ3); fprintf(parameterfile, "g_eps_sq_force = %e, g_eps_sq_acc = %e\n", g_eps_sq_force, g_eps_sq_acc); fprintf(parameterfile, "dtau=%f, Nsteps=%d, Nmeas=%d, Nsave=%d, integtyp=%d, nsmall=%d \n", dtau,Nsteps,Nmeas,Nsave,integtyp,nsmall); fprintf(parameterfile, "mu = %f, mu2=%f, mu3=%f\n ", g_mu, g_mu2, g_mu3); fprintf(parameterfile, "int_n_gauge = %d, int_n_ferm1 = %d, int_n_ferm2 = %d, int_n_ferm3 = %d\n ", int_n[0], int_n[1], int_n[2], int_n[3]); fprintf(parameterfile, "g_rgi_C0 = %f, g_rgi_C1 = %f\n", g_rgi_C0, g_rgi_C1); fprintf(parameterfile, "# Number of pseudo-fermion fields: %d\n", g_nr_of_psf); fprintf(parameterfile, "# Integration scheme: "); if(integtyp == 1) fprintf(parameterfile, "leap-frog (single time scale)\n"); if(integtyp == 2) fprintf(parameterfile, "Sexton-Weingarten (single time scale)\n"); if(integtyp == 3) fprintf(parameterfile, "leap-frog (multiple time scales)\n"); if(integtyp == 4) fprintf(parameterfile, "Sexton-Weingarten (multiple time scales)\n"); if(integtyp == 5) fprintf(parameterfile, "higher order 
and leap-frog (multiple time scales)\n"); fprintf(parameterfile, "Using %s precision for the inversions!\n", g_relative_precision_flag ? "relative" : "absolute"); fprintf(parameterfile, "Using in chronological inverter for spinor_field 1,2,3 a history of %d, %d, %d, respectively\n", g_csg_N[0], g_csg_N[2], g_csg_N[4]); fflush(stdout); fflush(parameterfile); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(); check_geometry(); if(g_proc_id == 0) { #if defined GEOMETRIC if(g_proc_id==0) fprintf(parameterfile,"The geometric series is used as solver \n\n"); #else if(g_proc_id==0) fprintf(parameterfile,"The BICG_stab is used as solver \n\n"); #endif fflush(parameterfile); } /* Continue */ if(startoption == 3){ rlxdfile = fopen(rlxd_input_filename,"r"); if(rlxdfile != NULL) { if(g_proc_id == 0) { fread(rlxd_state,sizeof(rlxd_state),1,rlxdfile); } } else { if(g_proc_id == 0) { printf("%s does not exist, switching to restart...\n", rlxd_input_filename); } startoption = 2; } fclose(rlxdfile); if(startoption != 2) { if(g_proc_id == 0) { rlxd_reset(rlxd_state); printf("Reading Gauge field from file %s\n", gauge_input_filename); fflush(stdout); } read_gauge_field_time_p(gauge_input_filename,g_gauge_field); } } if(startoption != 3){ /* Initialize random number generator */ if(g_proc_id == 0) { rlxd_init(1, random_seed); /* hot */ if(startoption == 1) { random_gauge_field(); } rlxd_get(rlxd_state); #ifdef TM_USE_MPI MPI_Send(&rlxd_state[0], 105, MPI_INT, 1, 99, MPI_COMM_WORLD); MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_nproc-1, 99, MPI_COMM_WORLD, &status); rlxd_reset(rlxd_state); #endif } #ifdef TM_USE_MPI else { MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_proc_id-1, 99, MPI_COMM_WORLD, &status); rlxd_reset(rlxd_state); /* hot */ if(startoption == 1) { random_gauge_field(); } k=g_proc_id+1; if(k==g_nproc){ k=0; } rlxd_get(rlxd_state); MPI_Send(&rlxd_state[0], 105, MPI_INT, k, 99, MPI_COMM_WORLD); } #endif /* Cold */ 
if(startoption == 0) { unit_g_gauge_field(); } /* Restart */ else if(startoption == 2) { if (g_proc_id == 0){ printf("Reading Gauge field from file %s\n", gauge_input_filename); fflush(stdout); } read_gauge_field_time_p(gauge_input_filename,g_gauge_field); } } /*For parallelization: exchange the gaugefield */ #ifdef TM_USE_MPI xchange_gauge(g_gauge_field); #endif #ifdef _GAUGE_COPY update_backward_gauge(); #endif /*compute the energy of the gauge field*/ plaquette_energy=measure_gauge_action(); if(g_rgi_C1 > 0. || g_rgi_C1 < 0.) { rectangle_energy = measure_rectangles(); if(g_proc_id==0){ fprintf(parameterfile,"#First rectangle value: %14.12f \n",rectangle_energy/(12.*VOLUME*g_nproc)); } } eneg = g_rgi_C0 * plaquette_energy + g_rgi_C1 * rectangle_energy; /* Measure and print the Polyakov loop: */ polyakov_loop(&pl, dir); if(g_proc_id==0){ fprintf(parameterfile,"#First plaquette value: %14.12f \n", plaquette_energy/(6.*VOLUME*g_nproc)); fprintf(parameterfile,"#First Polyakov loop value in %d-direction |L(%d)|= %14.12f \n", dir, dir, cabs(pl)); } dir=3; polyakov_loop(&pl, dir); if(g_proc_id==0){ fprintf(parameterfile,"#First Polyakov loop value in %d-direction |L(%d)|= %14.12f \n", dir, dir, cabs(pl)); fclose(parameterfile); } /* set ddummy to zero */ for(ix = 0; ix < VOLUME+RAND; ix++){ for(mu=0; mu<4; mu++){ ddummy[ix][mu].d1=0.; ddummy[ix][mu].d2=0.; ddummy[ix][mu].d3=0.; ddummy[ix][mu].d4=0.; ddummy[ix][mu].d5=0.; ddummy[ix][mu].d6=0.; ddummy[ix][mu].d7=0.; ddummy[ix][mu].d8=0.; } } if(g_proc_id == 0) { gettimeofday(&t1,NULL); countfile = fopen("history_hmc_tm", "a"); fprintf(countfile, "!!! 
Timestamp %ld, Nsave = %d, g_mu = %e, g_mu1 = %e, g_mu_2 = %e, g_mu3 = %e, beta = %f, kappa = %f, C1 = %f, int0 = %d, int1 = %d, int2 = %d, int3 = %d, g_eps_sq_force = %e, g_eps_sq_acc = %e, ", t1.tv_sec, Nsave, g_mu, g_mu1, g_mu2, g_mu3, g_beta, g_kappa, g_rgi_C1, int_n[0], int_n[1], int_n[2], int_n[3], g_eps_sq_force, g_eps_sq_acc); fprintf(countfile, "Nsteps = %d, dtau = %e, tau = %e, integtyp = %d, rel. prec. = %d\n", Nsteps, dtau, tau, integtyp, g_relative_precision_flag); fclose(countfile); } /* HERE THE CALLS FOR SOME EIGENVALUES */ /* for lowest g_nev = 10; */ /* for largest */ g_nev = 10; max_iter_ev = 1000; stop_prec_ev = 1.e-10; if(g_proc_id==0) { printf(" Values of mu = %e mubar = %e eps = %e precision = %e \n \n", g_mu, g_mubar, g_epsbar, stop_prec_ev); } eigenvalues(&g_nev, operator_flag, max_iter_ev, stop_prec_ev); g_nev = 4; max_iter_ev = 200; stop_prec_ev = 1.e-03; max_eigenvalues(&g_nev, operator_flag, max_iter_ev, stop_prec_ev); if(g_proc_id==0) { printf(" Values of mu = %e mubar = %e eps = %e precision = %e \n \n", g_mu, g_mubar, g_epsbar, stop_prec_ev); /* printf(" Values of mu = %e precision = %e \n \n", g_mu, stop_prec_ev); */ } /* END OF EIGENVALUES CALLS */ if(g_proc_id==0) { rlxd_get(rlxd_state); rlxdfile=fopen("last_state","w"); fwrite(rlxd_state,sizeof(rlxd_state),1,rlxdfile); fclose(rlxdfile); printf("Acceptance Rate was: %e Prozent\n", 100.*(double)Rate/(double)Nmeas); fflush(stdout); parameterfile = fopen(parameterfilename, "a"); fprintf(parameterfile, "Acceptance Rate was: %e Prozent\n", 100.*(double)Rate/(double)Nmeas); fclose(parameterfile); } #ifdef TM_USE_MPI MPI_Finalize(); #endif free_gauge_tmp(); free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_bispinor_field(); free_moment_field(); return(0); }
void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n, spinor *S_s, spinor *S_c, matrix_mult_nd Qsq) { int j; double fact1, fact2, temp1, temp2, temp3, temp4; spinor *svs_=NULL, *svs=NULL, *ds_=NULL, *ds=NULL, *dds_=NULL, *dds=NULL, *auxs_=NULL, *auxs=NULL, *aux2s_=NULL, *aux2s=NULL, *aux3s_=NULL, *aux3s=NULL; spinor *svc_=NULL, *svc=NULL, *dc_=NULL, *dc=NULL, *ddc_=NULL, *ddc=NULL, *auxc_=NULL, *auxc=NULL, *aux2c_=NULL, *aux2c=NULL, *aux3c_=NULL, *aux3c=NULL; svs_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); svs = (spinor *)(((unsigned long int)(svs_)+ALIGN_BASE)&~ALIGN_BASE); ds_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); ds = (spinor *)(((unsigned long int)(ds_)+ALIGN_BASE)&~ALIGN_BASE); dds_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); dds = (spinor *)(((unsigned long int)(dds_)+ALIGN_BASE)&~ALIGN_BASE); auxs_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); auxs = (spinor *)(((unsigned long int)(auxs_)+ALIGN_BASE)&~ALIGN_BASE); aux2s_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); aux2s = (spinor *)(((unsigned long int)(aux2s_)+ALIGN_BASE)&~ALIGN_BASE); aux3s_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); aux3s = (spinor *)(((unsigned long int)(aux3s_)+ALIGN_BASE)&~ALIGN_BASE); svc_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); svc = (spinor *)(((unsigned long int)(svc_)+ALIGN_BASE)&~ALIGN_BASE); dc_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); dc = (spinor *)(((unsigned long int)(dc_)+ALIGN_BASE)&~ALIGN_BASE); ddc_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); ddc = (spinor *)(((unsigned long int)(ddc_)+ALIGN_BASE)&~ALIGN_BASE); auxc_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); auxc = (spinor *)(((unsigned long int)(auxc_)+ALIGN_BASE)&~ALIGN_BASE); aux2c_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); aux2c = (spinor *)(((unsigned long int)(aux2c_)+ALIGN_BASE)&~ALIGN_BASE); aux3c_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); aux3c = (spinor *)(((unsigned long int)(aux3c_)+ALIGN_BASE)&~ALIGN_BASE); fact1=4/(phmc_cheb_evmax-phmc_cheb_evmin); 
fact2=-2*(phmc_cheb_evmax+phmc_cheb_evmin)/(phmc_cheb_evmax-phmc_cheb_evmin); zero_spinor_field(&ds[0],VOLUME/2); zero_spinor_field(&dds[0],VOLUME/2); zero_spinor_field(&dc[0],VOLUME/2); zero_spinor_field(&ddc[0],VOLUME/2); /* sub_low_ev(&aux3[0], &S[0]); */ assign(&aux3s[0], &S_s[0],VOLUME/2); assign(&aux3c[0], &S_c[0],VOLUME/2); /* Use the Clenshaw's recursion for the Chebysheff polynomial */ for (j=n-1; j>=1; j--) { assign(&svs[0],&ds[0],VOLUME/2); assign(&svc[0],&dc[0],VOLUME/2); /* * if ( (j%10) == 0 ) { * sub_low_ev(&aux[0], &d[0]); * } else { */ assign(&auxs[0], &ds[0], VOLUME/2); assign(&auxc[0], &dc[0], VOLUME/2); /* } */ Qsq(&R_s[0], &R_c[0], &auxs[0], &auxc[0]); temp1=-1.0; temp2=dd[j]; assign_mul_add_mul_add_mul_add_mul_r(&ds[0] , &R_s[0], &dds[0], &aux3s[0], fact2, fact1, temp1, temp2,VOLUME/2); assign_mul_add_mul_add_mul_add_mul_r(&dc[0] , &R_c[0], &ddc[0], &aux3c[0], fact2, fact1, temp1, temp2,VOLUME/2); assign(&dds[0], &svs[0],VOLUME/2); assign(&ddc[0], &svc[0],VOLUME/2); } assign(&R_s[0], &ds[0],VOLUME/2); assign(&R_c[0], &dc[0],VOLUME/2); Qsq(&auxs[0], &auxc[0], &R_s[0], &R_c[0]); temp1=-1.0; temp2=dd[0]/2; temp3=fact1/2; temp4=fact2/2; assign_mul_add_mul_add_mul_add_mul_r(&auxs[0], &ds[0], &dds[0], &aux3s[0], temp3, temp4, temp1, temp2,VOLUME/2); assign_mul_add_mul_add_mul_add_mul_r(&auxc[0], &dc[0], &ddc[0], &aux3c[0], temp3, temp4, temp1, temp2,VOLUME/2); assign(&R_s[0], &auxs[0],VOLUME/2); assign(&R_c[0], &auxc[0],VOLUME/2); free(svs_); free(ds_); free(dds_); free(auxs_); free(aux2s_); free(aux3s_); free(svc_); free(dc_); free(ddc_); free(auxc_); free(aux2c_); free(aux3c_); }