int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s, spinor * const Even_new_c, spinor * const Odd_new_c, spinor * const Even_s, spinor * const Odd_s, spinor * const Even_c, spinor * const Odd_c, const double precision, const int max_iter, const int solver_flag, const int rel_prec) { int iter = 0; /* here comes the inversion using even/odd preconditioning */ if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);} Msw_ee_inv_ndpsi(Even_new_s, Even_new_c, Even_s, Even_c); Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s); Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c); /* The sign is plus, since in Hopping_Matrix */ /* the minus is missing */ assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2); assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2); /* Do the inversion with the preconditioned */ /* matrix to get the odd sites */ /* Here we invert the hermitean operator squared */ if(g_proc_id == 0) { printf("# Using CG for TMWILSON flavour doublet!\n"); fflush(stdout); } gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2); gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2); iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], max_iter, precision, rel_prec, VOLUME/2, &Qsw_pm_ndpsi); Qsw_dagger_ndpsi(Odd_new_s, Odd_new_c, Odd_new_s, Odd_new_c); /* Reconstruct the even sites */ Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s); Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c); Msw_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]); /* The sign is plus, since in Hopping_Matrix */ /* the minus is missing */ assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2); assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2); return(iter); }
/* Apply the full (unpreconditioned) twisted-mass Dirac operator:
 *   Even_new = (1 + i mu gamma5) Even - H_eo Odd
 *   Odd_new  = (1 + i mu gamma5) Odd  - H_oe Even
 * The same global scratch field is reused for both hopping terms. */
void M_full(spinor * const Even_new, spinor * const Odd_new,
            spinor * const Even, spinor * const Odd) {
  spinor * const hop = g_spinor_field[DUM_DERI];

  /* even sites */
  Hopping_Matrix(EO, hop, Odd);
  assign_mul_one_pm_imu(Even_new, Even, 1., VOLUME/2);
  assign_add_mul_r(Even_new, hop, -1., VOLUME/2);

  /* odd sites */
  Hopping_Matrix(OE, hop, Even);
  assign_mul_one_pm_imu(Odd_new, Odd, 1., VOLUME/2);
  assign_add_mul_r(Odd_new, hop, -1., VOLUME/2);
}
// applies ((Q_h\tau_1 * R)^2 - 1) void apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn, spinor * const l_up, spinor * const l_dn, const int id, hamiltonian_field_t * const hf, solver_params_t * solver_params) { monomial * mnl = &monomial_list[id]; mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field, l_up, l_dn, solver_params); // apply R to the pseudo-fermion fields assign(k_up, l_up, VOLUME/2); assign(k_dn, l_dn, VOLUME/2); for(int j = (mnl->rat.np-1); j > -1; j--) { assign_add_mul_r(k_up, g_chi_up_spinor_field[j], mnl->rat.rmu[j], VOLUME/2); assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], mnl->rat.rmu[j], VOLUME/2); } // apply R a second time mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field, k_up, k_dn, solver_params); for(int j = (mnl->rat.np-1); j > -1; j--) { assign_add_mul_r(k_up, g_chi_up_spinor_field[j], mnl->rat.rmu[j], VOLUME/2); assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], mnl->rat.rmu[j], VOLUME/2); } mul_r(g_chi_up_spinor_field[mnl->rat.np], mnl->rat.A*mnl->rat.A, k_up, VOLUME/2); mul_r(g_chi_dn_spinor_field[mnl->rat.np], mnl->rat.A*mnl->rat.A, k_dn, VOLUME/2); // apply Q^2 and compute the residue solver_params->M_ndpsi(k_up, k_dn, g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np]); diff(k_up, k_up, l_up, VOLUME/2); diff(k_dn, k_dn, l_dn, VOLUME/2); }
/* CG-based even/odd-preconditioned smoother.
 *
 * P:     output spinor in lexicographic order
 * Q:     input source in lexicographic order
 * Ncy:   maximal number of CG iterations
 * dummy: unused (kept for a uniform smoother call signature)
 *
 * Temporarily replaces the global twisted mass g_mu by g_mu1 and restores
 * it before returning.  Selects the clover (Qsw_*) or plain twisted-mass
 * (Qtm_*) operators depending on g_c_sw. */
void CGeoSmoother(spinor * const P, spinor * const Q, const int Ncy, const int dummy) {
  spinor ** solver_field = NULL;
  const int nr_sf = 5;
  double musave = g_mu;
  g_mu = g_mu1;
  init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  /* split the source: [0] = even part b_e, [1] = odd part b_o */
  convert_lexic_to_eo(solver_field[0], solver_field[1], Q);
  /* [2] = M_ee^{-1} b_e -- needed again for the final even reconstruction */
  if(g_c_sw > 0)
    assign_mul_one_sw_pm_imu_inv(EE,solver_field[2], solver_field[0], g_mu);
  else
    assign_mul_one_pm_imu_inv(solver_field[2], solver_field[0], +1., VOLUME/2);
  Hopping_Matrix(OE, solver_field[4], solver_field[2]);
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing */
  assign_mul_add_r(solver_field[4], +1., solver_field[1], VOLUME/2);
  /* Do the inversion with the preconditioned */
  /* matrix to get the odd sites */
  gamma5(solver_field[4], solver_field[4], VOLUME/2);
  if(g_c_sw > 0) {
    cg_her(solver_field[3], solver_field[4], Ncy, 1.e-8, 1, VOLUME/2, &Qsw_pm_psi);
    Qsw_minus_psi(solver_field[3], solver_field[3]);
    /* Reconstruct the even sites.
     * BUGFIX: use solver_field[0] (the raw even source, no longer needed)
     * as scratch for H_eo x_o.  The previous code wrote into
     * solver_field[2], destroying M_ee^{-1} b_e which the final
     * assign_add_mul_r below still requires; the non-clover branch
     * correctly preserves solver_field[2]. */
    Hopping_Matrix(EO, solver_field[0], solver_field[3]);
    assign_mul_one_sw_pm_imu_inv(EE,solver_field[4],solver_field[0], g_mu);
  }
  else {
    cg_her(solver_field[3], solver_field[4], Ncy, 1.e-8, 1, VOLUME/2, &Qtm_pm_psi);
    Qtm_minus_psi(solver_field[3], solver_field[3]);
    /* Reconstruct the even sites */
    Hopping_Matrix(EO, solver_field[4], solver_field[3]);
    mul_one_pm_imu_inv(solver_field[4], +1., VOLUME/2);
  }
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing */
  assign_add_mul_r(solver_field[2], solver_field[4], +1., VOLUME/2);
  convert_eo_to_lexic(P, solver_field[2], solver_field[3]);
  g_mu = musave;
  finalize_solver(solver_field, nr_sf);
  return;
}
/* Acceptance step for a rational-approximation monomial: recomputes the
 * pseudo-fermion energy energy1 = <pf, R pf> with a multi-shift CG over the
 * rational shifts and returns dH = energy1 - energy0.
 * id: index into monomial_list; hf: gauge field container.
 * Side effects: resets g_mu/g_mu3 to zero, sets boundary(kappa), and for
 * CLOVERRAT recomputes the clover term and its even-even inverse. */
double rat_acc(const int id, hamiltonian_field_t * const hf) {
  solver_pm_t solver_pm;
  monomial * mnl = &monomial_list[id];
  double atime, etime, dummy;
  atime = gettime();
  // only for non-twisted operators
  g_mu = 0.;
  g_mu3 = 0.;
  boundary(mnl->kappa);
  if(mnl->type == CLOVERRAT) {
    g_c_sw = mnl->c_sw;
    sw_term((const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw);
    sw_invert(EE, 0.);
  }
  mnl->energy1 = 0.;
  /* configure the multi-mass solver over the rational shifts mu_j */
  solver_pm.max_iter = mnl->maxiter;
  solver_pm.squared_solver_prec = mnl->accprec;
  solver_pm.no_shifts = mnl->rat.np;
  solver_pm.shifts = mnl->rat.mu;
  solver_pm.type = CGMMS;
  solver_pm.M_psi = mnl->Qsq;
  solver_pm.sdim = VOLUME/2;
  solver_pm.rel_prec = g_relative_precision_flag;
  mnl->iter0 += cg_mms_tm(g_chi_up_spinor_field, mnl->pf, &solver_pm, &dummy);
  // apply R to the pseudo-fermion fields: w0 = pf + sum_j rmu_j * chi_j
  assign(mnl->w_fields[0], mnl->pf, VOLUME/2);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    assign_add_mul_r(mnl->w_fields[0], g_chi_up_spinor_field[j], mnl->rat.rmu[j], VOLUME/2);
  }
  /* energy1 = <pf, R pf> */
  mnl->energy1 = scalar_prod_r(mnl->pf, mnl->w_fields[0], VOLUME/2, 1);
  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial acc step: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 0) { // should probably require g_debug_level > 3 like other monomials
      printf("called rat_acc for id %d dH = %1.10e\n", id, mnl->energy1 - mnl->energy0);
    }
  }
  return(mnl->energy1 - mnl->energy0);
}
/* Legacy iterative solver: a fixed-point ("geometric series") iteration for
 * M x = l, falling back to CG if it does not converge within ITER_MAX_BCG
 * steps.
 *
 * k: in/out solution field; l: source; eps_sq: squared target residual.
 * Returns the iteration count (offset by -1000000 when the CG fallback was
 * entered after a failed geometric-series phase).
 *
 * NOTE(review): this relies on names not visible in this chunk -- q_off,
 * sout, solvout, ITER_MAX_BCG, and M_psi/Q_psi/solve_cg taking spinor-field
 * *indices* rather than pointers.  Presumably file-scope globals of a legacy
 * code path; confirm they are defined (possibly under an #ifdef) elsewhere. */
int bicg(spinor * const k, spinor * const l, double eps_sq) {
  int iteration;
  double xxx;
  xxx=0.0;
  /* reference field: gamma5 * l, kept in DUM_SOLVER+1 for the residual test */
  gamma5(g_spinor_field[DUM_SOLVER+1], l, VOLUME/2);
  /* main loop */
  for(iteration=1;iteration<=ITER_MAX_BCG;iteration++) {
    /* compute the residual*/
    M_psi(DUM_SOLVER,k,q_off);
    xxx=diff_and_square_norm(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+1], VOLUME/2);
    /*apply the solver step for the residual*/
    M_psi(DUM_SOLVER+2,DUM_SOLVER,q_off-(2.+2.*q_off));
    /* NOTE(review): argument order (scalar before field) differs from the
     * assign_add_mul_r convention used elsewhere in this file -- presumably
     * an older overload/signature; verify against its declaration. */
    assign_add_mul_r(k,-1./((1.+q_off)*(1.+q_off)),g_spinor_field[DUM_SOLVER+2], VOLUME/2);
    if(xxx <= eps_sq) break;
  }
  if(g_proc_id==0) {
    /* append convergence info to the solver log file */
    sout = fopen(solvout, "a");
    fprintf(sout, "%d %e %f\n",iteration,xxx, g_mu);
    fclose(sout);
  }
  /* if the geometric series fails, redo with conjugate gradient */
  if(iteration>=ITER_MAX_BCG) {
    if(ITER_MAX_BCG == 0) {
      iteration = 0;
    }
    zero_spinor_field(k,VOLUME/2);
    iteration += solve_cg(k,l,q_off,eps_sq);
    Q_psi(k,k,q_off);
    /* large negative offset marks "fallback used" in the returned count */
    if(ITER_MAX_BCG != 0) {
      iteration -= 1000000;
    }
    if(g_proc_id == 0) {
      sout = fopen(solvout, "a");
      fprintf(sout, "%d %e\n",iteration, g_mu);
      fclose(sout);
    }
  }
  return iteration;
}
/* BiCGstab(l) solver for f(x) = b (Sleijpen/Fokkema-style: alternating BiCG
 * and minimal-residual parts of degree l).
 *
 * x0: in/out solution (updated in place); b: source;
 * max_iter: iteration budget; eps_sq: squared stopping residual
 * (absolute if rel_prec == 0, relative to ||b||^2 if rel_prec == 1);
 * _l: the "l" of BiCGstab(l); N: local field length; f: the matrix apply.
 *
 * Returns the iteration count, or -1 exactly when k == max_iter on exit.
 *
 * NOTE(review): r[5]/u[5] and tau[5][5] bound the usable degree to _l <= 4;
 * no check is performed here -- confirm callers respect this. */
int bicgstabell(spinor * const x0, spinor * const b, const int max_iter,
                double eps_sq, const int rel_prec, const int _l, const int N,
                matrix_mult f) {
  double err;
  int i, j, k, l;
  double rho0, rho1, beta, alpha, omega, gamma0 = 0., squarenorm;
  spinor * r[5], * u[5], * r0_tilde, * x;
  double tau[5][5], gamma[25], gammap[25], gammapp[25], sigma[25];
  spinor ** solver_field = NULL;
  const int nr_sf = 2*(_l+1)+2;
  l = _l;
  /* k starts at -l so that the first "k += l" makes the count start at 0 */
  k = -l;
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r0_tilde = solver_field[0];
  for(i = 0; i <= l; i++){
    r[i] = solver_field[2+2*i];
    u[i] = solver_field[3+2*i];
  }
  x = x0;
  assign(u[0], b, N);
  /* initial residual r[0] = b - f(x) */
  f(r0_tilde, x);
  diff(r[0], u[0], r0_tilde, N);
  zero_spinor_field(solver_field[1], N);
  /* shadow residual r~0 fixed to the initial residual */
  assign(r0_tilde, r[0], N);
  squarenorm = square_norm(b, N, 1);
  rho0 = 1.;
  alpha = 0.;
  omega = 1.;
  err = square_norm(r0_tilde, N, 1);
  while( k < max_iter && (((err > eps_sq) && (rel_prec == 0))
                          || ((err > eps_sq*squarenorm) && (rel_prec == 1)) )) {
    k+=l;
    /* The BiCG part */
    rho0 *= -omega;
    for(j = 0; j < l; j++) {
      rho1 = scalar_prod_r(r[j], r0_tilde, N, 1);
      beta = (rho1/rho0);
      beta *= alpha;
      rho0 = rho1;
      for(i = 0; i <= j; i++) {
        /* u_i = r_i - \beta u_i */
        assign_mul_add_r(u[i], -beta, r[i], N);
      }
      f(u[j+1], u[j]);
      gamma0 = scalar_prod_r(u[j+1], r0_tilde, N, 1);
      alpha = rho0/gamma0;
      /* r_i = r_i - \alpha u_{i+1} */
      for(i = 0; i <= j; i++) {
        assign_add_mul_r(r[i], u[i+1], -alpha, N);
      }
      f(r[j+1], r[j]);
      /* x = x + \alpha u_0 */
      assign_add_mul_r(x, u[0], alpha, N);
      err = square_norm(r[j+1], N, 1);
      if(g_proc_id == 0 && g_debug_level > 1) {printf("%d %d err = %e\n", k, j, err);fflush(stdout);}
    }
    /* The MR part: modified Gram-Schmidt on r[1..l], then polynomial update */
    for(j = 1; j <= l; j++){
      for(i = 1; i < j; i++){
        tau[i][j] = scalar_prod_r(r[j], r[i], N, 1)/sigma[i];
        assign_add_mul_r(r[j], r[i], -tau[i][j], N);
      }
      sigma[j] = scalar_prod_r(r[j], r[j], N, 1);
      gammap[j] = scalar_prod_r(r[0], r[j], N, 1)/sigma[j];
    }
    gamma[l] = gammap[l];
    omega = gamma[l];
    /* back-substitution for gamma */
    for(j = l-1; j > 0; j--) {
      gamma[j] = gammap[j];
      for(i = j+1; i <= l; i++) {
        gamma[j] -= (tau[j][i]*gamma[i]);
      }
    }
    for(j = 1; j < l; j++) {
      gammapp[j] = gamma[j+1];
      for(i = j+1; i < l; i++){
        gammapp[j] += (tau[j][i]*gamma[i+1]);
      }
    }
    /* update solution, residual and search directions */
    assign_add_mul_r(x, r[0], gamma[1], N);
    assign_add_mul_r(r[0], r[l], -gammap[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(x, r[j], gammapp[j], N);
      assign_add_mul_r(r[0], r[j], -gammap[j], N);
    }
    assign_add_mul_r(u[0], u[l], -gamma[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(u[0], u[j], -gamma[j], N);
    }
    err = square_norm(r[0], N, 1);
    if(g_proc_id == 0 && g_debug_level > 0){
      printf(" BiCGstabell iterated %d %d, %e rho0 = %e, alpha = %e, gamma0= %e\n",
             l, k, err, rho0, alpha, gamma0);
      fflush( stdout );
    }
  }
  finalize_solver(solver_field, nr_sf);
  if(k == max_iter) return(-1);
  return(k);
}
/* Heatbath for the rational-approximation *correction* monomial of the
 * non-degenerate doublet: draws Gaussian pseudo-fermion fields and then
 * multiplies them by a truncated Taylor series of (1 + Z)^(1/4), where
 * Z = (Q tau_1 R)^2 - 1 is the rational-approximation error operator
 * (see apply_Z_ndpsi).  The series for (1 + Z)^(1/2) is used only for the
 * convergence check of the truncation.
 *
 * id: index into monomial_list; hf: gauge/momentum field container.
 * Side effects: fills mnl->pf / mnl->pf2, sets mnl->energy0, configures
 * mnl->solver_params. */
void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  double atime, etime, delta;
  spinor * up0, * dn0, * up1, * dn1, * tup, * tdn, * Zup, * Zdn;
  double coefs[6] = {1./4., -3./32., 7./128., -77./2048., 231./8192., -1463./65536.}; // series of (1+x)^(1/4)
  double coefs_check[6] = {1./2., -1./8., 1./16., -5./128., 7./256., -21./1024.}; // series of (1+x)^(1/2)
  atime = gettime();
  nd_set_global_parameter(mnl);
  g_mu3 = 0.;
  mnl->iter0 = 0;
  if(mnl->type == NDCLOVERRATCOR) {
    init_sw_fields();
    sw_term((const su3**)hf->gaugefield, mnl->kappa, mnl->c_sw);
    sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
    copy_32_sw_fields();
  }
  // we measure before the trajectory!
  if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) {
    // NOTE(review): the comparison uses NDCLOVERRAT although this monomial is
    // of type NDRATCOR/NDCLOVERRATCOR -- confirm the intended type constant.
    if(mnl->type != NDCLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi);
    else phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi);
  }
  // the Gaussian distributed random fields
  mnl->energy0 = 0.;
  random_spinor_field_eo(mnl->pf, mnl->rngrepro, RN_GAUSS);
  mnl->energy0 = square_norm(mnl->pf, VOLUME/2, 1);
  random_spinor_field_eo(mnl->pf2, mnl->rngrepro, RN_GAUSS);
  mnl->energy0 += square_norm(mnl->pf2, VOLUME/2, 1);
  // multi-shift solver configuration
  mnl->solver_params.max_iter = mnl->maxiter;
  mnl->solver_params.squared_solver_prec = mnl->accprec;
  mnl->solver_params.no_shifts = mnl->rat.np;
  mnl->solver_params.shifts = mnl->rat.mu;
  mnl->solver_params.type = mnl->solver;
  mnl->solver_params.M_ndpsi = &Qtm_pm_ndpsi;
  mnl->solver_params.M_ndpsi32 = &Qtm_pm_ndpsi_32;
  if(mnl->type == NDCLOVERRATCOR) {
    mnl->solver_params.M_ndpsi = &Qsw_pm_ndpsi;
    mnl->solver_params.M_ndpsi32 = &Qsw_pm_ndpsi_32;
  }
  mnl->solver_params.sdim = VOLUME/2;
  mnl->solver_params.rel_prec = g_relative_precision_flag;
  // apply B to the random field to generate pseudo-fermion fields
  up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
  up1 = mnl->w_fields[2]; dn1 = mnl->w_fields[3];
  Zup = mnl->w_fields[4]; Zdn = mnl->w_fields[5];
  apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &(mnl->solver_params));
  // first-order correction to energy1
  delta = coefs_check[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1)
                          + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
  if(g_debug_level > 2 && g_proc_id == 0)
    printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", 1, 1, delta);
  // debug for showing that the old check was giving a smaller delta
  if(g_debug_level > 3) {
    double delta_old = square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1);
    if(g_proc_id == 0) {
      printf("# NDRATCOR old check: || Z^%d * R ||^2 = %e\n", 1, delta_old);
      printf("# NDRATCOR new check: (c_%d*(R * Z^%d * R))^2 = %e\n", 1, 1, delta*delta);
    }
  }
  if(delta*delta > mnl->accprec) {
    assign_add_mul_r(mnl->pf, up0, coefs[0], VOLUME/2);
    assign_add_mul_r(mnl->pf2, dn0, coefs[0], VOLUME/2);
    // saving first application
    assign(Zup, up0, VOLUME/2);
    assign(Zdn, dn0, VOLUME/2);
    /* Higher orders.  BUGFIX: the loop bound was i < 8, which read
     * coefs[i-1] / coefs_check[i-1] at index 6 when i == 7 -- one past the
     * end of the 6-element arrays (undefined behaviour).  The bound i < 7
     * keeps all accesses within the provided coefficients. */
    for(int i = 2; i < 7; i++) {
      // next-order correction to energy1
      // BUGFIX: the down-flavour term used Zup instead of Zdn
      // (Zdn was assigned above but never read).
      delta = coefs_check[i-1]*(scalar_prod_r(Zup, up0, VOLUME/2, 1)
                                + scalar_prod_r(Zdn, dn0, VOLUME/2, 1));
      if(g_debug_level > 2 && g_proc_id == 0)
        printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", i, i, delta);
      // debug for showing that the old check was giving a smaller delta
      if(g_debug_level > 3) {
        double delta_old = square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1);
        if(g_proc_id == 0) {
          /* BUGFIX: these messages printed hard-coded 1s instead of the
           * current order (up0 holds Z^{i-1} * R at this point) */
          printf("# NDRATCOR old check: || Z^%d * R ||^2 = %e\n", i-1, delta_old);
          printf("# NDRATCOR new check: (c_%d*(R * Z^%d * R))^2 = %e\n", i, i, delta*delta);
        }
      }
      if(delta*delta < mnl->accprec) break;
      apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &(mnl->solver_params));
      assign_add_mul_r(mnl->pf, up1, coefs[i-1], VOLUME/2);
      assign_add_mul_r(mnl->pf2, dn1, coefs[i-1], VOLUME/2);
      // rotate buffers for the next order
      tup = up0; tdn = dn0;
      up0 = up1; dn0 = dn1;
      up1 = tup; dn1 = tdn;
    }
  }
  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial heatbath: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) {
      printf("called ndratcor_heatbath for id %d energy %f\n", id, mnl->energy0);
    }
  }
  return;
}
// computes ||(1 - C^dagger R C) phi||
// Consistency check: applies C, then the rational approximation R, then
// C^dagger to (l_up, l_dn), subtracts the input, and prints the squared norm
// of the difference (which should be small if R ~ (C^dagger C)^{-1}).
// k_up/k_dn receive the difference field.  Uses g_chi_{up,dn}_spinor_field
// as scratch; entry [mnl->rat.np] is an extra temporary.
// NOTE(review): the first solve uses whatever shifts the caller configured
// in solver_params -- presumably mnl->rat.nu for the C application; confirm
// at the call site.
void check_C_ndpsi(spinor * const k_up, spinor * const k_dn,
                   spinor * const l_up, spinor * const l_dn,
                   const int id, hamiltonian_field_t * const hf,
                   solver_params_t * solver_params) {
  monomial * mnl = &monomial_list[id];
  mnl->iter0 = solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
                            l_up, l_dn, solver_params);
  assign(k_up, l_up, VOLUME/2);
  assign(k_dn, l_dn, VOLUME/2);
  // apply C to the random field to generate pseudo-fermion fields
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    // Q_h * tau^1 - i nu_j
    // this needs phmc_Cpol = 1 to work!
    if(mnl->type == NDCLOVERRATCOR || mnl->type == NDCLOVERRAT) {
      Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
                               g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j],
                               I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    else {
      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
                             g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j],
                             I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    assign_add_mul(k_up, g_chi_up_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
    assign_add_mul(k_dn, g_chi_dn_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
  }
  //apply R
  solver_params->shifts = mnl->rat.mu;
  solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
               k_up, k_dn, solver_params);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    assign_add_mul_r(k_up, g_chi_up_spinor_field[j], mnl->rat.rmu[j], VOLUME/2);
    assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], mnl->rat.rmu[j], VOLUME/2);
  }
  // apply C^dagger
  solver_params->shifts = mnl->rat.nu;
  solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field, k_up, k_dn, solver_params);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    // Q_h * tau^1 + i nu_j
    if(mnl->type == NDCLOVERRATCOR || mnl->type == NDCLOVERRAT) {
      Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
                               g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j],
                               -I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    else {
      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
                             g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j],
                             -I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    assign_add_mul(k_up, g_chi_up_spinor_field[mnl->rat.np], -I*mnl->rat.rnu[j], VOLUME/2);
    assign_add_mul(k_dn, g_chi_dn_spinor_field[mnl->rat.np], -I*mnl->rat.rnu[j], VOLUME/2);
  }
  diff(k_up, k_up, l_up, VOLUME/2);
  diff(k_dn, k_dn, l_dn, VOLUME/2);
  double resi = square_norm(k_up, VOLUME/2, 1);
  resi += square_norm(k_dn, VOLUME/2, 1);
  // NOTE: resi is a *squared* norm although the message prints ||.||
  if(g_proc_id == 0) printf("|| (1-C^dagger R C)*phi|| = %e\n", resi);
  return;
}
/* Incremental eigCG: a conjugate-gradient solve of f(x) = b that
 * simultaneously builds approximations to the lowest nev eigenvectors of f
 * from the CG Lanczos coefficients (restarted V of maximal size v_max).
 *
 * n, lde:   local length and leading dimension of the spinor fields
 * x, b:     solution (in/out) and right-hand side
 * normb:    in/out ||b|| (reused when *flag == 3, i.e. a restarted call)
 * eps_sq:   squared stopping tolerance (absolute or relative via rel_prec)
 * restart_eps_sq: relative residual drop that triggers flag = 3 (nev == 0)
 * maxit, iter:    iteration budget / cumulative iteration counter (in/out)
 * reshist:  out, last residual norm squared
 * flag:     out: 0 converged, 1 budget exhausted, 2 breakdown, 3 restart
 * work:     four preallocated spinor fields (r, p, Ap, Ap_prev)
 * f:        the matrix-vector product
 * nev, v_max, V:  number of wanted eigenvectors, basis size, basis storage
 * esize, ework:   scratch for the basis restart (Zrestart_X)
 *
 * NOTE(review): guarded by "#ifdef MPI" while other code in this file uses
 * TM_USE_MPI -- confirm which macro the build actually defines here. */
void eigcg(int n, int lde, spinor * const x, spinor * const b, double *normb,
           const double eps_sq, double restart_eps_sq, const int rel_prec,
           int maxit, int *iter, double *reshist, int *flag, spinor **work,
           matrix_mult f, int nev, int v_max, spinor *V, int esize,
           _Complex double *ework)
{
  double tolb;                /* absolute tolerance; NOTE: computed but unused below */
  double alpha, beta;         /* CG scalars */
  double rho, rhoprev;
  double pAp;
  int it;                     /* current iteration number */
  int i, j;                   /* loop variables */
  int zs,ds,tmpsize;          /* element sizes; tmpsize is unused */
  spinor *r, *p, *Ap;         /* ptrs in work for CG vectors */
  _Complex double tempz;      /* double precision complex temp var */
  double tempd;               /* double temp var */
  int tempi;                  /* int temp var */
  int ONE = 1;                /* var for passing 1 into BLAS routines */
  /*----------------------------------------------------------------------
    Eigen variables and setup
    ----------------------------------------------------------------------*/
  /* Some constants (LAPACK side/trans/job/uplo arguments) */
  char cR = 'R'; char cL = 'L'; char cN = 'N';
  char cV = 'V'; char cU = 'U'; char cC = 'C';
  double betaprev, alphaprev; /* remember the previous iterations scalars */
  int v_size;                 /* tracks the size of V */
  int lwork = 3*v_max;        /* the size of zwork */
  spinor *Ap_prev;
  void *_h;
  _Complex double *H;         /* the V'AV projection matrix */
  void *_hevecs;
  _Complex double *Hevecs;    /* the eigenvectors of H */
  void *_hevecsold;
  _Complex double *Hevecsold; /* the eigenvectors of H(v_max-1,v_max-1) */
  void *_hevals;
  double *Hevals;             /* the eigenvalues of H */
  void *_hevalsold;
  double *Hevalsold;          /* the eigenvalues of H(m-1,m-1) */
  void *_tau;
  _Complex double *TAU;
  void *_zwork;
  _Complex double *zwork;     /* double complex work array needed by zheev */
  void *_rwork;
  double *rwork;              /* double work array needed by zheev */
  int parallel;
  double tmpd;
  _Complex double tmpz;
  zs = sizeof(_Complex double);
  ds = sizeof(double);
  int info, allelems = v_max*v_max;
#ifdef MPI
  parallel=1;
#else
  parallel=0;
#endif
  if(nev > 0) /*allocate memory only if eigenvalues will be used */
  {
#if (defined SSE || defined SSE2 || defined SSE3)
    /* SSE builds: allocate oversized and align manually to ALIGN_BASE */
    if ((_h = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    { if( g_proc_id == g_stdio_proc) {fprintf(stderr,"ERROR Could not allocate H\n"); exit(1);} }
    else
      H = (_Complex double *)(((unsigned long int)(_h)+ALIGN_BASE)&~ALIGN_BASE);
    if ((_hevecs = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate Hevecs\n"); exit(1);} }
    else
      Hevecs = (_Complex double *)(((unsigned long int)(_hevecs)+ALIGN_BASE)&~ALIGN_BASE);
    if ((_hevecsold = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate Hevecsold\n"); exit(1);} }
    else
      Hevecsold = (_Complex double *)(((unsigned long int)(_hevecsold)+ALIGN_BASE)&~ALIGN_BASE);
    if ((_hevals = calloc(v_max+ALIGN_BASE,ds)) == NULL)
    { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate Hevals\n"); exit(1);} }
    else
      Hevals = (double *)(((unsigned long int)(_hevals)+ALIGN_BASE)&~ALIGN_BASE);
    if ((_hevalsold = calloc(v_max+ALIGN_BASE,ds)) == NULL)
    { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate Hevalsold\n"); exit(1); } }
    else
      Hevalsold = (double *)(((unsigned long int)(_hevalsold)+ALIGN_BASE)&~ALIGN_BASE);
    if ((_tau = calloc(2*nev+ALIGN_BASE,zs)) == NULL)
    { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate TAU\n"); exit(1); } }
    else
      TAU = (_Complex double *)(((unsigned long int)(_tau)+ALIGN_BASE)&~ALIGN_BASE);
    if ((_zwork = calloc(lwork+ALIGN_BASE,zs)) == NULL)
    { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate zwork\n"); exit(1);} }
    else
      zwork = (_Complex double *)(((unsigned long int)(_zwork)+ALIGN_BASE)&~ALIGN_BASE);
    if ((_rwork = calloc(3*v_max+ALIGN_BASE,ds)) == NULL)
    { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate rwork\n"); exit(1);} }
    else
      rwork = (double *)(((unsigned long int)(_rwork)+ALIGN_BASE)&~ALIGN_BASE);
#else
    if ((H = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate H\n"); exit(1);} }
    if ((Hevecs = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate Hevecs\n"); exit(1);} }
    if ((Hevecsold = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate Hevecsold\n"); exit(1);} }
    if ((Hevals = (double *) calloc(v_max, ds)) == NULL)
    { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate Hevals\n"); exit(1);} }
    if ((Hevalsold = (double *) calloc(v_max, ds)) == NULL)
    { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate Hevalsold\n"); exit(1); } }
    if ((TAU = (_Complex double *) calloc(2*nev, zs)) == NULL)
    { if( g_proc_id == g_stdio_proc ) {fprintf(stderr, "ERROR Could not allocate TAU\n"); exit(1); } }
    if ((zwork = (_Complex double *) calloc(lwork, zs)) == NULL)
    { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate zwork\n"); exit(1);} }
    if ((rwork = (double *) calloc(3*v_max, ds)) == NULL)
    { if( g_proc_id == g_stdio_proc) {fprintf(stderr, "ERROR Could not allocate rwork\n"); exit(1);} }
#endif
  } /* end if (nev > 0) */
  /*----------------------------------------------------------------------*/
  /* setup pointers into work */
  r = work[0];
  p = work[1];
  Ap = work[2];
  Ap_prev = work[3];
  /*--------------------------------------------------------------------
     Initialization phase
    --------------------------------------------------------------------*/
  if (*flag != 3) {
    /* If flag == 3, the eigCG is called after restart with the same b
     * whose norm is already known in normb, so no need for these */
    tempd = square_norm(b,n,parallel); /* Norm of rhs, b */
    *normb = sqrt(tempd);
    /* If right hand side is zero return zero solution. ITER stays the same */
    if (*normb == 0.0) {
      for (i=0; i<n; i++) {
        _vector_null(x[i].s0);
        _vector_null(x[i].s1);
        _vector_null(x[i].s2);
        _vector_null(x[i].s3);
      }
      *flag = 0;
      *reshist = 0.0;
      if( g_debug_level > 0 && g_proc_id == g_stdio_proc)
        displayInfo(eps_sq,maxit,*flag,*iter,*reshist);
      return;
    }
  }
  /* Set up for the method */
  *flag = 1;
  tolb = eps_sq * (*normb)*(*normb); /* Relative to b tolerance; NOTE: unused below */
  /* Zero-th residual: r = b - A*x */
  f(r,x);
  diff(r,b,r,n);
  rho = 0.0;
  alpha = 1.0;
  beta = 0.0;
  v_size = 0;
  double reshist_init=square_norm(r,n,parallel);
  //if( g_proc_id == g_stdio_proc )
  //fprintf(stdout, "reshist init %f\n", reshist_init);
  /*--------------------------------------------------------------------
     main CG loop
    --------------------------------------------------------------------*/
  for (it = 0; it < maxit; it++) {
    rhoprev = rho;
    rho=square_norm(r,n,parallel);
    *reshist = rho;
    if ( (g_debug_level > 2) && (g_proc_id == g_stdio_proc) ) {
      fprintf(stdout, " Linsys res( %d ): %g\n",*iter+it,*reshist);
      fflush(stdout);
    }
    /* Convergence test */
    if ( ( (*reshist < eps_sq) && (rel_prec==0) )
         || ( (*reshist < eps_sq*(*normb)*(*normb)) && (rel_prec ==1 ) ) ) {
      *flag = 0;
      break;  /* break do not return */
    }
    /* Restart test (only when no eigenvector accumulation is requested) */
    if(nev==0) {
      if (*reshist < (restart_eps_sq*reshist_init) ) {
        *flag = 3;
        break;  /* break do not return */
      }
    }
    if (it == 0)
      assign(p,r,n);
    else {
      betaprev = beta;
      beta = rho / rhoprev;
      if (beta == 0.0) {  /* breakdown */
        *flag = 2;
        break;
      }
      assign_mul_add_r(p,beta,r,n); /* p = beta*p + r */
    }
    /*----- eigCG specific code -------------------------------------------*/
    /* Remember Ap from previous iteration to be used at restart */
    if (nev > 0 && v_size == v_max)
      assign(Ap_prev,Ap,n);
    /*---------------------------------------------------------------------*/
    f(Ap,p);
    /*----- eigCG specific code -------------------------------------------*/
    if (nev > 0) {
      /* record the diagonal vAv for the previous vector */
      if (it > 0) {
        H[(v_size-1)*v_max+v_size-1]= 1.0/alpha + betaprev/alphaprev;
        //H[(v_size-1)*v_max+v_size-1].im = 0.0;
      }
      /* Restarting V */
      if (v_size == v_max) {
        /* Solve (v_max) and (v_max-1) eigenproblems */
        tempi = v_max;
        allelems=v_max*v_max;
        _FT(zcopy)(&allelems, H, &ONE, Hevecs, &ONE);
        _FT(zheev)(&cV,&cU,&tempi,Hevecs,&v_max,Hevals,zwork,&lwork,rwork,&info,1,1);
        if( (info != 0 ) && (g_proc_id==g_stdio_proc))
        {fprintf(stderr, "Error: ZHEEV in eigcg at v_max step, info %d\n",info); exit(1);}
        tempi = v_max-1;
        _FT(zcopy)(&allelems, H, &ONE, Hevecsold, &ONE);
        _FT(zheev)(&cV,&cU,&tempi,Hevecsold,&v_max,Hevalsold,zwork,&lwork,rwork,&info,1,1);
        if( (info != 0 ) && (g_proc_id==g_stdio_proc))
        {fprintf(stderr, "Error: ZHEEV in eigcg at (v_max-1) step, info %d\n",info); exit(1);}
        /* fill 0s in vmax-th elem of oldevecs to match Hevecs */
        for(i=1; i <= v_max ; i++) {Hevecsold[i*v_max-1] = 0.0 ;}
        /* Attach the first nev oldevecs at the end of the nev latest ones */
        tempi = nev*v_max;
        _FT(zcopy)(&tempi,Hevecsold,&ONE,&Hevecs[tempi],&ONE);
        /* Orthogonalize the 2*nev (new+old) vectors Hevecs=QR */
        v_size = 2*nev;
        _FT(zgeqrf)(&v_max,&v_size,Hevecs,&v_max,TAU,zwork,&lwork,&info) ;
        if( (info != 0 ) && (g_proc_id==g_stdio_proc))
        {fprintf(stderr, "Error: ZGEQRF in eigcg info %d\n",info); exit(1);}
        /* use as a temp space Hevecsold = Q^THQ */
        _FT(zcopy)(&allelems,H,&ONE,Hevecsold,&ONE);
        _FT(zunmqr)(&cR,&cN,&v_max,&v_max,&v_size,Hevecs,&v_max,
                    TAU,Hevecsold,&v_max,zwork,&lwork,&info);
        if( (info != 0 ) && (g_proc_id==g_stdio_proc))
        {fprintf(stderr, "Error: ZGEQRF call 1 in eigcg info %d\n",info); exit(1);}
        _FT(zunmqr)(&cL,&cC,&v_max,&v_size,&v_size,Hevecs,&v_max,
                    TAU,Hevecsold,&v_max,zwork,&lwork,&info);
        if( (info != 0 ) && (g_proc_id==g_stdio_proc))
        {fprintf(stderr, "Error: ZGEQRF call 2 in eigcg info %d\n",info); exit(1);}
        /* solve the small Hevecsold v_size x v_size eigenproblem */
        _FT(zheev)(&cV,&cU,&v_size,Hevecsold,&v_max,Hevals,
                   zwork,&lwork,rwork,&info,1,1);
        if( (info != 0 ) && (g_proc_id==g_stdio_proc))
        {fprintf(stderr, "Error: ZHEEV in eigcg info %d\n",info); exit(1);}
        /* zero out unused part of eigenectors in Hevecsold */
        tempi = 0;
        for(i = 0; i < v_size; i++ ) {
          for(j = v_size; j < v_max; j++) {Hevecsold[tempi + j]=0.0;}
          tempi += v_max;
        }
        /* Compute the Hevecsold = Hevecs*Hevecsold */
        _FT(zunmqr)(&cL,&cN,&v_max,&v_size,&v_size,Hevecs,&v_max,
                    TAU,Hevecsold,&v_max,zwork,&lwork,&info);
        if( (info != 0 ) && (g_proc_id==g_stdio_proc))
        {fprintf(stderr, "Error: ZUNMQR, info %d\n",info); exit(1);}
        /* Restart V = V(n,v_max)*Hevecsold(v_max,v_size) */
        Zrestart_X((_Complex double *) V, 12*lde, Hevecsold, 12*n, v_max, v_size, ework, esize);
        /* Restart H = diag(Hevals) plus a column and a row */
        for (i = 0; i < allelems; i++ ) {H[i] = 0.0; }
        for (i = 0; i < v_size; i++) H[i*(v_max+1)]= Hevals[i];
        /* The next residual to be added (v = r/sqrt(rho))
         * needs the (nev+1)-th column and row, through V(:,1:vs)'*A*v.
         * Instead of a matvec, we use the Ap and Ap_prev to obtain this:
         * V(:,1:vs)'*A*V(:,vs+1) = V(:,1:vs)'*A*r/sqrt(rho) =
         * V'(A(p-beta*p_prev))/sqrt(rho) = V'(Ap - beta*Ap_prev)/sqrt(rho)*/
        tmpd=-beta;
        assign_mul_add_r(Ap_prev,tmpd,Ap,n); /* Ap_prev=Ap-beta*Ap_prev */
        tempi=v_size*v_max;
        for (i=0; i<v_size; i++){
          tmpz=scalar_prod(&V[i*lde],Ap_prev,n,parallel);
          H[v_size+i*v_max]=tmpz/sqrt(rho);
          H[i+tempi]=conj(tmpz)/sqrt(rho);
        }
      } /* end of if v_size == v_max */
      else {
        /* update (vs+1,vs),(vs,vs+1) elements of tridigonal which are real*/
        if ( it > 0) {
          H[(v_size-1)*v_max + v_size]= -sqrt(beta)/alpha;
          H[v_size*v_max + v_size-1] = creal(H[(v_size-1)*v_max + v_size]);
        }
      } /* of else */
      /* Augment V with the current CG residual r normalized by sqrt(rho) */
      tmpd=1.0/sqrt(rho);
      mul_r(&V[v_size*lde],tmpd,r,n);
      v_size++;
    } /* end of if nev >0 , ie., the eigCG specific code */
    /*---------------------------------------------------------------------*/
    /* pAp = p' * Ap */
    tempz=scalar_prod(p,Ap,n,parallel);
    pAp = creal(tempz);
    if (pAp == 0.0) {  /* breakdown */
      *flag = 2;
      break;
    }
    alphaprev = alpha;
    alpha = rho / pAp;
    assign_add_mul_r(x,p,alpha,n);  /*update x*/
    tmpd=-alpha;
    assign_add_mul_r(r,Ap,tmpd,n);  /*update r*/
    //next line useful for debugging
    //printf("%d beta, alpha, rho, pAp %le %le %le %le\n",it,beta,alpha,rho,pAp);
  } /* for it = 0 : maxit-1 */
  *iter = *iter + it+1; /* record the number of CG iterations plus any older */
  if( g_proc_id == g_stdio_proc && g_debug_level > 0)
    displayInfo(eps_sq,maxit,*flag,*iter-1,*reshist);
  /* free the eigen workspace (aligned variants free the raw pointers) */
  if(nev > 0 ) {
#if (defined SSE || defined SSE2 || defined SSE3)
    H= NULL;
    free(_h);
    Hevecs=NULL;
    free(_hevecs);
    Hevecsold=NULL;
    free(_hevecsold);
    Hevals=NULL;
    free(_hevals);
    Hevalsold=NULL;
    free(_hevalsold);
    TAU=NULL;
    free(_tau);
    zwork=NULL;
    free(_zwork);
    rwork=NULL;
    free(_rwork);
#else
    free(H);
    free(Hevecs);
    free(Hevecsold);
    free(Hevals);
    free(Hevalsold);
    free(TAU);
    free(zwork);
    free(rwork);
#endif
  }
  return;
}
/* Invert the even/odd-preconditioned twisted-mass non-degenerate flavour
 * doublet operator.
 *
 * Even_new_{s,c}/Odd_new_{s,c}: output propagator components (strange/charm).
 * Even_{s,c}/Odd_{s,c}:         source components.
 * precision, max_iter, rel_prec: solver stopping criteria (rel_prec selects
 *                                relative vs. absolute residual).
 * solver_flag:  selects RGMIXEDCG vs. plain CG on the CPU path.
 * external_inverter/sloppy/compression: dispatch to QUDA or QPhiX back-ends.
 *
 * Returns the iteration count reported by the solver that was used.
 */
int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
                      spinor * const Even_new_c, spinor * const Odd_new_c,
                      spinor * const Even_s, spinor * const Odd_s,
                      spinor * const Even_c, spinor * const Odd_c,
                      const double precision, const int max_iter,
                      const int solver_flag, const int rel_prec,
                      solver_params_t solver_params,
                      const ExternalInverter external_inverter,
                      const SloppyPrecision sloppy,
                      const CompressionType compression) {

  int iter = 0;

#ifdef TM_USE_QUDA
  /* QUDA handles the full even/odd inversion itself; return immediately. */
  if( external_inverter==QUDA_INVERTER ) {
    return invert_doublet_eo_quda( Even_new_s, Odd_new_s, Even_new_c, Odd_new_c,
                                   Even_s, Odd_s, Even_c, Odd_c,
                                   precision, max_iter,
                                   solver_flag, rel_prec,
                                   1, sloppy, compression );
  }
#endif

#ifdef HAVE_GPU
# ifdef TEMPORALGAUGE
  /* transform sources to temporal gauge for the GPU solver; reverted below */
  if (usegpu_flag) {
    gtrafo_eo_nd(Even_s, Odd_s, Even_c, Odd_c,
                 (spinor*const)NULL, (spinor*const)NULL, (spinor*const)NULL, (spinor*const)NULL,
                 GTRAFO_APPLY);
  }
# endif
#endif /* HAVE_GPU*/

  /* here comes the inversion using even/odd preconditioning */
  if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}

  /* prepare the preconditioned source on the odd sites:
   * b_odd' = Odd + H_oe M_ee^{-1} Even  (sign convention noted below) */
  M_ee_inv_ndpsi(Even_new_s, Even_new_c, Even_s, Even_c, g_mubar, g_epsbar);

  Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);

  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing */
  assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2);
  assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2);

  /* Do the inversion with the preconditioned */
  /* matrix to get the odd sites               */

  /* Here we invert the hermitean operator squared */

  if(g_proc_id == 0) {
    printf("# Using CG for TMWILSON flavour doublet!\n");
    fflush(stdout);
  }

  if ( external_inverter == NO_EXT_INV ){
    /* gamma_5 on the source because we solve with Q^dagger Q */
    gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
    gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2);

#ifdef HAVE_GPU
    if (usegpu_flag) {	// GPU, mixed precision solver
# if ( defined TM_USE_MPI && defined PARALLELT )
      iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                              max_iter, precision, rel_prec);
# elif ( !defined TM_USE_MPI && !defined PARALLELT )
      iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                              max_iter, precision, rel_prec);
# else
      printf("MPI and/or PARALLELT are not appropriately set for the GPU implementation. Aborting...\n");
      exit(-1);
# endif
    }
    else {	// CPU, conjugate gradient
      iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                       max_iter, precision, rel_prec, VOLUME/2, &Qtm_pm_ndpsi);
    }
#else
    // CPU, conjugate gradient
    if(solver_flag == RGMIXEDCG){
      iter = rg_mixed_cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                                solver_params, max_iter, precision, rel_prec, VOLUME/2,
                                &Qtm_pm_ndpsi, &Qtm_pm_ndpsi_32);
    }
    else {
      iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                       max_iter, precision, rel_prec, VOLUME/2, &Qtm_pm_ndpsi);
    }
#endif
    /* turn the (Q^dagger Q)^{-1} solution into the Q^{-1} solution */
    Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c, Odd_new_s, Odd_new_c);
  } // if(NO_EXT_INV)
#ifdef TM_USE_QPHIX
  else if (external_inverter == QPHIX_INVERTER ) {
    // using QPhiX, we invert M M^dagger y = b, so we don't need gamma_5 multiplications
    iter = invert_eo_qphix_twoflavour(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                                      max_iter, precision, solver_flag, rel_prec, solver_params,
                                      sloppy, compression);
    // and it multiplies y internally by M^dagger, returning M^{-1} b as required
  }
#endif // TM_USE_QPHIX

  /* Reconstruct the even sites */
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
  M_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
                 g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                 g_mubar, g_epsbar);

  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing */
  assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2);
  assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2);

#ifdef HAVE_GPU
  /* return from temporal gauge again */
# ifdef TEMPORALGAUGE
  if (usegpu_flag) {
    gtrafo_eo_nd(Even_s, Odd_s, Even_c, Odd_c,
                 Even_new_s, Odd_new_s, Even_new_c, Odd_new_c,
                 GTRAFO_REVERT);
  }
# endif
#endif
  return(iter);
}
/* Invert the even/odd-preconditioned clover-improved twisted-mass
 * non-degenerate flavour doublet operator.
 *
 * Same interface as invert_doublet_eo(), but uses the clover variants
 * Msw_ee_inv_ndpsi / Qsw_* of the operators (note: the QUDA branch reuses
 * invert_doublet_eo_quda, which is expected to pick up the clover term from
 * the globally configured operator).
 *
 * Returns the iteration count reported by the solver that was used.
 */
int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
                            spinor * const Even_new_c, spinor * const Odd_new_c,
                            spinor * const Even_s, spinor * const Odd_s,
                            spinor * const Even_c, spinor * const Odd_c,
                            const double precision, const int max_iter,
                            const int solver_flag, const int rel_prec, solver_params_t solver_params,
                            const ExternalInverter external_inverter,
                            const SloppyPrecision sloppy,
                            const CompressionType compression) {

  int iter = 0;

#ifdef TM_USE_QUDA
  /* QUDA handles the full even/odd inversion itself; return immediately. */
  if( external_inverter==QUDA_INVERTER ) {
    return invert_doublet_eo_quda( Even_new_s, Odd_new_s, Even_new_c, Odd_new_c,
                                   Even_s, Odd_s, Even_c, Odd_c,
                                   precision, max_iter,
                                   solver_flag, rel_prec,
                                   1, sloppy, compression );
  }
#endif

  /* here comes the inversion using even/odd preconditioning */
  if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}

  /* prepare the preconditioned source on the odd sites:
   * b_odd' = Odd + H_oe Msw_ee^{-1} Even  (sign convention noted below) */
  Msw_ee_inv_ndpsi(Even_new_s, Even_new_c, Even_s, Even_c);

  Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);

  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing */
  assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2);
  assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2);

  if( external_inverter == NO_EXT_INV ){
    /* Do the inversion with the preconditioned */
    /* matrix to get the odd sites               */

    /* Here we invert the hermitean operator squared */

    if(g_proc_id == 0) {
      printf("# Using CG for TMWILSON flavour doublet!\n");
      fflush(stdout);
    }
    /* gamma_5 on the source because we solve with Q^dagger Q */
    gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
    gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2);

    if(solver_flag == RGMIXEDCG){
      iter = rg_mixed_cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                                solver_params, max_iter, precision, rel_prec, VOLUME/2,
                                &Qsw_pm_ndpsi, &Qsw_pm_ndpsi_32);
    }
    else {
      iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                       max_iter, precision, rel_prec, VOLUME/2, &Qsw_pm_ndpsi);
    }
    /* turn the (Q^dagger Q)^{-1} solution into the Q^{-1} solution */
    Qsw_dagger_ndpsi(Odd_new_s, Odd_new_c, Odd_new_s, Odd_new_c);
  } // if(NO_EXT_INV)
#ifdef TM_USE_QPHIX
  else if (external_inverter == QPHIX_INVERTER ) {
    // using QPhiX, we invert M M^dagger y = b, so we don't need gamma_5 multiplications
    iter = invert_eo_qphix_twoflavour(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                                      max_iter, precision, solver_flag, rel_prec, solver_params,
                                      sloppy, compression);
    // and it multiplies y internally by M^dagger, returning M^{-1} b as required
  }
#endif // TM_USE_QPHIX

  /* Reconstruct the even sites */
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
  Msw_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
                   g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);

  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing */
  assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2);
  assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2);

  return(iter);
}
int invert_clover_eo(spinor * const Even_new, spinor * const Odd_new, spinor * const Even, spinor * const Odd, const double precision, const int max_iter, const int solver_flag, const int rel_prec,solver_params_t solver_params, su3 *** gf, matrix_mult Qsq, matrix_mult Qm) { int iter; if(g_proc_id == 0 && g_debug_level > 0) { printf("# Using even/odd preconditioning!\n"); fflush(stdout); } assign_mul_one_sw_pm_imu_inv(EE, Even_new, Even, +g_mu); Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new); /* The sign is plus, since in Hopping_Matrix */ /* the minus is missing */ assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd, VOLUME/2); /* Do the inversion with the preconditioned */ /* matrix to get the odd sites */ /* Here we invert the hermitean operator squared */ gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2); if(g_proc_id == 0) { //printf("# Using CG!\n"); printf("# mu = %f, kappa = %f, csw = %f\n", g_mu/2./g_kappa, g_kappa, g_c_sw); fflush(stdout); } if(solver_flag == CG) { if(g_proc_id == 0) { printf("# Using CG!\n"); fflush(stdout); } iter = cg_her(Odd_new, g_spinor_field[DUM_DERI], max_iter, precision, rel_prec, VOLUME/2, Qsq); Qm(Odd_new, Odd_new); } else if(solver_flag == INCREIGCG) { if(g_proc_id == 0) { printf("# Using Incremental Eig-CG!\n"); fflush(stdout); } iter = incr_eigcg(VOLUME/2,solver_params.eigcg_nrhs, solver_params.eigcg_nrhs1, Odd_new, g_spinor_field[DUM_DERI], solver_params.eigcg_ldh, Qsq, solver_params.eigcg_tolsq1, solver_params.eigcg_tolsq, solver_params.eigcg_restolsq , solver_params.eigcg_rand_guess_opt, rel_prec, max_iter, solver_params.eigcg_nev, solver_params.eigcg_vmax); Qm(Odd_new, Odd_new); } else { if(g_proc_id == 0) { printf("# This solver is not available for this operator. 
Exisiting!\n"); fflush(stdout); } return 0; } /* Reconstruct the even sites */ Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new); clover_inv(g_spinor_field[DUM_DERI], +1, g_mu); /* The sign is plus, since in Hopping_Matrix */ /* the minus is missing */ assign_add_mul_r(Even_new, g_spinor_field[DUM_DERI], +1., VOLUME/2); return(iter); }
/*lambda: largest eigenvalue, k eigenvector */ int evamax(double *rz, int k, double q_off, double eps_sq) { static double ritz,norm0,normg,normg0,beta_cg; static double costh,sinth,cosd,sind,aaa,normp,xxx; static double xs1,xs2,xs3; int iteration; /* Initialize k to be gaussian */ random_spinor_field(g_spinor_field[k], VOLUME/2); norm0=square_norm(g_spinor_field[k], VOLUME/2, 1); /*normalize k */ assign_mul_bra_add_mul_r( g_spinor_field[k], 1./sqrt(norm0),0., g_spinor_field[k], VOLUME/2); Q_psi(DUM_SOLVER,k,q_off); Q_psi(DUM_SOLVER,DUM_SOLVER,q_off); /*compute the ritz functional */ /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/ ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1); zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2); assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 1., -ritz, VOLUME/2); assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2); normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1); /* main loop */ for(iteration=1;iteration<=ITER_MAX_BCG;iteration++) { if(normg0 <= eps_sq) break; Q_psi(DUM_SOLVER+2,DUM_SOLVER+1,q_off); Q_psi(DUM_SOLVER+2,DUM_SOLVER+2,q_off); /* compute costh and sinth */ normp=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1); xxx=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1); xs1=0.5*(ritz+xxx/normp); xs2=0.5*(ritz-xxx/normp); normp=sqrt(normp); xs3=normg0/normp; aaa=sqrt(xs2*xs2+xs3*xs3); cosd=xs2/aaa; sind=xs3/aaa; if(cosd>=0.) 
{ costh=sqrt(0.5*(1.+cosd)); sinth=0.5*sind/costh; } else { sinth=sqrt(0.5*(1.-cosd)); costh=0.5*sind/sinth; } ritz=xs1+aaa; assign_add_mul_r_add_mul(g_spinor_field[k], g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], costh-1., sinth/normp, VOLUME/2); assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2], costh-1., sinth/normp, VOLUME/2); /* compute g */ zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2); assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 1., -ritz, VOLUME/2); /* calculate the norm of g' and beta_cg=costh g'^2/g^2 */ normg=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1); beta_cg=costh*normg/normg0; if(beta_cg*costh*normp>20.*sqrt(normg)) beta_cg=0.; normg0=normg; /* compute the new value of p */ assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2), VOLUME/2, 1); assign_mul_add_r(g_spinor_field[DUM_SOLVER+1],beta_cg, g_spinor_field[DUM_SOLVER+2], VOLUME/2); /* restore the state of the iteration */ if(iteration%20==0) { /* readjust x */ xxx=sqrt(square_norm(g_spinor_field[k], VOLUME/2), 1); assign_mul_bra_add_mul_r( g_spinor_field[k], 1./xxx,0., g_spinor_field[k], VOLUME/2); Q_psi(DUM_SOLVER,k,q_off); Q_psi(DUM_SOLVER,DUM_SOLVER,q_off); /*compute the ritz functional */ ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1); /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/ zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2); assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 1., -ritz, VOLUME/2); normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1); /*subtract a linear combination of x and g from p to insure (x,p)=0 and (p,g)=(g,g) */ cosd=scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1); assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], 
g_spinor_field[k], -cosd, VOLUME/2); cosd=scalar_prod_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1)-normg0; assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], -cosd/sqrt(normg0), VOLUME/2); } } *rz=ritz; return iteration; }
/* BiCGstab(2) solver with reliable-update strategy for x = f^{-1} b.
 *
 * x0:       on entry the initial guess, on exit the solution.
 * b:        source.
 * max_iter: maximum number of (BiCG) iterations; the counter advances by l=2.
 * eps_sq:   target squared residual (absolute if rel_prec==0, relative to
 *           ||b||^2 if rel_prec==1).
 * N:        local number of sites (VOLUME or VOLUME/2).
 * f:        the matrix-vector multiplication.
 *
 * Returns the iteration count on convergence, -1 when max_iter was reached.
 *
 * Structure: an l=2 BiCG sweep followed by the "convex polynomial" stage of
 * BiCGstab(l) (Sleijpen & Fokkema), plus a residual-replacement ("reliable
 * update") scheme that occasionally recomputes the true residual and shifts
 * the accumulated correction x into xp to limit float drift.
 */
int bicgstab2(spinor * const x0, spinor * const b, const int max_iter,
              double eps_sq, const int rel_prec, const int N, matrix_mult f) {
  const int l = 2;          /* the "2" in BiCGstab(2) */
  double err;
  int i, j, k;
  int update_app = 0, update_res = 0;
  double rho0, rho1, beta, alpha, omega, gamma_hat, sigma, kappa0, kappal, rho, zeta0;
  double squarenorm, Mx=0., Mr=0.;   /* running maxima for the reliable-update tests */
  spinor * r[5], * u[5], * r0_tilde, * u0, * x, * xp, * bp;
  double Z[3][3], y0[3], yl[3], yp[3], ypp[3];
  spinor ** solver_field = NULL;
  const int nr_sf = 10;

  k = -l;
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r0_tilde = solver_field[0];
  u0 = solver_field[1];
  r[0] = solver_field[2];
  u[0] = solver_field[3];
  r[1] = solver_field[4];
  u[1] = solver_field[5];
  r[2] = solver_field[6];
  u[2] = solver_field[7];
  bp = solver_field[8];
  /* xp accumulates the converged part of the solution (starts as the guess
     x0); x holds the current correction, added back at the end. */
  xp = x0;
  x = solver_field[9];
  zero_spinor_field(x, N);
  assign(u[0], b, N);
  f(r0_tilde, xp);
  diff(r[0], u[0], r0_tilde, N);   /* r = b - A xp */
  zero_spinor_field(u0, N);
  assign(r0_tilde, r[0], N);       /* shadow residual = initial residual */
  /* random_spinor_field(r0_tilde, N); */
  assign(bp, r[0], N);             /* bp: right-hand side of the current correction system */
  squarenorm = square_norm(b, N, 1);

  rho0 = 1.;
  alpha = rho0;
  omega = rho0;
  err = square_norm(r[0], N, 1);
  Mr = err;
  Mx = err;
  zeta0 = err;
  while( k < max_iter && (((err > eps_sq) && (rel_prec == 0))
                          || ((err > eps_sq*squarenorm) && (rel_prec == 1))
                          )) {
    k+=l;

    /* The BiCG part */

    rho0 *= -omega;
    for(j = 0; j < l; j++) {
      rho1 = scalar_prod_r(r[j], r0_tilde, N, 1);
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      /* if(g_proc_id == 0) {printf("beta = %e, alpha = %e, rho0 = %e\n", beta, alpha, rho0);fflush(stdout);} */
      for(i = 0; i <= j; i++) {
        /* u_i = r_i - \beta u_i */
        assign_mul_add_r(u[i], -beta, r[i], N);
      }
      f(u[j+1], u[j]);
      sigma = scalar_prod_r(u[j+1], r0_tilde, N, 1);
      alpha = rho1/sigma;
      /* if(g_proc_id == 0) {printf("sigma = %e, alpha = %e\n", sigma, alpha);fflush(stdout);} */
      /* x = x + \alpha u_0 */
      assign_add_mul_r(x, u[0], alpha, N);
      /* r_i = r_i - \alpha u_{i+1} */
      for(i = 0; i <= j; i++) {
        assign_add_mul_r(r[i], u[i+1], -alpha, N);
      }
      f(r[j+1], r[j]);
      err = square_norm(r[j+1], N, 1);
      if(g_proc_id == 0 && g_debug_level > 1) {printf("%d %d err = %e\n", k, j, err);fflush(stdout);}
      if(err > Mr) Mr = err;
      if(err > Mx) Mx = err;
    }

    /* The polynomial part */

    /* Z = R* R */
    for(i = 0; i <= l; i++){
      for(j = 0; j <= i; j++){
        Z[i][j] = scalar_prod_r(r[j], r[i], N, 1);
        Z[j][i] = Z[i][j];
      }
    }

    /* r0tilde and rl_tilde */
    y0[0] = -1;
    y0[2] = 0.;
    y0[1] = Z[1][0]/Z[1][1];

    yl[0] = 0.;
    yl[2] = -1.;
    yl[1] = Z[1][2]/Z[1][1];

    /* Convex combination */
    for(i = 0; i < l+1; i++){
      yp[i] = 0.;
      ypp[i] = 0.;
      for(j = 0; j < l+1; j++) {
        yp[i] +=Z[i][j]*y0[j];
        ypp[i] +=Z[i][j]*yl[j];
      }
    }

    kappa0 = sqrt( y0[0]*yp[0] + y0[1]*yp[1] + y0[2]*yp[2] );
    kappal = sqrt( yl[0]*ypp[0] + yl[1]*ypp[1] + yl[2]*ypp[2] );

    rho = (yl[0]*yp[0] + yl[1]*yp[1] + yl[2]*yp[2])/kappa0/kappal;

    /* clamp |gamma_hat| away from 0 to avoid omega breakdown */
    if(fabs(rho) > 0.7) {
      gamma_hat = rho;
    }
    else {
      gamma_hat = rho*0.7/fabs(rho);
    }
    for(i = 0; i <= l; i++) {
      y0[i] -= gamma_hat*kappa0*yl[i]/kappal;
    }

    /* Update */
    omega = y0[l];
    for(i = 1; i < l+1; i++) {
      assign_add_mul_r(u[0], u[i], -y0[i], N);
      assign_add_mul_r(x, r[i-1], y0[i], N);
      assign_add_mul_r(r[0], r[i], -y0[i], N);
    }
    err = kappa0*kappa0;   /* estimate of the new residual norm squared */
    /* Reliable update part */
    if(err > Mr) Mr = err;
    if(err > Mx) Mx = err;
    update_app = (err < 1.e-4*zeta0 && zeta0 <= Mx);
    update_res = ((err < 1.e-4*Mr && zeta0 <= Mr) || update_app);
    if(update_res) {
      if(g_proc_id == 0 && g_debug_level > 1) printf("Update res\n");
      /* recompute the true residual of the correction system */
      f(r[0], x);
      diff(r[0], bp, r[0], N);
      Mr = err;
      if(update_app) {
        if(g_proc_id == 0 && g_debug_level > 1) printf("Update app\n");
        Mx = err;
        /* fold the correction into xp and restart the correction system */
        assign_add_mul_r(xp, x, 1., N);
        zero_spinor_field(x, N);
        assign(bp, r[0], N);
      }
    }
    update_app = 0;
    update_res = 0;
    if(g_proc_id == 0 && g_debug_level > 0){
      printf(" BiCGstab(2)convex iterated %d %d, %e rho0 = %e, alpha = %e, gamma_hat= %e\n",
             l, k, err, rho0, alpha, gamma_hat);
      fflush( stdout );
    }
  }
  /* final solution: accumulated part plus the remaining correction */
  assign_add_mul_r(x, xp, 1., N);
  assign(x0, x, N);
  if(k == max_iter) return(-1);
  return(k);
}
/* Multiplicative Schwarz Alternating Procedure (SAP) smoother, even/odd
 * block ordering: performs Ncy cycles of approximate block solves of
 * D x = Q and accumulates the corrections into P.
 *
 * P:   current approximate solution, updated in place.
 * Q:   global source.
 * Ncy: number of SAP cycles.
 *
 * Per cycle, for each block colour eo (0 then 1): the global residual
 * r = Q - D P is recomputed, and for every block of that colour an
 * even/odd-preconditioned block system is solved approximately with a few
 * mrblk iterations; the block correction is added back into P.
 */
void Msap_eo(spinor * const P, spinor * const Q, const int Ncy) {
  int blk, ncy = 0, eo, vol;
  spinor * r, * a, * b;
  double nrm;
  spinor * b_even, * b_odd, * a_even, * a_odd;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  /*
   * here it would be probably better to get the working fields as a parameter
   * from the calling function
   */
  init_solver_field(&solver_field, VOLUME, nr_sf);
  r = solver_field[0];
  a = solver_field[1];
  b = solver_field[2];

  /* half the block volume: even/odd halves of a block field */
  vol = block_list[0].volume/2;
  /* NOTE(review): the odd halves start at offset vol + 1, i.e. one site of
   * padding after the even half — presumably a boundary/buffer slot; confirm
   * against the block-field layout used by copy_global_to_block_eo. */
  b_even = b;
  b_odd = b + vol + 1;
  a_even = a;
  a_odd = a + vol + 1;

  for(ncy = 0; ncy < Ncy; ncy++) {
    /* compute the global residue */
    /* this can be done more efficiently */
    /* here only a naive implementation */
    for(eo = 0; eo < 2; eo++) {
      D_psi(r, P);
      diff(r, Q, r, VOLUME);
      nrm = square_norm(r, VOLUME, 1);
      if(g_proc_id == 0 && g_debug_level > 1 && eo == 1) {
        printf("Msap: %d %1.3e\n", ncy, nrm);
      }
      /* choose the even (odd) block */
      for (blk = 0; blk < nb_blocks; blk++) {
        if(block_list[blk].evenodd == eo) {
          /* get part of r corresponding to block blk into b_even and b_odd */
          copy_global_to_block_eo(b_even, b_odd, r, blk);
          /* even/odd-preconditioned block solve:
           * odd source: a_odd = H_oe M_ee^{-1} b_even - b_odd */
          assign_mul_one_pm_imu_inv(a_even, b_even, +1., vol);
          Block_H_psi(&block_list[blk], a_odd, a_even, OE);
          /* a_odd = a_odd - b_odd */
          assign_mul_add_r(a_odd, -1., b_odd, vol);
          /* approximate odd-site solve: 3 MR iterations only (smoother) */
          mrblk(b_odd, a_odd, 3, 1.e-31, 1, vol, &Mtm_plus_block_psi, blk);
          /* reconstruct the even sites of the block correction */
          Block_H_psi(&block_list[blk], b_even, b_odd, EO);
          mul_one_pm_imu_inv(b_even, +1., vol);
          /* a_even = a_even - b_even */
          assign_add_mul_r(a_even, b_even, -1., vol);
          /* add even and odd part up to full spinor P */
          add_eo_block_to_global(P, a_even, b_odd, blk);
        }
      }
    }
  }
  finalize_solver(solver_field, nr_sf);
  return;
}
/* Mixed-precision multi-shift CG for the non-degenerate twisted-mass doublet:
 * solves (A + shifts[im]^2) x_im = Q for all shifts simultaneously.
 *
 * Pup/Pdn:   arrays (one entry per shift) of output solution fields.
 * Qup/Qdn:   source fields.
 * solver_pm: solver parameters (precision targets, shift list, max_iter,
 *            the double- and single-precision operators M_ndpsi/M_ndpsi32,
 *            and sdim to distinguish full-volume from even/odd solves).
 *
 * Returns the iteration count on convergence, -1 otherwise.
 *
 * The CG recursions run in single precision (spinor32 fields); accuracy is
 * recovered through "reliable updates": when a shifted residual estimate has
 * dropped far enough (rel_delta) below its starting value, the accumulated
 * low-precision corrections are added to the double-precision solutions and
 * the true residual is recomputed in double precision.
 */
int mixed_cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
                       spinor * const Qup, spinor * const Qdn,
                       solver_pm_t * solver_pm) {

  double eps_sq = solver_pm->squared_solver_prec;
  int noshifts = solver_pm->no_shifts;
  int rel_prec = solver_pm->rel_prec;
  int max_iter = solver_pm->max_iter;
  int check_abs, check_rel;
  double * shifts = solver_pm->shifts;
  int Nshift = noshifts;   /* keep the original count; noshifts may shrink */

  // algorithm
  double rr_up, rr_dn, rr, rr_old, r0r0, dAd_up, dAd_dn, dAd;

  if(rel_prec){
    check_rel = 1;
    check_abs = 0;
  }
  else{
    check_rel = 0;
    check_abs = 1;
  }

  int use_eo=1, eofactor=2;
  //not even-odd?
  if(solver_pm->sdim == VOLUME) {
    eofactor = 1;
    use_eo = 0;
  }

  int N = VOLUME/eofactor;
  int Vol = VOLUMEPLUSRAND/eofactor;

  // norm of source
  rr_up = square_norm(Qup, N, 1);
  rr_dn = square_norm(Qdn, N, 1);
  rr = rr_up + rr_dn;

  if( (g_cart_id == 0 && g_debug_level > 2)) printf("# CGMMSND_mixed: Initial mms residue: %.6e\n", rr);
  /* tiny sources give no headroom for single precision: delegate */
  if(rr < 1.0e-4){
    if( (g_cart_id == 0 && g_debug_level > 2)) printf("# CGMMSND_mixed: norm of source too low: falling back to double mms solver %.6e\n", rr);
    return(cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm));
  }

  r0r0 = rr; // for relative precision
  rr_old = rr; // for the first iteration

  //allocate an auxiliary solver fields
  spinor ** sf = NULL;
  const int nr_sf = 6;
  init_solver_field(&sf, Vol, nr_sf);

  spinor32 ** sf32 = NULL;
  const int nr_sf32 = 8;
  init_solver_field_32(&sf32, Vol, nr_sf32);

  //spinor fields
  //we need one less than shifts, since one field is cared of by the usual cg fields
  init_mms_tm_nd_32(noshifts-1, Vol);

  // Pup/dn can be used as auxiliary field to work on, as it is not later used (could be used as initial guess at the very start)
  // Q_up/dn can be used as feedback, or if not, also as auxiliary field

  //allocate cg constants
  double * sigma;
  double * zitam1, * zita;
  double * alphas, * betas;
  double gamma;
  double alpham1;
  sigma = (double*)calloc((noshifts), sizeof(double));
  zitam1 = (double*)calloc((noshifts), sizeof(double));
  zita = (double*)calloc((noshifts), sizeof(double));
  alphas = (double*)calloc((noshifts), sizeof(double));
  betas = (double*)calloc((noshifts), sizeof(double));

  /* single-precision CG work fields and double-precision reliable-update fields */
  spinor32 * r_up, * r_dn, * Ad_up, * Ad_dn, * x_up, * x_dn, * d_up, * d_dn;
  spinor * r_up_d, * r_dn_d, * x_up_d, * x_dn_d, * Ax_up_d, * Ax_dn_d;

  // iteration counter
  int j;

  //reliable update flag
  int rel_update = 0;
  //no of reliable updates done
  int no_rel_update = 0;
  //use reliable update flag
  int use_reliable = 1;

  double rel_delta = 1.0e-10;
  int trigger_shift = -1;
  double * res;
  double * res0;
  double * maxres;
  res = (double*)calloc((noshifts), sizeof(double));
  res0 = (double*)calloc((noshifts), sizeof(double));
  maxres = (double*)calloc((noshifts), sizeof(double));

  /////////////////
  // ASSIGNMENTS //
  /////////////////

  x_up = sf32[0];
  x_dn = sf32[1];
  r_up = sf32[2];
  r_dn = sf32[3];
  d_up = sf32[4];
  d_dn = sf32[5];
  Ad_up = sf32[6];
  Ad_dn = sf32[7];

  x_up_d = sf[0];
  x_dn_d = sf[1];
  r_up_d = sf[2];
  r_dn_d = sf[3];
  Ax_up_d = sf[4];
  Ax_dn_d = sf[5];

  /*
  //matrix test
  spinor32 * help_low_up = sf32[0];
  spinor32 * help_low_dn = sf32[1];
  spinor * help_high_up = sf[0];
  spinor * help_high_dn = sf[1];
  assign_to_32(help_low_up, Qup, N);
  assign_to_32(help_low_dn, Qdn, N);
  assign(help_high_up, Qup, N);
  assign(help_high_dn, Qdn, N);
  double sqn_high = square_norm(help_high_up,N,1) + square_norm(help_high_dn,N,1);
  printf("square_norm(Q_high) = %e\n", sqn_high);
  float sqn_low = square_norm_32(help_low_up,N,1) + square_norm_32(help_low_dn,N,1);
  printf("square_norm(Q_low) = %e\n", sqn_low);
  solver_pm->M_ndpsi32(sf32[2], sf32[3], help_low_up, help_low_dn);
  solver_pm->M_ndpsi(sf[2], sf[3], help_high_up, help_high_dn);
  assign_to_64(sf[4], sf32[2], N);
  assign_to_64(sf[5], sf32[3], N);
  diff(sf[0], sf[4], sf[2], N);
  diff(sf[1], sf[5], sf[3], N);
  double sqnrm = square_norm(sf[0], N, 1) + square_norm(sf[1], N, 1);
  printf("Operator 32 test: (square_norm) / (spinor component) = %.8e\n", sqnrm/24.0/N);
  exit(1);
  */

  // r(0) = b
  assign_to_32(r_up, Qup, N);
  assign_to_32(r_dn, Qdn, N);

  // d(0) = b
  assign_to_32(d_up, Qup, N);
  assign_to_32(d_dn, Qdn, N);

  maxres[0] = rr;
  res[0] = rr;
  res0[0] = rr;
  alphas[0] = 1.0;
  betas[0] = 0.0;
  sigma[0] = shifts[0]*shifts[0];
  if(g_cart_id == 0 && g_debug_level > 2) printf("# CGMMSND_mixed: shift %d is %e\n", 0, sigma[0]);

  // currently only implemented for P=0
  for(int im = 1; im < noshifts; im++) {
    maxres[im] = rr;
    res[im] = rr;
    res0[im] = rr;
    sigma[im] = shifts[im]*shifts[im] - sigma[0];
    if(g_cart_id == 0 && g_debug_level > 2) printf("# CGMMSND_mixed: shift %d is %e\n", im, sigma[im]);
    // these will be the result spinor fields
    zero_spinor_field_32(mms_x_up[im-1], N);
    zero_spinor_field_32(mms_x_dn[im-1], N);
    assign_to_32(mms_d_up[im-1], Qup, N);
    assign_to_32(mms_d_dn[im-1], Qdn, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  //zero fields for solution Pup, Pdn
  for(int im = 0; im < noshifts; im++){
    zero_spinor_field(Pup[im], N);
    zero_spinor_field(Pdn[im], N);
  }

  //////////
  // LOOP //
  //////////
  for (j = 0; j < max_iter; j++) {
    // A*d(k)
    solver_pm->M_ndpsi32(Ad_up, Ad_dn, d_up, d_dn);
    //add zero'th shift
    assign_add_mul_r_32(Ad_up, d_up, (float) sigma[0], N);
    assign_add_mul_r_32(Ad_dn, d_dn, (float) sigma[0], N);

    // alpha = r(k)*r(k) / d(k)*A*d(k)
    dAd_up = scalar_prod_r_32(d_up, Ad_up, N, 1);
    dAd_dn = scalar_prod_r_32(d_dn, Ad_dn, N, 1);
    dAd = dAd_up + dAd_dn;

    alpham1 = alphas[0];
    alphas[0] = rr_old / dAd; // rr_old is taken from the last iteration respectively

    // r(k+1)
    assign_add_mul_r_32(r_up, Ad_up, (float) -alphas[0],N);
    assign_add_mul_r_32(r_dn, Ad_dn, (float) -alphas[0],N);

    // r(k+1)*r(k+1)
    rr_up = square_norm_32(r_up, N, 1);
    rr_dn = square_norm_32(r_dn, N, 1);
    rr = rr_up + rr_dn;

    if((g_cart_id == 0) && (g_debug_level > 2)) printf("# CGMMSND_mixed: mms iteration j = %i: rr = %.6e\n", j, rr);

    // aborting ??
    // check wether precision is reached ...
    if ( ((check_abs)&&(rr <= eps_sq)) || ((check_rel)&&(rr <= eps_sq*r0r0)) ) {
      if ((check_rel)&&(rr <= eps_sq*r0r0)) {
        if((g_cart_id == 0) && (g_debug_level > 3)) printf("# CGMMSND_mixed: Reached relative solver precision of eps_rel = %.2e\n", eps_sq);
      }
      break;
    }

    // update alphas and zitas
    // used later
    for(int im = 1; im < noshifts; im++) {
      gamma = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im])
                                + alpham1*(1.+sigma[im]*alphas[0]));
      zitam1[im] = zita[im];
      zita[im] = gamma;
      alphas[im] = alphas[0]*zita[im]/zitam1[im];
    }

    //check for reliable update
    res[0] = rr;
    for(int im=1; im<noshifts; im++) res[im] = rr * zita[im];

    rel_update = 0;
    for(int im = (noshifts-1); im >= 0; im--) {
      if( res[im] > maxres[im] ) maxres[im] = res[im];
      if( (res[im] < rel_delta*res0[im]) && (res0[im]<=maxres[im]) && (use_reliable) ) rel_update=1;
      if( rel_update && ( trigger_shift == -1) ) trigger_shift = im;
    }

    if(!rel_update) {
      // x_j(k+1) = x_j(k) + alpha_j*d_j(k)
      // alphas are set above
      assign_add_mul_r_32(x_up, d_up, (float) alphas[0], N);
      assign_add_mul_r_32(x_dn, d_dn, (float) alphas[0], N);

      for(int im = 1; im < noshifts; im++) {
        assign_add_mul_r_32(mms_x_up[im-1], mms_d_up[im-1], (float) alphas[im], N);
        assign_add_mul_r_32(mms_x_dn[im-1], mms_d_dn[im-1], (float) alphas[im], N);
      }

      // beta = r(k+1)*r(k+1) / r(k)*r(k)
      betas[0] = rr / rr_old;
      rr_old = rr;  // for next iteration

      // d_0(k+1) = r(k+1) + beta*d_0(k)
      assign_mul_add_r_32(d_up, (float) betas[0], r_up, N);
      assign_mul_add_r_32(d_dn, (float) betas[0], r_dn, N);

      // d_j(k+1) = zita*r(k+1) + beta*d_j(k)
      for(int im = 1; im < noshifts; im++) {
        betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
        assign_mul_add_mul_r_32(mms_d_up[im-1], r_up, (float) betas[im], (float) zita[im], N);
        assign_mul_add_mul_r_32(mms_d_dn[im-1], r_dn, (float) betas[im], (float) zita[im], N);
      }
    }
    else{
      //reliable update
      if( (g_cart_id == 0) && (g_debug_level > 3) ){
        printf("# CGMMSND_mixed: Shift %d with offset squared %e triggered a reliable update\n", trigger_shift, sigma[trigger_shift]);
      }
      //add low prec solutions
      assign_add_mul_r_32(x_up, d_up, (float) alphas[0], N);
      assign_add_mul_r_32(x_dn, d_dn, (float) alphas[0], N);
      addto_32(Pup[0], x_up, N);
      addto_32(Pdn[0], x_dn, N);
      for(int im = 1; im < noshifts; im++) {
        assign_add_mul_r_32(mms_x_up[im-1], mms_d_up[im-1], alphas[im], N);
        assign_add_mul_r_32(mms_x_dn[im-1], mms_d_dn[im-1], alphas[im], N);
        addto_32(Pup[im], mms_x_up[im-1], N);
        addto_32(Pdn[im], mms_x_dn[im-1], N);
      }

      //add low precision for shift 0 only
      addto_32(x_up_d, x_up, N);
      addto_32(x_dn_d, x_dn, N);

      /* recompute the true residual of shift 0 in double precision */
      solver_pm->M_ndpsi(Ax_up_d, Ax_dn_d, x_up_d, x_dn_d);
      //add zero'th shift
      assign_add_mul_r(Ax_up_d, x_up_d, sigma[0], N);
      assign_add_mul_r(Ax_dn_d, x_dn_d, sigma[0], N);

      diff(r_up_d, Qup, Ax_up_d, N);
      diff(r_dn_d, Qdn, Ax_dn_d, N);

      rr_up = square_norm(r_up_d, N, 1);
      rr_dn = square_norm(r_dn_d, N, 1);
      rr = rr_up + rr_dn;
      if ((g_cart_id == 0) && (g_debug_level > 3) ) printf("# CGMMSND_mixed: New residue after reliable update: %.6e\n", rr);

      //update res[im]
      res[0] = rr;

      if(res[trigger_shift] > res0[trigger_shift]){
        if(g_cart_id == 0) printf("# CGMMSND_mixed: Warning: residue of shift no %d got larger after rel. update\n", trigger_shift);
        //if this is the zero'th shift not getting better -> no further convergence, break
        if(trigger_shift == 0) break;
      }

      //zero float fields
      zero_spinor_field_32(x_up, N);
      zero_spinor_field_32(x_dn, N);
      for(int im = 1; im < noshifts; im++) {
        zero_spinor_field_32(mms_x_up[im-1], N);
        zero_spinor_field_32(mms_x_dn[im-1], N);
      }

      //update the source
      assign_to_32(r_up, r_up_d, N);
      assign_to_32(r_dn, r_dn_d, N);

      betas[0] = res[0]/rr_old;
      rr_old = rr;
      // d_0(k+1) = r(k+1) + beta*d_0(k)
      assign_mul_add_r_32(d_up, betas[0], r_up, N);
      assign_mul_add_r_32(d_dn, betas[0], r_dn, N);
      // d_j(k+1) = r(k+1) + beta*d_j(k)
      for(int im = 1; im < noshifts; im++) {
        betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
        assign_mul_add_mul_r_32(mms_d_up[im-1], r_up, (float) betas[im], (float) zita[im], N);
        assign_mul_add_mul_r_32(mms_d_dn[im-1], r_dn, (float) betas[im], (float) zita[im], N);
      }

      //new maxres for the shift that initiated the reliable update
      res[trigger_shift] = res[0]*zita[trigger_shift]*zita[trigger_shift];
      res0[trigger_shift] = res[trigger_shift];
      maxres[trigger_shift] = res[trigger_shift];
      trigger_shift = -1;
      no_rel_update ++;
    } //reliable update

    //check if some shift is converged
    /* only the largest remaining shift is tested, every 10 iterations */
    for(int im = 1; im < noshifts; im++) {
      if(j > 0 && (j % 10 == 0) && (im == noshifts-1)) {
        double sn = square_norm_32(mms_d_up[im-1], N, 1);
        sn += square_norm_32(mms_d_dn[im-1], N, 1);
        if(alphas[noshifts-1]*alphas[noshifts-1]*sn <= eps_sq) {
          noshifts--;
          if( (g_debug_level > 1) && (g_cart_id == 0) ) {
            printf("# CGMMSND_mixed: at iteration %d removed one shift, %d remaining\n", j, noshifts);
          }
          //if removed we add the latest solution vector for this shift
          addto_32(Pup[im], mms_x_up[im-1], N);
          addto_32(Pdn[im], mms_x_dn[im-1], N);
        }
      }
    }
  }//LOOP

  if( (g_cart_id == 0) && (g_debug_level > 1) ) printf("Final mms residue: %.6e\n", rr);

  //add the latest solutions
  for(int im = 0; im < noshifts; im++) {
    if(im == 0){
      addto_32(Pup[0], x_up, N);
      addto_32(Pdn[0], x_dn, N);
    }
    else{
      addto_32(Pup[im], mms_x_up[im-1], N);
      addto_32(Pdn[im], mms_x_dn[im-1], N);
    }
  }

  if(g_debug_level > 4){
    if(g_cart_id == 0) printf("# CGMMSND_mixed: Checking mms result:\n");
    //loop over all shifts (-> Nshift)
    for(int im = 0; im < Nshift; im++){
      solver_pm->M_ndpsi(sf[0], sf[1], Pup[im], Pdn[im]);
      assign_add_mul_r(sf[0], Pup[im] , shifts[im]*shifts[im], N);
      assign_add_mul_r(sf[1], Pdn[im] , shifts[im]*shifts[im], N);
      diff(sf[2], sf[0], Qup, N);
      diff(sf[3], sf[1], Qdn, N);
      rr_up = square_norm(sf[2], N, 1);
      rr_dn = square_norm(sf[3], N, 1);
      rr = rr_up + rr_dn;
      if(g_cart_id == 0) printf("# CGMMSND_mixed: Shift[%d] squared residue: %e\n", im, rr);
    }
  }

  finalize_solver(sf, nr_sf);
  finalize_solver_32(sf32, nr_sf32);

  //free cg constants
  free(sigma);
  free(zitam1);
  free(zita);
  free(alphas);
  free(betas);

  //free reliable update stuff
  free(res);
  free(res0);
  free(maxres);

  //if not converged -> return(-1)
  if(j<max_iter){
    return(j);
  }
  else{
    return(-1);
  }
}//
/* P output = solution , Q input = source
 *
 * Conjugate gradient for the hermitian two-flavour (non-degenerate doublet)
 * operator f: solves f(P) = Q for both flavour components simultaneously.
 *
 * P_up/P_dn:  on entry the initial guess, on exit the solution.
 * Q_up/Q_dn:  source.
 * max_iter:   maximum number of CG iterations.
 * eps_sq:     target squared residual (absolute if rel_prec==0, relative to
 *             ||Q||^2 if rel_prec==1).
 * N:          local number of sites per field.
 * f:          the (hermitian, positive) two-flavour matrix multiplication.
 *
 * Returns iteration count + 1 on convergence, -1 if max_iter was exhausted.
 * Side effect: resets/sets the global g_sloppy_precision flag (half-spinor
 * builds switch to sloppy arithmetic once err^2 is below the target).
 *
 * Field usage: [0] = x, [1] = r, [2] = p, [3] = scratch, [4] = f(p).
 */
int cg_her_nd(spinor * const P_up,spinor * P_dn, spinor * const Q_up, spinor * const Q_dn,
              const int max_iter, double eps_sq, const int rel_prec,
              const int N, matrix_mult_nd f) {
  double normsp, normsq, pro, err, alpha_cg, beta_cg, squarenorm;
  int iteration;
  double err1, err2;
  spinor ** up_field = NULL;
  spinor ** dn_field = NULL;
  const int nr_sf = 5;

  /* do we really need so many fields??? */
  init_solver_field(&up_field, VOLUMEPLUSRAND, nr_sf);
  init_solver_field(&dn_field, VOLUMEPLUSRAND, nr_sf);

  squarenorm = square_norm(Q_up, N, 1);
  squarenorm+= square_norm(Q_dn, N, 1);

  /* !!!!  INITIALIZATION    !!!! */
  assign(up_field[0], P_up, N);
  assign(dn_field[0], P_dn, N);

  /* (r_0,r_0) = normsq */
  normsp =square_norm(P_up, N, 1);
  normsp+=square_norm(P_dn, N, 1);

  /* assign(up_field[5], Q_up, N); */
  /* assign(dn_field[5], Q_dn, N); */

  /* initialize residue r and search vector p */
  if(normsp==0){
    /* if a starting solution vector equal to zero is chosen */
    assign(up_field[1], Q_up, N);
    assign(dn_field[1], Q_dn, N);
    assign(up_field[2], Q_up, N);
    assign(dn_field[2], Q_dn, N);
    normsq =square_norm(Q_up, N, 1);
    normsq+=square_norm(Q_dn, N, 1);
  }
  else {
    /* if a starting solution vector different from zero is chosen */
    /* r = Q - f(x0), p = r */
    f(up_field[3],dn_field[3], up_field[0],dn_field[0]);
    diff(up_field[1], Q_up, up_field[3], N);
    diff(dn_field[1], Q_dn, dn_field[3], N);
    assign(up_field[2], up_field[1], N);
    assign(dn_field[2], dn_field[1], N);
    normsq =square_norm(up_field[2], N, 1);
    normsq+=square_norm(dn_field[2], N, 1);
  }

  /* main loop */
  for(iteration=0;iteration<max_iter;iteration++){
    f(up_field[4],dn_field[4], up_field[2],dn_field[2]);
    /* pro = (p, f(p)) summed over both flavours */
    pro =scalar_prod_r(up_field[2], up_field[4], N, 1);
    pro+=scalar_prod_r(dn_field[2], dn_field[4], N, 1);

    /* Compute alpha_cg(i+1) */
    alpha_cg=normsq/pro;

    /* Compute x_(i+1) = x_i + alpha_cg(i+1) p_i */
    assign_add_mul_r(up_field[0], up_field[2],  alpha_cg, N);
    assign_add_mul_r(dn_field[0], dn_field[2],  alpha_cg, N);

    /* Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i */
    assign_add_mul_r(up_field[1], up_field[4], -alpha_cg, N);
    assign_add_mul_r(dn_field[1], dn_field[4], -alpha_cg, N);

    /* Check whether the precision is reached ... */
    err1 =square_norm(up_field[1], N, 1);
    err2 =square_norm(dn_field[1], N, 1);
    err = err1 + err2;
    if(g_debug_level > 1 && g_proc_id == g_stdio_proc) {
      printf("cg_her_nd : i = %d esqr  %e = %e + %e \n",iteration,err, err1, err2);
      fflush( stdout);
    }

    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) {
      assign(P_up, up_field[0], N);
      assign(P_dn, dn_field[0], N);
      g_sloppy_precision = 0;
      finalize_solver(up_field, nr_sf);
      finalize_solver(dn_field, nr_sf);
      return(iteration+1);
    }
#ifdef _USE_HALFSPINOR
    /* enable sloppy (half-spinor) arithmetic once err^2 meets the target */
    if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*squarenorm) && (rel_prec == 1))) {
      g_sloppy_precision = 1;
      if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
        printf("sloppy precision on\n");
        fflush( stdout);
      }
    }
#endif
    /* Compute beta_cg(i+1)
       Compute p_(i+1) = r_i+1 + beta_(i+1) p_i */
    beta_cg=err/normsq;
    assign_mul_add_r(up_field[2], beta_cg, up_field[1], N);
    assign_mul_add_r(dn_field[2], beta_cg, dn_field[1], N);
    normsq=err;
  }
  /* not converged: return the best approximation anyway */
  assign(P_up, up_field[0], N);
  assign(P_dn, dn_field[0], N);
  g_sloppy_precision = 0;
  finalize_solver(up_field, nr_sf);
  finalize_solver(dn_field, nr_sf);
  return(-1);
}
/* Top-level inversion driver for one operator from operator_list.
 *
 * op_id       : index into operator_list selecting the operator to invert
 * index_start : source index forwarded to the propagator writer
 * write_prop  : if nonzero, write the resulting propagator(s) to disk
 *
 * Dispatches on optr->type:
 *   TMWILSON/WILSON/CLOVER  -> single-flavour even/odd inversion (invert_eo
 *                              or invert_clover_eo), optionally repeated with
 *                              mu -> -mu for the "down" propagator
 *   DBTMWILSON/DBCLOVER     -> two-flavour doublet inversion
 *   OVERLAP                 -> overlap inversion via invert_overlap
 *
 * NOTE(review): this function deliberately mutates the global coupling
 * state (g_kappa, g_mu, g_c_sw, g_mubar, g_epsbar, g_precWS, ...) before
 * calling the solvers; the solvers read these globals, so the assignment
 * order must not be changed. Results and the reached precision are stored
 * back into the operator struct (optr->iterations, optr->reached_prec).
 */
void op_invert(const int op_id, const int index_start, const int write_prop) {
  operator * optr = &operator_list[op_id];
  double atime = 0., etime = 0., nrm1 = 0., nrm2 = 0.;
  int i;
  optr->iterations = 0;
  optr->reached_prec = -1.;
  g_kappa = optr->kappa;
  boundary(g_kappa);

  atime = gettime();
  if(optr->type == TMWILSON || optr->type == WILSON || optr->type == CLOVER) {
    g_mu = optr->mu;
    g_c_sw = optr->c_sw;
    if(optr->type == CLOVER) {
      if (g_cart_id == 0 && g_debug_level > 1) {
        printf("#\n# csw = %e, computing clover leafs\n", g_c_sw);
      }
      init_sw_fields(VOLUME);
      sw_term( (const su3**) g_gauge_field, optr->kappa, optr->c_sw);
      /* this must be EE here! */
      /* to match clover_inv in Qsw_psi */
      sw_invert(EE, optr->mu);
    }

    /* up to two inversions: i == 0 with +mu, i == 1 with -mu (DownProp) */
    for(i = 0; i < 2; i++) {
      if (g_cart_id == 0) {
        printf("#\n# 2 kappa mu = %e, kappa = %e, c_sw = %e\n", g_mu, g_kappa, g_c_sw);
      }
      if(optr->type != CLOVER) {
        if(use_preconditioning){
          g_precWS=(void*)optr->precWS;
        }
        else {
          g_precWS=NULL;
        }
        optr->iterations = invert_eo( optr->prop0, optr->prop1, optr->sr0, optr->sr1,
                                      optr->eps_sq, optr->maxiter, optr->solver, optr->rel_prec, 0,
                                      optr->even_odd_flag,optr->no_extra_masses, optr->extra_masses, optr->id );
        /* check result */
        M_full(g_spinor_field[4], g_spinor_field[5], optr->prop0, optr->prop1);
      }
      else {
        optr->iterations = invert_clover_eo(optr->prop0, optr->prop1, optr->sr0, optr->sr1,
                                            optr->eps_sq, optr->maxiter, optr->solver, optr->rel_prec,
                                            &g_gauge_field, &Qsw_pm_psi, &Qsw_minus_psi);
        /* check result */
        Msw_full(g_spinor_field[4], g_spinor_field[5], optr->prop0, optr->prop1);
      }

      /* true residual check: || M prop - source ||^2 */
      diff(g_spinor_field[4], g_spinor_field[4], optr->sr0, VOLUME / 2);
      diff(g_spinor_field[5], g_spinor_field[5], optr->sr1, VOLUME / 2);

      nrm1 = square_norm(g_spinor_field[4], VOLUME / 2, 1);
      nrm2 = square_norm(g_spinor_field[5], VOLUME / 2, 1);
      optr->reached_prec = nrm1 + nrm2;

      /* convert to standard normalisation  */
      /* we have to mult. by 2*kappa */
      if (optr->kappa != 0.) {
        mul_r(optr->prop0, (2*optr->kappa), optr->prop0, VOLUME / 2);
        mul_r(optr->prop1, (2*optr->kappa), optr->prop1, VOLUME / 2);
      }
      if (optr->solver != CGMMS && write_prop) /* CGMMS handles its own I/O */
        optr->write_prop(op_id, index_start, i);
      if(optr->DownProp) {
        optr->mu = -optr->mu; /* second pass inverts with the opposite twisted mass */
      }
      else break;
    }
  }
  else if(optr->type == DBTMWILSON || optr->type == DBCLOVER) {
    g_mubar = optr->mubar;
    g_epsbar = optr->epsbar;
    g_c_sw = 0.;
    if(optr->type == DBCLOVER) {
      g_c_sw = optr->c_sw;
      if (g_cart_id == 0 && g_debug_level > 1) {
        printf("#\n# csw = %e, computing clover leafs\n", g_c_sw);
      }
      init_sw_fields(VOLUME);
      sw_term( (const su3**) g_gauge_field, optr->kappa, optr->c_sw);
      sw_invert_nd(optr->mubar*optr->mubar-optr->epsbar*optr->epsbar);
    }

    for(i = 0; i < SourceInfo.no_flavours; i++) {
      if(optr->type != DBCLOVER) {
        optr->iterations = invert_doublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3,
                                              optr->sr0, optr->sr1, optr->sr2, optr->sr3,
                                              optr->eps_sq, optr->maxiter, optr->solver, optr->rel_prec);
      }
      else {
        optr->iterations = invert_cloverdoublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3,
                                                    optr->sr0, optr->sr1, optr->sr2, optr->sr3,
                                                    optr->eps_sq, optr->maxiter, optr->solver, optr->rel_prec);
      }

      /* residual check for the doublet: apply M with +mubar to the
         (prop0,prop1) pair and -mubar to (prop2,prop3), each time adding
         the epsbar cross-coupling to the other flavour */
      g_mu = optr->mubar;
      if(optr->type != DBCLOVER) {
        M_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1);
      }
      else {
        Msw_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1);
      }
      assign_add_mul_r(g_spinor_field[DUM_DERI+1], optr->prop2, -optr->epsbar, VOLUME/2);
      assign_add_mul_r(g_spinor_field[DUM_DERI+2], optr->prop3, -optr->epsbar, VOLUME/2);

      g_mu = -g_mu;
      if(optr->type != DBCLOVER) {
        M_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3);
      }
      else {
        Msw_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3);
      }
      assign_add_mul_r(g_spinor_field[DUM_DERI+3], optr->prop0, -optr->epsbar, VOLUME/2);
      assign_add_mul_r(g_spinor_field[DUM_DERI+4], optr->prop1, -optr->epsbar, VOLUME/2);

      diff(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], optr->sr0, VOLUME/2);
      diff(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], optr->sr1, VOLUME/2);
      diff(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+3], optr->sr2, VOLUME/2);
      diff(g_spinor_field[DUM_DERI+4], g_spinor_field[DUM_DERI+4], optr->sr3, VOLUME/2);

      nrm1 = square_norm(g_spinor_field[DUM_DERI+1], VOLUME/2, 1);
      nrm1 += square_norm(g_spinor_field[DUM_DERI+2], VOLUME/2, 1);
      nrm1 += square_norm(g_spinor_field[DUM_DERI+3], VOLUME/2, 1);
      nrm1 += square_norm(g_spinor_field[DUM_DERI+4], VOLUME/2, 1);
      optr->reached_prec = nrm1;
      g_mu = g_mu1;

      /* For standard normalisation */
      /* we have to mult. by 2*kappa */
      mul_r(g_spinor_field[DUM_DERI], (2*optr->kappa), optr->prop0, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+1], (2*optr->kappa), optr->prop1, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+2], (2*optr->kappa), optr->prop2, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+3], (2*optr->kappa), optr->prop3, VOLUME/2);
      /* the final result should be stored in the convention used in */
      /* hep-lat/0606011 */
      /* this requires multiplication of source with */
      /* (1+itau_2)/sqrt(2) and the result with (1-itau_2)/sqrt(2) */
      mul_one_pm_itau2(optr->prop0, optr->prop2, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], -1., VOLUME/2);
      mul_one_pm_itau2(optr->prop1, optr->prop3, g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+3], -1., VOLUME/2);
      /* write propagator */
      if(write_prop) optr->write_prop(op_id, index_start, i);

      /* undo the 2*kappa normalisation so the internal fields keep the
         solver convention for a possible second flavour pass */
      mul_r(optr->prop0, 1./(2*optr->kappa), g_spinor_field[DUM_DERI], VOLUME/2);
      mul_r(optr->prop1, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+1], VOLUME/2);
      mul_r(optr->prop2, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+2], VOLUME/2);
      mul_r(optr->prop3, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+3], VOLUME/2);

      /* mirror source, but not for volume sources */
      if(i == 0 && SourceInfo.no_flavours == 2 && SourceInfo.type != 1) {
        if (g_cart_id == 0) {
          fprintf(stdout, "# Inversion done in %d iterations, squared residue = %e!\n",
                  optr->iterations, optr->reached_prec);
        }
        mul_one_pm_itau2(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], optr->sr0, optr->sr2, -1., VOLUME/2);
        mul_one_pm_itau2(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+3], optr->sr1, optr->sr3, -1., VOLUME/2);
        mul_one_pm_itau2(optr->sr0, optr->sr2, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI], +1., VOLUME/2);
        mul_one_pm_itau2(optr->sr1, optr->sr3, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], +1., VOLUME/2);
      }
      /* volume sources need only one inversion */
      else if(SourceInfo.type == 1) i++;
    }
  }
  else if(optr->type == OVERLAP) {
    g_mu = 0.;
    m_ov=optr->m;
    eigenvalues(&optr->no_ev, 5000, optr->ev_prec, 0, optr->ev_readwrite, nstore, optr->even_odd_flag);
    /*  ov_check_locality(); */
    /*  index_jd(&optr->no_ev_index, 5000, 1.e-12, optr->conf_input, nstore, 4); */
    ov_n_cheby=optr->deg_poly;

    if(use_preconditioning==1)
      g_precWS=(void*)optr->precWS;
    else
      g_precWS=NULL;

    if(g_debug_level > 3) ov_check_ginsparg_wilson_relation_strong();

    invert_overlap(op_id, index_start);

    if(write_prop) optr->write_prop(op_id, index_start, 0);
  }
  etime = gettime();
  if (g_cart_id == 0 && g_debug_level > 0) {
    fprintf(stdout, "# Inversion done in %d iterations, squared residue = %e!\n",
            optr->iterations, optr->reached_prec);
    fprintf(stdout, "# Inversion done in %1.2e sec. \n", etime - atime);
  }
  return;
}
/* Apply Q / sqrt(Q^2) to a spinor using a Chebyshev polynomial
 * approximation of 1/sqrt(Q^2) on the interval [minev, maxev=1].
 *
 * R     : output spinor, R = Q (Q^2)^(-1/2) S (approximately)
 * c     : Chebyshev coefficients, c[0..n-1]
 * n     : order of the Chebyshev approximation
 * S     : input spinor
 * rnorm : scale factor passed through to norm_Q_sqr_psi / norm_Q_n_psi
 * minev : lower edge of the approximation interval
 *
 * The lowest no_eigenvalues-1 eigenmodes are projected out of the input
 * (assign_sub_lowest_eigenvalues) and their exactly-computed contribution
 * is added back at the end via addproj_q_invsqrt. Workspace spinors are
 * taken from the Dov workspace pool (lock/unlock slots 2..6).
 *
 * Both recursion variants are present, selected by constant if(1)/else:
 * the active branch uses Clenshaw's backward recursion; the dead branch
 * is an adaptive-precision forward recursion kept for possible future use.
 */
void Q_over_sqrt_Q_sqr(spinor * const R, double * const c, const int n, spinor * const S,
                       const double rnorm, const double minev) {

  int j;
  double fact1, fact2, temp1, temp2, temp3, temp4, maxev, tnorm;
  spinor *sv, *d, *dd, *aux, *aux3;
  double ap_eps_sq = 0.; /* adaptive-precision target; 0 disables early exit */

  sv=lock_Dov_WS_spinor(2);
  d=lock_Dov_WS_spinor(3);
  dd=lock_Dov_WS_spinor(4);
  aux=lock_Dov_WS_spinor(5);
  aux3=lock_Dov_WS_spinor(6);

  eigenvalues_for_cg_computed = no_eigenvalues - 1;
  if(eigenvalues_for_cg_computed < 0) eigenvalues_for_cg_computed = 0;
  maxev=1.0;

  /* map [minev, maxev] onto the Chebyshev interval: x -> fact1*x + fact2 */
  fact1=4/(maxev-minev);
  fact2=-2*(maxev+minev)/(maxev-minev);

  zero_spinor_field(d, VOLUME);
  zero_spinor_field(dd, VOLUME);

  /* project the treated low modes out of the source */
  if(1) assign_sub_lowest_eigenvalues(aux3, S, no_eigenvalues-1, VOLUME);
  else assign(aux3, S, VOLUME);

  /* Check whether switch for adaptive precision is on */
  /* this might be implemented again in the future */
  /* Use the 'old' version using Clenshaw's recursion for the Chebysheff polynomial */
  if(1) {
    /* Clenshaw backward recursion: b_j = 2x b_{j+1} - b_{j+2} + c_j,
       with d = b_{j+1}, dd = b_{j+2}, sv holding the previous d */
    for (j = n-1; j >= 1; j--) {
      assign(sv, d, VOLUME);

      /* re-project every 10th step to keep rounding from re-introducing
         the removed low modes */
      if ( (j%10) == 0 ) {
        assign_sub_lowest_eigenvalues(aux, d, no_eigenvalues-1, VOLUME);
      }
      else {
        assign(aux, d, VOLUME);
      }

      norm_Q_sqr_psi(R, aux, rnorm);
      /* printf("%d %e %e\n", j, R[0].s0.c0.re, R[0].s0.c0.im); */
      /* printf("%e %e\n", R[0].s1.c0.re, R[0].s1.c0.im); */
      temp1=-1.0;
      temp2=c[j];
      assign_mul_add_mul_add_mul_add_mul_r(d, R, dd, aux3, fact2, fact1, temp1, temp2, VOLUME);
      assign(dd, sv, VOLUME);
    }

    if(1) assign_sub_lowest_eigenvalues(R, d, no_eigenvalues-1, VOLUME);
    else assign(R, d, VOLUME);

    /* final Clenshaw step with halved coefficients, then apply one power
       of Q to obtain Q * (Q^2)^(-1/2) */
    norm_Q_sqr_psi(aux, R, rnorm);
    temp1=-1.0;
    temp2=c[0]/2.;
    temp3=fact1/2.;
    temp4=fact2/2.;
    assign_mul_add_mul_add_mul_add_mul_r(aux, d, dd, aux3, temp3, temp4, temp1, temp2, VOLUME);
    norm_Q_n_psi(R, aux, 1, rnorm);
  }
  else {
    /* Use the adaptive precision version using the forward recursion
       for the Chebysheff polynomial */
    /* d = T_0(Q^2) */
    assign(d, aux3, VOLUME);
    /* dd = T_1(Q^2) */
    norm_Q_sqr_psi(dd, d, rnorm);
    temp3 = fact1/2.;
    temp4 = fact2/2.;
    assign_mul_add_mul_r(dd, d, temp3, temp4, VOLUME);
    /* r = c_1 T_1(Q^2) + 1./2 c_0 */
    temp1 = c[1];
    temp2 = c[0]/2.;
    mul_add_mul_r(R, dd, d, temp1, temp2, VOLUME);

    temp1=-1.0;
    for (j = 2; j <= n-1; j++) {
      /* aux = T_j(Q^2) = 2 Q^2 T_{j-1}(Q^2) - T_{j-2}(Q^2) */
      norm_Q_sqr_psi(aux, dd, rnorm);
      assign_mul_add_mul_add_mul_r(aux, dd, d, fact1, fact2, temp1, VOLUME);
      /* r = r + c_j T_j(Q^2) */
      temp2 = c[j];
      assign_add_mul_r(R, aux, temp2, VOLUME);
      /* The stoppping criterio tnorm = |T_j(Q^2)| */
      tnorm=square_norm(aux, VOLUME, 1);
      tnorm*=(temp2*temp2);

      /*
        auxnorm=square_norm(R);
        if(g_proc_id == g_stdio_proc){printf("j= %d\t|c T|^2= %g\t c_j= %g\t|r|^2= %g\n",j,tnorm,temp2,auxnorm); fflush( stdout);};
      */

      if(tnorm < ap_eps_sq) break;

      /* d = T_{j-1}(Q^2) */
      assign(d, dd, VOLUME);
      /* dd = T_{j}(Q^2) */
      assign(dd, aux, VOLUME);
    }
    if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
      printf("Order of Chebysheff approximation = %d\n",j);
      fflush( stdout);
    }

    /* r = Q r */
    assign(aux, R, VOLUME);
    norm_Q_n_psi(R, aux, 1, rnorm);
  }
  /* add in piece from projected subspace */
  addproj_q_invsqrt(R, S, no_eigenvalues-1, VOLUME);

  unlock_Dov_WS_spinor(2);
  unlock_Dov_WS_spinor(3);
  unlock_Dov_WS_spinor(4);
  unlock_Dov_WS_spinor(5);
  unlock_Dov_WS_spinor(6);
  return;
}
/* P output = solution , Q input = source */
/* Preconditioned conjugate gradient for a hermitian positive definite
 * operator f: solves f(x) = Q with the eigenvalue-part preconditioner
 * applied once to the initial residual (the in-loop reapplication is
 * currently disabled, see the commented call below).
 *
 * P        : in/out, initial guess on entry, solution on exit
 * Q        : source (right-hand side)
 * max_iter : iteration limit
 * eps_sq   : target squared residual (absolute if rel_prec == 0,
 *            relative to ||Q||^2 if rel_prec == 1)
 * N        : field length (VOLUME for full lattice, else half-lattice
 *            workspace is allocated)
 * f        : matrix-vector product
 *
 * Returns the iteration count (+1) on convergence, -1 if max_iter is
 * reached without convergence.
 *
 * Fix: the non-converged exit used to `return(1)`, which callers could
 * not distinguish from convergence in a single iteration; it now returns
 * -1, matching cg_her_nd in this file and the stale `return(-1)` that
 * had been left commented out at this spot.
 */
int pcg_her(spinor * const P, spinor * const Q, const int max_iter,
            double eps_sq, const int rel_prec, const int N, matrix_mult f) {
  double normsp, pro, pro2, err, alpha_cg, beta_cg, squarenorm;
  int iteration;
  spinor ** solver_field = NULL;
  /* workspace map: [0] = x, [1] = r, [2] = p, [3] = z (preconditioned
     residual; initially holds Q), [4] = f(p) */
  const int nr_sf = 5;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  squarenorm = square_norm(Q, N, 1);

  /*        !!!!   INITIALIZATION    !!!! */
  assign(solver_field[0], P, N);
  /*        (r_0,r_0)  =  normsq         */
  normsp = square_norm(P, N, 1);
  assign(solver_field[3], Q, N);

  /* initialize residue r and search vector p */
  if(normsp==0){
    /* if a starting solution vector equal to zero is chosen */
    /* r0 */
    assign(solver_field[1], solver_field[3], N);
    /* p0 */
  }
  else{
    /* if a starting solution vector different from zero is chosen */
    /* r0 = b - A x0 */
    f(solver_field[2], solver_field[0]);
    diff(solver_field[1], solver_field[3], solver_field[2], N);
  }
  /* z0 = M^-1 r0 */
  invert_eigenvalue_part(solver_field[3], solver_field[1], 10, N);
  /* p0 = z0 */
  assign(solver_field[2], solver_field[3], N);

  /* Is this really real? */
  pro2 = scalar_prod_r(solver_field[1], solver_field[3], N, 1);

  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {
    /* A p */
    f(solver_field[4], solver_field[2]);
    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);

    /*  Compute alpha_cg(i+1)   */
    alpha_cg=pro2/pro;

    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(solver_field[0], solver_field[2], alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision is reached ... */
    err=square_norm(solver_field[1], N, 1);
    if(g_debug_level > 1 && g_proc_id == g_stdio_proc) {
      printf("%d\t%g\n",iteration,err); fflush( stdout);
    }

    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) {
      assign(P, solver_field[0], N);
      g_sloppy_precision = 0;
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }
#ifdef _USE_HALFSPINOR
    /* switch to half-precision hopping once the residual is small (or the
       solve has dragged on past 1400 iterations) */
    if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*squarenorm) && (rel_prec == 1)) || iteration > 1400) {
      g_sloppy_precision = 1;
      if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
        printf("sloppy precision on\n"); fflush( stdout);
      }
    }
#endif
    /* z_j */
    beta_cg = 1/pro2;
    /* invert_eigenvalue_part(solver_field[3], solver_field[1], 10, N); */
    /* Compute beta_cg(i+1)
       Compute p_(i+1) = r_i+1 + beta_(i+1) p_i  */
    pro2 = scalar_prod_r(solver_field[1], solver_field[3], N, 1);
    beta_cg *= pro2;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[3], N);
  }

  /* not converged within max_iter: hand back the best iterate and signal
     failure the same way the other CG variants in this file do */
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
/* Even/odd preconditioned inversion of the (mass non-degenerate)
 * twisted-mass flavour doublet operator.
 *
 * Even_new_*/Odd_new_* : output propagator halves (strange/charm flavours)
 * Even_*/Odd_*         : source halves
 * precision, max_iter  : solver tolerance (squared) and iteration cap
 * solver_flag          : unused here — the CPU path always runs cg_her_nd
 *                        on the squared hermitian operator
 * rel_prec             : 0 = absolute, 1 = relative stopping criterion
 *
 * Scheme: solve on the odd sites with the Schur-preconditioned operator,
 * then reconstruct the even sites. With HAVE_GPU+TEMPORALGAUGE the gauge
 * field is first rotated to temporal gauge (and all source fields rotated
 * with it), the mixed-precision GPU solver is used, and everything is
 * rotated back afterwards.
 *
 * Returns the solver's iteration count (-1 if the solver did not converge).
 */
int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
                      spinor * const Even_new_c, spinor * const Odd_new_c,
                      spinor * const Even_s, spinor * const Odd_s,
                      spinor * const Even_c, spinor * const Odd_c,
                      const double precision, const int max_iter,
                      const int solver_flag, const int rel_prec) {

  int iter = 0;

#ifdef HAVE_GPU
# ifdef TEMPORALGAUGE
  /* initialize temporal gauge here */
  int retval;
  double dret1, dret2;
  double plaquette1 = 0.0;
  double plaquette2 = 0.0;

  if (usegpu_flag) {
    /* need VOLUME here (not N=VOLUME/2)*/
    if ((retval = init_temporalgauge_trafo(VOLUME, g_gauge_field)) != 0 ) {   // initializes the transformation matrices
      if (g_proc_id == 0) printf("Error while gauge fixing to temporal gauge. Aborting...\n");   // g_tempgauge_field as a copy of g_gauge_field
      exit(200);
    }

    /* do trafo */
    /* the plaquette is gauge invariant, so printing it before/after is a
       sanity check on the transformation */
    plaquette1 = measure_plaquette(g_gauge_field);
    apply_gtrafo(g_gauge_field, g_trafo);   // transformation of the gauge field
    plaquette2 = measure_plaquette(g_gauge_field);
    if (g_proc_id == 0) printf("\tPlaquette before gauge fixing: %.16e\n", plaquette1/6./VOLUME);
    if (g_proc_id == 0) printf("\tPlaquette after gauge fixing: %.16e\n", plaquette2/6./VOLUME);

    /* do trafo to odd_s part of source */
    /* square norms before/after are another invariance check */
    dret1 = square_norm(Odd_s, VOLUME/2 , 1);
    apply_gtrafo_spinor_odd(Odd_s, g_trafo);   // odd spinor transformation, strange
    dret2 = square_norm(Odd_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    /* do trafo to odd_c part of source */
    dret1 = square_norm(Odd_c, VOLUME/2 , 1);
    apply_gtrafo_spinor_odd(Odd_c, g_trafo);   // odd spinor transformation, charm
    dret2 = square_norm(Odd_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    /* do trafo to even_s part of source */
    dret1 = square_norm(Even_s, VOLUME/2 , 1);
    apply_gtrafo_spinor_even(Even_s, g_trafo);   // even spinor transformation, strange
    dret2 = square_norm(Even_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    /* do trafo to even_c part of source */
    dret1 = square_norm(Even_c, VOLUME/2 , 1);
    apply_gtrafo_spinor_even(Even_c, g_trafo);   // even spinor transformation, charm
    dret2 = square_norm(Even_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

#  ifdef MPI
    xchange_gauge(g_gauge_field);
#  endif
  }
# endif
#endif /* HAVE_GPU*/

  /* here comes the inversion using even/odd preconditioning */
  if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}

  /* prepare the odd source: b_o' = M_ee^-1-coupled even source hopped to
     the odd sites, plus the odd source itself */
  M_ee_inv_ndpsi(Even_new_s, Even_new_c, Even_s, Even_c, g_mubar, g_epsbar);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);

  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing */
  assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2);
  assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2);

  /* Do the inversion with the preconditioned  */
  /* matrix to get the odd sites               */

  /* Here we invert the hermitean operator squared */
  if(g_proc_id == 0) {
    printf("# Using CG for TMWILSON flavour doublet!\n");
    fflush(stdout);
  }
  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
  gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2);

#ifdef HAVE_GPU
  if (usegpu_flag) {   // GPU, mixed precision solver
# if defined(MPI) && defined(PARALLELT)
    iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                            max_iter, precision, rel_prec);
# elif !defined(MPI) && !defined(PARALLELT)
    iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                            max_iter, precision, rel_prec);
# else
    printf("MPI and/or PARALLELT are not appropriately set for the GPU implementation. Aborting...\n");
    exit(-1);
# endif
  }
  else {   // CPU, conjugate gradient
    iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                     max_iter, precision, rel_prec, VOLUME/2, &Qtm_pm_ndpsi);
  }
#else   // CPU, conjugate gradient
  iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                   max_iter, precision, rel_prec, VOLUME/2, &Qtm_pm_ndpsi);
#endif

  /* CG solved (Q^dagger Q) x = b; one application of Q^dagger recovers
     the solution of Q x = b */
  Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c, Odd_new_s, Odd_new_c);

  /* Reconstruct the even sites */
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
  M_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
                 g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], g_mubar, g_epsbar);

  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing */
  assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2);
  assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2);

#ifdef HAVE_GPU
  /* return from temporal gauge again */
# ifdef TEMPORALGAUGE
  if (usegpu_flag) {
    /* undo trafo */
    /* apply_inv_gtrafo(g_gauge_field, g_trafo);*/
    /* copy back the saved original field located in g_tempgauge_field -> update necessary*/
    plaquette1 = measure_plaquette(g_gauge_field);
    copy_gauge_field(g_gauge_field, g_tempgauge_field);
    g_update_gauge_copy = 1;
    plaquette2 = measure_plaquette(g_gauge_field);
    if (g_proc_id == 0) printf("\tPlaquette before inverse gauge fixing: %.16e\n", plaquette1/6./VOLUME);
    if (g_proc_id == 0) printf("\tPlaquette after inverse gauge fixing: %.16e\n", plaquette2/6./VOLUME);

    /* undo trafo to source Even_s */
    dret1 = square_norm(Even_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_s, g_trafo);
    dret2 = square_norm(Even_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    /* undo trafo to source Even_c */
    dret1 = square_norm(Even_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_c, g_trafo);
    dret2 = square_norm(Even_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    /* undo trafo to source Odd_s */
    dret1 = square_norm(Odd_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_s, g_trafo);
    dret2 = square_norm(Odd_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    /* undo trafo to source Odd_c */
    dret1 = square_norm(Odd_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_c, g_trafo);
    dret2 = square_norm(Odd_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    // Even_new_s
    dret1 = square_norm(Even_new_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_new_s, g_trafo);
    dret2 = square_norm(Even_new_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    // Even_new_c
    dret1 = square_norm(Even_new_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_new_c, g_trafo);
    dret2 = square_norm(Even_new_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    // Odd_new_s
    dret1 = square_norm(Odd_new_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_new_s, g_trafo);
    dret2 = square_norm(Odd_new_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    // Odd_new_c
    dret1 = square_norm(Odd_new_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_new_c, g_trafo);
    dret2 = square_norm(Odd_new_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing: %.16e\n", dret2);

    finalize_temporalgauge();
#  ifdef MPI
    xchange_gauge(g_gauge_field);
#  endif
  }
# endif
#endif
  return(iter);
}
/* k output , l input */
/* CG solve of Qtm_pm_psi(x) = l on the half lattice (VOLUME/2), with the
 * result accumulated into k (k is also the initial guess).
 *
 * Two modes, selected by the global g_sloppy_precision_flag:
 *   - mixed precision (flag == 1): up to 20 outer defect-correction
 *     cycles; each inner CG runs with g_sloppy_precision = 1 and solves
 *     for a correction x to the current true residual delta, which is
 *     then added to k and the true residual recomputed in full precision.
 *   - plain CG (flag != 1): a single full-precision CG loop updating k
 *     directly.
 *
 * eps_sq is the squared target residual; rel_prec selects absolute (0)
 * vs. relative-to-||l||^2 (1) stopping. Returns the iteration count
 * (0 if the initial guess already satisfies the criterion in mixed mode).
 *
 * NOTE(review): locals are `static`, and scratch comes from the shared
 * g_spinor_field[DUM_SOLVER..] pool — this routine is not reentrant.
 */
int solve_cg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec) {

  static double normsq, pro, err, alpha_cg, beta_cg, squarenorm, sqnrm, sqnrm2;
  int iteration = 0, i, j;
  int save_sloppy = g_sloppy_precision; /* restored before returning (except the early return(0)) */
  double atime, etime, flops;
  spinor *x, *delta, *y;

  /* initialize residue r and search vector p */
#ifdef MPI
  atime = MPI_Wtime();
#else
  atime = ((double)clock())/((double)(CLOCKS_PER_SEC));
#endif
  squarenorm = square_norm(l, VOLUME/2, 1);

  if(g_sloppy_precision_flag == 1) {
    delta = g_spinor_field[DUM_SOLVER+3];
    x = g_spinor_field[DUM_SOLVER+4];
    y = g_spinor_field[DUM_SOLVER+5];

    /* delta = l - A k : true residual of the initial guess */
    assign(delta, l, VOLUME/2);
    Qtm_pm_psi(y, k);
    diff(delta, l, y, VOLUME/2);
    sqnrm = square_norm(delta, VOLUME/2, 1);
    if(((sqnrm <= eps_sq) && (rel_prec == 0)) || ((sqnrm <= eps_sq*squarenorm) && (rel_prec == 1))) {
      return(0);
    }

    for(i = 0; i < 20; i++) {
      g_sloppy_precision = 1;
      /* main CG loop in lower precision
         here we invert on the correction: A x = delta */
      zero_spinor_field(x, VOLUME/2);
      assign(g_spinor_field[DUM_SOLVER+1], delta, VOLUME/2);
      assign(g_spinor_field[DUM_SOLVER+2], delta, VOLUME/2);
      sqnrm2 = sqnrm;
      for(j = 0; j <= ITER_MAX_CG; j++) {
        Qtm_pm_psi(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2]);
        pro = scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
        alpha_cg = sqnrm2 / pro;
        assign_add_mul_r(x, g_spinor_field[DUM_SOLVER+2], alpha_cg, VOLUME/2);

        /* new (sloppy) residual lands in DUM_SOLVER */
        assign_mul_add_r(g_spinor_field[DUM_SOLVER], -alpha_cg, g_spinor_field[DUM_SOLVER+1], VOLUME/2);

        err = square_norm(g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
        if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
          printf("inner CG: %d res^2 %g\n", iteration+j+1, err);
          fflush(stdout);
        }

        if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
          break;
        }
        beta_cg = err / sqnrm2;
        assign_mul_add_r(g_spinor_field[DUM_SOLVER+2], beta_cg, g_spinor_field[DUM_SOLVER], VOLUME/2);
        assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER], VOLUME/2);
        sqnrm2 = err;
      }
      /* end main CG loop */
      iteration += j;
      g_sloppy_precision = 0;

      /* apply the correction and recompute the true residual in full precision */
      add(k, k, x, VOLUME/2);

      Qtm_pm_psi(y, x);
      diff(delta, delta, y, VOLUME/2);
      sqnrm = square_norm(delta, VOLUME/2, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("mixed CG(linsolve): true residue %d\t%g\t\n",iteration, sqnrm); fflush( stdout);
      }

      if(((sqnrm <= eps_sq) && (rel_prec == 0)) || ((sqnrm <= eps_sq*squarenorm) && (rel_prec == 1))) {
        break;
      }
      iteration++;
    }
  }
  else {
    /* plain full-precision CG: r0 = l - A k, p0 = r0 */
    Qtm_pm_psi(g_spinor_field[DUM_SOLVER], k);
    diff(g_spinor_field[DUM_SOLVER+1], l, g_spinor_field[DUM_SOLVER], VOLUME/2);
    assign(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2);
    normsq=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);

    /* main loop */
    for(iteration = 1; iteration <= ITER_MAX_CG; iteration++) {
      Qtm_pm_psi(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2]);
      pro=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
      alpha_cg=normsq/pro;
      assign_add_mul_r(k, g_spinor_field[DUM_SOLVER+2], alpha_cg, VOLUME/2);
      assign_mul_add_r(g_spinor_field[DUM_SOLVER], -alpha_cg, g_spinor_field[DUM_SOLVER+1], VOLUME/2);
      err=square_norm(g_spinor_field[DUM_SOLVER], VOLUME/2, 1);

      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
        printf("CG (linsolve): iterations: %d res^2 %e\n", iteration, err);
        fflush(stdout);
      }

      if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
        break;
      }
      beta_cg = err/normsq;
      assign_mul_add_r(g_spinor_field[DUM_SOLVER+2], beta_cg, g_spinor_field[DUM_SOLVER], VOLUME/2);
      assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER], VOLUME/2);
      normsq=err;
    }
  }
#ifdef MPI
  etime = MPI_Wtime();
#else
  etime = ((double)clock())/((double)(CLOCKS_PER_SEC));
#endif
  /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
  /* 2*1320.0 because the linalg is over VOLUME/2 */
  flops = (2*(2*1320.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1320.0+2*3*4) + 10*3*4))*VOLUME/2/1.0e6f;
  if(g_proc_id==0 && g_debug_level > 0) {
    printf("CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime);
    printf("CG: flopcount: t/s: %1.4e mflops_local: %.1f mflops: %.1f\n",
           etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime));
  }
  g_sloppy_precision = save_sloppy;
  return(iteration);
}
/* P output = solution , Q input = source */
/* Multi-shift CG (CG-M) for the shifted systems
 *   (M + sigma_im) x_im = Q,  sigma_im = shifts[im]^2 (relative to shift 0),
 * solving all no_shifts systems simultaneously from a single Krylov space.
 *
 * P                  : array of no_shifts solution fields (zero-initialized
 *                      here — only implemented for zero initial guess)
 * Q                  : common source
 * solver_params      : operator M_psi, shifts, max_iter, tolerance, sdim
 * cgmms_reached_prec : out, final squared residual of the base system
 *
 * The shifted recurrences (zita/zitam1, alphas, betas, ps_mms_solver) are
 * updated in lock-step with the base CG; shifts whose correction norm has
 * fallen below the target are dropped (no_shifts--) every 20 iterations.
 * Returns iterations+1 on convergence, -1 if max_iter was exhausted.
 */
int cg_mms_tm(spinor ** const P, spinor * const Q, solver_params_t * solver_params, double * cgmms_reached_prec) {

  static double normsq, pro, err, squarenorm;
  int iteration, N = solver_params->sdim, no_shifts = solver_params->no_shifts;
  static double gamma, alpham1;
  spinor ** solver_field = NULL;
  double atime, etime;
  /* workspace map: [0] = r, [1] = p, [2] = M p */
  const int nr_sf = 3;

  atime = gettime();
  if(solver_params->sdim == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
    init_mms_tm(no_shifts, VOLUMEPLUSRAND);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
    init_mms_tm(no_shifts, VOLUMEPLUSRAND/2);
  }

  zero_spinor_field(P[0], N);
  alphas[0] = 1.0;
  betas[0] = 0.0;
  sigma[0] = solver_params->shifts[0]*solver_params->shifts[0];
  if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", 0, sigma[0]);

  /* the remaining shifts are expressed relative to the zeroth one */
  for(int im = 1; im < no_shifts; im++) {
    sigma[im] = solver_params->shifts[im]*solver_params->shifts[im] - sigma[0];
    if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", im, sigma[im]);

    // these will be the result spinor fields
    zero_spinor_field(P[im], N);
    // these are intermediate fields
    assign(ps_mms_solver[im-1], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  /* currently only implemented for P=0 */
  squarenorm = square_norm(Q, N, 1);
  /* if a starting solution vector equal to zero is chosen */
  assign(solver_field[0], Q, N);
  assign(solver_field[1], Q, N);
  normsq = squarenorm;

  /* main loop */
  for(iteration = 0; iteration < solver_params->max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    solver_params->M_psi(solver_field[2], solver_field[1]);
    // add the zero's shift
    assign_add_mul_r(solver_field[2], solver_field[1], sigma[0], N);
    pro = scalar_prod_r(solver_field[1], solver_field[2], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alphas[0](i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alphas[0];

    /* Compute alphas[0](i+1) */
    alphas[0] = normsq/pro;
    for(int im = 1; im < no_shifts; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */
      gamma = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im])
                                + alpham1*(1.+sigma[im]*alphas[0]));

      // Now zita(i-1) is put equal to the old zita(i)
      zitam1[im] = zita[im];
      // Now zita(i+1) is updated
      zita[im] = gamma;
      // Update of alphas(i) = alphas[0](i)*zita(i+1)/zita(i)
      alphas[im] = alphas[0]*zita[im]/zitam1[im];

      // Compute xs(i+1) = xs(i) + alphas(i)*ps(i)
      assign_add_mul_r(P[im], ps_mms_solver[im-1], alphas[im], N);

      // in the CG the corrections are decreasing with the iteration number increasing
      // therefore, we can remove shifts when the norm of the correction vector
      // falls below a threshold
      // this is useful for computing time and needed, because otherwise
      // zita might get smaller than DOUBLE_EPS and, hence, zero
      if(iteration > 0 && (iteration % 20 == 0) && (im == no_shifts-1)) {
        double sn = square_norm(ps_mms_solver[im-1], N, 1);
        if(alphas[no_shifts-1]*alphas[no_shifts-1]*sn <= solver_params->squared_solver_prec) {
          no_shifts--;
          if(g_debug_level > 2 && g_proc_id == 0) {
            printf("# CGMMS: at iteration %d removed one shift, %d remaining\n", iteration, no_shifts);
          }
        }
      }
    }

    /*  Compute x_(i+1) = x_i + alphas[0](i+1) p_i    */
    assign_add_mul_r(P[0], solver_field[1], alphas[0], N);
    /*  Compute r_(i+1) = r_i - alphas[0](i+1) Qp_i   */
    assign_add_mul_r(solver_field[0], solver_field[2], -alphas[0], N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[0], N, 1);

    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("# CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
        ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0)) ||
        (iteration == solver_params->max_iter -1) ) {
      /* FIXME temporary output of precision until a better solution can be found */
      *cgmms_reached_prec = err;
      break;
    }

    /* Compute betas[0](i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    betas[0] = err/normsq;
    assign_mul_add_r(solver_field[1], betas[0], solver_field[0], N);
    normsq = err;

    /* Compute betas(i+1) = betas[0](i+1)*(zita(i+1)*alphas(i))/(zita(i)*alphas[0](i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(int im = 1; im < no_shifts; im++) {
      betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
      assign_mul_add_mul_r(ps_mms_solver[im-1], solver_field[0], betas[im], zita[im], N);
    }
  }
  etime = gettime();
  g_sloppy_precision = 0;
  /* map "hit the iteration cap" to -1, otherwise report iterations+1 */
  if(iteration == solver_params->max_iter -1) iteration = -1;
  else iteration++;

  if(g_debug_level > 0 && g_proc_id == 0) {
    printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n",
           solver_params->no_shifts, iteration, solver_params->squared_solver_prec, etime - atime);
  }

  finalize_solver(solver_field, nr_sf);
  return(iteration);
}
/* P output = solution , Q input = source */
/**
 * Multi-mass-shift (MMS) conjugate gradient for the twisted-mass operator.
 *
 * Solves f(x) = Q for the base mass and, simultaneously, for all extra
 * masses listed in the globals g_extra_masses[0..g_no_extra_masses-1],
 * using the shifted-CG coefficient recursions (zita/zitam1/alphas/betas).
 * Only a zero starting guess is supported (P is zeroed on entry; see the
 * "currently only implemented for P=0" branches below).
 *
 * @param P        output: solution for the base mass (overwritten).
 * @param Q        input: source vector (unchanged).
 * @param max_iter maximum number of CG iterations.
 * @param eps_sq   target squared residual norm.
 * @param rel_prec if 1, eps_sq is relative to ||Q||^2; if 0, absolute.
 * @param N        local number of sites the linear algebra runs over.
 * @param f        the (squared, hermitian) matrix-vector product to invert.
 *
 * @return iteration+1 on convergence, -1 if max_iter was exhausted.
 *
 * Side effects: on convergence, writes one propagator file per mass
 * (base mass plus each extra mass) via the WRITER interface, resets
 * g_sloppy_precision, and restores g_mu from the value saved on entry.
 * Uses global shift workspaces sigma/zita/zitam1/alphas/betas and
 * xs_mms_solver/ps_mms_solver set up by init_mms_tm().
 */
int cg_mms_tm(spinor * const P, spinor * const Q, const int max_iter,
              double eps_sq, const int rel_prec, const int N, matrix_mult f) {
  static double normsq, pro, err, alpha_cg = 1., beta_cg = 0., squarenorm;
  int iteration, im, append = 0;
  char filename[100];
  static double gamma, alpham1;
  int const cg_mms_default_precision = 32;
  double tmp_mu = g_mu;  /* save g_mu so it can be restored before writing results */
  WRITER * writer = NULL;
  paramsInverterInfo *inverterInfo = NULL;
  paramsPropagatorFormat *propagatorFormat = NULL;
  spinor * temp_save;  /* points at the solution currently being written (base or shifted) */
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  init_mms_tm(g_no_extra_masses);

  /* currently only implemented for P=0 */
  zero_spinor_field(P, N);

  /* Value of the bare MMS-masses (\mu^2 - \mu_0^2):
     sigma[im] is the shift of the im-th extra mass relative to the base mass.
     The shifted solutions xs start at 0 (= P) and the shifted search
     directions ps start at the source Q, with unit zita coefficients. */
  for(im = 0; im < g_no_extra_masses; im++) {
    sigma[im] = g_extra_masses[im]*g_extra_masses[im] - g_mu*g_mu;
    assign(xs_mms_solver[im], P, N);
    assign(ps_mms_solver[im], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  squarenorm = square_norm(Q, N, 1);
  assign(solver_field[0], P, N);   /* solver_field[0] = current iterate x */
  /* normsp = square_norm(P, N, 1); */

  /* initialize residue r and search vector p */
  /* if(normsp == 0){ */
  /* currently only implemented for P=0 */
  if(1) {
    /* if a starting solution vector equal to zero is chosen:
       r_0 = p_0 = Q, so ||r_0||^2 = ||Q||^2 */
    assign(solver_field[1], Q, N);
    assign(solver_field[2], Q, N);
    normsq = square_norm(Q, N, 1);
  }
  else{
    /* if a starting solution vector different from zero is chosen:
       r_0 = Q - f(x_0), p_0 = r_0 (dead code while the guard above is if(1)) */
    f(solver_field[3], solver_field[0]);
    diff(solver_field[1], Q, solver_field[3], N);
    assign(solver_field[2], solver_field[1], N);
    normsq = square_norm(solver_field[2], N, 1);
  }

  /* main loop
     solver_field[0]=x, [1]=r, [2]=p, [3]=scratch, [4]=f(p) */
  for(iteration = 0; iteration < max_iter; iteration++) {
    /* Q^2*p and then (p,Q^2*p) */
    f(solver_field[4], solver_field[2]);
    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);

    /* For the update of the coeff. of the shifted pol. we need
       alpha_cg(i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alpha_cg;

    /* Compute alpha_cg(i+1) */
    alpha_cg = normsq/pro;

    /* Update the shifted-system coefficients and solutions BEFORE the base
       residual is updated: the recursion uses the pre-update normsq/beta_cg. */
    for(im = 0; im < g_no_extra_masses; im++) {
      /* Now gamma is a temp variable that corresponds to zita(i+1) */
      gamma = zita[im]*alpham1/(alpha_cg*beta_cg*(1.-zita[im]/zitam1[im])
                                + alpham1*(1.+sigma[im]*alpha_cg));
      /* Now zita(i-1) is put equal to the old zita(i) */
      zitam1[im] = zita[im];
      /* Now zita(i+1) is updated */
      zita[im] = gamma;
      /* Update of alphas(i) = alpha_cg(i)*zita(i+1)/zita(i) */
      alphas[im] = alpha_cg*zita[im]/zitam1[im];
      /* Compute xs(i+1) = xs(i) + alphas(i)*ps(i) */
      assign_add_mul_r(xs_mms_solver[im], ps_mms_solver[im], alphas[im], N);
    }

    /* Compute x_(i+1) = x_i + alpha_cg(i+1) p_i */
    assign_add_mul_r(solver_field[0], solver_field[2], alpha_cg, N);
    /* Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision eps_sq is reached */
    err = square_norm(solver_field[1], N, 1);
    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= eps_sq) && (rel_prec == 0))
        || ((err <= eps_sq*squarenorm) && (rel_prec == 1)) ) {

      /* Converged: publish the base solution and recompute the true
         residual ||f(P) - Q||^2 for reporting (overwrites err). */
      assign(P, solver_field[0], N);
      f(solver_field[2], P);
      diff(solver_field[3], solver_field[2], Q, N);
      err = square_norm(solver_field[3], N, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("# CG MMS true residue at final iteration (%d) was %g.\n", iteration, err); fflush( stdout);
      }
      g_sloppy_precision = 0;
      g_mu = tmp_mu;

      /* save all the results of (Q^dagger Q)^(-1) \gamma_5 \phi */
      /* here ... */
      /* when im == -1 save the base mass */
      for(im = -1; im < g_no_extra_masses; im++) {
        if(im==-1) {
          temp_save=solver_field[0];
        }
        else {
          temp_save=xs_mms_solver[im];
        }

        /* Build the output filename; SourceInfo.type == 1 presumably means a
           volume/stochastic source (different naming scheme) — TODO confirm. */
        if(SourceInfo.type != 1) {
          if (PropInfo.splitted) {
            sprintf(filename, "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, im+1);
          }
          else {
            sprintf(filename, "%s.%.4d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, im+1);
          }
        }
        else {
          sprintf(filename, "%s.%.4d.%.5d.cgmms.%.2d.0", SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, im+1);
        }

        /* Rescale by (2 kappa)^2 before writing (kappa normalisation).
           NOTE(review): this modifies temp_save in place, i.e. it also
           rescales solver_field[0]/xs_mms_solver — intended only because the
           solutions are not reused after this point. */
        if(g_kappa != 0) {
          mul_r(temp_save, (2*g_kappa)*(2*g_kappa), temp_save, N);
        }

        append = !PropInfo.splitted;  /* unsplitted props share one file */
        construct_writer(&writer, filename, append);

        if (PropInfo.splitted || SourceInfo.ix == index_start) {
          /* Create the inverter info NOTE: always set to TWILSON=12 and 1 flavour (to be adjusted) */
          inverterInfo = construct_paramsInverterInfo(err, iteration+1, 12, 1);
          if (im == -1) {
            inverterInfo->cgmms_mass = inverterInfo->mu;
          }
          else {
            inverterInfo->cgmms_mass = g_extra_masses[im]/(2 * inverterInfo->kappa);
          }
          write_spinor_info(writer, PropInfo.format, inverterInfo, append);
          /* Create the propagatorFormat NOTE: always set to 1 flavour (to be adjusted) */
          propagatorFormat = construct_paramsPropagatorFormat(cg_mms_default_precision, 1);
          write_propagator_format(writer, propagatorFormat);
          free(inverterInfo);
          free(propagatorFormat);
        }

        /* Split the lexicographic solution into even/odd halves and write
           them in 32-bit precision (matches cg_mms_default_precision). */
        convert_lexic_to_eo(solver_field[2], solver_field[1], temp_save);
        write_spinor(writer, &solver_field[2], &solver_field[1], 1, 32);
        destruct_writer(writer);
      }
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }

    /* Compute beta_cg(i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i) */
    beta_cg = err/normsq;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[1], N);
    normsq = err;

    /* Compute betas(i+1) = beta_cg(i)*(zita(i+1)*alphas(i))/(zita(i)*alpha_cg(i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i) */
    for(im = 0; im < g_no_extra_masses; im++) {
      betas[im] = beta_cg*zita[im]*alphas[im]/(zitam1[im]*alpha_cg);
      assign_mul_add_mul_r(ps_mms_solver[im], solver_field[1], betas[im], zita[im], N);
    }
  }

  /* max_iter exhausted: return the best base solution found and signal failure */
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
/**
 * Chebyshev polynomial preconditioner: R = P_n(Q^2) S.
 *
 * Approximates the inverse of Q^2 (applied via Q_pm_psi) on the spectral
 * window [minev, maxev] by a Chebyshev series, built with the standard
 * forward three-term recursion T_j(x) = 2x T_{j-1}(x) - T_{j-2}(x) on the
 * operator mapped to [-1,1] via y = fact1*Q^2 + fact2.
 *
 * @param R    output spinor, receives the polynomial applied to S.
 * @param S    input spinor (unchanged).
 * @param prec stopping threshold: the series is truncated once the term
 *             norm |c_j T_j(Q^2) S|^2 drops below prec.
 * @param n    maximum polynomial order (upper bound on the recursion).
 *
 * NOTE(review): the spectral bounds are hard-coded below (maxev = 4.0,
 * minev = 0.1), overriding the initializers that used g_mu; presumably
 * tuned for a specific operator normalisation — confirm before reuse.
 *
 * Work spinors and the coefficient table c[] are allocated once on first
 * call and cached in static pointers (never freed — intentional caching).
 * NOTE(review): the calloc results are not checked, and c has room for
 * 1000 coefficients while get_c() fills only 100; callers must keep
 * n <= 100 to use meaningful coefficients — TODO confirm.
 */
void poly_precon(spinor * const R, spinor * const S, const double prec, const int n) {
  int j;
  double fact1, fact2, temp1, temp2, temp3, temp4, invmaxev = 1./4., maxev=4., tnorm, minev=g_mu*g_mu, auxnorm;
  static spinor *sv_, *sv, *d_, *d, *dd_, *dd, *aux_, *aux, *aux3_, *aux3;
  static int initp = 0;       /* one-time-initialisation guard for the static buffers */
  static double * c;          /* Chebyshev expansion coefficients, filled by get_c() */
  const int N = VOLUME;

  /* Hard-coded spectral window of Q^2 (overrides the g_mu-based initializer) */
  maxev = 4.0;
  invmaxev = 1./maxev;        /* kept for the commented-out normalised variants below */
  minev = 0.1;
  /* minev = 1.5*1.5*g_mu*g_mu; */

  if(initp == 0) {
    /* First call: allocate the coefficient table and five work spinors.
       With SSE enabled the raw pointers (suffix _) are manually aligned
       to ALIGN_BASE; otherwise they are used directly. */
    c = (double*)calloc(1000, sizeof(double));
#if (defined SSE || defined SSE2 || defined SSE3)
    sv_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    sv = (spinor *)(((unsigned long int)(sv_)+ALIGN_BASE)&~ALIGN_BASE);
    d_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    d = (spinor *)(((unsigned long int)(d_)+ALIGN_BASE)&~ALIGN_BASE);
    dd_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    dd = (spinor *)(((unsigned long int)(dd_)+ALIGN_BASE)&~ALIGN_BASE);
    aux_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    aux = (spinor *)(((unsigned long int)(aux_)+ALIGN_BASE)&~ALIGN_BASE);
    aux3_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    aux3 = (spinor *)(((unsigned long int)(aux3_)+ALIGN_BASE)&~ALIGN_BASE);
#else
    sv_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    sv = sv_;
    d_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    d = d_;
    dd_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    dd = dd_;
    aux_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    aux = aux_;
    aux3_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    aux3 = aux3_;
#endif
    /* Fill the first 100 expansion coefficients for the window [minev, maxev] */
    get_c(minev, maxev, c, 100);
    initp = 1;
  }

  /* Linear map of [minev, maxev] onto the Chebyshev interval:
     y = fact1*x + fact2 (the recursion below applies fact1*Q^2 + fact2) */
  fact1 = 4. / (maxev - minev);
  fact2 = -2 * (maxev + minev) / (maxev - minev);

  zero_spinor_field(&d[0], N);
  zero_spinor_field(&dd[0], N);
  assign(&aux3[0], &S[0], N);
  /* gamma5(&aux3[0], &S[0], N); */

  /* Use the adaptive precision version using the forward recursion
     for the Chebysheff polynomial */

  /* d = T_0(Q^2) */
  assign(&d[0], &aux3[0], N);
  /* dd = T_1(Q^2): apply the mapped operator, halved since T_1(y) = y */
  Q_pm_psi(&dd[0], &d[0]);
  /* mul_r(dd, invmaxev, dd, N); */
  /* norm_Q_sqr_psi(&dd[0], &d[0], g_m_D_psi, rnorm); */
  temp3 = fact1/2;
  temp4 = fact2/2;
  assign_mul_add_mul_r(&dd[0], &d[0], temp3, temp4, N);

  /* r = c_1 T_1(Q^2) + 1/2 c_0 */
  temp1 = c[1];
  temp2 = c[0]/2;
  mul_add_mul_r(&R[0], &dd[0], &d[0], temp1, temp2, N);

  temp1 = -1.0;  /* coefficient of the T_{j-2} term in the recursion */
  for (j=2; j<=n-1; j++) {
    /* aux = T_j(Q^2) = 2 Q^2 T_{j-1}(Q^2) - T_{j-2}(Q^2) */
    Q_pm_psi(&aux[0], &dd[0]);
    /* mul_r(aux, invmaxev, aux, N); */
    /* norm_Q_sqr_psi(&aux[0], &dd[0], g_m_D_psi, rnorm); */
    assign_mul_add_mul_add_mul_r(&aux[0],&dd[0],&d[0],fact1,fact2,temp1, N);
    /* r = r + c_j T_j(Q^2) */
    temp2=c[j];
    assign_add_mul_r(&R[0],&aux[0],temp2, N);
    /* The stopping criterion: tnorm = |c_j T_j(Q^2)|^2, i.e. the size of
       the term just added — stop when it falls below prec */
    tnorm = square_norm(aux, N, 1);
    tnorm *= (temp2*temp2);
    auxnorm = square_norm(R, N, 1);  /* diagnostic only */
    if(g_proc_id == g_stdio_proc) {
      printf("j= %d\t|c T|^2= %g\t%g\t c_j= %g\t|r|^2= %g\n",j,tnorm,prec, temp2,auxnorm);
      fflush( stdout);
      fflush(stdout);  /* NOTE(review): duplicated flush, harmless */
    }
    if(tnorm < prec) break;
    /* d = T_{j-1}(Q^2) */
    assign(&d[0], &dd[0], N);
    /* dd = T_{j}(Q^2) */
    assign(&dd[0], &aux[0], N);
  }
  if(g_proc_id == g_stdio_proc) {
    printf("Order of Chebysheff approximation = %d\n",j);
    fflush( stdout);
  }

  /* r = Q r */
  /* assign(aux, R, N); */
  /* Q_minus_psi(R, aux); */
  return;
}