int tmLQCD_invert(double * const propagator, double * const source, const int op_id, const int write_prop) { unsigned int index_start = 0; g_mu = 0.; if(!tmLQCD_invert_initialised) { fprintf(stderr, "tmLQCD_invert: tmLQCD_inver_init must be called first. Aborting...\n"); return(-1); } if(op_id < 0 || op_id >= no_operators) { fprintf(stderr, "tmLQCD_invert: op_id=%d not in valid range. Aborting...\n", op_id); return(-1); } operator_list[op_id].sr0 = g_spinor_field[0]; operator_list[op_id].sr1 = g_spinor_field[1]; operator_list[op_id].prop0 = g_spinor_field[2]; operator_list[op_id].prop1 = g_spinor_field[3]; zero_spinor_field(operator_list[op_id].prop0, VOLUME / 2); zero_spinor_field(operator_list[op_id].prop1, VOLUME / 2); // convert to even/odd order convert_lexic_to_eo(operator_list[op_id].sr0, operator_list[op_id].sr1, (spinor*) source); // invert operator_list[op_id].inverter(op_id, index_start, write_prop); // convert back to lexicographic order convert_eo_to_lexic((spinor*) propagator, operator_list[op_id].prop0, operator_list[op_id].prop1); return(0); }
// if even_odd_flag set void M_full_quda(spinor * const Even_new, spinor * const Odd_new, spinor * const Even, spinor * const Odd) { inv_param.kappa = g_kappa; inv_param.mu = fabs(g_mu); inv_param.epsilon = 0.0; // IMPORTANT: use opposite TM flavor since gamma5 -> -gamma5 (until LXLYLZT prob. resolved) inv_param.twist_flavor = (g_mu < 0.0 ? QUDA_TWIST_PLUS : QUDA_TWIST_MINUS); inv_param.Ls = (inv_param.twist_flavor == QUDA_TWIST_NONDEG_DOUBLET || inv_param.twist_flavor == QUDA_TWIST_DEG_DOUBLET ) ? 2 : 1; void *spinorIn = (void*)g_spinor_field[DUM_DERI]; // source void *spinorOut = (void*)g_spinor_field[DUM_DERI+1]; // solution // reorder spinor convert_eo_to_lexic( spinorIn, Even, Odd ); reorder_spinor_toQuda( (double*)spinorIn, inv_param.cpu_prec, 0, NULL ); // multiply inv_param.solution_type = QUDA_MAT_SOLUTION; MatQuda( spinorOut, spinorIn, &inv_param); // reorder spinor reorder_spinor_fromQuda( (double*)spinorOut, inv_param.cpu_prec, 0, NULL ); convert_lexic_to_eo( Even_new, Odd_new, spinorOut ); }
/*
 * Even/odd preconditioned CG used as a smoother: P = approximate solution
 * for the lexicographically ordered source Q.
 *
 * P      output field (lexicographic order)
 * Q      input field (lexicographic order)
 * Ncy    passed to cg_her as the maximal number of iterations
 * dummy  unused
 *
 * g_mu is temporarily replaced by g_mu1 and restored before returning.
 * Five half-volume work fields are allocated and released per call:
 *   [0] even part of Q, later reused as scratch in the clover branch
 *   [1] odd part of Q
 *   [2] even-site term built from [0]; ends up as the even solution
 *   [3] odd solution
 *   [4] odd right-hand side, later the even-site correction
 */
void CGeoSmoother(spinor * const P, spinor * const Q, const int Ncy, const int dummy) {
  spinor ** solver_field = NULL;
  const int nr_sf = 5;
  double musave = g_mu;

  g_mu = g_mu1;
  init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);

  convert_lexic_to_eo(solver_field[0], solver_field[1], Q);
  if(g_c_sw > 0) {
    assign_mul_one_sw_pm_imu_inv(EE, solver_field[2], solver_field[0], g_mu);
  }
  else {
    assign_mul_one_pm_imu_inv(solver_field[2], solver_field[0], +1., VOLUME/2);
  }

  Hopping_Matrix(OE, solver_field[4], solver_field[2]);
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_mul_add_r(solver_field[4], +1., solver_field[1], VOLUME/2);

  /* Do the inversion with the preconditioned  */
  /* matrix to get the odd sites               */
  gamma5(solver_field[4], solver_field[4], VOLUME/2);
  if(g_c_sw > 0) {
    cg_her(solver_field[3], solver_field[4], Ncy, 1.e-8, 1, VOLUME/2, &Qsw_pm_psi);
    Qsw_minus_psi(solver_field[3], solver_field[3]);

    /* Reconstruct the even sites.                                        */
    /* solver_field[2] must survive until the final sum below, exactly as */
    /* in the non-clover branch, so the hopping term goes into            */
    /* solver_field[0]: the even source stored there is no longer needed. */
    /* NOTE(review): relies on assign_mul_one_sw_pm_imu_inv writing only  */
    /* its output argument, as its use above suggests - confirm.          */
    Hopping_Matrix(EO, solver_field[0], solver_field[3]);
    assign_mul_one_sw_pm_imu_inv(EE, solver_field[4], solver_field[0], g_mu);
  }
  else {
    cg_her(solver_field[3], solver_field[4], Ncy, 1.e-8, 1, VOLUME/2, &Qtm_pm_psi);
    Qtm_minus_psi(solver_field[3], solver_field[3]);

    /* Reconstruct the even sites                */
    Hopping_Matrix(EO, solver_field[4], solver_field[3]);
    mul_one_pm_imu_inv(solver_field[4], +1., VOLUME/2);
  }

  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_add_mul_r(solver_field[2], solver_field[4], +1., VOLUME/2);

  convert_eo_to_lexic(P, solver_field[2], solver_field[3]);
  g_mu = musave;
  finalize_solver(solver_field, nr_sf);
  return;
}
/*
 * Multi-mass-shift CG.
 *
 * P output = solution , Q input = source
 *
 * Solves with the operator f for the base mass and, simultaneously, for the
 * g_no_extra_masses shifted systems with shifts
 * sigma[im] = g_extra_masses[im]^2 - g_mu^2.  Only a zero starting vector is
 * supported: P is zeroed on entry.
 *
 * max_iter  maximal number of iterations
 * eps_sq    target squared residue; compared against the squared residue
 *           itself if rel_prec == 0, or against eps_sq*|Q|^2 if rel_prec == 1
 * N         number of sites passed to the linear-algebra routines
 * f         matrix multiplication routine f(out, in)
 *
 * On convergence the base solution is copied to P and every solution (base
 * mass first, then the shifted ones) is rescaled by (2*g_kappa)^2 if
 * g_kappa != 0 and written to a file whose name is derived from SourceInfo;
 * the number of iterations is returned.  If max_iter is reached without
 * convergence, P receives the current base iterate, nothing is written and
 * -1 is returned.
 */
int cg_mms_tm(spinor * const P, spinor * const Q, const int max_iter,
              double eps_sq, const int rel_prec, const int N, matrix_mult f) {
  /* alpha_cg and beta_cg must start from 1 and 0 on EVERY call, so they are */
  /* plain automatic variables rather than function-level statics.           */
  double normsq, pro, err, alpha_cg = 1., beta_cg = 0., squarenorm;
  double gamma, alpham1;
  int iteration, im, append = 0;
  char filename[100];
  int const cg_mms_default_precision = 32;
  double tmp_mu = g_mu;
  WRITER * writer = NULL;
  paramsInverterInfo *inverterInfo = NULL;
  paramsPropagatorFormat *propagatorFormat = NULL;
  spinor * temp_save; /* points to the solution currently being saved */
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  init_mms_tm(g_no_extra_masses);

  /* currently only implemented for P=0 */
  zero_spinor_field(P, N);
  /*  Value of the bare MMS-masses (\mu^2 - \mu_0^2) */
  for(im = 0; im < g_no_extra_masses; im++) {
    sigma[im] = g_extra_masses[im]*g_extra_masses[im] - g_mu*g_mu;
    assign(xs_mms_solver[im], P, N);
    assign(ps_mms_solver[im], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  squarenorm = square_norm(Q, N, 1);
  assign(solver_field[0], P, N);

  /* initialize residue r and search vector p:                        */
  /* the starting vector is zero, hence r = p = Q. A non-zero start   */
  /* would need r = Q - f(P) instead and is not implemented.          */
  assign(solver_field[1], Q, N);
  assign(solver_field[2], Q, N);
  normsq = square_norm(Q, N, 1);

  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    f(solver_field[4], solver_field[2]);
    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alpha_cg(i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alpha_cg;

    /* Compute alpha_cg(i+1) */
    alpha_cg = normsq/pro;
    for(im = 0; im < g_no_extra_masses; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */
      gamma = zita[im]*alpham1/(alpha_cg*beta_cg*(1.-zita[im]/zitam1[im])
                                + alpham1*(1.+sigma[im]*alpha_cg));

      /* Now zita(i-1) is put equal to the old zita(i) */
      zitam1[im] = zita[im];
      /* Now zita(i+1) is updated */
      zita[im] = gamma;
      /* Update of alphas(i) = alpha_cg(i)*zita(i+1)/zita(i) */
      alphas[im] = alpha_cg*zita[im]/zitam1[im];
      /* Compute xs(i+1) = xs(i) + alphas(i)*ps(i) */
      assign_add_mul_r(xs_mms_solver[im], ps_mms_solver[im], alphas[im], N);
    }

    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(solver_field[0], solver_field[2], alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[1], N, 1);
    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("CGMMS iteration: %d residue: %g\n", iteration, err);
      fflush( stdout );
    }

    if( ((err <= eps_sq) && (rel_prec == 0)) ||
        ((err <= eps_sq*squarenorm) && (rel_prec == 1)) ) {

      /* recompute the true residue of the base system for reporting */
      assign(P, solver_field[0], N);
      f(solver_field[2], P);
      diff(solver_field[3], solver_field[2], Q, N);
      err = square_norm(solver_field[3], N, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("# CG MMS true residue at final iteration (%d) was %g.\n", iteration, err);
        fflush( stdout);
      }
      g_sloppy_precision = 0;
      g_mu = tmp_mu;

      /* save all the results of (Q^dagger Q)^(-1) \gamma_5 \phi */
      /* here ... */
      /* when im == -1 save the base mass*/
      for(im = -1; im < g_no_extra_masses; im++) {
        if(im == -1) {
          temp_save = solver_field[0];
        }
        else {
          temp_save = xs_mms_solver[im];
        }

        /* bounded formatting: the basename length is not under our control */
        if(SourceInfo.type != 1) {
          if (PropInfo.splitted) {
            snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted",
                     SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, im+1);
          }
          else {
            snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.cgmms.%.2d.inverted",
                     SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, im+1);
          }
        }
        else {
          snprintf(filename, sizeof(filename), "%s.%.4d.%.5d.cgmms.%.2d.0",
                   SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, im+1);
        }

        if(g_kappa != 0) {
          mul_r(temp_save, (2*g_kappa)*(2*g_kappa), temp_save, N);
        }

        append = !PropInfo.splitted;

        construct_writer(&writer, filename, append);

        if (PropInfo.splitted || SourceInfo.ix == index_start) {
          /* Create the inverter info NOTE: always set to TWILSON=12 and 1 flavour (to be adjusted) */
          inverterInfo = construct_paramsInverterInfo(err, iteration+1, 12, 1);
          if (im == -1) {
            inverterInfo->cgmms_mass = inverterInfo->mu;
          }
          else {
            inverterInfo->cgmms_mass = g_extra_masses[im]/(2 * inverterInfo->kappa);
          }
          write_spinor_info(writer, PropInfo.format, inverterInfo, append);
          /* Create the propagatorFormat NOTE: always set to 1 flavour (to be adjusted) */
          propagatorFormat = construct_paramsPropagatorFormat(cg_mms_default_precision, 1);
          write_propagator_format(writer, propagatorFormat);
          free(inverterInfo);
          free(propagatorFormat);
        }
        /* solver_field[1] and [2] are free to be used as output buffers here */
        convert_lexic_to_eo(solver_field[2], solver_field[1], temp_save);
        write_spinor(writer, &solver_field[2], &solver_field[1], 1, cg_mms_default_precision);
        destruct_writer(writer);
      }
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }

    /* Compute beta_cg(i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    beta_cg = err/normsq;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[1], N);
    normsq = err;

    /* Compute betas(i+1) = beta_cg(i)*(zita(i+1)*alphas(i))/(zita(i)*alpha_cg(i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(im = 0; im < g_no_extra_masses; im++) {
      betas[im] = beta_cg*zita[im]*alphas[im]/(zitam1[im]*alpha_cg);
      assign_mul_add_mul_r(ps_mms_solver[im], solver_field[1], betas[im], zita[im], N);
    }
  }

  /* not converged within max_iter: hand back the current base iterate */
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
void invert_overlap(const int op_id, const int index_start) { operator * optr; void (*op)(spinor*,spinor*); static complex alpha={0,0}; spinorPrecWS *ws; optr = &operator_list[op_id]; op=&Dov_psi; /* here we need to (re)compute the kernel eigenvectors */ /* for new gauge fields */ if(g_proc_id == 0) {printf("# Not using even/odd preconditioning!\n"); fflush(stdout);} convert_eo_to_lexic(g_spinor_field[DUM_DERI], optr->sr0, optr->sr1); convert_eo_to_lexic(g_spinor_field[DUM_DERI+1], optr->prop0, optr->prop1); if(optr->solver == 13 ){ optr->iterations = sumr(g_spinor_field[DUM_DERI+1],g_spinor_field[DUM_DERI] , optr->maxiter, optr->eps_sq); } else if(optr->solver == 1 /* CG */) { gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], VOLUME); if(use_preconditioning==1 && g_precWS!=NULL){ ws=(spinorPrecWS*)g_precWS; printf("# Using preconditioning (which one?)!\n"); alpha.re=ws->precExpo[2]; spinorPrecondition(g_spinor_field[DUM_DERI+1],g_spinor_field[DUM_DERI+1],ws,T,L,alpha,0,1); /* iter = cg_her(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], max_iter, precision, */ /* rel_prec, VOLUME, &Q_pm_psi_prec); */ optr->iterations = cg_her(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], optr->maxiter, optr->eps_sq, optr->rel_prec, VOLUME, &Qov_sq_psi_prec); alpha.re=ws->precExpo[0]; spinorPrecondition(g_spinor_field[DUM_DERI],g_spinor_field[DUM_DERI],ws,T,L,alpha,0,1); } else { printf("# Not using preconditioning (which one?)!\n"); /* iter = cg_her(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], max_iter, precision, */ /* rel_prec, VOLUME, &Q_pm_psi); */ optr->iterations = cg_her(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], optr->maxiter, optr->eps_sq, optr->rel_prec, VOLUME, &Qov_sq_psi); } Qov_psi(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI]); if(use_preconditioning == 1 && g_precWS!=NULL){ ws=(spinorPrecWS*)g_precWS; alpha.re=ws->precExpo[1]; 
spinorPrecondition(g_spinor_field[DUM_DERI+1],g_spinor_field[DUM_DERI+1],ws,T,L,alpha,0,1); } } op(g_spinor_field[4],g_spinor_field[DUM_DERI+1]); convert_eo_to_lexic(g_spinor_field[DUM_DERI], optr->sr0, optr->sr1); optr->reached_prec=diff_and_square_norm(g_spinor_field[4],g_spinor_field[DUM_DERI],VOLUME); convert_lexic_to_eo(optr->prop0, optr->prop1 , g_spinor_field[DUM_DERI+1]); return; }
int invert_doublet_eo_quda(spinor * const Even_new_s, spinor * const Odd_new_s, spinor * const Even_new_c, spinor * const Odd_new_c, spinor * const Even_s, spinor * const Odd_s, spinor * const Even_c, spinor * const Odd_c, const double precision, const int max_iter, const int solver_flag, const int rel_prec, const int even_odd_flag, const SloppyPrecision sloppy_precision, CompressionType compression) { spinor ** solver_field = NULL; const int nr_sf = 4; init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf); convert_eo_to_lexic(solver_field[0], Even_s, Odd_s); convert_eo_to_lexic(solver_field[1], Even_c, Odd_c); // convert_eo_to_lexic(g_spinor_field[DUM_DERI+1], Even_new, Odd_new); void *spinorIn = (void*)solver_field[0]; // source void *spinorIn_c = (void*)solver_field[1]; // charme source void *spinorOut = (void*)solver_field[2]; // solution void *spinorOut_c = (void*)solver_field[3]; // charme solution if ( rel_prec ) inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL; else inv_param.residual_type = QUDA_L2_ABSOLUTE_RESIDUAL; inv_param.kappa = g_kappa; // IMPORTANT: use opposite TM mu-flavor since gamma5 -> -gamma5 inv_param.mu = -g_mubar /2./g_kappa; inv_param.epsilon = g_epsbar/2./g_kappa; // figure out which BC to use (theta, trivial...) 
set_boundary_conditions(&compression); // set the sloppy precision of the mixed prec solver set_sloppy_prec(sloppy_precision); // load gauge after setting precision _loadGaugeQuda(compression); // choose dslash type if( g_c_sw > 0.0 ) { inv_param.dslash_type = QUDA_TWISTED_CLOVER_DSLASH; inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER; inv_param.clover_coeff = g_c_sw*g_kappa; } else { inv_param.dslash_type = QUDA_TWISTED_MASS_DSLASH; inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN_ASYMMETRIC; inv_param.solution_type = QUDA_MAT_SOLUTION; } // choose solver if(solver_flag == BICGSTAB) { if(g_proc_id == 0) {printf("# QUDA: Using BiCGstab!\n"); fflush(stdout);} inv_param.inv_type = QUDA_BICGSTAB_INVERTER; } else { /* Here we invert the hermitean operator squared */ inv_param.inv_type = QUDA_CG_INVERTER; if(g_proc_id == 0) { printf("# QUDA: Using mixed precision CG!\n"); printf("# QUDA: mu = %f, kappa = %f\n", g_mu/2./g_kappa, g_kappa); fflush(stdout); } } if( even_odd_flag ) { inv_param.solve_type = QUDA_NORMOP_PC_SOLVE; if(g_proc_id == 0) printf("# QUDA: Using preconditioning!\n"); } else { inv_param.solve_type = QUDA_NORMOP_SOLVE; if(g_proc_id == 0) printf("# QUDA: Not using preconditioning!\n"); } inv_param.tol = sqrt(precision)*0.25; inv_param.maxiter = max_iter; inv_param.twist_flavor = QUDA_TWIST_NONDEG_DOUBLET; inv_param.Ls = 2; // NULL pointers to host fields to force // construction instead of download of the clover field: if( g_c_sw > 0.0 ) loadCloverQuda(NULL, NULL, &inv_param); // reorder spinor reorder_spinor_toQuda( (double*)spinorIn, inv_param.cpu_prec, 1, (double*)spinorIn_c ); // perform the inversion invertQuda(spinorOut, spinorIn, &inv_param); if( inv_param.verbosity == QUDA_VERBOSE ) if(g_proc_id == 0) printf("# QUDA: Device memory used: Spinor: %f GiB, Gauge: %f GiB, Clover: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB, inv_param.cloverGiB); if( 
inv_param.verbosity > QUDA_SILENT ) if(g_proc_id == 0) printf("# QUDA: Done: %i iter / %g secs = %g Gflops\n", inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs); // number of CG iterations int iteration = inv_param.iter; // reorder spinor reorder_spinor_fromQuda( (double*)spinorIn, inv_param.cpu_prec, 1, (double*)spinorIn_c ); reorder_spinor_fromQuda( (double*)spinorOut, inv_param.cpu_prec, 1, (double*)spinorOut_c ); convert_lexic_to_eo(Even_s, Odd_s, solver_field[0]); convert_lexic_to_eo(Even_c, Odd_c, solver_field[1]); convert_lexic_to_eo(Even_new_s, Odd_new_s, solver_field[2]); convert_lexic_to_eo(Even_new_c, Odd_new_c, solver_field[3]); finalize_solver(solver_field, nr_sf); freeGaugeQuda(); if(iteration >= max_iter) return(-1); return(iteration); }