/*
 * Apply the overlap operator to the spinor field S and store the result in P.
 *
 *   P  output field
 *   S  input field
 *
 * The routine refreshes the global ov_s from g_kappa, (re)computes the
 * polynomial coefficients via calculateOverlapPolynomial() on the first call
 * and whenever ov_n_cheby has changed since the previous call, and makes sure
 * the Dov workspace exists before borrowing workspace spinor 0 from it.
 */
void Dov_psi(spinor * const P, spinor * const S) {
  static int last_degree = 0;   /* value of ov_n_cheby the coefficients were computed for */
  static int need_coefs = 1;    /* non-zero until the coefficients were computed once */
  spinor *work;
  double coef_work, coef_src;

  ov_s = 0.5*(1./g_kappa - 8.) - 1.;

  if(need_coefs || last_degree != ov_n_cheby) {
    calculateOverlapPolynomial();
    last_degree = ov_n_cheby;
    need_coefs = 0;
  }

  if(dov_ws == NULL) {
    init_Dov_WS();
  }

  work = lock_Dov_WS_spinor(0);

  /* here we do with M = 1 + s */
  /* M + m_ov/2 + (M - m_ov/2) \gamma_5 sign(Q(-M)) */
  coef_work = -(1.0 + ov_s - 0.5*m_ov);
  coef_src  = -(1.0 + ov_s + 0.5*m_ov);

  /* work = Q/sqrt(Q^2) S, then multiplied by gamma5 in place */
  Q_over_sqrt_Q_sqr(work, ov_cheby_coef, ov_n_cheby, S, ev_qnorm, ev_minev);
  gamma5(work, work, VOLUME);

  /* combine the work field with the source using the two coefficients */
  assign_mul_add_mul_r(work, S, coef_work, coef_src, VOLUME);
  assign(P, work, VOLUME);

  unlock_Dov_WS_spinor(0);
  return;
}
/*
 * Multi-mass-shift CG: solves for the base mass and for all g_no_extra_masses
 * shifted masses with a single Krylov sequence.
 *
 *   P         output: solution for the base mass (must be usable as N spinors)
 *   Q         input:  source
 *   max_iter  maximal number of CG iterations
 *   eps_sq    squared target residue
 *   rel_prec  0: stop when |r|^2 <= eps_sq
 *             1: stop when |r|^2 <= eps_sq * |Q|^2
 *             (any other value never satisfies the stopping criterion)
 *   N         number of spinors in each field
 *   f         matrix multiplication routine, called as f(out, in)
 *
 * Returns the number of iterations used on convergence, -1 if max_iter was
 * reached.  On convergence every solution (index 0 = base mass, then the extra
 * masses) is rescaled by (2 kappa)^2 if g_kappa != 0 and written to its own
 * propagator file.
 *
 * Only a zero starting vector is supported: P is zeroed on entry.
 *
 * The work arrays sigma, zita, zitam1, alphas, betas, xs_mms_solver and
 * ps_mms_solver are defined outside this function and are set up by
 * init_mms_tm().
 */
int cg_mms_tm(spinor * const P, spinor * const Q, const int max_iter,
              double eps_sq, const int rel_prec, const int N, matrix_mult f) {
  /* All scalars are per-call state.  In particular alpha_cg/beta_cg must
     restart from 1 and 0 for every solve: a stale alpha_cg of 0 (or NaN)
     left over from an earlier call would turn the first zita update
     into 0/0. */
  double normsq, pro, err, squarenorm;
  double alpha_cg = 1., beta_cg = 0.;
  double gamma, alpham1;
  int iteration, im, append = 0, len;
  char filename[100];
  int const cg_mms_default_precision = 32;
  double tmp_mu = g_mu;
  WRITER * writer = NULL;
  paramsInverterInfo *inverterInfo = NULL;
  paramsPropagatorFormat *propagatorFormat = NULL;
  spinor * temp_save; /* points to the solution currently being written */
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  init_mms_tm(g_no_extra_masses);

  /* currently only implemented for P=0 */
  zero_spinor_field(P, N);

  /* Value of the bare MMS-masses (\mu^2 - \mu_0^2) */
  for(im = 0; im < g_no_extra_masses; im++) {
    sigma[im] = g_extra_masses[im]*g_extra_masses[im] - g_mu*g_mu;
    assign(xs_mms_solver[im], P, N);
    assign(ps_mms_solver[im], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  squarenorm = square_norm(Q, N, 1);

  /* solver_field[0] = x, solver_field[1] = r, solver_field[2] = p.
     With x = 0 both the residue and the search vector equal the source. */
  assign(solver_field[0], P, N);
  assign(solver_field[1], Q, N);
  assign(solver_field[2], Q, N);
  normsq = square_norm(Q, N, 1);

  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {

    /* Q^2*p and then (p,Q^2*p) */
    f(solver_field[4], solver_field[2]);
    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alpha_cg(i-1)
       and alpha_cg(i). This is the reason why we need this double definition
       of alpha */
    alpham1 = alpha_cg;

    /* Compute alpha_cg(i+1) */
    alpha_cg = normsq/pro;

    for(im = 0; im < g_no_extra_masses; im++) {
      /* Now gamma is a temp variable that corresponds to zita(i+1) */
      gamma = zita[im]*alpham1/(alpha_cg*beta_cg*(1.-zita[im]/zitam1[im])
                                + alpham1*(1.+sigma[im]*alpha_cg));

      /* Now zita(i-1) is put equal to the old zita(i) */
      zitam1[im] = zita[im];
      /* Now zita(i+1) is updated */
      zita[im] = gamma;
      /* Update of alphas(i) = alpha_cg(i)*zita(i+1)/zita(i) */
      alphas[im] = alpha_cg*zita[im]/zitam1[im];
      /* Compute xs(i+1) = xs(i) + alphas(i)*ps(i) */
      assign_add_mul_r(xs_mms_solver[im], ps_mms_solver[im], alphas[im], N);
    }

    /* Compute x_(i+1) = x_i + alpha_cg(i+1) p_i */
    assign_add_mul_r(solver_field[0], solver_field[2], alpha_cg, N);
    /* Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision eps_sq is reached */
    err = square_norm(solver_field[1], N, 1);
    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("CGMMS iteration: %d residue: %g\n", iteration, err);
      fflush( stdout );
    }

    if( ((err <= eps_sq) && (rel_prec == 0)) ||
        ((err <= eps_sq*squarenorm) && (rel_prec == 1)) ) {

      /* recompute the true residue |f(x) - Q|^2 of the base-mass solution */
      assign(P, solver_field[0], N);
      f(solver_field[2], P);
      diff(solver_field[3], solver_field[2], Q, N);
      err = square_norm(solver_field[3], N, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("# CG MMS true residue at final iteration (%d) was %g.\n", iteration, err);
        fflush( stdout);
      }
      g_sloppy_precision = 0;
      g_mu = tmp_mu;

      /* save all the results of (Q^dagger Q)^(-1) \gamma_5 \phi */
      /* when im == -1 save the base mass */
      for(im = -1; im < g_no_extra_masses; im++) {
        if(im == -1) {
          temp_save = solver_field[0];
        }
        else {
          temp_save = xs_mms_solver[im];
        }

        /* bounded formatting: the basename length is not under our control */
        if(SourceInfo.type != 1) {
          if (PropInfo.splitted) {
            len = snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted",
                           SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, im+1);
          }
          else {
            len = snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.cgmms.%.2d.inverted",
                           SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, im+1);
          }
        }
        else {
          len = snprintf(filename, sizeof(filename), "%s.%.4d.%.5d.cgmms.%.2d.0",
                         SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, im+1);
        }
        if((len < 0 || len >= (int)sizeof(filename)) && g_proc_id == g_stdio_proc) {
          fprintf(stderr, "# CG MMS warning: output file name truncated to \"%s\"\n", filename);
        }

        if(g_kappa != 0) {
          mul_r(temp_save, (2*g_kappa)*(2*g_kappa), temp_save, N);
        }

        append = !PropInfo.splitted;

        construct_writer(&writer, filename, append);

        if (PropInfo.splitted || SourceInfo.ix == index_start) {
          /* Create the inverter info
             NOTE: always set to TWILSON=12 and 1 flavour (to be adjusted) */
          inverterInfo = construct_paramsInverterInfo(err, iteration+1, 12, 1);
          if (im == -1) {
            inverterInfo->cgmms_mass = inverterInfo->mu;
          }
          else {
            inverterInfo->cgmms_mass = g_extra_masses[im]/(2 * inverterInfo->kappa);
          }
          write_spinor_info(writer, PropInfo.format, inverterInfo, append);
          /* Create the propagatorFormat
             NOTE: always set to 1 flavour (to be adjusted) */
          propagatorFormat = construct_paramsPropagatorFormat(cg_mms_default_precision, 1);
          write_propagator_format(writer, propagatorFormat);
          free(inverterInfo);
          free(propagatorFormat);
        }

        /* solver_field[1] and [2] are no longer needed by the solver and are
           reused as the two output buffers filled by convert_lexic_to_eo() */
        convert_lexic_to_eo(solver_field[2], solver_field[1], temp_save);
        write_spinor(writer, &solver_field[2], &solver_field[1], 1, cg_mms_default_precision);
        destruct_writer(writer);
      }
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }

    /* Compute beta_cg(i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i) */
    beta_cg = err/normsq;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[1], N);
    normsq = err;

    /* Compute betas(i+1) = beta_cg(i)*(zita(i+1)*alphas(i))/(zita(i)*alpha_cg(i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i) */
    for(im = 0; im < g_no_extra_masses; im++) {
      betas[im] = beta_cg*zita[im]*alphas[im]/(zitam1[im]*alpha_cg);
      assign_mul_add_mul_r(ps_mms_solver[im], solver_field[1], betas[im], zita[im], N);
    }
  }

  /* not converged: hand back the current base-mass iterate */
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
/*
 * Multi-mass-shift CG driven by a solver_params_t description.
 *
 *   P                   output: array of result fields, P[0] belongs to
 *                       shifts[0], P[im] to shifts[im]
 *   Q                   input:  source
 *   solver_params       uses sdim (field length), no_shifts, shifts[],
 *                       max_iter, squared_solver_prec, rel_prec and M_psi
 *                       (the operator, called as M_psi(out, in))
 *   cgmms_reached_prec  output: squared residue of the shift-0 system when
 *                       the iteration stopped
 *
 * Stopping criterion on the shift-0 residue r:
 *   rel_prec == 0 : |r|^2 <= squared_solver_prec
 *   rel_prec  > 0 : |r|^2 <= squared_solver_prec * |Q|^2
 *
 * Returns the number of iterations used if the criterion was met (also when
 * that happens on the very last permitted iteration), -1 otherwise.
 *
 * Only a zero starting vector is supported: all P[im] are zeroed on entry.
 *
 * The work arrays sigma, zita, zitam1, alphas, betas and ps_mms_solver are
 * defined outside this function and are set up by init_mms_tm().
 */
int cg_mms_tm(spinor ** const P, spinor * const Q,
              solver_params_t * solver_params, double * cgmms_reached_prec) {
  double normsq, pro, err, squarenorm;
  double gamma, alpham1;
  int iteration, converged = 0;
  int no_shifts = solver_params->no_shifts;
  const int N = solver_params->sdim;
  /* full-volume fields need VOLUMEPLUSRAND entries, everything else half of it */
  const int field_size = (solver_params->sdim == VOLUME) ? VOLUMEPLUSRAND : VOLUMEPLUSRAND/2;
  spinor ** solver_field = NULL;
  double atime, etime;
  const int nr_sf = 3;

  atime = gettime();
  init_solver_field(&solver_field, field_size, nr_sf);
  init_mms_tm(no_shifts, field_size);

  zero_spinor_field(P[0], N);
  alphas[0] = 1.0;
  betas[0] = 0.0;
  sigma[0] = solver_params->shifts[0]*solver_params->shifts[0];
  if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", 0, sigma[0]);

  for(int im = 1; im < no_shifts; im++) {
    /* shifts are stored relative to the zeroth one */
    sigma[im] = solver_params->shifts[im]*solver_params->shifts[im] - sigma[0];
    if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", im, sigma[im]);
    // these will be the result spinor fields
    zero_spinor_field(P[im], N);
    // these are intermediate fields
    assign(ps_mms_solver[im-1], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  /* currently only implemented for P=0 */
  squarenorm = square_norm(Q, N, 1);
  /* if a starting solution vector equal to zero is chosen:
     solver_field[0] = r = Q, solver_field[1] = p = Q */
  assign(solver_field[0], Q, N);
  assign(solver_field[1], Q, N);
  normsq = squarenorm;

  /* With x = 0 the residue is the source itself; report that if the loop
     below never runs (max_iter <= 0). */
  *cgmms_reached_prec = squarenorm;

  /* main loop */
  for(iteration = 0; iteration < solver_params->max_iter; iteration++) {

    /* Q^2*p and then (p,Q^2*p) */
    solver_params->M_psi(solver_field[2], solver_field[1]);
    // add the zero's shift
    assign_add_mul_r(solver_field[2], solver_field[1], sigma[0], N);
    pro = scalar_prod_r(solver_field[1], solver_field[2], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alphas[0](i-1)
       and alpha_cg(i). This is the reason why we need this double definition
       of alpha */
    alpham1 = alphas[0];

    /* Compute alphas[0](i+1) */
    alphas[0] = normsq/pro;
    for(int im = 1; im < no_shifts; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */
      gamma = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im])
                                + alpham1*(1.+sigma[im]*alphas[0]));

      // Now zita(i-1) is put equal to the old zita(i)
      zitam1[im] = zita[im];
      // Now zita(i+1) is updated
      zita[im] = gamma;
      // Update of alphas(i) = alphas[0](i)*zita(i+1)/zita(i)
      alphas[im] = alphas[0]*zita[im]/zitam1[im];

      // Compute xs(i+1) = xs(i) + alphas(i)*ps(i)
      assign_add_mul_r(P[im], ps_mms_solver[im-1], alphas[im], N);

      // in the CG the corrections are decreasing with the iteration number increasing
      // therefore, we can remove shifts when the norm of the correction vector
      // falls below a threshold
      // this is useful for computing time and needed, because otherwise
      // zita might get smaller than DOUBLE_EPS and, hence, zero
      // (only the last active shift is tested, every 20th iteration; after the
      //  decrement im == no_shifts, so this inner loop ends here)
      if(iteration > 0 && (iteration % 20 == 0) && (im == no_shifts-1)) {
        double sn = square_norm(ps_mms_solver[im-1], N, 1);
        if(alphas[no_shifts-1]*alphas[no_shifts-1]*sn <= solver_params->squared_solver_prec) {
          no_shifts--;
          if(g_debug_level > 2 && g_proc_id == 0) {
            printf("# CGMMS: at iteration %d removed one shift, %d remaining\n", iteration, no_shifts);
          }
        }
      }
    }

    /* Compute x_(i+1) = x_i + alphas[0](i+1) p_i */
    assign_add_mul_r(P[0], solver_field[1], alphas[0], N);
    /* Compute r_(i+1) = r_i - alphas[0](i+1) Qp_i */
    assign_add_mul_r(solver_field[0], solver_field[2], -alphas[0], N);

    /* Check whether the precision eps_sq is reached */
    err = square_norm(solver_field[0], N, 1);

    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("# CGMMS iteration: %d residue: %g\n", iteration, err);
      fflush( stdout );
    }

    converged =
      ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
      ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0));

    if(converged || (iteration == solver_params->max_iter - 1)) {
      /* FIXME temporary output of precision until a better solution can be found */
      *cgmms_reached_prec = err;
      break;
    }

    /* Compute betas[0](i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i) */
    betas[0] = err/normsq;
    assign_mul_add_r(solver_field[1], betas[0], solver_field[0], N);
    normsq = err;

    /* Compute betas(i+1) = betas[0](i+1)*(zita(i+1)*alphas(i))/(zita(i)*alphas[0](i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i) */
    for(int im = 1; im < no_shifts; im++) {
      betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
      assign_mul_add_mul_r(ps_mms_solver[im-1], solver_field[0], betas[im], zita[im], N);
    }
  }
  etime = gettime();
  g_sloppy_precision = 0;

  /* Success is decided by the stopping criterion, not by the iteration
     index, so converging exactly on the last iteration is not a failure. */
  iteration = converged ? iteration + 1 : -1;

  if(g_debug_level > 0 && g_proc_id == 0) {
    printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_params->no_shifts, iteration, solver_params->squared_solver_prec, etime - atime);
  }

  finalize_solver(solver_field, nr_sf);
  return(iteration);
}
/*
 * Chebyshev-polynomial evaluation used for Q/sqrt(Q^2) applied to S.
 *
 *   R      output field
 *   c      polynomial coefficients c[0] .. c[n-1]
 *   n      number of coefficients
 *   S      input field
 *   rnorm  normalisation handed on to norm_Q_sqr_psi() / norm_Q_n_psi()
 *   minev  lower end of the approximation interval (the upper end is 1)
 *
 * The lowest no_eigenvalues-1 modes are taken out of the source before the
 * polynomial is applied and their contribution is added back at the end with
 * addproj_q_invsqrt().  Workspace spinors 2..6 are locked for the duration of
 * the call, and the global eigenvalues_for_cg_computed is set to
 * max(no_eigenvalues-1, 0).
 */
void Q_over_sqrt_Q_sqr(spinor * const R, double * const c,
                       const int n, spinor * const S,
                       const double rnorm, const double minev) {
  /* compile-time style switches kept from the original implementation */
  const int project_low_modes = 1;  /* remove the lowest modes from source and iterate */
  const int use_clenshaw = 1;       /* 0 would select the forward recursion below */

  const double maxev = 1.0;
  const double ap_eps_sq = 0.;      /* threshold of the forward-recursion variant */
  double scale, shift, tnorm;
  int j;

  spinor * const saved = lock_Dov_WS_spinor(2);
  spinor * const d     = lock_Dov_WS_spinor(3);
  spinor * const dd    = lock_Dov_WS_spinor(4);
  spinor * const aux   = lock_Dov_WS_spinor(5);
  spinor * const src   = lock_Dov_WS_spinor(6);

  eigenvalues_for_cg_computed = no_eigenvalues - 1;
  if(eigenvalues_for_cg_computed < 0) {
    eigenvalues_for_cg_computed = 0;
  }

  /* linear map of [minev, maxev] used inside the recursion */
  scale = 4/(maxev-minev);
  shift = -2*(maxev+minev)/(maxev-minev);

  zero_spinor_field(d, VOLUME);
  zero_spinor_field(dd, VOLUME);

  if(project_low_modes) {
    assign_sub_lowest_eigenvalues(src, S, no_eigenvalues-1, VOLUME);
  }
  else {
    assign(src, S, VOLUME);
  }

  /* Check whether switch for adaptive precision is on */
  /* this might be implemented again in the future */
  if(use_clenshaw) {
    /* the 'old' version: Clenshaw's recursion for the Chebyshev polynomial */
    for(j = n-1; j >= 1; j--) {
      assign(saved, d, VOLUME);

      /* every tenth step the low modes are removed from the iterate again */
      if((j%10) == 0) {
        assign_sub_lowest_eigenvalues(aux, d, no_eigenvalues-1, VOLUME);
      }
      else {
        assign(aux, d, VOLUME);
      }

      norm_Q_sqr_psi(R, aux, rnorm);
      assign_mul_add_mul_add_mul_add_mul_r(d, R, dd, src, shift, scale, -1.0, c[j], VOLUME);
      assign(dd, saved, VOLUME);
    }

    if(project_low_modes) {
      assign_sub_lowest_eigenvalues(R, d, no_eigenvalues-1, VOLUME);
    }
    else {
      assign(R, d, VOLUME);
    }

    /* last step of the recursion: halved scale/shift and c[0]/2 */
    norm_Q_sqr_psi(aux, R, rnorm);
    assign_mul_add_mul_add_mul_add_mul_r(aux, d, dd, src, scale/2., shift/2., -1.0, c[0]/2., VOLUME);
    norm_Q_n_psi(R, aux, 1, rnorm);
  }
  else {
    /* adaptive precision version using the forward recursion
       for the Chebyshev polynomial */

    /* d = T_0(Q^2) */
    assign(d, src, VOLUME);
    /* dd = T_1(Q^2) */
    norm_Q_sqr_psi(dd, d, rnorm);
    assign_mul_add_mul_r(dd, d, scale/2., shift/2., VOLUME);
    /* r = c_1 T_1(Q^2) + 1./2 c_0 */
    mul_add_mul_r(R, dd, d, c[1], c[0]/2., VOLUME);

    for(j = 2; j <= n-1; j++) {
      /* aux = T_j(Q^2) = 2 Q^2 T_{j-1}(Q^2) - T_{j-2}(Q^2) */
      norm_Q_sqr_psi(aux, dd, rnorm);
      assign_mul_add_mul_add_mul_r(aux, dd, d, scale, shift, -1.0, VOLUME);
      /* r = r + c_j T_j(Q^2) */
      assign_add_mul_r(R, aux, c[j], VOLUME);
      /* stopping criterion: tnorm = c_j^2 |T_j(Q^2)|^2 */
      tnorm = square_norm(aux, VOLUME, 1);
      tnorm *= (c[j]*c[j]);

      if(tnorm < ap_eps_sq) {
        break;
      }
      /* d = T_{j-1}(Q^2) */
      assign(d, dd, VOLUME);
      /* dd = T_{j}(Q^2) */
      assign(dd, aux, VOLUME);
    }
    if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
      printf("Order of Chebysheff approximation = %d\n",j);
      fflush( stdout);
    }

    /* r = Q r */
    assign(aux, R, VOLUME);
    norm_Q_n_psi(R, aux, 1, rnorm);
  }

  /* add in piece from projected subspace */
  addproj_q_invsqrt(R, S, no_eigenvalues-1, VOLUME);

  unlock_Dov_WS_spinor(2);
  unlock_Dov_WS_spinor(3);
  unlock_Dov_WS_spinor(4);
  unlock_Dov_WS_spinor(5);
  unlock_Dov_WS_spinor(6);
  return;
}
/*
 * Allocate one zero-initialised work field of VOLUMEPLUSRAND+1 spinors for
 * poly_precon().  The pointer returned by calloc is stored in *raw; the return
 * value is the pointer to use, rounded up with ALIGN_BASE in SSE builds.
 * Terminates the program if the allocation fails.
 */
static spinor * poly_precon_alloc_field(spinor ** const raw) {
  *raw = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
  if(*raw == NULL) {
    fprintf(stderr, "poly_precon: could not allocate spinor work field\n");
    exit(EXIT_FAILURE);
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  return (spinor *)(((unsigned long int)(*raw)+ALIGN_BASE)&~ALIGN_BASE);
#else
  return *raw;
#endif
}

/*
 * Polynomial preconditioner: accumulates a Chebyshev series in the operator
 * applied by Q_pm_psi() acting on S, and stores the result in R.
 *
 *   R     output field
 *   S     input field
 *   prec  the series is cut once c_j^2 |T_j|^2 drops below this value
 *   n     maximal number of series terms
 *
 * The approximation interval is fixed to [0.1, 4].  Work fields and the
 * coefficient table are allocated on the first call and kept for the lifetime
 * of the program.  The table holds coef_capacity zero-initialised entries and
 * get_c() is asked for coef_count of them; n is clamped to coef_capacity so
 * the table is never read out of bounds.
 */
void poly_precon(spinor * const R, spinor * const S, const double prec, const int n) {
  enum { coef_capacity = 1000, coef_count = 100 };
  static spinor *d_, *d, *dd_, *dd, *aux_, *aux, *aux3_, *aux3;
  static int initp = 0;
  static double * c;
  const int N = VOLUME;
  const double maxev = 4.0;
  const double minev = 0.1;
  /* minev = 1.5*1.5*g_mu*g_mu; */
  const double fact1 = 4. / (maxev - minev);
  const double fact2 = -2 * (maxev + minev) / (maxev - minev);
  const int n_terms = (n < coef_capacity) ? n : coef_capacity;
  double tnorm, auxnorm;
  int j;

  if(initp == 0) {
    c = calloc(coef_capacity, sizeof(*c));
    if(c == NULL) {
      fprintf(stderr, "poly_precon: could not allocate coefficient table\n");
      exit(EXIT_FAILURE);
    }
    d    = poly_precon_alloc_field(&d_);
    dd   = poly_precon_alloc_field(&dd_);
    aux  = poly_precon_alloc_field(&aux_);
    aux3 = poly_precon_alloc_field(&aux3_);
    get_c(minev, maxev, c, coef_count);
    initp = 1;
  }

  zero_spinor_field(&d[0], N);
  zero_spinor_field(&dd[0], N);
  assign(&aux3[0], &S[0], N);
  /* gamma5(&aux3[0], &S[0], N); */

  /* Use the adaptive precision version using the forward recursion
     for the Chebyshev polynomial */

  /* d = T_0(Q^2) */
  assign(&d[0], &aux3[0], N);
  /* dd = T_1(Q^2) */
  Q_pm_psi(&dd[0], &d[0]);
  assign_mul_add_mul_r(&dd[0], &d[0], fact1/2, fact2/2, N);
  /* r = c_1 T_1(Q^2) + 1/2 c_0 */
  mul_add_mul_r(&R[0], &dd[0], &d[0], c[1], c[0]/2, N);

  for (j = 2; j <= n_terms-1; j++) {
    /* aux = T_j(Q^2) = 2 Q^2 T_{j-1}(Q^2) - T_{j-2}(Q^2) */
    Q_pm_psi(&aux[0], &dd[0]);
    assign_mul_add_mul_add_mul_r(&aux[0], &dd[0], &d[0], fact1, fact2, -1.0, N);
    /* r = r + c_j T_j(Q^2) */
    assign_add_mul_r(&R[0], &aux[0], c[j], N);
    /* stopping criterion: tnorm = c_j^2 |T_j(Q^2)|^2 */
    tnorm = square_norm(aux, N, 1);
    tnorm *= (c[j]*c[j]);

    auxnorm = square_norm(R, N, 1);
    if(g_proc_id == g_stdio_proc) {
      printf("j= %d\t|c T|^2= %g\t%g\t c_j= %g\t|r|^2= %g\n",j,tnorm,prec, c[j],auxnorm);
      fflush(stdout);
    }

    if(tnorm < prec) break;
    /* d = T_{j-1}(Q^2) */
    assign(&d[0], &dd[0], N);
    /* dd = T_{j}(Q^2) */
    assign(&dd[0], &aux[0], N);
  }
  if(g_proc_id == g_stdio_proc) {
    printf("Order of Chebysheff approximation = %d\n",j);
    fflush( stdout);
  }

  /* r = Q r */
  /* assign(aux, R, N); */
  /* Q_minus_psi(R, aux); */
  return;
}