/* On a lattice with lattice constant len, along each axis, multiply
   two adjacent smeared links in src to form a coarse link res on a
   lattice s with lattice constant 2 * len.

   res - output: one coarse link field per direction (RG_Nd entries)
   src - input:  one smeared link field per direction (RG_Nd entries)
   s   - sub-block on which the products are evaluated
   len - length of the on-axis displacement to the second link
*/
void RG_create_gauge(QDP_ColorMatrix *res[RG_Nd],
                     QDP_ColorMatrix *src[RG_Nd],
                     QDP_Sub_Block s, int len)
{
  int j,k;
  int v[RG_Nd];
  QDP_Shift offset;

  for(j=0; j<RG_Nd; ++j)
    {
      /* On axis displacement of length len */
      for(k=0; k<RG_Nd; ++k)
        v[k] = 0;
      v[j] = len;

      /* The shift is used for this one product only, so it is created
         and released inside the loop. */
      offset = QDP_create_shift(v);
      SQDP_M_eq_M_times_sM(res[j],src[j],src[j],offset,QDP_forward,s);
      QDP_destroy_shift(offset);
    }

  return;
}
/* Computes the staple :
                 mu
              +-------+
        nu    |       |
              |       |
              X       X
   The mu link ("link") may be any matrix field, not necessarily a
   gauge link.

   If staple is non-NULL, the sum of the upper and lower staples is
   stored there and then added to fl[mu] with weight dcoef.
   If staple is NULL, nothing is saved and the upper and lower staples
   are added to fl[mu] one after the other, each with weight dcoef.
*/
static void
compute_gen_staple(QDP_ColorMatrix *staple, int mu, int nu,
                   QDP_ColorMatrix *link, double dcoef,
                   QDP_ColorMatrix *gauge[], QDP_ColorMatrix *fl[])
{
  QLA_Real coef = dcoef;
  const int keep = (staple != NULL);
  QDP_ColorMatrix *shifted, *shifted_nu;
  QDP_ColorMatrix *prod, *upper;
  QDP_ColorMatrix *lower;

  shifted    = QDP_create_M();
  shifted_nu = QDP_create_M();
  prod       = QDP_create_M();
  upper      = QDP_create_M();
  lower      = QDP_create_M();

  /* Upper staple: gauge[nu] * shift_nu(link) * adj(shift_mu(gauge[nu])) */
  QDP_M_eq_sM(shifted, link, QDP_neighbor[nu], QDP_forward, QDP_all);
  QDP_M_eq_sM(shifted_nu, gauge[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
  QDP_M_eq_M_times_Ma(prod, shifted, shifted_nu, QDP_all);
  if(keep) {
    /* Keep it; it reaches fl[mu] together with the lower staple below */
    QDP_M_eq_M_times_M(staple, gauge[nu], prod, QDP_all);
  } else {
    /* Nothing to keep: fold it into the fat link right away */
    QDP_M_eq_M_times_M(upper, gauge[nu], prod, QDP_all);
    QDP_M_peq_r_times_M(fl[mu], &coef, upper, QDP_all);
  }

  /* Lower staple: built at the site and then fetched with a backward nu shift */
  QDP_M_eq_sM(shifted, gauge[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
  QDP_M_eq_Ma_times_M(prod, gauge[nu], link, QDP_all);
  QDP_M_eq_M_times_M(lower, prod, shifted, QDP_all);
  QDP_M_eq_sM(shifted, lower, QDP_neighbor[nu], QDP_backward, QDP_all);

  if(keep) {
    /* staple now holds upper + lower */
    QDP_M_peq_M(staple, shifted, QDP_all);
  }
  QDP_M_peq_r_times_M(fl[mu], &coef, keep ? staple : shifted, QDP_all);

  QDP_destroy_M(shifted);
  QDP_destroy_M(shifted_nu);
  QDP_destroy_M(prod);
  QDP_destroy_M(upper);
  QDP_destroy_M(lower);
} /* compute_gen_staple */
/* Smear and project the links in src, writing the result to dest.

   The smearing parameters come from RG_value().  By default two
   smear + project passes are applied; when any of the CHECK_SMEAR_QDP_MILC,
   CHECK_DEGRAND_W_SMEAR or CHECK_SMEAR_GAUGE_2 macros is defined only a
   single pass is applied. */
void RG_smearing(QDP_ColorMatrix *dest[RG_Nd], QDP_ColorMatrix *src[RG_Nd],
                 QDP_Sub_Block s, int len)
{
  int d;
  QLA_Real staple_w, link0;
  QLA_Int space_only;
  QDP_ColorMatrix *smeared[RG_Nd], *projected[RG_Nd];

  for(d = 0; d < RG_Nd; ++d)
    {
      smeared[d]   = QDP_create_M();
      projected[d] = QDP_create_M();
    }

  RG_value(&staple_w, &link0, &space_only);

  /* First smearing pass */
  RG_smearing_qdp(smeared, src, &staple_w, &link0, s, len);

#if defined(CHECK_SMEAR_QDP_MILC) || defined(CHECK_DEGRAND_W_SMEAR) || defined(CHECK_SMEAR_GAUGE_2)
  /* Check builds: stop after a single pass */
  project_qdp(smeared, dest, &space_only);
#else
  /* Normal build: project, smear once more, project into dest */
  project_qdp(smeared, projected, &space_only);
  RG_smearing_qdp(smeared, projected, &staple_w, &link0, s, len);
  project_qdp(smeared, dest, &space_only);
#endif

  for(d = 0; d < RG_Nd; ++d)
    {
      QDP_destroy_M(smeared[d]);
      QDP_destroy_M(projected[d]);
    }

  return;
}
/* Working from the finest lattice, smear the links at each level of
   coarseness and multiply to form the links on the next higher
   level.  Results in rg_link.

   rg_link[nrg-1] receives the (smeared) input links; each coarser
   level rg_link[nrg-1-i] is built from level rg_link[nrg-i] using
   links of length 2^(i-1).
*/
void RG_gauge(QDP_ColorMatrix *rg_link[NRG][RG_Nd],
              QDP_ColorMatrix *link_qdp[RG_Nd],
              QDP_Sub_Block s[NRG+1])
{
  int level, d, len;
  QDP_ColorMatrix *smeared[RG_Nd];

  for(d = 0; d < RG_Nd; ++d)
    smeared[d] = QDP_create_M();

  /* Finest level */
#ifdef CHECK_DEGRAND_WO_SMEAR
  for(d = 0; d < RG_Nd; ++d)
    SQDP_M_eq_M(rg_link[nrg-1][d], link_qdp[d], s[nrg]);
#else
  RG_smearing(rg_link[nrg-1], link_qdp, s[nrg], 1);
#ifdef CHECK_DEGRAND_W_SMEAR
  /* Direction 3 keeps the unsmeared link in this check build */
  SQDP_M_eq_M(rg_link[nrg-1][3], link_qdp[3], s[nrg]);
#endif
#endif

  /* Work from the finest to the coarsest level */
  for(level = 1; level < nrg; level++)
    {
      const int fine = nrg - level;   /* index of the level being smeared */

      len = intpow(2, level-1);

      /* Smear the links */
#ifdef CHECK_DEGRAND_WO_SMEAR
      for(d = 0; d < RG_Nd; ++d)
        SQDP_M_eq_M(smeared[d], rg_link[fine][d], s[fine+1]);
#else
      RG_smearing(smeared, rg_link[fine], s[fine+1], len);
#ifdef CHECK_DEGRAND_W_SMEAR
      SQDP_M_eq_M(smeared[3], rg_link[fine][3], s[fine+1]);
#endif
#endif

      /* Multiply the links */
      RG_create_gauge(rg_link[fine-1], smeared, s[fine], len);
    }

  for(d = 0; d < RG_Nd; ++d)
    QDP_destroy_M(smeared[d]);

  return;
}
/* Compute the HISQ derivative with QOP_hisq_deriv_multi_fnmat2_qdp and
   add its contribution to force[0..3].

   For each direction the derivative is contracted with the link
   flh->U_links[dir] (as U * adj(deriv)), reduced with
   QDP_M_eq_antiherm_M, and added to force[dir].  info->final_flop is
   incremented and info->final_sec is set to the elapsed time of this call. */
void
QOP_hisq_force_multi_fnmat2_qdp(QOP_info_t *info, QOP_FermionLinksHisq *flh,
                                QDP_ColorMatrix *force[],
                                QOP_hisq_coeffs_t *hisq_coeff,
                                REAL *residues, QDP_ColorVector *x[],
                                int *n_orders_naik)
{
#define NC QDP_get_nc(force[0])
  const double t_start = QOP_time();
  const double flops_per_site = 4.*(198+24+18);
  QDP_ColorMatrix *deriv[4];
  QDP_ColorMatrix *scratch;
  int d;

  /* Zeroed accumulators for the derivative */
  for(d=0; d<4; d++) {
    deriv[d] = QDP_create_M();
    QDP_M_eq_zero(deriv[d], QDP_all);
  }

  QOP_hisq_deriv_multi_fnmat2_qdp(info, flh, deriv, hisq_coeff, residues,
                                  x, n_orders_naik);

  // The contraction with the link happens here, after the contributions
  // from all levels of smearing have been accumulated in deriv[].
  scratch = QDP_create_M();
  for(d=0; d<4; d++) {
    QDP_M_eq_M_times_Ma(scratch, flh->U_links[d], deriv[d], QDP_all);
    QDP_M_eq_antiherm_M(deriv[d], scratch, QDP_all);
    // add force to momentum
    QDP_M_peq_M(force[d], deriv[d], QDP_all);
  }
  info->final_flop += flops_per_site*QDP_sites_on_node;

  QDP_destroy_M(scratch);
  for(d=0; d<4; d++) {
    QDP_destroy_M(deriv[d]);
  }

  info->final_sec = QOP_time() - t_start;
#undef NC
}
/* Exercise a shift between two sub-blocks: fill a field with the
   constant 1 on QDP_block[nrg], shift it forward along each direction
   onto QDP_block[nrg-1], then release everything and report. */
void RG_check_subset(QDP_Sub_Block QDP_block[NRG+1])
{
  int d;
  QDP_ColorMatrix *ones[RG_Nd], *shifted[RG_Nd];
  QLA_Complex unit;

  for(d = 0; d < RG_Nd; ++d)
    {
      ones[d]    = QDP_create_M();
      shifted[d] = QDP_create_M();
    }

  QLA_c_eq_r(unit,1.0);

  /* Constant field on the finer block */
  for(d = 0; d < RG_Nd; ++d)
    SQDP_M_eq_c(ones[d], &unit, QDP_block[nrg]);

  /* Nearest-neighbor forward shift, evaluated on the next block */
  for(d = 0; d < RG_Nd; ++d)
    SQDP_M_eq_sM(shifted[d], ones[d], QDP_neighbor[d], QDP_forward,
                 QDP_block[nrg-1]);

  for(d = 0; d < RG_Nd; ++d)
    {
      QDP_destroy_M(ones[d]);
      QDP_destroy_M(shifted[d]);
    }

  printf("I have destroyed every thing!!! this node %d\n",this_node);
  fflush(stdout);

  return;
}
static void setup_cg(void) { static int is_setup=0; if(!is_setup) { int i; is_setup = 1; psi = QDP_create_D(); chi = QDP_create_D(); cgp = QDP_create_D(); cgr = QDP_create_D(); mp = QDP_create_D(); ttt = QDP_create_D(); tt1 = QDP_create_D(); tt2 = QDP_create_D(); t1 = QDP_create_D(); t2 = QDP_create_D(); t3 = QDP_create_D(); //dtemp0 = QDP_create_H(); for(i=0; i<4; i++) { #ifndef PRESHIFT_LINKS gaugelink[i] = QDP_create_M(); #endif } for(i=0; i<8; i++) { #ifdef PRESHIFT_LINKS gaugelink[i] = QDP_create_M(); #endif //dtemp1[i] = QDP_create_H(); //temp1[i] = QDP_create_H(); //temp2[i] = QDP_create_H(); temp1[i] = QDP_create_D(); temp2[i] = QDP_create_D(); temp3[i] = QDP_create_D(); temp4[i] = QDP_create_D(); } } }
/* Return a cached nearest-neighbor shift of "in" along mu in direction dir.

   *tmp holds the cache.  If it is NULL a field is allocated, stored in
   *tmp, and the shift is always performed.  Otherwise the shift is
   recomputed only when redo is non-zero. */
static QDP_ColorMatrix *
cacheshift(QDP_ColorMatrix **tmp, QDP_ColorMatrix *in, int mu,
           QDP_ShiftDir dir, int redo)
{
#define NC QDP_get_nc(in)
  QDP_ColorMatrix *cached = *tmp;
  const int first_use = (cached == NULL);

  if(first_use) {
    cached = QDP_create_M();
    *tmp = cached;
  }
  if(first_use || redo) {
    QDP_M_eq_sM(cached, in, QDP_neighbor[mu], dir, QDP_all);
  }
  return cached;
#undef NC
}
/* Solve with the QOP Wilson inverter.

   l    - layout used by unpackD/unpackM/packD
   x    - in: initial guess, out: solution (packed)
   u    - packed gauge links; only u[0], u[2], u[4], u[6] are read,
          and each is multiplied by 2 before being handed to QOP
   mass - mass passed to QOP_wilson_invert_qdp
   y    - packed source
   rsq  - residual target (res_arg.rsqmin)
   sub  - first character selects the subset: 'e' even, 'o' odd,
          anything else even+odd

   The even part of the solution field is zeroed before the solve.
   The QOP link object is released before returning. */
void
qopWilsonSolve(Layout *l, real *x, real *u[8], real mass, real *y,
               double rsq, char *sub)
{
  QDP_ColorMatrix *qu[4];
  QDP_DiracFermion *out, *in;

  in = QDP_create_D();
  out = QDP_create_D();
  unpackD(l, in, y);
  unpackD(l, out, x);
  for(int i=0; i<4; i++) {
    qu[i] = QDP_create_M();
    unpackM(l, qu[i], u[2*i]);
    QLA_Real two = 2;
    QDP_M_eq_r_times_M(qu[i], &two, qu[i], QDP_all);
  }

  QOP_FermionLinksWilson *fla;
  fla = QOP_wilson_create_L_from_qdp(qu, NULL);

  QOP_evenodd_t eo=QOP_EVENODD;
  if(sub[0]=='e') {
    eo = QOP_EVEN;
  }
  if(sub[0]=='o') {
    eo = QOP_ODD;
  }

  QOP_info_t info = QOP_INFO_ZERO;
  QOP_invert_arg_t inv_arg = QOP_INVERT_ARG_DEFAULT;
  QOP_resid_arg_t res_arg = QOP_RESID_ARG_DEFAULT;
  res_arg.rsqmin = rsq;
  inv_arg.max_iter = 1000;
  inv_arg.restart = 500;
  inv_arg.max_restarts = 5;
  inv_arg.evenodd = eo;
  inv_arg.mixed_rsq = 0;

  QDP_D_eq_zero(out, QDP_even);
  QOP_wilson_invert_qdp(&info, fla, &inv_arg, &res_arg, mass, out, in);
  printf0("QOP its: %i\n", res_arg.final_iter);

  packD(l, x, out);

  /* Release the QOP link object; it was previously leaked on every call */
  QOP_wilson_destroy_L(fla);

  QDP_destroy_D(in);
  QDP_destroy_D(out);
  for(int i=0; i<4; i++) {
    QDP_destroy_M(qu[i]);
  }
}
/* Apply the QOP Wilson dslash.

   l    - layout used by unpackD/unpackM/packD
   x    - in: initial contents of the output field, out: result (packed)
   u    - packed gauge links; only u[0], u[2], u[4], u[6] are read,
          and each is multiplied by 2 before being handed to QOP
   mass - converted to kappa = 0.5/(4+mass)
   sign - passed through to QOP_wilson_dslash_qdp
   y    - packed input vector
   sub  - first character selects the output subset: 'e' even (input
          odd), 'o' odd (input even), anything else even+odd for both

   The norm2 of the output over all sites is printed.
   The QOP link object is released before returning. */
void
qopWilsonDslash(Layout *l, real *x, real *u[8], real mass, int sign,
                real *y, char *sub)
{
  QDP_ColorMatrix *qu[4];
  QDP_DiracFermion *out, *in;

  in = QDP_create_D();
  out = QDP_create_D();
  unpackD(l, in, y);
  unpackD(l, out, x);
  for(int i=0; i<4; i++) {
    qu[i] = QDP_create_M();
    unpackM(l, qu[i], u[2*i]);
    QLA_Real two = 2;
    QDP_M_eq_r_times_M(qu[i], &two, qu[i], QDP_all);
  }

  QOP_FermionLinksWilson *fla;
  fla = QOP_wilson_create_L_from_qdp(qu, NULL);

  QOP_evenodd_t eoOut=QOP_EVENODD, eoIn=QOP_EVENODD;
  if(sub[0]=='e') {
    eoOut = QOP_EVEN;
    eoIn = QOP_ODD;
  }
  if(sub[0]=='o') {
    eoOut = QOP_ODD;
    eoIn = QOP_EVEN;
  }

  real kappa = 0.5/(4+mass);
  QOP_wilson_dslash_qdp(NULL, fla, kappa, sign, out, in, eoOut, eoIn);

  QLA_Real n2;
  QDP_r_eq_norm2_D(&n2, out, QDP_all);
  printf0("out2: %g\n", n2);

  packD(l, x, out);

  /* Release the QOP link object; it was previously leaked on every call */
  QOP_wilson_destroy_L(fla);

  QDP_destroy_D(in);
  QDP_destroy_D(out);
  for(int i=0; i<4; i++) {
    QDP_destroy_M(qu[i]);
  }
}
void RG_smear_dir (QDP_ColorMatrix *sm_link, QDP_ColorMatrix *link[], QLA_Real w_l, QLA_Real w_s, QLA_Int dir, QDP_Sub_Block s, int len) { int i,v[RG_Nd],n; QLA_Int nu,mu=dir; QDP_Subset sub; QDP_Shift offset[RG_Nd]; QLA_Complex unit; QDP_ColorMatrix *temp1, *temp2, *temp3, *temp4, *temp5, *temp6; temp1 = QDP_create_M(); temp2 = QDP_create_M(); temp3 = QDP_create_M(); temp4 = QDP_create_M(); temp5 = QDP_create_M(); temp6 = QDP_create_M(); for(nu=0; nu < RG_Nd ; nu++) { for(i=0; i<RG_Nd;i++) v[i] = 0; v[nu] = len; offset[nu] = QDP_create_shift(v); } SQDP_M_eq_r_times_M(temp6,&w_l,link[mu],s); /* Set temp4 to zero */ SQDP_M_eq_zero(temp4,s); n = RG_Nd; #ifdef CHECK_SMEAR_QDP_MILC n = 3; #endif /* Sum on staples */ for(nu=0; nu < n ; nu++)if(nu != mu) { /* For forward staples */ SQDP_M_eq_sM(temp1, link[mu], offset[nu], QDP_forward, s); SQDP_M_eq_sM(temp2, link[nu], offset[mu], QDP_forward, s); SQDP_M_eq_M_times_Ma(temp3, temp1, temp2, s); SQDP_M_peq_M_times_M(temp4, link[nu], temp3, s); /* For backward staples */ SQDP_M_eq_M_times_M(temp3, link[mu], temp2, s); SQDP_M_eq_Ma_times_M(temp1, link[nu], temp3, s); SQDP_M_eq_sM(temp5, temp1, offset[nu], QDP_backward, s); SQDP_M_peq_M(temp4, temp5, s); } /* U_smeared = w_l * U + w_s * U_staple */ SQDP_M_eq_r_times_M_plus_M(sm_link,&w_s,temp4,temp6,s); QDP_destroy_M(temp1); QDP_destroy_M(temp2); QDP_destroy_M(temp3); QDP_destroy_M(temp4); QDP_destroy_M(temp5); QDP_destroy_M(temp6); for(nu=0; nu < RG_Nd ; nu++) QDP_destroy_shift(offset[nu]); return ; }
// Gauge force built from three classes of terms, weighted by
// coeffs->plaquette, coeffs->rectangle and coeffs->parallelogram.
//
//   info   - on return: final_sec (elapsed time), final_flop
//            (96720 * QDP_sites_on_node) and status (QOP_SUCCESS)
//   gauge  - gauge->links[i] are used as the forward links
//   force  - force->force[mu] is read and overwritten (see final loop)
//   coeffs - the three weights above; no other member is read here
//   eps    - step size; enters only through eb3 = -eps/3
//
// The file-scope array fblink[] is (re)filled here: entries
// 0..QOP_common.ndim-1 alias gauge->links[], and entries OPP_DIR(i) are
// freshly allocated fields that are destroyed again at the end.
// NOTE(review): every loop over mu/nu/sig and the final destroy loop
// (i=4..7) hard-code four dimensions, while the fill loop runs to
// QOP_common.ndim; the routine appears to assume ndim == 4.
// The scratch fields tmpmat, tmpmat1..tmpmat4 and staples are reused
// many times; the statement order below is significant.
void QOPPC(symanzik_1loop_gauge_force1) (QOP_info_t *info, QOP_GaugeField *gauge,
		   QOP_Force *force, QOP_gauge_coeffs_t *coeffs, REAL eps)
{
  REAL Plaq, Rect, Pgm ;
  QDP_ColorMatrix *tempmom_qdp[4];
  QDP_ColorMatrix *Amu[6]; // products of 2 links: [0..2] Unu(x)*Umu(x+nu), [3..5] U_{-nu}(x)*Umu(x-nu)
  QDP_ColorMatrix *tmpmat;
  QDP_ColorMatrix *tmpmat1;
  QDP_ColorMatrix *tmpmat2;
  QDP_ColorMatrix *staples;
  QDP_ColorMatrix *tmpmat3;
  QDP_ColorMatrix *tmpmat4;

  int i, k;
  int mu, nu, sig;
  double dtime;
  // The overall factor carries no beta here (an earlier form used -eps*beta/3.0)
  REAL eb3 = -eps/3.0;
  // For the i-th nu != mu, j[i][] lists the Amu slots of the other two
  // directions different from mu (walked by k in the pgm loop below)
  int j[3][2] = {{1,2},
                 {0,2},
                 {0,1}};

  dtime = -QOP_time();

  // Zeroed accumulators, one per direction
  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = QDP_create_M();
    QDP_M_eq_zero(tempmom_qdp[mu], QDP_all);
  }

  tmpmat = QDP_create_M();
  // fblink[i] is the forward link; fblink[OPP_DIR(i)] is the adjoint of the
  // link fetched with a backward shift along i
  for(i=0; i<QOP_common.ndim; i++) {
    fblink[i] = gauge->links[i];
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }

  for(i=0; i<6; i++) {
    Amu[i] = QDP_create_M();
  }

  staples = QDP_create_M();
  tmpmat1 = QDP_create_M();
  tmpmat2 = QDP_create_M();
  tmpmat3 = QDP_create_M();
  tmpmat4 = QDP_create_M();

  Plaq = coeffs->plaquette;
  Rect = coeffs->rectangle;
  Pgm  = coeffs->parallelogram;

  //Construct 3-staples and rectangles
  for(mu=0; mu<4; mu++) {
    i=0; // counts the nu != mu visited so far (0..2); indexes Amu[]
    for(nu=0; nu<4; nu++) {
      if(nu!=mu){
	// tmpmat1 = Umu(x+nu)
	QDP_M_eq_sM(tmpmat1, fblink[mu], QDP_neighbor[nu], QDP_forward, QDP_all);
	// Amu[i] = Unu(x)*Umu(x+nu)
	QDP_M_eq_M_times_M(Amu[i], fblink[nu], tmpmat1, QDP_all);

	//tmpmat2 = Umu(x-nu)
	QDP_M_eq_sM(tmpmat2, fblink[mu], QDP_neighbor[nu], QDP_backward, QDP_all);
	// Amu[i+3] = U_{-nu}(x)*Umu(x-nu)
	QDP_M_eq_M_times_M(Amu[i+3], fblink[OPP_DIR(nu)], tmpmat2, QDP_all);

	//tmpmat = U_{nu}(x+mu)
	QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	// staples = Amu[i]*adj(Unu(x+mu)): the +nu 3-staple, weighted by Plaq
	QDP_M_eq_M_times_Ma(staples, Amu[i], tmpmat, QDP_all);
	QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);

	// Terms built on the +nu staple, weighted by Rect; two go to
	// tempmom_qdp[nu] and one to tempmom_qdp[mu]
	//tmpmat = U_{-nu}(x+mu)
	QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	QDP_M_eq_Ma_times_M(tmpmat3, fblink[OPP_DIR(nu)], staples, QDP_all);
	QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
	QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
	QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

	QDP_M_eq_Ma_times_M(tmpmat4, tmpmat2, tmpmat3, QDP_all);
	QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
	QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
	QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);

	//tmpmat = U_{-nu}(x+mu)
	QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	QDP_M_eq_M_times_Ma(tmpmat3, tmpmat2, tmpmat, QDP_all);
	QDP_M_eq_M_times_Ma(tmpmat, tmpmat3, staples, QDP_all);
	QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[nu], QDP_forward, QDP_all);
	QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);

	//tmpmat = U_{-nu}(x+mu)
	QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	// staples = Amu[i+3]*adj(U_{-nu}(x+mu)): the -nu 3-staple, weighted by Plaq
	QDP_M_eq_M_times_Ma(staples, Amu[i+3], tmpmat, QDP_all);
	QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);

	// Terms built on the -nu staple, weighted by Rect
	QDP_M_eq_Ma_times_M(tmpmat3, fblink[nu], staples, QDP_all);
	QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
	QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_backward, QDP_all);
	QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

	// tmpmat1 still holds Umu(x+nu) from the top of this iteration
	QDP_M_eq_Ma_times_M(tmpmat, tmpmat3, tmpmat1, QDP_all);
	QDP_M_eq_sM(tmpmat4, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
	QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);

	QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	QDP_M_eq_M_times_M(tmpmat3, staples, tmpmat, QDP_all);
	QDP_M_eq_M_times_Ma(tmpmat4, tmpmat3, tmpmat1, QDP_all);
	QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);
	i++;
      }
    }

    // Construct the pgm staples and add them to force.
    // Amu[] still holds the two-link products computed above for this mu.
    QDP_M_eq_zero(staples, QDP_all);
    i=0;
    for(nu=0; nu<4; nu++){
      if(nu!=mu){
	k=0; // counts the sig visited so far for this nu (0..1)
	for(sig=0; sig<4;sig ++){
	  if(sig!=mu && nu!=sig){

	    // the nu_sig_mu ... staple and 3 reflections

	    // (+nu, +sig)
	    //tmpmat = Amu["sig"](x+nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_forward, QDP_all);
	    //tmpmat1 = Unu(x)*Amu["sig"](x+nu)
	    QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);
	    //tmpmat3 = Unu(x+mu+sig), obtained with two successive shifts
	    QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all);
	    //tmpmat2 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
	    //tmpmat = Usig(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
	    //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);

	    // (-nu, +sig)
	    //tmpmat = Amu["sig"](x-nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_backward, QDP_all);
	    //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)
	    QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);
	    //tmpmat3 = U_{-nu}(x+mu+sig)
	    QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all);
	    //tmpmat2 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(U_{-nu}(x+mu+sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
	    //tmpmat = Usig(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
	    //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(U_{-nu}(x+mu+sig))*adj(Usig(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);

	    // (-nu, -sig)
	    //tmpmat = Amu["-sig"](x-nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_backward, QDP_all);
	    //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)
	    QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);
	    //tmpmat3 = U_{-nu}(x+mu-sig)
	    QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all);
	    //tmpmat2 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(U_{-nu}(x+mu-sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
	    //tmpmat = U_{-sig}(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
	    //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(U_{-nu}(x+mu-sig))*adj(U_{-sig}(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);

	    // (+nu, -sig)
	    //tmpmat = Amu["-sig"](x+nu)
	    QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_forward, QDP_all);
	    //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)
	    QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);
	    //tmpmat3 = Unu(x+mu-sig)
	    QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
	    QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all);
	    //tmpmat2 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))
	    QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
	    //tmpmat = U_{-sig}(x+mu)
	    QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
	    //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))*adj(U_{-sig}(x+mu))
	    QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);

	    QDP_M_peq_M(staples, tmpmat1, QDP_all);

	    k++;
	  }//close if sig!=mu && nu!=sig
	}//close sig loop
	i++;
      }// close if nu!=mu
    }//close the pgm nu loop

    // All pgm staples for this mu enter with the single weight Pgm
    QDP_M_peq_r_times_M(tempmom_qdp[mu], &Pgm, staples, QDP_all);

  }// closes the mu loop

#ifdef CHKSUM
  QLA_ColorMatrix qcm;
  QLA_Complex det, chk;
  QLA_c_eq_r(chk, 0);
#endif
  // Close each accumulated staple sum with its link, scale by eb3, add the
  // incoming force, and store the result of QDP_M_eq_antiherm_M back into
  // force->force[mu]
  for(mu=0; mu<4; mu++){
    QDP_M_eq_M_times_Ma(tmpmat, fblink[mu], tempmom_qdp[mu], QDP_all);
    QDP_M_eq_r_times_M_plus_M( tempmom_qdp[mu], &eb3, tmpmat, force->force[mu], QDP_all);
    QDP_M_eq_antiherm_M(force->force[mu], tempmom_qdp[mu], QDP_all);
#ifdef CHKSUM
    // Debug checksum: determinant of the site-summed force matrix
    QDP_m_eq_sum_M(&qcm, force->force[mu], QDP_all);
    QLA_C_eq_det_M(&det, &qcm);
    QLA_c_peq_c(chk, det);
#endif
  }
#ifdef CHKSUM
  QOP_printf0("chksum: %g %g\n", QLA_real(chk), QLA_imag(chk));
#endif

  //DESTROY various fields

  QDP_destroy_M(tmpmat);
  QDP_destroy_M(tmpmat1);
  QDP_destroy_M(tmpmat2);
  QDP_destroy_M(tmpmat3);
  QDP_destroy_M(staples);
  QDP_destroy_M(tmpmat4);

  for(mu=0; mu<4; mu++){
    QDP_destroy_M(tempmom_qdp[mu]);
  }
  for(i=0; i<6; i++) {
    QDP_destroy_M(Amu[i]);
  }

  // Only the backward entries were allocated here; fblink[0..3] alias
  // gauge->links[] and are left alone
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();

  // Fixed per-site flop count used for reporting
  double nflop = 96720;
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node;
  info->status = QOP_SUCCESS;
}
/* Build the averaged link paths from a site to each of the RG_Ncn
   corners reached with at most one step of length len per axis, and
   project them into pr_wlink.

   pr_wlink - output of project_qdp, RG_Ncn fields
   link_qdp - input links, one per direction
   s        - sub-block on which everything is evaluated
   len      - step length passed to create_shift

   wlink[0] is the constant 1, wlink[1..4] are the single links, and
   each remaining wlink[i] is the sum of all 2-, 3- and 4-link paths
   whose shift has rv == i, weighted by 1/2, 1/6 and 1/24 respectively.

   The path counts (4, 12, 24, 24) assume RG_Nd == 4.
*/
void RG_create_path(QDP_ColorMatrix *pr_wlink[RG_Ncn],
                    QDP_ColorMatrix *link_qdp[RG_Nd],
                    QDP_Sub_Block s,int len)
{
  int i,j,k,t,x[4];
  int count,c2,space_only;
  QDP_ColorMatrix *path_1[4];
  QDP_ColorMatrix *path_2[12];
  QDP_ColorMatrix *path_3[24];
  QDP_ColorMatrix *path_4[24];
  QDP_ColorMatrix *wlink[RG_Ncn];
  QDP_Shift offset;
  /* Shift tables for the 1-, 2-, 3- and 4-link paths.  They are small
     and of fixed size, so they live on the stack rather than in
     unchecked heap allocations. */
  shift_v d1[4], d2[12], d3[24], d4[24];
  QLA_Real c = 1.0;
  QLA_Real fact2 = 1.0/2.0;
  QLA_Real fact3 = 1.0/6.0;
  QLA_Real fact4 = 1.0/24.0;
  QLA_Complex unit;

  for (i = 0; i < RG_Ncn; i++)
    wlink[i] = QDP_create_M();

  for (i = 0; i < 4; i++)
    path_1[i] = QDP_create_M();
  for (i = 0; i < 4; i++)
    SQDP_M_eq_M(path_1[i],link_qdp[i],s);

  for (i = 0; i < 12; i++)
    path_2[i] = QDP_create_M();

  for (i = 0; i < 24; i++)
    {
      path_3[i] = QDP_create_M();
      path_4[i] = QDP_create_M();
    }

  /* One-link shifts */
  for (i = 0; i < RG_Nd; i++)
    {
      x[0] = i;
      d1[i] = create_shift(x,1,len);
    }

  /* Two-link paths: extend each one-link path by a different direction */
  count = 0;
  for (i = 0; i < RG_Nd; i++)
    {
      x[0] = i;
      c2 = find_count(d1,x,1);
      offset = QDP_create_shift(d1[c2].s);
      for (j = 0; j < RG_Nd ; j++)
        if ( j != i)
          {
            x[1] = j;
            d2[count] = create_shift(x,2,len);
            SQDP_M_eq_M_times_sM(path_2[count],path_1[c2],link_qdp[j],offset,QDP_forward,s);
            count ++;
          }
      QDP_destroy_shift(offset);
    }

  /* Three-link paths */
  count = 0;
  for (i = 0; i < RG_Nd; i++)
    for (j = 0; j < RG_Nd; j++)
      if (j != i)
        {
          x[0] = i;
          x[1] = j;
          c2 = find_count(d2,x,2);
          offset = QDP_create_shift(d2[c2].s);
          for (k = 0; k < RG_Nd; k++)
            if (k != i)
              if (k != j)
                {
                  x[2] = k;
                  d3[count] = create_shift(x,3,len);
                  SQDP_M_eq_M_times_sM(path_3[count],path_2[c2],link_qdp[k],offset,QDP_forward,s);
                  count++;
                }
          QDP_destroy_shift(offset);
        }

  /* Four-link paths */
  count = 0;
  for (i = 0; i < RG_Nd; i++)
    for (j = 0; j < RG_Nd; j++)
      if (j != i)
        for (k = 0; k < RG_Nd; k++)
          if (k != i)
            if (k != j)
              {
                x[0] = i;
                x[1] = j;
                x[2] = k;
                c2 = find_count(d3,x,3);
                offset = QDP_create_shift(d3[c2].s);
                for (t = 0; t < RG_Nd; t++)
                  if (t != i)
                    if (t != j)
                      if (t != k)
                        {
                          x[3] = t;
                          d4[count] = create_shift(x,4,len);
                          SQDP_M_eq_M_times_sM(path_4[count],path_3[c2],link_qdp[t],offset,QDP_forward,s);
                          count++;
                        }
                QDP_destroy_shift(offset);
              }

  /* Corner 0 is the site itself; corners 1..4 are the single links */
  QLA_C_eq_R(&unit,&c);
  SQDP_M_eq_c(wlink[0],&unit,s);

  for (i=1;i<5;i++)
    SQDP_M_eq_M(wlink[i],path_1[i-1],s);

  /* Remaining corners: average over all paths that end there */
  for (i=5;i<RG_Ncn;i++)
    {
      SQDP_M_eq_zero(wlink[i],s);
      for (j=0;j<12;j++)
        if(d2[j].rv == i)
          SQDP_M_peq_r_times_M(wlink[i],&fact2,path_2[j],s);
      for (j=0;j<24;j++)
        if(d3[j].rv == i)
          SQDP_M_peq_r_times_M(wlink[i],&fact3,path_3[j],s);
      /* Four-link paths are selected through d4 and therefore must be
         taken from path_4; the previous code added path_3[j] here and
         never read path_4 at all. */
      for (j=0;j<24;j++)
        if(d4[j].rv == i)
          SQDP_M_peq_r_times_M(wlink[i],&fact4,path_4[j],s);
    }

  space_only = RG_Ncn;
  project_qdp(wlink, pr_wlink,&space_only);

  for (i = 0; i < 4; i++)
    QDP_destroy_M(path_1[i]);
  for (i = 0; i < 12; i++)
    QDP_destroy_M(path_2[i]);
  for (i = 0; i < 24; i++)
    {
      QDP_destroy_M(path_3[i]);
      QDP_destroy_M(path_4[i]);
    }
  for (i = 0; i < RG_Ncn; i++)
    QDP_destroy_M(wlink[i]);

  return;
}
/* Solve for several masses at once with the QOP multi-mass Wilson inverter.

   l       - layout used by unpackD/unpackM/packD
   x       - nmasses packed vectors; in: initial guesses, out: solutions
   u       - packed gauge links; only u[0], u[2], u[4], u[6] are read,
             and each is multiplied by 2 before being handed to QOP
   masses  - nmasses masses (converted to type real for QOP)
   y       - packed source, shared by all masses
   nmasses - number of masses
   rsq     - residual target (res_arg.rsqmin)
   sub     - currently ignored: the solve is always done with QOP_EVEN

   All masses share one QOP_resid_arg_t, so the iteration count printed
   at the end is whatever QOP left in that shared structure.
   The even part of every solution field is zeroed before the solve.
   The QOP link object is released before returning. */
void
qopWilsonSolveMulti(Layout *l, real *x[], real *u[8], double masses[],
                    real *y, int nmasses, double rsq, char *sub)
{
  QDP_ColorMatrix *qu[4];
  QDP_DiracFermion *out[nmasses], *in, **outp;
  outp = out;

  in = QDP_create_D();
  unpackD(l, in, y);
  for(int i=0; i<nmasses; i++) {
    out[i] = QDP_create_D();
    unpackD(l, out[i], x[i]);
    QDP_D_eq_zero(out[i], QDP_even);
  }
  for(int i=0; i<4; i++) {
    qu[i] = QDP_create_M();
    unpackM(l, qu[i], u[2*i]);
    QLA_Real two = 2;
    QDP_M_eq_r_times_M(qu[i], &two, qu[i], QDP_all);
  }

  QOP_FermionLinksWilson *fla;
  fla = QOP_wilson_create_L_from_qdp(qu, NULL);

#if 0
  QOP_evenodd_t eo = QOP_EVENODD;
  if(sub[0]=='e') {
    eo = QOP_EVEN;
  }
  if(sub[0]=='o') {
    eo = QOP_ODD;
  }
#endif
  QOP_evenodd_t eo = QOP_EVEN;

  QOP_info_t info = QOP_INFO_ZERO;
  QOP_invert_arg_t inv_arg = QOP_INVERT_ARG_DEFAULT;
  inv_arg.max_iter = 1000;
  inv_arg.restart = 500;
  inv_arg.max_restarts = 5;
  inv_arg.evenodd = eo;
  inv_arg.mixed_rsq = 0;

  QOP_resid_arg_t res_arg = QOP_RESID_ARG_DEFAULT;
  res_arg.rsqmin = rsq;

  /* QOP expects arrays of pointers / values per mass */
  QOP_resid_arg_t *ra[nmasses];
  QOP_resid_arg_t **rap = ra;
  real mf[nmasses], *mfp;
  mfp = mf;
  for(int i=0; i<nmasses; i++) {
    ra[i] = &res_arg;
    mf[i] = masses[i];
  }

  QOP_wilson_invert_multi_qdp(&info, fla, &inv_arg, &rap, &mfp, &nmasses,
                              &outp, &in, 1);
  printf0("QOP its: %i\n", res_arg.final_iter);

  /* Release the QOP link object; it was previously leaked on every call */
  QOP_wilson_destroy_L(fla);

  QDP_destroy_D(in);
  for(int i=0; i<nmasses; i++) {
    packD(l, x[i], out[i]);
    QDP_destroy_D(out[i]);
  }
  for(int i=0; i<4; i++) {
    QDP_destroy_M(qu[i]);
  }
}
/* Helper for smearing level 0: one pass over the four directions.

   For every direction, accumulates
       sum_term residues[term] * x[term] * adj(shifted x[term])
   into oprod, passes it through link_gather_connection_qdp, and adds
   the result (weight 1) to accum[dir], which is zeroed first.

   naik == 0 uses dir / OPP_DIR(dir); naik != 0 uses DIR3(dir) /
   OPP_3_DIR(DIR3(dir)) for the shift and the gather.

   tsrc[2], vec_tmp[2], oprod, gathered and tmat are caller-owned work
   fields.  The two tsrc/vec_tmp slots are used alternately so the
   shift for the next term is started before the current term is
   consumed.  Returns the per-site flop count of the pass. */
static size_t
hisq_smearing0_pass(REAL *residues, QDP_ColorVector *x[], int nterms,
                    QDP_ColorMatrix *accum[4], int naik,
                    QDP_ColorVector *tsrc[2], QDP_ColorVector *vec_tmp[2],
                    QDP_ColorMatrix *oprod, QDP_ColorMatrix *gathered,
                    QDP_ColorMatrix *tmat)
{
  size_t nflops = 0;
  int term, dir, k;
  REAL coeff;

  // clear force accumulators
  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(accum[dir], QDP_all);

  for(dir=XUP;dir<=TUP;dir++){ // loop on directions, path table is not needed
    int path_dir = naik ? DIR3(dir) : dir;
    int back_dir = naik ? OPP_3_DIR(path_dir) : OPP_DIR(path_dir);

    k=0; // which vec_tmp we are using (0 or 1)
    QDP_V_eq_V(tsrc[k], x[0], QDP_all);
    QDP_V_eq_sV(vec_tmp[k], tsrc[k],
                fnshift(back_dir), fndir(back_dir), QDP_all);
    QDP_M_eq_zero(oprod, QDP_all);

    for(term=0;term<nterms;term++){
      if(term<nterms-1) {
        // start the shift for the next term in the other slot
        QDP_V_eq_V(tsrc[1-k], x[term+1], QDP_all);
        QDP_V_eq_sV(vec_tmp[1-k], tsrc[1-k],
                    fnshift(back_dir), fndir(back_dir), QDP_all);
      }
      QDP_M_eq_V_times_Va(tmat, tsrc[k], vec_tmp[k], QDP_all);
      nflops += 54;
      QDP_discard_V(vec_tmp[k]);
      QDP_M_peq_r_times_M(oprod, &residues[term], tmat, QDP_all);
      nflops += 36;

      k=1-k; // swap 0 and 1
    } // end loop over terms in rational function expansion

    link_gather_connection_qdp(gathered, oprod, tmat, path_dir);
    coeff = 1.; // any overall factor is applied outside this routine
    QDP_M_peq_r_times_M(accum[dir], &coeff, gathered, QDP_all);
    nflops += 36;
  } // end of loop on directions

  return nflops;
}

/* Smearing level 0.

   Fills force_accum[0..3] (one-link part) and force_accum_naik[0..3]
   (Naik part) from the vectors x[0..nterms-1] weighted by
   residues[0..nterms-1].  Both accumulator sets are overwritten.
   info->final_flop is set to the flop count of this call.
   Does nothing (and leaves info untouched) when nterms == 0. */
static void
QOP_hisq_force_multi_smearing0_fnmat(QOP_info_t *info,
                                     REAL *residues,
                                     QDP_ColorVector *x[],
                                     int nterms,
                                     QDP_ColorMatrix *force_accum[4],
                                     QDP_ColorMatrix *force_accum_naik[4])
{
  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *oprod, *gathered;
  QDP_ColorVector *tsrc[2], *vec_tmp[2];
  size_t nflops = 0;

  if( nterms==0 )return;

  /* Only the work fields that are actually used are allocated; the
     previous version also created MAX_PATH_LENGTH-1 extra matrices and
     an unused mat_tmp0. */
  tmat       = QDP_create_M();
  tsrc[0]    = QDP_create_V();
  tsrc[1]    = QDP_create_V();
  vec_tmp[0] = QDP_create_V();
  vec_tmp[1] = QDP_create_V();
  oprod      = QDP_create_M();
  gathered   = QDP_create_M();

  // One-link part
  nflops += hisq_smearing0_pass(residues, x, nterms, force_accum, 0,
                                tsrc, vec_tmp, oprod, gathered, tmat);

  // *** Naik part ***
  nflops += hisq_smearing0_pass(residues, x, nterms, force_accum_naik, 1,
                                tsrc, vec_tmp, oprod, gathered, tmat);

  QDP_destroy_V( tsrc[0] );
  QDP_destroy_V( tsrc[1] );
  QDP_destroy_V( vec_tmp[0] );
  QDP_destroy_V( vec_tmp[1] );
  QDP_destroy_M( tmat );
  QDP_destroy_M( oprod );
  QDP_destroy_M( gathered );

  info->final_flop = ((double)nflops)*QDP_sites_on_node;

  return;
} //hisq_force_multi_smearing0_fnmat
/* Compute the Asqtad fat (and optionally long) links with QDP and store
   them in MILC order in fn->fat / fn->lng.

   both - non-zero: also store the long links
   fn   - receives the links; fn->fat and (if both) fn->lng are
          allocated here when they are NULL; fn->valid is set to 1
   ap   - supplies the six path coefficients via ap->act_path_coeff

   Terminates if phases_in != 1 or if an allocation fails. */
void
load_asqtad_links(int both, ferm_links_t *fn, ks_action_paths *ap)
{
  Real *coeff_table = ap->act_path_coeff;
  QDP_ColorMatrix *fat_qdp[4];
  QDP_ColorMatrix *long_qdp[4];
  QDP_ColorMatrix *gauge_qdp[4];
  int dir;
  double remaptime = -dclock();
  char myname[] = "load_asqtad_links";
  asqtad_path_coeff c;

  if( phases_in != 1){
    node0_printf("%s: BOTCH: needs phases in\n",myname);
    terminate(1);
  }

  /* QDP fields for fat links, long links, and a copy of the gauge field */
  FORALLUPDIR(dir){
    fat_qdp[dir]   = QDP_create_M();
    long_qdp[dir]  = QDP_create_M();
    gauge_qdp[dir] = QDP_create_M();
  }

  /* Gauge links: site structure -> QDP */
  set4_M_from_site(gauge_qdp, F_OFFSET(link), EVENANDODD);

  /* Asqtad path coefficients, in table order */
  c.one_link     = coeff_table[0];
  c.naik         = coeff_table[1];
  c.three_staple = coeff_table[2];
  c.five_staple  = coeff_table[3];
  c.seven_staple = coeff_table[4];
  c.lepage       = coeff_table[5];

  /* The link computation itself is excluded from the remap timing */
  remaptime += dclock();
  create_fn_links_qdp(fat_qdp, long_qdp, gauge_qdp, &c);
  remaptime -= dclock();

  /* The gauge copy is no longer needed */
  FORALLUPDIR(dir){
    QDP_destroy_M(gauge_qdp[dir]);
  }

  /* Make sure there is room for the fat links */
  if(fn->fat == NULL){
    fn->fat = (su3_matrix *)special_alloc(sites_on_node*4*sizeof(su3_matrix));
    if(fn->fat == NULL){
      printf("%s(%d): no room for t_fatlink\n",myname,this_node);
      terminate(1);
    }
  }

  /* ... and for the long links, but only when they were requested */
  if(both && fn->lng == NULL){
    fn->lng = (su3_matrix *)special_alloc(sites_on_node*4*sizeof(su3_matrix));
    if(fn->lng == NULL){
      printf("%s(%d): no room for t_longlink\n",myname,this_node);
      terminate(1);
    }
  }

  /* QDP -> MILC order */
  set4_field_from_M(fn->fat, fat_qdp, EVENANDODD);
  if(both)
    set4_field_from_M(fn->lng, long_qdp, EVENANDODD);

  FORALLUPDIR(dir){
    QDP_destroy_M(fat_qdp[dir]);
    QDP_destroy_M(long_qdp[dir]);
  }

  fn->valid = 1;

  remaptime += dclock();
#if defined(LLTIME) && defined(REMAP)
  node0_printf("LLREMAP: time = %e\n",remaptime);
#endif
}
void start(void) { double mf, best_mf; QLA_Real plaq; QDP_ColorMatrix **u; int i, bs, bsi, best_bs; u = (QDP_ColorMatrix **) malloc(ndim*sizeof(QDP_ColorMatrix *)); for(i=0; i<ndim; i++) u[i] = QDP_create_M(); get_random_links(u, ndim, 0.3); plaq = get_plaq(u); if(QDP_this_node==0) printf("plaquette = %g\n", plaq); QOP_layout_t qoplayout = QOP_LAYOUT_ZERO; qoplayout.latdim = ndim; qoplayout.latsize = (int *) malloc(ndim*sizeof(int)); for(i=0; i<ndim; i++) { qoplayout.latsize[i] = lattice_size[i]; } qoplayout.machdim = -1; if(QDP_this_node==0) { printf("begin init\n"); fflush(stdout); } QOP_init(&qoplayout); gauge = QOP_create_G_from_qdp(u); QOP_Force *force; QDP_ColorMatrix *cm[4]; for(i=0; i<4; i++) { cm[i] = QDP_create_M(); QDP_M_eq_zero(cm[i], QDP_all); } QOP_gauge_coeffs_t gcoeffs = QOP_GAUGE_COEFFS_ZERO; gcoeffs.plaquette = 0.2; gcoeffs.rectangle = 0.2; gcoeffs.parallelogram = 0.2; gcoeffs.adjoint_plaquette = 0.2; force = QOP_create_F_from_qdp(cm); mf = bench_action(&gcoeffs, force); QOP_destroy_F(force); printf0("action: sec%7.4f mflops = %g\n", secs, mf); if(QDP_this_node==0) { printf("begin force\n"); fflush(stdout); } best_mf = 0; best_bs = bsa[0]; for(bsi=0; bsi<bsn; bsi++) { bs = bsa[bsi]; QDP_set_block_size(bs); force = QOP_create_F_from_qdp(cm); mf = bench_force(&gcoeffs, force); QOP_destroy_F(force); printf0("GF: bs%5i sec%7.4f mflops = %g\n", bs, secs, mf); if(mf>best_mf) { best_mf = mf; best_bs = bs; } } QDP_set_block_size(best_bs); QDP_profcontrol(1); force = QOP_create_F_from_qdp(cm); mf = bench_force(&gcoeffs, force); QDP_profcontrol(0); printf0("prof: GF: bs%5i sec%7.4f mflops = %g\n", best_bs, secs, mf); printf0("best: GF: bs%5i mflops = %g\n", best_bs, best_mf); if(QDP_this_node==0) { printf("begin unload links\n"); fflush(stdout); } //QOP_asqtad_invert_unload_links(); if(QDP_this_node==0) { printf("begin finalize\n"); fflush(stdout); } QOP_finalize(); }
void start(void) { double mf, best_mf; QLA_Real plaq; QDP_ColorMatrix **u; QDP_DiracFermion *out, *in; int i, st, ns, nm, bs, sti, nsi, nmi, bsi, best_st, best_ns, best_nm, best_bs; u = (QDP_ColorMatrix **) malloc(ndim*sizeof(QDP_ColorMatrix *)); for(i=0; i<ndim; i++) u[i] = QDP_create_M(); get_random_links(u, ndim, 0.2); plaq = get_plaq(u); if(QDP_this_node==0) printf("plaquette = %g\n", plaq); out = QDP_create_D(); in = QDP_create_D(); QDP_D_eq_gaussian_S(in, rs, QDP_all); QOP_layout_t qoplayout = QOP_LAYOUT_ZERO; qoplayout.latdim = ndim; qoplayout.latsize = (int *) malloc(ndim*sizeof(int)); for(i=0; i<ndim; i++) { qoplayout.latsize[i] = lattice_size[i]; } qoplayout.machdim = -1; QOP_info_t info = QOP_INFO_ZERO; QOP_invert_arg_t inv_arg = QOP_INVERT_ARG_DEFAULT; QOP_resid_arg_t res_arg = QOP_RESID_ARG_DEFAULT; res_arg.rsqmin = rsqmin; inv_arg.max_iter = 600; inv_arg.restart = 200; inv_arg.evenodd = QOP_EVEN; if(QDP_this_node==0) { printf("begin init\n"); fflush(stdout); } QOP_init(&qoplayout); if(QDP_this_node==0) { printf("begin load links\n"); fflush(stdout); } //flw = QOP_wilson_create_L_from_qdp(u, NULL); if(QDP_this_node==0) { printf("begin invert\n"); fflush(stdout); } if(cgtype>=0) { QOP_opt_t optcg; optcg.tag = "cg"; optcg.value = cgtype; QOP_wilson_invert_set_opts(&optcg, 1); } best_mf = 0; best_st = sta[0]; best_ns = nsa[0]; best_nm = nma[0]; best_bs = bsa[0]; QOP_opt_t optst; optst.tag = "st"; QOP_opt_t optns; optns.tag = "ns"; QOP_opt_t optnm; optnm.tag = "nm"; for(sti=0; sti<stn; sti++) { if((style>=0)&&(sti!=style)) continue; st = sta[sti]; optst.value = st; if(QOP_wilson_invert_set_opts(&optst, 1)==QOP_FAIL) continue; for(nsi=0; nsi<nsn; nsi++) { ns = nsa[nsi]; optns.value = ns; if(QOP_wilson_invert_set_opts(&optns, 1)==QOP_FAIL) continue; for(nmi=0; nmi<nmn; nmi++) { nm = nma[nmi]; if(nm==0) nm = ns; optnm.value = nm; if(QOP_wilson_invert_set_opts(&optnm, 1)==QOP_FAIL) continue; for(bsi=0; bsi<bsn; bsi++) { bs = bsa[bsi]; QDP_set_block_size(bs); 
flw = QOP_wilson_create_L_from_qdp(u, NULL); mf = bench_inv(&info, &inv_arg, &res_arg, out, in); QOP_wilson_destroy_L(flw); printf0("CONGRAD: st%2i ns%2i nm%2i bs%5i iter%5i sec%7.4f mflops = %g\n", st, ns, nm, bs, res_arg.final_iter, info.final_sec, mf); if(mf>best_mf) { best_mf = mf; best_st = st; best_ns = ns; best_nm = nm; best_bs = bs; } } } } } flw = QOP_wilson_create_L_from_qdp(u, NULL); optst.value = best_st; optns.value = best_ns; optnm.value = best_nm; QOP_wilson_invert_set_opts(&optst, 1); QOP_wilson_invert_set_opts(&optns, 1); QOP_wilson_invert_set_opts(&optnm, 1); QDP_set_block_size(best_bs); QDP_profcontrol(1); mf = bench_inv(&info, &inv_arg, &res_arg, out, in); QDP_profcontrol(0); printf0("prof: CONGRAD: st%2i ns%2i nm%2i bs%5i iter%5i sec%7.4f mflops = %g\n", best_st, best_ns, best_nm, best_bs, res_arg.final_iter, info.final_sec, mf); printf0("best: CONGRAD: st%2i ns%2i nm%2i bs%5i mflops = %g\n", best_st, best_ns, best_nm, best_bs, best_mf); if(QDP_this_node==0) { printf("begin unload links\n"); fflush(stdout); } //QOP_wilson_invert_unload_links(); if(QDP_this_node==0) { printf("begin finalize\n"); fflush(stdout); } QOP_finalize(); }
void QOP_hisq_deriv_multi_fnmat2_qdp(QOP_info_t *info, QOP_FermionLinksHisq *flh, QDP_ColorMatrix *deriv[], QOP_hisq_coeffs_t *hisq_coeff, REAL *residues, QDP_ColorVector *x[], int *n_orders_naik) { #define NC QDP_get_nc(deriv[0]) if(!QOP_asqtad.inited) QOP_asqtad_invert_init(); double dtime = QDP_time(); double totalflops = 0; int siteflops = 0; QOP_info_t tinfo; QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4]; for(int i=0; i<4; i++) { Ugf[i] = flh->U_links[i]; Vgf[i] = flh->V_links[i]; Wgf[i] = flh->W_unitlinks[i]; } QDP_ColorMatrix *force_accum_0[4]; QDP_ColorMatrix *force_accum_0_naik[4]; QDP_ColorMatrix *force_accum_1[4]; QDP_ColorMatrix *force_accum_1u[4]; QDP_ColorMatrix *force_accum_2[4]; QDP_ColorMatrix *force_final[4]; QDP_ColorMatrix *tmat = QDP_create_M(); for(int i=0; i<4; i++) { force_accum_0[i] = QDP_create_M(); force_accum_0_naik[i] = QDP_create_M(); force_accum_1[i] = QDP_create_M(); force_accum_1u[i] = QDP_create_M(); force_accum_2[i] = QDP_create_M(); force_final[i] = QDP_create_M(); QDP_M_eq_zero(force_accum_2[i], QDP_all); } int n_naiks = hisq_coeff->n_naiks; int nterms = 0; for(int inaik = 0; inaik < n_naiks; inaik++) nterms += n_orders_naik[inaik]; // loop on different naik masses int n_naik_shift = 0; for(int inaik=0; inaik<n_naiks; inaik++) { int n_orders_naik_current; if( inaik==0 ) { n_orders_naik_current = nterms; } else { n_orders_naik_current = n_orders_naik[inaik]; } QOP_get_mid(&tinfo, force_accum_0, QDP_neighbor, 4, residues+n_naik_shift, 1, x+n_naik_shift, n_orders_naik_current); totalflops += tinfo.final_flop; QOP_get_mid(&tinfo, force_accum_0_naik, QOP_common.neighbor3, 4, residues+n_naik_shift, 1, x+n_naik_shift, n_orders_naik_current); totalflops += tinfo.final_flop; // compensate for -1 on odd sites here instead of at end for(int dir=0; dir<4; dir++) { QDP_M_eqm_M(force_accum_0[dir], force_accum_0[dir], QDP_odd); QDP_M_eqm_M(force_accum_0_naik[dir], force_accum_0_naik[dir], QDP_odd); } // smearing level 0 for(int i=0; i<4; i++) 
QDP_M_eq_zero(force_accum_1[i], QDP_all); if(inaik==0) { QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->asqtad_one_link; acoef.three_staple = hisq_coeff->asqtad_three_staple; acoef.five_staple = hisq_coeff->asqtad_five_staple; acoef.seven_staple = hisq_coeff->asqtad_seven_staple; acoef.lepage = hisq_coeff->asqtad_lepage; acoef.naik = hisq_coeff->asqtad_naik; QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef, force_accum_0, force_accum_0_naik); //QOP_printf0("HISQ smear0 flops = %g\n", tinfo.final_flop); totalflops += tinfo.final_flop; } else { QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->difference_one_link; acoef.three_staple = 0; acoef.five_staple = 0; acoef.seven_staple = 0; acoef.lepage = 0; acoef.naik = hisq_coeff->difference_naik; QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef, force_accum_0, force_accum_0_naik); totalflops += tinfo.final_flop; } QLA_Real coeff_mult; if( inaik==0 ) { coeff_mult = 1.0; } else { coeff_mult = hisq_coeff->eps_naik[inaik]; } for(int dir=0; dir<4; dir++) { QDP_M_peq_r_times_M(force_accum_2[dir], &coeff_mult, force_accum_1[dir], QDP_all); } siteflops += 4*36; n_naik_shift += n_orders_naik[inaik]; } // smearing level 1 QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->fat7_one_link; acoef.three_staple = hisq_coeff->fat7_three_staple; acoef.five_staple = hisq_coeff->fat7_five_staple; acoef.seven_staple = hisq_coeff->fat7_seven_staple; acoef.lepage = 0; acoef.naik = 0; if(QOP_hisq_links.use_fat7_lepage) { acoef.lepage = hisq_coeff->fat7_lepage; } QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod; if ( umethod==QOP_UNITARIZE_NONE ){ for(int dir=0; dir<4; dir++) QDP_M_eq_zero(force_accum_1[dir], QDP_all); QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef, force_accum_2, NULL); totalflops += tinfo.final_flop; } else if ( umethod==QOP_UNITARIZE_RATIONAL ) { for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all); // reunitarization #if QOP_Colors == 3 
QOP_hisq_force_multi_reunit(&tinfo, Vgf, force_accum_2, force_accum_1u); #else for(int mu=0; mu<4; mu++) { QOP_projectU_deriv_qdp(&tinfo, force_accum_2[mu], Wgf[mu], Vgf[mu], force_accum_1u[mu]); } #endif //QOP_printf0("reunit flops = %g\n", tinfo.final_flop); for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all); totalflops += tinfo.final_flop; for(int dir=0; dir<4; dir++) QDP_M_eq_zero(force_accum_1[dir], QDP_all); QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef, force_accum_1u, NULL); //QOP_printf0("HISQ smear1 flops = %g\n", tinfo.final_flop); totalflops += tinfo.final_flop; } else { QOP_printf0("Unknown or unsupported unitarization method\n"); exit(1); } // take into account even/odd parity (it is NOT done in "smearing" routine) // eps multiplication done outside QOP // extra factor of 2 for(int dir=0; dir<4; dir++) { QLA_Real treal = 2; //QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_even); //QDP_M_meq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_odd); QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_all); } siteflops += 4*36; for(int i=0; i<4; i++) { QDP_destroy_M( force_accum_0[i] ); QDP_destroy_M( force_accum_0_naik[i] ); QDP_destroy_M( force_accum_1[i] ); QDP_destroy_M( force_accum_1u[i] ); QDP_destroy_M( force_accum_2[i] ); QDP_destroy_M( force_final[i] ); } QDP_destroy_M( tmat ); totalflops += ((double)siteflops)*QDP_sites_on_node; info->final_sec = QDP_time() - dtime; info->final_flop = totalflops; info->status = QOP_SUCCESS; #undef NC }
/* Smearing level i*/ static void QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, QDP_ColorMatrix * gf[4], REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_old[4], QDP_ColorMatrix *force_accum_naik_old[4], int internal_num_q_paths, Q_path *internal_q_paths_sorted, int *internal_netbackdir_table) { int i,j,k,lastdir=-99,ipath,ilink; int length,dir,odir; REAL coeff; QDP_ColorMatrix *tmat; QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];; QDP_ColorVector *vec_tmp[2]; int netbackdir; size_t nflops = 0; // table of net path displacements (backwards from usual convention) Q_path *this_path; // pointer to current path /* Allocate fields */ for(i=0;i<=MAX_PATH_LENGTH;i++){ oprod_along_path[i] = QDP_create_M(); } for(i=1;i<=MAX_PATH_LENGTH;i++){ // 0 element is never used (it's unit matrix) mats_along_path[i] = QDP_create_M(); } mat_tmp0 = QDP_create_M(); mat_tmp1 = QDP_create_M(); for(i=0; i<8; i++) stmp[i] = QDP_create_M(); tmat = QDP_create_M(); vec_tmp[0] = QDP_create_V(); vec_tmp[1] = QDP_create_V(); // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum[dir], QDP_all); // loop over paths, and loop over links in path for( ipath=0; ipath<internal_num_q_paths; ipath++ ){ this_path = &(internal_q_paths_sorted[ipath]); if(this_path->forwback== -1)continue; // skip backwards dslash length = this_path->length; netbackdir = internal_netbackdir_table[ipath]; // move f(i-1) force from current site in positive direction, // this corresponds to outer product |X><Y| calculated at the endpoint of the path if( netbackdir<8) { // Not a Naik path link_gather_connection_qdp(oprod_along_path[0] , force_accum_old[OPP_DIR(netbackdir)], tmat, netbackdir ); } else { // Naik path if( NULL==force_accum_naik_old ) { QOP_printf0( "hisq_force_multi_smearing_fnmat: mismatch:\n" ); QOP_printf0( 
"force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" ); exit(0); } // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION link_gather_connection_qdp(oprod_along_path[0] , force_accum_naik_old[OPP_DIR(netbackdir-8)], tmat, netbackdir ); } // figure out how much of the outer products along the path must be // recomputed. j is last one needing recomputation. k is first one. j=length-1; // default is recompute all if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0; for(ilink=j;ilink>=k;ilink--){ link_transport_connection_qdp( oprod_along_path[length-ilink], oprod_along_path[length-ilink-1], gf, mat_tmp0, stmp, this_path->dir[ilink] ); nflops += 198; } // maintain an array of transports "to this point" along the path. // Don't recompute beginning parts of path if same as last path ilink=0; // first link where new transport is needed // Sometimes we don't need the matrix for the last link if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ; ilink<k; ilink++ ){ if( ilink==0 ){ dir = this_path->dir[0]; if( GOES_FORWARDS(dir) ){ QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir], QDP_backward, QDP_all); QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all); QDP_discard_M(tmat); } else{ QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all); } } else { // ilink != 0 dir = OPP_DIR(this_path->dir[ilink]); link_transport_connection_qdp( mats_along_path[ilink+1], mats_along_path[ilink], gf, mat_tmp0, stmp, dir ); nflops += 198; } } // end loop over links // A path has (length+1) points, counting the ends. At first // point, no "down" direction links have their momenta "at this // point". At last, no "up" ... 
if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ilink=0; ilink<=k; ilink++ ){ if(ilink<length)dir = this_path->dir[ilink]; else dir=NODIR; coeff = this_path->coeff; if( (ilink%2)==1 )coeff = -coeff; // add in contribution to the force if( ilink<length && GOES_FORWARDS(dir) ){ link_gather_connection_qdp(mat_tmp1, oprod_along_path[length-ilink-1], tmat, dir ); if(ilink==0) { QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all); } else { QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all); nflops += 36; } if( ilink>0 && GOES_BACKWARDS(lastdir) ){ odir = OPP_DIR(lastdir); if( ilink==1 ){ QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all); QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } else{ link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], tmat, odir ); QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all); } QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all); nflops += 36; } lastdir = dir; } // end loop over links in path // } // end loop over paths // QDP_destroy_V( vec_tmp[0] ); QDP_destroy_V( vec_tmp[1] ); QDP_destroy_M( mat_tmp0 ); QDP_destroy_M( mat_tmp1 ); QDP_destroy_M( tmat ); for(i=0; i<8; i++) QDP_destroy_M(stmp[i]); for(i=0;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( oprod_along_path[i] ); } for(i=1;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( mats_along_path[i] ); } info->final_flop = ((double)nflops)*QDP_sites_on_node; return; }//hisq_force_multi_smearing_fnmat
/*
 * Conjugate gradient solver for M_adjoint*M*psi = chi using QDP fields.
 *
 *   niter          iterations between restarts
 *   rsqmin         stop when |r|^2 <= rsqmin * |chi|^2
 *   final_rsq_ptr  receives the final |r|^2
 *
 * Returns the number of multiplications by M_adjoint*M that were counted.
 *
 * The gauge links, psi and chi are copied from the site structure into
 * the QDP fields gaugelink, psi and chi.  The solution is copied back to
 * the site member psi whenever the main loop ends.  The solver restarts
 * (recomputes the true residual) every niter iterations and gives up
 * once the iteration count has reached 3*niter.
 *
 * With LU defined the work is done on the even sites with
 * mkappa = -kappa^2; otherwise on all sites with mkappa = -kappa.
 * MYSUBSET is defined outside this block -- presumably the matching
 * subset (QDP_even / QDP_all); confirm at its definition.
 */
int
congrad_w(int niter, Real rsqmin, Real *final_rsq_ptr)
{
  int i;
  int iteration;	/* counter for iterations */
  double source_norm;
  double rsqstop;
  QLA_Real a, b;
  double rsq,oldrsq,pkp;	/* Sugar's a,b,resid**2,previous resid*2 */
				/* pkp = cg_p.K.cg_p */
  QLA_Real mkappa;
  QLA_Real sum;
#ifdef CGTIME
  double dtime;
#endif
#ifdef LU
  mkappa = -kappa*kappa;
#else
  mkappa = -kappa;
#endif

  setup_cg();

  /* copy links, initial guess and source from the site structure */
  for(i=0; i<4; i++) {
    set_M_from_site(gaugelink[i], F_OFFSET(link[i]),EVENANDODD);
  }
  set_D_from_site(psi, F_OFFSET(psi),EVENANDODD);
  set_D_from_site(chi, F_OFFSET(chi),EVENANDODD);

#ifdef PRESHIFT_LINKS
  {
    /* gaugelink[i+4] = adjoint of link i shifted with QDP_backward */
    QDP_ColorMatrix *tcm;
    tcm = QDP_create_M();
    for(i=0; i<4; i++) {
      QDP_M_eq_sM(tcm, gaugelink[i], QDP_neighbor[i], QDP_backward, QDP_all);
      QDP_M_eq_Ma(gaugelink[i+4], tcm, QDP_all);
    }
    QDP_destroy_M(tcm);
  }
#endif

#ifdef CGTIME
  dtime = -dclock();
#endif

  iteration=0;
 start:
  /* mp <- M_adjoint*M*psi
     r,p <- chi - mp
     rsq = |r|^2
     source_norm = |chi|^2
  */
  rsq = source_norm = 0.0;

#ifdef LU
  QDP_D_eq_D(cgp, psi, QDP_even);
  dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
  dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

  dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
  dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_even);
  QDP_D_eq_D(cgp, cgr, QDP_even);

  QDP_r_eq_norm2_D(&sum, chi, QDP_even);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_even);
  rsq = sum;
#else
  /* Without LU the operator below is applied on all sites, so psi has
     to be copied on all sites as well.  Copying only the even sites
     left the odd sites of cgp holding stale data. */
  QDP_D_eq_D(cgp, psi, QDP_all);
  dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

  dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_all);
  QDP_D_eq_D(cgp, cgr, QDP_all);

  QDP_r_eq_norm2_D(&sum, chi, QDP_all);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_all);
  rsq = sum;
#endif

  iteration++ ;	/* iteration counts number of multiplications
		   by M_adjoint*M */
  total_iters++;

  rsqstop = rsqmin * source_norm;
  if( rsq <= rsqstop ){
    *final_rsq_ptr= (Real)rsq;
    return (iteration);
  }

  /* main loop - do until convergence or time to restart */
  /*
    oldrsq <- rsq
    mp <- M_adjoint*M*p
    pkp <- p.M_adjoint*M.p
    a <- rsq/pkp
    psi <- psi + a*p
    r <- r - a*mp
    rsq <- |r|^2
    b <- rsq/oldrsq
    p <- r + b*p
  */
  do {
    oldrsq = rsq;
#ifdef LU
    dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
    dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

    dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
    dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_even);
    pkp = sum;
#else
    dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

    dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_all);
    pkp = sum;
#endif
    iteration++;
    total_iters++;

    a = rsq / pkp;
    QDP_D_peq_r_times_D(psi, &a, cgp, MYSUBSET);
    QDP_D_meq_r_times_D(cgr, &a, mp, MYSUBSET);
    QDP_r_eq_norm2_D(&sum, cgr, MYSUBSET);
    rsq = sum;

    if( rsq <= rsqstop ){
      *final_rsq_ptr= (Real)rsq;
#ifdef CGTIME
      dtime += dclock();
      if(this_node==0)
	printf("CONGRAD2: time = %.2e size_r= %.2e iters= %d MF = %.1f\n", dtime,rsq,iteration, (double)6480*iteration*even_sites_on_node/(dtime*1e6));
#endif
      set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);
      return (iteration);
    }

    b = rsq / oldrsq;
    QDP_D_eq_r_times_D_plus_D(cgp, &b, cgp, cgr, MYSUBSET);
  } while( iteration%niter != 0);

  /* not converged yet: save the current solution and restart unless the
     iteration budget is used up */
  set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);

  if( iteration < 3*niter ) goto start;
  *final_rsq_ptr= (Real)rsq;
  return(iteration);
}
// topdir = 1..nd // sidedir = -nd..nd // toplinknum,sidelinknum = 0..nin-1 void QOP_staples_deriv(QOP_info_t *info, int nout, int nin, QDP_ColorMatrix *deriv[], QDP_ColorMatrix *chain[], QDP_ColorMatrix *in[], int nstaples[], int *topdir[], int *sidedir[], int *toplinknum[], int *sidelinknum[], QLA_Real *coef[]) { #define NC QDP_get_nc(in[0]) double dtime = QOP_time(); double nflops = 0; int nd = QDP_ndim(); QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *t3, *t4, *tc, *bt2[nd], *bt3[nd], *ctmps[nd]; int ctn[nd]; for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) ftmps[i][j] = NULL; for(int i=0; i<nd; i++) bt2[i] = bt3[i] = ctmps[i] = NULL; t1 = QDP_create_M(); t2 = QDP_create_M(); t3 = QDP_create_M(); t4 = QDP_create_M(); tc = QDP_create_M(); // process in reverse in case calculated staples used as input for others for(int io=nout-1; io>=0; io--) { for(int i=0; i<nd; i++) { if(ctmps[i]) QDP_discard_M(ctmps[i]); ctn[i] = 0; } QDP_M_eq_M(tc, chain[io], QDP_all); for(int s=0; s<nstaples[io]; s++) { QLA_Real c = coef[io][s]; int tn = toplinknum[io][s]; int sdir = sidedir[io][s]; //QOP_printf0("io: %i s: %i sdir: %i tn: %i c: %g\n", io, s, sdir, tn, c); if(sdir==0) { if(c==1) { QDP_M_peq_M(deriv[tn], tc, QDP_all); nflops += PEQM; } else { QDP_M_peq_r_times_M(deriv[tn], &c, tc, QDP_all); nflops += 2*PEQM; } } else if(sdir>0) { int nu = sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; //QOP_printf0(" mu: %i nu: %i sn: %i\n", mu, nu, sn); QDP_ColorMatrix *Umunu = getU(tn, mu, nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all); QDP_M_eq_Ma_times_M(t2, tc, t1, QDP_all); QDP_ColorMatrix *tb2 = shiftb(t2, mu); QDP_M_eq_M_times_M(t1, tc, Unumu, QDP_all); QDP_M_eq_Ma_times_M(t3, in[sn], t1, QDP_all); QDP_ColorMatrix *tb3 = shiftb(t3, nu); if(c==1) { QDP_M_peq_M_times_Ma(deriv[sn], t1, Umunu, QDP_all); QDP_M_peq_M(deriv[sn], tb2, QDP_all); QDP_M_peq_M(deriv[tn], tb3, QDP_all); nflops += 4*EQMTM+PEQMTM+2*PEQM; } else { 
QDP_M_eq_M_times_Ma(t4, t1, Umunu, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all); QDP_M_peq_r_times_M(deriv[tn], &c, tb3, QDP_all); nflops += 5*EQMTM+6*PEQM; } QDP_discard_M(tb2); QDP_discard_M(tb3); } else { int nu = -sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Cmunu = getC(nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Cmunu, QDP_all); QDP_M_eq_Ma_times_M(t2, in[tn], t1, QDP_all); QDP_ColorMatrix *tb2 = shiftb(t2, mu); QDP_M_eq_M_times_M(t3, in[tn], Unumu, QDP_all); if(c==1) { QDP_M_peq_M_times_Ma(deriv[tn], t1, Unumu, QDP_all); QDP_M_peq_M_times_Ma(deriv[sn], t3, Cmunu, QDP_all); QDP_M_peq_M(deriv[sn], tb2, QDP_all); nflops += 3*EQMTM+2*PEQMTM+PEQM; } else { QDP_M_eq_M_times_Ma(t4, t1, Unumu, QDP_all); QDP_M_peq_r_times_M(deriv[tn], &c, t4, QDP_all); QDP_M_eq_M_times_Ma(t4, t3, Cmunu, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all); nflops += 5*EQMTM+6*PEQM; } QDP_discard_M(tb2); } } } for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]); for(int i=0; i<nd; i++) { if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]); if(bt3[i]!=NULL) QDP_destroy_M(bt3[i]); if(ctmps[i]!=NULL) QDP_destroy_M(ctmps[i]); } QDP_destroy_M(t1); QDP_destroy_M(t2); QDP_destroy_M(t3); QDP_destroy_M(t4); QDP_destroy_M(tc); info->final_sec = QOP_time() - dtime; info->final_flop = nflops*QDP_sites_on_node; info->status = QOP_SUCCESS; #undef NC }
// topdir = 1..nd // sidedir = -nd..nd // toplinknum,sidelinknum = 0..nin-1 void QOP_staples(QOP_info_t *info, int nout, int nin, QDP_ColorMatrix *out[], QDP_ColorMatrix *in[], int nstaples[], int *topdir[], int *sidedir[], int *toplinknum[], int *sidelinknum[], QLA_Real *coef[]) { #define NC QDP_get_nc(in[0]) double dtime = QOP_time(); double nflops = 0; int nd = QDP_ndim(); QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *bt2[nd]; for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) ftmps[i][j] = NULL; for(int i=0; i<nd; i++) bt2[i] = NULL; t1 = QDP_create_M(); t2 = QDP_create_M(); for(int io=0; io<nout; io++) { //QOP_printf0("%i: ns: %i\n", io, nstaples[io]); for(int s=0; s<nstaples[io]; s++) { QLA_Real c = coef[io][s]; int tn = toplinknum[io][s]; int sdir = sidedir[io][s]; //QOP_printf0(" %i: sdir: %i c: %g\n", s, sdir, c); if(sdir==0) { if(c==1) { QDP_M_peq_M(out[io], in[tn], QDP_all); nflops += PEQM; } else { QDP_M_peq_r_times_M(out[io], &c, in[tn], QDP_all); nflops += 2*PEQM; } } else if(sdir>0) { int nu = sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Umunu = getU(tn, mu, nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all); if(c==1) { QDP_M_peq_M_times_Ma(out[io], t1, Unumu, QDP_all); nflops += EQMTM+PEQMTM; } else { QDP_M_eq_M_times_Ma(t2, t1, Unumu, QDP_all); QDP_M_peq_r_times_M(out[io], &c, t2, QDP_all); nflops += 2*EQMTM+2*PEQM; } } else { int nu = -sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[tn], Unumu, QDP_all); QDP_M_eq_Ma_times_M(t2, in[sn], t1, QDP_all); QDP_ColorMatrix *tb = shiftb(t2, nu); if(c==1) { QDP_M_peq_M(out[io], tb, QDP_all); nflops += 2*EQMTM+PEQM; } else { QDP_M_peq_r_times_M(out[io], &c, tb, QDP_all); nflops += 2*EQMTM+2*PEQM; } QDP_discard_M(tb); } } } for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]); for(int i=0; i<nd; i++) 
if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]); QDP_destroy_M(t1); QDP_destroy_M(t2); info->final_sec = QOP_time() - dtime; info->final_flop = nflops*QDP_sites_on_node; info->status = QOP_SUCCESS; #undef NC }
/*
 * Build the Asqtad fat links fl[0..3] and long links ll[0..3] from the
 * gauge field gf[0..3] using the path coefficients in coeffs.
 *
 * All eight output fields are created here with QDP_create_M(); whatever
 * the fl[] and ll[] entries held on entry is overwritten.  The caller
 * owns the new fields and must destroy them.
 *
 * With LLTIME defined, timing lines for the fat and the long link parts
 * are printed by node 0.
 */
static void
create_fn_links_qdp(QDP_ColorMatrix *fl[], QDP_ColorMatrix *ll[], QDP_ColorMatrix *gf[], asqtad_path_coeff *coeffs)
{
  int i, dir;
  int nu, rho, sig;
  QDP_ColorMatrix *stpl, *scratch;
  QLA_Real one_link;
  QLA_Real naik;
#ifdef LLTIME
  double nflopfl = 61632;
  double nflopll = 1804;
#endif
  double dtimefl, dtimell;

  /* output fields */
  for(i=0; i<4; i++) {
    fl[i] = QDP_create_M();
    ll[i] = QDP_create_M();
  }
  /* work fields */
  stpl = QDP_create_M();
  scratch = QDP_create_M();

  dtimefl = -dclock();

  /* to fix up the Lepage term, included by a trick below */
  one_link = coeffs->one_link - 6.0*coeffs->lepage;

  /* ---- fat links ---- */
  for(dir=0; dir<4; dir++) {
    QDP_M_eq_r_times_M(fl[dir], &one_link, gf[dir], QDP_all);

    for(nu=0; nu<4; nu++) {
      if(nu==dir) continue;

      /* three-link staple, kept in stpl for the longer paths */
      compute_gen_staple(stpl, dir, nu, gf[dir], (double)coeffs->three_staple, gf, fl);
      /* Lepage term: the staple of the staple in the same direction */
      compute_gen_staple(NULL, dir, nu, stpl, (double)coeffs->lepage, gf, fl);

      for(rho=0; rho<4; rho++) {
        if(rho==dir || rho==nu) continue;

        /* five-link staple, kept in scratch */
        compute_gen_staple(scratch, dir, rho, stpl, (double)coeffs->five_staple, gf, fl);

        for(sig=0; sig<4; sig++) {
          if(sig==dir || sig==nu || sig==rho) continue;

          /* seven-link staple, only added to the fat link */
          compute_gen_staple(NULL, dir, sig, scratch, (double)coeffs->seven_staple, gf, fl);
        } /* sig */
      } /* rho */
    } /* nu */
  } /* dir */

  /* one clock reading ends the fat link timing and starts the long
     link timing */
  dtimell = -dclock();
  dtimefl -= dtimell;
#ifdef LLTIME
  node0_printf("LLTIME(Fat): time = %e (Asqtad opt) mflops = %e\n",dtimefl, (Real)nflopfl*volume/(1e6*dtimefl*numnodes()) );
#endif

  /* ---- long links ----
     ll[dir] = naik * gf[dir] * shift(gf[dir] * shift(gf[dir])),
     both shifts along +dir */
  for(dir=0; dir<4; dir++) {
    naik = coeffs->naik;
    QDP_M_eq_sM(stpl, gf[dir], QDP_neighbor[dir], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(scratch, gf[dir], stpl, QDP_all);
    QDP_discard_M(stpl);
    QDP_M_eq_sM(stpl, scratch, QDP_neighbor[dir], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(ll[dir], gf[dir], stpl, QDP_all);
    QDP_M_eq_r_times_M(ll[dir], &naik, ll[dir], QDP_all);
  }

  dtimell += dclock();
#ifdef LLTIME
  node0_printf("LLTIME(long): time = %e (Asqtad opt) mflops = %e\n",dtimell, (Real)nflopll*volume/(1e6*dtimell*numnodes()) );
#endif

  QDP_destroy_M(stpl);
  QDP_destroy_M(scratch);
}
void QOP_asqtad_force_multi_asvec_qdp(QOP_info_t *info, QDP_ColorMatrix *links[], QDP_ColorMatrix *force[], QOP_asqtad_coeffs_t *coef, REAL eps[], QDP_ColorVector *xin[], int nsrc) { #define NC QDP_get_nc(xin[0]) REAL coeff[nsrc]; REAL OneLink[nsrc], Lepage[nsrc], Naik[nsrc], FiveSt[nsrc], ThreeSt[nsrc], SevenSt[nsrc]; REAL mNaik[nsrc], mLepage[nsrc], mFiveSt[nsrc], mThreeSt[nsrc], mSevenSt[nsrc]; QDP_ColorVector *P3[8][nsrc]; QDP_ColorVector *P5[8][nsrc]; QDP_ColorVector *P5tmp[8][8][nsrc]; QDP_ColorVector *P5s[4][nsrc]; QDP_ColorVector *P5tmps[4][8][nsrc]; //QDP_ColorVector *xin[nsrc]; QDP_ColorVector *xintmp[8][nsrc]; QDP_ColorVector *Pmu[nsrc]; QDP_ColorVector *Pmutmp[8][nsrc]; QDP_ColorVector *Pnumu[nsrc]; QDP_ColorVector *Pnumutmp[8][nsrc]; QDP_ColorVector *Prhonumu[nsrc]; QDP_ColorVector *Prhonumutmp[8][nsrc]; QDP_ColorVector *P7[nsrc]; QDP_ColorVector *P7tmp[8][nsrc]; QDP_ColorVector *P7rho[nsrc]; QDP_ColorVector *ttv[nsrc]; int i, dir; int mu, nu, rho, sig; double nflop1 = 253935; double nflop2 = 433968; double nflop = nflop1 + (nflop2-nflop1)*(nsrc-1); double dtime; dtime = -QOP_time(); ASQTAD_FORCE_BEGIN; QOP_trace("test 1\n"); /* setup parallel transport */ QDP_ColorMatrix *tmpmat = QDP_create_M(); for(i=0; i<QOP_common.ndim; i++) { fbshift[i] = QDP_neighbor[i]; fbshiftdir[i] = QDP_forward; fblink[i] = links[i]; fbshift[OPP_DIR(i)] = QDP_neighbor[i]; fbshiftdir[OPP_DIR(i)] = QDP_backward; fblink[OPP_DIR(i)] = QDP_create_M(); QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all); QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all); } tv = ttv; for(i=0; i<nsrc; i++) { tv[i] = QDP_create_V(); } QOP_trace("test 2\n"); /* Allocate temporary vectors */ for(i=0; i<nsrc; i++) { Pmu[i] = QDP_create_V(); Pnumu[i] = QDP_create_V(); Prhonumu[i] = QDP_create_V(); P7[i] = QDP_create_V(); P7rho[i] = QDP_create_V(); for(dir=0; dir<8; dir++) { xintmp[dir][i] = QDP_create_V(); Pmutmp[dir][i] = QDP_create_V(); Pnumutmp[dir][i] = QDP_create_V(); 
Prhonumutmp[dir][i] = QDP_create_V(); P7tmp[dir][i] = QDP_create_V(); } #if 1 for(mu=0; mu<4; mu++) { P5s[mu][i] = QDP_create_V(); for(dir=0; dir<8; dir++) { P5tmps[mu][dir][i] = QDP_create_V(); } } #else for(mu=0; mu<8; mu++) { P5[mu][i] = QDP_create_V(); for(dir=0; dir<8; dir++) { P5tmp[mu][dir][i] = QDP_create_V(); //printf("%p %p\n", P5tmp[mu][dir][i], &(P5tmp[mu][dir][i])); fflush(stdout); if(P5tmp[mu][dir][i]==NULL) { fprintf(stderr, "error: can't create V\n"); QDP_abort(); } } } #endif } //printf("%p\n", P5tmp[0][4][0]); fflush(stdout); for(mu=0; mu<8; mu++) { for(i=0; i<nsrc; i++) { P3[mu][i] = QDP_create_V(); //P5[mu][i] = QDP_create_V(); } } for(mu=0; mu<4; mu++) { tempmom_qdp[mu] = force[mu]; QDP_M_eqm_M(tempmom_qdp[mu], tempmom_qdp[mu], QDP_odd); } /* Path coefficients times fermion epsilon */ /* Load path coefficients from table */ for(i=0; i<nsrc; i++) { OneLink[i] = coef->one_link * eps[i]; Naik[i] = coef->naik * eps[i]; mNaik[i] = -Naik[i]; ThreeSt[i] = coef->three_staple * eps[i]; mThreeSt[i] = -ThreeSt[i]; FiveSt[i] = coef->five_staple * eps[i]; mFiveSt[i] = -FiveSt[i]; SevenSt[i] = coef->seven_staple * eps[i]; mSevenSt[i] = -SevenSt[i]; Lepage[i] = coef->lepage * eps[i]; mLepage[i] = -Lepage[i]; } #if 0 printf("nsrc = %i\n", nsrc); printf("coeffs = %g %g %g %g %g %g\n", OneLink[0], ThreeSt[0], FiveSt[0], SevenSt[0], Lepage[0], Naik[0]); #endif /* *************************************** */ QOP_trace("start force loop\n"); for(mu=0; mu<8; mu++) { //u_shift_hw_fermion(temp_x_qdp, Pmu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]); u_shift_color_vecs(xin, Pmu, OPP_DIR(mu), nsrc, xintmp[OPP_DIR(mu)]); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) { //u_shift_hw_fermion(Pmu, P3[sig], sig, temp_hw[sig]); u_shift_color_vecs(Pmu, P3[sig], sig, nsrc, Pmutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu]: x--+ * * | | * * o o * * the 1 link in the path: - (numbering starts form 0) */ add_forces_to_mom(P3[sig], Pmu, sig, mThreeSt, nsrc); } 
} for(nu=0; nu<8; nu++) if( (nu!=mu)&&(nu!=OPP_DIR(mu)) ) { int nP5 = 0; //Pnumu = hw_qdp[OPP_DIR(nu)]; //u_shift_hw_fermion(Pmu, Pnumu, OPP_DIR(nu), temp_hw[OPP_DIR(nu)]); u_shift_color_vecs(Pmu, Pnumu, OPP_DIR(nu), nsrc, Pmutmp[OPP_DIR(nu)]); //QDP_V_veq_V(Pnumu, P3[OPP_DIR(nu)], QDP_all, nsrc); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) { #if 1 for(i=0; i<nsrc; i++) { P5[sig][i] = P5s[nP5][i]; for(dir=0; dir<8; dir++) P5tmp[sig][dir][i] = P5tmps[nP5][dir][i]; } #endif nP5++; //u_shift_hw_fermion(Pnumu, P5[sig], sig, temp_hw[sig]); u_shift_color_vecs(Pnumu, P5[sig], sig, nsrc, Pnumutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu]: x--+ * * | | * * o o * * the 2 link in the path: + (numbering starts form 0) */ add_forces_to_mom(P5[sig], Pnumu, sig, FiveSt, nsrc); } } QOP_trace("test 4\n"); for(rho=0; rho<8; rho++) if( (rho!=mu)&&(rho!=OPP_DIR(mu)) && (rho!=nu)&&(rho!=OPP_DIR(nu)) ) { //Prhonumu = hw_qdp[OPP_DIR(rho)]; //u_shift_hw_fermion(Pnumu, Prhonumu, OPP_DIR(rho), // temp_hw[OPP_DIR(rho)] ); u_shift_color_vecs(Pnumu, Prhonumu, OPP_DIR(rho), nsrc, Pnumutmp[OPP_DIR(rho)]); //QDP_V_veq_V(Prhonumu, P5[OPP_DIR(rho)], QDP_all, nsrc); for(sig=0; sig<8; sig++) if( (sig!=mu )&&(sig!=OPP_DIR(mu )) && (sig!=nu )&&(sig!=OPP_DIR(nu )) && (sig!=rho)&&(sig!=OPP_DIR(rho)) ) { /* Length 7 paths */ //P7 = hw_qdp[sig]; //u_shift_hw_fermion(Prhonumu, P7, sig, temp_hw[sig] ); QOP_trace("test 43\n"); u_shift_color_vecs(Prhonumu, P7, sig, nsrc, Prhonumutmp[sig]); QOP_trace("test 44\n"); //QDP_V_eq_r_times_V(P7[0], &SevenSt[0], P7[0], QDP_all); //QDP_V_eq_r_times_V(P7[1], &SevenSt[1], P7[1], QDP_all); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu+rho]: x--+ * * | | * * o o * * the 3 link in the path: - (numbering starts form 0) */ QOP_trace("test 45\n"); add_forces_to_mom(P7, Prhonumu, sig, mSevenSt, nsrc); QOP_trace("test 46\n"); //mom_meq_force(P7, Prhonumu, sig); } /* Add the force F_rho the 2(4) link 
in the path: + */ //P7rho = hw_qdp[rho]; //u_shift_hw_fermion(P7, P7rho, rho, temp_hw[rho]); QOP_trace("test 47\n"); u_shift_color_vecs(P7, P7rho, rho, nsrc, P7tmp[rho]); QOP_trace("test 48\n"); side_link_forces(rho,sig,SevenSt,Pnumu,P7,Prhonumu,P7rho, nsrc); QOP_trace("test 49\n"); //side_link_3f_force2(rho,sig,Pnumu,P7,Prhonumu,P7rho); /* Add the P7rho vector to P5 */ for(i=0; i<nsrc; i++) { if(FiveSt[i]!=0) coeff[i] = SevenSt[i]/FiveSt[i]; else coeff[i] = 0; QOP_trace("test 410\n"); QDP_V_peq_r_times_V(P5[sig][i], &coeff[i], P7rho[i], QDP_all); QOP_trace("test 411\n"); } } /* sig */ } /* rho */ QOP_trace("test 5\n"); #define P5nu P7 for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) { /* Length 5 paths */ /* Add the force F_nu the 1(3) link in the path: - */ //P5nu = hw_qdp[nu]; //u_shift_hw_fermion(P5[sig], P5nu, nu, temp_hw[nu]); u_shift_color_vecs(P5[sig], P5nu, nu, nsrc, P5tmp[sig][nu]); side_link_forces(nu, sig, mFiveSt, Pmu, P5[sig], Pnumu, P5nu, nsrc); /* Add the P5nu vector to P3 */ for(i=0; i<nsrc; i++) { if(ThreeSt[i]!=0) coeff[i] = FiveSt[i]/ThreeSt[i]; else coeff[i] = 0; QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all); } } /* sig */ } /* nu */ #define Pmumu Pnumu #define Pmumutmp Pnumutmp #define P5sig Prhonumu #define P5sigtmp Prhonumutmp #define P3mu P7 #define Popmu P7 #define Pmumumu P7 /* Now the Lepage term... It is the same as 5-link paths with nu=mu and FiveSt=Lepage. 
*/ //u_shift_hw_fermion(Pmu, Pmumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)] ); u_shift_color_vecs(Pmu, Pmumu, OPP_DIR(mu), nsrc, Pmutmp[OPP_DIR(mu)]); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) { //P5sig = hw_qdp[sig]; //u_shift_hw_fermion(Pmumu, P5sig, sig, temp_hw[sig]); u_shift_color_vecs(Pmumu, P5sig, sig, nsrc, Pmumutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu]: x--+ * * | | * * o o * * the 2 link in the path: + (numbering starts form 0) */ add_forces_to_mom(P5sig, Pmumu, sig, Lepage, nsrc); } /* Add the force F_nu the 1(3) link in the path: - */ //P5nu = hw_qdp[mu]; //u_shift_hw_fermion(P5sig, P5nu, mu, temp_hw[mu]); u_shift_color_vecs(P5sig, P5nu, mu, nsrc, P5sigtmp[mu]); side_link_forces(mu, sig, mLepage, Pmu, P5sig, Pmumu, P5nu, nsrc); /* Add the P5nu vector to P3 */ for(i=0; i<nsrc; i++) { if(ThreeSt[i]!=0) coeff[i] = Lepage[i]/ThreeSt[i]; else coeff[i] = 0; QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all); } /* Length 3 paths (Not the Naik term) */ /* Add the force F_mu the 0(2) link in the path: + */ if(GOES_FORWARDS(mu)) { //P3mu = hw_qdp[mu]; /* OK to clobber P5nu */ //u_shift_hw_fermion(P3[sig], P3mu, mu, temp_hw[mu]); //u_shift_color_vecs(P3[sig], P3mu, mu, 2, temp_hw[mu]); for(i=0; i<nsrc; i++) { QDP_V_eq_V(P5sig[i], P3[sig][i], QDP_all); } u_shift_color_vecs(P5sig, P3mu, mu, nsrc, P5sigtmp[mu]); } /* The above shift is not needed if mu is backwards */ side_link_forces(mu, sig, ThreeSt, xin, P3[sig], Pmu, P3mu, nsrc); } /* Finally the OneLink and the Naik term */ if(GOES_BACKWARDS(mu)) { /* Do only the forward terms in the Dslash */ /* Because I have shifted with OPP_DIR(mu) Pmu is a forward * * shift. 
*/ /* The one link */ add_forces_to_mom(Pmu, xin, OPP_DIR(mu), OneLink, nsrc); /* For the same reason Pmumu is the forward double link */ /* Popmu is a backward shift */ //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */ //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]); u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]); /* The Naik */ /* link no 1: - */ add_forces_to_mom(Pmumu, Popmu, OPP_DIR(mu), mNaik, nsrc); /* Pmumumu can overwrite Popmu which is no longer needed */ //Pmumumu = hw_qdp[OPP_DIR(mu)]; //u_shift_hw_fermion(Pmumu, Pmumumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]); u_shift_color_vecs(Pmumu, Pmumumu, OPP_DIR(mu), nsrc, Pmumutmp[OPP_DIR(mu)]); /* link no 0: + */ add_forces_to_mom(Pmumumu, xin, OPP_DIR(mu), Naik, nsrc); } else { /* The rest of the Naik terms */ //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */ //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]); u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]); /* link no 2: + */ /* Pmumu is double backward shift */ add_forces_to_mom(Popmu, Pmumu, mu, Naik, nsrc); } /* Here we have to do together the Naik term and the one link term */ }/* mu */ QOP_trace("test 6\n"); QOP_trace("test 7\n"); for(mu=0; mu<4; mu++) { QDP_M_eq_M(tmpmat, tempmom_qdp[mu], QDP_even); QDP_M_eqm_M(tmpmat, tempmom_qdp[mu], QDP_odd); QDP_M_eq_antiherm_M(tempmom_qdp[mu], tmpmat, QDP_all); } QDP_destroy_M(tmpmat); //printf("%p\n", P5tmp[0][4][0]); fflush(stdout); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } /* Free temporary vectors */ for(i=0; i<nsrc; i++) { QDP_destroy_V(Pmu[i]); QDP_destroy_V(Pnumu[i]); QDP_destroy_V(Prhonumu[i]); QDP_destroy_V(P7[i]); QDP_destroy_V(P7rho[i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(dir=0; dir<8; dir++) { QDP_destroy_V(xintmp[dir][i]); QDP_destroy_V(Pmutmp[dir][i]); QDP_destroy_V(Pnumutmp[dir][i]); QDP_destroy_V(Prhonumutmp[dir][i]); QDP_destroy_V(P7tmp[dir][i]); } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } 
for(mu=0; mu<4; mu++) { //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } QDP_destroy_V(P5s[mu][i]); //QDP_destroy_V(P5[mu][i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(dir=0; dir<8; dir++) { //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } QDP_destroy_V(P5tmps[mu][dir][i]); //printf("%p\n", P5tmp[mu][dir][i]); fflush(stdout); //QDP_destroy_V(P5tmp[mu][dir][i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("here3\n"); fflush(stdout); } for(mu=0; mu<8; mu++) { for(i=0; i<nsrc; i++) { QDP_destroy_V(P3[mu][i]); } //QDP_destroy_V(P5[mu][0]); //QDP_destroy_V(P5[mu][1]); } for(i=0; i<nsrc; i++) { QDP_destroy_V(tv[i]); } //if(QDP_this_node==0) { printf("here4\n"); fflush(stdout); } for(i=4; i<8; i++) { QDP_destroy_M(fblink[i]); } dtime += QOP_time(); info->final_sec = dtime; info->final_flop = nflop*QDP_sites_on_node; info->status = QOP_SUCCESS; ASQTAD_FORCE_END; #undef NC }
void QOP_hisq_force_multi_wrapper_fnmat(QOP_info_t *info, QOP_FermionLinksHisq *flh, QOP_Force *Force, QOP_hisq_coeffs_t *hisq_coeff, REAL *residues, QDP_ColorVector *x[], int *n_orders_naik) { double dtime = QDP_time(); int i, ipath, dir; REAL coeff_mult; double *eps_naik = hisq_coeff->eps_naik; int n_naiks = hisq_coeff->n_naiks; QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod; // Quark paths sorted by net displacement and last directions static Q_path *q_paths_sorted_1 = NULL; static Q_path *q_paths_sorted_2 = NULL; static Q_path *q_paths_sorted_3 = NULL; static int *netbackdir_table_1 = NULL; static int *netbackdir_table_2 = NULL; static int *netbackdir_table_3 = NULL; static int first_force = 1; if(first_force == 1) QOP_make_paths_and_dirs_hisq(hisq_coeff, umethod); int num_q_paths_1 = qop_get_num_q_paths_1(); int num_q_paths_2 = qop_get_num_q_paths_2(); int num_q_paths_3 = qop_get_num_q_paths_3(); Q_path *q_paths_1 = qop_get_q_paths_1(); Q_path *q_paths_2 = qop_get_q_paths_2(); Q_path *q_paths_3 = qop_get_q_paths_3(); Q_path *q_paths_sorted_current = NULL; int *netbackdir_table_current = NULL; int inaik; int n_naik_shift; double final_flop = 0.; size_t nflops = 0; QDP_ColorMatrix * force[4] = {Force->force[0], Force->force[1], Force->force[2], Force->force[3]}; int num_q_paths_current,n_orders_naik_current;//==nterms QDP_ColorMatrix *force_accum_0[4]; QDP_ColorMatrix *force_accum_0_naik[4]; QDP_ColorMatrix *force_accum_1[4]; QDP_ColorMatrix *force_accum_1u[4]; QDP_ColorMatrix *force_accum_2[4]; QDP_ColorMatrix *force_final[4]; QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4]; int nterms = 0, n_order_naik_total; for(inaik = 0; inaik < n_naiks; inaik++) nterms += n_orders_naik[inaik]; n_order_naik_total = nterms; for(i=0;i<4;i++) { Ugf[i] = flh->U_links[i]; Vgf[i] = flh->V_links[i]; Wgf[i] = flh->W_unitlinks[i]; } QDP_ColorMatrix *tmat; QDP_ColorMatrix *mat_tmp0; REAL treal; if( first_force==1 ){ if( q_paths_sorted_1==NULL ) q_paths_sorted_1 = (Q_path 
*)malloc( num_q_paths_1*sizeof(Q_path) ); if(netbackdir_table_1==NULL ) netbackdir_table_1 = (int *)malloc( num_q_paths_1*sizeof(int) ); if( q_paths_sorted_2==NULL ) q_paths_sorted_2 = (Q_path *)malloc( num_q_paths_2*sizeof(Q_path) ); if(netbackdir_table_2==NULL ) netbackdir_table_2 = (int *)malloc( num_q_paths_2*sizeof(int) ); if( q_paths_sorted_3==NULL ) q_paths_sorted_3 = (Q_path *)malloc( num_q_paths_3*sizeof(Q_path) ); if(netbackdir_table_3==NULL ) netbackdir_table_3 = (int *)malloc( num_q_paths_3*sizeof(int) ); else{QOP_printf0("WARNING: remaking sorted path tables\n"); exit(0); } // make sorted tables sort_quark_paths_hisq( q_paths_1, q_paths_sorted_1, num_q_paths_1, 8 ); for( ipath=0; ipath<num_q_paths_1; ipath++ ) netbackdir_table_1[ipath] = find_backwards_gather( &(q_paths_sorted_1[ipath]) ); sort_quark_paths_hisq( q_paths_2, q_paths_sorted_2, num_q_paths_2, 16 ); for( ipath=0; ipath<num_q_paths_2; ipath++ ) netbackdir_table_2[ipath] = find_backwards_gather( &(q_paths_sorted_2[ipath]) ); sort_quark_paths_hisq( q_paths_3, q_paths_sorted_3, num_q_paths_3, 16 ); for( ipath=0; ipath<num_q_paths_3; ipath++ ) netbackdir_table_3[ipath] = find_backwards_gather( &(q_paths_sorted_3[ipath]) ); first_force=0; } tmat = QDP_create_M(); mat_tmp0 = QDP_create_M(); for(i=XUP;i<=TUP;i++){ force_accum_0[i] = QDP_create_M(); force_accum_0_naik[i] = QDP_create_M(); force_accum_1[i] = QDP_create_M(); force_accum_1u[i] = QDP_create_M(); force_accum_2[i] = QDP_create_M(); force_final[i] = QDP_create_M(); } for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum_2[dir], QDP_all); // loop on different naik masses n_naik_shift = 0; for( inaik=0; inaik<n_naiks; inaik++ ) { // smearing level 0 if( 0==inaik ) { n_orders_naik_current = n_order_naik_total; } else { n_orders_naik_current = n_orders_naik[inaik]; } QOP_hisq_force_multi_smearing0_fnmat(info,residues+n_naik_shift, x+n_naik_shift, n_orders_naik_current, force_accum_0, force_accum_0_naik); final_flop += info->final_flop; // 
smearing level 2 if( 0==inaik ) { q_paths_sorted_current = q_paths_sorted_2; num_q_paths_current = num_q_paths_2; netbackdir_table_current = netbackdir_table_2; } else { q_paths_sorted_current = q_paths_sorted_3; num_q_paths_current = num_q_paths_3; netbackdir_table_current = netbackdir_table_3; } QOP_hisq_force_multi_smearing_fnmat( info,Wgf,residues+n_naik_shift, x+n_naik_shift, n_orders_naik_current, force_accum_1, force_accum_0, force_accum_0_naik, num_q_paths_current, q_paths_sorted_current, netbackdir_table_current ); //QOP_printf0("HISQ smear0 flops = %g\n", info->final_flop); final_flop += info->final_flop; if( 0==inaik ) { coeff_mult = 1.0; } else { coeff_mult = eps_naik[inaik]; } for(dir=XUP;dir<=TUP;dir++) { QDP_M_peq_r_times_M(force_accum_2[dir],&coeff_mult, force_accum_1[dir],QDP_all); nflops += 36; } n_naik_shift += n_orders_naik[inaik]; } if ( umethod==QOP_UNITARIZE_NONE ){ // smearing level 1 QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, x, nterms, force_accum_1, force_accum_2, NULL, num_q_paths_1, q_paths_sorted_1, netbackdir_table_1 ); final_flop += info->final_flop; } else if ( umethod==QOP_UNITARIZE_RATIONAL ){ // reunitarization QOP_hisq_force_multi_reunit(info,Vgf,force_accum_1u, force_accum_2); //QOP_printf0("reunit flops = %g\n", info->final_flop); final_flop += info->final_flop; // smearing level 1 QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, x, nterms, force_accum_1, force_accum_1u, NULL, num_q_paths_1, q_paths_sorted_1, netbackdir_table_1 ); //QOP_printf0("HISQ smear1 flops = %g\n", info->final_flop); final_flop += info->final_flop; } else { QOP_printf0("Unknown or unsupported unitarization method\n"); exit(1); } // contraction with the link in question should be done here, // after contributions from all levels of smearing are taken into account for(dir=XUP;dir<=TUP;dir++){ QDP_M_eq_M_times_M(force_final[dir],Ugf[dir],force_accum_1[dir],QDP_all); nflops += 198; } // take into account even/odd parity (it is NOT done 
in "smearing" routine) //eps multiplication done outside QOP for(dir=XUP;dir<=TUP;dir++){ QDP_M_eq_M(tmat,force_final[dir],QDP_all); treal = 2.0; QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_even); treal = -2.0; QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_odd); nflops += 18; } // Put antihermitian traceless part into momentum // add force to momentum for(dir=XUP; dir<=TUP; dir++){ QDP_M_eq_antiherm_M(mat_tmp0, force_final[dir], QDP_all); QDP_M_peq_M(force[dir], mat_tmp0, QDP_all); nflops += 24+18; //QDP_M_peq_M(force_final[dir], force[dir], QDP_all); //QDP_M_eq_antiherm_M(force[dir], force_final[dir], QDP_all); } for(i=XUP;i<=TUP;i++){ QDP_destroy_M( force_accum_0[i] ); QDP_destroy_M( force_accum_0_naik[i] ); QDP_destroy_M( force_accum_1[i] ); QDP_destroy_M( force_accum_1u[i] ); QDP_destroy_M( force_accum_2[i] ); QDP_destroy_M( force_final[i] ); } QDP_destroy_M( tmat ); QDP_destroy_M( mat_tmp0 ); final_flop += ((double)nflops)*QDP_sites_on_node; info->final_sec = QDP_time() - dtime; info->final_flop = final_flop; info->status = QOP_SUCCESS; //QOP_printf0("HISQ force flops = %g\n", info->final_flop); } //hisq_force_multi_wrapper_fnmat