static void path_prod(QDP_ColorMatrix *u[], QDP_ColorMatrix *m, int path[], int len, int subl, QDP_Subset subset[], int (*neighsubl)(int subl, int dir)) { QDP_ShiftDir fb; QDP_ColorMatrix *p=NULL, *s=NULL; QDP_Lattice *lat = QDP_get_lattice_M(m); int nd = QDP_ndim_L(lat); int sn = 0; for(int i=0; i<len; i++) { int dir = abs(path[i])-1; // if the path moves in the + dir then we shift from the backward dir fb = path[i]<0 ? QDP_forward : QDP_backward; if(fb==QDP_backward) { // path is moving in + dir if(i==0) { QDP_M_eq_Ma(tm[sn], u[dir], subset[subl]); } else { QDP_M_eq_Ma_times_M(tm[sn], u[dir], p, subset[subl]); QDP_discard_M(p); } subl = neighsubl(subl, path[i]); s = sm[sn][nd+dir]; QDP_discard_M(s); QDP_M_eq_sM(s, tm[sn], QDP_neighbor_L(lat)[dir], fb, eosub(subl)); //p = t1; t1 = t2; t2 = p; sn = 1-sn; p = s; } else { if(i==0) { subl = neighsubl(subl, path[i]); QDP_M_eq_M(tm[1-sn], u[dir], subset[subl]); } else { QDP_M_eq_M(tm[sn], p, subset[subl]); QDP_discard_M(p); subl = neighsubl(subl, path[i]); s = sm[sn][dir]; QDP_discard_M(s); QDP_M_eq_sM(s, tm[sn], QDP_neighbor_L(lat)[dir], fb, eosub(subl)); QDP_M_eq_M_times_M(tm[1-sn], u[dir], s, subset[subl]); QDP_discard_M(s); } p = tm[1-sn]; } } QDP_M_eq_M(m, p, subset[subl]); QDP_discard_M(p); QDP_discard_M(s); }
void QOP_asqtad_force_multi_asvec_qdp(QOP_info_t *info, QDP_ColorMatrix *links[], QDP_ColorMatrix *force[], QOP_asqtad_coeffs_t *coef, REAL eps[], QDP_ColorVector *xin[], int nsrc) { #define NC QDP_get_nc(xin[0]) REAL coeff[nsrc]; REAL OneLink[nsrc], Lepage[nsrc], Naik[nsrc], FiveSt[nsrc], ThreeSt[nsrc], SevenSt[nsrc]; REAL mNaik[nsrc], mLepage[nsrc], mFiveSt[nsrc], mThreeSt[nsrc], mSevenSt[nsrc]; QDP_ColorVector *P3[8][nsrc]; QDP_ColorVector *P5[8][nsrc]; QDP_ColorVector *P5tmp[8][8][nsrc]; QDP_ColorVector *P5s[4][nsrc]; QDP_ColorVector *P5tmps[4][8][nsrc]; //QDP_ColorVector *xin[nsrc]; QDP_ColorVector *xintmp[8][nsrc]; QDP_ColorVector *Pmu[nsrc]; QDP_ColorVector *Pmutmp[8][nsrc]; QDP_ColorVector *Pnumu[nsrc]; QDP_ColorVector *Pnumutmp[8][nsrc]; QDP_ColorVector *Prhonumu[nsrc]; QDP_ColorVector *Prhonumutmp[8][nsrc]; QDP_ColorVector *P7[nsrc]; QDP_ColorVector *P7tmp[8][nsrc]; QDP_ColorVector *P7rho[nsrc]; QDP_ColorVector *ttv[nsrc]; int i, dir; int mu, nu, rho, sig; double nflop1 = 253935; double nflop2 = 433968; double nflop = nflop1 + (nflop2-nflop1)*(nsrc-1); double dtime; dtime = -QOP_time(); ASQTAD_FORCE_BEGIN; QOP_trace("test 1\n"); /* setup parallel transport */ QDP_ColorMatrix *tmpmat = QDP_create_M(); for(i=0; i<QOP_common.ndim; i++) { fbshift[i] = QDP_neighbor[i]; fbshiftdir[i] = QDP_forward; fblink[i] = links[i]; fbshift[OPP_DIR(i)] = QDP_neighbor[i]; fbshiftdir[OPP_DIR(i)] = QDP_backward; fblink[OPP_DIR(i)] = QDP_create_M(); QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all); QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all); } tv = ttv; for(i=0; i<nsrc; i++) { tv[i] = QDP_create_V(); } QOP_trace("test 2\n"); /* Allocate temporary vectors */ for(i=0; i<nsrc; i++) { Pmu[i] = QDP_create_V(); Pnumu[i] = QDP_create_V(); Prhonumu[i] = QDP_create_V(); P7[i] = QDP_create_V(); P7rho[i] = QDP_create_V(); for(dir=0; dir<8; dir++) { xintmp[dir][i] = QDP_create_V(); Pmutmp[dir][i] = QDP_create_V(); Pnumutmp[dir][i] = QDP_create_V(); Prhonumutmp[dir][i] = QDP_create_V(); P7tmp[dir][i] = QDP_create_V(); } #if 1 for(mu=0; mu<4; mu++) { P5s[mu][i] = QDP_create_V(); for(dir=0; dir<8; dir++) { P5tmps[mu][dir][i] = QDP_create_V(); } } #else for(mu=0; mu<8; mu++) { P5[mu][i] = QDP_create_V(); for(dir=0; dir<8; dir++) { P5tmp[mu][dir][i] = QDP_create_V(); //printf("%p %p\n", P5tmp[mu][dir][i], &(P5tmp[mu][dir][i])); fflush(stdout); if(P5tmp[mu][dir][i]==NULL) { fprintf(stderr, "error: can't create V\n"); QDP_abort(); } } } #endif } //printf("%p\n", P5tmp[0][4][0]); fflush(stdout); for(mu=0; mu<8; mu++) { for(i=0; i<nsrc; i++) { P3[mu][i] = QDP_create_V(); //P5[mu][i] = QDP_create_V(); } } for(mu=0; mu<4; mu++) { tempmom_qdp[mu] = force[mu]; QDP_M_eqm_M(tempmom_qdp[mu], tempmom_qdp[mu], QDP_odd); } /* Path coefficients times fermion epsilon */ /* Load path coefficients from table */ for(i=0; i<nsrc; i++) { OneLink[i] = coef->one_link * eps[i]; Naik[i] = coef->naik * eps[i]; mNaik[i] = -Naik[i]; ThreeSt[i] = coef->three_staple * eps[i]; mThreeSt[i] = -ThreeSt[i]; FiveSt[i] = coef->five_staple * eps[i]; mFiveSt[i] = -FiveSt[i]; SevenSt[i] = coef->seven_staple * eps[i]; mSevenSt[i] = -SevenSt[i]; Lepage[i] = coef->lepage * eps[i]; mLepage[i] = -Lepage[i]; } #if 0 printf("nsrc = %i\n", nsrc); printf("coeffs = %g %g %g %g %g %g\n", OneLink[0], ThreeSt[0], FiveSt[0], SevenSt[0], Lepage[0], Naik[0]); #endif /* *************************************** */ QOP_trace("start force loop\n"); for(mu=0; mu<8; mu++) { //u_shift_hw_fermion(temp_x_qdp, Pmu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]); u_shift_color_vecs(xin, Pmu, OPP_DIR(mu), nsrc, xintmp[OPP_DIR(mu)]); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) { //u_shift_hw_fermion(Pmu, P3[sig], sig, temp_hw[sig]); u_shift_color_vecs(Pmu, P3[sig], sig, nsrc, Pmutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu]: x--+ * * | | * * o o * * the 1 link in the path: - (numbering starts form 0) */ add_forces_to_mom(P3[sig], Pmu, sig, mThreeSt, nsrc); } } for(nu=0; nu<8; nu++) if( (nu!=mu)&&(nu!=OPP_DIR(mu)) ) { int nP5 = 0; //Pnumu = hw_qdp[OPP_DIR(nu)]; //u_shift_hw_fermion(Pmu, Pnumu, OPP_DIR(nu), temp_hw[OPP_DIR(nu)]); u_shift_color_vecs(Pmu, Pnumu, OPP_DIR(nu), nsrc, Pmutmp[OPP_DIR(nu)]); //QDP_V_veq_V(Pnumu, P3[OPP_DIR(nu)], QDP_all, nsrc); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) { #if 1 for(i=0; i<nsrc; i++) { P5[sig][i] = P5s[nP5][i]; for(dir=0; dir<8; dir++) P5tmp[sig][dir][i] = P5tmps[nP5][dir][i]; } #endif nP5++; //u_shift_hw_fermion(Pnumu, P5[sig], sig, temp_hw[sig]); u_shift_color_vecs(Pnumu, P5[sig], sig, nsrc, Pnumutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu]: x--+ * * | | * * o o * * the 2 link in the path: + (numbering starts form 0) */ add_forces_to_mom(P5[sig], Pnumu, sig, FiveSt, nsrc); } } QOP_trace("test 4\n"); for(rho=0; rho<8; rho++) if( (rho!=mu)&&(rho!=OPP_DIR(mu)) && (rho!=nu)&&(rho!=OPP_DIR(nu)) ) { //Prhonumu = hw_qdp[OPP_DIR(rho)]; //u_shift_hw_fermion(Pnumu, Prhonumu, OPP_DIR(rho), // temp_hw[OPP_DIR(rho)] ); u_shift_color_vecs(Pnumu, Prhonumu, OPP_DIR(rho), nsrc, Pnumutmp[OPP_DIR(rho)]); //QDP_V_veq_V(Prhonumu, P5[OPP_DIR(rho)], QDP_all, nsrc); for(sig=0; sig<8; sig++) if( (sig!=mu )&&(sig!=OPP_DIR(mu )) && (sig!=nu )&&(sig!=OPP_DIR(nu )) && (sig!=rho)&&(sig!=OPP_DIR(rho)) ) { /* Length 7 paths */ //P7 = hw_qdp[sig]; //u_shift_hw_fermion(Prhonumu, P7, sig, temp_hw[sig] ); QOP_trace("test 43\n"); u_shift_color_vecs(Prhonumu, P7, sig, nsrc, Prhonumutmp[sig]); QOP_trace("test 44\n"); //QDP_V_eq_r_times_V(P7[0], &SevenSt[0], P7[0], QDP_all); //QDP_V_eq_r_times_V(P7[1], &SevenSt[1], P7[1], QDP_all); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu+rho]: x--+ * * | | * * o o * * the 3 link in the path: - (numbering starts form 0) */ QOP_trace("test 45\n"); add_forces_to_mom(P7, Prhonumu, sig, mSevenSt, nsrc); QOP_trace("test 46\n"); //mom_meq_force(P7, Prhonumu, sig); } /* Add the force F_rho the 2(4) link in the path: + */ //P7rho = hw_qdp[rho]; //u_shift_hw_fermion(P7, P7rho, rho, temp_hw[rho]); QOP_trace("test 47\n"); u_shift_color_vecs(P7, P7rho, rho, nsrc, P7tmp[rho]); QOP_trace("test 48\n"); side_link_forces(rho,sig,SevenSt,Pnumu,P7,Prhonumu,P7rho, nsrc); QOP_trace("test 49\n"); //side_link_3f_force2(rho,sig,Pnumu,P7,Prhonumu,P7rho); /* Add the P7rho vector to P5 */ for(i=0; i<nsrc; i++) { if(FiveSt[i]!=0) coeff[i] = SevenSt[i]/FiveSt[i]; else coeff[i] = 0; QOP_trace("test 410\n"); QDP_V_peq_r_times_V(P5[sig][i], &coeff[i], P7rho[i], QDP_all); QOP_trace("test 411\n"); } } /* sig */ } /* rho */ QOP_trace("test 5\n"); #define P5nu P7 for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) { /* Length 5 paths */ /* Add the force F_nu the 1(3) link in the path: - */ //P5nu = hw_qdp[nu]; //u_shift_hw_fermion(P5[sig], P5nu, nu, temp_hw[nu]); u_shift_color_vecs(P5[sig], P5nu, nu, nsrc, P5tmp[sig][nu]); side_link_forces(nu, sig, mFiveSt, Pmu, P5[sig], Pnumu, P5nu, nsrc); /* Add the P5nu vector to P3 */ for(i=0; i<nsrc; i++) { if(ThreeSt[i]!=0) coeff[i] = FiveSt[i]/ThreeSt[i]; else coeff[i] = 0; QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all); } } /* sig */ } /* nu */ #define Pmumu Pnumu #define Pmumutmp Pnumutmp #define P5sig Prhonumu #define P5sigtmp Prhonumutmp #define P3mu P7 #define Popmu P7 #define Pmumumu P7 /* Now the Lepage term... It is the same as 5-link paths with nu=mu and FiveSt=Lepage. */ //u_shift_hw_fermion(Pmu, Pmumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)] ); u_shift_color_vecs(Pmu, Pmumu, OPP_DIR(mu), nsrc, Pmutmp[OPP_DIR(mu)]); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) { //P5sig = hw_qdp[sig]; //u_shift_hw_fermion(Pmumu, P5sig, sig, temp_hw[sig]); u_shift_color_vecs(Pmumu, P5sig, sig, nsrc, Pmumutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu]: x--+ * * | | * * o o * * the 2 link in the path: + (numbering starts form 0) */ add_forces_to_mom(P5sig, Pmumu, sig, Lepage, nsrc); } /* Add the force F_nu the 1(3) link in the path: - */ //P5nu = hw_qdp[mu]; //u_shift_hw_fermion(P5sig, P5nu, mu, temp_hw[mu]); u_shift_color_vecs(P5sig, P5nu, mu, nsrc, P5sigtmp[mu]); side_link_forces(mu, sig, mLepage, Pmu, P5sig, Pmumu, P5nu, nsrc); /* Add the P5nu vector to P3 */ for(i=0; i<nsrc; i++) { if(ThreeSt[i]!=0) coeff[i] = Lepage[i]/ThreeSt[i]; else coeff[i] = 0; QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all); } /* Length 3 paths (Not the Naik term) */ /* Add the force F_mu the 0(2) link in the path: + */ if(GOES_FORWARDS(mu)) { //P3mu = hw_qdp[mu]; /* OK to clobber P5nu */ //u_shift_hw_fermion(P3[sig], P3mu, mu, temp_hw[mu]); //u_shift_color_vecs(P3[sig], P3mu, mu, 2, temp_hw[mu]); for(i=0; i<nsrc; i++) { QDP_V_eq_V(P5sig[i], P3[sig][i], QDP_all); } u_shift_color_vecs(P5sig, P3mu, mu, nsrc, P5sigtmp[mu]); } /* The above shift is not needed if mu is backwards */ side_link_forces(mu, sig, ThreeSt, xin, P3[sig], Pmu, P3mu, nsrc); } /* Finally the OneLink and the Naik term */ if(GOES_BACKWARDS(mu)) { /* Do only the forward terms in the Dslash */ /* Because I have shifted with OPP_DIR(mu) Pmu is a forward * * shift. */ /* The one link */ add_forces_to_mom(Pmu, xin, OPP_DIR(mu), OneLink, nsrc); /* For the same reason Pmumu is the forward double link */ /* Popmu is a backward shift */ //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */ //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]); u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]); /* The Naik */ /* link no 1: - */ add_forces_to_mom(Pmumu, Popmu, OPP_DIR(mu), mNaik, nsrc); /* Pmumumu can overwrite Popmu which is no longer needed */ //Pmumumu = hw_qdp[OPP_DIR(mu)]; //u_shift_hw_fermion(Pmumu, Pmumumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]); u_shift_color_vecs(Pmumu, Pmumumu, OPP_DIR(mu), nsrc, Pmumutmp[OPP_DIR(mu)]); /* link no 0: + */ add_forces_to_mom(Pmumumu, xin, OPP_DIR(mu), Naik, nsrc); } else { /* The rest of the Naik terms */ //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */ //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]); u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]); /* link no 2: + */ /* Pmumu is double backward shift */ add_forces_to_mom(Popmu, Pmumu, mu, Naik, nsrc); } /* Here we have to do together the Naik term and the one link term */ }/* mu */ QOP_trace("test 6\n"); QOP_trace("test 7\n"); for(mu=0; mu<4; mu++) { QDP_M_eq_M(tmpmat, tempmom_qdp[mu], QDP_even); QDP_M_eqm_M(tmpmat, tempmom_qdp[mu], QDP_odd); QDP_M_eq_antiherm_M(tempmom_qdp[mu], tmpmat, QDP_all); } QDP_destroy_M(tmpmat); //printf("%p\n", P5tmp[0][4][0]); fflush(stdout); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } /* Free temporary vectors */ for(i=0; i<nsrc; i++) { QDP_destroy_V(Pmu[i]); QDP_destroy_V(Pnumu[i]); QDP_destroy_V(Prhonumu[i]); QDP_destroy_V(P7[i]); QDP_destroy_V(P7rho[i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(dir=0; dir<8; dir++) { QDP_destroy_V(xintmp[dir][i]); QDP_destroy_V(Pmutmp[dir][i]); QDP_destroy_V(Pnumutmp[dir][i]); QDP_destroy_V(Prhonumutmp[dir][i]); QDP_destroy_V(P7tmp[dir][i]); } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(mu=0; mu<4; mu++) { //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } QDP_destroy_V(P5s[mu][i]); //QDP_destroy_V(P5[mu][i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(dir=0; dir<8; dir++) { //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } QDP_destroy_V(P5tmps[mu][dir][i]); //printf("%p\n", P5tmp[mu][dir][i]); fflush(stdout); //QDP_destroy_V(P5tmp[mu][dir][i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("here3\n"); fflush(stdout); } for(mu=0; mu<8; mu++) { for(i=0; i<nsrc; i++) { QDP_destroy_V(P3[mu][i]); } //QDP_destroy_V(P5[mu][0]); //QDP_destroy_V(P5[mu][1]); } for(i=0; i<nsrc; i++) { QDP_destroy_V(tv[i]); } //if(QDP_this_node==0) { printf("here4\n"); fflush(stdout); } for(i=4; i<8; i++) { QDP_destroy_M(fblink[i]); } dtime += QOP_time(); info->final_sec = dtime; info->final_flop = nflop*QDP_sites_on_node; info->status = QOP_SUCCESS; ASQTAD_FORCE_END; #undef NC }
void QOPPC(symanzik_1loop_gauge_force1) (QOP_info_t *info, QOP_GaugeField *gauge, QOP_Force *force, QOP_gauge_coeffs_t *coeffs, REAL eps) { REAL Plaq, Rect, Pgm ; QDP_ColorMatrix *tempmom_qdp[4]; QDP_ColorMatrix *Amu[6]; // products of 2 links Unu(x)*Umu(x+nu) QDP_ColorMatrix *tmpmat; QDP_ColorMatrix *tmpmat1; QDP_ColorMatrix *tmpmat2; QDP_ColorMatrix *staples; QDP_ColorMatrix *tmpmat3; QDP_ColorMatrix *tmpmat4; int i, k; int mu, nu, sig; double dtime; //REAL eb3 = -eps*beta/3.0; REAL eb3 = -eps/3.0; int j[3][2] = {{1,2}, {0,2}, {0,1}}; // QOP_printf0("beta: %e, eb3: %e\n", beta, eb3); dtime = -QOP_time(); for(mu=0; mu<4; mu++) { tempmom_qdp[mu] = QDP_create_M(); QDP_M_eq_zero(tempmom_qdp[mu], QDP_all); } tmpmat = QDP_create_M(); for(i=0; i<QOP_common.ndim; i++) { fblink[i] = gauge->links[i]; fblink[OPP_DIR(i)] = QDP_create_M(); QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all); QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all); } for(i=0; i<6; i++) { Amu[i] = QDP_create_M(); } staples = QDP_create_M(); tmpmat1 = QDP_create_M(); tmpmat2 = QDP_create_M(); tmpmat3 = QDP_create_M(); tmpmat4 = QDP_create_M(); Plaq = coeffs->plaquette; Rect = coeffs->rectangle; Pgm = coeffs->parallelogram; //Construct 3-staples and rectangles for(mu=0; mu<4; mu++) { i=0; for(nu=0; nu<4; nu++) { if(nu!=mu){ // tmpmat1 = Umu(x+nu) QDP_M_eq_sM(tmpmat1, fblink[mu], QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_eq_M_times_M(Amu[i], fblink[nu], tmpmat1, QDP_all); //tmpmat2 = Umu(x-nu) QDP_M_eq_sM(tmpmat2, fblink[mu], QDP_neighbor[nu], QDP_backward, QDP_all); QDP_M_eq_M_times_M(Amu[i+3], fblink[OPP_DIR(nu)], tmpmat2, QDP_all); //tmpmat = U_{nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_Ma(staples, Amu[i], tmpmat, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all); //tmpmat = U_{-nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_Ma_times_M(tmpmat3, fblink[OPP_DIR(nu)], staples, QDP_all); QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all); QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all); QDP_M_eq_Ma_times_M(tmpmat4, tmpmat2, tmpmat3, QDP_all); QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all); //tmpmat = U_{-nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_Ma(tmpmat3, tmpmat2, tmpmat, QDP_all); QDP_M_eq_M_times_Ma(tmpmat, tmpmat3, staples, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all); //tmpmat = U_{-nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_Ma(staples, Amu[i+3], tmpmat, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all); QDP_M_eq_Ma_times_M(tmpmat3, fblink[nu], staples, QDP_all); QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all); QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_backward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all); QDP_M_eq_Ma_times_M(tmpmat, tmpmat3, tmpmat1, QDP_all); QDP_M_eq_sM(tmpmat4, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all); QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_M(tmpmat3, staples, tmpmat, QDP_all); QDP_M_eq_M_times_Ma(tmpmat4, tmpmat3, tmpmat1, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all); i++; } } // Construct the pgm staples and add them to force QDP_M_eq_zero(staples, QDP_all); i=0; for(nu=0; nu<4; nu++){ if(nu!=mu){ k=0; for(sig=0; sig<4;sig ++){ if(sig!=mu && nu!=sig){ // the nu_sig_mu ... staple and 3 reflections //tmpmat = Amu["sig"](x+nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["sig"](x+nu) QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all); //tmpmat3 = Unu(x+mu+sig) QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE? //tmpmat2 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = Usig(x+mu) QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); //tmpmat = Amu["sig"](x-nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_backward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu) QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all); //tmpmat3 = U_{-nu}(x+mu+sig) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE? //tmpmat2 = U_{-nu}nu(x)*Amu["sig"](x-nu)*adj(Unu(x+mu+sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = Usig(x+mu) QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); //tmpmat = Amu["-sig"](x-nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_backward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu) QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all); //tmpmat = U_{-nu}(x+mu-sig) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE? //tmpmat2 = U_{-nu}nu(x)*Amu["-sig"](x-nu)*adj(Unu(x+mu-sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = U_{-sig}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(Unu(x+mu-sig))*adj(U_{-sig}(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); //tmpmat = Amu["-sig"](x+nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["-sig"](x+nu) QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all); //tmpmat3 = Unu(x+mu-sig) QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE? //tmpmat2 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = U_{-sig}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); k++; }//close if sig!=nu ... }//close sig loop i++; }// close if nu!=mu }//close the pgm nu loop QDP_M_peq_r_times_M(tempmom_qdp[mu], &Pgm, staples, QDP_all); }// closes the mu loop #ifdef CHKSUM QLA_ColorMatrix qcm; QLA_Complex det, chk; QLA_c_eq_r(chk, 0); #endif for(mu=0; mu<4; mu++){ QDP_M_eq_M_times_Ma(tmpmat, fblink[mu], tempmom_qdp[mu], QDP_all); // HERE? QDP_M_eq_r_times_M_plus_M( tempmom_qdp[mu], &eb3, tmpmat, force->force[mu], QDP_all);// HERE? QDP_M_eq_antiherm_M(force->force[mu], tempmom_qdp[mu], QDP_all);// HERE #ifdef CHKSUM QDP_m_eq_sum_M(&qcm, force->force[mu], QDP_all); QLA_C_eq_det_M(&det, &qcm); QLA_c_peq_c(chk, det); #endif } #ifdef CHKSUM QOP_printf0("chksum: %g %g\n", QLA_real(chk), QLA_imag(chk)); #endif //DESTROY various fields QDP_destroy_M(tmpmat); QDP_destroy_M(tmpmat1); QDP_destroy_M(tmpmat2); QDP_destroy_M(tmpmat3); QDP_destroy_M(staples); QDP_destroy_M(tmpmat4); for(mu=0; mu<4; mu++){ QDP_destroy_M(tempmom_qdp[mu]); } for(i=0; i<6; i++) { QDP_destroy_M(Amu[i]); } for(i=4; i<8; i++) { QDP_destroy_M(fblink[i]); } dtime += QOP_time(); double nflop = 96720; info->final_sec = dtime; info->final_flop = nflop*QDP_sites_on_node; info->status = QOP_SUCCESS; //QOP_printf0("Time in slow g_force: %e\n", info->final_sec); }
int congrad_w(int niter, Real rsqmin, Real *final_rsq_ptr) { int i; int iteration; /* counter for iterations */ double source_norm; double rsqstop; QLA_Real a, b; double rsq,oldrsq,pkp; /* Sugar's a,b,resid**2,previous resid*2 */ /* pkp = cg_p.K.cg_p */ QLA_Real mkappa; QLA_Real sum; #ifdef CGTIME double dtime; #endif #ifdef LU mkappa = -kappa*kappa; #else mkappa = -kappa; #endif setup_cg(); for(i=0; i<4; i++) { set_M_from_site(gaugelink[i], F_OFFSET(link[i]),EVENANDODD); } set_D_from_site(psi, F_OFFSET(psi),EVENANDODD); set_D_from_site(chi, F_OFFSET(chi),EVENANDODD); #ifdef PRESHIFT_LINKS { QDP_ColorMatrix *tcm; tcm = QDP_create_M(); for(i=0; i<4; i++) { QDP_M_eq_sM(tcm, gaugelink[i], QDP_neighbor[i], QDP_backward, QDP_all); QDP_M_eq_Ma(gaugelink[i+4], tcm, QDP_all); } QDP_destroy_M(tcm); } #endif #ifdef CGTIME dtime = -dclock(); #endif iteration=0; start: /* mp <- M_adjoint*M*psi r,p <- chi - mp rsq = |r|^2 source_norm = |chi|^2 */ rsq = source_norm = 0.0; #ifdef LU QDP_D_eq_D(cgp, psi, QDP_even); dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1); dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2); QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even); dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3); dslash_special_qdp(mp, tt2, -1, QDP_even, temp4); QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even); QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_even); QDP_D_eq_D(cgp, cgr, QDP_even); QDP_r_eq_norm2_D(&sum, chi, QDP_even); source_norm = sum; QDP_r_eq_norm2_D(&sum, cgr, QDP_even); rsq = sum; #else QDP_D_eq_D(cgp, psi, QDP_even); dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1); QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all); dslash_special_qdp(mp, ttt, -1, QDP_all, temp1); QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all); QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_all); QDP_D_eq_D(cgp, cgr, QDP_all); QDP_r_eq_norm2_D(&sum, chi, QDP_all); source_norm = sum; QDP_r_eq_norm2_D(&sum, cgr, QDP_all); rsq = sum; #endif iteration++ ; /* iteration counts number of multiplications by M_adjoint*M */ total_iters++; /**if(this_node==0)printf("congrad2: source_norm = %e\n",source_norm); if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n", iteration,(double)rsq,(double)pkp,(double)a );**/ rsqstop = rsqmin * source_norm; if( rsq <= rsqstop ){ *final_rsq_ptr= (Real)rsq; return (iteration); } /* main loop - do until convergence or time to restart */ /* oldrsq <- rsq mp <- M_adjoint*M*p pkp <- p.M_adjoint*M.p a <- rsq/pkp psi <- psi + a*p r <- r - a*mp rsq <- |r|^2 b <- rsq/oldrsq p <- r + b*p */ do { oldrsq = rsq; #ifdef LU dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1); dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2); QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even); dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3); dslash_special_qdp(mp, tt2, -1, QDP_even, temp4); QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even); QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_even); pkp = sum; #else dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1); QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all); dslash_special_qdp(mp, ttt, -1, QDP_all, temp1); QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all); QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_all); pkp = sum; #endif iteration++; total_iters++; a = rsq / pkp; QDP_D_peq_r_times_D(psi, &a, cgp, MYSUBSET); QDP_D_meq_r_times_D(cgr, &a, mp, MYSUBSET); QDP_r_eq_norm2_D(&sum, cgr, MYSUBSET); rsq = sum; /**if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n", iteration,(double)rsq,(double)pkp,(double)a );**/ if( rsq <= rsqstop ){ *final_rsq_ptr= (Real)rsq; #ifdef CGTIME dtime += dclock(); if(this_node==0) printf("CONGRAD2: time = %.2e size_r= %.2e iters= %d MF = %.1f\n", dtime,rsq,iteration, (double)6480*iteration*even_sites_on_node/(dtime*1e6)); //(double)5616*iteration*even_sites_on_node/(dtime*1e6)); #endif set_site_from_D(F_OFFSET(psi), psi,EVENANDODD); return (iteration); } b = rsq / oldrsq; QDP_D_eq_r_times_D_plus_D(cgp, &b, cgp, cgr, MYSUBSET); } while( iteration%niter != 0); set_site_from_D(F_OFFSET(psi), psi,EVENANDODD); if( iteration < 3*niter ) goto start; *final_rsq_ptr= (Real)rsq; return(iteration); }
/* Smearing level i*/ static void QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, QDP_ColorMatrix * gf[4], REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_old[4], QDP_ColorMatrix *force_accum_naik_old[4], int internal_num_q_paths, Q_path *internal_q_paths_sorted, int *internal_netbackdir_table) { int i,j,k,lastdir=-99,ipath,ilink; int length,dir,odir; REAL coeff; QDP_ColorMatrix *tmat; QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];; QDP_ColorVector *vec_tmp[2]; int netbackdir; size_t nflops = 0; // table of net path displacements (backwards from usual convention) Q_path *this_path; // pointer to current path /* Allocate fields */ for(i=0;i<=MAX_PATH_LENGTH;i++){ oprod_along_path[i] = QDP_create_M(); } for(i=1;i<=MAX_PATH_LENGTH;i++){ // 0 element is never used (it's unit matrix) mats_along_path[i] = QDP_create_M(); } mat_tmp0 = QDP_create_M(); mat_tmp1 = QDP_create_M(); for(i=0; i<8; i++) stmp[i] = QDP_create_M(); tmat = QDP_create_M(); vec_tmp[0] = QDP_create_V(); vec_tmp[1] = QDP_create_V(); // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum[dir], QDP_all); // loop over paths, and loop over links in path for( ipath=0; ipath<internal_num_q_paths; ipath++ ){ this_path = &(internal_q_paths_sorted[ipath]); if(this_path->forwback== -1)continue; // skip backwards dslash length = this_path->length; netbackdir = internal_netbackdir_table[ipath]; // move f(i-1) force from current site in positive direction, // this corresponds to outer product |X><Y| calculated at the endpoint of the path if( netbackdir<8) { // Not a Naik path link_gather_connection_qdp(oprod_along_path[0] , force_accum_old[OPP_DIR(netbackdir)], tmat, netbackdir ); } else { // Naik path if( NULL==force_accum_naik_old ) { QOP_printf0( "hisq_force_multi_smearing_fnmat: mismatch:\n" ); QOP_printf0( "force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" ); exit(0); } // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION link_gather_connection_qdp(oprod_along_path[0] , force_accum_naik_old[OPP_DIR(netbackdir-8)], tmat, netbackdir ); } // figure out how much of the outer products along the path must be // recomputed. j is last one needing recomputation. k is first one. j=length-1; // default is recompute all if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0; for(ilink=j;ilink>=k;ilink--){ link_transport_connection_qdp( oprod_along_path[length-ilink], oprod_along_path[length-ilink-1], gf, mat_tmp0, stmp, this_path->dir[ilink] ); nflops += 198; } // maintain an array of transports "to this point" along the path. // Don't recompute beginning parts of path if same as last path ilink=0; // first link where new transport is needed // Sometimes we don't need the matrix for the last link if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ; ilink<k; ilink++ ){ if( ilink==0 ){ dir = this_path->dir[0]; if( GOES_FORWARDS(dir) ){ QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir], QDP_backward, QDP_all); QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all); QDP_discard_M(tmat); } else{ QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all); } } else { // ilink != 0 dir = OPP_DIR(this_path->dir[ilink]); link_transport_connection_qdp( mats_along_path[ilink+1], mats_along_path[ilink], gf, mat_tmp0, stmp, dir ); nflops += 198; } } // end loop over links // A path has (length+1) points, counting the ends. At first // point, no "down" direction links have their momenta "at this // point". At last, no "up" ... if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ilink=0; ilink<=k; ilink++ ){ if(ilink<length)dir = this_path->dir[ilink]; else dir=NODIR; coeff = this_path->coeff; if( (ilink%2)==1 )coeff = -coeff; // add in contribution to the force if( ilink<length && GOES_FORWARDS(dir) ){ link_gather_connection_qdp(mat_tmp1, oprod_along_path[length-ilink-1], tmat, dir ); if(ilink==0) { QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all); } else { QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all); nflops += 36; } if( ilink>0 && GOES_BACKWARDS(lastdir) ){ odir = OPP_DIR(lastdir); if( ilink==1 ){ QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all); QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } else{ link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], tmat, odir ); QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all); } QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all); nflops += 36; } lastdir = dir; } // end loop over links in path // } // end loop over paths // QDP_destroy_V( vec_tmp[0] ); QDP_destroy_V( vec_tmp[1] ); QDP_destroy_M( mat_tmp0 ); QDP_destroy_M( mat_tmp1 ); QDP_destroy_M( tmat ); for(i=0; i<8; i++) QDP_destroy_M(stmp[i]); for(i=0;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( oprod_along_path[i] ); } for(i=1;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( mats_along_path[i] ); } info->final_flop = ((double)nflops)*QDP_sites_on_node; return; }//hisq_force_multi_smearing_fnmat
void QOP_hisq_deriv_multi_fnmat2_qdp(QOP_info_t *info, QOP_FermionLinksHisq *flh, QDP_ColorMatrix *deriv[], QOP_hisq_coeffs_t *hisq_coeff, REAL *residues, QDP_ColorVector *x[], int *n_orders_naik) { #define NC QDP_get_nc(deriv[0]) if(!QOP_asqtad.inited) QOP_asqtad_invert_init(); double dtime = QDP_time(); double totalflops = 0; int siteflops = 0; QOP_info_t tinfo; QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4]; for(int i=0; i<4; i++) { Ugf[i] = flh->U_links[i]; Vgf[i] = flh->V_links[i]; Wgf[i] = flh->W_unitlinks[i]; } QDP_ColorMatrix *force_accum_0[4]; QDP_ColorMatrix *force_accum_0_naik[4]; QDP_ColorMatrix *force_accum_1[4]; QDP_ColorMatrix *force_accum_1u[4]; QDP_ColorMatrix *force_accum_2[4]; QDP_ColorMatrix *force_final[4]; QDP_ColorMatrix *tmat = QDP_create_M(); for(int i=0; i<4; i++) { force_accum_0[i] = QDP_create_M(); force_accum_0_naik[i] = QDP_create_M(); force_accum_1[i] = QDP_create_M(); force_accum_1u[i] = QDP_create_M(); force_accum_2[i] = QDP_create_M(); force_final[i] = QDP_create_M(); QDP_M_eq_zero(force_accum_2[i], QDP_all); } int n_naiks = hisq_coeff->n_naiks; int nterms = 0; for(int inaik = 0; inaik < n_naiks; inaik++) nterms += n_orders_naik[inaik]; // loop on different naik masses int n_naik_shift = 0; for(int inaik=0; inaik<n_naiks; inaik++) { int n_orders_naik_current; if( inaik==0 ) { n_orders_naik_current = nterms; } else { n_orders_naik_current = n_orders_naik[inaik]; } QOP_get_mid(&tinfo, force_accum_0, QDP_neighbor, 4, residues+n_naik_shift, 1, x+n_naik_shift, n_orders_naik_current); totalflops += tinfo.final_flop; QOP_get_mid(&tinfo, force_accum_0_naik, QOP_common.neighbor3, 4, residues+n_naik_shift, 1, x+n_naik_shift, n_orders_naik_current); totalflops += tinfo.final_flop; // compensate for -1 on odd sites here instead of at end for(int dir=0; dir<4; dir++) { QDP_M_eqm_M(force_accum_0[dir], force_accum_0[dir], QDP_odd); QDP_M_eqm_M(force_accum_0_naik[dir], force_accum_0_naik[dir], QDP_odd); } // smearing level 0 for(int i=0; i<4; i++) QDP_M_eq_zero(force_accum_1[i], QDP_all); if(inaik==0) { QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->asqtad_one_link; acoef.three_staple = hisq_coeff->asqtad_three_staple; acoef.five_staple = hisq_coeff->asqtad_five_staple; acoef.seven_staple = hisq_coeff->asqtad_seven_staple; acoef.lepage = hisq_coeff->asqtad_lepage; acoef.naik = hisq_coeff->asqtad_naik; QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef, force_accum_0, force_accum_0_naik); //QOP_printf0("HISQ smear0 flops = %g\n", tinfo.final_flop); totalflops += tinfo.final_flop; } else { QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->difference_one_link; acoef.three_staple = 0; acoef.five_staple = 0; acoef.seven_staple = 0; acoef.lepage = 0; acoef.naik = hisq_coeff->difference_naik; QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef, force_accum_0, force_accum_0_naik); totalflops += tinfo.final_flop; } QLA_Real coeff_mult; if( inaik==0 ) { coeff_mult = 1.0; } else { coeff_mult = hisq_coeff->eps_naik[inaik]; } for(int dir=0; dir<4; dir++) { QDP_M_peq_r_times_M(force_accum_2[dir], &coeff_mult, force_accum_1[dir], QDP_all); } siteflops += 4*36; n_naik_shift += n_orders_naik[inaik]; } // smearing level 1 QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->fat7_one_link; acoef.three_staple = hisq_coeff->fat7_three_staple; acoef.five_staple = hisq_coeff->fat7_five_staple; acoef.seven_staple = hisq_coeff->fat7_seven_staple; acoef.lepage = 0; acoef.naik = 0; if(QOP_hisq_links.use_fat7_lepage) { acoef.lepage = hisq_coeff->fat7_lepage; } QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod; if ( umethod==QOP_UNITARIZE_NONE ){ for(int dir=0; dir<4; dir++) QDP_M_eq_zero(force_accum_1[dir], QDP_all); QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef, force_accum_2, NULL); totalflops += tinfo.final_flop; } else if ( umethod==QOP_UNITARIZE_RATIONAL ) { for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all); // reunitarization #if QOP_Colors == 3 QOP_hisq_force_multi_reunit(&tinfo, Vgf, force_accum_2, force_accum_1u); #else for(int mu=0; mu<4; mu++) { QOP_projectU_deriv_qdp(&tinfo, force_accum_2[mu], Wgf[mu], Vgf[mu], force_accum_1u[mu]); } #endif //QOP_printf0("reunit flops = %g\n", tinfo.final_flop); for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all); totalflops += tinfo.final_flop; for(int dir=0; dir<4; dir++) QDP_M_eq_zero(force_accum_1[dir], QDP_all); QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef, force_accum_1u, NULL); //QOP_printf0("HISQ smear1 flops = %g\n", tinfo.final_flop); totalflops += tinfo.final_flop; } else { QOP_printf0("Unknown or unsupported unitarization method\n"); exit(1); } // take into account even/odd parity (it is NOT done in "smearing" routine) // eps multiplication done outside QOP // extra factor of 2 for(int dir=0; dir<4; dir++) { QLA_Real treal = 2; //QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_even); //QDP_M_meq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_odd); QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_all); } siteflops += 4*36; for(int i=0; i<4; i++) { QDP_destroy_M( force_accum_0[i] ); QDP_destroy_M( force_accum_0_naik[i] ); QDP_destroy_M( force_accum_1[i] ); QDP_destroy_M( force_accum_1u[i] ); QDP_destroy_M( force_accum_2[i] ); QDP_destroy_M( force_final[i] ); } QDP_destroy_M( tmat ); totalflops += ((double)siteflops)*QDP_sites_on_node; info->final_sec = QDP_time() - dtime; info->final_flop = totalflops; info->status = QOP_SUCCESS; #undef NC }