/*
 * Accumulate the ordered product of links along a path and store it in m.
 *
 *   u         gauge links indexed by direction (0-based)
 *   m         destination; written on subset[subl] for the final value of subl
 *   path      signed, 1-based steps: the direction is abs(path[i])-1 and the
 *             sign selects the branch taken below (positive: the path moves
 *             in the + direction and data is shifted from the backward side)
 *   len       number of steps; must be >= 1, otherwise p is still NULL when
 *             it is copied into m
 *   subl      starting sublattice index, advanced with neighsubl() per step
 *   subset    subsets indexed by sublattice
 *   neighsubl maps (sublattice, signed step) to the sublattice reached
 *
 * The work fields tm[] and sm[][] and the helper eosub() are defined outside
 * this function.
 */
static void
path_prod(QDP_ColorMatrix *u[], QDP_ColorMatrix *m, int path[], int len, int subl,
          QDP_Subset subset[], int (*neighsubl)(int subl, int dir))
{
  QDP_ShiftDir fb;
  QDP_ColorMatrix *p=NULL, *s=NULL;
  QDP_Lattice *lat = QDP_get_lattice_M(m);
  int nd = QDP_ndim_L(lat);
  int sn = 0;  /* index of the tm[] buffer currently being filled */

  for(int i=0; i<len; i++) {
    int dir = abs(path[i])-1;
    // if the path moves in the + dir then we shift from the backward dir
    fb = path[i]<0 ? QDP_forward : QDP_backward;
    if(fb==QDP_backward) { // path is moving in + dir
      /* multiply by adj(u[dir]) first, then shift the product */
      if(i==0) {
        QDP_M_eq_Ma(tm[sn], u[dir], subset[subl]);
      } else {
        QDP_M_eq_Ma_times_M(tm[sn], u[dir], p, subset[subl]);
        QDP_discard_M(p);
      }
      subl = neighsubl(subl, path[i]);
      s = sm[sn][nd+dir];
      QDP_discard_M(s);
      QDP_M_eq_sM(s, tm[sn], QDP_neighbor_L(lat)[dir], fb, eosub(subl));
      sn = 1-sn;  /* swap work buffers so the pending shift source is not overwritten */
      p = s;
    } else {
      /* shift the running product first, then multiply by u[dir] */
      if(i==0) {
        subl = neighsubl(subl, path[i]);
        QDP_M_eq_M(tm[1-sn], u[dir], subset[subl]);
      } else {
        QDP_M_eq_M(tm[sn], p, subset[subl]);
        QDP_discard_M(p);
        subl = neighsubl(subl, path[i]);
        s = sm[sn][dir];
        QDP_discard_M(s);
        QDP_M_eq_sM(s, tm[sn], QDP_neighbor_L(lat)[dir], fb, eosub(subl));
        QDP_M_eq_M_times_M(tm[1-sn], u[dir], s, subset[subl]);
        QDP_discard_M(s);
      }
      p = tm[1-sn];
    }
  }
  QDP_M_eq_M(m, p, subset[subl]);
  QDP_discard_M(p);
  /* s is never assigned when the only step taken is a first-step negative
     move (len==1, path[0]<0); do not hand a NULL field to QDP in that case */
  if(s!=NULL) {
    QDP_discard_M(s);
  }
}
/*
 * Compute the pair of staples (upper and lower) in the mu-nu plane:
 *
 *              mu
 *           +-------+
 *     nu    |       |
 *           |       |
 *           X       X
 *
 * The mu side of the staple is taken from `link`, which may be any
 * color matrix field rather than a gauge link.
 *
 * If staple is non-NULL it receives the sum of both staples, and that sum
 * is added to fl[mu] with weight dcoef.  If staple is NULL nothing is
 * saved; each staple is added to fl[mu] with weight dcoef as it is built.
 */
static void
compute_gen_staple(QDP_ColorMatrix *staple, int mu, int nu,
                   QDP_ColorMatrix *link, double dcoef,
                   QDP_ColorMatrix *gauge[], QDP_ColorMatrix *fl[])
{
  QLA_Real weight = dcoef;
  const int keep = (staple != NULL);

  QDP_ColorMatrix *shifted0 = QDP_create_M();
  QDP_ColorMatrix *shifted1 = QDP_create_M();
  QDP_ColorMatrix *prod = QDP_create_M();
  QDP_ColorMatrix *upper = QDP_create_M();
  QDP_ColorMatrix *lower = QDP_create_M();

  /* Upper staple: gauge[nu](x) * link(x+nu) * adj(gauge[nu](x+mu)) */
  QDP_M_eq_sM(shifted0, link, QDP_neighbor[nu], QDP_forward, QDP_all);
  QDP_M_eq_sM(shifted1, gauge[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
  QDP_M_eq_M_times_Ma(prod, shifted0, shifted1, QDP_all);
  if(keep) {
    /* keep it; it is added to the fat link together with the lower staple */
    QDP_M_eq_M_times_M(staple, gauge[nu], prod, QDP_all);
  } else {
    QDP_M_eq_M_times_M(upper, gauge[nu], prod, QDP_all);
    QDP_M_peq_r_times_M(fl[mu], &weight, upper, QDP_all);
  }

  /* Lower staple: adj(gauge[nu]) * link * gauge[nu](x+mu), pulled up from x-nu */
  QDP_M_eq_sM(shifted0, gauge[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
  QDP_M_eq_Ma_times_M(prod, gauge[nu], link, QDP_all);
  QDP_M_eq_M_times_M(lower, prod, shifted0, QDP_all);
  QDP_M_eq_sM(shifted0, lower, QDP_neighbor[nu], QDP_backward, QDP_all);

  /* what gets added to the fat link: the saved sum, or just the lower staple */
  QDP_ColorMatrix *contrib = shifted0;
  if(keep) {
    QDP_M_peq_M(staple, shifted0, QDP_all);
    contrib = staple;
  }
  QDP_M_peq_r_times_M(fl[mu], &weight, contrib, QDP_all);

  QDP_destroy_M(shifted0);
  QDP_destroy_M(shifted1);
  QDP_destroy_M(prod);
  QDP_destroy_M(upper);
  QDP_destroy_M(lower);
} /* compute_gen_staple */
/*
 * Transport a "connection" (color matrix field) by a single link, on all
 * sites (both parities).
 *
 *   dest  result field
 *   src   field to transport
 *   gf    gauge links for the four forward directions
 *   work  scratch field, overwritten
 *   st    per-direction scratch fields used as shift destinations
 *   dir   direction index; GOES_FORWARDS(dir) selects the branch
 */
static void
link_transport_connection_qdp(QDP_ColorMatrix *dest, QDP_ColorMatrix *src,
                              QDP_ColorMatrix *gf[4], QDP_ColorMatrix *work,
                              QDP_ColorMatrix *st[8], int dir)
{
  if( GOES_FORWARDS(dir) ) {
    /* fetch src from the forward neighbor, then multiply by the local link */
    QDP_M_eq_M(work, src, QDP_all);
    QDP_M_eq_sM(st[dir], work, QDP_neighbor[dir], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(dest, gf[dir], st[dir], QDP_all);
    QDP_discard_M(st[dir]);
    return;
  }

  /* GOES_BACKWARDS(dir): multiply by the adjoint link first, then fetch
     the product from the backward neighbor */
  QDP_M_eq_Ma_times_M(work, gf[OPP_DIR(dir)], src, QDP_all);
  QDP_M_eq_sM(st[dir], work, QDP_neighbor[OPP_DIR(dir)], QDP_backward,QDP_all);
  QDP_M_eq_M(dest, st[dir], QDP_all);
  QDP_discard_M(st[dir]);
} /* link_transport_connection_qdp */
/*
 * Return a shifted copy of `in`, reusing a cached field when possible.
 *
 *   tmp   address of the cache slot; if *tmp is NULL a field is created,
 *         stored there, and the shift is always performed
 *   in    field to shift
 *   mu    index into QDP_neighbor selecting the shift
 *   dir   shift direction
 *   redo  nonzero forces the shift to be recomputed into the cached field
 *
 * Returns *tmp (the cached field).
 */
static QDP_ColorMatrix *
cacheshift(QDP_ColorMatrix **tmp, QDP_ColorMatrix *in, int mu,
           QDP_ShiftDir dir, int redo)
{
#define NC QDP_get_nc(in)
  if(*tmp==NULL) {
    /* first use of this slot: allocate and force the shift */
    *tmp = QDP_create_M();
    redo = 1;
  }
  QDP_ColorMatrix *cached = *tmp;
  if(redo) {
    QDP_M_eq_sM(cached, in, QDP_neighbor[mu], dir, QDP_all);
  }
  return cached;
#undef NC
}
// like link_transport, except doesn't multiply by link matrices. // use this, for example, // when storing the intermediate HISQ force (a connection) at the lattice site // associated with a link static void link_gather_connection_qdp( QDP_ColorMatrix *dest, QDP_ColorMatrix *src, QDP_ColorMatrix *work, int dir ){ if (dir >= 8) //3 link shift needed { dir=dir-8; //do initial 2 shifts if( GOES_FORWARDS(dir) ) { QDP_M_eq_sM(dest, src, QDP_neighbor[dir], QDP_forward, QDP_all); QDP_M_eq_sM(work, dest, QDP_neighbor[dir], QDP_forward, QDP_all); } else { /* GOES_BACKWARDS(dir) */ QDP_M_eq_sM(dest, src, QDP_neighbor[OPP_DIR(dir)], QDP_backward, QDP_all); QDP_M_eq_sM(work, dest, QDP_neighbor[OPP_DIR(dir)], QDP_backward, QDP_all); } } else{ //only 1 link shift needed QDP_M_eq_M(work, src, QDP_all); } //do final shift if( GOES_FORWARDS(dir) ) { QDP_M_eq_sM(dest, work, QDP_neighbor[dir], QDP_forward, QDP_all); } else { /* GOES_BACKWARDS(dir) */ QDP_M_eq_sM(dest, work, QDP_neighbor[OPP_DIR(dir)], QDP_backward, QDP_all); } } /* link_gather_connection_qdp */
/*
 * Accumulate the asqtad fermion force from nsrc source vectors into force[].
 *
 *   info   receives elapsed time, flop count and status on return
 *   links  gauge links for the forward directions
 *   force  force/momentum fields for mu = 0..3; updated in place
 *          (tempmom_qdp[mu] is set to alias force[mu])
 *   coef   asqtad path coefficients (one_link, naik, three_staple,
 *          five_staple, seven_staple, lepage)
 *   eps    per-source weight; every path coefficient is multiplied by eps[i]
 *   xin    source color vectors, one per source
 *   nsrc   number of sources
 *
 * Directions run over 0..7; OPP_DIR() gives the opposite direction and
 * GOES_FORWARDS()/GOES_BACKWARDS() classify them.  The odd sites of force[]
 * are negated on entry; on exit each force[mu] is replaced by the
 * anti-Hermitian projection of (even part, minus odd part).
 *
 * fbshift, fbshiftdir, fblink, tv and tempmom_qdp are not declared in this
 * function; they are assigned here and are presumably consumed by the
 * helpers u_shift_color_vecs(), add_forces_to_mom() and side_link_forces()
 * -- TODO confirm against their definitions.
 * NOTE(review): tv is left pointing at the local array ttv after return.
 */
void
QOP_asqtad_force_multi_asvec_qdp(QOP_info_t *info, QDP_ColorMatrix *links[],
                                 QDP_ColorMatrix *force[], QOP_asqtad_coeffs_t *coef,
                                 REAL eps[], QDP_ColorVector *xin[], int nsrc)
{
#define NC QDP_get_nc(xin[0])
  REAL coeff[nsrc];  /* per-source ratio of path coefficients, recomputed at each use */
  REAL OneLink[nsrc], Lepage[nsrc], Naik[nsrc], FiveSt[nsrc], ThreeSt[nsrc], SevenSt[nsrc];
  REAL mNaik[nsrc], mLepage[nsrc], mFiveSt[nsrc], mThreeSt[nsrc], mSevenSt[nsrc];  /* negated copies */

  QDP_ColorVector *P3[8][nsrc];
  QDP_ColorVector *P5[8][nsrc];        /* aliases into P5s, assigned per (mu,nu) below */
  QDP_ColorVector *P5tmp[8][8][nsrc];  /* aliases into P5tmps */
  QDP_ColorVector *P5s[4][nsrc];       /* the storage actually allocated for P5 */
  QDP_ColorVector *P5tmps[4][8][nsrc]; /* the storage actually allocated for P5tmp */
  QDP_ColorVector *xintmp[8][nsrc];
  QDP_ColorVector *Pmu[nsrc];
  QDP_ColorVector *Pmutmp[8][nsrc];
  QDP_ColorVector *Pnumu[nsrc];
  QDP_ColorVector *Pnumutmp[8][nsrc];
  QDP_ColorVector *Prhonumu[nsrc];
  QDP_ColorVector *Prhonumutmp[8][nsrc];
  QDP_ColorVector *P7[nsrc];
  QDP_ColorVector *P7tmp[8][nsrc];
  QDP_ColorVector *P7rho[nsrc];
  QDP_ColorVector *ttv[nsrc];

  int i, dir;
  int mu, nu, rho, sig;
  /* flop count per site: nflop1 for one source, nflop2 for two, extended linearly in nsrc */
  double nflop1 = 253935;
  double nflop2 = 433968;
  double nflop = nflop1 + (nflop2-nflop1)*(nsrc-1);
  double dtime;

  dtime = -QOP_time();

  ASQTAD_FORCE_BEGIN;

  QOP_trace("test 1\n");
  /* setup parallel transport */
  /* fblink[OPP_DIR(i)] = adjoint of links[i] fetched from the backward neighbor */
  QDP_ColorMatrix *tmpmat = QDP_create_M();
  for(i=0; i<QOP_common.ndim; i++) {
    fbshift[i] = QDP_neighbor[i];
    fbshiftdir[i] = QDP_forward;
    fblink[i] = links[i];
    fbshift[OPP_DIR(i)] = QDP_neighbor[i];
    fbshiftdir[OPP_DIR(i)] = QDP_backward;
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }

  tv = ttv;
  for(i=0; i<nsrc; i++) {
    tv[i] = QDP_create_V();
  }

  QOP_trace("test 2\n");
  /* Allocate temporary vectors */
  for(i=0; i<nsrc; i++) {
    Pmu[i] = QDP_create_V();
    Pnumu[i] = QDP_create_V();
    Prhonumu[i] = QDP_create_V();
    P7[i] = QDP_create_V();
    P7rho[i] = QDP_create_V();
    for(dir=0; dir<8; dir++) {
      xintmp[dir][i] = QDP_create_V();
      Pmutmp[dir][i] = QDP_create_V();
      Pnumutmp[dir][i] = QDP_create_V();
      Prhonumutmp[dir][i] = QDP_create_V();
      P7tmp[dir][i] = QDP_create_V();
    }
#if 1
    /* only 4 P5 sets are allocated; they are mapped onto P5[sig] inside the nu loop */
    for(mu=0; mu<4; mu++) {
      P5s[mu][i] = QDP_create_V();
      for(dir=0; dir<8; dir++) {
        P5tmps[mu][dir][i] = QDP_create_V();
      }
    }
#else
    for(mu=0; mu<8; mu++) {
      P5[mu][i] = QDP_create_V();
      for(dir=0; dir<8; dir++) {
        P5tmp[mu][dir][i] = QDP_create_V();
        if(P5tmp[mu][dir][i]==NULL) {
          fprintf(stderr, "error: can't create V\n");
          QDP_abort();
        }
      }
    }
#endif
  }

  for(mu=0; mu<8; mu++) {
    for(i=0; i<nsrc; i++) {
      P3[mu][i] = QDP_create_V();
    }
  }

  /* work directly on the caller's force fields; flip the sign on odd sites
     (undone by the even/odd recombination after the main loop) */
  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = force[mu];
    QDP_M_eqm_M(tempmom_qdp[mu], tempmom_qdp[mu], QDP_odd);
  }

  /* Path coefficients times fermion epsilon */
  /* Load path coefficients from table */
  for(i=0; i<nsrc; i++) {
    OneLink[i] = coef->one_link * eps[i];
    Naik[i] = coef->naik * eps[i]; mNaik[i] = -Naik[i];
    ThreeSt[i] = coef->three_staple * eps[i]; mThreeSt[i] = -ThreeSt[i];
    FiveSt[i] = coef->five_staple * eps[i]; mFiveSt[i] = -FiveSt[i];
    SevenSt[i] = coef->seven_staple * eps[i]; mSevenSt[i] = -SevenSt[i];
    Lepage[i] = coef->lepage * eps[i]; mLepage[i] = -Lepage[i];
  }

#if 0
  printf("nsrc = %i\n", nsrc);
  printf("coeffs = %g %g %g %g %g %g\n", OneLink[0], ThreeSt[0], FiveSt[0],
         SevenSt[0], Lepage[0], Naik[0]);
#endif

  /* *************************************** */

  QOP_trace("start force loop\n");
  for(mu=0; mu<8; mu++) {
    /* Pmu = xin shifted along OPP_DIR(mu) */
    u_shift_color_vecs(xin, Pmu, OPP_DIR(mu), nsrc, xintmp[OPP_DIR(mu)]);

    for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) {
      u_shift_color_vecs(Pmu, P3[sig], sig, nsrc, Pmutmp[sig]);

      if(GOES_FORWARDS(sig)) {
        /* Add the force F_sig[x+mu]:         x--+             *
         *                                   |   |             *
         *                                   o   o             *
         * the 1 link in the path: - (numbering starts from 0) */
        add_forces_to_mom(P3[sig], Pmu, sig, mThreeSt, nsrc);
      }
    }

    for(nu=0; nu<8; nu++) if( (nu!=mu)&&(nu!=OPP_DIR(mu)) ) {
      int nP5 = 0;  /* next free slot in P5s/P5tmps for this (mu,nu) */
      u_shift_color_vecs(Pmu, Pnumu, OPP_DIR(nu), nsrc, Pmutmp[OPP_DIR(nu)]);
      for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) &&
                                   (sig!=nu)&&(sig!=OPP_DIR(nu)) ) {
#if 1
        /* point P5[sig] and P5tmp[sig] at the next allocated slot */
        for(i=0; i<nsrc; i++) {
          P5[sig][i] = P5s[nP5][i];
          for(dir=0; dir<8; dir++) P5tmp[sig][dir][i] = P5tmps[nP5][dir][i];
        }
#endif
        nP5++;
        u_shift_color_vecs(Pnumu, P5[sig], sig, nsrc, Pnumutmp[sig]);

        if(GOES_FORWARDS(sig)) {
          /* Add the force F_sig[x+mu+nu]:      x--+             *
           *                                   |   |             *
           *                                   o   o             *
           * the 2 link in the path: + (numbering starts from 0) */
          add_forces_to_mom(P5[sig], Pnumu, sig, FiveSt, nsrc);
        }
      }
      QOP_trace("test 4\n");
      for(rho=0; rho<8; rho++) if( (rho!=mu)&&(rho!=OPP_DIR(mu)) &&
                                   (rho!=nu)&&(rho!=OPP_DIR(nu)) ) {
        u_shift_color_vecs(Pnumu, Prhonumu, OPP_DIR(rho), nsrc,
                           Pnumutmp[OPP_DIR(rho)]);
        for(sig=0; sig<8; sig++) if( (sig!=mu )&&(sig!=OPP_DIR(mu )) &&
                                     (sig!=nu )&&(sig!=OPP_DIR(nu )) &&
                                     (sig!=rho)&&(sig!=OPP_DIR(rho)) ) {
          /* Length 7 paths */
          QOP_trace("test 43\n");
          u_shift_color_vecs(Prhonumu, P7, sig, nsrc, Prhonumutmp[sig]);
          QOP_trace("test 44\n");
          if(GOES_FORWARDS(sig)) {
            /* Add the force F_sig[x+mu+nu+rho]:  x--+             *
             *                                   |   |             *
             *                                   o   o             *
             * the 3 link in the path: - (numbering starts from 0) */
            QOP_trace("test 45\n");
            add_forces_to_mom(P7, Prhonumu, sig, mSevenSt, nsrc);
            QOP_trace("test 46\n");
          }
          /* Add the force F_rho the 2(4) link in the path: + */
          QOP_trace("test 47\n");
          u_shift_color_vecs(P7, P7rho, rho, nsrc, P7tmp[rho]);
          QOP_trace("test 48\n");
          side_link_forces(rho,sig,SevenSt,Pnumu,P7,Prhonumu,P7rho, nsrc);
          QOP_trace("test 49\n");
          /* Add the P7rho vector to P5 */
          /* weight is SevenSt/FiveSt, or 0 when FiveSt vanishes (avoids dividing by zero) */
          for(i=0; i<nsrc; i++) {
            if(FiveSt[i]!=0) coeff[i] = SevenSt[i]/FiveSt[i];
            else coeff[i] = 0;
            QOP_trace("test 410\n");
            QDP_V_peq_r_times_V(P5[sig][i], &coeff[i], P7rho[i], QDP_all);
            QOP_trace("test 411\n");
          }
        } /* sig */
      } /* rho */
      QOP_trace("test 5\n");
/* P7 is free again here and is reused as P5nu */
#define P5nu P7
      for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) &&
                                   (sig!=nu)&&(sig!=OPP_DIR(nu)) ) {
        /* Length 5 paths */
        /* Add the force F_nu the 1(3) link in the path: - */
        u_shift_color_vecs(P5[sig], P5nu, nu, nsrc, P5tmp[sig][nu]);
        side_link_forces(nu, sig, mFiveSt, Pmu, P5[sig], Pnumu, P5nu, nsrc);
        /* Add the P5nu vector to P3 */
        /* weight is FiveSt/ThreeSt, or 0 when ThreeSt vanishes */
        for(i=0; i<nsrc; i++) {
          if(ThreeSt[i]!=0) coeff[i] = FiveSt[i]/ThreeSt[i];
          else coeff[i] = 0;
          QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all);
        }
      } /* sig */
    } /* nu */

/* The Lepage, 3-link, one-link and Naik terms below reuse the work vectors
   of the loops above under these names. */
#define Pmumu Pnumu
#define Pmumutmp Pnumutmp
#define P5sig Prhonumu
#define P5sigtmp Prhonumutmp
#define P3mu P7
#define Popmu P7
#define Pmumumu P7
    /* Now the Lepage term... It is the same as 5-link paths with
       nu=mu and FiveSt=Lepage. */
    u_shift_color_vecs(Pmu, Pmumu, OPP_DIR(mu), nsrc, Pmutmp[OPP_DIR(mu)]);

    for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) {
      u_shift_color_vecs(Pmumu, P5sig, sig, nsrc, Pmumutmp[sig]);
      if(GOES_FORWARDS(sig)) {
        /* Add the force F_sig[x+mu+nu]:      x--+             *
         *                                   |   |             *
         *                                   o   o             *
         * the 2 link in the path: + (numbering starts from 0) */
        add_forces_to_mom(P5sig, Pmumu, sig, Lepage, nsrc);
      }
      /* Add the force F_nu the 1(3) link in the path: - */
      u_shift_color_vecs(P5sig, P5nu, mu, nsrc, P5sigtmp[mu]);
      side_link_forces(mu, sig, mLepage, Pmu, P5sig, Pmumu, P5nu, nsrc);
      /* Add the P5nu vector to P3 */
      /* weight is Lepage/ThreeSt, or 0 when ThreeSt vanishes */
      for(i=0; i<nsrc; i++) {
        if(ThreeSt[i]!=0) coeff[i] = Lepage[i]/ThreeSt[i];
        else coeff[i] = 0;
        QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all);
      }

      /* Length 3 paths (Not the Naik term) */
      /* Add the force F_mu the 0(2) link in the path: + */
      if(GOES_FORWARDS(mu)) {
        /* OK to clobber P5nu: P3mu is the same storage (P7) */
        /* P3[sig] is copied into P5sig first and the copy is what gets shifted */
        for(i=0; i<nsrc; i++) {
          QDP_V_eq_V(P5sig[i], P3[sig][i], QDP_all);
        }
        u_shift_color_vecs(P5sig, P3mu, mu, nsrc, P5sigtmp[mu]);
      }
      /* The above shift is not needed if mu is backwards */
      side_link_forces(mu, sig, ThreeSt, xin, P3[sig], Pmu, P3mu, nsrc);
    }

    /* Finally the OneLink and the Naik term */
    if(GOES_BACKWARDS(mu)) {
      /* Do only the forward terms in the Dslash */
      /* Because I have shifted with OPP_DIR(mu) Pmu is a forward *
       * shift.                                                   */
      /* The one link */
      add_forces_to_mom(Pmu, xin, OPP_DIR(mu), OneLink, nsrc);
      /* For the same reason Pmumu is the forward double link */

      /* Popmu is a backward shift */
      /* OK to clobber P3mu: Popmu is the same storage (P7) */
      u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]);
      /* The Naik */
      /* link no 1: - */
      add_forces_to_mom(Pmumu, Popmu, OPP_DIR(mu), mNaik, nsrc);
      /* Pmumumu can overwrite Popmu which is no longer needed */
      u_shift_color_vecs(Pmumu, Pmumumu, OPP_DIR(mu), nsrc, Pmumutmp[OPP_DIR(mu)]);
      /* link no 0: + */
      add_forces_to_mom(Pmumumu, xin, OPP_DIR(mu), Naik, nsrc);
    } else {
      /* The rest of the Naik terms */
      /* OK to clobber P3mu: Popmu is the same storage (P7) */
      u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]);
      /* link no 2: + */
      /* Pmumu is double backward shift */
      add_forces_to_mom(Popmu, Pmumu, mu, Naik, nsrc);
    }
    /* Here we have to do together the Naik term and the one link term */

  }/* mu */
  QOP_trace("test 6\n");
  QOP_trace("test 7\n");

  /* recombine: keep even sites, negate odd sites, then project each
     force[mu] onto its anti-Hermitian part */
  for(mu=0; mu<4; mu++) {
    QDP_M_eq_M(tmpmat, tempmom_qdp[mu], QDP_even);
    QDP_M_eqm_M(tmpmat, tempmom_qdp[mu], QDP_odd);
    QDP_M_eq_antiherm_M(tempmom_qdp[mu], tmpmat, QDP_all);
  }
  QDP_destroy_M(tmpmat);

  /* Free temporary vectors */
  for(i=0; i<nsrc; i++) {
    QDP_destroy_V(Pmu[i]);
    QDP_destroy_V(Pnumu[i]);
    QDP_destroy_V(Prhonumu[i]);
    QDP_destroy_V(P7[i]);
    QDP_destroy_V(P7rho[i]);
    for(dir=0; dir<8; dir++) {
      QDP_destroy_V(xintmp[dir][i]);
      QDP_destroy_V(Pmutmp[dir][i]);
      QDP_destroy_V(Pnumutmp[dir][i]);
      QDP_destroy_V(Prhonumutmp[dir][i]);
      QDP_destroy_V(P7tmp[dir][i]);
    }
    /* P5/P5tmp are only aliases; the storage lives in P5s/P5tmps */
    for(mu=0; mu<4; mu++) {
      QDP_destroy_V(P5s[mu][i]);
      for(dir=0; dir<8; dir++) {
        QDP_destroy_V(P5tmps[mu][dir][i]);
      }
    }
  }
  for(mu=0; mu<8; mu++) {
    for(i=0; i<nsrc; i++) {
      QDP_destroy_V(P3[mu][i]);
    }
  }

  for(i=0; i<nsrc; i++) {
    QDP_destroy_V(tv[i]);
  }

  /* NOTE(review): frees fblink[4..7]; this matches the fields created in the
     setup loop only if QOP_common.ndim is 4 and OPP_DIR maps 0..3 onto 4..7
     -- confirm */
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node;
  info->status = QOP_SUCCESS;

  ASQTAD_FORCE_END;
#undef NC
}
/*
 * Gauge force built from plaquette, rectangle and parallelogram terms
 * weighted by coeffs->plaquette, coeffs->rectangle and coeffs->parallelogram.
 *
 *   info    receives elapsed time, flop count and status on return
 *   gauge   gauge field; gauge->links[] are read
 *   force   force->force[mu] is updated in place, mu = 0..3
 *   coeffs  the three loop coefficients
 *   eps     step size; the accumulated staple sum enters with weight -eps/3
 *
 * For each mu the staple sum S_mu is accumulated in tempmom_qdp[mu]; then
 *   force[mu] <- antiherm( (-eps/3) * U_mu * adj(S_mu) + force[mu] ).
 *
 * fblink[] is not declared in this function.  Entries 0..ndim-1 alias the
 * gauge links and entries OPP_DIR(i) are created here to hold the adjoint
 * link fetched from the backward neighbor.
 */
void
QOPPC(symanzik_1loop_gauge_force1) (QOP_info_t *info, QOP_GaugeField *gauge,
                                    QOP_Force *force, QOP_gauge_coeffs_t *coeffs,
                                    REAL eps)
{
  REAL Plaq, Rect, Pgm ;
  QDP_ColorMatrix *tempmom_qdp[4];
  /* Amu[i]   = U_nu(x)*U_mu(x+nu)     for the i-th direction nu != mu
     Amu[i+3] = U_{-nu}(x)*U_mu(x-nu)  for the same nu */
  QDP_ColorMatrix *Amu[6]; // products of 2 links Unu(x)*Umu(x+nu)
  QDP_ColorMatrix *tmpmat;
  QDP_ColorMatrix *tmpmat1;
  QDP_ColorMatrix *tmpmat2;
  QDP_ColorMatrix *staples;
  QDP_ColorMatrix *tmpmat3;
  QDP_ColorMatrix *tmpmat4;
  int i, k;
  int mu, nu, sig;
  double dtime;
  //REAL eb3 = -eps*beta/3.0;
  REAL eb3 = -eps/3.0;
  /* j[i][k]: Amu index of the k-th direction that differs from both mu and
     the i-th nu (indices count the three directions != mu in increasing order) */
  int j[3][2] = {{1,2}, {0,2}, {0,1}};

  // QOP_printf0("beta: %e, eb3: %e\n", beta, eb3);
  dtime = -QOP_time();

  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = QDP_create_M();
    QDP_M_eq_zero(tempmom_qdp[mu], QDP_all);
  }

  /* backward links: fblink[OPP_DIR(i)](x) = adj(U_i(x-i)) */
  tmpmat = QDP_create_M();
  for(i=0; i<QOP_common.ndim; i++) {
    fblink[i] = gauge->links[i];
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }

  for(i=0; i<6; i++) {
    Amu[i] = QDP_create_M();
  }

  staples = QDP_create_M();
  tmpmat1 = QDP_create_M();
  tmpmat2 = QDP_create_M();
  tmpmat3 = QDP_create_M();
  tmpmat4 = QDP_create_M();

  Plaq = coeffs->plaquette;
  Rect = coeffs->rectangle;
  Pgm = coeffs->parallelogram;

  //Construct 3-staples and rectangles
  for(mu=0; mu<4; mu++) {
    i=0;
    for(nu=0; nu<4; nu++) {
      if(nu!=mu){
        // tmpmat1 = Umu(x+nu)
        QDP_M_eq_sM(tmpmat1, fblink[mu], QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(Amu[i], fblink[nu], tmpmat1, QDP_all);

        //tmpmat2 = Umu(x-nu)
        QDP_M_eq_sM(tmpmat2, fblink[mu], QDP_neighbor[nu], QDP_backward, QDP_all);
        QDP_M_eq_M_times_M(Amu[i+3], fblink[OPP_DIR(nu)], tmpmat2, QDP_all);

        //tmpmat = U_{nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        // staples = Unu(x)*Umu(x+nu)*adj(Unu(x+mu)): the +nu 3-staple
        QDP_M_eq_M_times_Ma(staples, Amu[i], tmpmat, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);

        // rectangle terms built on the +nu staple; some feed the nu force
        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_Ma_times_M(tmpmat3, fblink[OPP_DIR(nu)], staples, QDP_all);
        QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

        QDP_M_eq_Ma_times_M(tmpmat4, tmpmat2, tmpmat3, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);

        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat3, tmpmat2, tmpmat, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat, tmpmat3, staples, QDP_all);
        QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);

        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        // staples = U_{-nu}(x)*Umu(x-nu)*adj(U_{-nu}(x+mu)): the -nu 3-staple
        QDP_M_eq_M_times_Ma(staples, Amu[i+3], tmpmat, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);

        // rectangle terms built on the -nu staple
        QDP_M_eq_Ma_times_M(tmpmat3, fblink[nu], staples, QDP_all);
        //tmpmat = U_{nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

        QDP_M_eq_Ma_times_M(tmpmat, tmpmat3, tmpmat1, QDP_all);
        QDP_M_eq_sM(tmpmat4, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);

        //tmpmat = U_{nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(tmpmat3, staples, tmpmat, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat4, tmpmat3, tmpmat1, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);
        i++;
      }
    }

    // Construct the pgm staples and add them to force
    // (relies on Amu[] still holding the products for the current mu)
    QDP_M_eq_zero(staples, QDP_all);
    i=0;
    for(nu=0; nu<4; nu++){
      if(nu!=mu){
        k=0;
        for(sig=0; sig<4;sig ++){
          if(sig!=mu && nu!=sig){

            // the nu_sig_mu ... staple and 3 reflections

            // --- reflection (+nu, +sig) ---
            //tmpmat = Amu["sig"](x+nu)
            QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["sig"](x+nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);
            //tmpmat3 = Unu(x+mu+sig)
            QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
            QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE?
            //tmpmat2 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))
            QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = Usig(x+mu)
            QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu))
            QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);
            QDP_M_peq_M(staples, tmpmat1, QDP_all);

            // --- reflection (-nu, +sig) ---
            //tmpmat = Amu["sig"](x-nu)
            QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_backward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);
            //tmpmat3 = U_{-nu}(x+mu+sig)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
            QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE?
            //tmpmat2 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(U_{-nu}(x+mu+sig))
            QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = Usig(x+mu)
            QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(U_{-nu}(x+mu+sig))*adj(Usig(x+mu))
            QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);
            QDP_M_peq_M(staples, tmpmat1, QDP_all);

            // --- reflection (-nu, -sig) ---
            //tmpmat = Amu["-sig"](x-nu)
            QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_backward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);
            //tmpmat3 = U_{-nu}(x+mu-sig)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
            QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE?
            //tmpmat2 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(U_{-nu}(x+mu-sig))
            QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = U_{-sig}(x+mu)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(U_{-nu}(x+mu-sig))*adj(U_{-sig}(x+mu))
            QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);
            QDP_M_peq_M(staples, tmpmat1, QDP_all);

            // --- reflection (+nu, -sig) ---
            //tmpmat = Amu["-sig"](x+nu)
            QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);
            //tmpmat3 = Unu(x+mu-sig)
            QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
            QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE?
            //tmpmat2 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))
            QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = U_{-sig}(x+mu)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))*adj(U_{-sig}(x+mu))
            QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);
            QDP_M_peq_M(staples, tmpmat1, QDP_all);

            k++;
          }//close if sig!=nu ...
        }//close sig loop
        i++;
      }// close if nu!=mu
    }//close the pgm nu loop
    QDP_M_peq_r_times_M(tempmom_qdp[mu], &Pgm, staples, QDP_all);

  }// closes the mu loop

#ifdef CHKSUM
  QLA_ColorMatrix qcm;
  QLA_Complex det, chk;
  QLA_c_eq_r(chk, 0);
#endif
  /* close the loops with U_mu and fold the result into the caller's force:
     force[mu] = antiherm( eb3 * U_mu*adj(S_mu) + force[mu] ) */
  for(mu=0; mu<4; mu++){
    QDP_M_eq_M_times_Ma(tmpmat, fblink[mu], tempmom_qdp[mu], QDP_all); // HERE?
    QDP_M_eq_r_times_M_plus_M( tempmom_qdp[mu], &eb3, tmpmat, force->force[mu], QDP_all);// HERE?
    QDP_M_eq_antiherm_M(force->force[mu], tempmom_qdp[mu], QDP_all);// HERE
#ifdef CHKSUM
    /* debugging aid: determinant of the global sum of each force component */
    QDP_m_eq_sum_M(&qcm, force->force[mu], QDP_all);
    QLA_C_eq_det_M(&det, &qcm);
    QLA_c_peq_c(chk, det);
#endif
  }
#ifdef CHKSUM
  QOP_printf0("chksum: %g %g\n", QLA_real(chk), QLA_imag(chk));
#endif

  //DESTROY various fields

  QDP_destroy_M(tmpmat);
  QDP_destroy_M(tmpmat1);
  QDP_destroy_M(tmpmat2);
  QDP_destroy_M(tmpmat3);
  QDP_destroy_M(staples);
  QDP_destroy_M(tmpmat4);

  for(mu=0; mu<4; mu++){
    QDP_destroy_M(tempmom_qdp[mu]);
  }
  for(i=0; i<6; i++) {
    QDP_destroy_M(Amu[i]);
  }

  /* NOTE(review): frees fblink[4..7]; this matches the fields created above
     only if QOP_common.ndim is 4 and OPP_DIR maps 0..3 onto 4..7 -- confirm */
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();

  double nflop = 96720;  /* flop count per site used for reporting */
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node;
  info->status = QOP_SUCCESS;
  //QOP_printf0("Time in slow g_force: %e\n", info->final_sec);
}
/*
 * Build the fat links fl[] and long links ll[] from the gauge field gf[].
 *
 *   fl      output: four fields are created here and filled with the
 *           one-link term plus the 3-, 5-, 7-staple and Lepage terms
 *   ll      output: four fields are created here and filled with
 *           naik * U(x) U(x+dir) U(x+2 dir)
 *   gf      input gauge links
 *   coeffs  path coefficients
 *
 * The fields stored in fl[] and ll[] are not destroyed here.
 */
static void
create_fn_links_qdp(QDP_ColorMatrix *fl[], QDP_ColorMatrix *ll[],
                    QDP_ColorMatrix *gf[], asqtad_path_coeff *coeffs)
{
  int n, mu;
  int nu, rho, sig;
  QDP_ColorMatrix *stap3, *stap5;
  QLA_Real link1;
#ifdef LLTIME
  double nflopfl = 61632;
  double nflopll = 1804;
#endif
  double dtimefl,dtimell;

  for(n=0; n<4; n++) {
    fl[n] = QDP_create_M();
    ll[n] = QDP_create_M();
  }
  stap3 = QDP_create_M();
  stap5 = QDP_create_M();

  dtimefl = -dclock();

  /* The Lepage term is generated below by feeding the 3-staple back into
     compute_gen_staple; the one-link weight is corrected here for that trick. */
  link1 = coeffs->one_link - 6.0*coeffs->lepage;

  /* fat links */
  for(mu=0; mu<4; mu++) {
    QDP_M_eq_r_times_M(fl[mu], &link1, gf[mu], QDP_all);
    for(nu=0; nu<4; nu++) {
      if(nu==mu) continue;
      /* 3-staple, saved for the Lepage and 5-staple terms */
      compute_gen_staple(stap3, mu, nu, gf[mu],
                         (double)coeffs->three_staple, gf, fl);
      /* Lepage: same plane again, built on the 3-staple */
      compute_gen_staple(NULL, mu, nu, stap3, coeffs->lepage, gf, fl);
      for(rho=0; rho<4; rho++) {
        if(rho==mu || rho==nu) continue;
        /* 5-staple, saved for the 7-staple term */
        compute_gen_staple(stap5, mu, rho, stap3,
                           (double)coeffs->five_staple, gf, fl);
        for(sig=0; sig<4; sig++) {
          if(sig==mu || sig==nu || sig==rho) continue;
          compute_gen_staple(NULL, mu, sig, stap5,
                             (double)coeffs->seven_staple, gf, fl);
        } /* sig */
      } /* rho */
    } /* nu */
  } /* mu */

  dtimell = -dclock();
  dtimefl = dtimefl - dtimell;
#ifdef LLTIME
  node0_printf("LLTIME(Fat): time = %e (Asqtad opt) mflops = %e\n",dtimefl,
               (Real)nflopfl*volume/(1e6*dtimefl*numnodes()) );
#endif

  /* long links */
  QLA_Real naik = coeffs->naik;
  for(mu=0; mu<4; mu++) {
    /* U(x+mu), then U(x)*U(x+mu) */
    QDP_M_eq_sM(stap3, gf[mu], QDP_neighbor[mu], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(stap5, gf[mu], stap3, QDP_all);
    QDP_discard_M(stap3);
    /* shift the two-link product and prepend one more link */
    QDP_M_eq_sM(stap3, stap5, QDP_neighbor[mu], QDP_forward, QDP_all);
    QDP_M_eq_M_times_M(ll[mu], gf[mu], stap3, QDP_all);
    QDP_M_eq_r_times_M(ll[mu], &naik, ll[mu], QDP_all);
  }

  dtimell += dclock();
#ifdef LLTIME
  node0_printf("LLTIME(long): time = %e (Asqtad opt) mflops = %e\n",dtimell,
               (Real)nflopll*volume/(1e6*dtimell*numnodes()) );
#endif

  QDP_destroy_M(stap3);
  QDP_destroy_M(stap5);
}
/*
 * Conjugate-gradient solve of (M_adjoint * M) psi = chi using QDP fields.
 *
 *   niter          - iterations between restarts; the solver restarts (and
 *                    recomputes the true residual) each time the iteration
 *                    count reaches a multiple of niter, and gives up once
 *                    it has reached 3*niter.
 *   rsqmin         - convergence threshold: stop when |r|^2 <= rsqmin*|chi|^2.
 *   final_rsq_ptr  - receives the last |r|^2 (not normalised by |chi|^2).
 *
 * Returns the number of M_adjoint*M applications performed.
 *
 * The gauge links, psi and chi are loaded from the site structure on entry;
 * psi is written back to the site structure before returning from the main
 * loop or on giving up.  (The early return right after the initial residual
 * does not write psi back; psi has not been modified at that point.)
 *
 * With LU defined the even-site preconditioned operator is used and all
 * vector operations run on QDP_even; otherwise everything runs on QDP_all.
 */
int
congrad_w(int niter, Real rsqmin, Real *final_rsq_ptr)
{
  int i;
  int iteration;        /* counter for iterations */
  double source_norm;
  double rsqstop;
  QLA_Real a, b;
  double rsq,oldrsq,pkp;        /* Sugar's a,b,resid**2,previous resid*2 */
                                /* pkp = cg_p.K.cg_p */
  QLA_Real mkappa;
  QLA_Real sum;
#ifdef CGTIME
  double dtime;
#endif
#ifdef LU
  mkappa = -kappa*kappa;
#else
  mkappa = -kappa;
#endif

  setup_cg();

  /* Pull the gauge field, initial guess and source out of the site struct. */
  for(i=0; i<4; i++) {
    set_M_from_site(gaugelink[i], F_OFFSET(link[i]),EVENANDODD);
  }
  set_D_from_site(psi, F_OFFSET(psi),EVENANDODD);
  set_D_from_site(chi, F_OFFSET(chi),EVENANDODD);

#ifdef PRESHIFT_LINKS
  {
    /* gaugelink[i+4] <- adjoint of gaugelink[i] shifted from the
       backward neighbour */
    QDP_ColorMatrix *tcm;
    tcm = QDP_create_M();
    for(i=0; i<4; i++) {
      QDP_M_eq_sM(tcm, gaugelink[i], QDP_neighbor[i], QDP_backward, QDP_all);
      QDP_M_eq_Ma(gaugelink[i+4], tcm, QDP_all);
    }
    QDP_destroy_M(tcm);
  }
#endif

#ifdef CGTIME
  dtime = -dclock();
#endif

  iteration=0;
 start:
  /* mp <-  M_adjoint*M*psi
     r,p <- chi - mp
     rsq = |r|^2
     source_norm = |chi|^2
  */
  rsq = source_norm = 0.0;

#ifdef LU
  QDP_D_eq_D(cgp, psi, QDP_even);
  dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
  dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

  dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
  dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_even);
  QDP_D_eq_D(cgp, cgr, QDP_even);

  QDP_r_eq_norm2_D(&sum, chi, QDP_even);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_even);
  rsq = sum;
#else
  /* The unpreconditioned operator below is applied on QDP_all, so the
     guess has to be copied on every site.  Copying on QDP_even only would
     leave the odd sites of cgp holding whatever was there before (on a
     restart: the old search direction), corrupting the initial residual. */
  QDP_D_eq_D(cgp, psi, QDP_all);
  dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

  dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_all);
  QDP_D_eq_D(cgp, cgr, QDP_all);

  QDP_r_eq_norm2_D(&sum, chi, QDP_all);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_all);
  rsq = sum;
#endif

  iteration++ ; /* iteration counts number of multiplications
                   by M_adjoint*M */
  total_iters++;
  /**if(this_node==0)printf("congrad2: source_norm = %e\n",source_norm);
     if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n",
     iteration,(double)rsq,(double)pkp,(double)a );**/
  rsqstop = rsqmin * source_norm;
  if( rsq <= rsqstop ){
    /* Already converged: psi is unchanged, nothing to write back. */
    *final_rsq_ptr= (Real)rsq;
    return (iteration);
  }

  /* main loop - do until convergence or time to restart */
  /*
    oldrsq <- rsq
    mp <- M_adjoint*M*p
    pkp <- p.M_adjoint*M.p
    a <- rsq/pkp
    psi <- psi + a*p
    r <- r - a*mp
    rsq <- |r|^2
    b <- rsq/oldrsq
    p <- r + b*p
  */
  do {
    oldrsq = rsq;
#ifdef LU
    dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
    dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

    dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
    dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_even);
    pkp = sum;
#else
    dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

    dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_all);
    pkp = sum;
#endif
    iteration++;
    total_iters++;

    a = rsq / pkp;
    QDP_D_peq_r_times_D(psi, &a, cgp, MYSUBSET);
    QDP_D_meq_r_times_D(cgr, &a, mp, MYSUBSET);
    QDP_r_eq_norm2_D(&sum, cgr, MYSUBSET);
    rsq = sum;

    /**if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n",
       iteration,(double)rsq,(double)pkp,(double)a );**/
    if( rsq <= rsqstop ){
      *final_rsq_ptr= (Real)rsq;
#ifdef CGTIME
      dtime += dclock();
      if(this_node==0)
        printf("CONGRAD2: time = %.2e size_r= %.2e iters= %d MF = %.1f\n",
               dtime,rsq,iteration,
               (double)6480*iteration*even_sites_on_node/(dtime*1e6));
      //(double)5616*iteration*even_sites_on_node/(dtime*1e6));
#endif
      set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);
      return (iteration);
    }

    b = rsq / oldrsq;
    QDP_D_eq_r_times_D_plus_D(cgp, &b, cgp, cgr, MYSUBSET);

  } while( iteration%niter != 0);

  /* Restart point reached: publish the current psi, then either restart
     from the true residual or give up after 3*niter iterations. */
  set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);

  if( iteration < 3*niter ) goto start;
  *final_rsq_ptr= (Real)rsq;
  return(iteration);
}
/* Smearing level i*/ static void QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, QDP_ColorMatrix * gf[4], REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_old[4], QDP_ColorMatrix *force_accum_naik_old[4], int internal_num_q_paths, Q_path *internal_q_paths_sorted, int *internal_netbackdir_table) { int i,j,k,lastdir=-99,ipath,ilink; int length,dir,odir; REAL coeff; QDP_ColorMatrix *tmat; QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];; QDP_ColorVector *vec_tmp[2]; int netbackdir; size_t nflops = 0; // table of net path displacements (backwards from usual convention) Q_path *this_path; // pointer to current path /* Allocate fields */ for(i=0;i<=MAX_PATH_LENGTH;i++){ oprod_along_path[i] = QDP_create_M(); } for(i=1;i<=MAX_PATH_LENGTH;i++){ // 0 element is never used (it's unit matrix) mats_along_path[i] = QDP_create_M(); } mat_tmp0 = QDP_create_M(); mat_tmp1 = QDP_create_M(); for(i=0; i<8; i++) stmp[i] = QDP_create_M(); tmat = QDP_create_M(); vec_tmp[0] = QDP_create_V(); vec_tmp[1] = QDP_create_V(); // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum[dir], QDP_all); // loop over paths, and loop over links in path for( ipath=0; ipath<internal_num_q_paths; ipath++ ){ this_path = &(internal_q_paths_sorted[ipath]); if(this_path->forwback== -1)continue; // skip backwards dslash length = this_path->length; netbackdir = internal_netbackdir_table[ipath]; // move f(i-1) force from current site in positive direction, // this corresponds to outer product |X><Y| calculated at the endpoint of the path if( netbackdir<8) { // Not a Naik path link_gather_connection_qdp(oprod_along_path[0] , force_accum_old[OPP_DIR(netbackdir)], tmat, netbackdir ); } else { // Naik path if( NULL==force_accum_naik_old ) { QOP_printf0( "hisq_force_multi_smearing_fnmat: mismatch:\n" ); QOP_printf0( 
"force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" ); exit(0); } // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION link_gather_connection_qdp(oprod_along_path[0] , force_accum_naik_old[OPP_DIR(netbackdir-8)], tmat, netbackdir ); } // figure out how much of the outer products along the path must be // recomputed. j is last one needing recomputation. k is first one. j=length-1; // default is recompute all if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0; for(ilink=j;ilink>=k;ilink--){ link_transport_connection_qdp( oprod_along_path[length-ilink], oprod_along_path[length-ilink-1], gf, mat_tmp0, stmp, this_path->dir[ilink] ); nflops += 198; } // maintain an array of transports "to this point" along the path. // Don't recompute beginning parts of path if same as last path ilink=0; // first link where new transport is needed // Sometimes we don't need the matrix for the last link if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ; ilink<k; ilink++ ){ if( ilink==0 ){ dir = this_path->dir[0]; if( GOES_FORWARDS(dir) ){ QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir], QDP_backward, QDP_all); QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all); QDP_discard_M(tmat); } else{ QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all); } } else { // ilink != 0 dir = OPP_DIR(this_path->dir[ilink]); link_transport_connection_qdp( mats_along_path[ilink+1], mats_along_path[ilink], gf, mat_tmp0, stmp, dir ); nflops += 198; } } // end loop over links // A path has (length+1) points, counting the ends. At first // point, no "down" direction links have their momenta "at this // point". At last, no "up" ... 
if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ilink=0; ilink<=k; ilink++ ){ if(ilink<length)dir = this_path->dir[ilink]; else dir=NODIR; coeff = this_path->coeff; if( (ilink%2)==1 )coeff = -coeff; // add in contribution to the force if( ilink<length && GOES_FORWARDS(dir) ){ link_gather_connection_qdp(mat_tmp1, oprod_along_path[length-ilink-1], tmat, dir ); if(ilink==0) { QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all); } else { QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all); nflops += 36; } if( ilink>0 && GOES_BACKWARDS(lastdir) ){ odir = OPP_DIR(lastdir); if( ilink==1 ){ QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all); QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } else{ link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], tmat, odir ); QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all); } QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all); nflops += 36; } lastdir = dir; } // end loop over links in path // } // end loop over paths // QDP_destroy_V( vec_tmp[0] ); QDP_destroy_V( vec_tmp[1] ); QDP_destroy_M( mat_tmp0 ); QDP_destroy_M( mat_tmp1 ); QDP_destroy_M( tmat ); for(i=0; i<8; i++) QDP_destroy_M(stmp[i]); for(i=0;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( oprod_along_path[i] ); } for(i=1;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( mats_along_path[i] ); } info->final_flop = ((double)nflops)*QDP_sites_on_node; return; }//hisq_force_multi_smearing_fnmat
/*
 * Accumulate the plaquette staples of link direction mu into `staple`
 * on the sites of `subset`:
 *
 *   upper(nu) = u[nu](x) u[mu](x+nu) u*[nu](x+mu)
 *   lower(nu) = u*[nu](x-nu) u[mu](x-nu) u[nu](x-nu+mu)
 *
 * summed over all nu != mu.  The lower staple is first formed on `osubset`
 * and then shifted backward in nu onto `subset`.
 *
 * With coeffs->adjoint_plaquette == 0 each staple S enters as
 *   staple += plaquette * S.
 * Otherwise each staple S enters with a site-dependent complex weight
 *   staple += adjoint_plaquette * (plaquette/adjoint_plaquette + dot(S, u[mu])) * S
 * where dot() is the result of QDP_C_peq_M_dot_M.
 */
static void
get_staple_plaq(QDP_ColorMatrix *staple, int mu, QDP_ColorMatrix *u[],
                QOP_gauge_coeffs_t *coeffs,
                QDP_Subset subset, QDP_Subset osubset)
{
#define NC QDP_get_nc(staple)
  QDP_Lattice *lat = QDP_get_lattice_M(staple);
  int nd = QDP_ndim_L(lat);
  QDP_Shift *neighbor = QDP_neighbor_L(lat);
  QLA_Real plaq = coeffs->plaquette;
  QLA_Real adpl = coeffs->adjoint_plaquette;
#if 1
  int use_adjoint = (adpl != 0);

  /* Product buffers live for the whole call ... */
  QDP_ColorMatrix *work       = QDP_create_M_L(lat);
  QDP_ColorMatrix *lower_src  = QDP_create_M_L(lat);
  QDP_ColorMatrix *upper_half = QDP_create_M_L(lat);
  /* ... the per-site weight only exists when the adjoint term is active. */
  QDP_Complex *weight = NULL;
  if(use_adjoint) weight = QDP_create_C_L(lat);

  for(int nu=0; nu<nd; nu++) {
    if(nu != mu) {
      /* Shift destinations are created fresh for every nu and destroyed
         at the bottom of the loop body. */
      QDP_ColorMatrix *unu_at_mu = QDP_create_M_L(lat);  /* u[nu](x+mu) */
      QDP_ColorMatrix *umu_at_nu = QDP_create_M_L(lat);  /* u[mu](x+nu) */
      QDP_ColorMatrix *lower     = QDP_create_M_L(lat);  /* lower staple at x */

      QDP_M_eq_sM(unu_at_mu, u[nu], neighbor[mu], QDP_forward, QDP_all_L(lat));
      /* u*[nu](x) u[mu](x) on the opposite subset */
      QDP_M_eq_Ma_times_M(work, u[nu], u[mu], osubset);
      QDP_M_eq_sM(umu_at_nu, u[mu], neighbor[nu], QDP_forward, subset);
      /* ... times u[nu](x+mu), then pulled back by one step in nu */
      QDP_M_eq_M_times_M(lower_src, work, unu_at_mu, osubset);
      QDP_M_eq_sM(lower, lower_src, neighbor[nu], QDP_backward, subset);
      /* u[nu](x) u[mu](x+nu) */
      QDP_M_eq_M_times_M(upper_half, u[nu], umu_at_nu, subset);

      if(!use_adjoint) {
        /* lower += upper, then add both with the plaquette weight */
        QDP_M_peq_M_times_Ma(lower, upper_half, unu_at_mu, subset);
        QDP_M_peq_r_times_M(staple, &plaq, lower, subset);
      } else {
        QLA_Complex z;
        QLA_c_eq_r(z, plaq/adpl);

        /* upper staple, weighted */
        QDP_C_eq_c(weight, &z, subset);
        QDP_M_eq_M_times_Ma(work, upper_half, unu_at_mu, subset);
        QDP_C_peq_M_dot_M(weight, work, u[mu], subset);
        QDP_C_eq_r_times_C(weight, &adpl, weight, subset);
        QDP_M_peq_C_times_M(staple, weight, work, subset);

        /* lower staple, weighted */
        QDP_C_eq_c(weight, &z, subset);
        QDP_C_peq_M_dot_M(weight, lower, u[mu], subset);
        QDP_C_eq_r_times_C(weight, &adpl, weight, subset);
        QDP_M_peq_C_times_M(staple, weight, lower, subset);
      }

      QDP_destroy_M(unu_at_mu);
      QDP_destroy_M(umu_at_nu);
      QDP_destroy_M(lower);
    }
  }  /* closes nu loop */

  if(use_adjoint) QDP_destroy_C(weight);
  QDP_destroy_M(work);
  QDP_destroy_M(lower_src);
  QDP_destroy_M(upper_half);
#else
  /* Alternative implementation via path_prod(); not compiled. */
  QDP_ColorMatrix *t = QDP_create_M_L(lat);
  int nu, path[3];
  QDP_Subset subs[2];
  subs[0] = subset;
  subs[1] = osubset;
  for(nu=0; nu<nd; nu++) {
    if (nu == mu) continue;
    path[0] = 1+nu;
    path[1] = -(1+mu);
    path[2] = -(1+nu);
    path_prod(u, t, path, 3, 1, subs, neighsubeo);
    QDP_M_peq_M(staple, t, subset);
    path[0] = -(1+nu);
    path[1] = -(1+mu);
    path[2] = 1+nu;
    path_prod(u, t, path, 3, 1, subs, neighsubeo);
    QDP_M_peq_M(staple, t, subset);
  }
  QDP_destroy_M(t);
#endif
}