// like link_transport, except doesn't multiply by link matrices. // use this, for example, // when storing the intermediate HISQ force (a connection) at the lattice site // associated with a link static void link_gather_connection_qdp( QDP_ColorMatrix *dest, QDP_ColorMatrix *src, QDP_ColorMatrix *work, int dir ){ if (dir >= 8) //3 link shift needed { dir=dir-8; //do initial 2 shifts if( GOES_FORWARDS(dir) ) { QDP_M_eq_sM(dest, src, QDP_neighbor[dir], QDP_forward, QDP_all); QDP_M_eq_sM(work, dest, QDP_neighbor[dir], QDP_forward, QDP_all); } else { /* GOES_BACKWARDS(dir) */ QDP_M_eq_sM(dest, src, QDP_neighbor[OPP_DIR(dir)], QDP_backward, QDP_all); QDP_M_eq_sM(work, dest, QDP_neighbor[OPP_DIR(dir)], QDP_backward, QDP_all); } } else{ //only 1 link shift needed QDP_M_eq_M(work, src, QDP_all); } //do final shift if( GOES_FORWARDS(dir) ) { QDP_M_eq_sM(dest, work, QDP_neighbor[dir], QDP_forward, QDP_all); } else { /* GOES_BACKWARDS(dir) */ QDP_M_eq_sM(dest, work, QDP_neighbor[OPP_DIR(dir)], QDP_backward, QDP_all); } } /* link_gather_connection_qdp */
/* The 3 flavor version of side_link_force used * * to optimize fermion transports */ static void side_link_forces(int mu, int nu, REAL coeff[], QDP_ColorVector **Path, QDP_ColorVector **Path_nu, QDP_ColorVector **Path_mu, QDP_ColorVector **Path_numu, int nsrc) { REAL m_coeff[nsrc]; int i; for(i=0; i<nsrc; i++) { m_coeff[i] = -coeff[i]; } if(GOES_FORWARDS(mu)) { /* nu * * Add the force : +----+ * * mu | | * * x (x) * * o o */ if(GOES_FORWARDS(nu)) add_forces_to_mom(Path_numu, Path, mu, coeff, nsrc); else //add_forces_to_mom(Path,Path_numu,OPP_DIR(mu),m_coeff, nsrc); add_forces_to_mom(Path_numu,Path,mu,m_coeff, nsrc); } else /*GOES_BACKWARDS(mu)*/ { /* Add the force : o o * * mu | | * * x (x) * * +----+ * * nu */ if(GOES_FORWARDS(nu)) add_forces_to_mom(Path_nu, Path_mu, mu, m_coeff, nsrc); else add_forces_to_mom(Path_mu, Path_nu, OPP_DIR(mu), coeff, nsrc); } }
static int find_backwards_gather( Q_path *path ){ int disp[4], i; /* compute total displacement of path */ for(i=XUP;i<=TUP;i++)disp[i]=0; for( i=0; i<path->length; i++){ if( GOES_FORWARDS(path->dir[i]) ) disp[ path->dir[i] ]++; else disp[OPP_DIR(path->dir[i]) ]--; } // There must be an elegant way?? if( disp[XUP]==+1 && disp[YUP]== 0 && disp[ZUP]== 0 && disp[TUP]== 0 ) return(XDOWN); if( disp[XUP]==-1 && disp[YUP]== 0 && disp[ZUP]== 0 && disp[TUP]== 0 ) return(XUP); if( disp[XUP]== 0 && disp[YUP]==+1 && disp[ZUP]== 0 && disp[TUP]== 0 ) return(YDOWN); if( disp[XUP]== 0 && disp[YUP]==-1 && disp[ZUP]== 0 && disp[TUP]== 0 ) return(YUP); if( disp[XUP]== 0 && disp[YUP]== 0 && disp[ZUP]==+1 && disp[TUP]== 0 ) return(ZDOWN); if( disp[XUP]== 0 && disp[YUP]== 0 && disp[ZUP]==-1 && disp[TUP]== 0 ) return(ZUP); if( disp[XUP]== 0 && disp[YUP]== 0 && disp[ZUP]== 0 && disp[TUP]==+1 ) return(TDOWN); if( disp[XUP]== 0 && disp[YUP]== 0 && disp[ZUP]== 0 && disp[TUP]==-1 ) return(TUP); if( disp[XUP]==+3 && disp[YUP]== 0 && disp[ZUP]== 0 && disp[TUP]== 0 ) return(X3DOWN); if( disp[XUP]==-3 && disp[YUP]== 0 && disp[ZUP]== 0 && disp[TUP]== 0 ) return(X3UP); if( disp[XUP]== 0 && disp[YUP]==+3 && disp[ZUP]== 0 && disp[TUP]== 0 ) return(Y3DOWN); if( disp[XUP]== 0 && disp[YUP]==-3 && disp[ZUP]== 0 && disp[TUP]== 0 ) return(Y3UP); if( disp[XUP]== 0 && disp[YUP]== 0 && disp[ZUP]==+3 && disp[TUP]== 0 ) return(Z3DOWN); if( disp[XUP]== 0 && disp[YUP]== 0 && disp[ZUP]==-3 && disp[TUP]== 0 ) return(Z3UP); if( disp[XUP]== 0 && disp[YUP]== 0 && disp[ZUP]== 0 && disp[TUP]==+3 ) return(T3DOWN); if( disp[XUP]== 0 && disp[YUP]== 0 && disp[ZUP]== 0 && disp[TUP]==-3 ) return(T3UP); QOP_printf0("OOOPS: NODIR\n"); exit(0); return( NODIR ); } //find_backwards_gather
/* special case to transport a "connection" by one link, does both parities */ static void link_transport_connection_qdp( QDP_ColorMatrix *dest, QDP_ColorMatrix *src, QDP_ColorMatrix *gf[4], QDP_ColorMatrix *work, QDP_ColorMatrix *st[8], int dir ){ if( GOES_FORWARDS(dir) ) { QDP_M_eq_M(work, src, QDP_all); QDP_M_eq_sM(st[dir], work, QDP_neighbor[dir], QDP_forward, QDP_all); QDP_M_eq_M_times_M(dest, gf[dir], st[dir], QDP_all); QDP_discard_M(st[dir]); } else { /* GOES_BACKWARDS(dir) */ QDP_M_eq_Ma_times_M(work, gf[OPP_DIR(dir)], src, QDP_all); QDP_M_eq_sM(st[dir], work, QDP_neighbor[OPP_DIR(dir)], QDP_backward,QDP_all); QDP_M_eq_M(dest, st[dir], QDP_all); QDP_discard_M(st[dir]); } } /* link_transport_connection_qdp */
void QOP_asqtad_force_multi_asvec_qdp(QOP_info_t *info, QDP_ColorMatrix *links[], QDP_ColorMatrix *force[], QOP_asqtad_coeffs_t *coef, REAL eps[], QDP_ColorVector *xin[], int nsrc) { #define NC QDP_get_nc(xin[0]) REAL coeff[nsrc]; REAL OneLink[nsrc], Lepage[nsrc], Naik[nsrc], FiveSt[nsrc], ThreeSt[nsrc], SevenSt[nsrc]; REAL mNaik[nsrc], mLepage[nsrc], mFiveSt[nsrc], mThreeSt[nsrc], mSevenSt[nsrc]; QDP_ColorVector *P3[8][nsrc]; QDP_ColorVector *P5[8][nsrc]; QDP_ColorVector *P5tmp[8][8][nsrc]; QDP_ColorVector *P5s[4][nsrc]; QDP_ColorVector *P5tmps[4][8][nsrc]; //QDP_ColorVector *xin[nsrc]; QDP_ColorVector *xintmp[8][nsrc]; QDP_ColorVector *Pmu[nsrc]; QDP_ColorVector *Pmutmp[8][nsrc]; QDP_ColorVector *Pnumu[nsrc]; QDP_ColorVector *Pnumutmp[8][nsrc]; QDP_ColorVector *Prhonumu[nsrc]; QDP_ColorVector *Prhonumutmp[8][nsrc]; QDP_ColorVector *P7[nsrc]; QDP_ColorVector *P7tmp[8][nsrc]; QDP_ColorVector *P7rho[nsrc]; QDP_ColorVector *ttv[nsrc]; int i, dir; int mu, nu, rho, sig; double nflop1 = 253935; double nflop2 = 433968; double nflop = nflop1 + (nflop2-nflop1)*(nsrc-1); double dtime; dtime = -QOP_time(); ASQTAD_FORCE_BEGIN; QOP_trace("test 1\n"); /* setup parallel transport */ QDP_ColorMatrix *tmpmat = QDP_create_M(); for(i=0; i<QOP_common.ndim; i++) { fbshift[i] = QDP_neighbor[i]; fbshiftdir[i] = QDP_forward; fblink[i] = links[i]; fbshift[OPP_DIR(i)] = QDP_neighbor[i]; fbshiftdir[OPP_DIR(i)] = QDP_backward; fblink[OPP_DIR(i)] = QDP_create_M(); QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all); QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all); } tv = ttv; for(i=0; i<nsrc; i++) { tv[i] = QDP_create_V(); } QOP_trace("test 2\n"); /* Allocate temporary vectors */ for(i=0; i<nsrc; i++) { Pmu[i] = QDP_create_V(); Pnumu[i] = QDP_create_V(); Prhonumu[i] = QDP_create_V(); P7[i] = QDP_create_V(); P7rho[i] = QDP_create_V(); for(dir=0; dir<8; dir++) { xintmp[dir][i] = QDP_create_V(); Pmutmp[dir][i] = QDP_create_V(); Pnumutmp[dir][i] = QDP_create_V(); Prhonumutmp[dir][i] = QDP_create_V(); P7tmp[dir][i] = QDP_create_V(); } #if 1 for(mu=0; mu<4; mu++) { P5s[mu][i] = QDP_create_V(); for(dir=0; dir<8; dir++) { P5tmps[mu][dir][i] = QDP_create_V(); } } #else for(mu=0; mu<8; mu++) { P5[mu][i] = QDP_create_V(); for(dir=0; dir<8; dir++) { P5tmp[mu][dir][i] = QDP_create_V(); //printf("%p %p\n", P5tmp[mu][dir][i], &(P5tmp[mu][dir][i])); fflush(stdout); if(P5tmp[mu][dir][i]==NULL) { fprintf(stderr, "error: can't create V\n"); QDP_abort(); } } } #endif } //printf("%p\n", P5tmp[0][4][0]); fflush(stdout); for(mu=0; mu<8; mu++) { for(i=0; i<nsrc; i++) { P3[mu][i] = QDP_create_V(); //P5[mu][i] = QDP_create_V(); } } for(mu=0; mu<4; mu++) { tempmom_qdp[mu] = force[mu]; QDP_M_eqm_M(tempmom_qdp[mu], tempmom_qdp[mu], QDP_odd); } /* Path coefficients times fermion epsilon */ /* Load path coefficients from table */ for(i=0; i<nsrc; i++) { OneLink[i] = coef->one_link * eps[i]; Naik[i] = coef->naik * eps[i]; mNaik[i] = -Naik[i]; ThreeSt[i] = coef->three_staple * eps[i]; mThreeSt[i] = -ThreeSt[i]; FiveSt[i] = coef->five_staple * eps[i]; mFiveSt[i] = -FiveSt[i]; SevenSt[i] = coef->seven_staple * eps[i]; mSevenSt[i] = -SevenSt[i]; Lepage[i] = coef->lepage * eps[i]; mLepage[i] = -Lepage[i]; } #if 0 printf("nsrc = %i\n", nsrc); printf("coeffs = %g %g %g %g %g %g\n", OneLink[0], ThreeSt[0], FiveSt[0], SevenSt[0], Lepage[0], Naik[0]); #endif /* *************************************** */ QOP_trace("start force loop\n"); for(mu=0; mu<8; mu++) { //u_shift_hw_fermion(temp_x_qdp, Pmu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]); u_shift_color_vecs(xin, Pmu, OPP_DIR(mu), nsrc, xintmp[OPP_DIR(mu)]); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) { //u_shift_hw_fermion(Pmu, P3[sig], sig, temp_hw[sig]); u_shift_color_vecs(Pmu, P3[sig], sig, nsrc, Pmutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu]: x--+ * * | | * * o o * * the 1 link in the path: - (numbering starts form 0) */ add_forces_to_mom(P3[sig], Pmu, sig, mThreeSt, nsrc); } } for(nu=0; nu<8; nu++) if( (nu!=mu)&&(nu!=OPP_DIR(mu)) ) { int nP5 = 0; //Pnumu = hw_qdp[OPP_DIR(nu)]; //u_shift_hw_fermion(Pmu, Pnumu, OPP_DIR(nu), temp_hw[OPP_DIR(nu)]); u_shift_color_vecs(Pmu, Pnumu, OPP_DIR(nu), nsrc, Pmutmp[OPP_DIR(nu)]); //QDP_V_veq_V(Pnumu, P3[OPP_DIR(nu)], QDP_all, nsrc); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) { #if 1 for(i=0; i<nsrc; i++) { P5[sig][i] = P5s[nP5][i]; for(dir=0; dir<8; dir++) P5tmp[sig][dir][i] = P5tmps[nP5][dir][i]; } #endif nP5++; //u_shift_hw_fermion(Pnumu, P5[sig], sig, temp_hw[sig]); u_shift_color_vecs(Pnumu, P5[sig], sig, nsrc, Pnumutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu]: x--+ * * | | * * o o * * the 2 link in the path: + (numbering starts form 0) */ add_forces_to_mom(P5[sig], Pnumu, sig, FiveSt, nsrc); } } QOP_trace("test 4\n"); for(rho=0; rho<8; rho++) if( (rho!=mu)&&(rho!=OPP_DIR(mu)) && (rho!=nu)&&(rho!=OPP_DIR(nu)) ) { //Prhonumu = hw_qdp[OPP_DIR(rho)]; //u_shift_hw_fermion(Pnumu, Prhonumu, OPP_DIR(rho), // temp_hw[OPP_DIR(rho)] ); u_shift_color_vecs(Pnumu, Prhonumu, OPP_DIR(rho), nsrc, Pnumutmp[OPP_DIR(rho)]); //QDP_V_veq_V(Prhonumu, P5[OPP_DIR(rho)], QDP_all, nsrc); for(sig=0; sig<8; sig++) if( (sig!=mu )&&(sig!=OPP_DIR(mu )) && (sig!=nu )&&(sig!=OPP_DIR(nu )) && (sig!=rho)&&(sig!=OPP_DIR(rho)) ) { /* Length 7 paths */ //P7 = hw_qdp[sig]; //u_shift_hw_fermion(Prhonumu, P7, sig, temp_hw[sig] ); QOP_trace("test 43\n"); u_shift_color_vecs(Prhonumu, P7, sig, nsrc, Prhonumutmp[sig]); QOP_trace("test 44\n"); //QDP_V_eq_r_times_V(P7[0], &SevenSt[0], P7[0], QDP_all); //QDP_V_eq_r_times_V(P7[1], &SevenSt[1], P7[1], QDP_all); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu+rho]: x--+ * * | | * * o o * * the 3 link in the path: - (numbering starts form 0) */ QOP_trace("test 45\n"); add_forces_to_mom(P7, Prhonumu, sig, mSevenSt, nsrc); QOP_trace("test 46\n"); //mom_meq_force(P7, Prhonumu, sig); } /* Add the force F_rho the 2(4) link in the path: + */ //P7rho = hw_qdp[rho]; //u_shift_hw_fermion(P7, P7rho, rho, temp_hw[rho]); QOP_trace("test 47\n"); u_shift_color_vecs(P7, P7rho, rho, nsrc, P7tmp[rho]); QOP_trace("test 48\n"); side_link_forces(rho,sig,SevenSt,Pnumu,P7,Prhonumu,P7rho, nsrc); QOP_trace("test 49\n"); //side_link_3f_force2(rho,sig,Pnumu,P7,Prhonumu,P7rho); /* Add the P7rho vector to P5 */ for(i=0; i<nsrc; i++) { if(FiveSt[i]!=0) coeff[i] = SevenSt[i]/FiveSt[i]; else coeff[i] = 0; QOP_trace("test 410\n"); QDP_V_peq_r_times_V(P5[sig][i], &coeff[i], P7rho[i], QDP_all); QOP_trace("test 411\n"); } } /* sig */ } /* rho */ QOP_trace("test 5\n"); #define P5nu P7 for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) { /* Length 5 paths */ /* Add the force F_nu the 1(3) link in the path: - */ //P5nu = hw_qdp[nu]; //u_shift_hw_fermion(P5[sig], P5nu, nu, temp_hw[nu]); u_shift_color_vecs(P5[sig], P5nu, nu, nsrc, P5tmp[sig][nu]); side_link_forces(nu, sig, mFiveSt, Pmu, P5[sig], Pnumu, P5nu, nsrc); /* Add the P5nu vector to P3 */ for(i=0; i<nsrc; i++) { if(ThreeSt[i]!=0) coeff[i] = FiveSt[i]/ThreeSt[i]; else coeff[i] = 0; QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all); } } /* sig */ } /* nu */ #define Pmumu Pnumu #define Pmumutmp Pnumutmp #define P5sig Prhonumu #define P5sigtmp Prhonumutmp #define P3mu P7 #define Popmu P7 #define Pmumumu P7 /* Now the Lepage term... It is the same as 5-link paths with nu=mu and FiveSt=Lepage. */ //u_shift_hw_fermion(Pmu, Pmumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)] ); u_shift_color_vecs(Pmu, Pmumu, OPP_DIR(mu), nsrc, Pmutmp[OPP_DIR(mu)]); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) { //P5sig = hw_qdp[sig]; //u_shift_hw_fermion(Pmumu, P5sig, sig, temp_hw[sig]); u_shift_color_vecs(Pmumu, P5sig, sig, nsrc, Pmumutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu]: x--+ * * | | * * o o * * the 2 link in the path: + (numbering starts form 0) */ add_forces_to_mom(P5sig, Pmumu, sig, Lepage, nsrc); } /* Add the force F_nu the 1(3) link in the path: - */ //P5nu = hw_qdp[mu]; //u_shift_hw_fermion(P5sig, P5nu, mu, temp_hw[mu]); u_shift_color_vecs(P5sig, P5nu, mu, nsrc, P5sigtmp[mu]); side_link_forces(mu, sig, mLepage, Pmu, P5sig, Pmumu, P5nu, nsrc); /* Add the P5nu vector to P3 */ for(i=0; i<nsrc; i++) { if(ThreeSt[i]!=0) coeff[i] = Lepage[i]/ThreeSt[i]; else coeff[i] = 0; QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all); } /* Length 3 paths (Not the Naik term) */ /* Add the force F_mu the 0(2) link in the path: + */ if(GOES_FORWARDS(mu)) { //P3mu = hw_qdp[mu]; /* OK to clobber P5nu */ //u_shift_hw_fermion(P3[sig], P3mu, mu, temp_hw[mu]); //u_shift_color_vecs(P3[sig], P3mu, mu, 2, temp_hw[mu]); for(i=0; i<nsrc; i++) { QDP_V_eq_V(P5sig[i], P3[sig][i], QDP_all); } u_shift_color_vecs(P5sig, P3mu, mu, nsrc, P5sigtmp[mu]); } /* The above shift is not needed if mu is backwards */ side_link_forces(mu, sig, ThreeSt, xin, P3[sig], Pmu, P3mu, nsrc); } /* Finally the OneLink and the Naik term */ if(GOES_BACKWARDS(mu)) { /* Do only the forward terms in the Dslash */ /* Because I have shifted with OPP_DIR(mu) Pmu is a forward * * shift. */ /* The one link */ add_forces_to_mom(Pmu, xin, OPP_DIR(mu), OneLink, nsrc); /* For the same reason Pmumu is the forward double link */ /* Popmu is a backward shift */ //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */ //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]); u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]); /* The Naik */ /* link no 1: - */ add_forces_to_mom(Pmumu, Popmu, OPP_DIR(mu), mNaik, nsrc); /* Pmumumu can overwrite Popmu which is no longer needed */ //Pmumumu = hw_qdp[OPP_DIR(mu)]; //u_shift_hw_fermion(Pmumu, Pmumumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]); u_shift_color_vecs(Pmumu, Pmumumu, OPP_DIR(mu), nsrc, Pmumutmp[OPP_DIR(mu)]); /* link no 0: + */ add_forces_to_mom(Pmumumu, xin, OPP_DIR(mu), Naik, nsrc); } else { /* The rest of the Naik terms */ //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */ //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]); u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]); /* link no 2: + */ /* Pmumu is double backward shift */ add_forces_to_mom(Popmu, Pmumu, mu, Naik, nsrc); } /* Here we have to do together the Naik term and the one link term */ }/* mu */ QOP_trace("test 6\n"); QOP_trace("test 7\n"); for(mu=0; mu<4; mu++) { QDP_M_eq_M(tmpmat, tempmom_qdp[mu], QDP_even); QDP_M_eqm_M(tmpmat, tempmom_qdp[mu], QDP_odd); QDP_M_eq_antiherm_M(tempmom_qdp[mu], tmpmat, QDP_all); } QDP_destroy_M(tmpmat); //printf("%p\n", P5tmp[0][4][0]); fflush(stdout); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } /* Free temporary vectors */ for(i=0; i<nsrc; i++) { QDP_destroy_V(Pmu[i]); QDP_destroy_V(Pnumu[i]); QDP_destroy_V(Prhonumu[i]); QDP_destroy_V(P7[i]); QDP_destroy_V(P7rho[i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(dir=0; dir<8; dir++) { QDP_destroy_V(xintmp[dir][i]); QDP_destroy_V(Pmutmp[dir][i]); QDP_destroy_V(Pnumutmp[dir][i]); QDP_destroy_V(Prhonumutmp[dir][i]); QDP_destroy_V(P7tmp[dir][i]); } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(mu=0; mu<4; mu++) { //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } QDP_destroy_V(P5s[mu][i]); //QDP_destroy_V(P5[mu][i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(dir=0; dir<8; dir++) { //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } QDP_destroy_V(P5tmps[mu][dir][i]); //printf("%p\n", P5tmp[mu][dir][i]); fflush(stdout); //QDP_destroy_V(P5tmp[mu][dir][i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("here3\n"); fflush(stdout); } for(mu=0; mu<8; mu++) { for(i=0; i<nsrc; i++) { QDP_destroy_V(P3[mu][i]); } //QDP_destroy_V(P5[mu][0]); //QDP_destroy_V(P5[mu][1]); } for(i=0; i<nsrc; i++) { QDP_destroy_V(tv[i]); } //if(QDP_this_node==0) { printf("here4\n"); fflush(stdout); } for(i=4; i<8; i++) { QDP_destroy_M(fblink[i]); } dtime += QOP_time(); info->final_sec = dtime; info->final_flop = nflop*QDP_sites_on_node; info->status = QOP_SUCCESS; ASQTAD_FORCE_END; #undef NC }
static su3_matrix *create_longlinks_qop_milc(QOP_info_t *info, QOP_asqtad_coeffs_t *coeffs, QOP_GaugeField *gauge) { register int i; register site *s; int ipath,dir; int disp[4]; int num_q_paths = ks_act_paths.num_q_paths; Q_path *q_paths = ks_act_paths.q_paths; register su3_matrix *long1; su3_matrix *staple, *tempmat1; int nflop = 1804; double dtime; su3_matrix *t_ll; char myname[] = "create_longlinks_qop_milc"; dtime=-dclock(); if( phases_in != 1){ node0_printf("BOTCH: %s needs phases in\n",myname); terminate(0); } /* Allocate space for t_longlink if NULL */ t_ll = (su3_matrix *)special_alloc(sites_on_node*4*sizeof(su3_matrix)); if(t_ll==NULL){ printf("NODE %d: no room for t_ll\n",this_node); terminate(1); } staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(staple == NULL){ printf("%s: Can't malloc temporary\n",myname); terminate(1); } tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("%s: Can't malloc temporary\n",myname); terminate(1); } for (dir=XUP; dir<=TUP; dir++){ /* loop over longlink directions */ /* set longlink to zero */ FORALLSITES(i,s){ long1 = &(t_ll[4*i+dir]); clear_su3mat( long1 ); } /* loop over paths, checking for ones with total displacement 3*dir */ for( ipath=0; ipath<num_q_paths; ipath++ ){ /* loop over paths */ /* compute total displacement of path */ for(i=XUP;i<=TUP;i++)disp[i]=0; for( i=0; i<q_paths[ipath].length; i++){ if( GOES_FORWARDS(q_paths[ipath].dir[i]) ) disp[ q_paths[ipath].dir[i] ]++; else disp[OPP_DIR(q_paths[ipath].dir[i]) ]--; } for( disp[dir]+=3,i=XUP; i<=TUP; i++)if(disp[i]!=0)break; if( i<=TUP )continue; /* skip if path doesn't go to right place */ /**printf("ipath = %d, found a path: ",ipath); for(j=0;j<q_paths[ipath].length;j++)printf("\t%d", q_paths[ipath].dir[j]); printf("\n");**/ path_product_qop_milc( q_paths[ipath].dir, q_paths[ipath].length, tempmat1, gauge ); FORALLSITES(i,s){ su3_adjoint( &tempmat1[i], &staple[i] ); long1 = &(t_ll[4*i+dir]); scalar_mult_add_su3_matrix( long1, &staple[i], -q_paths[ipath].coeff, long1 ); /* minus sign in coeff. because we used backward path*/ } } /* ipath */
/* Smearing level i*/ static void QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, QDP_ColorMatrix * gf[4], REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_old[4], QDP_ColorMatrix *force_accum_naik_old[4], int internal_num_q_paths, Q_path *internal_q_paths_sorted, int *internal_netbackdir_table) { int i,j,k,lastdir=-99,ipath,ilink; int length,dir,odir; REAL coeff; QDP_ColorMatrix *tmat; QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];; QDP_ColorVector *vec_tmp[2]; int netbackdir; size_t nflops = 0; // table of net path displacements (backwards from usual convention) Q_path *this_path; // pointer to current path /* Allocate fields */ for(i=0;i<=MAX_PATH_LENGTH;i++){ oprod_along_path[i] = QDP_create_M(); } for(i=1;i<=MAX_PATH_LENGTH;i++){ // 0 element is never used (it's unit matrix) mats_along_path[i] = QDP_create_M(); } mat_tmp0 = QDP_create_M(); mat_tmp1 = QDP_create_M(); for(i=0; i<8; i++) stmp[i] = QDP_create_M(); tmat = QDP_create_M(); vec_tmp[0] = QDP_create_V(); vec_tmp[1] = QDP_create_V(); // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum[dir], QDP_all); // loop over paths, and loop over links in path for( ipath=0; ipath<internal_num_q_paths; ipath++ ){ this_path = &(internal_q_paths_sorted[ipath]); if(this_path->forwback== -1)continue; // skip backwards dslash length = this_path->length; netbackdir = internal_netbackdir_table[ipath]; // move f(i-1) force from current site in positive direction, // this corresponds to outer product |X><Y| calculated at the endpoint of the path if( netbackdir<8) { // Not a Naik path link_gather_connection_qdp(oprod_along_path[0] , force_accum_old[OPP_DIR(netbackdir)], tmat, netbackdir ); } else { // Naik path if( NULL==force_accum_naik_old ) { QOP_printf0( "hisq_force_multi_smearing_fnmat: mismatch:\n" ); QOP_printf0( "force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" ); exit(0); } // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION link_gather_connection_qdp(oprod_along_path[0] , force_accum_naik_old[OPP_DIR(netbackdir-8)], tmat, netbackdir ); } // figure out how much of the outer products along the path must be // recomputed. j is last one needing recomputation. k is first one. j=length-1; // default is recompute all if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0; for(ilink=j;ilink>=k;ilink--){ link_transport_connection_qdp( oprod_along_path[length-ilink], oprod_along_path[length-ilink-1], gf, mat_tmp0, stmp, this_path->dir[ilink] ); nflops += 198; } // maintain an array of transports "to this point" along the path. // Don't recompute beginning parts of path if same as last path ilink=0; // first link where new transport is needed // Sometimes we don't need the matrix for the last link if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ; ilink<k; ilink++ ){ if( ilink==0 ){ dir = this_path->dir[0]; if( GOES_FORWARDS(dir) ){ QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir], QDP_backward, QDP_all); QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all); QDP_discard_M(tmat); } else{ QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all); } } else { // ilink != 0 dir = OPP_DIR(this_path->dir[ilink]); link_transport_connection_qdp( mats_along_path[ilink+1], mats_along_path[ilink], gf, mat_tmp0, stmp, dir ); nflops += 198; } } // end loop over links // A path has (length+1) points, counting the ends. At first // point, no "down" direction links have their momenta "at this // point". At last, no "up" ... if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ilink=0; ilink<=k; ilink++ ){ if(ilink<length)dir = this_path->dir[ilink]; else dir=NODIR; coeff = this_path->coeff; if( (ilink%2)==1 )coeff = -coeff; // add in contribution to the force if( ilink<length && GOES_FORWARDS(dir) ){ link_gather_connection_qdp(mat_tmp1, oprod_along_path[length-ilink-1], tmat, dir ); if(ilink==0) { QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all); } else { QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all); nflops += 36; } if( ilink>0 && GOES_BACKWARDS(lastdir) ){ odir = OPP_DIR(lastdir); if( ilink==1 ){ QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all); QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } else{ link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], tmat, odir ); QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all); } QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all); nflops += 36; } lastdir = dir; } // end loop over links in path // } // end loop over paths // QDP_destroy_V( vec_tmp[0] ); QDP_destroy_V( vec_tmp[1] ); QDP_destroy_M( mat_tmp0 ); QDP_destroy_M( mat_tmp1 ); QDP_destroy_M( tmat ); for(i=0; i<8; i++) QDP_destroy_M(stmp[i]); for(i=0;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( oprod_along_path[i] ); } for(i=1;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( mats_along_path[i] ); } info->final_flop = ((double)nflops)*QDP_sites_on_node; return; }//hisq_force_multi_smearing_fnmat
/* update the momenta with the gauge force */ void QOP_symanzik_1loop_gauge_force(QOP_info_t *info, QOP_GaugeField *gauge, QOP_Force *force, QOP_gauge_coeffs_t *coeffs, Real eps) { register int i,dir; register site *st; su3_matrix tmat1; register Real eb3; /* Note: eps now includes eps*beta */ register su3_matrix* momentum; su3_matrix *staple, *tempmat1; /* lengths of various kinds of loops */ int *loop_length = get_loop_length(); /* number of rotations/reflections for each kind */ int *loop_num = get_loop_num(); /* table of directions, 1 for each kind of loop */ int ***loop_table = get_loop_table(); /* table of coefficients in action, for various "representations" (actually, powers of the trace) */ Real **loop_coeff = get_loop_coeff(); /* We make our own */ int max_length = get_max_length(); /* For Symanzik 1 loop! */ int nloop = get_nloop(); int nreps = get_nreps(); su3_matrix *forwardlink[4]; su3_matrix *tmpmom[4]; int nflop = 153004; /* For Symanzik1 action */ Real final_flop; double dtime; int j,k; int *dirs,length; int *path_dir,path_length; int ln,iloop; Real action,act2,new_term; int ncount; char myname[] = "imp_gauge_force"; dtime=-dclock(); info->status = QOP_FAIL; /* Parity requirements */ if(gauge->evenodd != QOP_EVENODD || force->evenodd != QOP_EVENODD ) { printf("QOP_asqtad_force: Bad parity gauge %d force %d\n", gauge->evenodd, force->evenodd); return; } /* Map field pointers to local static pointers */ FORALLUPDIR(dir){ forwardlink[dir] = gauge->g + dir*sites_on_node; tmpmom[dir] = force->f + dir*sites_on_node; } /* Check loop coefficients */ if(coeffs->plaquette != loop_coeff[0][0] || coeffs->rectangle != loop_coeff[1][0] || coeffs->parallelogram != loop_coeff[2][0]) { printf("%s(%d): Path coeffs don't match\n",myname,this_node); return; } /* Allocate arrays according to action */ dirs = (int *)malloc(max_length*sizeof(int)); if(dirs == NULL){ printf("%s(%d): Can't malloc dirs\n",myname,this_node); return; } path_dir = (int *)malloc(max_length*sizeof(int)); if(path_dir == NULL){ printf("%s(%d): Can't malloc path_dir\n",myname,this_node); return; } staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(staple == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); return; } tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); return; } eb3 = eps/3.0; /* Loop over directions, update mom[dir] */ for(dir=XUP; dir<=TUP; dir++){ FORALLSITES(i,st)for(j=0;j<3;j++)for(k=0;k<3;k++){ staple[i].e[j][k]=cmplx(0.0,0.0); } END_LOOP ncount=0; for(iloop=0;iloop<nloop;iloop++){ length=loop_length[iloop]; for(ln=0;ln<loop_num[iloop];ln++){ /**printf("UPD: "); printpath( loop_table[iloop][ln], length );**/ /* set up dirs. we are looking at loop starting in "XUP" direction, rotate so it starts in "dir" direction. */ for(k=0;k<length;k++){ if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){ dirs[k]=(dir+loop_table[iloop][ln][k] )% 4; } else { dirs[k]=OPP_DIR( (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 ); } } path_length= length-1; /* generalized "staple" */ /* check for links in direction of momentum to be updated, each such link gives a contribution. Note the direction of the path - opposite the link. */ for(k=0;k<length;k++)if( dirs[k]==dir||dirs[k]==OPP_DIR(dir)) { if( GOES_FORWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[j] = dirs[(k+j+1)%length]; } if( GOES_BACKWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[path_length-1-j] = OPP_DIR(dirs[(k+j+1)%length]); } /**if(dir==XUP)printf("X_UPDATE PATH: "); printpath( path_dir, path_length );**/ path_product(path_dir,path_length, tempmat1); /* We took the path in the other direction from our old convention in order to get it to end up "at our site", so now take adjoint */ /* then compute "single_action" contribution to staple */ FORALLSITES(i,st){ su3_adjoint( &(tempmat1[i]), &tmat1 ); /* first we compute the fundamental term */ new_term = loop_coeff[iloop][0]; /* now we add in the higher representations */ if(nreps > 1){ node0_printf("WARNING: THIS CODE IS NOT TESTED\n"); exit(0); act2=1.0; action = 3.0 - realtrace_su3(forwardlink[dir]+i, &tmat1 ); for(j=1;j<nreps;j++){ act2 *= action; new_term += loop_coeff[iloop][j]*act2*(Real)(j+1); } } /* end if nreps > 1 */ scalar_mult_add_su3_matrix( &(staple[i]), &tmat1, new_term, &(staple[i]) ); } END_LOOP ncount++; } /* k (location in path) */ } /* ln */ } /* iloop */ /* Now multiply the staple sum by the link, then update momentum */ FORALLSITES(i,st){ mult_su3_na( forwardlink[dir]+i, &(staple[i]), &tmat1 ); momentum = tmpmom[dir] + i; scalar_mult_sub_su3_matrix( momentum, &tmat1, eb3, momentum ); } END_LOOP
/*-------------------------------------------------------------------*/ void load_lnglinks(info_t *info, su3_matrix *lng, ks_component_paths *p, su3_matrix *links ) { register int i; int ipath,dir; int disp[4]; int num_q_paths = p->num_q_paths; Q_path *q_paths = p->q_paths; register su3_matrix *long1; su3_matrix *staple = NULL, *tempmat1 = NULL; char myname[] = "load_lnglinks"; double dtime = -dclock(); if( phases_in != 1){ node0_printf("BOTCH: %s needs phases in\n",myname); terminate(0); } staple = create_m_special(); tempmat1 = create_m_special(); for (dir=XUP; dir<=TUP; dir++){ /* loop over longlink directions */ /* set longlink to zero */ FORALLFIELDSITES_OMP(i,private(long1)){ long1 = lng + 4*i +dir; clear_su3mat( long1 ); } END_LOOP_OMP; /* loop over paths, checking for ones with total displacement 3*dir */ for( ipath=0; ipath<num_q_paths; ipath++ ){ /* loop over paths */ /* compute total displacement of path */ for(i=XUP;i<=TUP;i++)disp[i]=0; for( i=0; i<q_paths[ipath].length; i++){ if( GOES_FORWARDS(q_paths[ipath].dir[i]) ) disp[ q_paths[ipath].dir[i] ]++; else disp[OPP_DIR(q_paths[ipath].dir[i]) ]--; } for( disp[dir]+=3,i=XUP; i<=TUP; i++)if(disp[i]!=0)break; if( i<=TUP )continue; /* skip if path doesn't go to right place */ /**printf("ipath = %d, found a path: ",ipath); for(j=0;j<q_paths[ipath].length;j++)printf("\t%d", q_paths[ipath].dir[j]); printf("\n");**/ // path_product_field( q_paths[ipath].dir, q_paths[ipath].length, // tempmat1, links ); path_product_fields( links, q_paths[ipath].dir, q_paths[ipath].length, tempmat1 ); FORALLFIELDSITES(i){ su3_adjoint( &tempmat1[i], &staple[i] ); long1 = lng + 4*i + dir; scalar_mult_add_su3_matrix( long1, &staple[i], -q_paths[ipath].coeff, long1 ); /* minus sign in coeff. because we used backward path*/ } } /* ipath */ } /* loop over directions */ destroy_m_special(staple); staple = NULL; destroy_m_special(tempmat1); tempmat1 = NULL; dtime += dclock(); info->final_sec = dtime; info->final_flop = 1728.*volume/numnodes(); /* (formerly 1804) */ } /* load_lnglinks() */
void load_fatlinks_cpu(info_t *info, su3_matrix *fat, ks_component_paths *p, su3_matrix *links){ register int i; int dir; register su3_matrix *fat1; su3_matrix *staple = NULL, *tempmat1 = NULL; char myname[] = "load_fatlinks_cpu"; #ifdef ASQ_OPTIMIZED_FATTENING int nu,rho,sig ; Real one_link; #else int ipath; int disp[4]; int num_q_paths = p->num_q_paths; Q_path *q_paths = p->q_paths; #endif double dtime = -dclock(); staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(staple == NULL){ printf("%s: Can't malloc temporary\n",myname); terminate(1); } tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("%s: Can't malloc temporary\n",myname); terminate(1); } #ifndef ASQ_OPTIMIZED_FATTENING /* general case code */ for (dir=XUP; dir<=TUP; dir++){ /* loop over fatlink directions */ /* set fatlink to zero */ FORALLFIELDSITES(i){ fat1 = fat + 4*i + dir; clear_su3mat( fat1 ); } /* loop over paths, checking for ones with total displacement 1*dir */ for( ipath=0; ipath<num_q_paths; ipath++ ){ /* loop over paths */ /* compute total displacement of path */ for(i=XUP;i<=TUP;i++)disp[i]=0; for( i=0; i<q_paths[ipath].length; i++){ if( GOES_FORWARDS(q_paths[ipath].dir[i]) ) disp[ q_paths[ipath].dir[i] ]++; else disp[OPP_DIR(q_paths[ipath].dir[i]) ]--; } for( disp[dir]+=1,i=XUP; i<=TUP; i++)if(disp[i]!=0)break; if( i<=TUP )continue; /* skip if path doesn't go to right place */ /**printf("dir = %d, found a path: ",dir); for(j=0;j<q_paths.[ipath].length;j++)printf("\t%d", q_paths[ipath].dir[j]); printf("\n");**/ // path_product( q_paths[ipath].dir, q_paths[ipath].length, tempmat1 ); // path_product_field( q_paths[ipath].dir, q_paths[ipath].length, // tempmat1, links ); path_product_fields( links, q_paths[ipath].dir, q_paths[ipath].length, tempmat1 ); FORALLFIELDSITES(i){ su3_adjoint( &tempmat1[i], &staple[i] ); fat1 = fat + 4*i + dir; scalar_mult_add_su3_matrix( fat1, &staple[i], -q_paths[ipath].coeff, fat1 ); /* minus sign in coeff. because we used backward path*/ } } /* ipath */ } /* loop over directions */ #else /* ASQ_OPTIMIZED_FATTENING, for Asq and Asqtad actions */ /* Optimized fattening code for the Asq and Asqtad actions. * * I assume that path 0 is the one link path 2 the 3-staple * * path 3 the 5-staple path 4 the 7-staple and path 5 the Lepage term. * * Path 1 is the Naik term. */ /* to fix up the Lepage term, included by a trick below */ one_link = (p->act_path_coeff.one_link - 6.0*p->act_path_coeff.lepage); for (dir=XUP; dir<=TUP; dir++){ FORALLFIELDSITES(i) /* Intialize fat links with c_1*U_\mu(x) */ { fat1 = fat + 4*i + dir; scalar_mult_su3_matrix(links + 4*i + dir, one_link, fat1 ); } /* Skip the rest of the calculation if the remaining coefficients vanish */ if( p->act_path_coeff.three_staple == 0.0 && p->act_path_coeff.lepage == 0.0 && p->act_path_coeff.five_staple == 0.0)continue; for(nu=XUP; nu<=TUP; nu++) if(nu!=dir) { // compute_gen_staple_site(staple,dir,nu,F_OFFSET(link[dir]), // *t_fl, act_path_coeff.three_staple); compute_gen_staple_field(staple, dir, nu, links + dir, 4, fat, p->act_path_coeff.three_staple, links); /* The Lepage term */ /* Note this also involves modifying c_1 (above) */ compute_gen_staple_field(NULL, dir, nu, staple, 1, fat, p->act_path_coeff.lepage, links); for(rho=XUP; rho<=TUP; rho++) if((rho!=dir)&&(rho!=nu)) { compute_gen_staple_field( tempmat1, dir, rho, staple, 1, fat, p->act_path_coeff.five_staple, links); for(sig=XUP; sig<=TUP; sig++) if((sig!=dir)&&(sig!=nu)&&(sig!=rho)) { compute_gen_staple_field(NULL,dir,sig,tempmat1, 1, fat, p->act_path_coeff.seven_staple, links); } /* sig */ } /* rho */ } /* nu */ }/* dir */ #endif special_free(staple); staple = NULL; special_free(tempmat1); tempmat1 = NULL; dtime += dclock(); info->final_sec += dtime; info->final_flop = 61632.*volume/numnodes(); if( p->act_path_coeff.three_staple == 0.0 && p->act_path_coeff.lepage == 0.0 && p->act_path_coeff.five_staple == 0.0) info->final_flop = 72.*volume/numnodes(); } /* load_fatlinks_cpu */
/* update the momenta with the gauge force */ void imp_gauge_force_cpu( Real eps, field_offset mom_off ){ register int i,dir; register site *st; su3_matrix tmat1,tmat2; register Real eb3; register anti_hermitmat* momentum; su3_matrix *staple, *tempmat1; /* lengths of various kinds of loops */ int *loop_length = get_loop_length(); /* number of rotations/reflections for each kind */ int *loop_num = get_loop_num(); /* table of directions, 1 for each kind of loop */ int ***loop_table = get_loop_table(); /* table of coefficients in action, for various "representations" (actually, powers of the trace) */ Real **loop_coeff = get_loop_coeff(); int max_length = get_max_length(); int nloop = get_nloop(); int nreps = get_nreps(); #ifdef GFTIME int nflop = 153004; /* For Symanzik1 action */ double dtime; #endif int j,k; int *dirs,length; int *path_dir,path_length; int ln,iloop; Real action,act2,new_term; int ncount; char myname[] = "imp_gauge_force"; #ifdef GFTIME dtime=-dclock(); #endif dirs = (int *)malloc(max_length*sizeof(int)); if(dirs == NULL){ printf("%s(%d): Can't malloc dirs\n",myname,this_node); terminate(1); } path_dir = (int *)malloc(max_length*sizeof(int)); if(path_dir == NULL){ printf("%s(%d): Can't malloc path_dir\n",myname,this_node); terminate(1); } staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(staple == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); terminate(1); } tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); terminate(1); } eb3 = eps*beta/3.0; /* Loop over directions, update mom[dir] */ for(dir=XUP; dir<=TUP; dir++){ FORALLSITES(i,st)for(j=0;j<3;j++)for(k=0;k<3;k++){ staple[i].e[j][k]=cmplx(0.0,0.0); } END_LOOP ncount=0; for(iloop=0;iloop<nloop;iloop++){ length=loop_length[iloop]; for(ln=0;ln<loop_num[iloop];ln++){ /**printf("UPD: "); printpath( loop_table[iloop][ln], length );**/ /* set up dirs. we are looking at loop starting in "XUP" direction, rotate so it starts in "dir" direction. */ for(k=0;k<length;k++){ if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){ dirs[k]=(dir+loop_table[iloop][ln][k] )% 4; } else { dirs[k]=OPP_DIR( (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 ); } } path_length= length-1; /* generalized "staple" */ /* check for links in direction of momentum to be updated, each such link gives a contribution. Note the direction of the path - opposite the link. */ for(k=0;k<length;k++)if( dirs[k]==dir||dirs[k]==OPP_DIR(dir)) { if( GOES_FORWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[j] = dirs[(k+j+1)%length]; } if( GOES_BACKWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[path_length-1-j] = OPP_DIR(dirs[(k+j+1)%length]); } /**if(dir==XUP)printf("X_UPDATE PATH: "); printpath( path_dir, path_length );**/ path_product(path_dir,path_length, tempmat1); /* We took the path in the other direction from our old convention in order to get it to end up "at our site", so now take adjoint */ /* then compute "single_action" contribution to staple */ FORALLSITES(i,st){ su3_adjoint( &(tempmat1[i]), &tmat1 ); /* first we compute the fundamental term */ new_term = loop_coeff[iloop][0]; /* now we add in the higher representations */ if(nreps > 1){ node0_printf("WARNING: THIS CODE IS NOT TESTED\n"); exit(0); act2=1.0; action = 3.0 - realtrace_su3(&(st->link[dir]), &tmat1 ); for(j=1;j<nreps;j++){ act2 *= action; new_term += loop_coeff[iloop][j]*act2*(Real)(j+1); } } /* end if nreps > 1 */ scalar_mult_add_su3_matrix( &(staple[i]), &tmat1, new_term, &(staple[i]) ); } END_LOOP ncount++; } /* k (location in path) */ } /* ln */ } /* iloop */ /* Now multiply the staple sum by the link, then update momentum */ FORALLSITES(i,st){ mult_su3_na( &(st->link[dir]), &(staple[i]), &tmat1 ); momentum = (anti_hermitmat *)F_PT(st,mom_off); uncompress_anti_hermitian( &momentum[dir], &tmat2 ); scalar_mult_sub_su3_matrix( &tmat2, &tmat1, eb3, &(staple[i]) ); make_anti_hermitian( &(staple[i]), &momentum[dir] ); } END_LOOP