/*
 * Multiply gauge links along a path and store the result in m.
 *
 *   u          gauge links, indexed by 0-based direction
 *   m          destination field; written on subset[subl], where subl is the
 *              sublattice index reached after all steps have been applied
 *   path       path[i] = +(d+1) is a step in the + direction of d,
 *              path[i] = -(d+1) is a step in the - direction of d
 *   len        number of steps in path
 *   subl       index into subset[] of the starting sublattice
 *   subset     sublattices, indexed by sublattice number
 *   neighsubl  maps (sublattice index, step) to the sublattice index reached
 *
 * Works through the work fields tm[] and shift temporaries sm[][] and the
 * eosub() helper, all of which are defined outside this block.
 *
 * If len <= 0 nothing is computed and m is left untouched.
 */
static void
path_prod(QDP_ColorMatrix *u[], QDP_ColorMatrix *m, int path[], int len,
          int subl, QDP_Subset subset[], int (*neighsubl)(int subl, int dir))
{
  QDP_ColorMatrix *p=NULL, *s=NULL;
  QDP_Lattice *lat = QDP_get_lattice_M(m);
  int nd = QDP_ndim_L(lat);
  int sn = 0;  /* selects which of the two tm[] buffers is current */

  for(int i=0; i<len; i++) {
    int dir = abs(path[i])-1;
    /* if the path moves in the + dir then we shift from the backward dir */
    QDP_ShiftDir fb = path[i]<0 ? QDP_forward : QDP_backward;
    if(fb==QDP_backward) { /* path is moving in + dir */
      if(i==0) {
        QDP_M_eq_Ma(tm[sn], u[dir], subset[subl]);
      } else {
        QDP_M_eq_Ma_times_M(tm[sn], u[dir], p, subset[subl]);
        QDP_discard_M(p);
      }
      subl = neighsubl(subl, path[i]);
      s = sm[sn][nd+dir];
      QDP_discard_M(s);
      QDP_M_eq_sM(s, tm[sn], QDP_neighbor_L(lat)[dir], fb, eosub(subl));
      sn = 1-sn;  /* swap work buffers for the next step */
      p = s;
    } else { /* path is moving in - dir */
      if(i==0) {
        subl = neighsubl(subl, path[i]);
        QDP_M_eq_M(tm[1-sn], u[dir], subset[subl]);
      } else {
        QDP_M_eq_M(tm[sn], p, subset[subl]);
        QDP_discard_M(p);
        subl = neighsubl(subl, path[i]);
        s = sm[sn][dir];
        QDP_discard_M(s);
        QDP_M_eq_sM(s, tm[sn], QDP_neighbor_L(lat)[dir], fb, eosub(subl));
        QDP_M_eq_M_times_M(tm[1-sn], u[dir], s, subset[subl]);
        QDP_discard_M(s);
      }
      p = tm[1-sn];
    }
  }

  /* p is still NULL for an empty path: there is nothing to copy. */
  if(p!=NULL) {
    QDP_M_eq_M(m, p, subset[subl]);
    QDP_discard_M(p);
  }
  /* s is still NULL when no shift was ever started, e.g. for a path that
     consists of a single step in the - direction. */
  if(s!=NULL) {
    QDP_discard_M(s);
  }
}
/*
 * Special case: transport a "connection" by one link.  Every operation is
 * carried out on QDP_all, i.e. on both parities.
 *
 *   dest  result
 *   src   connection to transport
 *   gf    gauge links for the four forward directions
 *   work  scratch field, overwritten
 *   st    shift temporaries, indexed by dir; st[dir] is used and discarded
 *   dir   direction of the link (forward or backward)
 *
 * Forward:   dest = gf[dir] * (src gathered with a forward shift along dir).
 * Backward:  dest = (gf[OPP_DIR(dir)]^dagger * src) gathered with a backward
 *            shift along OPP_DIR(dir).
 */
static void
link_transport_connection_qdp(QDP_ColorMatrix *dest, QDP_ColorMatrix *src,
                              QDP_ColorMatrix *gf[4], QDP_ColorMatrix *work,
                              QDP_ColorMatrix *st[8], int dir)
{
  QDP_ColorMatrix *shifted = st[dir];

  if( !GOES_FORWARDS(dir) ) {
    /* backward link: multiply by the adjoint link first, then gather */
    const int up = OPP_DIR(dir);
    QDP_M_eq_Ma_times_M(work, gf[up], src, QDP_all);
    QDP_M_eq_sM(shifted, work, QDP_neighbor[up], QDP_backward, QDP_all);
    QDP_M_eq_M(dest, shifted, QDP_all);
    QDP_discard_M(shifted);
    return;
  }

  /* forward link: gather first, then multiply by the link */
  QDP_M_eq_M(work, src, QDP_all);
  QDP_M_eq_sM(shifted, work, QDP_neighbor[dir], QDP_forward, QDP_all);
  QDP_M_eq_M_times_M(dest, gf[dir], shifted, QDP_all);
  QDP_discard_M(shifted);
} /* link_transport_connection_qdp */
// like link_transport, except doesn't multiply by link matrices. // use this, for example, // when storing the intermediate HISQ force (a connection) at the lattice site // associated with a link static void link_gather_connection_qdp( QDP_ColorMatrix *dest, QDP_ColorMatrix *src, QDP_ColorMatrix *work, int dir ){ if (dir >= 8) //3 link shift needed { dir=dir-8; //do initial 2 shifts if( GOES_FORWARDS(dir) ) { QDP_M_eq_sM(dest, src, QDP_neighbor[dir], QDP_forward, QDP_all); QDP_M_eq_sM(work, dest, QDP_neighbor[dir], QDP_forward, QDP_all); } else { /* GOES_BACKWARDS(dir) */ QDP_M_eq_sM(dest, src, QDP_neighbor[OPP_DIR(dir)], QDP_backward, QDP_all); QDP_M_eq_sM(work, dest, QDP_neighbor[OPP_DIR(dir)], QDP_backward, QDP_all); } } else{ //only 1 link shift needed QDP_M_eq_M(work, src, QDP_all); } //do final shift if( GOES_FORWARDS(dir) ) { QDP_M_eq_sM(dest, work, QDP_neighbor[dir], QDP_forward, QDP_all); } else { /* GOES_BACKWARDS(dir) */ QDP_M_eq_sM(dest, work, QDP_neighbor[OPP_DIR(dir)], QDP_backward, QDP_all); } } /* link_gather_connection_qdp */
/*
 * Lua binding that gets or sets one direction of a gauge field.
 *
 * Lua arguments (as parsed by the argument macros below):
 *   g    gauge object
 *   dim  1-based direction
 *   m    optional color matrix
 *
 * Without m: pushes a color-matrix wrapper around g->links[dim-1] and
 * returns 1.  With m: copies m->field into g->links[dim-1] on the whole
 * lattice and returns 0.
 *
 * Raises a Lua error if dim is outside 1..ndim.
 */
static int
qopqdp_gauge_call(lua_State *L)
{
  BEGIN_ARGS;
  GET_GAUGE(g);
  GET_INT(dim);
  OPT_QOPQDP_CMATRIX(m, NULL);
  END_ARGS;

  /* dim comes straight from the script and is used as an array index.
     NOTE(review): assumes g holds one link per lattice dimension of
     g->qlat -- confirm against the gauge struct definition. */
  int nd = QDP_ndim_L(g->qlat);
  if(dim<1 || dim>nd) {
    return luaL_error(L, "gauge direction %d out of range 1..%d", dim, nd);
  }

  if(m==NULL) { // return colormatrix for direction
    qopqdp_cmatrix_wrap(L, g->lat, g->links[dim-1], 0);
    return 1;
  }
  // set direction
  QDP_M_eq_M(g->links[dim-1], m->field, QDP_all_L(g->qlat));
  return 0;
}
/*
 * Asqtad fermion force for nsrc color-vector sources.
 *
 *   info   receives final_sec, final_flop and status
 *   links  gauge links for the forward directions
 *   force  momentum/force fields; aliased through tempmom_qdp[] and updated
 *          in place (see the sign handling below)
 *   coef   asqtad path coefficients
 *   eps    per-source weight multiplying every path coefficient
 *   xin    source color vectors, nsrc of them
 *   nsrc   number of sources
 *
 * Outline:
 *   - builds fbshift/fbshiftdir/fblink for all 8 directions; the backward
 *     links are newly created fields holding the adjoint of the
 *     backward-shifted forward link
 *   - allocates all temporary vectors
 *   - negates force[mu] on odd sites, accumulates path contributions through
 *     add_forces_to_mom() / side_link_forces(), then negates the odd sites
 *     again while taking the antihermitian part
 *   - frees everything it allocated
 *
 * fbshift, fbshiftdir, fblink, tv, tempmom_qdp, u_shift_color_vecs,
 * add_forces_to_mom, side_link_forces and ASQTAD_FORCE_BEGIN/END are
 * defined outside this block.
 * NOTE(review): the helpers presumably read tv[] and tempmom_qdp[] -- confirm.
 */
void
QOP_asqtad_force_multi_asvec_qdp(QOP_info_t *info, QDP_ColorMatrix *links[],
                                 QDP_ColorMatrix *force[],
                                 QOP_asqtad_coeffs_t *coef, REAL eps[],
                                 QDP_ColorVector *xin[], int nsrc)
{
#define NC QDP_get_nc(xin[0])
  /* Path coefficients scaled by eps[], and their negatives (m prefix). */
  REAL coeff[nsrc];
  REAL OneLink[nsrc], Lepage[nsrc], Naik[nsrc], FiveSt[nsrc], ThreeSt[nsrc], SevenSt[nsrc];
  REAL mNaik[nsrc], mLepage[nsrc], mFiveSt[nsrc], mThreeSt[nsrc], mSevenSt[nsrc];

  QDP_ColorVector *P3[8][nsrc];

  /* With the "#if 1" branches below, P5/P5tmp only hold aliases into the
     smaller P5s/P5tmps pools; they own no fields themselves. */
  QDP_ColorVector *P5[8][nsrc];
  QDP_ColorVector *P5tmp[8][8][nsrc];
  QDP_ColorVector *P5s[4][nsrc];
  QDP_ColorVector *P5tmps[4][8][nsrc];

  //QDP_ColorVector *xin[nsrc];
  QDP_ColorVector *xintmp[8][nsrc];
  QDP_ColorVector *Pmu[nsrc];
  QDP_ColorVector *Pmutmp[8][nsrc];
  QDP_ColorVector *Pnumu[nsrc];
  QDP_ColorVector *Pnumutmp[8][nsrc];
  QDP_ColorVector *Prhonumu[nsrc];
  QDP_ColorVector *Prhonumutmp[8][nsrc];
  QDP_ColorVector *P7[nsrc];
  QDP_ColorVector *P7tmp[8][nsrc];
  QDP_ColorVector *P7rho[nsrc];
  QDP_ColorVector *ttv[nsrc];

  int i, dir;
  int mu, nu, rho, sig;

  /* Flop estimate per site: nflop1 for one source, and (nflop2-nflop1) for
     each additional source. */
  double nflop1 = 253935;
  double nflop2 = 433968;
  double nflop = nflop1 + (nflop2-nflop1)*(nsrc-1);
  double dtime;
  dtime = -QOP_time();

  ASQTAD_FORCE_BEGIN;

  QOP_trace("test 1\n");
  /* setup parallel transport */
  QDP_ColorMatrix *tmpmat = QDP_create_M();
  for(i=0; i<QOP_common.ndim; i++) {
    fbshift[i] = QDP_neighbor[i];
    fbshiftdir[i] = QDP_forward;
    fblink[i] = links[i];
    fbshift[OPP_DIR(i)] = QDP_neighbor[i];
    fbshiftdir[OPP_DIR(i)] = QDP_backward;
    /* backward link = adjoint of the forward link shifted from behind */
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }

  tv = ttv;
  for(i=0; i<nsrc; i++) {
    tv[i] = QDP_create_V();
  }

  QOP_trace("test 2\n");
  /* Allocate temporary vectors */
  for(i=0; i<nsrc; i++) {
    Pmu[i] = QDP_create_V();
    Pnumu[i] = QDP_create_V();
    Prhonumu[i] = QDP_create_V();
    P7[i] = QDP_create_V();
    P7rho[i] = QDP_create_V();
    for(dir=0; dir<8; dir++) {
      xintmp[dir][i] = QDP_create_V();
      Pmutmp[dir][i] = QDP_create_V();
      Pnumutmp[dir][i] = QDP_create_V();
      Prhonumutmp[dir][i] = QDP_create_V();
      P7tmp[dir][i] = QDP_create_V();
    }
#if 1
    /* Only 4 P5 slots are allocated; the force loop hands them out through
       the counter nP5. */
    for(mu=0; mu<4; mu++) {
      P5s[mu][i] = QDP_create_V();
      for(dir=0; dir<8; dir++) {
        P5tmps[mu][dir][i] = QDP_create_V();
      }
    }
#else
    for(mu=0; mu<8; mu++) {
      P5[mu][i] = QDP_create_V();
      for(dir=0; dir<8; dir++) {
        P5tmp[mu][dir][i] = QDP_create_V();
        if(P5tmp[mu][dir][i]==NULL) {
          fprintf(stderr, "error: can't create V\n");
          QDP_abort();
        }
      }
    }
#endif
  }

  for(mu=0; mu<8; mu++) {
    for(i=0; i<nsrc; i++) {
      P3[mu][i] = QDP_create_V();
    }
  }

  /* Work directly on the caller's force fields.  Odd sites are negated here
     and negated again in the final antihermitian projection. */
  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = force[mu];
    QDP_M_eqm_M(tempmom_qdp[mu], tempmom_qdp[mu], QDP_odd);
  }

  /* Path coefficients times fermion epsilon */
  /* Load path coefficients from table */
  for(i=0; i<nsrc; i++) {
    OneLink[i] = coef->one_link * eps[i];
    Naik[i] = coef->naik * eps[i];
    mNaik[i] = -Naik[i];
    ThreeSt[i] = coef->three_staple * eps[i];
    mThreeSt[i] = -ThreeSt[i];
    FiveSt[i] = coef->five_staple * eps[i];
    mFiveSt[i] = -FiveSt[i];
    SevenSt[i] = coef->seven_staple * eps[i];
    mSevenSt[i] = -SevenSt[i];
    Lepage[i] = coef->lepage * eps[i];
    mLepage[i] = -Lepage[i];
  }

#if 0
  printf("nsrc = %i\n", nsrc);
  printf("coeffs = %g %g %g %g %g %g\n", OneLink[0], ThreeSt[0], FiveSt[0], SevenSt[0], Lepage[0], Naik[0]);
#endif

  /* *************************************** */

  QOP_trace("start force loop\n");
  for(mu=0; mu<8; mu++) {
    /* Pmu = xin shifted along OPP_DIR(mu) */
    u_shift_color_vecs(xin, Pmu, OPP_DIR(mu), nsrc, xintmp[OPP_DIR(mu)]);

    for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) {
      u_shift_color_vecs(Pmu, P3[sig], sig, nsrc, Pmutmp[sig]);

      if(GOES_FORWARDS(sig)) {
        /* Add the force F_sig[x+mu]:         x--+             *
         *                                   |   |             *
         *                                   o   o             *
         * the 1 link in the path: - (numbering starts from 0) */
        add_forces_to_mom(P3[sig], Pmu, sig, mThreeSt, nsrc);
      }
    }

    for(nu=0; nu<8; nu++) if( (nu!=mu)&&(nu!=OPP_DIR(mu)) ) {
      int nP5 = 0;  /* next free slot in the P5s/P5tmps pools */
      u_shift_color_vecs(Pmu, Pnumu, OPP_DIR(nu), nsrc, Pmutmp[OPP_DIR(nu)]);
      for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) {
#if 1
        /* point P5[sig] and P5tmp[sig] at pool slot nP5 */
        for(i=0; i<nsrc; i++) {
          P5[sig][i] = P5s[nP5][i];
          for(dir=0; dir<8; dir++) P5tmp[sig][dir][i] = P5tmps[nP5][dir][i];
        }
#endif
        nP5++;
        u_shift_color_vecs(Pnumu, P5[sig], sig, nsrc, Pnumutmp[sig]);
        if(GOES_FORWARDS(sig)) {
          /* Add the force F_sig[x+mu+nu]:      x--+             *
           *                                   |   |             *
           *                                   o   o             *
           * the 2 link in the path: + (numbering starts from 0) */
          add_forces_to_mom(P5[sig], Pnumu, sig, FiveSt, nsrc);
        }
      }
      QOP_trace("test 4\n");
      for(rho=0; rho<8; rho++) if( (rho!=mu)&&(rho!=OPP_DIR(mu)) && (rho!=nu)&&(rho!=OPP_DIR(nu)) ) {
        u_shift_color_vecs(Pnumu, Prhonumu, OPP_DIR(rho), nsrc, Pnumutmp[OPP_DIR(rho)]);
        for(sig=0; sig<8; sig++) if( (sig!=mu )&&(sig!=OPP_DIR(mu )) && (sig!=nu )&&(sig!=OPP_DIR(nu )) && (sig!=rho)&&(sig!=OPP_DIR(rho)) ) {
          /* Length 7 paths */
          QOP_trace("test 43\n");
          u_shift_color_vecs(Prhonumu, P7, sig, nsrc, Prhonumutmp[sig]);
          QOP_trace("test 44\n");
          if(GOES_FORWARDS(sig)) {
            /* Add the force F_sig[x+mu+nu+rho]:  x--+             *
             *                                   |   |             *
             *                                   o   o             *
             * the 3 link in the path: - (numbering starts from 0) */
            QOP_trace("test 45\n");
            add_forces_to_mom(P7, Prhonumu, sig, mSevenSt, nsrc);
            QOP_trace("test 46\n");
          }
          /* Add the force F_rho the 2(4) link in the path: + */
          QOP_trace("test 47\n");
          u_shift_color_vecs(P7, P7rho, rho, nsrc, P7tmp[rho]);
          QOP_trace("test 48\n");
          side_link_forces(rho,sig,SevenSt,Pnumu,P7,Prhonumu,P7rho, nsrc);
          QOP_trace("test 49\n");
          /* Add the P7rho vector to P5 */
          for(i=0; i<nsrc; i++) {
            /* guard against a vanishing five-staple coefficient */
            if(FiveSt[i]!=0) coeff[i] = SevenSt[i]/FiveSt[i];
            else coeff[i] = 0;
            QOP_trace("test 410\n");
            QDP_V_peq_r_times_V(P5[sig][i], &coeff[i], P7rho[i], QDP_all);
            QOP_trace("test 411\n");
          }
        } /* sig */
      } /* rho */
      QOP_trace("test 5\n");
      /* P7 is free again here; reuse it under the name P5nu. */
#define P5nu P7
      for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) {
        /* Length 5 paths */
        /* Add the force F_nu the 1(3) link in the path: - */
        u_shift_color_vecs(P5[sig], P5nu, nu, nsrc, P5tmp[sig][nu]);
        side_link_forces(nu, sig, mFiveSt, Pmu, P5[sig], Pnumu, P5nu, nsrc);
        /* Add the P5nu vector to P3 */
        for(i=0; i<nsrc; i++) {
          /* guard against a vanishing three-staple coefficient */
          if(ThreeSt[i]!=0) coeff[i] = FiveSt[i]/ThreeSt[i];
          else coeff[i] = 0;
          QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all);
        }
      } /* sig */
    } /* nu */

    /* The remaining terms reuse the vectors above under new names.  These
       aliases are not #undef'd at the end of the function. */
#define Pmumu Pnumu
#define Pmumutmp Pnumutmp
#define P5sig Prhonumu
#define P5sigtmp Prhonumutmp
#define P3mu P7
#define Popmu P7
#define Pmumumu P7
    /* Now the Lepage term... It is the same as 5-link paths with
       nu=mu and FiveSt=Lepage. */
    u_shift_color_vecs(Pmu, Pmumu, OPP_DIR(mu), nsrc, Pmutmp[OPP_DIR(mu)]);

    for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) {
      u_shift_color_vecs(Pmumu, P5sig, sig, nsrc, Pmumutmp[sig]);
      if(GOES_FORWARDS(sig)) {
        /* Add the force F_sig[x+mu+nu]:      x--+             *
         *                                   |   |             *
         *                                   o   o             *
         * the 2 link in the path: + (numbering starts from 0) */
        add_forces_to_mom(P5sig, Pmumu, sig, Lepage, nsrc);
      }
      /* Add the force F_nu the 1(3) link in the path: - */
      u_shift_color_vecs(P5sig, P5nu, mu, nsrc, P5sigtmp[mu]);
      side_link_forces(mu, sig, mLepage, Pmu, P5sig, Pmumu, P5nu, nsrc);
      /* Add the P5nu vector to P3 */
      for(i=0; i<nsrc; i++) {
        if(ThreeSt[i]!=0) coeff[i] = Lepage[i]/ThreeSt[i];
        else coeff[i] = 0;
        QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all);
      }

      /* Length 3 paths (Not the Naik term) */
      /* Add the force F_mu the 0(2) link in the path: + */
      if(GOES_FORWARDS(mu)) {
        /* OK to clobber P5nu (P3mu and P5nu are both aliases of P7) */
        for(i=0; i<nsrc; i++) {
          QDP_V_eq_V(P5sig[i], P3[sig][i], QDP_all);
        }
        u_shift_color_vecs(P5sig, P3mu, mu, nsrc, P5sigtmp[mu]);
      }
      /* The above shift is not needed if mu is backwards */
      side_link_forces(mu, sig, ThreeSt, xin, P3[sig], Pmu, P3mu, nsrc);
    }

    /* Finally the OneLink and the Naik term */
    if(GOES_BACKWARDS(mu)) {
      /* Do only the forward terms in the Dslash */
      /* Because I have shifted with OPP_DIR(mu) Pmu is a forward shift. */
      /* The one link */
      add_forces_to_mom(Pmu, xin, OPP_DIR(mu), OneLink, nsrc);
      /* For the same reason Pmumu is the forward double link */

      /* Popmu is a backward shift */
      /* OK to clobber P3mu */
      u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]);
      /* The Naik */
      /* link no 1: - */
      add_forces_to_mom(Pmumu, Popmu, OPP_DIR(mu), mNaik, nsrc);
      /* Pmumumu can overwrite Popmu which is no longer needed */
      u_shift_color_vecs(Pmumu, Pmumumu, OPP_DIR(mu), nsrc, Pmumutmp[OPP_DIR(mu)]);
      /* link no 0: + */
      add_forces_to_mom(Pmumumu, xin, OPP_DIR(mu), Naik, nsrc);
    } else {
      /* The rest of the Naik terms */
      /* OK to clobber P3mu */
      u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]);
      /* link no 2: + */
      /* Pmumu is double backward shift */
      add_forces_to_mom(Popmu, Pmumu, mu, Naik, nsrc);
    }
    /* Here we have to do together the Naik term and the one link term */

  }/* mu */
  QOP_trace("test 6\n");
  QOP_trace("test 7\n");

  /* Restore the sign on odd sites, then keep the antihermitian part
     (as computed by QDP_M_eq_antiherm_M). */
  for(mu=0; mu<4; mu++) {
    QDP_M_eq_M(tmpmat, tempmom_qdp[mu], QDP_even);
    QDP_M_eqm_M(tmpmat, tempmom_qdp[mu], QDP_odd);
    QDP_M_eq_antiherm_M(tempmom_qdp[mu], tmpmat, QDP_all);
  }
  QDP_destroy_M(tmpmat);

  /* Free temporary vectors */
  for(i=0; i<nsrc; i++) {
    QDP_destroy_V(Pmu[i]);
    QDP_destroy_V(Pnumu[i]);
    QDP_destroy_V(Prhonumu[i]);
    QDP_destroy_V(P7[i]);
    QDP_destroy_V(P7rho[i]);
    for(dir=0; dir<8; dir++) {
      QDP_destroy_V(xintmp[dir][i]);
      QDP_destroy_V(Pmutmp[dir][i]);
      QDP_destroy_V(Pnumutmp[dir][i]);
      QDP_destroy_V(Prhonumutmp[dir][i]);
      QDP_destroy_V(P7tmp[dir][i]);
    }
    /* P5/P5tmp are aliases into these pools and must not be destroyed
       separately. */
    for(mu=0; mu<4; mu++) {
      QDP_destroy_V(P5s[mu][i]);
      for(dir=0; dir<8; dir++) {
        QDP_destroy_V(P5tmps[mu][dir][i]);
      }
    }
  }
  for(mu=0; mu<8; mu++) {
    for(i=0; i<nsrc; i++) {
      QDP_destroy_V(P3[mu][i]);
    }
  }

  for(i=0; i<nsrc; i++) {
    QDP_destroy_V(tv[i]);
  }

  /* NOTE(review): frees fblink[4..7], while the setup loop allocates
     fblink[OPP_DIR(i)] for i < QOP_common.ndim; these presumably coincide
     for ndim == 4 -- confirm. */
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node;
  info->status = QOP_SUCCESS;
  ASQTAD_FORCE_END;
#undef NC
}
// Accumulate into deriv[] the contributions of a set of staples, given the
// "chain" field associated with each output.
//
//   info         receives final_sec, final_flop and status
//   nout         number of outputs; chain[], nstaples[] and the per-staple
//                tables are indexed by output
//   nin          number of input fields in in[] (and of fields in deriv[])
//   deriv        accumulated into (+=); not zeroed here
//   chain        one field per output, copied into a work field before use
//   in           input fields
//   nstaples     nstaples[io] = number of staples of output io
//   topdir       1..nd
//   sidedir      -nd..nd; 0 means no side link (chain is added directly)
//   toplinknum, sidelinknum   0..nin-1, indices into in[] / deriv[]
//   coef         coef[io][s] = weight of staple s of output io
//
// getU, getC and shiftb are defined outside this block.
// NOTE(review): they presumably are macros that use the locals ftmps, bt2,
// bt3, ctmps, ctn, tc, in and nd by name -- confirm before renaming any.
// PEQM, EQMTM and PEQMTM (flop counts) are likewise defined elsewhere.
void
QOP_staples_deriv(QOP_info_t *info, int nout, int nin,
                  QDP_ColorMatrix *deriv[], QDP_ColorMatrix *chain[],
                  QDP_ColorMatrix *in[], int nstaples[], int *topdir[],
                  int *sidedir[], int *toplinknum[], int *sidelinknum[],
                  QLA_Real *coef[])
{
#define NC QDP_get_nc(in[0])
  double dtime = QOP_time();
  double nflops = 0;
  int nd = QDP_ndim();
  // Lazily-created temporaries: a NULL entry means "not created yet".
  QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *t3, *t4, *tc, *bt2[nd], *bt3[nd], *ctmps[nd];
  int ctn[nd];
  for(int i=0; i<nin; i++)
    for(int j=0; j<nd; j++)
      ftmps[i][j] = NULL;
  for(int i=0; i<nd; i++) bt2[i] = bt3[i] = ctmps[i] = NULL;
  t1 = QDP_create_M();
  t2 = QDP_create_M();
  t3 = QDP_create_M();
  t4 = QDP_create_M();
  tc = QDP_create_M();

  // process in reverse in case calculated staples used as input for others
  for(int io=nout-1; io>=0; io--) {
    // a new chain invalidates the temporaries derived from the previous one
    for(int i=0; i<nd; i++) {
      if(ctmps[i]) QDP_discard_M(ctmps[i]);
      ctn[i] = 0;
    }
    QDP_M_eq_M(tc, chain[io], QDP_all);
    for(int s=0; s<nstaples[io]; s++) {
      QLA_Real c = coef[io][s];
      int tn = toplinknum[io][s];
      int sdir = sidedir[io][s];
      if(sdir==0) {
        // no side link: add the chain itself (c==1 skips the multiply)
        if(c==1) {
          QDP_M_peq_M(deriv[tn], tc, QDP_all);
          nflops += PEQM;
        } else {
          QDP_M_peq_r_times_M(deriv[tn], &c, tc, QDP_all);
          nflops += 2*PEQM;
        }
      } else if(sdir>0) {
        // side link in the positive direction nu
        int nu = sdir-1;
        int mu = topdir[io][s]-1;
        int sn = sidelinknum[io][s];
        QDP_ColorMatrix *Umunu = getU(tn, mu, nu);
        QDP_ColorMatrix *Unumu = getU(sn, nu, mu);
        QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all);
        QDP_M_eq_Ma_times_M(t2, tc, t1, QDP_all);
        QDP_ColorMatrix *tb2 = shiftb(t2, mu);
        QDP_M_eq_M_times_M(t1, tc, Unumu, QDP_all);
        QDP_M_eq_Ma_times_M(t3, in[sn], t1, QDP_all);
        QDP_ColorMatrix *tb3 = shiftb(t3, nu);
        if(c==1) {
          QDP_M_peq_M_times_Ma(deriv[sn], t1, Umunu, QDP_all);
          QDP_M_peq_M(deriv[sn], tb2, QDP_all);
          QDP_M_peq_M(deriv[tn], tb3, QDP_all);
          nflops += 4*EQMTM+PEQMTM+2*PEQM;
        } else {
          QDP_M_eq_M_times_Ma(t4, t1, Umunu, QDP_all);
          QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all);
          QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all);
          QDP_M_peq_r_times_M(deriv[tn], &c, tb3, QDP_all);
          nflops += 5*EQMTM+6*PEQM;
        }
        QDP_discard_M(tb2);
        QDP_discard_M(tb3);
      } else {
        // side link in the negative direction nu
        int nu = -sdir-1;
        int mu = topdir[io][s]-1;
        int sn = sidelinknum[io][s];
        QDP_ColorMatrix *Cmunu = getC(nu);
        QDP_ColorMatrix *Unumu = getU(sn, nu, mu);
        QDP_M_eq_M_times_M(t1, in[sn], Cmunu, QDP_all);
        QDP_M_eq_Ma_times_M(t2, in[tn], t1, QDP_all);
        QDP_ColorMatrix *tb2 = shiftb(t2, mu);
        QDP_M_eq_M_times_M(t3, in[tn], Unumu, QDP_all);
        if(c==1) {
          QDP_M_peq_M_times_Ma(deriv[tn], t1, Unumu, QDP_all);
          QDP_M_peq_M_times_Ma(deriv[sn], t3, Cmunu, QDP_all);
          QDP_M_peq_M(deriv[sn], tb2, QDP_all);
          nflops += 3*EQMTM+2*PEQMTM+PEQM;
        } else {
          QDP_M_eq_M_times_Ma(t4, t1, Unumu, QDP_all);
          QDP_M_peq_r_times_M(deriv[tn], &c, t4, QDP_all);
          QDP_M_eq_M_times_Ma(t4, t3, Cmunu, QDP_all);
          QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all);
          QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all);
          nflops += 5*EQMTM+6*PEQM;
        }
        QDP_discard_M(tb2);
      }
    }
  }

  // free whichever lazily-created temporaries actually exist
  for(int i=0; i<nin; i++)
    for(int j=0; j<nd; j++)
      if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]);
  for(int i=0; i<nd; i++) {
    if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]);
    if(bt3[i]!=NULL) QDP_destroy_M(bt3[i]);
    if(ctmps[i]!=NULL) QDP_destroy_M(ctmps[i]);
  }
  QDP_destroy_M(t1);
  QDP_destroy_M(t2);
  QDP_destroy_M(t3);
  QDP_destroy_M(t4);
  QDP_destroy_M(tc);

  info->final_sec = QOP_time() - dtime;
  info->final_flop = nflops*QDP_sites_on_node;
  info->status = QOP_SUCCESS;
#undef NC
}
void QOP_hisq_force_multi_wrapper_fnmat(QOP_info_t *info, QOP_FermionLinksHisq *flh, QOP_Force *Force, QOP_hisq_coeffs_t *hisq_coeff, REAL *residues, QDP_ColorVector *x[], int *n_orders_naik) { double dtime = QDP_time(); int i, ipath, dir; REAL coeff_mult; double *eps_naik = hisq_coeff->eps_naik; int n_naiks = hisq_coeff->n_naiks; QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod; // Quark paths sorted by net displacement and last directions static Q_path *q_paths_sorted_1 = NULL; static Q_path *q_paths_sorted_2 = NULL; static Q_path *q_paths_sorted_3 = NULL; static int *netbackdir_table_1 = NULL; static int *netbackdir_table_2 = NULL; static int *netbackdir_table_3 = NULL; static int first_force = 1; if(first_force == 1) QOP_make_paths_and_dirs_hisq(hisq_coeff, umethod); int num_q_paths_1 = qop_get_num_q_paths_1(); int num_q_paths_2 = qop_get_num_q_paths_2(); int num_q_paths_3 = qop_get_num_q_paths_3(); Q_path *q_paths_1 = qop_get_q_paths_1(); Q_path *q_paths_2 = qop_get_q_paths_2(); Q_path *q_paths_3 = qop_get_q_paths_3(); Q_path *q_paths_sorted_current = NULL; int *netbackdir_table_current = NULL; int inaik; int n_naik_shift; double final_flop = 0.; size_t nflops = 0; QDP_ColorMatrix * force[4] = {Force->force[0], Force->force[1], Force->force[2], Force->force[3]}; int num_q_paths_current,n_orders_naik_current;//==nterms QDP_ColorMatrix *force_accum_0[4]; QDP_ColorMatrix *force_accum_0_naik[4]; QDP_ColorMatrix *force_accum_1[4]; QDP_ColorMatrix *force_accum_1u[4]; QDP_ColorMatrix *force_accum_2[4]; QDP_ColorMatrix *force_final[4]; QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4]; int nterms = 0, n_order_naik_total; for(inaik = 0; inaik < n_naiks; inaik++) nterms += n_orders_naik[inaik]; n_order_naik_total = nterms; for(i=0;i<4;i++) { Ugf[i] = flh->U_links[i]; Vgf[i] = flh->V_links[i]; Wgf[i] = flh->W_unitlinks[i]; } QDP_ColorMatrix *tmat; QDP_ColorMatrix *mat_tmp0; REAL treal; if( first_force==1 ){ if( q_paths_sorted_1==NULL ) q_paths_sorted_1 = (Q_path 
*)malloc( num_q_paths_1*sizeof(Q_path) ); if(netbackdir_table_1==NULL ) netbackdir_table_1 = (int *)malloc( num_q_paths_1*sizeof(int) ); if( q_paths_sorted_2==NULL ) q_paths_sorted_2 = (Q_path *)malloc( num_q_paths_2*sizeof(Q_path) ); if(netbackdir_table_2==NULL ) netbackdir_table_2 = (int *)malloc( num_q_paths_2*sizeof(int) ); if( q_paths_sorted_3==NULL ) q_paths_sorted_3 = (Q_path *)malloc( num_q_paths_3*sizeof(Q_path) ); if(netbackdir_table_3==NULL ) netbackdir_table_3 = (int *)malloc( num_q_paths_3*sizeof(int) ); else{QOP_printf0("WARNING: remaking sorted path tables\n"); exit(0); } // make sorted tables sort_quark_paths_hisq( q_paths_1, q_paths_sorted_1, num_q_paths_1, 8 ); for( ipath=0; ipath<num_q_paths_1; ipath++ ) netbackdir_table_1[ipath] = find_backwards_gather( &(q_paths_sorted_1[ipath]) ); sort_quark_paths_hisq( q_paths_2, q_paths_sorted_2, num_q_paths_2, 16 ); for( ipath=0; ipath<num_q_paths_2; ipath++ ) netbackdir_table_2[ipath] = find_backwards_gather( &(q_paths_sorted_2[ipath]) ); sort_quark_paths_hisq( q_paths_3, q_paths_sorted_3, num_q_paths_3, 16 ); for( ipath=0; ipath<num_q_paths_3; ipath++ ) netbackdir_table_3[ipath] = find_backwards_gather( &(q_paths_sorted_3[ipath]) ); first_force=0; } tmat = QDP_create_M(); mat_tmp0 = QDP_create_M(); for(i=XUP;i<=TUP;i++){ force_accum_0[i] = QDP_create_M(); force_accum_0_naik[i] = QDP_create_M(); force_accum_1[i] = QDP_create_M(); force_accum_1u[i] = QDP_create_M(); force_accum_2[i] = QDP_create_M(); force_final[i] = QDP_create_M(); } for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum_2[dir], QDP_all); // loop on different naik masses n_naik_shift = 0; for( inaik=0; inaik<n_naiks; inaik++ ) { // smearing level 0 if( 0==inaik ) { n_orders_naik_current = n_order_naik_total; } else { n_orders_naik_current = n_orders_naik[inaik]; } QOP_hisq_force_multi_smearing0_fnmat(info,residues+n_naik_shift, x+n_naik_shift, n_orders_naik_current, force_accum_0, force_accum_0_naik); final_flop += info->final_flop; // 
smearing level 2 if( 0==inaik ) { q_paths_sorted_current = q_paths_sorted_2; num_q_paths_current = num_q_paths_2; netbackdir_table_current = netbackdir_table_2; } else { q_paths_sorted_current = q_paths_sorted_3; num_q_paths_current = num_q_paths_3; netbackdir_table_current = netbackdir_table_3; } QOP_hisq_force_multi_smearing_fnmat( info,Wgf,residues+n_naik_shift, x+n_naik_shift, n_orders_naik_current, force_accum_1, force_accum_0, force_accum_0_naik, num_q_paths_current, q_paths_sorted_current, netbackdir_table_current ); //QOP_printf0("HISQ smear0 flops = %g\n", info->final_flop); final_flop += info->final_flop; if( 0==inaik ) { coeff_mult = 1.0; } else { coeff_mult = eps_naik[inaik]; } for(dir=XUP;dir<=TUP;dir++) { QDP_M_peq_r_times_M(force_accum_2[dir],&coeff_mult, force_accum_1[dir],QDP_all); nflops += 36; } n_naik_shift += n_orders_naik[inaik]; } if ( umethod==QOP_UNITARIZE_NONE ){ // smearing level 1 QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, x, nterms, force_accum_1, force_accum_2, NULL, num_q_paths_1, q_paths_sorted_1, netbackdir_table_1 ); final_flop += info->final_flop; } else if ( umethod==QOP_UNITARIZE_RATIONAL ){ // reunitarization QOP_hisq_force_multi_reunit(info,Vgf,force_accum_1u, force_accum_2); //QOP_printf0("reunit flops = %g\n", info->final_flop); final_flop += info->final_flop; // smearing level 1 QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, x, nterms, force_accum_1, force_accum_1u, NULL, num_q_paths_1, q_paths_sorted_1, netbackdir_table_1 ); //QOP_printf0("HISQ smear1 flops = %g\n", info->final_flop); final_flop += info->final_flop; } else { QOP_printf0("Unknown or unsupported unitarization method\n"); exit(1); } // contraction with the link in question should be done here, // after contributions from all levels of smearing are taken into account for(dir=XUP;dir<=TUP;dir++){ QDP_M_eq_M_times_M(force_final[dir],Ugf[dir],force_accum_1[dir],QDP_all); nflops += 198; } // take into account even/odd parity (it is NOT done 
in "smearing" routine) //eps multiplication done outside QOP for(dir=XUP;dir<=TUP;dir++){ QDP_M_eq_M(tmat,force_final[dir],QDP_all); treal = 2.0; QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_even); treal = -2.0; QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_odd); nflops += 18; } // Put antihermitian traceless part into momentum // add force to momentum for(dir=XUP; dir<=TUP; dir++){ QDP_M_eq_antiherm_M(mat_tmp0, force_final[dir], QDP_all); QDP_M_peq_M(force[dir], mat_tmp0, QDP_all); nflops += 24+18; //QDP_M_peq_M(force_final[dir], force[dir], QDP_all); //QDP_M_eq_antiherm_M(force[dir], force_final[dir], QDP_all); } for(i=XUP;i<=TUP;i++){ QDP_destroy_M( force_accum_0[i] ); QDP_destroy_M( force_accum_0_naik[i] ); QDP_destroy_M( force_accum_1[i] ); QDP_destroy_M( force_accum_1u[i] ); QDP_destroy_M( force_accum_2[i] ); QDP_destroy_M( force_final[i] ); } QDP_destroy_M( tmat ); QDP_destroy_M( mat_tmp0 ); final_flop += ((double)nflops)*QDP_sites_on_node; info->final_sec = QDP_time() - dtime; info->final_flop = final_flop; info->status = QOP_SUCCESS; //QOP_printf0("HISQ force flops = %g\n", info->final_flop); } //hisq_force_multi_wrapper_fnmat
/* Smearing level i*/ static void QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, QDP_ColorMatrix * gf[4], REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_old[4], QDP_ColorMatrix *force_accum_naik_old[4], int internal_num_q_paths, Q_path *internal_q_paths_sorted, int *internal_netbackdir_table) { int i,j,k,lastdir=-99,ipath,ilink; int length,dir,odir; REAL coeff; QDP_ColorMatrix *tmat; QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];; QDP_ColorVector *vec_tmp[2]; int netbackdir; size_t nflops = 0; // table of net path displacements (backwards from usual convention) Q_path *this_path; // pointer to current path /* Allocate fields */ for(i=0;i<=MAX_PATH_LENGTH;i++){ oprod_along_path[i] = QDP_create_M(); } for(i=1;i<=MAX_PATH_LENGTH;i++){ // 0 element is never used (it's unit matrix) mats_along_path[i] = QDP_create_M(); } mat_tmp0 = QDP_create_M(); mat_tmp1 = QDP_create_M(); for(i=0; i<8; i++) stmp[i] = QDP_create_M(); tmat = QDP_create_M(); vec_tmp[0] = QDP_create_V(); vec_tmp[1] = QDP_create_V(); // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum[dir], QDP_all); // loop over paths, and loop over links in path for( ipath=0; ipath<internal_num_q_paths; ipath++ ){ this_path = &(internal_q_paths_sorted[ipath]); if(this_path->forwback== -1)continue; // skip backwards dslash length = this_path->length; netbackdir = internal_netbackdir_table[ipath]; // move f(i-1) force from current site in positive direction, // this corresponds to outer product |X><Y| calculated at the endpoint of the path if( netbackdir<8) { // Not a Naik path link_gather_connection_qdp(oprod_along_path[0] , force_accum_old[OPP_DIR(netbackdir)], tmat, netbackdir ); } else { // Naik path if( NULL==force_accum_naik_old ) { QOP_printf0( "hisq_force_multi_smearing_fnmat: mismatch:\n" ); QOP_printf0( 
"force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" ); exit(0); } // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION link_gather_connection_qdp(oprod_along_path[0] , force_accum_naik_old[OPP_DIR(netbackdir-8)], tmat, netbackdir ); } // figure out how much of the outer products along the path must be // recomputed. j is last one needing recomputation. k is first one. j=length-1; // default is recompute all if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0; for(ilink=j;ilink>=k;ilink--){ link_transport_connection_qdp( oprod_along_path[length-ilink], oprod_along_path[length-ilink-1], gf, mat_tmp0, stmp, this_path->dir[ilink] ); nflops += 198; } // maintain an array of transports "to this point" along the path. // Don't recompute beginning parts of path if same as last path ilink=0; // first link where new transport is needed // Sometimes we don't need the matrix for the last link if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ; ilink<k; ilink++ ){ if( ilink==0 ){ dir = this_path->dir[0]; if( GOES_FORWARDS(dir) ){ QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir], QDP_backward, QDP_all); QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all); QDP_discard_M(tmat); } else{ QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all); } } else { // ilink != 0 dir = OPP_DIR(this_path->dir[ilink]); link_transport_connection_qdp( mats_along_path[ilink+1], mats_along_path[ilink], gf, mat_tmp0, stmp, dir ); nflops += 198; } } // end loop over links // A path has (length+1) points, counting the ends. At first // point, no "down" direction links have their momenta "at this // point". At last, no "up" ... 
if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ilink=0; ilink<=k; ilink++ ){ if(ilink<length)dir = this_path->dir[ilink]; else dir=NODIR; coeff = this_path->coeff; if( (ilink%2)==1 )coeff = -coeff; // add in contribution to the force if( ilink<length && GOES_FORWARDS(dir) ){ link_gather_connection_qdp(mat_tmp1, oprod_along_path[length-ilink-1], tmat, dir ); if(ilink==0) { QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all); } else { QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all); nflops += 36; } if( ilink>0 && GOES_BACKWARDS(lastdir) ){ odir = OPP_DIR(lastdir); if( ilink==1 ){ QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all); QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } else{ link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], tmat, odir ); QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all); } QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all); nflops += 36; } lastdir = dir; } // end loop over links in path // } // end loop over paths // QDP_destroy_V( vec_tmp[0] ); QDP_destroy_V( vec_tmp[1] ); QDP_destroy_M( mat_tmp0 ); QDP_destroy_M( mat_tmp1 ); QDP_destroy_M( tmat ); for(i=0; i<8; i++) QDP_destroy_M(stmp[i]); for(i=0;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( oprod_along_path[i] ); } for(i=1;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( mats_along_path[i] ); } info->final_flop = ((double)nflops)*QDP_sites_on_node; return; }//hisq_force_multi_smearing_fnmat
/*
 * Update the gauge links in place with sweeps over sublattices and
 * directions.
 *
 *   info    receives final_sec, final_flop and status
 *   links   gauge links, modified in place
 *   beta    coupling; beta/QLA_Nc is stored in the file-level variable fac
 *   coeffs  gauge action coefficients; adjoint_plaquette != 0 is reported
 *           through QOP_error
 *   rs0     random state; exposed into the file-level variable rs for the
 *           duration of the call and reset at the end
 *   nup     number of outer iterations
 *   nhb     sweeps applying hb_func per outer iteration
 *   nover   sweeps applying over_func per outer iteration (run after the
 *           hb_func sweeps)
 *
 * Even/odd sublattices are used unless the rectangle or parallelogram
 * coefficient is non-zero, in which case the 32 subsets returned by
 * QOP_get_sub32() are used.  fac, rs, hb_func and over_func are defined
 * outside this block.
 */
void
QOP_symanzik_1loop_gauge_heatbath_qdp(QOP_info_t *info,
                                      QDP_ColorMatrix *links[],
                                      QLA_Real beta,
                                      QOP_gauge_coeffs_t *coeffs,
                                      QDP_RandomState *rs0,
                                      int nup, int nhb, int nover)
{
#define NC QDP_get_nc(links[0])
  double dtime = QOP_time();
  double nflops = 0;  /* never incremented: the reported flop count is 0 */

  if(coeffs->adjoint_plaquette) {
    QOP_error("%s: adj plaq not supported\n", __func__);
  }
  fac = beta/QLA_Nc;

  QDP_Lattice *lat = QDP_get_lattice_M(links[0]);
  int nd = QDP_ndim_L(lat);

  /* choose the sublattice decomposition */
  QDP_Subset *cbs = QDP_even_and_odd_L(lat);
  int ncb = 2;
  if(coeffs->rectangle!=0 || coeffs->parallelogram!=0) {
    ncb = 32;
    cbs = QOP_get_sub32(lat);
  }

  QDP_ColorMatrix *stpl = QDP_create_M_L(lat);
  QDP_ColorMatrix *w = QDP_create_M_L(lat);
  QDP_ColorMatrix *prod = QDP_create_M_L(lat);

  rs = QDP_expose_S(rs0);

  for(int iter=0; iter<nup; iter++) {
    /* phase 0: nhb sweeps with hb_func; phase 1: nover sweeps with over_func */
    for(int phase=0; phase<2; phase++) {
      int nsweep = (phase==0) ? nhb : nover;
      for(int sweep=0; sweep<nsweep; sweep++) {
        for(int cb=0; cb<ncb; cb++) {
          QDP_Subset sub = cbs[cb];
          for(int mu=0; mu<nd; mu++) {
            QDP_M_eq_zero(stpl, sub);
            QOP_symanzik_1loop_gauge_staple_qdp(info, links, stpl, mu, coeffs, cbs, cb);
            QDP_M_eq_M_times_Ma(w, links[mu], stpl, sub);
            if(phase==0) {
              QDP_M_eq_funcit(w, hb_func, sub);
            } else {
              QDP_M_eq_funcit(w, over_func, sub);
            }
            /* new link = updated matrix times old link */
            QDP_M_eq_M_times_M(prod, w, links[mu], sub);
            QDP_M_eq_M(links[mu], prod, sub);
          }
        }
      }
    }
  }

  QDP_reset_S(rs0);

  QDP_destroy_M(prod);
  QDP_destroy_M(w);
  QDP_destroy_M(stpl);

  info->final_sec = QOP_time() - dtime;
  info->final_flop = nflops*QDP_sites_on_node;
  info->status = QOP_SUCCESS;
#undef NC
}