/* Computes the staple : mu +-------+ nu | | | | X X Where the mu link can be any su3_matrix. The result is saved in staple. if staple==NULL then the result is not saved. It also adds the computed staple to the fatlink[mu] with weight coef. */ static void compute_gen_staple(QDP_ColorMatrix *staple, int mu, int nu, QDP_ColorMatrix *link, double dcoef, QDP_ColorMatrix *gauge[], QDP_ColorMatrix *fl[]) { QLA_Real coef = dcoef; QDP_ColorMatrix *ts0, *ts1; QDP_ColorMatrix *tmat1, *tmat2; QDP_ColorMatrix *tempmat; ts0 = QDP_create_M(); ts1 = QDP_create_M(); tmat1 = QDP_create_M(); tmat2 = QDP_create_M(); tempmat = QDP_create_M(); /* Upper staple */ QDP_M_eq_sM(ts0, link, QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_eq_sM(ts1, gauge[nu], QDP_neighbor[mu], QDP_forward, QDP_all); if(staple!=NULL) { /* Save the staple */ QDP_M_eq_M_times_Ma(tmat1, ts0, ts1, QDP_all); QDP_M_eq_M_times_M(staple, gauge[nu], tmat1, QDP_all); } else { /* No need to save the staple. Add it to the fatlinks */ QDP_M_eq_M_times_Ma(tmat1, ts0, ts1, QDP_all); QDP_M_eq_M_times_M(tmat2, gauge[nu], tmat1, QDP_all); QDP_M_peq_r_times_M(fl[mu], &coef, tmat2, QDP_all); } /* lower staple */ QDP_M_eq_sM(ts0, gauge[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_Ma_times_M(tmat1, gauge[nu], link, QDP_all); QDP_M_eq_M_times_M(tempmat, tmat1, ts0, QDP_all); QDP_M_eq_sM(ts0, tempmat, QDP_neighbor[nu], QDP_backward, QDP_all); if(staple!=NULL) { /* Save the staple */ QDP_M_peq_M(staple, ts0, QDP_all); QDP_M_peq_r_times_M(fl[mu], &coef, staple, QDP_all); } else { /* No need to save the staple. Add it to the fatlinks */ QDP_M_peq_r_times_M(fl[mu], &coef, ts0, QDP_all); } QDP_destroy_M(ts0); QDP_destroy_M(ts1); QDP_destroy_M(tmat1); QDP_destroy_M(tmat2); QDP_destroy_M(tempmat); } /* compute_gen_staple */
void QOP_hisq_force_multi_fnmat2_qdp(QOP_info_t *info, QOP_FermionLinksHisq *flh, QDP_ColorMatrix *force[], QOP_hisq_coeffs_t *hisq_coeff, REAL *residues, QDP_ColorVector *x[], int *n_orders_naik) { #define NC QDP_get_nc(force[0]) double dtime = QOP_time(); QDP_ColorMatrix *deriv[4]; for(int mu=0; mu<4; mu++) { deriv[mu] = QDP_create_M(); QDP_M_eq_zero(deriv[mu], QDP_all); } QOP_hisq_deriv_multi_fnmat2_qdp(info, flh, deriv, hisq_coeff, residues, x, n_orders_naik); // contraction with the link in question should be done here, // after contributions from all levels of smearing are taken into account // Put antihermitian traceless part into momentum // add force to momentum QDP_ColorMatrix *mtmp = QDP_create_M(); for(int dir=0; dir<4; dir++) { QDP_M_eq_M_times_Ma(mtmp, flh->U_links[dir], deriv[dir], QDP_all); QDP_M_eq_antiherm_M(deriv[dir], mtmp, QDP_all); QDP_M_peq_M(force[dir], deriv[dir], QDP_all); } info->final_flop += (4.*(198+24+18))*QDP_sites_on_node; QDP_destroy_M(mtmp); for(int mu=0; mu<4; mu++) { QDP_destroy_M(deriv[mu]); } info->final_sec = QOP_time() - dtime; //QOP_printf0("HISQ force flops = %g\n", info->final_flop); #undef NC }
// topdir = 1..nd // sidedir = -nd..nd // toplinknum,sidelinknum = 0..nin-1 void QOP_staples(QOP_info_t *info, int nout, int nin, QDP_ColorMatrix *out[], QDP_ColorMatrix *in[], int nstaples[], int *topdir[], int *sidedir[], int *toplinknum[], int *sidelinknum[], QLA_Real *coef[]) { #define NC QDP_get_nc(in[0]) double dtime = QOP_time(); double nflops = 0; int nd = QDP_ndim(); QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *bt2[nd]; for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) ftmps[i][j] = NULL; for(int i=0; i<nd; i++) bt2[i] = NULL; t1 = QDP_create_M(); t2 = QDP_create_M(); for(int io=0; io<nout; io++) { //QOP_printf0("%i: ns: %i\n", io, nstaples[io]); for(int s=0; s<nstaples[io]; s++) { QLA_Real c = coef[io][s]; int tn = toplinknum[io][s]; int sdir = sidedir[io][s]; //QOP_printf0(" %i: sdir: %i c: %g\n", s, sdir, c); if(sdir==0) { if(c==1) { QDP_M_peq_M(out[io], in[tn], QDP_all); nflops += PEQM; } else { QDP_M_peq_r_times_M(out[io], &c, in[tn], QDP_all); nflops += 2*PEQM; } } else if(sdir>0) { int nu = sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Umunu = getU(tn, mu, nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all); if(c==1) { QDP_M_peq_M_times_Ma(out[io], t1, Unumu, QDP_all); nflops += EQMTM+PEQMTM; } else { QDP_M_eq_M_times_Ma(t2, t1, Unumu, QDP_all); QDP_M_peq_r_times_M(out[io], &c, t2, QDP_all); nflops += 2*EQMTM+2*PEQM; } } else { int nu = -sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[tn], Unumu, QDP_all); QDP_M_eq_Ma_times_M(t2, in[sn], t1, QDP_all); QDP_ColorMatrix *tb = shiftb(t2, nu); if(c==1) { QDP_M_peq_M(out[io], tb, QDP_all); nflops += 2*EQMTM+PEQM; } else { QDP_M_peq_r_times_M(out[io], &c, tb, QDP_all); nflops += 2*EQMTM+2*PEQM; } QDP_discard_M(tb); } } } for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]); for(int i=0; i<nd; i++) if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]); QDP_destroy_M(t1); QDP_destroy_M(t2); info->final_sec = QOP_time() - dtime; info->final_flop = nflops*QDP_sites_on_node; info->status = QOP_SUCCESS; #undef NC }
// topdir = 1..nd // sidedir = -nd..nd // toplinknum,sidelinknum = 0..nin-1 void QOP_staples_deriv(QOP_info_t *info, int nout, int nin, QDP_ColorMatrix *deriv[], QDP_ColorMatrix *chain[], QDP_ColorMatrix *in[], int nstaples[], int *topdir[], int *sidedir[], int *toplinknum[], int *sidelinknum[], QLA_Real *coef[]) { #define NC QDP_get_nc(in[0]) double dtime = QOP_time(); double nflops = 0; int nd = QDP_ndim(); QDP_ColorMatrix *ftmps[nin][nd], *t1, *t2, *t3, *t4, *tc, *bt2[nd], *bt3[nd], *ctmps[nd]; int ctn[nd]; for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) ftmps[i][j] = NULL; for(int i=0; i<nd; i++) bt2[i] = bt3[i] = ctmps[i] = NULL; t1 = QDP_create_M(); t2 = QDP_create_M(); t3 = QDP_create_M(); t4 = QDP_create_M(); tc = QDP_create_M(); // process in reverse in case calculated staples used as input for others for(int io=nout-1; io>=0; io--) { for(int i=0; i<nd; i++) { if(ctmps[i]) QDP_discard_M(ctmps[i]); ctn[i] = 0; } QDP_M_eq_M(tc, chain[io], QDP_all); for(int s=0; s<nstaples[io]; s++) { QLA_Real c = coef[io][s]; int tn = toplinknum[io][s]; int sdir = sidedir[io][s]; //QOP_printf0("io: %i s: %i sdir: %i tn: %i c: %g\n", io, s, sdir, tn, c); if(sdir==0) { if(c==1) { QDP_M_peq_M(deriv[tn], tc, QDP_all); nflops += PEQM; } else { QDP_M_peq_r_times_M(deriv[tn], &c, tc, QDP_all); nflops += 2*PEQM; } } else if(sdir>0) { int nu = sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; //QOP_printf0(" mu: %i nu: %i sn: %i\n", mu, nu, sn); QDP_ColorMatrix *Umunu = getU(tn, mu, nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Umunu, QDP_all); QDP_M_eq_Ma_times_M(t2, tc, t1, QDP_all); QDP_ColorMatrix *tb2 = shiftb(t2, mu); QDP_M_eq_M_times_M(t1, tc, Unumu, QDP_all); QDP_M_eq_Ma_times_M(t3, in[sn], t1, QDP_all); QDP_ColorMatrix *tb3 = shiftb(t3, nu); if(c==1) { QDP_M_peq_M_times_Ma(deriv[sn], t1, Umunu, QDP_all); QDP_M_peq_M(deriv[sn], tb2, QDP_all); QDP_M_peq_M(deriv[tn], tb3, QDP_all); nflops += 4*EQMTM+PEQMTM+2*PEQM; } else { QDP_M_eq_M_times_Ma(t4, t1, Umunu, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all); QDP_M_peq_r_times_M(deriv[tn], &c, tb3, QDP_all); nflops += 5*EQMTM+6*PEQM; } QDP_discard_M(tb2); QDP_discard_M(tb3); } else { int nu = -sdir-1; int mu = topdir[io][s]-1; int sn = sidelinknum[io][s]; QDP_ColorMatrix *Cmunu = getC(nu); QDP_ColorMatrix *Unumu = getU(sn, nu, mu); QDP_M_eq_M_times_M(t1, in[sn], Cmunu, QDP_all); QDP_M_eq_Ma_times_M(t2, in[tn], t1, QDP_all); QDP_ColorMatrix *tb2 = shiftb(t2, mu); QDP_M_eq_M_times_M(t3, in[tn], Unumu, QDP_all); if(c==1) { QDP_M_peq_M_times_Ma(deriv[tn], t1, Unumu, QDP_all); QDP_M_peq_M_times_Ma(deriv[sn], t3, Cmunu, QDP_all); QDP_M_peq_M(deriv[sn], tb2, QDP_all); nflops += 3*EQMTM+2*PEQMTM+PEQM; } else { QDP_M_eq_M_times_Ma(t4, t1, Unumu, QDP_all); QDP_M_peq_r_times_M(deriv[tn], &c, t4, QDP_all); QDP_M_eq_M_times_Ma(t4, t3, Cmunu, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, t4, QDP_all); QDP_M_peq_r_times_M(deriv[sn], &c, tb2, QDP_all); nflops += 5*EQMTM+6*PEQM; } QDP_discard_M(tb2); } } } for(int i=0; i<nin; i++) for(int j=0; j<nd; j++) if(ftmps[i][j]!=NULL) QDP_destroy_M(ftmps[i][j]); for(int i=0; i<nd; i++) { if(bt2[i]!=NULL) QDP_destroy_M(bt2[i]); if(bt3[i]!=NULL) QDP_destroy_M(bt3[i]); if(ctmps[i]!=NULL) QDP_destroy_M(ctmps[i]); } QDP_destroy_M(t1); QDP_destroy_M(t2); QDP_destroy_M(t3); QDP_destroy_M(t4); QDP_destroy_M(tc); info->final_sec = QOP_time() - dtime; info->final_flop = nflops*QDP_sites_on_node; info->status = QOP_SUCCESS; #undef NC }
void QOPPC(symanzik_1loop_gauge_force1) (QOP_info_t *info, QOP_GaugeField *gauge, QOP_Force *force, QOP_gauge_coeffs_t *coeffs, REAL eps) { REAL Plaq, Rect, Pgm ; QDP_ColorMatrix *tempmom_qdp[4]; QDP_ColorMatrix *Amu[6]; // products of 2 links Unu(x)*Umu(x+nu) QDP_ColorMatrix *tmpmat; QDP_ColorMatrix *tmpmat1; QDP_ColorMatrix *tmpmat2; QDP_ColorMatrix *staples; QDP_ColorMatrix *tmpmat3; QDP_ColorMatrix *tmpmat4; int i, k; int mu, nu, sig; double dtime; //REAL eb3 = -eps*beta/3.0; REAL eb3 = -eps/3.0; int j[3][2] = {{1,2}, {0,2}, {0,1}}; // QOP_printf0("beta: %e, eb3: %e\n", beta, eb3); dtime = -QOP_time(); for(mu=0; mu<4; mu++) { tempmom_qdp[mu] = QDP_create_M(); QDP_M_eq_zero(tempmom_qdp[mu], QDP_all); } tmpmat = QDP_create_M(); for(i=0; i<QOP_common.ndim; i++) { fblink[i] = gauge->links[i]; fblink[OPP_DIR(i)] = QDP_create_M(); QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all); QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all); } for(i=0; i<6; i++) { Amu[i] = QDP_create_M(); } staples = QDP_create_M(); tmpmat1 = QDP_create_M(); tmpmat2 = QDP_create_M(); tmpmat3 = QDP_create_M(); tmpmat4 = QDP_create_M(); Plaq = coeffs->plaquette; Rect = coeffs->rectangle; Pgm = coeffs->parallelogram; //Construct 3-staples and rectangles for(mu=0; mu<4; mu++) { i=0; for(nu=0; nu<4; nu++) { if(nu!=mu){ // tmpmat1 = Umu(x+nu) QDP_M_eq_sM(tmpmat1, fblink[mu], QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_eq_M_times_M(Amu[i], fblink[nu], tmpmat1, QDP_all); //tmpmat2 = Umu(x-nu) QDP_M_eq_sM(tmpmat2, fblink[mu], QDP_neighbor[nu], QDP_backward, QDP_all); QDP_M_eq_M_times_M(Amu[i+3], fblink[OPP_DIR(nu)], tmpmat2, QDP_all); //tmpmat = U_{nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_Ma(staples, Amu[i], tmpmat, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all); //tmpmat = U_{-nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_Ma_times_M(tmpmat3, fblink[OPP_DIR(nu)], staples, QDP_all); QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all); QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all); QDP_M_eq_Ma_times_M(tmpmat4, tmpmat2, tmpmat3, QDP_all); QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all); //tmpmat = U_{-nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_Ma(tmpmat3, tmpmat2, tmpmat, QDP_all); QDP_M_eq_M_times_Ma(tmpmat, tmpmat3, staples, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[nu], QDP_forward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all); //tmpmat = U_{-nu}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_Ma(staples, Amu[i+3], tmpmat, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all); QDP_M_eq_Ma_times_M(tmpmat3, fblink[nu], staples, QDP_all); QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all); QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_backward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all); QDP_M_eq_Ma_times_M(tmpmat, tmpmat3, tmpmat1, QDP_all); QDP_M_eq_sM(tmpmat4, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all); QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_M_times_M(tmpmat3, staples, tmpmat, QDP_all); QDP_M_eq_M_times_Ma(tmpmat4, tmpmat3, tmpmat1, QDP_all); QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all); i++; } } // Construct the pgm staples and add them to force QDP_M_eq_zero(staples, QDP_all); i=0; for(nu=0; nu<4; nu++){ if(nu!=mu){ k=0; for(sig=0; sig<4;sig ++){ if(sig!=mu && nu!=sig){ // the nu_sig_mu ... staple and 3 reflections //tmpmat = Amu["sig"](x+nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["sig"](x+nu) QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all); //tmpmat3 = Unu(x+mu+sig) QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE? //tmpmat2 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = Usig(x+mu) QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); //tmpmat = Amu["sig"](x-nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_backward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu) QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all); //tmpmat3 = U_{-nu}(x+mu+sig) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE? //tmpmat2 = U_{-nu}nu(x)*Amu["sig"](x-nu)*adj(Unu(x+mu+sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = Usig(x+mu) QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); //tmpmat = Amu["-sig"](x-nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_backward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu) QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all); //tmpmat = U_{-nu}(x+mu-sig) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE? //tmpmat2 = U_{-nu}nu(x)*Amu["-sig"](x-nu)*adj(Unu(x+mu-sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = U_{-sig}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(Unu(x+mu-sig))*adj(U_{-sig}(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); //tmpmat = Amu["-sig"](x+nu) QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["-sig"](x+nu) QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all); //tmpmat3 = Unu(x+mu-sig) QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all); QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE? //tmpmat2 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig)) QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all); //tmpmat = U_{-sig}(x+mu) QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all); //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu)) QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all); QDP_M_peq_M(staples, tmpmat1, QDP_all); k++; }//close if sig!=nu ... }//close sig loop i++; }// close if nu!=mu }//close the pgm nu loop QDP_M_peq_r_times_M(tempmom_qdp[mu], &Pgm, staples, QDP_all); }// closes the mu loop #ifdef CHKSUM QLA_ColorMatrix qcm; QLA_Complex det, chk; QLA_c_eq_r(chk, 0); #endif for(mu=0; mu<4; mu++){ QDP_M_eq_M_times_Ma(tmpmat, fblink[mu], tempmom_qdp[mu], QDP_all); // HERE? QDP_M_eq_r_times_M_plus_M( tempmom_qdp[mu], &eb3, tmpmat, force->force[mu], QDP_all);// HERE? QDP_M_eq_antiherm_M(force->force[mu], tempmom_qdp[mu], QDP_all);// HERE #ifdef CHKSUM QDP_m_eq_sum_M(&qcm, force->force[mu], QDP_all); QLA_C_eq_det_M(&det, &qcm); QLA_c_peq_c(chk, det); #endif } #ifdef CHKSUM QOP_printf0("chksum: %g %g\n", QLA_real(chk), QLA_imag(chk)); #endif //DESTROY various fields QDP_destroy_M(tmpmat); QDP_destroy_M(tmpmat1); QDP_destroy_M(tmpmat2); QDP_destroy_M(tmpmat3); QDP_destroy_M(staples); QDP_destroy_M(tmpmat4); for(mu=0; mu<4; mu++){ QDP_destroy_M(tempmom_qdp[mu]); } for(i=0; i<6; i++) { QDP_destroy_M(Amu[i]); } for(i=4; i<8; i++) { QDP_destroy_M(fblink[i]); } dtime += QOP_time(); double nflop = 96720; info->final_sec = dtime; info->final_flop = nflop*QDP_sites_on_node; info->status = QOP_SUCCESS; //QOP_printf0("Time in slow g_force: %e\n", info->final_sec); }
void QOP_hisq_force_multi_wrapper_fnmat(QOP_info_t *info, QOP_FermionLinksHisq *flh, QOP_Force *Force, QOP_hisq_coeffs_t *hisq_coeff, REAL *residues, QDP_ColorVector *x[], int *n_orders_naik) { double dtime = QDP_time(); int i, ipath, dir; REAL coeff_mult; double *eps_naik = hisq_coeff->eps_naik; int n_naiks = hisq_coeff->n_naiks; QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod; // Quark paths sorted by net displacement and last directions static Q_path *q_paths_sorted_1 = NULL; static Q_path *q_paths_sorted_2 = NULL; static Q_path *q_paths_sorted_3 = NULL; static int *netbackdir_table_1 = NULL; static int *netbackdir_table_2 = NULL; static int *netbackdir_table_3 = NULL; static int first_force = 1; if(first_force == 1) QOP_make_paths_and_dirs_hisq(hisq_coeff, umethod); int num_q_paths_1 = qop_get_num_q_paths_1(); int num_q_paths_2 = qop_get_num_q_paths_2(); int num_q_paths_3 = qop_get_num_q_paths_3(); Q_path *q_paths_1 = qop_get_q_paths_1(); Q_path *q_paths_2 = qop_get_q_paths_2(); Q_path *q_paths_3 = qop_get_q_paths_3(); Q_path *q_paths_sorted_current = NULL; int *netbackdir_table_current = NULL; int inaik; int n_naik_shift; double final_flop = 0.; size_t nflops = 0; QDP_ColorMatrix * force[4] = {Force->force[0], Force->force[1], Force->force[2], Force->force[3]}; int num_q_paths_current,n_orders_naik_current;//==nterms QDP_ColorMatrix *force_accum_0[4]; QDP_ColorMatrix *force_accum_0_naik[4]; QDP_ColorMatrix *force_accum_1[4]; QDP_ColorMatrix *force_accum_1u[4]; QDP_ColorMatrix *force_accum_2[4]; QDP_ColorMatrix *force_final[4]; QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4]; int nterms = 0, n_order_naik_total; for(inaik = 0; inaik < n_naiks; inaik++) nterms += n_orders_naik[inaik]; n_order_naik_total = nterms; for(i=0;i<4;i++) { Ugf[i] = flh->U_links[i]; Vgf[i] = flh->V_links[i]; Wgf[i] = flh->W_unitlinks[i]; } QDP_ColorMatrix *tmat; QDP_ColorMatrix *mat_tmp0; REAL treal; if( first_force==1 ){ if( q_paths_sorted_1==NULL ) q_paths_sorted_1 = (Q_path *)malloc( num_q_paths_1*sizeof(Q_path) ); if(netbackdir_table_1==NULL ) netbackdir_table_1 = (int *)malloc( num_q_paths_1*sizeof(int) ); if( q_paths_sorted_2==NULL ) q_paths_sorted_2 = (Q_path *)malloc( num_q_paths_2*sizeof(Q_path) ); if(netbackdir_table_2==NULL ) netbackdir_table_2 = (int *)malloc( num_q_paths_2*sizeof(int) ); if( q_paths_sorted_3==NULL ) q_paths_sorted_3 = (Q_path *)malloc( num_q_paths_3*sizeof(Q_path) ); if(netbackdir_table_3==NULL ) netbackdir_table_3 = (int *)malloc( num_q_paths_3*sizeof(int) ); else{QOP_printf0("WARNING: remaking sorted path tables\n"); exit(0); } // make sorted tables sort_quark_paths_hisq( q_paths_1, q_paths_sorted_1, num_q_paths_1, 8 ); for( ipath=0; ipath<num_q_paths_1; ipath++ ) netbackdir_table_1[ipath] = find_backwards_gather( &(q_paths_sorted_1[ipath]) ); sort_quark_paths_hisq( q_paths_2, q_paths_sorted_2, num_q_paths_2, 16 ); for( ipath=0; ipath<num_q_paths_2; ipath++ ) netbackdir_table_2[ipath] = find_backwards_gather( &(q_paths_sorted_2[ipath]) ); sort_quark_paths_hisq( q_paths_3, q_paths_sorted_3, num_q_paths_3, 16 ); for( ipath=0; ipath<num_q_paths_3; ipath++ ) netbackdir_table_3[ipath] = find_backwards_gather( &(q_paths_sorted_3[ipath]) ); first_force=0; } tmat = QDP_create_M(); mat_tmp0 = QDP_create_M(); for(i=XUP;i<=TUP;i++){ force_accum_0[i] = QDP_create_M(); force_accum_0_naik[i] = QDP_create_M(); force_accum_1[i] = QDP_create_M(); force_accum_1u[i] = QDP_create_M(); force_accum_2[i] = QDP_create_M(); force_final[i] = QDP_create_M(); } for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum_2[dir], QDP_all); // loop on different naik masses n_naik_shift = 0; for( inaik=0; inaik<n_naiks; inaik++ ) { // smearing level 0 if( 0==inaik ) { n_orders_naik_current = n_order_naik_total; } else { n_orders_naik_current = n_orders_naik[inaik]; } QOP_hisq_force_multi_smearing0_fnmat(info,residues+n_naik_shift, x+n_naik_shift, n_orders_naik_current, force_accum_0, force_accum_0_naik); final_flop += info->final_flop; // smearing level 2 if( 0==inaik ) { q_paths_sorted_current = q_paths_sorted_2; num_q_paths_current = num_q_paths_2; netbackdir_table_current = netbackdir_table_2; } else { q_paths_sorted_current = q_paths_sorted_3; num_q_paths_current = num_q_paths_3; netbackdir_table_current = netbackdir_table_3; } QOP_hisq_force_multi_smearing_fnmat( info,Wgf,residues+n_naik_shift, x+n_naik_shift, n_orders_naik_current, force_accum_1, force_accum_0, force_accum_0_naik, num_q_paths_current, q_paths_sorted_current, netbackdir_table_current ); //QOP_printf0("HISQ smear0 flops = %g\n", info->final_flop); final_flop += info->final_flop; if( 0==inaik ) { coeff_mult = 1.0; } else { coeff_mult = eps_naik[inaik]; } for(dir=XUP;dir<=TUP;dir++) { QDP_M_peq_r_times_M(force_accum_2[dir],&coeff_mult, force_accum_1[dir],QDP_all); nflops += 36; } n_naik_shift += n_orders_naik[inaik]; } if ( umethod==QOP_UNITARIZE_NONE ){ // smearing level 1 QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, x, nterms, force_accum_1, force_accum_2, NULL, num_q_paths_1, q_paths_sorted_1, netbackdir_table_1 ); final_flop += info->final_flop; } else if ( umethod==QOP_UNITARIZE_RATIONAL ){ // reunitarization QOP_hisq_force_multi_reunit(info,Vgf,force_accum_1u, force_accum_2); //QOP_printf0("reunit flops = %g\n", info->final_flop); final_flop += info->final_flop; // smearing level 1 QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, x, nterms, force_accum_1, force_accum_1u, NULL, num_q_paths_1, q_paths_sorted_1, netbackdir_table_1 ); //QOP_printf0("HISQ smear1 flops = %g\n", info->final_flop); final_flop += info->final_flop; } else { QOP_printf0("Unknown or unsupported unitarization method\n"); exit(1); } // contraction with the link in question should be done here, // after contributions from all levels of smearing are taken into account for(dir=XUP;dir<=TUP;dir++){ QDP_M_eq_M_times_M(force_final[dir],Ugf[dir],force_accum_1[dir],QDP_all); nflops += 198; } // take into account even/odd parity (it is NOT done in "smearing" routine) //eps multiplication done outside QOP for(dir=XUP;dir<=TUP;dir++){ QDP_M_eq_M(tmat,force_final[dir],QDP_all); treal = 2.0; QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_even); treal = -2.0; QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_odd); nflops += 18; } // Put antihermitian traceless part into momentum // add force to momentum for(dir=XUP; dir<=TUP; dir++){ QDP_M_eq_antiherm_M(mat_tmp0, force_final[dir], QDP_all); QDP_M_peq_M(force[dir], mat_tmp0, QDP_all); nflops += 24+18; //QDP_M_peq_M(force_final[dir], force[dir], QDP_all); //QDP_M_eq_antiherm_M(force[dir], force_final[dir], QDP_all); } for(i=XUP;i<=TUP;i++){ QDP_destroy_M( force_accum_0[i] ); QDP_destroy_M( force_accum_0_naik[i] ); QDP_destroy_M( force_accum_1[i] ); QDP_destroy_M( force_accum_1u[i] ); QDP_destroy_M( force_accum_2[i] ); QDP_destroy_M( force_final[i] ); } QDP_destroy_M( tmat ); QDP_destroy_M( mat_tmp0 ); final_flop += ((double)nflops)*QDP_sites_on_node; info->final_sec = QDP_time() - dtime; info->final_flop = final_flop; info->status = QOP_SUCCESS; //QOP_printf0("HISQ force flops = %g\n", info->final_flop); } //hisq_force_multi_wrapper_fnmat
static void get_staple_plaq(QDP_ColorMatrix *staple, int mu, QDP_ColorMatrix *u[], QOP_gauge_coeffs_t *coeffs, QDP_Subset subset, QDP_Subset osubset) { #define NC QDP_get_nc(staple) QDP_Lattice *lat = QDP_get_lattice_M(staple); int nd = QDP_ndim_L(lat); QDP_Shift *neighbor = QDP_neighbor_L(lat); QLA_Real plaq = coeffs->plaquette; QLA_Real adpl = coeffs->adjoint_plaquette; #if 1 QDP_ColorMatrix *temp1, *temp2, *temp3, *temp4, *temp5, *temp6; //temp1 = QDP_create_M(); temp2 = QDP_create_M_L(lat); //temp3 = QDP_create_M(); temp4 = QDP_create_M_L(lat); //temp5 = QDP_create_M(); temp6 = QDP_create_M_L(lat); QDP_Complex *tc = NULL; if(adpl!=0) tc = QDP_create_C_L(lat); /* staple += u[nu](x) u[mu](x+nu) u*[nu](x+mu) * + u*[nu](x-nu) u[mu](x-nu) u[nu](x-nu+mu) */ for(int nu=0; nu<nd; nu++) { if (nu == mu) continue; temp1 = QDP_create_M_L(lat); temp3 = QDP_create_M_L(lat); temp5 = QDP_create_M_L(lat); QDP_M_eq_sM(temp1, u[nu], neighbor[mu], QDP_forward, QDP_all_L(lat)); QDP_M_eq_Ma_times_M(temp2, u[nu], u[mu], osubset); QDP_M_eq_sM(temp3, u[mu], neighbor[nu], QDP_forward, subset); QDP_M_eq_M_times_M(temp4, temp2, temp1, osubset); QDP_M_eq_sM(temp5, temp4, neighbor[nu], QDP_backward, subset); QDP_M_eq_M_times_M(temp6, u[nu], temp3, subset); //QDP_M_peq_M_times_Ma(staple, temp6, temp1, subset); //QDP_M_peq_M(staple, temp5, subset); if(adpl==0) { QDP_M_peq_M_times_Ma(temp5, temp6, temp1, subset); QDP_M_peq_r_times_M(staple, &plaq, temp5, subset); } else { QLA_Complex z; QLA_c_eq_r(z, plaq/adpl); QDP_C_eq_c(tc, &z, subset); QDP_M_eq_M_times_Ma(temp2, temp6, temp1, subset); QDP_C_peq_M_dot_M(tc, temp2, u[mu], subset); QDP_C_eq_r_times_C(tc, &adpl, tc, subset); QDP_M_peq_C_times_M(staple, tc, temp2, subset); QDP_C_eq_c(tc, &z, subset); QDP_C_peq_M_dot_M(tc, temp5, u[mu], subset); QDP_C_eq_r_times_C(tc, &adpl, tc, subset); QDP_M_peq_C_times_M(staple, tc, temp5, subset); } //QDP_discard_M(temp1); //QDP_discard_M(temp3); //QDP_discard_M(temp5); QDP_destroy_M(temp1); QDP_destroy_M(temp3); QDP_destroy_M(temp5); } /* closes nu loop */ if(adpl!=0) QDP_destroy_C(tc); //QDP_destroy_M(temp1); QDP_destroy_M(temp2); //QDP_destroy_M(temp3); QDP_destroy_M(temp4); //QDP_destroy_M(temp5); QDP_destroy_M(temp6); #else QDP_ColorMatrix *t = QDP_create_M_L(lat); int nu, path[3]; QDP_Subset subs[2]; subs[0] = subset; subs[1] = osubset; for(nu=0; nu<nd; nu++) { if (nu == mu) continue; path[0] = 1+nu; path[1] = -(1+mu); path[2] = -(1+nu); path_prod(u, t, path, 3, 1, subs, neighsubeo); QDP_M_peq_M(staple, t, subset); path[0] = -(1+nu); path[1] = -(1+mu); path[2] = 1+nu; path_prod(u, t, path, 3, 1, subs, neighsubeo); QDP_M_peq_M(staple, t, subset); } QDP_destroy_M(t); #endif }